@volley/recognition-client-sdk-node22 0.1.424
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +344 -0
- package/dist/browser.bundled.d.ts +1280 -0
- package/dist/browser.d.ts +10 -0
- package/dist/browser.d.ts.map +1 -0
- package/dist/config-builder.d.ts +134 -0
- package/dist/config-builder.d.ts.map +1 -0
- package/dist/errors.d.ts +41 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/factory.d.ts +36 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/index.bundled.d.ts +2572 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +10199 -0
- package/dist/index.js.map +7 -0
- package/dist/recog-client-sdk.browser.d.ts +10 -0
- package/dist/recog-client-sdk.browser.d.ts.map +1 -0
- package/dist/recog-client-sdk.browser.js +5746 -0
- package/dist/recog-client-sdk.browser.js.map +7 -0
- package/dist/recognition-client.d.ts +128 -0
- package/dist/recognition-client.d.ts.map +1 -0
- package/dist/recognition-client.types.d.ts +271 -0
- package/dist/recognition-client.types.d.ts.map +1 -0
- package/dist/simplified-vgf-recognition-client.d.ts +178 -0
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -0
- package/dist/utils/audio-ring-buffer.d.ts +69 -0
- package/dist/utils/audio-ring-buffer.d.ts.map +1 -0
- package/dist/utils/message-handler.d.ts +45 -0
- package/dist/utils/message-handler.d.ts.map +1 -0
- package/dist/utils/url-builder.d.ts +28 -0
- package/dist/utils/url-builder.d.ts.map +1 -0
- package/dist/vgf-recognition-mapper.d.ts +66 -0
- package/dist/vgf-recognition-mapper.d.ts.map +1 -0
- package/dist/vgf-recognition-state.d.ts +91 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -0
- package/package.json +74 -0
- package/src/browser.ts +24 -0
- package/src/config-builder.spec.ts +265 -0
- package/src/config-builder.ts +240 -0
- package/src/errors.ts +84 -0
- package/src/factory.spec.ts +215 -0
- package/src/factory.ts +47 -0
- package/src/index.ts +127 -0
- package/src/recognition-client.spec.ts +889 -0
- package/src/recognition-client.ts +844 -0
- package/src/recognition-client.types.ts +338 -0
- package/src/simplified-vgf-recognition-client.integration.spec.ts +718 -0
- package/src/simplified-vgf-recognition-client.spec.ts +1525 -0
- package/src/simplified-vgf-recognition-client.ts +524 -0
- package/src/utils/audio-ring-buffer.spec.ts +335 -0
- package/src/utils/audio-ring-buffer.ts +170 -0
- package/src/utils/message-handler.spec.ts +311 -0
- package/src/utils/message-handler.ts +131 -0
- package/src/utils/url-builder.spec.ts +252 -0
- package/src/utils/url-builder.ts +92 -0
- package/src/vgf-recognition-mapper.spec.ts +78 -0
- package/src/vgf-recognition-mapper.ts +232 -0
- package/src/vgf-recognition-state.ts +102 -0
|
@@ -0,0 +1,2572 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Provider types and enums for recognition services
|
|
5
|
+
* NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Supported speech recognition providers
|
|
9
|
+
*/
|
|
10
|
+
declare enum RecognitionProvider {
|
|
11
|
+
ASSEMBLYAI = "assemblyai",
|
|
12
|
+
DEEPGRAM = "deepgram",
|
|
13
|
+
ELEVENLABS = "elevenlabs",
|
|
14
|
+
FIREWORKS = "fireworks",
|
|
15
|
+
GOOGLE = "google",
|
|
16
|
+
GEMINI_BATCH = "gemini-batch",
|
|
17
|
+
OPENAI_BATCH = "openai-batch",
|
|
18
|
+
OPENAI_REALTIME = "openai-realtime",
|
|
19
|
+
TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
|
|
20
|
+
TEST_ASR_STREAMING = "test-asr-streaming"
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* ASR API type - distinguishes between streaming and file-based transcription APIs
|
|
24
|
+
* - STREAMING: Real-time streaming APIs (Deepgram, AssemblyAI, Google)
|
|
25
|
+
* - FILE_BASED: File upload/batch APIs (OpenAI Batch, Gemini Batch)
|
|
26
|
+
*/
|
|
27
|
+
declare enum ASRApiType {
|
|
28
|
+
STREAMING = "streaming",
|
|
29
|
+
FILE_BASED = "file-based"
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Deepgram model names
|
|
33
|
+
*/
|
|
34
|
+
declare enum DeepgramModel {
|
|
35
|
+
NOVA_2 = "nova-2",
|
|
36
|
+
NOVA_3 = "nova-3",
|
|
37
|
+
FLUX_GENERAL_EN = "flux-general-en"
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Google Cloud Speech models
|
|
41
|
+
* @see https://cloud.google.com/speech-to-text/docs/transcription-model
|
|
42
|
+
* @see https://cloud.google.com/speech-to-text/v2/docs/chirp_3-model
|
|
43
|
+
*/
|
|
44
|
+
declare enum GoogleModel {
|
|
45
|
+
CHIRP_3 = "chirp_3",
|
|
46
|
+
CHIRP_2 = "chirp_2",
|
|
47
|
+
CHIRP = "chirp",
|
|
48
|
+
LATEST_LONG = "latest_long",
|
|
49
|
+
LATEST_SHORT = "latest_short",
|
|
50
|
+
TELEPHONY = "telephony",
|
|
51
|
+
TELEPHONY_SHORT = "telephony_short",
|
|
52
|
+
DEFAULT = "default",
|
|
53
|
+
COMMAND_AND_SEARCH = "command_and_search",
|
|
54
|
+
PHONE_CALL = "phone_call",
|
|
55
|
+
VIDEO = "video"
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Fireworks AI models for ASR
|
|
59
|
+
* @see https://docs.fireworks.ai/guides/querying-asr-models
|
|
60
|
+
* @see https://fireworks.ai/models/fireworks/fireworks-asr-large
|
|
61
|
+
*/
|
|
62
|
+
declare enum FireworksModel {
|
|
63
|
+
ASR_V1 = "fireworks-asr-large",
|
|
64
|
+
ASR_V2 = "fireworks-asr-v2",
|
|
65
|
+
WHISPER_V3 = "whisper-v3",
|
|
66
|
+
WHISPER_V3_TURBO = "whisper-v3-turbo"
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* ElevenLabs Scribe models for speech-to-text
|
|
70
|
+
* @see https://elevenlabs.io/blog/introducing-scribe-v2-realtime
|
|
71
|
+
* @see https://elevenlabs.io/docs/cookbooks/speech-to-text/streaming
|
|
72
|
+
* @see https://elevenlabs.io/docs/api-reference/speech-to-text/convert
|
|
73
|
+
*/
|
|
74
|
+
declare enum ElevenLabsModel {
|
|
75
|
+
SCRIBE_V2_REALTIME = "scribe_v2_realtime",
|
|
76
|
+
SCRIBE_V1 = "scribe_v1"
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* OpenAI Realtime API transcription models
|
|
80
|
+
* These are the verified `input_audio_transcription.model` values.
|
|
81
|
+
* @see https://platform.openai.com/docs/guides/realtime
|
|
82
|
+
*/
|
|
83
|
+
declare enum OpenAIRealtimeModel {
|
|
84
|
+
GPT_4O_MINI_TRANSCRIBE = "gpt-4o-mini-transcribe"
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Type alias for any model from any provider
|
|
88
|
+
*/
|
|
89
|
+
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | ElevenLabsModel | OpenAIRealtimeModel | string;
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Audio encoding types
|
|
93
|
+
*/
|
|
94
|
+
declare enum AudioEncoding {
|
|
95
|
+
ENCODING_UNSPECIFIED = 0,
|
|
96
|
+
LINEAR16 = 1,
|
|
97
|
+
OGG_OPUS = 2,
|
|
98
|
+
FLAC = 3,
|
|
99
|
+
MULAW = 4,
|
|
100
|
+
ALAW = 5
|
|
101
|
+
}
|
|
102
|
+
declare namespace AudioEncoding {
|
|
103
|
+
/**
|
|
104
|
+
* Convert numeric ID to AudioEncoding enum
|
|
105
|
+
* @param id - Numeric encoding identifier (0-5)
|
|
106
|
+
* @returns AudioEncoding enum value or undefined if invalid
|
|
107
|
+
*/
|
|
108
|
+
function fromId(id: number): AudioEncoding | undefined;
|
|
109
|
+
/**
|
|
110
|
+
* Convert string name to AudioEncoding enum
|
|
111
|
+
* @param nameStr - String name like "linear16", "LINEAR16", "ogg_opus", "OGG_OPUS", etc. (case insensitive)
|
|
112
|
+
* @returns AudioEncoding enum value or undefined if invalid
|
|
113
|
+
*/
|
|
114
|
+
function fromName(nameStr: string): AudioEncoding | undefined;
|
|
115
|
+
/**
|
|
116
|
+
* Convert AudioEncoding enum to numeric ID
|
|
117
|
+
* @param encoding - AudioEncoding enum value
|
|
118
|
+
* @returns Numeric ID (0-5)
|
|
119
|
+
*/
|
|
120
|
+
function toId(encoding: AudioEncoding): number;
|
|
121
|
+
/**
|
|
122
|
+
* Convert AudioEncoding enum to string name
|
|
123
|
+
* @param encoding - AudioEncoding enum value
|
|
124
|
+
* @returns String name like "LINEAR16", "MULAW", etc.
|
|
125
|
+
*/
|
|
126
|
+
function toName(encoding: AudioEncoding): string;
|
|
127
|
+
/**
|
|
128
|
+
* Check if a numeric ID is a valid encoding
|
|
129
|
+
* @param id - Numeric identifier to validate
|
|
130
|
+
* @returns true if valid encoding ID
|
|
131
|
+
*/
|
|
132
|
+
function isIdValid(id: number): boolean;
|
|
133
|
+
/**
|
|
134
|
+
* Check if a string name is a valid encoding
|
|
135
|
+
* @param nameStr - String name to validate
|
|
136
|
+
* @returns true if valid encoding name
|
|
137
|
+
*/
|
|
138
|
+
function isNameValid(nameStr: string): boolean;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Common sample rates (in Hz)
|
|
142
|
+
*/
|
|
143
|
+
declare enum SampleRate {
|
|
144
|
+
RATE_8000 = 8000,
|
|
145
|
+
RATE_16000 = 16000,
|
|
146
|
+
RATE_22050 = 22050,
|
|
147
|
+
RATE_24000 = 24000,
|
|
148
|
+
RATE_32000 = 32000,
|
|
149
|
+
RATE_44100 = 44100,
|
|
150
|
+
RATE_48000 = 48000
|
|
151
|
+
}
|
|
152
|
+
declare namespace SampleRate {
|
|
153
|
+
/**
|
|
154
|
+
* Convert Hz value to SampleRate enum
|
|
155
|
+
* @param hz - Sample rate in Hz (8000, 16000, etc.)
|
|
156
|
+
* @returns SampleRate enum value or undefined if invalid
|
|
157
|
+
*/
|
|
158
|
+
function fromHz(hz: number): SampleRate | undefined;
|
|
159
|
+
/**
|
|
160
|
+
* Convert string name to SampleRate enum
|
|
161
|
+
* @param nameStr - String name like "rate_8000", "RATE_16000", etc. (case insensitive)
|
|
162
|
+
* @returns SampleRate enum value or undefined if invalid
|
|
163
|
+
*/
|
|
164
|
+
function fromName(nameStr: string): SampleRate | undefined;
|
|
165
|
+
/**
|
|
166
|
+
* Convert SampleRate enum to Hz value
|
|
167
|
+
* @param rate - SampleRate enum value
|
|
168
|
+
* @returns Hz value (8000, 16000, etc.)
|
|
169
|
+
*/
|
|
170
|
+
function toHz(rate: SampleRate): number;
|
|
171
|
+
/**
|
|
172
|
+
* Convert SampleRate enum to string name
|
|
173
|
+
* @param rate - SampleRate enum value
|
|
174
|
+
* @returns String name like "RATE_8000", "RATE_16000", etc.
|
|
175
|
+
*/
|
|
176
|
+
function toName(rate: SampleRate): string;
|
|
177
|
+
/**
|
|
178
|
+
* Check if a numeric Hz value is a valid sample rate
|
|
179
|
+
* @param hz - Hz value to validate
|
|
180
|
+
* @returns true if valid sample rate
|
|
181
|
+
*/
|
|
182
|
+
function isHzValid(hz: number): boolean;
|
|
183
|
+
/**
|
|
184
|
+
* Check if a string name is a valid sample rate
|
|
185
|
+
* @param nameStr - String name to validate
|
|
186
|
+
* @returns true if valid sample rate name
|
|
187
|
+
*/
|
|
188
|
+
function isNameValid(nameStr: string): boolean;
|
|
189
|
+
}
|
|
190
|
+
/**
|
|
191
|
+
* Supported languages for recognition
|
|
192
|
+
* Using BCP-47 language tags
|
|
193
|
+
*/
|
|
194
|
+
declare enum Language {
|
|
195
|
+
ENGLISH_US = "en-US",
|
|
196
|
+
ENGLISH_GB = "en-GB",
|
|
197
|
+
SPANISH_ES = "es-ES",
|
|
198
|
+
SPANISH_MX = "es-MX",
|
|
199
|
+
FRENCH_FR = "fr-FR",
|
|
200
|
+
GERMAN_DE = "de-DE",
|
|
201
|
+
ITALIAN_IT = "it-IT",
|
|
202
|
+
PORTUGUESE_BR = "pt-BR",
|
|
203
|
+
JAPANESE_JP = "ja-JP",
|
|
204
|
+
KOREAN_KR = "ko-KR",
|
|
205
|
+
CHINESE_CN = "zh-CN",
|
|
206
|
+
CHINESE_TW = "zh-TW"
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
/**
|
|
210
|
+
* Recognition Result Types V1
|
|
211
|
+
* NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
|
|
212
|
+
* Types and schemas for recognition results sent to SDK clients
|
|
213
|
+
*/
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* Message type discriminator for recognition results V1
|
|
217
|
+
*/
|
|
218
|
+
declare enum RecognitionResultTypeV1 {
|
|
219
|
+
TRANSCRIPTION = "Transcription",
|
|
220
|
+
FUNCTION_CALL = "FunctionCall",
|
|
221
|
+
METADATA = "Metadata",
|
|
222
|
+
ERROR = "Error",
|
|
223
|
+
CLIENT_CONTROL_MESSAGE = "ClientControlMessage",
|
|
224
|
+
AUDIO_METRICS = "AudioMetrics"
|
|
225
|
+
}
|
|
226
|
+
/**
|
|
227
|
+
* Transcription result V1 - contains transcript message
|
|
228
|
+
* In the long run game side should not need to know it. In the short run it is send back to client.
|
|
229
|
+
* NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
|
|
230
|
+
*/
|
|
231
|
+
declare const TranscriptionResultSchemaV1: z.ZodObject<{
|
|
232
|
+
type: z.ZodLiteral<RecognitionResultTypeV1.TRANSCRIPTION>;
|
|
233
|
+
audioUtteranceId: z.ZodString;
|
|
234
|
+
finalTranscript: z.ZodString;
|
|
235
|
+
finalTranscriptConfidence: z.ZodOptional<z.ZodNumber>;
|
|
236
|
+
pendingTranscript: z.ZodOptional<z.ZodString>;
|
|
237
|
+
pendingTranscriptConfidence: z.ZodOptional<z.ZodNumber>;
|
|
238
|
+
is_finished: z.ZodBoolean;
|
|
239
|
+
voiceStart: z.ZodOptional<z.ZodNumber>;
|
|
240
|
+
voiceDuration: z.ZodOptional<z.ZodNumber>;
|
|
241
|
+
voiceEnd: z.ZodOptional<z.ZodNumber>;
|
|
242
|
+
startTimestamp: z.ZodOptional<z.ZodNumber>;
|
|
243
|
+
endTimestamp: z.ZodOptional<z.ZodNumber>;
|
|
244
|
+
receivedAtMs: z.ZodOptional<z.ZodNumber>;
|
|
245
|
+
accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
246
|
+
}, "strip", z.ZodTypeAny, {
|
|
247
|
+
type: RecognitionResultTypeV1.TRANSCRIPTION;
|
|
248
|
+
audioUtteranceId: string;
|
|
249
|
+
finalTranscript: string;
|
|
250
|
+
is_finished: boolean;
|
|
251
|
+
finalTranscriptConfidence?: number | undefined;
|
|
252
|
+
pendingTranscript?: string | undefined;
|
|
253
|
+
pendingTranscriptConfidence?: number | undefined;
|
|
254
|
+
voiceStart?: number | undefined;
|
|
255
|
+
voiceDuration?: number | undefined;
|
|
256
|
+
voiceEnd?: number | undefined;
|
|
257
|
+
startTimestamp?: number | undefined;
|
|
258
|
+
endTimestamp?: number | undefined;
|
|
259
|
+
receivedAtMs?: number | undefined;
|
|
260
|
+
accumulatedAudioTimeMs?: number | undefined;
|
|
261
|
+
}, {
|
|
262
|
+
type: RecognitionResultTypeV1.TRANSCRIPTION;
|
|
263
|
+
audioUtteranceId: string;
|
|
264
|
+
finalTranscript: string;
|
|
265
|
+
is_finished: boolean;
|
|
266
|
+
finalTranscriptConfidence?: number | undefined;
|
|
267
|
+
pendingTranscript?: string | undefined;
|
|
268
|
+
pendingTranscriptConfidence?: number | undefined;
|
|
269
|
+
voiceStart?: number | undefined;
|
|
270
|
+
voiceDuration?: number | undefined;
|
|
271
|
+
voiceEnd?: number | undefined;
|
|
272
|
+
startTimestamp?: number | undefined;
|
|
273
|
+
endTimestamp?: number | undefined;
|
|
274
|
+
receivedAtMs?: number | undefined;
|
|
275
|
+
accumulatedAudioTimeMs?: number | undefined;
|
|
276
|
+
}>;
|
|
277
|
+
type TranscriptionResultV1 = z.infer<typeof TranscriptionResultSchemaV1>;
|
|
278
|
+
/**
|
|
279
|
+
* Function call result V1 - similar to LLM function call
|
|
280
|
+
* In the long run game server should know it, rather than TV or client.
|
|
281
|
+
*/
|
|
282
|
+
declare const FunctionCallResultSchemaV1: z.ZodObject<{
|
|
283
|
+
type: z.ZodLiteral<RecognitionResultTypeV1.FUNCTION_CALL>;
|
|
284
|
+
audioUtteranceId: z.ZodString;
|
|
285
|
+
functionName: z.ZodString;
|
|
286
|
+
functionArgJson: z.ZodString;
|
|
287
|
+
}, "strip", z.ZodTypeAny, {
|
|
288
|
+
type: RecognitionResultTypeV1.FUNCTION_CALL;
|
|
289
|
+
audioUtteranceId: string;
|
|
290
|
+
functionName: string;
|
|
291
|
+
functionArgJson: string;
|
|
292
|
+
}, {
|
|
293
|
+
type: RecognitionResultTypeV1.FUNCTION_CALL;
|
|
294
|
+
audioUtteranceId: string;
|
|
295
|
+
functionName: string;
|
|
296
|
+
functionArgJson: string;
|
|
297
|
+
}>;
|
|
298
|
+
type FunctionCallResultV1 = z.infer<typeof FunctionCallResultSchemaV1>;
|
|
299
|
+
/**
|
|
300
|
+
* Transcript outcome type - categorizes final transcript state
|
|
301
|
+
* Used in Metadata schema. Maps 1:1 with Datadog metrics:
|
|
302
|
+
* - WITH_CONTENT → recog.client.websocket.transcript.final_with_content
|
|
303
|
+
* - EMPTY → recog.client.websocket.transcript.final_empty
|
|
304
|
+
* - NEVER_SENT → derived from sessions.streamed - final_with_content - final_empty
|
|
305
|
+
* - ERROR_* → 1:1 mapping to ErrorTypeV1 for error-caused outcomes
|
|
306
|
+
*/
|
|
307
|
+
declare enum TranscriptOutcomeType {
|
|
308
|
+
WITH_CONTENT = "with_content",
|
|
309
|
+
EMPTY = "empty",
|
|
310
|
+
NEVER_SENT = "never_sent",
|
|
311
|
+
ERROR_AUTHENTICATION = "error_authentication",
|
|
312
|
+
ERROR_VALIDATION = "error_validation",
|
|
313
|
+
ERROR_PROVIDER = "error_provider",
|
|
314
|
+
ERROR_TIMEOUT = "error_timeout",
|
|
315
|
+
ERROR_QUOTA = "error_quota",
|
|
316
|
+
ERROR_INTERNAL_QUOTA = "error_internal_quota",
|
|
317
|
+
ERROR_CONNECTION = "error_connection",
|
|
318
|
+
ERROR_NO_AUDIO = "error_no_audio",
|
|
319
|
+
ERROR_CIRCUIT_BREAKER = "error_circuit_breaker",
|
|
320
|
+
ERROR_UNKNOWN = "error_unknown"
|
|
321
|
+
}
|
|
322
|
+
/**
|
|
323
|
+
* Metadata result V1 - contains metadata, timing information, and ASR config
|
|
324
|
+
* Sent when the provider connection closes to provide final timing metrics and config
|
|
325
|
+
* In the long run game server should know it, rather than TV or client.
|
|
326
|
+
*/
|
|
327
|
+
declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
328
|
+
type: z.ZodLiteral<RecognitionResultTypeV1.METADATA>;
|
|
329
|
+
audioUtteranceId: z.ZodString;
|
|
330
|
+
recordingStartMs: z.ZodOptional<z.ZodNumber>;
|
|
331
|
+
recordingEndMs: z.ZodOptional<z.ZodNumber>;
|
|
332
|
+
transcriptEndMs: z.ZodOptional<z.ZodNumber>;
|
|
333
|
+
socketCloseAtMs: z.ZodOptional<z.ZodNumber>;
|
|
334
|
+
duration: z.ZodOptional<z.ZodNumber>;
|
|
335
|
+
volume: z.ZodOptional<z.ZodNumber>;
|
|
336
|
+
accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
337
|
+
costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
|
|
338
|
+
apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
|
|
339
|
+
asrConfig: z.ZodOptional<z.ZodString>;
|
|
340
|
+
rawAsrMetadata: z.ZodOptional<z.ZodString>;
|
|
341
|
+
transcriptOutcome: z.ZodOptional<z.ZodNativeEnum<typeof TranscriptOutcomeType>>;
|
|
342
|
+
audioMetrics: z.ZodOptional<z.ZodObject<{
|
|
343
|
+
valid: z.ZodBoolean;
|
|
344
|
+
audioBeginMs: z.ZodNumber;
|
|
345
|
+
audioEndMs: z.ZodNumber;
|
|
346
|
+
maxVolume: z.ZodNumber;
|
|
347
|
+
minVolume: z.ZodNumber;
|
|
348
|
+
avgVolume: z.ZodNumber;
|
|
349
|
+
silenceRatio: z.ZodNumber;
|
|
350
|
+
clippingRatio: z.ZodNumber;
|
|
351
|
+
snrEstimate: z.ZodNullable<z.ZodNumber>;
|
|
352
|
+
lastNonSilenceMs: z.ZodNumber;
|
|
353
|
+
timestamp: z.ZodString;
|
|
354
|
+
}, "strip", z.ZodTypeAny, {
|
|
355
|
+
valid: boolean;
|
|
356
|
+
audioBeginMs: number;
|
|
357
|
+
audioEndMs: number;
|
|
358
|
+
maxVolume: number;
|
|
359
|
+
minVolume: number;
|
|
360
|
+
avgVolume: number;
|
|
361
|
+
silenceRatio: number;
|
|
362
|
+
clippingRatio: number;
|
|
363
|
+
snrEstimate: number | null;
|
|
364
|
+
lastNonSilenceMs: number;
|
|
365
|
+
timestamp: string;
|
|
366
|
+
}, {
|
|
367
|
+
valid: boolean;
|
|
368
|
+
audioBeginMs: number;
|
|
369
|
+
audioEndMs: number;
|
|
370
|
+
maxVolume: number;
|
|
371
|
+
minVolume: number;
|
|
372
|
+
avgVolume: number;
|
|
373
|
+
silenceRatio: number;
|
|
374
|
+
clippingRatio: number;
|
|
375
|
+
snrEstimate: number | null;
|
|
376
|
+
lastNonSilenceMs: number;
|
|
377
|
+
timestamp: string;
|
|
378
|
+
}>>;
|
|
379
|
+
}, "strip", z.ZodTypeAny, {
|
|
380
|
+
type: RecognitionResultTypeV1.METADATA;
|
|
381
|
+
audioUtteranceId: string;
|
|
382
|
+
recordingStartMs?: number | undefined;
|
|
383
|
+
recordingEndMs?: number | undefined;
|
|
384
|
+
transcriptEndMs?: number | undefined;
|
|
385
|
+
socketCloseAtMs?: number | undefined;
|
|
386
|
+
duration?: number | undefined;
|
|
387
|
+
volume?: number | undefined;
|
|
388
|
+
accumulatedAudioTimeMs?: number | undefined;
|
|
389
|
+
costInUSD?: number | undefined;
|
|
390
|
+
apiType?: ASRApiType | undefined;
|
|
391
|
+
asrConfig?: string | undefined;
|
|
392
|
+
rawAsrMetadata?: string | undefined;
|
|
393
|
+
transcriptOutcome?: TranscriptOutcomeType | undefined;
|
|
394
|
+
audioMetrics?: {
|
|
395
|
+
valid: boolean;
|
|
396
|
+
audioBeginMs: number;
|
|
397
|
+
audioEndMs: number;
|
|
398
|
+
maxVolume: number;
|
|
399
|
+
minVolume: number;
|
|
400
|
+
avgVolume: number;
|
|
401
|
+
silenceRatio: number;
|
|
402
|
+
clippingRatio: number;
|
|
403
|
+
snrEstimate: number | null;
|
|
404
|
+
lastNonSilenceMs: number;
|
|
405
|
+
timestamp: string;
|
|
406
|
+
} | undefined;
|
|
407
|
+
}, {
|
|
408
|
+
type: RecognitionResultTypeV1.METADATA;
|
|
409
|
+
audioUtteranceId: string;
|
|
410
|
+
recordingStartMs?: number | undefined;
|
|
411
|
+
recordingEndMs?: number | undefined;
|
|
412
|
+
transcriptEndMs?: number | undefined;
|
|
413
|
+
socketCloseAtMs?: number | undefined;
|
|
414
|
+
duration?: number | undefined;
|
|
415
|
+
volume?: number | undefined;
|
|
416
|
+
accumulatedAudioTimeMs?: number | undefined;
|
|
417
|
+
costInUSD?: number | undefined;
|
|
418
|
+
apiType?: ASRApiType | undefined;
|
|
419
|
+
asrConfig?: string | undefined;
|
|
420
|
+
rawAsrMetadata?: string | undefined;
|
|
421
|
+
transcriptOutcome?: TranscriptOutcomeType | undefined;
|
|
422
|
+
audioMetrics?: {
|
|
423
|
+
valid: boolean;
|
|
424
|
+
audioBeginMs: number;
|
|
425
|
+
audioEndMs: number;
|
|
426
|
+
maxVolume: number;
|
|
427
|
+
minVolume: number;
|
|
428
|
+
avgVolume: number;
|
|
429
|
+
silenceRatio: number;
|
|
430
|
+
clippingRatio: number;
|
|
431
|
+
snrEstimate: number | null;
|
|
432
|
+
lastNonSilenceMs: number;
|
|
433
|
+
timestamp: string;
|
|
434
|
+
} | undefined;
|
|
435
|
+
}>;
|
|
436
|
+
type MetadataResultV1 = z.infer<typeof MetadataResultSchemaV1>;
|
|
437
|
+
/**
|
|
438
|
+
* Error type enum V1 - categorizes different types of errors
|
|
439
|
+
*/
|
|
440
|
+
declare enum ErrorTypeV1 {
|
|
441
|
+
AUTHENTICATION_ERROR = "authentication_error",
|
|
442
|
+
VALIDATION_ERROR = "validation_error",
|
|
443
|
+
PROVIDER_ERROR = "provider_error",
|
|
444
|
+
TIMEOUT_ERROR = "timeout_error",
|
|
445
|
+
QUOTA_EXCEEDED = "quota_exceeded",
|
|
446
|
+
INTERNAL_QUOTA_EXHAUSTED = "internal_quota_exhausted",
|
|
447
|
+
CONNECTION_ERROR = "connection_error",
|
|
448
|
+
NO_AUDIO_ERROR = "no_audio_error",
|
|
449
|
+
CIRCUIT_BREAKER_OPEN = "circuit_breaker_open",
|
|
450
|
+
UNKNOWN_ERROR = "unknown_error"
|
|
451
|
+
}
|
|
452
|
+
/**
|
|
453
|
+
* Error result V1 - contains error message
|
|
454
|
+
* In the long run game server should know it, rather than TV or client.
|
|
455
|
+
*/
|
|
456
|
+
declare const ErrorResultSchemaV1: z.ZodObject<{
|
|
457
|
+
type: z.ZodLiteral<RecognitionResultTypeV1.ERROR>;
|
|
458
|
+
audioUtteranceId: z.ZodString;
|
|
459
|
+
errorType: z.ZodOptional<z.ZodNativeEnum<typeof ErrorTypeV1>>;
|
|
460
|
+
message: z.ZodOptional<z.ZodString>;
|
|
461
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
462
|
+
description: z.ZodOptional<z.ZodString>;
|
|
463
|
+
}, "strip", z.ZodTypeAny, {
|
|
464
|
+
type: RecognitionResultTypeV1.ERROR;
|
|
465
|
+
audioUtteranceId: string;
|
|
466
|
+
errorType?: ErrorTypeV1 | undefined;
|
|
467
|
+
message?: string | undefined;
|
|
468
|
+
code?: string | number | undefined;
|
|
469
|
+
description?: string | undefined;
|
|
470
|
+
}, {
|
|
471
|
+
type: RecognitionResultTypeV1.ERROR;
|
|
472
|
+
audioUtteranceId: string;
|
|
473
|
+
errorType?: ErrorTypeV1 | undefined;
|
|
474
|
+
message?: string | undefined;
|
|
475
|
+
code?: string | number | undefined;
|
|
476
|
+
description?: string | undefined;
|
|
477
|
+
}>;
|
|
478
|
+
type ErrorResultV1 = z.infer<typeof ErrorResultSchemaV1>;
|
|
479
|
+
/**
|
|
480
|
+
* Client control actions enum V1
|
|
481
|
+
* Actions that can be sent from server to client to control the recognition stream
|
|
482
|
+
* In the long run audio client(mic) should know it, rather than servers.
|
|
483
|
+
*/
|
|
484
|
+
declare enum ClientControlActionV1 {
|
|
485
|
+
READY_FOR_UPLOADING_RECORDING = "ready_for_uploading_recording",
|
|
486
|
+
STOP_RECORDING = "stop_recording"
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
/**
|
|
490
|
+
* Error Exception Types
|
|
491
|
+
*
|
|
492
|
+
* Defines structured exception types for each ErrorTypeV1 category.
|
|
493
|
+
* Each exception type has metadata about whether it's immediately available
|
|
494
|
+
* (can be shown to user right away vs needs investigation/retry).
|
|
495
|
+
*/
|
|
496
|
+
|
|
497
|
+
/**
|
|
498
|
+
* Authentication/Authorization Error
|
|
499
|
+
* isImmediatelyAvailable: false
|
|
500
|
+
* These are system configuration issues, not user-facing
|
|
501
|
+
*/
|
|
502
|
+
declare const AuthenticationExceptionSchema: z.ZodObject<{
|
|
503
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
504
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
505
|
+
message: z.ZodString;
|
|
506
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
507
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
508
|
+
description: z.ZodOptional<z.ZodString>;
|
|
509
|
+
errorType: z.ZodLiteral<ErrorTypeV1.AUTHENTICATION_ERROR>;
|
|
510
|
+
isImmediatelyAvailable: z.ZodLiteral<false>;
|
|
511
|
+
service: z.ZodOptional<z.ZodString>;
|
|
512
|
+
authMethod: z.ZodOptional<z.ZodString>;
|
|
513
|
+
}, "strip", z.ZodTypeAny, {
|
|
514
|
+
message: string;
|
|
515
|
+
errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
|
|
516
|
+
isImmediatelyAvailable: false;
|
|
517
|
+
provider?: RecognitionProvider | undefined;
|
|
518
|
+
code?: string | number | undefined;
|
|
519
|
+
audioUtteranceId?: string | undefined;
|
|
520
|
+
timestamp?: number | undefined;
|
|
521
|
+
description?: string | undefined;
|
|
522
|
+
service?: string | undefined;
|
|
523
|
+
authMethod?: string | undefined;
|
|
524
|
+
}, {
|
|
525
|
+
message: string;
|
|
526
|
+
errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
|
|
527
|
+
isImmediatelyAvailable: false;
|
|
528
|
+
provider?: RecognitionProvider | undefined;
|
|
529
|
+
code?: string | number | undefined;
|
|
530
|
+
audioUtteranceId?: string | undefined;
|
|
531
|
+
timestamp?: number | undefined;
|
|
532
|
+
description?: string | undefined;
|
|
533
|
+
service?: string | undefined;
|
|
534
|
+
authMethod?: string | undefined;
|
|
535
|
+
}>;
|
|
536
|
+
type AuthenticationException = z.infer<typeof AuthenticationExceptionSchema>;
|
|
537
|
+
/**
|
|
538
|
+
* Validation Error
|
|
539
|
+
* isImmediatelyAvailable: true
|
|
540
|
+
* User provided invalid input - can show them what's wrong
|
|
541
|
+
*/
|
|
542
|
+
declare const ValidationExceptionSchema: z.ZodObject<{
|
|
543
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
544
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
545
|
+
message: z.ZodString;
|
|
546
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
547
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
548
|
+
description: z.ZodOptional<z.ZodString>;
|
|
549
|
+
errorType: z.ZodLiteral<ErrorTypeV1.VALIDATION_ERROR>;
|
|
550
|
+
isImmediatelyAvailable: z.ZodLiteral<true>;
|
|
551
|
+
field: z.ZodOptional<z.ZodString>;
|
|
552
|
+
expected: z.ZodOptional<z.ZodString>;
|
|
553
|
+
received: z.ZodOptional<z.ZodString>;
|
|
554
|
+
}, "strip", z.ZodTypeAny, {
|
|
555
|
+
message: string;
|
|
556
|
+
errorType: ErrorTypeV1.VALIDATION_ERROR;
|
|
557
|
+
isImmediatelyAvailable: true;
|
|
558
|
+
provider?: RecognitionProvider | undefined;
|
|
559
|
+
code?: string | number | undefined;
|
|
560
|
+
audioUtteranceId?: string | undefined;
|
|
561
|
+
timestamp?: number | undefined;
|
|
562
|
+
description?: string | undefined;
|
|
563
|
+
field?: string | undefined;
|
|
564
|
+
expected?: string | undefined;
|
|
565
|
+
received?: string | undefined;
|
|
566
|
+
}, {
|
|
567
|
+
message: string;
|
|
568
|
+
errorType: ErrorTypeV1.VALIDATION_ERROR;
|
|
569
|
+
isImmediatelyAvailable: true;
|
|
570
|
+
provider?: RecognitionProvider | undefined;
|
|
571
|
+
code?: string | number | undefined;
|
|
572
|
+
audioUtteranceId?: string | undefined;
|
|
573
|
+
timestamp?: number | undefined;
|
|
574
|
+
description?: string | undefined;
|
|
575
|
+
field?: string | undefined;
|
|
576
|
+
expected?: string | undefined;
|
|
577
|
+
received?: string | undefined;
|
|
578
|
+
}>;
|
|
579
|
+
type ValidationException = z.infer<typeof ValidationExceptionSchema>;
|
|
580
|
+
/**
|
|
581
|
+
* Provider Error
|
|
582
|
+
* isImmediatelyAvailable: false
|
|
583
|
+
* Error from ASR provider - usually transient or needs investigation
|
|
584
|
+
*/
|
|
585
|
+
declare const ProviderExceptionSchema: z.ZodObject<{
|
|
586
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
587
|
+
message: z.ZodString;
|
|
588
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
589
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
590
|
+
description: z.ZodOptional<z.ZodString>;
|
|
591
|
+
errorType: z.ZodLiteral<ErrorTypeV1.PROVIDER_ERROR>;
|
|
592
|
+
isImmediatelyAvailable: z.ZodLiteral<false>;
|
|
593
|
+
provider: z.ZodOptional<z.ZodString>;
|
|
594
|
+
providerErrorCode: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
595
|
+
isTransient: z.ZodOptional<z.ZodBoolean>;
|
|
596
|
+
}, "strip", z.ZodTypeAny, {
|
|
597
|
+
message: string;
|
|
598
|
+
errorType: ErrorTypeV1.PROVIDER_ERROR;
|
|
599
|
+
isImmediatelyAvailable: false;
|
|
600
|
+
code?: string | number | undefined;
|
|
601
|
+
audioUtteranceId?: string | undefined;
|
|
602
|
+
timestamp?: number | undefined;
|
|
603
|
+
description?: string | undefined;
|
|
604
|
+
provider?: string | undefined;
|
|
605
|
+
providerErrorCode?: string | number | undefined;
|
|
606
|
+
isTransient?: boolean | undefined;
|
|
607
|
+
}, {
|
|
608
|
+
message: string;
|
|
609
|
+
errorType: ErrorTypeV1.PROVIDER_ERROR;
|
|
610
|
+
isImmediatelyAvailable: false;
|
|
611
|
+
code?: string | number | undefined;
|
|
612
|
+
audioUtteranceId?: string | undefined;
|
|
613
|
+
timestamp?: number | undefined;
|
|
614
|
+
description?: string | undefined;
|
|
615
|
+
provider?: string | undefined;
|
|
616
|
+
providerErrorCode?: string | number | undefined;
|
|
617
|
+
isTransient?: boolean | undefined;
|
|
618
|
+
}>;
|
|
619
|
+
type ProviderException = z.infer<typeof ProviderExceptionSchema>;
|
|
620
|
+
/**
|
|
621
|
+
* Timeout Error
|
|
622
|
+
* isImmediatelyAvailable: true
|
|
623
|
+
* Request took too long - user should try again
|
|
624
|
+
*/
|
|
625
|
+
declare const TimeoutExceptionSchema: z.ZodObject<{
|
|
626
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
627
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
628
|
+
message: z.ZodString;
|
|
629
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
630
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
631
|
+
description: z.ZodOptional<z.ZodString>;
|
|
632
|
+
errorType: z.ZodLiteral<ErrorTypeV1.TIMEOUT_ERROR>;
|
|
633
|
+
isImmediatelyAvailable: z.ZodLiteral<true>;
|
|
634
|
+
timeoutMs: z.ZodOptional<z.ZodNumber>;
|
|
635
|
+
operation: z.ZodOptional<z.ZodString>;
|
|
636
|
+
}, "strip", z.ZodTypeAny, {
|
|
637
|
+
message: string;
|
|
638
|
+
errorType: ErrorTypeV1.TIMEOUT_ERROR;
|
|
639
|
+
isImmediatelyAvailable: true;
|
|
640
|
+
provider?: RecognitionProvider | undefined;
|
|
641
|
+
code?: string | number | undefined;
|
|
642
|
+
audioUtteranceId?: string | undefined;
|
|
643
|
+
timestamp?: number | undefined;
|
|
644
|
+
description?: string | undefined;
|
|
645
|
+
timeoutMs?: number | undefined;
|
|
646
|
+
operation?: string | undefined;
|
|
647
|
+
}, {
|
|
648
|
+
message: string;
|
|
649
|
+
errorType: ErrorTypeV1.TIMEOUT_ERROR;
|
|
650
|
+
isImmediatelyAvailable: true;
|
|
651
|
+
provider?: RecognitionProvider | undefined;
|
|
652
|
+
code?: string | number | undefined;
|
|
653
|
+
audioUtteranceId?: string | undefined;
|
|
654
|
+
timestamp?: number | undefined;
|
|
655
|
+
description?: string | undefined;
|
|
656
|
+
timeoutMs?: number | undefined;
|
|
657
|
+
operation?: string | undefined;
|
|
658
|
+
}>;
|
|
659
|
+
type TimeoutException = z.infer<typeof TimeoutExceptionSchema>;
|
|
660
|
+
/**
|
|
661
|
+
* Quota Exceeded Error
|
|
662
|
+
* isImmediatelyAvailable: true
|
|
663
|
+
* Rate limit or quota exceeded - user should wait
|
|
664
|
+
*/
|
|
665
|
+
declare const QuotaExceededExceptionSchema: z.ZodObject<{
|
|
666
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
667
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
668
|
+
message: z.ZodString;
|
|
669
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
670
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
671
|
+
description: z.ZodOptional<z.ZodString>;
|
|
672
|
+
errorType: z.ZodLiteral<ErrorTypeV1.QUOTA_EXCEEDED>;
|
|
673
|
+
isImmediatelyAvailable: z.ZodLiteral<true>;
|
|
674
|
+
quotaType: z.ZodOptional<z.ZodString>;
|
|
675
|
+
resetAt: z.ZodOptional<z.ZodNumber>;
|
|
676
|
+
retryAfterSeconds: z.ZodOptional<z.ZodNumber>;
|
|
677
|
+
}, "strip", z.ZodTypeAny, {
|
|
678
|
+
message: string;
|
|
679
|
+
errorType: ErrorTypeV1.QUOTA_EXCEEDED;
|
|
680
|
+
isImmediatelyAvailable: true;
|
|
681
|
+
provider?: RecognitionProvider | undefined;
|
|
682
|
+
code?: string | number | undefined;
|
|
683
|
+
audioUtteranceId?: string | undefined;
|
|
684
|
+
timestamp?: number | undefined;
|
|
685
|
+
description?: string | undefined;
|
|
686
|
+
quotaType?: string | undefined;
|
|
687
|
+
resetAt?: number | undefined;
|
|
688
|
+
retryAfterSeconds?: number | undefined;
|
|
689
|
+
}, {
|
|
690
|
+
message: string;
|
|
691
|
+
errorType: ErrorTypeV1.QUOTA_EXCEEDED;
|
|
692
|
+
isImmediatelyAvailable: true;
|
|
693
|
+
provider?: RecognitionProvider | undefined;
|
|
694
|
+
code?: string | number | undefined;
|
|
695
|
+
audioUtteranceId?: string | undefined;
|
|
696
|
+
timestamp?: number | undefined;
|
|
697
|
+
description?: string | undefined;
|
|
698
|
+
quotaType?: string | undefined;
|
|
699
|
+
resetAt?: number | undefined;
|
|
700
|
+
retryAfterSeconds?: number | undefined;
|
|
701
|
+
}>;
|
|
702
|
+
type QuotaExceededException = z.infer<typeof QuotaExceededExceptionSchema>;
|
|
703
|
+
/**
|
|
704
|
+
* Connection Error
|
|
705
|
+
* isImmediatelyAvailable: true
|
|
706
|
+
* Connection establishment or network failure - user should check network or retry
|
|
707
|
+
*/
|
|
708
|
+
declare const ConnectionExceptionSchema: z.ZodObject<{
|
|
709
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
710
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
711
|
+
message: z.ZodString;
|
|
712
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
713
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
714
|
+
description: z.ZodOptional<z.ZodString>;
|
|
715
|
+
errorType: z.ZodLiteral<ErrorTypeV1.CONNECTION_ERROR>;
|
|
716
|
+
isImmediatelyAvailable: z.ZodLiteral<true>;
|
|
717
|
+
attempts: z.ZodOptional<z.ZodNumber>;
|
|
718
|
+
url: z.ZodOptional<z.ZodString>;
|
|
719
|
+
underlyingError: z.ZodOptional<z.ZodString>;
|
|
720
|
+
}, "strip", z.ZodTypeAny, {
|
|
721
|
+
message: string;
|
|
722
|
+
errorType: ErrorTypeV1.CONNECTION_ERROR;
|
|
723
|
+
isImmediatelyAvailable: true;
|
|
724
|
+
provider?: RecognitionProvider | undefined;
|
|
725
|
+
code?: string | number | undefined;
|
|
726
|
+
audioUtteranceId?: string | undefined;
|
|
727
|
+
timestamp?: number | undefined;
|
|
728
|
+
description?: string | undefined;
|
|
729
|
+
attempts?: number | undefined;
|
|
730
|
+
url?: string | undefined;
|
|
731
|
+
underlyingError?: string | undefined;
|
|
732
|
+
}, {
|
|
733
|
+
message: string;
|
|
734
|
+
errorType: ErrorTypeV1.CONNECTION_ERROR;
|
|
735
|
+
isImmediatelyAvailable: true;
|
|
736
|
+
provider?: RecognitionProvider | undefined;
|
|
737
|
+
code?: string | number | undefined;
|
|
738
|
+
audioUtteranceId?: string | undefined;
|
|
739
|
+
timestamp?: number | undefined;
|
|
740
|
+
description?: string | undefined;
|
|
741
|
+
attempts?: number | undefined;
|
|
742
|
+
url?: string | undefined;
|
|
743
|
+
underlyingError?: string | undefined;
|
|
744
|
+
}>;
|
|
745
|
+
type ConnectionException = z.infer<typeof ConnectionExceptionSchema>;
|
|
746
|
+
/**
|
|
747
|
+
* Unknown Error
|
|
748
|
+
* isImmediatelyAvailable: false
|
|
749
|
+
* Unexpected error - needs investigation
|
|
750
|
+
*/
|
|
751
|
+
declare const UnknownExceptionSchema: z.ZodObject<{
|
|
752
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
753
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
754
|
+
message: z.ZodString;
|
|
755
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
756
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
757
|
+
description: z.ZodOptional<z.ZodString>;
|
|
758
|
+
errorType: z.ZodLiteral<ErrorTypeV1.UNKNOWN_ERROR>;
|
|
759
|
+
isImmediatelyAvailable: z.ZodLiteral<false>;
|
|
760
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
761
|
+
context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
762
|
+
}, "strip", z.ZodTypeAny, {
|
|
763
|
+
message: string;
|
|
764
|
+
errorType: ErrorTypeV1.UNKNOWN_ERROR;
|
|
765
|
+
isImmediatelyAvailable: false;
|
|
766
|
+
provider?: RecognitionProvider | undefined;
|
|
767
|
+
code?: string | number | undefined;
|
|
768
|
+
audioUtteranceId?: string | undefined;
|
|
769
|
+
timestamp?: number | undefined;
|
|
770
|
+
description?: string | undefined;
|
|
771
|
+
stack?: string | undefined;
|
|
772
|
+
context?: Record<string, unknown> | undefined;
|
|
773
|
+
}, {
|
|
774
|
+
message: string;
|
|
775
|
+
errorType: ErrorTypeV1.UNKNOWN_ERROR;
|
|
776
|
+
isImmediatelyAvailable: false;
|
|
777
|
+
provider?: RecognitionProvider | undefined;
|
|
778
|
+
code?: string | number | undefined;
|
|
779
|
+
audioUtteranceId?: string | undefined;
|
|
780
|
+
timestamp?: number | undefined;
|
|
781
|
+
description?: string | undefined;
|
|
782
|
+
stack?: string | undefined;
|
|
783
|
+
context?: Record<string, unknown> | undefined;
|
|
784
|
+
}>;
|
|
785
|
+
type UnknownException = z.infer<typeof UnknownExceptionSchema>;
|
|
786
|
+
/**
|
|
787
|
+
* Discriminated union of all exception types
|
|
788
|
+
* Use this for type-safe error handling
|
|
789
|
+
*/
|
|
790
|
+
declare const RecognitionExceptionSchema: z.ZodDiscriminatedUnion<"errorType", [z.ZodObject<{
|
|
791
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
792
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
793
|
+
message: z.ZodString;
|
|
794
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
795
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
796
|
+
description: z.ZodOptional<z.ZodString>;
|
|
797
|
+
errorType: z.ZodLiteral<ErrorTypeV1.AUTHENTICATION_ERROR>;
|
|
798
|
+
isImmediatelyAvailable: z.ZodLiteral<false>;
|
|
799
|
+
service: z.ZodOptional<z.ZodString>;
|
|
800
|
+
authMethod: z.ZodOptional<z.ZodString>;
|
|
801
|
+
}, "strip", z.ZodTypeAny, {
|
|
802
|
+
message: string;
|
|
803
|
+
errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
|
|
804
|
+
isImmediatelyAvailable: false;
|
|
805
|
+
provider?: RecognitionProvider | undefined;
|
|
806
|
+
code?: string | number | undefined;
|
|
807
|
+
audioUtteranceId?: string | undefined;
|
|
808
|
+
timestamp?: number | undefined;
|
|
809
|
+
description?: string | undefined;
|
|
810
|
+
service?: string | undefined;
|
|
811
|
+
authMethod?: string | undefined;
|
|
812
|
+
}, {
|
|
813
|
+
message: string;
|
|
814
|
+
errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
|
|
815
|
+
isImmediatelyAvailable: false;
|
|
816
|
+
provider?: RecognitionProvider | undefined;
|
|
817
|
+
code?: string | number | undefined;
|
|
818
|
+
audioUtteranceId?: string | undefined;
|
|
819
|
+
timestamp?: number | undefined;
|
|
820
|
+
description?: string | undefined;
|
|
821
|
+
service?: string | undefined;
|
|
822
|
+
authMethod?: string | undefined;
|
|
823
|
+
}>, z.ZodObject<{
|
|
824
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
825
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
826
|
+
message: z.ZodString;
|
|
827
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
828
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
829
|
+
description: z.ZodOptional<z.ZodString>;
|
|
830
|
+
errorType: z.ZodLiteral<ErrorTypeV1.VALIDATION_ERROR>;
|
|
831
|
+
isImmediatelyAvailable: z.ZodLiteral<true>;
|
|
832
|
+
field: z.ZodOptional<z.ZodString>;
|
|
833
|
+
expected: z.ZodOptional<z.ZodString>;
|
|
834
|
+
received: z.ZodOptional<z.ZodString>;
|
|
835
|
+
}, "strip", z.ZodTypeAny, {
|
|
836
|
+
message: string;
|
|
837
|
+
errorType: ErrorTypeV1.VALIDATION_ERROR;
|
|
838
|
+
isImmediatelyAvailable: true;
|
|
839
|
+
provider?: RecognitionProvider | undefined;
|
|
840
|
+
code?: string | number | undefined;
|
|
841
|
+
audioUtteranceId?: string | undefined;
|
|
842
|
+
timestamp?: number | undefined;
|
|
843
|
+
description?: string | undefined;
|
|
844
|
+
field?: string | undefined;
|
|
845
|
+
expected?: string | undefined;
|
|
846
|
+
received?: string | undefined;
|
|
847
|
+
}, {
|
|
848
|
+
message: string;
|
|
849
|
+
errorType: ErrorTypeV1.VALIDATION_ERROR;
|
|
850
|
+
isImmediatelyAvailable: true;
|
|
851
|
+
provider?: RecognitionProvider | undefined;
|
|
852
|
+
code?: string | number | undefined;
|
|
853
|
+
audioUtteranceId?: string | undefined;
|
|
854
|
+
timestamp?: number | undefined;
|
|
855
|
+
description?: string | undefined;
|
|
856
|
+
field?: string | undefined;
|
|
857
|
+
expected?: string | undefined;
|
|
858
|
+
received?: string | undefined;
|
|
859
|
+
}>, z.ZodObject<{
|
|
860
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
861
|
+
message: z.ZodString;
|
|
862
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
863
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
864
|
+
description: z.ZodOptional<z.ZodString>;
|
|
865
|
+
errorType: z.ZodLiteral<ErrorTypeV1.PROVIDER_ERROR>;
|
|
866
|
+
isImmediatelyAvailable: z.ZodLiteral<false>;
|
|
867
|
+
provider: z.ZodOptional<z.ZodString>;
|
|
868
|
+
providerErrorCode: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
869
|
+
isTransient: z.ZodOptional<z.ZodBoolean>;
|
|
870
|
+
}, "strip", z.ZodTypeAny, {
|
|
871
|
+
message: string;
|
|
872
|
+
errorType: ErrorTypeV1.PROVIDER_ERROR;
|
|
873
|
+
isImmediatelyAvailable: false;
|
|
874
|
+
code?: string | number | undefined;
|
|
875
|
+
audioUtteranceId?: string | undefined;
|
|
876
|
+
timestamp?: number | undefined;
|
|
877
|
+
description?: string | undefined;
|
|
878
|
+
provider?: string | undefined;
|
|
879
|
+
providerErrorCode?: string | number | undefined;
|
|
880
|
+
isTransient?: boolean | undefined;
|
|
881
|
+
}, {
|
|
882
|
+
message: string;
|
|
883
|
+
errorType: ErrorTypeV1.PROVIDER_ERROR;
|
|
884
|
+
isImmediatelyAvailable: false;
|
|
885
|
+
code?: string | number | undefined;
|
|
886
|
+
audioUtteranceId?: string | undefined;
|
|
887
|
+
timestamp?: number | undefined;
|
|
888
|
+
description?: string | undefined;
|
|
889
|
+
provider?: string | undefined;
|
|
890
|
+
providerErrorCode?: string | number | undefined;
|
|
891
|
+
isTransient?: boolean | undefined;
|
|
892
|
+
}>, z.ZodObject<{
|
|
893
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
894
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
895
|
+
message: z.ZodString;
|
|
896
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
897
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
898
|
+
description: z.ZodOptional<z.ZodString>;
|
|
899
|
+
errorType: z.ZodLiteral<ErrorTypeV1.TIMEOUT_ERROR>;
|
|
900
|
+
isImmediatelyAvailable: z.ZodLiteral<true>;
|
|
901
|
+
timeoutMs: z.ZodOptional<z.ZodNumber>;
|
|
902
|
+
operation: z.ZodOptional<z.ZodString>;
|
|
903
|
+
}, "strip", z.ZodTypeAny, {
|
|
904
|
+
message: string;
|
|
905
|
+
errorType: ErrorTypeV1.TIMEOUT_ERROR;
|
|
906
|
+
isImmediatelyAvailable: true;
|
|
907
|
+
provider?: RecognitionProvider | undefined;
|
|
908
|
+
code?: string | number | undefined;
|
|
909
|
+
audioUtteranceId?: string | undefined;
|
|
910
|
+
timestamp?: number | undefined;
|
|
911
|
+
description?: string | undefined;
|
|
912
|
+
timeoutMs?: number | undefined;
|
|
913
|
+
operation?: string | undefined;
|
|
914
|
+
}, {
|
|
915
|
+
message: string;
|
|
916
|
+
errorType: ErrorTypeV1.TIMEOUT_ERROR;
|
|
917
|
+
isImmediatelyAvailable: true;
|
|
918
|
+
provider?: RecognitionProvider | undefined;
|
|
919
|
+
code?: string | number | undefined;
|
|
920
|
+
audioUtteranceId?: string | undefined;
|
|
921
|
+
timestamp?: number | undefined;
|
|
922
|
+
description?: string | undefined;
|
|
923
|
+
timeoutMs?: number | undefined;
|
|
924
|
+
operation?: string | undefined;
|
|
925
|
+
}>, z.ZodObject<{
|
|
926
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
927
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
928
|
+
message: z.ZodString;
|
|
929
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
930
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
931
|
+
description: z.ZodOptional<z.ZodString>;
|
|
932
|
+
errorType: z.ZodLiteral<ErrorTypeV1.QUOTA_EXCEEDED>;
|
|
933
|
+
isImmediatelyAvailable: z.ZodLiteral<true>;
|
|
934
|
+
quotaType: z.ZodOptional<z.ZodString>;
|
|
935
|
+
resetAt: z.ZodOptional<z.ZodNumber>;
|
|
936
|
+
retryAfterSeconds: z.ZodOptional<z.ZodNumber>;
|
|
937
|
+
}, "strip", z.ZodTypeAny, {
|
|
938
|
+
message: string;
|
|
939
|
+
errorType: ErrorTypeV1.QUOTA_EXCEEDED;
|
|
940
|
+
isImmediatelyAvailable: true;
|
|
941
|
+
provider?: RecognitionProvider | undefined;
|
|
942
|
+
code?: string | number | undefined;
|
|
943
|
+
audioUtteranceId?: string | undefined;
|
|
944
|
+
timestamp?: number | undefined;
|
|
945
|
+
description?: string | undefined;
|
|
946
|
+
quotaType?: string | undefined;
|
|
947
|
+
resetAt?: number | undefined;
|
|
948
|
+
retryAfterSeconds?: number | undefined;
|
|
949
|
+
}, {
|
|
950
|
+
message: string;
|
|
951
|
+
errorType: ErrorTypeV1.QUOTA_EXCEEDED;
|
|
952
|
+
isImmediatelyAvailable: true;
|
|
953
|
+
provider?: RecognitionProvider | undefined;
|
|
954
|
+
code?: string | number | undefined;
|
|
955
|
+
audioUtteranceId?: string | undefined;
|
|
956
|
+
timestamp?: number | undefined;
|
|
957
|
+
description?: string | undefined;
|
|
958
|
+
quotaType?: string | undefined;
|
|
959
|
+
resetAt?: number | undefined;
|
|
960
|
+
retryAfterSeconds?: number | undefined;
|
|
961
|
+
}>, z.ZodObject<{
|
|
962
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
963
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
964
|
+
message: z.ZodString;
|
|
965
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
966
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
967
|
+
description: z.ZodOptional<z.ZodString>;
|
|
968
|
+
errorType: z.ZodLiteral<ErrorTypeV1.CONNECTION_ERROR>;
|
|
969
|
+
isImmediatelyAvailable: z.ZodLiteral<true>;
|
|
970
|
+
attempts: z.ZodOptional<z.ZodNumber>;
|
|
971
|
+
url: z.ZodOptional<z.ZodString>;
|
|
972
|
+
underlyingError: z.ZodOptional<z.ZodString>;
|
|
973
|
+
}, "strip", z.ZodTypeAny, {
|
|
974
|
+
message: string;
|
|
975
|
+
errorType: ErrorTypeV1.CONNECTION_ERROR;
|
|
976
|
+
isImmediatelyAvailable: true;
|
|
977
|
+
provider?: RecognitionProvider | undefined;
|
|
978
|
+
code?: string | number | undefined;
|
|
979
|
+
audioUtteranceId?: string | undefined;
|
|
980
|
+
timestamp?: number | undefined;
|
|
981
|
+
description?: string | undefined;
|
|
982
|
+
attempts?: number | undefined;
|
|
983
|
+
url?: string | undefined;
|
|
984
|
+
underlyingError?: string | undefined;
|
|
985
|
+
}, {
|
|
986
|
+
message: string;
|
|
987
|
+
errorType: ErrorTypeV1.CONNECTION_ERROR;
|
|
988
|
+
isImmediatelyAvailable: true;
|
|
989
|
+
provider?: RecognitionProvider | undefined;
|
|
990
|
+
code?: string | number | undefined;
|
|
991
|
+
audioUtteranceId?: string | undefined;
|
|
992
|
+
timestamp?: number | undefined;
|
|
993
|
+
description?: string | undefined;
|
|
994
|
+
attempts?: number | undefined;
|
|
995
|
+
url?: string | undefined;
|
|
996
|
+
underlyingError?: string | undefined;
|
|
997
|
+
}>, z.ZodObject<{
|
|
998
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
999
|
+
message: z.ZodString;
|
|
1000
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
1001
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
1002
|
+
description: z.ZodOptional<z.ZodString>;
|
|
1003
|
+
errorType: z.ZodLiteral<ErrorTypeV1.CIRCUIT_BREAKER_OPEN>;
|
|
1004
|
+
isImmediatelyAvailable: z.ZodLiteral<true>;
|
|
1005
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
1006
|
+
model: z.ZodOptional<z.ZodString>;
|
|
1007
|
+
}, "strip", z.ZodTypeAny, {
|
|
1008
|
+
message: string;
|
|
1009
|
+
errorType: ErrorTypeV1.CIRCUIT_BREAKER_OPEN;
|
|
1010
|
+
isImmediatelyAvailable: true;
|
|
1011
|
+
code?: string | number | undefined;
|
|
1012
|
+
audioUtteranceId?: string | undefined;
|
|
1013
|
+
timestamp?: number | undefined;
|
|
1014
|
+
description?: string | undefined;
|
|
1015
|
+
provider?: RecognitionProvider | undefined;
|
|
1016
|
+
model?: string | undefined;
|
|
1017
|
+
}, {
|
|
1018
|
+
message: string;
|
|
1019
|
+
errorType: ErrorTypeV1.CIRCUIT_BREAKER_OPEN;
|
|
1020
|
+
isImmediatelyAvailable: true;
|
|
1021
|
+
code?: string | number | undefined;
|
|
1022
|
+
audioUtteranceId?: string | undefined;
|
|
1023
|
+
timestamp?: number | undefined;
|
|
1024
|
+
description?: string | undefined;
|
|
1025
|
+
provider?: RecognitionProvider | undefined;
|
|
1026
|
+
model?: string | undefined;
|
|
1027
|
+
}>, z.ZodObject<{
|
|
1028
|
+
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
|
|
1029
|
+
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
1030
|
+
message: z.ZodString;
|
|
1031
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
1032
|
+
timestamp: z.ZodOptional<z.ZodNumber>;
|
|
1033
|
+
description: z.ZodOptional<z.ZodString>;
|
|
1034
|
+
errorType: z.ZodLiteral<ErrorTypeV1.UNKNOWN_ERROR>;
|
|
1035
|
+
isImmediatelyAvailable: z.ZodLiteral<false>;
|
|
1036
|
+
stack: z.ZodOptional<z.ZodString>;
|
|
1037
|
+
context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
1038
|
+
}, "strip", z.ZodTypeAny, {
|
|
1039
|
+
message: string;
|
|
1040
|
+
errorType: ErrorTypeV1.UNKNOWN_ERROR;
|
|
1041
|
+
isImmediatelyAvailable: false;
|
|
1042
|
+
provider?: RecognitionProvider | undefined;
|
|
1043
|
+
code?: string | number | undefined;
|
|
1044
|
+
audioUtteranceId?: string | undefined;
|
|
1045
|
+
timestamp?: number | undefined;
|
|
1046
|
+
description?: string | undefined;
|
|
1047
|
+
stack?: string | undefined;
|
|
1048
|
+
context?: Record<string, unknown> | undefined;
|
|
1049
|
+
}, {
|
|
1050
|
+
message: string;
|
|
1051
|
+
errorType: ErrorTypeV1.UNKNOWN_ERROR;
|
|
1052
|
+
isImmediatelyAvailable: false;
|
|
1053
|
+
provider?: RecognitionProvider | undefined;
|
|
1054
|
+
code?: string | number | undefined;
|
|
1055
|
+
audioUtteranceId?: string | undefined;
|
|
1056
|
+
timestamp?: number | undefined;
|
|
1057
|
+
description?: string | undefined;
|
|
1058
|
+
stack?: string | undefined;
|
|
1059
|
+
context?: Record<string, unknown> | undefined;
|
|
1060
|
+
}>]>;
|
|
1061
|
+
type RecognitionException = z.infer<typeof RecognitionExceptionSchema>;
|
|
1062
|
+
/**
|
|
1063
|
+
* Check if an exception should be shown to the user immediately
|
|
1064
|
+
*/
|
|
1065
|
+
declare function isExceptionImmediatelyAvailable(exception: RecognitionException): boolean;
|
|
1066
|
+
/**
|
|
1067
|
+
* Get user-friendly error message for exceptions
|
|
1068
|
+
*/
|
|
1069
|
+
declare function getUserFriendlyMessage(exception: RecognitionException): string;
|
|
1070
|
+
|
|
1071
|
+
/**
|
|
1072
|
+
* Recognition Context Types V1
|
|
1073
|
+
* NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
|
|
1074
|
+
* Types and schemas for recognition context data
|
|
1075
|
+
*/
|
|
1076
|
+
|
|
1077
|
+
/**
|
|
1078
|
+
* Message type discriminator for recognition context V1
|
|
1079
|
+
*/
|
|
1080
|
+
declare enum RecognitionContextTypeV1 {
|
|
1081
|
+
GAME_CONTEXT = "GameContext",
|
|
1082
|
+
CONTROL_SIGNAL = "ControlSignal",
|
|
1083
|
+
ASR_REQUEST = "ASRRequest"
|
|
1084
|
+
}
|
|
1085
|
+
/**
|
|
1086
|
+
* Control signal types for recognition V1
|
|
1087
|
+
*/
|
|
1088
|
+
declare enum ControlSignalTypeV1 {
|
|
1089
|
+
START_RECORDING = "start_recording",
|
|
1090
|
+
STOP_RECORDING = "stop_recording"
|
|
1091
|
+
}
|
|
1092
|
+
/**
|
|
1093
|
+
* SlotMap - A strongly typed map from slot names to lists of values
|
|
1094
|
+
* Used for entity extraction and slot filling in voice interactions
|
|
1095
|
+
*/
|
|
1096
|
+
declare const SlotMapSchema: z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>;
|
|
1097
|
+
type SlotMap = z.infer<typeof SlotMapSchema>;
|
|
1098
|
+
/**
|
|
1099
|
+
* Game context V1 - contains game state information
|
|
1100
|
+
*/
|
|
1101
|
+
declare const GameContextSchemaV1: z.ZodObject<{
|
|
1102
|
+
type: z.ZodLiteral<RecognitionContextTypeV1.GAME_CONTEXT>;
|
|
1103
|
+
gameId: z.ZodString;
|
|
1104
|
+
gamePhase: z.ZodString;
|
|
1105
|
+
promptSTT: z.ZodOptional<z.ZodString>;
|
|
1106
|
+
promptSTF: z.ZodOptional<z.ZodString>;
|
|
1107
|
+
promptTTF: z.ZodOptional<z.ZodString>;
|
|
1108
|
+
slotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
|
|
1109
|
+
}, "strip", z.ZodTypeAny, {
|
|
1110
|
+
type: RecognitionContextTypeV1.GAME_CONTEXT;
|
|
1111
|
+
gameId: string;
|
|
1112
|
+
gamePhase: string;
|
|
1113
|
+
promptSTT?: string | undefined;
|
|
1114
|
+
promptSTF?: string | undefined;
|
|
1115
|
+
promptTTF?: string | undefined;
|
|
1116
|
+
slotMap?: Record<string, string[]> | undefined;
|
|
1117
|
+
}, {
|
|
1118
|
+
type: RecognitionContextTypeV1.GAME_CONTEXT;
|
|
1119
|
+
gameId: string;
|
|
1120
|
+
gamePhase: string;
|
|
1121
|
+
promptSTT?: string | undefined;
|
|
1122
|
+
promptSTF?: string | undefined;
|
|
1123
|
+
promptTTF?: string | undefined;
|
|
1124
|
+
slotMap?: Record<string, string[]> | undefined;
|
|
1125
|
+
}>;
|
|
1126
|
+
type GameContextV1 = z.infer<typeof GameContextSchemaV1>;
|
|
1127
|
+
/**
|
|
1128
|
+
* ASR Request V1 - contains complete ASR setup information
|
|
1129
|
+
* Sent once at connection start to configure the session
|
|
1130
|
+
*/
|
|
1131
|
+
declare const ASRRequestSchemaV1: z.ZodObject<{
|
|
1132
|
+
type: z.ZodLiteral<RecognitionContextTypeV1.ASR_REQUEST>;
|
|
1133
|
+
audioUtteranceId: z.ZodOptional<z.ZodString>;
|
|
1134
|
+
provider: z.ZodString;
|
|
1135
|
+
model: z.ZodOptional<z.ZodString>;
|
|
1136
|
+
language: z.ZodString;
|
|
1137
|
+
sampleRate: z.ZodNumber;
|
|
1138
|
+
encoding: z.ZodNumber;
|
|
1139
|
+
interimResults: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
1140
|
+
useContext: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
1141
|
+
finalTranscriptStability: z.ZodOptional<z.ZodString>;
|
|
1142
|
+
priority: z.ZodDefault<z.ZodOptional<z.ZodEnum<["low", "high"]>>>;
|
|
1143
|
+
fallbackModels: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
1144
|
+
provider: z.ZodString;
|
|
1145
|
+
model: z.ZodOptional<z.ZodString>;
|
|
1146
|
+
language: z.ZodOptional<z.ZodString>;
|
|
1147
|
+
sampleRate: z.ZodOptional<z.ZodNumber>;
|
|
1148
|
+
encoding: z.ZodOptional<z.ZodNumber>;
|
|
1149
|
+
interimResults: z.ZodOptional<z.ZodBoolean>;
|
|
1150
|
+
useContext: z.ZodOptional<z.ZodBoolean>;
|
|
1151
|
+
finalTranscriptStability: z.ZodOptional<z.ZodString>;
|
|
1152
|
+
}, "strip", z.ZodTypeAny, {
|
|
1153
|
+
provider: string;
|
|
1154
|
+
model?: string | undefined;
|
|
1155
|
+
language?: string | undefined;
|
|
1156
|
+
sampleRate?: number | undefined;
|
|
1157
|
+
encoding?: number | undefined;
|
|
1158
|
+
interimResults?: boolean | undefined;
|
|
1159
|
+
useContext?: boolean | undefined;
|
|
1160
|
+
finalTranscriptStability?: string | undefined;
|
|
1161
|
+
}, {
|
|
1162
|
+
provider: string;
|
|
1163
|
+
model?: string | undefined;
|
|
1164
|
+
language?: string | undefined;
|
|
1165
|
+
sampleRate?: number | undefined;
|
|
1166
|
+
encoding?: number | undefined;
|
|
1167
|
+
interimResults?: boolean | undefined;
|
|
1168
|
+
useContext?: boolean | undefined;
|
|
1169
|
+
finalTranscriptStability?: string | undefined;
|
|
1170
|
+
}>, "many">>;
|
|
1171
|
+
debugCommand: z.ZodOptional<z.ZodObject<{
|
|
1172
|
+
enableDebugLog: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
1173
|
+
enableAudioStorage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
1174
|
+
enableSongQuizSessionIdCheck: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
1175
|
+
enablePilotModels: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
1176
|
+
}, "strip", z.ZodTypeAny, {
|
|
1177
|
+
enableDebugLog: boolean;
|
|
1178
|
+
enableAudioStorage: boolean;
|
|
1179
|
+
enableSongQuizSessionIdCheck: boolean;
|
|
1180
|
+
enablePilotModels: boolean;
|
|
1181
|
+
}, {
|
|
1182
|
+
enableDebugLog?: boolean | undefined;
|
|
1183
|
+
enableAudioStorage?: boolean | undefined;
|
|
1184
|
+
enableSongQuizSessionIdCheck?: boolean | undefined;
|
|
1185
|
+
enablePilotModels?: boolean | undefined;
|
|
1186
|
+
}>>;
|
|
1187
|
+
}, "strip", z.ZodTypeAny, {
|
|
1188
|
+
provider: string;
|
|
1189
|
+
language: string;
|
|
1190
|
+
sampleRate: number;
|
|
1191
|
+
encoding: number;
|
|
1192
|
+
interimResults: boolean;
|
|
1193
|
+
useContext: boolean;
|
|
1194
|
+
priority: "low" | "high";
|
|
1195
|
+
type: RecognitionContextTypeV1.ASR_REQUEST;
|
|
1196
|
+
audioUtteranceId?: string | undefined;
|
|
1197
|
+
model?: string | undefined;
|
|
1198
|
+
finalTranscriptStability?: string | undefined;
|
|
1199
|
+
fallbackModels?: {
|
|
1200
|
+
provider: string;
|
|
1201
|
+
model?: string | undefined;
|
|
1202
|
+
language?: string | undefined;
|
|
1203
|
+
sampleRate?: number | undefined;
|
|
1204
|
+
encoding?: number | undefined;
|
|
1205
|
+
interimResults?: boolean | undefined;
|
|
1206
|
+
useContext?: boolean | undefined;
|
|
1207
|
+
finalTranscriptStability?: string | undefined;
|
|
1208
|
+
}[] | undefined;
|
|
1209
|
+
debugCommand?: {
|
|
1210
|
+
enableDebugLog: boolean;
|
|
1211
|
+
enableAudioStorage: boolean;
|
|
1212
|
+
enableSongQuizSessionIdCheck: boolean;
|
|
1213
|
+
enablePilotModels: boolean;
|
|
1214
|
+
} | undefined;
|
|
1215
|
+
}, {
|
|
1216
|
+
provider: string;
|
|
1217
|
+
language: string;
|
|
1218
|
+
sampleRate: number;
|
|
1219
|
+
encoding: number;
|
|
1220
|
+
type: RecognitionContextTypeV1.ASR_REQUEST;
|
|
1221
|
+
audioUtteranceId?: string | undefined;
|
|
1222
|
+
model?: string | undefined;
|
|
1223
|
+
interimResults?: boolean | undefined;
|
|
1224
|
+
useContext?: boolean | undefined;
|
|
1225
|
+
finalTranscriptStability?: string | undefined;
|
|
1226
|
+
priority?: "low" | "high" | undefined;
|
|
1227
|
+
fallbackModels?: {
|
|
1228
|
+
provider: string;
|
|
1229
|
+
model?: string | undefined;
|
|
1230
|
+
language?: string | undefined;
|
|
1231
|
+
sampleRate?: number | undefined;
|
|
1232
|
+
encoding?: number | undefined;
|
|
1233
|
+
interimResults?: boolean | undefined;
|
|
1234
|
+
useContext?: boolean | undefined;
|
|
1235
|
+
finalTranscriptStability?: string | undefined;
|
|
1236
|
+
}[] | undefined;
|
|
1237
|
+
debugCommand?: {
|
|
1238
|
+
enableDebugLog?: boolean | undefined;
|
|
1239
|
+
enableAudioStorage?: boolean | undefined;
|
|
1240
|
+
enableSongQuizSessionIdCheck?: boolean | undefined;
|
|
1241
|
+
enablePilotModels?: boolean | undefined;
|
|
1242
|
+
} | undefined;
|
|
1243
|
+
}>;
|
|
1244
|
+
type ASRRequestV1 = z.infer<typeof ASRRequestSchemaV1>;
|
|
1245
|
+
|
|
1246
|
+
/**
|
|
1247
|
+
* Unified ASR Request Configuration
|
|
1248
|
+
*
|
|
1249
|
+
* Provider-agnostic configuration for ASR (Automatic Speech Recognition) requests.
|
|
1250
|
+
* This interface provides a consistent API for clients regardless of the underlying provider.
|
|
1251
|
+
*
|
|
1252
|
+
* All fields use library-defined enums for type safety and consistency.
|
|
1253
|
+
* Provider-specific mappers will convert these to provider-native formats.
|
|
1254
|
+
*/
|
|
1255
|
+
|
|
1256
|
+
/**
|
|
1257
|
+
* Final transcript stability modes
|
|
1258
|
+
*
|
|
1259
|
+
* Controls timeout duration for fallback final transcript after stopRecording().
|
|
1260
|
+
* Similar to AssemblyAI's turn detection confidence modes but applied to our
|
|
1261
|
+
* internal timeout mechanism when vendors don't respond with is_final=true.
|
|
1262
|
+
*
|
|
1263
|
+
* @see https://www.assemblyai.com/docs/speech-to-text/universal-streaming/turn-detection
|
|
1264
|
+
*/
|
|
1265
|
+
declare enum FinalTranscriptStability {
|
|
1266
|
+
/**
|
|
1267
|
+
* Aggressive mode: 100ms timeout
|
|
1268
|
+
* Fast response, optimized for short utterances and quick back-and-forth
|
|
1269
|
+
* Use cases: IVR, quick commands, retail confirmations
|
|
1270
|
+
*/
|
|
1271
|
+
AGGRESSIVE = "aggressive",
|
|
1272
|
+
/**
|
|
1273
|
+
* Balanced mode: 200ms timeout (default)
|
|
1274
|
+
* Natural middle ground for most conversational scenarios
|
|
1275
|
+
* Use cases: General customer support, tech support, typical voice interactions
|
|
1276
|
+
*/
|
|
1277
|
+
BALANCED = "balanced",
|
|
1278
|
+
/**
|
|
1279
|
+
* Conservative mode: 400ms timeout
|
|
1280
|
+
* Wait longer for providers, optimized for complex/reflective speech
|
|
1281
|
+
* Use cases: Healthcare, complex queries, careful thought processes
|
|
1282
|
+
*/
|
|
1283
|
+
CONSERVATIVE = "conservative",
|
|
1284
|
+
/**
|
|
1285
|
+
* Experimental mode: 10000ms (10 seconds) timeout
|
|
1286
|
+
* Very long wait for batch/async providers that need significant processing time
|
|
1287
|
+
* Use cases: Batch processing (Gemini, OpenAI Whisper), complex audio analysis
|
|
1288
|
+
* Note: Should be cancelled immediately when transcript is received
|
|
1289
|
+
*/
|
|
1290
|
+
EXPERIMENTAL = "experimental"
|
|
1291
|
+
}
|
|
1292
|
+
/**
 * Unified ASR request configuration
 *
 * This configuration is used by:
 * - Client SDKs to specify recognition parameters
 * - Demo applications for user input
 * - Service layer to configure provider sessions
 *
 * Core fields only - all provider-specific options go in providerOptions
 *
 * @example
 * ```typescript
 * const config: ASRRequestConfig = {
 *   provider: RecognitionProvider.GOOGLE,
 *   model: GoogleModel.LATEST_LONG,
 *   language: Language.ENGLISH_US,
 *   sampleRate: SampleRate.RATE_16000, // or just 16000
 *   encoding: AudioEncoding.LINEAR16,
 *   providerOptions: {
 *     google: {
 *       enableAutomaticPunctuation: true,
 *       interimResults: true,
 *       singleUtterance: false
 *     }
 *   }
 * };
 * ```
 */
interface ASRRequestConfig {
    /**
     * The ASR provider to use
     * Must be one of the supported providers in RecognitionProvider enum
     */
    provider: RecognitionProvider | string;
    /**
     * Optional model specification for the provider
     * Can be provider-specific model enum or string
     * If not specified, provider's default model will be used
     */
    model?: RecognitionModel;
    /**
     * Language/locale for recognition
     * Use Language enum for common languages
     * Can also accept BCP-47 language tags as strings
     */
    language: Language | string;
    /**
     * Audio sample rate in Hz
     * Prefer using SampleRate enum values for standard rates
     * Can also accept numeric Hz values (e.g., 16000)
     */
    sampleRate: SampleRate | number;
    /**
     * Audio encoding format
     * Must match the actual audio data being sent
     * Use AudioEncoding enum for standard formats
     */
    encoding: AudioEncoding | string;
    /**
     * Enable interim (partial) results during recognition
     * When true, receive real-time updates before finalization
     * When false, only receive final results
     * Default: false
     */
    interimResults?: boolean;
    /**
     * Require GameContext (e.g., song titles) before starting recognition
     * When true, server waits for GameContext message before processing audio
     * When false, recognition starts immediately
     * Default: false
     */
    useContext?: boolean;
    /**
     * Final transcript stability mode
     *
     * Controls timeout duration for fallback final transcript when provider
     * doesn't respond with is_final=true after stopRecording().
     *
     * - aggressive: 100ms - fast response, may cut off slow providers
     * - balanced: 200ms - current default, good for most cases
     * - conservative: 400ms - wait longer for complex utterances
     *
     * @default 'balanced'
     * @see FinalTranscriptStability enum for detailed descriptions
     */
    finalTranscriptStability?: FinalTranscriptStability | string;
    /**
     * Traffic control priority for quota slot allocation
     *
     * Controls which quota slots this request can use when traffic control is enabled.
     * The quota system reserves a portion of slots for high-priority requests.
     *
     * - 'high': Can use all quota slots (reserved for critical games like song-quiz)
     * - 'low': Limited to non-reserved slots (default for most requests)
     *
     * @default 'low'
     */
    priority?: 'low' | 'high';
    /**
     * Additional provider-specific options
     *
     * Common options per provider:
     * - Deepgram: punctuate, smart_format, diarize, utterances
     * - Google: enableAutomaticPunctuation, singleUtterance, enableWordTimeOffsets
     * - AssemblyAI: formatTurns, filter_profanity, word_boost
     *
     * Note: interimResults is now a top-level field, but can still be overridden per provider
     *
     * @example
     * ```typescript
     * providerOptions: {
     *   google: {
     *     enableAutomaticPunctuation: true,
     *     singleUtterance: false,
     *     enableWordTimeOffsets: false
     *   }
     * }
     * ```
     */
    providerOptions?: Record<string, any>;
    /**
     * Optional fallback ASR configurations
     *
     * List of alternative ASR configurations to use if the primary fails.
     * Each fallback config is a complete ASRRequestConfig that will be tried
     * in order until one succeeds.
     *
     * @example
     * ```typescript
     * fallbackModels: [
     *   {
     *     provider: RecognitionProvider.DEEPGRAM,
     *     model: DeepgramModel.NOVA_2,
     *     language: Language.ENGLISH_US,
     *     sampleRate: 16000,
     *     encoding: AudioEncoding.LINEAR16
     *   },
     *   {
     *     provider: RecognitionProvider.GOOGLE,
     *     model: GoogleModel.LATEST_SHORT,
     *     language: Language.ENGLISH_US,
     *     sampleRate: 16000,
     *     encoding: AudioEncoding.LINEAR16
     *   }
     * ]
     * ```
     */
    fallbackModels?: ASRRequestConfig[];
}
|
|
1441
|
+
/**
 * Partial ASR config for updates
 * All fields are optional for partial updates
 * (derived via Partial<ASRRequestConfig> so it stays in sync with the full config)
 */
type PartialASRRequestConfig = Partial<ASRRequestConfig>;
|
|
1446
|
+
/**
 * Helper function to create a default ASR config
 * @param overrides - Optional partial config whose fields replace the defaults
 * @returns A complete ASRRequestConfig with defaults applied
 */
declare function createDefaultASRConfig(overrides?: PartialASRRequestConfig): ASRRequestConfig;
|
|
1450
|
+
|
|
1451
|
+
/**
 * Gemini Model Types
 * Based on available models as of January 2025
 *
 * API Version Notes:
 * - Gemini 2.5+ models: Use v1beta API (early access features)
 * - Gemini 2.0 models: Use v1beta API (early access features)
 * - Gemini 1.5 models: Use v1 API (stable, production-ready)
 *
 * @see https://ai.google.dev/gemini-api/docs/models
 * @see https://ai.google.dev/gemini-api/docs/api-versions
 */
declare enum GeminiModel {
    GEMINI_2_5_PRO = "gemini-2.5-pro",
    GEMINI_2_5_FLASH = "gemini-2.5-flash",
    GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
    GEMINI_2_0_FLASH_LATEST = "gemini-2.0-flash-latest",
    GEMINI_2_0_FLASH_EXP = "gemini-2.0-flash-exp"
}
|
|
1470
|
+
|
|
1471
|
+
/**
 * OpenAI Model Types
 * Model identifiers accepted by the OpenAI speech-to-text provider
 */
declare enum OpenAIModel {
    WHISPER_1 = "whisper-1"
}
|
|
1477
|
+
|
|
1478
|
+
/**
 * Standard stage/environment constants used across all services
 */
declare const STAGES: {
    readonly LOCAL: "local";
    readonly DEV: "dev";
    readonly STAGING: "staging";
    readonly PRODUCTION: "production";
};
/** Union of the stage string literals: 'local' | 'dev' | 'staging' | 'production' */
type Stage = typeof STAGES[keyof typeof STAGES];
|
|
1488
|
+
|
|
1489
|
+
/**
 * Generic WebSocket protocol types and utilities
 * Supports flexible versioning and message types
 * Used by both client and server implementations
 */
/**
 * Base message structure - completely flexible
 * @template V - Version type (number, string, etc.)
 */
interface Message<V = number> {
    /** Protocol version of this message */
    v: V;
    /** Message type discriminator (developer defined) */
    type: string;
    /** Optional payload; shape depends on `type` */
    data?: unknown;
}
/**
 * Version serializer interface
 * Converts between version type V and byte representation
 */
interface VersionSerializer<V> {
    /** Encode a version value into its byte representation */
    serialize: (v: V) => number;
    /** Decode a byte back into the version value */
    deserialize: (byte: number) => V;
}
|
|
1512
|
+
|
|
1513
|
+
/**
 * WebSocketAudioClient - Abstract base class for WebSocket clients
 * Sends audio and control messages, receives responses from server
 *
 * Features:
 * - Generic version type support (number, string, etc.)
 * - Type-safe upward/downward message data
 * - Client-side backpressure monitoring
 * - Abstract hooks for application-specific logic
 * - Format-agnostic audio protocol (supports any encoding)
 */
type ClientConfig = {
    /** WebSocket endpoint URL */
    url: string;
    /** High water mark for backpressure control (bytes) — see isLocalBackpressured() */
    highWM?: number;
    /** Low water mark for backpressure control (bytes) */
    lowWM?: number;
};
/**
 * WebSocketAudioClient - Abstract base class for WebSocket clients
 * that send audio frames and JSON messages
 *
 * @template V - Version type (number, string, object, etc.)
 * @template TUpward - Type of upward message data (Client -> Server)
 * @template TDownward - Type of downward message data (Server -> Client)
 *
 * @example
 * ```typescript
 * class MyClient extends WebSocketAudioClient<number, MyUpMsg, MyDownMsg> {
 *   protected onConnected() {
 *     console.log('Connected!');
 *   }
 *
 *   protected onMessage(msg) {
 *     console.log('Received:', msg.type, msg.data);
 *   }
 *
 *   protected onDisconnected(code, reason) {
 *     console.log('Disconnected:', code, reason);
 *   }
 *
 *   protected onError(error) {
 *     console.error('Error:', error);
 *   }
 * }
 *
 * const client = new MyClient({ url: 'ws://localhost:8080' });
 * client.connect();
 * client.sendMessage(1, 'configure', { language: 'en' });
 * client.sendAudio(audioData);
 * ```
 */
declare abstract class WebSocketAudioClient<V = number, // Version type (default: number)
TUpward = unknown, // Upward message data type
TDownward = unknown> {
    /** Client configuration (url and watermarks) provided at construction */
    private cfg;
    /** Converts version values to/from their byte representation on the wire */
    protected versionSerializer: VersionSerializer<V>;
    /** Underlying WebSocket instance */
    private ws;
    /** Sequence counter — presumably per-frame ordering; confirm against implementation */
    private seq;
    /** High water mark (bytes) used by isLocalBackpressured() */
    private HWM;
    /** Low water mark (bytes) used by isLocalBackpressured() */
    private LWM;
    constructor(cfg: ClientConfig, versionSerializer?: VersionSerializer<V>);
    /**
     * Hook: Called when WebSocket connection is established
     */
    protected abstract onConnected(): void;
    /**
     * Hook: Called when WebSocket connection closes
     * @param code - Close code (see WebSocketCloseCode enum)
     * @param reason - Human-readable close reason
     */
    protected abstract onDisconnected(code: number, reason: string): void;
    /**
     * Hook: Called when WebSocket error occurs
     */
    protected abstract onError(error: Event): void;
    /**
     * Hook: Called when downward message arrives from server
     * Override this to handle messages (optional - default does nothing)
     */
    protected onMessage(_msg: Message<V> & {
        data: TDownward;
    }): void;
    connect(): void;
    /**
     * Send JSON message to server
     * @param version - Message version
     * @param type - Message type (developer defined)
     * @param data - Message payload (typed)
     */
    sendMessage(version: V, type: string, data: TUpward): void;
    /**
     * Send audio frame with specified encoding and sample rate
     * @param audioData - Audio data (any format: Int16Array, Uint8Array, ArrayBuffer, etc.)
     * @param version - Audio frame version
     * @param encodingId - Audio encoding ID (0-5, e.g., AudioEncoding.LINEAR16)
     * @param sampleRate - Sample rate in Hz (e.g., 16000)
     */
    sendAudio(audioData: ArrayBuffer | ArrayBufferView, version: V, encodingId: number, sampleRate: number): void;
    /**
     * Get current WebSocket buffer size
     */
    getBufferedAmount(): number;
    /**
     * Check if local buffer is backpressured
     */
    isLocalBackpressured(): boolean;
    /**
     * Check if ready to send audio
     * Verifies: connection open, no local buffer pressure
     */
    canSend(): boolean;
    /**
     * Check if connection is open
     */
    isOpen(): boolean;
    /**
     * Get current connection state
     */
    getReadyState(): number;
    /**
     * Close the WebSocket connection
     * Protected method for subclasses to implement disconnect logic
     * @param code - WebSocket close code (default: 1000 = normal closure)
     * @param reason - Human-readable close reason
     */
    protected closeConnection(code?: number, reason?: string): void;
}
|
|
1640
|
+
|
|
1641
|
+
/**
 * Recognition Client Types
 *
 * Type definitions and interfaces for the recognition client SDK.
 * These interfaces enable dependency injection, testing, and alternative implementations.
 */
/**
 * Client connection state enum
 * Represents the various states a recognition client can be in during its lifecycle
 */
declare enum ClientState {
    /** Initial state, no connection established */
    INITIAL = "initial",
    /** Actively establishing WebSocket connection */
    CONNECTING = "connecting",
    /** WebSocket connected but waiting for server ready signal */
    CONNECTED = "connected",
    /** Server ready, can send audio */
    READY = "ready",
    /** Sent stop signal, waiting for final transcript */
    STOPPING = "stopping",
    /** Connection closed normally after stop */
    STOPPED = "stopped",
    /** Connection failed or lost unexpectedly */
    FAILED = "failed"
}
|
|
1668
|
+
/**
 * Callback URL configuration with message type filtering
 */
interface RecognitionCallbackUrl {
    /** The callback URL endpoint */
    url: string;
    /** Array of message types to send to this URL. If empty/undefined, all types are sent */
    messageTypes?: Array<string | number>;
}
|
|
1677
|
+
/**
 * Configuration options for a recognition client.
 * Supplies endpoint selection (url/stage), ASR parameters, session identifiers,
 * event callbacks, and buffering/retry tuning.
 */
interface IRecognitionClientConfig {
    /**
     * WebSocket endpoint URL (optional)
     * Either `url` or `stage` must be provided.
     * If both are provided, `url` takes precedence.
     *
     * Example with explicit URL:
     * ```typescript
     * { url: 'wss://custom-endpoint.example.com/ws/v1/recognize' }
     * ```
     */
    url?: string;
    /**
     * Stage for recognition service (recommended)
     * Either `url` or `stage` must be provided.
     * If both are provided, `url` takes precedence.
     * Defaults to production if neither is provided.
     *
     * Example with STAGES enum (recommended):
     * ```typescript
     * import { STAGES } from '@recog/shared-types';
     * { stage: STAGES.STAGING }
     * ```
     *
     * String values also accepted:
     * ```typescript
     * { stage: 'staging' } // STAGES.LOCAL | STAGES.DEV | STAGES.STAGING | STAGES.PRODUCTION
     * ```
     */
    stage?: Stage | string;
    /** ASR configuration (provider, model, language, etc.) - optional */
    asrRequestConfig?: ASRRequestConfig;
    /** Game context for improved recognition accuracy */
    gameContext?: GameContextV1;
    /**
     * Game ID for tracking and routing purposes (optional)
     * If provided, this is added to the WebSocket URL as a query parameter.
     * If gameContext is also provided, this takes precedence over gameContext.gameId.
     */
    gameId?: string;
    /** Audio utterance ID (optional) - if not provided, a UUID v4 will be generated */
    audioUtteranceId?: string;
    /** Callback URLs for server-side notifications with optional message type filtering (optional)
     * Games only need to set this if another service needs to be notified about the transcription results.
     */
    callbackUrls?: RecognitionCallbackUrl[];
    /** User identification (optional) */
    userId?: string;
    /** Game session identification (optional). called 'sessionId' in Platform and most games. */
    gameSessionId?: string;
    /** Device identification (optional) */
    deviceId?: string;
    /** Account identification (optional) */
    accountId?: string;
    /** Question answer identifier for tracking Q&A sessions (optional and tracking purpose only) */
    questionAnswerId?: string;
    /** Platform for audio recording device (optional, e.g., 'ios', 'android', 'web', 'unity') */
    platform?: string;
    /** Callback when transcript is received */
    onTranscript?: (result: TranscriptionResultV1) => void;
    /**
     * Callback when function call is received
     * Note: Not supported in 2025. P2 feature for future speech-to-function-call capability.
     */
    onFunctionCall?: (result: FunctionCallResultV1) => void;
    /** Callback when metadata is received. Only once after transcription is complete. */
    onMetadata?: (metadata: MetadataResultV1) => void;
    /** Callback when error occurs */
    onError?: (error: ErrorResultV1) => void;
    /** Callback when connected to WebSocket */
    onConnected?: () => void;
    /**
     * Callback when WebSocket disconnects
     * @param code - WebSocket close code (1000 = normal, 1006 = abnormal, etc.)
     * @param reason - Close reason string
     */
    onDisconnected?: (code: number, reason: string) => void;
    /** High water mark for backpressure control (bytes) */
    highWaterMark?: number;
    /** Low water mark for backpressure control (bytes) */
    lowWaterMark?: number;
    /** Maximum buffer duration in seconds (default: 60s) */
    maxBufferDurationSec?: number;
    /** Expected chunks per second for ring buffer sizing (default: 100) */
    chunksPerSecond?: number;
    /**
     * Connection retry configuration (optional)
     * Only applies to initial connection establishment, not mid-stream interruptions.
     *
     * Default: { maxAttempts: 4, delayMs: 200 } (try once, retry 3 times = 4 total attempts)
     *
     * Timing: Attempt 1 → FAIL → wait 200ms → Attempt 2 → FAIL → wait 200ms → Attempt 3 → FAIL → wait 200ms → Attempt 4
     *
     * Example:
     * ```typescript
     * {
     *   connectionRetry: {
     *     maxAttempts: 2, // Try connecting up to 2 times (1 retry)
     *     delayMs: 500    // Wait 500ms between attempts
     *   }
     * }
     * ```
     */
    connectionRetry?: {
        /** Maximum number of connection attempts (default: 4, min: 1, max: 5) */
        maxAttempts?: number;
        /** Delay in milliseconds between retry attempts (default: 200ms) */
        delayMs?: number;
    };
    /**
     * Optional logger function for debugging
     * If not provided, no logging will occur
     * @param level - Log level: 'debug', 'info', 'warn', 'error'
     * @param message - Log message
     * @param data - Optional additional data
     */
    logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void;
}
|
|
1795
|
+
/**
 * Recognition Client Interface
 *
 * Main interface for real-time speech recognition clients.
 * Provides methods for connection management, audio streaming, and session control.
 */
interface IRecognitionClient {
    /**
     * Connect to the WebSocket endpoint
     * @returns Promise that resolves when connected
     * @throws Error if connection fails or times out
     */
    connect(): Promise<void>;
    /**
     * Send audio data to the recognition service
     * Audio is buffered locally and sent when connection is ready.
     * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
     */
    sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
    /**
     * Stop recording and wait for final transcript
     * The server will close the connection after sending the final transcript.
     * @returns Promise that resolves when final transcript is received
     */
    stopRecording(): Promise<void>;
    /**
     * Force stop and immediately close connection without waiting for server
     *
     * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
     * - Does NOT wait for server to process remaining audio
     * - Does NOT receive final transcript from server
     * - Immediately closes WebSocket connection
     * - Cleans up resources (buffers, listeners)
     *
     * Use Cases:
     * - User explicitly cancels/abandons session
     * - Timeout scenarios where waiting is not acceptable
     * - Need immediate cleanup and can't wait for server
     *
     * RECOMMENDED: Use stopRecording() for normal shutdown.
     * Only use this when immediate disconnection is required.
     */
    stopAbnormally(): void;
    /**
     * Get the audio utterance ID for this session
     * Available immediately after client construction.
     * @returns UUID v4 string identifying this recognition session
     */
    getAudioUtteranceId(): string;
    /**
     * Get the current state of the client
     * @returns Current ClientState value
     */
    getState(): ClientState;
    /**
     * Check if WebSocket connection is open
     * @returns true if connected and ready to communicate
     */
    isConnected(): boolean;
    /**
     * Check if client is currently connecting
     * @returns true if connection is in progress
     */
    isConnecting(): boolean;
    /**
     * Check if client is currently stopping
     * @returns true if stopRecording() is in progress
     */
    isStopping(): boolean;
    /**
     * Check if transcription has finished
     * @returns true if the transcription is complete
     */
    isTranscriptionFinished(): boolean;
    /**
     * Check if the audio buffer has overflowed
     * @returns true if the ring buffer has wrapped around
     */
    isBufferOverflowing(): boolean;
    /**
     * Get client statistics
     * @returns Statistics about audio transmission and buffering
     */
    getStats(): IRecognitionClientStats;
    /**
     * Get the WebSocket URL being used by this client
     * Available immediately after client construction.
     * @returns WebSocket URL string
     */
    getUrl(): string;
}
|
|
1886
|
+
/**
 * Client statistics interface
 * Counters describing audio transmission and ring-buffer state for one session
 */
interface IRecognitionClientStats {
    /** Total audio bytes sent to server */
    audioBytesSent: number;
    /** Total number of audio chunks sent */
    audioChunksSent: number;
    /** Total number of audio chunks buffered */
    audioChunksBuffered: number;
    /** Number of times the ring buffer overflowed */
    bufferOverflowCount: number;
    /** Current number of chunks in buffer */
    currentBufferedChunks: number;
    /** Whether the ring buffer has wrapped (overwritten old data) */
    hasWrapped: boolean;
}
|
|
1903
|
+
/**
 * Configuration for RealTimeTwoWayWebSocketRecognitionClient
 * This extends IRecognitionClientConfig and is the main configuration interface
 * for creating a new RealTimeTwoWayWebSocketRecognitionClient instance.
 * (Currently adds no fields of its own; it exists as an extension point.)
 */
interface RealTimeTwoWayWebSocketRecognitionClientConfig extends IRecognitionClientConfig {
}
|
|
1910
|
+
|
|
1911
|
+
/**
|
|
1912
|
+
* RealTimeTwoWayWebSocketRecognitionClient - Clean, compact SDK for real-time speech recognition
|
|
1913
|
+
*
|
|
1914
|
+
* Features:
|
|
1915
|
+
* - Ring buffer-based audio storage with fixed memory footprint
|
|
1916
|
+
* - Automatic buffering when disconnected, immediate send when connected
|
|
1917
|
+
* - Buffer persists after flush (for future retry/reconnection scenarios)
|
|
1918
|
+
* - Built on WebSocketAudioClient for robust protocol handling
|
|
1919
|
+
* - Simple API: connect() → sendAudio() → stopRecording()
|
|
1920
|
+
* - Type-safe message handling with callbacks
|
|
1921
|
+
* - Automatic backpressure management
|
|
1922
|
+
* - Overflow detection with buffer state tracking
|
|
1923
|
+
*
|
|
1924
|
+
* Example:
|
|
1925
|
+
* ```typescript
|
|
1926
|
+
* const client = new RealTimeTwoWayWebSocketRecognitionClient({
|
|
1927
|
+
* url: 'ws://localhost:3101/ws/v1/recognize',
|
|
1928
|
+
* onTranscript: (result) => console.log(result.finalTranscript),
|
|
1929
|
+
* onError: (error) => console.error(error),
|
|
1930
|
+
* maxBufferDurationSec: 60 // Ring buffer for 60 seconds
|
|
1931
|
+
* });
|
|
1932
|
+
*
|
|
1933
|
+
* await client.connect();
|
|
1934
|
+
*
|
|
1935
|
+
* // Send audio chunks - always stored in ring buffer, sent if connected
|
|
1936
|
+
* micStream.on('data', (chunk) => client.sendAudio(chunk));
|
|
1937
|
+
*
|
|
1938
|
+
* // Signal end of audio and wait for final results
|
|
1939
|
+
* await client.stopRecording();
|
|
1940
|
+
*
|
|
1941
|
+
* // Server will close connection after sending finals
|
|
1942
|
+
* // No manual cleanup needed - browser handles it
|
|
1943
|
+
* ```
|
|
1944
|
+
*/
|
|
1945
|
+
|
|
1946
|
+
/**
 * Check if a WebSocket close code indicates normal closure
 * @param code - WebSocket close code
 * @returns true if the disconnection was normal/expected, false if it was an error
 */
declare function isNormalDisconnection(code: number): boolean;
|
|
1952
|
+
/**
 * Re-export TranscriptionResultV1 as TranscriptionResult for backward compatibility
 */
type TranscriptionResult = TranscriptionResultV1;
|
|
1956
|
+
|
|
1957
|
+
/**
 * RealTimeTwoWayWebSocketRecognitionClient - SDK-level client for real-time speech recognition
 *
 * Implements IRecognitionClient interface for dependency injection and testing.
 * Extends WebSocketAudioClient with local audio buffering and simple callback-based API.
 */
declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioClient<number, any, any> implements IRecognitionClient {
    /** Wire protocol version sent with every message/audio frame */
    private static readonly PROTOCOL_VERSION;
    /** Configuration supplied at construction (see RealTimeTwoWayWebSocketRecognitionClientConfig) */
    private config;
    /** Ring buffer of audio chunks; persists across flushes for retry scenarios */
    private audioBuffer;
    /** Dispatches incoming server messages to the configured callbacks */
    private messageHandler;
    /** Current lifecycle state (see ClientState) */
    private state;
    /** Pending promise for an in-flight connect() call */
    private connectionPromise;
    /** Gates debug-level output of the log() helper */
    private isDebugLogEnabled;
    /** Running total of audio bytes sent (reported via getStats()) */
    private audioBytesSent;
    /** Running total of audio chunks sent (reported via getStats()) */
    private audioChunksSent;
    /** Interval between periodic audio-stats log lines — presumably ms; confirm against implementation */
    private audioStatsLogInterval;
    /** Timestamp of the most recent audio-stats log line */
    private lastAudioStatsLog;
    constructor(config: RealTimeTwoWayWebSocketRecognitionClientConfig);
    /**
     * Internal logging helper - only logs if a logger was provided in config
     * Debug logs are additionally gated by isDebugLogEnabled flag
     * @param level - Log level: debug, info, warn, or error
     * @param message - Message to log
     * @param data - Optional additional data to log
     */
    private log;
    /**
     * Clean up internal resources to free memory
     * Called when connection closes (normally or abnormally)
     */
    private cleanup;
    connect(): Promise<void>;
    /**
     * Attempt to connect with retry logic
     * Only retries on initial connection establishment, not mid-stream interruptions
     */
    private connectWithRetry;
    sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
    private sendAudioInternal;
    /**
     * Only active when the client is in READY state; otherwise it returns immediately.
     * @returns Promise that resolves when the recording is stopped
     */
    stopRecording(): Promise<void>;
    stopAbnormally(): void;
    getAudioUtteranceId(): string;
    getUrl(): string;
    getState(): ClientState;
    isConnected(): boolean;
    isConnecting(): boolean;
    isStopping(): boolean;
    isTranscriptionFinished(): boolean;
    isBufferOverflowing(): boolean;
    getStats(): IRecognitionClientStats;
    protected onConnected(): void;
    protected onDisconnected(code: number, reason: string): void;
    /**
     * Get human-readable description for WebSocket close code
     */
    private getCloseCodeDescription;
    protected onError(error: Event): void;
    protected onMessage(msg: {
        v: number;
        type: string;
        data: any;
    }): void;
    /**
     * Handle control messages from server
     * @param msg - Control message containing server actions
     */
    private handleControlMessage;
    /**
     * Send audio immediately to the server (without buffering)
     * @param audioData - Audio data to send
     */
    private sendAudioNow;
}
|
|
2035
|
+
|
|
2036
|
+
/**
|
|
2037
|
+
* Configuration Builder for Recognition Client
|
|
2038
|
+
*
|
|
2039
|
+
* Simple builder pattern for RealTimeTwoWayWebSocketRecognitionClientConfig
|
|
2040
|
+
*/
|
|
2041
|
+
|
|
2042
|
+
/**
|
|
2043
|
+
* Builder for RealTimeTwoWayWebSocketRecognitionClientConfig
|
|
2044
|
+
*
|
|
2045
|
+
* Provides a fluent API for building client configurations.
|
|
2046
|
+
*
|
|
2047
|
+
* Example:
|
|
2048
|
+
* ```typescript
|
|
2049
|
+
* import { STAGES } from '@recog/shared-types';
|
|
2050
|
+
*
|
|
2051
|
+
* const config = new ConfigBuilder()
|
|
2052
|
+
* .stage(STAGES.STAGING) // Recommended: automatic environment selection
|
|
2053
|
+
* .asrRequestConfig({
|
|
2054
|
+
* provider: RecognitionProvider.DEEPGRAM,
|
|
2055
|
+
* model: 'nova-2-general'
|
|
2056
|
+
* })
|
|
2057
|
+
* .onTranscript((result) => console.log(result))
|
|
2058
|
+
* .build();
|
|
2059
|
+
* ```
|
|
2060
|
+
*/
|
|
2061
|
+
declare class ConfigBuilder {
|
|
2062
|
+
private config;
|
|
2063
|
+
/**
|
|
2064
|
+
* Set the WebSocket URL (advanced usage)
|
|
2065
|
+
* For standard environments, use stage() instead
|
|
2066
|
+
*/
|
|
2067
|
+
url(url: string): this;
|
|
2068
|
+
/**
|
|
2069
|
+
* Set the stage for automatic environment selection (recommended)
|
|
2070
|
+
* @param stage - STAGES.LOCAL | STAGES.DEV | STAGES.STAGING | STAGES.PRODUCTION
|
|
2071
|
+
* @example
|
|
2072
|
+
* ```typescript
|
|
2073
|
+
* import { STAGES } from '@recog/shared-types';
|
|
2074
|
+
* builder.stage(STAGES.STAGING)
|
|
2075
|
+
* ```
|
|
2076
|
+
*/
|
|
2077
|
+
stage(stage: Stage | string): this;
|
|
2078
|
+
/**
|
|
2079
|
+
* Set ASR request configuration
|
|
2080
|
+
*/
|
|
2081
|
+
asrRequestConfig(config: ASRRequestConfig): this;
|
|
2082
|
+
/**
|
|
2083
|
+
* Set game context
|
|
2084
|
+
*/
|
|
2085
|
+
gameContext(context: GameContextV1): this;
|
|
2086
|
+
/**
|
|
2087
|
+
* Set game ID directly (takes precedence over gameContext.gameId)
|
|
2088
|
+
* Use this when you only need to identify the game without full context.
|
|
2089
|
+
*/
|
|
2090
|
+
gameId(id: string): this;
|
|
2091
|
+
/**
|
|
2092
|
+
* Set audio utterance ID
|
|
2093
|
+
*/
|
|
2094
|
+
audioUtteranceId(id: string): this;
|
|
2095
|
+
/**
|
|
2096
|
+
* Set callback URLs
|
|
2097
|
+
*/
|
|
2098
|
+
callbackUrls(urls: RecognitionCallbackUrl[]): this;
|
|
2099
|
+
/**
|
|
2100
|
+
* Set user ID
|
|
2101
|
+
*/
|
|
2102
|
+
userId(id: string): this;
|
|
2103
|
+
/**
|
|
2104
|
+
* Set game session ID
|
|
2105
|
+
*/
|
|
2106
|
+
gameSessionId(id: string): this;
|
|
2107
|
+
/**
|
|
2108
|
+
* Set device ID
|
|
2109
|
+
*/
|
|
2110
|
+
deviceId(id: string): this;
|
|
2111
|
+
/**
|
|
2112
|
+
* Set account ID
|
|
2113
|
+
*/
|
|
2114
|
+
accountId(id: string): this;
|
|
2115
|
+
/**
|
|
2116
|
+
* Set question answer ID
|
|
2117
|
+
*/
|
|
2118
|
+
questionAnswerId(id: string): this;
|
|
2119
|
+
/**
|
|
2120
|
+
* Set platform
|
|
2121
|
+
*/
|
|
2122
|
+
platform(platform: string): this;
|
|
2123
|
+
/**
|
|
2124
|
+
* Set transcript callback
|
|
2125
|
+
*/
|
|
2126
|
+
onTranscript(callback: (result: TranscriptionResultV1) => void): this;
|
|
2127
|
+
/**
|
|
2128
|
+
* Set metadata callback
|
|
2129
|
+
*/
|
|
2130
|
+
onMetadata(callback: (metadata: MetadataResultV1) => void): this;
|
|
2131
|
+
/**
|
|
2132
|
+
* Set error callback
|
|
2133
|
+
*/
|
|
2134
|
+
onError(callback: (error: ErrorResultV1) => void): this;
|
|
2135
|
+
/**
|
|
2136
|
+
* Set connected callback
|
|
2137
|
+
*/
|
|
2138
|
+
onConnected(callback: () => void): this;
|
|
2139
|
+
/**
|
|
2140
|
+
* Set disconnected callback
|
|
2141
|
+
*/
|
|
2142
|
+
onDisconnected(callback: (code: number, reason: string) => void): this;
|
|
2143
|
+
/**
|
|
2144
|
+
* Set high water mark
|
|
2145
|
+
*/
|
|
2146
|
+
highWaterMark(bytes: number): this;
|
|
2147
|
+
/**
|
|
2148
|
+
* Set low water mark
|
|
2149
|
+
*/
|
|
2150
|
+
lowWaterMark(bytes: number): this;
|
|
2151
|
+
/**
|
|
2152
|
+
* Set max buffer duration in seconds
|
|
2153
|
+
*/
|
|
2154
|
+
maxBufferDurationSec(seconds: number): this;
|
|
2155
|
+
/**
|
|
2156
|
+
* Set chunks per second
|
|
2157
|
+
*/
|
|
2158
|
+
chunksPerSecond(chunks: number): this;
|
|
2159
|
+
/**
|
|
2160
|
+
* Set logger function
|
|
2161
|
+
*/
|
|
2162
|
+
logger(logger: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void): this;
|
|
2163
|
+
/**
|
|
2164
|
+
* Build the configuration
|
|
2165
|
+
*/
|
|
2166
|
+
build(): RealTimeTwoWayWebSocketRecognitionClientConfig;
|
|
2167
|
+
}
|
|
2168
|
+
|
|
2169
|
+
/**
|
|
2170
|
+
* Factory function for creating Recognition Client instances
|
|
2171
|
+
*/
|
|
2172
|
+
|
|
2173
|
+
/**
|
|
2174
|
+
* Create a recognition client from a configuration object
|
|
2175
|
+
*
|
|
2176
|
+
* Example:
|
|
2177
|
+
* ```typescript
|
|
2178
|
+
* const client = createClient({
|
|
2179
|
+
* url: 'ws://localhost:3101/ws/v1/recognize',
|
|
2180
|
+
* audioUtteranceId: 'unique-id',
|
|
2181
|
+
* onTranscript: (result) => console.log(result)
|
|
2182
|
+
* });
|
|
2183
|
+
* ```
|
|
2184
|
+
*
|
|
2185
|
+
* @param config - Client configuration
|
|
2186
|
+
* @returns Configured recognition client instance
|
|
2187
|
+
*/
|
|
2188
|
+
declare function createClient(config: RealTimeTwoWayWebSocketRecognitionClientConfig): IRecognitionClient;
|
|
2189
|
+
/**
|
|
2190
|
+
* Create a recognition client using the builder pattern
|
|
2191
|
+
*
|
|
2192
|
+
* Example:
|
|
2193
|
+
* ```typescript
|
|
2194
|
+
* const client = createClientWithBuilder((builder) =>
|
|
2195
|
+
* builder
|
|
2196
|
+
* .url('ws://localhost:3101/ws/v1/recognize')
|
|
2197
|
+
* .onTranscript((result) => console.log(result))
|
|
2198
|
+
* .onError((error) => console.error(error))
|
|
2199
|
+
* );
|
|
2200
|
+
* ```
|
|
2201
|
+
*/
|
|
2202
|
+
declare function createClientWithBuilder(configure: (builder: ConfigBuilder) => ConfigBuilder): IRecognitionClient;
|
|
2203
|
+
|
|
2204
|
+
/**
|
|
2205
|
+
* SDK Error Classes
|
|
2206
|
+
*
|
|
2207
|
+
* Typed error classes that extend native Error with recognition-specific metadata
|
|
2208
|
+
*/
|
|
2209
|
+
|
|
2210
|
+
/**
|
|
2211
|
+
* Base class for all recognition SDK errors
|
|
2212
|
+
*/
|
|
2213
|
+
declare class RecognitionError extends Error {
|
|
2214
|
+
readonly errorType: ErrorTypeV1;
|
|
2215
|
+
readonly timestamp: number;
|
|
2216
|
+
constructor(errorType: ErrorTypeV1, message: string);
|
|
2217
|
+
}
|
|
2218
|
+
/**
|
|
2219
|
+
* Connection error - thrown when WebSocket connection fails after all retry attempts
|
|
2220
|
+
*/
|
|
2221
|
+
declare class ConnectionError extends RecognitionError {
|
|
2222
|
+
readonly attempts: number;
|
|
2223
|
+
readonly url: string;
|
|
2224
|
+
readonly underlyingError?: Error;
|
|
2225
|
+
constructor(message: string, attempts: number, url: string, underlyingError?: Error);
|
|
2226
|
+
}
|
|
2227
|
+
/**
|
|
2228
|
+
* Timeout error - thrown when operations exceed timeout limits
|
|
2229
|
+
*/
|
|
2230
|
+
declare class TimeoutError extends RecognitionError {
|
|
2231
|
+
readonly timeoutMs: number;
|
|
2232
|
+
readonly operation: string;
|
|
2233
|
+
constructor(message: string, timeoutMs: number, operation: string);
|
|
2234
|
+
}
|
|
2235
|
+
/**
|
|
2236
|
+
* Validation error - thrown when invalid configuration or input is provided
|
|
2237
|
+
*/
|
|
2238
|
+
declare class ValidationError extends RecognitionError {
|
|
2239
|
+
readonly field?: string;
|
|
2240
|
+
readonly expected?: string;
|
|
2241
|
+
readonly received?: string;
|
|
2242
|
+
constructor(message: string, field?: string, expected?: string, received?: string);
|
|
2243
|
+
}
|
|
2244
|
+
|
|
2245
|
+
/**
|
|
2246
|
+
* VGF-style state schema for game-side recognition state/results management.
|
|
2247
|
+
*
|
|
2248
|
+
* This schema provides a standardized way for game developers to manage
|
|
2249
|
+
* voice recognition state and results in their applications. It supports:
|
|
2250
|
+
*
|
|
2251
|
+
* STEP 1: Basic transcription flow
|
|
2252
|
+
* STEP 2: Mic auto-stop upon correct answer (using partial transcripts)
|
|
2253
|
+
* STEP 3: Semantic/function-call outcomes for game actions
|
|
2254
|
+
*
|
|
2255
|
+
* Ideally this should be part of a more centralized shared type library to free
|
|
2256
|
+
* game developers and provide helper functions (VGF? Platform SDK?).
|
|
2257
|
+
*/
|
|
2258
|
+
declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
2259
|
+
audioUtteranceId: z.ZodString;
|
|
2260
|
+
startRecordingStatus: z.ZodOptional<z.ZodString>;
|
|
2261
|
+
transcriptionStatus: z.ZodOptional<z.ZodString>;
|
|
2262
|
+
finalTranscript: z.ZodOptional<z.ZodString>;
|
|
2263
|
+
finalConfidence: z.ZodOptional<z.ZodNumber>;
|
|
2264
|
+
asrConfig: z.ZodOptional<z.ZodString>;
|
|
2265
|
+
startRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
2266
|
+
finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
2267
|
+
finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
|
|
2268
|
+
pendingTranscript: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
2269
|
+
pendingConfidence: z.ZodOptional<z.ZodNumber>;
|
|
2270
|
+
functionCallMetadata: z.ZodOptional<z.ZodString>;
|
|
2271
|
+
functionCallConfidence: z.ZodOptional<z.ZodNumber>;
|
|
2272
|
+
finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
|
|
2273
|
+
promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
|
|
2274
|
+
recognitionActionProcessingState: z.ZodOptional<z.ZodString>;
|
|
2275
|
+
}, "strip", z.ZodTypeAny, {
|
|
2276
|
+
audioUtteranceId: string;
|
|
2277
|
+
pendingTranscript: string;
|
|
2278
|
+
startRecordingStatus?: string | undefined;
|
|
2279
|
+
transcriptionStatus?: string | undefined;
|
|
2280
|
+
finalTranscript?: string | undefined;
|
|
2281
|
+
finalConfidence?: number | undefined;
|
|
2282
|
+
asrConfig?: string | undefined;
|
|
2283
|
+
startRecordingTimestamp?: string | undefined;
|
|
2284
|
+
finalRecordingTimestamp?: string | undefined;
|
|
2285
|
+
finalTranscriptionTimestamp?: string | undefined;
|
|
2286
|
+
pendingConfidence?: number | undefined;
|
|
2287
|
+
functionCallMetadata?: string | undefined;
|
|
2288
|
+
functionCallConfidence?: number | undefined;
|
|
2289
|
+
finalFunctionCallTimestamp?: string | undefined;
|
|
2290
|
+
promptSlotMap?: Record<string, string[]> | undefined;
|
|
2291
|
+
recognitionActionProcessingState?: string | undefined;
|
|
2292
|
+
}, {
|
|
2293
|
+
audioUtteranceId: string;
|
|
2294
|
+
startRecordingStatus?: string | undefined;
|
|
2295
|
+
transcriptionStatus?: string | undefined;
|
|
2296
|
+
finalTranscript?: string | undefined;
|
|
2297
|
+
finalConfidence?: number | undefined;
|
|
2298
|
+
asrConfig?: string | undefined;
|
|
2299
|
+
startRecordingTimestamp?: string | undefined;
|
|
2300
|
+
finalRecordingTimestamp?: string | undefined;
|
|
2301
|
+
finalTranscriptionTimestamp?: string | undefined;
|
|
2302
|
+
pendingTranscript?: string | undefined;
|
|
2303
|
+
pendingConfidence?: number | undefined;
|
|
2304
|
+
functionCallMetadata?: string | undefined;
|
|
2305
|
+
functionCallConfidence?: number | undefined;
|
|
2306
|
+
finalFunctionCallTimestamp?: string | undefined;
|
|
2307
|
+
promptSlotMap?: Record<string, string[]> | undefined;
|
|
2308
|
+
recognitionActionProcessingState?: string | undefined;
|
|
2309
|
+
}>;
|
|
2310
|
+
type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>;
|
|
2311
|
+
declare const RecordingStatus: {
|
|
2312
|
+
readonly NOT_READY: "NOT_READY";
|
|
2313
|
+
readonly READY: "READY";
|
|
2314
|
+
readonly RECORDING: "RECORDING";
|
|
2315
|
+
readonly FINISHED: "FINISHED";
|
|
2316
|
+
};
|
|
2317
|
+
type RecordingStatusType = typeof RecordingStatus[keyof typeof RecordingStatus];
|
|
2318
|
+
declare const TranscriptionStatus: {
|
|
2319
|
+
readonly NOT_STARTED: "NOT_STARTED";
|
|
2320
|
+
readonly IN_PROGRESS: "IN_PROGRESS";
|
|
2321
|
+
readonly FINALIZED: "FINALIZED";
|
|
2322
|
+
readonly ABORTED: "ABORTED";
|
|
2323
|
+
readonly ERROR: "ERROR";
|
|
2324
|
+
};
|
|
2325
|
+
type TranscriptionStatusType = typeof TranscriptionStatus[keyof typeof TranscriptionStatus];
|
|
2326
|
+
declare function createInitialRecognitionState(audioUtteranceId: string): RecognitionState;
|
|
2327
|
+
declare function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean;
|
|
2328
|
+
|
|
2329
|
+
/**
|
|
2330
|
+
* Simplified VGF Recognition Client
|
|
2331
|
+
*
|
|
2332
|
+
* A thin wrapper around RealTimeTwoWayWebSocketRecognitionClient that maintains
|
|
2333
|
+
* a VGF RecognitionState as a pure sink/output of recognition events.
|
|
2334
|
+
*
|
|
2335
|
+
* The VGF state is updated based on events but never influences client behavior.
|
|
2336
|
+
* All functionality is delegated to the underlying client.
|
|
2337
|
+
*/
|
|
2338
|
+
|
|
2339
|
+
/**
|
|
2340
|
+
* Configuration for SimplifiedVGFRecognitionClient
|
|
2341
|
+
*/
|
|
2342
|
+
interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
|
|
2343
|
+
/**
|
|
2344
|
+
* Callback invoked whenever the VGF state changes
|
|
2345
|
+
* Use this to update your UI or React state
|
|
2346
|
+
*/
|
|
2347
|
+
onStateChange?: (state: RecognitionState) => void;
|
|
2348
|
+
/**
|
|
2349
|
+
* Optional initial state to restore from a previous session
|
|
2350
|
+
* If provided, audioUtteranceId will be extracted and used
|
|
2351
|
+
*/
|
|
2352
|
+
initialState?: RecognitionState;
|
|
2353
|
+
}
|
|
2354
|
+
/**
|
|
2355
|
+
* Interface for SimplifiedVGFRecognitionClient
|
|
2356
|
+
*
|
|
2357
|
+
* A simplified client that maintains VGF state for game developers.
|
|
2358
|
+
* All methods from the underlying client are available, plus VGF state management.
|
|
2359
|
+
*/
|
|
2360
|
+
interface ISimplifiedVGFRecognitionClient {
|
|
2361
|
+
/**
|
|
2362
|
+
* Connect to the recognition service WebSocket
|
|
2363
|
+
* @returns Promise that resolves when connected and ready
|
|
2364
|
+
*/
|
|
2365
|
+
connect(): Promise<void>;
|
|
2366
|
+
/**
|
|
2367
|
+
* Send audio data for transcription
|
|
2368
|
+
* @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
|
|
2369
|
+
*/
|
|
2370
|
+
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
2371
|
+
/**
|
|
2372
|
+
* Stop recording and wait for final transcription
|
|
2373
|
+
* @returns Promise that resolves when transcription is complete
|
|
2374
|
+
*/
|
|
2375
|
+
stopRecording(): Promise<void>;
|
|
2376
|
+
/**
|
|
2377
|
+
* Force stop and immediately close connection without waiting for server
|
|
2378
|
+
*
|
|
2379
|
+
* WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
|
|
2380
|
+
* - Does NOT wait for server to process remaining audio
|
|
2381
|
+
* - Does NOT receive final transcript from server (VGF state set to empty)
|
|
2382
|
+
* - Immediately closes WebSocket connection
|
|
2383
|
+
* - Cleans up resources (buffers, listeners)
|
|
2384
|
+
*
|
|
2385
|
+
* Use Cases:
|
|
2386
|
+
* - User explicitly cancels/abandons the session
|
|
2387
|
+
* - Timeout scenarios where waiting is not acceptable
|
|
2388
|
+
* - Need immediate cleanup and can't wait for server
|
|
2389
|
+
*
|
|
2390
|
+
* RECOMMENDED: Use stopRecording() for normal shutdown.
|
|
2391
|
+
* Only use this when immediate disconnection is required.
|
|
2392
|
+
*/
|
|
2393
|
+
stopAbnormally(): void;
|
|
2394
|
+
/**
|
|
2395
|
+
* Get the current VGF recognition state
|
|
2396
|
+
* @returns Current RecognitionState with all transcription data
|
|
2397
|
+
*/
|
|
2398
|
+
getVGFState(): RecognitionState;
|
|
2399
|
+
/**
|
|
2400
|
+
* Check if connected to the WebSocket
|
|
2401
|
+
*/
|
|
2402
|
+
isConnected(): boolean;
|
|
2403
|
+
/**
|
|
2404
|
+
* Check if currently connecting
|
|
2405
|
+
*/
|
|
2406
|
+
isConnecting(): boolean;
|
|
2407
|
+
/**
|
|
2408
|
+
* Check if currently stopping
|
|
2409
|
+
*/
|
|
2410
|
+
isStopping(): boolean;
|
|
2411
|
+
/**
|
|
2412
|
+
* Check if transcription has finished
|
|
2413
|
+
*/
|
|
2414
|
+
isTranscriptionFinished(): boolean;
|
|
2415
|
+
/**
|
|
2416
|
+
* Check if the audio buffer has overflowed
|
|
2417
|
+
*/
|
|
2418
|
+
isBufferOverflowing(): boolean;
|
|
2419
|
+
/**
|
|
2420
|
+
* Get the audio utterance ID for this session
|
|
2421
|
+
*/
|
|
2422
|
+
getAudioUtteranceId(): string;
|
|
2423
|
+
/**
|
|
2424
|
+
* Get the WebSocket URL being used
|
|
2425
|
+
*/
|
|
2426
|
+
getUrl(): string;
|
|
2427
|
+
/**
|
|
2428
|
+
* Get the underlying client state (for advanced usage)
|
|
2429
|
+
*/
|
|
2430
|
+
getState(): ClientState;
|
|
2431
|
+
}
|
|
2432
|
+
/**
|
|
2433
|
+
* This wrapper ONLY maintains VGF state as a sink.
|
|
2434
|
+
* All actual functionality is delegated to the underlying client.
|
|
2435
|
+
*/
|
|
2436
|
+
declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitionClient {
|
|
2437
|
+
private client;
|
|
2438
|
+
private state;
|
|
2439
|
+
private isRecordingAudio;
|
|
2440
|
+
private stateChangeCallback;
|
|
2441
|
+
private expectedUuid;
|
|
2442
|
+
private logger;
|
|
2443
|
+
private lastSentTerminalUuid;
|
|
2444
|
+
constructor(config: SimplifiedVGFClientConfig);
|
|
2445
|
+
connect(): Promise<void>;
|
|
2446
|
+
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
2447
|
+
stopRecording(): Promise<void>;
|
|
2448
|
+
stopAbnormally(): void;
|
|
2449
|
+
getAudioUtteranceId(): string;
|
|
2450
|
+
getUrl(): string;
|
|
2451
|
+
getState(): ClientState;
|
|
2452
|
+
isConnected(): boolean;
|
|
2453
|
+
isConnecting(): boolean;
|
|
2454
|
+
isStopping(): boolean;
|
|
2455
|
+
isTranscriptionFinished(): boolean;
|
|
2456
|
+
isBufferOverflowing(): boolean;
|
|
2457
|
+
getVGFState(): RecognitionState;
|
|
2458
|
+
private isTerminalStatus;
|
|
2459
|
+
private notifyStateChange;
|
|
2460
|
+
}
|
|
2461
|
+
/**
|
|
2462
|
+
* Factory function for creating simplified client
|
|
2463
|
+
* Usage examples:
|
|
2464
|
+
*
|
|
2465
|
+
* // Basic usage
|
|
2466
|
+
* const client = createSimplifiedVGFClient({
|
|
2467
|
+
* asrRequestConfig: { provider: 'deepgram', language: 'en' },
|
|
2468
|
+
* onStateChange: (state) => {
|
|
2469
|
+
* console.log('VGF State updated:', state);
|
|
2470
|
+
* // Update React state, game UI, etc.
|
|
2471
|
+
* }
|
|
2472
|
+
* });
|
|
2473
|
+
*
|
|
2474
|
+
* // With initial state (e.g., restoring from previous session)
|
|
2475
|
+
* const client = createSimplifiedVGFClient({
|
|
2476
|
+
* asrRequestConfig: { provider: 'deepgram', language: 'en' },
|
|
2477
|
+
* initialState: previousState, // Will use audioUtteranceId from state
|
|
2478
|
+
* onStateChange: (state) => setVGFState(state)
|
|
2479
|
+
* });
|
|
2480
|
+
*
|
|
2481
|
+
* // With initial state containing promptSlotMap for enhanced recognition
|
|
2482
|
+
* const stateWithSlots: RecognitionState = {
|
|
2483
|
+
* audioUtteranceId: 'session-123',
|
|
2484
|
+
* promptSlotMap: {
|
|
2485
|
+
* 'song_title': ['one time', 'baby'],
|
|
2486
|
+
* 'artists': ['justin bieber']
|
|
2487
|
+
* }
|
|
2488
|
+
* };
|
|
2489
|
+
* const client = createSimplifiedVGFClient({
|
|
2490
|
+
* asrRequestConfig: { provider: 'deepgram', language: 'en' },
|
|
2491
|
+
* gameContext: {
|
|
2492
|
+
* type: RecognitionContextTypeV1.GAME_CONTEXT,
|
|
2493
|
+
* gameId: 'music-quiz', // Your game's ID
|
|
2494
|
+
* gamePhase: 'song-guessing' // Current game phase
|
|
2495
|
+
* },
|
|
2496
|
+
* initialState: stateWithSlots, // promptSlotMap will be added to gameContext
|
|
2497
|
+
* onStateChange: (state) => setVGFState(state)
|
|
2498
|
+
* });
|
|
2499
|
+
*
|
|
2500
|
+
* await client.connect();
|
|
2501
|
+
* client.sendAudio(audioData);
|
|
2502
|
+
* // VGF state automatically updates based on transcription results
|
|
2503
|
+
*/
|
|
2504
|
+
declare function createSimplifiedVGFClient(config: SimplifiedVGFClientConfig): ISimplifiedVGFRecognitionClient;
|
|
2505
|
+
|
|
2506
|
+
/**
|
|
2507
|
+
* VGF Recognition Mapper
|
|
2508
|
+
*
|
|
2509
|
+
* Maps between the existing recognition client types and the simplified VGF state.
|
|
2510
|
+
* This provides a clean abstraction layer for game developers.
|
|
2511
|
+
*/
|
|
2512
|
+
|
|
2513
|
+
/**
|
|
2514
|
+
* Resets session state with a new UUID.
|
|
2515
|
+
*
|
|
2516
|
+
* This creates a fresh session state while preserving non-session fields
|
|
2517
|
+
* (like promptSlotMap, asrConfig, etc.)
|
|
2518
|
+
*
|
|
2519
|
+
* Resets:
|
|
2520
|
+
* - audioUtteranceId → new UUID
|
|
2521
|
+
* - transcriptionStatus → NOT_STARTED
|
|
2522
|
+
* - startRecordingStatus → READY
|
|
2523
|
+
* - recognitionActionProcessingState → NOT_STARTED
|
|
2524
|
+
* - finalTranscript → undefined
|
|
2525
|
+
*
|
|
2526
|
+
* @param currentState - The current recognition state
|
|
2527
|
+
* @returns A new state with reset session fields and a new UUID
|
|
2528
|
+
*/
|
|
2529
|
+
declare function resetRecognitionVGFState(currentState: RecognitionState): RecognitionState;
|
|
2530
|
+
|
|
2531
|
+
/**
|
|
2532
|
+
* Base URL schema shared across service endpoint helpers.
|
|
2533
|
+
*/
|
|
2534
|
+
type ServiceBaseUrls = {
|
|
2535
|
+
httpBase: string;
|
|
2536
|
+
wsBase: string;
|
|
2537
|
+
};
|
|
2538
|
+
/**
|
|
2539
|
+
* Base URL mappings keyed by stage.
|
|
2540
|
+
*/
|
|
2541
|
+
declare const RECOGNITION_SERVICE_BASES: Record<Stage, ServiceBaseUrls>;
|
|
2542
|
+
declare const RECOGNITION_CONDUCTOR_BASES: Record<Stage, ServiceBaseUrls>;
|
|
2543
|
+
/**
|
|
2544
|
+
* Normalize arbitrary stage input into a known `Stage`, defaulting to `local`.
|
|
2545
|
+
*/
|
|
2546
|
+
declare function normalizeStage(input?: Stage | string | null | undefined): Stage;
|
|
2547
|
+
/**
|
|
2548
|
+
* Resolve the recognition-service base URLs for a given stage.
|
|
2549
|
+
*/
|
|
2550
|
+
declare function getRecognitionServiceBase(stage?: Stage | string | null | undefined): ServiceBaseUrls;
|
|
2551
|
+
/**
|
|
2552
|
+
* Convenience helper for retrieving the HTTP base URL.
|
|
2553
|
+
*/
|
|
2554
|
+
declare function getRecognitionServiceHttpBase(stage?: Stage | string | null | undefined): string;
|
|
2555
|
+
/**
|
|
2556
|
+
* Convenience helper for retrieving the WebSocket base URL.
|
|
2557
|
+
*/
|
|
2558
|
+
declare function getRecognitionServiceWsBase(stage?: Stage | string | null | undefined): string;
|
|
2559
|
+
/**
|
|
2560
|
+
* Expose hostname lookup separately for callers that need raw host strings.
|
|
2561
|
+
*/
|
|
2562
|
+
declare function getRecognitionServiceHost(stage?: Stage | string | null | undefined): string;
|
|
2563
|
+
/**
|
|
2564
|
+
* Resolve the recognition-conductor base URLs for a given stage.
|
|
2565
|
+
*/
|
|
2566
|
+
declare function getRecognitionConductorBase(stage?: Stage | string | null | undefined): ServiceBaseUrls;
|
|
2567
|
+
declare function getRecognitionConductorHttpBase(stage?: Stage | string | null | undefined): string;
|
|
2568
|
+
declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
|
|
2569
|
+
declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
|
|
2570
|
+
|
|
2571
|
+
export { AudioEncoding, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GoogleModel, Language, OpenAIModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
|
|
2572
|
+
export type { ASRRequestConfig, ASRRequestV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
|