@volley/recognition-client-sdk 0.1.200
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +168 -0
- package/dist/browser-CDQ_TzeH.d.ts +1039 -0
- package/dist/index.d.ts +461 -0
- package/dist/index.js +2332 -0
- package/dist/index.js.map +1 -0
- package/dist/recog-client-sdk.browser.d.ts +2 -0
- package/dist/recog-client-sdk.browser.js +1843 -0
- package/dist/recog-client-sdk.browser.js.map +1 -0
- package/package.json +73 -0
- package/src/browser.ts +24 -0
- package/src/config-builder.ts +213 -0
- package/src/factory.ts +43 -0
- package/src/index.ts +86 -0
- package/src/recognition-client.spec.ts +551 -0
- package/src/recognition-client.ts +595 -0
- package/src/recognition-client.types.ts +260 -0
- package/src/simplified-vgf-recognition-client.spec.ts +671 -0
- package/src/simplified-vgf-recognition-client.ts +339 -0
- package/src/utils/audio-ring-buffer.ts +170 -0
- package/src/utils/message-handler.ts +131 -0
- package/src/utils/url-builder.ts +70 -0
- package/src/vgf-recognition-mapper.ts +225 -0
- package/src/vgf-recognition-state.ts +89 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,2332 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import 'ws';
|
|
3
|
+
import { v4 } from 'uuid';
|
|
4
|
+
|
|
5
|
+
// esbuild keep-names helper: pins a stable, readable `name` onto minified
// functions/classes so stack traces stay meaningful after bundling.
var __defProp = Object.defineProperty;
var __name = (target, value) => {
  // `configurable: true` lets later tooling re-tag the name if needed.
  __defProp(target, "name", { value, configurable: true });
  return target;
};
|
|
7
|
+
|
|
8
|
+
// ../../libs/types/dist/provider.types.js
|
|
9
|
+
// Supported speech-recognition backends (string enum compiled from TS).
var RecognitionProvider;
(function (ns) {
  Object.assign(ns, {
    ASSEMBLYAI: "assemblyai",
    DEEPGRAM: "deepgram",
    GOOGLE: "google",
    GEMINI_BATCH: "gemini-batch",
    OPENAI_BATCH: "openai-batch"
  });
})(RecognitionProvider || (RecognitionProvider = {}));
|
|
17
|
+
// How audio is processed: live streaming vs. whole-file batch.
var RecognitionMode;
(function (ns) {
  ns.STREAMING = "streaming";
  ns.BATCH = "batch";
})(RecognitionMode || (RecognitionMode = {}));
|
|
22
|
+
// Deepgram model identifiers accepted by the service.
var DeepgramModel;
(function (ns) {
  const members = [
    ["NOVA_2", "nova-2"],
    ["NOVA_3", "nova-3"],
    ["FLUX_GENERAL_EN", "flux-general-en"]
  ];
  for (const [key, model] of members) {
    ns[key] = model;
  }
})(DeepgramModel || (DeepgramModel = {}));
|
|
28
|
+
// Google Speech-to-Text model identifiers. Every wire value is exactly the
// lower-cased member key, so the members are derived from the key list.
var GoogleModel;
(function (ns) {
  const keys = [
    "LATEST_LONG",
    "LATEST_SHORT",
    "TELEPHONY",
    "TELEPHONY_SHORT",
    "MEDICAL_DICTATION",
    "MEDICAL_CONVERSATION",
    "DEFAULT",
    "COMMAND_AND_SEARCH",
    "PHONE_CALL",
    "VIDEO"
  ];
  for (const key of keys) {
    ns[key] = key.toLowerCase();
  }
})(GoogleModel || (GoogleModel = {}));
|
|
41
|
+
// Discriminator values for v1 recognition result messages (PascalCase on the wire).
var RecognitionResultTypeV1;
(function (ns) {
  Object.assign(ns, {
    TRANSCRIPTION: "Transcription",
    FUNCTION_CALL: "FunctionCall",
    METADATA: "Metadata",
    ERROR: "Error",
    CLIENT_CONTROL_MESSAGE: "ClientControlMessage"
  });
})(RecognitionResultTypeV1 || (RecognitionResultTypeV1 = {}));
|
|
49
|
+
// Transcription result (v1): the finalized transcript for an utterance plus
// any still-pending (interim) text, with optional timing watermarks.
var TranscriptionResultSchemaV1 = z.object({
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
  audioUtteranceId: z.string(),
  finalTranscript: z.string(),
  finalTranscriptConfidence: z.number().min(0).max(1).optional(),
  pendingTranscript: z.string().optional(),
  pendingTranscriptConfidence: z.number().min(0).max(1).optional(),
  // snake_case on the wire; presumably true once the transcript is final — TODO confirm
  is_finished: z.boolean(),
  voiceStart: z.number().optional(),
  voiceDuration: z.number().optional(),
  voiceEnd: z.number().optional(),
  startTimestamp: z.number().optional(),
  endTimestamp: z.number().optional(),
  receivedAtMs: z.number().optional(),
  accumulatedAudioTimeMs: z.number().optional()
});
// Function-call result (v1): a function invocation extracted from speech;
// the arguments travel as a single JSON-encoded string.
var FunctionCallResultSchemaV1 = z.object({
  type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
  audioUtteranceId: z.string(),
  functionName: z.string(),
  functionArgJson: z.string()
});
// Metadata result (v1): timing, audio-quality and cost info for an utterance.
var MetadataResultSchemaV1 = z.object({
  type: z.literal(RecognitionResultTypeV1.METADATA),
  audioUtteranceId: z.string(),
  // Timing information
  recordingStartMs: z.number().optional(),
  recordingEndMs: z.number().optional(),
  transcriptEndMs: z.number().optional(),
  socketCloseAtMs: z.number().optional(),
  // Audio Quality Metrics
  duration: z.number().optional(),
  volume: z.number().optional(),
  accumulatedAudioTimeMs: z.number().optional(),
  // Cost Information
  costInUSD: z.number().default(0).optional(),
  // ASR configuration as JSON string (no type validation)
  asrConfig: z.string().optional(),
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
  rawAsrMetadata: z.string().optional()
});
|
|
90
|
+
// Error taxonomy (v1) used to categorize recognition failures.
var ErrorTypeV1;
(function (ns) {
  const members = {
    AUTHENTICATION_ERROR: "authentication_error",
    VALIDATION_ERROR: "validation_error",
    PROVIDER_ERROR: "provider_error",
    TIMEOUT_ERROR: "timeout_error",
    QUOTA_EXCEEDED: "quota_exceeded",
    UNKNOWN_ERROR: "unknown_error"
  };
  for (const [key, value] of Object.entries(members)) {
    ns[key] = value;
  }
})(ErrorTypeV1 || (ErrorTypeV1 = {}));
|
|
99
|
+
// Error result (v1): emitted when recognition fails for an utterance.
var ErrorResultSchemaV1 = z.object({
  type: z.literal(RecognitionResultTypeV1.ERROR),
  audioUtteranceId: z.string(),
  errorType: z.nativeEnum(ErrorTypeV1).optional(),
  message: z.string().optional(),
  // Provider/system error code; providers report either strings or numbers.
  code: z.union([
    z.string(),
    z.number()
  ]).optional(),
  description: z.string().optional()
});
|
|
110
|
+
// Actions the server can ask the client to take (v1).
var ClientControlActionV1;
(function (ns) {
  ns.READY_FOR_UPLOADING_RECORDING = "ready_for_uploading_recording";
  ns.STOP_RECORDING = "stop_recording";
})(ClientControlActionV1 || (ClientControlActionV1 = {}));
|
|
115
|
+
// Zod validator over the client-control action enum.
var ClientControlActionsV1 = z.nativeEnum(ClientControlActionV1);
// Control message (v1) instructing the client (e.g. stop recording).
var ClientControlMessageSchemaV1 = z.object({
  type: z.literal(RecognitionResultTypeV1.CLIENT_CONTROL_MESSAGE),
  audioUtteranceId: z.string(),
  action: ClientControlActionsV1
});
// Discriminated union over all v1 result messages, keyed on `type`.
// NOTE(review): result is not bound to a name in this bundle — presumably a
// tree-shaking artifact of the published build; kept for parity. TODO confirm.
z.discriminatedUnion("type", [
  // P0
  TranscriptionResultSchemaV1,
  MetadataResultSchemaV1,
  ErrorResultSchemaV1,
  // P1 - P2
  FunctionCallResultSchemaV1,
  ClientControlMessageSchemaV1
]);
|
|
130
|
+
|
|
131
|
+
// ../../libs/types/dist/provider-transcription.types.js
|
|
132
|
+
// Internal message types emitted by provider adapters. Note the wire value of
// VAD_END_SIGNAL is "UtteranceEnd", not a literal spelling of the key.
var ProviderMessageType;
(function (ns) {
  Object.assign(ns, {
    TRANSCRIPT: "Transcript",
    VAD_END_SIGNAL: "UtteranceEnd",
    METADATA: "Metadata",
    ERROR: "Error",
    TIMER: "Timer",
    RAW: "Raw"
  });
})(ProviderMessageType || (ProviderMessageType = {}));
|
|
141
|
+
// One transcription hypothesis (text + confidence).
var RecognitionAlternativeSchema = z.object({
  /**
   * The transcribed text
   * @example "hello world"
   */
  transcript: z.string(),
  /**
   * Confidence score (0-1)
   * Note: Google only provides confidence for final results
   * @example 0.95
   */
  confidence: z.number().min(0).max(1)
});
// Unbound schema (alternatives list): the result is discarded — presumably a
// tree-shaking artifact of the published bundle. TODO confirm.
z.object({
  /**
   * Array of transcription alternatives, ordered by confidence
   */
  alternatives: z.array(RecognitionAlternativeSchema)
});
// Unbound schema (provider model descriptor): result discarded, as above.
z.object({
  name: z.string(),
  canonical_name: z.string(),
  architecture: z.string(),
  languages: z.array(z.string()).optional(),
  version: z.string(),
  uuid: z.string(),
  batch: z.boolean(),
  streaming: z.boolean()
});
|
|
170
|
+
// Interim/final transcript message from a streaming provider adapter.
var TranscriptMessageSchema = z.object({
  type: z.literal(ProviderMessageType.TRANSCRIPT),
  /**
   * Provider that generated this message
   * @example "deepgram"
   */
  provider: z.nativeEnum(RecognitionProvider).optional(),
  /**
   * The transcribed text (extracted from first alternative)
   * @example "hello world"
   */
  text: z.string(),
  /**
   * Confidence score for the transcript (0-1)
   * @example 0.95
   */
  confidence: z.number().min(0).max(1),
  /**
   * Voice start time identified by ASR (in milliseconds from stream start)
   * @example 500 (voice starts at 0.5 seconds)
   */
  voiceStart: z.number().optional(),
  /**
   * Voice end time identified by ASR (in milliseconds from stream start)
   * @example 2000 (voice ends at 2.0 seconds)
   */
  voiceEnd: z.number().optional(),
  /**
   * Voice duration identified by ASR (in milliseconds)
   * @example 1500 (1.5 seconds of speech, calculated as voiceEnd - voiceStart)
   */
  voiceDuration: z.number().optional(),
  /**
   * Server timestamp when this transcript was received (in milliseconds)
   * @example 1704096005500
   */
  receivedAtMs: z.number().optional(),
  /**
   * Accumulated audio time watermark (in milliseconds)
   * Total duration of all audio chunks sent to this provider session
   * @example 2500 (2.5 seconds of audio has been sent)
   */
  accumulatedAudioTimeMs: z.number().optional(),
  /**
   * Whether this transcript is finalized (won't change)
   * @example true
   */
  is_final: z.boolean()
});
// Voice-activity-detection "utterance ended" signal from a provider.
var VADEndSignalSchema = z.object({
  type: z.literal(ProviderMessageType.VAD_END_SIGNAL),
  /**
   * Provider that generated this message
   */
  provider: z.nativeEnum(RecognitionProvider).optional(),
  /**
   * Time when the last word ended (in milliseconds)
   * @example 2500 (2.5 seconds)
   */
  last_word_end: z.number().optional()
});
// Provider metadata message; `provider` is required here (unlike most messages).
var MetadataMessageSchema = z.object({
  type: z.literal(ProviderMessageType.METADATA),
  provider: z.nativeEnum(RecognitionProvider),
  asrConfig: z.string().optional(),
  data: z.string().optional()
});
// Provider error message, normalized across backends.
var ErrorMessageSchema = z.object({
  type: z.literal(ProviderMessageType.ERROR),
  /**
   * Provider that generated this message
   */
  provider: z.nativeEnum(RecognitionProvider).optional(),
  /**
   * Categorized error type
   * Defaults to UNKNOWN_ERROR if not specified by provider
   * @example ErrorTypeV1.PROVIDER_ERROR
   */
  errorType: z.nativeEnum(ErrorTypeV1),
  /**
   * Error message
   * @example "Stream quota exceeded"
   */
  provider_error: z.string(),
  /**
   * Provider-specific error code
   * @example "QUOTA_EXCEEDED"
   */
  provider_error_code: z.union([
    z.string(),
    z.number()
  ]).optional(),
  /**
   * Additional error description
   */
  description: z.string().optional(),
  /**
   * Whether the error can be immediately recovered by retrying
   * true = transient error, retry may succeed (e.g., UNAVAILABLE, INTERNAL)
   * false = permanent error, retry will fail (e.g., INVALID_ARGUMENT, PERMISSION_DENIED)
   * @example true
   */
  is_immediately_recoverable: z.boolean().optional()
});
// Session timing/cost summary message.
var TimerSchema = z.object({
  type: z.literal(ProviderMessageType.TIMER),
  /**
   * Provider that generated this message
   */
  provider: z.nativeEnum(RecognitionProvider).optional(),
  /**
   * Timestamp when recording started (in milliseconds)
   * @example 1704096000000
   */
  recordingStartMs: z.number().optional(),
  /**
   * Timestamp when recording ended (in milliseconds)
   * @example 1704096005000
   */
  recordingEndMs: z.number().optional(),
  /**
   * Timestamp when final transcript was received (in milliseconds)
   * @example 1704096005500
   */
  transcriptEndMs: z.number().optional(),
  /**
   * Timestamp when socket/grpc connection was closed (in milliseconds)
   * @example 1704096006000
   */
  socketCloseAtMs: z.number().optional(),
  /**
   * Accumulated audio time watermark (in milliseconds)
   * Total duration of all audio chunks sent to this provider session
   * @example 2500 (2.5 seconds of audio has been sent)
   */
  accumulatedAudioTimeMs: z.number().optional(),
  /**
   * Estimated cost in USD for this session
   * Calculated by the job based on audio duration and provider pricing
   * @example 0.0025 (quarter of a cent)
   */
  costInUSD: z.number().optional().default(0)
});
// Escape hatch: unvalidated provider payload; `provider` is a free-form string here.
var RawMessageSchema = z.object({
  type: z.literal(ProviderMessageType.RAW),
  provider: z.string(),
  data: z.any()
});
// Union of all provider messages, keyed on `type`. NOTE(review): not bound to a
// name in this bundle — presumably a tree-shaking artifact; kept for parity.
z.discriminatedUnion("type", [
  TranscriptMessageSchema,
  VADEndSignalSchema,
  MetadataMessageSchema,
  ErrorMessageSchema,
  TimerSchema,
  RawMessageSchema
]);
|
|
326
|
+
// Common shape shared by all recognition exception schemas below.
var BaseRecognitionExceptionSchema = z.object({
  /** Error type category */
  errorType: z.nativeEnum(ErrorTypeV1),
  /** Error message for logging/debugging */
  message: z.string(),
  /** Optional error code from provider or system */
  code: z.union([
    z.string(),
    z.number()
  ]).optional(),
  /** Detailed description for debugging */
  description: z.string().optional(),
  /**
   * Whether this error can be immediately shown to user.
   * true = Show to user immediately (clear actionable error)
   * false = Log for investigation, show generic error to user
   */
  isImmediatelyAvailable: z.boolean(),
  /** Recognition provider that generated this error */
  provider: z.nativeEnum(RecognitionProvider).optional(),
  /** Audio utterance ID this error relates to */
  audioUtteranceId: z.string().optional(),
  /** Timestamp when error occurred */
  timestamp: z.number().optional()
});
// Auth failures: never user-facing (isImmediatelyAvailable is pinned to false).
var AuthenticationExceptionSchema = BaseRecognitionExceptionSchema.extend({
  errorType: z.literal(ErrorTypeV1.AUTHENTICATION_ERROR),
  isImmediatelyAvailable: z.literal(false),
  /** Which service failed authentication (e.g., 'deepgram', 'google') */
  service: z.string().optional(),
  /** Authentication method that failed (e.g., 'api_key', 'oauth') */
  authMethod: z.string().optional()
});
// Input validation failures: always user-facing (pinned to true).
var ValidationExceptionSchema = BaseRecognitionExceptionSchema.extend({
  errorType: z.literal(ErrorTypeV1.VALIDATION_ERROR),
  isImmediatelyAvailable: z.literal(true),
  /** Field name that failed validation */
  field: z.string().optional(),
  /** Expected value format/type */
  expected: z.string().optional(),
  /** Actual value received (sanitized) */
  received: z.string().optional()
});
// Upstream provider failures: not user-facing; note `provider` is widened to a
// free-form string here (the base uses the RecognitionProvider enum).
var ProviderExceptionSchema = BaseRecognitionExceptionSchema.extend({
  errorType: z.literal(ErrorTypeV1.PROVIDER_ERROR),
  isImmediatelyAvailable: z.literal(false),
  /** Provider name (e.g., 'deepgram', 'assemblyai', 'google') */
  provider: z.string().optional(),
  /** Provider-specific error code */
  providerErrorCode: z.union([
    z.string(),
    z.number()
  ]).optional(),
  /** Whether this provider error might be transient (should retry) */
  isTransient: z.boolean().optional()
});
// Timeouts: user-facing.
var TimeoutExceptionSchema = BaseRecognitionExceptionSchema.extend({
  errorType: z.literal(ErrorTypeV1.TIMEOUT_ERROR),
  isImmediatelyAvailable: z.literal(true),
  /** Timeout duration in milliseconds */
  timeoutMs: z.number().optional(),
  /** What operation timed out (e.g., 'connection', 'transcription', 'response') */
  operation: z.string().optional()
});
// Quota/rate-limit failures: user-facing, with optional retry hints.
var QuotaExceededExceptionSchema = BaseRecognitionExceptionSchema.extend({
  errorType: z.literal(ErrorTypeV1.QUOTA_EXCEEDED),
  isImmediatelyAvailable: z.literal(true),
  /** Quota type that was exceeded (e.g., 'rate_limit', 'daily_quota', 'concurrent_requests') */
  quotaType: z.string().optional(),
  /** When quota resets (Unix timestamp in ms) */
  resetAt: z.number().optional(),
  /** How long to wait in seconds before retry */
  retryAfterSeconds: z.number().optional()
});
// Catch-all for uncategorized failures: not user-facing.
var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
  errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
  isImmediatelyAvailable: z.literal(false),
  /** Stack trace for debugging (sanitized) */
  stack: z.string().optional(),
  /** Additional context for debugging */
  context: z.record(z.unknown()).optional()
});
// Union over all exception schemas, keyed on `errorType`. NOTE(review): not
// bound to a name in this bundle — presumably a tree-shaking artifact.
z.discriminatedUnion("errorType", [
  AuthenticationExceptionSchema,
  ValidationExceptionSchema,
  ProviderExceptionSchema,
  TimeoutExceptionSchema,
  QuotaExceededExceptionSchema,
  UnknownExceptionSchema
]);
|
|
416
|
+
// Discriminator values for v1 recognition-context payloads.
var RecognitionContextTypeV1;
(function (ns) {
  ns.GAME_CONTEXT = "GameContext";
  ns.CONTROL_SIGNAL = "ControlSignal";
  ns.ASR_REQUEST = "ASRRequest";
})(RecognitionContextTypeV1 || (RecognitionContextTypeV1 = {}));
// Recording start/stop control signals (v1).
var ControlSignalTypeV1;
(function (ns) {
  Object.assign(ns, {
    START_RECORDING: "start_recording",
    STOP_RECORDING: "stop_recording"
  });
})(ControlSignalTypeV1 || (ControlSignalTypeV1 = {}));
|
|
427
|
+
// Map of slot name -> candidate phrases, used for entity extraction hints.
var SlotMapSchema = z.record(z.string(), z.array(z.string()));
// Game context (v1): identifies the game/phase and carries stage-specific prompts.
var GameContextSchemaV1 = z.object({
  type: z.literal(RecognitionContextTypeV1.GAME_CONTEXT),
  gameId: z.string(),
  gamePhase: z.string(),
  // Prompt fields for different processing stages
  promptSTT: z.string().optional(),
  promptSTF: z.string().optional(),
  promptTTF: z.string().optional(),
  // Slot map for entity extraction
  slotMap: SlotMapSchema.optional()
});
// Control signal (v1): start/stop recording instruction.
var ControlSignalSchemaV1 = z.object({
  type: z.literal(RecognitionContextTypeV1.CONTROL_SIGNAL),
  signal: z.nativeEnum(ControlSignalTypeV1)
});
// Debug toggles; note the whole object is optional, not just its fields.
var RequestDebugCommandSchema = z.object({
  // Enable verbose debug logging for this request
  enableDebugLog: z.boolean().optional().default(false),
  // Enable audio storage to database/filesystem for debugging
  enableAudioStorage: z.boolean().optional().default(true),
  // Enable validation that song quiz session ID exists before processing
  enableSongQuizSessionIdCheck: z.boolean().optional().default(true),
  // Enable experimental pilot models for testing new features
  enablePilotModels: z.boolean().optional().default(false)
}).optional();
// ASR request (v1): session identity plus provider/audio configuration.
var ASRRequestSchemaV1 = z.object({
  type: z.literal(RecognitionContextTypeV1.ASR_REQUEST),
  // Session identification
  audioUtteranceId: z.string().optional(),
  // ASR configuration
  provider: z.string(),
  model: z.string().optional(),
  language: z.string(),
  sampleRate: z.number(),
  // numeric encoding id — presumably an AudioEncoding value; TODO confirm
  encoding: z.number(),
  // Recognition options
  interimResults: z.boolean().optional().default(false),
  useContext: z.boolean().optional().default(false),
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
  debugCommand: RequestDebugCommandSchema
});
// Union of context payloads, keyed on `type`. NOTE(review): result is not
// bound to a name in this bundle — presumably a tree-shaking artifact.
z.discriminatedUnion("type", [
  GameContextSchemaV1,
  ControlSignalSchemaV1,
  ASRRequestSchemaV1
]);
|
|
474
|
+
// Optional game/session identifiers attached to recognition traffic; every
// field is optional so partial context can be supplied.
var RecognitionGameInfoSchema = z.object({
  userId: z.string().optional(),
  gameSessionId: z.string().optional(),
  deviceId: z.string().optional(),
  accountId: z.string().optional(),
  gameId: z.string().optional(),
  gamePhase: z.string().optional(),
  questionAnswerId: z.string().optional(),
  platform: z.string().optional()
});
// Unbound schema (utterance + game info): result is discarded — presumably a
// tree-shaking artifact of the published bundle. TODO confirm.
z.object({
  audioUtteranceId: z.string(),
  recognitionGameInfo: RecognitionGameInfoSchema.optional()
});
// Unbound schema (utterance + untyped results list): result discarded, as above.
z.object({
  audioUtteranceId: z.string(),
  results: z.array(z.any())
});
|
|
492
|
+
|
|
493
|
+
// ../../libs/types/dist/audio.types.js
|
|
494
|
+
// Numeric audio-format enum (ids start at 1). Mirrors the TS compiler output:
// each member gets a forward (name -> id) and reverse (id -> name) mapping.
var AudioFormat;
(function (ns) {
  const ordered = ["WAV", "MP3", "FLAC", "M4A", "OPUS", "PCM"];
  ordered.forEach((label, index) => {
    const id = index + 1;
    ns[label] = id;
    ns[id] = label;
  });
})(AudioFormat || (AudioFormat = {}));
|
|
503
|
+
// Companion namespace for AudioFormat (TS namespace-merge output): lookup
// helpers between numeric ids, canonical names, and enum values.
(function(AudioFormat2) {
  // numeric id -> enum value
  const ID_TO_ENUM = /* @__PURE__ */ new Map([[1, AudioFormat2.WAV], [2, AudioFormat2.MP3], [3, AudioFormat2.FLAC], [4, AudioFormat2.M4A], [5, AudioFormat2.OPUS], [6, AudioFormat2.PCM]]);
  // upper-cased name -> enum value
  const NAME_TO_ENUM = /* @__PURE__ */ new Map([["WAV", AudioFormat2.WAV], ["MP3", AudioFormat2.MP3], ["FLAC", AudioFormat2.FLAC], ["M4A", AudioFormat2.M4A], ["OPUS", AudioFormat2.OPUS], ["PCM", AudioFormat2.PCM]]);
  // enum value -> canonical name
  const ENUM_TO_NAME = /* @__PURE__ */ new Map([[AudioFormat2.WAV, "WAV"], [AudioFormat2.MP3, "MP3"], [AudioFormat2.FLAC, "FLAC"], [AudioFormat2.M4A, "M4A"], [AudioFormat2.OPUS, "OPUS"], [AudioFormat2.PCM, "PCM"]]);
  // Resolve an enum value from its numeric id; undefined when unknown.
  function fromId(id) {
    return ID_TO_ENUM.get(id);
  }
  __name(fromId, "fromId");
  AudioFormat2.fromId = fromId;
  // Resolve an enum value from its name, case-insensitively; undefined when unknown.
  function fromName(nameStr) {
    return NAME_TO_ENUM.get(nameStr.toUpperCase());
  }
  __name(fromName, "fromName");
  AudioFormat2.fromName = fromName;
  // Enum values ARE their ids, so this is the identity.
  function toId(format) {
    return format;
  }
  __name(toId, "toId");
  AudioFormat2.toId = toId;
  // Canonical name for an enum value; falls back to the enum's reverse mapping.
  function toName(format) {
    return ENUM_TO_NAME.get(format) ?? AudioFormat2[format];
  }
  __name(toName, "toName");
  AudioFormat2.toName = toName;
  // Whether the numeric id maps to a known format.
  function isIdValid(id) {
    return ID_TO_ENUM.has(id);
  }
  __name(isIdValid, "isIdValid");
  AudioFormat2.isIdValid = isIdValid;
  // Case-insensitive membership check for format names.
  function isNameValid(nameStr) {
    return NAME_TO_ENUM.has(nameStr.toUpperCase());
  }
  __name(isNameValid, "isNameValid");
  AudioFormat2.isNameValid = isNameValid;
})(AudioFormat || (AudioFormat = {}));
|
|
613
|
+
// Numeric audio-encoding enum (ids start at 0, matching the unspecified
// sentinel). Mirrors TS compiler output with forward and reverse mappings.
var AudioEncoding;
(function (ns) {
  ["ENCODING_UNSPECIFIED", "LINEAR16", "OGG_OPUS", "FLAC", "MULAW", "ALAW"].forEach((label, code) => {
    ns[label] = code;
    ns[code] = label;
  });
})(AudioEncoding || (AudioEncoding = {}));
|
|
622
|
+
// Companion namespace for AudioEncoding (TS namespace-merge output): lookup
// helpers between numeric ids, canonical names, and enum values.
(function(AudioEncoding2) {
  // numeric id -> enum value
  const ID_TO_ENUM = /* @__PURE__ */ new Map([[0, AudioEncoding2.ENCODING_UNSPECIFIED], [1, AudioEncoding2.LINEAR16], [2, AudioEncoding2.OGG_OPUS], [3, AudioEncoding2.FLAC], [4, AudioEncoding2.MULAW], [5, AudioEncoding2.ALAW]]);
  // upper-cased name -> enum value
  const NAME_TO_ENUM = /* @__PURE__ */ new Map([["ENCODING_UNSPECIFIED", AudioEncoding2.ENCODING_UNSPECIFIED], ["LINEAR16", AudioEncoding2.LINEAR16], ["OGG_OPUS", AudioEncoding2.OGG_OPUS], ["FLAC", AudioEncoding2.FLAC], ["MULAW", AudioEncoding2.MULAW], ["ALAW", AudioEncoding2.ALAW]]);
  // enum value -> canonical name
  const ENUM_TO_NAME = /* @__PURE__ */ new Map([[AudioEncoding2.ENCODING_UNSPECIFIED, "ENCODING_UNSPECIFIED"], [AudioEncoding2.LINEAR16, "LINEAR16"], [AudioEncoding2.OGG_OPUS, "OGG_OPUS"], [AudioEncoding2.FLAC, "FLAC"], [AudioEncoding2.MULAW, "MULAW"], [AudioEncoding2.ALAW, "ALAW"]]);
  // Resolve an enum value from its numeric id; undefined when unknown.
  function fromId(id) {
    return ID_TO_ENUM.get(id);
  }
  __name(fromId, "fromId");
  AudioEncoding2.fromId = fromId;
  // Resolve an enum value from its name, case-insensitively; undefined when unknown.
  function fromName(nameStr) {
    return NAME_TO_ENUM.get(nameStr.toUpperCase());
  }
  __name(fromName, "fromName");
  AudioEncoding2.fromName = fromName;
  // Enum values ARE their ids, so this is the identity.
  function toId(encoding) {
    return encoding;
  }
  __name(toId, "toId");
  AudioEncoding2.toId = toId;
  // Canonical name for an enum value; falls back to the enum's reverse mapping.
  function toName(encoding) {
    return ENUM_TO_NAME.get(encoding) ?? AudioEncoding2[encoding];
  }
  __name(toName, "toName");
  AudioEncoding2.toName = toName;
  // Whether the numeric id maps to a known encoding.
  function isIdValid(id) {
    return ID_TO_ENUM.has(id);
  }
  __name(isIdValid, "isIdValid");
  AudioEncoding2.isIdValid = isIdValid;
  // Case-insensitive membership check for encoding names.
  function isNameValid(nameStr) {
    return NAME_TO_ENUM.has(nameStr.toUpperCase());
  }
  __name(isNameValid, "isNameValid");
  AudioEncoding2.isNameValid = isNameValid;
})(AudioEncoding || (AudioEncoding = {}));
|
|
732
|
+
// Numeric sample-rate enum; each member's value is the rate in Hz and each
// name is "RATE_<hz>", so both mappings are derived from the Hz list.
var SampleRate;
(function (ns) {
  for (const hz of [8000, 16000, 22050, 24000, 32000, 44100, 48000]) {
    const label = `RATE_${hz}`;
    ns[label] = hz;
    ns[hz] = label;
  }
})(SampleRate || (SampleRate = {}));
|
|
742
|
+
(function(SampleRate2) {
|
|
743
|
+
const HZ_TO_ENUM = /* @__PURE__ */ new Map([
|
|
744
|
+
[
|
|
745
|
+
8e3,
|
|
746
|
+
SampleRate2.RATE_8000
|
|
747
|
+
],
|
|
748
|
+
[
|
|
749
|
+
16e3,
|
|
750
|
+
SampleRate2.RATE_16000
|
|
751
|
+
],
|
|
752
|
+
[
|
|
753
|
+
22050,
|
|
754
|
+
SampleRate2.RATE_22050
|
|
755
|
+
],
|
|
756
|
+
[
|
|
757
|
+
24e3,
|
|
758
|
+
SampleRate2.RATE_24000
|
|
759
|
+
],
|
|
760
|
+
[
|
|
761
|
+
32e3,
|
|
762
|
+
SampleRate2.RATE_32000
|
|
763
|
+
],
|
|
764
|
+
[
|
|
765
|
+
44100,
|
|
766
|
+
SampleRate2.RATE_44100
|
|
767
|
+
],
|
|
768
|
+
[
|
|
769
|
+
48e3,
|
|
770
|
+
SampleRate2.RATE_48000
|
|
771
|
+
]
|
|
772
|
+
]);
|
|
773
|
+
const NAME_TO_ENUM = /* @__PURE__ */ new Map([
|
|
774
|
+
[
|
|
775
|
+
"RATE_8000",
|
|
776
|
+
SampleRate2.RATE_8000
|
|
777
|
+
],
|
|
778
|
+
[
|
|
779
|
+
"RATE_16000",
|
|
780
|
+
SampleRate2.RATE_16000
|
|
781
|
+
],
|
|
782
|
+
[
|
|
783
|
+
"RATE_22050",
|
|
784
|
+
SampleRate2.RATE_22050
|
|
785
|
+
],
|
|
786
|
+
[
|
|
787
|
+
"RATE_24000",
|
|
788
|
+
SampleRate2.RATE_24000
|
|
789
|
+
],
|
|
790
|
+
[
|
|
791
|
+
"RATE_32000",
|
|
792
|
+
SampleRate2.RATE_32000
|
|
793
|
+
],
|
|
794
|
+
[
|
|
795
|
+
"RATE_44100",
|
|
796
|
+
SampleRate2.RATE_44100
|
|
797
|
+
],
|
|
798
|
+
[
|
|
799
|
+
"RATE_48000",
|
|
800
|
+
SampleRate2.RATE_48000
|
|
801
|
+
]
|
|
802
|
+
]);
|
|
803
|
+
const ENUM_TO_NAME = /* @__PURE__ */ new Map([
|
|
804
|
+
[
|
|
805
|
+
SampleRate2.RATE_8000,
|
|
806
|
+
"RATE_8000"
|
|
807
|
+
],
|
|
808
|
+
[
|
|
809
|
+
SampleRate2.RATE_16000,
|
|
810
|
+
"RATE_16000"
|
|
811
|
+
],
|
|
812
|
+
[
|
|
813
|
+
SampleRate2.RATE_22050,
|
|
814
|
+
"RATE_22050"
|
|
815
|
+
],
|
|
816
|
+
[
|
|
817
|
+
SampleRate2.RATE_24000,
|
|
818
|
+
"RATE_24000"
|
|
819
|
+
],
|
|
820
|
+
[
|
|
821
|
+
SampleRate2.RATE_32000,
|
|
822
|
+
"RATE_32000"
|
|
823
|
+
],
|
|
824
|
+
[
|
|
825
|
+
SampleRate2.RATE_44100,
|
|
826
|
+
"RATE_44100"
|
|
827
|
+
],
|
|
828
|
+
[
|
|
829
|
+
SampleRate2.RATE_48000,
|
|
830
|
+
"RATE_48000"
|
|
831
|
+
]
|
|
832
|
+
]);
|
|
833
|
+
function fromHz(hz) {
|
|
834
|
+
return HZ_TO_ENUM.get(hz);
|
|
835
|
+
}
|
|
836
|
+
__name(fromHz, "fromHz");
|
|
837
|
+
SampleRate2.fromHz = fromHz;
|
|
838
|
+
function fromName(nameStr) {
|
|
839
|
+
return NAME_TO_ENUM.get(nameStr.toUpperCase());
|
|
840
|
+
}
|
|
841
|
+
__name(fromName, "fromName");
|
|
842
|
+
SampleRate2.fromName = fromName;
|
|
843
|
+
function toHz(rate) {
|
|
844
|
+
return rate;
|
|
845
|
+
}
|
|
846
|
+
__name(toHz, "toHz");
|
|
847
|
+
SampleRate2.toHz = toHz;
|
|
848
|
+
function toName(rate) {
|
|
849
|
+
return ENUM_TO_NAME.get(rate) ?? SampleRate2[rate];
|
|
850
|
+
}
|
|
851
|
+
__name(toName, "toName");
|
|
852
|
+
SampleRate2.toName = toName;
|
|
853
|
+
function isHzValid(hz) {
|
|
854
|
+
return HZ_TO_ENUM.has(hz);
|
|
855
|
+
}
|
|
856
|
+
__name(isHzValid, "isHzValid");
|
|
857
|
+
SampleRate2.isHzValid = isHzValid;
|
|
858
|
+
function isNameValid(nameStr) {
|
|
859
|
+
return NAME_TO_ENUM.has(nameStr.toUpperCase());
|
|
860
|
+
}
|
|
861
|
+
__name(isNameValid, "isNameValid");
|
|
862
|
+
SampleRate2.isNameValid = isNameValid;
|
|
863
|
+
})(SampleRate || (SampleRate = {}));
|
|
864
|
+
// BCP-47 locale tags for the languages the recognition service supports.
var Language;
(function(Language2) {
  Object.assign(Language2, {
    ENGLISH_US: "en-US",
    ENGLISH_GB: "en-GB",
    SPANISH_ES: "es-ES",
    SPANISH_MX: "es-MX",
    FRENCH_FR: "fr-FR",
    GERMAN_DE: "de-DE",
    ITALIAN_IT: "it-IT",
    PORTUGUESE_BR: "pt-BR",
    JAPANESE_JP: "ja-JP",
    KOREAN_KR: "ko-KR",
    CHINESE_CN: "zh-CN",
    CHINESE_TW: "zh-TW"
  });
})(Language || (Language = {}));
|
|
879
|
+
|
|
880
|
+
// ../../libs/types/dist/plumbing-types.js
|
|
881
|
+
// Message-plumbing channel identifiers used to tag traffic between the
// client, conductor, and recognition providers.
var PlumbingType;
(function(PlumbingType2) {
  Object.assign(PlumbingType2, {
    AUDIO: "audio",
    CONTROL: "control",
    RESULT: "result",
    RECOGNITION_CONTEXT: "recognition_context",
    PROVIDER_TRANSCRIPT_RESULT: "provider_transcript_result",
    PROVIDER_METADATA_RESULT: "provider_metadata_result",
    PROVIDER_ERROR_RESULT: "provider_error_result",
    PROVIDER_VAD_SIGNAL: "provider_vad_signal",
    PROVIDER_TIMER_SIGNAL: "provider_timer_signal",
    PROVIDER_RAW_MESSAGE: "provider_raw_message"
  });
})(PlumbingType || (PlumbingType = {}));
|
|
894
|
+
|
|
895
|
+
// ../../libs/types/dist/game-id.types.js
|
|
896
|
+
// Known game identifiers; values double as the wire/slug form of the id.
var GameId;
(function(GameId2) {
  Object.assign(GameId2, {
    UNKNOWN: "unknown",
    HUB: "hub",
    JEOPARDY: "jeopardy",
    WHEEL_OF_FORTUNE: "wheel-of-fortune",
    SONG_QUIZ: "song-quiz",
    KARAOKE: "karaoke",
    TWENTY_QUESTIONS: "twenty-questions",
    GUESS_THE_EMOJI: "emoji"
  });
})(GameId || (GameId = {}));
// Orphaned slug -> GameId lookup table left behind by tree-shaking: the
// bundler kept the expression but discarded its binding. Side-effect free;
// retained to preserve the emitted output exactly.
({
  "unknown": GameId.UNKNOWN,
  "hub": GameId.HUB,
  "jeopardy": GameId.JEOPARDY,
  "wheel-of-fortune": GameId.WHEEL_OF_FORTUNE,
  "song-quiz": GameId.SONG_QUIZ,
  "karaoke": GameId.KARAOKE,
  "twenty-questions": GameId.TWENTY_QUESTIONS,
  "emoji": GameId.GUESS_THE_EMOJI
});
|
|
917
|
+
|
|
918
|
+
// ../../libs/types/dist/gemini-types.js
|
|
919
|
+
// Gemini model identifiers accepted by the recognition backends. Note that
// GEMINI_2_0_FLASH pins a dated revision ("-002") rather than the alias.
var GeminiModel;
(function(GeminiModel2) {
  Object.assign(GeminiModel2, {
    GEMINI_2_5_PRO: "gemini-2.5-pro",
    GEMINI_2_5_FLASH: "gemini-2.5-flash",
    GEMINI_2_5_FLASH_LITE: "gemini-2.5-flash-lite",
    GEMINI_2_0_FLASH_LATEST: "gemini-2.0-flash-latest",
    GEMINI_2_0_FLASH: "gemini-2.0-flash-002",
    GEMINI_2_0_FLASH_EXP: "gemini-2.0-flash-exp",
    GEMINI_1_5_FLASH: "gemini-1.5-flash",
    GEMINI_1_5_PRO: "gemini-1.5-pro"
  });
})(GeminiModel || (GeminiModel = {}));
// Gemini REST API versions.
var GeminiApiVersion;
(function(GeminiApiVersion2) {
  Object.assign(GeminiApiVersion2, {
    V1: "v1",
    V1BETA: "v1beta"
  });
})(GeminiApiVersion || (GeminiApiVersion = {}));
|
|
935
|
+
|
|
936
|
+
// ../../libs/types/dist/openai-types.js
|
|
937
|
+
// OpenAI speech-to-text model identifiers (currently Whisper only).
var OpenAIModel;
(function(OpenAIModel2) {
  OpenAIModel2.WHISPER_1 = "whisper-1";
})(OpenAIModel || (OpenAIModel = {}));
|
|
941
|
+
|
|
942
|
+
// ../../libs/types/dist/conductor.types.js
|
|
943
|
+
// Counter categories for conductor statistics; each value is the lowercase
// form of its member name.
var StatsIncrementType;
(function(StatsIncrementType2) {
  for (const kind of ["call", "success", "fail"]) {
    StatsIncrementType2[kind.toUpperCase()] = kind;
  }
})(StatsIncrementType || (StatsIncrementType = {}));
|
|
949
|
+
|
|
950
|
+
// ../../libs/types/dist/stages.types.js
|
|
951
|
+
// Deployment stages recognized by the SDK. The values are the canonical
// lowercase stage strings used as keys in the host/endpoint maps below and
// as the normalization targets in normalizeStage.
var STAGES = {
  LOCAL: "local",
  DEV: "dev",
  STAGING: "staging",
  PRODUCTION: "production"
};
|
|
957
|
+
|
|
958
|
+
// ../../libs/websocket/dist/core/audio-upload-websocket-protocol.js
|
|
959
|
+
// WebSocket close codes: the RFC 6455 standard range (1000-1015) plus
// application-defined codes in the 4000 range. Numeric-enum emit provides
// both name -> code and code -> name lookups.
var WebSocketCloseCode;
(function(WebSocketCloseCode2) {
  const closeCodes = [
    ["NORMAL_CLOSURE", 1000],
    ["GOING_AWAY", 1001],
    ["PROTOCOL_ERROR", 1002],
    ["UNSUPPORTED_DATA", 1003],
    ["RESERVED_1004", 1004],
    ["NO_STATUS_RECEIVED", 1005],
    ["ABNORMAL_CLOSURE", 1006],
    ["INVALID_FRAME_PAYLOAD", 1007],
    ["POLICY_VIOLATION", 1008],
    ["MESSAGE_TOO_BIG", 1009],
    ["MANDATORY_EXTENSION", 1010],
    ["INTERNAL_SERVER_ERROR", 1011],
    ["SERVICE_RESTART", 1012],
    ["TRY_AGAIN_LATER", 1013],
    ["BAD_GATEWAY", 1014],
    ["TLS_HANDSHAKE", 1015],
    ["AUTH_REQUIRED", 4000],
    ["AUTH_FAILED", 4001],
    ["RATE_LIMIT_EXCEEDED", 4002],
    ["INVALID_SESSION", 4003],
    ["SESSION_EXPIRED", 4004]
  ];
  for (const [codeName, codeValue] of closeCodes) {
    WebSocketCloseCode2[codeName] = codeValue;
    WebSocketCloseCode2[codeValue] = codeName;
  }
})(WebSocketCloseCode || (WebSocketCloseCode = {}));
|
|
983
|
+
/**
 * Serialize an audio frame: an 8-byte little-endian header followed by the
 * raw payload bytes.
 *
 * Header layout: [0]=version byte, [1]=encoding id (u8),
 * [2..3]=sample rate (u16 LE), [4..7]=sequence number (u32 LE).
 *
 * @param h - header fields { v, e, sr, seq }
 * @param payload - audio bytes as an ArrayBuffer
 * @param versionSerializer - function mapping h.v to its single header byte
 *   (callers pass the serializer's `serialize` function directly)
 * @returns ArrayBuffer containing header + payload
 * @throws Error when a header field is not an integer within its wire width
 */
function packHeader(h, payload, versionSerializer) {
  // Validate each field against the width of its slot in the wire format.
  const requireUint = (value, max, label) => {
    if (value < 0 || value > max || !Number.isInteger(value)) {
      throw new Error(`${label} must be an integer between 0-${max}, got: ${value}`);
    }
  };
  requireUint(h.e, 255, "Encoding ID");
  requireUint(h.sr, 65535, "Sample rate");
  requireUint(h.seq, 4294967295, "Sequence number");
  const frame = new ArrayBuffer(8 + payload.byteLength);
  const view = new DataView(frame);
  view.setUint8(0, versionSerializer(h.v));
  view.setUint8(1, h.e);
  view.setUint16(2, h.sr, true);
  view.setUint32(4, h.seq, true);
  // Copy the payload immediately after the fixed header.
  new Uint8Array(frame, 8).set(new Uint8Array(payload));
  return frame;
}
__name(packHeader, "packHeader");
// Identity serializer for purely numeric protocol versions (byte <-> number).
var numericVersionSerializer = {
  serialize: /* @__PURE__ */ __name((v) => v, "serialize"),
  deserialize: /* @__PURE__ */ __name((byte) => byte, "deserialize")
};
|
|
1007
|
+
|
|
1008
|
+
// ../../libs/websocket/dist/core/audio-upload-websocket-client.js
|
|
1009
|
+
/**
 * Low-level WebSocket client that streams binary audio frames (8-byte header
 * + payload, built by packHeader) and exchanges JSON control messages.
 * Uses the global `WebSocket` constructor (browser / runtime-provided).
 *
 * NOTE(review): connect() wires this.onConnected / this.onDisconnected /
 * this.onError, none of which are defined on this base class (only onMessage
 * has a default). Subclasses are presumably expected to supply them —
 * confirm, otherwise those event handlers will throw at runtime.
 */
var WebSocketAudioClient = class {
  static {
    __name(this, "WebSocketAudioClient");
  }
  /**
   * @param cfg - { url, highWM?, lowWM? }; the water marks are byte
   *   thresholds for local send-buffer backpressure (defaults 512000/128000).
   * @param versionSerializer - converts the protocol version to/from its
   *   single header byte; defaults to the numeric identity serializer.
   */
  constructor(cfg, versionSerializer = numericVersionSerializer) {
    this.cfg = cfg;
    this.versionSerializer = versionSerializer;
    // Monotonic sequence number stamped into each audio frame header.
    this.seq = 0;
    this.HWM = cfg.highWM ?? 512e3;
    // NOTE(review): LWM is stored but never read within this class —
    // presumably consumed by subclasses; verify before removing.
    this.LWM = cfg.lowWM ?? 128e3;
  }
  // ========================================================================
  // OPTIONAL HOOKS - Override if needed
  // ========================================================================
  /**
   * Hook: Called when downward message arrives from server
   * Override this to handle messages (optional - default does nothing)
   */
  onMessage(msg) {
  }
  // ========================================================================
  // PUBLIC API
  // ========================================================================
  /**
   * Open the WebSocket and install event handlers. Binary frames are
   * received as ArrayBuffers; text frames are parsed as JSON and routed
   * to onMessage. Binary incoming data is ignored here.
   */
  connect() {
    this.ws = new WebSocket(this.cfg.url);
    this.ws.binaryType = "arraybuffer";
    this.ws.onopen = () => {
      this.onConnected();
    };
    this.ws.onclose = (event) => {
      this.onDisconnected(event.code, event.reason);
    };
    this.ws.onerror = (error) => {
      this.onError(error);
    };
    this.ws.onmessage = (ev) => {
      if (typeof ev.data === "string") {
        // NOTE(review): JSON.parse is unguarded — a malformed text frame
        // will throw inside this handler; confirm the server never sends
        // non-JSON text, or wrap in try/catch.
        const msg = JSON.parse(ev.data);
        this.onMessage(msg);
      }
    };
  }
  /**
   * Send JSON message to server. Silently dropped if the socket is not OPEN.
   * @param version - Message version
   * @param type - Message type (developer defined)
   * @param data - Message payload (typed)
   */
  sendMessage(version, type, data) {
    if (this.ws?.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify({
        v: version,
        type,
        data
      }));
    }
  }
  /**
   * Send audio frame with specified encoding and sample rate.
   * Silently dropped (and seq NOT incremented) when the socket is closed or
   * locally backpressured — see canSend().
   * @param audioData - Audio data (any format: Int16Array, Uint8Array, ArrayBuffer, etc.)
   * @param version - Audio frame version
   * @param encodingId - Audio encoding ID (0-255, e.g., AudioEncoding.LINEAR16)
   * @param sampleRate - Sample rate in Hz (e.g., 16000)
   */
  sendAudio(audioData, version, encodingId, sampleRate) {
    if (!this.canSend()) return;
    // Typed-array views may be offset into a larger buffer; slice out
    // exactly the viewed bytes so the payload copy is correct.
    const buffer = ArrayBuffer.isView(audioData) ? audioData.buffer.slice(audioData.byteOffset, audioData.byteOffset + audioData.byteLength) : audioData;
    const hdr = {
      v: version,
      e: encodingId,
      sr: sampleRate,
      seq: this.seq++
    };
    this.ws.send(packHeader(hdr, buffer, this.versionSerializer.serialize));
  }
  /**
   * Get current WebSocket buffer size in bytes (0 when no socket exists).
   */
  getBufferedAmount() {
    return this.ws?.bufferedAmount ?? 0;
  }
  /**
   * Check if local buffer is backpressured (buffered bytes above highWM).
   */
  isLocalBackpressured() {
    return this.getBufferedAmount() > this.HWM;
  }
  /**
   * Check if ready to send audio
   * Verifies: connection open, no local buffer pressure
   */
  canSend() {
    return this.ws?.readyState === WebSocket.OPEN && !this.isLocalBackpressured();
  }
  /**
   * Check if connection is open
   */
  isOpen() {
    return this.ws?.readyState === WebSocket.OPEN;
  }
  /**
   * Get current connection state (WebSocket.CLOSED when never connected).
   */
  getReadyState() {
    return this.ws?.readyState ?? WebSocket.CLOSED;
  }
};
|
|
1116
|
+
|
|
1117
|
+
// src/recognition-client.types.ts
|
|
1118
|
+
// Lifecycle states for the recognition client connection. Each member maps
// its uppercase name to the lowercase state string.
var ClientState = /* @__PURE__ */ (function(ClientState2) {
  const states = ["initial", "connecting", "connected", "ready", "stopping", "stopped", "failed"];
  for (const state of states) {
    ClientState2[state.toUpperCase()] = state;
  }
  return ClientState2;
})({});
|
|
1128
|
+
|
|
1129
|
+
// ../../libs/config/dist/service-endpoints.js
|
|
1130
|
+
// Canonical list of stage strings, derived from STAGES so the two can never
// drift apart.
var KNOWN_STAGE_VALUES = Object.values(STAGES);
// Recognition-service hostnames per stage (local includes a port; deployed
// stages are bare hostnames — scheme is chosen by getProtocolsForStage).
var RECOGNITION_SERVICE_HOST_BY_STAGE = {
  [STAGES.LOCAL]: "localhost:3101",
  [STAGES.DEV]: "recognition-service-dev.volley-services.net",
  [STAGES.STAGING]: "recognition-service-staging.volley-services.net",
  [STAGES.PRODUCTION]: "recognition-service.volley-services.net"
};
// Recognition-conductor hostnames per stage (same scheme handling as above).
var RECOGNITION_CONDUCTOR_HOST_BY_STAGE = {
  [STAGES.LOCAL]: "localhost:3100",
  [STAGES.DEV]: "recognition-conductor-dev.volley-services.net",
  [STAGES.STAGING]: "recognition-conductor-staging.volley-services.net",
  [STAGES.PRODUCTION]: "recognition-conductor.volley-services.net"
};
|
|
1143
|
+
// Local development runs without TLS; every deployed stage uses TLS.
var getProtocolsForStage = /* @__PURE__ */ __name((stage) => {
  const secure = stage !== STAGES.LOCAL;
  return {
    http: secure ? "https" : "http",
    ws: secure ? "wss" : "ws"
  };
}, "getProtocolsForStage");
|
|
1155
|
+
// Combine a stage's protocols with a host into http/ws base URLs.
var buildBaseUrls = /* @__PURE__ */ __name((stage, host) => {
  const protocols = getProtocolsForStage(stage);
  return {
    httpBase: `${protocols.http}://${host}`,
    wsBase: `${protocols.ws}://${host}`
  };
}, "buildBaseUrls");
// Precomputed base URLs for every known stage, keyed by stage string.
var RECOGNITION_SERVICE_BASES = Object.fromEntries(KNOWN_STAGE_VALUES.map((stage) => [stage, buildBaseUrls(stage, RECOGNITION_SERVICE_HOST_BY_STAGE[stage])]));
var RECOGNITION_CONDUCTOR_BASES = Object.fromEntries(KNOWN_STAGE_VALUES.map((stage) => [stage, buildBaseUrls(stage, RECOGNITION_CONDUCTOR_HOST_BY_STAGE[stage])]));
|
|
1170
|
+
/**
 * Coerce arbitrary input to a known stage string.
 * Non-string input, and strings that don't match a known stage after
 * trimming and lowercasing, fall back to "local".
 */
function normalizeStage(input) {
  if (typeof input !== "string") {
    return STAGES.LOCAL;
  }
  const candidate = input.trim().toLowerCase();
  return KNOWN_STAGE_VALUES.includes(candidate) ? candidate : STAGES.LOCAL;
}
__name(normalizeStage, "normalizeStage");
|
|
1179
|
+
// Stage-aware accessors for recognition-service and conductor endpoints.
// Every accessor normalizes its stage argument, so unknown or missing
// stages resolve to the "local" endpoints.
function getRecognitionServiceBase(stage) {
  return RECOGNITION_SERVICE_BASES[normalizeStage(stage)];
}
__name(getRecognitionServiceBase, "getRecognitionServiceBase");
function getRecognitionServiceHttpBase(stage) {
  const { httpBase } = getRecognitionServiceBase(stage);
  return httpBase;
}
__name(getRecognitionServiceHttpBase, "getRecognitionServiceHttpBase");
function getRecognitionServiceWsBase(stage) {
  const { wsBase } = getRecognitionServiceBase(stage);
  return wsBase;
}
__name(getRecognitionServiceWsBase, "getRecognitionServiceWsBase");
function getRecognitionServiceHost(stage) {
  return RECOGNITION_SERVICE_HOST_BY_STAGE[normalizeStage(stage)];
}
__name(getRecognitionServiceHost, "getRecognitionServiceHost");
function getRecognitionConductorBase(stage) {
  return RECOGNITION_CONDUCTOR_BASES[normalizeStage(stage)];
}
__name(getRecognitionConductorBase, "getRecognitionConductorBase");
function getRecognitionConductorHttpBase(stage) {
  const { httpBase } = getRecognitionConductorBase(stage);
  return httpBase;
}
__name(getRecognitionConductorHttpBase, "getRecognitionConductorHttpBase");
function getRecognitionConductorWsBase(stage) {
  const { wsBase } = getRecognitionConductorBase(stage);
  return wsBase;
}
__name(getRecognitionConductorWsBase, "getRecognitionConductorWsBase");
function getRecognitionConductorHost(stage) {
  return RECOGNITION_CONDUCTOR_HOST_BY_STAGE[normalizeStage(stage)];
}
__name(getRecognitionConductorHost, "getRecognitionConductorHost");
|
|
1215
|
+
|
|
1216
|
+
// src/utils/url-builder.ts
|
|
1217
|
+
/**
 * Build the recognition WebSocket URL from a config object.
 * Falls back to the production recognition-service endpoint when no
 * explicit URL is given; audioUtteranceId is always appended, all other
 * parameters only when present (callbackUrls only when non-empty).
 */
function buildWebSocketUrl(config) {
  const defaultBase = getRecognitionServiceBase("production");
  const url = new URL(config.url || `${defaultBase.wsBase}/ws/v1/recognize`);
  url.searchParams.set("audioUtteranceId", config.audioUtteranceId);
  // callbackUrls is an array, so it is JSON-encoded into a single parameter.
  if (config.callbackUrls && config.callbackUrls.length > 0) {
    url.searchParams.set("callbackUrls", JSON.stringify(config.callbackUrls));
  }
  // Optional identity/session parameters, in the same order as before.
  const optionalParams = [
    ["userId", config.userId],
    ["gameSessionId", config.gameSessionId],
    ["deviceId", config.deviceId],
    ["accountId", config.accountId],
    ["questionAnswerId", config.questionAnswerId],
    ["platform", config.platform]
  ];
  for (const [param, value] of optionalParams) {
    if (value) {
      url.searchParams.set(param, value);
    }
  }
  // Game context expands into two separate query parameters.
  if (config.gameContext) {
    url.searchParams.set("gameId", config.gameContext.gameId);
    url.searchParams.set("gamePhase", config.gameContext.gamePhase);
  }
  return url.toString();
}
__name(buildWebSocketUrl, "buildWebSocketUrl");
|
|
1250
|
+
|
|
1251
|
+
// src/utils/audio-ring-buffer.ts
|
|
1252
|
+
/**
 * Fixed-capacity ring buffer for timestamped audio chunks.
 * Capacity = maxBufferDurationSec * chunksPerSecond. When full, the oldest
 * unread chunk is overwritten and the read pointer advances (drop-oldest
 * policy), with each overflow counted and optionally logged.
 */
var AudioRingBuffer = class {
  static {
    __name(this, "AudioRingBuffer");
  }
  buffer = [];
  bufferSize;
  writeIndex = 0;
  readIndex = 0;
  hasWrapped = false;
  totalBufferedBytes = 0;
  overflowCount = 0;
  chunksBuffered = 0;
  logger;
  /**
   * @param config.maxBufferDurationSec - maximum seconds of audio to retain
   * @param config.chunksPerSecond - expected chunk rate; together these fix
   *   the slot capacity of the ring
   * @param config.logger - optional (level, message, data?) callback
   */
  constructor(config) {
    this.bufferSize = config.maxBufferDurationSec * config.chunksPerSecond;
    this.buffer = new Array(this.bufferSize);
    if (config.logger) {
      this.logger = config.logger;
    }
  }
  /**
   * Write audio chunk to ring buffer with overflow detection.
   * On overflow the oldest unread chunk is dropped, counted, and logged.
   */
  write(audioData) {
    // Both TypedArray views and raw ArrayBuffers expose byteLength, so no
    // ArrayBuffer.isView branch is needed (fixes a redundant ternary whose
    // two branches were identical).
    const bytes = audioData.byteLength;
    this.buffer[this.writeIndex] = {
      data: audioData,
      timestamp: Date.now()
    };
    const nextWriteIndex = (this.writeIndex + 1) % this.bufferSize;
    // Advancing the write pointer onto the read pointer means the buffer is
    // full: drop the oldest unread chunk by advancing the read pointer too.
    if (nextWriteIndex === this.readIndex && this.writeIndex !== this.readIndex) {
      this.hasWrapped = true;
      this.overflowCount++;
      if (this.logger) {
        this.logger("debug", "Buffer overflow detected", {
          bufferSize: this.bufferSize,
          totalOverflows: this.overflowCount,
          droppedChunk: this.buffer[this.readIndex]?.timestamp
        });
      }
      this.readIndex = (this.readIndex + 1) % this.bufferSize;
    }
    this.writeIndex = nextWriteIndex;
    this.chunksBuffered++;
    this.totalBufferedBytes += bytes;
  }
  /**
   * Read and consume the next chunk, or null when the buffer is empty.
   */
  read() {
    if (this.isEmpty()) {
      return null;
    }
    const chunk = this.buffer[this.readIndex];
    this.readIndex = (this.readIndex + 1) % this.bufferSize;
    return chunk || null;
  }
  /**
   * Read all buffered chunks in order without consuming them.
   */
  readAll() {
    const chunks = [];
    let index = this.readIndex;
    while (index !== this.writeIndex) {
      const chunk = this.buffer[index];
      if (chunk) {
        chunks.push(chunk);
      }
      index = (index + 1) % this.bufferSize;
    }
    return chunks;
  }
  /**
   * Return all buffered chunks and consume them (read pointer catches up
   * to the write pointer).
   */
  flush() {
    const chunks = this.readAll();
    this.readIndex = this.writeIndex;
    return chunks;
  }
  /**
   * Count of currently buffered (unread) chunks, accounting for wrap-around.
   */
  getBufferedCount() {
    if (this.writeIndex >= this.readIndex) {
      return this.writeIndex - this.readIndex;
    } else {
      return this.bufferSize - this.readIndex + this.writeIndex;
    }
  }
  /**
   * Check if buffer is empty.
   */
  isEmpty() {
    return this.readIndex === this.writeIndex;
  }
  /**
   * Check if the buffer has ever overflowed (dropped at least one chunk).
   */
  isOverflowing() {
    return this.hasWrapped;
  }
  /**
   * Clear the buffer and reset all counters.
   * Frees memory by releasing all stored audio chunks (subsequent writes
   * assign slots by index, so an empty backing array is sufficient).
   */
  clear() {
    this.buffer = [];
    this.writeIndex = 0;
    this.readIndex = 0;
    this.hasWrapped = false;
    this.overflowCount = 0;
    this.chunksBuffered = 0;
    this.totalBufferedBytes = 0;
    if (this.logger) {
      this.logger("debug", "Audio buffer cleared");
    }
  }
  /**
   * Snapshot of lifetime and current buffer statistics.
   */
  getStats() {
    return {
      chunksBuffered: this.chunksBuffered,
      currentBufferedChunks: this.getBufferedCount(),
      overflowCount: this.overflowCount,
      hasWrapped: this.hasWrapped,
      totalBufferedBytes: this.totalBufferedBytes
    };
  }
};
|
|
1383
|
+
|
|
1384
|
+
// src/utils/message-handler.ts
|
|
1385
|
+
/**
 * Routes typed WebSocket messages from the recognition service to the
 * appropriate callback and tracks time-to-first-transcript.
 */
var MessageHandler = class {
  static {
    __name(this, "MessageHandler");
  }
  firstTranscriptTime = null;
  sessionStartTime = null;
  callbacks;
  constructor(callbacks) {
    this.callbacks = callbacks;
  }
  /**
   * Set session start time for performance tracking.
   */
  setSessionStartTime(time) {
    this.sessionStartTime = time;
  }
  /**
   * Handle an incoming WebSocket message by dispatching on its type.
   * The nested payload type (msg.data.type) takes precedence over the
   * envelope type (msg.type), and the payload falls back to the whole
   * envelope when msg.data is absent.
   */
  handleMessage(msg) {
    const { logger } = this.callbacks;
    logger?.("debug", "Received WebSocket message", {
      msgType: msg.type,
      msgDataType: msg.data && typeof msg.data === "object" && "type" in msg.data ? msg.data.type : "N/A",
      fullMessage: msg
    });
    // A primitive payload indicates a malformed server message; log it and
    // continue so the envelope-level fallback dispatch below still runs.
    if (msg.data && typeof msg.data !== "object") {
      logger?.("error", "Received primitive msg.data from server", {
        dataType: typeof msg.data,
        data: msg.data,
        fullMessage: msg
      });
    }
    const nestedType = msg.data && typeof msg.data === "object" && "type" in msg.data ? msg.data.type : void 0;
    const msgType = nestedType || msg.type;
    const msgData = msg.data || msg;
    switch (msgType) {
      case RecognitionResultTypeV1.TRANSCRIPTION:
        this.handleTranscription(msgData);
        break;
      case RecognitionResultTypeV1.FUNCTION_CALL:
        this.callbacks.onFunctionCall(msgData);
        break;
      case RecognitionResultTypeV1.METADATA:
        this.callbacks.onMetadata(msgData);
        break;
      case RecognitionResultTypeV1.ERROR:
        this.callbacks.onError(msgData);
        break;
      case RecognitionResultTypeV1.CLIENT_CONTROL_MESSAGE:
        this.callbacks.onControlMessage(msgData);
        break;
      default:
        logger?.("debug", "Unknown message type", {
          type: msgType
        });
    }
  }
  /**
   * Forward a transcription result, recording the latency of the very
   * first transcript of the session.
   * @param result - The transcription result from the server
   */
  handleTranscription(result) {
    if (!this.firstTranscriptTime && this.sessionStartTime) {
      this.firstTranscriptTime = Date.now();
      const elapsedMs = this.firstTranscriptTime - this.sessionStartTime;
      this.callbacks.logger?.("debug", "First transcript received", {
        timeToFirstTranscriptMs: elapsedMs
      });
    }
    this.callbacks.onTranscript(result);
  }
  /**
   * Get performance metrics collected so far.
   */
  getMetrics() {
    const { sessionStartTime, firstTranscriptTime } = this;
    return {
      sessionStartTime,
      firstTranscriptTime,
      timeToFirstTranscript: firstTranscriptTime && sessionStartTime ? firstTranscriptTime - sessionStartTime : null
    };
  }
};
|
|
1474
|
+
|
|
1475
|
+
// src/recognition-client.ts
|
|
1476
|
+
// RFC 6455 close code 1000 (Normal Closure) is the only code treated as a
// clean, intentional disconnect.
function isNormalDisconnection(code) {
  return code === 1000;
}
__name(isNormalDisconnection, "isNormalDisconnection");
|
|
1480
|
+
var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioClient {
|
|
1481
|
+
static {
|
|
1482
|
+
__name(this, "RealTimeTwoWayWebSocketRecognitionClient");
|
|
1483
|
+
}
|
|
1484
|
+
static PROTOCOL_VERSION = 1;
|
|
1485
|
+
config;
|
|
1486
|
+
audioBuffer;
|
|
1487
|
+
messageHandler;
|
|
1488
|
+
state = ClientState.INITIAL;
|
|
1489
|
+
connectionPromise;
|
|
1490
|
+
// Debug control (internal state, controlled by debugCommand in ASRRequest)
|
|
1491
|
+
isDebugLogEnabled = false;
|
|
1492
|
+
// Stats
|
|
1493
|
+
audioBytesSent = 0;
|
|
1494
|
+
audioChunksSent = 0;
|
|
1495
|
+
audioStatsLogInterval = 100;
|
|
1496
|
+
lastAudioStatsLog = 0;
|
|
1497
|
+
constructor(config) {
|
|
1498
|
+
const audioUtteranceId = config.audioUtteranceId || v4();
|
|
1499
|
+
const url = buildWebSocketUrl({
|
|
1500
|
+
audioUtteranceId,
|
|
1501
|
+
...config.url && {
|
|
1502
|
+
url: config.url
|
|
1503
|
+
},
|
|
1504
|
+
...config.callbackUrls && {
|
|
1505
|
+
callbackUrls: config.callbackUrls
|
|
1506
|
+
},
|
|
1507
|
+
...config.userId && {
|
|
1508
|
+
userId: config.userId
|
|
1509
|
+
},
|
|
1510
|
+
...config.gameSessionId && {
|
|
1511
|
+
gameSessionId: config.gameSessionId
|
|
1512
|
+
},
|
|
1513
|
+
...config.deviceId && {
|
|
1514
|
+
deviceId: config.deviceId
|
|
1515
|
+
},
|
|
1516
|
+
...config.accountId && {
|
|
1517
|
+
accountId: config.accountId
|
|
1518
|
+
},
|
|
1519
|
+
...config.questionAnswerId && {
|
|
1520
|
+
questionAnswerId: config.questionAnswerId
|
|
1521
|
+
},
|
|
1522
|
+
...config.platform && {
|
|
1523
|
+
platform: config.platform
|
|
1524
|
+
},
|
|
1525
|
+
...config.gameContext && {
|
|
1526
|
+
gameContext: config.gameContext
|
|
1527
|
+
}
|
|
1528
|
+
});
|
|
1529
|
+
super({
|
|
1530
|
+
url,
|
|
1531
|
+
highWM: config.highWaterMark ?? 512e3,
|
|
1532
|
+
lowWM: config.lowWaterMark ?? 128e3
|
|
1533
|
+
});
|
|
1534
|
+
this.config = {
|
|
1535
|
+
url,
|
|
1536
|
+
audioUtteranceId,
|
|
1537
|
+
...config.asrRequestConfig && {
|
|
1538
|
+
asrRequestConfig: config.asrRequestConfig
|
|
1539
|
+
},
|
|
1540
|
+
...config.gameContext && {
|
|
1541
|
+
gameContext: config.gameContext
|
|
1542
|
+
},
|
|
1543
|
+
...config.callbackUrls && {
|
|
1544
|
+
callbackUrls: config.callbackUrls
|
|
1545
|
+
},
|
|
1546
|
+
onTranscript: config.onTranscript || (() => {
|
|
1547
|
+
}),
|
|
1548
|
+
onFunctionCall: config.onFunctionCall || (() => {
|
|
1549
|
+
}),
|
|
1550
|
+
onMetadata: config.onMetadata || (() => {
|
|
1551
|
+
}),
|
|
1552
|
+
onError: config.onError || (() => {
|
|
1553
|
+
}),
|
|
1554
|
+
onConnected: config.onConnected || (() => {
|
|
1555
|
+
}),
|
|
1556
|
+
onDisconnected: config.onDisconnected || (() => {
|
|
1557
|
+
}),
|
|
1558
|
+
highWaterMark: config.highWaterMark ?? 512e3,
|
|
1559
|
+
lowWaterMark: config.lowWaterMark ?? 128e3,
|
|
1560
|
+
maxBufferDurationSec: config.maxBufferDurationSec ?? 60,
|
|
1561
|
+
chunksPerSecond: config.chunksPerSecond ?? 100,
|
|
1562
|
+
...config.logger && {
|
|
1563
|
+
logger: config.logger
|
|
1564
|
+
}
|
|
1565
|
+
};
|
|
1566
|
+
this.audioBuffer = new AudioRingBuffer({
|
|
1567
|
+
maxBufferDurationSec: this.config.maxBufferDurationSec,
|
|
1568
|
+
chunksPerSecond: this.config.chunksPerSecond,
|
|
1569
|
+
...this.config.logger && {
|
|
1570
|
+
logger: this.config.logger
|
|
1571
|
+
}
|
|
1572
|
+
});
|
|
1573
|
+
this.messageHandler = new MessageHandler({
|
|
1574
|
+
onTranscript: this.config.onTranscript,
|
|
1575
|
+
onFunctionCall: this.config.onFunctionCall,
|
|
1576
|
+
onMetadata: this.config.onMetadata,
|
|
1577
|
+
onError: this.config.onError,
|
|
1578
|
+
onControlMessage: this.handleControlMessage.bind(this),
|
|
1579
|
+
...this.config.logger && {
|
|
1580
|
+
logger: this.config.logger
|
|
1581
|
+
}
|
|
1582
|
+
});
|
|
1583
|
+
}
|
|
1584
|
+
// ==========================================================================
|
|
1585
|
+
// PRIVATE HELPERS
|
|
1586
|
+
// ==========================================================================
|
|
1587
|
+
/**
|
|
1588
|
+
* Internal logging helper - only logs if a logger was provided in config
|
|
1589
|
+
* Debug logs are additionally gated by isDebugLogEnabled flag
|
|
1590
|
+
* @param level - Log level: debug, info, warn, or error
|
|
1591
|
+
* @param message - Message to log
|
|
1592
|
+
* @param data - Optional additional data to log
|
|
1593
|
+
*/
|
|
1594
|
+
log(level, message, data) {
|
|
1595
|
+
if (level === "debug" && !this.isDebugLogEnabled) {
|
|
1596
|
+
return;
|
|
1597
|
+
}
|
|
1598
|
+
if (this.config.logger) {
|
|
1599
|
+
this.config.logger(level, `[SDK] ${message}`, data);
|
|
1600
|
+
}
|
|
1601
|
+
}
|
|
1602
|
+
/**
|
|
1603
|
+
* Clean up internal resources to free memory
|
|
1604
|
+
* Called when connection closes (normally or abnormally)
|
|
1605
|
+
*/
|
|
1606
|
+
cleanup() {
|
|
1607
|
+
this.log("debug", "Cleaning up resources");
|
|
1608
|
+
this.audioBuffer.clear();
|
|
1609
|
+
this.audioBytesSent = 0;
|
|
1610
|
+
this.audioChunksSent = 0;
|
|
1611
|
+
this.lastAudioStatsLog = 0;
|
|
1612
|
+
this.connectionPromise = void 0;
|
|
1613
|
+
}
|
|
1614
|
+
// ==========================================================================
|
|
1615
|
+
// PUBLIC API
|
|
1616
|
+
// ==========================================================================
|
|
1617
|
+
  /**
   * Establish the WebSocket connection.
   * Resolves once the transport-level onConnected hook fires; rejects on
   * transport error or after a 10 s timeout. Concurrent callers share the
   * same in-flight promise.
   * @returns {Promise<void>}
   */
  async connect() {
    // An attempt is already in flight (or settled): hand back the same promise.
    if (this.connectionPromise) {
      this.log("debug", "Returning existing connection promise", {
        state: this.state,
        hasPromise: true
      });
      return this.connectionPromise;
    }
    // Only INITIAL / FAILED / STOPPED states may start a fresh connection;
    // anything else means we are already connected or mid-lifecycle.
    if (this.state !== ClientState.INITIAL && this.state !== ClientState.FAILED && this.state !== ClientState.STOPPED) {
      this.log("debug", "Already connected or in wrong state", {
        state: this.state
      });
      return Promise.resolve();
    }
    this.log("debug", "Creating new connection to WebSocket", {
      url: this.config.url
    });
    this.state = ClientState.CONNECTING;
    const connectionStartTime = Date.now();
    this.connectionPromise = new Promise((resolve, reject) => {
      // Fail the attempt if the server does not answer within 10 s (1e4 ms).
      const timeout = setTimeout(() => {
        this.log("warn", "Connection timeout", {
          timeout: 1e4
        });
        this.state = ClientState.FAILED;
        reject(new Error("Timeout"));
      }, 1e4);
      // NOTE(review): onConnected/onError are wrapped here and never restored,
      // so wrappers accumulate if connect() is invoked again after a failure —
      // confirm an instance is only expected to connect once.
      const originalOnConnected = this.onConnected.bind(this);
      this.onConnected = () => {
        clearTimeout(timeout);
        const connectionTime = Date.now() - connectionStartTime;
        this.log("debug", "Connection established successfully", {
          connectionTimeMs: connectionTime,
          url: this.config.url
        });
        this.state = ClientState.CONNECTED;
        originalOnConnected();
        resolve();
      };
      const originalOnError = this.onError.bind(this);
      this.onError = (error) => {
        clearTimeout(timeout);
        this.log("warn", "Connection error", error);
        this.state = ClientState.FAILED;
        originalOnError(error);
        reject(error);
      };
      // Kick off the actual transport connect in the superclass.
      super.connect();
    });
    return this.connectionPromise;
  }
|
|
1668
|
+
sendAudio(audioData) {
|
|
1669
|
+
const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
1670
|
+
if (bytes === 0) return;
|
|
1671
|
+
this.audioBuffer.write(audioData);
|
|
1672
|
+
if (this.state === ClientState.READY && !super.isLocalBackpressured()) {
|
|
1673
|
+
this.log("debug", "Sending audio immediately", {
|
|
1674
|
+
bytes
|
|
1675
|
+
});
|
|
1676
|
+
this.sendAudioNow(audioData);
|
|
1677
|
+
this.audioBuffer.read();
|
|
1678
|
+
} else {
|
|
1679
|
+
this.log("debug", "Buffering audio", {
|
|
1680
|
+
bytes,
|
|
1681
|
+
state: this.state,
|
|
1682
|
+
backpressured: super.isLocalBackpressured()
|
|
1683
|
+
});
|
|
1684
|
+
}
|
|
1685
|
+
if (this.isDebugLogEnabled) {
|
|
1686
|
+
const totalChunks = this.audioChunksSent + this.audioBuffer.getStats().chunksBuffered;
|
|
1687
|
+
if (totalChunks - this.lastAudioStatsLog >= this.audioStatsLogInterval) {
|
|
1688
|
+
const stats = this.audioBuffer.getStats();
|
|
1689
|
+
this.log("debug", "Audio statistics", {
|
|
1690
|
+
totalBytesSent: this.audioBytesSent,
|
|
1691
|
+
totalChunksSent: this.audioChunksSent,
|
|
1692
|
+
...stats
|
|
1693
|
+
});
|
|
1694
|
+
this.lastAudioStatsLog = totalChunks;
|
|
1695
|
+
}
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
  /**
   * Stop the recording session.
   * Sends a STOP_RECORDING control signal, then resolves when either a
   * transcript with is_finished arrives or a 5 s timeout elapses.
   * No-op unless the client is in the READY state.
   * @returns {Promise<void>|undefined}
   */
  async stopRecording() {
    if (this.state !== ClientState.READY) {
      this.log("warn", "Cannot stop recording - not in READY state", {
        state: this.state
      });
      return;
    }
    this.log("debug", "Stopping recording");
    this.state = ClientState.STOPPING;
    super.sendMessage(_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION, "message", {
      type: RecognitionContextTypeV1.CONTROL_SIGNAL,
      signal: ControlSignalTypeV1.STOP_RECORDING
    });
    return new Promise((resolve) => {
      // Safety net: force STOPPED if no final transcript arrives in 5 s (5e3 ms).
      const timeout = setTimeout(() => {
        this.state = ClientState.STOPPED;
        resolve();
      }, 5e3);
      // NOTE(review): config.onTranscript is permanently replaced with this
      // finish-detecting wrapper and never restored; repeated stop cycles on
      // one instance would nest wrappers — confirm intended.
      const original = this.config.onTranscript;
      this.config.onTranscript = (result) => {
        original(result);
        if (result.is_finished) {
          clearTimeout(timeout);
          this.state = ClientState.STOPPED;
          resolve();
        }
      };
      // Keep the message handler pointing at the wrapped callback.
      this.messageHandler.callbacks.onTranscript = this.config.onTranscript;
    });
  }
|
|
1728
|
+
  /** @returns {string} The audio utterance ID this client was configured with. */
  getAudioUtteranceId() {
    return this.config.audioUtteranceId;
  }
|
|
1731
|
+
  /** @returns The current ClientState lifecycle value. */
  getState() {
    return this.state;
  }
|
|
1734
|
+
  /** @returns {boolean} True when the server has signaled READY for audio. */
  isConnected() {
    return this.state === ClientState.READY;
  }
|
|
1737
|
+
  /** @returns {boolean} True while a connection attempt is in flight. */
  isConnecting() {
    return this.state === ClientState.CONNECTING;
  }
|
|
1740
|
+
  /** @returns {boolean} True after stopRecording() until the session finishes. */
  isStopping() {
    return this.state === ClientState.STOPPING;
  }
|
|
1743
|
+
  /** @returns {boolean} True once the session has fully stopped. */
  isTranscriptionFinished() {
    return this.state === ClientState.STOPPED;
  }
|
|
1746
|
+
  /** @returns {boolean} True when the audio ring buffer reports overflow. */
  isBufferOverflowing() {
    return this.audioBuffer.isOverflowing();
  }
|
|
1749
|
+
getStats() {
|
|
1750
|
+
const bufferStats = this.audioBuffer.getStats();
|
|
1751
|
+
return {
|
|
1752
|
+
audioBytesSent: this.audioBytesSent,
|
|
1753
|
+
audioChunksSent: this.audioChunksSent,
|
|
1754
|
+
audioChunksBuffered: bufferStats.chunksBuffered,
|
|
1755
|
+
bufferOverflowCount: bufferStats.overflowCount,
|
|
1756
|
+
currentBufferedChunks: bufferStats.currentBufferedChunks,
|
|
1757
|
+
hasWrapped: bufferStats.hasWrapped
|
|
1758
|
+
};
|
|
1759
|
+
}
|
|
1760
|
+
// ==========================================================================
|
|
1761
|
+
// WEBSOCKET HOOKS (from WebSocketAudioClient)
|
|
1762
|
+
// ==========================================================================
|
|
1763
|
+
  /**
   * Transport-level connected hook.
   * Performs the session handshake in order: (1) optional ASR request,
   * (2) optional game context, then invokes the user's onConnected callback.
   * The client still waits for the server's READY control message before
   * streaming audio.
   */
  onConnected() {
    this.log("debug", "WebSocket onConnected callback");
    if (this.config.asrRequestConfig) {
      const debugCommand = this.config.asrRequestConfig.debugCommand;
      // A debugCommand with enableDebugLog switches on verbose SDK logging.
      if (debugCommand?.enableDebugLog) {
        this.isDebugLogEnabled = true;
        this.log("debug", "Debug logging enabled via debugCommand");
      }
      if (this.isDebugLogEnabled) {
        this.log("debug", "Sending ASR request", this.config.asrRequestConfig);
      }
      // Normalize the ASR request: defaults are "en", 16 kHz, LINEAR16, no
      // interim results. Non-numeric sampleRate/encoding fall back to defaults.
      const asrRequest = {
        type: RecognitionContextTypeV1.ASR_REQUEST,
        audioUtteranceId: this.config.audioUtteranceId,
        provider: this.config.asrRequestConfig.provider.toString(),
        model: this.config.asrRequestConfig.model,
        language: this.config.asrRequestConfig.language?.toString() || "en",
        sampleRate: typeof this.config.asrRequestConfig.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000,
        encoding: typeof this.config.asrRequestConfig.encoding === "number" ? this.config.asrRequestConfig.encoding : AudioEncoding.LINEAR16,
        interimResults: this.config.asrRequestConfig.interimResults ?? false,
        // Auto-enable useContext if gameContext is provided, or use explicit value if set
        useContext: this.config.asrRequestConfig.useContext ?? !!this.config.gameContext,
        ...debugCommand && {
          debugCommand
        }
      };
      super.sendMessage(_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION, "message", asrRequest);
    }
    if (this.config.gameContext) {
      if (this.isDebugLogEnabled) {
        this.log("debug", "Sending game context", this.config.gameContext);
      }
      super.sendMessage(_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION, "message", this.config.gameContext);
    }
    this.log("debug", "Waiting for server ready signal");
    this.config.onConnected();
  }
|
|
1800
|
+
onDisconnected(code, reason) {
|
|
1801
|
+
this.log("debug", "WebSocket disconnected", {
|
|
1802
|
+
code,
|
|
1803
|
+
reason,
|
|
1804
|
+
previousState: this.state
|
|
1805
|
+
});
|
|
1806
|
+
if (this.state === ClientState.STOPPING) {
|
|
1807
|
+
this.state = ClientState.STOPPED;
|
|
1808
|
+
} else if (this.state === ClientState.CONNECTED || this.state === ClientState.READY || this.state === ClientState.CONNECTING) {
|
|
1809
|
+
this.log("error", "Unexpected disconnection", {
|
|
1810
|
+
code,
|
|
1811
|
+
reason
|
|
1812
|
+
});
|
|
1813
|
+
this.state = ClientState.FAILED;
|
|
1814
|
+
}
|
|
1815
|
+
this.cleanup();
|
|
1816
|
+
this.config.onDisconnected(code, reason);
|
|
1817
|
+
}
|
|
1818
|
+
onError(error) {
|
|
1819
|
+
this.state = ClientState.FAILED;
|
|
1820
|
+
const errorResult = {
|
|
1821
|
+
type: RecognitionResultTypeV1.ERROR,
|
|
1822
|
+
audioUtteranceId: "",
|
|
1823
|
+
message: "WebSocket error",
|
|
1824
|
+
description: error.type || "Connection error"
|
|
1825
|
+
};
|
|
1826
|
+
this.config.onError(errorResult);
|
|
1827
|
+
}
|
|
1828
|
+
  /**
   * Transport-level message hook — parsing and dispatch are fully delegated
   * to the MessageHandler wired up in the constructor.
   */
  onMessage(msg) {
    this.messageHandler.handleMessage(msg);
  }
|
|
1831
|
+
// ==========================================================================
|
|
1832
|
+
// INTERNAL HELPERS
|
|
1833
|
+
// ==========================================================================
|
|
1834
|
+
/**
|
|
1835
|
+
* Handle control messages from server
|
|
1836
|
+
* @param msg - Control message containing server actions
|
|
1837
|
+
*/
|
|
1838
|
+
handleControlMessage(msg) {
|
|
1839
|
+
switch (msg.action) {
|
|
1840
|
+
case ClientControlActionV1.READY_FOR_UPLOADING_RECORDING: {
|
|
1841
|
+
this.log("debug", "Server ready for audio upload");
|
|
1842
|
+
this.state = ClientState.READY;
|
|
1843
|
+
this.messageHandler.setSessionStartTime(Date.now());
|
|
1844
|
+
const bufferedChunks = this.audioBuffer.flush();
|
|
1845
|
+
if (bufferedChunks.length > 0) {
|
|
1846
|
+
this.log("debug", "Flushing buffered audio", {
|
|
1847
|
+
chunks: bufferedChunks.length
|
|
1848
|
+
});
|
|
1849
|
+
bufferedChunks.forEach((chunk) => this.sendAudioNow(chunk.data));
|
|
1850
|
+
}
|
|
1851
|
+
break;
|
|
1852
|
+
}
|
|
1853
|
+
case ClientControlActionV1.STOP_RECORDING:
|
|
1854
|
+
this.log("debug", "Received stop recording signal from server");
|
|
1855
|
+
break;
|
|
1856
|
+
default:
|
|
1857
|
+
this.log("warn", "Unknown control action", {
|
|
1858
|
+
action: msg.action
|
|
1859
|
+
});
|
|
1860
|
+
}
|
|
1861
|
+
}
|
|
1862
|
+
/**
|
|
1863
|
+
* Send audio immediately to the server (without buffering)
|
|
1864
|
+
* @param audioData - Audio data to send
|
|
1865
|
+
*/
|
|
1866
|
+
sendAudioNow(audioData) {
|
|
1867
|
+
const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
1868
|
+
const encodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
|
|
1869
|
+
const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
|
|
1870
|
+
super.sendAudio(audioData, _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION, encodingId, sampleRate);
|
|
1871
|
+
this.audioBytesSent += byteLength;
|
|
1872
|
+
this.audioChunksSent++;
|
|
1873
|
+
}
|
|
1874
|
+
};
|
|
1875
|
+
|
|
1876
|
+
// src/config-builder.ts
|
|
1877
|
+
var ConfigBuilder = class {
  static {
    __name(this, "ConfigBuilder");
  }
  // Accumulates the partial configuration; build() returns it as-is.
  config = {};
  /** Store one key/value pair on the config and return the builder for chaining. */
  #assign(key, value) {
    this.config[key] = value;
    return this;
  }
  /** Set the WebSocket URL */
  url(url) {
    return this.#assign("url", url);
  }
  /** Set ASR request configuration */
  asrRequestConfig(config) {
    return this.#assign("asrRequestConfig", config);
  }
  /** Set game context */
  gameContext(context) {
    return this.#assign("gameContext", context);
  }
  /** Set audio utterance ID */
  audioUtteranceId(id) {
    return this.#assign("audioUtteranceId", id);
  }
  /** Set callback URLs */
  callbackUrls(urls) {
    return this.#assign("callbackUrls", urls);
  }
  /** Set user ID */
  userId(id) {
    return this.#assign("userId", id);
  }
  /** Set game session ID */
  gameSessionId(id) {
    return this.#assign("gameSessionId", id);
  }
  /** Set device ID */
  deviceId(id) {
    return this.#assign("deviceId", id);
  }
  /** Set account ID */
  accountId(id) {
    return this.#assign("accountId", id);
  }
  /** Set question answer ID */
  questionAnswerId(id) {
    return this.#assign("questionAnswerId", id);
  }
  /** Set platform */
  platform(platform) {
    return this.#assign("platform", platform);
  }
  /** Set transcript callback */
  onTranscript(callback) {
    return this.#assign("onTranscript", callback);
  }
  /** Set metadata callback */
  onMetadata(callback) {
    return this.#assign("onMetadata", callback);
  }
  /** Set error callback */
  onError(callback) {
    return this.#assign("onError", callback);
  }
  /** Set connected callback */
  onConnected(callback) {
    return this.#assign("onConnected", callback);
  }
  /** Set disconnected callback */
  onDisconnected(callback) {
    return this.#assign("onDisconnected", callback);
  }
  /** Set high water mark (bytes) */
  highWaterMark(bytes) {
    return this.#assign("highWaterMark", bytes);
  }
  /** Set low water mark (bytes) */
  lowWaterMark(bytes) {
    return this.#assign("lowWaterMark", bytes);
  }
  /** Set max buffer duration in seconds */
  maxBufferDurationSec(seconds) {
    return this.#assign("maxBufferDurationSec", seconds);
  }
  /** Set chunks per second */
  chunksPerSecond(chunks) {
    return this.#assign("chunksPerSecond", chunks);
  }
  /** Set logger function */
  logger(logger) {
    return this.#assign("logger", logger);
  }
  /** Build the configuration (returns the accumulated object, not a copy). */
  build() {
    return this.config;
  }
};
|
|
2036
|
+
|
|
2037
|
+
// src/factory.ts
|
|
2038
|
+
/**
 * Factory: create a RealTimeTwoWayWebSocketRecognitionClient from a plain
 * configuration object.
 * @param config - Client configuration (same shape the class constructor takes)
 * @returns A new client instance (not yet connected)
 */
function createClient(config) {
  return new RealTimeTwoWayWebSocketRecognitionClient(config);
}
__name(createClient, "createClient");
|
|
2042
|
+
/**
 * Factory: create a client via the fluent ConfigBuilder API.
 * @param configure - Callback that receives a fresh builder and must return it
 * @returns A new RealTimeTwoWayWebSocketRecognitionClient (not yet connected)
 */
function createClientWithBuilder(configure) {
  const configuredBuilder = configure(new ConfigBuilder());
  return new RealTimeTwoWayWebSocketRecognitionClient(configuredBuilder.build());
}
__name(createClientWithBuilder, "createClientWithBuilder");
|
|
2048
|
+
// Zod schema for the VGF recognition state object persisted/synced by the
// SimplifiedVGFRecognitionClient. Only audioUtteranceId is required; all
// other fields are optional and filled in as the session progresses.
var RecognitionVGFStateSchema = z.object({
  // Core STT state
  audioUtteranceId: z.string(),
  startRecordingStatus: z.string().optional(),
  // Streaming should only start when "READY". Other states control mic UI and recording.
  transcriptionStatus: z.string().optional(),
  finalTranscript: z.string().optional(),
  finalConfidence: z.number().optional(),
  // Tracking-only metadata
  asrConfig: z.string().optional(),
  startRecordingTimestamp: z.string().optional(),
  finalRecordingTimestamp: z.string().optional(),
  finalTranscriptionTimestamp: z.string().optional(),
  // STEP 2: Support for mic auto-stop upon correct answer
  pendingTranscript: z.string().optional().default(""),
  pendingConfidence: z.number().optional(),
  // STEP 3: Support for semantic/function-call outcomes
  functionCallMetadata: z.string().optional(),
  functionCallConfidence: z.number().optional(),
  finalFunctionCallTimestamp: z.string().optional(),
  // Support for prompt slot mapping - passed to recognition context when present
  promptSlotMap: z.record(z.string(), z.array(z.string())).optional()
});
|
|
2071
|
+
// Recording lifecycle values for RecognitionVGFState.startRecordingStatus.
// Transitions are expected to move forward through this order (see
// isValidRecordingStatusTransition).
var RecordingStatus = {
  NOT_READY: "NOT_READY",
  READY: "READY",
  RECORDING: "RECORDING",
  FINISHED: "FINISHED"
};
// Transcription lifecycle values for RecognitionVGFState.transcriptionStatus.
var TranscriptionStatus = {
  NOT_STARTED: "NOT_STARTED",
  IN_PROGRESS: "IN_PROGRESS",
  FINALIZED: "FINALIZED",
  ERROR: "ERROR"
};
|
|
2083
|
+
/**
 * Build the initial VGF recognition state for a brand-new utterance:
 * nothing recorded, nothing transcribed yet.
 * @param audioUtteranceId - Identifier for the utterance
 * @returns Fresh state object
 */
function createInitialRecognitionState(audioUtteranceId) {
  const initialState = {
    audioUtteranceId,
    startRecordingStatus: RecordingStatus.NOT_READY,
    transcriptionStatus: TranscriptionStatus.NOT_STARTED,
    pendingTranscript: ""
  };
  return initialState;
}
__name(createInitialRecognitionState, "createInitialRecognitionState");
|
|
2092
|
+
/**
 * Check whether a recording-status change only moves forward through the
 * lifecycle NOT_READY -> READY -> RECORDING -> FINISHED.
 * An undefined/unknown `from` is treated as before-the-start, so any valid
 * target is accepted; an unknown `to` is always rejected.
 * @param from - Current status (may be undefined)
 * @param to - Proposed next status
 * @returns True when the transition is a strict forward move
 */
function isValidRecordingStatusTransition(from, to) {
  const progression = [
    RecordingStatus.NOT_READY,
    RecordingStatus.READY,
    RecordingStatus.RECORDING,
    RecordingStatus.FINISHED
  ];
  const targetRank = progression.indexOf(to);
  if (targetRank === -1) {
    return false;
  }
  const sourceRank = from ? progression.indexOf(from) : -1;
  return targetRank > sourceRank;
}
__name(isValidRecordingStatusTransition, "isValidRecordingStatusTransition");
|
|
2104
|
+
|
|
2105
|
+
// src/vgf-recognition-mapper.ts
|
|
2106
|
+
/**
 * Fold one transcription result into a copy of the VGF state.
 * Interim results update the pending transcript; a finished result promotes
 * the final transcript, stamps completion, and clears pending fields.
 * @param currentState - State to derive from (not mutated)
 * @param result - Transcription result from the recognition service
 * @param isRecording - Whether audio is currently being sent
 * @returns New state object
 */
function mapTranscriptionResultToState(currentState, result, isRecording) {
  const updated = {
    ...currentState
  };
  // While audio is flowing and recording hasn't already finished, mark the
  // session RECORDING and stamp the first moment audio was observed.
  if (isRecording && currentState.startRecordingStatus !== RecordingStatus.FINISHED) {
    updated.startRecordingStatus = RecordingStatus.RECORDING;
    updated.startRecordingTimestamp = updated.startRecordingTimestamp || new Date().toISOString();
  }
  if (result.is_finished) {
    // Terminal result: promote the final transcript and clear pending state.
    updated.transcriptionStatus = TranscriptionStatus.FINALIZED;
    updated.finalTranscript = result.finalTranscript || "";
    if (result.finalTranscriptConfidence !== undefined) {
      updated.finalConfidence = result.finalTranscriptConfidence;
    }
    updated.finalTranscriptionTimestamp = new Date().toISOString();
    updated.pendingTranscript = "";
    updated.pendingConfidence = undefined;
  } else {
    // Interim result: track the in-flight transcript; a partial final
    // transcript may also arrive alongside it.
    updated.transcriptionStatus = TranscriptionStatus.IN_PROGRESS;
    updated.pendingTranscript = result.pendingTranscript || "";
    if (result.pendingTranscriptConfidence !== undefined) {
      updated.pendingConfidence = result.pendingTranscriptConfidence;
    }
    if (result.finalTranscript) {
      updated.finalTranscript = result.finalTranscript;
      if (result.finalTranscriptConfidence !== undefined) {
        updated.finalConfidence = result.finalTranscriptConfidence;
      }
    }
  }
  return updated;
}
__name(mapTranscriptionResultToState, "mapTranscriptionResultToState");
|
|
2141
|
+
/**
 * Mark recording as FINISHED when session metadata arrives, stamping the
 * final recording time if it was not already set.
 * @param currentState - State to derive from (not mutated)
 * @param metadata - Metadata payload (currently unused by the mapping)
 * @returns New state object
 */
function mapMetadataToState(currentState, metadata) {
  const updated = {
    ...currentState
  };
  updated.finalRecordingTimestamp = updated.finalRecordingTimestamp || new Date().toISOString();
  updated.startRecordingStatus = RecordingStatus.FINISHED;
  return updated;
}
__name(mapMetadataToState, "mapMetadataToState");
|
|
2152
|
+
/**
 * Derive the error-terminal state: transcription ERROR, recording FINISHED,
 * with the final recording timestamp set to now.
 * @param currentState - State to derive from (not mutated)
 * @param error - Error payload (currently unused by the mapping)
 * @returns New state object
 */
function mapErrorToState(currentState, error) {
  const failed = {
    ...currentState
  };
  failed.transcriptionStatus = TranscriptionStatus.ERROR;
  failed.startRecordingStatus = RecordingStatus.FINISHED;
  failed.finalRecordingTimestamp = new Date().toISOString();
  return failed;
}
__name(mapErrorToState, "mapErrorToState");
|
|
2161
|
+
/**
 * Build an initial VGF state from a client configuration, generating an
 * utterance ID when the config does not supply one.
 * @param config - Client configuration
 * @returns Fresh recognition state
 */
function createVGFStateFromConfig(config) {
  const state = createInitialRecognitionState(config.audioUtteranceId || generateUUID());
  if (config.asrRequestConfig) {
    // Snapshot the ASR configuration as a string for tracking/diagnostics.
    state.asrConfig = JSON.stringify(config.asrRequestConfig);
  }
  return state;
}
__name(createVGFStateFromConfig, "createVGFStateFromConfig");
|
|
2170
|
+
/**
 * Derive the stopped state: recording FINISHED with the final recording
 * timestamp set to now.
 * @param currentState - State to derive from (not mutated)
 * @returns New state object
 */
function updateStateOnStop(currentState) {
  const stopped = {
    ...currentState
  };
  stopped.startRecordingStatus = RecordingStatus.FINISHED;
  stopped.finalRecordingTimestamp = new Date().toISOString();
  return stopped;
}
__name(updateStateOnStop, "updateStateOnStop");
|
|
2178
|
+
/**
 * Generate a RFC 4122 version-4 UUID string.
 * Prefers the cryptographically secure native generator (Node >= 16.7 and
 * modern browsers); falls back to the original Math.random-based template
 * for older runtimes. Output format is identical either way.
 * @returns {string} A v4 UUID
 */
function generateUUID() {
  if (typeof globalThis.crypto?.randomUUID === "function") {
    return globalThis.crypto.randomUUID();
  }
  // Fallback: not cryptographically secure, but format-compatible.
  return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, function(c) {
    const r = Math.random() * 16 | 0;
    const v = c === "x" ? r : r & 3 | 8;
    return v.toString(16);
  });
}
__name(generateUUID, "generateUUID");
|
|
2186
|
+
|
|
2187
|
+
// src/simplified-vgf-recognition-client.ts
|
|
2188
|
+
// Wrapper around RealTimeTwoWayWebSocketRecognitionClient that maintains a
// VGF recognition state object as a side channel. The wrapper only WRITES
// the state (via the mapper functions) and notifies onStateChange; it never
// uses the state to make decisions.
var SimplifiedVGFRecognitionClient = class {
  static {
    __name(this, "SimplifiedVGFRecognitionClient");
  }
  // Underlying WebSocket recognition client (all I/O is delegated to it).
  client;
  // Current VGF recognition state snapshot.
  state;
  // True between the first sendAudio() call and stop/error/disconnect.
  isRecordingAudio = false;
  // Optional subscriber invoked with a shallow copy on every state change.
  stateChangeCallback;
  /**
   * @param config - Underlying client config plus optional onStateChange
   *   callback and optional initialState to resume from.
   */
  constructor(config) {
    const { onStateChange, initialState, ...clientConfig } = config;
    this.stateChangeCallback = onStateChange;
    if (initialState) {
      this.state = initialState;
      // Resume the same utterance unless the config explicitly overrides it.
      if (initialState.audioUtteranceId && !clientConfig.audioUtteranceId) {
        clientConfig.audioUtteranceId = initialState.audioUtteranceId;
      }
    } else {
      this.state = createVGFStateFromConfig(clientConfig);
    }
    // Construction implies the mic pipeline is ready to record.
    // NOTE(review): string literal "READY" matches RecordingStatus.READY —
    // confirm this stays in sync with the RecordingStatus table.
    this.state = {
      ...this.state,
      startRecordingStatus: "READY"
    };
    if (this.state.promptSlotMap) {
      // A slot map only takes effect when sent inside a game context with
      // useContext enabled on the ASR request.
      if (clientConfig.asrRequestConfig) {
        clientConfig.asrRequestConfig.useContext = true;
      }
      if (!clientConfig.gameContext) {
        if (clientConfig.logger) {
          clientConfig.logger("warn", "[VGF] promptSlotMap found but no gameContext provided. SlotMap will not be sent.");
        }
      } else {
        clientConfig.gameContext.slotMap = this.state.promptSlotMap;
      }
    }
    this.client = new RealTimeTwoWayWebSocketRecognitionClient({
      ...clientConfig,
      // These callbacks ONLY update the VGF state sink
      onTranscript: /* @__PURE__ */ __name((result) => {
        this.state = mapTranscriptionResultToState(this.state, result, this.isRecordingAudio);
        this.notifyStateChange();
        if (clientConfig.onTranscript) {
          clientConfig.onTranscript(result);
        }
      }, "onTranscript"),
      onMetadata: /* @__PURE__ */ __name((metadata) => {
        this.state = mapMetadataToState(this.state);
        this.notifyStateChange();
        if (clientConfig.onMetadata) {
          clientConfig.onMetadata(metadata);
        }
      }, "onMetadata"),
      onFunctionCall: /* @__PURE__ */ __name((result) => {
        // Function calls are forwarded untouched; no state mapping here.
        if (clientConfig.onFunctionCall) {
          clientConfig.onFunctionCall(result);
        }
      }, "onFunctionCall"),
      onError: /* @__PURE__ */ __name((error) => {
        this.isRecordingAudio = false;
        this.state = mapErrorToState(this.state);
        this.notifyStateChange();
        if (clientConfig.onError) {
          clientConfig.onError(error);
        }
      }, "onError"),
      onConnected: /* @__PURE__ */ __name(() => {
        if (clientConfig.onConnected) {
          clientConfig.onConnected();
        }
      }, "onConnected"),
      onDisconnected: /* @__PURE__ */ __name((code, reason) => {
        this.isRecordingAudio = false;
        if (clientConfig.onDisconnected) {
          clientConfig.onDisconnected(code, reason);
        }
      }, "onDisconnected")
    });
  }
  // DELEGATE ALL METHODS TO UNDERLYING CLIENT
  // The wrapper ONLY updates VGF state, doesn't use it for decisions
  async connect() {
    await this.client.connect();
  }
  /**
   * Forward audio to the underlying client. The first chunk of a session
   * flips the state to RECORDING and stamps the start time.
   */
  sendAudio(audioData) {
    if (!this.isRecordingAudio) {
      this.isRecordingAudio = true;
      this.state = {
        ...this.state,
        startRecordingStatus: "RECORDING",
        startRecordingTimestamp: (/* @__PURE__ */ new Date()).toISOString()
      };
      this.notifyStateChange();
    }
    this.client.sendAudio(audioData);
  }
  /** Mark the recording FINISHED in state, then stop the underlying client. */
  async stopRecording() {
    this.isRecordingAudio = false;
    this.state = updateStateOnStop(this.state);
    this.notifyStateChange();
    await this.client.stopRecording();
  }
  // Pure delegation methods - no state logic
  getAudioUtteranceId() {
    return this.client.getAudioUtteranceId();
  }
  getState() {
    return this.client.getState();
  }
  isConnected() {
    return this.client.isConnected();
  }
  isConnecting() {
    return this.client.isConnecting();
  }
  isStopping() {
    return this.client.isStopping();
  }
  isTranscriptionFinished() {
    return this.client.isTranscriptionFinished();
  }
  isBufferOverflowing() {
    return this.client.isBufferOverflowing();
  }
  // VGF State access (read-only for consumers)
  getVGFState() {
    // Shallow copy so callers cannot mutate internal state.
    return {
      ...this.state
    };
  }
  /** Invoke the subscriber (if any) with a shallow copy of the state. */
  notifyStateChange() {
    if (this.stateChangeCallback) {
      this.stateChangeCallback({
        ...this.state
      });
    }
  }
};
|
|
2325
|
+
/**
 * Factory: create a SimplifiedVGFRecognitionClient.
 * @param config - Same shape as the SimplifiedVGFRecognitionClient constructor config.
 * @returns A new SimplifiedVGFRecognitionClient (not yet connected).
 */
function createSimplifiedVGFClient(config) {
  return new SimplifiedVGFRecognitionClient(config);
}
__name(createSimplifiedVGFClient, "createSimplifiedVGFClient");
|
|
2329
|
+
|
|
2330
|
+
export { AudioEncoding, ClientState, ConfigBuilder, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DeepgramModel, GeminiModel, GoogleModel, Language, OpenAIModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, SampleRate, SimplifiedVGFRecognitionClient, TranscriptionStatus, createClient, createClientWithBuilder, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage };
|
|
2331
|
+
//# sourceMappingURL=index.js.map
|
|
2332
|
+
//# sourceMappingURL=index.js.map
|