@volley/recognition-client-sdk 0.1.424 → 0.1.622
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +236 -7
- package/dist/index.bundled.d.ts +393 -52
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +268 -15
- package/dist/index.js.map +4 -4
- package/dist/recog-client-sdk.browser.js +236 -14
- package/dist/recog-client-sdk.browser.js.map +4 -4
- package/dist/recognition-client.d.ts +28 -1
- package/dist/recognition-client.d.ts.map +1 -1
- package/dist/recognition-client.types.d.ts +20 -0
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/simplified-vgf-recognition-client.d.ts +17 -0
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -1
- package/dist/vgf-recognition-mapper.d.ts.map +1 -1
- package/dist/vgf-recognition-state.d.ts +6 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -1
- package/package.json +8 -8
- package/src/index.ts +3 -0
- package/src/recognition-client.ts +158 -8
- package/src/recognition-client.types.ts +23 -0
- package/src/simplified-vgf-recognition-client.integration.spec.ts +15 -3
- package/src/simplified-vgf-recognition-client.ts +28 -1
- package/src/utils/audio-ring-buffer.spec.ts +335 -0
- package/src/vgf-recognition-mapper.ts +19 -1
- package/src/vgf-recognition-state.ts +4 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// ../../node_modules/.pnpm/zod@3.22.
|
|
1
|
+
// ../../node_modules/.pnpm/zod@3.22.5/node_modules/zod/lib/index.mjs
|
|
2
2
|
var util;
|
|
3
3
|
(function(util2) {
|
|
4
4
|
util2.assertEqual = (val) => val;
|
|
@@ -3741,6 +3741,10 @@ var RecognitionProvider;
|
|
|
3741
3741
|
RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
|
|
3742
3742
|
RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
|
|
3743
3743
|
RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
|
|
3744
|
+
RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
|
|
3745
|
+
RecognitionProvider2["DASHSCOPE"] = "dashscope";
|
|
3746
|
+
RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
|
|
3747
|
+
RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
|
|
3744
3748
|
})(RecognitionProvider || (RecognitionProvider = {}));
|
|
3745
3749
|
var RecognitionMode;
|
|
3746
3750
|
(function(RecognitionMode2) {
|
|
@@ -3786,8 +3790,18 @@ var ElevenLabsModel;
|
|
|
3786
3790
|
})(ElevenLabsModel || (ElevenLabsModel = {}));
|
|
3787
3791
|
var OpenAIRealtimeModel;
|
|
3788
3792
|
(function(OpenAIRealtimeModel2) {
|
|
3793
|
+
OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
|
|
3789
3794
|
OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
|
|
3790
3795
|
})(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
|
|
3796
|
+
var MistralVoxtralModel;
|
|
3797
|
+
(function(MistralVoxtralModel2) {
|
|
3798
|
+
MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
|
|
3799
|
+
})(MistralVoxtralModel || (MistralVoxtralModel = {}));
|
|
3800
|
+
var DashScopeModel;
|
|
3801
|
+
(function(DashScopeModel2) {
|
|
3802
|
+
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
|
|
3803
|
+
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
|
|
3804
|
+
})(DashScopeModel || (DashScopeModel = {}));
|
|
3791
3805
|
|
|
3792
3806
|
// ../../libs/types/dist/recognition-result-v1.types.js
|
|
3793
3807
|
var RecognitionResultTypeV1;
|
|
@@ -3803,18 +3817,22 @@ var TranscriptionResultSchemaV1 = z.object({
|
|
|
3803
3817
|
type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
|
|
3804
3818
|
audioUtteranceId: z.string(),
|
|
3805
3819
|
finalTranscript: z.string(),
|
|
3820
|
+
finalTranscriptRaw: z.string(),
|
|
3806
3821
|
finalTranscriptConfidence: z.number().min(0).max(1).optional(),
|
|
3807
3822
|
pendingTranscript: z.string().optional(),
|
|
3823
|
+
pendingTranscriptRaw: z.string().optional(),
|
|
3808
3824
|
pendingTranscriptConfidence: z.number().min(0).max(1).optional(),
|
|
3809
3825
|
is_finished: z.boolean(),
|
|
3810
3826
|
voiceStart: z.number().optional(),
|
|
3811
3827
|
voiceDuration: z.number().optional(),
|
|
3812
3828
|
voiceEnd: z.number().optional(),
|
|
3829
|
+
lastNonSilence: z.number().optional(),
|
|
3813
3830
|
startTimestamp: z.number().optional(),
|
|
3814
3831
|
endTimestamp: z.number().optional(),
|
|
3815
3832
|
receivedAtMs: z.number().optional(),
|
|
3816
|
-
accumulatedAudioTimeMs: z.number().optional()
|
|
3817
|
-
|
|
3833
|
+
accumulatedAudioTimeMs: z.number().optional(),
|
|
3834
|
+
rawAudioTimeMs: z.number().optional()
|
|
3835
|
+
// Total audio duration sent to provider (includes prefix)
|
|
3818
3836
|
});
|
|
3819
3837
|
var FunctionCallResultSchemaV1 = z.object({
|
|
3820
3838
|
type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
|
|
@@ -3827,11 +3845,22 @@ var TranscriptOutcomeType;
|
|
|
3827
3845
|
TranscriptOutcomeType2["WITH_CONTENT"] = "with_content";
|
|
3828
3846
|
TranscriptOutcomeType2["EMPTY"] = "empty";
|
|
3829
3847
|
TranscriptOutcomeType2["NEVER_SENT"] = "never_sent";
|
|
3848
|
+
TranscriptOutcomeType2["ERROR_AUTHENTICATION"] = "error_authentication";
|
|
3849
|
+
TranscriptOutcomeType2["ERROR_VALIDATION"] = "error_validation";
|
|
3850
|
+
TranscriptOutcomeType2["ERROR_PROVIDER"] = "error_provider";
|
|
3851
|
+
TranscriptOutcomeType2["ERROR_TIMEOUT"] = "error_timeout";
|
|
3852
|
+
TranscriptOutcomeType2["ERROR_QUOTA"] = "error_quota";
|
|
3853
|
+
TranscriptOutcomeType2["ERROR_INTERNAL_QUOTA"] = "error_internal_quota";
|
|
3854
|
+
TranscriptOutcomeType2["ERROR_CONNECTION"] = "error_connection";
|
|
3855
|
+
TranscriptOutcomeType2["ERROR_NO_AUDIO"] = "error_no_audio";
|
|
3856
|
+
TranscriptOutcomeType2["ERROR_CIRCUIT_BREAKER"] = "error_circuit_breaker";
|
|
3857
|
+
TranscriptOutcomeType2["ERROR_UNKNOWN"] = "error_unknown";
|
|
3830
3858
|
})(TranscriptOutcomeType || (TranscriptOutcomeType = {}));
|
|
3831
3859
|
var MetadataResultSchemaV1 = z.object({
|
|
3832
3860
|
type: z.literal(RecognitionResultTypeV1.METADATA),
|
|
3833
3861
|
audioUtteranceId: z.string(),
|
|
3834
3862
|
// Timing information
|
|
3863
|
+
connectionInitiatedAtMs: z.number().optional(),
|
|
3835
3864
|
recordingStartMs: z.number().optional(),
|
|
3836
3865
|
recordingEndMs: z.number().optional(),
|
|
3837
3866
|
transcriptEndMs: z.number().optional(),
|
|
@@ -3840,6 +3869,7 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3840
3869
|
duration: z.number().optional(),
|
|
3841
3870
|
volume: z.number().optional(),
|
|
3842
3871
|
accumulatedAudioTimeMs: z.number().optional(),
|
|
3872
|
+
rawAudioTimeMs: z.number().optional(),
|
|
3843
3873
|
// Cost Information
|
|
3844
3874
|
costInUSD: z.number().default(0).optional(),
|
|
3845
3875
|
// ASR API Type
|
|
@@ -3849,7 +3879,22 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3849
3879
|
// Raw ASR metadata payload as provided by the provider (stringified if needed)
|
|
3850
3880
|
rawAsrMetadata: z.string().optional(),
|
|
3851
3881
|
// Transcript outcome - categorizes the final transcript state
|
|
3852
|
-
transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional()
|
|
3882
|
+
transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional(),
|
|
3883
|
+
// Audio metrics - embedded audio quality metrics (from AudioMetricsAccumulator)
|
|
3884
|
+
// Omit 'type' field since it's embedded in METADATA, not a separate message
|
|
3885
|
+
audioMetrics: z.object({
|
|
3886
|
+
valid: z.boolean(),
|
|
3887
|
+
audioBeginMs: z.number(),
|
|
3888
|
+
audioEndMs: z.number(),
|
|
3889
|
+
maxVolume: z.number(),
|
|
3890
|
+
minVolume: z.number(),
|
|
3891
|
+
avgVolume: z.number(),
|
|
3892
|
+
silenceRatio: z.number(),
|
|
3893
|
+
clippingRatio: z.number(),
|
|
3894
|
+
snrEstimate: z.number().nullable(),
|
|
3895
|
+
lastNonSilenceMs: z.number(),
|
|
3896
|
+
timestamp: z.string()
|
|
3897
|
+
}).optional()
|
|
3853
3898
|
});
|
|
3854
3899
|
var ErrorTypeV1;
|
|
3855
3900
|
(function(ErrorTypeV12) {
|
|
@@ -3858,7 +3903,10 @@ var ErrorTypeV1;
|
|
|
3858
3903
|
ErrorTypeV12["PROVIDER_ERROR"] = "provider_error";
|
|
3859
3904
|
ErrorTypeV12["TIMEOUT_ERROR"] = "timeout_error";
|
|
3860
3905
|
ErrorTypeV12["QUOTA_EXCEEDED"] = "quota_exceeded";
|
|
3906
|
+
ErrorTypeV12["INTERNAL_QUOTA_EXHAUSTED"] = "internal_quota_exhausted";
|
|
3861
3907
|
ErrorTypeV12["CONNECTION_ERROR"] = "connection_error";
|
|
3908
|
+
ErrorTypeV12["NO_AUDIO_ERROR"] = "no_audio_error";
|
|
3909
|
+
ErrorTypeV12["CIRCUIT_BREAKER_OPEN"] = "circuit_breaker_open";
|
|
3862
3910
|
ErrorTypeV12["UNKNOWN_ERROR"] = "unknown_error";
|
|
3863
3911
|
})(ErrorTypeV1 || (ErrorTypeV1 = {}));
|
|
3864
3912
|
var ErrorResultSchemaV1 = z.object({
|
|
@@ -4075,6 +4123,12 @@ var TimerSchema = z.object({
|
|
|
4075
4123
|
* Provider that generated this message
|
|
4076
4124
|
*/
|
|
4077
4125
|
provider: z.nativeEnum(RecognitionProvider).optional(),
|
|
4126
|
+
/**
|
|
4127
|
+
* Timestamp when provider connection was initiated (in milliseconds)
|
|
4128
|
+
* Set before doConnect() - captures the moment before WebSocket creation starts
|
|
4129
|
+
* @example 1704095999800
|
|
4130
|
+
*/
|
|
4131
|
+
connectionInitiatedAtMs: z.number().optional(),
|
|
4078
4132
|
/**
|
|
4079
4133
|
* Timestamp when recording started (in milliseconds)
|
|
4080
4134
|
* @example 1704096000000
|
|
@@ -4208,6 +4262,14 @@ var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
|
4208
4262
|
/** Underlying error message */
|
|
4209
4263
|
underlyingError: z.string().optional()
|
|
4210
4264
|
});
|
|
4265
|
+
var CircuitBreakerExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
4266
|
+
errorType: z.literal(ErrorTypeV1.CIRCUIT_BREAKER_OPEN),
|
|
4267
|
+
isImmediatelyAvailable: z.literal(true),
|
|
4268
|
+
/** Provider that is unavailable */
|
|
4269
|
+
provider: z.nativeEnum(RecognitionProvider).optional(),
|
|
4270
|
+
/** Model that is unavailable */
|
|
4271
|
+
model: z.string().optional()
|
|
4272
|
+
});
|
|
4211
4273
|
var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
4212
4274
|
errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
|
|
4213
4275
|
isImmediatelyAvailable: z.literal(false),
|
|
@@ -4223,6 +4285,7 @@ var RecognitionExceptionSchema = z.discriminatedUnion("errorType", [
|
|
|
4223
4285
|
TimeoutExceptionSchema,
|
|
4224
4286
|
QuotaExceededExceptionSchema,
|
|
4225
4287
|
ConnectionExceptionSchema,
|
|
4288
|
+
CircuitBreakerExceptionSchema,
|
|
4226
4289
|
UnknownExceptionSchema
|
|
4227
4290
|
]);
|
|
4228
4291
|
|
|
@@ -4238,6 +4301,12 @@ var ControlSignalTypeV1;
|
|
|
4238
4301
|
ControlSignalTypeV12["START_RECORDING"] = "start_recording";
|
|
4239
4302
|
ControlSignalTypeV12["STOP_RECORDING"] = "stop_recording";
|
|
4240
4303
|
})(ControlSignalTypeV1 || (ControlSignalTypeV1 = {}));
|
|
4304
|
+
var PrefixMode;
|
|
4305
|
+
(function(PrefixMode2) {
|
|
4306
|
+
PrefixMode2["NONE"] = "none";
|
|
4307
|
+
PrefixMode2["CLIENT"] = "client";
|
|
4308
|
+
PrefixMode2["STORED"] = "stored";
|
|
4309
|
+
})(PrefixMode || (PrefixMode = {}));
|
|
4241
4310
|
var SlotMapSchema = z.record(z.string(), z.array(z.string()));
|
|
4242
4311
|
var GameContextSchemaV1 = z.object({
|
|
4243
4312
|
type: z.literal(RecognitionContextTypeV1.GAME_CONTEXT),
|
|
@@ -4269,6 +4338,19 @@ var RequestDebugCommandSchema = z.object({
|
|
|
4269
4338
|
// Enable experimental pilot models for testing new features
|
|
4270
4339
|
enablePilotModels: z.boolean().optional().default(false)
|
|
4271
4340
|
}).optional();
|
|
4341
|
+
var FallbackASRConfigSchema = z.object({
|
|
4342
|
+
// Required - the fallback provider to use
|
|
4343
|
+
provider: z.string(),
|
|
4344
|
+
// Optional - inherits from primary if not specified
|
|
4345
|
+
model: z.string().optional(),
|
|
4346
|
+
language: z.string().optional(),
|
|
4347
|
+
sampleRate: z.number().optional(),
|
|
4348
|
+
encoding: z.number().optional(),
|
|
4349
|
+
// Recognition options - optional, inherits from primary
|
|
4350
|
+
interimResults: z.boolean().optional(),
|
|
4351
|
+
useContext: z.boolean().optional(),
|
|
4352
|
+
finalTranscriptStability: z.string().optional()
|
|
4353
|
+
});
|
|
4272
4354
|
var ASRRequestSchemaV1 = z.object({
|
|
4273
4355
|
type: z.literal(RecognitionContextTypeV1.ASR_REQUEST),
|
|
4274
4356
|
// Session identification
|
|
@@ -4284,6 +4366,16 @@ var ASRRequestSchemaV1 = z.object({
|
|
|
4284
4366
|
useContext: z.boolean().optional().default(false),
|
|
4285
4367
|
// Final transcript stability mode (timeout for fallback final transcript)
|
|
4286
4368
|
finalTranscriptStability: z.string().optional(),
|
|
4369
|
+
// Traffic control priority (affects quota slot allocation)
|
|
4370
|
+
// 'high' = can use all quota slots (reserved for critical games like song-quiz)
|
|
4371
|
+
// 'low' = limited to non-reserved slots (default for most requests)
|
|
4372
|
+
priority: z.enum(["low", "high"]).optional().default("low"),
|
|
4373
|
+
// Fallback providers - tried in order if primary provider is unavailable (circuit breaker open)
|
|
4374
|
+
fallbackModels: z.array(FallbackASRConfigSchema).optional(),
|
|
4375
|
+
// Prefix audio configuration
|
|
4376
|
+
prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
|
|
4377
|
+
prefixId: z.string().optional(),
|
|
4378
|
+
prefixTextToRemove: z.array(z.string()).optional(),
|
|
4287
4379
|
// Debug options (FOR DEBUG/TESTING ONLY - not for production use)
|
|
4288
4380
|
debugCommand: RequestDebugCommandSchema
|
|
4289
4381
|
});
|
|
@@ -4301,6 +4393,8 @@ var RecognitionGameInfoSchema = z.object({
|
|
|
4301
4393
|
accountId: z.string().optional(),
|
|
4302
4394
|
gameId: z.string().optional(),
|
|
4303
4395
|
gamePhase: z.string().optional(),
|
|
4396
|
+
questionAskedId: z.string().optional(),
|
|
4397
|
+
/** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
|
|
4304
4398
|
questionAnswerId: z.string().optional(),
|
|
4305
4399
|
platform: z.string().optional()
|
|
4306
4400
|
// Platform for audio recording device (use the definition of platform teams)
|
|
@@ -4435,6 +4529,7 @@ var AudioEncoding;
|
|
|
4435
4529
|
}
|
|
4436
4530
|
AudioEncoding2.isNameValid = isNameValid;
|
|
4437
4531
|
})(AudioEncoding || (AudioEncoding = {}));
|
|
4532
|
+
var PREFIX_AUDIO_ENCODING_OFFSET = 128;
|
|
4438
4533
|
var SampleRate;
|
|
4439
4534
|
(function(SampleRate2) {
|
|
4440
4535
|
SampleRate2[SampleRate2["RATE_8000"] = 8e3] = "RATE_8000";
|
|
@@ -4527,6 +4622,7 @@ var FinalTranscriptStability;
|
|
|
4527
4622
|
var PlumbingType;
|
|
4528
4623
|
(function(PlumbingType2) {
|
|
4529
4624
|
PlumbingType2["AUDIO"] = "audio";
|
|
4625
|
+
PlumbingType2["PREFIX_AUDIO"] = "prefix_audio";
|
|
4530
4626
|
PlumbingType2["CONTROL"] = "control";
|
|
4531
4627
|
PlumbingType2["RESULT"] = "result";
|
|
4532
4628
|
PlumbingType2["RECOGNITION_CONTEXT"] = "recognition_context";
|
|
@@ -4605,6 +4701,11 @@ var StatsIncrementType;
|
|
|
4605
4701
|
StatsIncrementType2["SUCCESS"] = "success";
|
|
4606
4702
|
StatsIncrementType2["FAIL"] = "fail";
|
|
4607
4703
|
})(StatsIncrementType || (StatsIncrementType = {}));
|
|
4704
|
+
var QuotaPriority;
|
|
4705
|
+
(function(QuotaPriority2) {
|
|
4706
|
+
QuotaPriority2[QuotaPriority2["LOW"] = 0] = "LOW";
|
|
4707
|
+
QuotaPriority2[QuotaPriority2["HIGH"] = 1] = "HIGH";
|
|
4708
|
+
})(QuotaPriority || (QuotaPriority = {}));
|
|
4608
4709
|
|
|
4609
4710
|
// ../../libs/types/dist/stages.types.js
|
|
4610
4711
|
var STAGES = {
|
|
@@ -4781,7 +4882,7 @@ var WebSocketAudioClient = class {
|
|
|
4781
4882
|
// ../../libs/websocket/dist/core/audio-upload-websocket-server.js
|
|
4782
4883
|
import { WebSocketServer, WebSocket as WebSocket2 } from "ws";
|
|
4783
4884
|
|
|
4784
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4885
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/stringify.js
|
|
4785
4886
|
var byteToHex = [];
|
|
4786
4887
|
for (let i = 0; i < 256; ++i) {
|
|
4787
4888
|
byteToHex.push((i + 256).toString(16).slice(1));
|
|
@@ -4790,7 +4891,7 @@ function unsafeStringify(arr, offset = 0) {
|
|
|
4790
4891
|
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
4791
4892
|
}
|
|
4792
4893
|
|
|
4793
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4894
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/rng.js
|
|
4794
4895
|
var getRandomValues;
|
|
4795
4896
|
var rnds8 = new Uint8Array(16);
|
|
4796
4897
|
function rng() {
|
|
@@ -4803,21 +4904,27 @@ function rng() {
|
|
|
4803
4904
|
return getRandomValues(rnds8);
|
|
4804
4905
|
}
|
|
4805
4906
|
|
|
4806
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4907
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/native.js
|
|
4807
4908
|
var randomUUID = typeof crypto !== "undefined" && crypto.randomUUID && crypto.randomUUID.bind(crypto);
|
|
4808
4909
|
var native_default = { randomUUID };
|
|
4809
4910
|
|
|
4810
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4911
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/v4.js
|
|
4811
4912
|
function v4(options, buf, offset) {
|
|
4812
4913
|
if (native_default.randomUUID && !buf && !options) {
|
|
4813
4914
|
return native_default.randomUUID();
|
|
4814
4915
|
}
|
|
4815
4916
|
options = options || {};
|
|
4816
|
-
const rnds = options.random
|
|
4917
|
+
const rnds = options.random ?? options.rng?.() ?? rng();
|
|
4918
|
+
if (rnds.length < 16) {
|
|
4919
|
+
throw new Error("Random bytes length must be >= 16");
|
|
4920
|
+
}
|
|
4817
4921
|
rnds[6] = rnds[6] & 15 | 64;
|
|
4818
4922
|
rnds[8] = rnds[8] & 63 | 128;
|
|
4819
4923
|
if (buf) {
|
|
4820
4924
|
offset = offset || 0;
|
|
4925
|
+
if (offset < 0 || offset + 16 > buf.length) {
|
|
4926
|
+
throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
|
|
4927
|
+
}
|
|
4821
4928
|
for (let i = 0; i < 16; ++i) {
|
|
4822
4929
|
buf[offset + i] = rnds[i];
|
|
4823
4930
|
}
|
|
@@ -5191,6 +5298,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5191
5298
|
highWM: config.highWaterMark ?? 512e3,
|
|
5192
5299
|
lowWM: config.lowWaterMark ?? 128e3
|
|
5193
5300
|
});
|
|
5301
|
+
this.prefixBuffer = [];
|
|
5302
|
+
// Buffer prefix audio until READY
|
|
5303
|
+
this.prefixBufferBytes = 0;
|
|
5194
5304
|
this.state = "initial" /* INITIAL */;
|
|
5195
5305
|
// Debug control (internal state, controlled by debugCommand in ASRRequest)
|
|
5196
5306
|
this.isDebugLogEnabled = false;
|
|
@@ -5247,6 +5357,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5247
5357
|
static {
|
|
5248
5358
|
this.PROTOCOL_VERSION = 1;
|
|
5249
5359
|
}
|
|
5360
|
+
static {
|
|
5361
|
+
this.MAX_PREFIX_BUFFER_BYTES = 10 * 1024 * 1024;
|
|
5362
|
+
}
|
|
5250
5363
|
// ==========================================================================
|
|
5251
5364
|
// PRIVATE HELPERS
|
|
5252
5365
|
// ==========================================================================
|
|
@@ -5272,6 +5385,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5272
5385
|
cleanup() {
|
|
5273
5386
|
this.log("debug", "Cleaning up resources");
|
|
5274
5387
|
this.audioBuffer.clear();
|
|
5388
|
+
this.prefixBuffer = [];
|
|
5389
|
+
this.prefixBufferBytes = 0;
|
|
5275
5390
|
this.audioBytesSent = 0;
|
|
5276
5391
|
this.audioChunksSent = 0;
|
|
5277
5392
|
this.lastAudioStatsLog = 0;
|
|
@@ -5321,7 +5436,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5321
5436
|
const timeout = setTimeout(() => {
|
|
5322
5437
|
if (settled) return;
|
|
5323
5438
|
settled = true;
|
|
5324
|
-
this.log("warn",
|
|
5439
|
+
this.log("warn", `Connection timeout url=${this.config.url}`, { timeout: connectionTimeout, attempt });
|
|
5325
5440
|
this.state = "failed" /* FAILED */;
|
|
5326
5441
|
reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
|
|
5327
5442
|
}, connectionTimeout);
|
|
@@ -5343,7 +5458,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5343
5458
|
if (settled) return;
|
|
5344
5459
|
settled = true;
|
|
5345
5460
|
clearTimeout(timeout);
|
|
5346
|
-
this.log("warn",
|
|
5461
|
+
this.log("warn", `Connection error url=${this.config.url}`, { error, attempt });
|
|
5347
5462
|
this.state = "failed" /* FAILED */;
|
|
5348
5463
|
reject(error);
|
|
5349
5464
|
};
|
|
@@ -5358,14 +5473,14 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5358
5473
|
lastError = error;
|
|
5359
5474
|
if (attempt < maxAttempts) {
|
|
5360
5475
|
const logLevel = attempt < 3 ? "info" : "warn";
|
|
5361
|
-
this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
|
|
5476
|
+
this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms url=${this.config.url}`, {
|
|
5362
5477
|
error: lastError.message,
|
|
5363
5478
|
nextAttempt: attempt + 1
|
|
5364
5479
|
});
|
|
5365
5480
|
this.state = "initial" /* INITIAL */;
|
|
5366
5481
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
5367
5482
|
} else {
|
|
5368
|
-
this.log("warn", `All ${maxAttempts} connection attempts failed`, {
|
|
5483
|
+
this.log("warn", `All ${maxAttempts} connection attempts failed url=${this.config.url}`, {
|
|
5369
5484
|
error: lastError.message
|
|
5370
5485
|
});
|
|
5371
5486
|
}
|
|
@@ -5488,6 +5603,25 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5488
5603
|
isBufferOverflowing() {
|
|
5489
5604
|
return this.audioBuffer.isOverflowing();
|
|
5490
5605
|
}
|
|
5606
|
+
isServerReady() {
|
|
5607
|
+
return this.state === "ready" /* READY */;
|
|
5608
|
+
}
|
|
5609
|
+
sendGameContext(context) {
|
|
5610
|
+
if (this.state !== "connected" /* CONNECTED */ && this.state !== "ready" /* READY */) {
|
|
5611
|
+
this.log("warn", "sendGameContext called in wrong state", { state: this.state });
|
|
5612
|
+
return;
|
|
5613
|
+
}
|
|
5614
|
+
this.log("debug", "Sending game context (deferred)", {
|
|
5615
|
+
gameId: context.gameId,
|
|
5616
|
+
gamePhase: context.gamePhase,
|
|
5617
|
+
hasSlotMap: !!context.slotMap
|
|
5618
|
+
});
|
|
5619
|
+
super.sendMessage(
|
|
5620
|
+
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
5621
|
+
"message",
|
|
5622
|
+
context
|
|
5623
|
+
);
|
|
5624
|
+
}
|
|
5491
5625
|
getStats() {
|
|
5492
5626
|
const bufferStats = this.audioBuffer.getStats();
|
|
5493
5627
|
return {
|
|
@@ -5513,6 +5647,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5513
5647
|
if (this.isDebugLogEnabled) {
|
|
5514
5648
|
this.log("debug", "Sending ASR request", this.config.asrRequestConfig);
|
|
5515
5649
|
}
|
|
5650
|
+
const fallbackModels = this.config.asrRequestConfig.fallbackModels;
|
|
5516
5651
|
const asrRequest = {
|
|
5517
5652
|
type: RecognitionContextTypeV1.ASR_REQUEST,
|
|
5518
5653
|
audioUtteranceId: this.config.audioUtteranceId,
|
|
@@ -5528,7 +5663,20 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5528
5663
|
...this.config.asrRequestConfig.finalTranscriptStability && {
|
|
5529
5664
|
finalTranscriptStability: this.config.asrRequestConfig.finalTranscriptStability
|
|
5530
5665
|
},
|
|
5531
|
-
|
|
5666
|
+
// Include fallbackModels if provided (for circuit breaker fallback)
|
|
5667
|
+
...fallbackModels && { fallbackModels },
|
|
5668
|
+
...debugCommand && { debugCommand },
|
|
5669
|
+
// Include prefix mode if provided (for server-side stored prefix injection)
|
|
5670
|
+
...this.config.asrRequestConfig.prefixMode && {
|
|
5671
|
+
prefixMode: this.config.asrRequestConfig.prefixMode
|
|
5672
|
+
},
|
|
5673
|
+
...this.config.asrRequestConfig.prefixId && {
|
|
5674
|
+
prefixId: this.config.asrRequestConfig.prefixId
|
|
5675
|
+
},
|
|
5676
|
+
// Include prefix text to remove if provided (for server-side prefix text removal)
|
|
5677
|
+
...this.config.asrRequestConfig.prefixTextToRemove && {
|
|
5678
|
+
prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
|
|
5679
|
+
}
|
|
5532
5680
|
};
|
|
5533
5681
|
super.sendMessage(
|
|
5534
5682
|
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
@@ -5635,6 +5783,12 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5635
5783
|
this.log("debug", "Server ready for audio upload");
|
|
5636
5784
|
this.state = "ready" /* READY */;
|
|
5637
5785
|
this.messageHandler.setSessionStartTime(Date.now());
|
|
5786
|
+
if (this.prefixBuffer.length > 0) {
|
|
5787
|
+
this.log("debug", "Flushing buffered prefix audio", { chunks: this.prefixBuffer.length });
|
|
5788
|
+
this.prefixBuffer.forEach((chunk) => this.sendPrefixAudioNow(chunk));
|
|
5789
|
+
this.prefixBuffer = [];
|
|
5790
|
+
this.prefixBufferBytes = 0;
|
|
5791
|
+
}
|
|
5638
5792
|
const bufferedChunks = this.audioBuffer.flush();
|
|
5639
5793
|
if (bufferedChunks.length > 0) {
|
|
5640
5794
|
this.log("debug", "Flushing buffered audio", { chunks: bufferedChunks.length });
|
|
@@ -5666,6 +5820,74 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5666
5820
|
this.audioBytesSent += byteLength;
|
|
5667
5821
|
this.audioChunksSent++;
|
|
5668
5822
|
}
|
|
5823
|
+
/**
|
|
5824
|
+
* Send prefix audio to the server.
|
|
5825
|
+
* Prefix audio is sent before user audio and is used for context/priming.
|
|
5826
|
+
* The server will process it but adjust timing so transcripts reflect user audio timing.
|
|
5827
|
+
*
|
|
5828
|
+
* Note: Prefix audio is buffered until READY state, then flushed before user audio.
|
|
5829
|
+
* This ensures proper ordering even if called before server is ready.
|
|
5830
|
+
*
|
|
5831
|
+
* @param audioData - Prefix audio data (ArrayBuffer, ArrayBufferView, or Blob)
|
|
5832
|
+
*/
|
|
5833
|
+
sendPrefixAudio(audioData) {
|
|
5834
|
+
if (audioData instanceof Blob) {
|
|
5835
|
+
blobToArrayBuffer(audioData).then((arrayBuffer) => {
|
|
5836
|
+
this.sendPrefixAudioInternal(arrayBuffer);
|
|
5837
|
+
}).catch((error) => {
|
|
5838
|
+
this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
|
|
5839
|
+
});
|
|
5840
|
+
return;
|
|
5841
|
+
}
|
|
5842
|
+
this.sendPrefixAudioInternal(audioData);
|
|
5843
|
+
}
|
|
5844
|
+
/**
|
|
5845
|
+
* Internal method to handle prefix audio with buffering
|
|
5846
|
+
* Buffers if not READY, sends immediately if READY
|
|
5847
|
+
*/
|
|
5848
|
+
sendPrefixAudioInternal(audioData) {
|
|
5849
|
+
const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
5850
|
+
if (bytes === 0) return;
|
|
5851
|
+
if (this.state === "stopped" /* STOPPED */ || this.state === "failed" /* FAILED */) {
|
|
5852
|
+
this.log("debug", "Ignoring prefix audio in terminal state", { bytes, state: this.state });
|
|
5853
|
+
return;
|
|
5854
|
+
}
|
|
5855
|
+
if (this.state === "ready" /* READY */) {
|
|
5856
|
+
this.log("debug", "Sending prefix audio immediately", { bytes });
|
|
5857
|
+
this.sendPrefixAudioNow(audioData);
|
|
5858
|
+
} else {
|
|
5859
|
+
if (this.prefixBufferBytes + bytes > _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES) {
|
|
5860
|
+
this.log("warn", "Prefix buffer limit exceeded, dropping chunk", {
|
|
5861
|
+
bytes,
|
|
5862
|
+
current: this.prefixBufferBytes,
|
|
5863
|
+
max: _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES
|
|
5864
|
+
});
|
|
5865
|
+
return;
|
|
5866
|
+
}
|
|
5867
|
+
this.log("debug", "Buffering prefix audio until READY", { bytes, state: this.state });
|
|
5868
|
+
this.prefixBuffer.push(audioData);
|
|
5869
|
+
this.prefixBufferBytes += bytes;
|
|
5870
|
+
}
|
|
5871
|
+
}
|
|
5872
|
+
/**
|
|
5873
|
+
* Send prefix audio immediately to the server (without buffering)
|
|
5874
|
+
* Uses encoding offset to mark as prefix audio
|
|
5875
|
+
* @param audioData - Prefix audio data to send
|
|
5876
|
+
*/
|
|
5877
|
+
sendPrefixAudioNow(audioData) {
|
|
5878
|
+
const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
5879
|
+
if (byteLength === 0) return;
|
|
5880
|
+
const baseEncodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
|
|
5881
|
+
const prefixEncodingId = baseEncodingId + PREFIX_AUDIO_ENCODING_OFFSET;
|
|
5882
|
+
const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
|
|
5883
|
+
this.log("debug", "Sending prefix audio", { bytes: byteLength, encoding: prefixEncodingId });
|
|
5884
|
+
super.sendAudio(
|
|
5885
|
+
audioData,
|
|
5886
|
+
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
5887
|
+
prefixEncodingId,
|
|
5888
|
+
sampleRate
|
|
5889
|
+
);
|
|
5890
|
+
}
|
|
5669
5891
|
};
|
|
5670
5892
|
export {
|
|
5671
5893
|
AudioEncoding,
|