@volley/recognition-client-sdk 0.1.423 → 0.1.621
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +237 -7
- package/dist/index.bundled.d.ts +346 -10
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +248 -12
- package/dist/index.js.map +4 -4
- package/dist/recog-client-sdk.browser.js +236 -11
- package/dist/recog-client-sdk.browser.js.map +4 -4
- package/dist/recognition-client.d.ts +32 -1
- package/dist/recognition-client.d.ts.map +1 -1
- package/dist/recognition-client.types.d.ts +20 -0
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/simplified-vgf-recognition-client.d.ts +17 -0
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -1
- package/package.json +7 -7
- package/src/index.ts +2 -0
- package/src/recognition-client.ts +160 -5
- package/src/recognition-client.types.ts +23 -0
- package/src/simplified-vgf-recognition-client.integration.spec.ts +15 -3
- package/src/simplified-vgf-recognition-client.ts +30 -3
- package/src/utils/audio-ring-buffer.spec.ts +335 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// ../../node_modules/.pnpm/zod@3.22.
|
|
1
|
+
// ../../node_modules/.pnpm/zod@3.22.5/node_modules/zod/lib/index.mjs
|
|
2
2
|
var util;
|
|
3
3
|
(function(util2) {
|
|
4
4
|
util2.assertEqual = (val) => val;
|
|
@@ -3741,6 +3741,10 @@ var RecognitionProvider;
|
|
|
3741
3741
|
RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
|
|
3742
3742
|
RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
|
|
3743
3743
|
RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
|
|
3744
|
+
RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
|
|
3745
|
+
RecognitionProvider2["DASHSCOPE"] = "dashscope";
|
|
3746
|
+
RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
|
|
3747
|
+
RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
|
|
3744
3748
|
})(RecognitionProvider || (RecognitionProvider = {}));
|
|
3745
3749
|
var RecognitionMode;
|
|
3746
3750
|
(function(RecognitionMode2) {
|
|
@@ -3786,8 +3790,18 @@ var ElevenLabsModel;
|
|
|
3786
3790
|
})(ElevenLabsModel || (ElevenLabsModel = {}));
|
|
3787
3791
|
var OpenAIRealtimeModel;
|
|
3788
3792
|
(function(OpenAIRealtimeModel2) {
|
|
3793
|
+
OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
|
|
3789
3794
|
OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
|
|
3790
3795
|
})(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
|
|
3796
|
+
var MistralVoxtralModel;
|
|
3797
|
+
(function(MistralVoxtralModel2) {
|
|
3798
|
+
MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
|
|
3799
|
+
})(MistralVoxtralModel || (MistralVoxtralModel = {}));
|
|
3800
|
+
var DashScopeModel;
|
|
3801
|
+
(function(DashScopeModel2) {
|
|
3802
|
+
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
|
|
3803
|
+
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
|
|
3804
|
+
})(DashScopeModel || (DashScopeModel = {}));
|
|
3791
3805
|
|
|
3792
3806
|
// ../../libs/types/dist/recognition-result-v1.types.js
|
|
3793
3807
|
var RecognitionResultTypeV1;
|
|
@@ -3803,8 +3817,10 @@ var TranscriptionResultSchemaV1 = z.object({
|
|
|
3803
3817
|
type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
|
|
3804
3818
|
audioUtteranceId: z.string(),
|
|
3805
3819
|
finalTranscript: z.string(),
|
|
3820
|
+
finalTranscriptRaw: z.string(),
|
|
3806
3821
|
finalTranscriptConfidence: z.number().min(0).max(1).optional(),
|
|
3807
3822
|
pendingTranscript: z.string().optional(),
|
|
3823
|
+
pendingTranscriptRaw: z.string().optional(),
|
|
3808
3824
|
pendingTranscriptConfidence: z.number().min(0).max(1).optional(),
|
|
3809
3825
|
is_finished: z.boolean(),
|
|
3810
3826
|
voiceStart: z.number().optional(),
|
|
@@ -3813,8 +3829,9 @@ var TranscriptionResultSchemaV1 = z.object({
|
|
|
3813
3829
|
startTimestamp: z.number().optional(),
|
|
3814
3830
|
endTimestamp: z.number().optional(),
|
|
3815
3831
|
receivedAtMs: z.number().optional(),
|
|
3816
|
-
accumulatedAudioTimeMs: z.number().optional()
|
|
3817
|
-
|
|
3832
|
+
accumulatedAudioTimeMs: z.number().optional(),
|
|
3833
|
+
rawAudioTimeMs: z.number().optional()
|
|
3834
|
+
// Total audio duration sent to provider (includes prefix)
|
|
3818
3835
|
});
|
|
3819
3836
|
var FunctionCallResultSchemaV1 = z.object({
|
|
3820
3837
|
type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
|
|
@@ -3827,11 +3844,22 @@ var TranscriptOutcomeType;
|
|
|
3827
3844
|
TranscriptOutcomeType2["WITH_CONTENT"] = "with_content";
|
|
3828
3845
|
TranscriptOutcomeType2["EMPTY"] = "empty";
|
|
3829
3846
|
TranscriptOutcomeType2["NEVER_SENT"] = "never_sent";
|
|
3847
|
+
TranscriptOutcomeType2["ERROR_AUTHENTICATION"] = "error_authentication";
|
|
3848
|
+
TranscriptOutcomeType2["ERROR_VALIDATION"] = "error_validation";
|
|
3849
|
+
TranscriptOutcomeType2["ERROR_PROVIDER"] = "error_provider";
|
|
3850
|
+
TranscriptOutcomeType2["ERROR_TIMEOUT"] = "error_timeout";
|
|
3851
|
+
TranscriptOutcomeType2["ERROR_QUOTA"] = "error_quota";
|
|
3852
|
+
TranscriptOutcomeType2["ERROR_INTERNAL_QUOTA"] = "error_internal_quota";
|
|
3853
|
+
TranscriptOutcomeType2["ERROR_CONNECTION"] = "error_connection";
|
|
3854
|
+
TranscriptOutcomeType2["ERROR_NO_AUDIO"] = "error_no_audio";
|
|
3855
|
+
TranscriptOutcomeType2["ERROR_CIRCUIT_BREAKER"] = "error_circuit_breaker";
|
|
3856
|
+
TranscriptOutcomeType2["ERROR_UNKNOWN"] = "error_unknown";
|
|
3830
3857
|
})(TranscriptOutcomeType || (TranscriptOutcomeType = {}));
|
|
3831
3858
|
var MetadataResultSchemaV1 = z.object({
|
|
3832
3859
|
type: z.literal(RecognitionResultTypeV1.METADATA),
|
|
3833
3860
|
audioUtteranceId: z.string(),
|
|
3834
3861
|
// Timing information
|
|
3862
|
+
connectionInitiatedAtMs: z.number().optional(),
|
|
3835
3863
|
recordingStartMs: z.number().optional(),
|
|
3836
3864
|
recordingEndMs: z.number().optional(),
|
|
3837
3865
|
transcriptEndMs: z.number().optional(),
|
|
@@ -3840,6 +3868,7 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3840
3868
|
duration: z.number().optional(),
|
|
3841
3869
|
volume: z.number().optional(),
|
|
3842
3870
|
accumulatedAudioTimeMs: z.number().optional(),
|
|
3871
|
+
rawAudioTimeMs: z.number().optional(),
|
|
3843
3872
|
// Cost Information
|
|
3844
3873
|
costInUSD: z.number().default(0).optional(),
|
|
3845
3874
|
// ASR API Type
|
|
@@ -3849,7 +3878,22 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3849
3878
|
// Raw ASR metadata payload as provided by the provider (stringified if needed)
|
|
3850
3879
|
rawAsrMetadata: z.string().optional(),
|
|
3851
3880
|
// Transcript outcome - categorizes the final transcript state
|
|
3852
|
-
transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional()
|
|
3881
|
+
transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional(),
|
|
3882
|
+
// Audio metrics - embedded audio quality metrics (from AudioMetricsAccumulator)
|
|
3883
|
+
// Omit 'type' field since it's embedded in METADATA, not a separate message
|
|
3884
|
+
audioMetrics: z.object({
|
|
3885
|
+
valid: z.boolean(),
|
|
3886
|
+
audioBeginMs: z.number(),
|
|
3887
|
+
audioEndMs: z.number(),
|
|
3888
|
+
maxVolume: z.number(),
|
|
3889
|
+
minVolume: z.number(),
|
|
3890
|
+
avgVolume: z.number(),
|
|
3891
|
+
silenceRatio: z.number(),
|
|
3892
|
+
clippingRatio: z.number(),
|
|
3893
|
+
snrEstimate: z.number().nullable(),
|
|
3894
|
+
lastNonSilenceMs: z.number(),
|
|
3895
|
+
timestamp: z.string()
|
|
3896
|
+
}).optional()
|
|
3853
3897
|
});
|
|
3854
3898
|
var ErrorTypeV1;
|
|
3855
3899
|
(function(ErrorTypeV12) {
|
|
@@ -3858,7 +3902,10 @@ var ErrorTypeV1;
|
|
|
3858
3902
|
ErrorTypeV12["PROVIDER_ERROR"] = "provider_error";
|
|
3859
3903
|
ErrorTypeV12["TIMEOUT_ERROR"] = "timeout_error";
|
|
3860
3904
|
ErrorTypeV12["QUOTA_EXCEEDED"] = "quota_exceeded";
|
|
3905
|
+
ErrorTypeV12["INTERNAL_QUOTA_EXHAUSTED"] = "internal_quota_exhausted";
|
|
3861
3906
|
ErrorTypeV12["CONNECTION_ERROR"] = "connection_error";
|
|
3907
|
+
ErrorTypeV12["NO_AUDIO_ERROR"] = "no_audio_error";
|
|
3908
|
+
ErrorTypeV12["CIRCUIT_BREAKER_OPEN"] = "circuit_breaker_open";
|
|
3862
3909
|
ErrorTypeV12["UNKNOWN_ERROR"] = "unknown_error";
|
|
3863
3910
|
})(ErrorTypeV1 || (ErrorTypeV1 = {}));
|
|
3864
3911
|
var ErrorResultSchemaV1 = z.object({
|
|
@@ -4075,6 +4122,12 @@ var TimerSchema = z.object({
|
|
|
4075
4122
|
* Provider that generated this message
|
|
4076
4123
|
*/
|
|
4077
4124
|
provider: z.nativeEnum(RecognitionProvider).optional(),
|
|
4125
|
+
/**
|
|
4126
|
+
* Timestamp when provider connection was initiated (in milliseconds)
|
|
4127
|
+
* Set before doConnect() - captures the moment before WebSocket creation starts
|
|
4128
|
+
* @example 1704095999800
|
|
4129
|
+
*/
|
|
4130
|
+
connectionInitiatedAtMs: z.number().optional(),
|
|
4078
4131
|
/**
|
|
4079
4132
|
* Timestamp when recording started (in milliseconds)
|
|
4080
4133
|
* @example 1704096000000
|
|
@@ -4208,6 +4261,14 @@ var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
|
4208
4261
|
/** Underlying error message */
|
|
4209
4262
|
underlyingError: z.string().optional()
|
|
4210
4263
|
});
|
|
4264
|
+
var CircuitBreakerExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
4265
|
+
errorType: z.literal(ErrorTypeV1.CIRCUIT_BREAKER_OPEN),
|
|
4266
|
+
isImmediatelyAvailable: z.literal(true),
|
|
4267
|
+
/** Provider that is unavailable */
|
|
4268
|
+
provider: z.nativeEnum(RecognitionProvider).optional(),
|
|
4269
|
+
/** Model that is unavailable */
|
|
4270
|
+
model: z.string().optional()
|
|
4271
|
+
});
|
|
4211
4272
|
var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
4212
4273
|
errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
|
|
4213
4274
|
isImmediatelyAvailable: z.literal(false),
|
|
@@ -4223,6 +4284,7 @@ var RecognitionExceptionSchema = z.discriminatedUnion("errorType", [
|
|
|
4223
4284
|
TimeoutExceptionSchema,
|
|
4224
4285
|
QuotaExceededExceptionSchema,
|
|
4225
4286
|
ConnectionExceptionSchema,
|
|
4287
|
+
CircuitBreakerExceptionSchema,
|
|
4226
4288
|
UnknownExceptionSchema
|
|
4227
4289
|
]);
|
|
4228
4290
|
|
|
@@ -4238,6 +4300,12 @@ var ControlSignalTypeV1;
|
|
|
4238
4300
|
ControlSignalTypeV12["START_RECORDING"] = "start_recording";
|
|
4239
4301
|
ControlSignalTypeV12["STOP_RECORDING"] = "stop_recording";
|
|
4240
4302
|
})(ControlSignalTypeV1 || (ControlSignalTypeV1 = {}));
|
|
4303
|
+
var PrefixMode;
|
|
4304
|
+
(function(PrefixMode2) {
|
|
4305
|
+
PrefixMode2["NONE"] = "none";
|
|
4306
|
+
PrefixMode2["CLIENT"] = "client";
|
|
4307
|
+
PrefixMode2["STORED"] = "stored";
|
|
4308
|
+
})(PrefixMode || (PrefixMode = {}));
|
|
4241
4309
|
var SlotMapSchema = z.record(z.string(), z.array(z.string()));
|
|
4242
4310
|
var GameContextSchemaV1 = z.object({
|
|
4243
4311
|
type: z.literal(RecognitionContextTypeV1.GAME_CONTEXT),
|
|
@@ -4269,6 +4337,19 @@ var RequestDebugCommandSchema = z.object({
|
|
|
4269
4337
|
// Enable experimental pilot models for testing new features
|
|
4270
4338
|
enablePilotModels: z.boolean().optional().default(false)
|
|
4271
4339
|
}).optional();
|
|
4340
|
+
var FallbackASRConfigSchema = z.object({
|
|
4341
|
+
// Required - the fallback provider to use
|
|
4342
|
+
provider: z.string(),
|
|
4343
|
+
// Optional - inherits from primary if not specified
|
|
4344
|
+
model: z.string().optional(),
|
|
4345
|
+
language: z.string().optional(),
|
|
4346
|
+
sampleRate: z.number().optional(),
|
|
4347
|
+
encoding: z.number().optional(),
|
|
4348
|
+
// Recognition options - optional, inherits from primary
|
|
4349
|
+
interimResults: z.boolean().optional(),
|
|
4350
|
+
useContext: z.boolean().optional(),
|
|
4351
|
+
finalTranscriptStability: z.string().optional()
|
|
4352
|
+
});
|
|
4272
4353
|
var ASRRequestSchemaV1 = z.object({
|
|
4273
4354
|
type: z.literal(RecognitionContextTypeV1.ASR_REQUEST),
|
|
4274
4355
|
// Session identification
|
|
@@ -4284,6 +4365,16 @@ var ASRRequestSchemaV1 = z.object({
|
|
|
4284
4365
|
useContext: z.boolean().optional().default(false),
|
|
4285
4366
|
// Final transcript stability mode (timeout for fallback final transcript)
|
|
4286
4367
|
finalTranscriptStability: z.string().optional(),
|
|
4368
|
+
// Traffic control priority (affects quota slot allocation)
|
|
4369
|
+
// 'high' = can use all quota slots (reserved for critical games like song-quiz)
|
|
4370
|
+
// 'low' = limited to non-reserved slots (default for most requests)
|
|
4371
|
+
priority: z.enum(["low", "high"]).optional().default("low"),
|
|
4372
|
+
// Fallback providers - tried in order if primary provider is unavailable (circuit breaker open)
|
|
4373
|
+
fallbackModels: z.array(FallbackASRConfigSchema).optional(),
|
|
4374
|
+
// Prefix audio configuration
|
|
4375
|
+
prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
|
|
4376
|
+
prefixId: z.string().optional(),
|
|
4377
|
+
prefixTextToRemove: z.array(z.string()).optional(),
|
|
4287
4378
|
// Debug options (FOR DEBUG/TESTING ONLY - not for production use)
|
|
4288
4379
|
debugCommand: RequestDebugCommandSchema
|
|
4289
4380
|
});
|
|
@@ -4301,6 +4392,8 @@ var RecognitionGameInfoSchema = z.object({
|
|
|
4301
4392
|
accountId: z.string().optional(),
|
|
4302
4393
|
gameId: z.string().optional(),
|
|
4303
4394
|
gamePhase: z.string().optional(),
|
|
4395
|
+
questionAskedId: z.string().optional(),
|
|
4396
|
+
/** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
|
|
4304
4397
|
questionAnswerId: z.string().optional(),
|
|
4305
4398
|
platform: z.string().optional()
|
|
4306
4399
|
// Platform for audio recording device (use the definition of platform teams)
|
|
@@ -4435,6 +4528,7 @@ var AudioEncoding;
|
|
|
4435
4528
|
}
|
|
4436
4529
|
AudioEncoding2.isNameValid = isNameValid;
|
|
4437
4530
|
})(AudioEncoding || (AudioEncoding = {}));
|
|
4531
|
+
var PREFIX_AUDIO_ENCODING_OFFSET = 128;
|
|
4438
4532
|
var SampleRate;
|
|
4439
4533
|
(function(SampleRate2) {
|
|
4440
4534
|
SampleRate2[SampleRate2["RATE_8000"] = 8e3] = "RATE_8000";
|
|
@@ -4527,6 +4621,7 @@ var FinalTranscriptStability;
|
|
|
4527
4621
|
var PlumbingType;
|
|
4528
4622
|
(function(PlumbingType2) {
|
|
4529
4623
|
PlumbingType2["AUDIO"] = "audio";
|
|
4624
|
+
PlumbingType2["PREFIX_AUDIO"] = "prefix_audio";
|
|
4530
4625
|
PlumbingType2["CONTROL"] = "control";
|
|
4531
4626
|
PlumbingType2["RESULT"] = "result";
|
|
4532
4627
|
PlumbingType2["RECOGNITION_CONTEXT"] = "recognition_context";
|
|
@@ -4605,6 +4700,11 @@ var StatsIncrementType;
|
|
|
4605
4700
|
StatsIncrementType2["SUCCESS"] = "success";
|
|
4606
4701
|
StatsIncrementType2["FAIL"] = "fail";
|
|
4607
4702
|
})(StatsIncrementType || (StatsIncrementType = {}));
|
|
4703
|
+
var QuotaPriority;
|
|
4704
|
+
(function(QuotaPriority2) {
|
|
4705
|
+
QuotaPriority2[QuotaPriority2["LOW"] = 0] = "LOW";
|
|
4706
|
+
QuotaPriority2[QuotaPriority2["HIGH"] = 1] = "HIGH";
|
|
4707
|
+
})(QuotaPriority || (QuotaPriority = {}));
|
|
4608
4708
|
|
|
4609
4709
|
// ../../libs/types/dist/stages.types.js
|
|
4610
4710
|
var STAGES = {
|
|
@@ -4781,7 +4881,7 @@ var WebSocketAudioClient = class {
|
|
|
4781
4881
|
// ../../libs/websocket/dist/core/audio-upload-websocket-server.js
|
|
4782
4882
|
import { WebSocketServer, WebSocket as WebSocket2 } from "ws";
|
|
4783
4883
|
|
|
4784
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4884
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/stringify.js
|
|
4785
4885
|
var byteToHex = [];
|
|
4786
4886
|
for (let i = 0; i < 256; ++i) {
|
|
4787
4887
|
byteToHex.push((i + 256).toString(16).slice(1));
|
|
@@ -4790,7 +4890,7 @@ function unsafeStringify(arr, offset = 0) {
|
|
|
4790
4890
|
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
4791
4891
|
}
|
|
4792
4892
|
|
|
4793
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4893
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/rng.js
|
|
4794
4894
|
var getRandomValues;
|
|
4795
4895
|
var rnds8 = new Uint8Array(16);
|
|
4796
4896
|
function rng() {
|
|
@@ -4803,21 +4903,27 @@ function rng() {
|
|
|
4803
4903
|
return getRandomValues(rnds8);
|
|
4804
4904
|
}
|
|
4805
4905
|
|
|
4806
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4906
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/native.js
|
|
4807
4907
|
var randomUUID = typeof crypto !== "undefined" && crypto.randomUUID && crypto.randomUUID.bind(crypto);
|
|
4808
4908
|
var native_default = { randomUUID };
|
|
4809
4909
|
|
|
4810
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4910
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/v4.js
|
|
4811
4911
|
function v4(options, buf, offset) {
|
|
4812
4912
|
if (native_default.randomUUID && !buf && !options) {
|
|
4813
4913
|
return native_default.randomUUID();
|
|
4814
4914
|
}
|
|
4815
4915
|
options = options || {};
|
|
4816
|
-
const rnds = options.random
|
|
4916
|
+
const rnds = options.random ?? options.rng?.() ?? rng();
|
|
4917
|
+
if (rnds.length < 16) {
|
|
4918
|
+
throw new Error("Random bytes length must be >= 16");
|
|
4919
|
+
}
|
|
4817
4920
|
rnds[6] = rnds[6] & 15 | 64;
|
|
4818
4921
|
rnds[8] = rnds[8] & 63 | 128;
|
|
4819
4922
|
if (buf) {
|
|
4820
4923
|
offset = offset || 0;
|
|
4924
|
+
if (offset < 0 || offset + 16 > buf.length) {
|
|
4925
|
+
throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
|
|
4926
|
+
}
|
|
4821
4927
|
for (let i = 0; i < 16; ++i) {
|
|
4822
4928
|
buf[offset + i] = rnds[i];
|
|
4823
4929
|
}
|
|
@@ -5191,6 +5297,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5191
5297
|
highWM: config.highWaterMark ?? 512e3,
|
|
5192
5298
|
lowWM: config.lowWaterMark ?? 128e3
|
|
5193
5299
|
});
|
|
5300
|
+
this.prefixBuffer = [];
|
|
5301
|
+
// Buffer prefix audio until READY
|
|
5302
|
+
this.prefixBufferBytes = 0;
|
|
5194
5303
|
this.state = "initial" /* INITIAL */;
|
|
5195
5304
|
// Debug control (internal state, controlled by debugCommand in ASRRequest)
|
|
5196
5305
|
this.isDebugLogEnabled = false;
|
|
@@ -5247,6 +5356,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5247
5356
|
static {
|
|
5248
5357
|
this.PROTOCOL_VERSION = 1;
|
|
5249
5358
|
}
|
|
5359
|
+
static {
|
|
5360
|
+
this.MAX_PREFIX_BUFFER_BYTES = 10 * 1024 * 1024;
|
|
5361
|
+
}
|
|
5250
5362
|
// ==========================================================================
|
|
5251
5363
|
// PRIVATE HELPERS
|
|
5252
5364
|
// ==========================================================================
|
|
@@ -5272,6 +5384,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5272
5384
|
cleanup() {
|
|
5273
5385
|
this.log("debug", "Cleaning up resources");
|
|
5274
5386
|
this.audioBuffer.clear();
|
|
5387
|
+
this.prefixBuffer = [];
|
|
5388
|
+
this.prefixBufferBytes = 0;
|
|
5275
5389
|
this.audioBytesSent = 0;
|
|
5276
5390
|
this.audioChunksSent = 0;
|
|
5277
5391
|
this.lastAudioStatsLog = 0;
|
|
@@ -5422,9 +5536,13 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5422
5536
|
}
|
|
5423
5537
|
}
|
|
5424
5538
|
}
|
|
5539
|
+
/**
|
|
5540
|
+
* Only active when the client is in READY state; otherwise it returns immediately.
|
|
5541
|
+
* @returns Promise that resolves when the recording is stopped
|
|
5542
|
+
*/
|
|
5425
5543
|
async stopRecording() {
|
|
5426
5544
|
if (this.state !== "ready" /* READY */) {
|
|
5427
|
-
this.log("
|
|
5545
|
+
this.log("warn", "stopRecording called but not in READY state", { state: this.state });
|
|
5428
5546
|
return;
|
|
5429
5547
|
}
|
|
5430
5548
|
this.log("debug", "Stopping recording");
|
|
@@ -5484,6 +5602,25 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5484
5602
|
isBufferOverflowing() {
|
|
5485
5603
|
return this.audioBuffer.isOverflowing();
|
|
5486
5604
|
}
|
|
5605
|
+
isServerReady() {
|
|
5606
|
+
return this.state === "ready" /* READY */;
|
|
5607
|
+
}
|
|
5608
|
+
sendGameContext(context) {
|
|
5609
|
+
if (this.state !== "connected" /* CONNECTED */ && this.state !== "ready" /* READY */) {
|
|
5610
|
+
this.log("warn", "sendGameContext called in wrong state", { state: this.state });
|
|
5611
|
+
return;
|
|
5612
|
+
}
|
|
5613
|
+
this.log("debug", "Sending game context (deferred)", {
|
|
5614
|
+
gameId: context.gameId,
|
|
5615
|
+
gamePhase: context.gamePhase,
|
|
5616
|
+
hasSlotMap: !!context.slotMap
|
|
5617
|
+
});
|
|
5618
|
+
super.sendMessage(
|
|
5619
|
+
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
5620
|
+
"message",
|
|
5621
|
+
context
|
|
5622
|
+
);
|
|
5623
|
+
}
|
|
5487
5624
|
getStats() {
|
|
5488
5625
|
const bufferStats = this.audioBuffer.getStats();
|
|
5489
5626
|
return {
|
|
@@ -5509,6 +5646,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5509
5646
|
if (this.isDebugLogEnabled) {
|
|
5510
5647
|
this.log("debug", "Sending ASR request", this.config.asrRequestConfig);
|
|
5511
5648
|
}
|
|
5649
|
+
const fallbackModels = this.config.asrRequestConfig.fallbackModels;
|
|
5512
5650
|
const asrRequest = {
|
|
5513
5651
|
type: RecognitionContextTypeV1.ASR_REQUEST,
|
|
5514
5652
|
audioUtteranceId: this.config.audioUtteranceId,
|
|
@@ -5524,7 +5662,20 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5524
5662
|
...this.config.asrRequestConfig.finalTranscriptStability && {
|
|
5525
5663
|
finalTranscriptStability: this.config.asrRequestConfig.finalTranscriptStability
|
|
5526
5664
|
},
|
|
5527
|
-
|
|
5665
|
+
// Include fallbackModels if provided (for circuit breaker fallback)
|
|
5666
|
+
...fallbackModels && { fallbackModels },
|
|
5667
|
+
...debugCommand && { debugCommand },
|
|
5668
|
+
// Include prefix mode if provided (for server-side stored prefix injection)
|
|
5669
|
+
...this.config.asrRequestConfig.prefixMode && {
|
|
5670
|
+
prefixMode: this.config.asrRequestConfig.prefixMode
|
|
5671
|
+
},
|
|
5672
|
+
...this.config.asrRequestConfig.prefixId && {
|
|
5673
|
+
prefixId: this.config.asrRequestConfig.prefixId
|
|
5674
|
+
},
|
|
5675
|
+
// Include prefix text to remove if provided (for server-side prefix text removal)
|
|
5676
|
+
...this.config.asrRequestConfig.prefixTextToRemove && {
|
|
5677
|
+
prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
|
|
5678
|
+
}
|
|
5528
5679
|
};
|
|
5529
5680
|
super.sendMessage(
|
|
5530
5681
|
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
@@ -5631,6 +5782,12 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5631
5782
|
this.log("debug", "Server ready for audio upload");
|
|
5632
5783
|
this.state = "ready" /* READY */;
|
|
5633
5784
|
this.messageHandler.setSessionStartTime(Date.now());
|
|
5785
|
+
if (this.prefixBuffer.length > 0) {
|
|
5786
|
+
this.log("debug", "Flushing buffered prefix audio", { chunks: this.prefixBuffer.length });
|
|
5787
|
+
this.prefixBuffer.forEach((chunk) => this.sendPrefixAudioNow(chunk));
|
|
5788
|
+
this.prefixBuffer = [];
|
|
5789
|
+
this.prefixBufferBytes = 0;
|
|
5790
|
+
}
|
|
5634
5791
|
const bufferedChunks = this.audioBuffer.flush();
|
|
5635
5792
|
if (bufferedChunks.length > 0) {
|
|
5636
5793
|
this.log("debug", "Flushing buffered audio", { chunks: bufferedChunks.length });
|
|
@@ -5662,6 +5819,74 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5662
5819
|
this.audioBytesSent += byteLength;
|
|
5663
5820
|
this.audioChunksSent++;
|
|
5664
5821
|
}
|
|
5822
|
+
/**
|
|
5823
|
+
* Send prefix audio to the server.
|
|
5824
|
+
* Prefix audio is sent before user audio and is used for context/priming.
|
|
5825
|
+
* The server will process it but adjust timing so transcripts reflect user audio timing.
|
|
5826
|
+
*
|
|
5827
|
+
* Note: Prefix audio is buffered until READY state, then flushed before user audio.
|
|
5828
|
+
* This ensures proper ordering even if called before server is ready.
|
|
5829
|
+
*
|
|
5830
|
+
* @param audioData - Prefix audio data (ArrayBuffer, ArrayBufferView, or Blob)
|
|
5831
|
+
*/
|
|
5832
|
+
sendPrefixAudio(audioData) {
|
|
5833
|
+
if (audioData instanceof Blob) {
|
|
5834
|
+
blobToArrayBuffer(audioData).then((arrayBuffer) => {
|
|
5835
|
+
this.sendPrefixAudioInternal(arrayBuffer);
|
|
5836
|
+
}).catch((error) => {
|
|
5837
|
+
this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
|
|
5838
|
+
});
|
|
5839
|
+
return;
|
|
5840
|
+
}
|
|
5841
|
+
this.sendPrefixAudioInternal(audioData);
|
|
5842
|
+
}
|
|
5843
|
+
/**
|
|
5844
|
+
* Internal method to handle prefix audio with buffering
|
|
5845
|
+
* Buffers if not READY, sends immediately if READY
|
|
5846
|
+
*/
|
|
5847
|
+
sendPrefixAudioInternal(audioData) {
|
|
5848
|
+
const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
5849
|
+
if (bytes === 0) return;
|
|
5850
|
+
if (this.state === "stopped" /* STOPPED */ || this.state === "failed" /* FAILED */) {
|
|
5851
|
+
this.log("debug", "Ignoring prefix audio in terminal state", { bytes, state: this.state });
|
|
5852
|
+
return;
|
|
5853
|
+
}
|
|
5854
|
+
if (this.state === "ready" /* READY */) {
|
|
5855
|
+
this.log("debug", "Sending prefix audio immediately", { bytes });
|
|
5856
|
+
this.sendPrefixAudioNow(audioData);
|
|
5857
|
+
} else {
|
|
5858
|
+
if (this.prefixBufferBytes + bytes > _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES) {
|
|
5859
|
+
this.log("warn", "Prefix buffer limit exceeded, dropping chunk", {
|
|
5860
|
+
bytes,
|
|
5861
|
+
current: this.prefixBufferBytes,
|
|
5862
|
+
max: _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES
|
|
5863
|
+
});
|
|
5864
|
+
return;
|
|
5865
|
+
}
|
|
5866
|
+
this.log("debug", "Buffering prefix audio until READY", { bytes, state: this.state });
|
|
5867
|
+
this.prefixBuffer.push(audioData);
|
|
5868
|
+
this.prefixBufferBytes += bytes;
|
|
5869
|
+
}
|
|
5870
|
+
}
|
|
5871
|
+
/**
|
|
5872
|
+
* Send prefix audio immediately to the server (without buffering)
|
|
5873
|
+
* Uses encoding offset to mark as prefix audio
|
|
5874
|
+
* @param audioData - Prefix audio data to send
|
|
5875
|
+
*/
|
|
5876
|
+
sendPrefixAudioNow(audioData) {
|
|
5877
|
+
const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
5878
|
+
if (byteLength === 0) return;
|
|
5879
|
+
const baseEncodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
|
|
5880
|
+
const prefixEncodingId = baseEncodingId + PREFIX_AUDIO_ENCODING_OFFSET;
|
|
5881
|
+
const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
|
|
5882
|
+
this.log("debug", "Sending prefix audio", { bytes: byteLength, encoding: prefixEncodingId });
|
|
5883
|
+
super.sendAudio(
|
|
5884
|
+
audioData,
|
|
5885
|
+
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
5886
|
+
prefixEncodingId,
|
|
5887
|
+
sampleRate
|
|
5888
|
+
);
|
|
5889
|
+
}
|
|
5665
5890
|
};
|
|
5666
5891
|
export {
|
|
5667
5892
|
AudioEncoding,
|