@volley/recognition-client-sdk 0.1.424 → 0.1.621
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +233 -7
- package/dist/index.bundled.d.ts +342 -10
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +242 -10
- package/dist/index.js.map +4 -4
- package/dist/recog-client-sdk.browser.js +231 -10
- package/dist/recog-client-sdk.browser.js.map +4 -4
- package/dist/recognition-client.d.ts +28 -1
- package/dist/recognition-client.d.ts.map +1 -1
- package/dist/recognition-client.types.d.ts +20 -0
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/simplified-vgf-recognition-client.d.ts +17 -0
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -1
- package/package.json +7 -7
- package/src/index.ts +2 -0
- package/src/recognition-client.ts +154 -4
- package/src/recognition-client.types.ts +23 -0
- package/src/simplified-vgf-recognition-client.integration.spec.ts +15 -3
- package/src/simplified-vgf-recognition-client.ts +28 -1
- package/src/utils/audio-ring-buffer.spec.ts +335 -0
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// ../../node_modules/.pnpm/zod@3.22.
|
|
1
|
+
// ../../node_modules/.pnpm/zod@3.22.5/node_modules/zod/lib/index.mjs
|
|
2
2
|
var util;
|
|
3
3
|
(function(util2) {
|
|
4
4
|
util2.assertEqual = (val) => val;
|
|
@@ -3741,6 +3741,10 @@ var RecognitionProvider;
|
|
|
3741
3741
|
RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
|
|
3742
3742
|
RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
|
|
3743
3743
|
RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
|
|
3744
|
+
RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
|
|
3745
|
+
RecognitionProvider2["DASHSCOPE"] = "dashscope";
|
|
3746
|
+
RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
|
|
3747
|
+
RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
|
|
3744
3748
|
})(RecognitionProvider || (RecognitionProvider = {}));
|
|
3745
3749
|
var RecognitionMode;
|
|
3746
3750
|
(function(RecognitionMode2) {
|
|
@@ -3786,8 +3790,18 @@ var ElevenLabsModel;
|
|
|
3786
3790
|
})(ElevenLabsModel || (ElevenLabsModel = {}));
|
|
3787
3791
|
var OpenAIRealtimeModel;
|
|
3788
3792
|
(function(OpenAIRealtimeModel2) {
|
|
3793
|
+
OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
|
|
3789
3794
|
OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
|
|
3790
3795
|
})(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
|
|
3796
|
+
var MistralVoxtralModel;
|
|
3797
|
+
(function(MistralVoxtralModel2) {
|
|
3798
|
+
MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
|
|
3799
|
+
})(MistralVoxtralModel || (MistralVoxtralModel = {}));
|
|
3800
|
+
var DashScopeModel;
|
|
3801
|
+
(function(DashScopeModel2) {
|
|
3802
|
+
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
|
|
3803
|
+
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
|
|
3804
|
+
})(DashScopeModel || (DashScopeModel = {}));
|
|
3791
3805
|
|
|
3792
3806
|
// ../../libs/types/dist/recognition-result-v1.types.js
|
|
3793
3807
|
var RecognitionResultTypeV1;
|
|
@@ -3803,8 +3817,10 @@ var TranscriptionResultSchemaV1 = z.object({
|
|
|
3803
3817
|
type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
|
|
3804
3818
|
audioUtteranceId: z.string(),
|
|
3805
3819
|
finalTranscript: z.string(),
|
|
3820
|
+
finalTranscriptRaw: z.string(),
|
|
3806
3821
|
finalTranscriptConfidence: z.number().min(0).max(1).optional(),
|
|
3807
3822
|
pendingTranscript: z.string().optional(),
|
|
3823
|
+
pendingTranscriptRaw: z.string().optional(),
|
|
3808
3824
|
pendingTranscriptConfidence: z.number().min(0).max(1).optional(),
|
|
3809
3825
|
is_finished: z.boolean(),
|
|
3810
3826
|
voiceStart: z.number().optional(),
|
|
@@ -3813,8 +3829,9 @@ var TranscriptionResultSchemaV1 = z.object({
|
|
|
3813
3829
|
startTimestamp: z.number().optional(),
|
|
3814
3830
|
endTimestamp: z.number().optional(),
|
|
3815
3831
|
receivedAtMs: z.number().optional(),
|
|
3816
|
-
accumulatedAudioTimeMs: z.number().optional()
|
|
3817
|
-
|
|
3832
|
+
accumulatedAudioTimeMs: z.number().optional(),
|
|
3833
|
+
rawAudioTimeMs: z.number().optional()
|
|
3834
|
+
// Total audio duration sent to provider (includes prefix)
|
|
3818
3835
|
});
|
|
3819
3836
|
var FunctionCallResultSchemaV1 = z.object({
|
|
3820
3837
|
type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
|
|
@@ -3827,11 +3844,22 @@ var TranscriptOutcomeType;
|
|
|
3827
3844
|
TranscriptOutcomeType2["WITH_CONTENT"] = "with_content";
|
|
3828
3845
|
TranscriptOutcomeType2["EMPTY"] = "empty";
|
|
3829
3846
|
TranscriptOutcomeType2["NEVER_SENT"] = "never_sent";
|
|
3847
|
+
TranscriptOutcomeType2["ERROR_AUTHENTICATION"] = "error_authentication";
|
|
3848
|
+
TranscriptOutcomeType2["ERROR_VALIDATION"] = "error_validation";
|
|
3849
|
+
TranscriptOutcomeType2["ERROR_PROVIDER"] = "error_provider";
|
|
3850
|
+
TranscriptOutcomeType2["ERROR_TIMEOUT"] = "error_timeout";
|
|
3851
|
+
TranscriptOutcomeType2["ERROR_QUOTA"] = "error_quota";
|
|
3852
|
+
TranscriptOutcomeType2["ERROR_INTERNAL_QUOTA"] = "error_internal_quota";
|
|
3853
|
+
TranscriptOutcomeType2["ERROR_CONNECTION"] = "error_connection";
|
|
3854
|
+
TranscriptOutcomeType2["ERROR_NO_AUDIO"] = "error_no_audio";
|
|
3855
|
+
TranscriptOutcomeType2["ERROR_CIRCUIT_BREAKER"] = "error_circuit_breaker";
|
|
3856
|
+
TranscriptOutcomeType2["ERROR_UNKNOWN"] = "error_unknown";
|
|
3830
3857
|
})(TranscriptOutcomeType || (TranscriptOutcomeType = {}));
|
|
3831
3858
|
var MetadataResultSchemaV1 = z.object({
|
|
3832
3859
|
type: z.literal(RecognitionResultTypeV1.METADATA),
|
|
3833
3860
|
audioUtteranceId: z.string(),
|
|
3834
3861
|
// Timing information
|
|
3862
|
+
connectionInitiatedAtMs: z.number().optional(),
|
|
3835
3863
|
recordingStartMs: z.number().optional(),
|
|
3836
3864
|
recordingEndMs: z.number().optional(),
|
|
3837
3865
|
transcriptEndMs: z.number().optional(),
|
|
@@ -3840,6 +3868,7 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3840
3868
|
duration: z.number().optional(),
|
|
3841
3869
|
volume: z.number().optional(),
|
|
3842
3870
|
accumulatedAudioTimeMs: z.number().optional(),
|
|
3871
|
+
rawAudioTimeMs: z.number().optional(),
|
|
3843
3872
|
// Cost Information
|
|
3844
3873
|
costInUSD: z.number().default(0).optional(),
|
|
3845
3874
|
// ASR API Type
|
|
@@ -3849,7 +3878,22 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3849
3878
|
// Raw ASR metadata payload as provided by the provider (stringified if needed)
|
|
3850
3879
|
rawAsrMetadata: z.string().optional(),
|
|
3851
3880
|
// Transcript outcome - categorizes the final transcript state
|
|
3852
|
-
transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional()
|
|
3881
|
+
transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional(),
|
|
3882
|
+
// Audio metrics - embedded audio quality metrics (from AudioMetricsAccumulator)
|
|
3883
|
+
// Omit 'type' field since it's embedded in METADATA, not a separate message
|
|
3884
|
+
audioMetrics: z.object({
|
|
3885
|
+
valid: z.boolean(),
|
|
3886
|
+
audioBeginMs: z.number(),
|
|
3887
|
+
audioEndMs: z.number(),
|
|
3888
|
+
maxVolume: z.number(),
|
|
3889
|
+
minVolume: z.number(),
|
|
3890
|
+
avgVolume: z.number(),
|
|
3891
|
+
silenceRatio: z.number(),
|
|
3892
|
+
clippingRatio: z.number(),
|
|
3893
|
+
snrEstimate: z.number().nullable(),
|
|
3894
|
+
lastNonSilenceMs: z.number(),
|
|
3895
|
+
timestamp: z.string()
|
|
3896
|
+
}).optional()
|
|
3853
3897
|
});
|
|
3854
3898
|
var ErrorTypeV1;
|
|
3855
3899
|
(function(ErrorTypeV12) {
|
|
@@ -3858,7 +3902,10 @@ var ErrorTypeV1;
|
|
|
3858
3902
|
ErrorTypeV12["PROVIDER_ERROR"] = "provider_error";
|
|
3859
3903
|
ErrorTypeV12["TIMEOUT_ERROR"] = "timeout_error";
|
|
3860
3904
|
ErrorTypeV12["QUOTA_EXCEEDED"] = "quota_exceeded";
|
|
3905
|
+
ErrorTypeV12["INTERNAL_QUOTA_EXHAUSTED"] = "internal_quota_exhausted";
|
|
3861
3906
|
ErrorTypeV12["CONNECTION_ERROR"] = "connection_error";
|
|
3907
|
+
ErrorTypeV12["NO_AUDIO_ERROR"] = "no_audio_error";
|
|
3908
|
+
ErrorTypeV12["CIRCUIT_BREAKER_OPEN"] = "circuit_breaker_open";
|
|
3862
3909
|
ErrorTypeV12["UNKNOWN_ERROR"] = "unknown_error";
|
|
3863
3910
|
})(ErrorTypeV1 || (ErrorTypeV1 = {}));
|
|
3864
3911
|
var ErrorResultSchemaV1 = z.object({
|
|
@@ -4075,6 +4122,12 @@ var TimerSchema = z.object({
|
|
|
4075
4122
|
* Provider that generated this message
|
|
4076
4123
|
*/
|
|
4077
4124
|
provider: z.nativeEnum(RecognitionProvider).optional(),
|
|
4125
|
+
/**
|
|
4126
|
+
* Timestamp when provider connection was initiated (in milliseconds)
|
|
4127
|
+
* Set before doConnect() - captures the moment before WebSocket creation starts
|
|
4128
|
+
* @example 1704095999800
|
|
4129
|
+
*/
|
|
4130
|
+
connectionInitiatedAtMs: z.number().optional(),
|
|
4078
4131
|
/**
|
|
4079
4132
|
* Timestamp when recording started (in milliseconds)
|
|
4080
4133
|
* @example 1704096000000
|
|
@@ -4208,6 +4261,14 @@ var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
|
4208
4261
|
/** Underlying error message */
|
|
4209
4262
|
underlyingError: z.string().optional()
|
|
4210
4263
|
});
|
|
4264
|
+
var CircuitBreakerExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
4265
|
+
errorType: z.literal(ErrorTypeV1.CIRCUIT_BREAKER_OPEN),
|
|
4266
|
+
isImmediatelyAvailable: z.literal(true),
|
|
4267
|
+
/** Provider that is unavailable */
|
|
4268
|
+
provider: z.nativeEnum(RecognitionProvider).optional(),
|
|
4269
|
+
/** Model that is unavailable */
|
|
4270
|
+
model: z.string().optional()
|
|
4271
|
+
});
|
|
4211
4272
|
var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
4212
4273
|
errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
|
|
4213
4274
|
isImmediatelyAvailable: z.literal(false),
|
|
@@ -4223,6 +4284,7 @@ var RecognitionExceptionSchema = z.discriminatedUnion("errorType", [
|
|
|
4223
4284
|
TimeoutExceptionSchema,
|
|
4224
4285
|
QuotaExceededExceptionSchema,
|
|
4225
4286
|
ConnectionExceptionSchema,
|
|
4287
|
+
CircuitBreakerExceptionSchema,
|
|
4226
4288
|
UnknownExceptionSchema
|
|
4227
4289
|
]);
|
|
4228
4290
|
function isExceptionImmediatelyAvailable(exception) {
|
|
@@ -4244,6 +4306,8 @@ function getUserFriendlyMessage(exception) {
|
|
|
4244
4306
|
return exception.message || "Rate limit exceeded. Please try again later.";
|
|
4245
4307
|
case ErrorTypeV1.CONNECTION_ERROR:
|
|
4246
4308
|
return exception.message || "Connection failed. Please check your network and try again.";
|
|
4309
|
+
case ErrorTypeV1.CIRCUIT_BREAKER_OPEN:
|
|
4310
|
+
return exception.message || "Service temporarily unavailable. Please try again.";
|
|
4247
4311
|
}
|
|
4248
4312
|
}
|
|
4249
4313
|
|
|
@@ -4259,6 +4323,12 @@ var ControlSignalTypeV1;
|
|
|
4259
4323
|
ControlSignalTypeV12["START_RECORDING"] = "start_recording";
|
|
4260
4324
|
ControlSignalTypeV12["STOP_RECORDING"] = "stop_recording";
|
|
4261
4325
|
})(ControlSignalTypeV1 || (ControlSignalTypeV1 = {}));
|
|
4326
|
+
var PrefixMode;
|
|
4327
|
+
(function(PrefixMode2) {
|
|
4328
|
+
PrefixMode2["NONE"] = "none";
|
|
4329
|
+
PrefixMode2["CLIENT"] = "client";
|
|
4330
|
+
PrefixMode2["STORED"] = "stored";
|
|
4331
|
+
})(PrefixMode || (PrefixMode = {}));
|
|
4262
4332
|
var SlotMapSchema = z.record(z.string(), z.array(z.string()));
|
|
4263
4333
|
var GameContextSchemaV1 = z.object({
|
|
4264
4334
|
type: z.literal(RecognitionContextTypeV1.GAME_CONTEXT),
|
|
@@ -4290,6 +4360,19 @@ var RequestDebugCommandSchema = z.object({
|
|
|
4290
4360
|
// Enable experimental pilot models for testing new features
|
|
4291
4361
|
enablePilotModels: z.boolean().optional().default(false)
|
|
4292
4362
|
}).optional();
|
|
4363
|
+
var FallbackASRConfigSchema = z.object({
|
|
4364
|
+
// Required - the fallback provider to use
|
|
4365
|
+
provider: z.string(),
|
|
4366
|
+
// Optional - inherits from primary if not specified
|
|
4367
|
+
model: z.string().optional(),
|
|
4368
|
+
language: z.string().optional(),
|
|
4369
|
+
sampleRate: z.number().optional(),
|
|
4370
|
+
encoding: z.number().optional(),
|
|
4371
|
+
// Recognition options - optional, inherits from primary
|
|
4372
|
+
interimResults: z.boolean().optional(),
|
|
4373
|
+
useContext: z.boolean().optional(),
|
|
4374
|
+
finalTranscriptStability: z.string().optional()
|
|
4375
|
+
});
|
|
4293
4376
|
var ASRRequestSchemaV1 = z.object({
|
|
4294
4377
|
type: z.literal(RecognitionContextTypeV1.ASR_REQUEST),
|
|
4295
4378
|
// Session identification
|
|
@@ -4305,6 +4388,16 @@ var ASRRequestSchemaV1 = z.object({
|
|
|
4305
4388
|
useContext: z.boolean().optional().default(false),
|
|
4306
4389
|
// Final transcript stability mode (timeout for fallback final transcript)
|
|
4307
4390
|
finalTranscriptStability: z.string().optional(),
|
|
4391
|
+
// Traffic control priority (affects quota slot allocation)
|
|
4392
|
+
// 'high' = can use all quota slots (reserved for critical games like song-quiz)
|
|
4393
|
+
// 'low' = limited to non-reserved slots (default for most requests)
|
|
4394
|
+
priority: z.enum(["low", "high"]).optional().default("low"),
|
|
4395
|
+
// Fallback providers - tried in order if primary provider is unavailable (circuit breaker open)
|
|
4396
|
+
fallbackModels: z.array(FallbackASRConfigSchema).optional(),
|
|
4397
|
+
// Prefix audio configuration
|
|
4398
|
+
prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
|
|
4399
|
+
prefixId: z.string().optional(),
|
|
4400
|
+
prefixTextToRemove: z.array(z.string()).optional(),
|
|
4308
4401
|
// Debug options (FOR DEBUG/TESTING ONLY - not for production use)
|
|
4309
4402
|
debugCommand: RequestDebugCommandSchema
|
|
4310
4403
|
});
|
|
@@ -4322,6 +4415,8 @@ var RecognitionGameInfoSchema = z.object({
|
|
|
4322
4415
|
accountId: z.string().optional(),
|
|
4323
4416
|
gameId: z.string().optional(),
|
|
4324
4417
|
gamePhase: z.string().optional(),
|
|
4418
|
+
questionAskedId: z.string().optional(),
|
|
4419
|
+
/** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
|
|
4325
4420
|
questionAnswerId: z.string().optional(),
|
|
4326
4421
|
platform: z.string().optional()
|
|
4327
4422
|
// Platform for audio recording device (use the definition of platform teams)
|
|
@@ -4456,6 +4551,7 @@ var AudioEncoding;
|
|
|
4456
4551
|
}
|
|
4457
4552
|
AudioEncoding2.isNameValid = isNameValid;
|
|
4458
4553
|
})(AudioEncoding || (AudioEncoding = {}));
|
|
4554
|
+
var PREFIX_AUDIO_ENCODING_OFFSET = 128;
|
|
4459
4555
|
var SampleRate;
|
|
4460
4556
|
(function(SampleRate2) {
|
|
4461
4557
|
SampleRate2[SampleRate2["RATE_8000"] = 8e3] = "RATE_8000";
|
|
@@ -4557,6 +4653,7 @@ function createDefaultASRConfig(overrides) {
|
|
|
4557
4653
|
var PlumbingType;
|
|
4558
4654
|
(function(PlumbingType2) {
|
|
4559
4655
|
PlumbingType2["AUDIO"] = "audio";
|
|
4656
|
+
PlumbingType2["PREFIX_AUDIO"] = "prefix_audio";
|
|
4560
4657
|
PlumbingType2["CONTROL"] = "control";
|
|
4561
4658
|
PlumbingType2["RESULT"] = "result";
|
|
4562
4659
|
PlumbingType2["RECOGNITION_CONTEXT"] = "recognition_context";
|
|
@@ -4635,6 +4732,11 @@ var StatsIncrementType;
|
|
|
4635
4732
|
StatsIncrementType2["SUCCESS"] = "success";
|
|
4636
4733
|
StatsIncrementType2["FAIL"] = "fail";
|
|
4637
4734
|
})(StatsIncrementType || (StatsIncrementType = {}));
|
|
4735
|
+
var QuotaPriority;
|
|
4736
|
+
(function(QuotaPriority2) {
|
|
4737
|
+
QuotaPriority2[QuotaPriority2["LOW"] = 0] = "LOW";
|
|
4738
|
+
QuotaPriority2[QuotaPriority2["HIGH"] = 1] = "HIGH";
|
|
4739
|
+
})(QuotaPriority || (QuotaPriority = {}));
|
|
4638
4740
|
|
|
4639
4741
|
// ../../libs/types/dist/stages.types.js
|
|
4640
4742
|
var STAGES = {
|
|
@@ -4811,7 +4913,7 @@ var WebSocketAudioClient = class {
|
|
|
4811
4913
|
// ../../libs/websocket/dist/core/audio-upload-websocket-server.js
|
|
4812
4914
|
import { WebSocketServer, WebSocket as WebSocket2 } from "ws";
|
|
4813
4915
|
|
|
4814
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4916
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/stringify.js
|
|
4815
4917
|
var byteToHex = [];
|
|
4816
4918
|
for (let i = 0; i < 256; ++i) {
|
|
4817
4919
|
byteToHex.push((i + 256).toString(16).slice(1));
|
|
@@ -4820,7 +4922,7 @@ function unsafeStringify(arr, offset = 0) {
|
|
|
4820
4922
|
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
4821
4923
|
}
|
|
4822
4924
|
|
|
4823
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4925
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/rng.js
|
|
4824
4926
|
var getRandomValues;
|
|
4825
4927
|
var rnds8 = new Uint8Array(16);
|
|
4826
4928
|
function rng() {
|
|
@@ -4833,21 +4935,27 @@ function rng() {
|
|
|
4833
4935
|
return getRandomValues(rnds8);
|
|
4834
4936
|
}
|
|
4835
4937
|
|
|
4836
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4938
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/native.js
|
|
4837
4939
|
var randomUUID = typeof crypto !== "undefined" && crypto.randomUUID && crypto.randomUUID.bind(crypto);
|
|
4838
4940
|
var native_default = { randomUUID };
|
|
4839
4941
|
|
|
4840
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4942
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/v4.js
|
|
4841
4943
|
function v4(options, buf, offset) {
|
|
4842
4944
|
if (native_default.randomUUID && !buf && !options) {
|
|
4843
4945
|
return native_default.randomUUID();
|
|
4844
4946
|
}
|
|
4845
4947
|
options = options || {};
|
|
4846
|
-
const rnds = options.random
|
|
4948
|
+
const rnds = options.random ?? options.rng?.() ?? rng();
|
|
4949
|
+
if (rnds.length < 16) {
|
|
4950
|
+
throw new Error("Random bytes length must be >= 16");
|
|
4951
|
+
}
|
|
4847
4952
|
rnds[6] = rnds[6] & 15 | 64;
|
|
4848
4953
|
rnds[8] = rnds[8] & 63 | 128;
|
|
4849
4954
|
if (buf) {
|
|
4850
4955
|
offset = offset || 0;
|
|
4956
|
+
if (offset < 0 || offset + 16 > buf.length) {
|
|
4957
|
+
throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
|
|
4958
|
+
}
|
|
4851
4959
|
for (let i = 0; i < 16; ++i) {
|
|
4852
4960
|
buf[offset + i] = rnds[i];
|
|
4853
4961
|
}
|
|
@@ -5283,6 +5391,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5283
5391
|
highWM: config.highWaterMark ?? 512e3,
|
|
5284
5392
|
lowWM: config.lowWaterMark ?? 128e3
|
|
5285
5393
|
});
|
|
5394
|
+
this.prefixBuffer = [];
|
|
5395
|
+
// Buffer prefix audio until READY
|
|
5396
|
+
this.prefixBufferBytes = 0;
|
|
5286
5397
|
this.state = "initial" /* INITIAL */;
|
|
5287
5398
|
// Debug control (internal state, controlled by debugCommand in ASRRequest)
|
|
5288
5399
|
this.isDebugLogEnabled = false;
|
|
@@ -5339,6 +5450,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5339
5450
|
static {
|
|
5340
5451
|
this.PROTOCOL_VERSION = 1;
|
|
5341
5452
|
}
|
|
5453
|
+
static {
|
|
5454
|
+
this.MAX_PREFIX_BUFFER_BYTES = 10 * 1024 * 1024;
|
|
5455
|
+
}
|
|
5342
5456
|
// ==========================================================================
|
|
5343
5457
|
// PRIVATE HELPERS
|
|
5344
5458
|
// ==========================================================================
|
|
@@ -5364,6 +5478,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5364
5478
|
cleanup() {
|
|
5365
5479
|
this.log("debug", "Cleaning up resources");
|
|
5366
5480
|
this.audioBuffer.clear();
|
|
5481
|
+
this.prefixBuffer = [];
|
|
5482
|
+
this.prefixBufferBytes = 0;
|
|
5367
5483
|
this.audioBytesSent = 0;
|
|
5368
5484
|
this.audioChunksSent = 0;
|
|
5369
5485
|
this.lastAudioStatsLog = 0;
|
|
@@ -5580,6 +5696,25 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5580
5696
|
isBufferOverflowing() {
|
|
5581
5697
|
return this.audioBuffer.isOverflowing();
|
|
5582
5698
|
}
|
|
5699
|
+
isServerReady() {
|
|
5700
|
+
return this.state === "ready" /* READY */;
|
|
5701
|
+
}
|
|
5702
|
+
sendGameContext(context) {
|
|
5703
|
+
if (this.state !== "connected" /* CONNECTED */ && this.state !== "ready" /* READY */) {
|
|
5704
|
+
this.log("warn", "sendGameContext called in wrong state", { state: this.state });
|
|
5705
|
+
return;
|
|
5706
|
+
}
|
|
5707
|
+
this.log("debug", "Sending game context (deferred)", {
|
|
5708
|
+
gameId: context.gameId,
|
|
5709
|
+
gamePhase: context.gamePhase,
|
|
5710
|
+
hasSlotMap: !!context.slotMap
|
|
5711
|
+
});
|
|
5712
|
+
super.sendMessage(
|
|
5713
|
+
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
5714
|
+
"message",
|
|
5715
|
+
context
|
|
5716
|
+
);
|
|
5717
|
+
}
|
|
5583
5718
|
getStats() {
|
|
5584
5719
|
const bufferStats = this.audioBuffer.getStats();
|
|
5585
5720
|
return {
|
|
@@ -5605,6 +5740,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5605
5740
|
if (this.isDebugLogEnabled) {
|
|
5606
5741
|
this.log("debug", "Sending ASR request", this.config.asrRequestConfig);
|
|
5607
5742
|
}
|
|
5743
|
+
const fallbackModels = this.config.asrRequestConfig.fallbackModels;
|
|
5608
5744
|
const asrRequest = {
|
|
5609
5745
|
type: RecognitionContextTypeV1.ASR_REQUEST,
|
|
5610
5746
|
audioUtteranceId: this.config.audioUtteranceId,
|
|
@@ -5620,7 +5756,20 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5620
5756
|
...this.config.asrRequestConfig.finalTranscriptStability && {
|
|
5621
5757
|
finalTranscriptStability: this.config.asrRequestConfig.finalTranscriptStability
|
|
5622
5758
|
},
|
|
5623
|
-
|
|
5759
|
+
// Include fallbackModels if provided (for circuit breaker fallback)
|
|
5760
|
+
...fallbackModels && { fallbackModels },
|
|
5761
|
+
...debugCommand && { debugCommand },
|
|
5762
|
+
// Include prefix mode if provided (for server-side stored prefix injection)
|
|
5763
|
+
...this.config.asrRequestConfig.prefixMode && {
|
|
5764
|
+
prefixMode: this.config.asrRequestConfig.prefixMode
|
|
5765
|
+
},
|
|
5766
|
+
...this.config.asrRequestConfig.prefixId && {
|
|
5767
|
+
prefixId: this.config.asrRequestConfig.prefixId
|
|
5768
|
+
},
|
|
5769
|
+
// Include prefix text to remove if provided (for server-side prefix text removal)
|
|
5770
|
+
...this.config.asrRequestConfig.prefixTextToRemove && {
|
|
5771
|
+
prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
|
|
5772
|
+
}
|
|
5624
5773
|
};
|
|
5625
5774
|
super.sendMessage(
|
|
5626
5775
|
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
@@ -5727,6 +5876,12 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5727
5876
|
this.log("debug", "Server ready for audio upload");
|
|
5728
5877
|
this.state = "ready" /* READY */;
|
|
5729
5878
|
this.messageHandler.setSessionStartTime(Date.now());
|
|
5879
|
+
if (this.prefixBuffer.length > 0) {
|
|
5880
|
+
this.log("debug", "Flushing buffered prefix audio", { chunks: this.prefixBuffer.length });
|
|
5881
|
+
this.prefixBuffer.forEach((chunk) => this.sendPrefixAudioNow(chunk));
|
|
5882
|
+
this.prefixBuffer = [];
|
|
5883
|
+
this.prefixBufferBytes = 0;
|
|
5884
|
+
}
|
|
5730
5885
|
const bufferedChunks = this.audioBuffer.flush();
|
|
5731
5886
|
if (bufferedChunks.length > 0) {
|
|
5732
5887
|
this.log("debug", "Flushing buffered audio", { chunks: bufferedChunks.length });
|
|
@@ -5758,6 +5913,74 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5758
5913
|
this.audioBytesSent += byteLength;
|
|
5759
5914
|
this.audioChunksSent++;
|
|
5760
5915
|
}
|
|
5916
|
+
/**
|
|
5917
|
+
* Send prefix audio to the server.
|
|
5918
|
+
* Prefix audio is sent before user audio and is used for context/priming.
|
|
5919
|
+
* The server will process it but adjust timing so transcripts reflect user audio timing.
|
|
5920
|
+
*
|
|
5921
|
+
* Note: Prefix audio is buffered until READY state, then flushed before user audio.
|
|
5922
|
+
* This ensures proper ordering even if called before server is ready.
|
|
5923
|
+
*
|
|
5924
|
+
* @param audioData - Prefix audio data (ArrayBuffer, ArrayBufferView, or Blob)
|
|
5925
|
+
*/
|
|
5926
|
+
sendPrefixAudio(audioData) {
|
|
5927
|
+
if (audioData instanceof Blob) {
|
|
5928
|
+
blobToArrayBuffer(audioData).then((arrayBuffer) => {
|
|
5929
|
+
this.sendPrefixAudioInternal(arrayBuffer);
|
|
5930
|
+
}).catch((error) => {
|
|
5931
|
+
this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
|
|
5932
|
+
});
|
|
5933
|
+
return;
|
|
5934
|
+
}
|
|
5935
|
+
this.sendPrefixAudioInternal(audioData);
|
|
5936
|
+
}
|
|
5937
|
+
/**
|
|
5938
|
+
* Internal method to handle prefix audio with buffering
|
|
5939
|
+
* Buffers if not READY, sends immediately if READY
|
|
5940
|
+
*/
|
|
5941
|
+
sendPrefixAudioInternal(audioData) {
|
|
5942
|
+
const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
5943
|
+
if (bytes === 0) return;
|
|
5944
|
+
if (this.state === "stopped" /* STOPPED */ || this.state === "failed" /* FAILED */) {
|
|
5945
|
+
this.log("debug", "Ignoring prefix audio in terminal state", { bytes, state: this.state });
|
|
5946
|
+
return;
|
|
5947
|
+
}
|
|
5948
|
+
if (this.state === "ready" /* READY */) {
|
|
5949
|
+
this.log("debug", "Sending prefix audio immediately", { bytes });
|
|
5950
|
+
this.sendPrefixAudioNow(audioData);
|
|
5951
|
+
} else {
|
|
5952
|
+
if (this.prefixBufferBytes + bytes > _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES) {
|
|
5953
|
+
this.log("warn", "Prefix buffer limit exceeded, dropping chunk", {
|
|
5954
|
+
bytes,
|
|
5955
|
+
current: this.prefixBufferBytes,
|
|
5956
|
+
max: _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES
|
|
5957
|
+
});
|
|
5958
|
+
return;
|
|
5959
|
+
}
|
|
5960
|
+
this.log("debug", "Buffering prefix audio until READY", { bytes, state: this.state });
|
|
5961
|
+
this.prefixBuffer.push(audioData);
|
|
5962
|
+
this.prefixBufferBytes += bytes;
|
|
5963
|
+
}
|
|
5964
|
+
}
|
|
5965
|
+
/**
|
|
5966
|
+
* Send prefix audio immediately to the server (without buffering)
|
|
5967
|
+
* Uses encoding offset to mark as prefix audio
|
|
5968
|
+
* @param audioData - Prefix audio data to send
|
|
5969
|
+
*/
|
|
5970
|
+
sendPrefixAudioNow(audioData) {
|
|
5971
|
+
const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
5972
|
+
if (byteLength === 0) return;
|
|
5973
|
+
const baseEncodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
|
|
5974
|
+
const prefixEncodingId = baseEncodingId + PREFIX_AUDIO_ENCODING_OFFSET;
|
|
5975
|
+
const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
|
|
5976
|
+
this.log("debug", "Sending prefix audio", { bytes: byteLength, encoding: prefixEncodingId });
|
|
5977
|
+
super.sendAudio(
|
|
5978
|
+
audioData,
|
|
5979
|
+
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
5980
|
+
prefixEncodingId,
|
|
5981
|
+
sampleRate
|
|
5982
|
+
);
|
|
5983
|
+
}
|
|
5761
5984
|
};
|
|
5762
5985
|
|
|
5763
5986
|
// src/config-builder.ts
|
|
@@ -6135,6 +6358,7 @@ var SimplifiedVGFRecognitionClient = class {
|
|
|
6135
6358
|
}
|
|
6136
6359
|
} else {
|
|
6137
6360
|
this.state = createVGFStateFromConfig(clientConfig);
|
|
6361
|
+
clientConfig.audioUtteranceId = this.state.audioUtteranceId;
|
|
6138
6362
|
}
|
|
6139
6363
|
this.state = { ...this.state, startRecordingStatus: "READY" };
|
|
6140
6364
|
this.expectedUuid = this.state.audioUtteranceId;
|
|
@@ -6302,6 +6526,12 @@ var SimplifiedVGFRecognitionClient = class {
|
|
|
6302
6526
|
isBufferOverflowing() {
|
|
6303
6527
|
return this.client.isBufferOverflowing();
|
|
6304
6528
|
}
|
|
6529
|
+
sendGameContext(context) {
|
|
6530
|
+
this.client.sendGameContext(context);
|
|
6531
|
+
}
|
|
6532
|
+
isServerReady() {
|
|
6533
|
+
return this.client.isServerReady();
|
|
6534
|
+
}
|
|
6305
6535
|
// VGF State access (read-only for consumers)
|
|
6306
6536
|
getVGFState() {
|
|
6307
6537
|
return { ...this.state };
|
|
@@ -6347,6 +6577,7 @@ export {
|
|
|
6347
6577
|
ConnectionError,
|
|
6348
6578
|
ControlSignalTypeV1 as ControlSignal,
|
|
6349
6579
|
ControlSignalTypeV1,
|
|
6580
|
+
DashScopeModel,
|
|
6350
6581
|
DeepgramModel,
|
|
6351
6582
|
ElevenLabsModel,
|
|
6352
6583
|
ErrorTypeV1,
|
|
@@ -6355,6 +6586,7 @@ export {
|
|
|
6355
6586
|
GeminiModel,
|
|
6356
6587
|
GoogleModel,
|
|
6357
6588
|
Language,
|
|
6589
|
+
MistralVoxtralModel,
|
|
6358
6590
|
OpenAIModel,
|
|
6359
6591
|
RECOGNITION_CONDUCTOR_BASES,
|
|
6360
6592
|
RECOGNITION_SERVICE_BASES,
|