@volley/recognition-client-sdk 0.1.424 → 0.1.622
This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +236 -7
- package/dist/index.bundled.d.ts +393 -52
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +268 -15
- package/dist/index.js.map +4 -4
- package/dist/recog-client-sdk.browser.js +236 -14
- package/dist/recog-client-sdk.browser.js.map +4 -4
- package/dist/recognition-client.d.ts +28 -1
- package/dist/recognition-client.d.ts.map +1 -1
- package/dist/recognition-client.types.d.ts +20 -0
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/simplified-vgf-recognition-client.d.ts +17 -0
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -1
- package/dist/vgf-recognition-mapper.d.ts.map +1 -1
- package/dist/vgf-recognition-state.d.ts +6 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -1
- package/package.json +8 -8
- package/src/index.ts +3 -0
- package/src/recognition-client.ts +158 -8
- package/src/recognition-client.types.ts +23 -0
- package/src/simplified-vgf-recognition-client.integration.spec.ts +15 -3
- package/src/simplified-vgf-recognition-client.ts +28 -1
- package/src/utils/audio-ring-buffer.spec.ts +335 -0
- package/src/vgf-recognition-mapper.ts +19 -1
- package/src/vgf-recognition-state.ts +4 -0
package/dist/index.d.ts
CHANGED
|
@@ -11,6 +11,6 @@ export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, Tran
|
|
|
11
11
|
export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
|
|
12
12
|
export { AudioEncoding } from '@recog/websocket';
|
|
13
13
|
export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
|
|
14
|
-
type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GoogleModel, GeminiModel, OpenAIModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
|
|
14
|
+
type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GoogleModel, GeminiModel, OpenAIModel, OpenAIRealtimeModel, MistralVoxtralModel, DashScopeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
|
|
15
15
|
export { getRecognitionServiceBase, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getRecognitionServiceHost, getRecognitionConductorBase, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionConductorHost, normalizeStage, RECOGNITION_SERVICE_BASES, RECOGNITION_CONDUCTOR_BASES } from '@recog/shared-config';
|
|
16
16
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,mBAAmB,EACnB,mBAAmB,EACnB,cAAc,EACd,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// ../../node_modules/.pnpm/zod@3.22.
|
|
1
|
+
// ../../node_modules/.pnpm/zod@3.22.5/node_modules/zod/lib/index.mjs
|
|
2
2
|
var util;
|
|
3
3
|
(function(util2) {
|
|
4
4
|
util2.assertEqual = (val) => val;
|
|
@@ -3741,6 +3741,10 @@ var RecognitionProvider;
|
|
|
3741
3741
|
RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
|
|
3742
3742
|
RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
|
|
3743
3743
|
RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
|
|
3744
|
+
RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
|
|
3745
|
+
RecognitionProvider2["DASHSCOPE"] = "dashscope";
|
|
3746
|
+
RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
|
|
3747
|
+
RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
|
|
3744
3748
|
})(RecognitionProvider || (RecognitionProvider = {}));
|
|
3745
3749
|
var RecognitionMode;
|
|
3746
3750
|
(function(RecognitionMode2) {
|
|
@@ -3786,8 +3790,18 @@ var ElevenLabsModel;
|
|
|
3786
3790
|
})(ElevenLabsModel || (ElevenLabsModel = {}));
|
|
3787
3791
|
var OpenAIRealtimeModel;
|
|
3788
3792
|
(function(OpenAIRealtimeModel2) {
|
|
3793
|
+
OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
|
|
3789
3794
|
OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
|
|
3790
3795
|
})(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
|
|
3796
|
+
var MistralVoxtralModel;
|
|
3797
|
+
(function(MistralVoxtralModel2) {
|
|
3798
|
+
MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
|
|
3799
|
+
})(MistralVoxtralModel || (MistralVoxtralModel = {}));
|
|
3800
|
+
var DashScopeModel;
|
|
3801
|
+
(function(DashScopeModel2) {
|
|
3802
|
+
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
|
|
3803
|
+
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
|
|
3804
|
+
})(DashScopeModel || (DashScopeModel = {}));
|
|
3791
3805
|
|
|
3792
3806
|
// ../../libs/types/dist/recognition-result-v1.types.js
|
|
3793
3807
|
var RecognitionResultTypeV1;
|
|
@@ -3803,18 +3817,22 @@ var TranscriptionResultSchemaV1 = z.object({
|
|
|
3803
3817
|
type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
|
|
3804
3818
|
audioUtteranceId: z.string(),
|
|
3805
3819
|
finalTranscript: z.string(),
|
|
3820
|
+
finalTranscriptRaw: z.string(),
|
|
3806
3821
|
finalTranscriptConfidence: z.number().min(0).max(1).optional(),
|
|
3807
3822
|
pendingTranscript: z.string().optional(),
|
|
3823
|
+
pendingTranscriptRaw: z.string().optional(),
|
|
3808
3824
|
pendingTranscriptConfidence: z.number().min(0).max(1).optional(),
|
|
3809
3825
|
is_finished: z.boolean(),
|
|
3810
3826
|
voiceStart: z.number().optional(),
|
|
3811
3827
|
voiceDuration: z.number().optional(),
|
|
3812
3828
|
voiceEnd: z.number().optional(),
|
|
3829
|
+
lastNonSilence: z.number().optional(),
|
|
3813
3830
|
startTimestamp: z.number().optional(),
|
|
3814
3831
|
endTimestamp: z.number().optional(),
|
|
3815
3832
|
receivedAtMs: z.number().optional(),
|
|
3816
|
-
accumulatedAudioTimeMs: z.number().optional()
|
|
3817
|
-
|
|
3833
|
+
accumulatedAudioTimeMs: z.number().optional(),
|
|
3834
|
+
rawAudioTimeMs: z.number().optional()
|
|
3835
|
+
// Total audio duration sent to provider (includes prefix)
|
|
3818
3836
|
});
|
|
3819
3837
|
var FunctionCallResultSchemaV1 = z.object({
|
|
3820
3838
|
type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
|
|
@@ -3827,11 +3845,22 @@ var TranscriptOutcomeType;
|
|
|
3827
3845
|
TranscriptOutcomeType2["WITH_CONTENT"] = "with_content";
|
|
3828
3846
|
TranscriptOutcomeType2["EMPTY"] = "empty";
|
|
3829
3847
|
TranscriptOutcomeType2["NEVER_SENT"] = "never_sent";
|
|
3848
|
+
TranscriptOutcomeType2["ERROR_AUTHENTICATION"] = "error_authentication";
|
|
3849
|
+
TranscriptOutcomeType2["ERROR_VALIDATION"] = "error_validation";
|
|
3850
|
+
TranscriptOutcomeType2["ERROR_PROVIDER"] = "error_provider";
|
|
3851
|
+
TranscriptOutcomeType2["ERROR_TIMEOUT"] = "error_timeout";
|
|
3852
|
+
TranscriptOutcomeType2["ERROR_QUOTA"] = "error_quota";
|
|
3853
|
+
TranscriptOutcomeType2["ERROR_INTERNAL_QUOTA"] = "error_internal_quota";
|
|
3854
|
+
TranscriptOutcomeType2["ERROR_CONNECTION"] = "error_connection";
|
|
3855
|
+
TranscriptOutcomeType2["ERROR_NO_AUDIO"] = "error_no_audio";
|
|
3856
|
+
TranscriptOutcomeType2["ERROR_CIRCUIT_BREAKER"] = "error_circuit_breaker";
|
|
3857
|
+
TranscriptOutcomeType2["ERROR_UNKNOWN"] = "error_unknown";
|
|
3830
3858
|
})(TranscriptOutcomeType || (TranscriptOutcomeType = {}));
|
|
3831
3859
|
var MetadataResultSchemaV1 = z.object({
|
|
3832
3860
|
type: z.literal(RecognitionResultTypeV1.METADATA),
|
|
3833
3861
|
audioUtteranceId: z.string(),
|
|
3834
3862
|
// Timing information
|
|
3863
|
+
connectionInitiatedAtMs: z.number().optional(),
|
|
3835
3864
|
recordingStartMs: z.number().optional(),
|
|
3836
3865
|
recordingEndMs: z.number().optional(),
|
|
3837
3866
|
transcriptEndMs: z.number().optional(),
|
|
@@ -3840,6 +3869,7 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3840
3869
|
duration: z.number().optional(),
|
|
3841
3870
|
volume: z.number().optional(),
|
|
3842
3871
|
accumulatedAudioTimeMs: z.number().optional(),
|
|
3872
|
+
rawAudioTimeMs: z.number().optional(),
|
|
3843
3873
|
// Cost Information
|
|
3844
3874
|
costInUSD: z.number().default(0).optional(),
|
|
3845
3875
|
// ASR API Type
|
|
@@ -3849,7 +3879,22 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3849
3879
|
// Raw ASR metadata payload as provided by the provider (stringified if needed)
|
|
3850
3880
|
rawAsrMetadata: z.string().optional(),
|
|
3851
3881
|
// Transcript outcome - categorizes the final transcript state
|
|
3852
|
-
transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional()
|
|
3882
|
+
transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional(),
|
|
3883
|
+
// Audio metrics - embedded audio quality metrics (from AudioMetricsAccumulator)
|
|
3884
|
+
// Omit 'type' field since it's embedded in METADATA, not a separate message
|
|
3885
|
+
audioMetrics: z.object({
|
|
3886
|
+
valid: z.boolean(),
|
|
3887
|
+
audioBeginMs: z.number(),
|
|
3888
|
+
audioEndMs: z.number(),
|
|
3889
|
+
maxVolume: z.number(),
|
|
3890
|
+
minVolume: z.number(),
|
|
3891
|
+
avgVolume: z.number(),
|
|
3892
|
+
silenceRatio: z.number(),
|
|
3893
|
+
clippingRatio: z.number(),
|
|
3894
|
+
snrEstimate: z.number().nullable(),
|
|
3895
|
+
lastNonSilenceMs: z.number(),
|
|
3896
|
+
timestamp: z.string()
|
|
3897
|
+
}).optional()
|
|
3853
3898
|
});
|
|
3854
3899
|
var ErrorTypeV1;
|
|
3855
3900
|
(function(ErrorTypeV12) {
|
|
@@ -3858,7 +3903,10 @@ var ErrorTypeV1;
|
|
|
3858
3903
|
ErrorTypeV12["PROVIDER_ERROR"] = "provider_error";
|
|
3859
3904
|
ErrorTypeV12["TIMEOUT_ERROR"] = "timeout_error";
|
|
3860
3905
|
ErrorTypeV12["QUOTA_EXCEEDED"] = "quota_exceeded";
|
|
3906
|
+
ErrorTypeV12["INTERNAL_QUOTA_EXHAUSTED"] = "internal_quota_exhausted";
|
|
3861
3907
|
ErrorTypeV12["CONNECTION_ERROR"] = "connection_error";
|
|
3908
|
+
ErrorTypeV12["NO_AUDIO_ERROR"] = "no_audio_error";
|
|
3909
|
+
ErrorTypeV12["CIRCUIT_BREAKER_OPEN"] = "circuit_breaker_open";
|
|
3862
3910
|
ErrorTypeV12["UNKNOWN_ERROR"] = "unknown_error";
|
|
3863
3911
|
})(ErrorTypeV1 || (ErrorTypeV1 = {}));
|
|
3864
3912
|
var ErrorResultSchemaV1 = z.object({
|
|
@@ -4075,6 +4123,12 @@ var TimerSchema = z.object({
|
|
|
4075
4123
|
* Provider that generated this message
|
|
4076
4124
|
*/
|
|
4077
4125
|
provider: z.nativeEnum(RecognitionProvider).optional(),
|
|
4126
|
+
/**
|
|
4127
|
+
* Timestamp when provider connection was initiated (in milliseconds)
|
|
4128
|
+
* Set before doConnect() - captures the moment before WebSocket creation starts
|
|
4129
|
+
* @example 1704095999800
|
|
4130
|
+
*/
|
|
4131
|
+
connectionInitiatedAtMs: z.number().optional(),
|
|
4078
4132
|
/**
|
|
4079
4133
|
* Timestamp when recording started (in milliseconds)
|
|
4080
4134
|
* @example 1704096000000
|
|
@@ -4208,6 +4262,14 @@ var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
|
4208
4262
|
/** Underlying error message */
|
|
4209
4263
|
underlyingError: z.string().optional()
|
|
4210
4264
|
});
|
|
4265
|
+
var CircuitBreakerExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
4266
|
+
errorType: z.literal(ErrorTypeV1.CIRCUIT_BREAKER_OPEN),
|
|
4267
|
+
isImmediatelyAvailable: z.literal(true),
|
|
4268
|
+
/** Provider that is unavailable */
|
|
4269
|
+
provider: z.nativeEnum(RecognitionProvider).optional(),
|
|
4270
|
+
/** Model that is unavailable */
|
|
4271
|
+
model: z.string().optional()
|
|
4272
|
+
});
|
|
4211
4273
|
var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
|
|
4212
4274
|
errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
|
|
4213
4275
|
isImmediatelyAvailable: z.literal(false),
|
|
@@ -4223,6 +4285,7 @@ var RecognitionExceptionSchema = z.discriminatedUnion("errorType", [
|
|
|
4223
4285
|
TimeoutExceptionSchema,
|
|
4224
4286
|
QuotaExceededExceptionSchema,
|
|
4225
4287
|
ConnectionExceptionSchema,
|
|
4288
|
+
CircuitBreakerExceptionSchema,
|
|
4226
4289
|
UnknownExceptionSchema
|
|
4227
4290
|
]);
|
|
4228
4291
|
function isExceptionImmediatelyAvailable(exception) {
|
|
@@ -4244,6 +4307,8 @@ function getUserFriendlyMessage(exception) {
|
|
|
4244
4307
|
return exception.message || "Rate limit exceeded. Please try again later.";
|
|
4245
4308
|
case ErrorTypeV1.CONNECTION_ERROR:
|
|
4246
4309
|
return exception.message || "Connection failed. Please check your network and try again.";
|
|
4310
|
+
case ErrorTypeV1.CIRCUIT_BREAKER_OPEN:
|
|
4311
|
+
return exception.message || "Service temporarily unavailable. Please try again.";
|
|
4247
4312
|
}
|
|
4248
4313
|
}
|
|
4249
4314
|
|
|
@@ -4259,6 +4324,12 @@ var ControlSignalTypeV1;
|
|
|
4259
4324
|
ControlSignalTypeV12["START_RECORDING"] = "start_recording";
|
|
4260
4325
|
ControlSignalTypeV12["STOP_RECORDING"] = "stop_recording";
|
|
4261
4326
|
})(ControlSignalTypeV1 || (ControlSignalTypeV1 = {}));
|
|
4327
|
+
var PrefixMode;
|
|
4328
|
+
(function(PrefixMode2) {
|
|
4329
|
+
PrefixMode2["NONE"] = "none";
|
|
4330
|
+
PrefixMode2["CLIENT"] = "client";
|
|
4331
|
+
PrefixMode2["STORED"] = "stored";
|
|
4332
|
+
})(PrefixMode || (PrefixMode = {}));
|
|
4262
4333
|
var SlotMapSchema = z.record(z.string(), z.array(z.string()));
|
|
4263
4334
|
var GameContextSchemaV1 = z.object({
|
|
4264
4335
|
type: z.literal(RecognitionContextTypeV1.GAME_CONTEXT),
|
|
@@ -4290,6 +4361,19 @@ var RequestDebugCommandSchema = z.object({
|
|
|
4290
4361
|
// Enable experimental pilot models for testing new features
|
|
4291
4362
|
enablePilotModels: z.boolean().optional().default(false)
|
|
4292
4363
|
}).optional();
|
|
4364
|
+
var FallbackASRConfigSchema = z.object({
|
|
4365
|
+
// Required - the fallback provider to use
|
|
4366
|
+
provider: z.string(),
|
|
4367
|
+
// Optional - inherits from primary if not specified
|
|
4368
|
+
model: z.string().optional(),
|
|
4369
|
+
language: z.string().optional(),
|
|
4370
|
+
sampleRate: z.number().optional(),
|
|
4371
|
+
encoding: z.number().optional(),
|
|
4372
|
+
// Recognition options - optional, inherits from primary
|
|
4373
|
+
interimResults: z.boolean().optional(),
|
|
4374
|
+
useContext: z.boolean().optional(),
|
|
4375
|
+
finalTranscriptStability: z.string().optional()
|
|
4376
|
+
});
|
|
4293
4377
|
var ASRRequestSchemaV1 = z.object({
|
|
4294
4378
|
type: z.literal(RecognitionContextTypeV1.ASR_REQUEST),
|
|
4295
4379
|
// Session identification
|
|
@@ -4305,6 +4389,16 @@ var ASRRequestSchemaV1 = z.object({
|
|
|
4305
4389
|
useContext: z.boolean().optional().default(false),
|
|
4306
4390
|
// Final transcript stability mode (timeout for fallback final transcript)
|
|
4307
4391
|
finalTranscriptStability: z.string().optional(),
|
|
4392
|
+
// Traffic control priority (affects quota slot allocation)
|
|
4393
|
+
// 'high' = can use all quota slots (reserved for critical games like song-quiz)
|
|
4394
|
+
// 'low' = limited to non-reserved slots (default for most requests)
|
|
4395
|
+
priority: z.enum(["low", "high"]).optional().default("low"),
|
|
4396
|
+
// Fallback providers - tried in order if primary provider is unavailable (circuit breaker open)
|
|
4397
|
+
fallbackModels: z.array(FallbackASRConfigSchema).optional(),
|
|
4398
|
+
// Prefix audio configuration
|
|
4399
|
+
prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
|
|
4400
|
+
prefixId: z.string().optional(),
|
|
4401
|
+
prefixTextToRemove: z.array(z.string()).optional(),
|
|
4308
4402
|
// Debug options (FOR DEBUG/TESTING ONLY - not for production use)
|
|
4309
4403
|
debugCommand: RequestDebugCommandSchema
|
|
4310
4404
|
});
|
|
@@ -4322,6 +4416,8 @@ var RecognitionGameInfoSchema = z.object({
|
|
|
4322
4416
|
accountId: z.string().optional(),
|
|
4323
4417
|
gameId: z.string().optional(),
|
|
4324
4418
|
gamePhase: z.string().optional(),
|
|
4419
|
+
questionAskedId: z.string().optional(),
|
|
4420
|
+
/** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
|
|
4325
4421
|
questionAnswerId: z.string().optional(),
|
|
4326
4422
|
platform: z.string().optional()
|
|
4327
4423
|
// Platform for audio recording device (use the definition of platform teams)
|
|
@@ -4456,6 +4552,7 @@ var AudioEncoding;
|
|
|
4456
4552
|
}
|
|
4457
4553
|
AudioEncoding2.isNameValid = isNameValid;
|
|
4458
4554
|
})(AudioEncoding || (AudioEncoding = {}));
|
|
4555
|
+
var PREFIX_AUDIO_ENCODING_OFFSET = 128;
|
|
4459
4556
|
var SampleRate;
|
|
4460
4557
|
(function(SampleRate2) {
|
|
4461
4558
|
SampleRate2[SampleRate2["RATE_8000"] = 8e3] = "RATE_8000";
|
|
@@ -4557,6 +4654,7 @@ function createDefaultASRConfig(overrides) {
|
|
|
4557
4654
|
var PlumbingType;
|
|
4558
4655
|
(function(PlumbingType2) {
|
|
4559
4656
|
PlumbingType2["AUDIO"] = "audio";
|
|
4657
|
+
PlumbingType2["PREFIX_AUDIO"] = "prefix_audio";
|
|
4560
4658
|
PlumbingType2["CONTROL"] = "control";
|
|
4561
4659
|
PlumbingType2["RESULT"] = "result";
|
|
4562
4660
|
PlumbingType2["RECOGNITION_CONTEXT"] = "recognition_context";
|
|
@@ -4635,6 +4733,11 @@ var StatsIncrementType;
|
|
|
4635
4733
|
StatsIncrementType2["SUCCESS"] = "success";
|
|
4636
4734
|
StatsIncrementType2["FAIL"] = "fail";
|
|
4637
4735
|
})(StatsIncrementType || (StatsIncrementType = {}));
|
|
4736
|
+
var QuotaPriority;
|
|
4737
|
+
(function(QuotaPriority2) {
|
|
4738
|
+
QuotaPriority2[QuotaPriority2["LOW"] = 0] = "LOW";
|
|
4739
|
+
QuotaPriority2[QuotaPriority2["HIGH"] = 1] = "HIGH";
|
|
4740
|
+
})(QuotaPriority || (QuotaPriority = {}));
|
|
4638
4741
|
|
|
4639
4742
|
// ../../libs/types/dist/stages.types.js
|
|
4640
4743
|
var STAGES = {
|
|
@@ -4811,7 +4914,7 @@ var WebSocketAudioClient = class {
|
|
|
4811
4914
|
// ../../libs/websocket/dist/core/audio-upload-websocket-server.js
|
|
4812
4915
|
import { WebSocketServer, WebSocket as WebSocket2 } from "ws";
|
|
4813
4916
|
|
|
4814
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4917
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/stringify.js
|
|
4815
4918
|
var byteToHex = [];
|
|
4816
4919
|
for (let i = 0; i < 256; ++i) {
|
|
4817
4920
|
byteToHex.push((i + 256).toString(16).slice(1));
|
|
@@ -4820,7 +4923,7 @@ function unsafeStringify(arr, offset = 0) {
|
|
|
4820
4923
|
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
4821
4924
|
}
|
|
4822
4925
|
|
|
4823
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4926
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/rng.js
|
|
4824
4927
|
var getRandomValues;
|
|
4825
4928
|
var rnds8 = new Uint8Array(16);
|
|
4826
4929
|
function rng() {
|
|
@@ -4833,21 +4936,27 @@ function rng() {
|
|
|
4833
4936
|
return getRandomValues(rnds8);
|
|
4834
4937
|
}
|
|
4835
4938
|
|
|
4836
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4939
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/native.js
|
|
4837
4940
|
var randomUUID = typeof crypto !== "undefined" && crypto.randomUUID && crypto.randomUUID.bind(crypto);
|
|
4838
4941
|
var native_default = { randomUUID };
|
|
4839
4942
|
|
|
4840
|
-
// ../../node_modules/.pnpm/uuid@11.
|
|
4943
|
+
// ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/v4.js
|
|
4841
4944
|
function v4(options, buf, offset) {
|
|
4842
4945
|
if (native_default.randomUUID && !buf && !options) {
|
|
4843
4946
|
return native_default.randomUUID();
|
|
4844
4947
|
}
|
|
4845
4948
|
options = options || {};
|
|
4846
|
-
const rnds = options.random
|
|
4949
|
+
const rnds = options.random ?? options.rng?.() ?? rng();
|
|
4950
|
+
if (rnds.length < 16) {
|
|
4951
|
+
throw new Error("Random bytes length must be >= 16");
|
|
4952
|
+
}
|
|
4847
4953
|
rnds[6] = rnds[6] & 15 | 64;
|
|
4848
4954
|
rnds[8] = rnds[8] & 63 | 128;
|
|
4849
4955
|
if (buf) {
|
|
4850
4956
|
offset = offset || 0;
|
|
4957
|
+
if (offset < 0 || offset + 16 > buf.length) {
|
|
4958
|
+
throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
|
|
4959
|
+
}
|
|
4851
4960
|
for (let i = 0; i < 16; ++i) {
|
|
4852
4961
|
buf[offset + i] = rnds[i];
|
|
4853
4962
|
}
|
|
@@ -5283,6 +5392,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5283
5392
|
highWM: config.highWaterMark ?? 512e3,
|
|
5284
5393
|
lowWM: config.lowWaterMark ?? 128e3
|
|
5285
5394
|
});
|
|
5395
|
+
this.prefixBuffer = [];
|
|
5396
|
+
// Buffer prefix audio until READY
|
|
5397
|
+
this.prefixBufferBytes = 0;
|
|
5286
5398
|
this.state = "initial" /* INITIAL */;
|
|
5287
5399
|
// Debug control (internal state, controlled by debugCommand in ASRRequest)
|
|
5288
5400
|
this.isDebugLogEnabled = false;
|
|
@@ -5339,6 +5451,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5339
5451
|
static {
|
|
5340
5452
|
this.PROTOCOL_VERSION = 1;
|
|
5341
5453
|
}
|
|
5454
|
+
static {
|
|
5455
|
+
this.MAX_PREFIX_BUFFER_BYTES = 10 * 1024 * 1024;
|
|
5456
|
+
}
|
|
5342
5457
|
// ==========================================================================
|
|
5343
5458
|
// PRIVATE HELPERS
|
|
5344
5459
|
// ==========================================================================
|
|
@@ -5364,6 +5479,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5364
5479
|
cleanup() {
|
|
5365
5480
|
this.log("debug", "Cleaning up resources");
|
|
5366
5481
|
this.audioBuffer.clear();
|
|
5482
|
+
this.prefixBuffer = [];
|
|
5483
|
+
this.prefixBufferBytes = 0;
|
|
5367
5484
|
this.audioBytesSent = 0;
|
|
5368
5485
|
this.audioChunksSent = 0;
|
|
5369
5486
|
this.lastAudioStatsLog = 0;
|
|
@@ -5413,7 +5530,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5413
5530
|
const timeout = setTimeout(() => {
|
|
5414
5531
|
if (settled) return;
|
|
5415
5532
|
settled = true;
|
|
5416
|
-
this.log("warn",
|
|
5533
|
+
this.log("warn", `Connection timeout url=${this.config.url}`, { timeout: connectionTimeout, attempt });
|
|
5417
5534
|
this.state = "failed" /* FAILED */;
|
|
5418
5535
|
reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
|
|
5419
5536
|
}, connectionTimeout);
|
|
@@ -5435,7 +5552,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5435
5552
|
if (settled) return;
|
|
5436
5553
|
settled = true;
|
|
5437
5554
|
clearTimeout(timeout);
|
|
5438
|
-
this.log("warn",
|
|
5555
|
+
this.log("warn", `Connection error url=${this.config.url}`, { error, attempt });
|
|
5439
5556
|
this.state = "failed" /* FAILED */;
|
|
5440
5557
|
reject(error);
|
|
5441
5558
|
};
|
|
@@ -5450,14 +5567,14 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5450
5567
|
lastError = error;
|
|
5451
5568
|
if (attempt < maxAttempts) {
|
|
5452
5569
|
const logLevel = attempt < 3 ? "info" : "warn";
|
|
5453
|
-
this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
|
|
5570
|
+
this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms url=${this.config.url}`, {
|
|
5454
5571
|
error: lastError.message,
|
|
5455
5572
|
nextAttempt: attempt + 1
|
|
5456
5573
|
});
|
|
5457
5574
|
this.state = "initial" /* INITIAL */;
|
|
5458
5575
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
5459
5576
|
} else {
|
|
5460
|
-
this.log("warn", `All ${maxAttempts} connection attempts failed`, {
|
|
5577
|
+
this.log("warn", `All ${maxAttempts} connection attempts failed url=${this.config.url}`, {
|
|
5461
5578
|
error: lastError.message
|
|
5462
5579
|
});
|
|
5463
5580
|
}
|
|
@@ -5580,6 +5697,25 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5580
5697
|
isBufferOverflowing() {
|
|
5581
5698
|
return this.audioBuffer.isOverflowing();
|
|
5582
5699
|
}
|
|
5700
|
+
isServerReady() {
|
|
5701
|
+
return this.state === "ready" /* READY */;
|
|
5702
|
+
}
|
|
5703
|
+
sendGameContext(context) {
|
|
5704
|
+
if (this.state !== "connected" /* CONNECTED */ && this.state !== "ready" /* READY */) {
|
|
5705
|
+
this.log("warn", "sendGameContext called in wrong state", { state: this.state });
|
|
5706
|
+
return;
|
|
5707
|
+
}
|
|
5708
|
+
this.log("debug", "Sending game context (deferred)", {
|
|
5709
|
+
gameId: context.gameId,
|
|
5710
|
+
gamePhase: context.gamePhase,
|
|
5711
|
+
hasSlotMap: !!context.slotMap
|
|
5712
|
+
});
|
|
5713
|
+
super.sendMessage(
|
|
5714
|
+
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
5715
|
+
"message",
|
|
5716
|
+
context
|
|
5717
|
+
);
|
|
5718
|
+
}
|
|
5583
5719
|
getStats() {
|
|
5584
5720
|
const bufferStats = this.audioBuffer.getStats();
|
|
5585
5721
|
return {
|
|
@@ -5605,6 +5741,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5605
5741
|
if (this.isDebugLogEnabled) {
|
|
5606
5742
|
this.log("debug", "Sending ASR request", this.config.asrRequestConfig);
|
|
5607
5743
|
}
|
|
5744
|
+
const fallbackModels = this.config.asrRequestConfig.fallbackModels;
|
|
5608
5745
|
const asrRequest = {
|
|
5609
5746
|
type: RecognitionContextTypeV1.ASR_REQUEST,
|
|
5610
5747
|
audioUtteranceId: this.config.audioUtteranceId,
|
|
@@ -5620,7 +5757,20 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5620
5757
|
...this.config.asrRequestConfig.finalTranscriptStability && {
|
|
5621
5758
|
finalTranscriptStability: this.config.asrRequestConfig.finalTranscriptStability
|
|
5622
5759
|
},
|
|
5623
|
-
|
|
5760
|
+
// Include fallbackModels if provided (for circuit breaker fallback)
|
|
5761
|
+
...fallbackModels && { fallbackModels },
|
|
5762
|
+
...debugCommand && { debugCommand },
|
|
5763
|
+
// Include prefix mode if provided (for server-side stored prefix injection)
|
|
5764
|
+
...this.config.asrRequestConfig.prefixMode && {
|
|
5765
|
+
prefixMode: this.config.asrRequestConfig.prefixMode
|
|
5766
|
+
},
|
|
5767
|
+
...this.config.asrRequestConfig.prefixId && {
|
|
5768
|
+
prefixId: this.config.asrRequestConfig.prefixId
|
|
5769
|
+
},
|
|
5770
|
+
// Include prefix text to remove if provided (for server-side prefix text removal)
|
|
5771
|
+
...this.config.asrRequestConfig.prefixTextToRemove && {
|
|
5772
|
+
prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
|
|
5773
|
+
}
|
|
5624
5774
|
};
|
|
5625
5775
|
super.sendMessage(
|
|
5626
5776
|
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
@@ -5727,6 +5877,12 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5727
5877
|
this.log("debug", "Server ready for audio upload");
|
|
5728
5878
|
this.state = "ready" /* READY */;
|
|
5729
5879
|
this.messageHandler.setSessionStartTime(Date.now());
|
|
5880
|
+
if (this.prefixBuffer.length > 0) {
|
|
5881
|
+
this.log("debug", "Flushing buffered prefix audio", { chunks: this.prefixBuffer.length });
|
|
5882
|
+
this.prefixBuffer.forEach((chunk) => this.sendPrefixAudioNow(chunk));
|
|
5883
|
+
this.prefixBuffer = [];
|
|
5884
|
+
this.prefixBufferBytes = 0;
|
|
5885
|
+
}
|
|
5730
5886
|
const bufferedChunks = this.audioBuffer.flush();
|
|
5731
5887
|
if (bufferedChunks.length > 0) {
|
|
5732
5888
|
this.log("debug", "Flushing buffered audio", { chunks: bufferedChunks.length });
|
|
@@ -5758,6 +5914,74 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5758
5914
|
this.audioBytesSent += byteLength;
|
|
5759
5915
|
this.audioChunksSent++;
|
|
5760
5916
|
}
|
|
5917
|
+
/**
|
|
5918
|
+
* Send prefix audio to the server.
|
|
5919
|
+
* Prefix audio is sent before user audio and is used for context/priming.
|
|
5920
|
+
* The server will process it but adjust timing so transcripts reflect user audio timing.
|
|
5921
|
+
*
|
|
5922
|
+
* Note: Prefix audio is buffered until READY state, then flushed before user audio.
|
|
5923
|
+
* This ensures proper ordering even if called before server is ready.
|
|
5924
|
+
*
|
|
5925
|
+
* @param audioData - Prefix audio data (ArrayBuffer, ArrayBufferView, or Blob)
|
|
5926
|
+
*/
|
|
5927
|
+
sendPrefixAudio(audioData) {
|
|
5928
|
+
if (audioData instanceof Blob) {
|
|
5929
|
+
blobToArrayBuffer(audioData).then((arrayBuffer) => {
|
|
5930
|
+
this.sendPrefixAudioInternal(arrayBuffer);
|
|
5931
|
+
}).catch((error) => {
|
|
5932
|
+
this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
|
|
5933
|
+
});
|
|
5934
|
+
return;
|
|
5935
|
+
}
|
|
5936
|
+
this.sendPrefixAudioInternal(audioData);
|
|
5937
|
+
}
|
|
5938
|
+
/**
|
|
5939
|
+
* Internal method to handle prefix audio with buffering
|
|
5940
|
+
* Buffers if not READY, sends immediately if READY
|
|
5941
|
+
*/
|
|
5942
|
+
sendPrefixAudioInternal(audioData) {
|
|
5943
|
+
const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
5944
|
+
if (bytes === 0) return;
|
|
5945
|
+
if (this.state === "stopped" /* STOPPED */ || this.state === "failed" /* FAILED */) {
|
|
5946
|
+
this.log("debug", "Ignoring prefix audio in terminal state", { bytes, state: this.state });
|
|
5947
|
+
return;
|
|
5948
|
+
}
|
|
5949
|
+
if (this.state === "ready" /* READY */) {
|
|
5950
|
+
this.log("debug", "Sending prefix audio immediately", { bytes });
|
|
5951
|
+
this.sendPrefixAudioNow(audioData);
|
|
5952
|
+
} else {
|
|
5953
|
+
if (this.prefixBufferBytes + bytes > _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES) {
|
|
5954
|
+
this.log("warn", "Prefix buffer limit exceeded, dropping chunk", {
|
|
5955
|
+
bytes,
|
|
5956
|
+
current: this.prefixBufferBytes,
|
|
5957
|
+
max: _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES
|
|
5958
|
+
});
|
|
5959
|
+
return;
|
|
5960
|
+
}
|
|
5961
|
+
this.log("debug", "Buffering prefix audio until READY", { bytes, state: this.state });
|
|
5962
|
+
this.prefixBuffer.push(audioData);
|
|
5963
|
+
this.prefixBufferBytes += bytes;
|
|
5964
|
+
}
|
|
5965
|
+
}
|
|
5966
|
+
/**
|
|
5967
|
+
* Send prefix audio immediately to the server (without buffering)
|
|
5968
|
+
* Uses encoding offset to mark as prefix audio
|
|
5969
|
+
* @param audioData - Prefix audio data to send
|
|
5970
|
+
*/
|
|
5971
|
+
sendPrefixAudioNow(audioData) {
|
|
5972
|
+
const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
5973
|
+
if (byteLength === 0) return;
|
|
5974
|
+
const baseEncodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
|
|
5975
|
+
const prefixEncodingId = baseEncodingId + PREFIX_AUDIO_ENCODING_OFFSET;
|
|
5976
|
+
const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
|
|
5977
|
+
this.log("debug", "Sending prefix audio", { bytes: byteLength, encoding: prefixEncodingId });
|
|
5978
|
+
super.sendAudio(
|
|
5979
|
+
audioData,
|
|
5980
|
+
_RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
5981
|
+
prefixEncodingId,
|
|
5982
|
+
sampleRate
|
|
5983
|
+
);
|
|
5984
|
+
}
|
|
5761
5985
|
};
|
|
5762
5986
|
|
|
5763
5987
|
// src/config-builder.ts
|
|
@@ -5964,6 +6188,11 @@ var RecognitionVGFStateSchema = z.object({
|
|
|
5964
6188
|
finalTranscript: z.string().optional(),
|
|
5965
6189
|
// Full finalized transcript for the utterance. Will not change.
|
|
5966
6190
|
finalConfidence: z.number().optional(),
|
|
6191
|
+
// Voice timing (ms from stream start, prefix-adjusted)
|
|
6192
|
+
voiceEnd: z.number().optional(),
|
|
6193
|
+
// voice end time identified by ASR
|
|
6194
|
+
lastNonSilence: z.number().optional(),
|
|
6195
|
+
// last non-silence sample time from PCM analysis
|
|
5967
6196
|
// Tracking-only metadata
|
|
5968
6197
|
asrConfig: z.string().optional(),
|
|
5969
6198
|
// Json format of the ASR config
|
|
@@ -6052,6 +6281,12 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
|
|
|
6052
6281
|
newState.finalConfidence = result.finalTranscriptConfidence;
|
|
6053
6282
|
}
|
|
6054
6283
|
}
|
|
6284
|
+
if (result.voiceEnd !== void 0) {
|
|
6285
|
+
newState.voiceEnd = result.voiceEnd;
|
|
6286
|
+
}
|
|
6287
|
+
if (result.lastNonSilence !== void 0) {
|
|
6288
|
+
newState.lastNonSilence = result.lastNonSilence;
|
|
6289
|
+
}
|
|
6055
6290
|
} else {
|
|
6056
6291
|
newState.transcriptionStatus = TranscriptionStatus.FINALIZED;
|
|
6057
6292
|
newState.finalTranscript = result.finalTranscript || "";
|
|
@@ -6059,6 +6294,12 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
|
|
|
6059
6294
|
newState.finalConfidence = result.finalTranscriptConfidence;
|
|
6060
6295
|
}
|
|
6061
6296
|
newState.finalTranscriptionTimestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
6297
|
+
if (result.voiceEnd !== void 0) {
|
|
6298
|
+
newState.voiceEnd = result.voiceEnd;
|
|
6299
|
+
}
|
|
6300
|
+
if (result.lastNonSilence !== void 0) {
|
|
6301
|
+
newState.lastNonSilence = result.lastNonSilence;
|
|
6302
|
+
}
|
|
6062
6303
|
newState.pendingTranscript = "";
|
|
6063
6304
|
newState.pendingConfidence = void 0;
|
|
6064
6305
|
}
|
|
@@ -6094,7 +6335,9 @@ function resetRecognitionVGFState(currentState) {
|
|
|
6094
6335
|
transcriptionStatus: TranscriptionStatus.NOT_STARTED,
|
|
6095
6336
|
startRecordingStatus: RecordingStatus.READY,
|
|
6096
6337
|
recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
|
|
6097
|
-
finalTranscript: void 0
|
|
6338
|
+
finalTranscript: void 0,
|
|
6339
|
+
voiceEnd: void 0,
|
|
6340
|
+
lastNonSilence: void 0
|
|
6098
6341
|
};
|
|
6099
6342
|
}
|
|
6100
6343
|
function generateUUID() {
|
|
@@ -6135,6 +6378,7 @@ var SimplifiedVGFRecognitionClient = class {
|
|
|
6135
6378
|
}
|
|
6136
6379
|
} else {
|
|
6137
6380
|
this.state = createVGFStateFromConfig(clientConfig);
|
|
6381
|
+
clientConfig.audioUtteranceId = this.state.audioUtteranceId;
|
|
6138
6382
|
}
|
|
6139
6383
|
this.state = { ...this.state, startRecordingStatus: "READY" };
|
|
6140
6384
|
this.expectedUuid = this.state.audioUtteranceId;
|
|
@@ -6302,6 +6546,12 @@ var SimplifiedVGFRecognitionClient = class {
|
|
|
6302
6546
|
isBufferOverflowing() {
|
|
6303
6547
|
return this.client.isBufferOverflowing();
|
|
6304
6548
|
}
|
|
6549
|
+
sendGameContext(context) {
|
|
6550
|
+
this.client.sendGameContext(context);
|
|
6551
|
+
}
|
|
6552
|
+
isServerReady() {
|
|
6553
|
+
return this.client.isServerReady();
|
|
6554
|
+
}
|
|
6305
6555
|
// VGF State access (read-only for consumers)
|
|
6306
6556
|
getVGFState() {
|
|
6307
6557
|
return { ...this.state };
|
|
@@ -6347,6 +6597,7 @@ export {
|
|
|
6347
6597
|
ConnectionError,
|
|
6348
6598
|
ControlSignalTypeV1 as ControlSignal,
|
|
6349
6599
|
ControlSignalTypeV1,
|
|
6600
|
+
DashScopeModel,
|
|
6350
6601
|
DeepgramModel,
|
|
6351
6602
|
ElevenLabsModel,
|
|
6352
6603
|
ErrorTypeV1,
|
|
@@ -6355,7 +6606,9 @@ export {
|
|
|
6355
6606
|
GeminiModel,
|
|
6356
6607
|
GoogleModel,
|
|
6357
6608
|
Language,
|
|
6609
|
+
MistralVoxtralModel,
|
|
6358
6610
|
OpenAIModel,
|
|
6611
|
+
OpenAIRealtimeModel,
|
|
6359
6612
|
RECOGNITION_CONDUCTOR_BASES,
|
|
6360
6613
|
RECOGNITION_SERVICE_BASES,
|
|
6361
6614
|
RealTimeTwoWayWebSocketRecognitionClient,
|