@volley/recognition-client-sdk 0.1.296 → 0.1.381

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -10,6 +10,6 @@ export { SimplifiedVGFRecognitionClient, createSimplifiedVGFClient, type ISimpli
10
10
  export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, TranscriptionStatus, type RecordingStatusType, type TranscriptionStatusType, createInitialRecognitionState, isValidRecordingStatusTransition } from './vgf-recognition-state.js';
11
11
  export { AudioEncoding } from '@recog/websocket';
12
12
  export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
13
- type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, GoogleModel, GeminiModel, OpenAIModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
13
+ type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GoogleModel, GeminiModel, OpenAIModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
14
14
  export { getRecognitionServiceBase, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getRecognitionServiceHost, getRecognitionConductorBase, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionConductorHost, normalizeStage, RECOGNITION_SERVICE_BASES, RECOGNITION_CONDUCTOR_BASES } from '@recog/shared-config';
15
15
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAGpC,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,WAAW,EACX,WAAW,EACX,WAAW,EACX,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAGpC,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- // ../../node_modules/.pnpm/zod@3.22.5/node_modules/zod/lib/index.mjs
1
+ // ../../node_modules/.pnpm/zod@3.22.4/node_modules/zod/lib/index.mjs
2
2
  var util;
3
3
  (function(util2) {
4
4
  util2.assertEqual = (val) => val;
@@ -3604,14 +3604,14 @@ var ostring = () => stringType().optional();
3604
3604
  var onumber = () => numberType().optional();
3605
3605
  var oboolean = () => booleanType().optional();
3606
3606
  var coerce = {
3607
- string: ((arg) => ZodString.create({ ...arg, coerce: true })),
3608
- number: ((arg) => ZodNumber.create({ ...arg, coerce: true })),
3609
- boolean: ((arg) => ZodBoolean.create({
3607
+ string: (arg) => ZodString.create({ ...arg, coerce: true }),
3608
+ number: (arg) => ZodNumber.create({ ...arg, coerce: true }),
3609
+ boolean: (arg) => ZodBoolean.create({
3610
3610
  ...arg,
3611
3611
  coerce: true
3612
- })),
3613
- bigint: ((arg) => ZodBigInt.create({ ...arg, coerce: true })),
3614
- date: ((arg) => ZodDate.create({ ...arg, coerce: true }))
3612
+ }),
3613
+ bigint: (arg) => ZodBigInt.create({ ...arg, coerce: true }),
3614
+ date: (arg) => ZodDate.create({ ...arg, coerce: true })
3615
3615
  };
3616
3616
  var NEVER = INVALID;
3617
3617
  var z = /* @__PURE__ */ Object.freeze({
@@ -3735,9 +3735,12 @@ var RecognitionProvider;
3735
3735
  (function(RecognitionProvider2) {
3736
3736
  RecognitionProvider2["ASSEMBLYAI"] = "assemblyai";
3737
3737
  RecognitionProvider2["DEEPGRAM"] = "deepgram";
3738
+ RecognitionProvider2["ELEVENLABS"] = "elevenlabs";
3739
+ RecognitionProvider2["FIREWORKS"] = "fireworks";
3738
3740
  RecognitionProvider2["GOOGLE"] = "google";
3739
3741
  RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
3740
3742
  RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
3743
+ RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
3741
3744
  })(RecognitionProvider || (RecognitionProvider = {}));
3742
3745
  var RecognitionMode;
3743
3746
  (function(RecognitionMode2) {
@@ -3757,17 +3760,34 @@ var DeepgramModel;
3757
3760
  })(DeepgramModel || (DeepgramModel = {}));
3758
3761
  var GoogleModel;
3759
3762
  (function(GoogleModel2) {
3763
+ GoogleModel2["CHIRP_3"] = "chirp_3";
3764
+ GoogleModel2["CHIRP_2"] = "chirp_2";
3765
+ GoogleModel2["CHIRP"] = "chirp";
3760
3766
  GoogleModel2["LATEST_LONG"] = "latest_long";
3761
3767
  GoogleModel2["LATEST_SHORT"] = "latest_short";
3762
3768
  GoogleModel2["TELEPHONY"] = "telephony";
3763
3769
  GoogleModel2["TELEPHONY_SHORT"] = "telephony_short";
3764
- GoogleModel2["MEDICAL_DICTATION"] = "medical_dictation";
3765
- GoogleModel2["MEDICAL_CONVERSATION"] = "medical_conversation";
3766
3770
  GoogleModel2["DEFAULT"] = "default";
3767
3771
  GoogleModel2["COMMAND_AND_SEARCH"] = "command_and_search";
3768
3772
  GoogleModel2["PHONE_CALL"] = "phone_call";
3769
3773
  GoogleModel2["VIDEO"] = "video";
3770
3774
  })(GoogleModel || (GoogleModel = {}));
3775
+ var FireworksModel;
3776
+ (function(FireworksModel2) {
3777
+ FireworksModel2["ASR_V1"] = "fireworks-asr-large";
3778
+ FireworksModel2["ASR_V2"] = "fireworks-asr-v2";
3779
+ FireworksModel2["WHISPER_V3"] = "whisper-v3";
3780
+ FireworksModel2["WHISPER_V3_TURBO"] = "whisper-v3-turbo";
3781
+ })(FireworksModel || (FireworksModel = {}));
3782
+ var ElevenLabsModel;
3783
+ (function(ElevenLabsModel2) {
3784
+ ElevenLabsModel2["SCRIBE_V2_REALTIME"] = "scribe_v2_realtime";
3785
+ ElevenLabsModel2["SCRIBE_V1"] = "scribe_v1";
3786
+ })(ElevenLabsModel || (ElevenLabsModel = {}));
3787
+ var OpenAIRealtimeModel;
3788
+ (function(OpenAIRealtimeModel2) {
3789
+ OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
3790
+ })(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
3771
3791
 
3772
3792
  // ../../libs/types/dist/recognition-result-v1.types.js
3773
3793
  var RecognitionResultTypeV1;
@@ -3781,68 +3801,44 @@ var RecognitionResultTypeV1;
3781
3801
  var TranscriptionResultSchemaV1 = z.object({
3782
3802
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
3783
3803
  audioUtteranceId: z.string(),
3784
- // audio utterance id for this transcription session
3785
3804
  finalTranscript: z.string(),
3786
- // transcript that is final and won't be overwritten by ASR. default empty string.
3787
3805
  finalTranscriptConfidence: z.number().min(0).max(1).optional(),
3788
- // confidence score for the final transcript (0-1). Optional.
3789
3806
  pendingTranscript: z.string().optional(),
3790
- // transcript that may be overwritten by ASR. Optional.
3791
3807
  pendingTranscriptConfidence: z.number().min(0).max(1).optional(),
3792
- // confidence score for the pending transcript (0-1). Optional.
3793
3808
  is_finished: z.boolean(),
3794
- // entire transcription finished. This will be the last message in the transcription.
3795
3809
  voiceStart: z.number().optional(),
3796
- // voice start time identified by ASR (in milliseconds from stream start). Optional.
3797
3810
  voiceDuration: z.number().optional(),
3798
- // voice duration identified by ASR (in milliseconds, calculated as maxVoiceEnd - voiceStart). Optional.
3799
3811
  voiceEnd: z.number().optional(),
3800
- // voice end time identified by ASR (in milliseconds from stream start). Optional.
3801
3812
  startTimestamp: z.number().optional(),
3802
- // start timestamp of the transcription in milliseconds. Optional.
3803
3813
  endTimestamp: z.number().optional(),
3804
- // end timestamp of the transcription in milliseconds. Optional.
3805
3814
  receivedAtMs: z.number().optional(),
3806
- // server timestamp when this transcript was received (ms since epoch). Optional.
3807
3815
  accumulatedAudioTimeMs: z.number().optional()
3808
3816
  // accumulated audio time watermark in milliseconds. Total duration of all audio chunks sent. Optional.
3809
3817
  });
3810
3818
  var FunctionCallResultSchemaV1 = z.object({
3811
3819
  type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
3812
3820
  audioUtteranceId: z.string(),
3813
- // audio utterance id for this function call
3814
3821
  functionName: z.string(),
3815
3822
  functionArgJson: z.string()
3816
3823
  });
3817
3824
  var MetadataResultSchemaV1 = z.object({
3818
3825
  type: z.literal(RecognitionResultTypeV1.METADATA),
3819
3826
  audioUtteranceId: z.string(),
3820
- // audio utterance id for this metadata
3821
3827
  // Timing information
3822
3828
  recordingStartMs: z.number().optional(),
3823
- // When recording started
3824
3829
  recordingEndMs: z.number().optional(),
3825
- // When recording ended
3826
3830
  transcriptEndMs: z.number().optional(),
3827
- // When final transcript was received
3828
3831
  socketCloseAtMs: z.number().optional(),
3829
- // When socket/connection closed
3830
3832
  // Audio Quality Metrics
3831
3833
  duration: z.number().optional(),
3832
- // Duration of the audio in milliseconds
3833
3834
  volume: z.number().optional(),
3834
- // Volume of the audio in decibels
3835
3835
  accumulatedAudioTimeMs: z.number().optional(),
3836
- // Accumulated audio time watermark in milliseconds. Total duration of all audio chunks sent. Optional.
3837
3836
  // Cost Information
3838
3837
  costInUSD: z.number().default(0).optional(),
3839
- // Estimated cost in USD for this recognition session. Default: 0
3840
3838
  // ASR API Type
3841
3839
  apiType: z.nativeEnum(ASRApiType).optional(),
3842
- // ASR API type from job (streaming vs file-based)
3843
3840
  // ASR configuration as JSON string (no type validation)
3844
3841
  asrConfig: z.string().optional(),
3845
- // Stringified JSON of the ASR provider config
3846
3842
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
3847
3843
  rawAsrMetadata: z.string().optional()
3848
3844
  });
@@ -3859,13 +3855,9 @@ var ErrorTypeV1;
3859
3855
  var ErrorResultSchemaV1 = z.object({
3860
3856
  type: z.literal(RecognitionResultTypeV1.ERROR),
3861
3857
  audioUtteranceId: z.string(),
3862
- // audio utterance id for this error
3863
3858
  errorType: z.nativeEnum(ErrorTypeV1).optional(),
3864
- // error type category
3865
3859
  message: z.string().optional(),
3866
- // Error message
3867
3860
  code: z.union([z.string(), z.number()]).optional(),
3868
- // Error code
3869
3861
  description: z.string().optional()
3870
3862
  // Detailed description
3871
3863
  });
@@ -3877,9 +3869,7 @@ var ClientControlActionV1;
3877
3869
  var ClientControlActionsV1 = z.nativeEnum(ClientControlActionV1);
3878
3870
  var ClientControlMessageSchemaV1 = z.object({
3879
3871
  type: z.literal(RecognitionResultTypeV1.CLIENT_CONTROL_MESSAGE),
3880
- // Message type discriminator
3881
3872
  audioUtteranceId: z.string(),
3882
- // audio utterance id for this control message
3883
3873
  action: ClientControlActionsV1
3884
3874
  // The control action to perform
3885
3875
  });
@@ -4017,7 +4007,6 @@ var MetadataMessageSchema = z.object({
4017
4007
  type: z.literal(ProviderMessageType.METADATA),
4018
4008
  provider: z.nativeEnum(RecognitionProvider),
4019
4009
  asrConfig: z.string().optional(),
4020
- // ASR config we sent to provider
4021
4010
  data: z.string().optional()
4022
4011
  // Raw metadata from provider (Deepgram only)
4023
4012
  });
@@ -4141,7 +4130,6 @@ var BaseRecognitionExceptionSchema = z.object({
4141
4130
  var AuthenticationExceptionSchema = BaseRecognitionExceptionSchema.extend({
4142
4131
  errorType: z.literal(ErrorTypeV1.AUTHENTICATION_ERROR),
4143
4132
  isImmediatelyAvailable: z.literal(false),
4144
- // System issue, not user-facing
4145
4133
  /** Which service failed authentication (e.g., 'deepgram', 'google') */
4146
4134
  service: z.string().optional(),
4147
4135
  /** Authentication method that failed (e.g., 'api_key', 'oauth') */
@@ -4150,7 +4138,6 @@ var AuthenticationExceptionSchema = BaseRecognitionExceptionSchema.extend({
4150
4138
  var ValidationExceptionSchema = BaseRecognitionExceptionSchema.extend({
4151
4139
  errorType: z.literal(ErrorTypeV1.VALIDATION_ERROR),
4152
4140
  isImmediatelyAvailable: z.literal(true),
4153
- // Can show to user
4154
4141
  /** Field name that failed validation */
4155
4142
  field: z.string().optional(),
4156
4143
  /** Expected value format/type */
@@ -4161,7 +4148,6 @@ var ValidationExceptionSchema = BaseRecognitionExceptionSchema.extend({
4161
4148
  var ProviderExceptionSchema = BaseRecognitionExceptionSchema.extend({
4162
4149
  errorType: z.literal(ErrorTypeV1.PROVIDER_ERROR),
4163
4150
  isImmediatelyAvailable: z.literal(false),
4164
- // Provider issue, not user-facing
4165
4151
  /** Provider name (e.g., 'deepgram', 'assemblyai', 'google') */
4166
4152
  provider: z.string().optional(),
4167
4153
  /** Provider-specific error code */
@@ -4172,7 +4158,6 @@ var ProviderExceptionSchema = BaseRecognitionExceptionSchema.extend({
4172
4158
  var TimeoutExceptionSchema = BaseRecognitionExceptionSchema.extend({
4173
4159
  errorType: z.literal(ErrorTypeV1.TIMEOUT_ERROR),
4174
4160
  isImmediatelyAvailable: z.literal(true),
4175
- // Can tell user to try again
4176
4161
  /** Timeout duration in milliseconds */
4177
4162
  timeoutMs: z.number().optional(),
4178
4163
  /** What operation timed out (e.g., 'connection', 'transcription', 'response') */
@@ -4181,7 +4166,6 @@ var TimeoutExceptionSchema = BaseRecognitionExceptionSchema.extend({
4181
4166
  var QuotaExceededExceptionSchema = BaseRecognitionExceptionSchema.extend({
4182
4167
  errorType: z.literal(ErrorTypeV1.QUOTA_EXCEEDED),
4183
4168
  isImmediatelyAvailable: z.literal(true),
4184
- // Can tell user to wait
4185
4169
  /** Quota type that was exceeded (e.g., 'rate_limit', 'daily_quota', 'concurrent_requests') */
4186
4170
  quotaType: z.string().optional(),
4187
4171
  /** When quota resets (Unix timestamp in ms) */
@@ -4192,7 +4176,6 @@ var QuotaExceededExceptionSchema = BaseRecognitionExceptionSchema.extend({
4192
4176
  var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
4193
4177
  errorType: z.literal(ErrorTypeV1.CONNECTION_ERROR),
4194
4178
  isImmediatelyAvailable: z.literal(true),
4195
- // Can tell user about network issues
4196
4179
  /** Number of connection attempts made */
4197
4180
  attempts: z.number().optional(),
4198
4181
  /** URL that failed to connect */
@@ -4203,7 +4186,6 @@ var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
4203
4186
  var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
4204
4187
  errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
4205
4188
  isImmediatelyAvailable: z.literal(false),
4206
- // Unknown cause, not user-facing
4207
4189
  /** Stack trace for debugging (sanitized) */
4208
4190
  stack: z.string().optional(),
4209
4191
  /** Additional context for debugging */
@@ -4256,16 +4238,11 @@ var SlotMapSchema = z.record(z.string(), z.array(z.string()));
4256
4238
  var GameContextSchemaV1 = z.object({
4257
4239
  type: z.literal(RecognitionContextTypeV1.GAME_CONTEXT),
4258
4240
  gameId: z.string(),
4259
- // Unique identifier for the game
4260
4241
  gamePhase: z.string(),
4261
- // Current phase of the game
4262
4242
  // Prompt fields for different processing stages
4263
4243
  promptSTT: z.string().optional(),
4264
- // Speech-to-Text: Keywords/phrases to optimize ASR (e.g., for Deepgram keywords)
4265
4244
  promptSTF: z.string().optional(),
4266
- // Speech-to-Function: Direct speech to function mapping instructions
4267
4245
  promptTTF: z.string().optional(),
4268
- // Text-to-Function: Text to function mapping instructions after transcription
4269
4246
  // Slot map for entity extraction
4270
4247
  slotMap: SlotMapSchema.optional()
4271
4248
  // Map of slot names to possible values to enhance prompting and keyword matching.
@@ -4292,7 +4269,6 @@ var ASRRequestSchemaV1 = z.object({
4292
4269
  type: z.literal(RecognitionContextTypeV1.ASR_REQUEST),
4293
4270
  // Session identification
4294
4271
  audioUtteranceId: z.string().optional(),
4295
- // Optional utterance ID (prefer query param)
4296
4272
  // ASR configuration
4297
4273
  provider: z.string(),
4298
4274
  model: z.string().optional(),
@@ -4301,12 +4277,9 @@ var ASRRequestSchemaV1 = z.object({
4301
4277
  encoding: z.number(),
4302
4278
  // Recognition options
4303
4279
  interimResults: z.boolean().optional().default(false),
4304
- // Enable interim/partial results
4305
4280
  useContext: z.boolean().optional().default(false),
4306
- // If true, requires GameContext before session creation
4307
4281
  // Final transcript stability mode (timeout for fallback final transcript)
4308
4282
  finalTranscriptStability: z.string().optional(),
4309
- // Stability mode: 'aggressive' | 'balanced' | 'conservative'
4310
4283
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
4311
4284
  debugCommand: RequestDebugCommandSchema
4312
4285
  });
@@ -4319,31 +4292,22 @@ var RecognitionContextSchemaV1 = z.discriminatedUnion("type", [
4319
4292
  // ../../libs/types/dist/recognition-query.types.js
4320
4293
  var RecognitionGameInfoSchema = z.object({
4321
4294
  userId: z.string().optional(),
4322
- // User identifier
4323
4295
  gameSessionId: z.string().optional(),
4324
- // Volly session identifier . Called "sessionId" in Platform and most games.
4325
4296
  deviceId: z.string().optional(),
4326
- // Device identifier
4327
4297
  accountId: z.string().optional(),
4328
- // Account identifier
4329
4298
  gameId: z.string().optional(),
4330
- // Game identifier
4331
4299
  gamePhase: z.string().optional(),
4332
- // Game phase or scene. A string that describes the current state of the game for client team to optimize specific turn.
4333
4300
  questionAnswerId: z.string().optional(),
4334
- // Question answer identifier for tracking Q&A sessions
4335
4301
  platform: z.string().optional()
4336
4302
  // Platform for audio recording device (use the definition of platform teams)
4337
4303
  });
4338
4304
  var RecognitionQueryMetadataSchema = z.object({
4339
4305
  audioUtteranceId: z.string(),
4340
- // Audio utterance ID to query
4341
4306
  recognitionGameInfo: RecognitionGameInfoSchema.optional()
4342
4307
  // Optional game info for security
4343
4308
  });
4344
4309
  var UtteranceResourceResponseSchema = z.object({
4345
4310
  audioUtteranceId: z.string(),
4346
- // Audio utterance ID
4347
4311
  results: z.array(z.any())
4348
4312
  // Array of RecognitionResult (version-specific)
4349
4313
  });
@@ -4609,10 +4573,7 @@ var GeminiModel;
4609
4573
  GeminiModel2["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
4610
4574
  GeminiModel2["GEMINI_2_5_FLASH_LITE"] = "gemini-2.5-flash-lite";
4611
4575
  GeminiModel2["GEMINI_2_0_FLASH_LATEST"] = "gemini-2.0-flash-latest";
4612
- GeminiModel2["GEMINI_2_0_FLASH"] = "gemini-2.0-flash-002";
4613
4576
  GeminiModel2["GEMINI_2_0_FLASH_EXP"] = "gemini-2.0-flash-exp";
4614
- GeminiModel2["GEMINI_1_5_FLASH"] = "gemini-1.5-flash";
4615
- GeminiModel2["GEMINI_1_5_PRO"] = "gemini-1.5-pro";
4616
4577
  })(GeminiModel || (GeminiModel = {}));
4617
4578
  var GeminiApiVersion;
4618
4579
  (function(GeminiApiVersion2) {
@@ -4809,7 +4770,7 @@ var WebSocketAudioClient = class {
4809
4770
  // ../../libs/websocket/dist/core/audio-upload-websocket-server.js
4810
4771
  import { WebSocketServer, WebSocket as WebSocket2 } from "ws";
4811
4772
 
4812
- // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/stringify.js
4773
+ // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/stringify.js
4813
4774
  var byteToHex = [];
4814
4775
  for (let i = 0; i < 256; ++i) {
4815
4776
  byteToHex.push((i + 256).toString(16).slice(1));
@@ -4818,7 +4779,7 @@ function unsafeStringify(arr, offset = 0) {
4818
4779
  return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
4819
4780
  }
4820
4781
 
4821
- // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/rng.js
4782
+ // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/rng.js
4822
4783
  var getRandomValues;
4823
4784
  var rnds8 = new Uint8Array(16);
4824
4785
  function rng() {
@@ -4831,27 +4792,21 @@ function rng() {
4831
4792
  return getRandomValues(rnds8);
4832
4793
  }
4833
4794
 
4834
- // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/native.js
4795
+ // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/native.js
4835
4796
  var randomUUID = typeof crypto !== "undefined" && crypto.randomUUID && crypto.randomUUID.bind(crypto);
4836
4797
  var native_default = { randomUUID };
4837
4798
 
4838
- // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/v4.js
4799
+ // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/v4.js
4839
4800
  function v4(options, buf, offset) {
4840
4801
  if (native_default.randomUUID && !buf && !options) {
4841
4802
  return native_default.randomUUID();
4842
4803
  }
4843
4804
  options = options || {};
4844
- const rnds = options.random ?? options.rng?.() ?? rng();
4845
- if (rnds.length < 16) {
4846
- throw new Error("Random bytes length must be >= 16");
4847
- }
4805
+ const rnds = options.random || (options.rng || rng)();
4848
4806
  rnds[6] = rnds[6] & 15 | 64;
4849
4807
  rnds[8] = rnds[8] & 63 | 128;
4850
4808
  if (buf) {
4851
4809
  offset = offset || 0;
4852
- if (offset < 0 || offset + 16 > buf.length) {
4853
- throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
4854
- }
4855
4810
  for (let i = 0; i < 16; ++i) {
4856
4811
  buf[offset + i] = rnds[i];
4857
4812
  }
@@ -5638,16 +5593,65 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5638
5593
  this.config.onConnected();
5639
5594
  }
5640
5595
  onDisconnected(code, reason) {
5641
- this.log("debug", "WebSocket disconnected", { code, reason, previousState: this.state });
5596
+ const closeCodeDescription = this.getCloseCodeDescription(code);
5597
+ const is1006 = code === 1006;
5598
+ this.log("debug", "[DIAGNOSTIC] WebSocket disconnected", {
5599
+ code,
5600
+ codeDescription: closeCodeDescription,
5601
+ reason: reason || "(empty)",
5602
+ previousState: this.state,
5603
+ is1006Abnormal: is1006,
5604
+ audioChunksSent: this.audioChunksSent,
5605
+ audioBytesSent: this.audioBytesSent,
5606
+ bufferStats: this.audioBuffer.getStats()
5607
+ });
5642
5608
  if (this.state === "stopping" /* STOPPING */) {
5643
5609
  this.state = "stopped" /* STOPPED */;
5644
5610
  } else if (this.state === "connected" /* CONNECTED */ || this.state === "ready" /* READY */ || this.state === "connecting" /* CONNECTING */) {
5645
- this.log("error", "Unexpected disconnection", { code, reason });
5611
+ this.log("error", "[DIAGNOSTIC] Unexpected disconnection", {
5612
+ code,
5613
+ codeDescription: closeCodeDescription,
5614
+ reason: reason || "(empty)",
5615
+ is1006,
5616
+ possibleCauses: is1006 ? [
5617
+ "Network connection lost",
5618
+ "Server process crashed",
5619
+ "Provider (Deepgram/AssemblyAI) WebSocket closed abnormally",
5620
+ "Firewall/proxy terminated connection",
5621
+ "Browser/tab suspended (mobile)"
5622
+ ] : []
5623
+ });
5646
5624
  this.state = "failed" /* FAILED */;
5647
5625
  }
5648
5626
  this.cleanup();
5649
5627
  this.config.onDisconnected(code, reason);
5650
5628
  }
5629
+ /**
5630
+ * Get human-readable description for WebSocket close code
5631
+ */
5632
+ getCloseCodeDescription(code) {
5633
+ const descriptions = {
5634
+ 1e3: "Normal Closure",
5635
+ 1001: "Going Away",
5636
+ 1002: "Protocol Error",
5637
+ 1003: "Unsupported Data",
5638
+ 1005: "No Status Received",
5639
+ 1006: "Abnormal Closure (no close frame received)",
5640
+ 1007: "Invalid Frame Payload",
5641
+ 1008: "Policy Violation",
5642
+ 1009: "Message Too Big",
5643
+ 1010: "Mandatory Extension",
5644
+ 1011: "Internal Server Error",
5645
+ 1012: "Service Restart",
5646
+ 1013: "Try Again Later",
5647
+ 4e3: "Auth Required",
5648
+ 4001: "Auth Failed",
5649
+ 4002: "Rate Limit Exceeded",
5650
+ 4003: "Invalid Session",
5651
+ 4004: "Session Expired"
5652
+ };
5653
+ return descriptions[code] || `Unknown (${code})`;
5654
+ }
5651
5655
  onError(error) {
5652
5656
  this.state = "failed" /* FAILED */;
5653
5657
  const errorResult = {
@@ -5924,8 +5928,11 @@ var RecognitionVGFStateSchema = z.object({
5924
5928
  finalFunctionCallTimestamp: z.string().optional(),
5925
5929
  // When the final action after interpreting the transcript was taken. Immutable.
5926
5930
  // Support for prompt slot mapping - passed to recognition context when present
5927
- promptSlotMap: z.record(z.string(), z.array(z.string())).optional()
5931
+ promptSlotMap: z.record(z.string(), z.array(z.string())).optional(),
5928
5932
  // Optional map of slot names to prompt values for recognition context
5933
+ // Recognition action processing state - managed externally, SDK preserves but never modifies
5934
+ recognitionActionProcessingState: z.string().optional()
5935
+ // "NOT_STARTED", "IN_PROGRESS", "COMPLETED"
5929
5936
  });
5930
5937
  var RecordingStatus = {
5931
5938
  NOT_READY: "NOT_READY",
@@ -5941,12 +5948,18 @@ var TranscriptionStatus = {
5941
5948
  // Session was cancelled/abandoned by user
5942
5949
  ERROR: "ERROR"
5943
5950
  };
5951
+ var RecognitionActionProcessingState = {
5952
+ NOT_STARTED: "NOT_STARTED",
5953
+ IN_PROGRESS: "IN_PROGRESS",
5954
+ COMPLETED: "COMPLETED"
5955
+ };
5944
5956
  function createInitialRecognitionState(audioUtteranceId) {
5945
5957
  return {
5946
5958
  audioUtteranceId,
5947
5959
  startRecordingStatus: RecordingStatus.NOT_READY,
5948
5960
  transcriptionStatus: TranscriptionStatus.NOT_STARTED,
5949
- pendingTranscript: ""
5961
+ pendingTranscript: "",
5962
+ recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED
5950
5963
  };
5951
5964
  }
5952
5965
  function isValidRecordingStatusTransition(from, to) {
@@ -6039,20 +6052,24 @@ var SimplifiedVGFRecognitionClient = class {
6039
6052
  this.isRecordingAudio = false;
6040
6053
  const { onStateChange, initialState, ...clientConfig } = config;
6041
6054
  this.stateChangeCallback = onStateChange;
6055
+ this.logger = clientConfig.logger;
6042
6056
  if (initialState) {
6043
- if (initialState.transcriptionStatus === TranscriptionStatus.ABORTED || initialState.transcriptionStatus === TranscriptionStatus.FINALIZED) {
6057
+ const needsNewUuid = !initialState.audioUtteranceId || initialState.audioUtteranceId === "" || initialState.transcriptionStatus === TranscriptionStatus.ABORTED || initialState.transcriptionStatus === TranscriptionStatus.FINALIZED;
6058
+ if (needsNewUuid) {
6044
6059
  const newUUID = crypto.randomUUID();
6045
6060
  if (clientConfig.logger) {
6046
- clientConfig.logger("info", `Terminal session detected (${initialState.transcriptionStatus}), generating new UUID: ${newUUID}`);
6061
+ const reason = !initialState.audioUtteranceId ? "Missing UUID" : initialState.audioUtteranceId === "" ? "Empty UUID" : `Terminal session (${initialState.transcriptionStatus})`;
6062
+ clientConfig.logger("info", `${reason} detected, generating new UUID: ${newUUID}`);
6047
6063
  }
6048
6064
  this.state = {
6049
6065
  ...initialState,
6050
6066
  audioUtteranceId: newUUID,
6051
- // Reset status fields for fresh session
6052
- transcriptionStatus: TranscriptionStatus.NOT_STARTED,
6053
- startRecordingStatus: RecordingStatus.READY,
6054
- // Clear previous session's transcript
6055
- finalTranscript: void 0
6067
+ // Reset status fields for fresh session if terminal state
6068
+ ...initialState.transcriptionStatus === TranscriptionStatus.ABORTED || initialState.transcriptionStatus === TranscriptionStatus.FINALIZED ? {
6069
+ transcriptionStatus: TranscriptionStatus.NOT_STARTED,
6070
+ startRecordingStatus: RecordingStatus.READY,
6071
+ finalTranscript: void 0
6072
+ } : {}
6056
6073
  };
6057
6074
  clientConfig.audioUtteranceId = newUUID;
6058
6075
  if (onStateChange) {
@@ -6068,6 +6085,7 @@ var SimplifiedVGFRecognitionClient = class {
6068
6085
  this.state = createVGFStateFromConfig(clientConfig);
6069
6086
  }
6070
6087
  this.state = { ...this.state, startRecordingStatus: "READY" };
6088
+ this.expectedUuid = this.state.audioUtteranceId;
6071
6089
  if (this.state.promptSlotMap) {
6072
6090
  if (clientConfig.asrRequestConfig) {
6073
6091
  clientConfig.asrRequestConfig.useContext = true;
@@ -6084,6 +6102,18 @@ var SimplifiedVGFRecognitionClient = class {
6084
6102
  ...clientConfig,
6085
6103
  // These callbacks ONLY update the VGF state sink
6086
6104
  onTranscript: (result) => {
6105
+ if (result.audioUtteranceId && result.audioUtteranceId !== this.expectedUuid) {
6106
+ if (this.logger) {
6107
+ this.logger(
6108
+ "warn",
6109
+ `[VGF] Skipping transcript update: UUID mismatch (expected: ${this.expectedUuid}, got: ${result.audioUtteranceId})`
6110
+ );
6111
+ }
6112
+ if (clientConfig.onTranscript) {
6113
+ clientConfig.onTranscript(result);
6114
+ }
6115
+ return;
6116
+ }
6087
6117
  this.state = mapTranscriptionResultToState(this.state, result, this.isRecordingAudio);
6088
6118
  this.notifyStateChange();
6089
6119
  if (clientConfig.onTranscript) {
@@ -6091,6 +6121,18 @@ var SimplifiedVGFRecognitionClient = class {
6091
6121
  }
6092
6122
  },
6093
6123
  onMetadata: (metadata) => {
6124
+ if (metadata.audioUtteranceId && metadata.audioUtteranceId !== this.expectedUuid) {
6125
+ if (this.logger) {
6126
+ this.logger(
6127
+ "warn",
6128
+ `[VGF] Skipping metadata update: UUID mismatch (expected: ${this.expectedUuid}, got: ${metadata.audioUtteranceId})`
6129
+ );
6130
+ }
6131
+ if (clientConfig.onMetadata) {
6132
+ clientConfig.onMetadata(metadata);
6133
+ }
6134
+ return;
6135
+ }
6094
6136
  this.state = mapMetadataToState(this.state, metadata);
6095
6137
  this.notifyStateChange();
6096
6138
  if (clientConfig.onMetadata) {
@@ -6103,6 +6145,18 @@ var SimplifiedVGFRecognitionClient = class {
6103
6145
  }
6104
6146
  },
6105
6147
  onError: (error) => {
6148
+ if (error.audioUtteranceId && error.audioUtteranceId !== this.expectedUuid) {
6149
+ if (this.logger) {
6150
+ this.logger(
6151
+ "warn",
6152
+ `[VGF] Skipping error update: UUID mismatch (expected: ${this.expectedUuid}, got: ${error.audioUtteranceId})`
6153
+ );
6154
+ }
6155
+ if (clientConfig.onError) {
6156
+ clientConfig.onError(error);
6157
+ }
6158
+ return;
6159
+ }
6106
6160
  this.isRecordingAudio = false;
6107
6161
  this.state = mapErrorToState(this.state, error);
6108
6162
  this.notifyStateChange();
@@ -6211,8 +6265,10 @@ export {
6211
6265
  ControlSignalTypeV1 as ControlSignal,
6212
6266
  ControlSignalTypeV1,
6213
6267
  DeepgramModel,
6268
+ ElevenLabsModel,
6214
6269
  ErrorTypeV1,
6215
6270
  FinalTranscriptStability,
6271
+ FireworksModel,
6216
6272
  GeminiModel,
6217
6273
  GoogleModel,
6218
6274
  Language,