@volley/recognition-client-sdk 0.1.423 → 0.1.621

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- // ../../node_modules/.pnpm/zod@3.22.4/node_modules/zod/lib/index.mjs
1
+ // ../../node_modules/.pnpm/zod@3.22.5/node_modules/zod/lib/index.mjs
2
2
  var util;
3
3
  (function(util2) {
4
4
  util2.assertEqual = (val) => val;
@@ -3741,6 +3741,10 @@ var RecognitionProvider;
3741
3741
  RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
3742
3742
  RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
3743
3743
  RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
3744
+ RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
3745
+ RecognitionProvider2["DASHSCOPE"] = "dashscope";
3746
+ RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3747
+ RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
3744
3748
  })(RecognitionProvider || (RecognitionProvider = {}));
3745
3749
  var RecognitionMode;
3746
3750
  (function(RecognitionMode2) {
@@ -3786,8 +3790,18 @@ var ElevenLabsModel;
3786
3790
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3787
3791
  var OpenAIRealtimeModel;
3788
3792
  (function(OpenAIRealtimeModel2) {
3793
+ OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
3789
3794
  OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
3790
3795
  })(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
3796
+ var MistralVoxtralModel;
3797
+ (function(MistralVoxtralModel2) {
3798
+ MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
3799
+ })(MistralVoxtralModel || (MistralVoxtralModel = {}));
3800
+ var DashScopeModel;
3801
+ (function(DashScopeModel2) {
3802
+ DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
3803
+ DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
3804
+ })(DashScopeModel || (DashScopeModel = {}));
3791
3805
 
3792
3806
  // ../../libs/types/dist/recognition-result-v1.types.js
3793
3807
  var RecognitionResultTypeV1;
@@ -3803,8 +3817,10 @@ var TranscriptionResultSchemaV1 = z.object({
3803
3817
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
3804
3818
  audioUtteranceId: z.string(),
3805
3819
  finalTranscript: z.string(),
3820
+ finalTranscriptRaw: z.string(),
3806
3821
  finalTranscriptConfidence: z.number().min(0).max(1).optional(),
3807
3822
  pendingTranscript: z.string().optional(),
3823
+ pendingTranscriptRaw: z.string().optional(),
3808
3824
  pendingTranscriptConfidence: z.number().min(0).max(1).optional(),
3809
3825
  is_finished: z.boolean(),
3810
3826
  voiceStart: z.number().optional(),
@@ -3813,8 +3829,9 @@ var TranscriptionResultSchemaV1 = z.object({
3813
3829
  startTimestamp: z.number().optional(),
3814
3830
  endTimestamp: z.number().optional(),
3815
3831
  receivedAtMs: z.number().optional(),
3816
- accumulatedAudioTimeMs: z.number().optional()
3817
- // accumulated audio time watermark in milliseconds. Total duration of all audio chunks sent. Optional.
3832
+ accumulatedAudioTimeMs: z.number().optional(),
3833
+ rawAudioTimeMs: z.number().optional()
3834
+ // Total audio duration sent to provider (includes prefix)
3818
3835
  });
3819
3836
  var FunctionCallResultSchemaV1 = z.object({
3820
3837
  type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
@@ -3827,11 +3844,22 @@ var TranscriptOutcomeType;
3827
3844
  TranscriptOutcomeType2["WITH_CONTENT"] = "with_content";
3828
3845
  TranscriptOutcomeType2["EMPTY"] = "empty";
3829
3846
  TranscriptOutcomeType2["NEVER_SENT"] = "never_sent";
3847
+ TranscriptOutcomeType2["ERROR_AUTHENTICATION"] = "error_authentication";
3848
+ TranscriptOutcomeType2["ERROR_VALIDATION"] = "error_validation";
3849
+ TranscriptOutcomeType2["ERROR_PROVIDER"] = "error_provider";
3850
+ TranscriptOutcomeType2["ERROR_TIMEOUT"] = "error_timeout";
3851
+ TranscriptOutcomeType2["ERROR_QUOTA"] = "error_quota";
3852
+ TranscriptOutcomeType2["ERROR_INTERNAL_QUOTA"] = "error_internal_quota";
3853
+ TranscriptOutcomeType2["ERROR_CONNECTION"] = "error_connection";
3854
+ TranscriptOutcomeType2["ERROR_NO_AUDIO"] = "error_no_audio";
3855
+ TranscriptOutcomeType2["ERROR_CIRCUIT_BREAKER"] = "error_circuit_breaker";
3856
+ TranscriptOutcomeType2["ERROR_UNKNOWN"] = "error_unknown";
3830
3857
  })(TranscriptOutcomeType || (TranscriptOutcomeType = {}));
3831
3858
  var MetadataResultSchemaV1 = z.object({
3832
3859
  type: z.literal(RecognitionResultTypeV1.METADATA),
3833
3860
  audioUtteranceId: z.string(),
3834
3861
  // Timing information
3862
+ connectionInitiatedAtMs: z.number().optional(),
3835
3863
  recordingStartMs: z.number().optional(),
3836
3864
  recordingEndMs: z.number().optional(),
3837
3865
  transcriptEndMs: z.number().optional(),
@@ -3840,6 +3868,7 @@ var MetadataResultSchemaV1 = z.object({
3840
3868
  duration: z.number().optional(),
3841
3869
  volume: z.number().optional(),
3842
3870
  accumulatedAudioTimeMs: z.number().optional(),
3871
+ rawAudioTimeMs: z.number().optional(),
3843
3872
  // Cost Information
3844
3873
  costInUSD: z.number().default(0).optional(),
3845
3874
  // ASR API Type
@@ -3849,7 +3878,22 @@ var MetadataResultSchemaV1 = z.object({
3849
3878
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
3850
3879
  rawAsrMetadata: z.string().optional(),
3851
3880
  // Transcript outcome - categorizes the final transcript state
3852
- transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional()
3881
+ transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional(),
3882
+ // Audio metrics - embedded audio quality metrics (from AudioMetricsAccumulator)
3883
+ // Omit 'type' field since it's embedded in METADATA, not a separate message
3884
+ audioMetrics: z.object({
3885
+ valid: z.boolean(),
3886
+ audioBeginMs: z.number(),
3887
+ audioEndMs: z.number(),
3888
+ maxVolume: z.number(),
3889
+ minVolume: z.number(),
3890
+ avgVolume: z.number(),
3891
+ silenceRatio: z.number(),
3892
+ clippingRatio: z.number(),
3893
+ snrEstimate: z.number().nullable(),
3894
+ lastNonSilenceMs: z.number(),
3895
+ timestamp: z.string()
3896
+ }).optional()
3853
3897
  });
3854
3898
  var ErrorTypeV1;
3855
3899
  (function(ErrorTypeV12) {
@@ -3858,7 +3902,10 @@ var ErrorTypeV1;
3858
3902
  ErrorTypeV12["PROVIDER_ERROR"] = "provider_error";
3859
3903
  ErrorTypeV12["TIMEOUT_ERROR"] = "timeout_error";
3860
3904
  ErrorTypeV12["QUOTA_EXCEEDED"] = "quota_exceeded";
3905
+ ErrorTypeV12["INTERNAL_QUOTA_EXHAUSTED"] = "internal_quota_exhausted";
3861
3906
  ErrorTypeV12["CONNECTION_ERROR"] = "connection_error";
3907
+ ErrorTypeV12["NO_AUDIO_ERROR"] = "no_audio_error";
3908
+ ErrorTypeV12["CIRCUIT_BREAKER_OPEN"] = "circuit_breaker_open";
3862
3909
  ErrorTypeV12["UNKNOWN_ERROR"] = "unknown_error";
3863
3910
  })(ErrorTypeV1 || (ErrorTypeV1 = {}));
3864
3911
  var ErrorResultSchemaV1 = z.object({
@@ -4075,6 +4122,12 @@ var TimerSchema = z.object({
4075
4122
  * Provider that generated this message
4076
4123
  */
4077
4124
  provider: z.nativeEnum(RecognitionProvider).optional(),
4125
+ /**
4126
+ * Timestamp when provider connection was initiated (in milliseconds)
4127
+ * Set before doConnect() - captures the moment before WebSocket creation starts
4128
+ * @example 1704095999800
4129
+ */
4130
+ connectionInitiatedAtMs: z.number().optional(),
4078
4131
  /**
4079
4132
  * Timestamp when recording started (in milliseconds)
4080
4133
  * @example 1704096000000
@@ -4208,6 +4261,14 @@ var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
4208
4261
  /** Underlying error message */
4209
4262
  underlyingError: z.string().optional()
4210
4263
  });
4264
+ var CircuitBreakerExceptionSchema = BaseRecognitionExceptionSchema.extend({
4265
+ errorType: z.literal(ErrorTypeV1.CIRCUIT_BREAKER_OPEN),
4266
+ isImmediatelyAvailable: z.literal(true),
4267
+ /** Provider that is unavailable */
4268
+ provider: z.nativeEnum(RecognitionProvider).optional(),
4269
+ /** Model that is unavailable */
4270
+ model: z.string().optional()
4271
+ });
4211
4272
  var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
4212
4273
  errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
4213
4274
  isImmediatelyAvailable: z.literal(false),
@@ -4223,6 +4284,7 @@ var RecognitionExceptionSchema = z.discriminatedUnion("errorType", [
4223
4284
  TimeoutExceptionSchema,
4224
4285
  QuotaExceededExceptionSchema,
4225
4286
  ConnectionExceptionSchema,
4287
+ CircuitBreakerExceptionSchema,
4226
4288
  UnknownExceptionSchema
4227
4289
  ]);
4228
4290
  function isExceptionImmediatelyAvailable(exception) {
@@ -4244,6 +4306,8 @@ function getUserFriendlyMessage(exception) {
4244
4306
  return exception.message || "Rate limit exceeded. Please try again later.";
4245
4307
  case ErrorTypeV1.CONNECTION_ERROR:
4246
4308
  return exception.message || "Connection failed. Please check your network and try again.";
4309
+ case ErrorTypeV1.CIRCUIT_BREAKER_OPEN:
4310
+ return exception.message || "Service temporarily unavailable. Please try again.";
4247
4311
  }
4248
4312
  }
4249
4313
 
@@ -4259,6 +4323,12 @@ var ControlSignalTypeV1;
4259
4323
  ControlSignalTypeV12["START_RECORDING"] = "start_recording";
4260
4324
  ControlSignalTypeV12["STOP_RECORDING"] = "stop_recording";
4261
4325
  })(ControlSignalTypeV1 || (ControlSignalTypeV1 = {}));
4326
+ var PrefixMode;
4327
+ (function(PrefixMode2) {
4328
+ PrefixMode2["NONE"] = "none";
4329
+ PrefixMode2["CLIENT"] = "client";
4330
+ PrefixMode2["STORED"] = "stored";
4331
+ })(PrefixMode || (PrefixMode = {}));
4262
4332
  var SlotMapSchema = z.record(z.string(), z.array(z.string()));
4263
4333
  var GameContextSchemaV1 = z.object({
4264
4334
  type: z.literal(RecognitionContextTypeV1.GAME_CONTEXT),
@@ -4290,6 +4360,19 @@ var RequestDebugCommandSchema = z.object({
4290
4360
  // Enable experimental pilot models for testing new features
4291
4361
  enablePilotModels: z.boolean().optional().default(false)
4292
4362
  }).optional();
4363
+ var FallbackASRConfigSchema = z.object({
4364
+ // Required - the fallback provider to use
4365
+ provider: z.string(),
4366
+ // Optional - inherits from primary if not specified
4367
+ model: z.string().optional(),
4368
+ language: z.string().optional(),
4369
+ sampleRate: z.number().optional(),
4370
+ encoding: z.number().optional(),
4371
+ // Recognition options - optional, inherits from primary
4372
+ interimResults: z.boolean().optional(),
4373
+ useContext: z.boolean().optional(),
4374
+ finalTranscriptStability: z.string().optional()
4375
+ });
4293
4376
  var ASRRequestSchemaV1 = z.object({
4294
4377
  type: z.literal(RecognitionContextTypeV1.ASR_REQUEST),
4295
4378
  // Session identification
@@ -4305,6 +4388,16 @@ var ASRRequestSchemaV1 = z.object({
4305
4388
  useContext: z.boolean().optional().default(false),
4306
4389
  // Final transcript stability mode (timeout for fallback final transcript)
4307
4390
  finalTranscriptStability: z.string().optional(),
4391
+ // Traffic control priority (affects quota slot allocation)
4392
+ // 'high' = can use all quota slots (reserved for critical games like song-quiz)
4393
+ // 'low' = limited to non-reserved slots (default for most requests)
4394
+ priority: z.enum(["low", "high"]).optional().default("low"),
4395
+ // Fallback providers - tried in order if primary provider is unavailable (circuit breaker open)
4396
+ fallbackModels: z.array(FallbackASRConfigSchema).optional(),
4397
+ // Prefix audio configuration
4398
+ prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
4399
+ prefixId: z.string().optional(),
4400
+ prefixTextToRemove: z.array(z.string()).optional(),
4308
4401
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
4309
4402
  debugCommand: RequestDebugCommandSchema
4310
4403
  });
@@ -4322,6 +4415,8 @@ var RecognitionGameInfoSchema = z.object({
4322
4415
  accountId: z.string().optional(),
4323
4416
  gameId: z.string().optional(),
4324
4417
  gamePhase: z.string().optional(),
4418
+ questionAskedId: z.string().optional(),
4419
+ /** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
4325
4420
  questionAnswerId: z.string().optional(),
4326
4421
  platform: z.string().optional()
4327
4422
  // Platform for audio recording device (use the definition of platform teams)
@@ -4456,6 +4551,7 @@ var AudioEncoding;
4456
4551
  }
4457
4552
  AudioEncoding2.isNameValid = isNameValid;
4458
4553
  })(AudioEncoding || (AudioEncoding = {}));
4554
+ var PREFIX_AUDIO_ENCODING_OFFSET = 128;
4459
4555
  var SampleRate;
4460
4556
  (function(SampleRate2) {
4461
4557
  SampleRate2[SampleRate2["RATE_8000"] = 8e3] = "RATE_8000";
@@ -4557,6 +4653,7 @@ function createDefaultASRConfig(overrides) {
4557
4653
  var PlumbingType;
4558
4654
  (function(PlumbingType2) {
4559
4655
  PlumbingType2["AUDIO"] = "audio";
4656
+ PlumbingType2["PREFIX_AUDIO"] = "prefix_audio";
4560
4657
  PlumbingType2["CONTROL"] = "control";
4561
4658
  PlumbingType2["RESULT"] = "result";
4562
4659
  PlumbingType2["RECOGNITION_CONTEXT"] = "recognition_context";
@@ -4635,6 +4732,11 @@ var StatsIncrementType;
4635
4732
  StatsIncrementType2["SUCCESS"] = "success";
4636
4733
  StatsIncrementType2["FAIL"] = "fail";
4637
4734
  })(StatsIncrementType || (StatsIncrementType = {}));
4735
+ var QuotaPriority;
4736
+ (function(QuotaPriority2) {
4737
+ QuotaPriority2[QuotaPriority2["LOW"] = 0] = "LOW";
4738
+ QuotaPriority2[QuotaPriority2["HIGH"] = 1] = "HIGH";
4739
+ })(QuotaPriority || (QuotaPriority = {}));
4638
4740
 
4639
4741
  // ../../libs/types/dist/stages.types.js
4640
4742
  var STAGES = {
@@ -4811,7 +4913,7 @@ var WebSocketAudioClient = class {
4811
4913
  // ../../libs/websocket/dist/core/audio-upload-websocket-server.js
4812
4914
  import { WebSocketServer, WebSocket as WebSocket2 } from "ws";
4813
4915
 
4814
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/stringify.js
4916
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/stringify.js
4815
4917
  var byteToHex = [];
4816
4918
  for (let i = 0; i < 256; ++i) {
4817
4919
  byteToHex.push((i + 256).toString(16).slice(1));
@@ -4820,7 +4922,7 @@ function unsafeStringify(arr, offset = 0) {
4820
4922
  return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
4821
4923
  }
4822
4924
 
4823
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/rng.js
4925
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/rng.js
4824
4926
  var getRandomValues;
4825
4927
  var rnds8 = new Uint8Array(16);
4826
4928
  function rng() {
@@ -4833,21 +4935,27 @@ function rng() {
4833
4935
  return getRandomValues(rnds8);
4834
4936
  }
4835
4937
 
4836
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/native.js
4938
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/native.js
4837
4939
  var randomUUID = typeof crypto !== "undefined" && crypto.randomUUID && crypto.randomUUID.bind(crypto);
4838
4940
  var native_default = { randomUUID };
4839
4941
 
4840
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/v4.js
4942
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/v4.js
4841
4943
  function v4(options, buf, offset) {
4842
4944
  if (native_default.randomUUID && !buf && !options) {
4843
4945
  return native_default.randomUUID();
4844
4946
  }
4845
4947
  options = options || {};
4846
- const rnds = options.random || (options.rng || rng)();
4948
+ const rnds = options.random ?? options.rng?.() ?? rng();
4949
+ if (rnds.length < 16) {
4950
+ throw new Error("Random bytes length must be >= 16");
4951
+ }
4847
4952
  rnds[6] = rnds[6] & 15 | 64;
4848
4953
  rnds[8] = rnds[8] & 63 | 128;
4849
4954
  if (buf) {
4850
4955
  offset = offset || 0;
4956
+ if (offset < 0 || offset + 16 > buf.length) {
4957
+ throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
4958
+ }
4851
4959
  for (let i = 0; i < 16; ++i) {
4852
4960
  buf[offset + i] = rnds[i];
4853
4961
  }
@@ -5283,6 +5391,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5283
5391
  highWM: config.highWaterMark ?? 512e3,
5284
5392
  lowWM: config.lowWaterMark ?? 128e3
5285
5393
  });
5394
+ this.prefixBuffer = [];
5395
+ // Buffer prefix audio until READY
5396
+ this.prefixBufferBytes = 0;
5286
5397
  this.state = "initial" /* INITIAL */;
5287
5398
  // Debug control (internal state, controlled by debugCommand in ASRRequest)
5288
5399
  this.isDebugLogEnabled = false;
@@ -5339,6 +5450,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5339
5450
  static {
5340
5451
  this.PROTOCOL_VERSION = 1;
5341
5452
  }
5453
+ static {
5454
+ this.MAX_PREFIX_BUFFER_BYTES = 10 * 1024 * 1024;
5455
+ }
5342
5456
  // ==========================================================================
5343
5457
  // PRIVATE HELPERS
5344
5458
  // ==========================================================================
@@ -5364,6 +5478,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5364
5478
  cleanup() {
5365
5479
  this.log("debug", "Cleaning up resources");
5366
5480
  this.audioBuffer.clear();
5481
+ this.prefixBuffer = [];
5482
+ this.prefixBufferBytes = 0;
5367
5483
  this.audioBytesSent = 0;
5368
5484
  this.audioChunksSent = 0;
5369
5485
  this.lastAudioStatsLog = 0;
@@ -5514,9 +5630,13 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5514
5630
  }
5515
5631
  }
5516
5632
  }
5633
+ /**
5634
+ * Only active when the client is in READY state; otherwise it returns immediately.
5635
+ * @returns Promise that resolves when the recording is stopped
5636
+ */
5517
5637
  async stopRecording() {
5518
5638
  if (this.state !== "ready" /* READY */) {
5519
- this.log("debug", "stopRecording called but not in READY state", { state: this.state });
5639
+ this.log("warn", "stopRecording called but not in READY state", { state: this.state });
5520
5640
  return;
5521
5641
  }
5522
5642
  this.log("debug", "Stopping recording");
@@ -5576,6 +5696,25 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5576
5696
  isBufferOverflowing() {
5577
5697
  return this.audioBuffer.isOverflowing();
5578
5698
  }
5699
+ isServerReady() {
5700
+ return this.state === "ready" /* READY */;
5701
+ }
5702
+ sendGameContext(context) {
5703
+ if (this.state !== "connected" /* CONNECTED */ && this.state !== "ready" /* READY */) {
5704
+ this.log("warn", "sendGameContext called in wrong state", { state: this.state });
5705
+ return;
5706
+ }
5707
+ this.log("debug", "Sending game context (deferred)", {
5708
+ gameId: context.gameId,
5709
+ gamePhase: context.gamePhase,
5710
+ hasSlotMap: !!context.slotMap
5711
+ });
5712
+ super.sendMessage(
5713
+ _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
5714
+ "message",
5715
+ context
5716
+ );
5717
+ }
5579
5718
  getStats() {
5580
5719
  const bufferStats = this.audioBuffer.getStats();
5581
5720
  return {
@@ -5601,6 +5740,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5601
5740
  if (this.isDebugLogEnabled) {
5602
5741
  this.log("debug", "Sending ASR request", this.config.asrRequestConfig);
5603
5742
  }
5743
+ const fallbackModels = this.config.asrRequestConfig.fallbackModels;
5604
5744
  const asrRequest = {
5605
5745
  type: RecognitionContextTypeV1.ASR_REQUEST,
5606
5746
  audioUtteranceId: this.config.audioUtteranceId,
@@ -5616,7 +5756,20 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5616
5756
  ...this.config.asrRequestConfig.finalTranscriptStability && {
5617
5757
  finalTranscriptStability: this.config.asrRequestConfig.finalTranscriptStability
5618
5758
  },
5619
- ...debugCommand && { debugCommand }
5759
+ // Include fallbackModels if provided (for circuit breaker fallback)
5760
+ ...fallbackModels && { fallbackModels },
5761
+ ...debugCommand && { debugCommand },
5762
+ // Include prefix mode if provided (for server-side stored prefix injection)
5763
+ ...this.config.asrRequestConfig.prefixMode && {
5764
+ prefixMode: this.config.asrRequestConfig.prefixMode
5765
+ },
5766
+ ...this.config.asrRequestConfig.prefixId && {
5767
+ prefixId: this.config.asrRequestConfig.prefixId
5768
+ },
5769
+ // Include prefix text to remove if provided (for server-side prefix text removal)
5770
+ ...this.config.asrRequestConfig.prefixTextToRemove && {
5771
+ prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
5772
+ }
5620
5773
  };
5621
5774
  super.sendMessage(
5622
5775
  _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
@@ -5723,6 +5876,12 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5723
5876
  this.log("debug", "Server ready for audio upload");
5724
5877
  this.state = "ready" /* READY */;
5725
5878
  this.messageHandler.setSessionStartTime(Date.now());
5879
+ if (this.prefixBuffer.length > 0) {
5880
+ this.log("debug", "Flushing buffered prefix audio", { chunks: this.prefixBuffer.length });
5881
+ this.prefixBuffer.forEach((chunk) => this.sendPrefixAudioNow(chunk));
5882
+ this.prefixBuffer = [];
5883
+ this.prefixBufferBytes = 0;
5884
+ }
5726
5885
  const bufferedChunks = this.audioBuffer.flush();
5727
5886
  if (bufferedChunks.length > 0) {
5728
5887
  this.log("debug", "Flushing buffered audio", { chunks: bufferedChunks.length });
@@ -5754,6 +5913,74 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5754
5913
  this.audioBytesSent += byteLength;
5755
5914
  this.audioChunksSent++;
5756
5915
  }
5916
+ /**
5917
+ * Send prefix audio to the server.
5918
+ * Prefix audio is sent before user audio and is used for context/priming.
5919
+ * The server will process it but adjust timing so transcripts reflect user audio timing.
5920
+ *
5921
+ * Note: Prefix audio is buffered until READY state, then flushed before user audio.
5922
+ * This ensures proper ordering even if called before server is ready.
5923
+ *
5924
+ * @param audioData - Prefix audio data (ArrayBuffer, ArrayBufferView, or Blob)
5925
+ */
5926
+ sendPrefixAudio(audioData) {
5927
+ if (audioData instanceof Blob) {
5928
+ blobToArrayBuffer(audioData).then((arrayBuffer) => {
5929
+ this.sendPrefixAudioInternal(arrayBuffer);
5930
+ }).catch((error) => {
5931
+ this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
5932
+ });
5933
+ return;
5934
+ }
5935
+ this.sendPrefixAudioInternal(audioData);
5936
+ }
5937
+ /**
5938
+ * Internal method to handle prefix audio with buffering
5939
+ * Buffers if not READY, sends immediately if READY
5940
+ */
5941
+ sendPrefixAudioInternal(audioData) {
5942
+ const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
5943
+ if (bytes === 0) return;
5944
+ if (this.state === "stopped" /* STOPPED */ || this.state === "failed" /* FAILED */) {
5945
+ this.log("debug", "Ignoring prefix audio in terminal state", { bytes, state: this.state });
5946
+ return;
5947
+ }
5948
+ if (this.state === "ready" /* READY */) {
5949
+ this.log("debug", "Sending prefix audio immediately", { bytes });
5950
+ this.sendPrefixAudioNow(audioData);
5951
+ } else {
5952
+ if (this.prefixBufferBytes + bytes > _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES) {
5953
+ this.log("warn", "Prefix buffer limit exceeded, dropping chunk", {
5954
+ bytes,
5955
+ current: this.prefixBufferBytes,
5956
+ max: _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES
5957
+ });
5958
+ return;
5959
+ }
5960
+ this.log("debug", "Buffering prefix audio until READY", { bytes, state: this.state });
5961
+ this.prefixBuffer.push(audioData);
5962
+ this.prefixBufferBytes += bytes;
5963
+ }
5964
+ }
5965
+ /**
5966
+ * Send prefix audio immediately to the server (without buffering)
5967
+ * Uses encoding offset to mark as prefix audio
5968
+ * @param audioData - Prefix audio data to send
5969
+ */
5970
+ sendPrefixAudioNow(audioData) {
5971
+ const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
5972
+ if (byteLength === 0) return;
5973
+ const baseEncodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
5974
+ const prefixEncodingId = baseEncodingId + PREFIX_AUDIO_ENCODING_OFFSET;
5975
+ const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
5976
+ this.log("debug", "Sending prefix audio", { bytes: byteLength, encoding: prefixEncodingId });
5977
+ super.sendAudio(
5978
+ audioData,
5979
+ _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
5980
+ prefixEncodingId,
5981
+ sampleRate
5982
+ );
5983
+ }
5757
5984
  };
5758
5985
 
5759
5986
  // src/config-builder.ts
@@ -6131,6 +6358,7 @@ var SimplifiedVGFRecognitionClient = class {
6131
6358
  }
6132
6359
  } else {
6133
6360
  this.state = createVGFStateFromConfig(clientConfig);
6361
+ clientConfig.audioUtteranceId = this.state.audioUtteranceId;
6134
6362
  }
6135
6363
  this.state = { ...this.state, startRecordingStatus: "READY" };
6136
6364
  this.expectedUuid = this.state.audioUtteranceId;
@@ -6235,7 +6463,7 @@ var SimplifiedVGFRecognitionClient = class {
6235
6463
  this.isRecordingAudio = false;
6236
6464
  this.state = updateStateOnStop(this.state);
6237
6465
  this.notifyStateChange();
6238
- if (this.state.transcriptionStatus === TranscriptionStatus.NOT_STARTED) {
6466
+ if (this.client.getState() === "connected" /* CONNECTED */ || this.client.getState() === "connecting" /* CONNECTING */) {
6239
6467
  if (this.logger) {
6240
6468
  this.logger(
6241
6469
  "info",
@@ -6298,6 +6526,12 @@ var SimplifiedVGFRecognitionClient = class {
6298
6526
  isBufferOverflowing() {
6299
6527
  return this.client.isBufferOverflowing();
6300
6528
  }
6529
+ sendGameContext(context) {
6530
+ this.client.sendGameContext(context);
6531
+ }
6532
+ isServerReady() {
6533
+ return this.client.isServerReady();
6534
+ }
6301
6535
  // VGF State access (read-only for consumers)
6302
6536
  getVGFState() {
6303
6537
  return { ...this.state };
@@ -6343,6 +6577,7 @@ export {
6343
6577
  ConnectionError,
6344
6578
  ControlSignalTypeV1 as ControlSignal,
6345
6579
  ControlSignalTypeV1,
6580
+ DashScopeModel,
6346
6581
  DeepgramModel,
6347
6582
  ElevenLabsModel,
6348
6583
  ErrorTypeV1,
@@ -6351,6 +6586,7 @@ export {
6351
6586
  GeminiModel,
6352
6587
  GoogleModel,
6353
6588
  Language,
6589
+ MistralVoxtralModel,
6354
6590
  OpenAIModel,
6355
6591
  RECOGNITION_CONDUCTOR_BASES,
6356
6592
  RECOGNITION_SERVICE_BASES,