@volley/recognition-client-sdk 0.1.424 → 0.1.622

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- // ../../node_modules/.pnpm/zod@3.22.4/node_modules/zod/lib/index.mjs
1
+ // ../../node_modules/.pnpm/zod@3.22.5/node_modules/zod/lib/index.mjs
2
2
  var util;
3
3
  (function(util2) {
4
4
  util2.assertEqual = (val) => val;
@@ -3741,6 +3741,10 @@ var RecognitionProvider;
3741
3741
  RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
3742
3742
  RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
3743
3743
  RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
3744
+ RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
3745
+ RecognitionProvider2["DASHSCOPE"] = "dashscope";
3746
+ RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3747
+ RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
3744
3748
  })(RecognitionProvider || (RecognitionProvider = {}));
3745
3749
  var RecognitionMode;
3746
3750
  (function(RecognitionMode2) {
@@ -3786,8 +3790,18 @@ var ElevenLabsModel;
3786
3790
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3787
3791
  var OpenAIRealtimeModel;
3788
3792
  (function(OpenAIRealtimeModel2) {
3793
+ OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
3789
3794
  OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
3790
3795
  })(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
3796
+ var MistralVoxtralModel;
3797
+ (function(MistralVoxtralModel2) {
3798
+ MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
3799
+ })(MistralVoxtralModel || (MistralVoxtralModel = {}));
3800
+ var DashScopeModel;
3801
+ (function(DashScopeModel2) {
3802
+ DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
3803
+ DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
3804
+ })(DashScopeModel || (DashScopeModel = {}));
3791
3805
 
3792
3806
  // ../../libs/types/dist/recognition-result-v1.types.js
3793
3807
  var RecognitionResultTypeV1;
@@ -3803,18 +3817,22 @@ var TranscriptionResultSchemaV1 = z.object({
3803
3817
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
3804
3818
  audioUtteranceId: z.string(),
3805
3819
  finalTranscript: z.string(),
3820
+ finalTranscriptRaw: z.string(),
3806
3821
  finalTranscriptConfidence: z.number().min(0).max(1).optional(),
3807
3822
  pendingTranscript: z.string().optional(),
3823
+ pendingTranscriptRaw: z.string().optional(),
3808
3824
  pendingTranscriptConfidence: z.number().min(0).max(1).optional(),
3809
3825
  is_finished: z.boolean(),
3810
3826
  voiceStart: z.number().optional(),
3811
3827
  voiceDuration: z.number().optional(),
3812
3828
  voiceEnd: z.number().optional(),
3829
+ lastNonSilence: z.number().optional(),
3813
3830
  startTimestamp: z.number().optional(),
3814
3831
  endTimestamp: z.number().optional(),
3815
3832
  receivedAtMs: z.number().optional(),
3816
- accumulatedAudioTimeMs: z.number().optional()
3817
- // accumulated audio time watermark in milliseconds. Total duration of all audio chunks sent. Optional.
3833
+ accumulatedAudioTimeMs: z.number().optional(),
3834
+ rawAudioTimeMs: z.number().optional()
3835
+ // Total audio duration sent to provider (includes prefix)
3818
3836
  });
3819
3837
  var FunctionCallResultSchemaV1 = z.object({
3820
3838
  type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
@@ -3827,11 +3845,22 @@ var TranscriptOutcomeType;
3827
3845
  TranscriptOutcomeType2["WITH_CONTENT"] = "with_content";
3828
3846
  TranscriptOutcomeType2["EMPTY"] = "empty";
3829
3847
  TranscriptOutcomeType2["NEVER_SENT"] = "never_sent";
3848
+ TranscriptOutcomeType2["ERROR_AUTHENTICATION"] = "error_authentication";
3849
+ TranscriptOutcomeType2["ERROR_VALIDATION"] = "error_validation";
3850
+ TranscriptOutcomeType2["ERROR_PROVIDER"] = "error_provider";
3851
+ TranscriptOutcomeType2["ERROR_TIMEOUT"] = "error_timeout";
3852
+ TranscriptOutcomeType2["ERROR_QUOTA"] = "error_quota";
3853
+ TranscriptOutcomeType2["ERROR_INTERNAL_QUOTA"] = "error_internal_quota";
3854
+ TranscriptOutcomeType2["ERROR_CONNECTION"] = "error_connection";
3855
+ TranscriptOutcomeType2["ERROR_NO_AUDIO"] = "error_no_audio";
3856
+ TranscriptOutcomeType2["ERROR_CIRCUIT_BREAKER"] = "error_circuit_breaker";
3857
+ TranscriptOutcomeType2["ERROR_UNKNOWN"] = "error_unknown";
3830
3858
  })(TranscriptOutcomeType || (TranscriptOutcomeType = {}));
3831
3859
  var MetadataResultSchemaV1 = z.object({
3832
3860
  type: z.literal(RecognitionResultTypeV1.METADATA),
3833
3861
  audioUtteranceId: z.string(),
3834
3862
  // Timing information
3863
+ connectionInitiatedAtMs: z.number().optional(),
3835
3864
  recordingStartMs: z.number().optional(),
3836
3865
  recordingEndMs: z.number().optional(),
3837
3866
  transcriptEndMs: z.number().optional(),
@@ -3840,6 +3869,7 @@ var MetadataResultSchemaV1 = z.object({
3840
3869
  duration: z.number().optional(),
3841
3870
  volume: z.number().optional(),
3842
3871
  accumulatedAudioTimeMs: z.number().optional(),
3872
+ rawAudioTimeMs: z.number().optional(),
3843
3873
  // Cost Information
3844
3874
  costInUSD: z.number().default(0).optional(),
3845
3875
  // ASR API Type
@@ -3849,7 +3879,22 @@ var MetadataResultSchemaV1 = z.object({
3849
3879
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
3850
3880
  rawAsrMetadata: z.string().optional(),
3851
3881
  // Transcript outcome - categorizes the final transcript state
3852
- transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional()
3882
+ transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional(),
3883
+ // Audio metrics - embedded audio quality metrics (from AudioMetricsAccumulator)
3884
+ // Omit 'type' field since it's embedded in METADATA, not a separate message
3885
+ audioMetrics: z.object({
3886
+ valid: z.boolean(),
3887
+ audioBeginMs: z.number(),
3888
+ audioEndMs: z.number(),
3889
+ maxVolume: z.number(),
3890
+ minVolume: z.number(),
3891
+ avgVolume: z.number(),
3892
+ silenceRatio: z.number(),
3893
+ clippingRatio: z.number(),
3894
+ snrEstimate: z.number().nullable(),
3895
+ lastNonSilenceMs: z.number(),
3896
+ timestamp: z.string()
3897
+ }).optional()
3853
3898
  });
3854
3899
  var ErrorTypeV1;
3855
3900
  (function(ErrorTypeV12) {
@@ -3858,7 +3903,10 @@ var ErrorTypeV1;
3858
3903
  ErrorTypeV12["PROVIDER_ERROR"] = "provider_error";
3859
3904
  ErrorTypeV12["TIMEOUT_ERROR"] = "timeout_error";
3860
3905
  ErrorTypeV12["QUOTA_EXCEEDED"] = "quota_exceeded";
3906
+ ErrorTypeV12["INTERNAL_QUOTA_EXHAUSTED"] = "internal_quota_exhausted";
3861
3907
  ErrorTypeV12["CONNECTION_ERROR"] = "connection_error";
3908
+ ErrorTypeV12["NO_AUDIO_ERROR"] = "no_audio_error";
3909
+ ErrorTypeV12["CIRCUIT_BREAKER_OPEN"] = "circuit_breaker_open";
3862
3910
  ErrorTypeV12["UNKNOWN_ERROR"] = "unknown_error";
3863
3911
  })(ErrorTypeV1 || (ErrorTypeV1 = {}));
3864
3912
  var ErrorResultSchemaV1 = z.object({
@@ -4075,6 +4123,12 @@ var TimerSchema = z.object({
4075
4123
  * Provider that generated this message
4076
4124
  */
4077
4125
  provider: z.nativeEnum(RecognitionProvider).optional(),
4126
+ /**
4127
+ * Timestamp when provider connection was initiated (in milliseconds)
4128
+ * Set before doConnect() - captures the moment before WebSocket creation starts
4129
+ * @example 1704095999800
4130
+ */
4131
+ connectionInitiatedAtMs: z.number().optional(),
4078
4132
  /**
4079
4133
  * Timestamp when recording started (in milliseconds)
4080
4134
  * @example 1704096000000
@@ -4208,6 +4262,14 @@ var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
4208
4262
  /** Underlying error message */
4209
4263
  underlyingError: z.string().optional()
4210
4264
  });
4265
+ var CircuitBreakerExceptionSchema = BaseRecognitionExceptionSchema.extend({
4266
+ errorType: z.literal(ErrorTypeV1.CIRCUIT_BREAKER_OPEN),
4267
+ isImmediatelyAvailable: z.literal(true),
4268
+ /** Provider that is unavailable */
4269
+ provider: z.nativeEnum(RecognitionProvider).optional(),
4270
+ /** Model that is unavailable */
4271
+ model: z.string().optional()
4272
+ });
4211
4273
  var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
4212
4274
  errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
4213
4275
  isImmediatelyAvailable: z.literal(false),
@@ -4223,6 +4285,7 @@ var RecognitionExceptionSchema = z.discriminatedUnion("errorType", [
4223
4285
  TimeoutExceptionSchema,
4224
4286
  QuotaExceededExceptionSchema,
4225
4287
  ConnectionExceptionSchema,
4288
+ CircuitBreakerExceptionSchema,
4226
4289
  UnknownExceptionSchema
4227
4290
  ]);
4228
4291
 
@@ -4238,6 +4301,12 @@ var ControlSignalTypeV1;
4238
4301
  ControlSignalTypeV12["START_RECORDING"] = "start_recording";
4239
4302
  ControlSignalTypeV12["STOP_RECORDING"] = "stop_recording";
4240
4303
  })(ControlSignalTypeV1 || (ControlSignalTypeV1 = {}));
4304
+ var PrefixMode;
4305
+ (function(PrefixMode2) {
4306
+ PrefixMode2["NONE"] = "none";
4307
+ PrefixMode2["CLIENT"] = "client";
4308
+ PrefixMode2["STORED"] = "stored";
4309
+ })(PrefixMode || (PrefixMode = {}));
4241
4310
  var SlotMapSchema = z.record(z.string(), z.array(z.string()));
4242
4311
  var GameContextSchemaV1 = z.object({
4243
4312
  type: z.literal(RecognitionContextTypeV1.GAME_CONTEXT),
@@ -4269,6 +4338,19 @@ var RequestDebugCommandSchema = z.object({
4269
4338
  // Enable experimental pilot models for testing new features
4270
4339
  enablePilotModels: z.boolean().optional().default(false)
4271
4340
  }).optional();
4341
+ var FallbackASRConfigSchema = z.object({
4342
+ // Required - the fallback provider to use
4343
+ provider: z.string(),
4344
+ // Optional - inherits from primary if not specified
4345
+ model: z.string().optional(),
4346
+ language: z.string().optional(),
4347
+ sampleRate: z.number().optional(),
4348
+ encoding: z.number().optional(),
4349
+ // Recognition options - optional, inherits from primary
4350
+ interimResults: z.boolean().optional(),
4351
+ useContext: z.boolean().optional(),
4352
+ finalTranscriptStability: z.string().optional()
4353
+ });
4272
4354
  var ASRRequestSchemaV1 = z.object({
4273
4355
  type: z.literal(RecognitionContextTypeV1.ASR_REQUEST),
4274
4356
  // Session identification
@@ -4284,6 +4366,16 @@ var ASRRequestSchemaV1 = z.object({
4284
4366
  useContext: z.boolean().optional().default(false),
4285
4367
  // Final transcript stability mode (timeout for fallback final transcript)
4286
4368
  finalTranscriptStability: z.string().optional(),
4369
+ // Traffic control priority (affects quota slot allocation)
4370
+ // 'high' = can use all quota slots (reserved for critical games like song-quiz)
4371
+ // 'low' = limited to non-reserved slots (default for most requests)
4372
+ priority: z.enum(["low", "high"]).optional().default("low"),
4373
+ // Fallback providers - tried in order if primary provider is unavailable (circuit breaker open)
4374
+ fallbackModels: z.array(FallbackASRConfigSchema).optional(),
4375
+ // Prefix audio configuration
4376
+ prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
4377
+ prefixId: z.string().optional(),
4378
+ prefixTextToRemove: z.array(z.string()).optional(),
4287
4379
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
4288
4380
  debugCommand: RequestDebugCommandSchema
4289
4381
  });
@@ -4301,6 +4393,8 @@ var RecognitionGameInfoSchema = z.object({
4301
4393
  accountId: z.string().optional(),
4302
4394
  gameId: z.string().optional(),
4303
4395
  gamePhase: z.string().optional(),
4396
+ questionAskedId: z.string().optional(),
4397
+ /** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
4304
4398
  questionAnswerId: z.string().optional(),
4305
4399
  platform: z.string().optional()
4306
4400
  // Platform for audio recording device (use the definition of platform teams)
@@ -4435,6 +4529,7 @@ var AudioEncoding;
4435
4529
  }
4436
4530
  AudioEncoding2.isNameValid = isNameValid;
4437
4531
  })(AudioEncoding || (AudioEncoding = {}));
4532
+ var PREFIX_AUDIO_ENCODING_OFFSET = 128;
4438
4533
  var SampleRate;
4439
4534
  (function(SampleRate2) {
4440
4535
  SampleRate2[SampleRate2["RATE_8000"] = 8e3] = "RATE_8000";
@@ -4527,6 +4622,7 @@ var FinalTranscriptStability;
4527
4622
  var PlumbingType;
4528
4623
  (function(PlumbingType2) {
4529
4624
  PlumbingType2["AUDIO"] = "audio";
4625
+ PlumbingType2["PREFIX_AUDIO"] = "prefix_audio";
4530
4626
  PlumbingType2["CONTROL"] = "control";
4531
4627
  PlumbingType2["RESULT"] = "result";
4532
4628
  PlumbingType2["RECOGNITION_CONTEXT"] = "recognition_context";
@@ -4605,6 +4701,11 @@ var StatsIncrementType;
4605
4701
  StatsIncrementType2["SUCCESS"] = "success";
4606
4702
  StatsIncrementType2["FAIL"] = "fail";
4607
4703
  })(StatsIncrementType || (StatsIncrementType = {}));
4704
+ var QuotaPriority;
4705
+ (function(QuotaPriority2) {
4706
+ QuotaPriority2[QuotaPriority2["LOW"] = 0] = "LOW";
4707
+ QuotaPriority2[QuotaPriority2["HIGH"] = 1] = "HIGH";
4708
+ })(QuotaPriority || (QuotaPriority = {}));
4608
4709
 
4609
4710
  // ../../libs/types/dist/stages.types.js
4610
4711
  var STAGES = {
@@ -4781,7 +4882,7 @@ var WebSocketAudioClient = class {
4781
4882
  // ../../libs/websocket/dist/core/audio-upload-websocket-server.js
4782
4883
  import { WebSocketServer, WebSocket as WebSocket2 } from "ws";
4783
4884
 
4784
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/stringify.js
4885
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/stringify.js
4785
4886
  var byteToHex = [];
4786
4887
  for (let i = 0; i < 256; ++i) {
4787
4888
  byteToHex.push((i + 256).toString(16).slice(1));
@@ -4790,7 +4891,7 @@ function unsafeStringify(arr, offset = 0) {
4790
4891
  return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
4791
4892
  }
4792
4893
 
4793
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/rng.js
4894
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/rng.js
4794
4895
  var getRandomValues;
4795
4896
  var rnds8 = new Uint8Array(16);
4796
4897
  function rng() {
@@ -4803,21 +4904,27 @@ function rng() {
4803
4904
  return getRandomValues(rnds8);
4804
4905
  }
4805
4906
 
4806
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/native.js
4907
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/native.js
4807
4908
  var randomUUID = typeof crypto !== "undefined" && crypto.randomUUID && crypto.randomUUID.bind(crypto);
4808
4909
  var native_default = { randomUUID };
4809
4910
 
4810
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/v4.js
4911
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/v4.js
4811
4912
  function v4(options, buf, offset) {
4812
4913
  if (native_default.randomUUID && !buf && !options) {
4813
4914
  return native_default.randomUUID();
4814
4915
  }
4815
4916
  options = options || {};
4816
- const rnds = options.random || (options.rng || rng)();
4917
+ const rnds = options.random ?? options.rng?.() ?? rng();
4918
+ if (rnds.length < 16) {
4919
+ throw new Error("Random bytes length must be >= 16");
4920
+ }
4817
4921
  rnds[6] = rnds[6] & 15 | 64;
4818
4922
  rnds[8] = rnds[8] & 63 | 128;
4819
4923
  if (buf) {
4820
4924
  offset = offset || 0;
4925
+ if (offset < 0 || offset + 16 > buf.length) {
4926
+ throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
4927
+ }
4821
4928
  for (let i = 0; i < 16; ++i) {
4822
4929
  buf[offset + i] = rnds[i];
4823
4930
  }
@@ -5191,6 +5298,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5191
5298
  highWM: config.highWaterMark ?? 512e3,
5192
5299
  lowWM: config.lowWaterMark ?? 128e3
5193
5300
  });
5301
+ this.prefixBuffer = [];
5302
+ // Buffer prefix audio until READY
5303
+ this.prefixBufferBytes = 0;
5194
5304
  this.state = "initial" /* INITIAL */;
5195
5305
  // Debug control (internal state, controlled by debugCommand in ASRRequest)
5196
5306
  this.isDebugLogEnabled = false;
@@ -5247,6 +5357,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5247
5357
  static {
5248
5358
  this.PROTOCOL_VERSION = 1;
5249
5359
  }
5360
+ static {
5361
+ this.MAX_PREFIX_BUFFER_BYTES = 10 * 1024 * 1024;
5362
+ }
5250
5363
  // ==========================================================================
5251
5364
  // PRIVATE HELPERS
5252
5365
  // ==========================================================================
@@ -5272,6 +5385,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5272
5385
  cleanup() {
5273
5386
  this.log("debug", "Cleaning up resources");
5274
5387
  this.audioBuffer.clear();
5388
+ this.prefixBuffer = [];
5389
+ this.prefixBufferBytes = 0;
5275
5390
  this.audioBytesSent = 0;
5276
5391
  this.audioChunksSent = 0;
5277
5392
  this.lastAudioStatsLog = 0;
@@ -5321,7 +5436,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5321
5436
  const timeout = setTimeout(() => {
5322
5437
  if (settled) return;
5323
5438
  settled = true;
5324
- this.log("warn", "Connection timeout", { timeout: connectionTimeout, attempt });
5439
+ this.log("warn", `Connection timeout url=${this.config.url}`, { timeout: connectionTimeout, attempt });
5325
5440
  this.state = "failed" /* FAILED */;
5326
5441
  reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
5327
5442
  }, connectionTimeout);
@@ -5343,7 +5458,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5343
5458
  if (settled) return;
5344
5459
  settled = true;
5345
5460
  clearTimeout(timeout);
5346
- this.log("warn", "Connection error", { error, attempt });
5461
+ this.log("warn", `Connection error url=${this.config.url}`, { error, attempt });
5347
5462
  this.state = "failed" /* FAILED */;
5348
5463
  reject(error);
5349
5464
  };
@@ -5358,14 +5473,14 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5358
5473
  lastError = error;
5359
5474
  if (attempt < maxAttempts) {
5360
5475
  const logLevel = attempt < 3 ? "info" : "warn";
5361
- this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
5476
+ this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms url=${this.config.url}`, {
5362
5477
  error: lastError.message,
5363
5478
  nextAttempt: attempt + 1
5364
5479
  });
5365
5480
  this.state = "initial" /* INITIAL */;
5366
5481
  await new Promise((resolve) => setTimeout(resolve, delayMs));
5367
5482
  } else {
5368
- this.log("warn", `All ${maxAttempts} connection attempts failed`, {
5483
+ this.log("warn", `All ${maxAttempts} connection attempts failed url=${this.config.url}`, {
5369
5484
  error: lastError.message
5370
5485
  });
5371
5486
  }
@@ -5488,6 +5603,25 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5488
5603
  isBufferOverflowing() {
5489
5604
  return this.audioBuffer.isOverflowing();
5490
5605
  }
5606
+ isServerReady() {
5607
+ return this.state === "ready" /* READY */;
5608
+ }
5609
+ sendGameContext(context) {
5610
+ if (this.state !== "connected" /* CONNECTED */ && this.state !== "ready" /* READY */) {
5611
+ this.log("warn", "sendGameContext called in wrong state", { state: this.state });
5612
+ return;
5613
+ }
5614
+ this.log("debug", "Sending game context (deferred)", {
5615
+ gameId: context.gameId,
5616
+ gamePhase: context.gamePhase,
5617
+ hasSlotMap: !!context.slotMap
5618
+ });
5619
+ super.sendMessage(
5620
+ _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
5621
+ "message",
5622
+ context
5623
+ );
5624
+ }
5491
5625
  getStats() {
5492
5626
  const bufferStats = this.audioBuffer.getStats();
5493
5627
  return {
@@ -5513,6 +5647,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5513
5647
  if (this.isDebugLogEnabled) {
5514
5648
  this.log("debug", "Sending ASR request", this.config.asrRequestConfig);
5515
5649
  }
5650
+ const fallbackModels = this.config.asrRequestConfig.fallbackModels;
5516
5651
  const asrRequest = {
5517
5652
  type: RecognitionContextTypeV1.ASR_REQUEST,
5518
5653
  audioUtteranceId: this.config.audioUtteranceId,
@@ -5528,7 +5663,20 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5528
5663
  ...this.config.asrRequestConfig.finalTranscriptStability && {
5529
5664
  finalTranscriptStability: this.config.asrRequestConfig.finalTranscriptStability
5530
5665
  },
5531
- ...debugCommand && { debugCommand }
5666
+ // Include fallbackModels if provided (for circuit breaker fallback)
5667
+ ...fallbackModels && { fallbackModels },
5668
+ ...debugCommand && { debugCommand },
5669
+ // Include prefix mode if provided (for server-side stored prefix injection)
5670
+ ...this.config.asrRequestConfig.prefixMode && {
5671
+ prefixMode: this.config.asrRequestConfig.prefixMode
5672
+ },
5673
+ ...this.config.asrRequestConfig.prefixId && {
5674
+ prefixId: this.config.asrRequestConfig.prefixId
5675
+ },
5676
+ // Include prefix text to remove if provided (for server-side prefix text removal)
5677
+ ...this.config.asrRequestConfig.prefixTextToRemove && {
5678
+ prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
5679
+ }
5532
5680
  };
5533
5681
  super.sendMessage(
5534
5682
  _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
@@ -5635,6 +5783,12 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5635
5783
  this.log("debug", "Server ready for audio upload");
5636
5784
  this.state = "ready" /* READY */;
5637
5785
  this.messageHandler.setSessionStartTime(Date.now());
5786
+ if (this.prefixBuffer.length > 0) {
5787
+ this.log("debug", "Flushing buffered prefix audio", { chunks: this.prefixBuffer.length });
5788
+ this.prefixBuffer.forEach((chunk) => this.sendPrefixAudioNow(chunk));
5789
+ this.prefixBuffer = [];
5790
+ this.prefixBufferBytes = 0;
5791
+ }
5638
5792
  const bufferedChunks = this.audioBuffer.flush();
5639
5793
  if (bufferedChunks.length > 0) {
5640
5794
  this.log("debug", "Flushing buffered audio", { chunks: bufferedChunks.length });
@@ -5666,6 +5820,74 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5666
5820
  this.audioBytesSent += byteLength;
5667
5821
  this.audioChunksSent++;
5668
5822
  }
5823
+ /**
5824
+ * Send prefix audio to the server.
5825
+ * Prefix audio is sent before user audio and is used for context/priming.
5826
+ * The server will process it but adjust timing so transcripts reflect user audio timing.
5827
+ *
5828
+ * Note: Prefix audio is buffered until READY state, then flushed before user audio.
5829
+ * This ensures proper ordering even if called before server is ready.
5830
+ *
5831
+ * @param audioData - Prefix audio data (ArrayBuffer, ArrayBufferView, or Blob)
5832
+ */
5833
+ sendPrefixAudio(audioData) {
5834
+ if (audioData instanceof Blob) {
5835
+ blobToArrayBuffer(audioData).then((arrayBuffer) => {
5836
+ this.sendPrefixAudioInternal(arrayBuffer);
5837
+ }).catch((error) => {
5838
+ this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
5839
+ });
5840
+ return;
5841
+ }
5842
+ this.sendPrefixAudioInternal(audioData);
5843
+ }
5844
+ /**
5845
+ * Internal method to handle prefix audio with buffering
5846
+ * Buffers if not READY, sends immediately if READY
5847
+ */
5848
+ sendPrefixAudioInternal(audioData) {
5849
+ const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
5850
+ if (bytes === 0) return;
5851
+ if (this.state === "stopped" /* STOPPED */ || this.state === "failed" /* FAILED */) {
5852
+ this.log("debug", "Ignoring prefix audio in terminal state", { bytes, state: this.state });
5853
+ return;
5854
+ }
5855
+ if (this.state === "ready" /* READY */) {
5856
+ this.log("debug", "Sending prefix audio immediately", { bytes });
5857
+ this.sendPrefixAudioNow(audioData);
5858
+ } else {
5859
+ if (this.prefixBufferBytes + bytes > _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES) {
5860
+ this.log("warn", "Prefix buffer limit exceeded, dropping chunk", {
5861
+ bytes,
5862
+ current: this.prefixBufferBytes,
5863
+ max: _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES
5864
+ });
5865
+ return;
5866
+ }
5867
+ this.log("debug", "Buffering prefix audio until READY", { bytes, state: this.state });
5868
+ this.prefixBuffer.push(audioData);
5869
+ this.prefixBufferBytes += bytes;
5870
+ }
5871
+ }
5872
+ /**
5873
+ * Send prefix audio immediately to the server (without buffering)
5874
+ * Uses encoding offset to mark as prefix audio
5875
+ * @param audioData - Prefix audio data to send
5876
+ */
5877
+ sendPrefixAudioNow(audioData) {
5878
+ const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
5879
+ if (byteLength === 0) return;
5880
+ const baseEncodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
5881
+ const prefixEncodingId = baseEncodingId + PREFIX_AUDIO_ENCODING_OFFSET;
5882
+ const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
5883
+ this.log("debug", "Sending prefix audio", { bytes: byteLength, encoding: prefixEncodingId });
5884
+ super.sendAudio(
5885
+ audioData,
5886
+ _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
5887
+ prefixEncodingId,
5888
+ sampleRate
5889
+ );
5890
+ }
5669
5891
  };
5670
5892
  export {
5671
5893
  AudioEncoding,