@volley/recognition-client-sdk 0.1.424 → 0.1.621

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- // ../../node_modules/.pnpm/zod@3.22.4/node_modules/zod/lib/index.mjs
1
+ // ../../node_modules/.pnpm/zod@3.22.5/node_modules/zod/lib/index.mjs
2
2
  var util;
3
3
  (function(util2) {
4
4
  util2.assertEqual = (val) => val;
@@ -3741,6 +3741,10 @@ var RecognitionProvider;
3741
3741
  RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
3742
3742
  RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
3743
3743
  RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
3744
+ RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
3745
+ RecognitionProvider2["DASHSCOPE"] = "dashscope";
3746
+ RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3747
+ RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
3744
3748
  })(RecognitionProvider || (RecognitionProvider = {}));
3745
3749
  var RecognitionMode;
3746
3750
  (function(RecognitionMode2) {
@@ -3786,8 +3790,18 @@ var ElevenLabsModel;
3786
3790
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3787
3791
  var OpenAIRealtimeModel;
3788
3792
  (function(OpenAIRealtimeModel2) {
3793
+ OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
3789
3794
  OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
3790
3795
  })(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
3796
+ var MistralVoxtralModel;
3797
+ (function(MistralVoxtralModel2) {
3798
+ MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
3799
+ })(MistralVoxtralModel || (MistralVoxtralModel = {}));
3800
+ var DashScopeModel;
3801
+ (function(DashScopeModel2) {
3802
+ DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
3803
+ DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
3804
+ })(DashScopeModel || (DashScopeModel = {}));
3791
3805
 
3792
3806
  // ../../libs/types/dist/recognition-result-v1.types.js
3793
3807
  var RecognitionResultTypeV1;
@@ -3803,8 +3817,10 @@ var TranscriptionResultSchemaV1 = z.object({
3803
3817
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
3804
3818
  audioUtteranceId: z.string(),
3805
3819
  finalTranscript: z.string(),
3820
+ finalTranscriptRaw: z.string(),
3806
3821
  finalTranscriptConfidence: z.number().min(0).max(1).optional(),
3807
3822
  pendingTranscript: z.string().optional(),
3823
+ pendingTranscriptRaw: z.string().optional(),
3808
3824
  pendingTranscriptConfidence: z.number().min(0).max(1).optional(),
3809
3825
  is_finished: z.boolean(),
3810
3826
  voiceStart: z.number().optional(),
@@ -3813,8 +3829,9 @@ var TranscriptionResultSchemaV1 = z.object({
3813
3829
  startTimestamp: z.number().optional(),
3814
3830
  endTimestamp: z.number().optional(),
3815
3831
  receivedAtMs: z.number().optional(),
3816
- accumulatedAudioTimeMs: z.number().optional()
3817
- // accumulated audio time watermark in milliseconds. Total duration of all audio chunks sent. Optional.
3832
+ accumulatedAudioTimeMs: z.number().optional(),
3833
+ rawAudioTimeMs: z.number().optional()
3834
+ // Total audio duration sent to provider (includes prefix)
3818
3835
  });
3819
3836
  var FunctionCallResultSchemaV1 = z.object({
3820
3837
  type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
@@ -3827,11 +3844,22 @@ var TranscriptOutcomeType;
3827
3844
  TranscriptOutcomeType2["WITH_CONTENT"] = "with_content";
3828
3845
  TranscriptOutcomeType2["EMPTY"] = "empty";
3829
3846
  TranscriptOutcomeType2["NEVER_SENT"] = "never_sent";
3847
+ TranscriptOutcomeType2["ERROR_AUTHENTICATION"] = "error_authentication";
3848
+ TranscriptOutcomeType2["ERROR_VALIDATION"] = "error_validation";
3849
+ TranscriptOutcomeType2["ERROR_PROVIDER"] = "error_provider";
3850
+ TranscriptOutcomeType2["ERROR_TIMEOUT"] = "error_timeout";
3851
+ TranscriptOutcomeType2["ERROR_QUOTA"] = "error_quota";
3852
+ TranscriptOutcomeType2["ERROR_INTERNAL_QUOTA"] = "error_internal_quota";
3853
+ TranscriptOutcomeType2["ERROR_CONNECTION"] = "error_connection";
3854
+ TranscriptOutcomeType2["ERROR_NO_AUDIO"] = "error_no_audio";
3855
+ TranscriptOutcomeType2["ERROR_CIRCUIT_BREAKER"] = "error_circuit_breaker";
3856
+ TranscriptOutcomeType2["ERROR_UNKNOWN"] = "error_unknown";
3830
3857
  })(TranscriptOutcomeType || (TranscriptOutcomeType = {}));
3831
3858
  var MetadataResultSchemaV1 = z.object({
3832
3859
  type: z.literal(RecognitionResultTypeV1.METADATA),
3833
3860
  audioUtteranceId: z.string(),
3834
3861
  // Timing information
3862
+ connectionInitiatedAtMs: z.number().optional(),
3835
3863
  recordingStartMs: z.number().optional(),
3836
3864
  recordingEndMs: z.number().optional(),
3837
3865
  transcriptEndMs: z.number().optional(),
@@ -3840,6 +3868,7 @@ var MetadataResultSchemaV1 = z.object({
3840
3868
  duration: z.number().optional(),
3841
3869
  volume: z.number().optional(),
3842
3870
  accumulatedAudioTimeMs: z.number().optional(),
3871
+ rawAudioTimeMs: z.number().optional(),
3843
3872
  // Cost Information
3844
3873
  costInUSD: z.number().default(0).optional(),
3845
3874
  // ASR API Type
@@ -3849,7 +3878,22 @@ var MetadataResultSchemaV1 = z.object({
3849
3878
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
3850
3879
  rawAsrMetadata: z.string().optional(),
3851
3880
  // Transcript outcome - categorizes the final transcript state
3852
- transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional()
3881
+ transcriptOutcome: z.nativeEnum(TranscriptOutcomeType).optional(),
3882
+ // Audio metrics - embedded audio quality metrics (from AudioMetricsAccumulator)
3883
+ // Omit 'type' field since it's embedded in METADATA, not a separate message
3884
+ audioMetrics: z.object({
3885
+ valid: z.boolean(),
3886
+ audioBeginMs: z.number(),
3887
+ audioEndMs: z.number(),
3888
+ maxVolume: z.number(),
3889
+ minVolume: z.number(),
3890
+ avgVolume: z.number(),
3891
+ silenceRatio: z.number(),
3892
+ clippingRatio: z.number(),
3893
+ snrEstimate: z.number().nullable(),
3894
+ lastNonSilenceMs: z.number(),
3895
+ timestamp: z.string()
3896
+ }).optional()
3853
3897
  });
3854
3898
  var ErrorTypeV1;
3855
3899
  (function(ErrorTypeV12) {
@@ -3858,7 +3902,10 @@ var ErrorTypeV1;
3858
3902
  ErrorTypeV12["PROVIDER_ERROR"] = "provider_error";
3859
3903
  ErrorTypeV12["TIMEOUT_ERROR"] = "timeout_error";
3860
3904
  ErrorTypeV12["QUOTA_EXCEEDED"] = "quota_exceeded";
3905
+ ErrorTypeV12["INTERNAL_QUOTA_EXHAUSTED"] = "internal_quota_exhausted";
3861
3906
  ErrorTypeV12["CONNECTION_ERROR"] = "connection_error";
3907
+ ErrorTypeV12["NO_AUDIO_ERROR"] = "no_audio_error";
3908
+ ErrorTypeV12["CIRCUIT_BREAKER_OPEN"] = "circuit_breaker_open";
3862
3909
  ErrorTypeV12["UNKNOWN_ERROR"] = "unknown_error";
3863
3910
  })(ErrorTypeV1 || (ErrorTypeV1 = {}));
3864
3911
  var ErrorResultSchemaV1 = z.object({
@@ -4075,6 +4122,12 @@ var TimerSchema = z.object({
4075
4122
  * Provider that generated this message
4076
4123
  */
4077
4124
  provider: z.nativeEnum(RecognitionProvider).optional(),
4125
+ /**
4126
+ * Timestamp when provider connection was initiated (in milliseconds)
4127
+ * Set before doConnect() - captures the moment before WebSocket creation starts
4128
+ * @example 1704095999800
4129
+ */
4130
+ connectionInitiatedAtMs: z.number().optional(),
4078
4131
  /**
4079
4132
  * Timestamp when recording started (in milliseconds)
4080
4133
  * @example 1704096000000
@@ -4208,6 +4261,14 @@ var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
4208
4261
  /** Underlying error message */
4209
4262
  underlyingError: z.string().optional()
4210
4263
  });
4264
+ var CircuitBreakerExceptionSchema = BaseRecognitionExceptionSchema.extend({
4265
+ errorType: z.literal(ErrorTypeV1.CIRCUIT_BREAKER_OPEN),
4266
+ isImmediatelyAvailable: z.literal(true),
4267
+ /** Provider that is unavailable */
4268
+ provider: z.nativeEnum(RecognitionProvider).optional(),
4269
+ /** Model that is unavailable */
4270
+ model: z.string().optional()
4271
+ });
4211
4272
  var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
4212
4273
  errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
4213
4274
  isImmediatelyAvailable: z.literal(false),
@@ -4223,6 +4284,7 @@ var RecognitionExceptionSchema = z.discriminatedUnion("errorType", [
4223
4284
  TimeoutExceptionSchema,
4224
4285
  QuotaExceededExceptionSchema,
4225
4286
  ConnectionExceptionSchema,
4287
+ CircuitBreakerExceptionSchema,
4226
4288
  UnknownExceptionSchema
4227
4289
  ]);
4228
4290
 
@@ -4238,6 +4300,12 @@ var ControlSignalTypeV1;
4238
4300
  ControlSignalTypeV12["START_RECORDING"] = "start_recording";
4239
4301
  ControlSignalTypeV12["STOP_RECORDING"] = "stop_recording";
4240
4302
  })(ControlSignalTypeV1 || (ControlSignalTypeV1 = {}));
4303
+ var PrefixMode;
4304
+ (function(PrefixMode2) {
4305
+ PrefixMode2["NONE"] = "none";
4306
+ PrefixMode2["CLIENT"] = "client";
4307
+ PrefixMode2["STORED"] = "stored";
4308
+ })(PrefixMode || (PrefixMode = {}));
4241
4309
  var SlotMapSchema = z.record(z.string(), z.array(z.string()));
4242
4310
  var GameContextSchemaV1 = z.object({
4243
4311
  type: z.literal(RecognitionContextTypeV1.GAME_CONTEXT),
@@ -4269,6 +4337,19 @@ var RequestDebugCommandSchema = z.object({
4269
4337
  // Enable experimental pilot models for testing new features
4270
4338
  enablePilotModels: z.boolean().optional().default(false)
4271
4339
  }).optional();
4340
+ var FallbackASRConfigSchema = z.object({
4341
+ // Required - the fallback provider to use
4342
+ provider: z.string(),
4343
+ // Optional - inherits from primary if not specified
4344
+ model: z.string().optional(),
4345
+ language: z.string().optional(),
4346
+ sampleRate: z.number().optional(),
4347
+ encoding: z.number().optional(),
4348
+ // Recognition options - optional, inherits from primary
4349
+ interimResults: z.boolean().optional(),
4350
+ useContext: z.boolean().optional(),
4351
+ finalTranscriptStability: z.string().optional()
4352
+ });
4272
4353
  var ASRRequestSchemaV1 = z.object({
4273
4354
  type: z.literal(RecognitionContextTypeV1.ASR_REQUEST),
4274
4355
  // Session identification
@@ -4284,6 +4365,16 @@ var ASRRequestSchemaV1 = z.object({
4284
4365
  useContext: z.boolean().optional().default(false),
4285
4366
  // Final transcript stability mode (timeout for fallback final transcript)
4286
4367
  finalTranscriptStability: z.string().optional(),
4368
+ // Traffic control priority (affects quota slot allocation)
4369
+ // 'high' = can use all quota slots (reserved for critical games like song-quiz)
4370
+ // 'low' = limited to non-reserved slots (default for most requests)
4371
+ priority: z.enum(["low", "high"]).optional().default("low"),
4372
+ // Fallback providers - tried in order if primary provider is unavailable (circuit breaker open)
4373
+ fallbackModels: z.array(FallbackASRConfigSchema).optional(),
4374
+ // Prefix audio configuration
4375
+ prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
4376
+ prefixId: z.string().optional(),
4377
+ prefixTextToRemove: z.array(z.string()).optional(),
4287
4378
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
4288
4379
  debugCommand: RequestDebugCommandSchema
4289
4380
  });
@@ -4301,6 +4392,8 @@ var RecognitionGameInfoSchema = z.object({
4301
4392
  accountId: z.string().optional(),
4302
4393
  gameId: z.string().optional(),
4303
4394
  gamePhase: z.string().optional(),
4395
+ questionAskedId: z.string().optional(),
4396
+ /** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
4304
4397
  questionAnswerId: z.string().optional(),
4305
4398
  platform: z.string().optional()
4306
4399
  // Platform for audio recording device (use the definition of platform teams)
@@ -4435,6 +4528,7 @@ var AudioEncoding;
4435
4528
  }
4436
4529
  AudioEncoding2.isNameValid = isNameValid;
4437
4530
  })(AudioEncoding || (AudioEncoding = {}));
4531
+ var PREFIX_AUDIO_ENCODING_OFFSET = 128;
4438
4532
  var SampleRate;
4439
4533
  (function(SampleRate2) {
4440
4534
  SampleRate2[SampleRate2["RATE_8000"] = 8e3] = "RATE_8000";
@@ -4527,6 +4621,7 @@ var FinalTranscriptStability;
4527
4621
  var PlumbingType;
4528
4622
  (function(PlumbingType2) {
4529
4623
  PlumbingType2["AUDIO"] = "audio";
4624
+ PlumbingType2["PREFIX_AUDIO"] = "prefix_audio";
4530
4625
  PlumbingType2["CONTROL"] = "control";
4531
4626
  PlumbingType2["RESULT"] = "result";
4532
4627
  PlumbingType2["RECOGNITION_CONTEXT"] = "recognition_context";
@@ -4605,6 +4700,11 @@ var StatsIncrementType;
4605
4700
  StatsIncrementType2["SUCCESS"] = "success";
4606
4701
  StatsIncrementType2["FAIL"] = "fail";
4607
4702
  })(StatsIncrementType || (StatsIncrementType = {}));
4703
+ var QuotaPriority;
4704
+ (function(QuotaPriority2) {
4705
+ QuotaPriority2[QuotaPriority2["LOW"] = 0] = "LOW";
4706
+ QuotaPriority2[QuotaPriority2["HIGH"] = 1] = "HIGH";
4707
+ })(QuotaPriority || (QuotaPriority = {}));
4608
4708
 
4609
4709
  // ../../libs/types/dist/stages.types.js
4610
4710
  var STAGES = {
@@ -4781,7 +4881,7 @@ var WebSocketAudioClient = class {
4781
4881
  // ../../libs/websocket/dist/core/audio-upload-websocket-server.js
4782
4882
  import { WebSocketServer, WebSocket as WebSocket2 } from "ws";
4783
4883
 
4784
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/stringify.js
4884
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/stringify.js
4785
4885
  var byteToHex = [];
4786
4886
  for (let i = 0; i < 256; ++i) {
4787
4887
  byteToHex.push((i + 256).toString(16).slice(1));
@@ -4790,7 +4890,7 @@ function unsafeStringify(arr, offset = 0) {
4790
4890
  return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + "-" + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + "-" + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + "-" + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + "-" + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
4791
4891
  }
4792
4892
 
4793
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/rng.js
4893
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/rng.js
4794
4894
  var getRandomValues;
4795
4895
  var rnds8 = new Uint8Array(16);
4796
4896
  function rng() {
@@ -4803,21 +4903,27 @@ function rng() {
4803
4903
  return getRandomValues(rnds8);
4804
4904
  }
4805
4905
 
4806
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/native.js
4906
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/native.js
4807
4907
  var randomUUID = typeof crypto !== "undefined" && crypto.randomUUID && crypto.randomUUID.bind(crypto);
4808
4908
  var native_default = { randomUUID };
4809
4909
 
4810
- // ../../node_modules/.pnpm/uuid@11.0.0/node_modules/uuid/dist/esm-browser/v4.js
4910
+ // ../../node_modules/.pnpm/uuid@11.1.0/node_modules/uuid/dist/esm-browser/v4.js
4811
4911
  function v4(options, buf, offset) {
4812
4912
  if (native_default.randomUUID && !buf && !options) {
4813
4913
  return native_default.randomUUID();
4814
4914
  }
4815
4915
  options = options || {};
4816
- const rnds = options.random || (options.rng || rng)();
4916
+ const rnds = options.random ?? options.rng?.() ?? rng();
4917
+ if (rnds.length < 16) {
4918
+ throw new Error("Random bytes length must be >= 16");
4919
+ }
4817
4920
  rnds[6] = rnds[6] & 15 | 64;
4818
4921
  rnds[8] = rnds[8] & 63 | 128;
4819
4922
  if (buf) {
4820
4923
  offset = offset || 0;
4924
+ if (offset < 0 || offset + 16 > buf.length) {
4925
+ throw new RangeError(`UUID byte range ${offset}:${offset + 15} is out of buffer bounds`);
4926
+ }
4821
4927
  for (let i = 0; i < 16; ++i) {
4822
4928
  buf[offset + i] = rnds[i];
4823
4929
  }
@@ -5191,6 +5297,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5191
5297
  highWM: config.highWaterMark ?? 512e3,
5192
5298
  lowWM: config.lowWaterMark ?? 128e3
5193
5299
  });
5300
+ this.prefixBuffer = [];
5301
+ // Buffer prefix audio until READY
5302
+ this.prefixBufferBytes = 0;
5194
5303
  this.state = "initial" /* INITIAL */;
5195
5304
  // Debug control (internal state, controlled by debugCommand in ASRRequest)
5196
5305
  this.isDebugLogEnabled = false;
@@ -5247,6 +5356,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5247
5356
  static {
5248
5357
  this.PROTOCOL_VERSION = 1;
5249
5358
  }
5359
+ static {
5360
+ this.MAX_PREFIX_BUFFER_BYTES = 10 * 1024 * 1024;
5361
+ }
5250
5362
  // ==========================================================================
5251
5363
  // PRIVATE HELPERS
5252
5364
  // ==========================================================================
@@ -5272,6 +5384,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5272
5384
  cleanup() {
5273
5385
  this.log("debug", "Cleaning up resources");
5274
5386
  this.audioBuffer.clear();
5387
+ this.prefixBuffer = [];
5388
+ this.prefixBufferBytes = 0;
5275
5389
  this.audioBytesSent = 0;
5276
5390
  this.audioChunksSent = 0;
5277
5391
  this.lastAudioStatsLog = 0;
@@ -5488,6 +5602,25 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5488
5602
  isBufferOverflowing() {
5489
5603
  return this.audioBuffer.isOverflowing();
5490
5604
  }
5605
+ isServerReady() {
5606
+ return this.state === "ready" /* READY */;
5607
+ }
5608
+ sendGameContext(context) {
5609
+ if (this.state !== "connected" /* CONNECTED */ && this.state !== "ready" /* READY */) {
5610
+ this.log("warn", "sendGameContext called in wrong state", { state: this.state });
5611
+ return;
5612
+ }
5613
+ this.log("debug", "Sending game context (deferred)", {
5614
+ gameId: context.gameId,
5615
+ gamePhase: context.gamePhase,
5616
+ hasSlotMap: !!context.slotMap
5617
+ });
5618
+ super.sendMessage(
5619
+ _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
5620
+ "message",
5621
+ context
5622
+ );
5623
+ }
5491
5624
  getStats() {
5492
5625
  const bufferStats = this.audioBuffer.getStats();
5493
5626
  return {
@@ -5513,6 +5646,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5513
5646
  if (this.isDebugLogEnabled) {
5514
5647
  this.log("debug", "Sending ASR request", this.config.asrRequestConfig);
5515
5648
  }
5649
+ const fallbackModels = this.config.asrRequestConfig.fallbackModels;
5516
5650
  const asrRequest = {
5517
5651
  type: RecognitionContextTypeV1.ASR_REQUEST,
5518
5652
  audioUtteranceId: this.config.audioUtteranceId,
@@ -5528,7 +5662,20 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5528
5662
  ...this.config.asrRequestConfig.finalTranscriptStability && {
5529
5663
  finalTranscriptStability: this.config.asrRequestConfig.finalTranscriptStability
5530
5664
  },
5531
- ...debugCommand && { debugCommand }
5665
+ // Include fallbackModels if provided (for circuit breaker fallback)
5666
+ ...fallbackModels && { fallbackModels },
5667
+ ...debugCommand && { debugCommand },
5668
+ // Include prefix mode if provided (for server-side stored prefix injection)
5669
+ ...this.config.asrRequestConfig.prefixMode && {
5670
+ prefixMode: this.config.asrRequestConfig.prefixMode
5671
+ },
5672
+ ...this.config.asrRequestConfig.prefixId && {
5673
+ prefixId: this.config.asrRequestConfig.prefixId
5674
+ },
5675
+ // Include prefix text to remove if provided (for server-side prefix text removal)
5676
+ ...this.config.asrRequestConfig.prefixTextToRemove && {
5677
+ prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
5678
+ }
5532
5679
  };
5533
5680
  super.sendMessage(
5534
5681
  _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
@@ -5635,6 +5782,12 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5635
5782
  this.log("debug", "Server ready for audio upload");
5636
5783
  this.state = "ready" /* READY */;
5637
5784
  this.messageHandler.setSessionStartTime(Date.now());
5785
+ if (this.prefixBuffer.length > 0) {
5786
+ this.log("debug", "Flushing buffered prefix audio", { chunks: this.prefixBuffer.length });
5787
+ this.prefixBuffer.forEach((chunk) => this.sendPrefixAudioNow(chunk));
5788
+ this.prefixBuffer = [];
5789
+ this.prefixBufferBytes = 0;
5790
+ }
5638
5791
  const bufferedChunks = this.audioBuffer.flush();
5639
5792
  if (bufferedChunks.length > 0) {
5640
5793
  this.log("debug", "Flushing buffered audio", { chunks: bufferedChunks.length });
@@ -5666,6 +5819,74 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5666
5819
  this.audioBytesSent += byteLength;
5667
5820
  this.audioChunksSent++;
5668
5821
  }
5822
+ /**
5823
+ * Send prefix audio to the server.
5824
+ * Prefix audio is sent before user audio and is used for context/priming.
5825
+ * The server will process it but adjust timing so transcripts reflect user audio timing.
5826
+ *
5827
+ * Note: Prefix audio is buffered until READY state, then flushed before user audio.
5828
+ * This ensures proper ordering even if called before server is ready.
5829
+ *
5830
+ * @param audioData - Prefix audio data (ArrayBuffer, ArrayBufferView, or Blob)
5831
+ */
5832
+ sendPrefixAudio(audioData) {
5833
+ if (audioData instanceof Blob) {
5834
+ blobToArrayBuffer(audioData).then((arrayBuffer) => {
5835
+ this.sendPrefixAudioInternal(arrayBuffer);
5836
+ }).catch((error) => {
5837
+ this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
5838
+ });
5839
+ return;
5840
+ }
5841
+ this.sendPrefixAudioInternal(audioData);
5842
+ }
5843
+ /**
5844
+ * Internal method to handle prefix audio with buffering
5845
+ * Buffers if not READY, sends immediately if READY
5846
+ */
5847
+ sendPrefixAudioInternal(audioData) {
5848
+ const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
5849
+ if (bytes === 0) return;
5850
+ if (this.state === "stopped" /* STOPPED */ || this.state === "failed" /* FAILED */) {
5851
+ this.log("debug", "Ignoring prefix audio in terminal state", { bytes, state: this.state });
5852
+ return;
5853
+ }
5854
+ if (this.state === "ready" /* READY */) {
5855
+ this.log("debug", "Sending prefix audio immediately", { bytes });
5856
+ this.sendPrefixAudioNow(audioData);
5857
+ } else {
5858
+ if (this.prefixBufferBytes + bytes > _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES) {
5859
+ this.log("warn", "Prefix buffer limit exceeded, dropping chunk", {
5860
+ bytes,
5861
+ current: this.prefixBufferBytes,
5862
+ max: _RealTimeTwoWayWebSocketRecognitionClient.MAX_PREFIX_BUFFER_BYTES
5863
+ });
5864
+ return;
5865
+ }
5866
+ this.log("debug", "Buffering prefix audio until READY", { bytes, state: this.state });
5867
+ this.prefixBuffer.push(audioData);
5868
+ this.prefixBufferBytes += bytes;
5869
+ }
5870
+ }
5871
+ /**
5872
+ * Send prefix audio immediately to the server (without buffering)
5873
+ * Uses encoding offset to mark as prefix audio
5874
+ * @param audioData - Prefix audio data to send
5875
+ */
5876
+ sendPrefixAudioNow(audioData) {
5877
+ const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
5878
+ if (byteLength === 0) return;
5879
+ const baseEncodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
5880
+ const prefixEncodingId = baseEncodingId + PREFIX_AUDIO_ENCODING_OFFSET;
5881
+ const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
5882
+ this.log("debug", "Sending prefix audio", { bytes: byteLength, encoding: prefixEncodingId });
5883
+ super.sendAudio(
5884
+ audioData,
5885
+ _RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
5886
+ prefixEncodingId,
5887
+ sampleRate
5888
+ );
5889
+ }
5669
5890
  };
5670
5891
  export {
5671
5892
  AudioEncoding,