@volley/recognition-client-sdk 0.1.622 → 0.1.689

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -3737,11 +3737,14 @@ var RecognitionProvider;
3737
3737
  RecognitionProvider2["DEEPGRAM"] = "deepgram";
3738
3738
  RecognitionProvider2["ELEVENLABS"] = "elevenlabs";
3739
3739
  RecognitionProvider2["FIREWORKS"] = "fireworks";
3740
+ RecognitionProvider2["GLADIA"] = "gladia";
3740
3741
  RecognitionProvider2["GOOGLE"] = "google";
3741
3742
  RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
3742
3743
  RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
3744
+ RecognitionProvider2["SELF_SERVE_VLLM"] = "self-serve-vllm";
3743
3745
  RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
3744
3746
  RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
3747
+ RecognitionProvider2["CARTESIA"] = "cartesia";
3745
3748
  RecognitionProvider2["DASHSCOPE"] = "dashscope";
3746
3749
  RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3747
3750
  RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
@@ -3783,10 +3786,13 @@ var FireworksModel;
3783
3786
  FireworksModel2["WHISPER_V3"] = "whisper-v3";
3784
3787
  FireworksModel2["WHISPER_V3_TURBO"] = "whisper-v3-turbo";
3785
3788
  })(FireworksModel || (FireworksModel = {}));
3789
+ var GladiaModel;
3790
+ (function(GladiaModel2) {
3791
+ GladiaModel2["SOLARIA_1"] = "solaria-1";
3792
+ })(GladiaModel || (GladiaModel = {}));
3786
3793
  var ElevenLabsModel;
3787
3794
  (function(ElevenLabsModel2) {
3788
3795
  ElevenLabsModel2["SCRIBE_V2_REALTIME"] = "scribe_v2_realtime";
3789
- ElevenLabsModel2["SCRIBE_V1"] = "scribe_v1";
3790
3796
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3791
3797
  var OpenAIRealtimeModel;
3792
3798
  (function(OpenAIRealtimeModel2) {
@@ -3797,11 +3803,20 @@ var MistralVoxtralModel;
3797
3803
  (function(MistralVoxtralModel2) {
3798
3804
  MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
3799
3805
  })(MistralVoxtralModel || (MistralVoxtralModel = {}));
3806
+ var CartesiaModel;
3807
+ (function(CartesiaModel2) {
3808
+ CartesiaModel2["INK_WHISPER"] = "ink-whisper";
3809
+ CartesiaModel2["INK_WHISPER_20250604"] = "ink-whisper-2025-06-04";
3810
+ })(CartesiaModel || (CartesiaModel = {}));
3800
3811
  var DashScopeModel;
3801
3812
  (function(DashScopeModel2) {
3802
3813
  DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
3803
3814
  DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
3804
3815
  })(DashScopeModel || (DashScopeModel = {}));
3816
+ var SelfServeVllmModel;
3817
+ (function(SelfServeVllmModel2) {
3818
+ SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
3819
+ })(SelfServeVllmModel || (SelfServeVllmModel = {}));
3805
3820
 
3806
3821
  // ../../libs/types/dist/recognition-result-v1.types.js
3807
3822
  var RecognitionResultTypeV1;
@@ -3812,6 +3827,7 @@ var RecognitionResultTypeV1;
3812
3827
  RecognitionResultTypeV12["ERROR"] = "Error";
3813
3828
  RecognitionResultTypeV12["CLIENT_CONTROL_MESSAGE"] = "ClientControlMessage";
3814
3829
  RecognitionResultTypeV12["AUDIO_METRICS"] = "AudioMetrics";
3830
+ RecognitionResultTypeV12["SESSION_CONFIGURED"] = "SessionConfigured";
3815
3831
  })(RecognitionResultTypeV1 || (RecognitionResultTypeV1 = {}));
3816
3832
  var TranscriptionResultSchemaV1 = z.object({
3817
3833
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
@@ -3874,6 +3890,9 @@ var MetadataResultSchemaV1 = z.object({
3874
3890
  costInUSD: z.number().default(0).optional(),
3875
3891
  // ASR API Type
3876
3892
  apiType: z.nativeEnum(ASRApiType).optional(),
3893
+ // Provider identification
3894
+ provider: z.string().optional(),
3895
+ model: z.string().optional(),
3877
3896
  // ASR configuration as JSON string (no type validation)
3878
3897
  asrConfig: z.string().optional(),
3879
3898
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
@@ -3930,6 +3949,22 @@ var ClientControlMessageSchemaV1 = z.object({
3930
3949
  action: ClientControlActionsV1
3931
3950
  // The control action to perform
3932
3951
  });
3952
+ var SessionConfiguredSchemaV1 = z.object({
3953
+ type: z.literal(RecognitionResultTypeV1.SESSION_CONFIGURED),
3954
+ audioUtteranceId: z.string(),
3955
+ // Provider identification
3956
+ provider: z.string().optional(),
3957
+ model: z.string().optional(),
3958
+ sampleRate: z.number().optional(),
3959
+ encoding: z.string().optional(),
3960
+ apiType: z.nativeEnum(ASRApiType).optional(),
3961
+ isFallback: z.boolean().optional(),
3962
+ // Original ASR request as JSON string (includes prefixMode, prefixId, etc.)
3963
+ asrRequest: z.string().optional(),
3964
+ // Provider-specific config as JSON string (raw config sent to provider API, e.g. Deepgram's punctuate/endpointing)
3965
+ providerConfig: z.string().optional()
3966
+ // Stringified JSON of provider-specific config
3967
+ });
3933
3968
  var AudioMetricsResultSchemaV1 = z.object({
3934
3969
  type: z.literal(RecognitionResultTypeV1.AUDIO_METRICS),
3935
3970
  valid: z.boolean(),
@@ -3953,7 +3988,8 @@ var RecognitionResultSchemaV1 = z.discriminatedUnion("type", [
3953
3988
  ErrorResultSchemaV1,
3954
3989
  // P1 - P2
3955
3990
  FunctionCallResultSchemaV1,
3956
- ClientControlMessageSchemaV1
3991
+ ClientControlMessageSchemaV1,
3992
+ SessionConfiguredSchemaV1
3957
3993
  ]);
3958
3994
 
3959
3995
  // ../../libs/types/dist/provider-transcription.types.js
@@ -4396,8 +4432,9 @@ var RecognitionGameInfoSchema = z.object({
4396
4432
  questionAskedId: z.string().optional(),
4397
4433
  /** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
4398
4434
  questionAnswerId: z.string().optional(),
4399
- platform: z.string().optional()
4400
- // Platform for audio recording device (use the definition of platform teams)
4435
+ platform: z.string().optional(),
4436
+ experimentCohort: z.enum(["treatment", "control"]).optional()
4437
+ // Experiment cohort, defaults to 'control' if not provided
4401
4438
  });
4402
4439
  var RecognitionQueryMetadataSchema = z.object({
4403
4440
  audioUtteranceId: z.string(),
@@ -5024,6 +5061,9 @@ function buildWebSocketUrl(config) {
5024
5061
  if (config.gameContext?.gamePhase) {
5025
5062
  url.searchParams.set("gamePhase", config.gameContext.gamePhase);
5026
5063
  }
5064
+ if (config.experimentCohort) {
5065
+ url.searchParams.set("experimentCohort", config.experimentCohort);
5066
+ }
5027
5067
  return url.toString();
5028
5068
  }
5029
5069
 
@@ -5181,7 +5221,7 @@ var MessageHandler = class {
5181
5221
  }
5182
5222
  if (msg.data && typeof msg.data !== "object") {
5183
5223
  if (this.callbacks.logger) {
5184
- this.callbacks.logger("error", "[RecogSDK] Received primitive msg.data from server", {
5224
+ this.callbacks.logger("warn", "[RecogSDK] Received primitive msg.data from server", {
5185
5225
  dataType: typeof msg.data,
5186
5226
  data: msg.data,
5187
5227
  fullMessage: msg
@@ -5206,6 +5246,9 @@ var MessageHandler = class {
5206
5246
  case RecognitionResultTypeV1.CLIENT_CONTROL_MESSAGE:
5207
5247
  this.callbacks.onControlMessage(msgData);
5208
5248
  break;
5249
+ case RecognitionResultTypeV1.SESSION_CONFIGURED:
5250
+ this.callbacks.onSessionConfigured?.(msgData);
5251
+ break;
5209
5252
  default:
5210
5253
  if (this.callbacks.logger) {
5211
5254
  this.callbacks.logger("debug", "[RecogSDK] Unknown message type", { type: msgType });
@@ -5291,7 +5334,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5291
5334
  ...config.questionAnswerId && { questionAnswerId: config.questionAnswerId },
5292
5335
  ...config.platform && { platform: config.platform },
5293
5336
  ...config.gameContext && { gameContext: config.gameContext },
5294
- ...config.gameId && { gameId: config.gameId }
5337
+ ...config.gameId && { gameId: config.gameId },
5338
+ ...config.experimentCohort && { experimentCohort: config.experimentCohort }
5295
5339
  });
5296
5340
  super({
5297
5341
  url,
@@ -5324,6 +5368,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5324
5368
  }),
5325
5369
  onMetadata: config.onMetadata || (() => {
5326
5370
  }),
5371
+ onSessionConfigured: config.onSessionConfigured,
5327
5372
  onError: config.onError || (() => {
5328
5373
  }),
5329
5374
  onConnected: config.onConnected || (() => {
@@ -5351,6 +5396,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5351
5396
  onMetadata: this.config.onMetadata,
5352
5397
  onError: this.config.onError,
5353
5398
  onControlMessage: this.handleControlMessage.bind(this),
5399
+ onSessionConfigured: this.config.onSessionConfigured,
5354
5400
  ...this.config.logger && { logger: this.config.logger }
5355
5401
  });
5356
5402
  }
@@ -5503,7 +5549,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5503
5549
  blobToArrayBuffer(audioData).then((arrayBuffer) => {
5504
5550
  this.sendAudioInternal(arrayBuffer);
5505
5551
  }).catch((error) => {
5506
- this.log("error", "Failed to convert Blob to ArrayBuffer", error);
5552
+ this.log("warn", "Failed to convert Blob to ArrayBuffer", error);
5507
5553
  });
5508
5554
  return;
5509
5555
  }
@@ -5543,7 +5589,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5543
5589
  */
5544
5590
  async stopRecording() {
5545
5591
  if (this.state !== "ready" /* READY */) {
5546
- this.log("warn", "stopRecording called but not in READY state", { state: this.state });
5592
+ this.log("info", "stopRecording called but not in READY state", { state: this.state });
5547
5593
  return;
5548
5594
  }
5549
5595
  this.log("debug", "Stopping recording");
@@ -5713,7 +5759,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5713
5759
  if (this.state === "stopping" /* STOPPING */) {
5714
5760
  this.state = "stopped" /* STOPPED */;
5715
5761
  } else if (this.state === "connected" /* CONNECTED */ || this.state === "ready" /* READY */ || this.state === "connecting" /* CONNECTING */) {
5716
- this.log("error", "[DIAGNOSTIC] Unexpected disconnection", {
5762
+ this.log("warn", "[DIAGNOSTIC] Unexpected disconnection", {
5717
5763
  code,
5718
5764
  codeDescription: closeCodeDescription,
5719
5765
  reason: reason || "(empty)",
@@ -5835,7 +5881,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5835
5881
  blobToArrayBuffer(audioData).then((arrayBuffer) => {
5836
5882
  this.sendPrefixAudioInternal(arrayBuffer);
5837
5883
  }).catch((error) => {
5838
- this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
5884
+ this.log("warn", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
5839
5885
  });
5840
5886
  return;
5841
5887
  }