@volley/recognition-client-sdk 0.1.622 → 0.1.689

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3737,11 +3737,14 @@ var RecognitionProvider;
3737
3737
  RecognitionProvider2["DEEPGRAM"] = "deepgram";
3738
3738
  RecognitionProvider2["ELEVENLABS"] = "elevenlabs";
3739
3739
  RecognitionProvider2["FIREWORKS"] = "fireworks";
3740
+ RecognitionProvider2["GLADIA"] = "gladia";
3740
3741
  RecognitionProvider2["GOOGLE"] = "google";
3741
3742
  RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
3742
3743
  RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
3744
+ RecognitionProvider2["SELF_SERVE_VLLM"] = "self-serve-vllm";
3743
3745
  RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
3744
3746
  RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
3747
+ RecognitionProvider2["CARTESIA"] = "cartesia";
3745
3748
  RecognitionProvider2["DASHSCOPE"] = "dashscope";
3746
3749
  RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3747
3750
  RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
@@ -3783,10 +3786,13 @@ var FireworksModel;
3783
3786
  FireworksModel2["WHISPER_V3"] = "whisper-v3";
3784
3787
  FireworksModel2["WHISPER_V3_TURBO"] = "whisper-v3-turbo";
3785
3788
  })(FireworksModel || (FireworksModel = {}));
3789
+ var GladiaModel;
3790
+ (function(GladiaModel2) {
3791
+ GladiaModel2["SOLARIA_1"] = "solaria-1";
3792
+ })(GladiaModel || (GladiaModel = {}));
3786
3793
  var ElevenLabsModel;
3787
3794
  (function(ElevenLabsModel2) {
3788
3795
  ElevenLabsModel2["SCRIBE_V2_REALTIME"] = "scribe_v2_realtime";
3789
- ElevenLabsModel2["SCRIBE_V1"] = "scribe_v1";
3790
3796
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3791
3797
  var OpenAIRealtimeModel;
3792
3798
  (function(OpenAIRealtimeModel2) {
@@ -3797,11 +3803,20 @@ var MistralVoxtralModel;
3797
3803
  (function(MistralVoxtralModel2) {
3798
3804
  MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
3799
3805
  })(MistralVoxtralModel || (MistralVoxtralModel = {}));
3806
+ var CartesiaModel;
3807
+ (function(CartesiaModel2) {
3808
+ CartesiaModel2["INK_WHISPER"] = "ink-whisper";
3809
+ CartesiaModel2["INK_WHISPER_20250604"] = "ink-whisper-2025-06-04";
3810
+ })(CartesiaModel || (CartesiaModel = {}));
3800
3811
  var DashScopeModel;
3801
3812
  (function(DashScopeModel2) {
3802
3813
  DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
3803
3814
  DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
3804
3815
  })(DashScopeModel || (DashScopeModel = {}));
3816
+ var SelfServeVllmModel;
3817
+ (function(SelfServeVllmModel2) {
3818
+ SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
3819
+ })(SelfServeVllmModel || (SelfServeVllmModel = {}));
3805
3820
 
3806
3821
  // ../../libs/types/dist/recognition-result-v1.types.js
3807
3822
  var RecognitionResultTypeV1;
@@ -3812,6 +3827,7 @@ var RecognitionResultTypeV1;
3812
3827
  RecognitionResultTypeV12["ERROR"] = "Error";
3813
3828
  RecognitionResultTypeV12["CLIENT_CONTROL_MESSAGE"] = "ClientControlMessage";
3814
3829
  RecognitionResultTypeV12["AUDIO_METRICS"] = "AudioMetrics";
3830
+ RecognitionResultTypeV12["SESSION_CONFIGURED"] = "SessionConfigured";
3815
3831
  })(RecognitionResultTypeV1 || (RecognitionResultTypeV1 = {}));
3816
3832
  var TranscriptionResultSchemaV1 = z.object({
3817
3833
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
@@ -3874,6 +3890,9 @@ var MetadataResultSchemaV1 = z.object({
3874
3890
  costInUSD: z.number().default(0).optional(),
3875
3891
  // ASR API Type
3876
3892
  apiType: z.nativeEnum(ASRApiType).optional(),
3893
+ // Provider identification
3894
+ provider: z.string().optional(),
3895
+ model: z.string().optional(),
3877
3896
  // ASR configuration as JSON string (no type validation)
3878
3897
  asrConfig: z.string().optional(),
3879
3898
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
@@ -3930,6 +3949,22 @@ var ClientControlMessageSchemaV1 = z.object({
3930
3949
  action: ClientControlActionsV1
3931
3950
  // The control action to perform
3932
3951
  });
3952
+ var SessionConfiguredSchemaV1 = z.object({
3953
+ type: z.literal(RecognitionResultTypeV1.SESSION_CONFIGURED),
3954
+ audioUtteranceId: z.string(),
3955
+ // Provider identification
3956
+ provider: z.string().optional(),
3957
+ model: z.string().optional(),
3958
+ sampleRate: z.number().optional(),
3959
+ encoding: z.string().optional(),
3960
+ apiType: z.nativeEnum(ASRApiType).optional(),
3961
+ isFallback: z.boolean().optional(),
3962
+ // Original ASR request as JSON string (includes prefixMode, prefixId, etc.)
3963
+ asrRequest: z.string().optional(),
3964
+ // Provider-specific config as JSON string (raw config sent to provider API, e.g. Deepgram's punctuate/endpointing)
3965
+ providerConfig: z.string().optional()
3966
+ // Stringified JSON of provider-specific config
3967
+ });
3933
3968
  var AudioMetricsResultSchemaV1 = z.object({
3934
3969
  type: z.literal(RecognitionResultTypeV1.AUDIO_METRICS),
3935
3970
  valid: z.boolean(),
@@ -3953,7 +3988,8 @@ var RecognitionResultSchemaV1 = z.discriminatedUnion("type", [
3953
3988
  ErrorResultSchemaV1,
3954
3989
  // P1 - P2
3955
3990
  FunctionCallResultSchemaV1,
3956
- ClientControlMessageSchemaV1
3991
+ ClientControlMessageSchemaV1,
3992
+ SessionConfiguredSchemaV1
3957
3993
  ]);
3958
3994
 
3959
3995
  // ../../libs/types/dist/provider-transcription.types.js
@@ -4419,8 +4455,9 @@ var RecognitionGameInfoSchema = z.object({
4419
4455
  questionAskedId: z.string().optional(),
4420
4456
  /** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
4421
4457
  questionAnswerId: z.string().optional(),
4422
- platform: z.string().optional()
4423
- // Platform for audio recording device (use the definition of platform teams)
4458
+ platform: z.string().optional(),
4459
+ experimentCohort: z.enum(["treatment", "control"]).optional()
4460
+ // Experiment cohort, defaults to 'control' if not provided
4424
4461
  });
4425
4462
  var RecognitionQueryMetadataSchema = z.object({
4426
4463
  audioUtteranceId: z.string(),
@@ -5092,6 +5129,9 @@ function buildWebSocketUrl(config) {
5092
5129
  if (config.gameContext?.gamePhase) {
5093
5130
  url.searchParams.set("gamePhase", config.gameContext.gamePhase);
5094
5131
  }
5132
+ if (config.experimentCohort) {
5133
+ url.searchParams.set("experimentCohort", config.experimentCohort);
5134
+ }
5095
5135
  return url.toString();
5096
5136
  }
5097
5137
 
@@ -5249,7 +5289,7 @@ var MessageHandler = class {
5249
5289
  }
5250
5290
  if (msg.data && typeof msg.data !== "object") {
5251
5291
  if (this.callbacks.logger) {
5252
- this.callbacks.logger("error", "[RecogSDK] Received primitive msg.data from server", {
5292
+ this.callbacks.logger("warn", "[RecogSDK] Received primitive msg.data from server", {
5253
5293
  dataType: typeof msg.data,
5254
5294
  data: msg.data,
5255
5295
  fullMessage: msg
@@ -5274,6 +5314,9 @@ var MessageHandler = class {
5274
5314
  case RecognitionResultTypeV1.CLIENT_CONTROL_MESSAGE:
5275
5315
  this.callbacks.onControlMessage(msgData);
5276
5316
  break;
5317
+ case RecognitionResultTypeV1.SESSION_CONFIGURED:
5318
+ this.callbacks.onSessionConfigured?.(msgData);
5319
+ break;
5277
5320
  default:
5278
5321
  if (this.callbacks.logger) {
5279
5322
  this.callbacks.logger("debug", "[RecogSDK] Unknown message type", { type: msgType });
@@ -5385,7 +5428,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5385
5428
  ...config.questionAnswerId && { questionAnswerId: config.questionAnswerId },
5386
5429
  ...config.platform && { platform: config.platform },
5387
5430
  ...config.gameContext && { gameContext: config.gameContext },
5388
- ...config.gameId && { gameId: config.gameId }
5431
+ ...config.gameId && { gameId: config.gameId },
5432
+ ...config.experimentCohort && { experimentCohort: config.experimentCohort }
5389
5433
  });
5390
5434
  super({
5391
5435
  url,
@@ -5418,6 +5462,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5418
5462
  }),
5419
5463
  onMetadata: config.onMetadata || (() => {
5420
5464
  }),
5465
+ onSessionConfigured: config.onSessionConfigured,
5421
5466
  onError: config.onError || (() => {
5422
5467
  }),
5423
5468
  onConnected: config.onConnected || (() => {
@@ -5445,6 +5490,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5445
5490
  onMetadata: this.config.onMetadata,
5446
5491
  onError: this.config.onError,
5447
5492
  onControlMessage: this.handleControlMessage.bind(this),
5493
+ onSessionConfigured: this.config.onSessionConfigured,
5448
5494
  ...this.config.logger && { logger: this.config.logger }
5449
5495
  });
5450
5496
  }
@@ -5597,7 +5643,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5597
5643
  blobToArrayBuffer(audioData).then((arrayBuffer) => {
5598
5644
  this.sendAudioInternal(arrayBuffer);
5599
5645
  }).catch((error) => {
5600
- this.log("error", "Failed to convert Blob to ArrayBuffer", error);
5646
+ this.log("warn", "Failed to convert Blob to ArrayBuffer", error);
5601
5647
  });
5602
5648
  return;
5603
5649
  }
@@ -5637,7 +5683,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5637
5683
  */
5638
5684
  async stopRecording() {
5639
5685
  if (this.state !== "ready" /* READY */) {
5640
- this.log("warn", "stopRecording called but not in READY state", { state: this.state });
5686
+ this.log("info", "stopRecording called but not in READY state", { state: this.state });
5641
5687
  return;
5642
5688
  }
5643
5689
  this.log("debug", "Stopping recording");
@@ -5807,7 +5853,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5807
5853
  if (this.state === "stopping" /* STOPPING */) {
5808
5854
  this.state = "stopped" /* STOPPED */;
5809
5855
  } else if (this.state === "connected" /* CONNECTED */ || this.state === "ready" /* READY */ || this.state === "connecting" /* CONNECTING */) {
5810
- this.log("error", "[DIAGNOSTIC] Unexpected disconnection", {
5856
+ this.log("warn", "[DIAGNOSTIC] Unexpected disconnection", {
5811
5857
  code,
5812
5858
  codeDescription: closeCodeDescription,
5813
5859
  reason: reason || "(empty)",
@@ -5929,7 +5975,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5929
5975
  blobToArrayBuffer(audioData).then((arrayBuffer) => {
5930
5976
  this.sendPrefixAudioInternal(arrayBuffer);
5931
5977
  }).catch((error) => {
5932
- this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
5978
+ this.log("warn", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
5933
5979
  });
5934
5980
  return;
5935
5981
  }
@@ -6088,6 +6134,13 @@ var ConfigBuilder = class {
6088
6134
  this.config.platform = platform;
6089
6135
  return this;
6090
6136
  }
6137
+ /**
6138
+ * Set experiment cohort (optional, defaults to 'control')
6139
+ */
6140
+ experimentCohort(cohort) {
6141
+ this.config.experimentCohort = cohort;
6142
+ return this;
6143
+ }
6091
6144
  /**
6092
6145
  * Set transcript callback
6093
6146
  */
@@ -6102,6 +6155,13 @@ var ConfigBuilder = class {
6102
6155
  this.config.onMetadata = callback;
6103
6156
  return this;
6104
6157
  }
6158
+ /**
6159
+ * Set session configured callback (optional)
6160
+ */
6161
+ onSessionConfigured(callback) {
6162
+ this.config.onSessionConfigured = callback;
6163
+ return this;
6164
+ }
6105
6165
  /**
6106
6166
  * Set error callback
6107
6167
  */
@@ -6591,6 +6651,7 @@ function createSimplifiedVGFClient(config) {
6591
6651
  }
6592
6652
  export {
6593
6653
  AudioEncoding,
6654
+ CartesiaModel,
6594
6655
  ClientControlActionV1,
6595
6656
  ClientState,
6596
6657
  ConfigBuilder,
@@ -6604,6 +6665,7 @@ export {
6604
6665
  FinalTranscriptStability,
6605
6666
  FireworksModel,
6606
6667
  GeminiModel,
6668
+ GladiaModel,
6607
6669
  GoogleModel,
6608
6670
  Language,
6609
6671
  MistralVoxtralModel,
@@ -6620,6 +6682,7 @@ export {
6620
6682
  RecordingStatus,
6621
6683
  STAGES,
6622
6684
  SampleRate,
6685
+ SelfServeVllmModel,
6623
6686
  SimplifiedVGFRecognitionClient,
6624
6687
  TimeoutError,
6625
6688
  TranscriptionStatus,