@volley/recognition-client-sdk 0.1.707 → 0.1.782

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
@@ -3747,6 +3747,9 @@ var RecognitionProvider;
3747
3747
  RecognitionProvider2["CARTESIA"] = "cartesia";
3748
3748
  RecognitionProvider2["DASHSCOPE"] = "dashscope";
3749
3749
  RecognitionProvider2["BEDROCK"] = "bedrock";
3750
+ RecognitionProvider2["INWORLD_STT"] = "inworld-stt";
3751
+ RecognitionProvider2["AWS_TRANSCRIBE"] = "aws-transcribe";
3752
+ RecognitionProvider2["AMAZON_NOVA_SONIC"] = "amazon-nova-sonic";
3750
3753
  RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3751
3754
  RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
3752
3755
  })(RecognitionProvider || (RecognitionProvider = {}));
@@ -3797,6 +3800,7 @@ var ElevenLabsModel;
3797
3800
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3798
3801
  var OpenAIRealtimeModel;
3799
3802
  (function(OpenAIRealtimeModel2) {
3803
+ OpenAIRealtimeModel2["GPT_REALTIME_WHISPER"] = "gpt-realtime-whisper";
3800
3804
  OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
3801
3805
  OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
3802
3806
  })(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
@@ -3819,6 +3823,19 @@ var BedrockModel;
3819
3823
  BedrockModel2["VOXTRAL_MINI_3B_2507"] = "mistral.voxtral-mini-3b-2507";
3820
3824
  BedrockModel2["VOXTRAL_SMALL_24B_2507"] = "mistral.voxtral-small-24b-2507";
3821
3825
  })(BedrockModel || (BedrockModel = {}));
3826
+ var InworldSttModel;
3827
+ (function(InworldSttModel2) {
3828
+ InworldSttModel2["INWORLD_STT_1"] = "inworld/inworld-stt-1";
3829
+ })(InworldSttModel || (InworldSttModel = {}));
3830
+ var AwsTranscribeModel;
3831
+ (function(AwsTranscribeModel2) {
3832
+ AwsTranscribeModel2["DEFAULT"] = "default";
3833
+ })(AwsTranscribeModel || (AwsTranscribeModel = {}));
3834
+ var AmazonNovaSonicModel;
3835
+ (function(AmazonNovaSonicModel2) {
3836
+ AmazonNovaSonicModel2["AMAZON_NOVA_SONIC_V1"] = "amazon.nova-sonic-v1:0";
3837
+ AmazonNovaSonicModel2["AMAZON_NOVA_2_SONIC"] = "amazon.nova-2-sonic-v1:0";
3838
+ })(AmazonNovaSonicModel || (AmazonNovaSonicModel = {}));
3822
3839
  var SelfServeVllmModel;
3823
3840
  (function(SelfServeVllmModel2) {
3824
3841
  SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
@@ -3835,6 +3852,18 @@ var RecognitionResultTypeV1;
3835
3852
  RecognitionResultTypeV12["AUDIO_METRICS"] = "AudioMetrics";
3836
3853
  RecognitionResultTypeV12["SESSION_CONFIGURED"] = "SessionConfigured";
3837
3854
  })(RecognitionResultTypeV1 || (RecognitionResultTypeV1 = {}));
3855
+ var DetectionTypeV1;
3856
+ (function(DetectionTypeV12) {
3857
+ DetectionTypeV12["SEARCH"] = "search";
3858
+ })(DetectionTypeV1 || (DetectionTypeV1 = {}));
3859
+ var DetectionV1Schema = z.object({
3860
+ type: z.nativeEnum(DetectionTypeV1),
3861
+ query: z.string(),
3862
+ score: z.number().min(0).max(1),
3863
+ startMs: z.number().optional(),
3864
+ endMs: z.number().optional()
3865
+ // endMs: audio time (ms from stream start) where the hit ends
3866
+ });
3838
3867
  var TranscriptionResultSchemaV1 = z.object({
3839
3868
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
3840
3869
  audioUtteranceId: z.string(),
@@ -3853,8 +3882,9 @@ var TranscriptionResultSchemaV1 = z.object({
3853
3882
  endTimestamp: z.number().optional(),
3854
3883
  receivedAtMs: z.number().optional(),
3855
3884
  accumulatedAudioTimeMs: z.number().optional(),
3856
- rawAudioTimeMs: z.number().optional()
3857
- // Total audio duration sent to provider (includes prefix)
3885
+ rawAudioTimeMs: z.number().optional(),
3886
+ detections: z.array(DetectionV1Schema).optional()
3887
+ // Provider-reported phrase detections (query + score, optionally startMs/endMs). Populated whenever the provider returns hits, regardless of `appendSearch`; providers that do not report detections leave this undefined.
3858
3888
  });
3859
3889
  var FunctionCallResultSchemaV1 = z.object({
3860
3890
  type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
@@ -3944,9 +3974,9 @@ var ErrorResultSchemaV1 = z.object({
3944
3974
  // Detailed description
3945
3975
  });
3946
3976
  var ClientControlActionV1;
3947
- (function(ClientControlActionV13) {
3948
- ClientControlActionV13["READY_FOR_UPLOADING_RECORDING"] = "ready_for_uploading_recording";
3949
- ClientControlActionV13["STOP_RECORDING"] = "stop_recording";
3977
+ (function(ClientControlActionV12) {
3978
+ ClientControlActionV12["READY_FOR_UPLOADING_RECORDING"] = "ready_for_uploading_recording";
3979
+ ClientControlActionV12["STOP_RECORDING"] = "stop_recording";
3950
3980
  })(ClientControlActionV1 || (ClientControlActionV1 = {}));
3951
3981
  var ClientControlActionsV1 = z.nativeEnum(ClientControlActionV1);
3952
3982
  var ClientControlMessageSchemaV1 = z.object({
@@ -3979,6 +4009,8 @@ var AudioMetricsResultSchemaV1 = z.object({
3979
4009
  maxVolume: z.number(),
3980
4010
  minVolume: z.number(),
3981
4011
  avgVolume: z.number(),
4012
+ peakVolumeDb: z.number().nullable(),
4013
+ avgVolumeDb: z.number().nullable(),
3982
4014
  silenceRatio: z.number(),
3983
4015
  clippingRatio: z.number(),
3984
4016
  snrEstimate: z.number().nullable(),
@@ -3995,7 +4027,8 @@ var RecognitionResultSchemaV1 = z.discriminatedUnion("type", [
3995
4027
  // P1 - P2
3996
4028
  FunctionCallResultSchemaV1,
3997
4029
  ClientControlMessageSchemaV1,
3998
- SessionConfiguredSchemaV1
4030
+ SessionConfiguredSchemaV1,
4031
+ AudioMetricsResultSchemaV1
3999
4032
  ]);
4000
4033
 
4001
4034
  // ../../libs/types/dist/provider-transcription.types.js
@@ -4104,7 +4137,15 @@ var TranscriptMessageSchema = z.object({
4104
4137
  * @example true
4105
4138
  * @default false
4106
4139
  */
4107
- is_fallback: z.boolean().optional()
4140
+ is_fallback: z.boolean().optional(),
4141
+ /**
4142
+ * Provider-reported phrase detections (query + score, optionally
4143
+ * startMs/endMs). Always populated when the provider returns hits,
4144
+ * regardless of `appendSearch` or scene gating. Providers that do not
4145
+ * report detections leave this undefined.
4146
+ * @example [{ query: 'justin bieber one time', score: 0.78, startMs: 1200, endMs: 2800 }]
4147
+ */
4148
+ detections: z.array(DetectionV1Schema).optional()
4108
4149
  });
4109
4150
  var VADEndSignalSchema = z.object({
4110
4151
  type: z.literal(ProviderMessageType.VAD_END_SIGNAL),
@@ -4418,6 +4459,12 @@ var ASRRequestSchemaV1 = z.object({
4418
4459
  prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
4419
4460
  prefixId: z.string().optional(),
4420
4461
  prefixTextToRemove: z.array(z.string()).optional(),
4462
+ // Streaming audio metrics opt-in: when > 0, server emits AudioMetrics results throttled to this interval (ms).
4463
+ // Undefined / 0 disables streaming audio metrics (final metrics still embedded in Metadata).
4464
+ audioMetricsIntervalMs: z.number().optional(),
4465
+ // Opt-in: round-trip Deepgram `search` phrase hits into the transcript.
4466
+ // Active only when (model = deepgram nova-2) AND (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch in asr-config.types.ts for full semantics.
4467
+ appendSearch: z.boolean().optional(),
4421
4468
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
4422
4469
  debugCommand: RequestDebugCommandSchema
4423
4470
  });
@@ -5221,6 +5268,7 @@ var MessageHandler = class {
5221
5268
  /**
5222
5269
  * Handle incoming WebSocket message
5223
5270
  */
5271
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
5224
5272
  handleMessage(msg) {
5225
5273
  if (this.callbacks.logger) {
5226
5274
  this.callbacks.logger("debug", "[RecogSDK] Received WebSocket message", {
@@ -5259,6 +5307,9 @@ var MessageHandler = class {
5259
5307
  case RecognitionResultTypeV1.SESSION_CONFIGURED:
5260
5308
  this.callbacks.onSessionConfigured?.(msgData);
5261
5309
  break;
5310
+ case RecognitionResultTypeV1.AUDIO_METRICS:
5311
+ this.callbacks.onAudioMetrics?.(msgData);
5312
+ break;
5262
5313
  default:
5263
5314
  if (this.callbacks.logger) {
5264
5315
  this.callbacks.logger("debug", "[RecogSDK] Unknown message type", { type: msgType });
@@ -5380,6 +5431,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5380
5431
  onMetadata: config.onMetadata || (() => {
5381
5432
  }),
5382
5433
  onSessionConfigured: config.onSessionConfigured,
5434
+ onAudioMetrics: config.onAudioMetrics,
5383
5435
  onError: config.onError || (() => {
5384
5436
  }),
5385
5437
  onConnected: config.onConnected || (() => {
@@ -5408,6 +5460,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5408
5460
  onError: this.config.onError,
5409
5461
  onControlMessage: this.handleControlMessage.bind(this),
5410
5462
  onSessionConfigured: this.config.onSessionConfigured,
5463
+ onAudioMetrics: this.config.onAudioMetrics,
5411
5464
  ...this.config.logger && { logger: this.config.logger }
5412
5465
  });
5413
5466
  }
@@ -5733,6 +5786,16 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5733
5786
  // Include prefix text to remove if provided (for server-side prefix text removal)
5734
5787
  ...this.config.asrRequestConfig.prefixTextToRemove && {
5735
5788
  prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
5789
+ },
5790
+ // Streaming audio metrics opt-in (ms interval). Server only forwards metrics if > 0.
5791
+ ...this.config.asrRequestConfig.audioMetricsIntervalMs !== void 0 && {
5792
+ audioMetricsIntervalMs: this.config.asrRequestConfig.audioMetricsIntervalMs
5793
+ },
5794
+ // Opt-in: round-trip Deepgram nova-2 search-phrase hits into the
5795
+ // transcript. Only fires server-side when (model = nova-2) AND
5796
+ // (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch.
5797
+ ...this.config.asrRequestConfig.appendSearch !== void 0 && {
5798
+ appendSearch: this.config.asrRequestConfig.appendSearch
5736
5799
  }
5737
5800
  };
5738
5801
  super.sendMessage(