@volley/recognition-client-sdk 0.1.767 → 0.1.782

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -3749,6 +3749,7 @@ var RecognitionProvider;
3749
3749
  RecognitionProvider2["BEDROCK"] = "bedrock";
3750
3750
  RecognitionProvider2["INWORLD_STT"] = "inworld-stt";
3751
3751
  RecognitionProvider2["AWS_TRANSCRIBE"] = "aws-transcribe";
3752
+ RecognitionProvider2["AMAZON_NOVA_SONIC"] = "amazon-nova-sonic";
3752
3753
  RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3753
3754
  RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
3754
3755
  })(RecognitionProvider || (RecognitionProvider = {}));
@@ -3799,6 +3800,7 @@ var ElevenLabsModel;
3799
3800
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3800
3801
  var OpenAIRealtimeModel;
3801
3802
  (function(OpenAIRealtimeModel2) {
3803
+ OpenAIRealtimeModel2["GPT_REALTIME_WHISPER"] = "gpt-realtime-whisper";
3802
3804
  OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
3803
3805
  OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
3804
3806
  })(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
@@ -3829,6 +3831,11 @@ var AwsTranscribeModel;
3829
3831
  (function(AwsTranscribeModel2) {
3830
3832
  AwsTranscribeModel2["DEFAULT"] = "default";
3831
3833
  })(AwsTranscribeModel || (AwsTranscribeModel = {}));
3834
+ var AmazonNovaSonicModel;
3835
+ (function(AmazonNovaSonicModel2) {
3836
+ AmazonNovaSonicModel2["AMAZON_NOVA_SONIC_V1"] = "amazon.nova-sonic-v1:0";
3837
+ AmazonNovaSonicModel2["AMAZON_NOVA_2_SONIC"] = "amazon.nova-2-sonic-v1:0";
3838
+ })(AmazonNovaSonicModel || (AmazonNovaSonicModel = {}));
3832
3839
  var SelfServeVllmModel;
3833
3840
  (function(SelfServeVllmModel2) {
3834
3841
  SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
@@ -3845,6 +3852,18 @@ var RecognitionResultTypeV1;
3845
3852
  RecognitionResultTypeV12["AUDIO_METRICS"] = "AudioMetrics";
3846
3853
  RecognitionResultTypeV12["SESSION_CONFIGURED"] = "SessionConfigured";
3847
3854
  })(RecognitionResultTypeV1 || (RecognitionResultTypeV1 = {}));
3855
+ var DetectionTypeV1;
3856
+ (function(DetectionTypeV12) {
3857
+ DetectionTypeV12["SEARCH"] = "search";
3858
+ })(DetectionTypeV1 || (DetectionTypeV1 = {}));
3859
+ var DetectionV1Schema = z.object({
3860
+ type: z.nativeEnum(DetectionTypeV1),
3861
+ query: z.string(),
3862
+ score: z.number().min(0).max(1),
3863
+ startMs: z.number().optional(),
3864
+ endMs: z.number().optional()
3865
+ // Audio time (ms from stream start) where the hit ends
3866
+ });
3848
3867
  var TranscriptionResultSchemaV1 = z.object({
3849
3868
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
3850
3869
  audioUtteranceId: z.string(),
@@ -3863,8 +3882,9 @@ var TranscriptionResultSchemaV1 = z.object({
3863
3882
  endTimestamp: z.number().optional(),
3864
3883
  receivedAtMs: z.number().optional(),
3865
3884
  accumulatedAudioTimeMs: z.number().optional(),
3866
- rawAudioTimeMs: z.number().optional()
3867
- // Total audio duration sent to provider (includes prefix)
3885
+ rawAudioTimeMs: z.number().optional(),
3886
+ detections: z.array(DetectionV1Schema).optional()
3887
+ // Provider-reported phrase detections (query + score, optionally startMs/endMs). Always populated when the provider returns hits, regardless of `appendSearch`. Other providers leave this undefined.
3868
3888
  });
3869
3889
  var FunctionCallResultSchemaV1 = z.object({
3870
3890
  type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
@@ -4117,7 +4137,15 @@ var TranscriptMessageSchema = z.object({
4117
4137
  * @example true
4118
4138
  * @default false
4119
4139
  */
4120
- is_fallback: z.boolean().optional()
4140
+ is_fallback: z.boolean().optional(),
4141
+ /**
4142
+ * Provider-reported phrase detections (query + score, optionally
4143
+ * startMs/endMs). Always populated when the provider returns hits,
4144
+ * regardless of `appendSearch` or scene gating. Other providers leave
4145
+ * this undefined.
4146
+ * @example [{ query: 'justin bieber one time', score: 0.78, startMs: 1200, endMs: 2800 }]
4147
+ */
4148
+ detections: z.array(DetectionV1Schema).optional()
4121
4149
  });
4122
4150
  var VADEndSignalSchema = z.object({
4123
4151
  type: z.literal(ProviderMessageType.VAD_END_SIGNAL),
@@ -4434,6 +4462,9 @@ var ASRRequestSchemaV1 = z.object({
4434
4462
  // Streaming audio metrics opt-in: when > 0, server emits AudioMetrics results throttled to this interval (ms).
4435
4463
  // Undefined / 0 disables streaming audio metrics (final metrics still embedded in Metadata).
4436
4464
  audioMetricsIntervalMs: z.number().optional(),
4465
+ // Opt-in: round-trip Deepgram `search` phrase hits into the transcript.
4466
+ // Active only when (model = deepgram nova-2) AND (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch in asr-config.types.ts for full semantics.
4467
+ appendSearch: z.boolean().optional(),
4437
4468
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
4438
4469
  debugCommand: RequestDebugCommandSchema
4439
4470
  });
@@ -5759,6 +5790,12 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5759
5790
  // Streaming audio metrics opt-in (ms interval). Server only forwards metrics if > 0.
5760
5791
  ...this.config.asrRequestConfig.audioMetricsIntervalMs !== void 0 && {
5761
5792
  audioMetricsIntervalMs: this.config.asrRequestConfig.audioMetricsIntervalMs
5793
+ },
5794
+ // Opt-in: round-trip Deepgram nova-2 search-phrase hits into the
5795
+ // transcript. Only fires server-side when (model = nova-2) AND
5796
+ // (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch.
5797
+ ...this.config.asrRequestConfig.appendSearch !== void 0 && {
5798
+ appendSearch: this.config.asrRequestConfig.appendSearch
5762
5799
  }
5763
5800
  };
5764
5801
  super.sendMessage(