@volley/recognition-client-sdk 0.1.767 → 0.1.782

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,7 @@ declare enum RecognitionProvider {
24
24
  BEDROCK = "bedrock",
25
25
  INWORLD_STT = "inworld-stt",
26
26
  AWS_TRANSCRIBE = "aws-transcribe",
27
+ AMAZON_NOVA_SONIC = "amazon-nova-sonic",
27
28
  TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
28
29
  TEST_ASR_STREAMING = "test-asr-streaming"
29
30
  }
@@ -96,6 +97,7 @@ declare enum ElevenLabsModel {
96
97
  * @see https://platform.openai.com/docs/models/gpt-4o-transcribe
97
98
  */
98
99
  declare enum OpenAIRealtimeModel {
100
+ GPT_REALTIME_WHISPER = "gpt-realtime-whisper",
99
101
  GPT_4O_TRANSCRIBE = "gpt-4o-transcribe",
100
102
  GPT_4O_MINI_TRANSCRIBE = "gpt-4o-mini-transcribe"
101
103
  }
@@ -146,6 +148,15 @@ declare enum InworldSttModel {
146
148
  declare enum AwsTranscribeModel {
147
149
  DEFAULT = "default"
148
150
  }
151
+ /**
152
+ * Amazon Nova Sonic bidirectional streaming model (Bedrock).
153
+ * Speech-to-speech model; we consume the USER FINAL transcript and discard the assistant text/audio output.
154
+ * @see https://docs.aws.amazon.com/nova/latest/userguide/speech-bidirection.html
155
+ */
156
+ declare enum AmazonNovaSonicModel {
157
+ AMAZON_NOVA_SONIC_V1 = "amazon.nova-sonic-v1:0",
158
+ AMAZON_NOVA_2_SONIC = "amazon.nova-2-sonic-v1:0"
159
+ }
149
160
  /**
150
161
  * Self-serve vLLM batch transcription models
151
162
  * Backed by recognition-inference / RunPod `/transcribe`
@@ -156,125 +167,7 @@ declare enum SelfServeVllmModel {
156
167
  /**
157
168
  * Type alias for any model from any provider
158
169
  */
159
- type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | InworldSttModel | SelfServeVllmModel | BedrockModel | AwsTranscribeModel | string;
160
-
161
- /**
162
- * Audio encoding types
163
- */
164
- declare enum AudioEncoding {
165
- ENCODING_UNSPECIFIED = 0,
166
- LINEAR16 = 1,
167
- OGG_OPUS = 2,
168
- FLAC = 3,
169
- MULAW = 4,
170
- ALAW = 5
171
- }
172
- declare namespace AudioEncoding {
173
- /**
174
- * Convert numeric ID to AudioEncoding enum
175
- * @param id - Numeric encoding identifier (0-5)
176
- * @returns AudioEncoding enum value or undefined if invalid
177
- */
178
- function fromId(id: number): AudioEncoding | undefined;
179
- /**
180
- * Convert string name to AudioEncoding enum
181
- * @param nameStr - String name like "linear16", "LINEAR16", "ogg_opus", "OGG_OPUS", etc. (case insensitive)
182
- * @returns AudioEncoding enum value or undefined if invalid
183
- */
184
- function fromName(nameStr: string): AudioEncoding | undefined;
185
- /**
186
- * Convert AudioEncoding enum to numeric ID
187
- * @param encoding - AudioEncoding enum value
188
- * @returns Numeric ID (0-5)
189
- */
190
- function toId(encoding: AudioEncoding): number;
191
- /**
192
- * Convert AudioEncoding enum to string name
193
- * @param encoding - AudioEncoding enum value
194
- * @returns String name like "LINEAR16", "MULAW", etc.
195
- */
196
- function toName(encoding: AudioEncoding): string;
197
- /**
198
- * Check if a numeric ID is a valid encoding
199
- * @param id - Numeric identifier to validate
200
- * @returns true if valid encoding ID
201
- */
202
- function isIdValid(id: number): boolean;
203
- /**
204
- * Check if a string name is a valid encoding
205
- * @param nameStr - String name to validate
206
- * @returns true if valid encoding name
207
- */
208
- function isNameValid(nameStr: string): boolean;
209
- }
210
- /**
211
- * Common sample rates (in Hz)
212
- */
213
- declare enum SampleRate {
214
- RATE_8000 = 8000,
215
- RATE_16000 = 16000,
216
- RATE_22050 = 22050,
217
- RATE_24000 = 24000,
218
- RATE_32000 = 32000,
219
- RATE_44100 = 44100,
220
- RATE_48000 = 48000
221
- }
222
- declare namespace SampleRate {
223
- /**
224
- * Convert Hz value to SampleRate enum
225
- * @param hz - Sample rate in Hz (8000, 16000, etc.)
226
- * @returns SampleRate enum value or undefined if invalid
227
- */
228
- function fromHz(hz: number): SampleRate | undefined;
229
- /**
230
- * Convert string name to SampleRate enum
231
- * @param nameStr - String name like "rate_8000", "RATE_16000", etc. (case insensitive)
232
- * @returns SampleRate enum value or undefined if invalid
233
- */
234
- function fromName(nameStr: string): SampleRate | undefined;
235
- /**
236
- * Convert SampleRate enum to Hz value
237
- * @param rate - SampleRate enum value
238
- * @returns Hz value (8000, 16000, etc.)
239
- */
240
- function toHz(rate: SampleRate): number;
241
- /**
242
- * Convert SampleRate enum to string name
243
- * @param rate - SampleRate enum value
244
- * @returns String name like "RATE_8000", "RATE_16000", etc.
245
- */
246
- function toName(rate: SampleRate): string;
247
- /**
248
- * Check if a numeric Hz value is a valid sample rate
249
- * @param hz - Hz value to validate
250
- * @returns true if valid sample rate
251
- */
252
- function isHzValid(hz: number): boolean;
253
- /**
254
- * Check if a string name is a valid sample rate
255
- * @param nameStr - String name to validate
256
- * @returns true if valid sample rate name
257
- */
258
- function isNameValid(nameStr: string): boolean;
259
- }
260
- /**
261
- * Supported languages for recognition
262
- * Using BCP-47 language tags
263
- */
264
- declare enum Language {
265
- ENGLISH_US = "en-US",
266
- ENGLISH_GB = "en-GB",
267
- SPANISH_ES = "es-ES",
268
- SPANISH_MX = "es-MX",
269
- FRENCH_FR = "fr-FR",
270
- GERMAN_DE = "de-DE",
271
- ITALIAN_IT = "it-IT",
272
- PORTUGUESE_BR = "pt-BR",
273
- JAPANESE_JP = "ja-JP",
274
- KOREAN_KR = "ko-KR",
275
- CHINESE_CN = "zh-CN",
276
- CHINESE_TW = "zh-TW"
277
- }
170
+ type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | InworldSttModel | SelfServeVllmModel | BedrockModel | AwsTranscribeModel | AmazonNovaSonicModel | string;
278
171
 
279
172
  /**
280
173
  * Recognition Result Types V1
@@ -294,6 +187,16 @@ declare enum RecognitionResultTypeV1 {
294
187
  AUDIO_METRICS = "AudioMetrics",
295
188
  SESSION_CONFIGURED = "SessionConfigured"
296
189
  }
190
+ /**
191
+ * Source of a phrase detection — what kind of provider feature produced
192
+ * the hit. Currently only Deepgram's `search` parameter is wired up, so
193
+ * this enum has one value. New entries (e.g. KEYWORDS, KEYTERMS,
194
+ * SPEECH_CONTEXTS) get added when other providers join.
195
+ */
196
+ declare enum DetectionTypeV1 {
197
+ /** Deepgram phonetic phrase match via the `search=…` request parameter */
198
+ SEARCH = "search"
199
+ }
297
200
  /**
298
201
  * Transcription result V1 - contains transcript message
299
202
  * In the long run the game side should not need to know about it. In the short run it is sent back to the client.
@@ -318,6 +221,25 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
318
221
  receivedAtMs: z.ZodOptional<z.ZodNumber>;
319
222
  accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
320
223
  rawAudioTimeMs: z.ZodOptional<z.ZodNumber>;
224
+ detections: z.ZodOptional<z.ZodArray<z.ZodObject<{
225
+ type: z.ZodNativeEnum<typeof DetectionTypeV1>;
226
+ query: z.ZodString;
227
+ score: z.ZodNumber;
228
+ startMs: z.ZodOptional<z.ZodNumber>;
229
+ endMs: z.ZodOptional<z.ZodNumber>;
230
+ }, "strip", z.ZodTypeAny, {
231
+ type: DetectionTypeV1;
232
+ query: string;
233
+ score: number;
234
+ startMs?: number | undefined;
235
+ endMs?: number | undefined;
236
+ }, {
237
+ type: DetectionTypeV1;
238
+ query: string;
239
+ score: number;
240
+ startMs?: number | undefined;
241
+ endMs?: number | undefined;
242
+ }>, "many">>;
321
243
  }, "strip", z.ZodTypeAny, {
322
244
  type: RecognitionResultTypeV1.TRANSCRIPTION;
323
245
  audioUtteranceId: string;
@@ -337,6 +259,13 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
337
259
  receivedAtMs?: number | undefined;
338
260
  accumulatedAudioTimeMs?: number | undefined;
339
261
  rawAudioTimeMs?: number | undefined;
262
+ detections?: {
263
+ type: DetectionTypeV1;
264
+ query: string;
265
+ score: number;
266
+ startMs?: number | undefined;
267
+ endMs?: number | undefined;
268
+ }[] | undefined;
340
269
  }, {
341
270
  type: RecognitionResultTypeV1.TRANSCRIPTION;
342
271
  audioUtteranceId: string;
@@ -356,6 +285,13 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
356
285
  receivedAtMs?: number | undefined;
357
286
  accumulatedAudioTimeMs?: number | undefined;
358
287
  rawAudioTimeMs?: number | undefined;
288
+ detections?: {
289
+ type: DetectionTypeV1;
290
+ query: string;
291
+ score: number;
292
+ startMs?: number | undefined;
293
+ endMs?: number | undefined;
294
+ }[] | undefined;
359
295
  }>;
360
296
  type TranscriptionResultV1 = z.infer<typeof TranscriptionResultSchemaV1>;
361
297
  /**
@@ -676,6 +612,124 @@ declare const AudioMetricsResultSchemaV1: z.ZodObject<{
676
612
  }>;
677
613
  type AudioMetricsResultV1 = z.infer<typeof AudioMetricsResultSchemaV1>;
678
614
 
615
+ /**
616
+ * Audio encoding types
617
+ */
618
+ declare enum AudioEncoding {
619
+ ENCODING_UNSPECIFIED = 0,
620
+ LINEAR16 = 1,
621
+ OGG_OPUS = 2,
622
+ FLAC = 3,
623
+ MULAW = 4,
624
+ ALAW = 5
625
+ }
626
+ declare namespace AudioEncoding {
627
+ /**
628
+ * Convert numeric ID to AudioEncoding enum
629
+ * @param id - Numeric encoding identifier (0-5)
630
+ * @returns AudioEncoding enum value or undefined if invalid
631
+ */
632
+ function fromId(id: number): AudioEncoding | undefined;
633
+ /**
634
+ * Convert string name to AudioEncoding enum
635
+ * @param nameStr - String name like "linear16", "LINEAR16", "ogg_opus", "OGG_OPUS", etc. (case insensitive)
636
+ * @returns AudioEncoding enum value or undefined if invalid
637
+ */
638
+ function fromName(nameStr: string): AudioEncoding | undefined;
639
+ /**
640
+ * Convert AudioEncoding enum to numeric ID
641
+ * @param encoding - AudioEncoding enum value
642
+ * @returns Numeric ID (0-5)
643
+ */
644
+ function toId(encoding: AudioEncoding): number;
645
+ /**
646
+ * Convert AudioEncoding enum to string name
647
+ * @param encoding - AudioEncoding enum value
648
+ * @returns String name like "LINEAR16", "MULAW", etc.
649
+ */
650
+ function toName(encoding: AudioEncoding): string;
651
+ /**
652
+ * Check if a numeric ID is a valid encoding
653
+ * @param id - Numeric identifier to validate
654
+ * @returns true if valid encoding ID
655
+ */
656
+ function isIdValid(id: number): boolean;
657
+ /**
658
+ * Check if a string name is a valid encoding
659
+ * @param nameStr - String name to validate
660
+ * @returns true if valid encoding name
661
+ */
662
+ function isNameValid(nameStr: string): boolean;
663
+ }
664
+ /**
665
+ * Common sample rates (in Hz)
666
+ */
667
+ declare enum SampleRate {
668
+ RATE_8000 = 8000,
669
+ RATE_16000 = 16000,
670
+ RATE_22050 = 22050,
671
+ RATE_24000 = 24000,
672
+ RATE_32000 = 32000,
673
+ RATE_44100 = 44100,
674
+ RATE_48000 = 48000
675
+ }
676
+ declare namespace SampleRate {
677
+ /**
678
+ * Convert Hz value to SampleRate enum
679
+ * @param hz - Sample rate in Hz (8000, 16000, etc.)
680
+ * @returns SampleRate enum value or undefined if invalid
681
+ */
682
+ function fromHz(hz: number): SampleRate | undefined;
683
+ /**
684
+ * Convert string name to SampleRate enum
685
+ * @param nameStr - String name like "rate_8000", "RATE_16000", etc. (case insensitive)
686
+ * @returns SampleRate enum value or undefined if invalid
687
+ */
688
+ function fromName(nameStr: string): SampleRate | undefined;
689
+ /**
690
+ * Convert SampleRate enum to Hz value
691
+ * @param rate - SampleRate enum value
692
+ * @returns Hz value (8000, 16000, etc.)
693
+ */
694
+ function toHz(rate: SampleRate): number;
695
+ /**
696
+ * Convert SampleRate enum to string name
697
+ * @param rate - SampleRate enum value
698
+ * @returns String name like "RATE_8000", "RATE_16000", etc.
699
+ */
700
+ function toName(rate: SampleRate): string;
701
+ /**
702
+ * Check if a numeric Hz value is a valid sample rate
703
+ * @param hz - Hz value to validate
704
+ * @returns true if valid sample rate
705
+ */
706
+ function isHzValid(hz: number): boolean;
707
+ /**
708
+ * Check if a string name is a valid sample rate
709
+ * @param nameStr - String name to validate
710
+ * @returns true if valid sample rate name
711
+ */
712
+ function isNameValid(nameStr: string): boolean;
713
+ }
714
+ /**
715
+ * Supported languages for recognition
716
+ * Using BCP-47 language tags
717
+ */
718
+ declare enum Language {
719
+ ENGLISH_US = "en-US",
720
+ ENGLISH_GB = "en-GB",
721
+ SPANISH_ES = "es-ES",
722
+ SPANISH_MX = "es-MX",
723
+ FRENCH_FR = "fr-FR",
724
+ GERMAN_DE = "de-DE",
725
+ ITALIAN_IT = "it-IT",
726
+ PORTUGUESE_BR = "pt-BR",
727
+ JAPANESE_JP = "ja-JP",
728
+ KOREAN_KR = "ko-KR",
729
+ CHINESE_CN = "zh-CN",
730
+ CHINESE_TW = "zh-TW"
731
+ }
732
+
679
733
  /**
680
734
  * Error Exception Types
681
735
  *
@@ -1371,6 +1425,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
1371
1425
  prefixId: z.ZodOptional<z.ZodString>;
1372
1426
  prefixTextToRemove: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1373
1427
  audioMetricsIntervalMs: z.ZodOptional<z.ZodNumber>;
1428
+ appendSearch: z.ZodOptional<z.ZodBoolean>;
1374
1429
  debugCommand: z.ZodOptional<z.ZodObject<{
1375
1430
  enableDebugLog: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1376
1431
  enableAudioStorage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
@@ -1413,6 +1468,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
1413
1468
  prefixId?: string | undefined;
1414
1469
  prefixTextToRemove?: string[] | undefined;
1415
1470
  audioMetricsIntervalMs?: number | undefined;
1471
+ appendSearch?: boolean | undefined;
1416
1472
  debugCommand?: {
1417
1473
  enableDebugLog: boolean;
1418
1474
  enableAudioStorage: boolean;
@@ -1445,6 +1501,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
1445
1501
  prefixId?: string | undefined;
1446
1502
  prefixTextToRemove?: string[] | undefined;
1447
1503
  audioMetricsIntervalMs?: number | undefined;
1504
+ appendSearch?: boolean | undefined;
1448
1505
  debugCommand?: {
1449
1506
  enableDebugLog?: boolean | undefined;
1450
1507
  enableAudioStorage?: boolean | undefined;
@@ -1676,6 +1733,29 @@ interface ASRRequestConfig {
1676
1733
  * @example 500
1677
1734
  */
1678
1735
  audioMetricsIntervalMs?: number;
1736
+ /**
1737
+ * Opt-in: round-trip Deepgram `search` phrase hits into the transcript.
1738
+ *
1739
+ * When `true` AND the resolved provider/model is **deepgram nova-2** AND the
1740
+ * GameContext `gamePhase` is `'Solve Puzzle'`, every Deepgram Results event
1741
+ * with a `channel.search` hit at confidence ≥ 0.6 has the original query
1742
+ * prepended to the transcript text delivered to the client. This restores
1743
+ * parity with the legacy Roku→Deepgram WoF Puzzle-Solve path where the
1744
+ * phrase round-trip lets downstream NLU match multi-word puzzle solutions
1745
+ * even when nova-2's primary transcription drifts.
1746
+ *
1747
+ * Default: `false` (no prepend; transcript is whatever nova-2 produces).
1748
+ *
1749
+ * Scope guard rationale:
1750
+ * - nova-2 only: nova-3 / flux do not need this (they handle phrase
1751
+ * spotting differently and the prepend would only add noise).
1752
+ * - Solve-Puzzle scene only: other WoF scenes (Letter-Guess,
1753
+ * Bonus-Round, etc.) do NOT want the slotMap phrase prepended — only
1754
+ * Puzzle-Solve depends on the phrase round-trip.
1755
+ *
1756
+ * @default false
1757
+ */
1758
+ appendSearch?: boolean;
1679
1759
  /**
1680
1760
  * Optional fallback ASR configurations
1681
1761
  *
@@ -1736,7 +1816,10 @@ declare enum GeminiModel {
1736
1816
  }
1737
1817
 
1738
1818
  /**
1739
- * OpenAI Model Types
1819
+ * OpenAI Batch API Model Types (HTTP API)
1820
+ * @see https://platform.openai.com/docs/guides/speech-to-text
1821
+ *
1822
+ * Note: For openai-realtime provider models, see OpenAIRealtimeModel in provider.types.ts
1740
1823
  */
1741
1824
  declare enum OpenAIModel {
1742
1825
  WHISPER_1 = "whisper-1"
@@ -2934,5 +3017,5 @@ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null |
2934
3017
  declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
2935
3018
  declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
2936
3019
 
2937
- export { AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
3020
+ export { AmazonNovaSonicModel, AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
2938
3021
  export type { ASRRequestConfig, ASRRequestV1, AudioMetricsResultV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
package/dist/index.d.ts CHANGED
@@ -11,6 +11,6 @@ export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, Tran
11
11
  export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
12
12
  export { AudioEncoding } from '@recog/websocket';
13
13
  export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
14
- type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type AudioMetricsResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, BedrockModel, AwsTranscribeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
14
+ type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type AudioMetricsResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, BedrockModel, AwsTranscribeModel, AmazonNovaSonicModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
15
15
  export { getRecognitionServiceBase, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getRecognitionServiceHost, getRecognitionConductorBase, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionConductorHost, normalizeStage, RECOGNITION_SERVICE_BASES, RECOGNITION_CONDUCTOR_BASES } from '@recog/shared-config';
16
16
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,oBAAoB,EACpB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
package/dist/index.js CHANGED
@@ -3749,6 +3749,7 @@ var RecognitionProvider;
3749
3749
  RecognitionProvider2["BEDROCK"] = "bedrock";
3750
3750
  RecognitionProvider2["INWORLD_STT"] = "inworld-stt";
3751
3751
  RecognitionProvider2["AWS_TRANSCRIBE"] = "aws-transcribe";
3752
+ RecognitionProvider2["AMAZON_NOVA_SONIC"] = "amazon-nova-sonic";
3752
3753
  RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3753
3754
  RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
3754
3755
  })(RecognitionProvider || (RecognitionProvider = {}));
@@ -3799,6 +3800,7 @@ var ElevenLabsModel;
3799
3800
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3800
3801
  var OpenAIRealtimeModel;
3801
3802
  (function(OpenAIRealtimeModel2) {
3803
+ OpenAIRealtimeModel2["GPT_REALTIME_WHISPER"] = "gpt-realtime-whisper";
3802
3804
  OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
3803
3805
  OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
3804
3806
  })(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
@@ -3829,6 +3831,11 @@ var AwsTranscribeModel;
3829
3831
  (function(AwsTranscribeModel2) {
3830
3832
  AwsTranscribeModel2["DEFAULT"] = "default";
3831
3833
  })(AwsTranscribeModel || (AwsTranscribeModel = {}));
3834
+ var AmazonNovaSonicModel;
3835
+ (function(AmazonNovaSonicModel2) {
3836
+ AmazonNovaSonicModel2["AMAZON_NOVA_SONIC_V1"] = "amazon.nova-sonic-v1:0";
3837
+ AmazonNovaSonicModel2["AMAZON_NOVA_2_SONIC"] = "amazon.nova-2-sonic-v1:0";
3838
+ })(AmazonNovaSonicModel || (AmazonNovaSonicModel = {}));
3832
3839
  var SelfServeVllmModel;
3833
3840
  (function(SelfServeVllmModel2) {
3834
3841
  SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
@@ -3845,6 +3852,18 @@ var RecognitionResultTypeV1;
3845
3852
  RecognitionResultTypeV12["AUDIO_METRICS"] = "AudioMetrics";
3846
3853
  RecognitionResultTypeV12["SESSION_CONFIGURED"] = "SessionConfigured";
3847
3854
  })(RecognitionResultTypeV1 || (RecognitionResultTypeV1 = {}));
3855
+ var DetectionTypeV1;
3856
+ (function(DetectionTypeV12) {
3857
+ DetectionTypeV12["SEARCH"] = "search";
3858
+ })(DetectionTypeV1 || (DetectionTypeV1 = {}));
3859
+ var DetectionV1Schema = z.object({
3860
+ type: z.nativeEnum(DetectionTypeV1),
3861
+ query: z.string(),
3862
+ score: z.number().min(0).max(1),
3863
+ startMs: z.number().optional(),
3864
+ endMs: z.number().optional()
3865
+ // Audio time (ms from stream start) where the hit ends
3866
+ });
3848
3867
  var TranscriptionResultSchemaV1 = z.object({
3849
3868
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
3850
3869
  audioUtteranceId: z.string(),
@@ -3863,8 +3882,9 @@ var TranscriptionResultSchemaV1 = z.object({
3863
3882
  endTimestamp: z.number().optional(),
3864
3883
  receivedAtMs: z.number().optional(),
3865
3884
  accumulatedAudioTimeMs: z.number().optional(),
3866
- rawAudioTimeMs: z.number().optional()
3867
- // Total audio duration sent to provider (includes prefix)
3885
+ rawAudioTimeMs: z.number().optional(),
3886
+ detections: z.array(DetectionV1Schema).optional()
3887
+ // Provider-reported phrase detections (query + score, optionally startMs/endMs). Always populated when the provider returns hits, regardless of `appendSearch`. Other providers leave this undefined.
3868
3888
  });
3869
3889
  var FunctionCallResultSchemaV1 = z.object({
3870
3890
  type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
@@ -4117,7 +4137,15 @@ var TranscriptMessageSchema = z.object({
4117
4137
  * @example true
4118
4138
  * @default false
4119
4139
  */
4120
- is_fallback: z.boolean().optional()
4140
+ is_fallback: z.boolean().optional(),
4141
+ /**
4142
+ * Provider-reported phrase detections (query + score, optionally
4143
+ * startMs/endMs). Always populated when the provider returns hits,
4144
+ * regardless of `appendSearch` or scene gating. Other providers leave
4145
+ * this undefined.
4146
+ * @example [{ query: 'justin bieber one time', score: 0.78, startMs: 1200, endMs: 2800 }]
4147
+ */
4148
+ detections: z.array(DetectionV1Schema).optional()
4121
4149
  });
4122
4150
  var VADEndSignalSchema = z.object({
4123
4151
  type: z.literal(ProviderMessageType.VAD_END_SIGNAL),
@@ -4457,6 +4485,9 @@ var ASRRequestSchemaV1 = z.object({
4457
4485
  // Streaming audio metrics opt-in: when > 0, server emits AudioMetrics results throttled to this interval (ms).
4458
4486
  // Undefined / 0 disables streaming audio metrics (final metrics still embedded in Metadata).
4459
4487
  audioMetricsIntervalMs: z.number().optional(),
4488
+ // Opt-in: round-trip Deepgram `search` phrase hits into the transcript.
4489
+ // Active only when (model = deepgram nova-2) AND (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch in asr-config.types.ts for full semantics.
4490
+ appendSearch: z.boolean().optional(),
4460
4491
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
4461
4492
  debugCommand: RequestDebugCommandSchema
4462
4493
  });
@@ -5853,6 +5884,12 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5853
5884
  // Streaming audio metrics opt-in (ms interval). Server only forwards metrics if > 0.
5854
5885
  ...this.config.asrRequestConfig.audioMetricsIntervalMs !== void 0 && {
5855
5886
  audioMetricsIntervalMs: this.config.asrRequestConfig.audioMetricsIntervalMs
5887
+ },
5888
+ // Opt-in: round-trip Deepgram nova-2 search-phrase hits into the
5889
+ // transcript. Only fires server-side when (model = nova-2) AND
5890
+ // (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch.
5891
+ ...this.config.asrRequestConfig.appendSearch !== void 0 && {
5892
+ appendSearch: this.config.asrRequestConfig.appendSearch
5856
5893
  }
5857
5894
  };
5858
5895
  super.sendMessage(
@@ -6704,6 +6741,7 @@ function createSimplifiedVGFClient(config) {
6704
6741
  return new SimplifiedVGFRecognitionClient(config);
6705
6742
  }
6706
6743
  export {
6744
+ AmazonNovaSonicModel,
6707
6745
  AudioEncoding,
6708
6746
  AwsTranscribeModel,
6709
6747
  BedrockModel,