@volley/recognition-client-sdk 0.1.689 → 0.1.767

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their public registry.
@@ -21,6 +21,9 @@ declare enum RecognitionProvider {
21
21
  MISTRAL_VOXTRAL = "mistral-voxtral",
22
22
  CARTESIA = "cartesia",
23
23
  DASHSCOPE = "dashscope",
24
+ BEDROCK = "bedrock",
25
+ INWORLD_STT = "inworld-stt",
26
+ AWS_TRANSCRIBE = "aws-transcribe",
24
27
  TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
25
28
  TEST_ASR_STREAMING = "test-asr-streaming"
26
29
  }
@@ -119,6 +122,30 @@ declare enum DashScopeModel {
119
122
  QWEN3_ASR_FLASH_REALTIME_2602 = "qwen3-asr-flash-realtime-2026-02-10",
120
123
  QWEN3_ASR_FLASH_REALTIME = "qwen3-asr-flash-realtime"
121
124
  }
125
+ /**
126
+ * AWS Bedrock batch transcription models
127
+ * Accessed via AWS Bedrock InvokeModelWithResponseStream
128
+ * @see https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-mistral.html
129
+ */
130
+ declare enum BedrockModel {
131
+ VOXTRAL_MINI_3B_2507 = "mistral.voxtral-mini-3b-2507",
132
+ VOXTRAL_SMALL_24B_2507 = "mistral.voxtral-small-24b-2507"
133
+ }
134
+ /**
135
+ * Inworld AI STT models
136
+ * @see https://docs.inworld.ai/stt/overview
137
+ */
138
+ declare enum InworldSttModel {
139
+ INWORLD_STT_1 = "inworld/inworld-stt-1"
140
+ }
141
+ /**
142
+ * AWS Transcribe streaming model
143
+ * AWS Transcribe uses a single default streaming model
144
+ * @see https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
145
+ */
146
+ declare enum AwsTranscribeModel {
147
+ DEFAULT = "default"
148
+ }
122
149
  /**
123
150
  * Self-serve vLLM batch transcription models
124
151
  * Backed by recognition-inference / RunPod `/transcribe`
@@ -129,7 +156,7 @@ declare enum SelfServeVllmModel {
129
156
  /**
130
157
  * Type alias for any model from any provider
131
158
  */
132
- type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | SelfServeVllmModel | string;
159
+ type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | InworldSttModel | SelfServeVllmModel | BedrockModel | AwsTranscribeModel | string;
133
160
 
134
161
  /**
135
162
  * Audio encoding types
@@ -584,6 +611,61 @@ declare const SessionConfiguredSchemaV1: z.ZodObject<{
584
611
  providerConfig?: string | undefined;
585
612
  }>;
586
613
  type SessionConfiguredV1 = z.infer<typeof SessionConfiguredSchemaV1>;
614
+ /**
615
+ * Audio metrics result V1 - contains audio quality metrics
616
+ * Extracted from raw PCM audio without AI/ML, pure signal analysis
617
+ * Used to detect mic issues (muted, low gain, clipping) and audio quality
618
+ */
619
+ declare const AudioMetricsResultSchemaV1: z.ZodObject<{
620
+ type: z.ZodLiteral<RecognitionResultTypeV1.AUDIO_METRICS>;
621
+ valid: z.ZodBoolean;
622
+ audioBeginMs: z.ZodNumber;
623
+ audioEndMs: z.ZodNumber;
624
+ maxVolume: z.ZodNumber;
625
+ minVolume: z.ZodNumber;
626
+ avgVolume: z.ZodNumber;
627
+ peakVolumeDb: z.ZodNullable<z.ZodNumber>;
628
+ avgVolumeDb: z.ZodNullable<z.ZodNumber>;
629
+ silenceRatio: z.ZodNumber;
630
+ clippingRatio: z.ZodNumber;
631
+ snrEstimate: z.ZodNullable<z.ZodNumber>;
632
+ lastNonSilenceMs: z.ZodNumber;
633
+ timestamp: z.ZodString;
634
+ isFinal: z.ZodOptional<z.ZodBoolean>;
635
+ }, "strip", z.ZodTypeAny, {
636
+ valid: boolean;
637
+ type: RecognitionResultTypeV1.AUDIO_METRICS;
638
+ audioBeginMs: number;
639
+ audioEndMs: number;
640
+ maxVolume: number;
641
+ minVolume: number;
642
+ avgVolume: number;
643
+ silenceRatio: number;
644
+ clippingRatio: number;
645
+ snrEstimate: number | null;
646
+ lastNonSilenceMs: number;
647
+ timestamp: string;
648
+ peakVolumeDb: number | null;
649
+ avgVolumeDb: number | null;
650
+ isFinal?: boolean | undefined;
651
+ }, {
652
+ valid: boolean;
653
+ type: RecognitionResultTypeV1.AUDIO_METRICS;
654
+ audioBeginMs: number;
655
+ audioEndMs: number;
656
+ maxVolume: number;
657
+ minVolume: number;
658
+ avgVolume: number;
659
+ silenceRatio: number;
660
+ clippingRatio: number;
661
+ snrEstimate: number | null;
662
+ lastNonSilenceMs: number;
663
+ timestamp: string;
664
+ peakVolumeDb: number | null;
665
+ avgVolumeDb: number | null;
666
+ isFinal?: boolean | undefined;
667
+ }>;
668
+ type AudioMetricsResultV1 = z.infer<typeof AudioMetricsResultSchemaV1>;
587
669
 
588
670
  /**
589
671
  * Recognition Context Types V1
@@ -856,6 +938,17 @@ interface ASRRequestConfig {
856
938
  * ```
857
939
  */
858
940
  providerOptions?: Record<string, any>;
941
+ /**
942
+ * Streaming audio metrics opt-in interval (ms).
943
+ *
944
+ * When set to a positive number, server forwards AudioMetrics results to the
945
+ * client over the WebSocket, throttled so at most one result is sent per
946
+ * `audioMetricsIntervalMs`. Undefined / 0 disables streaming audio metrics
947
+ * (final metrics still embedded in the Metadata result).
948
+ *
949
+ * @example 500
950
+ */
951
+ audioMetricsIntervalMs?: number;
859
952
  /**
860
953
  * Optional fallback ASR configurations
861
954
  *
@@ -1145,6 +1238,8 @@ interface IRecognitionClientConfig {
1145
1238
  platform?: string;
1146
1239
  /** Experiment cohort (optional). Defaults to 'control' if not provided. */
1147
1240
  experimentCohort?: 'treatment' | 'control';
1241
+ /** Explicit major version for ASR config selection (e.g. 1, 3). Takes precedence over experimentCohort. */
1242
+ experimentMajorVersion?: number;
1148
1243
  /** Callback when transcript is received */
1149
1244
  onTranscript?: (result: TranscriptionResultV1) => void;
1150
1245
  /**
@@ -1154,6 +1249,12 @@ interface IRecognitionClientConfig {
1154
1249
  onFunctionCall?: (result: FunctionCallResultV1) => void;
1155
1250
  /** Callback when metadata is received. Only once after transcription is complete.*/
1156
1251
  onMetadata?: (metadata: MetadataResultV1) => void;
1252
+ /**
1253
+ * Callback when streaming audio metrics arrive (volume, silence ratio, clipping, SNR, etc.).
1254
+ * Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
1255
+ * Final metrics still arrive embedded in `onMetadata.audioMetrics`.
1256
+ */
1257
+ onAudioMetrics?: (metrics: AudioMetricsResultV1) => void;
1157
1258
  /** Callback when session is configured with actual ASR provider/model (optional) */
1158
1259
  onSessionConfigured?: (config: SessionConfiguredV1) => void;
1159
1260
  /** Callback when error occurs */
@@ -4,7 +4,7 @@
4
4
  * Simple builder pattern for RealTimeTwoWayWebSocketRecognitionClientConfig
5
5
  */
6
6
  import type { RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl } from './recognition-client.types.js';
7
- import type { ASRRequestConfig, GameContextV1, TranscriptionResultV1, MetadataResultV1, SessionConfiguredV1, ErrorResultV1, Stage } from '@recog/shared-types';
7
+ import type { ASRRequestConfig, GameContextV1, TranscriptionResultV1, MetadataResultV1, SessionConfiguredV1, AudioMetricsResultV1, ErrorResultV1, Stage } from '@recog/shared-types';
8
8
  /**
9
9
  * Builder for RealTimeTwoWayWebSocketRecognitionClientConfig
10
10
  *
@@ -90,6 +90,11 @@ export declare class ConfigBuilder {
90
90
  * Set experiment cohort (optional, defaults to 'control')
91
91
  */
92
92
  experimentCohort(cohort: 'treatment' | 'control'): this;
93
+ /**
94
+ * Set explicit major version for ASR config selection.
95
+ * Takes precedence over experimentCohort-based version resolution.
96
+ */
97
+ experimentMajorVersion(version: number): this;
93
98
  /**
94
99
  * Set transcript callback
95
100
  */
@@ -102,6 +107,11 @@ export declare class ConfigBuilder {
102
107
  * Set session configured callback (optional)
103
108
  */
104
109
  onSessionConfigured(callback: (config: SessionConfiguredV1) => void): this;
110
+ /**
111
+ * Set streaming audio metrics callback (optional).
112
+ * Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
113
+ */
114
+ onAudioMetrics(callback: (metrics: AudioMetricsResultV1) => void): this;
105
115
  /**
106
116
  * Set error callback
107
117
  */
@@ -1 +1 @@
1
- {"version":3,"file":"config-builder.d.ts","sourceRoot":"","sources":["../src/config-builder.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,8CAA8C,EAC9C,sBAAsB,EACvB,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EACV,gBAAgB,EAChB,aAAa,EACb,qBAAqB,EACrB,gBAAgB,EAChB,mBAAmB,EACnB,aAAa,EACb,KAAK,EACN,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAA+D;IAE7E;;;OAGG;IACH,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAKtB;;;;;;;;OAQG;IACH,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAKhD;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAKzC;;;OAGG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,IAAI,EAAE,sBAAsB,EAAE,GAAG,IAAI;IAKlD;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK/B;;OAEG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK1B;;OAEG;IACH,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK3B;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAKhC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,WAAW,GAAG,SAAS,GAAG,IAAI;IAKvD;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,GAAG,IAAI;IAKrE;;OAEG;IACH,UAAU,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAKhE;;OAEG;IACH,mBAAmB,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,mBAAmB,KAAK,IAAI,GAAG,IAAI;IAK1E;;OAEG;IACH,OAAO,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,GAAG,IAAI;IAKvD;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,IAAI;IAKvC;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI;IAKtE;;OAEG;IACH,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKjC;;OAEG;IACH,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK3C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;IAKrC;;OAEG;IACH,MAAM,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,GAAG,KAAK,IAAI,GAAG,IAAI;IAKvG
;;OAEG;IACH,KAAK,IAAI,8CAA8C;CAGxD"}
1
+ {"version":3,"file":"config-builder.d.ts","sourceRoot":"","sources":["../src/config-builder.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,8CAA8C,EAC9C,sBAAsB,EACvB,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EACV,gBAAgB,EAChB,aAAa,EACb,qBAAqB,EACrB,gBAAgB,EAChB,mBAAmB,EACnB,oBAAoB,EACpB,aAAa,EACb,KAAK,EACN,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAA+D;IAE7E;;;OAGG;IACH,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAKtB;;;;;;;;OAQG;IACH,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAKhD;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAKzC;;;OAGG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,IAAI,EAAE,sBAAsB,EAAE,GAAG,IAAI;IAKlD;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK/B;;OAEG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK1B;;OAEG;IACH,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK3B;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAKhC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,WAAW,GAAG,SAAS,GAAG,IAAI;IAKvD;;;OAGG;IACH,sBAAsB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK7C;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,GAAG,IAAI;IAKrE;;OAEG;IACH,UAAU,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAKhE;;OAEG;IACH,mBAAmB,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,mBAAmB,KAAK,IAAI,GAAG,IAAI;IAK1E;;;OAGG;IACH,cAAc,CAAC,QAAQ,EAAE,CAAC,OAAO,EAAE,oBAAoB,KAAK,IAAI,GAAG,IAAI;IAKvE;;OAEG;IACH,OAAO,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,GAAG,IAAI;IAKvD;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,IAAI;IAKvC;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI;IAKtE;;OAEG;IACH,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKjC;;OAEG;IACH,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK3C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;IAKrC;;OA
EG;IAEH,MAAM,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,GAAG,KAAK,IAAI,GAAG,IAAI;IAKvG;;OAEG;IACH,KAAK,IAAI,8CAA8C;CAGxD"}
@@ -21,6 +21,9 @@ declare enum RecognitionProvider {
21
21
  MISTRAL_VOXTRAL = "mistral-voxtral",
22
22
  CARTESIA = "cartesia",
23
23
  DASHSCOPE = "dashscope",
24
+ BEDROCK = "bedrock",
25
+ INWORLD_STT = "inworld-stt",
26
+ AWS_TRANSCRIBE = "aws-transcribe",
24
27
  TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
25
28
  TEST_ASR_STREAMING = "test-asr-streaming"
26
29
  }
@@ -119,6 +122,30 @@ declare enum DashScopeModel {
119
122
  QWEN3_ASR_FLASH_REALTIME_2602 = "qwen3-asr-flash-realtime-2026-02-10",
120
123
  QWEN3_ASR_FLASH_REALTIME = "qwen3-asr-flash-realtime"
121
124
  }
125
+ /**
126
+ * AWS Bedrock batch transcription models
127
+ * Accessed via AWS Bedrock InvokeModelWithResponseStream
128
+ * @see https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-mistral.html
129
+ */
130
+ declare enum BedrockModel {
131
+ VOXTRAL_MINI_3B_2507 = "mistral.voxtral-mini-3b-2507",
132
+ VOXTRAL_SMALL_24B_2507 = "mistral.voxtral-small-24b-2507"
133
+ }
134
+ /**
135
+ * Inworld AI STT models
136
+ * @see https://docs.inworld.ai/stt/overview
137
+ */
138
+ declare enum InworldSttModel {
139
+ INWORLD_STT_1 = "inworld/inworld-stt-1"
140
+ }
141
+ /**
142
+ * AWS Transcribe streaming model
143
+ * AWS Transcribe uses a single default streaming model
144
+ * @see https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
145
+ */
146
+ declare enum AwsTranscribeModel {
147
+ DEFAULT = "default"
148
+ }
122
149
  /**
123
150
  * Self-serve vLLM batch transcription models
124
151
  * Backed by recognition-inference / RunPod `/transcribe`
@@ -129,7 +156,7 @@ declare enum SelfServeVllmModel {
129
156
  /**
130
157
  * Type alias for any model from any provider
131
158
  */
132
- type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | SelfServeVllmModel | string;
159
+ type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | InworldSttModel | SelfServeVllmModel | BedrockModel | AwsTranscribeModel | string;
133
160
 
134
161
  /**
135
162
  * Audio encoding types
@@ -593,6 +620,61 @@ declare const SessionConfiguredSchemaV1: z.ZodObject<{
593
620
  providerConfig?: string | undefined;
594
621
  }>;
595
622
  type SessionConfiguredV1 = z.infer<typeof SessionConfiguredSchemaV1>;
623
+ /**
624
+ * Audio metrics result V1 - contains audio quality metrics
625
+ * Extracted from raw PCM audio without AI/ML, pure signal analysis
626
+ * Used to detect mic issues (muted, low gain, clipping) and audio quality
627
+ */
628
+ declare const AudioMetricsResultSchemaV1: z.ZodObject<{
629
+ type: z.ZodLiteral<RecognitionResultTypeV1.AUDIO_METRICS>;
630
+ valid: z.ZodBoolean;
631
+ audioBeginMs: z.ZodNumber;
632
+ audioEndMs: z.ZodNumber;
633
+ maxVolume: z.ZodNumber;
634
+ minVolume: z.ZodNumber;
635
+ avgVolume: z.ZodNumber;
636
+ peakVolumeDb: z.ZodNullable<z.ZodNumber>;
637
+ avgVolumeDb: z.ZodNullable<z.ZodNumber>;
638
+ silenceRatio: z.ZodNumber;
639
+ clippingRatio: z.ZodNumber;
640
+ snrEstimate: z.ZodNullable<z.ZodNumber>;
641
+ lastNonSilenceMs: z.ZodNumber;
642
+ timestamp: z.ZodString;
643
+ isFinal: z.ZodOptional<z.ZodBoolean>;
644
+ }, "strip", z.ZodTypeAny, {
645
+ valid: boolean;
646
+ type: RecognitionResultTypeV1.AUDIO_METRICS;
647
+ audioBeginMs: number;
648
+ audioEndMs: number;
649
+ maxVolume: number;
650
+ minVolume: number;
651
+ avgVolume: number;
652
+ silenceRatio: number;
653
+ clippingRatio: number;
654
+ snrEstimate: number | null;
655
+ lastNonSilenceMs: number;
656
+ timestamp: string;
657
+ peakVolumeDb: number | null;
658
+ avgVolumeDb: number | null;
659
+ isFinal?: boolean | undefined;
660
+ }, {
661
+ valid: boolean;
662
+ type: RecognitionResultTypeV1.AUDIO_METRICS;
663
+ audioBeginMs: number;
664
+ audioEndMs: number;
665
+ maxVolume: number;
666
+ minVolume: number;
667
+ avgVolume: number;
668
+ silenceRatio: number;
669
+ clippingRatio: number;
670
+ snrEstimate: number | null;
671
+ lastNonSilenceMs: number;
672
+ timestamp: string;
673
+ peakVolumeDb: number | null;
674
+ avgVolumeDb: number | null;
675
+ isFinal?: boolean | undefined;
676
+ }>;
677
+ type AudioMetricsResultV1 = z.infer<typeof AudioMetricsResultSchemaV1>;
596
678
 
597
679
  /**
598
680
  * Error Exception Types
@@ -1288,6 +1370,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
1288
1370
  prefixMode: z.ZodDefault<z.ZodOptional<z.ZodNativeEnum<typeof PrefixMode>>>;
1289
1371
  prefixId: z.ZodOptional<z.ZodString>;
1290
1372
  prefixTextToRemove: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1373
+ audioMetricsIntervalMs: z.ZodOptional<z.ZodNumber>;
1291
1374
  debugCommand: z.ZodOptional<z.ZodObject<{
1292
1375
  enableDebugLog: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1293
1376
  enableAudioStorage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
@@ -1329,6 +1412,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
1329
1412
  }[] | undefined;
1330
1413
  prefixId?: string | undefined;
1331
1414
  prefixTextToRemove?: string[] | undefined;
1415
+ audioMetricsIntervalMs?: number | undefined;
1332
1416
  debugCommand?: {
1333
1417
  enableDebugLog: boolean;
1334
1418
  enableAudioStorage: boolean;
@@ -1360,6 +1444,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
1360
1444
  prefixMode?: PrefixMode | undefined;
1361
1445
  prefixId?: string | undefined;
1362
1446
  prefixTextToRemove?: string[] | undefined;
1447
+ audioMetricsIntervalMs?: number | undefined;
1363
1448
  debugCommand?: {
1364
1449
  enableDebugLog?: boolean | undefined;
1365
1450
  enableAudioStorage?: boolean | undefined;
@@ -1580,6 +1665,17 @@ interface ASRRequestConfig {
1580
1665
  * ```
1581
1666
  */
1582
1667
  providerOptions?: Record<string, any>;
1668
+ /**
1669
+ * Streaming audio metrics opt-in interval (ms).
1670
+ *
1671
+ * When set to a positive number, server forwards AudioMetrics results to the
1672
+ * client over the WebSocket, throttled so at most one result is sent per
1673
+ * `audioMetricsIntervalMs`. Undefined / 0 disables streaming audio metrics
1674
+ * (final metrics still embedded in the Metadata result).
1675
+ *
1676
+ * @example 500
1677
+ */
1678
+ audioMetricsIntervalMs?: number;
1583
1679
  /**
1584
1680
  * Optional fallback ASR configurations
1585
1681
  *
@@ -1905,6 +2001,8 @@ interface IRecognitionClientConfig {
1905
2001
  platform?: string;
1906
2002
  /** Experiment cohort (optional). Defaults to 'control' if not provided. */
1907
2003
  experimentCohort?: 'treatment' | 'control';
2004
+ /** Explicit major version for ASR config selection (e.g. 1, 3). Takes precedence over experimentCohort. */
2005
+ experimentMajorVersion?: number;
1908
2006
  /** Callback when transcript is received */
1909
2007
  onTranscript?: (result: TranscriptionResultV1) => void;
1910
2008
  /**
@@ -1914,6 +2012,12 @@ interface IRecognitionClientConfig {
1914
2012
  onFunctionCall?: (result: FunctionCallResultV1) => void;
1915
2013
  /** Callback when metadata is received. Only once after transcription is complete.*/
1916
2014
  onMetadata?: (metadata: MetadataResultV1) => void;
2015
+ /**
2016
+ * Callback when streaming audio metrics arrive (volume, silence ratio, clipping, SNR, etc.).
2017
+ * Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
2018
+ * Final metrics still arrive embedded in `onMetadata.audioMetrics`.
2019
+ */
2020
+ onAudioMetrics?: (metrics: AudioMetricsResultV1) => void;
1917
2021
  /** Callback when session is configured with actual ASR provider/model (optional) */
1918
2022
  onSessionConfigured?: (config: SessionConfiguredV1) => void;
1919
2023
  /** Callback when error occurs */
@@ -2346,6 +2450,11 @@ declare class ConfigBuilder {
2346
2450
  * Set experiment cohort (optional, defaults to 'control')
2347
2451
  */
2348
2452
  experimentCohort(cohort: 'treatment' | 'control'): this;
2453
+ /**
2454
+ * Set explicit major version for ASR config selection.
2455
+ * Takes precedence over experimentCohort-based version resolution.
2456
+ */
2457
+ experimentMajorVersion(version: number): this;
2349
2458
  /**
2350
2459
  * Set transcript callback
2351
2460
  */
@@ -2358,6 +2467,11 @@ declare class ConfigBuilder {
2358
2467
  * Set session configured callback (optional)
2359
2468
  */
2360
2469
  onSessionConfigured(callback: (config: SessionConfiguredV1) => void): this;
2470
+ /**
2471
+ * Set streaming audio metrics callback (optional).
2472
+ * Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
2473
+ */
2474
+ onAudioMetrics(callback: (metrics: AudioMetricsResultV1) => void): this;
2361
2475
  /**
2362
2476
  * Set error callback
2363
2477
  */
@@ -2820,5 +2934,5 @@ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null |
2820
2934
  declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
2821
2935
  declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
2822
2936
 
2823
- export { AudioEncoding, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
2824
- export type { ASRRequestConfig, ASRRequestV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
2937
+ export { AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
2938
+ export type { ASRRequestConfig, ASRRequestV1, AudioMetricsResultV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
package/dist/index.d.ts CHANGED
@@ -11,6 +11,6 @@ export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, Tran
11
11
  export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
12
12
  export { AudioEncoding } from '@recog/websocket';
13
13
  export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
14
- type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
14
+ type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type AudioMetricsResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, BedrockModel, AwsTranscribeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
15
15
  export { getRecognitionServiceBase, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getRecognitionServiceHost, getRecognitionConductorBase, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionConductorHost, normalizeStage, RECOGNITION_SERVICE_BASES, RECOGNITION_CONDUCTOR_BASES } from '@recog/shared-config';
16
16
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
package/dist/index.js CHANGED
@@ -3746,6 +3746,9 @@ var RecognitionProvider;
3746
3746
  RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
3747
3747
  RecognitionProvider2["CARTESIA"] = "cartesia";
3748
3748
  RecognitionProvider2["DASHSCOPE"] = "dashscope";
3749
+ RecognitionProvider2["BEDROCK"] = "bedrock";
3750
+ RecognitionProvider2["INWORLD_STT"] = "inworld-stt";
3751
+ RecognitionProvider2["AWS_TRANSCRIBE"] = "aws-transcribe";
3749
3752
  RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3750
3753
  RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
3751
3754
  })(RecognitionProvider || (RecognitionProvider = {}));
@@ -3813,6 +3816,19 @@ var DashScopeModel;
3813
3816
  DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
3814
3817
  DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
3815
3818
  })(DashScopeModel || (DashScopeModel = {}));
3819
+ var BedrockModel;
3820
+ (function(BedrockModel2) {
3821
+ BedrockModel2["VOXTRAL_MINI_3B_2507"] = "mistral.voxtral-mini-3b-2507";
3822
+ BedrockModel2["VOXTRAL_SMALL_24B_2507"] = "mistral.voxtral-small-24b-2507";
3823
+ })(BedrockModel || (BedrockModel = {}));
3824
+ var InworldSttModel;
3825
+ (function(InworldSttModel2) {
3826
+ InworldSttModel2["INWORLD_STT_1"] = "inworld/inworld-stt-1";
3827
+ })(InworldSttModel || (InworldSttModel = {}));
3828
+ var AwsTranscribeModel;
3829
+ (function(AwsTranscribeModel2) {
3830
+ AwsTranscribeModel2["DEFAULT"] = "default";
3831
+ })(AwsTranscribeModel || (AwsTranscribeModel = {}));
3816
3832
  var SelfServeVllmModel;
3817
3833
  (function(SelfServeVllmModel2) {
3818
3834
  SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
@@ -3938,9 +3954,9 @@ var ErrorResultSchemaV1 = z.object({
3938
3954
  // Detailed description
3939
3955
  });
3940
3956
  var ClientControlActionV1;
3941
- (function(ClientControlActionV13) {
3942
- ClientControlActionV13["READY_FOR_UPLOADING_RECORDING"] = "ready_for_uploading_recording";
3943
- ClientControlActionV13["STOP_RECORDING"] = "stop_recording";
3957
+ (function(ClientControlActionV12) {
3958
+ ClientControlActionV12["READY_FOR_UPLOADING_RECORDING"] = "ready_for_uploading_recording";
3959
+ ClientControlActionV12["STOP_RECORDING"] = "stop_recording";
3944
3960
  })(ClientControlActionV1 || (ClientControlActionV1 = {}));
3945
3961
  var ClientControlActionsV1 = z.nativeEnum(ClientControlActionV1);
3946
3962
  var ClientControlMessageSchemaV1 = z.object({
@@ -3973,6 +3989,8 @@ var AudioMetricsResultSchemaV1 = z.object({
3973
3989
  maxVolume: z.number(),
3974
3990
  minVolume: z.number(),
3975
3991
  avgVolume: z.number(),
3992
+ peakVolumeDb: z.number().nullable(),
3993
+ avgVolumeDb: z.number().nullable(),
3976
3994
  silenceRatio: z.number(),
3977
3995
  clippingRatio: z.number(),
3978
3996
  snrEstimate: z.number().nullable(),
@@ -3989,7 +4007,8 @@ var RecognitionResultSchemaV1 = z.discriminatedUnion("type", [
3989
4007
  // P1 - P2
3990
4008
  FunctionCallResultSchemaV1,
3991
4009
  ClientControlMessageSchemaV1,
3992
- SessionConfiguredSchemaV1
4010
+ SessionConfiguredSchemaV1,
4011
+ AudioMetricsResultSchemaV1
3993
4012
  ]);
3994
4013
 
3995
4014
  // ../../libs/types/dist/provider-transcription.types.js
@@ -4435,6 +4454,9 @@ var ASRRequestSchemaV1 = z.object({
4435
4454
  prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
4436
4455
  prefixId: z.string().optional(),
4437
4456
  prefixTextToRemove: z.array(z.string()).optional(),
4457
+ // Streaming audio metrics opt-in: when > 0, server emits AudioMetrics results throttled to this interval (ms).
4458
+ // Undefined / 0 disables streaming audio metrics (final metrics still embedded in Metadata).
4459
+ audioMetricsIntervalMs: z.number().optional(),
4438
4460
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
4439
4461
  debugCommand: RequestDebugCommandSchema
4440
4462
  });
@@ -4456,8 +4478,9 @@ var RecognitionGameInfoSchema = z.object({
4456
4478
  /** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
4457
4479
  questionAnswerId: z.string().optional(),
4458
4480
  platform: z.string().optional(),
4459
- experimentCohort: z.enum(["treatment", "control"]).optional()
4460
- // Experiment cohort, defaults to 'control' if not provided
4481
+ experimentCohort: z.enum(["treatment", "control"]).optional(),
4482
+ experimentMajorVersion: z.number().int().optional()
4483
+ // Explicit major version for ASR config selection (e.g. 1, 3)
4461
4484
  });
4462
4485
  var RecognitionQueryMetadataSchema = z.object({
4463
4486
  audioUtteranceId: z.string(),
@@ -5132,6 +5155,9 @@ function buildWebSocketUrl(config) {
5132
5155
  if (config.experimentCohort) {
5133
5156
  url.searchParams.set("experimentCohort", config.experimentCohort);
5134
5157
  }
5158
+ if (config.experimentMajorVersion !== void 0) {
5159
+ url.searchParams.set("experimentMajorVersion", String(config.experimentMajorVersion));
5160
+ }
5135
5161
  return url.toString();
5136
5162
  }
5137
5163
 
@@ -5279,6 +5305,7 @@ var MessageHandler = class {
5279
5305
  /**
5280
5306
  * Handle incoming WebSocket message
5281
5307
  */
5308
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
5282
5309
  handleMessage(msg) {
5283
5310
  if (this.callbacks.logger) {
5284
5311
  this.callbacks.logger("debug", "[RecogSDK] Received WebSocket message", {
@@ -5317,6 +5344,9 @@ var MessageHandler = class {
5317
5344
  case RecognitionResultTypeV1.SESSION_CONFIGURED:
5318
5345
  this.callbacks.onSessionConfigured?.(msgData);
5319
5346
  break;
5347
+ case RecognitionResultTypeV1.AUDIO_METRICS:
5348
+ this.callbacks.onAudioMetrics?.(msgData);
5349
+ break;
5320
5350
  default:
5321
5351
  if (this.callbacks.logger) {
5322
5352
  this.callbacks.logger("debug", "[RecogSDK] Unknown message type", { type: msgType });
@@ -5429,7 +5459,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5429
5459
  ...config.platform && { platform: config.platform },
5430
5460
  ...config.gameContext && { gameContext: config.gameContext },
5431
5461
  ...config.gameId && { gameId: config.gameId },
5432
- ...config.experimentCohort && { experimentCohort: config.experimentCohort }
5462
+ ...config.experimentCohort && { experimentCohort: config.experimentCohort },
5463
+ ...config.experimentMajorVersion !== void 0 && { experimentMajorVersion: config.experimentMajorVersion }
5433
5464
  });
5434
5465
  super({
5435
5466
  url,
@@ -5463,6 +5494,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5463
5494
  onMetadata: config.onMetadata || (() => {
5464
5495
  }),
5465
5496
  onSessionConfigured: config.onSessionConfigured,
5497
+ onAudioMetrics: config.onAudioMetrics,
5466
5498
  onError: config.onError || (() => {
5467
5499
  }),
5468
5500
  onConnected: config.onConnected || (() => {
@@ -5491,6 +5523,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5491
5523
  onError: this.config.onError,
5492
5524
  onControlMessage: this.handleControlMessage.bind(this),
5493
5525
  onSessionConfigured: this.config.onSessionConfigured,
5526
+ onAudioMetrics: this.config.onAudioMetrics,
5494
5527
  ...this.config.logger && { logger: this.config.logger }
5495
5528
  });
5496
5529
  }
@@ -5816,6 +5849,10 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5816
5849
  // Include prefix text to remove if provided (for server-side prefix text removal)
5817
5850
  ...this.config.asrRequestConfig.prefixTextToRemove && {
5818
5851
  prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
5852
+ },
5853
+ // Streaming audio metrics opt-in (ms interval). Server only forwards metrics if > 0.
5854
+ ...this.config.asrRequestConfig.audioMetricsIntervalMs !== void 0 && {
5855
+ audioMetricsIntervalMs: this.config.asrRequestConfig.audioMetricsIntervalMs
5819
5856
  }
5820
5857
  };
5821
5858
  super.sendMessage(
@@ -6141,6 +6178,14 @@ var ConfigBuilder = class {
6141
6178
  this.config.experimentCohort = cohort;
6142
6179
  return this;
6143
6180
  }
6181
+ /**
6182
+ * Set explicit major version for ASR config selection.
6183
+ * Takes precedence over experimentCohort-based version resolution.
6184
+ */
6185
+ experimentMajorVersion(version) {
6186
+ this.config.experimentMajorVersion = version;
6187
+ return this;
6188
+ }
6144
6189
  /**
6145
6190
  * Set transcript callback
6146
6191
  */
@@ -6162,6 +6207,14 @@ var ConfigBuilder = class {
6162
6207
  this.config.onSessionConfigured = callback;
6163
6208
  return this;
6164
6209
  }
6210
+ /**
6211
+ * Set streaming audio metrics callback (optional).
6212
+ * Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
6213
+ */
6214
+ onAudioMetrics(callback) {
6215
+ this.config.onAudioMetrics = callback;
6216
+ return this;
6217
+ }
6165
6218
  /**
6166
6219
  * Set error callback
6167
6220
  */
@@ -6214,6 +6267,7 @@ var ConfigBuilder = class {
6214
6267
  /**
6215
6268
  * Set logger function
6216
6269
  */
6270
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
6217
6271
  logger(logger) {
6218
6272
  this.config.logger = logger;
6219
6273
  return this;
@@ -6651,6 +6705,8 @@ function createSimplifiedVGFClient(config) {
6651
6705
  }
6652
6706
  export {
6653
6707
  AudioEncoding,
6708
+ AwsTranscribeModel,
6709
+ BedrockModel,
6654
6710
  CartesiaModel,
6655
6711
  ClientControlActionV1,
6656
6712
  ClientState,