@volley/recognition-client-sdk 0.1.689 → 0.1.767
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +102 -1
- package/dist/config-builder.d.ts +11 -1
- package/dist/config-builder.d.ts.map +1 -1
- package/dist/index.bundled.d.ts +117 -3
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +63 -7
- package/dist/index.js.map +3 -3
- package/dist/recog-client-sdk.browser.js +44 -7
- package/dist/recog-client-sdk.browser.js.map +3 -3
- package/dist/recognition-client.d.ts.map +1 -1
- package/dist/recognition-client.types.d.ts +9 -1
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/utils/message-handler.d.ts +2 -1
- package/dist/utils/message-handler.d.ts.map +1 -1
- package/dist/utils/url-builder.d.ts +2 -0
- package/dist/utils/url-builder.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/config-builder.ts +20 -0
- package/src/index.ts +3 -0
- package/src/recognition-client.ts +11 -1
- package/src/recognition-client.types.ts +12 -0
- package/src/utils/message-handler.ts +14 -3
- package/src/utils/url-builder.spec.ts +43 -0
- package/src/utils/url-builder.ts +7 -0
|
@@ -21,6 +21,9 @@ declare enum RecognitionProvider {
|
|
|
21
21
|
MISTRAL_VOXTRAL = "mistral-voxtral",
|
|
22
22
|
CARTESIA = "cartesia",
|
|
23
23
|
DASHSCOPE = "dashscope",
|
|
24
|
+
BEDROCK = "bedrock",
|
|
25
|
+
INWORLD_STT = "inworld-stt",
|
|
26
|
+
AWS_TRANSCRIBE = "aws-transcribe",
|
|
24
27
|
TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
|
|
25
28
|
TEST_ASR_STREAMING = "test-asr-streaming"
|
|
26
29
|
}
|
|
@@ -119,6 +122,30 @@ declare enum DashScopeModel {
|
|
|
119
122
|
QWEN3_ASR_FLASH_REALTIME_2602 = "qwen3-asr-flash-realtime-2026-02-10",
|
|
120
123
|
QWEN3_ASR_FLASH_REALTIME = "qwen3-asr-flash-realtime"
|
|
121
124
|
}
|
|
125
|
+
/**
|
|
126
|
+
* AWS Bedrock batch transcription models
|
|
127
|
+
* Accessed via AWS Bedrock InvokeModelWithResponseStream
|
|
128
|
+
* @see https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-mistral.html
|
|
129
|
+
*/
|
|
130
|
+
declare enum BedrockModel {
|
|
131
|
+
VOXTRAL_MINI_3B_2507 = "mistral.voxtral-mini-3b-2507",
|
|
132
|
+
VOXTRAL_SMALL_24B_2507 = "mistral.voxtral-small-24b-2507"
|
|
133
|
+
}
|
|
134
|
+
/**
|
|
135
|
+
* Inworld AI STT models
|
|
136
|
+
* @see https://docs.inworld.ai/stt/overview
|
|
137
|
+
*/
|
|
138
|
+
declare enum InworldSttModel {
|
|
139
|
+
INWORLD_STT_1 = "inworld/inworld-stt-1"
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* AWS Transcribe streaming model
|
|
143
|
+
* AWS Transcribe uses a single default streaming model
|
|
144
|
+
* @see https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
|
|
145
|
+
*/
|
|
146
|
+
declare enum AwsTranscribeModel {
|
|
147
|
+
DEFAULT = "default"
|
|
148
|
+
}
|
|
122
149
|
/**
|
|
123
150
|
* Self-serve vLLM batch transcription models
|
|
124
151
|
* Backed by recognition-inference / RunPod `/transcribe`
|
|
@@ -129,7 +156,7 @@ declare enum SelfServeVllmModel {
|
|
|
129
156
|
/**
|
|
130
157
|
* Type alias for any model from any provider
|
|
131
158
|
*/
|
|
132
|
-
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | SelfServeVllmModel | string;
|
|
159
|
+
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | InworldSttModel | SelfServeVllmModel | BedrockModel | AwsTranscribeModel | string;
|
|
133
160
|
|
|
134
161
|
/**
|
|
135
162
|
* Audio encoding types
|
|
@@ -584,6 +611,61 @@ declare const SessionConfiguredSchemaV1: z.ZodObject<{
|
|
|
584
611
|
providerConfig?: string | undefined;
|
|
585
612
|
}>;
|
|
586
613
|
type SessionConfiguredV1 = z.infer<typeof SessionConfiguredSchemaV1>;
|
|
614
|
+
/**
|
|
615
|
+
* Audio metrics result V1 - contains audio quality metrics
|
|
616
|
+
* Extracted from raw PCM audio without AI/ML, pure signal analysis
|
|
617
|
+
* Used to detect mic issues (muted, low gain, clipping) and audio quality
|
|
618
|
+
*/
|
|
619
|
+
declare const AudioMetricsResultSchemaV1: z.ZodObject<{
|
|
620
|
+
type: z.ZodLiteral<RecognitionResultTypeV1.AUDIO_METRICS>;
|
|
621
|
+
valid: z.ZodBoolean;
|
|
622
|
+
audioBeginMs: z.ZodNumber;
|
|
623
|
+
audioEndMs: z.ZodNumber;
|
|
624
|
+
maxVolume: z.ZodNumber;
|
|
625
|
+
minVolume: z.ZodNumber;
|
|
626
|
+
avgVolume: z.ZodNumber;
|
|
627
|
+
peakVolumeDb: z.ZodNullable<z.ZodNumber>;
|
|
628
|
+
avgVolumeDb: z.ZodNullable<z.ZodNumber>;
|
|
629
|
+
silenceRatio: z.ZodNumber;
|
|
630
|
+
clippingRatio: z.ZodNumber;
|
|
631
|
+
snrEstimate: z.ZodNullable<z.ZodNumber>;
|
|
632
|
+
lastNonSilenceMs: z.ZodNumber;
|
|
633
|
+
timestamp: z.ZodString;
|
|
634
|
+
isFinal: z.ZodOptional<z.ZodBoolean>;
|
|
635
|
+
}, "strip", z.ZodTypeAny, {
|
|
636
|
+
valid: boolean;
|
|
637
|
+
type: RecognitionResultTypeV1.AUDIO_METRICS;
|
|
638
|
+
audioBeginMs: number;
|
|
639
|
+
audioEndMs: number;
|
|
640
|
+
maxVolume: number;
|
|
641
|
+
minVolume: number;
|
|
642
|
+
avgVolume: number;
|
|
643
|
+
silenceRatio: number;
|
|
644
|
+
clippingRatio: number;
|
|
645
|
+
snrEstimate: number | null;
|
|
646
|
+
lastNonSilenceMs: number;
|
|
647
|
+
timestamp: string;
|
|
648
|
+
peakVolumeDb: number | null;
|
|
649
|
+
avgVolumeDb: number | null;
|
|
650
|
+
isFinal?: boolean | undefined;
|
|
651
|
+
}, {
|
|
652
|
+
valid: boolean;
|
|
653
|
+
type: RecognitionResultTypeV1.AUDIO_METRICS;
|
|
654
|
+
audioBeginMs: number;
|
|
655
|
+
audioEndMs: number;
|
|
656
|
+
maxVolume: number;
|
|
657
|
+
minVolume: number;
|
|
658
|
+
avgVolume: number;
|
|
659
|
+
silenceRatio: number;
|
|
660
|
+
clippingRatio: number;
|
|
661
|
+
snrEstimate: number | null;
|
|
662
|
+
lastNonSilenceMs: number;
|
|
663
|
+
timestamp: string;
|
|
664
|
+
peakVolumeDb: number | null;
|
|
665
|
+
avgVolumeDb: number | null;
|
|
666
|
+
isFinal?: boolean | undefined;
|
|
667
|
+
}>;
|
|
668
|
+
type AudioMetricsResultV1 = z.infer<typeof AudioMetricsResultSchemaV1>;
|
|
587
669
|
|
|
588
670
|
/**
|
|
589
671
|
* Recognition Context Types V1
|
|
@@ -856,6 +938,17 @@ interface ASRRequestConfig {
|
|
|
856
938
|
* ```
|
|
857
939
|
*/
|
|
858
940
|
providerOptions?: Record<string, any>;
|
|
941
|
+
/**
|
|
942
|
+
* Streaming audio metrics opt-in interval (ms).
|
|
943
|
+
*
|
|
944
|
+
* When set to a positive number, server forwards AudioMetrics results to the
|
|
945
|
+
* client over the WebSocket, throttled so at most one result is sent per
|
|
946
|
+
* `audioMetricsIntervalMs`. Undefined / 0 disables streaming audio metrics
|
|
947
|
+
* (final metrics still embedded in the Metadata result).
|
|
948
|
+
*
|
|
949
|
+
* @example 500
|
|
950
|
+
*/
|
|
951
|
+
audioMetricsIntervalMs?: number;
|
|
859
952
|
/**
|
|
860
953
|
* Optional fallback ASR configurations
|
|
861
954
|
*
|
|
@@ -1145,6 +1238,8 @@ interface IRecognitionClientConfig {
|
|
|
1145
1238
|
platform?: string;
|
|
1146
1239
|
/** Experiment cohort (optional). Defaults to 'control' if not provided. */
|
|
1147
1240
|
experimentCohort?: 'treatment' | 'control';
|
|
1241
|
+
/** Explicit major version for ASR config selection (e.g. 1, 3). Takes precedence over experimentCohort. */
|
|
1242
|
+
experimentMajorVersion?: number;
|
|
1148
1243
|
/** Callback when transcript is received */
|
|
1149
1244
|
onTranscript?: (result: TranscriptionResultV1) => void;
|
|
1150
1245
|
/**
|
|
@@ -1154,6 +1249,12 @@ interface IRecognitionClientConfig {
|
|
|
1154
1249
|
onFunctionCall?: (result: FunctionCallResultV1) => void;
|
|
1155
1250
|
/** Callback when metadata is received. Only once after transcription is complete.*/
|
|
1156
1251
|
onMetadata?: (metadata: MetadataResultV1) => void;
|
|
1252
|
+
/**
|
|
1253
|
+
* Callback when streaming audio metrics arrive (volume, silence ratio, clipping, SNR, etc.).
|
|
1254
|
+
* Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
|
|
1255
|
+
* Final metrics still arrive embedded in `onMetadata.audioMetrics`.
|
|
1256
|
+
*/
|
|
1257
|
+
onAudioMetrics?: (metrics: AudioMetricsResultV1) => void;
|
|
1157
1258
|
/** Callback when session is configured with actual ASR provider/model (optional) */
|
|
1158
1259
|
onSessionConfigured?: (config: SessionConfiguredV1) => void;
|
|
1159
1260
|
/** Callback when error occurs */
|
package/dist/config-builder.d.ts
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Simple builder pattern for RealTimeTwoWayWebSocketRecognitionClientConfig
|
|
5
5
|
*/
|
|
6
6
|
import type { RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl } from './recognition-client.types.js';
|
|
7
|
-
import type { ASRRequestConfig, GameContextV1, TranscriptionResultV1, MetadataResultV1, SessionConfiguredV1, ErrorResultV1, Stage } from '@recog/shared-types';
|
|
7
|
+
import type { ASRRequestConfig, GameContextV1, TranscriptionResultV1, MetadataResultV1, SessionConfiguredV1, AudioMetricsResultV1, ErrorResultV1, Stage } from '@recog/shared-types';
|
|
8
8
|
/**
|
|
9
9
|
* Builder for RealTimeTwoWayWebSocketRecognitionClientConfig
|
|
10
10
|
*
|
|
@@ -90,6 +90,11 @@ export declare class ConfigBuilder {
|
|
|
90
90
|
* Set experiment cohort (optional, defaults to 'control')
|
|
91
91
|
*/
|
|
92
92
|
experimentCohort(cohort: 'treatment' | 'control'): this;
|
|
93
|
+
/**
|
|
94
|
+
* Set explicit major version for ASR config selection.
|
|
95
|
+
* Takes precedence over experimentCohort-based version resolution.
|
|
96
|
+
*/
|
|
97
|
+
experimentMajorVersion(version: number): this;
|
|
93
98
|
/**
|
|
94
99
|
* Set transcript callback
|
|
95
100
|
*/
|
|
@@ -102,6 +107,11 @@ export declare class ConfigBuilder {
|
|
|
102
107
|
* Set session configured callback (optional)
|
|
103
108
|
*/
|
|
104
109
|
onSessionConfigured(callback: (config: SessionConfiguredV1) => void): this;
|
|
110
|
+
/**
|
|
111
|
+
* Set streaming audio metrics callback (optional).
|
|
112
|
+
* Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
|
|
113
|
+
*/
|
|
114
|
+
onAudioMetrics(callback: (metrics: AudioMetricsResultV1) => void): this;
|
|
105
115
|
/**
|
|
106
116
|
* Set error callback
|
|
107
117
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config-builder.d.ts","sourceRoot":"","sources":["../src/config-builder.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,8CAA8C,EAC9C,sBAAsB,EACvB,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EACV,gBAAgB,EAChB,aAAa,EACb,qBAAqB,EACrB,gBAAgB,EAChB,mBAAmB,EACnB,aAAa,EACb,KAAK,EACN,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAA+D;IAE7E;;;OAGG;IACH,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAKtB;;;;;;;;OAQG;IACH,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAKhD;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAKzC;;;OAGG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,IAAI,EAAE,sBAAsB,EAAE,GAAG,IAAI;IAKlD;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK/B;;OAEG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK1B;;OAEG;IACH,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK3B;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAKhC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,WAAW,GAAG,SAAS,GAAG,IAAI;IAKvD;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,GAAG,IAAI;IAKrE;;OAEG;IACH,UAAU,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAKhE;;OAEG;IACH,mBAAmB,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,mBAAmB,KAAK,IAAI,GAAG,IAAI;IAK1E;;OAEG;IACH,OAAO,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,GAAG,IAAI;IAKvD;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,IAAI;IAKvC;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI;IAKtE;;OAEG;IACH,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKjC;;OAEG;IACH,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK3C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;IAKrC;;OAEG;
|
|
1
|
+
{"version":3,"file":"config-builder.d.ts","sourceRoot":"","sources":["../src/config-builder.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,8CAA8C,EAC9C,sBAAsB,EACvB,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EACV,gBAAgB,EAChB,aAAa,EACb,qBAAqB,EACrB,gBAAgB,EAChB,mBAAmB,EACnB,oBAAoB,EACpB,aAAa,EACb,KAAK,EACN,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAA+D;IAE7E;;;OAGG;IACH,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAKtB;;;;;;;;OAQG;IACH,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAKhD;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAKzC;;;OAGG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,IAAI,EAAE,sBAAsB,EAAE,GAAG,IAAI;IAKlD;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK/B;;OAEG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK1B;;OAEG;IACH,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK3B;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAKhC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,WAAW,GAAG,SAAS,GAAG,IAAI;IAKvD;;;OAGG;IACH,sBAAsB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK7C;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,GAAG,IAAI;IAKrE;;OAEG;IACH,UAAU,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAKhE;;OAEG;IACH,mBAAmB,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,mBAAmB,KAAK,IAAI,GAAG,IAAI;IAK1E;;;OAGG;IACH,cAAc,CAAC,QAAQ,EAAE,CAAC,OAAO,EAAE,oBAAoB,KAAK,IAAI,GAAG,IAAI;IAKvE;;OAEG;IACH,OAAO,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,GAAG,IAAI;IAKvD;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,IAAI;IAKvC;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI;IAKtE;;OAEG;IACH,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKjC;;OAEG;IACH,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK3C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;IAKrC;;OAEG;IAEH,MAAM,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,GAAG,KAAK,IAAI,GAAG,IAAI;IAKvG;;OAEG;IACH,KAAK,IAAI,8CAA8C;CAGxD"}
|
package/dist/index.bundled.d.ts
CHANGED
|
@@ -21,6 +21,9 @@ declare enum RecognitionProvider {
|
|
|
21
21
|
MISTRAL_VOXTRAL = "mistral-voxtral",
|
|
22
22
|
CARTESIA = "cartesia",
|
|
23
23
|
DASHSCOPE = "dashscope",
|
|
24
|
+
BEDROCK = "bedrock",
|
|
25
|
+
INWORLD_STT = "inworld-stt",
|
|
26
|
+
AWS_TRANSCRIBE = "aws-transcribe",
|
|
24
27
|
TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
|
|
25
28
|
TEST_ASR_STREAMING = "test-asr-streaming"
|
|
26
29
|
}
|
|
@@ -119,6 +122,30 @@ declare enum DashScopeModel {
|
|
|
119
122
|
QWEN3_ASR_FLASH_REALTIME_2602 = "qwen3-asr-flash-realtime-2026-02-10",
|
|
120
123
|
QWEN3_ASR_FLASH_REALTIME = "qwen3-asr-flash-realtime"
|
|
121
124
|
}
|
|
125
|
+
/**
|
|
126
|
+
* AWS Bedrock batch transcription models
|
|
127
|
+
* Accessed via AWS Bedrock InvokeModelWithResponseStream
|
|
128
|
+
* @see https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-mistral.html
|
|
129
|
+
*/
|
|
130
|
+
declare enum BedrockModel {
|
|
131
|
+
VOXTRAL_MINI_3B_2507 = "mistral.voxtral-mini-3b-2507",
|
|
132
|
+
VOXTRAL_SMALL_24B_2507 = "mistral.voxtral-small-24b-2507"
|
|
133
|
+
}
|
|
134
|
+
/**
|
|
135
|
+
* Inworld AI STT models
|
|
136
|
+
* @see https://docs.inworld.ai/stt/overview
|
|
137
|
+
*/
|
|
138
|
+
declare enum InworldSttModel {
|
|
139
|
+
INWORLD_STT_1 = "inworld/inworld-stt-1"
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* AWS Transcribe streaming model
|
|
143
|
+
* AWS Transcribe uses a single default streaming model
|
|
144
|
+
* @see https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
|
|
145
|
+
*/
|
|
146
|
+
declare enum AwsTranscribeModel {
|
|
147
|
+
DEFAULT = "default"
|
|
148
|
+
}
|
|
122
149
|
/**
|
|
123
150
|
* Self-serve vLLM batch transcription models
|
|
124
151
|
* Backed by recognition-inference / RunPod `/transcribe`
|
|
@@ -129,7 +156,7 @@ declare enum SelfServeVllmModel {
|
|
|
129
156
|
/**
|
|
130
157
|
* Type alias for any model from any provider
|
|
131
158
|
*/
|
|
132
|
-
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | SelfServeVllmModel | string;
|
|
159
|
+
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | InworldSttModel | SelfServeVllmModel | BedrockModel | AwsTranscribeModel | string;
|
|
133
160
|
|
|
134
161
|
/**
|
|
135
162
|
* Audio encoding types
|
|
@@ -593,6 +620,61 @@ declare const SessionConfiguredSchemaV1: z.ZodObject<{
|
|
|
593
620
|
providerConfig?: string | undefined;
|
|
594
621
|
}>;
|
|
595
622
|
type SessionConfiguredV1 = z.infer<typeof SessionConfiguredSchemaV1>;
|
|
623
|
+
/**
|
|
624
|
+
* Audio metrics result V1 - contains audio quality metrics
|
|
625
|
+
* Extracted from raw PCM audio without AI/ML, pure signal analysis
|
|
626
|
+
* Used to detect mic issues (muted, low gain, clipping) and audio quality
|
|
627
|
+
*/
|
|
628
|
+
declare const AudioMetricsResultSchemaV1: z.ZodObject<{
|
|
629
|
+
type: z.ZodLiteral<RecognitionResultTypeV1.AUDIO_METRICS>;
|
|
630
|
+
valid: z.ZodBoolean;
|
|
631
|
+
audioBeginMs: z.ZodNumber;
|
|
632
|
+
audioEndMs: z.ZodNumber;
|
|
633
|
+
maxVolume: z.ZodNumber;
|
|
634
|
+
minVolume: z.ZodNumber;
|
|
635
|
+
avgVolume: z.ZodNumber;
|
|
636
|
+
peakVolumeDb: z.ZodNullable<z.ZodNumber>;
|
|
637
|
+
avgVolumeDb: z.ZodNullable<z.ZodNumber>;
|
|
638
|
+
silenceRatio: z.ZodNumber;
|
|
639
|
+
clippingRatio: z.ZodNumber;
|
|
640
|
+
snrEstimate: z.ZodNullable<z.ZodNumber>;
|
|
641
|
+
lastNonSilenceMs: z.ZodNumber;
|
|
642
|
+
timestamp: z.ZodString;
|
|
643
|
+
isFinal: z.ZodOptional<z.ZodBoolean>;
|
|
644
|
+
}, "strip", z.ZodTypeAny, {
|
|
645
|
+
valid: boolean;
|
|
646
|
+
type: RecognitionResultTypeV1.AUDIO_METRICS;
|
|
647
|
+
audioBeginMs: number;
|
|
648
|
+
audioEndMs: number;
|
|
649
|
+
maxVolume: number;
|
|
650
|
+
minVolume: number;
|
|
651
|
+
avgVolume: number;
|
|
652
|
+
silenceRatio: number;
|
|
653
|
+
clippingRatio: number;
|
|
654
|
+
snrEstimate: number | null;
|
|
655
|
+
lastNonSilenceMs: number;
|
|
656
|
+
timestamp: string;
|
|
657
|
+
peakVolumeDb: number | null;
|
|
658
|
+
avgVolumeDb: number | null;
|
|
659
|
+
isFinal?: boolean | undefined;
|
|
660
|
+
}, {
|
|
661
|
+
valid: boolean;
|
|
662
|
+
type: RecognitionResultTypeV1.AUDIO_METRICS;
|
|
663
|
+
audioBeginMs: number;
|
|
664
|
+
audioEndMs: number;
|
|
665
|
+
maxVolume: number;
|
|
666
|
+
minVolume: number;
|
|
667
|
+
avgVolume: number;
|
|
668
|
+
silenceRatio: number;
|
|
669
|
+
clippingRatio: number;
|
|
670
|
+
snrEstimate: number | null;
|
|
671
|
+
lastNonSilenceMs: number;
|
|
672
|
+
timestamp: string;
|
|
673
|
+
peakVolumeDb: number | null;
|
|
674
|
+
avgVolumeDb: number | null;
|
|
675
|
+
isFinal?: boolean | undefined;
|
|
676
|
+
}>;
|
|
677
|
+
type AudioMetricsResultV1 = z.infer<typeof AudioMetricsResultSchemaV1>;
|
|
596
678
|
|
|
597
679
|
/**
|
|
598
680
|
* Error Exception Types
|
|
@@ -1288,6 +1370,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
|
|
|
1288
1370
|
prefixMode: z.ZodDefault<z.ZodOptional<z.ZodNativeEnum<typeof PrefixMode>>>;
|
|
1289
1371
|
prefixId: z.ZodOptional<z.ZodString>;
|
|
1290
1372
|
prefixTextToRemove: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
1373
|
+
audioMetricsIntervalMs: z.ZodOptional<z.ZodNumber>;
|
|
1291
1374
|
debugCommand: z.ZodOptional<z.ZodObject<{
|
|
1292
1375
|
enableDebugLog: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
1293
1376
|
enableAudioStorage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
@@ -1329,6 +1412,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
|
|
|
1329
1412
|
}[] | undefined;
|
|
1330
1413
|
prefixId?: string | undefined;
|
|
1331
1414
|
prefixTextToRemove?: string[] | undefined;
|
|
1415
|
+
audioMetricsIntervalMs?: number | undefined;
|
|
1332
1416
|
debugCommand?: {
|
|
1333
1417
|
enableDebugLog: boolean;
|
|
1334
1418
|
enableAudioStorage: boolean;
|
|
@@ -1360,6 +1444,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
|
|
|
1360
1444
|
prefixMode?: PrefixMode | undefined;
|
|
1361
1445
|
prefixId?: string | undefined;
|
|
1362
1446
|
prefixTextToRemove?: string[] | undefined;
|
|
1447
|
+
audioMetricsIntervalMs?: number | undefined;
|
|
1363
1448
|
debugCommand?: {
|
|
1364
1449
|
enableDebugLog?: boolean | undefined;
|
|
1365
1450
|
enableAudioStorage?: boolean | undefined;
|
|
@@ -1580,6 +1665,17 @@ interface ASRRequestConfig {
|
|
|
1580
1665
|
* ```
|
|
1581
1666
|
*/
|
|
1582
1667
|
providerOptions?: Record<string, any>;
|
|
1668
|
+
/**
|
|
1669
|
+
* Streaming audio metrics opt-in interval (ms).
|
|
1670
|
+
*
|
|
1671
|
+
* When set to a positive number, server forwards AudioMetrics results to the
|
|
1672
|
+
* client over the WebSocket, throttled so at most one result is sent per
|
|
1673
|
+
* `audioMetricsIntervalMs`. Undefined / 0 disables streaming audio metrics
|
|
1674
|
+
* (final metrics still embedded in the Metadata result).
|
|
1675
|
+
*
|
|
1676
|
+
* @example 500
|
|
1677
|
+
*/
|
|
1678
|
+
audioMetricsIntervalMs?: number;
|
|
1583
1679
|
/**
|
|
1584
1680
|
* Optional fallback ASR configurations
|
|
1585
1681
|
*
|
|
@@ -1905,6 +2001,8 @@ interface IRecognitionClientConfig {
|
|
|
1905
2001
|
platform?: string;
|
|
1906
2002
|
/** Experiment cohort (optional). Defaults to 'control' if not provided. */
|
|
1907
2003
|
experimentCohort?: 'treatment' | 'control';
|
|
2004
|
+
/** Explicit major version for ASR config selection (e.g. 1, 3). Takes precedence over experimentCohort. */
|
|
2005
|
+
experimentMajorVersion?: number;
|
|
1908
2006
|
/** Callback when transcript is received */
|
|
1909
2007
|
onTranscript?: (result: TranscriptionResultV1) => void;
|
|
1910
2008
|
/**
|
|
@@ -1914,6 +2012,12 @@ interface IRecognitionClientConfig {
|
|
|
1914
2012
|
onFunctionCall?: (result: FunctionCallResultV1) => void;
|
|
1915
2013
|
/** Callback when metadata is received. Only once after transcription is complete.*/
|
|
1916
2014
|
onMetadata?: (metadata: MetadataResultV1) => void;
|
|
2015
|
+
/**
|
|
2016
|
+
* Callback when streaming audio metrics arrive (volume, silence ratio, clipping, SNR, etc.).
|
|
2017
|
+
* Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
|
|
2018
|
+
* Final metrics still arrive embedded in `onMetadata.audioMetrics`.
|
|
2019
|
+
*/
|
|
2020
|
+
onAudioMetrics?: (metrics: AudioMetricsResultV1) => void;
|
|
1917
2021
|
/** Callback when session is configured with actual ASR provider/model (optional) */
|
|
1918
2022
|
onSessionConfigured?: (config: SessionConfiguredV1) => void;
|
|
1919
2023
|
/** Callback when error occurs */
|
|
@@ -2346,6 +2450,11 @@ declare class ConfigBuilder {
|
|
|
2346
2450
|
* Set experiment cohort (optional, defaults to 'control')
|
|
2347
2451
|
*/
|
|
2348
2452
|
experimentCohort(cohort: 'treatment' | 'control'): this;
|
|
2453
|
+
/**
|
|
2454
|
+
* Set explicit major version for ASR config selection.
|
|
2455
|
+
* Takes precedence over experimentCohort-based version resolution.
|
|
2456
|
+
*/
|
|
2457
|
+
experimentMajorVersion(version: number): this;
|
|
2349
2458
|
/**
|
|
2350
2459
|
* Set transcript callback
|
|
2351
2460
|
*/
|
|
@@ -2358,6 +2467,11 @@ declare class ConfigBuilder {
|
|
|
2358
2467
|
* Set session configured callback (optional)
|
|
2359
2468
|
*/
|
|
2360
2469
|
onSessionConfigured(callback: (config: SessionConfiguredV1) => void): this;
|
|
2470
|
+
/**
|
|
2471
|
+
* Set streaming audio metrics callback (optional).
|
|
2472
|
+
* Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
|
|
2473
|
+
*/
|
|
2474
|
+
onAudioMetrics(callback: (metrics: AudioMetricsResultV1) => void): this;
|
|
2361
2475
|
/**
|
|
2362
2476
|
* Set error callback
|
|
2363
2477
|
*/
|
|
@@ -2820,5 +2934,5 @@ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null |
|
|
|
2820
2934
|
declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
|
|
2821
2935
|
declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
|
|
2822
2936
|
|
|
2823
|
-
export { AudioEncoding, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
|
|
2824
|
-
export type { ASRRequestConfig, ASRRequestV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
|
|
2937
|
+
export { AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
|
|
2938
|
+
export type { ASRRequestConfig, ASRRequestV1, AudioMetricsResultV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
|
package/dist/index.d.ts
CHANGED
|
@@ -11,6 +11,6 @@ export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, Tran
|
|
|
11
11
|
export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
|
|
12
12
|
export { AudioEncoding } from '@recog/websocket';
|
|
13
13
|
export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
|
|
14
|
-
type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
|
|
14
|
+
type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type AudioMetricsResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, BedrockModel, AwsTranscribeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
|
|
15
15
|
export { getRecognitionServiceBase, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getRecognitionServiceHost, getRecognitionConductorBase, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionConductorHost, normalizeStage, RECOGNITION_SERVICE_BASES, RECOGNITION_CONDUCTOR_BASES } from '@recog/shared-config';
|
|
16
16
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -3746,6 +3746,9 @@ var RecognitionProvider;
|
|
|
3746
3746
|
RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
|
|
3747
3747
|
RecognitionProvider2["CARTESIA"] = "cartesia";
|
|
3748
3748
|
RecognitionProvider2["DASHSCOPE"] = "dashscope";
|
|
3749
|
+
RecognitionProvider2["BEDROCK"] = "bedrock";
|
|
3750
|
+
RecognitionProvider2["INWORLD_STT"] = "inworld-stt";
|
|
3751
|
+
RecognitionProvider2["AWS_TRANSCRIBE"] = "aws-transcribe";
|
|
3749
3752
|
RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
|
|
3750
3753
|
RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
|
|
3751
3754
|
})(RecognitionProvider || (RecognitionProvider = {}));
|
|
@@ -3813,6 +3816,19 @@ var DashScopeModel;
|
|
|
3813
3816
|
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
|
|
3814
3817
|
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
|
|
3815
3818
|
})(DashScopeModel || (DashScopeModel = {}));
|
|
3819
|
+
var BedrockModel;
|
|
3820
|
+
(function(BedrockModel2) {
|
|
3821
|
+
BedrockModel2["VOXTRAL_MINI_3B_2507"] = "mistral.voxtral-mini-3b-2507";
|
|
3822
|
+
BedrockModel2["VOXTRAL_SMALL_24B_2507"] = "mistral.voxtral-small-24b-2507";
|
|
3823
|
+
})(BedrockModel || (BedrockModel = {}));
|
|
3824
|
+
var InworldSttModel;
|
|
3825
|
+
(function(InworldSttModel2) {
|
|
3826
|
+
InworldSttModel2["INWORLD_STT_1"] = "inworld/inworld-stt-1";
|
|
3827
|
+
})(InworldSttModel || (InworldSttModel = {}));
|
|
3828
|
+
var AwsTranscribeModel;
|
|
3829
|
+
(function(AwsTranscribeModel2) {
|
|
3830
|
+
AwsTranscribeModel2["DEFAULT"] = "default";
|
|
3831
|
+
})(AwsTranscribeModel || (AwsTranscribeModel = {}));
|
|
3816
3832
|
var SelfServeVllmModel;
|
|
3817
3833
|
(function(SelfServeVllmModel2) {
|
|
3818
3834
|
SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
|
|
@@ -3938,9 +3954,9 @@ var ErrorResultSchemaV1 = z.object({
|
|
|
3938
3954
|
// Detailed description
|
|
3939
3955
|
});
|
|
3940
3956
|
var ClientControlActionV1;
|
|
3941
|
-
(function(
|
|
3942
|
-
|
|
3943
|
-
|
|
3957
|
+
(function(ClientControlActionV12) {
|
|
3958
|
+
ClientControlActionV12["READY_FOR_UPLOADING_RECORDING"] = "ready_for_uploading_recording";
|
|
3959
|
+
ClientControlActionV12["STOP_RECORDING"] = "stop_recording";
|
|
3944
3960
|
})(ClientControlActionV1 || (ClientControlActionV1 = {}));
|
|
3945
3961
|
var ClientControlActionsV1 = z.nativeEnum(ClientControlActionV1);
|
|
3946
3962
|
var ClientControlMessageSchemaV1 = z.object({
|
|
@@ -3973,6 +3989,8 @@ var AudioMetricsResultSchemaV1 = z.object({
|
|
|
3973
3989
|
maxVolume: z.number(),
|
|
3974
3990
|
minVolume: z.number(),
|
|
3975
3991
|
avgVolume: z.number(),
|
|
3992
|
+
peakVolumeDb: z.number().nullable(),
|
|
3993
|
+
avgVolumeDb: z.number().nullable(),
|
|
3976
3994
|
silenceRatio: z.number(),
|
|
3977
3995
|
clippingRatio: z.number(),
|
|
3978
3996
|
snrEstimate: z.number().nullable(),
|
|
@@ -3989,7 +4007,8 @@ var RecognitionResultSchemaV1 = z.discriminatedUnion("type", [
|
|
|
3989
4007
|
// P1 - P2
|
|
3990
4008
|
FunctionCallResultSchemaV1,
|
|
3991
4009
|
ClientControlMessageSchemaV1,
|
|
3992
|
-
SessionConfiguredSchemaV1
|
|
4010
|
+
SessionConfiguredSchemaV1,
|
|
4011
|
+
AudioMetricsResultSchemaV1
|
|
3993
4012
|
]);
|
|
3994
4013
|
|
|
3995
4014
|
// ../../libs/types/dist/provider-transcription.types.js
|
|
@@ -4435,6 +4454,9 @@ var ASRRequestSchemaV1 = z.object({
|
|
|
4435
4454
|
prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
|
|
4436
4455
|
prefixId: z.string().optional(),
|
|
4437
4456
|
prefixTextToRemove: z.array(z.string()).optional(),
|
|
4457
|
+
// Streaming audio metrics opt-in: when > 0, server emits AudioMetrics results throttled to this interval (ms).
|
|
4458
|
+
// Undefined / 0 disables streaming audio metrics (final metrics still embedded in Metadata).
|
|
4459
|
+
audioMetricsIntervalMs: z.number().optional(),
|
|
4438
4460
|
// Debug options (FOR DEBUG/TESTING ONLY - not for production use)
|
|
4439
4461
|
debugCommand: RequestDebugCommandSchema
|
|
4440
4462
|
});
|
|
@@ -4456,8 +4478,9 @@ var RecognitionGameInfoSchema = z.object({
|
|
|
4456
4478
|
/** @deprecated Use questionAskedId instead. Kept for backward compatibility during migration. */
|
|
4457
4479
|
questionAnswerId: z.string().optional(),
|
|
4458
4480
|
platform: z.string().optional(),
|
|
4459
|
-
experimentCohort: z.enum(["treatment", "control"]).optional()
|
|
4460
|
-
|
|
4481
|
+
experimentCohort: z.enum(["treatment", "control"]).optional(),
|
|
4482
|
+
experimentMajorVersion: z.number().int().optional()
|
|
4483
|
+
// Explicit major version for ASR config selection (e.g. 1, 3)
|
|
4461
4484
|
});
|
|
4462
4485
|
var RecognitionQueryMetadataSchema = z.object({
|
|
4463
4486
|
audioUtteranceId: z.string(),
|
|
@@ -5132,6 +5155,9 @@ function buildWebSocketUrl(config) {
|
|
|
5132
5155
|
if (config.experimentCohort) {
|
|
5133
5156
|
url.searchParams.set("experimentCohort", config.experimentCohort);
|
|
5134
5157
|
}
|
|
5158
|
+
if (config.experimentMajorVersion !== void 0) {
|
|
5159
|
+
url.searchParams.set("experimentMajorVersion", String(config.experimentMajorVersion));
|
|
5160
|
+
}
|
|
5135
5161
|
return url.toString();
|
|
5136
5162
|
}
|
|
5137
5163
|
|
|
@@ -5279,6 +5305,7 @@ var MessageHandler = class {
|
|
|
5279
5305
|
/**
|
|
5280
5306
|
* Handle incoming WebSocket message
|
|
5281
5307
|
*/
|
|
5308
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
5282
5309
|
handleMessage(msg) {
|
|
5283
5310
|
if (this.callbacks.logger) {
|
|
5284
5311
|
this.callbacks.logger("debug", "[RecogSDK] Received WebSocket message", {
|
|
@@ -5317,6 +5344,9 @@ var MessageHandler = class {
|
|
|
5317
5344
|
case RecognitionResultTypeV1.SESSION_CONFIGURED:
|
|
5318
5345
|
this.callbacks.onSessionConfigured?.(msgData);
|
|
5319
5346
|
break;
|
|
5347
|
+
case RecognitionResultTypeV1.AUDIO_METRICS:
|
|
5348
|
+
this.callbacks.onAudioMetrics?.(msgData);
|
|
5349
|
+
break;
|
|
5320
5350
|
default:
|
|
5321
5351
|
if (this.callbacks.logger) {
|
|
5322
5352
|
this.callbacks.logger("debug", "[RecogSDK] Unknown message type", { type: msgType });
|
|
@@ -5429,7 +5459,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5429
5459
|
...config.platform && { platform: config.platform },
|
|
5430
5460
|
...config.gameContext && { gameContext: config.gameContext },
|
|
5431
5461
|
...config.gameId && { gameId: config.gameId },
|
|
5432
|
-
...config.experimentCohort && { experimentCohort: config.experimentCohort }
|
|
5462
|
+
...config.experimentCohort && { experimentCohort: config.experimentCohort },
|
|
5463
|
+
...config.experimentMajorVersion !== void 0 && { experimentMajorVersion: config.experimentMajorVersion }
|
|
5433
5464
|
});
|
|
5434
5465
|
super({
|
|
5435
5466
|
url,
|
|
@@ -5463,6 +5494,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5463
5494
|
onMetadata: config.onMetadata || (() => {
|
|
5464
5495
|
}),
|
|
5465
5496
|
onSessionConfigured: config.onSessionConfigured,
|
|
5497
|
+
onAudioMetrics: config.onAudioMetrics,
|
|
5466
5498
|
onError: config.onError || (() => {
|
|
5467
5499
|
}),
|
|
5468
5500
|
onConnected: config.onConnected || (() => {
|
|
@@ -5491,6 +5523,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5491
5523
|
onError: this.config.onError,
|
|
5492
5524
|
onControlMessage: this.handleControlMessage.bind(this),
|
|
5493
5525
|
onSessionConfigured: this.config.onSessionConfigured,
|
|
5526
|
+
onAudioMetrics: this.config.onAudioMetrics,
|
|
5494
5527
|
...this.config.logger && { logger: this.config.logger }
|
|
5495
5528
|
});
|
|
5496
5529
|
}
|
|
@@ -5816,6 +5849,10 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5816
5849
|
// Include prefix text to remove if provided (for server-side prefix text removal)
|
|
5817
5850
|
...this.config.asrRequestConfig.prefixTextToRemove && {
|
|
5818
5851
|
prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
|
|
5852
|
+
},
|
|
5853
|
+
// Streaming audio metrics opt-in (ms interval). Server only forwards metrics if > 0.
|
|
5854
|
+
...this.config.asrRequestConfig.audioMetricsIntervalMs !== void 0 && {
|
|
5855
|
+
audioMetricsIntervalMs: this.config.asrRequestConfig.audioMetricsIntervalMs
|
|
5819
5856
|
}
|
|
5820
5857
|
};
|
|
5821
5858
|
super.sendMessage(
|
|
@@ -6141,6 +6178,14 @@ var ConfigBuilder = class {
|
|
|
6141
6178
|
this.config.experimentCohort = cohort;
|
|
6142
6179
|
return this;
|
|
6143
6180
|
}
|
|
6181
|
+
/**
|
|
6182
|
+
* Set explicit major version for ASR config selection.
|
|
6183
|
+
* Takes precedence over experimentCohort-based version resolution.
|
|
6184
|
+
*/
|
|
6185
|
+
experimentMajorVersion(version) {
|
|
6186
|
+
this.config.experimentMajorVersion = version;
|
|
6187
|
+
return this;
|
|
6188
|
+
}
|
|
6144
6189
|
/**
|
|
6145
6190
|
* Set transcript callback
|
|
6146
6191
|
*/
|
|
@@ -6162,6 +6207,14 @@ var ConfigBuilder = class {
|
|
|
6162
6207
|
this.config.onSessionConfigured = callback;
|
|
6163
6208
|
return this;
|
|
6164
6209
|
}
|
|
6210
|
+
/**
|
|
6211
|
+
* Set streaming audio metrics callback (optional).
|
|
6212
|
+
* Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
|
|
6213
|
+
*/
|
|
6214
|
+
onAudioMetrics(callback) {
|
|
6215
|
+
this.config.onAudioMetrics = callback;
|
|
6216
|
+
return this;
|
|
6217
|
+
}
|
|
6165
6218
|
/**
|
|
6166
6219
|
* Set error callback
|
|
6167
6220
|
*/
|
|
@@ -6214,6 +6267,7 @@ var ConfigBuilder = class {
|
|
|
6214
6267
|
/**
|
|
6215
6268
|
* Set logger function
|
|
6216
6269
|
*/
|
|
6270
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
6217
6271
|
logger(logger) {
|
|
6218
6272
|
this.config.logger = logger;
|
|
6219
6273
|
return this;
|
|
@@ -6651,6 +6705,8 @@ function createSimplifiedVGFClient(config) {
|
|
|
6651
6705
|
}
|
|
6652
6706
|
export {
|
|
6653
6707
|
AudioEncoding,
|
|
6708
|
+
AwsTranscribeModel,
|
|
6709
|
+
BedrockModel,
|
|
6654
6710
|
CartesiaModel,
|
|
6655
6711
|
ClientControlActionV1,
|
|
6656
6712
|
ClientState,
|