@volley/recognition-client-sdk 0.1.707 → 0.1.782
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/dist/browser.bundled.d.ts +282 -116
- package/dist/config-builder.d.ts +6 -1
- package/dist/config-builder.d.ts.map +1 -1
- package/dist/index.bundled.d.ts +344 -164
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +81 -7
- package/dist/index.js.map +3 -3
- package/dist/recog-client-sdk.browser.js +70 -7
- package/dist/recog-client-sdk.browser.js.map +3 -3
- package/dist/recognition-client.d.ts.map +1 -1
- package/dist/recognition-client.types.d.ts +7 -1
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/utils/message-handler.d.ts +2 -1
- package/dist/utils/message-handler.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/config-builder.ts +11 -0
- package/src/index.ts +3 -0
- package/src/recognition-client.ts +15 -0
- package/src/recognition-client.types.ts +9 -0
- package/src/utils/message-handler.ts +14 -3
- package/src/utils/url-builder.spec.ts +43 -0
package/README.md
CHANGED
|
@@ -152,9 +152,12 @@ builder
|
|
|
152
152
|
.onError(error => {}) // Handle errors
|
|
153
153
|
.onConnected(() => {}) // Connection established
|
|
154
154
|
.onDisconnected((code) => {}) // Connection closed
|
|
155
|
-
.onMetadata(meta => {}) // Timing information
|
|
155
|
+
.onMetadata(meta => {}) // Timing information + final audio metrics (always-on)
|
|
156
|
+
.onAudioMetrics(m => {}) // Live audio-quality metrics (opt-in, since 0.1.767)
|
|
156
157
|
```
|
|
157
158
|
|
|
159
|
+
> **Audio metrics**: every session delivers a final `audioMetrics` snapshot embedded in `Metadata` (volume, silence ratio, clipping, SNR — all derived from PCM, not from the ASR provider). To also receive live per-chunk updates while audio is flowing, set `asrRequestConfig.audioMetricsIntervalMs > 0` and register `.onAudioMetrics()`. Available in SDK **≥ 0.1.767**. See [audio-metrics-alpha.md](https://github.com/Volley-Inc/recognition-service/blob/dev/docs/design/functional-features/observability/audio-metrics-alpha.md) for the full schema, and the repo-root [CHANGELOG.md](https://github.com/Volley-Inc/recognition-service/blob/dev/CHANGELOG.md) for SDK release history.
|
|
160
|
+
|
|
158
161
|
### Optional Parameters
|
|
159
162
|
|
|
160
163
|
```typescript
|
|
@@ -22,6 +22,9 @@ declare enum RecognitionProvider {
|
|
|
22
22
|
CARTESIA = "cartesia",
|
|
23
23
|
DASHSCOPE = "dashscope",
|
|
24
24
|
BEDROCK = "bedrock",
|
|
25
|
+
INWORLD_STT = "inworld-stt",
|
|
26
|
+
AWS_TRANSCRIBE = "aws-transcribe",
|
|
27
|
+
AMAZON_NOVA_SONIC = "amazon-nova-sonic",
|
|
25
28
|
TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
|
|
26
29
|
TEST_ASR_STREAMING = "test-asr-streaming"
|
|
27
30
|
}
|
|
@@ -94,6 +97,7 @@ declare enum ElevenLabsModel {
|
|
|
94
97
|
* @see https://platform.openai.com/docs/models/gpt-4o-transcribe
|
|
95
98
|
*/
|
|
96
99
|
declare enum OpenAIRealtimeModel {
|
|
100
|
+
GPT_REALTIME_WHISPER = "gpt-realtime-whisper",
|
|
97
101
|
GPT_4O_TRANSCRIBE = "gpt-4o-transcribe",
|
|
98
102
|
GPT_4O_MINI_TRANSCRIBE = "gpt-4o-mini-transcribe"
|
|
99
103
|
}
|
|
@@ -130,134 +134,40 @@ declare enum BedrockModel {
|
|
|
130
134
|
VOXTRAL_SMALL_24B_2507 = "mistral.voxtral-small-24b-2507"
|
|
131
135
|
}
|
|
132
136
|
/**
|
|
133
|
-
*
|
|
134
|
-
*
|
|
137
|
+
* Inworld AI STT models
|
|
138
|
+
* @see https://docs.inworld.ai/stt/overview
|
|
135
139
|
*/
|
|
136
|
-
declare enum
|
|
137
|
-
|
|
140
|
+
declare enum InworldSttModel {
|
|
141
|
+
INWORLD_STT_1 = "inworld/inworld-stt-1"
|
|
138
142
|
}
|
|
139
143
|
/**
|
|
140
|
-
*
|
|
144
|
+
* AWS Transcribe streaming model
|
|
145
|
+
* AWS Transcribe uses a single default streaming model
|
|
146
|
+
* @see https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
|
|
141
147
|
*/
|
|
142
|
-
|
|
143
|
-
|
|
148
|
+
declare enum AwsTranscribeModel {
|
|
149
|
+
DEFAULT = "default"
|
|
150
|
+
}
|
|
144
151
|
/**
|
|
145
|
-
*
|
|
152
|
+
* Amazon Nova Sonic bidirectional streaming model (Bedrock).
|
|
153
|
+
* Speech-to-speech model; we consume the USER FINAL transcript and discard the assistant text/audio output.
|
|
154
|
+
* @see https://docs.aws.amazon.com/nova/latest/userguide/speech-bidirection.html
|
|
146
155
|
*/
|
|
147
|
-
declare enum
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
OGG_OPUS = 2,
|
|
151
|
-
FLAC = 3,
|
|
152
|
-
MULAW = 4,
|
|
153
|
-
ALAW = 5
|
|
154
|
-
}
|
|
155
|
-
declare namespace AudioEncoding {
|
|
156
|
-
/**
|
|
157
|
-
* Convert numeric ID to AudioEncoding enum
|
|
158
|
-
* @param id - Numeric encoding identifier (0-5)
|
|
159
|
-
* @returns AudioEncoding enum value or undefined if invalid
|
|
160
|
-
*/
|
|
161
|
-
function fromId(id: number): AudioEncoding | undefined;
|
|
162
|
-
/**
|
|
163
|
-
* Convert string name to AudioEncoding enum
|
|
164
|
-
* @param nameStr - String name like "linear16", "LINEAR16", "ogg_opus", "OGG_OPUS", etc. (case insensitive)
|
|
165
|
-
* @returns AudioEncoding enum value or undefined if invalid
|
|
166
|
-
*/
|
|
167
|
-
function fromName(nameStr: string): AudioEncoding | undefined;
|
|
168
|
-
/**
|
|
169
|
-
* Convert AudioEncoding enum to numeric ID
|
|
170
|
-
* @param encoding - AudioEncoding enum value
|
|
171
|
-
* @returns Numeric ID (0-5)
|
|
172
|
-
*/
|
|
173
|
-
function toId(encoding: AudioEncoding): number;
|
|
174
|
-
/**
|
|
175
|
-
* Convert AudioEncoding enum to string name
|
|
176
|
-
* @param encoding - AudioEncoding enum value
|
|
177
|
-
* @returns String name like "LINEAR16", "MULAW", etc.
|
|
178
|
-
*/
|
|
179
|
-
function toName(encoding: AudioEncoding): string;
|
|
180
|
-
/**
|
|
181
|
-
* Check if a numeric ID is a valid encoding
|
|
182
|
-
* @param id - Numeric identifier to validate
|
|
183
|
-
* @returns true if valid encoding ID
|
|
184
|
-
*/
|
|
185
|
-
function isIdValid(id: number): boolean;
|
|
186
|
-
/**
|
|
187
|
-
* Check if a string name is a valid encoding
|
|
188
|
-
* @param nameStr - String name to validate
|
|
189
|
-
* @returns true if valid encoding name
|
|
190
|
-
*/
|
|
191
|
-
function isNameValid(nameStr: string): boolean;
|
|
156
|
+
declare enum AmazonNovaSonicModel {
|
|
157
|
+
AMAZON_NOVA_SONIC_V1 = "amazon.nova-sonic-v1:0",
|
|
158
|
+
AMAZON_NOVA_2_SONIC = "amazon.nova-2-sonic-v1:0"
|
|
192
159
|
}
|
|
193
160
|
/**
|
|
194
|
-
*
|
|
161
|
+
* Self-serve vLLM batch transcription models
|
|
162
|
+
* Backed by recognition-inference / RunPod `/transcribe`
|
|
195
163
|
*/
|
|
196
|
-
declare enum
|
|
197
|
-
|
|
198
|
-
RATE_16000 = 16000,
|
|
199
|
-
RATE_22050 = 22050,
|
|
200
|
-
RATE_24000 = 24000,
|
|
201
|
-
RATE_32000 = 32000,
|
|
202
|
-
RATE_44100 = 44100,
|
|
203
|
-
RATE_48000 = 48000
|
|
204
|
-
}
|
|
205
|
-
declare namespace SampleRate {
|
|
206
|
-
/**
|
|
207
|
-
* Convert Hz value to SampleRate enum
|
|
208
|
-
* @param hz - Sample rate in Hz (8000, 16000, etc.)
|
|
209
|
-
* @returns SampleRate enum value or undefined if invalid
|
|
210
|
-
*/
|
|
211
|
-
function fromHz(hz: number): SampleRate | undefined;
|
|
212
|
-
/**
|
|
213
|
-
* Convert string name to SampleRate enum
|
|
214
|
-
* @param nameStr - String name like "rate_8000", "RATE_16000", etc. (case insensitive)
|
|
215
|
-
* @returns SampleRate enum value or undefined if invalid
|
|
216
|
-
*/
|
|
217
|
-
function fromName(nameStr: string): SampleRate | undefined;
|
|
218
|
-
/**
|
|
219
|
-
* Convert SampleRate enum to Hz value
|
|
220
|
-
* @param rate - SampleRate enum value
|
|
221
|
-
* @returns Hz value (8000, 16000, etc.)
|
|
222
|
-
*/
|
|
223
|
-
function toHz(rate: SampleRate): number;
|
|
224
|
-
/**
|
|
225
|
-
* Convert SampleRate enum to string name
|
|
226
|
-
* @param rate - SampleRate enum value
|
|
227
|
-
* @returns String name like "RATE_8000", "RATE_16000", etc.
|
|
228
|
-
*/
|
|
229
|
-
function toName(rate: SampleRate): string;
|
|
230
|
-
/**
|
|
231
|
-
* Check if a numeric Hz value is a valid sample rate
|
|
232
|
-
* @param hz - Hz value to validate
|
|
233
|
-
* @returns true if valid sample rate
|
|
234
|
-
*/
|
|
235
|
-
function isHzValid(hz: number): boolean;
|
|
236
|
-
/**
|
|
237
|
-
* Check if a string name is a valid sample rate
|
|
238
|
-
* @param nameStr - String name to validate
|
|
239
|
-
* @returns true if valid sample rate name
|
|
240
|
-
*/
|
|
241
|
-
function isNameValid(nameStr: string): boolean;
|
|
164
|
+
declare enum SelfServeVllmModel {
|
|
165
|
+
QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
|
|
242
166
|
}
|
|
243
167
|
/**
|
|
244
|
-
*
|
|
245
|
-
* Using BCP-47 language tags
|
|
168
|
+
* Type alias for any model from any provider
|
|
246
169
|
*/
|
|
247
|
-
|
|
248
|
-
ENGLISH_US = "en-US",
|
|
249
|
-
ENGLISH_GB = "en-GB",
|
|
250
|
-
SPANISH_ES = "es-ES",
|
|
251
|
-
SPANISH_MX = "es-MX",
|
|
252
|
-
FRENCH_FR = "fr-FR",
|
|
253
|
-
GERMAN_DE = "de-DE",
|
|
254
|
-
ITALIAN_IT = "it-IT",
|
|
255
|
-
PORTUGUESE_BR = "pt-BR",
|
|
256
|
-
JAPANESE_JP = "ja-JP",
|
|
257
|
-
KOREAN_KR = "ko-KR",
|
|
258
|
-
CHINESE_CN = "zh-CN",
|
|
259
|
-
CHINESE_TW = "zh-TW"
|
|
260
|
-
}
|
|
170
|
+
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | InworldSttModel | SelfServeVllmModel | BedrockModel | AwsTranscribeModel | AmazonNovaSonicModel | string;
|
|
261
171
|
|
|
262
172
|
/**
|
|
263
173
|
* Recognition Result Types V1
|
|
@@ -277,6 +187,16 @@ declare enum RecognitionResultTypeV1 {
|
|
|
277
187
|
AUDIO_METRICS = "AudioMetrics",
|
|
278
188
|
SESSION_CONFIGURED = "SessionConfigured"
|
|
279
189
|
}
|
|
190
|
+
/**
|
|
191
|
+
* Source of a phrase detection — what kind of provider feature produced
|
|
192
|
+
* the hit. Currently only Deepgram's `search` parameter is wired up, so
|
|
193
|
+
* this enum has one value. New entries (e.g. KEYWORDS, KEYTERMS,
|
|
194
|
+
* SPEECH_CONTEXTS) get added when other providers join.
|
|
195
|
+
*/
|
|
196
|
+
declare enum DetectionTypeV1 {
|
|
197
|
+
/** Deepgram phonetic phrase match via the `search=…` request parameter */
|
|
198
|
+
SEARCH = "search"
|
|
199
|
+
}
|
|
280
200
|
/**
|
|
281
201
|
* Transcription result V1 - contains transcript message
|
|
282
202
|
* In the long run the game side should not need to know about it. In the short run it is sent back to the client.
|
|
@@ -301,6 +221,25 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
|
|
|
301
221
|
receivedAtMs: z.ZodOptional<z.ZodNumber>;
|
|
302
222
|
accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
303
223
|
rawAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
224
|
+
detections: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
225
|
+
type: z.ZodNativeEnum<typeof DetectionTypeV1>;
|
|
226
|
+
query: z.ZodString;
|
|
227
|
+
score: z.ZodNumber;
|
|
228
|
+
startMs: z.ZodOptional<z.ZodNumber>;
|
|
229
|
+
endMs: z.ZodOptional<z.ZodNumber>;
|
|
230
|
+
}, "strip", z.ZodTypeAny, {
|
|
231
|
+
type: DetectionTypeV1;
|
|
232
|
+
query: string;
|
|
233
|
+
score: number;
|
|
234
|
+
startMs?: number | undefined;
|
|
235
|
+
endMs?: number | undefined;
|
|
236
|
+
}, {
|
|
237
|
+
type: DetectionTypeV1;
|
|
238
|
+
query: string;
|
|
239
|
+
score: number;
|
|
240
|
+
startMs?: number | undefined;
|
|
241
|
+
endMs?: number | undefined;
|
|
242
|
+
}>, "many">>;
|
|
304
243
|
}, "strip", z.ZodTypeAny, {
|
|
305
244
|
type: RecognitionResultTypeV1.TRANSCRIPTION;
|
|
306
245
|
audioUtteranceId: string;
|
|
@@ -320,6 +259,13 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
|
|
|
320
259
|
receivedAtMs?: number | undefined;
|
|
321
260
|
accumulatedAudioTimeMs?: number | undefined;
|
|
322
261
|
rawAudioTimeMs?: number | undefined;
|
|
262
|
+
detections?: {
|
|
263
|
+
type: DetectionTypeV1;
|
|
264
|
+
query: string;
|
|
265
|
+
score: number;
|
|
266
|
+
startMs?: number | undefined;
|
|
267
|
+
endMs?: number | undefined;
|
|
268
|
+
}[] | undefined;
|
|
323
269
|
}, {
|
|
324
270
|
type: RecognitionResultTypeV1.TRANSCRIPTION;
|
|
325
271
|
audioUtteranceId: string;
|
|
@@ -339,6 +285,13 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
|
|
|
339
285
|
receivedAtMs?: number | undefined;
|
|
340
286
|
accumulatedAudioTimeMs?: number | undefined;
|
|
341
287
|
rawAudioTimeMs?: number | undefined;
|
|
288
|
+
detections?: {
|
|
289
|
+
type: DetectionTypeV1;
|
|
290
|
+
query: string;
|
|
291
|
+
score: number;
|
|
292
|
+
startMs?: number | undefined;
|
|
293
|
+
endMs?: number | undefined;
|
|
294
|
+
}[] | undefined;
|
|
342
295
|
}>;
|
|
343
296
|
type TranscriptionResultV1 = z.infer<typeof TranscriptionResultSchemaV1>;
|
|
344
297
|
/**
|
|
@@ -594,6 +547,179 @@ declare const SessionConfiguredSchemaV1: z.ZodObject<{
|
|
|
594
547
|
providerConfig?: string | undefined;
|
|
595
548
|
}>;
|
|
596
549
|
type SessionConfiguredV1 = z.infer<typeof SessionConfiguredSchemaV1>;
|
|
550
|
+
/**
|
|
551
|
+
* Audio metrics result V1 - contains audio quality metrics
|
|
552
|
+
* Extracted from raw PCM audio without AI/ML, pure signal analysis
|
|
553
|
+
* Used to detect mic issues (muted, low gain, clipping) and audio quality
|
|
554
|
+
*/
|
|
555
|
+
declare const AudioMetricsResultSchemaV1: z.ZodObject<{
|
|
556
|
+
type: z.ZodLiteral<RecognitionResultTypeV1.AUDIO_METRICS>;
|
|
557
|
+
valid: z.ZodBoolean;
|
|
558
|
+
audioBeginMs: z.ZodNumber;
|
|
559
|
+
audioEndMs: z.ZodNumber;
|
|
560
|
+
maxVolume: z.ZodNumber;
|
|
561
|
+
minVolume: z.ZodNumber;
|
|
562
|
+
avgVolume: z.ZodNumber;
|
|
563
|
+
peakVolumeDb: z.ZodNullable<z.ZodNumber>;
|
|
564
|
+
avgVolumeDb: z.ZodNullable<z.ZodNumber>;
|
|
565
|
+
silenceRatio: z.ZodNumber;
|
|
566
|
+
clippingRatio: z.ZodNumber;
|
|
567
|
+
snrEstimate: z.ZodNullable<z.ZodNumber>;
|
|
568
|
+
lastNonSilenceMs: z.ZodNumber;
|
|
569
|
+
timestamp: z.ZodString;
|
|
570
|
+
isFinal: z.ZodOptional<z.ZodBoolean>;
|
|
571
|
+
}, "strip", z.ZodTypeAny, {
|
|
572
|
+
valid: boolean;
|
|
573
|
+
type: RecognitionResultTypeV1.AUDIO_METRICS;
|
|
574
|
+
audioBeginMs: number;
|
|
575
|
+
audioEndMs: number;
|
|
576
|
+
maxVolume: number;
|
|
577
|
+
minVolume: number;
|
|
578
|
+
avgVolume: number;
|
|
579
|
+
silenceRatio: number;
|
|
580
|
+
clippingRatio: number;
|
|
581
|
+
snrEstimate: number | null;
|
|
582
|
+
lastNonSilenceMs: number;
|
|
583
|
+
timestamp: string;
|
|
584
|
+
peakVolumeDb: number | null;
|
|
585
|
+
avgVolumeDb: number | null;
|
|
586
|
+
isFinal?: boolean | undefined;
|
|
587
|
+
}, {
|
|
588
|
+
valid: boolean;
|
|
589
|
+
type: RecognitionResultTypeV1.AUDIO_METRICS;
|
|
590
|
+
audioBeginMs: number;
|
|
591
|
+
audioEndMs: number;
|
|
592
|
+
maxVolume: number;
|
|
593
|
+
minVolume: number;
|
|
594
|
+
avgVolume: number;
|
|
595
|
+
silenceRatio: number;
|
|
596
|
+
clippingRatio: number;
|
|
597
|
+
snrEstimate: number | null;
|
|
598
|
+
lastNonSilenceMs: number;
|
|
599
|
+
timestamp: string;
|
|
600
|
+
peakVolumeDb: number | null;
|
|
601
|
+
avgVolumeDb: number | null;
|
|
602
|
+
isFinal?: boolean | undefined;
|
|
603
|
+
}>;
|
|
604
|
+
type AudioMetricsResultV1 = z.infer<typeof AudioMetricsResultSchemaV1>;
|
|
605
|
+
|
|
606
|
+
/**
|
|
607
|
+
* Audio encoding types
|
|
608
|
+
*/
|
|
609
|
+
declare enum AudioEncoding {
|
|
610
|
+
ENCODING_UNSPECIFIED = 0,
|
|
611
|
+
LINEAR16 = 1,
|
|
612
|
+
OGG_OPUS = 2,
|
|
613
|
+
FLAC = 3,
|
|
614
|
+
MULAW = 4,
|
|
615
|
+
ALAW = 5
|
|
616
|
+
}
|
|
617
|
+
declare namespace AudioEncoding {
|
|
618
|
+
/**
|
|
619
|
+
* Convert numeric ID to AudioEncoding enum
|
|
620
|
+
* @param id - Numeric encoding identifier (0-5)
|
|
621
|
+
* @returns AudioEncoding enum value or undefined if invalid
|
|
622
|
+
*/
|
|
623
|
+
function fromId(id: number): AudioEncoding | undefined;
|
|
624
|
+
/**
|
|
625
|
+
* Convert string name to AudioEncoding enum
|
|
626
|
+
* @param nameStr - String name like "linear16", "LINEAR16", "ogg_opus", "OGG_OPUS", etc. (case insensitive)
|
|
627
|
+
* @returns AudioEncoding enum value or undefined if invalid
|
|
628
|
+
*/
|
|
629
|
+
function fromName(nameStr: string): AudioEncoding | undefined;
|
|
630
|
+
/**
|
|
631
|
+
* Convert AudioEncoding enum to numeric ID
|
|
632
|
+
* @param encoding - AudioEncoding enum value
|
|
633
|
+
* @returns Numeric ID (0-5)
|
|
634
|
+
*/
|
|
635
|
+
function toId(encoding: AudioEncoding): number;
|
|
636
|
+
/**
|
|
637
|
+
* Convert AudioEncoding enum to string name
|
|
638
|
+
* @param encoding - AudioEncoding enum value
|
|
639
|
+
* @returns String name like "LINEAR16", "MULAW", etc.
|
|
640
|
+
*/
|
|
641
|
+
function toName(encoding: AudioEncoding): string;
|
|
642
|
+
/**
|
|
643
|
+
* Check if a numeric ID is a valid encoding
|
|
644
|
+
* @param id - Numeric identifier to validate
|
|
645
|
+
* @returns true if valid encoding ID
|
|
646
|
+
*/
|
|
647
|
+
function isIdValid(id: number): boolean;
|
|
648
|
+
/**
|
|
649
|
+
* Check if a string name is a valid encoding
|
|
650
|
+
* @param nameStr - String name to validate
|
|
651
|
+
* @returns true if valid encoding name
|
|
652
|
+
*/
|
|
653
|
+
function isNameValid(nameStr: string): boolean;
|
|
654
|
+
}
|
|
655
|
+
/**
|
|
656
|
+
* Common sample rates (in Hz)
|
|
657
|
+
*/
|
|
658
|
+
declare enum SampleRate {
|
|
659
|
+
RATE_8000 = 8000,
|
|
660
|
+
RATE_16000 = 16000,
|
|
661
|
+
RATE_22050 = 22050,
|
|
662
|
+
RATE_24000 = 24000,
|
|
663
|
+
RATE_32000 = 32000,
|
|
664
|
+
RATE_44100 = 44100,
|
|
665
|
+
RATE_48000 = 48000
|
|
666
|
+
}
|
|
667
|
+
declare namespace SampleRate {
|
|
668
|
+
/**
|
|
669
|
+
* Convert Hz value to SampleRate enum
|
|
670
|
+
* @param hz - Sample rate in Hz (8000, 16000, etc.)
|
|
671
|
+
* @returns SampleRate enum value or undefined if invalid
|
|
672
|
+
*/
|
|
673
|
+
function fromHz(hz: number): SampleRate | undefined;
|
|
674
|
+
/**
|
|
675
|
+
* Convert string name to SampleRate enum
|
|
676
|
+
* @param nameStr - String name like "rate_8000", "RATE_16000", etc. (case insensitive)
|
|
677
|
+
* @returns SampleRate enum value or undefined if invalid
|
|
678
|
+
*/
|
|
679
|
+
function fromName(nameStr: string): SampleRate | undefined;
|
|
680
|
+
/**
|
|
681
|
+
* Convert SampleRate enum to Hz value
|
|
682
|
+
* @param rate - SampleRate enum value
|
|
683
|
+
* @returns Hz value (8000, 16000, etc.)
|
|
684
|
+
*/
|
|
685
|
+
function toHz(rate: SampleRate): number;
|
|
686
|
+
/**
|
|
687
|
+
* Convert SampleRate enum to string name
|
|
688
|
+
* @param rate - SampleRate enum value
|
|
689
|
+
* @returns String name like "RATE_8000", "RATE_16000", etc.
|
|
690
|
+
*/
|
|
691
|
+
function toName(rate: SampleRate): string;
|
|
692
|
+
/**
|
|
693
|
+
* Check if a numeric Hz value is a valid sample rate
|
|
694
|
+
* @param hz - Hz value to validate
|
|
695
|
+
* @returns true if valid sample rate
|
|
696
|
+
*/
|
|
697
|
+
function isHzValid(hz: number): boolean;
|
|
698
|
+
/**
|
|
699
|
+
* Check if a string name is a valid sample rate
|
|
700
|
+
* @param nameStr - String name to validate
|
|
701
|
+
* @returns true if valid sample rate name
|
|
702
|
+
*/
|
|
703
|
+
function isNameValid(nameStr: string): boolean;
|
|
704
|
+
}
|
|
705
|
+
/**
|
|
706
|
+
* Supported languages for recognition
|
|
707
|
+
* Using BCP-47 language tags
|
|
708
|
+
*/
|
|
709
|
+
declare enum Language {
|
|
710
|
+
ENGLISH_US = "en-US",
|
|
711
|
+
ENGLISH_GB = "en-GB",
|
|
712
|
+
SPANISH_ES = "es-ES",
|
|
713
|
+
SPANISH_MX = "es-MX",
|
|
714
|
+
FRENCH_FR = "fr-FR",
|
|
715
|
+
GERMAN_DE = "de-DE",
|
|
716
|
+
ITALIAN_IT = "it-IT",
|
|
717
|
+
PORTUGUESE_BR = "pt-BR",
|
|
718
|
+
JAPANESE_JP = "ja-JP",
|
|
719
|
+
KOREAN_KR = "ko-KR",
|
|
720
|
+
CHINESE_CN = "zh-CN",
|
|
721
|
+
CHINESE_TW = "zh-TW"
|
|
722
|
+
}
|
|
597
723
|
|
|
598
724
|
/**
|
|
599
725
|
* Recognition Context Types V1
|
|
@@ -866,6 +992,40 @@ interface ASRRequestConfig {
|
|
|
866
992
|
* ```
|
|
867
993
|
*/
|
|
868
994
|
providerOptions?: Record<string, any>;
|
|
995
|
+
/**
|
|
996
|
+
* Streaming audio metrics opt-in interval (ms).
|
|
997
|
+
*
|
|
998
|
+
* When set to a positive number, server forwards AudioMetrics results to the
|
|
999
|
+
* client over the WebSocket, throttled so at most one result is sent per
|
|
1000
|
+
* `audioMetricsIntervalMs`. Undefined / 0 disables streaming audio metrics
|
|
1001
|
+
* (final metrics still embedded in the Metadata result).
|
|
1002
|
+
*
|
|
1003
|
+
* @example 500
|
|
1004
|
+
*/
|
|
1005
|
+
audioMetricsIntervalMs?: number;
|
|
1006
|
+
/**
|
|
1007
|
+
* Opt-in: round-trip Deepgram `search` phrase hits into the transcript.
|
|
1008
|
+
*
|
|
1009
|
+
* When `true` AND the resolved provider/model is **deepgram nova-2** AND the
|
|
1010
|
+
* GameContext `gamePhase` is `'Solve Puzzle'`, every Deepgram Results event
|
|
1011
|
+
* with a `channel.search` hit at confidence ≥ 0.6 has the original query
|
|
1012
|
+
* prepended to the transcript text delivered to the client. This restores
|
|
1013
|
+
* parity with the legacy Roku→Deepgram WoF Puzzle-Solve path where the
|
|
1014
|
+
* phrase round-trip lets downstream NLU match multi-word puzzle solutions
|
|
1015
|
+
* even when nova-2's primary transcription drifts.
|
|
1016
|
+
*
|
|
1017
|
+
* Default: `false` (no prepend; transcript is whatever nova-2 produces).
|
|
1018
|
+
*
|
|
1019
|
+
* Scope guard rationale:
|
|
1020
|
+
* - nova-2 only: nova-3 / flux do not need this (they handle phrase
|
|
1021
|
+
* spotting differently and the prepend would only add noise).
|
|
1022
|
+
* - Solve-Puzzle scene only: other WoF scenes (Letter-Guess,
|
|
1023
|
+
* Bonus-Round, etc.) do NOT want the slotMap phrase prepended — only
|
|
1024
|
+
* Puzzle-Solve depends on the phrase round-trip.
|
|
1025
|
+
*
|
|
1026
|
+
* @default false
|
|
1027
|
+
*/
|
|
1028
|
+
appendSearch?: boolean;
|
|
869
1029
|
/**
|
|
870
1030
|
* Optional fallback ASR configurations
|
|
871
1031
|
*
|
|
@@ -1166,6 +1326,12 @@ interface IRecognitionClientConfig {
|
|
|
1166
1326
|
onFunctionCall?: (result: FunctionCallResultV1) => void;
|
|
1167
1327
|
/** Callback when metadata is received. Fires only once, after transcription is complete. */
|
|
1168
1328
|
onMetadata?: (metadata: MetadataResultV1) => void;
|
|
1329
|
+
/**
|
|
1330
|
+
* Callback when streaming audio metrics arrive (volume, silence ratio, clipping, SNR, etc.).
|
|
1331
|
+
* Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
|
|
1332
|
+
* Final metrics still arrive embedded in `onMetadata.audioMetrics`.
|
|
1333
|
+
*/
|
|
1334
|
+
onAudioMetrics?: (metrics: AudioMetricsResultV1) => void;
|
|
1169
1335
|
/** Callback when session is configured with actual ASR provider/model (optional) */
|
|
1170
1336
|
onSessionConfigured?: (config: SessionConfiguredV1) => void;
|
|
1171
1337
|
/** Callback when error occurs */
|
package/dist/config-builder.d.ts
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Simple builder pattern for RealTimeTwoWayWebSocketRecognitionClientConfig
|
|
5
5
|
*/
|
|
6
6
|
import type { RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl } from './recognition-client.types.js';
|
|
7
|
-
import type { ASRRequestConfig, GameContextV1, TranscriptionResultV1, MetadataResultV1, SessionConfiguredV1, ErrorResultV1, Stage } from '@recog/shared-types';
|
|
7
|
+
import type { ASRRequestConfig, GameContextV1, TranscriptionResultV1, MetadataResultV1, SessionConfiguredV1, AudioMetricsResultV1, ErrorResultV1, Stage } from '@recog/shared-types';
|
|
8
8
|
/**
|
|
9
9
|
* Builder for RealTimeTwoWayWebSocketRecognitionClientConfig
|
|
10
10
|
*
|
|
@@ -107,6 +107,11 @@ export declare class ConfigBuilder {
|
|
|
107
107
|
* Set session configured callback (optional)
|
|
108
108
|
*/
|
|
109
109
|
onSessionConfigured(callback: (config: SessionConfiguredV1) => void): this;
|
|
110
|
+
/**
|
|
111
|
+
* Set streaming audio metrics callback (optional).
|
|
112
|
+
* Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
|
|
113
|
+
*/
|
|
114
|
+
onAudioMetrics(callback: (metrics: AudioMetricsResultV1) => void): this;
|
|
110
115
|
/**
|
|
111
116
|
* Set error callback
|
|
112
117
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config-builder.d.ts","sourceRoot":"","sources":["../src/config-builder.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,8CAA8C,EAC9C,sBAAsB,EACvB,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EACV,gBAAgB,EAChB,aAAa,EACb,qBAAqB,EACrB,gBAAgB,EAChB,mBAAmB,EACnB,aAAa,EACb,KAAK,EACN,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAA+D;IAE7E;;;OAGG;IACH,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAKtB;;;;;;;;OAQG;IACH,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAKhD;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAKzC;;;OAGG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,IAAI,EAAE,sBAAsB,EAAE,GAAG,IAAI;IAKlD;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK/B;;OAEG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK1B;;OAEG;IACH,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK3B;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAKhC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,WAAW,GAAG,SAAS,GAAG,IAAI;IAKvD;;;OAGG;IACH,sBAAsB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK7C;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,GAAG,IAAI;IAKrE;;OAEG;IACH,UAAU,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAKhE;;OAEG;IACH,mBAAmB,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,mBAAmB,KAAK,IAAI,GAAG,IAAI;IAK1E;;OAEG;IACH,OAAO,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,GAAG,IAAI;IAKvD;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,IAAI;IAKvC;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI;IAKtE;;OAEG;IACH,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKjC;;OAEG;IACH,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK3C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;IAKrC;;OAEG;
|
|
1
|
+
{"version":3,"file":"config-builder.d.ts","sourceRoot":"","sources":["../src/config-builder.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,8CAA8C,EAC9C,sBAAsB,EACvB,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EACV,gBAAgB,EAChB,aAAa,EACb,qBAAqB,EACrB,gBAAgB,EAChB,mBAAmB,EACnB,oBAAoB,EACpB,aAAa,EACb,KAAK,EACN,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAA+D;IAE7E;;;OAGG;IACH,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAKtB;;;;;;;;OAQG;IACH,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAKhD;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAKzC;;;OAGG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,IAAI,EAAE,sBAAsB,EAAE,GAAG,IAAI;IAKlD;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB;;OAEG;IACH,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK/B;;OAEG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK1B;;OAEG;IACH,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAK3B;;OAEG;IACH,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAKhC;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,WAAW,GAAG,SAAS,GAAG,IAAI;IAKvD;;;OAGG;IACH,sBAAsB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK7C;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,GAAG,IAAI;IAKrE;;OAEG;IACH,UAAU,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAKhE;;OAEG;IACH,mBAAmB,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,mBAAmB,KAAK,IAAI,GAAG,IAAI;IAK1E;;;OAGG;IACH,cAAc,CAAC,QAAQ,EAAE,CAAC,OAAO,EAAE,oBAAoB,KAAK,IAAI,GAAG,IAAI;IAKvE;;OAEG;IACH,OAAO,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,GAAG,IAAI;IAKvD;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,IAAI,GAAG,IAAI;IAKvC;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI;IAKtE;;OAEG;IACH,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKlC;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAKjC;;OAEG;IACH,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAK3C;;OAEG;IACH,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;IAKrC;;OAEG
;IAEH,MAAM,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,GAAG,KAAK,IAAI,GAAG,IAAI;IAKvG;;OAEG;IACH,KAAK,IAAI,8CAA8C;CAGxD"}
|