@volley/recognition-client-sdk 0.1.621 → 0.1.670
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +36 -3
- package/dist/index.bundled.d.ts +88 -49
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +53 -11
- package/dist/index.js.map +3 -3
- package/dist/recog-client-sdk.browser.js +29 -10
- package/dist/recog-client-sdk.browser.js.map +3 -3
- package/dist/vgf-recognition-mapper.d.ts.map +1 -1
- package/dist/vgf-recognition-state.d.ts +6 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/index.spec.ts +11 -0
- package/src/index.ts +4 -0
- package/src/recognition-client.ts +8 -8
- package/src/utils/message-handler.ts +1 -1
- package/src/vgf-recognition-mapper.ts +19 -1
- package/src/vgf-recognition-state.ts +4 -0
|
@@ -12,11 +12,14 @@ declare enum RecognitionProvider {
|
|
|
12
12
|
DEEPGRAM = "deepgram",
|
|
13
13
|
ELEVENLABS = "elevenlabs",
|
|
14
14
|
FIREWORKS = "fireworks",
|
|
15
|
+
GLADIA = "gladia",
|
|
15
16
|
GOOGLE = "google",
|
|
16
17
|
GEMINI_BATCH = "gemini-batch",
|
|
17
18
|
OPENAI_BATCH = "openai-batch",
|
|
19
|
+
SELF_SERVE_VLLM = "self-serve-vllm",
|
|
18
20
|
OPENAI_REALTIME = "openai-realtime",
|
|
19
21
|
MISTRAL_VOXTRAL = "mistral-voxtral",
|
|
22
|
+
CARTESIA = "cartesia",
|
|
20
23
|
DASHSCOPE = "dashscope",
|
|
21
24
|
TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
|
|
22
25
|
TEST_ASR_STREAMING = "test-asr-streaming"
|
|
@@ -67,6 +70,13 @@ declare enum FireworksModel {
|
|
|
67
70
|
WHISPER_V3 = "whisper-v3",
|
|
68
71
|
WHISPER_V3_TURBO = "whisper-v3-turbo"
|
|
69
72
|
}
|
|
73
|
+
/**
|
|
74
|
+
* Gladia Solaria realtime transcription models
|
|
75
|
+
* @see https://docs.gladia.io/api-reference/v2/live/init
|
|
76
|
+
*/
|
|
77
|
+
declare enum GladiaModel {
|
|
78
|
+
SOLARIA_1 = "solaria-1"
|
|
79
|
+
}
|
|
70
80
|
/**
|
|
71
81
|
* ElevenLabs Scribe models for speech-to-text
|
|
72
82
|
* @see https://elevenlabs.io/blog/introducing-scribe-v2-realtime
|
|
@@ -74,8 +84,7 @@ declare enum FireworksModel {
|
|
|
74
84
|
* @see https://elevenlabs.io/docs/api-reference/speech-to-text/convert
|
|
75
85
|
*/
|
|
76
86
|
declare enum ElevenLabsModel {
|
|
77
|
-
SCRIBE_V2_REALTIME = "scribe_v2_realtime"
|
|
78
|
-
SCRIBE_V1 = "scribe_v1"
|
|
87
|
+
SCRIBE_V2_REALTIME = "scribe_v2_realtime"
|
|
79
88
|
}
|
|
80
89
|
/**
|
|
81
90
|
* OpenAI Realtime API transcription models
|
|
@@ -94,6 +103,14 @@ declare enum OpenAIRealtimeModel {
|
|
|
94
103
|
declare enum MistralVoxtralModel {
|
|
95
104
|
VOXTRAL_MINI_REALTIME_2602 = "voxtral-mini-transcribe-realtime-2602"
|
|
96
105
|
}
|
|
106
|
+
/**
|
|
107
|
+
* Cartesia Ink-Whisper Realtime transcription models
|
|
108
|
+
* @see https://docs.cartesia.ai/build-with-cartesia/stt-models
|
|
109
|
+
*/
|
|
110
|
+
declare enum CartesiaModel {
|
|
111
|
+
INK_WHISPER = "ink-whisper",
|
|
112
|
+
INK_WHISPER_20250604 = "ink-whisper-2025-06-04"
|
|
113
|
+
}
|
|
97
114
|
/**
|
|
98
115
|
* DashScope Qwen-ASR Realtime transcription models
|
|
99
116
|
* @see https://www.alibabacloud.com/help/en/model-studio/qwen-real-time-speech-recognition
|
|
@@ -102,10 +119,17 @@ declare enum DashScopeModel {
|
|
|
102
119
|
QWEN3_ASR_FLASH_REALTIME_2602 = "qwen3-asr-flash-realtime-2026-02-10",
|
|
103
120
|
QWEN3_ASR_FLASH_REALTIME = "qwen3-asr-flash-realtime"
|
|
104
121
|
}
|
|
122
|
+
/**
|
|
123
|
+
* Self-serve vLLM batch transcription models
|
|
124
|
+
* Backed by recognition-inference / RunPod `/transcribe`
|
|
125
|
+
*/
|
|
126
|
+
declare enum SelfServeVllmModel {
|
|
127
|
+
QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
|
|
128
|
+
}
|
|
105
129
|
/**
|
|
106
130
|
* Type alias for any model from any provider
|
|
107
131
|
*/
|
|
108
|
-
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | DashScopeModel | string;
|
|
132
|
+
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | SelfServeVllmModel | string;
|
|
109
133
|
|
|
110
134
|
/**
|
|
111
135
|
* Audio encoding types
|
|
@@ -260,6 +284,7 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
|
|
|
260
284
|
voiceStart: z.ZodOptional<z.ZodNumber>;
|
|
261
285
|
voiceDuration: z.ZodOptional<z.ZodNumber>;
|
|
262
286
|
voiceEnd: z.ZodOptional<z.ZodNumber>;
|
|
287
|
+
lastNonSilence: z.ZodOptional<z.ZodNumber>;
|
|
263
288
|
startTimestamp: z.ZodOptional<z.ZodNumber>;
|
|
264
289
|
endTimestamp: z.ZodOptional<z.ZodNumber>;
|
|
265
290
|
receivedAtMs: z.ZodOptional<z.ZodNumber>;
|
|
@@ -278,6 +303,7 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
|
|
|
278
303
|
voiceStart?: number | undefined;
|
|
279
304
|
voiceDuration?: number | undefined;
|
|
280
305
|
voiceEnd?: number | undefined;
|
|
306
|
+
lastNonSilence?: number | undefined;
|
|
281
307
|
startTimestamp?: number | undefined;
|
|
282
308
|
endTimestamp?: number | undefined;
|
|
283
309
|
receivedAtMs?: number | undefined;
|
|
@@ -296,6 +322,7 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
|
|
|
296
322
|
voiceStart?: number | undefined;
|
|
297
323
|
voiceDuration?: number | undefined;
|
|
298
324
|
voiceEnd?: number | undefined;
|
|
325
|
+
lastNonSilence?: number | undefined;
|
|
299
326
|
startTimestamp?: number | undefined;
|
|
300
327
|
endTimestamp?: number | undefined;
|
|
301
328
|
receivedAtMs?: number | undefined;
|
|
@@ -366,6 +393,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
366
393
|
rawAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
367
394
|
costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
|
|
368
395
|
apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
|
|
396
|
+
provider: z.ZodOptional<z.ZodString>;
|
|
397
|
+
model: z.ZodOptional<z.ZodString>;
|
|
369
398
|
asrConfig: z.ZodOptional<z.ZodString>;
|
|
370
399
|
rawAsrMetadata: z.ZodOptional<z.ZodString>;
|
|
371
400
|
transcriptOutcome: z.ZodOptional<z.ZodNativeEnum<typeof TranscriptOutcomeType>>;
|
|
@@ -420,6 +449,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
420
449
|
rawAudioTimeMs?: number | undefined;
|
|
421
450
|
costInUSD?: number | undefined;
|
|
422
451
|
apiType?: ASRApiType | undefined;
|
|
452
|
+
provider?: string | undefined;
|
|
453
|
+
model?: string | undefined;
|
|
423
454
|
asrConfig?: string | undefined;
|
|
424
455
|
rawAsrMetadata?: string | undefined;
|
|
425
456
|
transcriptOutcome?: TranscriptOutcomeType | undefined;
|
|
@@ -450,6 +481,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
450
481
|
rawAudioTimeMs?: number | undefined;
|
|
451
482
|
costInUSD?: number | undefined;
|
|
452
483
|
apiType?: ASRApiType | undefined;
|
|
484
|
+
provider?: string | undefined;
|
|
485
|
+
model?: string | undefined;
|
|
453
486
|
asrConfig?: string | undefined;
|
|
454
487
|
rawAsrMetadata?: string | undefined;
|
|
455
488
|
transcriptOutcome?: TranscriptOutcomeType | undefined;
|