@volley/recognition-client-sdk 0.1.622 → 0.1.670
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +33 -3
- package/dist/index.bundled.d.ts +34 -4
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +27 -6
- package/dist/index.js.map +3 -3
- package/dist/recog-client-sdk.browser.js +24 -6
- package/dist/recog-client-sdk.browser.js.map +3 -3
- package/package.json +3 -3
- package/src/index.spec.ts +11 -0
- package/src/index.ts +3 -0
- package/src/recognition-client.ts +4 -4
- package/src/utils/message-handler.ts +1 -1
|
@@ -12,11 +12,14 @@ declare enum RecognitionProvider {
|
|
|
12
12
|
DEEPGRAM = "deepgram",
|
|
13
13
|
ELEVENLABS = "elevenlabs",
|
|
14
14
|
FIREWORKS = "fireworks",
|
|
15
|
+
GLADIA = "gladia",
|
|
15
16
|
GOOGLE = "google",
|
|
16
17
|
GEMINI_BATCH = "gemini-batch",
|
|
17
18
|
OPENAI_BATCH = "openai-batch",
|
|
19
|
+
SELF_SERVE_VLLM = "self-serve-vllm",
|
|
18
20
|
OPENAI_REALTIME = "openai-realtime",
|
|
19
21
|
MISTRAL_VOXTRAL = "mistral-voxtral",
|
|
22
|
+
CARTESIA = "cartesia",
|
|
20
23
|
DASHSCOPE = "dashscope",
|
|
21
24
|
TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
|
|
22
25
|
TEST_ASR_STREAMING = "test-asr-streaming"
|
|
@@ -67,6 +70,13 @@ declare enum FireworksModel {
|
|
|
67
70
|
WHISPER_V3 = "whisper-v3",
|
|
68
71
|
WHISPER_V3_TURBO = "whisper-v3-turbo"
|
|
69
72
|
}
|
|
73
|
+
/**
|
|
74
|
+
* Gladia Solaria realtime transcription models
|
|
75
|
+
* @see https://docs.gladia.io/api-reference/v2/live/init
|
|
76
|
+
*/
|
|
77
|
+
declare enum GladiaModel {
|
|
78
|
+
SOLARIA_1 = "solaria-1"
|
|
79
|
+
}
|
|
70
80
|
/**
|
|
71
81
|
* ElevenLabs Scribe models for speech-to-text
|
|
72
82
|
* @see https://elevenlabs.io/blog/introducing-scribe-v2-realtime
|
|
@@ -74,8 +84,7 @@ declare enum FireworksModel {
|
|
|
74
84
|
* @see https://elevenlabs.io/docs/api-reference/speech-to-text/convert
|
|
75
85
|
*/
|
|
76
86
|
declare enum ElevenLabsModel {
|
|
77
|
-
SCRIBE_V2_REALTIME = "scribe_v2_realtime"
|
|
78
|
-
SCRIBE_V1 = "scribe_v1"
|
|
87
|
+
SCRIBE_V2_REALTIME = "scribe_v2_realtime"
|
|
79
88
|
}
|
|
80
89
|
/**
|
|
81
90
|
* OpenAI Realtime API transcription models
|
|
@@ -94,6 +103,14 @@ declare enum OpenAIRealtimeModel {
|
|
|
94
103
|
declare enum MistralVoxtralModel {
|
|
95
104
|
VOXTRAL_MINI_REALTIME_2602 = "voxtral-mini-transcribe-realtime-2602"
|
|
96
105
|
}
|
|
106
|
+
/**
|
|
107
|
+
* Cartesia Ink-Whisper Realtime transcription models
|
|
108
|
+
* @see https://docs.cartesia.ai/build-with-cartesia/stt-models
|
|
109
|
+
*/
|
|
110
|
+
declare enum CartesiaModel {
|
|
111
|
+
INK_WHISPER = "ink-whisper",
|
|
112
|
+
INK_WHISPER_20250604 = "ink-whisper-2025-06-04"
|
|
113
|
+
}
|
|
97
114
|
/**
|
|
98
115
|
* DashScope Qwen-ASR Realtime transcription models
|
|
99
116
|
* @see https://www.alibabacloud.com/help/en/model-studio/qwen-real-time-speech-recognition
|
|
@@ -102,10 +119,17 @@ declare enum DashScopeModel {
|
|
|
102
119
|
QWEN3_ASR_FLASH_REALTIME_2602 = "qwen3-asr-flash-realtime-2026-02-10",
|
|
103
120
|
QWEN3_ASR_FLASH_REALTIME = "qwen3-asr-flash-realtime"
|
|
104
121
|
}
|
|
122
|
+
/**
|
|
123
|
+
* Self-serve vLLM batch transcription models
|
|
124
|
+
* Backed by recognition-inference / RunPod `/transcribe`
|
|
125
|
+
*/
|
|
126
|
+
declare enum SelfServeVllmModel {
|
|
127
|
+
QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
|
|
128
|
+
}
|
|
105
129
|
/**
|
|
106
130
|
* Type alias for any model from any provider
|
|
107
131
|
*/
|
|
108
|
-
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | DashScopeModel | string;
|
|
132
|
+
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | SelfServeVllmModel | string;
|
|
109
133
|
|
|
110
134
|
/**
|
|
111
135
|
* Audio encoding types
|
|
@@ -369,6 +393,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
369
393
|
rawAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
370
394
|
costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
|
|
371
395
|
apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
|
|
396
|
+
provider: z.ZodOptional<z.ZodString>;
|
|
397
|
+
model: z.ZodOptional<z.ZodString>;
|
|
372
398
|
asrConfig: z.ZodOptional<z.ZodString>;
|
|
373
399
|
rawAsrMetadata: z.ZodOptional<z.ZodString>;
|
|
374
400
|
transcriptOutcome: z.ZodOptional<z.ZodNativeEnum<typeof TranscriptOutcomeType>>;
|
|
@@ -423,6 +449,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
423
449
|
rawAudioTimeMs?: number | undefined;
|
|
424
450
|
costInUSD?: number | undefined;
|
|
425
451
|
apiType?: ASRApiType | undefined;
|
|
452
|
+
provider?: string | undefined;
|
|
453
|
+
model?: string | undefined;
|
|
426
454
|
asrConfig?: string | undefined;
|
|
427
455
|
rawAsrMetadata?: string | undefined;
|
|
428
456
|
transcriptOutcome?: TranscriptOutcomeType | undefined;
|
|
@@ -453,6 +481,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
453
481
|
rawAudioTimeMs?: number | undefined;
|
|
454
482
|
costInUSD?: number | undefined;
|
|
455
483
|
apiType?: ASRApiType | undefined;
|
|
484
|
+
provider?: string | undefined;
|
|
485
|
+
model?: string | undefined;
|
|
456
486
|
asrConfig?: string | undefined;
|
|
457
487
|
rawAsrMetadata?: string | undefined;
|
|
458
488
|
transcriptOutcome?: TranscriptOutcomeType | undefined;
|
package/dist/index.bundled.d.ts
CHANGED
|
@@ -12,11 +12,14 @@ declare enum RecognitionProvider {
|
|
|
12
12
|
DEEPGRAM = "deepgram",
|
|
13
13
|
ELEVENLABS = "elevenlabs",
|
|
14
14
|
FIREWORKS = "fireworks",
|
|
15
|
+
GLADIA = "gladia",
|
|
15
16
|
GOOGLE = "google",
|
|
16
17
|
GEMINI_BATCH = "gemini-batch",
|
|
17
18
|
OPENAI_BATCH = "openai-batch",
|
|
19
|
+
SELF_SERVE_VLLM = "self-serve-vllm",
|
|
18
20
|
OPENAI_REALTIME = "openai-realtime",
|
|
19
21
|
MISTRAL_VOXTRAL = "mistral-voxtral",
|
|
22
|
+
CARTESIA = "cartesia",
|
|
20
23
|
DASHSCOPE = "dashscope",
|
|
21
24
|
TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
|
|
22
25
|
TEST_ASR_STREAMING = "test-asr-streaming"
|
|
@@ -67,6 +70,13 @@ declare enum FireworksModel {
|
|
|
67
70
|
WHISPER_V3 = "whisper-v3",
|
|
68
71
|
WHISPER_V3_TURBO = "whisper-v3-turbo"
|
|
69
72
|
}
|
|
73
|
+
/**
|
|
74
|
+
* Gladia Solaria realtime transcription models
|
|
75
|
+
* @see https://docs.gladia.io/api-reference/v2/live/init
|
|
76
|
+
*/
|
|
77
|
+
declare enum GladiaModel {
|
|
78
|
+
SOLARIA_1 = "solaria-1"
|
|
79
|
+
}
|
|
70
80
|
/**
|
|
71
81
|
* ElevenLabs Scribe models for speech-to-text
|
|
72
82
|
* @see https://elevenlabs.io/blog/introducing-scribe-v2-realtime
|
|
@@ -74,8 +84,7 @@ declare enum FireworksModel {
|
|
|
74
84
|
* @see https://elevenlabs.io/docs/api-reference/speech-to-text/convert
|
|
75
85
|
*/
|
|
76
86
|
declare enum ElevenLabsModel {
|
|
77
|
-
SCRIBE_V2_REALTIME = "scribe_v2_realtime"
|
|
78
|
-
SCRIBE_V1 = "scribe_v1"
|
|
87
|
+
SCRIBE_V2_REALTIME = "scribe_v2_realtime"
|
|
79
88
|
}
|
|
80
89
|
/**
|
|
81
90
|
* OpenAI Realtime API transcription models
|
|
@@ -94,6 +103,14 @@ declare enum OpenAIRealtimeModel {
|
|
|
94
103
|
declare enum MistralVoxtralModel {
|
|
95
104
|
VOXTRAL_MINI_REALTIME_2602 = "voxtral-mini-transcribe-realtime-2602"
|
|
96
105
|
}
|
|
106
|
+
/**
|
|
107
|
+
* Cartesia Ink-Whisper Realtime transcription models
|
|
108
|
+
* @see https://docs.cartesia.ai/build-with-cartesia/stt-models
|
|
109
|
+
*/
|
|
110
|
+
declare enum CartesiaModel {
|
|
111
|
+
INK_WHISPER = "ink-whisper",
|
|
112
|
+
INK_WHISPER_20250604 = "ink-whisper-2025-06-04"
|
|
113
|
+
}
|
|
97
114
|
/**
|
|
98
115
|
* DashScope Qwen-ASR Realtime transcription models
|
|
99
116
|
* @see https://www.alibabacloud.com/help/en/model-studio/qwen-real-time-speech-recognition
|
|
@@ -102,10 +119,17 @@ declare enum DashScopeModel {
|
|
|
102
119
|
QWEN3_ASR_FLASH_REALTIME_2602 = "qwen3-asr-flash-realtime-2026-02-10",
|
|
103
120
|
QWEN3_ASR_FLASH_REALTIME = "qwen3-asr-flash-realtime"
|
|
104
121
|
}
|
|
122
|
+
/**
|
|
123
|
+
* Self-serve vLLM batch transcription models
|
|
124
|
+
* Backed by recognition-inference / RunPod `/transcribe`
|
|
125
|
+
*/
|
|
126
|
+
declare enum SelfServeVllmModel {
|
|
127
|
+
QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
|
|
128
|
+
}
|
|
105
129
|
/**
|
|
106
130
|
* Type alias for any model from any provider
|
|
107
131
|
*/
|
|
108
|
-
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | DashScopeModel | string;
|
|
132
|
+
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | SelfServeVllmModel | string;
|
|
109
133
|
|
|
110
134
|
/**
|
|
111
135
|
* Audio encoding types
|
|
@@ -369,6 +393,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
369
393
|
rawAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
370
394
|
costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
|
|
371
395
|
apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
|
|
396
|
+
provider: z.ZodOptional<z.ZodString>;
|
|
397
|
+
model: z.ZodOptional<z.ZodString>;
|
|
372
398
|
asrConfig: z.ZodOptional<z.ZodString>;
|
|
373
399
|
rawAsrMetadata: z.ZodOptional<z.ZodString>;
|
|
374
400
|
transcriptOutcome: z.ZodOptional<z.ZodNativeEnum<typeof TranscriptOutcomeType>>;
|
|
@@ -423,6 +449,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
423
449
|
rawAudioTimeMs?: number | undefined;
|
|
424
450
|
costInUSD?: number | undefined;
|
|
425
451
|
apiType?: ASRApiType | undefined;
|
|
452
|
+
provider?: string | undefined;
|
|
453
|
+
model?: string | undefined;
|
|
426
454
|
asrConfig?: string | undefined;
|
|
427
455
|
rawAsrMetadata?: string | undefined;
|
|
428
456
|
transcriptOutcome?: TranscriptOutcomeType | undefined;
|
|
@@ -453,6 +481,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
453
481
|
rawAudioTimeMs?: number | undefined;
|
|
454
482
|
costInUSD?: number | undefined;
|
|
455
483
|
apiType?: ASRApiType | undefined;
|
|
484
|
+
provider?: string | undefined;
|
|
485
|
+
model?: string | undefined;
|
|
456
486
|
asrConfig?: string | undefined;
|
|
457
487
|
rawAsrMetadata?: string | undefined;
|
|
458
488
|
transcriptOutcome?: TranscriptOutcomeType | undefined;
|
|
@@ -2737,5 +2767,5 @@ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null |
|
|
|
2737
2767
|
declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
|
|
2738
2768
|
declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
|
|
2739
2769
|
|
|
2740
|
-
export { AudioEncoding, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
|
|
2770
|
+
export { AudioEncoding, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
|
|
2741
2771
|
export type { ASRRequestConfig, ASRRequestV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
|
package/dist/index.d.ts
CHANGED
|
@@ -11,6 +11,6 @@ export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, Tran
|
|
|
11
11
|
export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
|
|
12
12
|
export { AudioEncoding } from '@recog/websocket';
|
|
13
13
|
export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
|
|
14
|
-
type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GoogleModel, GeminiModel, OpenAIModel, OpenAIRealtimeModel, MistralVoxtralModel, DashScopeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
|
|
14
|
+
type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
|
|
15
15
|
export { getRecognitionServiceBase, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getRecognitionServiceHost, getRecognitionConductorBase, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionConductorHost, normalizeStage, RECOGNITION_SERVICE_BASES, RECOGNITION_CONDUCTOR_BASES } from '@recog/shared-config';
|
|
16
16
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,mBAAmB,EACnB,mBAAmB,EACnB,cAAc,EACd,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -3737,11 +3737,14 @@ var RecognitionProvider;
|
|
|
3737
3737
|
RecognitionProvider2["DEEPGRAM"] = "deepgram";
|
|
3738
3738
|
RecognitionProvider2["ELEVENLABS"] = "elevenlabs";
|
|
3739
3739
|
RecognitionProvider2["FIREWORKS"] = "fireworks";
|
|
3740
|
+
RecognitionProvider2["GLADIA"] = "gladia";
|
|
3740
3741
|
RecognitionProvider2["GOOGLE"] = "google";
|
|
3741
3742
|
RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
|
|
3742
3743
|
RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
|
|
3744
|
+
RecognitionProvider2["SELF_SERVE_VLLM"] = "self-serve-vllm";
|
|
3743
3745
|
RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
|
|
3744
3746
|
RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
|
|
3747
|
+
RecognitionProvider2["CARTESIA"] = "cartesia";
|
|
3745
3748
|
RecognitionProvider2["DASHSCOPE"] = "dashscope";
|
|
3746
3749
|
RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
|
|
3747
3750
|
RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
|
|
@@ -3783,10 +3786,13 @@ var FireworksModel;
|
|
|
3783
3786
|
FireworksModel2["WHISPER_V3"] = "whisper-v3";
|
|
3784
3787
|
FireworksModel2["WHISPER_V3_TURBO"] = "whisper-v3-turbo";
|
|
3785
3788
|
})(FireworksModel || (FireworksModel = {}));
|
|
3789
|
+
var GladiaModel;
|
|
3790
|
+
(function(GladiaModel2) {
|
|
3791
|
+
GladiaModel2["SOLARIA_1"] = "solaria-1";
|
|
3792
|
+
})(GladiaModel || (GladiaModel = {}));
|
|
3786
3793
|
var ElevenLabsModel;
|
|
3787
3794
|
(function(ElevenLabsModel2) {
|
|
3788
3795
|
ElevenLabsModel2["SCRIBE_V2_REALTIME"] = "scribe_v2_realtime";
|
|
3789
|
-
ElevenLabsModel2["SCRIBE_V1"] = "scribe_v1";
|
|
3790
3796
|
})(ElevenLabsModel || (ElevenLabsModel = {}));
|
|
3791
3797
|
var OpenAIRealtimeModel;
|
|
3792
3798
|
(function(OpenAIRealtimeModel2) {
|
|
@@ -3797,11 +3803,20 @@ var MistralVoxtralModel;
|
|
|
3797
3803
|
(function(MistralVoxtralModel2) {
|
|
3798
3804
|
MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
|
|
3799
3805
|
})(MistralVoxtralModel || (MistralVoxtralModel = {}));
|
|
3806
|
+
var CartesiaModel;
|
|
3807
|
+
(function(CartesiaModel2) {
|
|
3808
|
+
CartesiaModel2["INK_WHISPER"] = "ink-whisper";
|
|
3809
|
+
CartesiaModel2["INK_WHISPER_20250604"] = "ink-whisper-2025-06-04";
|
|
3810
|
+
})(CartesiaModel || (CartesiaModel = {}));
|
|
3800
3811
|
var DashScopeModel;
|
|
3801
3812
|
(function(DashScopeModel2) {
|
|
3802
3813
|
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
|
|
3803
3814
|
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
|
|
3804
3815
|
})(DashScopeModel || (DashScopeModel = {}));
|
|
3816
|
+
var SelfServeVllmModel;
|
|
3817
|
+
(function(SelfServeVllmModel2) {
|
|
3818
|
+
SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
|
|
3819
|
+
})(SelfServeVllmModel || (SelfServeVllmModel = {}));
|
|
3805
3820
|
|
|
3806
3821
|
// ../../libs/types/dist/recognition-result-v1.types.js
|
|
3807
3822
|
var RecognitionResultTypeV1;
|
|
@@ -3874,6 +3889,9 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3874
3889
|
costInUSD: z.number().default(0).optional(),
|
|
3875
3890
|
// ASR API Type
|
|
3876
3891
|
apiType: z.nativeEnum(ASRApiType).optional(),
|
|
3892
|
+
// Provider identification
|
|
3893
|
+
provider: z.string().optional(),
|
|
3894
|
+
model: z.string().optional(),
|
|
3877
3895
|
// ASR configuration as JSON string (no type validation)
|
|
3878
3896
|
asrConfig: z.string().optional(),
|
|
3879
3897
|
// Raw ASR metadata payload as provided by the provider (stringified if needed)
|
|
@@ -5249,7 +5267,7 @@ var MessageHandler = class {
|
|
|
5249
5267
|
}
|
|
5250
5268
|
if (msg.data && typeof msg.data !== "object") {
|
|
5251
5269
|
if (this.callbacks.logger) {
|
|
5252
|
-
this.callbacks.logger("
|
|
5270
|
+
this.callbacks.logger("warn", "[RecogSDK] Received primitive msg.data from server", {
|
|
5253
5271
|
dataType: typeof msg.data,
|
|
5254
5272
|
data: msg.data,
|
|
5255
5273
|
fullMessage: msg
|
|
@@ -5597,7 +5615,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5597
5615
|
blobToArrayBuffer(audioData).then((arrayBuffer) => {
|
|
5598
5616
|
this.sendAudioInternal(arrayBuffer);
|
|
5599
5617
|
}).catch((error) => {
|
|
5600
|
-
this.log("
|
|
5618
|
+
this.log("warn", "Failed to convert Blob to ArrayBuffer", error);
|
|
5601
5619
|
});
|
|
5602
5620
|
return;
|
|
5603
5621
|
}
|
|
@@ -5637,7 +5655,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5637
5655
|
*/
|
|
5638
5656
|
async stopRecording() {
|
|
5639
5657
|
if (this.state !== "ready" /* READY */) {
|
|
5640
|
-
this.log("
|
|
5658
|
+
this.log("info", "stopRecording called but not in READY state", { state: this.state });
|
|
5641
5659
|
return;
|
|
5642
5660
|
}
|
|
5643
5661
|
this.log("debug", "Stopping recording");
|
|
@@ -5807,7 +5825,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5807
5825
|
if (this.state === "stopping" /* STOPPING */) {
|
|
5808
5826
|
this.state = "stopped" /* STOPPED */;
|
|
5809
5827
|
} else if (this.state === "connected" /* CONNECTED */ || this.state === "ready" /* READY */ || this.state === "connecting" /* CONNECTING */) {
|
|
5810
|
-
this.log("
|
|
5828
|
+
this.log("warn", "[DIAGNOSTIC] Unexpected disconnection", {
|
|
5811
5829
|
code,
|
|
5812
5830
|
codeDescription: closeCodeDescription,
|
|
5813
5831
|
reason: reason || "(empty)",
|
|
@@ -5929,7 +5947,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5929
5947
|
blobToArrayBuffer(audioData).then((arrayBuffer) => {
|
|
5930
5948
|
this.sendPrefixAudioInternal(arrayBuffer);
|
|
5931
5949
|
}).catch((error) => {
|
|
5932
|
-
this.log("
|
|
5950
|
+
this.log("warn", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
|
|
5933
5951
|
});
|
|
5934
5952
|
return;
|
|
5935
5953
|
}
|
|
@@ -6591,6 +6609,7 @@ function createSimplifiedVGFClient(config) {
|
|
|
6591
6609
|
}
|
|
6592
6610
|
export {
|
|
6593
6611
|
AudioEncoding,
|
|
6612
|
+
CartesiaModel,
|
|
6594
6613
|
ClientControlActionV1,
|
|
6595
6614
|
ClientState,
|
|
6596
6615
|
ConfigBuilder,
|
|
@@ -6604,6 +6623,7 @@ export {
|
|
|
6604
6623
|
FinalTranscriptStability,
|
|
6605
6624
|
FireworksModel,
|
|
6606
6625
|
GeminiModel,
|
|
6626
|
+
GladiaModel,
|
|
6607
6627
|
GoogleModel,
|
|
6608
6628
|
Language,
|
|
6609
6629
|
MistralVoxtralModel,
|
|
@@ -6620,6 +6640,7 @@ export {
|
|
|
6620
6640
|
RecordingStatus,
|
|
6621
6641
|
STAGES,
|
|
6622
6642
|
SampleRate,
|
|
6643
|
+
SelfServeVllmModel,
|
|
6623
6644
|
SimplifiedVGFRecognitionClient,
|
|
6624
6645
|
TimeoutError,
|
|
6625
6646
|
TranscriptionStatus,
|