@volley/recognition-client-sdk 0.1.622 → 0.1.670

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,11 +12,14 @@ declare enum RecognitionProvider {
12
12
  DEEPGRAM = "deepgram",
13
13
  ELEVENLABS = "elevenlabs",
14
14
  FIREWORKS = "fireworks",
15
+ GLADIA = "gladia",
15
16
  GOOGLE = "google",
16
17
  GEMINI_BATCH = "gemini-batch",
17
18
  OPENAI_BATCH = "openai-batch",
19
+ SELF_SERVE_VLLM = "self-serve-vllm",
18
20
  OPENAI_REALTIME = "openai-realtime",
19
21
  MISTRAL_VOXTRAL = "mistral-voxtral",
22
+ CARTESIA = "cartesia",
20
23
  DASHSCOPE = "dashscope",
21
24
  TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
22
25
  TEST_ASR_STREAMING = "test-asr-streaming"
@@ -67,6 +70,13 @@ declare enum FireworksModel {
67
70
  WHISPER_V3 = "whisper-v3",
68
71
  WHISPER_V3_TURBO = "whisper-v3-turbo"
69
72
  }
73
+ /**
74
+ * Gladia Solaria realtime transcription models
75
+ * @see https://docs.gladia.io/api-reference/v2/live/init
76
+ */
77
+ declare enum GladiaModel {
78
+ SOLARIA_1 = "solaria-1"
79
+ }
70
80
  /**
71
81
  * ElevenLabs Scribe models for speech-to-text
72
82
  * @see https://elevenlabs.io/blog/introducing-scribe-v2-realtime
@@ -74,8 +84,7 @@ declare enum FireworksModel {
74
84
  * @see https://elevenlabs.io/docs/api-reference/speech-to-text/convert
75
85
  */
76
86
  declare enum ElevenLabsModel {
77
- SCRIBE_V2_REALTIME = "scribe_v2_realtime",
78
- SCRIBE_V1 = "scribe_v1"
87
+ SCRIBE_V2_REALTIME = "scribe_v2_realtime"
79
88
  }
80
89
  /**
81
90
  * OpenAI Realtime API transcription models
@@ -94,6 +103,14 @@ declare enum OpenAIRealtimeModel {
94
103
  declare enum MistralVoxtralModel {
95
104
  VOXTRAL_MINI_REALTIME_2602 = "voxtral-mini-transcribe-realtime-2602"
96
105
  }
106
+ /**
107
+ * Cartesia Ink-Whisper Realtime transcription models
108
+ * @see https://docs.cartesia.ai/build-with-cartesia/stt-models
109
+ */
110
+ declare enum CartesiaModel {
111
+ INK_WHISPER = "ink-whisper",
112
+ INK_WHISPER_20250604 = "ink-whisper-2025-06-04"
113
+ }
97
114
  /**
98
115
  * DashScope Qwen-ASR Realtime transcription models
99
116
  * @see https://www.alibabacloud.com/help/en/model-studio/qwen-real-time-speech-recognition
@@ -102,10 +119,17 @@ declare enum DashScopeModel {
102
119
  QWEN3_ASR_FLASH_REALTIME_2602 = "qwen3-asr-flash-realtime-2026-02-10",
103
120
  QWEN3_ASR_FLASH_REALTIME = "qwen3-asr-flash-realtime"
104
121
  }
122
+ /**
123
+ * Self-serve vLLM batch transcription models
124
+ * Backed by recognition-inference / RunPod `/transcribe`
125
+ */
126
+ declare enum SelfServeVllmModel {
127
+ QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
128
+ }
105
129
  /**
106
130
  * Type alias for any model from any provider
107
131
  */
108
- type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | DashScopeModel | string;
132
+ type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | SelfServeVllmModel | string;
109
133
 
110
134
  /**
111
135
  * Audio encoding types
@@ -369,6 +393,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
369
393
  rawAudioTimeMs: z.ZodOptional<z.ZodNumber>;
370
394
  costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
371
395
  apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
396
+ provider: z.ZodOptional<z.ZodString>;
397
+ model: z.ZodOptional<z.ZodString>;
372
398
  asrConfig: z.ZodOptional<z.ZodString>;
373
399
  rawAsrMetadata: z.ZodOptional<z.ZodString>;
374
400
  transcriptOutcome: z.ZodOptional<z.ZodNativeEnum<typeof TranscriptOutcomeType>>;
@@ -423,6 +449,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
423
449
  rawAudioTimeMs?: number | undefined;
424
450
  costInUSD?: number | undefined;
425
451
  apiType?: ASRApiType | undefined;
452
+ provider?: string | undefined;
453
+ model?: string | undefined;
426
454
  asrConfig?: string | undefined;
427
455
  rawAsrMetadata?: string | undefined;
428
456
  transcriptOutcome?: TranscriptOutcomeType | undefined;
@@ -453,6 +481,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
453
481
  rawAudioTimeMs?: number | undefined;
454
482
  costInUSD?: number | undefined;
455
483
  apiType?: ASRApiType | undefined;
484
+ provider?: string | undefined;
485
+ model?: string | undefined;
456
486
  asrConfig?: string | undefined;
457
487
  rawAsrMetadata?: string | undefined;
458
488
  transcriptOutcome?: TranscriptOutcomeType | undefined;
@@ -12,11 +12,14 @@ declare enum RecognitionProvider {
12
12
  DEEPGRAM = "deepgram",
13
13
  ELEVENLABS = "elevenlabs",
14
14
  FIREWORKS = "fireworks",
15
+ GLADIA = "gladia",
15
16
  GOOGLE = "google",
16
17
  GEMINI_BATCH = "gemini-batch",
17
18
  OPENAI_BATCH = "openai-batch",
19
+ SELF_SERVE_VLLM = "self-serve-vllm",
18
20
  OPENAI_REALTIME = "openai-realtime",
19
21
  MISTRAL_VOXTRAL = "mistral-voxtral",
22
+ CARTESIA = "cartesia",
20
23
  DASHSCOPE = "dashscope",
21
24
  TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
22
25
  TEST_ASR_STREAMING = "test-asr-streaming"
@@ -67,6 +70,13 @@ declare enum FireworksModel {
67
70
  WHISPER_V3 = "whisper-v3",
68
71
  WHISPER_V3_TURBO = "whisper-v3-turbo"
69
72
  }
73
+ /**
74
+ * Gladia Solaria realtime transcription models
75
+ * @see https://docs.gladia.io/api-reference/v2/live/init
76
+ */
77
+ declare enum GladiaModel {
78
+ SOLARIA_1 = "solaria-1"
79
+ }
70
80
  /**
71
81
  * ElevenLabs Scribe models for speech-to-text
72
82
  * @see https://elevenlabs.io/blog/introducing-scribe-v2-realtime
@@ -74,8 +84,7 @@ declare enum FireworksModel {
74
84
  * @see https://elevenlabs.io/docs/api-reference/speech-to-text/convert
75
85
  */
76
86
  declare enum ElevenLabsModel {
77
- SCRIBE_V2_REALTIME = "scribe_v2_realtime",
78
- SCRIBE_V1 = "scribe_v1"
87
+ SCRIBE_V2_REALTIME = "scribe_v2_realtime"
79
88
  }
80
89
  /**
81
90
  * OpenAI Realtime API transcription models
@@ -94,6 +103,14 @@ declare enum OpenAIRealtimeModel {
94
103
  declare enum MistralVoxtralModel {
95
104
  VOXTRAL_MINI_REALTIME_2602 = "voxtral-mini-transcribe-realtime-2602"
96
105
  }
106
+ /**
107
+ * Cartesia Ink-Whisper Realtime transcription models
108
+ * @see https://docs.cartesia.ai/build-with-cartesia/stt-models
109
+ */
110
+ declare enum CartesiaModel {
111
+ INK_WHISPER = "ink-whisper",
112
+ INK_WHISPER_20250604 = "ink-whisper-2025-06-04"
113
+ }
97
114
  /**
98
115
  * DashScope Qwen-ASR Realtime transcription models
99
116
  * @see https://www.alibabacloud.com/help/en/model-studio/qwen-real-time-speech-recognition
@@ -102,10 +119,17 @@ declare enum DashScopeModel {
102
119
  QWEN3_ASR_FLASH_REALTIME_2602 = "qwen3-asr-flash-realtime-2026-02-10",
103
120
  QWEN3_ASR_FLASH_REALTIME = "qwen3-asr-flash-realtime"
104
121
  }
122
+ /**
123
+ * Self-serve vLLM batch transcription models
124
+ * Backed by recognition-inference / RunPod `/transcribe`
125
+ */
126
+ declare enum SelfServeVllmModel {
127
+ QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
128
+ }
105
129
  /**
106
130
  * Type alias for any model from any provider
107
131
  */
108
- type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | DashScopeModel | string;
132
+ type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | SelfServeVllmModel | string;
109
133
 
110
134
  /**
111
135
  * Audio encoding types
@@ -369,6 +393,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
369
393
  rawAudioTimeMs: z.ZodOptional<z.ZodNumber>;
370
394
  costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
371
395
  apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
396
+ provider: z.ZodOptional<z.ZodString>;
397
+ model: z.ZodOptional<z.ZodString>;
372
398
  asrConfig: z.ZodOptional<z.ZodString>;
373
399
  rawAsrMetadata: z.ZodOptional<z.ZodString>;
374
400
  transcriptOutcome: z.ZodOptional<z.ZodNativeEnum<typeof TranscriptOutcomeType>>;
@@ -423,6 +449,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
423
449
  rawAudioTimeMs?: number | undefined;
424
450
  costInUSD?: number | undefined;
425
451
  apiType?: ASRApiType | undefined;
452
+ provider?: string | undefined;
453
+ model?: string | undefined;
426
454
  asrConfig?: string | undefined;
427
455
  rawAsrMetadata?: string | undefined;
428
456
  transcriptOutcome?: TranscriptOutcomeType | undefined;
@@ -453,6 +481,8 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
453
481
  rawAudioTimeMs?: number | undefined;
454
482
  costInUSD?: number | undefined;
455
483
  apiType?: ASRApiType | undefined;
484
+ provider?: string | undefined;
485
+ model?: string | undefined;
456
486
  asrConfig?: string | undefined;
457
487
  rawAsrMetadata?: string | undefined;
458
488
  transcriptOutcome?: TranscriptOutcomeType | undefined;
@@ -2737,5 +2767,5 @@ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null |
2737
2767
  declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
2738
2768
  declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
2739
2769
 
2740
- export { AudioEncoding, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
2770
+ export { AudioEncoding, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
2741
2771
  export type { ASRRequestConfig, ASRRequestV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
package/dist/index.d.ts CHANGED
@@ -11,6 +11,6 @@ export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, Tran
11
11
  export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
12
12
  export { AudioEncoding } from '@recog/websocket';
13
13
  export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
14
- type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GoogleModel, GeminiModel, OpenAIModel, OpenAIRealtimeModel, MistralVoxtralModel, DashScopeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
14
+ type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
15
15
  export { getRecognitionServiceBase, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getRecognitionServiceHost, getRecognitionConductorBase, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionConductorHost, normalizeStage, RECOGNITION_SERVICE_BASES, RECOGNITION_CONDUCTOR_BASES } from '@recog/shared-config';
16
16
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,mBAAmB,EACnB,mBAAmB,EACnB,cAAc,EACd,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
package/dist/index.js CHANGED
@@ -3737,11 +3737,14 @@ var RecognitionProvider;
3737
3737
  RecognitionProvider2["DEEPGRAM"] = "deepgram";
3738
3738
  RecognitionProvider2["ELEVENLABS"] = "elevenlabs";
3739
3739
  RecognitionProvider2["FIREWORKS"] = "fireworks";
3740
+ RecognitionProvider2["GLADIA"] = "gladia";
3740
3741
  RecognitionProvider2["GOOGLE"] = "google";
3741
3742
  RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
3742
3743
  RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
3744
+ RecognitionProvider2["SELF_SERVE_VLLM"] = "self-serve-vllm";
3743
3745
  RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
3744
3746
  RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
3747
+ RecognitionProvider2["CARTESIA"] = "cartesia";
3745
3748
  RecognitionProvider2["DASHSCOPE"] = "dashscope";
3746
3749
  RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3747
3750
  RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
@@ -3783,10 +3786,13 @@ var FireworksModel;
3783
3786
  FireworksModel2["WHISPER_V3"] = "whisper-v3";
3784
3787
  FireworksModel2["WHISPER_V3_TURBO"] = "whisper-v3-turbo";
3785
3788
  })(FireworksModel || (FireworksModel = {}));
3789
+ var GladiaModel;
3790
+ (function(GladiaModel2) {
3791
+ GladiaModel2["SOLARIA_1"] = "solaria-1";
3792
+ })(GladiaModel || (GladiaModel = {}));
3786
3793
  var ElevenLabsModel;
3787
3794
  (function(ElevenLabsModel2) {
3788
3795
  ElevenLabsModel2["SCRIBE_V2_REALTIME"] = "scribe_v2_realtime";
3789
- ElevenLabsModel2["SCRIBE_V1"] = "scribe_v1";
3790
3796
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3791
3797
  var OpenAIRealtimeModel;
3792
3798
  (function(OpenAIRealtimeModel2) {
@@ -3797,11 +3803,20 @@ var MistralVoxtralModel;
3797
3803
  (function(MistralVoxtralModel2) {
3798
3804
  MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
3799
3805
  })(MistralVoxtralModel || (MistralVoxtralModel = {}));
3806
+ var CartesiaModel;
3807
+ (function(CartesiaModel2) {
3808
+ CartesiaModel2["INK_WHISPER"] = "ink-whisper";
3809
+ CartesiaModel2["INK_WHISPER_20250604"] = "ink-whisper-2025-06-04";
3810
+ })(CartesiaModel || (CartesiaModel = {}));
3800
3811
  var DashScopeModel;
3801
3812
  (function(DashScopeModel2) {
3802
3813
  DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
3803
3814
  DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
3804
3815
  })(DashScopeModel || (DashScopeModel = {}));
3816
+ var SelfServeVllmModel;
3817
+ (function(SelfServeVllmModel2) {
3818
+ SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
3819
+ })(SelfServeVllmModel || (SelfServeVllmModel = {}));
3805
3820
 
3806
3821
  // ../../libs/types/dist/recognition-result-v1.types.js
3807
3822
  var RecognitionResultTypeV1;
@@ -3874,6 +3889,9 @@ var MetadataResultSchemaV1 = z.object({
3874
3889
  costInUSD: z.number().default(0).optional(),
3875
3890
  // ASR API Type
3876
3891
  apiType: z.nativeEnum(ASRApiType).optional(),
3892
+ // Provider identification
3893
+ provider: z.string().optional(),
3894
+ model: z.string().optional(),
3877
3895
  // ASR configuration as JSON string (no type validation)
3878
3896
  asrConfig: z.string().optional(),
3879
3897
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
@@ -5249,7 +5267,7 @@ var MessageHandler = class {
5249
5267
  }
5250
5268
  if (msg.data && typeof msg.data !== "object") {
5251
5269
  if (this.callbacks.logger) {
5252
- this.callbacks.logger("error", "[RecogSDK] Received primitive msg.data from server", {
5270
+ this.callbacks.logger("warn", "[RecogSDK] Received primitive msg.data from server", {
5253
5271
  dataType: typeof msg.data,
5254
5272
  data: msg.data,
5255
5273
  fullMessage: msg
@@ -5597,7 +5615,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5597
5615
  blobToArrayBuffer(audioData).then((arrayBuffer) => {
5598
5616
  this.sendAudioInternal(arrayBuffer);
5599
5617
  }).catch((error) => {
5600
- this.log("error", "Failed to convert Blob to ArrayBuffer", error);
5618
+ this.log("warn", "Failed to convert Blob to ArrayBuffer", error);
5601
5619
  });
5602
5620
  return;
5603
5621
  }
@@ -5637,7 +5655,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5637
5655
  */
5638
5656
  async stopRecording() {
5639
5657
  if (this.state !== "ready" /* READY */) {
5640
- this.log("warn", "stopRecording called but not in READY state", { state: this.state });
5658
+ this.log("info", "stopRecording called but not in READY state", { state: this.state });
5641
5659
  return;
5642
5660
  }
5643
5661
  this.log("debug", "Stopping recording");
@@ -5807,7 +5825,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5807
5825
  if (this.state === "stopping" /* STOPPING */) {
5808
5826
  this.state = "stopped" /* STOPPED */;
5809
5827
  } else if (this.state === "connected" /* CONNECTED */ || this.state === "ready" /* READY */ || this.state === "connecting" /* CONNECTING */) {
5810
- this.log("error", "[DIAGNOSTIC] Unexpected disconnection", {
5828
+ this.log("warn", "[DIAGNOSTIC] Unexpected disconnection", {
5811
5829
  code,
5812
5830
  codeDescription: closeCodeDescription,
5813
5831
  reason: reason || "(empty)",
@@ -5929,7 +5947,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5929
5947
  blobToArrayBuffer(audioData).then((arrayBuffer) => {
5930
5948
  this.sendPrefixAudioInternal(arrayBuffer);
5931
5949
  }).catch((error) => {
5932
- this.log("error", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
5950
+ this.log("warn", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
5933
5951
  });
5934
5952
  return;
5935
5953
  }
@@ -6591,6 +6609,7 @@ function createSimplifiedVGFClient(config) {
6591
6609
  }
6592
6610
  export {
6593
6611
  AudioEncoding,
6612
+ CartesiaModel,
6594
6613
  ClientControlActionV1,
6595
6614
  ClientState,
6596
6615
  ConfigBuilder,
@@ -6604,6 +6623,7 @@ export {
6604
6623
  FinalTranscriptStability,
6605
6624
  FireworksModel,
6606
6625
  GeminiModel,
6626
+ GladiaModel,
6607
6627
  GoogleModel,
6608
6628
  Language,
6609
6629
  MistralVoxtralModel,
@@ -6620,6 +6640,7 @@ export {
6620
6640
  RecordingStatus,
6621
6641
  STAGES,
6622
6642
  SampleRate,
6643
+ SelfServeVllmModel,
6623
6644
  SimplifiedVGFRecognitionClient,
6624
6645
  TimeoutError,
6625
6646
  TranscriptionStatus,