voice-router-dev 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,374 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.6.0] - 2026-01-11
9
+
10
+ ### Added
11
+
12
+ #### OpenAI Official Spec Integration
13
+
14
+ OpenAI types are now auto-generated from the official [Stainless-hosted OpenAPI spec](https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml):
15
+
16
+ ```typescript
17
+ import { OpenAIModel, OpenAIResponseFormat } from 'voice-router-dev/constants'
18
+ import type {
19
+ RealtimeSessionCreateRequest,
20
+ RealtimeTranscriptionSessionCreateRequest,
21
+ CreateTranscriptionResponseDiarizedJson
22
+ } from 'voice-router-dev'
23
+
24
+ // All models from official spec
25
+ const model = OpenAIModel["gpt-4o-transcribe-diarize"]
26
+
27
+ // Response formats including diarization
28
+ const format = OpenAIResponseFormat.diarized_json
29
+ ```
30
+
31
+ **What changed:**
32
+ - **Single source of truth**: Stainless live spec (auto-updated by OpenAI)
33
+ - **54 schemas** generated (up from 15 manual types)
34
+ - **7 endpoints** included: batch audio + realtime streaming
35
+ - **Diarization types** now from official spec (`CreateTranscriptionResponseDiarizedJson`)
36
+ - **Realtime API types**: `RealtimeSessionCreateRequest`, `RealtimeTranscriptionSessionCreateRequest`, `VadConfig`, etc.
37
+
38
+ **New models in `OpenAIModel`:**
39
+ - `whisper-1` - Open source Whisper V2
40
+ - `gpt-4o-transcribe` - GPT-4o based transcription
41
+ - `gpt-4o-mini-transcribe` - Faster, cost-effective
42
+ - `gpt-4o-mini-transcribe-2025-12-15` - Dated version
43
+ - `gpt-4o-transcribe-diarize` - With speaker diarization
44
+
45
+ **New response formats in `OpenAIResponseFormat`:**
46
+ - `diarized_json` - JSON with speaker annotations (requires `gpt-4o-transcribe-diarize`)
47
+
48
+ #### OpenAI Realtime Streaming Types
49
+
50
+ WebSocket event types for OpenAI Realtime API:
51
+
52
+ ```typescript
53
+ import { OpenAIStreamingTypes } from 'voice-router-dev'
54
+
55
+ // Session creation
56
+ const session: OpenAIStreamingTypes.RealtimeSessionConfig = {
57
+ modalities: ['text', 'audio'],
58
+ voice: 'ash',
59
+ input_audio_format: 'pcm16',
60
+ input_audio_transcription: { model: 'whisper-1' },
61
+ turn_detection: { type: 'server_vad', threshold: 0.6 }
62
+ }
63
+
64
+ // WebSocket event handling
65
+ type ServerEvent = OpenAIStreamingTypes.RealtimeServerEvent
66
+ type ClientEvent = OpenAIStreamingTypes.RealtimeClientEvent
67
+ ```
68
+
69
+ **Endpoints:**
70
+ - OpenAI: `wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview`
71
+ - Azure OpenAI: `wss://{endpoint}/openai/realtime?deployment={model}&api-version={version}`
72
+
73
+ #### Soniox Provider (8th Provider)
74
+
75
+ New adapter for [Soniox](https://soniox.com) speech-to-text with batch and streaming support:
76
+
77
+ ```typescript
78
+ import { createSonioxAdapter, SonioxLanguages } from 'voice-router-dev'
79
+
80
+ const adapter = createSonioxAdapter({
81
+ apiKey: process.env.SONIOX_API_KEY
82
+ })
83
+
84
+ // Batch transcription
85
+ const result = await adapter.transcribe({
86
+ type: 'url',
87
+ url: 'https://example.com/audio.mp3'
88
+ }, {
89
+ language: 'en',
90
+ diarization: true
91
+ })
92
+
93
+ // Real-time streaming
94
+ const session = await adapter.transcribeStream({
95
+ language: 'en',
96
+ sampleRate: 16000
97
+ }, {
98
+ onTranscript: (event) => console.log(event.text),
99
+ onError: (error) => console.error(error)
100
+ })
101
+
102
+ // Dynamic model/language discovery
103
+ const models = await adapter.getModels()
104
+ const languages = await adapter.getLanguagesForModel('stt-rt-preview')
105
+ ```
106
+
107
+ **Features:**
108
+ - Batch transcription via URL or file upload
109
+ - Real-time WebSocket streaming with endpoint detection
110
+ - Speaker diarization
111
+ - Language identification (auto-detect)
112
+ - Translation support (one-way and bidirectional)
113
+ - Custom vocabulary via structured context
114
+ - 60+ supported languages
115
+
116
+ **Generated types from OpenAPI spec (`api.soniox.com/v1/openapi.json`):**
117
+ - `SonioxLanguages` - Array of `{code, name}` for all 60+ supported languages
118
+ - `SonioxLanguageCodes` - ISO 639-1 language codes
119
+ - `SonioxLanguageLabels` - Code-to-name mapping
120
+ - 90+ schema types via Orval (Transcription, Model, Language, etc.)
121
+
122
+ #### Speechmatics Batch API Type Generation
123
+
124
+ Full type generation from Speechmatics SDK batch spec (`speechmatics-batch.yml`):
125
+
126
+ ```typescript
127
+ import type { JobConfig, RetrieveTranscriptResponse } from 'voice-router-dev'
128
+ import { OperatingPoint, TranscriptionConfigDiarization } from 'voice-router-dev'
129
+
130
+ // Use generated enums instead of hardcoded strings
131
+ const config: JobConfig = {
132
+ type: 'transcription',
133
+ transcription_config: {
134
+ language: 'en',
135
+ operating_point: OperatingPoint.enhanced,
136
+ diarization: TranscriptionConfigDiarization.speaker
137
+ }
138
+ }
139
+ ```
140
+
141
+ **Generated from SDK spec:**
142
+ - 100+ TypeScript types from `speechmatics-batch.yml`
143
+ - Enums: `OperatingPoint`, `TranscriptionConfigDiarization`, `SummarizationConfigSummaryType`, `SummarizationConfigSummaryLength`, `JobDetailsStatus`
144
+ - Removed manual `src/types/speechmatics.ts` (replaced by generated types)
145
+
146
+ #### Soniox Field Configs
147
+
148
+ Field config functions for Soniox are now available:
149
+
150
+ ```typescript
151
+ import {
152
+ getSonioxTranscriptionFields,
153
+ getSonioxStreamingFields,
154
+ getSonioxListFilterFields,
155
+ getSonioxFieldConfigs
156
+ } from 'voice-router-dev/field-configs'
157
+
158
+ const fields = getSonioxTranscriptionFields()
159
+ // → [{ name: 'model', type: 'string', ... }, { name: 'language_hints', ... }, ...]
160
+ ```
161
+
162
+ #### Field Config Coverage (All Providers)
163
+
164
+ | Provider | Transcription | Streaming | List Filters | Update Config |
165
+ |--------------|:-------------:|:---------:|:------------:|:-------------:|
166
+ | Gladia | ✓ | ✓ | ✓ | - |
167
+ | Deepgram | ✓ | ✓ | ✓ | - |
168
+ | AssemblyAI | ✓ | ✓ | ✓ | ✓ |
169
+ | OpenAI | ✓ | - | - | - |
170
+ | Speechmatics | ✓ | ✓ | ✓ | ✓ |
171
+ | Soniox | ✓ | ✓ | ✓ | - |
172
+ | Azure | - | - | - | - |
173
+
174
+ > **Note:** Azure field configs are not yet implemented (no OpenAPI spec available).
175
+
176
+ #### Zod Schema Exports Reference
177
+
178
+ All generated Zod schemas are exported for direct use with `zodToFieldConfigs()`:
179
+
180
+ | Export Name | Provider | Source |
181
+ |---------------------------|--------------|----------------------------------|
182
+ | `GladiaZodSchemas` | Gladia | OpenAPI spec |
183
+ | `DeepgramZodSchemas` | Deepgram | OpenAPI spec |
184
+ | `AssemblyAIZodSchemas` | AssemblyAI | OpenAPI spec |
185
+ | `OpenAIZodSchemas` | OpenAI | OpenAPI spec |
186
+ | `SpeechmaticsZodSchemas` | Speechmatics | OpenAPI spec (batch) |
187
+ | `SonioxApiZodSchemas` | Soniox | OpenAPI spec (batch) |
188
+ | `SonioxStreamingZodSchemas` | Soniox | Manual spec (real-time WebSocket)|
189
+
190
+ > **Note on manual specs:** Soniox and Deepgram streaming types are manually maintained because
191
+ > these providers do not publish AsyncAPI specs for their WebSocket APIs. Types were extracted
192
+ > from their official SDKs (`@soniox/speech-to-text-web` and `@deepgram/sdk`). The REST API
193
+ > types are auto-synced from their OpenAPI specs. If these providers publish AsyncAPI specs
194
+ > in the future, we will switch to auto-generation.
195
+
196
+ ```typescript
197
+ import { zodToFieldConfigs, SonioxApiZodSchemas } from 'voice-router-dev'
198
+
199
+ // Extract fields from any Zod schema
200
+ const transcriptionFields = zodToFieldConfigs(SonioxApiZodSchemas.createTranscriptionBody)
201
+ ```
202
+
203
+ #### SDK Generation Pipeline Diagram
204
+
205
+ New auto-generated Mermaid diagram showing the SDK generation flow:
206
+
207
+ ```bash
208
+ pnpm openapi:diagram
209
+ ```
210
+
211
+ Generates `docs/sdk-generation-pipeline.mmd` from codebase analysis:
212
+ - Analyzes `sync-specs.js` for remote/manual spec sources
213
+ - Extracts orval config for API/Zod generation
214
+ - Maps streaming type sync scripts
215
+ - Includes consumer layer (router, webhooks, adapters)
216
+ - Shows public API exports
217
+
218
+ ### Changed
219
+
220
+ - **OpenAI spec source**: Now uses Stainless live spec instead of manual `openai-whisper-openapi.yml`
221
+ - **`fix-openai-spec.js`**: Filters full OpenAI API to audio + realtime endpoints only
222
+ - **OpenAI adapter**: Uses `OpenAIModel` constants instead of hardcoded strings
223
+ - **Provider capabilities**: OpenAI now shows `streaming: true` (via Realtime API)
224
+ - **Azure adapter**: Uses generated enums instead of hardcoded strings, removed `any` type casts
225
+ - **Speechmatics adapter** now uses generated enums instead of hardcoded string values
226
+ - **Speechmatics adapter** fixed API structure: `sentiment_analysis_config` and `summarization_config` moved to job level (was incorrectly in `transcription_config`)
227
+ - **Speechmatics adapter** fixed `additional_vocab` format: now uses `{content: string}[]` per spec
228
+ - **Speechmatics adapter** fixed `speaker_diarization_config`: uses `speaker_sensitivity` (not `max_speakers`)
229
+ - **Soniox language codes** now generated from OpenAPI spec (60 languages vs 28 hardcoded)
230
+ - OpenAPI sync scripts now include Speechmatics batch spec and Soniox specs
231
+ - Added `openapi:generate:speechmatics`, `openapi:generate:soniox`, `openapi:clean:speechmatics`, `openapi:clean:soniox` scripts
232
+ - Added `openapi:sync-soniox-languages` to generate flow
233
+
234
+ ### Fixed
235
+
236
+ - OpenAI model values now stay in sync with official spec
237
+ - `OpenAIResponseFormat` now includes `diarized_json` from official spec
238
+ - OpenAI `languageDetection` capability is now `true` (language is optional in request)
239
+ - Azure `languageDetection` capability fixed (was incorrectly `false`)
240
+ - Azure `customVocabulary` capability fixed
241
+ - AssemblyAI/Speechmatics streaming types now survive `openapi:clean` (stored in `specs/`)
242
+ - Speechmatics batch field configs now work (was returning empty array)
243
+ - Speechmatics webhook handler now uses generated `RetrieveTranscriptResponse` type
244
+ - **AssemblyAI streaming field configs** now include SDK v3 fields (`keyterms`, `keytermsPrompt`, `speechModel`, `languageDetection`, etc.) - sync script parses both AsyncAPI spec and SDK TypeScript types
245
+
246
+ #### Soniox Regional Endpoints (Sovereign Cloud)
247
+
248
+ Regional endpoint support for Soniox data residency:
249
+
250
+ ```typescript
251
+ import { createSonioxAdapter, SonioxRegion } from 'voice-router-dev'
252
+
253
+ const adapter = createSonioxAdapter({
254
+ apiKey: process.env.SONIOX_EU_API_KEY,
255
+ region: SonioxRegion.eu // EU data residency
256
+ })
257
+ ```
258
+
259
+ | Region | REST API | WebSocket |
260
+ |--------|----------|-----------|
261
+ | `us` (default) | `api.soniox.com` | `stt-rt.soniox.com` |
262
+ | `eu` | `api.eu.soniox.com` | `stt-rt.eu.soniox.com` |
263
+ | `jp` | `api.jp.soniox.com` | `stt-rt.jp.soniox.com` |
264
+
265
+ **Note:** Soniox API keys are region-specific. Each project is created with a specific region, and the API key only works with that region's endpoint.
266
+
267
+ ---
268
+
269
+ ## [0.5.5] - 2026-01-09
270
+
271
+ ### Changed
272
+
273
+ - Dynamic streaming types synced from AsyncAPI/SDK specs for all providers
274
+ - Deepgram streaming params derived from official SDK (`TranscriptionSchema.ts`)
275
+ - AssemblyAI streaming Zod auto-generated from SDK types
276
+ - Speechmatics streaming types from AsyncAPI spec
277
+
278
+ ---
279
+
280
+ ## [0.5.0] - 2026-01-09
281
+
282
+ ### Added
283
+
284
+ #### Zero-Hardcoding Field Configs
285
+
286
+ All field configs are now derived from Zod schemas at runtime - zero hardcoded field definitions:
287
+
288
+ ```typescript
289
+ import { zodToFieldConfigs, DeepgramZodSchemas } from 'voice-router-dev'
290
+
291
+ // Extract fields directly from generated Zod schemas
292
+ const fields = zodToFieldConfigs(DeepgramZodSchemas.listenV1MediaTranscribeQueryParams)
293
+ // → [{ name, type, description, options, default, min, max, ... }]
294
+
295
+ // Or use pre-built helpers
296
+ import { getDeepgramTranscriptionFields } from 'voice-router-dev'
297
+ const deepgramFields = getDeepgramTranscriptionFields() // 36 fields from Zod
298
+ ```
299
+
300
+ **Exports:**
301
+ - `zodToFieldConfigs(schema)` - Extract field configs from any Zod schema
302
+ - `filterFields(fields, names)` - Include only specified fields
303
+ - `excludeFields(fields, names)` - Exclude specified fields
304
+ - `GladiaZodSchemas`, `DeepgramZodSchemas`, `AssemblyAIZodSchemas`, etc.
305
+
306
+ #### 100% Streaming Field Coverage
307
+
308
+ | Provider | Fields | Source |
309
+ |------------|--------|--------|
310
+ | Gladia | 10 | OpenAPI Zod |
311
+ | Deepgram | 30 | OpenAPI Zod |
312
+ | AssemblyAI | 13 | SDK Zod |
313
+
314
+ ### Changed
315
+
316
+ - Deleted `streaming-field-schemas.ts` (was 461 lines of hardcoding)
317
+ - Rewrote `field-configs.ts`: 890 → 205 lines (zero hardcoded fields)
318
+ - All field configs now derived from Zod schemas at runtime
319
+
320
+ ---
321
+
322
+ ## [0.4.1] - 2026-01-09
323
+
324
+ ### Added
325
+
326
+ #### Provider Metadata Exports for UI Rendering
327
+
328
+ Static runtime data derived from OpenAPI specs and adapter definitions:
329
+
330
+ ```typescript
331
+ import {
332
+ ProviderCapabilitiesMap,
333
+ CapabilityLabels,
334
+ LanguageLabels,
335
+ AllLanguageCodes,
336
+ ProviderDisplayNames,
337
+ StreamingProviders,
338
+ BatchOnlyProviders
339
+ } from 'voice-router-dev/provider-metadata'
340
+
341
+ // Capability matrix for all providers
342
+ const capabilities = ProviderCapabilitiesMap['deepgram']
343
+ // → { streaming: true, diarization: true, ... }
344
+
345
+ // Language dropdown data
346
+ const languages = AllLanguageCodes['gladia']
347
+ // → ['en', 'es', 'fr', ...]
348
+ const label = LanguageLabels['en'] // → 'English'
349
+ ```
350
+
351
+ #### Browser-Safe Subpath Exports
352
+
353
+ New subpath exports with no `node:crypto` dependency:
354
+
355
+ ```typescript
356
+ // Browser-safe imports
357
+ import { AllFieldConfigs } from 'voice-router-dev/field-configs'
358
+ import { ProviderCapabilitiesMap } from 'voice-router-dev/provider-metadata'
359
+
360
+ // Full SDK (server-side only)
361
+ import { VoiceRouter } from 'voice-router-dev'
362
+ ```
363
+
364
+ **Exports:**
365
+ - `voice-router-dev/constants` - Enums only (existing)
366
+ - `voice-router-dev/field-configs` - Field configurations
367
+ - `voice-router-dev/provider-metadata` - Capabilities, languages, display names
368
+
369
+ ### Changed
370
+
371
+ - Types refactored to shared `src/types/core.ts` for browser compatibility
372
+ - `router/types.ts` re-exports from `core.ts` (no duplication)
373
+
374
+ ---
375
+
8
376
  ## [0.3.7] - 2026-01-09
9
377
 
10
378
  ### Added
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Voice Router SDK
2
2
 
3
- > Universal speech-to-text router for 6+ transcription providers with a single, unified API.
3
+ > Universal speech-to-text router for 8 transcription providers with a single, unified API.
4
4
 
5
5
  [![npm version](https://badge.fury.io/js/voice-router-dev.svg)](https://www.npmjs.com/package/voice-router-dev)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -8,7 +8,7 @@
8
8
 
9
9
  ## Why Voice Router?
10
10
 
11
- Switch between speech-to-text providers **without changing your code**. One API for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, and Speechmatics.
11
+ Switch between speech-to-text providers **without changing your code**. One API for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, Speechmatics, and Soniox.
12
12
 
13
13
  ```typescript
14
14
  import { VoiceRouter } from 'voice-router-dev';
@@ -31,7 +31,7 @@ const result = await router.transcribe(audio, {
31
31
  - **Provider-Agnostic** - Switch providers with one line
32
32
  - **Unified API** - Same interface for all providers
33
33
  - **Webhook Normalization** - Auto-detect and parse webhooks
34
- - **Real-time Streaming** - WebSocket support (Gladia, AssemblyAI, Deepgram)
34
+ - **Real-time Streaming** - WebSocket support (Gladia, AssemblyAI, Deepgram, Soniox, OpenAI Realtime)
35
35
  - **Advanced Features** - Diarization, sentiment, summarization, chapters, entities
36
36
  - **Type-Safe** - Full TypeScript support with OpenAPI-generated types
37
37
  - **Typed Extended Data** - Access provider-specific features with full autocomplete
@@ -46,8 +46,9 @@ const result = await router.transcribe(audio, {
46
46
  | **AssemblyAI** | Yes | Real-time | HMAC | Chapters, entities, content moderation |
47
47
  | **Deepgram** | Sync | WebSocket | Yes | PII redaction, keyword boosting |
48
48
  | **Azure STT** | Async | No | HMAC | Custom models, language ID |
49
- | **OpenAI Whisper** | Sync | No | No | gpt-4o, diarization |
49
+ | **OpenAI** | Sync | Realtime | No | gpt-4o, diarization, Realtime API |
50
50
  | **Speechmatics** | Async | No | Query params | High accuracy, summarization |
51
+ | **Soniox** | Yes | WebSocket | No | 60+ languages, translation, regions |
51
52
 
52
53
  ## Installation
53
54
 
@@ -371,8 +372,9 @@ Provider-specific implementations:
371
372
  - `AssemblyAIAdapter` - AssemblyAI transcription
372
373
  - `DeepgramAdapter` - Deepgram transcription
373
374
  - `AzureSTTAdapter` - Azure Speech-to-Text
374
- - `OpenAIWhisperAdapter` - OpenAI Whisper
375
+ - `OpenAIWhisperAdapter` - OpenAI Whisper + Realtime API
375
376
  - `SpeechmaticsAdapter` - Speechmatics transcription
377
+ - `SonioxAdapter` - Soniox transcription (batch + streaming)
376
378
 
377
379
  ## TypeScript Support
378
380
 
@@ -651,6 +653,23 @@ router.registerAdapter(new SpeechmaticsAdapter());
651
653
 
652
654
  Get your API key: https://speechmatics.com
653
655
 
656
+ ### Soniox
657
+ ```typescript
658
+ import { VoiceRouter, SonioxAdapter, SonioxRegion } from 'voice-router-dev';
659
+
660
+ const router = new VoiceRouter({
661
+ providers: {
662
+ soniox: {
663
+ apiKey: 'YOUR_KEY',
664
+ region: SonioxRegion.us // or 'eu', 'jp'
665
+ }
666
+ }
667
+ });
668
+ router.registerAdapter(new SonioxAdapter());
669
+ ```
670
+
671
+ Get your API key: https://soniox.com
672
+
654
673
  ## Contributing
655
674
 
656
675
  Contributions welcome! Please read our [Contributing Guide](CONTRIBUTING.md).
@@ -736,10 +736,46 @@ declare const DeepgramRegion: {
736
736
  /** European Union endpoint */
737
737
  readonly eu: "eu";
738
738
  };
739
+ /**
740
+ * Soniox regional endpoints (Sovereign Cloud)
741
+ *
742
+ * Soniox offers regional endpoints for data residency compliance.
743
+ * All audio, transcripts, and logs stay fully in-region.
744
+ *
745
+ * | Region | REST API | WebSocket (Real-time) |
746
+ * |--------|----------|----------------------|
747
+ * | US (default) | api.soniox.com | stt-rt.soniox.com |
748
+ * | EU | api.eu.soniox.com | stt-rt.eu.soniox.com |
749
+ * | Japan | api.jp.soniox.com | stt-rt.jp.soniox.com |
750
+ *
751
+ * **Coming soon:** Korea, Australia, India, Canada, Saudi Arabia, UK, Brazil
752
+ *
753
+ * @example
754
+ * ```typescript
755
+ * import { SonioxRegion } from 'voice-router-dev/constants'
756
+ *
757
+ * const adapter = createSonioxAdapter({
758
+ * apiKey: process.env.SONIOX_API_KEY,
759
+ * region: SonioxRegion.eu
760
+ * })
761
+ * ```
762
+ *
763
+ * @see https://soniox.com/docs/stt/data-residency - Official data residency docs
764
+ */
765
+ declare const SonioxRegion: {
766
+ /** United States (default) */
767
+ readonly us: "us";
768
+ /** European Union */
769
+ readonly eu: "eu";
770
+ /** Japan */
771
+ readonly jp: "jp";
772
+ };
739
773
  /** Speechmatics region type derived from const object */
740
774
  type SpeechmaticsRegionType = (typeof SpeechmaticsRegion)[keyof typeof SpeechmaticsRegion];
741
775
  /** Deepgram region type derived from const object */
742
776
  type DeepgramRegionType = (typeof DeepgramRegion)[keyof typeof DeepgramRegion];
777
+ /** Soniox region type derived from const object */
778
+ type SonioxRegionType = (typeof SonioxRegion)[keyof typeof SonioxRegion];
743
779
  /**
744
780
  * Deepgram TTS voice models
745
781
  *
@@ -890,7 +926,12 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
890
926
  /**
891
927
  * OpenAI Whisper transcription models
892
928
  *
893
- * Values: `whisper-1`, `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe-diarize`
929
+ * Values from official spec (auto-synced from Stainless):
930
+ * - `whisper-1`: Open source Whisper V2 model
931
+ * - `gpt-4o-transcribe`: GPT-4o based transcription (more accurate)
932
+ * - `gpt-4o-mini-transcribe`: Faster, cost-effective GPT-4o mini
933
+ * - `gpt-4o-mini-transcribe-2025-12-15`: Dated version of GPT-4o mini
934
+ * - `gpt-4o-transcribe-diarize`: GPT-4o with speaker diarization
894
935
  *
895
936
  * @example
896
937
  * ```typescript
@@ -898,28 +939,36 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
898
939
  *
899
940
  * { model: OpenAIModel["whisper-1"] }
900
941
  * { model: OpenAIModel["gpt-4o-transcribe"] }
942
+ * { model: OpenAIModel["gpt-4o-transcribe-diarize"] }
901
943
  * ```
902
944
  */
903
945
  declare const OpenAIModel: {
904
946
  readonly "whisper-1": "whisper-1";
905
- readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
906
947
  readonly "gpt-4o-transcribe": "gpt-4o-transcribe";
948
+ readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
949
+ readonly "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15";
907
950
  readonly "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize";
908
951
  };
909
952
  /**
910
953
  * OpenAI transcription response formats
911
954
  *
912
- * Values: `json`, `text`, `srt`, `verbose_json`, `vtt`, `diarized_json`
955
+ * Values from official spec (auto-synced from Stainless):
956
+ * - `json`: Basic JSON response
957
+ * - `text`: Plain text
958
+ * - `srt`: SRT subtitle format
959
+ * - `verbose_json`: Detailed JSON with timestamps
960
+ * - `vtt`: VTT subtitle format
961
+ * - `diarized_json`: JSON with speaker annotations (gpt-4o-transcribe-diarize only)
913
962
  *
914
- * Note: `diarized_json` is only available with `gpt-4o-transcribe-diarize` model.
915
- * GPT-4o transcribe models only support `json` format.
963
+ * Note: `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` only support the `json` format.
964
+ * For diarization, use `diarized_json` with the `gpt-4o-transcribe-diarize` model.
916
965
  *
917
966
  * @example
918
967
  * ```typescript
919
968
  * import { OpenAIResponseFormat } from 'voice-router-dev/constants'
920
969
  *
921
970
  * { responseFormat: OpenAIResponseFormat.verbose_json }
922
- * { responseFormat: OpenAIResponseFormat.srt }
971
+ * { responseFormat: OpenAIResponseFormat.diarized_json }
923
972
  * ```
924
973
  */
925
974
  declare const OpenAIResponseFormat: {
@@ -935,4 +984,4 @@ type OpenAIModelType = (typeof OpenAIModel)[keyof typeof OpenAIModel];
935
984
  /** OpenAI response format type derived from const object */
936
985
  type OpenAIResponseFormatType = (typeof OpenAIResponseFormat)[keyof typeof OpenAIResponseFormat];
937
986
 
938
- export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SpeechmaticsRegion, type SpeechmaticsRegionType };
987
+ export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SonioxRegion, type SonioxRegionType, SpeechmaticsRegion, type SpeechmaticsRegionType };
@@ -736,10 +736,46 @@ declare const DeepgramRegion: {
736
736
  /** European Union endpoint */
737
737
  readonly eu: "eu";
738
738
  };
739
+ /**
740
+ * Soniox regional endpoints (Sovereign Cloud)
741
+ *
742
+ * Soniox offers regional endpoints for data residency compliance.
743
+ * All audio, transcripts, and logs stay fully in-region.
744
+ *
745
+ * | Region | REST API | WebSocket (Real-time) |
746
+ * |--------|----------|----------------------|
747
+ * | US (default) | api.soniox.com | stt-rt.soniox.com |
748
+ * | EU | api.eu.soniox.com | stt-rt.eu.soniox.com |
749
+ * | Japan | api.jp.soniox.com | stt-rt.jp.soniox.com |
750
+ *
751
+ * **Coming soon:** Korea, Australia, India, Canada, Saudi Arabia, UK, Brazil
752
+ *
753
+ * @example
754
+ * ```typescript
755
+ * import { SonioxRegion } from 'voice-router-dev/constants'
756
+ *
757
+ * const adapter = createSonioxAdapter({
758
+ * apiKey: process.env.SONIOX_API_KEY,
759
+ * region: SonioxRegion.eu
760
+ * })
761
+ * ```
762
+ *
763
+ * @see https://soniox.com/docs/stt/data-residency - Official data residency docs
764
+ */
765
+ declare const SonioxRegion: {
766
+ /** United States (default) */
767
+ readonly us: "us";
768
+ /** European Union */
769
+ readonly eu: "eu";
770
+ /** Japan */
771
+ readonly jp: "jp";
772
+ };
739
773
  /** Speechmatics region type derived from const object */
740
774
  type SpeechmaticsRegionType = (typeof SpeechmaticsRegion)[keyof typeof SpeechmaticsRegion];
741
775
  /** Deepgram region type derived from const object */
742
776
  type DeepgramRegionType = (typeof DeepgramRegion)[keyof typeof DeepgramRegion];
777
+ /** Soniox region type derived from const object */
778
+ type SonioxRegionType = (typeof SonioxRegion)[keyof typeof SonioxRegion];
743
779
  /**
744
780
  * Deepgram TTS voice models
745
781
  *
@@ -890,7 +926,12 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
890
926
  /**
891
927
  * OpenAI Whisper transcription models
892
928
  *
893
- * Values: `whisper-1`, `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe-diarize`
929
+ * Values from official spec (auto-synced from Stainless):
930
+ * - `whisper-1`: Open source Whisper V2 model
931
+ * - `gpt-4o-transcribe`: GPT-4o based transcription (more accurate)
932
+ * - `gpt-4o-mini-transcribe`: Faster, cost-effective GPT-4o mini
933
+ * - `gpt-4o-mini-transcribe-2025-12-15`: Dated version of GPT-4o mini
934
+ * - `gpt-4o-transcribe-diarize`: GPT-4o with speaker diarization
894
935
  *
895
936
  * @example
896
937
  * ```typescript
@@ -898,28 +939,36 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
898
939
  *
899
940
  * { model: OpenAIModel["whisper-1"] }
900
941
  * { model: OpenAIModel["gpt-4o-transcribe"] }
942
+ * { model: OpenAIModel["gpt-4o-transcribe-diarize"] }
901
943
  * ```
902
944
  */
903
945
  declare const OpenAIModel: {
904
946
  readonly "whisper-1": "whisper-1";
905
- readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
906
947
  readonly "gpt-4o-transcribe": "gpt-4o-transcribe";
948
+ readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
949
+ readonly "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15";
907
950
  readonly "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize";
908
951
  };
909
952
  /**
910
953
  * OpenAI transcription response formats
911
954
  *
912
- * Values: `json`, `text`, `srt`, `verbose_json`, `vtt`, `diarized_json`
955
+ * Values from official spec (auto-synced from Stainless):
956
+ * - `json`: Basic JSON response
957
+ * - `text`: Plain text
958
+ * - `srt`: SRT subtitle format
959
+ * - `verbose_json`: Detailed JSON with timestamps
960
+ * - `vtt`: VTT subtitle format
961
+ * - `diarized_json`: JSON with speaker annotations (gpt-4o-transcribe-diarize only)
913
962
  *
914
- * Note: `diarized_json` is only available with `gpt-4o-transcribe-diarize` model.
915
- * GPT-4o transcribe models only support `json` format.
963
+ * Note: `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` only support the `json` format.
964
+ * For diarization, use `diarized_json` with the `gpt-4o-transcribe-diarize` model.
916
965
  *
917
966
  * @example
918
967
  * ```typescript
919
968
  * import { OpenAIResponseFormat } from 'voice-router-dev/constants'
920
969
  *
921
970
  * { responseFormat: OpenAIResponseFormat.verbose_json }
922
- * { responseFormat: OpenAIResponseFormat.srt }
971
+ * { responseFormat: OpenAIResponseFormat.diarized_json }
923
972
  * ```
924
973
  */
925
974
  declare const OpenAIResponseFormat: {
@@ -935,4 +984,4 @@ type OpenAIModelType = (typeof OpenAIModel)[keyof typeof OpenAIModel];
935
984
  /** OpenAI response format type derived from const object */
936
985
  type OpenAIResponseFormatType = (typeof OpenAIResponseFormat)[keyof typeof OpenAIResponseFormat];
937
986
 
938
- export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SpeechmaticsRegion, type SpeechmaticsRegionType };
987
+ export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SonioxRegion, type SonioxRegionType, SpeechmaticsRegion, type SpeechmaticsRegionType };