voice-router-dev 0.5.8 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,10 +5,71 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
- ## [0.6.0] - 2026-01-10
8
+ ## [0.6.0] - 2026-01-11
9
9
 
10
10
  ### Added
11
11
 
12
+ #### OpenAI Official Spec Integration
13
+
14
+ OpenAI types now auto-generated from the official [Stainless-hosted OpenAPI spec](https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml):
15
+
16
+ ```typescript
17
+ import { OpenAIModel, OpenAIResponseFormat } from 'voice-router-dev/constants'
18
+ import type {
19
+ RealtimeSessionCreateRequest,
20
+ RealtimeTranscriptionSessionCreateRequest,
21
+ CreateTranscriptionResponseDiarizedJson
22
+ } from 'voice-router-dev'
23
+
24
+ // All models from official spec
25
+ const model = OpenAIModel["gpt-4o-transcribe-diarize"]
26
+
27
+ // Response formats including diarization
28
+ const format = OpenAIResponseFormat.diarized_json
29
+ ```
30
+
31
+ **What changed:**
32
+ - **Single source of truth**: Stainless live spec (auto-updated by OpenAI)
33
+ - **54 schemas** generated (up from 15 manual types)
34
+ - **7 endpoints** included: batch audio + realtime streaming
35
+ - **Diarization types** now come from the official spec (`CreateTranscriptionResponseDiarizedJson`)
36
+ - **Realtime API types**: `RealtimeSessionCreateRequest`, `RealtimeTranscriptionSessionCreateRequest`, `VadConfig`, etc.
37
+
38
+ **New models in `OpenAIModel`:**
39
+ - `whisper-1` - Open source Whisper V2
40
+ - `gpt-4o-transcribe` - GPT-4o based transcription
41
+ - `gpt-4o-mini-transcribe` - Faster, cost-effective
42
+ - `gpt-4o-mini-transcribe-2025-12-15` - Dated version
43
+ - `gpt-4o-transcribe-diarize` - With speaker diarization
44
+
45
+ **New response formats in `OpenAIResponseFormat`:**
46
+ - `diarized_json` - JSON with speaker annotations (requires `gpt-4o-transcribe-diarize`)
47
+
48
+ #### OpenAI Realtime Streaming Types
49
+
50
+ WebSocket event types for OpenAI Realtime API:
51
+
52
+ ```typescript
53
+ import { OpenAIStreamingTypes } from 'voice-router-dev'
54
+
55
+ // Session creation
56
+ const session: OpenAIStreamingTypes.RealtimeSessionConfig = {
57
+ modalities: ['text', 'audio'],
58
+ voice: 'ash',
59
+ input_audio_format: 'pcm16',
60
+ input_audio_transcription: { model: 'whisper-1' },
61
+ turn_detection: { type: 'server_vad', threshold: 0.6 }
62
+ }
63
+
64
+ // WebSocket event handling
65
+ type ServerEvent = OpenAIStreamingTypes.RealtimeServerEvent
66
+ type ClientEvent = OpenAIStreamingTypes.RealtimeClientEvent
67
+ ```
68
+
69
+ **Endpoints:**
70
+ - OpenAI: `wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview`
71
+ - Azure OpenAI: `wss://{endpoint}/openai/realtime?deployment={model}&api-version={version}`
72
+
12
73
  #### Soniox Provider (8th Provider)
13
74
 
14
75
  New adapter for [Soniox](https://soniox.com) speech-to-text with batch and streaming support:
@@ -139,8 +200,28 @@ import { zodToFieldConfigs, SonioxApiZodSchemas } from 'voice-router-dev'
139
200
  const transcriptionFields = zodToFieldConfigs(SonioxApiZodSchemas.createTranscriptionBody)
140
201
  ```
141
202
 
203
+ #### SDK Generation Pipeline Diagram
204
+
205
+ New auto-generated Mermaid diagram showing the SDK generation flow:
206
+
207
+ ```bash
208
+ pnpm openapi:diagram
209
+ ```
210
+
211
+ Generates `docs/sdk-generation-pipeline.mmd` from codebase analysis:
212
+ - Analyzes `sync-specs.js` for remote/manual spec sources
213
+ - Extracts orval config for API/Zod generation
214
+ - Maps streaming type sync scripts
215
+ - Includes consumer layer (router, webhooks, adapters)
216
+ - Shows public API exports
217
+
142
218
  ### Changed
143
219
 
220
+ - **OpenAI spec source**: Now uses the Stainless live spec instead of the manual `openai-whisper-openapi.yml`
221
+ - **`fix-openai-spec.js`**: Filters full OpenAI API to audio + realtime endpoints only
222
+ - **OpenAI adapter**: Uses `OpenAIModel` constants instead of hardcoded strings
223
+ - **Provider capabilities**: OpenAI now shows `streaming: true` (via Realtime API)
224
+ - **Azure adapter**: Uses generated enums instead of hardcoded strings, removed `any` type casts
144
225
  - **Speechmatics adapter** now uses generated enums instead of hardcoded string values
145
226
  - **Speechmatics adapter** fixed API structure: `sentiment_analysis_config` and `summarization_config` moved to job level (was incorrectly in `transcription_config`)
146
227
  - **Speechmatics adapter** fixed `additional_vocab` format: now uses `{content: string}[]` per spec
@@ -152,8 +233,36 @@ const transcriptionFields = zodToFieldConfigs(SonioxApiZodSchemas.createTranscri
152
233
 
153
234
  ### Fixed
154
235
 
236
+ - OpenAI model values now stay in sync with the official spec
237
+ - `OpenAIResponseFormat` now includes `diarized_json` from the official spec
238
+ - OpenAI `languageDetection` capability is now `true` (language is optional in request)
239
+ - Azure `languageDetection` capability fixed (was incorrectly `false`)
240
+ - Azure `customVocabulary` capability fixed
241
+ - AssemblyAI/Speechmatics streaming types now survive `openapi:clean` (stored in `specs/`)
155
242
  - Speechmatics batch field configs now work (was returning empty array)
156
243
  - Speechmatics webhook handler now uses generated `RetrieveTranscriptResponse` type
244
+ - **AssemblyAI streaming field configs** now include SDK v3 fields (`keyterms`, `keytermsPrompt`, `speechModel`, `languageDetection`, etc.) - sync script parses both AsyncAPI spec and SDK TypeScript types
245
+
246
+ #### Soniox Regional Endpoints (Sovereign Cloud)
247
+
248
+ Regional endpoint support for Soniox data residency:
249
+
250
+ ```typescript
251
+ import { createSonioxAdapter, SonioxRegion } from 'voice-router-dev'
252
+
253
+ const adapter = createSonioxAdapter({
254
+ apiKey: process.env.SONIOX_EU_API_KEY,
255
+ region: SonioxRegion.eu // EU data residency
256
+ })
257
+ ```
258
+
259
+ | Region | REST API | WebSocket |
260
+ |--------|----------|-----------|
261
+ | `us` (default) | `api.soniox.com` | `stt-rt.soniox.com` |
262
+ | `eu` | `api.eu.soniox.com` | `stt-rt.eu.soniox.com` |
263
+ | `jp` | `api.jp.soniox.com` | `stt-rt.jp.soniox.com` |
264
+
265
+ **Note:** Soniox API keys are region-specific. Each project is created with a specific region, and the API key only works with that region's endpoint.
157
266
 
158
267
  ---
159
268
 
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Voice Router SDK
2
2
 
3
- > Universal speech-to-text router for 6+ transcription providers with a single, unified API.
3
+ > Universal speech-to-text router for 8 transcription providers with a single, unified API.
4
4
 
5
5
  [![npm version](https://badge.fury.io/js/voice-router-dev.svg)](https://www.npmjs.com/package/voice-router-dev)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -8,7 +8,7 @@
8
8
 
9
9
  ## Why Voice Router?
10
10
 
11
- Switch between speech-to-text providers **without changing your code**. One API for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, and Speechmatics.
11
+ Switch between speech-to-text providers **without changing your code**. One API for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, Speechmatics, and Soniox.
12
12
 
13
13
  ```typescript
14
14
  import { VoiceRouter } from 'voice-router-dev';
@@ -31,7 +31,7 @@ const result = await router.transcribe(audio, {
31
31
  - **Provider-Agnostic** - Switch providers with one line
32
32
  - **Unified API** - Same interface for all providers
33
33
  - **Webhook Normalization** - Auto-detect and parse webhooks
34
- - **Real-time Streaming** - WebSocket support (Gladia, AssemblyAI, Deepgram)
34
+ - **Real-time Streaming** - WebSocket support (Gladia, AssemblyAI, Deepgram, Soniox, OpenAI Realtime)
35
35
  - **Advanced Features** - Diarization, sentiment, summarization, chapters, entities
36
36
  - **Type-Safe** - Full TypeScript support with OpenAPI-generated types
37
37
  - **Typed Extended Data** - Access provider-specific features with full autocomplete
@@ -46,8 +46,9 @@ const result = await router.transcribe(audio, {
46
46
  | **AssemblyAI** | Yes | Real-time | HMAC | Chapters, entities, content moderation |
47
47
  | **Deepgram** | Sync | WebSocket | Yes | PII redaction, keyword boosting |
48
48
  | **Azure STT** | Async | No | HMAC | Custom models, language ID |
49
- | **OpenAI Whisper** | Sync | No | No | gpt-4o, diarization |
49
+ | **OpenAI** | Sync | Realtime | No | gpt-4o, diarization, Realtime API |
50
50
  | **Speechmatics** | Async | No | Query params | High accuracy, summarization |
51
+ | **Soniox** | Yes | WebSocket | No | 60+ languages, translation, regions |
51
52
 
52
53
  ## Installation
53
54
 
@@ -371,8 +372,9 @@ Provider-specific implementations:
371
372
  - `AssemblyAIAdapter` - AssemblyAI transcription
372
373
  - `DeepgramAdapter` - Deepgram transcription
373
374
  - `AzureSTTAdapter` - Azure Speech-to-Text
374
- - `OpenAIWhisperAdapter` - OpenAI Whisper
375
+ - `OpenAIWhisperAdapter` - OpenAI Whisper + Realtime API
375
376
  - `SpeechmaticsAdapter` - Speechmatics transcription
377
+ - `SonioxAdapter` - Soniox transcription (batch + streaming)
376
378
 
377
379
  ## TypeScript Support
378
380
 
@@ -651,6 +653,23 @@ router.registerAdapter(new SpeechmaticsAdapter());
651
653
 
652
654
  Get your API key: https://speechmatics.com
653
655
 
656
+ ### Soniox
657
+ ```typescript
658
+ import { VoiceRouter, SonioxAdapter, SonioxRegion } from 'voice-router-dev';
659
+
660
+ const router = new VoiceRouter({
661
+ providers: {
662
+ soniox: {
663
+ apiKey: 'YOUR_KEY',
664
+ region: SonioxRegion.us // or 'eu', 'jp'
665
+ }
666
+ }
667
+ });
668
+ router.registerAdapter(new SonioxAdapter());
669
+ ```
670
+
671
+ Get your API key: https://soniox.com
672
+
654
673
  ## Contributing
655
674
 
656
675
  Contributions welcome! Please read our [Contributing Guide](CONTRIBUTING.md).
@@ -736,10 +736,46 @@ declare const DeepgramRegion: {
736
736
  /** European Union endpoint */
737
737
  readonly eu: "eu";
738
738
  };
739
+ /**
740
+ * Soniox regional endpoints (Sovereign Cloud)
741
+ *
742
+ * Soniox offers regional endpoints for data residency compliance.
743
+ * All audio, transcripts, and logs stay fully in-region.
744
+ *
745
+ * | Region | REST API | WebSocket (Real-time) |
746
+ * |--------|----------|----------------------|
747
+ * | US (default) | api.soniox.com | stt-rt.soniox.com |
748
+ * | EU | api.eu.soniox.com | stt-rt.eu.soniox.com |
749
+ * | Japan | api.jp.soniox.com | stt-rt.jp.soniox.com |
750
+ *
751
+ * **Coming soon:** Korea, Australia, India, Canada, Saudi Arabia, UK, Brazil
752
+ *
753
+ * @example
754
+ * ```typescript
755
+ * import { SonioxRegion } from 'voice-router-dev/constants'
756
+ *
757
+ * const adapter = createSonioxAdapter({
758
+ * apiKey: process.env.SONIOX_API_KEY,
759
+ * region: SonioxRegion.eu
760
+ * })
761
+ * ```
762
+ *
763
+ * @see https://soniox.com/docs/stt/data-residency - Official data residency docs
764
+ */
765
+ declare const SonioxRegion: {
766
+ /** United States (default) */
767
+ readonly us: "us";
768
+ /** European Union */
769
+ readonly eu: "eu";
770
+ /** Japan */
771
+ readonly jp: "jp";
772
+ };
739
773
  /** Speechmatics region type derived from const object */
740
774
  type SpeechmaticsRegionType = (typeof SpeechmaticsRegion)[keyof typeof SpeechmaticsRegion];
741
775
  /** Deepgram region type derived from const object */
742
776
  type DeepgramRegionType = (typeof DeepgramRegion)[keyof typeof DeepgramRegion];
777
+ /** Soniox region type derived from const object */
778
+ type SonioxRegionType = (typeof SonioxRegion)[keyof typeof SonioxRegion];
743
779
  /**
744
780
  * Deepgram TTS voice models
745
781
  *
@@ -890,7 +926,12 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
890
926
  /**
891
927
  * OpenAI Whisper transcription models
892
928
  *
893
- * Values: `whisper-1`, `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe-diarize`
929
+ * Values from official spec (auto-synced from Stainless):
930
+ * - `whisper-1`: Open source Whisper V2 model
931
+ * - `gpt-4o-transcribe`: GPT-4o based transcription (more accurate)
932
+ * - `gpt-4o-mini-transcribe`: Faster, cost-effective GPT-4o mini
933
+ * - `gpt-4o-mini-transcribe-2025-12-15`: Dated version of GPT-4o mini
934
+ * - `gpt-4o-transcribe-diarize`: GPT-4o with speaker diarization
894
935
  *
895
936
  * @example
896
937
  * ```typescript
@@ -898,28 +939,36 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
898
939
  *
899
940
  * { model: OpenAIModel["whisper-1"] }
900
941
  * { model: OpenAIModel["gpt-4o-transcribe"] }
942
+ * { model: OpenAIModel["gpt-4o-transcribe-diarize"] }
901
943
  * ```
902
944
  */
903
945
  declare const OpenAIModel: {
904
946
  readonly "whisper-1": "whisper-1";
905
- readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
906
947
  readonly "gpt-4o-transcribe": "gpt-4o-transcribe";
948
+ readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
949
+ readonly "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15";
907
950
  readonly "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize";
908
951
  };
909
952
  /**
910
953
  * OpenAI transcription response formats
911
954
  *
912
- * Values: `json`, `text`, `srt`, `verbose_json`, `vtt`, `diarized_json`
955
+ * Values from official spec (auto-synced from Stainless):
956
+ * - `json`: Basic JSON response
957
+ * - `text`: Plain text
958
+ * - `srt`: SRT subtitle format
959
+ * - `verbose_json`: Detailed JSON with timestamps
960
+ * - `vtt`: VTT subtitle format
961
+ * - `diarized_json`: JSON with speaker annotations (gpt-4o-transcribe-diarize only)
913
962
  *
914
- * Note: `diarized_json` is only available with `gpt-4o-transcribe-diarize` model.
915
- * GPT-4o transcribe models only support `json` format.
963
+ * Note: GPT-4o transcribe models only support `json` format.
964
+ * For diarization, use `diarized_json` with `gpt-4o-transcribe-diarize` model.
916
965
  *
917
966
  * @example
918
967
  * ```typescript
919
968
  * import { OpenAIResponseFormat } from 'voice-router-dev/constants'
920
969
  *
921
970
  * { responseFormat: OpenAIResponseFormat.verbose_json }
922
- * { responseFormat: OpenAIResponseFormat.srt }
971
+ * { responseFormat: OpenAIResponseFormat.diarized_json }
923
972
  * ```
924
973
  */
925
974
  declare const OpenAIResponseFormat: {
@@ -935,4 +984,4 @@ type OpenAIModelType = (typeof OpenAIModel)[keyof typeof OpenAIModel];
935
984
  /** OpenAI response format type derived from const object */
936
985
  type OpenAIResponseFormatType = (typeof OpenAIResponseFormat)[keyof typeof OpenAIResponseFormat];
937
986
 
938
- export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SpeechmaticsRegion, type SpeechmaticsRegionType };
987
+ export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SonioxRegion, type SonioxRegionType, SpeechmaticsRegion, type SpeechmaticsRegionType };
@@ -736,10 +736,46 @@ declare const DeepgramRegion: {
736
736
  /** European Union endpoint */
737
737
  readonly eu: "eu";
738
738
  };
739
+ /**
740
+ * Soniox regional endpoints (Sovereign Cloud)
741
+ *
742
+ * Soniox offers regional endpoints for data residency compliance.
743
+ * All audio, transcripts, and logs stay fully in-region.
744
+ *
745
+ * | Region | REST API | WebSocket (Real-time) |
746
+ * |--------|----------|----------------------|
747
+ * | US (default) | api.soniox.com | stt-rt.soniox.com |
748
+ * | EU | api.eu.soniox.com | stt-rt.eu.soniox.com |
749
+ * | Japan | api.jp.soniox.com | stt-rt.jp.soniox.com |
750
+ *
751
+ * **Coming soon:** Korea, Australia, India, Canada, Saudi Arabia, UK, Brazil
752
+ *
753
+ * @example
754
+ * ```typescript
755
+ * import { SonioxRegion } from 'voice-router-dev/constants'
756
+ *
757
+ * const adapter = createSonioxAdapter({
758
+ * apiKey: process.env.SONIOX_API_KEY,
759
+ * region: SonioxRegion.eu
760
+ * })
761
+ * ```
762
+ *
763
+ * @see https://soniox.com/docs/stt/data-residency - Official data residency docs
764
+ */
765
+ declare const SonioxRegion: {
766
+ /** United States (default) */
767
+ readonly us: "us";
768
+ /** European Union */
769
+ readonly eu: "eu";
770
+ /** Japan */
771
+ readonly jp: "jp";
772
+ };
739
773
  /** Speechmatics region type derived from const object */
740
774
  type SpeechmaticsRegionType = (typeof SpeechmaticsRegion)[keyof typeof SpeechmaticsRegion];
741
775
  /** Deepgram region type derived from const object */
742
776
  type DeepgramRegionType = (typeof DeepgramRegion)[keyof typeof DeepgramRegion];
777
+ /** Soniox region type derived from const object */
778
+ type SonioxRegionType = (typeof SonioxRegion)[keyof typeof SonioxRegion];
743
779
  /**
744
780
  * Deepgram TTS voice models
745
781
  *
@@ -890,7 +926,12 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
890
926
  /**
891
927
  * OpenAI Whisper transcription models
892
928
  *
893
- * Values: `whisper-1`, `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, `gpt-4o-transcribe-diarize`
929
+ * Values from official spec (auto-synced from Stainless):
930
+ * - `whisper-1`: Open source Whisper V2 model
931
+ * - `gpt-4o-transcribe`: GPT-4o based transcription (more accurate)
932
+ * - `gpt-4o-mini-transcribe`: Faster, cost-effective GPT-4o mini
933
+ * - `gpt-4o-mini-transcribe-2025-12-15`: Dated version of GPT-4o mini
934
+ * - `gpt-4o-transcribe-diarize`: GPT-4o with speaker diarization
894
935
  *
895
936
  * @example
896
937
  * ```typescript
@@ -898,28 +939,36 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
898
939
  *
899
940
  * { model: OpenAIModel["whisper-1"] }
900
941
  * { model: OpenAIModel["gpt-4o-transcribe"] }
942
+ * { model: OpenAIModel["gpt-4o-transcribe-diarize"] }
901
943
  * ```
902
944
  */
903
945
  declare const OpenAIModel: {
904
946
  readonly "whisper-1": "whisper-1";
905
- readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
906
947
  readonly "gpt-4o-transcribe": "gpt-4o-transcribe";
948
+ readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
949
+ readonly "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15";
907
950
  readonly "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize";
908
951
  };
909
952
  /**
910
953
  * OpenAI transcription response formats
911
954
  *
912
- * Values: `json`, `text`, `srt`, `verbose_json`, `vtt`, `diarized_json`
955
+ * Values from official spec (auto-synced from Stainless):
956
+ * - `json`: Basic JSON response
957
+ * - `text`: Plain text
958
+ * - `srt`: SRT subtitle format
959
+ * - `verbose_json`: Detailed JSON with timestamps
960
+ * - `vtt`: VTT subtitle format
961
+ * - `diarized_json`: JSON with speaker annotations (gpt-4o-transcribe-diarize only)
913
962
  *
914
- * Note: `diarized_json` is only available with `gpt-4o-transcribe-diarize` model.
915
- * GPT-4o transcribe models only support `json` format.
963
+ * Note: GPT-4o transcribe models only support `json` format.
964
+ * For diarization, use `diarized_json` with `gpt-4o-transcribe-diarize` model.
916
965
  *
917
966
  * @example
918
967
  * ```typescript
919
968
  * import { OpenAIResponseFormat } from 'voice-router-dev/constants'
920
969
  *
921
970
  * { responseFormat: OpenAIResponseFormat.verbose_json }
922
- * { responseFormat: OpenAIResponseFormat.srt }
971
+ * { responseFormat: OpenAIResponseFormat.diarized_json }
923
972
  * ```
924
973
  */
925
974
  declare const OpenAIResponseFormat: {
@@ -935,4 +984,4 @@ type OpenAIModelType = (typeof OpenAIModel)[keyof typeof OpenAIModel];
935
984
  /** OpenAI response format type derived from const object */
936
985
  type OpenAIResponseFormatType = (typeof OpenAIResponseFormat)[keyof typeof OpenAIResponseFormat];
937
986
 
938
- export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SpeechmaticsRegion, type SpeechmaticsRegionType };
987
+ export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SonioxRegion, type SonioxRegionType, SpeechmaticsRegion, type SpeechmaticsRegionType };
package/dist/constants.js CHANGED
@@ -49,6 +49,7 @@ __export(constants_exports, {
49
49
  GladiaTranslationLanguage: () => GladiaTranslationLanguage,
50
50
  OpenAIModel: () => OpenAIModel,
51
51
  OpenAIResponseFormat: () => OpenAIResponseFormat,
52
+ SonioxRegion: () => SonioxRegion,
52
53
  SpeechmaticsRegion: () => SpeechmaticsRegion
53
54
  });
54
55
  module.exports = __toCommonJS(constants_exports);
@@ -70,38 +71,22 @@ var ListenV1RedactParameterOneOfItem = {
70
71
  numbers: "numbers"
71
72
  };
72
73
 
73
- // src/generated/deepgram/schema/sharedCustomTopicModeParameter.ts
74
- var SharedCustomTopicModeParameter = {
75
- extended: "extended",
76
- strict: "strict"
77
- };
78
-
79
- // src/generated/deepgram/schema/sharedCustomIntentModeParameter.ts
80
- var SharedCustomIntentModeParameter = {
81
- extended: "extended",
82
- strict: "strict"
83
- };
84
-
85
74
  // src/generated/deepgram/schema/sharedCallbackMethodParameter.ts
86
75
  var SharedCallbackMethodParameter = {
87
76
  POST: "POST",
88
77
  PUT: "PUT"
89
78
  };
90
79
 
91
- // src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
92
- var StreamingSupportedEncodingEnum = {
93
- "wav/pcm": "wav/pcm",
94
- "wav/alaw": "wav/alaw",
95
- "wav/ulaw": "wav/ulaw"
80
+ // src/generated/deepgram/schema/sharedCustomIntentModeParameter.ts
81
+ var SharedCustomIntentModeParameter = {
82
+ extended: "extended",
83
+ strict: "strict"
96
84
  };
97
85
 
98
- // src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
99
- var StreamingSupportedSampleRateEnum = {
100
- NUMBER_8000: 8e3,
101
- NUMBER_16000: 16e3,
102
- NUMBER_32000: 32e3,
103
- NUMBER_44100: 44100,
104
- NUMBER_48000: 48e3
86
+ // src/generated/deepgram/schema/sharedCustomTopicModeParameter.ts
87
+ var SharedCustomTopicModeParameter = {
88
+ extended: "extended",
89
+ strict: "strict"
105
90
  };
106
91
 
107
92
  // src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
@@ -112,6 +97,13 @@ var StreamingSupportedBitDepthEnum = {
112
97
  NUMBER_32: 32
113
98
  };
114
99
 
100
+ // src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
101
+ var StreamingSupportedEncodingEnum = {
102
+ "wav/pcm": "wav/pcm",
103
+ "wav/alaw": "wav/alaw",
104
+ "wav/ulaw": "wav/ulaw"
105
+ };
106
+
115
107
  // src/generated/gladia/schema/streamingSupportedModels.ts
116
108
  var StreamingSupportedModels = {
117
109
  "solaria-1": "solaria-1"
@@ -123,6 +115,15 @@ var StreamingSupportedRegions = {
123
115
  "eu-west": "eu-west"
124
116
  };
125
117
 
118
+ // src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
119
+ var StreamingSupportedSampleRateEnum = {
120
+ NUMBER_8000: 8e3,
121
+ NUMBER_16000: 16e3,
122
+ NUMBER_32000: 32e3,
123
+ NUMBER_44100: 44100,
124
+ NUMBER_48000: 48e3
125
+ };
126
+
126
127
  // src/generated/gladia/schema/transcriptionLanguageCodeEnum.ts
127
128
  var TranscriptionLanguageCodeEnum = {
128
129
  af: "af",
@@ -338,14 +339,6 @@ var TranscriptStatus = {
338
339
  error: "error"
339
340
  };
340
341
 
341
- // src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
342
- var TranscriptionControllerListV2StatusItem = {
343
- queued: "queued",
344
- processing: "processing",
345
- done: "done",
346
- error: "error"
347
- };
348
-
349
342
  // src/generated/azure/schema/status.ts
350
343
  var Status = {
351
344
  NotStarted: "NotStarted",
@@ -360,6 +353,32 @@ var ManageV1FilterStatusParameter = {
360
353
  failed: "failed"
361
354
  };
362
355
 
356
+ // src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
357
+ var TranscriptionControllerListV2StatusItem = {
358
+ queued: "queued",
359
+ processing: "processing",
360
+ done: "done",
361
+ error: "error"
362
+ };
363
+
364
+ // src/generated/deepgram/schema/speakV1ContainerParameter.ts
365
+ var SpeakV1ContainerParameter = {
366
+ none: "none",
367
+ wav: "wav",
368
+ ogg: "ogg"
369
+ };
370
+
371
+ // src/generated/deepgram/schema/speakV1EncodingParameter.ts
372
+ var SpeakV1EncodingParameter = {
373
+ linear16: "linear16",
374
+ aac: "aac",
375
+ opus: "opus",
376
+ mp3: "mp3",
377
+ flac: "flac",
378
+ mulaw: "mulaw",
379
+ alaw: "alaw"
380
+ };
381
+
363
382
  // src/generated/deepgram/schema/speakV1ModelParameter.ts
364
383
  var SpeakV1ModelParameter = {
365
384
  "aura-asteria-en": "aura-asteria-en",
@@ -427,24 +446,6 @@ var SpeakV1ModelParameter = {
427
446
  "aura-2-javier-es": "aura-2-javier-es"
428
447
  };
429
448
 
430
- // src/generated/deepgram/schema/speakV1EncodingParameter.ts
431
- var SpeakV1EncodingParameter = {
432
- linear16: "linear16",
433
- aac: "aac",
434
- opus: "opus",
435
- mp3: "mp3",
436
- flac: "flac",
437
- mulaw: "mulaw",
438
- alaw: "alaw"
439
- };
440
-
441
- // src/generated/deepgram/schema/speakV1ContainerParameter.ts
442
- var SpeakV1ContainerParameter = {
443
- none: "none",
444
- wav: "wav",
445
- ogg: "ogg"
446
- };
447
-
448
449
  // src/generated/deepgram/schema/speakV1SampleRateParameter.ts
449
450
  var SpeakV1SampleRateParameter = {
450
451
  NUMBER_8000: 8e3,
@@ -456,14 +457,6 @@ var SpeakV1SampleRateParameter = {
456
457
  null: null
457
458
  };
458
459
 
459
- // src/generated/openai/schema/audioTranscriptionModel.ts
460
- var AudioTranscriptionModel = {
461
- "whisper-1": "whisper-1",
462
- "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe",
463
- "gpt-4o-transcribe": "gpt-4o-transcribe",
464
- "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize"
465
- };
466
-
467
460
  // src/generated/openai/schema/audioResponseFormat.ts
468
461
  var AudioResponseFormat = {
469
462
  json: "json",
@@ -571,11 +564,25 @@ var DeepgramRegion = {
571
564
  /** European Union endpoint */
572
565
  eu: "eu"
573
566
  };
567
+ var SonioxRegion = {
568
+ /** United States (default) */
569
+ us: "us",
570
+ /** European Union */
571
+ eu: "eu",
572
+ /** Japan */
573
+ jp: "jp"
574
+ };
574
575
  var DeepgramTTSModel = SpeakV1ModelParameter;
575
576
  var DeepgramTTSEncoding = SpeakV1EncodingParameter;
576
577
  var DeepgramTTSContainer = SpeakV1ContainerParameter;
577
578
  var DeepgramTTSSampleRate = SpeakV1SampleRateParameter;
578
- var OpenAIModel = AudioTranscriptionModel;
579
+ var OpenAIModel = {
580
+ "whisper-1": "whisper-1",
581
+ "gpt-4o-transcribe": "gpt-4o-transcribe",
582
+ "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe",
583
+ "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
584
+ "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize"
585
+ };
579
586
  var OpenAIResponseFormat = AudioResponseFormat;
580
587
  // Annotate the CommonJS export names for ESM import in node:
581
588
  0 && (module.exports = {
@@ -607,6 +614,7 @@ var OpenAIResponseFormat = AudioResponseFormat;
607
614
  GladiaTranslationLanguage,
608
615
  OpenAIModel,
609
616
  OpenAIResponseFormat,
617
+ SonioxRegion,
610
618
  SpeechmaticsRegion
611
619
  });
612
620
  //# sourceMappingURL=constants.js.map