voice-router-dev 0.5.8 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +110 -1
- package/README.md +24 -5
- package/dist/constants.d.mts +56 -7
- package/dist/constants.d.ts +56 -7
- package/dist/constants.js +67 -59
- package/dist/constants.mjs +66 -59
- package/dist/field-configs.js +1346 -159
- package/dist/field-configs.mjs +1346 -159
- package/dist/index.d.mts +21539 -60346
- package/dist/index.d.ts +21539 -60346
- package/dist/index.js +1978 -5389
- package/dist/index.mjs +1976 -5391
- package/dist/provider-metadata.js +2 -1
- package/dist/provider-metadata.mjs +2 -1
- package/package.json +7 -3
- package/dist/constants.js.map +0 -1
- package/dist/constants.mjs.map +0 -1
- package/dist/field-configs.js.map +0 -1
- package/dist/field-configs.mjs.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/index.mjs.map +0 -1
- package/dist/provider-metadata.js.map +0 -1
- package/dist/provider-metadata.mjs.map +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -5,10 +5,71 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [0.6.0] - 2026-01-
|
|
8
|
+
## [0.6.0] - 2026-01-11
|
|
9
9
|
|
|
10
10
|
### Added
|
|
11
11
|
|
|
12
|
+
#### OpenAI Official Spec Integration
|
|
13
|
+
|
|
14
|
+
OpenAI types are now auto-generated from the official [Stainless-hosted OpenAPI spec](https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml):
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
import { OpenAIModel, OpenAIResponseFormat } from 'voice-router-dev/constants'
|
|
18
|
+
import type {
|
|
19
|
+
RealtimeSessionCreateRequest,
|
|
20
|
+
RealtimeTranscriptionSessionCreateRequest,
|
|
21
|
+
CreateTranscriptionResponseDiarizedJson
|
|
22
|
+
} from 'voice-router-dev'
|
|
23
|
+
|
|
24
|
+
// All models from official spec
|
|
25
|
+
const model = OpenAIModel["gpt-4o-transcribe-diarize"]
|
|
26
|
+
|
|
27
|
+
// Response formats including diarization
|
|
28
|
+
const format = OpenAIResponseFormat.diarized_json
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**What changed:**
|
|
32
|
+
- **Single source of truth**: Stainless live spec (auto-updated by OpenAI)
|
|
33
|
+
- **54 schemas** generated (up from 15 manual types)
|
|
34
|
+
- **7 endpoints** included: batch audio + realtime streaming
|
|
35
|
+
- **Diarization types** now from official spec (`CreateTranscriptionResponseDiarizedJson`)
|
|
36
|
+
- **Realtime API types**: `RealtimeSessionCreateRequest`, `RealtimeTranscriptionSessionCreateRequest`, `VadConfig`, etc.
|
|
37
|
+
|
|
38
|
+
**New models in `OpenAIModel`:**
|
|
39
|
+
- `whisper-1` - Open source Whisper V2
|
|
40
|
+
- `gpt-4o-transcribe` - GPT-4o based transcription
|
|
41
|
+
- `gpt-4o-mini-transcribe` - Faster, cost-effective
|
|
42
|
+
- `gpt-4o-mini-transcribe-2025-12-15` - Dated version
|
|
43
|
+
- `gpt-4o-transcribe-diarize` - With speaker diarization
|
|
44
|
+
|
|
45
|
+
**New response formats in `OpenAIResponseFormat`:**
|
|
46
|
+
- `diarized_json` - JSON with speaker annotations (requires `gpt-4o-transcribe-diarize`)
|
|
47
|
+
|
|
48
|
+
#### OpenAI Realtime Streaming Types
|
|
49
|
+
|
|
50
|
+
WebSocket event types for the OpenAI Realtime API:
|
|
51
|
+
|
|
52
|
+
```typescript
|
|
53
|
+
import { OpenAIStreamingTypes } from 'voice-router-dev'
|
|
54
|
+
|
|
55
|
+
// Session creation
|
|
56
|
+
const session: OpenAIStreamingTypes.RealtimeSessionConfig = {
|
|
57
|
+
modalities: ['text', 'audio'],
|
|
58
|
+
voice: 'ash',
|
|
59
|
+
input_audio_format: 'pcm16',
|
|
60
|
+
input_audio_transcription: { model: 'whisper-1' },
|
|
61
|
+
turn_detection: { type: 'server_vad', threshold: 0.6 }
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
// WebSocket event handling
|
|
65
|
+
type ServerEvent = OpenAIStreamingTypes.RealtimeServerEvent
|
|
66
|
+
type ClientEvent = OpenAIStreamingTypes.RealtimeClientEvent
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Endpoints:**
|
|
70
|
+
- OpenAI: `wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview`
|
|
71
|
+
- Azure OpenAI: `wss://{endpoint}/openai/realtime?deployment={model}&api-version={version}`
|
|
72
|
+
|
|
12
73
|
#### Soniox Provider (8th Provider)
|
|
13
74
|
|
|
14
75
|
New adapter for [Soniox](https://soniox.com) speech-to-text with batch and streaming support:
|
|
@@ -139,8 +200,28 @@ import { zodToFieldConfigs, SonioxApiZodSchemas } from 'voice-router-dev'
|
|
|
139
200
|
const transcriptionFields = zodToFieldConfigs(SonioxApiZodSchemas.createTranscriptionBody)
|
|
140
201
|
```
|
|
141
202
|
|
|
203
|
+
#### SDK Generation Pipeline Diagram
|
|
204
|
+
|
|
205
|
+
New auto-generated Mermaid diagram showing the SDK generation flow:
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
pnpm openapi:diagram
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Generates `docs/sdk-generation-pipeline.mmd` from codebase analysis:
|
|
212
|
+
- Analyzes `sync-specs.js` for remote/manual spec sources
|
|
213
|
+
- Extracts orval config for API/Zod generation
|
|
214
|
+
- Maps streaming type sync scripts
|
|
215
|
+
- Includes consumer layer (router, webhooks, adapters)
|
|
216
|
+
- Shows public API exports
|
|
217
|
+
|
|
142
218
|
### Changed
|
|
143
219
|
|
|
220
|
+
- **OpenAI spec source**: Now uses Stainless live spec instead of manual `openai-whisper-openapi.yml`
|
|
221
|
+
- **`fix-openai-spec.js`**: Filters full OpenAI API to audio + realtime endpoints only
|
|
222
|
+
- **OpenAI adapter**: Uses `OpenAIModel` constants instead of hardcoded strings
|
|
223
|
+
- **Provider capabilities**: OpenAI now shows `streaming: true` (via Realtime API)
|
|
224
|
+
- **Azure adapter**: Uses generated enums instead of hardcoded strings, removed `any` type casts
|
|
144
225
|
- **Speechmatics adapter** now uses generated enums instead of hardcoded string values
|
|
145
226
|
- **Speechmatics adapter** fixed API structure: `sentiment_analysis_config` and `summarization_config` moved to job level (was incorrectly in `transcription_config`)
|
|
146
227
|
- **Speechmatics adapter** fixed `additional_vocab` format: now uses `{content: string}[]` per spec
|
|
@@ -152,8 +233,36 @@ const transcriptionFields = zodToFieldConfigs(SonioxApiZodSchemas.createTranscri
|
|
|
152
233
|
|
|
153
234
|
### Fixed
|
|
154
235
|
|
|
236
|
+
- OpenAI model values now stay in sync with official spec
|
|
237
|
+
- `OpenAIResponseFormat` now includes `diarized_json` from official spec
|
|
238
|
+
- OpenAI `languageDetection` capability is now `true` (language is optional in request)
|
|
239
|
+
- Azure `languageDetection` capability fixed (was incorrectly `false`)
|
|
240
|
+
- Azure `customVocabulary` capability fixed
|
|
241
|
+
- AssemblyAI/Speechmatics streaming types now survive `openapi:clean` (stored in `specs/`)
|
|
155
242
|
- Speechmatics batch field configs now work (was returning empty array)
|
|
156
243
|
- Speechmatics webhook handler now uses generated `RetrieveTranscriptResponse` type
|
|
244
|
+
- **AssemblyAI streaming field configs** now include SDK v3 fields (`keyterms`, `keytermsPrompt`, `speechModel`, `languageDetection`, etc.) - sync script parses both AsyncAPI spec and SDK TypeScript types
|
|
245
|
+
|
|
246
|
+
#### Soniox Regional Endpoints (Sovereign Cloud)
|
|
247
|
+
|
|
248
|
+
Regional endpoint support for Soniox data residency:
|
|
249
|
+
|
|
250
|
+
```typescript
|
|
251
|
+
import { createSonioxAdapter, SonioxRegion } from 'voice-router-dev'
|
|
252
|
+
|
|
253
|
+
const adapter = createSonioxAdapter({
|
|
254
|
+
apiKey: process.env.SONIOX_EU_API_KEY,
|
|
255
|
+
region: SonioxRegion.eu // EU data residency
|
|
256
|
+
})
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
| Region | REST API | WebSocket |
|
|
260
|
+
|--------|----------|-----------|
|
|
261
|
+
| `us` (default) | `api.soniox.com` | `stt-rt.soniox.com` |
|
|
262
|
+
| `eu` | `api.eu.soniox.com` | `stt-rt.eu.soniox.com` |
|
|
263
|
+
| `jp` | `api.jp.soniox.com` | `stt-rt.jp.soniox.com` |
|
|
264
|
+
|
|
265
|
+
**Note:** Soniox API keys are region-specific. Each project is created with a specific region, and the API key only works with that region's endpoint.
|
|
157
266
|
|
|
158
267
|
---
|
|
159
268
|
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Voice Router SDK
|
|
2
2
|
|
|
3
|
-
> Universal speech-to-text router for
|
|
3
|
+
> Universal speech-to-text router for 8 transcription providers with a single, unified API.
|
|
4
4
|
|
|
5
5
|
[npm package](https://www.npmjs.com/package/voice-router-dev)
|
|
6
6
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
## Why Voice Router?
|
|
10
10
|
|
|
11
|
-
Switch between speech-to-text providers **without changing your code**. One API for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, and
|
|
11
|
+
Switch between speech-to-text providers **without changing your code**. One API for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, Speechmatics, and Soniox.
|
|
12
12
|
|
|
13
13
|
```typescript
|
|
14
14
|
import { VoiceRouter } from 'voice-router-dev';
|
|
@@ -31,7 +31,7 @@ const result = await router.transcribe(audio, {
|
|
|
31
31
|
- **Provider-Agnostic** - Switch providers with one line
|
|
32
32
|
- **Unified API** - Same interface for all providers
|
|
33
33
|
- **Webhook Normalization** - Auto-detect and parse webhooks
|
|
34
|
-
- **Real-time Streaming** - WebSocket support (Gladia, AssemblyAI, Deepgram)
|
|
34
|
+
- **Real-time Streaming** - WebSocket support (Gladia, AssemblyAI, Deepgram, Soniox, OpenAI Realtime)
|
|
35
35
|
- **Advanced Features** - Diarization, sentiment, summarization, chapters, entities
|
|
36
36
|
- **Type-Safe** - Full TypeScript support with OpenAPI-generated types
|
|
37
37
|
- **Typed Extended Data** - Access provider-specific features with full autocomplete
|
|
@@ -46,8 +46,9 @@ const result = await router.transcribe(audio, {
|
|
|
46
46
|
| **AssemblyAI** | Yes | Real-time | HMAC | Chapters, entities, content moderation |
|
|
47
47
|
| **Deepgram** | Sync | WebSocket | Yes | PII redaction, keyword boosting |
|
|
48
48
|
| **Azure STT** | Async | No | HMAC | Custom models, language ID |
|
|
49
|
-
| **OpenAI
|
|
49
|
+
| **OpenAI** | Sync | Realtime | No | gpt-4o, diarization, Realtime API |
|
|
50
50
|
| **Speechmatics** | Async | No | Query params | High accuracy, summarization |
|
|
51
|
+
| **Soniox** | Yes | WebSocket | No | 60+ languages, translation, regions |
|
|
51
52
|
|
|
52
53
|
## Installation
|
|
53
54
|
|
|
@@ -371,8 +372,9 @@ Provider-specific implementations:
|
|
|
371
372
|
- `AssemblyAIAdapter` - AssemblyAI transcription
|
|
372
373
|
- `DeepgramAdapter` - Deepgram transcription
|
|
373
374
|
- `AzureSTTAdapter` - Azure Speech-to-Text
|
|
374
|
-
- `OpenAIWhisperAdapter` - OpenAI Whisper
|
|
375
|
+
- `OpenAIWhisperAdapter` - OpenAI Whisper + Realtime API
|
|
375
376
|
- `SpeechmaticsAdapter` - Speechmatics transcription
|
|
377
|
+
- `SonioxAdapter` - Soniox transcription (batch + streaming)
|
|
376
378
|
|
|
377
379
|
## TypeScript Support
|
|
378
380
|
|
|
@@ -651,6 +653,23 @@ router.registerAdapter(new SpeechmaticsAdapter());
|
|
|
651
653
|
|
|
652
654
|
Get your API key: https://speechmatics.com
|
|
653
655
|
|
|
656
|
+
### Soniox
|
|
657
|
+
```typescript
|
|
658
|
+
import { VoiceRouter, SonioxAdapter, SonioxRegion } from 'voice-router-dev';
|
|
659
|
+
|
|
660
|
+
const router = new VoiceRouter({
|
|
661
|
+
providers: {
|
|
662
|
+
soniox: {
|
|
663
|
+
apiKey: 'YOUR_KEY',
|
|
664
|
+
region: SonioxRegion.us // or 'eu', 'jp'
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
});
|
|
668
|
+
router.registerAdapter(new SonioxAdapter());
|
|
669
|
+
```
|
|
670
|
+
|
|
671
|
+
Get your API key: https://soniox.com
|
|
672
|
+
|
|
654
673
|
## Contributing
|
|
655
674
|
|
|
656
675
|
Contributions welcome! Please read our [Contributing Guide](CONTRIBUTING.md).
|
package/dist/constants.d.mts
CHANGED
|
@@ -736,10 +736,46 @@ declare const DeepgramRegion: {
|
|
|
736
736
|
/** European Union endpoint */
|
|
737
737
|
readonly eu: "eu";
|
|
738
738
|
};
|
|
739
|
+
/**
|
|
740
|
+
* Soniox regional endpoints (Sovereign Cloud)
|
|
741
|
+
*
|
|
742
|
+
* Soniox offers regional endpoints for data residency compliance.
|
|
743
|
+
* All audio, transcripts, and logs stay fully in-region.
|
|
744
|
+
*
|
|
745
|
+
* | Region | REST API | WebSocket (Real-time) |
|
|
746
|
+
* |--------|----------|----------------------|
|
|
747
|
+
* | US (default) | api.soniox.com | stt-rt.soniox.com |
|
|
748
|
+
* | EU | api.eu.soniox.com | stt-rt.eu.soniox.com |
|
|
749
|
+
* | Japan | api.jp.soniox.com | stt-rt.jp.soniox.com |
|
|
750
|
+
*
|
|
751
|
+
* **Coming soon:** Korea, Australia, India, Canada, Saudi Arabia, UK, Brazil
|
|
752
|
+
*
|
|
753
|
+
* @example
|
|
754
|
+
* ```typescript
|
|
755
|
+
* import { SonioxRegion } from 'voice-router-dev/constants'
|
|
756
|
+
*
|
|
757
|
+
* const adapter = createSonioxAdapter({
|
|
758
|
+
* apiKey: process.env.SONIOX_API_KEY,
|
|
759
|
+
* region: SonioxRegion.eu
|
|
760
|
+
* })
|
|
761
|
+
* ```
|
|
762
|
+
*
|
|
763
|
+
* @see https://soniox.com/docs/stt/data-residency - Official data residency docs
|
|
764
|
+
*/
|
|
765
|
+
declare const SonioxRegion: {
|
|
766
|
+
/** United States (default) */
|
|
767
|
+
readonly us: "us";
|
|
768
|
+
/** European Union */
|
|
769
|
+
readonly eu: "eu";
|
|
770
|
+
/** Japan */
|
|
771
|
+
readonly jp: "jp";
|
|
772
|
+
};
|
|
739
773
|
/** Speechmatics region type derived from const object */
|
|
740
774
|
type SpeechmaticsRegionType = (typeof SpeechmaticsRegion)[keyof typeof SpeechmaticsRegion];
|
|
741
775
|
/** Deepgram region type derived from const object */
|
|
742
776
|
type DeepgramRegionType = (typeof DeepgramRegion)[keyof typeof DeepgramRegion];
|
|
777
|
+
/** Soniox region type derived from const object */
|
|
778
|
+
type SonioxRegionType = (typeof SonioxRegion)[keyof typeof SonioxRegion];
|
|
743
779
|
/**
|
|
744
780
|
* Deepgram TTS voice models
|
|
745
781
|
*
|
|
@@ -890,7 +926,12 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
|
|
|
890
926
|
/**
|
|
891
927
|
* OpenAI Whisper transcription models
|
|
892
928
|
*
|
|
893
|
-
* Values
|
|
929
|
+
* Values from official spec (auto-synced from Stainless):
|
|
930
|
+
* - `whisper-1`: Open source Whisper V2 model
|
|
931
|
+
* - `gpt-4o-transcribe`: GPT-4o based transcription (more accurate)
|
|
932
|
+
* - `gpt-4o-mini-transcribe`: Faster, cost-effective GPT-4o mini
|
|
933
|
+
* - `gpt-4o-mini-transcribe-2025-12-15`: Dated version of GPT-4o mini
|
|
934
|
+
* - `gpt-4o-transcribe-diarize`: GPT-4o with speaker diarization
|
|
894
935
|
*
|
|
895
936
|
* @example
|
|
896
937
|
* ```typescript
|
|
@@ -898,28 +939,36 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
|
|
|
898
939
|
*
|
|
899
940
|
* { model: OpenAIModel["whisper-1"] }
|
|
900
941
|
* { model: OpenAIModel["gpt-4o-transcribe"] }
|
|
942
|
+
* { model: OpenAIModel["gpt-4o-transcribe-diarize"] }
|
|
901
943
|
* ```
|
|
902
944
|
*/
|
|
903
945
|
declare const OpenAIModel: {
|
|
904
946
|
readonly "whisper-1": "whisper-1";
|
|
905
|
-
readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
|
|
906
947
|
readonly "gpt-4o-transcribe": "gpt-4o-transcribe";
|
|
948
|
+
readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
|
|
949
|
+
readonly "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15";
|
|
907
950
|
readonly "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize";
|
|
908
951
|
};
|
|
909
952
|
/**
|
|
910
953
|
* OpenAI transcription response formats
|
|
911
954
|
*
|
|
912
|
-
* Values
|
|
955
|
+
* Values from official spec (auto-synced from Stainless):
|
|
956
|
+
* - `json`: Basic JSON response
|
|
957
|
+
* - `text`: Plain text
|
|
958
|
+
* - `srt`: SRT subtitle format
|
|
959
|
+
* - `verbose_json`: Detailed JSON with timestamps
|
|
960
|
+
* - `vtt`: VTT subtitle format
|
|
961
|
+
* - `diarized_json`: JSON with speaker annotations (gpt-4o-transcribe-diarize only)
|
|
913
962
|
*
|
|
914
|
-
* Note:
|
|
915
|
-
*
|
|
963
|
+
* Note: `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` only support the `json` format.
|
|
964
|
+
* For diarization, use `diarized_json` with `gpt-4o-transcribe-diarize` model.
|
|
916
965
|
*
|
|
917
966
|
* @example
|
|
918
967
|
* ```typescript
|
|
919
968
|
* import { OpenAIResponseFormat } from 'voice-router-dev/constants'
|
|
920
969
|
*
|
|
921
970
|
* { responseFormat: OpenAIResponseFormat.verbose_json }
|
|
922
|
-
* { responseFormat: OpenAIResponseFormat.
|
|
971
|
+
* { responseFormat: OpenAIResponseFormat.diarized_json }
|
|
923
972
|
* ```
|
|
924
973
|
*/
|
|
925
974
|
declare const OpenAIResponseFormat: {
|
|
@@ -935,4 +984,4 @@ type OpenAIModelType = (typeof OpenAIModel)[keyof typeof OpenAIModel];
|
|
|
935
984
|
/** OpenAI response format type derived from const object */
|
|
936
985
|
type OpenAIResponseFormatType = (typeof OpenAIResponseFormat)[keyof typeof OpenAIResponseFormat];
|
|
937
986
|
|
|
938
|
-
export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SpeechmaticsRegion, type SpeechmaticsRegionType };
|
|
987
|
+
export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SonioxRegion, type SonioxRegionType, SpeechmaticsRegion, type SpeechmaticsRegionType };
|
package/dist/constants.d.ts
CHANGED
|
@@ -736,10 +736,46 @@ declare const DeepgramRegion: {
|
|
|
736
736
|
/** European Union endpoint */
|
|
737
737
|
readonly eu: "eu";
|
|
738
738
|
};
|
|
739
|
+
/**
|
|
740
|
+
* Soniox regional endpoints (Sovereign Cloud)
|
|
741
|
+
*
|
|
742
|
+
* Soniox offers regional endpoints for data residency compliance.
|
|
743
|
+
* All audio, transcripts, and logs stay fully in-region.
|
|
744
|
+
*
|
|
745
|
+
* | Region | REST API | WebSocket (Real-time) |
|
|
746
|
+
* |--------|----------|----------------------|
|
|
747
|
+
* | US (default) | api.soniox.com | stt-rt.soniox.com |
|
|
748
|
+
* | EU | api.eu.soniox.com | stt-rt.eu.soniox.com |
|
|
749
|
+
* | Japan | api.jp.soniox.com | stt-rt.jp.soniox.com |
|
|
750
|
+
*
|
|
751
|
+
* **Coming soon:** Korea, Australia, India, Canada, Saudi Arabia, UK, Brazil
|
|
752
|
+
*
|
|
753
|
+
* @example
|
|
754
|
+
* ```typescript
|
|
755
|
+
* import { SonioxRegion } from 'voice-router-dev/constants'
|
|
756
|
+
*
|
|
757
|
+
* const adapter = createSonioxAdapter({
|
|
758
|
+
* apiKey: process.env.SONIOX_API_KEY,
|
|
759
|
+
* region: SonioxRegion.eu
|
|
760
|
+
* })
|
|
761
|
+
* ```
|
|
762
|
+
*
|
|
763
|
+
* @see https://soniox.com/docs/stt/data-residency - Official data residency docs
|
|
764
|
+
*/
|
|
765
|
+
declare const SonioxRegion: {
|
|
766
|
+
/** United States (default) */
|
|
767
|
+
readonly us: "us";
|
|
768
|
+
/** European Union */
|
|
769
|
+
readonly eu: "eu";
|
|
770
|
+
/** Japan */
|
|
771
|
+
readonly jp: "jp";
|
|
772
|
+
};
|
|
739
773
|
/** Speechmatics region type derived from const object */
|
|
740
774
|
type SpeechmaticsRegionType = (typeof SpeechmaticsRegion)[keyof typeof SpeechmaticsRegion];
|
|
741
775
|
/** Deepgram region type derived from const object */
|
|
742
776
|
type DeepgramRegionType = (typeof DeepgramRegion)[keyof typeof DeepgramRegion];
|
|
777
|
+
/** Soniox region type derived from const object */
|
|
778
|
+
type SonioxRegionType = (typeof SonioxRegion)[keyof typeof SonioxRegion];
|
|
743
779
|
/**
|
|
744
780
|
* Deepgram TTS voice models
|
|
745
781
|
*
|
|
@@ -890,7 +926,12 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
|
|
|
890
926
|
/**
|
|
891
927
|
* OpenAI Whisper transcription models
|
|
892
928
|
*
|
|
893
|
-
* Values
|
|
929
|
+
* Values from official spec (auto-synced from Stainless):
|
|
930
|
+
* - `whisper-1`: Open source Whisper V2 model
|
|
931
|
+
* - `gpt-4o-transcribe`: GPT-4o based transcription (more accurate)
|
|
932
|
+
* - `gpt-4o-mini-transcribe`: Faster, cost-effective GPT-4o mini
|
|
933
|
+
* - `gpt-4o-mini-transcribe-2025-12-15`: Dated version of GPT-4o mini
|
|
934
|
+
* - `gpt-4o-transcribe-diarize`: GPT-4o with speaker diarization
|
|
894
935
|
*
|
|
895
936
|
* @example
|
|
896
937
|
* ```typescript
|
|
@@ -898,28 +939,36 @@ type DeepgramTTSSampleRateType = (typeof DeepgramTTSSampleRate)[keyof typeof Dee
|
|
|
898
939
|
*
|
|
899
940
|
* { model: OpenAIModel["whisper-1"] }
|
|
900
941
|
* { model: OpenAIModel["gpt-4o-transcribe"] }
|
|
942
|
+
* { model: OpenAIModel["gpt-4o-transcribe-diarize"] }
|
|
901
943
|
* ```
|
|
902
944
|
*/
|
|
903
945
|
declare const OpenAIModel: {
|
|
904
946
|
readonly "whisper-1": "whisper-1";
|
|
905
|
-
readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
|
|
906
947
|
readonly "gpt-4o-transcribe": "gpt-4o-transcribe";
|
|
948
|
+
readonly "gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe";
|
|
949
|
+
readonly "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15";
|
|
907
950
|
readonly "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize";
|
|
908
951
|
};
|
|
909
952
|
/**
|
|
910
953
|
* OpenAI transcription response formats
|
|
911
954
|
*
|
|
912
|
-
* Values
|
|
955
|
+
* Values from official spec (auto-synced from Stainless):
|
|
956
|
+
* - `json`: Basic JSON response
|
|
957
|
+
* - `text`: Plain text
|
|
958
|
+
* - `srt`: SRT subtitle format
|
|
959
|
+
* - `verbose_json`: Detailed JSON with timestamps
|
|
960
|
+
* - `vtt`: VTT subtitle format
|
|
961
|
+
* - `diarized_json`: JSON with speaker annotations (gpt-4o-transcribe-diarize only)
|
|
913
962
|
*
|
|
914
|
-
* Note:
|
|
915
|
-
*
|
|
963
|
+
* Note: `gpt-4o-transcribe` and `gpt-4o-mini-transcribe` only support the `json` format.
|
|
964
|
+
* For diarization, use `diarized_json` with `gpt-4o-transcribe-diarize` model.
|
|
916
965
|
*
|
|
917
966
|
* @example
|
|
918
967
|
* ```typescript
|
|
919
968
|
* import { OpenAIResponseFormat } from 'voice-router-dev/constants'
|
|
920
969
|
*
|
|
921
970
|
* { responseFormat: OpenAIResponseFormat.verbose_json }
|
|
922
|
-
* { responseFormat: OpenAIResponseFormat.
|
|
971
|
+
* { responseFormat: OpenAIResponseFormat.diarized_json }
|
|
923
972
|
* ```
|
|
924
973
|
*/
|
|
925
974
|
declare const OpenAIResponseFormat: {
|
|
@@ -935,4 +984,4 @@ type OpenAIModelType = (typeof OpenAIModel)[keyof typeof OpenAIModel];
|
|
|
935
984
|
/** OpenAI response format type derived from const object */
|
|
936
985
|
type OpenAIResponseFormatType = (typeof OpenAIResponseFormat)[keyof typeof OpenAIResponseFormat];
|
|
937
986
|
|
|
938
|
-
export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SpeechmaticsRegion, type SpeechmaticsRegionType };
|
|
987
|
+
export { AssemblyAIEncoding, type AssemblyAIEncodingType, AssemblyAISampleRate, type AssemblyAISampleRateType, AssemblyAISpeechModel, type AssemblyAISpeechModelType, AssemblyAIStatus, type AssemblyAIStatusType, AzureStatus, type AzureStatusType, DeepgramCallbackMethod, type DeepgramCallbackMethodType, DeepgramEncoding, type DeepgramEncodingType, DeepgramIntentMode, type DeepgramIntentModeType, DeepgramModel, type DeepgramModelType, DeepgramRedact, type DeepgramRedactType, DeepgramRegion, type DeepgramRegionType, DeepgramSampleRate, type DeepgramSampleRateType, DeepgramStatus, type DeepgramStatusType, DeepgramTTSContainer, type DeepgramTTSContainerType, DeepgramTTSEncoding, type DeepgramTTSEncodingType, DeepgramTTSModel, type DeepgramTTSModelType, DeepgramTTSSampleRate, type DeepgramTTSSampleRateType, DeepgramTopicMode, type DeepgramTopicModeType, GladiaBitDepth, type GladiaBitDepthType, GladiaEncoding, type GladiaEncodingType, GladiaLanguage, type GladiaLanguageType, GladiaModel, type GladiaModelType, GladiaRegion, type GladiaRegionType, GladiaSampleRate, type GladiaSampleRateType, GladiaStatus, type GladiaStatusType, GladiaTranslationLanguage, type GladiaTranslationLanguageType, OpenAIModel, type OpenAIModelType, OpenAIResponseFormat, type OpenAIResponseFormatType, SonioxRegion, type SonioxRegionType, SpeechmaticsRegion, type SpeechmaticsRegionType };
|
package/dist/constants.js
CHANGED
|
@@ -49,6 +49,7 @@ __export(constants_exports, {
|
|
|
49
49
|
GladiaTranslationLanguage: () => GladiaTranslationLanguage,
|
|
50
50
|
OpenAIModel: () => OpenAIModel,
|
|
51
51
|
OpenAIResponseFormat: () => OpenAIResponseFormat,
|
|
52
|
+
SonioxRegion: () => SonioxRegion,
|
|
52
53
|
SpeechmaticsRegion: () => SpeechmaticsRegion
|
|
53
54
|
});
|
|
54
55
|
module.exports = __toCommonJS(constants_exports);
|
|
@@ -70,38 +71,22 @@ var ListenV1RedactParameterOneOfItem = {
|
|
|
70
71
|
numbers: "numbers"
|
|
71
72
|
};
|
|
72
73
|
|
|
73
|
-
// src/generated/deepgram/schema/sharedCustomTopicModeParameter.ts
|
|
74
|
-
var SharedCustomTopicModeParameter = {
|
|
75
|
-
extended: "extended",
|
|
76
|
-
strict: "strict"
|
|
77
|
-
};
|
|
78
|
-
|
|
79
|
-
// src/generated/deepgram/schema/sharedCustomIntentModeParameter.ts
|
|
80
|
-
var SharedCustomIntentModeParameter = {
|
|
81
|
-
extended: "extended",
|
|
82
|
-
strict: "strict"
|
|
83
|
-
};
|
|
84
|
-
|
|
85
74
|
// src/generated/deepgram/schema/sharedCallbackMethodParameter.ts
|
|
86
75
|
var SharedCallbackMethodParameter = {
|
|
87
76
|
POST: "POST",
|
|
88
77
|
PUT: "PUT"
|
|
89
78
|
};
|
|
90
79
|
|
|
91
|
-
// src/generated/
|
|
92
|
-
var
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
"wav/ulaw": "wav/ulaw"
|
|
80
|
+
// src/generated/deepgram/schema/sharedCustomIntentModeParameter.ts
|
|
81
|
+
var SharedCustomIntentModeParameter = {
|
|
82
|
+
extended: "extended",
|
|
83
|
+
strict: "strict"
|
|
96
84
|
};
|
|
97
85
|
|
|
98
|
-
// src/generated/
|
|
99
|
-
var
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
NUMBER_32000: 32e3,
|
|
103
|
-
NUMBER_44100: 44100,
|
|
104
|
-
NUMBER_48000: 48e3
|
|
86
|
+
// src/generated/deepgram/schema/sharedCustomTopicModeParameter.ts
|
|
87
|
+
var SharedCustomTopicModeParameter = {
|
|
88
|
+
extended: "extended",
|
|
89
|
+
strict: "strict"
|
|
105
90
|
};
|
|
106
91
|
|
|
107
92
|
// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
|
|
@@ -112,6 +97,13 @@ var StreamingSupportedBitDepthEnum = {
|
|
|
112
97
|
NUMBER_32: 32
|
|
113
98
|
};
|
|
114
99
|
|
|
100
|
+
// src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
|
|
101
|
+
var StreamingSupportedEncodingEnum = {
|
|
102
|
+
"wav/pcm": "wav/pcm",
|
|
103
|
+
"wav/alaw": "wav/alaw",
|
|
104
|
+
"wav/ulaw": "wav/ulaw"
|
|
105
|
+
};
|
|
106
|
+
|
|
115
107
|
// src/generated/gladia/schema/streamingSupportedModels.ts
|
|
116
108
|
var StreamingSupportedModels = {
|
|
117
109
|
"solaria-1": "solaria-1"
|
|
@@ -123,6 +115,15 @@ var StreamingSupportedRegions = {
|
|
|
123
115
|
"eu-west": "eu-west"
|
|
124
116
|
};
|
|
125
117
|
|
|
118
|
+
// src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
|
|
119
|
+
var StreamingSupportedSampleRateEnum = {
|
|
120
|
+
NUMBER_8000: 8e3,
|
|
121
|
+
NUMBER_16000: 16e3,
|
|
122
|
+
NUMBER_32000: 32e3,
|
|
123
|
+
NUMBER_44100: 44100,
|
|
124
|
+
NUMBER_48000: 48e3
|
|
125
|
+
};
|
|
126
|
+
|
|
126
127
|
// src/generated/gladia/schema/transcriptionLanguageCodeEnum.ts
|
|
127
128
|
var TranscriptionLanguageCodeEnum = {
|
|
128
129
|
af: "af",
|
|
@@ -338,14 +339,6 @@ var TranscriptStatus = {
|
|
|
338
339
|
error: "error"
|
|
339
340
|
};
|
|
340
341
|
|
|
341
|
-
// src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
|
|
342
|
-
var TranscriptionControllerListV2StatusItem = {
|
|
343
|
-
queued: "queued",
|
|
344
|
-
processing: "processing",
|
|
345
|
-
done: "done",
|
|
346
|
-
error: "error"
|
|
347
|
-
};
|
|
348
|
-
|
|
349
342
|
// src/generated/azure/schema/status.ts
|
|
350
343
|
var Status = {
|
|
351
344
|
NotStarted: "NotStarted",
|
|
@@ -360,6 +353,32 @@ var ManageV1FilterStatusParameter = {
|
|
|
360
353
|
failed: "failed"
|
|
361
354
|
};
|
|
362
355
|
|
|
356
|
+
// src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
|
|
357
|
+
var TranscriptionControllerListV2StatusItem = {
|
|
358
|
+
queued: "queued",
|
|
359
|
+
processing: "processing",
|
|
360
|
+
done: "done",
|
|
361
|
+
error: "error"
|
|
362
|
+
};
|
|
363
|
+
|
|
364
|
+
// src/generated/deepgram/schema/speakV1ContainerParameter.ts
|
|
365
|
+
var SpeakV1ContainerParameter = {
|
|
366
|
+
none: "none",
|
|
367
|
+
wav: "wav",
|
|
368
|
+
ogg: "ogg"
|
|
369
|
+
};
|
|
370
|
+
|
|
371
|
+
// src/generated/deepgram/schema/speakV1EncodingParameter.ts
|
|
372
|
+
var SpeakV1EncodingParameter = {
|
|
373
|
+
linear16: "linear16",
|
|
374
|
+
aac: "aac",
|
|
375
|
+
opus: "opus",
|
|
376
|
+
mp3: "mp3",
|
|
377
|
+
flac: "flac",
|
|
378
|
+
mulaw: "mulaw",
|
|
379
|
+
alaw: "alaw"
|
|
380
|
+
};
|
|
381
|
+
|
|
363
382
|
// src/generated/deepgram/schema/speakV1ModelParameter.ts
|
|
364
383
|
var SpeakV1ModelParameter = {
|
|
365
384
|
"aura-asteria-en": "aura-asteria-en",
|
|
@@ -427,24 +446,6 @@ var SpeakV1ModelParameter = {
|
|
|
427
446
|
"aura-2-javier-es": "aura-2-javier-es"
|
|
428
447
|
};
|
|
429
448
|
|
|
430
|
-
// src/generated/deepgram/schema/speakV1EncodingParameter.ts
|
|
431
|
-
var SpeakV1EncodingParameter = {
|
|
432
|
-
linear16: "linear16",
|
|
433
|
-
aac: "aac",
|
|
434
|
-
opus: "opus",
|
|
435
|
-
mp3: "mp3",
|
|
436
|
-
flac: "flac",
|
|
437
|
-
mulaw: "mulaw",
|
|
438
|
-
alaw: "alaw"
|
|
439
|
-
};
|
|
440
|
-
|
|
441
|
-
// src/generated/deepgram/schema/speakV1ContainerParameter.ts
|
|
442
|
-
var SpeakV1ContainerParameter = {
|
|
443
|
-
none: "none",
|
|
444
|
-
wav: "wav",
|
|
445
|
-
ogg: "ogg"
|
|
446
|
-
};
|
|
447
|
-
|
|
448
449
|
// src/generated/deepgram/schema/speakV1SampleRateParameter.ts
|
|
449
450
|
var SpeakV1SampleRateParameter = {
|
|
450
451
|
NUMBER_8000: 8e3,
|
|
@@ -456,14 +457,6 @@ var SpeakV1SampleRateParameter = {
|
|
|
456
457
|
null: null
|
|
457
458
|
};
|
|
458
459
|
|
|
459
|
-
// src/generated/openai/schema/audioTranscriptionModel.ts
|
|
460
|
-
var AudioTranscriptionModel = {
|
|
461
|
-
"whisper-1": "whisper-1",
|
|
462
|
-
"gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe",
|
|
463
|
-
"gpt-4o-transcribe": "gpt-4o-transcribe",
|
|
464
|
-
"gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize"
|
|
465
|
-
};
|
|
466
|
-
|
|
467
460
|
// src/generated/openai/schema/audioResponseFormat.ts
|
|
468
461
|
var AudioResponseFormat = {
|
|
469
462
|
json: "json",
|
|
@@ -571,11 +564,25 @@ var DeepgramRegion = {
|
|
|
571
564
|
/** European Union endpoint */
|
|
572
565
|
eu: "eu"
|
|
573
566
|
};
|
|
567
|
+
var SonioxRegion = {
|
|
568
|
+
/** United States (default) */
|
|
569
|
+
us: "us",
|
|
570
|
+
/** European Union */
|
|
571
|
+
eu: "eu",
|
|
572
|
+
/** Japan */
|
|
573
|
+
jp: "jp"
|
|
574
|
+
};
|
|
574
575
|
var DeepgramTTSModel = SpeakV1ModelParameter;
|
|
575
576
|
var DeepgramTTSEncoding = SpeakV1EncodingParameter;
|
|
576
577
|
var DeepgramTTSContainer = SpeakV1ContainerParameter;
|
|
577
578
|
var DeepgramTTSSampleRate = SpeakV1SampleRateParameter;
|
|
578
|
-
var OpenAIModel =
|
|
579
|
+
var OpenAIModel = {
|
|
580
|
+
"whisper-1": "whisper-1",
|
|
581
|
+
"gpt-4o-transcribe": "gpt-4o-transcribe",
|
|
582
|
+
"gpt-4o-mini-transcribe": "gpt-4o-mini-transcribe",
|
|
583
|
+
"gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
|
|
584
|
+
"gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize"
|
|
585
|
+
};
|
|
579
586
|
var OpenAIResponseFormat = AudioResponseFormat;
|
|
580
587
|
// Annotate the CommonJS export names for ESM import in node:
|
|
581
588
|
0 && (module.exports = {
|
|
@@ -607,6 +614,7 @@ var OpenAIResponseFormat = AudioResponseFormat;
|
|
|
607
614
|
GladiaTranslationLanguage,
|
|
608
615
|
OpenAIModel,
|
|
609
616
|
OpenAIResponseFormat,
|
|
617
|
+
SonioxRegion,
|
|
610
618
|
SpeechmaticsRegion
|
|
611
619
|
});
|
|
612
620
|
//# sourceMappingURL=constants.js.map
|