voice-router-dev 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +154 -74
- package/dist/index.d.mts +709 -540
- package/dist/index.d.ts +709 -540
- package/dist/index.js +81 -20
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +81 -20
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -28,25 +28,26 @@ const result = await router.transcribe(audio, {

 ## Features

--
--
--
--
--
--
--
--
+- **Provider-Agnostic** - Switch providers with one line
+- **Unified API** - Same interface for all providers
+- **Webhook Normalization** - Auto-detect and parse webhooks
+- **Real-time Streaming** - WebSocket support (Gladia, AssemblyAI, Deepgram)
+- **Advanced Features** - Diarization, sentiment, summarization, chapters, entities
+- **Type-Safe** - Full TypeScript support with OpenAPI-generated types
+- **Typed Extended Data** - Access provider-specific features with full autocomplete
+- **Provider Fallback** - Automatic failover strategies
+- **Zero Config** - Works out of the box

 ## Supported Providers

 | Provider | Batch | Streaming | Webhooks | Special Features |
 |----------|-------|-----------|----------|------------------|
-| **Gladia** |
-| **AssemblyAI** |
-| **Deepgram** |
-| **Azure STT** |
-| **OpenAI Whisper** |
-| **Speechmatics** |
+| **Gladia** | Yes | WebSocket | Yes | Multi-language, code-switching, translation |
+| **AssemblyAI** | Yes | Real-time | HMAC | Chapters, entities, content moderation |
+| **Deepgram** | Sync | WebSocket | Yes | PII redaction, keyword boosting |
+| **Azure STT** | Async | No | HMAC | Custom models, language ID |
+| **OpenAI Whisper** | Sync | No | No | gpt-4o, diarization |
+| **Speechmatics** | Async | No | Query params | High accuracy, summarization |

 ## Installation

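The new feature list leads with one-line provider switching; the sketch below illustrates that claim. The `transcribe(audio, { provider })` call and the `tracking?.requestId` field appear in the hunks that follow, but the `VoiceRouter` import, constructor, and per-provider key config are assumptions, not shown in this diff.

```typescript
import { VoiceRouter } from 'voice-router-dev'; // export name assumed

// Assumed constructor/config shape - the diff does not show router setup.
const router = new VoiceRouter({
  gladia: { apiKey: process.env.GLADIA_API_KEY },
  deepgram: { apiKey: process.env.DEEPGRAM_API_KEY }
});

declare const audio: ArrayBuffer; // whatever audio input the SDK accepts

// Same unified call; switching providers is a one-word change.
const viaGladia = await router.transcribe(audio, { provider: 'gladia' });
const viaDeepgram = await router.transcribe(audio, { provider: 'deepgram' });

// Request tracking is available on every response (per the hunks below).
console.log(viaGladia.tracking?.requestId, viaDeepgram.tracking?.requestId);
```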
@@ -198,51 +199,96 @@ app.post('/webhooks/transcription', express.json(), (req, res) => {

 ## Advanced Usage

-### Provider-Specific Features
+### Provider-Specific Features with Type Safety
+
+Use typed provider options for full autocomplete and compile-time safety:

 ```typescript
-// Gladia -
+// Gladia - Full type-safe options
 const result = await router.transcribe(audio, {
   provider: 'gladia',
-
-
-
+  gladia: {
+    translation: true,
+    translation_config: { target_languages: ['fr', 'es'] },
+    moderation: true,
+    named_entity_recognition: true,
+    sentiment_analysis: true,
+    chapterization: true,
+    audio_to_llm: true,
+    audio_to_llm_config: [{ prompt: 'Summarize key points' }],
+    custom_metadata: { session_id: 'abc123' }
+  }
 });

-//
-
+// Access typed extended data
+if (result.extended) {
+  const translations = result.extended.translation?.results;
+  const chapters = result.extended.chapters?.results;
+  const entities = result.extended.entities?.results;
+  console.log('Custom metadata:', result.extended.customMetadata);
+}
+
+// AssemblyAI - Typed options with extended data
+const assemblyResult = await router.transcribe(audio, {
   provider: 'assemblyai',
-
-
+  assemblyai: {
+    auto_chapters: true,
+    entity_detection: true,
+    sentiment_analysis: true,
+    auto_highlights: true,
     content_safety: true,
-
+    iab_categories: true
   }
 });

-
-
+if (assemblyResult.extended) {
+  assemblyResult.extended.chapters?.forEach(ch => {
+    console.log(`${ch.headline}: ${ch.summary}`);
+  });
+  assemblyResult.extended.entities?.forEach(e => {
+    console.log(`${e.entity_type}: ${e.text}`);
+  });
+}
+
+// Deepgram - Typed options with metadata tracking
+const deepgramResult = await router.transcribe(audio, {
   provider: 'deepgram',
-
-
+  deepgram: {
+    model: 'nova-3',
+    smart_format: true,
+    paragraphs: true,
+    detect_topics: true,
+    tag: ['meeting', 'sales'],
+    extra: { user_id: '12345' }
+  }
 });

-
-
+if (deepgramResult.extended) {
+  console.log('Request ID:', deepgramResult.extended.requestId);
+  console.log('Audio SHA256:', deepgramResult.extended.sha256);
+  console.log('Tags:', deepgramResult.extended.tags);
+}
+
+// OpenAI Whisper - Typed options
+const whisperResult = await router.transcribe(audio, {
   provider: 'openai-whisper',
-
-
-  temperature: 0.2
+  diarization: true,
+  openai: {
+    temperature: 0.2,
+    prompt: 'Technical discussion about APIs'
   }
 });

-// Speechmatics - Enhanced accuracy
-const
+// Speechmatics - Enhanced accuracy with summarization
+const speechmaticsResult = await router.transcribe(audio, {
   provider: 'speechmatics',
-
-
-
-}
+  model: 'enhanced',
+  summarization: true,
+  diarization: true
 });
+
+// All providers include request tracking
+console.log('Request ID:', result.tracking?.requestId);
 ```

 ### Error Handling

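The hunk above is anchored in the README's Express webhook route (`app.post('/webhooks/transcription', ...)`), which pairs with the new "Webhook Normalization" bullet. The sketch below shows how such a route might consume normalized events; `parseWebhook` and the `provider` field are hypothetical names, since the diff shows only the route signature.

```typescript
import express from 'express';

// Stand-in for the VoiceRouter instance; the method shape is assumed,
// not taken from the SDK.
declare const router: {
  parseWebhook(headers: unknown, body: unknown): { provider: string };
};

const app = express();

// Route path and express.json() middleware come from the hunk header above.
app.post('/webhooks/transcription', express.json(), (req, res) => {
  const event = router.parseWebhook(req.headers, req.body); // hypothetical API
  console.log('Normalized webhook from:', event.provider);
  res.sendStatus(200);
});
```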
@@ -348,34 +394,67 @@ import type {

 ### Provider-Specific Type Safety

-
-
-The SDK now provides full type safety for provider-specific responses:
+The SDK provides full type safety for provider-specific responses:

 ```typescript
-// Generic response - raw
+// Generic response - raw and extended fields are unknown
 const result: UnifiedTranscriptResponse = await router.transcribe(audio);

-// Provider-specific response - raw
+// Provider-specific response - raw and extended are properly typed!
 const deepgramResult: UnifiedTranscriptResponse<'deepgram'> = await router.transcribe(audio, {
   provider: 'deepgram'
 });

-//
+// TypeScript knows raw is ListenV1Response
 const metadata = deepgramResult.raw?.metadata;
-
+
+// TypeScript knows extended is DeepgramExtendedData
+const requestId = deepgramResult.extended?.requestId;
+const sha256 = deepgramResult.extended?.sha256;
 ```

 **Provider-specific raw response types:**
-- `gladia`
-- `deepgram`
-- `openai-whisper`
-- `assemblyai`
-- `azure-stt`
+- `gladia` - `PreRecordedResponse`
+- `deepgram` - `ListenV1Response`
+- `openai-whisper` - `CreateTranscription200One`
+- `assemblyai` - `AssemblyAITranscript`
+- `azure-stt` - `AzureTranscription`

-
+**Provider-specific extended data types:**
+- `gladia` - `GladiaExtendedData` (translation, moderation, entities, sentiment, chapters, audioToLlm, customMetadata)
+- `assemblyai` - `AssemblyAIExtendedData` (chapters, entities, sentimentResults, highlights, contentSafety, topics)
+- `deepgram` - `DeepgramExtendedData` (metadata, requestId, sha256, modelInfo, tags)
+
+### Typed Extended Data
+
+Access rich provider-specific data beyond basic transcription:
+
+```typescript
+import type {
+  GladiaExtendedData,
+  AssemblyAIExtendedData,
+  DeepgramExtendedData,
+  // Individual types for fine-grained access
+  GladiaTranslation,
+  GladiaChapters,
+  AssemblyAIChapter,
+  AssemblyAIEntity,
+  DeepgramMetadata
+} from 'voice-router-dev';
+
+// Gladia extended data
+const gladiaResult = await router.transcribe(audio, { provider: 'gladia', gladia: { translation: true } });
+const translation: GladiaTranslation | undefined = gladiaResult.extended?.translation;

-
+// AssemblyAI extended data
+const assemblyResult = await router.transcribe(audio, { provider: 'assemblyai', assemblyai: { auto_chapters: true } });
+const chapters: AssemblyAIChapter[] | undefined = assemblyResult.extended?.chapters;
+
+// All responses include tracking info
+console.log('Request ID:', gladiaResult.tracking?.requestId);
+```
+
+### Exported Parameter Enums

 Import and use provider-specific enums for type-safe configuration:

@@ -394,15 +473,15 @@ import {
   AudioResponseFormat
 } from 'voice-router-dev';

-//
+// Type-safe Deepgram encoding
 const session = await router.transcribeStream({
   provider: 'deepgram',
-  encoding: ListenV1EncodingParameter.linear16,
+  encoding: ListenV1EncodingParameter.linear16,
   model: ListenV1ModelParameter['nova-2'],
   sampleRate: 16000
 });

-//
+// Type-safe Gladia encoding
 const gladiaSession = await router.transcribeStream({
   provider: 'gladia',
   encoding: StreamingSupportedEncodingEnum['wav/pcm'],
@@ -412,41 +491,42 @@ const gladiaSession = await router.transcribeStream({

 ### Type-Safe Streaming Options

-Streaming options are
+Streaming options are fully typed based on provider OpenAPI specifications:

 ```typescript
 // Deepgram streaming - all options are type-safe
 const deepgramSession = await router.transcribeStream({
   provider: 'deepgram',
-  encoding: 'linear16',
-  model: 'nova-3',
-  language: 'en-US',
-  diarization: true
-  smartFormat: true
+  encoding: 'linear16',
+  model: 'nova-3',
+  language: 'en-US',
+  diarization: true
 }, callbacks);

-// Gladia streaming -
+// Gladia streaming - with typed gladiaStreaming options
 const gladiaSession = await router.transcribeStream({
   provider: 'gladia',
-  encoding: 'wav/pcm',
-  sampleRate: 16000,
-
-
+  encoding: 'wav/pcm',
+  sampleRate: 16000,
+  gladiaStreaming: {
+    realtime_processing: { words_accurate_timestamps: true },
+    messages_config: { receive_partial_transcripts: true }
+  }
 }, callbacks);

-// AssemblyAI streaming
+// AssemblyAI streaming
 const assemblySession = await router.transcribeStream({
   provider: 'assemblyai',
-  sampleRate: 16000,
+  sampleRate: 16000,
   wordTimestamps: true
 }, callbacks);
 ```

 **Benefits:**
--
--
--
--
+- **Full IntelliSense** - Autocomplete for all provider-specific options
+- **Compile-time Safety** - Invalid options caught before runtime
+- **Provider Discrimination** - Type system knows which provider you're using
+- **OpenAPI-Generated** - Types come directly from provider specifications

 ## Requirements

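The "Compile-time Safety" bullet is easy to demonstrate. A sketch, reusing `router` and `audio` from the earlier setup sketch and assuming the generated per-provider option types reject unknown keys; note the old streaming example carried a camelCase `smartFormat` where batch Deepgram options use `smart_format`, exactly the kind of typo this catches.

```typescript
// Valid: 'smart_format' matches the typed Deepgram options in the hunks above.
const ok = await router.transcribe(audio, {
  provider: 'deepgram',
  deepgram: { model: 'nova-3', smart_format: true }
});

// Assuming the typed options reject unknown keys, the camelCase typo fails to
// compile instead of being silently ignored at runtime.
// @ts-expect-error - 'smartFormat' is not a key of the typed Deepgram options
const bad = await router.transcribe(audio, {
  provider: 'deepgram',
  deepgram: { smartFormat: true }
});
```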
@@ -460,7 +540,7 @@ const assemblySession = await router.transcribeStream({

 Comprehensive API documentation is auto-generated with [TypeDoc](https://typedoc.org/) from TypeScript source code:

-
+**[docs/generated/](./docs/generated/)** - Complete API reference

 **Main Documentation Sets**:
