voice-router-dev 0.2.6 → 0.2.7

This diff shows changes between publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
package/README.md CHANGED
@@ -28,25 +28,26 @@ const result = await router.transcribe(audio, {
 
 ## Features
 
- - 🔄 **Provider-Agnostic** - Switch providers with one line
- - 🎯 **Unified API** - Same interface for all providers
- - 📦 **Webhook Normalization** - Auto-detect and parse webhooks
- - 🔊 **Real-time Streaming** - WebSocket support (Gladia, AssemblyAI, Deepgram)
- - 📊 **Advanced Features** - Diarization, sentiment, summarization
- - 🔒 **Type-Safe** - Full TypeScript support
- - **Provider Fallback** - Automatic failover strategies
- - 🎨 **Zero Config** - Works out of the box
+ - **Provider-Agnostic** - Switch providers with one line
+ - **Unified API** - Same interface for all providers
+ - **Webhook Normalization** - Auto-detect and parse webhooks
+ - **Real-time Streaming** - WebSocket support (Gladia, AssemblyAI, Deepgram)
+ - **Advanced Features** - Diarization, sentiment, summarization, chapters, entities
+ - **Type-Safe** - Full TypeScript support with OpenAPI-generated types
+ - **Typed Extended Data** - Access provider-specific features with full autocomplete
+ - **Provider Fallback** - Automatic failover strategies
+ - **Zero Config** - Works out of the box
 
 ## Supported Providers
 
 | Provider | Batch | Streaming | Webhooks | Special Features |
 |----------|-------|-----------|----------|------------------|
- | **Gladia** | | WebSocket | | Multi-language, code-switching |
- | **AssemblyAI** | | Real-time | HMAC | Auto chapters, content moderation |
- | **Deepgram** | Sync | WebSocket | | PII redaction, keyword boosting |
- | **Azure STT** | Async | | HMAC | Custom models, language ID |
- | **OpenAI Whisper** | Sync | | | gpt-4o, multi-model support |
- | **Speechmatics** | Async | | Query params | High accuracy, enhanced mode |
+ | **Gladia** | Yes | WebSocket | Yes | Multi-language, code-switching, translation |
+ | **AssemblyAI** | Yes | Real-time | HMAC | Chapters, entities, content moderation |
+ | **Deepgram** | Sync | WebSocket | Yes | PII redaction, keyword boosting |
+ | **Azure STT** | Async | No | HMAC | Custom models, language ID |
+ | **OpenAI Whisper** | Sync | No | No | gpt-4o, diarization |
+ | **Speechmatics** | Async | No | Query params | High accuracy, summarization |
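
To make the "switch providers with one line" bullet concrete, here is a minimal sketch of the unified call. Only `router.transcribe(audio, { provider })` is confirmed by this diff; the `VoiceRouter` constructor name and the credential shape are assumptions for illustration.

```typescript
import { VoiceRouter } from 'voice-router-dev'; // constructor name is an assumption
import { readFile } from 'node:fs/promises';

// Hypothetical setup - the real config shape may differ.
const router = new VoiceRouter({
  gladiaApiKey: process.env.GLADIA_API_KEY,
  deepgramApiKey: process.env.DEEPGRAM_API_KEY,
});

const audio = await readFile('meeting.wav');

// The provider value is the only thing that changes between calls:
const viaGladia = await router.transcribe(audio, { provider: 'gladia' });
const viaDeepgram = await router.transcribe(audio, { provider: 'deepgram' });
```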
 
 ## Installation
 
@@ -198,51 +199,96 @@ app.post('/webhooks/transcription', express.json(), (req, res) => {
 
 ## Advanced Usage
 
- ### Provider-Specific Features
+ ### Provider-Specific Features with Type Safety
+ 
+ Use typed provider options for full autocomplete and compile-time safety:
 
 ```typescript
- // Gladia - Multi-language detection
+ // Gladia - Full type-safe options
 const result = await router.transcribe(audio, {
   provider: 'gladia',
-   languageDetection: true,
-   summarization: true,
-   sentimentAnalysis: true
+   gladia: {
+     translation: true,
+     translation_config: { target_languages: ['fr', 'es'] },
+     moderation: true,
+     named_entity_recognition: true,
+     sentiment_analysis: true,
+     chapterization: true,
+     audio_to_llm: true,
+     audio_to_llm_config: [{ prompt: 'Summarize key points' }],
+     custom_metadata: { session_id: 'abc123' }
+   }
 });
 
- // AssemblyAI - Content moderation
- const result = await router.transcribe(audio, {
+ // Access typed extended data
+ if (result.extended) {
+   const translations = result.extended.translation?.results;
+   const chapters = result.extended.chapters?.results;
+   const entities = result.extended.entities?.results;
+   console.log('Custom metadata:', result.extended.customMetadata);
+ }
+ 
+ // AssemblyAI - Typed options with extended data
+ const assemblyResult = await router.transcribe(audio, {
   provider: 'assemblyai',
-   entityDetection: true,
-   metadata: {
+   assemblyai: {
+     auto_chapters: true,
+     entity_detection: true,
+     sentiment_analysis: true,
+     auto_highlights: true,
     content_safety: true,
-     auto_chapters: true
+     iab_categories: true
   }
 });
 
- // Deepgram - PII redaction
- const result = await router.transcribe(audio, {
+ if (assemblyResult.extended) {
+   assemblyResult.extended.chapters?.forEach(ch => {
+     console.log(`${ch.headline}: ${ch.summary}`);
+   });
+   assemblyResult.extended.entities?.forEach(e => {
+     console.log(`${e.entity_type}: ${e.text}`);
+   });
+ }
+ 
+ // Deepgram - Typed options with metadata tracking
+ const deepgramResult = await router.transcribe(audio, {
   provider: 'deepgram',
-   piiRedaction: true,
-   customVocabulary: ['technical', 'terms']
+   deepgram: {
+     model: 'nova-3',
+     smart_format: true,
+     paragraphs: true,
+     detect_topics: true,
+     tag: ['meeting', 'sales'],
+     extra: { user_id: '12345' }
+   }
 });
 
- // OpenAI Whisper - Model selection
- const result = await router.transcribe(audio, {
+ if (deepgramResult.extended) {
+   console.log('Request ID:', deepgramResult.extended.requestId);
+   console.log('Audio SHA256:', deepgramResult.extended.sha256);
+   console.log('Tags:', deepgramResult.extended.tags);
+ }
+ 
+ // OpenAI Whisper - Typed options
+ const whisperResult = await router.transcribe(audio, {
   provider: 'openai-whisper',
-   metadata: {
-     model: 'gpt-4o-transcribe', // or 'whisper-1'
-     temperature: 0.2
+   diarization: true,
+   openai: {
+     temperature: 0.2,
+     prompt: 'Technical discussion about APIs'
   }
 });
 
- // Speechmatics - Enhanced accuracy
- const result = await router.transcribe(audio, {
+ // Speechmatics - Enhanced accuracy with summarization
+ const speechmaticsResult = await router.transcribe(audio, {
   provider: 'speechmatics',
-   metadata: {
-     operating_point: 'enhanced', // Higher accuracy
-     enable_sentiment_analysis: true
-   }
+   model: 'enhanced',
+   summarization: true,
+   diarization: true
 });
+ 
+ // All providers include request tracking
+ console.log('Request ID:', result.tracking?.requestId);
 ```
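
The feature list advertises automatic provider fallback, but its API never appears in this diff. A hedged user-land sketch that achieves the same effect with only the `router.transcribe` call shown above (the SDK's built-in fallback strategies may look different):

```typescript
// Sequential failover across providers, sketched with the one call
// this README confirms; provider ids match the table above.
const fallbackOrder = ['gladia', 'assemblyai', 'deepgram'] as const;

async function transcribeWithFallback(audio: Buffer) {
  let lastError: unknown;
  for (const provider of fallbackOrder) {
    try {
      return await router.transcribe(audio, { provider }); // first success wins
    } catch (err) {
      lastError = err; // remember the failure, try the next provider
    }
  }
  throw lastError; // every provider failed
}
```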
 
 ### Error Handling
@@ -348,34 +394,67 @@ import type {
 
 ### Provider-Specific Type Safety
 
- **🎯 New: Type-safe responses with provider discrimination**
- 
- The SDK now provides full type safety for provider-specific responses:
+ The SDK provides full type safety for provider-specific responses:
 
 ```typescript
- // Generic response - raw field is unknown
+ // Generic response - raw and extended fields are unknown
 const result: UnifiedTranscriptResponse = await router.transcribe(audio);
 
- // Provider-specific response - raw field is properly typed!
+ // Provider-specific response - raw and extended are properly typed!
 const deepgramResult: UnifiedTranscriptResponse<'deepgram'> = await router.transcribe(audio, {
   provider: 'deepgram'
 });
 
- // TypeScript knows raw is ListenV1Response
+ // TypeScript knows raw is ListenV1Response
 const metadata = deepgramResult.raw?.metadata;
- const model = deepgramResult.raw?.results?.channels?.[0]?.alternatives?.[0]?.model;
+ 
+ // TypeScript knows extended is DeepgramExtendedData
+ const requestId = deepgramResult.extended?.requestId;
+ const sha256 = deepgramResult.extended?.sha256;
 ```
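
Because the provider name is a type parameter, the narrowing survives function boundaries. A minimal sketch assuming only the `UnifiedTranscriptResponse<'deepgram'>` form shown above (`router` and `audio` come from the earlier examples; the `Buffer` parameter type is illustrative):

```typescript
import type { UnifiedTranscriptResponse } from 'voice-router-dev';

// A wrapper pinned to one provider keeps the narrowed typing, so
// callers get typed raw/extended fields without casting.
async function transcribeWithDeepgram(
  audio: Buffer
): Promise<UnifiedTranscriptResponse<'deepgram'>> {
  return router.transcribe(audio, { provider: 'deepgram' });
}

const res = await transcribeWithDeepgram(audio);
console.log(res.extended?.requestId); // typed via DeepgramExtendedData
```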
 
 **Provider-specific raw response types:**
- - `gladia` `PreRecordedResponse`
- - `deepgram` `ListenV1Response`
- - `openai-whisper` `CreateTranscription200One`
- - `assemblyai` `AssemblyAITranscript`
- - `azure-stt` `AzureTranscription`
+ - `gladia` - `PreRecordedResponse`
+ - `deepgram` - `ListenV1Response`
+ - `openai-whisper` - `CreateTranscription200One`
+ - `assemblyai` - `AssemblyAITranscript`
+ - `azure-stt` - `AzureTranscription`
 
- ### Exported Parameter Enums
+ **Provider-specific extended data types:**
+ - `gladia` - `GladiaExtendedData` (translation, moderation, entities, sentiment, chapters, audioToLlm, customMetadata)
+ - `assemblyai` - `AssemblyAIExtendedData` (chapters, entities, sentimentResults, highlights, contentSafety, topics)
+ - `deepgram` - `DeepgramExtendedData` (metadata, requestId, sha256, modelInfo, tags)
+ 
+ ### Typed Extended Data
+ 
+ Access rich provider-specific data beyond basic transcription:
+ 
+ ```typescript
+ import type {
+   GladiaExtendedData,
+   AssemblyAIExtendedData,
+   DeepgramExtendedData,
+   // Individual types for fine-grained access
+   GladiaTranslation,
+   GladiaChapters,
+   AssemblyAIChapter,
+   AssemblyAIEntity,
+   DeepgramMetadata
+ } from 'voice-router-dev';
+ 
+ // Gladia extended data
+ const gladiaResult = await router.transcribe(audio, { provider: 'gladia', gladia: { translation: true } });
+ const translation: GladiaTranslation | undefined = gladiaResult.extended?.translation;
 
- **🎯 New: Direct access to provider parameter enums**
+ // AssemblyAI extended data
+ const assemblyResult = await router.transcribe(audio, { provider: 'assemblyai', assemblyai: { auto_chapters: true } });
+ const chapters: AssemblyAIChapter[] | undefined = assemblyResult.extended?.chapters;
+ 
+ // All responses include tracking info
+ console.log('Request ID:', gladiaResult.tracking?.requestId);
+ ```
+ 
+ ### Exported Parameter Enums
 
 Import and use provider-specific enums for type-safe configuration:
 
@@ -394,15 +473,15 @@ import {
   AudioResponseFormat
 } from 'voice-router-dev';
 
- // Type-safe Deepgram encoding
+ // Type-safe Deepgram encoding
 const session = await router.transcribeStream({
   provider: 'deepgram',
-   encoding: ListenV1EncodingParameter.linear16, // Autocomplete works!
+   encoding: ListenV1EncodingParameter.linear16,
   model: ListenV1ModelParameter['nova-2'],
   sampleRate: 16000
 });
 
- // Type-safe Gladia encoding
+ // Type-safe Gladia encoding
 const gladiaSession = await router.transcribeStream({
   provider: 'gladia',
   encoding: StreamingSupportedEncodingEnum['wav/pcm'],
@@ -412,41 +491,42 @@ const gladiaSession = await router.transcribeStream({
 
 ### Type-Safe Streaming Options
 
- Streaming options are now fully typed based on provider OpenAPI specifications:
+ Streaming options are fully typed based on provider OpenAPI specifications:
 
 ```typescript
 // Deepgram streaming - all options are type-safe
 const deepgramSession = await router.transcribeStream({
   provider: 'deepgram',
-   encoding: 'linear16', // ✅ Only Deepgram encodings
-   model: 'nova-3', // ✅ Validated model names
-   language: 'en-US', // ✅ BCP-47 language codes
-   diarization: true,
-   smartFormat: true
+   encoding: 'linear16',
+   model: 'nova-3',
+   language: 'en-US',
+   diarization: true
 }, callbacks);
 
- // Gladia streaming - different options
+ // Gladia streaming - with typed gladiaStreaming options
 const gladiaSession = await router.transcribeStream({
   provider: 'gladia',
-   encoding: 'wav/pcm', // ✅ Only Gladia encodings
-   sampleRate: 16000, // ✅ Only supported rates
-   bitDepth: 16, // ✅ Only supported depths
-   languageConfig: { languages: ['en'] }
+   encoding: 'wav/pcm',
+   sampleRate: 16000,
+   gladiaStreaming: {
+     realtime_processing: { words_accurate_timestamps: true },
+     messages_config: { receive_partial_transcripts: true }
+   }
 }, callbacks);
 
- // AssemblyAI streaming - simpler options
+ // AssemblyAI streaming
 const assemblySession = await router.transcribeStream({
   provider: 'assemblyai',
-   sampleRate: 16000, // ✅ Only 8000, 16000, 22050, 44100, 48000
+   sampleRate: 16000,
   wordTimestamps: true
 }, callbacks);
 ```
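
The streaming snippets above pass a `callbacks` argument that the diff never defines. A hedged sketch of one plausible shape; the handler names (`onTranscript`, `onError`, `onClose`) are assumptions, not confirmed SDK names:

```typescript
// Hypothetical callbacks object for router.transcribeStream(...).
// Handler names and signatures are assumptions; check the SDK's
// exported streaming types for the real contract.
const callbacks = {
  onTranscript: (text: string, isFinal: boolean) => {
    console.log(isFinal ? 'final:' : 'partial:', text);
  },
  onError: (err: Error) => console.error('stream error:', err),
  onClose: () => console.log('stream closed'),
};

const session = await router.transcribeStream(
  { provider: 'deepgram', encoding: 'linear16', sampleRate: 16000 },
  callbacks
);
```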
 
 **Benefits:**
- - **Full IntelliSense** - Autocomplete for all provider-specific options
- - **Compile-time Safety** - Invalid options caught before runtime
- - **Provider Discrimination** - Type system knows which provider you're using
- - **OpenAPI-Generated** - Types come directly from provider specifications
+ - **Full IntelliSense** - Autocomplete for all provider-specific options
+ - **Compile-time Safety** - Invalid options caught before runtime
+ - **Provider Discrimination** - Type system knows which provider you're using
+ - **OpenAPI-Generated** - Types come directly from provider specifications
 
 ## Requirements
 
@@ -460,7 +540,7 @@ const assemblySession = await router.transcribeStream({
 
 Comprehensive API documentation is auto-generated with [TypeDoc](https://typedoc.org/) from TypeScript source code:
 
- 📁 **[docs/generated/](./docs/generated/)** - Complete API reference
+ **[docs/generated/](./docs/generated/)** - Complete API reference
 
 **Main Documentation Sets**: