voice-router-dev 0.2.7 → 0.3.0

This diff shows the changes between publicly released versions of this package, as published to their respective public registries. It is provided for informational purposes only.
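At a glance, 0.3.0 adds new generated enums for Deepgram, Gladia, and AssemblyAI streaming (redaction, custom topics, encodings, sample rates, bit depths, models, language codes); renames the normalized word field from `text` to `word` across all three adapters; rewrites the Gladia streaming path into `buildStreamingRequest`/`handleWebSocketMessage` helpers with speech, translation, sentiment, NER, summarization, chapterization, acknowledgment, and lifecycle events; moves AssemblyAI to the v3 Universal Streaming API (`{ type: "Terminate" }` replaces `terminate_session`, and sessions gain `updateConfiguration` and `forceEndpoint`); and gives Deepgram a `buildStreamingUrl` helper covering the expanded option set.

A minimal consumer sketch, pieced together from the JSDoc examples visible in this diff. The import specifier and config shape are assumptions for illustration, not confirmed by the diff:

```typescript
// Hypothetical usage of the 0.3.0 streaming surface; adapt names to the real exports.
import { createAssemblyAIAdapter } from "voice-router-dev"; // assumed entry point

const adapter = createAssemblyAIAdapter({ apiKey: process.env.ASSEMBLYAI_API_KEY ?? "" });

const session = await adapter.transcribeStream(
  { sampleRate: 16000, encoding: "pcm_s16le" },
  {
    onTranscript: (e) => console.log(e.isFinal ? "final:" : "interim:", e.text),
    onError: (err) => console.error(err.code, err.message),
  }
);

const audioChunk = Buffer.alloc(3200); // placeholder; stream real PCM frames here
await session.sendAudio({ data: audioChunk });
session.forceEndpoint?.(); // new in 0.3.0: manually end the current turn
await session.close();     // now sends { type: "Terminate" } (v3) instead of terminate_session
```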
package/dist/index.mjs CHANGED
@@ -220,6 +220,312 @@ var ListenV1EncodingParameter = {
  g729: "g729"
  };
 
+ // src/generated/deepgram/schema/listenV1RedactParameterOneOfItem.ts
+ var ListenV1RedactParameterOneOfItem = {
+ pci: "pci",
+ pii: "pii",
+ numbers: "numbers"
+ };
+
+ // src/generated/deepgram/schema/sharedCustomTopicModeParameter.ts
+ var SharedCustomTopicModeParameter = {
+ extended: "extended",
+ strict: "strict"
+ };
+
+ // src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
+ var StreamingSupportedEncodingEnum = {
+ "wav/pcm": "wav/pcm",
+ "wav/alaw": "wav/alaw",
+ "wav/ulaw": "wav/ulaw"
+ };
+
+ // src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
+ var StreamingSupportedSampleRateEnum = {
+ NUMBER_8000: 8e3,
+ NUMBER_16000: 16e3,
+ NUMBER_32000: 32e3,
+ NUMBER_44100: 44100,
+ NUMBER_48000: 48e3
+ };
+
+ // src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
+ var StreamingSupportedBitDepthEnum = {
+ NUMBER_8: 8,
+ NUMBER_16: 16,
+ NUMBER_24: 24,
+ NUMBER_32: 32
+ };
+
+ // src/generated/gladia/schema/streamingSupportedModels.ts
+ var StreamingSupportedModels = {
+ "solaria-1": "solaria-1"
+ };
+
+ // src/generated/gladia/schema/transcriptionLanguageCodeEnum.ts
+ var TranscriptionLanguageCodeEnum = {
+ af: "af",
+ am: "am",
+ ar: "ar",
+ as: "as",
+ az: "az",
+ ba: "ba",
+ be: "be",
+ bg: "bg",
+ bn: "bn",
+ bo: "bo",
+ br: "br",
+ bs: "bs",
+ ca: "ca",
+ cs: "cs",
+ cy: "cy",
+ da: "da",
+ de: "de",
+ el: "el",
+ en: "en",
+ es: "es",
+ et: "et",
+ eu: "eu",
+ fa: "fa",
+ fi: "fi",
+ fo: "fo",
+ fr: "fr",
+ gl: "gl",
+ gu: "gu",
+ ha: "ha",
+ haw: "haw",
+ he: "he",
+ hi: "hi",
+ hr: "hr",
+ ht: "ht",
+ hu: "hu",
+ hy: "hy",
+ id: "id",
+ is: "is",
+ it: "it",
+ ja: "ja",
+ jw: "jw",
+ ka: "ka",
+ kk: "kk",
+ km: "km",
+ kn: "kn",
+ ko: "ko",
+ la: "la",
+ lb: "lb",
+ ln: "ln",
+ lo: "lo",
+ lt: "lt",
+ lv: "lv",
+ mg: "mg",
+ mi: "mi",
+ mk: "mk",
+ ml: "ml",
+ mn: "mn",
+ mr: "mr",
+ ms: "ms",
+ mt: "mt",
+ my: "my",
+ ne: "ne",
+ nl: "nl",
+ nn: "nn",
+ no: "no",
+ oc: "oc",
+ pa: "pa",
+ pl: "pl",
+ ps: "ps",
+ pt: "pt",
+ ro: "ro",
+ ru: "ru",
+ sa: "sa",
+ sd: "sd",
+ si: "si",
+ sk: "sk",
+ sl: "sl",
+ sn: "sn",
+ so: "so",
+ sq: "sq",
+ sr: "sr",
+ su: "su",
+ sv: "sv",
+ sw: "sw",
+ ta: "ta",
+ te: "te",
+ tg: "tg",
+ th: "th",
+ tk: "tk",
+ tl: "tl",
+ tr: "tr",
+ tt: "tt",
+ uk: "uk",
+ ur: "ur",
+ uz: "uz",
+ vi: "vi",
+ yi: "yi",
+ yo: "yo",
+ zh: "zh"
+ };
+
+ // src/generated/gladia/schema/translationLanguageCodeEnum.ts
+ var TranslationLanguageCodeEnum = {
+ af: "af",
+ am: "am",
+ ar: "ar",
+ as: "as",
+ az: "az",
+ ba: "ba",
+ be: "be",
+ bg: "bg",
+ bn: "bn",
+ bo: "bo",
+ br: "br",
+ bs: "bs",
+ ca: "ca",
+ cs: "cs",
+ cy: "cy",
+ da: "da",
+ de: "de",
+ el: "el",
+ en: "en",
+ es: "es",
+ et: "et",
+ eu: "eu",
+ fa: "fa",
+ fi: "fi",
+ fo: "fo",
+ fr: "fr",
+ gl: "gl",
+ gu: "gu",
+ ha: "ha",
+ haw: "haw",
+ he: "he",
+ hi: "hi",
+ hr: "hr",
+ ht: "ht",
+ hu: "hu",
+ hy: "hy",
+ id: "id",
+ is: "is",
+ it: "it",
+ ja: "ja",
+ jw: "jw",
+ ka: "ka",
+ kk: "kk",
+ km: "km",
+ kn: "kn",
+ ko: "ko",
+ la: "la",
+ lb: "lb",
+ ln: "ln",
+ lo: "lo",
+ lt: "lt",
+ lv: "lv",
+ mg: "mg",
+ mi: "mi",
+ mk: "mk",
+ ml: "ml",
+ mn: "mn",
+ mr: "mr",
+ ms: "ms",
+ mt: "mt",
+ my: "my",
+ ne: "ne",
+ nl: "nl",
+ nn: "nn",
+ no: "no",
+ oc: "oc",
+ pa: "pa",
+ pl: "pl",
+ ps: "ps",
+ pt: "pt",
+ ro: "ro",
+ ru: "ru",
+ sa: "sa",
+ sd: "sd",
+ si: "si",
+ sk: "sk",
+ sl: "sl",
+ sn: "sn",
+ so: "so",
+ sq: "sq",
+ sr: "sr",
+ su: "su",
+ sv: "sv",
+ sw: "sw",
+ ta: "ta",
+ te: "te",
+ tg: "tg",
+ th: "th",
+ tk: "tk",
+ tl: "tl",
+ tr: "tr",
+ tt: "tt",
+ uk: "uk",
+ ur: "ur",
+ uz: "uz",
+ vi: "vi",
+ wo: "wo",
+ yi: "yi",
+ yo: "yo",
+ zh: "zh"
+ };
+
+ // src/router/streaming-enums.ts
+ var DeepgramModel = {
+ // Nova 3 models (latest)
+ "nova-3": "nova-3",
+ "nova-3-general": "nova-3-general",
+ "nova-3-medical": "nova-3-medical",
+ // Nova 2 models
+ "nova-2": "nova-2",
+ "nova-2-general": "nova-2-general",
+ "nova-2-meeting": "nova-2-meeting",
+ "nova-2-finance": "nova-2-finance",
+ "nova-2-conversationalai": "nova-2-conversationalai",
+ "nova-2-voicemail": "nova-2-voicemail",
+ "nova-2-video": "nova-2-video",
+ "nova-2-medical": "nova-2-medical",
+ "nova-2-drivethru": "nova-2-drivethru",
+ "nova-2-automotive": "nova-2-automotive",
+ // Nova 1 models
+ nova: "nova",
+ "nova-general": "nova-general",
+ "nova-phonecall": "nova-phonecall",
+ "nova-medical": "nova-medical",
+ // Enhanced models
+ enhanced: "enhanced",
+ "enhanced-general": "enhanced-general",
+ "enhanced-meeting": "enhanced-meeting",
+ "enhanced-phonecall": "enhanced-phonecall",
+ "enhanced-finance": "enhanced-finance",
+ // Base models
+ base: "base",
+ meeting: "meeting",
+ phonecall: "phonecall",
+ finance: "finance",
+ conversationalai: "conversationalai",
+ voicemail: "voicemail",
+ video: "video"
+ };
+ var AssemblyAIEncoding = {
+ /** PCM signed 16-bit little-endian (recommended) */
+ pcmS16le: "pcm_s16le",
+ /** μ-law (telephony) */
+ pcmMulaw: "pcm_mulaw"
+ };
+ var AssemblyAISpeechModel = {
+ /** Optimized for English */
+ english: "universal-streaming-english",
+ /** Supports 20+ languages */
+ multilingual: "universal-streaming-multilingual"
+ };
+ var AssemblyAISampleRate = {
+ rate8000: 8e3,
+ rate16000: 16e3,
+ rate22050: 22050,
+ rate44100: 44100,
+ rate48000: 48e3
+ };
+
  // src/generated/deepgram/schema/speakV1EncodingParameter.ts
  var SpeakV1EncodingParameter = {
  linear16: "linear16",
@@ -249,30 +555,6 @@ var SpeakV1SampleRateParameter = {
  NUMBER_22050: 22050
  };
 
- // src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
- var StreamingSupportedEncodingEnum = {
- "wav/pcm": "wav/pcm",
- "wav/alaw": "wav/alaw",
- "wav/ulaw": "wav/ulaw"
- };
-
- // src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
- var StreamingSupportedSampleRateEnum = {
- NUMBER_8000: 8e3,
- NUMBER_16000: 16e3,
- NUMBER_32000: 32e3,
- NUMBER_44100: 44100,
- NUMBER_48000: 48e3
- };
-
- // src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
- var StreamingSupportedBitDepthEnum = {
- NUMBER_8: 8,
- NUMBER_16: 16,
- NUMBER_24: 24,
- NUMBER_32: 32
- };
-
  // src/constants/defaults.ts
  var DEFAULT_TIMEOUTS = {
  /** Standard HTTP request timeout for API calls (60 seconds) */
@@ -1160,11 +1442,6 @@ var StreamingResponseStatus = {
  error: "error"
  };
 
- // src/generated/gladia/schema/streamingSupportedModels.ts
- var StreamingSupportedModels = {
- "solaria-1": "solaria-1"
- };
-
  // src/generated/gladia/schema/streamingSupportedRegions.ts
  var StreamingSupportedRegions = {
  "us-west": "us-west",
@@ -1190,232 +1467,25 @@ var SummaryTypesEnum = {
  concise: "concise"
  };
 
- // src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
- var TranscriptionControllerListV2KindItem = {
- "pre-recorded": "pre-recorded",
- live: "live"
- };
-
- // src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
- var TranscriptionControllerListV2StatusItem = {
- queued: "queued",
- processing: "processing",
- done: "done",
- error: "error"
- };
-
- // src/generated/gladia/schema/transcriptionLanguageCodeEnum.ts
- var TranscriptionLanguageCodeEnum = {
- af: "af",
- am: "am",
- ar: "ar",
- as: "as",
- az: "az",
- ba: "ba",
- be: "be",
- bg: "bg",
- bn: "bn",
- bo: "bo",
- br: "br",
- bs: "bs",
- ca: "ca",
- cs: "cs",
- cy: "cy",
- da: "da",
- de: "de",
- el: "el",
- en: "en",
- es: "es",
- et: "et",
- eu: "eu",
- fa: "fa",
- fi: "fi",
- fo: "fo",
- fr: "fr",
- gl: "gl",
- gu: "gu",
- ha: "ha",
- haw: "haw",
- he: "he",
- hi: "hi",
- hr: "hr",
- ht: "ht",
- hu: "hu",
- hy: "hy",
- id: "id",
- is: "is",
- it: "it",
- ja: "ja",
- jw: "jw",
- ka: "ka",
- kk: "kk",
- km: "km",
- kn: "kn",
- ko: "ko",
- la: "la",
- lb: "lb",
- ln: "ln",
- lo: "lo",
- lt: "lt",
- lv: "lv",
- mg: "mg",
- mi: "mi",
- mk: "mk",
- ml: "ml",
- mn: "mn",
- mr: "mr",
- ms: "ms",
- mt: "mt",
- my: "my",
- ne: "ne",
- nl: "nl",
- nn: "nn",
- no: "no",
- oc: "oc",
- pa: "pa",
- pl: "pl",
- ps: "ps",
- pt: "pt",
- ro: "ro",
- ru: "ru",
- sa: "sa",
- sd: "sd",
- si: "si",
- sk: "sk",
- sl: "sl",
- sn: "sn",
- so: "so",
- sq: "sq",
- sr: "sr",
- su: "su",
- sv: "sv",
- sw: "sw",
- ta: "ta",
- te: "te",
- tg: "tg",
- th: "th",
- tk: "tk",
- tl: "tl",
- tr: "tr",
- tt: "tt",
- uk: "uk",
- ur: "ur",
- uz: "uz",
- vi: "vi",
- yi: "yi",
- yo: "yo",
- zh: "zh"
- };
-
+ // src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
+ var TranscriptionControllerListV2KindItem = {
+ "pre-recorded": "pre-recorded",
+ live: "live"
+ };
+
+ // src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
+ var TranscriptionControllerListV2StatusItem = {
+ queued: "queued",
+ processing: "processing",
+ done: "done",
+ error: "error"
+ };
+
  // src/generated/gladia/schema/transcriptMessageType.ts
  var TranscriptMessageType = {
  transcript: "transcript"
  };
 
- // src/generated/gladia/schema/translationLanguageCodeEnum.ts
- var TranslationLanguageCodeEnum = {
- af: "af",
- am: "am",
- ar: "ar",
- as: "as",
- az: "az",
- ba: "ba",
- be: "be",
- bg: "bg",
- bn: "bn",
- bo: "bo",
- br: "br",
- bs: "bs",
- ca: "ca",
- cs: "cs",
- cy: "cy",
- da: "da",
- de: "de",
- el: "el",
- en: "en",
- es: "es",
- et: "et",
- eu: "eu",
- fa: "fa",
- fi: "fi",
- fo: "fo",
- fr: "fr",
- gl: "gl",
- gu: "gu",
- ha: "ha",
- haw: "haw",
- he: "he",
- hi: "hi",
- hr: "hr",
- ht: "ht",
- hu: "hu",
- hy: "hy",
- id: "id",
- is: "is",
- it: "it",
- ja: "ja",
- jw: "jw",
- ka: "ka",
- kk: "kk",
- km: "km",
- kn: "kn",
- ko: "ko",
- la: "la",
- lb: "lb",
- ln: "ln",
- lo: "lo",
- lt: "lt",
- lv: "lv",
- mg: "mg",
- mi: "mi",
- mk: "mk",
- ml: "ml",
- mn: "mn",
- mr: "mr",
- ms: "ms",
- mt: "mt",
- my: "my",
- ne: "ne",
- nl: "nl",
- nn: "nn",
- no: "no",
- oc: "oc",
- pa: "pa",
- pl: "pl",
- ps: "ps",
- pt: "pt",
- ro: "ro",
- ru: "ru",
- sa: "sa",
- sd: "sd",
- si: "si",
- sk: "sk",
- sl: "sl",
- sn: "sn",
- so: "so",
- sq: "sq",
- sr: "sr",
- su: "su",
- sv: "sv",
- sw: "sw",
- ta: "ta",
- te: "te",
- tg: "tg",
- th: "th",
- tk: "tk",
- tl: "tl",
- tr: "tr",
- tt: "tt",
- uk: "uk",
- ur: "ur",
- uz: "uz",
- vi: "vi",
- wo: "wo",
- yi: "yi",
- yo: "yo",
- zh: "zh"
- };
-
  // src/generated/gladia/schema/translationMessageType.ts
  var TranslationMessageType = {
  translation: "translation"
@@ -1983,7 +2053,7 @@ var GladiaAdapter = class extends BaseAdapter {
  }))
  );
  return extractWords(allWords, (item) => ({
- text: item.word.word,
+ word: item.word.word,
  start: item.word.start,
  end: item.word.end,
  confidence: item.word.confidence,
@@ -2003,11 +2073,11 @@ var GladiaAdapter = class extends BaseAdapter {
  end: utterance.end,
  speaker: utterance.speaker?.toString(),
  confidence: utterance.confidence,
- words: utterance.words.map((word) => ({
- text: word.word,
- start: word.start,
- end: word.end,
- confidence: word.confidence
+ words: utterance.words.map((w) => ({
+ word: w.word,
+ start: w.start,
+ end: w.end,
+ confidence: w.confidence
  }))
  }));
  }
@@ -2059,11 +2129,46 @@ var GladiaAdapter = class extends BaseAdapter {
  * Creates a WebSocket connection to Gladia for streaming transcription.
  * First initializes a session via REST API, then connects to WebSocket.
  *
+ * Supports all Gladia streaming features:
+ * - Real-time transcription with interim/final results
+ * - Speech detection events (speech_start, speech_end)
+ * - Real-time translation to other languages
+ * - Real-time sentiment analysis
+ * - Real-time named entity recognition
+ * - Post-processing summarization and chapterization
+ * - Audio preprocessing (audio enhancement, speech threshold)
+ * - Custom vocabulary and spelling
+ * - Multi-language code switching
+ *
  * @param options - Streaming configuration options
+ * @param options.encoding - Audio encoding (wav/pcm, wav/alaw, wav/ulaw)
+ * @param options.sampleRate - Sample rate (8000, 16000, 32000, 44100, 48000)
+ * @param options.bitDepth - Bit depth (8, 16, 24, 32)
+ * @param options.channels - Number of channels (1-8)
+ * @param options.language - Language code for transcription
+ * @param options.interimResults - Enable partial/interim transcripts
+ * @param options.endpointing - Silence duration to end utterance (0.01-10 seconds)
+ * @param options.maxSilence - Max duration without endpointing (5-60 seconds)
+ * @param options.customVocabulary - Words to boost in recognition
+ * @param options.sentimentAnalysis - Enable real-time sentiment analysis
+ * @param options.entityDetection - Enable named entity recognition
+ * @param options.summarization - Enable post-processing summarization
+ * @param options.gladiaStreaming - Full Gladia streaming options (pre_processing, realtime_processing, post_processing, messages_config)
  * @param callbacks - Event callbacks for transcription results
+ * @param callbacks.onTranscript - Interim/final transcript received
+ * @param callbacks.onUtterance - Complete utterance detected
+ * @param callbacks.onSpeechStart - Speech detected (requires messages_config.receive_speech_events)
+ * @param callbacks.onSpeechEnd - Speech ended (requires messages_config.receive_speech_events)
+ * @param callbacks.onTranslation - Translation result (requires translation enabled)
+ * @param callbacks.onSentiment - Sentiment analysis result
+ * @param callbacks.onEntity - Named entity detected
+ * @param callbacks.onSummarization - Summarization completed
+ * @param callbacks.onChapterization - Chapterization completed
+ * @param callbacks.onAudioAck - Audio chunk acknowledged
+ * @param callbacks.onLifecycle - Session lifecycle events
  * @returns Promise that resolves with a StreamingSession
  *
- * @example Real-time streaming
+ * @example Basic real-time streaming
  * ```typescript
  * const session = await adapter.transcribeStream({
  * encoding: 'wav/pcm',
@@ -2085,15 +2190,124 @@ var GladiaAdapter = class extends BaseAdapter {
  * });
  *
  * // Send audio chunks
- * const audioChunk = getAudioChunk(); // Your audio source
+ * const audioChunk = getAudioChunk();
  * await session.sendAudio({ data: audioChunk });
  *
  * // Close when done
  * await session.close();
  * ```
+ *
+ * @example Advanced streaming with all features
+ * ```typescript
+ * const session = await adapter.transcribeStream({
+ * encoding: 'wav/pcm',
+ * sampleRate: 16000,
+ * language: 'en',
+ * sentimentAnalysis: true,
+ * entityDetection: true,
+ * summarization: true,
+ * gladiaStreaming: {
+ * pre_processing: {
+ * audio_enhancer: true,
+ * speech_threshold: 0.5
+ * },
+ * realtime_processing: {
+ * translation: true,
+ * translation_config: { target_languages: ['fr', 'es'] }
+ * },
+ * post_processing: {
+ * chapterization: true
+ * },
+ * messages_config: {
+ * receive_speech_events: true,
+ * receive_acknowledgments: true,
+ * receive_lifecycle_events: true
+ * }
+ * }
+ * }, {
+ * onTranscript: (e) => console.log('Transcript:', e.text),
+ * onSpeechStart: (e) => console.log('Speech started at:', e.timestamp),
+ * onSpeechEnd: (e) => console.log('Speech ended at:', e.timestamp),
+ * onTranslation: (e) => console.log(`${e.targetLanguage}: ${e.translatedText}`),
+ * onSentiment: (e) => console.log('Sentiment:', e.sentiment),
+ * onEntity: (e) => console.log(`Entity: ${e.type} - ${e.text}`),
+ * onSummarization: (e) => console.log('Summary:', e.summary),
+ * onChapterization: (e) => console.log('Chapters:', e.chapters),
+ * onAudioAck: (e) => console.log('Audio ack:', e.byteRange),
+ * onLifecycle: (e) => console.log('Lifecycle:', e.eventType)
+ * });
+ * ```
  */
  async transcribeStream(options, callbacks) {
  this.validateConfig();
+ const streamingRequest = this.buildStreamingRequest(options);
+ const initResponse = await streamingControllerInitStreamingSessionV2(
+ streamingRequest,
+ void 0,
+ // no params
+ this.getAxiosConfig()
+ );
+ const { id, url: wsUrl } = initResponse.data;
+ const ws = new WebSocket(wsUrl);
+ let sessionStatus = "connecting";
+ setupWebSocketHandlers(ws, callbacks, (status) => {
+ sessionStatus = status;
+ });
+ ws.on("message", (data) => {
+ try {
+ const message = JSON.parse(data.toString());
+ this.handleWebSocketMessage(message, callbacks);
+ } catch (error) {
+ callbacks?.onError?.({
+ code: ERROR_CODES.PARSE_ERROR,
+ message: "Failed to parse WebSocket message",
+ details: error
+ });
+ }
+ });
+ await waitForWebSocketOpen(ws);
+ return {
+ id,
+ provider: this.name,
+ createdAt: /* @__PURE__ */ new Date(),
+ getStatus: () => sessionStatus,
+ sendAudio: async (chunk) => {
+ validateSessionForAudio(sessionStatus, ws.readyState, WebSocket.OPEN);
+ ws.send(chunk.data);
+ if (chunk.isLast) {
+ ws.send(
+ JSON.stringify({
+ type: "stop_recording"
+ })
+ );
+ }
+ },
+ close: async () => {
+ if (sessionStatus === "closed" || sessionStatus === "closing") {
+ return;
+ }
+ sessionStatus = "closing";
+ if (ws.readyState === WebSocket.OPEN) {
+ ws.send(
+ JSON.stringify({
+ type: "stop_recording"
+ })
+ );
+ }
+ await closeWebSocket(ws);
+ sessionStatus = "closed";
+ }
+ };
+ }
+ /**
+ * Build streaming request with full type safety from OpenAPI specs
+ *
+ * Maps normalized options to Gladia streaming request format,
+ * including all advanced features like pre-processing, real-time
+ * processing, post-processing, and message configuration.
+ */
+ buildStreamingRequest(options) {
+ const gladiaOpts = options?.gladiaStreaming || {};
  let validatedSampleRate;
  if (options?.sampleRate) {
  validatedSampleRate = validateEnumValue(
@@ -2103,112 +2317,376 @@ var GladiaAdapter = class extends BaseAdapter {
  "Gladia"
  );
  }
+ let validatedBitDepth;
+ if (options?.bitDepth) {
+ validatedBitDepth = validateEnumValue(
+ options.bitDepth,
+ StreamingSupportedBitDepthEnum,
+ "bit depth",
+ "Gladia"
+ );
+ }
  const streamingRequest = {
+ // Spread any direct Gladia streaming options first
+ ...gladiaOpts,
+ // Audio format configuration (these are excluded from gladiaStreaming to avoid conflicts)
  encoding: options?.encoding ? mapEncodingToProvider(options.encoding, "gladia") : void 0,
  sample_rate: validatedSampleRate,
+ bit_depth: validatedBitDepth,
  channels: options?.channels,
- endpointing: options?.endpointing,
- model: options?.model
+ // Model and processing
+ model: options?.model ?? gladiaOpts.model,
+ endpointing: options?.endpointing ?? gladiaOpts.endpointing,
+ maximum_duration_without_endpointing: options?.maxSilence ?? gladiaOpts.maximum_duration_without_endpointing
  };
- if (options?.language) {
+ if (options?.language || options?.codeSwitching || gladiaOpts.language_config) {
  streamingRequest.language_config = {
- languages: [options.language]
+ ...gladiaOpts.language_config,
+ languages: options?.language ? [options.language] : gladiaOpts.language_config?.languages,
+ code_switching: options?.codeSwitching ?? gladiaOpts.language_config?.code_switching
  };
  }
- const initResponse = await streamingControllerInitStreamingSessionV2(
- streamingRequest,
- void 0,
- // no params
- this.getAxiosConfig()
- );
- const { id, url: wsUrl } = initResponse.data;
- const ws = new WebSocket(wsUrl);
- let sessionStatus = "connecting";
- setupWebSocketHandlers(ws, callbacks, (status) => {
- sessionStatus = status;
- });
- ws.on("message", (data) => {
- try {
- const message = JSON.parse(data.toString());
- if (message.type === "transcript") {
- const transcriptMessage = message;
- const messageData = transcriptMessage.data;
- const utterance = messageData.utterance;
- callbacks?.onTranscript?.({
- type: "transcript",
- text: utterance.text,
- isFinal: messageData.is_final,
- confidence: utterance.confidence,
- words: utterance.words.map((word) => ({
- text: word.word,
- start: word.start,
- end: word.end,
- confidence: word.confidence
- })),
- data: message
+ if (gladiaOpts.pre_processing) {
+ streamingRequest.pre_processing = gladiaOpts.pre_processing;
+ }
+ const realtimeProcessing = gladiaOpts.realtime_processing || {};
+ const hasRealtimeOptions = options?.customVocabulary || options?.sentimentAnalysis || options?.entityDetection || realtimeProcessing.translation || realtimeProcessing.custom_vocabulary || realtimeProcessing.custom_spelling || realtimeProcessing.named_entity_recognition || realtimeProcessing.sentiment_analysis;
+ if (hasRealtimeOptions) {
+ streamingRequest.realtime_processing = {
+ ...realtimeProcessing,
+ // Custom vocabulary
+ custom_vocabulary: options?.customVocabulary && options.customVocabulary.length > 0 || realtimeProcessing.custom_vocabulary,
+ custom_vocabulary_config: options?.customVocabulary && options.customVocabulary.length > 0 ? {
+ ...realtimeProcessing.custom_vocabulary_config,
+ vocabulary: options.customVocabulary
+ } : realtimeProcessing.custom_vocabulary_config,
+ // Sentiment analysis
+ sentiment_analysis: options?.sentimentAnalysis ?? realtimeProcessing.sentiment_analysis,
+ // Named entity recognition
+ named_entity_recognition: options?.entityDetection ?? realtimeProcessing.named_entity_recognition
+ };
+ }
+ const postProcessing = gladiaOpts.post_processing || {};
+ if (options?.summarization || postProcessing.summarization || postProcessing.chapterization) {
+ streamingRequest.post_processing = {
+ ...postProcessing,
+ summarization: options?.summarization ?? postProcessing.summarization
+ };
+ }
+ if (gladiaOpts.messages_config) {
+ streamingRequest.messages_config = gladiaOpts.messages_config;
+ } else if (options?.interimResults !== void 0) {
+ streamingRequest.messages_config = {
+ receive_partial_transcripts: options.interimResults,
+ receive_final_transcripts: true
+ };
+ }
+ if (gladiaOpts.callback || gladiaOpts.callback_config) {
+ streamingRequest.callback = gladiaOpts.callback;
+ streamingRequest.callback_config = gladiaOpts.callback_config;
+ }
+ if (gladiaOpts.custom_metadata) {
+ streamingRequest.custom_metadata = gladiaOpts.custom_metadata;
+ }
+ return streamingRequest;
+ }
+ /**
+ * Handle all WebSocket message types from Gladia streaming
+ *
+ * Processes transcript, utterance, speech events, real-time processing
+ * results (translation, sentiment, NER), post-processing results
+ * (summarization, chapterization), acknowledgments, and lifecycle events.
+ */
+ handleWebSocketMessage(message, callbacks) {
+ const msg = message;
+ const messageType = msg.type;
+ switch (messageType) {
+ // ─────────────────────────────────────────────────────────────────
+ // Transcript events
+ // ─────────────────────────────────────────────────────────────────
+ case "transcript": {
+ const transcriptMessage = message;
+ const messageData = transcriptMessage.data;
+ const utterance = messageData.utterance;
+ callbacks?.onTranscript?.({
+ type: "transcript",
+ text: utterance.text,
+ isFinal: messageData.is_final,
+ confidence: utterance.confidence,
+ language: utterance.language,
+ channel: utterance.channel,
+ speaker: utterance.speaker?.toString(),
+ words: utterance.words.map((w) => ({
+ word: w.word,
+ start: w.start,
+ end: w.end,
+ confidence: w.confidence
+ })),
+ data: message
+ });
+ break;
+ }
+ case "utterance": {
+ const transcriptMessage = message;
+ const messageData = transcriptMessage.data;
+ const utterance = messageData.utterance;
+ callbacks?.onUtterance?.({
+ text: utterance.text,
+ start: utterance.start,
+ end: utterance.end,
+ speaker: utterance.speaker?.toString(),
+ confidence: utterance.confidence,
+ words: utterance.words.map((w) => ({
+ word: w.word,
+ start: w.start,
+ end: w.end,
+ confidence: w.confidence
+ }))
+ });
+ break;
+ }
+ // Post-processing transcripts (final accumulated transcript)
+ case "post_transcript": {
+ const postTranscript = message;
+ callbacks?.onTranscript?.({
+ type: "transcript",
+ text: postTranscript.data?.full_transcript || "",
+ isFinal: true,
+ data: message
+ });
+ break;
+ }
+ case "post_final_transcript": {
+ const postFinal = message;
+ callbacks?.onTranscript?.({
+ type: "transcript",
+ text: postFinal.data?.transcription?.full_transcript || "",
+ isFinal: true,
+ data: message
+ });
+ break;
+ }
+ // ─────────────────────────────────────────────────────────────────
+ // Speech detection events
+ // ─────────────────────────────────────────────────────────────────
+ case "speech_start": {
+ const speechStart = message;
+ const event = {
+ type: "speech_start",
+ timestamp: speechStart.data.time,
+ channel: speechStart.data.channel,
+ sessionId: speechStart.session_id
+ };
+ callbacks?.onSpeechStart?.(event);
+ break;
+ }
+ case "speech_end": {
+ const speechEnd = message;
+ const event = {
+ type: "speech_end",
+ timestamp: speechEnd.data.time,
+ channel: speechEnd.data.channel,
+ sessionId: speechEnd.session_id
+ };
+ callbacks?.onSpeechEnd?.(event);
+ break;
+ }
+ // ─────────────────────────────────────────────────────────────────
+ // Real-time processing events
+ // ─────────────────────────────────────────────────────────────────
+ case "translation": {
+ const translationMsg = message;
+ if (translationMsg.error) {
+ callbacks?.onError?.({
+ code: ERROR_CODES.TRANSCRIPTION_ERROR,
+ message: "Translation failed",
+ details: translationMsg.error
+ });
+ } else if (translationMsg.data) {
+ const event = {
+ utteranceId: translationMsg.data.utterance_id,
+ original: translationMsg.data.utterance.text,
+ targetLanguage: translationMsg.data.target_language,
+ translatedText: translationMsg.data.translated_utterance.text,
+ isFinal: true
+ };
+ callbacks?.onTranslation?.(event);
+ }
+ break;
+ }
+ case "sentiment_analysis": {
+ const sentimentMsg = message;
+ if (sentimentMsg.error) {
+ callbacks?.onError?.({
+ code: ERROR_CODES.TRANSCRIPTION_ERROR,
+ message: "Sentiment analysis failed",
+ details: sentimentMsg.error
+ });
+ } else if (sentimentMsg.data) {
+ for (const result of sentimentMsg.data.results) {
+ const event = {
+ utteranceId: sentimentMsg.data.utterance_id,
+ sentiment: result.sentiment,
+ confidence: void 0
+ // Gladia doesn't provide confidence for sentiment
+ };
+ callbacks?.onSentiment?.(event);
+ }
+ }
+ break;
+ }
+ case "named_entity_recognition": {
+ const nerMsg = message;
+ if (nerMsg.error) {
+ callbacks?.onError?.({
+ code: ERROR_CODES.TRANSCRIPTION_ERROR,
+ message: "Named entity recognition failed",
+ details: nerMsg.error
  });
- } else if (message.type === "utterance") {
- const transcriptMessage = message;
- const messageData = transcriptMessage.data;
- const utterance = messageData.utterance;
- const utteranceData = {
- text: utterance.text,
- start: utterance.start,
- end: utterance.end,
- speaker: utterance.speaker?.toString(),
- confidence: utterance.confidence,
- words: utterance.words.map((word) => ({
- text: word.word,
- start: word.start,
- end: word.end,
- confidence: word.confidence
- }))
- };
- callbacks?.onUtterance?.(utteranceData);
- } else if (message.type === "metadata") {
- callbacks?.onMetadata?.(message);
+ } else if (nerMsg.data) {
+ for (const entity of nerMsg.data.results) {
+ const event = {
+ utteranceId: nerMsg.data.utterance_id,
+ text: entity.text,
+ type: entity.entity_type,
+ start: entity.start,
+ end: entity.end
+ };
+ callbacks?.onEntity?.(event);
+ }
  }
- } catch (error) {
- callbacks?.onError?.({
- code: ERROR_CODES.PARSE_ERROR,
- message: "Failed to parse WebSocket message",
- details: error
- });
+ break;
  }
- });
- await waitForWebSocketOpen(ws);
- return {
- id,
- provider: this.name,
- createdAt: /* @__PURE__ */ new Date(),
- getStatus: () => sessionStatus,
- sendAudio: async (chunk) => {
- validateSessionForAudio(sessionStatus, ws.readyState, WebSocket.OPEN);
- ws.send(chunk.data);
- if (chunk.isLast) {
- ws.send(
- JSON.stringify({
- type: "stop_recording"
- })
- );
+ // ─────────────────────────────────────────────────────────────────
+ // Post-processing events
+ // ─────────────────────────────────────────────────────────────────
+ case "post_summarization": {
+ const summaryMsg = message;
+ if (summaryMsg.error) {
+ callbacks?.onSummarization?.({
+ summary: "",
+ error: typeof summaryMsg.error === "string" ? summaryMsg.error : "Summarization failed"
+ });
+ } else if (summaryMsg.data) {
+ callbacks?.onSummarization?.({
+ summary: summaryMsg.data.results
+ });
  }
- },
- close: async () => {
- if (sessionStatus === "closed" || sessionStatus === "closing") {
- return;
+ break;
+ }
+ case "post_chapterization": {
+ const chapterMsg = message;
+ if (chapterMsg.error) {
+ callbacks?.onChapterization?.({
+ chapters: [],
+ error: typeof chapterMsg.error === "string" ? chapterMsg.error : "Chapterization failed"
+ });
+ } else if (chapterMsg.data) {
+ callbacks?.onChapterization?.({
+ chapters: chapterMsg.data.results.map((ch) => ({
+ headline: ch.headline,
+ summary: ch.summary || ch.abstractive_summary || ch.extractive_summary || "",
+ start: ch.start,
+ end: ch.end
+ }))
+ });
  }
- sessionStatus = "closing";
- if (ws.readyState === WebSocket.OPEN) {
- ws.send(
- JSON.stringify({
- type: "stop_recording"
- })
- );
+ break;
+ }
+ // ─────────────────────────────────────────────────────────────────
+ // Acknowledgment events
+ // ─────────────────────────────────────────────────────────────────
+ case "audio_chunk_ack": {
+ const ackMsg = message;
+ if (ackMsg.error) {
+ callbacks?.onError?.({
+ code: ERROR_CODES.TRANSCRIPTION_ERROR,
+ message: "Audio chunk not acknowledged",
+ details: ackMsg.error
+ });
+ } else if (ackMsg.data) {
+ const event = {
+ byteRange: ackMsg.data.byte_range,
+ timeRange: ackMsg.data.time_range,
+ timestamp: ackMsg.created_at
+ };
+ callbacks?.onAudioAck?.(event);
  }
- await closeWebSocket(ws);
- sessionStatus = "closed";
+ break;
  }
- };
+ case "stop_recording_ack": {
+ const stopAck = message;
+ if (stopAck.error) {
+ callbacks?.onError?.({
+ code: ERROR_CODES.TRANSCRIPTION_ERROR,
+ message: "Stop recording not acknowledged",
+ details: stopAck.error
+ });
+ }
+ break;
+ }
+ // ─────────────────────────────────────────────────────────────────
+ // Lifecycle events
+ // ─────────────────────────────────────────────────────────────────
+ case "start_session": {
+ const startSession = message;
+ const event = {
+ eventType: "start_session",
+ timestamp: startSession.created_at,
+ sessionId: startSession.session_id
+ };
+ callbacks?.onLifecycle?.(event);
+ break;
+ }
+ case "start_recording": {
+ const startRecording = message;
+ const event = {
+ eventType: "start_recording",
+ timestamp: startRecording.created_at,
+ sessionId: startRecording.session_id
+ };
+ callbacks?.onLifecycle?.(event);
+ break;
+ }
+ case "end_recording": {
+ const endRecording = message;
+ const event = {
+ eventType: "end_recording",
+ timestamp: endRecording.created_at,
+ sessionId: endRecording.session_id
+ };
+ callbacks?.onLifecycle?.(event);
+ break;
+ }
+ case "end_session": {
+ const endSession = message;
+ const event = {
+ eventType: "end_session",
+ timestamp: endSession.created_at,
+ sessionId: endSession.session_id
+ };
+ callbacks?.onLifecycle?.(event);
+ break;
+ }
+ // ─────────────────────────────────────────────────────────────────
+ // Metadata and other events
+ // ─────────────────────────────────────────────────────────────────
+ case "metadata":
+ callbacks?.onMetadata?.(msg);
+ break;
+ case "error": {
+ const errorMsg = msg;
+ callbacks?.onError?.({
+ code: errorMsg.error?.code || ERROR_CODES.TRANSCRIPTION_ERROR,
+ message: errorMsg.error?.message || "Unknown streaming error",
+ details: msg
+ });
+ break;
+ }
+ default:
+ callbacks?.onMetadata?.(msg);
+ break;
+ }
  }
  };
  function createGladiaAdapter(config) {
@@ -2866,14 +3344,14 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  if (!transcript.words || transcript.words.length === 0) {
  return void 0;
  }
- return transcript.words.map((word) => ({
- text: word.text,
- start: word.start / 1e3,
+ return transcript.words.map((w) => ({
+ word: w.text,
+ start: w.start / 1e3,
  // Convert ms to seconds
- end: word.end / 1e3,
+ end: w.end / 1e3,
  // Convert ms to seconds
- confidence: word.confidence,
- speaker: word.speaker || void 0
+ confidence: w.confidence,
+ speaker: w.speaker || void 0
  }));
  }
  /**
@@ -2891,11 +3369,11 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  // Convert ms to seconds
  speaker: utterance.speaker || void 0,
  confidence: utterance.confidence,
- words: utterance.words.map((word) => ({
- text: word.text,
- start: word.start / 1e3,
- end: word.end / 1e3,
- confidence: word.confidence
+ words: utterance.words.map((w) => ({
+ word: w.text,
+ start: w.start / 1e3,
+ end: w.end / 1e3,
+ confidence: w.confidence
  }))
  }));
  }
@@ -2903,19 +3381,37 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  * Stream audio for real-time transcription
  *
  * Creates a WebSocket connection to AssemblyAI for streaming transcription.
- * First obtains a temporary token, then connects and streams audio chunks.
+ * Uses the v3 Universal Streaming API with full support for all parameters.
+ *
+ * Supports all AssemblyAI streaming features:
+ * - Real-time transcription with interim/final results (Turn events)
+ * - End-of-turn detection tuning (confidence threshold, silence duration)
+ * - Voice Activity Detection (VAD) threshold tuning
+ * - Real-time text formatting
+ * - Profanity filtering
+ * - Custom vocabulary (keyterms)
+ * - Language detection
+ * - Model selection (English or Multilingual)
+ * - Dynamic configuration updates mid-stream
+ * - Force endpoint command
  *
  * @param options - Streaming configuration options
+ * @param options.sampleRate - Sample rate (8000, 16000, 22050, 44100, 48000)
+ * @param options.encoding - Audio encoding (pcm_s16le, pcm_mulaw)
+ * @param options.assemblyaiStreaming - All AssemblyAI-specific streaming options
  * @param callbacks - Event callbacks for transcription results
- * @returns Promise that resolves with a StreamingSession
+ * @param callbacks.onTranscript - Interim/final transcript received (Turn event)
+ * @param callbacks.onUtterance - Complete utterance (Turn with end_of_turn=true)
+ * @param callbacks.onMetadata - Session metadata (Begin, Termination events)
+ * @param callbacks.onError - Error occurred
+ * @param callbacks.onClose - Connection closed
+ * @returns Promise that resolves with an extended StreamingSession
  *
- * @example Real-time streaming
+ * @example Basic real-time streaming
  * ```typescript
  * const session = await adapter.transcribeStream({
- * encoding: 'pcm_s16le',
  * sampleRate: 16000,
- * language: 'en',
- * interimResults: true
+ * encoding: 'pcm_s16le'
  * }, {
  * onOpen: () => console.log('Connected'),
  * onTranscript: (event) => {
@@ -2930,21 +3426,50 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  * });
  *
  * // Send audio chunks
- * const audioChunk = getAudioChunk(); // Your audio source
+ * const audioChunk = getAudioChunk();
  * await session.sendAudio({ data: audioChunk });
  *
  * // Close when done
  * await session.close();
  * ```
+ *
+ * @example Advanced streaming with all features
+ * ```typescript
+ * const session = await adapter.transcribeStream({
+ * sampleRate: 16000,
+ * assemblyaiStreaming: {
+ * speechModel: 'universal-streaming-multilingual',
+ * languageDetection: true,
+ * endOfTurnConfidenceThreshold: 0.7,
+ * minEndOfTurnSilenceWhenConfident: 500,
+ * maxTurnSilence: 15000,
+ * vadThreshold: 0.3,
+ * formatTurns: true,
+ * filterProfanity: true,
+ * keyterms: ['TypeScript', 'JavaScript', 'API'],
+ * inactivityTimeout: 60000
+ * }
+ * }, {
+ * onTranscript: (e) => console.log('Transcript:', e.text),
+ * onMetadata: (m) => console.log('Metadata:', m)
+ * });
+ *
+ * // Update configuration mid-stream
+ * session.updateConfiguration?.({
+ * end_of_turn_confidence_threshold: 0.5,
+ * vad_threshold: 0.2
+ * });
+ *
+ * // Force endpoint detection
+ * session.forceEndpoint?.();
+ * ```
  */
  async transcribeStream(options, callbacks) {
  this.validateConfig();
  if (!this.config?.apiKey) {
  throw new Error("API key is required for streaming");
  }
- const sampleRate = options?.sampleRate || 16e3;
- const encoding = options?.encoding ? mapEncodingToProvider(options.encoding, "assemblyai") : "pcm_s16le";
- const wsUrl = `${this.wsBaseUrl}?sample_rate=${sampleRate}&encoding=${encoding}`;
+ const wsUrl = this.buildStreamingUrl(options);
  const ws = new WebSocket2(wsUrl, {
  headers: {
  Authorization: this.config.apiKey
@@ -2968,43 +3493,7 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  ws.on("message", (data) => {
  try {
  const message = JSON.parse(data.toString());
- if ("error" in message) {
- callbacks?.onError?.({
- code: "API_ERROR",
- message: message.error
- });
- return;
- }
- if (message.type === "Begin") {
- const beginMsg = message;
- callbacks?.onMetadata?.({
- sessionId: beginMsg.id,
- expiresAt: new Date(beginMsg.expires_at).toISOString()
- });
- } else if (message.type === "Turn") {
- const turnMsg = message;
- callbacks?.onTranscript?.({
- type: "transcript",
- text: turnMsg.transcript,
- isFinal: turnMsg.end_of_turn,
- confidence: turnMsg.end_of_turn_confidence,
- words: turnMsg.words.map((word) => ({
- text: word.text,
- start: word.start / 1e3,
- // Convert ms to seconds
- end: word.end / 1e3,
- confidence: word.confidence
- })),
- data: turnMsg
- });
- } else if (message.type === "Termination") {
- const termMsg = message;
- callbacks?.onMetadata?.({
- terminated: true,
- audioDurationSeconds: termMsg.audio_duration_seconds,
- sessionDurationSeconds: termMsg.session_duration_seconds
- });
- }
+ this.handleWebSocketMessage(message, callbacks);
  } catch (error) {
  callbacks?.onError?.({
  code: "PARSE_ERROR",
@@ -3056,11 +3545,7 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  }
  if (chunk.isLast) {
  flushAudioBuffer();
- ws.send(
- JSON.stringify({
- terminate_session: true
- })
- );
+ ws.send(JSON.stringify({ type: "Terminate" }));
  }
  },
  close: async () => {
@@ -3070,11 +3555,7 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  }
  sessionStatus = "closing";
  flushAudioBuffer();
  if (ws.readyState === WebSocket2.OPEN) {
- ws.send(
- JSON.stringify({
- terminate_session: true
- })
- );
+ ws.send(JSON.stringify({ type: "Terminate" }));
  }
  return new Promise((resolve) => {
@@ -3088,9 +3569,166 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  const timeout = setTimeout(() => {
  resolve();
  });
+ },
+ /**
+ * Update streaming configuration mid-session
+ *
+ * Allows changing VAD, end-of-turn, and formatting settings
+ * without restarting the stream.
+ *
+ * @param config - Configuration parameters to update
+ */
+ updateConfiguration: (config) => {
+ if (ws.readyState !== WebSocket2.OPEN) {
+ throw new Error("Cannot update configuration: WebSocket is not open");
+ }
+ const updateMsg = {
+ type: "UpdateConfiguration",
+ ...config
+ };
+ ws.send(JSON.stringify(updateMsg));
+ },
+ /**
+ * Force endpoint detection
+ *
+ * Immediately triggers end-of-turn, useful for manual control
+ * of turn boundaries (e.g., when user presses a button).
+ */
+ forceEndpoint: () => {
+ if (ws.readyState !== WebSocket2.OPEN) {
+ throw new Error("Cannot force endpoint: WebSocket is not open");
+ }
+ const forceMsg = {
+ type: "ForceEndpoint"
+ };
+ ws.send(JSON.stringify(forceMsg));
  }
  };
  }
+ /**
+ * Build WebSocket URL with all streaming parameters
+ */
+ buildStreamingUrl(options) {
+ const params = new URLSearchParams();
+ const aaiOpts = options?.assemblyaiStreaming || {};
+ const sampleRate = options?.sampleRate || aaiOpts.sampleRate || 16e3;
+ params.append("sample_rate", String(sampleRate));
+ const encoding = options?.encoding ? mapEncodingToProvider(options.encoding, "assemblyai") : aaiOpts.encoding || "pcm_s16le";
+ params.append("encoding", encoding);
+ if (aaiOpts.speechModel) {
+ params.append("speech_model", aaiOpts.speechModel);
+ }
+ if (aaiOpts.languageDetection) {
+ params.append("language_detection", "true");
+ }
+ if (aaiOpts.endOfTurnConfidenceThreshold !== void 0) {
+ params.append(
+ "end_of_turn_confidence_threshold",
+ String(aaiOpts.endOfTurnConfidenceThreshold)
+ );
+ }
+ if (aaiOpts.minEndOfTurnSilenceWhenConfident !== void 0) {
+ params.append(
+ "min_end_of_turn_silence_when_confident",
+ String(aaiOpts.minEndOfTurnSilenceWhenConfident)
+ );
+ }
+ if (aaiOpts.maxTurnSilence !== void 0) {
+ params.append("max_turn_silence", String(aaiOpts.maxTurnSilence));
+ }
+ if (aaiOpts.vadThreshold !== void 0) {
+ params.append("vad_threshold", String(aaiOpts.vadThreshold));
+ }
+ if (aaiOpts.formatTurns !== void 0) {
+ params.append("format_turns", String(aaiOpts.formatTurns));
+ }
+ if (aaiOpts.filterProfanity) {
+ params.append("filter_profanity", "true");
+ }
+ const keyterms = options?.customVocabulary || aaiOpts.keyterms;
+ if (keyterms && keyterms.length > 0) {
+ keyterms.forEach((term) => params.append("keyterms", term));
+ }
+ if (aaiOpts.keytermsPrompt && aaiOpts.keytermsPrompt.length > 0) {
+ aaiOpts.keytermsPrompt.forEach((prompt) => params.append("keyterms_prompt", prompt));
+ }
+ if (aaiOpts.inactivityTimeout !== void 0) {
+ params.append("inactivity_timeout", String(aaiOpts.inactivityTimeout));
+ }
+ return `${this.wsBaseUrl}?${params.toString()}`;
+ }
+ /**
+ * Handle all WebSocket message types from AssemblyAI streaming
+ */
+ handleWebSocketMessage(message, callbacks) {
+ if ("error" in message) {
+ callbacks?.onError?.({
+ code: "API_ERROR",
+ message: message.error
+ });
+ return;
+ }
+ const typedMessage = message;
+ switch (typedMessage.type) {
+ case "Begin": {
+ const beginMsg = typedMessage;
+ callbacks?.onMetadata?.({
+ type: "begin",
+ sessionId: beginMsg.id,
+ expiresAt: new Date(beginMsg.expires_at).toISOString()
+ });
+ break;
+ }
+ case "Turn": {
+ const turnMsg = typedMessage;
+ callbacks?.onTranscript?.({
+ type: "transcript",
+ text: turnMsg.transcript,
+ isFinal: turnMsg.end_of_turn,
+ confidence: turnMsg.end_of_turn_confidence,
+ language: turnMsg.language_code,
+ words: turnMsg.words.map((w) => ({
+ word: w.text,
+ start: w.start / 1e3,
+ // Convert ms to seconds
+ end: w.end / 1e3,
+ confidence: w.confidence
+ })),
+ data: turnMsg
+ });
+ if (turnMsg.end_of_turn) {
+ const words = turnMsg.words;
+ const start = words.length > 0 ? words[0].start / 1e3 : 0;
+ const end = words.length > 0 ? words[words.length - 1].end / 1e3 : 0;
+ callbacks?.onUtterance?.({
+ text: turnMsg.transcript,
+ start,
+ end,
+ confidence: turnMsg.end_of_turn_confidence,
+ words: turnMsg.words.map((w) => ({
+ word: w.text,
+ start: w.start / 1e3,
+ end: w.end / 1e3,
+ confidence: w.confidence
+ }))
+ });
+ }
+ break;
+ }
+ case "Termination": {
+ const termMsg = typedMessage;
+ callbacks?.onMetadata?.({
+ type: "termination",
+ audioDurationSeconds: termMsg.audio_duration_seconds,
+ sessionDurationSeconds: termMsg.session_duration_seconds
+ });
+ break;
+ }
+ default:
+ callbacks?.onMetadata?.(message);
+ break;
+ }
+ }
  };
  function createAssemblyAIAdapter(config) {
@@ -3352,11 +3990,11 @@ var DeepgramAdapter = class extends BaseAdapter {
  return void 0;
  }
  return alternative.words.map(
- (word) => ({
- text: word.word || "",
- start: word.start || 0,
- end: word.end || 0,
- confidence: word.confidence,
+ (w) => ({
+ word: w.word || "",
+ start: w.start || 0,
+ end: w.end || 0,
+ confidence: w.confidence,
  speaker: void 0
  // Speaker info is at utterance level, not word level
  })
@@ -3376,11 +4014,11 @@ var DeepgramAdapter = class extends BaseAdapter {
  end: utterance.end || 0,
  speaker: utterance.speaker?.toString(),
  confidence: utterance.confidence,
- words: utterance.words?.map((word) => ({
- text: word.word || "",
- start: word.start || 0,
- end: word.end || 0,
- confidence: word.confidence
+ words: utterance.words?.map((w) => ({
+ word: w.word || "",
+ start: w.start || 0,
+ end: w.end || 0,
+ confidence: w.confidence
  }))
  }));
  }
@@ -3399,11 +4037,44 @@ var DeepgramAdapter = class extends BaseAdapter {
  * Creates a WebSocket connection to Deepgram for streaming transcription.
  * Send audio chunks via session.sendAudio() and receive results via callbacks.
  *
+ * Supports all Deepgram streaming features:
+ * - Real-time transcription with interim/final results
+ * - Speech detection events (SpeechStarted, UtteranceEnd)
+ * - Speaker diarization
+ * - Language detection
+ * - Real-time sentiment, entity detection, topics, intents
+ * - Custom vocabulary (keywords, keyterms)
+ * - PII redaction
+ * - Filler words, numerals, measurements, paragraphs
+ * - Profanity filtering
+ * - Dictation mode
+ *
  * @param options - Streaming configuration options
+ * @param options.encoding - Audio encoding (linear16, flac, mulaw, opus, speex, g729)
+ * @param options.sampleRate - Sample rate in Hz
+ * @param options.channels - Number of audio channels
+ * @param options.language - Language code for transcription
+ * @param options.model - Model to use (nova-2, nova-3, base, enhanced, etc.)
+ * @param options.diarization - Enable speaker identification
+ * @param options.languageDetection - Auto-detect language
+ * @param options.interimResults - Enable partial transcripts
+ * @param options.summarization - Enable summarization
+ * @param options.sentimentAnalysis - Enable sentiment analysis
+ * @param options.entityDetection - Enable entity detection
+ * @param options.piiRedaction - Enable PII redaction
+ * @param options.customVocabulary - Keywords to boost recognition
+ * @param options.deepgramStreaming - All Deepgram-specific streaming options
  * @param callbacks - Event callbacks for transcription results
+ * @param callbacks.onTranscript - Interim/final transcript received
+ * @param callbacks.onUtterance - Complete utterance detected
+ * @param callbacks.onSpeechStart - Speech detected (Deepgram SpeechStarted)
+ * @param callbacks.onSpeechEnd - Speech ended (Deepgram UtteranceEnd)
+ * @param callbacks.onMetadata - Metadata received
+ * @param callbacks.onError - Error occurred
+ * @param callbacks.onClose - Connection closed
  * @returns Promise that resolves with a StreamingSession
  *
- * @example Real-time streaming
+ * @example Basic real-time streaming
  * ```typescript
  * const session = await adapter.transcribeStream({
  * encoding: 'linear16',
@@ -3426,32 +4097,47 @@ var DeepgramAdapter = class extends BaseAdapter {
  * });
  *
  * // Send audio chunks
- * const audioChunk = getAudioChunk(); // Your audio source
+ * const audioChunk = getAudioChunk();
  * await session.sendAudio({ data: audioChunk });
  *
  * // Close when done
  * await session.close();
  * ```
+ *
+ * @example Advanced streaming with all features
+ * ```typescript
+ * const session = await adapter.transcribeStream({
+ *   encoding: 'linear16',
+ *   sampleRate: 16000,
+ *   language: 'en',
+ *   model: 'nova-3',
+ *   diarization: true,
+ *   sentimentAnalysis: true,
+ *   entityDetection: true,
+ *   deepgramStreaming: {
+ *     fillerWords: true,
+ *     numerals: true,
+ *     profanityFilter: true,
+ *     topics: true,
+ *     intents: true,
+ *     customTopic: ['sales', 'support'],
+ *     customIntent: ['purchase', 'complaint'],
+ *     keyterm: ['TypeScript', 'JavaScript'],
+ *     utteranceSplit: 800,
+ *     punctuate: true,
+ *     smartFormat: true
+ *   }
+ * }, {
+ *   onTranscript: (e) => console.log('Transcript:', e.text),
+ *   onSpeechStart: (e) => console.log('Speech started at:', e.timestamp),
+ *   onSpeechEnd: (e) => console.log('Utterance ended'),
+ *   onMetadata: (m) => console.log('Metadata:', m)
+ * });
+ * ```
  */
    async transcribeStream(options, callbacks) {
      this.validateConfig();
-     const params = new URLSearchParams();
-     if (options?.encoding) params.append("encoding", options.encoding);
-     if (options?.sampleRate) params.append("sample_rate", options.sampleRate.toString());
-     if (options?.channels) params.append("channels", options.channels.toString());
-     if (options?.language) params.append("language", options.language);
-     if (options?.model) params.append("model", options.model);
-     if (options?.languageDetection) params.append("detect_language", "true");
-     if (options?.diarization) params.append("diarize", "true");
-     if (options?.interimResults) params.append("interim_results", "true");
-     if (options?.summarization) params.append("summarize", "true");
-     if (options?.sentimentAnalysis) params.append("sentiment", "true");
-     if (options?.entityDetection) params.append("detect_entities", "true");
-     if (options?.piiRedaction) params.append("redact", "pii");
-     if (options?.customVocabulary && options.customVocabulary.length > 0) {
-       params.append("keywords", options.customVocabulary.join(","));
-     }
-     const wsUrl = `${this.wsBaseUrl}?${params.toString()}`;
+     const wsUrl = this.buildStreamingUrl(options);
      const ws = new WebSocket3(wsUrl, {
        headers: {
          Authorization: `Token ${this.config.apiKey}`
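The session object returned here exposes `sendAudio` and `close`, per the JSDoc examples above. As a hedged sketch of driving it from a Node readable stream — the file path and the 3200-byte chunk size (100 ms of 16 kHz, 16-bit mono PCM) are illustrative assumptions, not part of the package:

```typescript
import { createReadStream } from 'node:fs';

// Illustrative only: stream raw PCM from disk into the session.
// './call.raw' and the chunk size are assumptions for this sketch;
// sendAudio/close are the calls documented in the JSDoc above.
const audio = createReadStream('./call.raw', { highWaterMark: 3200 });
for await (const chunk of audio) {
  await session.sendAudio({ data: chunk });
}
await session.close();
```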
@@ -3466,31 +4152,7 @@ var DeepgramAdapter = class extends BaseAdapter {
      ws.on("message", (data) => {
        try {
          const message = JSON.parse(data.toString());
-         if (message.type === "Results") {
-           const channel = message.channel.alternatives[0];
-           if (channel) {
-             const transcript = channel.transcript;
-             const isFinal = message.is_final;
-             const words = channel.words?.map((word) => ({
-               text: word.word,
-               start: word.start,
-               end: word.end,
-               confidence: word.confidence
-             }));
-             callbacks?.onTranscript?.({
-               type: "transcript",
-               text: transcript,
-               isFinal,
-               words,
-               confidence: channel.confidence,
-               data: message
-             });
-           }
-         } else if (message.type === "UtteranceEnd") {
-           callbacks?.onMetadata?.(message);
-         } else if (message.type === "Metadata") {
-           callbacks?.onMetadata?.(message);
-         }
+         this.handleWebSocketMessage(message, callbacks);
        } catch (error) {
          callbacks?.onError?.({
            code: "PARSE_ERROR",
@@ -3563,6 +4225,210 @@ var DeepgramAdapter = class extends BaseAdapter {
        }
      };
    }
+   /**
+    * Build WebSocket URL with all streaming parameters
+    */
+   buildStreamingUrl(options) {
+     const params = new URLSearchParams();
+     const dgOpts = options?.deepgramStreaming || {};
+     if (options?.encoding || dgOpts.encoding) {
+       params.append("encoding", options?.encoding || dgOpts.encoding);
+     }
+     if (options?.sampleRate || dgOpts.sampleRate) {
+       params.append("sample_rate", String(options?.sampleRate || dgOpts.sampleRate));
+     }
+     if (options?.channels || dgOpts.channels) {
+       params.append("channels", String(options?.channels || dgOpts.channels));
+     }
+     if (options?.language || dgOpts.language) {
+       params.append("language", options?.language || dgOpts.language);
+     }
+     if (options?.model || dgOpts.model) {
+       params.append("model", options?.model || dgOpts.model);
+     }
+     if (dgOpts.version) {
+       params.append("version", dgOpts.version);
+     }
+     if (options?.languageDetection || dgOpts.languageDetection) {
+       params.append("detect_language", "true");
+     }
+     if (options?.diarization || dgOpts.diarization) {
+       params.append("diarize", "true");
+     }
+     if (options?.interimResults || dgOpts.interimResults) {
+       params.append("interim_results", "true");
+     }
+     if (dgOpts.punctuate !== void 0) {
+       params.append("punctuate", String(dgOpts.punctuate));
+     }
+     if (dgOpts.smartFormat !== void 0) {
+       params.append("smart_format", String(dgOpts.smartFormat));
+     }
+     if (dgOpts.fillerWords) {
+       params.append("filler_words", "true");
+     }
+     if (dgOpts.numerals) {
+       params.append("numerals", "true");
+     }
+     if (dgOpts.measurements) {
+       params.append("measurements", "true");
+     }
+     if (dgOpts.paragraphs) {
+       params.append("paragraphs", "true");
+     }
+     if (dgOpts.profanityFilter) {
+       params.append("profanity_filter", "true");
+     }
+     if (dgOpts.dictation) {
+       params.append("dictation", "true");
+     }
+     if (dgOpts.utteranceSplit) {
+       params.append("utt_split", String(dgOpts.utteranceSplit));
+     }
+     if (options?.summarization || dgOpts.summarize) {
+       params.append("summarize", "true");
+     }
+     if (options?.sentimentAnalysis || dgOpts.sentiment) {
+       params.append("sentiment", "true");
+     }
+     if (options?.entityDetection || dgOpts.detectEntities) {
+       params.append("detect_entities", "true");
+     }
+     if (dgOpts.topics) {
+       params.append("topics", "true");
+     }
+     if (dgOpts.customTopic && dgOpts.customTopic.length > 0) {
+       dgOpts.customTopic.forEach((topic) => params.append("custom_topic", topic));
+     }
+     if (dgOpts.customTopicMode) {
+       params.append("custom_topic_mode", dgOpts.customTopicMode);
+     }
+     if (dgOpts.intents) {
+       params.append("intents", "true");
+     }
+     if (dgOpts.customIntent && dgOpts.customIntent.length > 0) {
+       dgOpts.customIntent.forEach((intent) => params.append("custom_intent", intent));
+     }
+     if (dgOpts.customIntentMode) {
+       params.append("custom_intent_mode", dgOpts.customIntentMode);
+     }
+     const keywords = options?.customVocabulary || dgOpts.keywords;
+     if (keywords) {
+       const keywordList = Array.isArray(keywords) ? keywords : [keywords];
+       keywordList.forEach((kw) => params.append("keywords", kw));
+     }
+     if (dgOpts.keyterm && dgOpts.keyterm.length > 0) {
+       dgOpts.keyterm.forEach((term) => params.append("keyterm", term));
+     }
+     if (options?.piiRedaction || dgOpts.redact) {
+       if (Array.isArray(dgOpts.redact)) {
+         dgOpts.redact.forEach((r) => params.append("redact", r));
+       } else if (dgOpts.redact === true || options?.piiRedaction) {
+         params.append("redact", "pii");
+         params.append("redact", "pci");
+       }
+     }
+     if (dgOpts.callback) {
+       params.append("callback", dgOpts.callback);
+     }
+     if (dgOpts.tag && dgOpts.tag.length > 0) {
+       dgOpts.tag.forEach((t) => params.append("tag", t));
+     }
+     if (dgOpts.extra) {
+       params.append("extra", JSON.stringify(dgOpts.extra));
+     }
+     if (options?.endpointing !== void 0 || dgOpts.endpointing !== void 0) {
+       const ep = options?.endpointing ?? dgOpts.endpointing;
+       if (ep === false) {
+         params.append("endpointing", "false");
+       } else if (typeof ep === "number") {
+         params.append("endpointing", String(ep));
+       }
+     }
+     if (dgOpts.vadThreshold !== void 0) {
+       params.append("vad_events", "true");
+     }
+     return `${this.wsBaseUrl}?${params.toString()}`;
+   }
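For orientation, a sketch of what `buildStreamingUrl` emits. The `wss://api.deepgram.com/v1/listen` base is an assumption for illustration (`wsBaseUrl` itself is not visible in this hunk). Note that repeatable parameters (`keywords`, `keyterm`, `custom_topic`, `custom_intent`, `redact`, `tag`) are appended once per value, and `piiRedaction: true` appends both `redact=pii` and `redact=pci`:

```typescript
// Hypothetical input, using the option names from the method above:
const options = {
  encoding: 'linear16',
  sampleRate: 16000,
  model: 'nova-3',
  diarization: true,
  piiRedaction: true,
  deepgramStreaming: { fillerWords: true, keyterm: ['TypeScript', 'JavaScript'] }
};

// Expected query string, in the append order of the method above
// (base URL assumed for illustration):
// wss://api.deepgram.com/v1/listen?encoding=linear16&sample_rate=16000
//   &model=nova-3&diarize=true&filler_words=true
//   &keyterm=TypeScript&keyterm=JavaScript&redact=pii&redact=pci
```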
+   /**
+    * Handle all WebSocket message types from Deepgram streaming
+    */
+   handleWebSocketMessage(message, callbacks) {
+     switch (message.type) {
+       case "Results": {
+         const channel = message.channel.alternatives[0];
+         if (channel && channel.transcript) {
+           callbacks?.onTranscript?.({
+             type: "transcript",
+             text: channel.transcript,
+             isFinal: message.is_final,
+             confidence: channel.confidence,
+             language: message.channel.detected_language,
+             words: channel.words?.map((w) => ({
+               word: w.punctuated_word || w.word,
+               start: w.start,
+               end: w.end,
+               confidence: w.confidence,
+               speaker: w.speaker?.toString()
+             })),
+             data: message
+           });
+         }
+         if (message.speech_final && channel && channel.transcript) {
+           callbacks?.onUtterance?.({
+             text: channel.transcript,
+             start: message.start,
+             end: message.start + message.duration,
+             confidence: channel.confidence,
+             words: channel.words?.map((w) => ({
+               word: w.punctuated_word || w.word,
+               start: w.start,
+               end: w.end,
+               confidence: w.confidence
+             }))
+           });
+         }
+         break;
+       }
+       case "SpeechStarted": {
+         const event = {
+           type: "speech_start",
+           timestamp: message.timestamp,
+           channel: message.channel[0]
+         };
+         callbacks?.onSpeechStart?.(event);
+         break;
+       }
+       case "UtteranceEnd": {
+         const event = {
+           type: "speech_end",
+           timestamp: message.last_word_end,
+           channel: message.channel[0]
+         };
+         callbacks?.onSpeechEnd?.(event);
+         break;
+       }
+       case "Metadata": {
+         callbacks?.onMetadata?.(message);
+         break;
+       }
+       case "Error": {
+         callbacks?.onError?.({
+           code: message.variant || "DEEPGRAM_ERROR",
+           message: message.message || message.description || "Unknown error",
+           details: message
+         });
+         break;
+       }
+       case "CloseStream": {
+         break;
+       }
+       default: {
+         callbacks?.onMetadata?.(message);
+         break;
+       }
+     }
+   }
  };
  function createDeepgramAdapter(config) {
    const adapter = new DeepgramAdapter();
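To trace the dispatch in `handleWebSocketMessage`, a sketch of a trimmed `Results` frame; the field values are invented, but the field names are exactly the ones the handler reads:

```typescript
// Hypothetical final Results frame from Deepgram:
const message = {
  type: 'Results',
  is_final: true,
  speech_final: true,
  start: 0,
  duration: 1.2,
  channel: {
    alternatives: [{
      transcript: 'hello world',
      confidence: 0.98,
      words: [
        { word: 'hello', punctuated_word: 'Hello', start: 0, end: 0.4, confidence: 0.99 },
        { word: 'world', start: 0.5, end: 0.9, confidence: 0.97 }
      ]
    }]
  }
};

// The switch above fires both callbacks for this frame:
// onTranscript({ type: 'transcript', text: 'hello world', isFinal: true, ... })
// onUtterance({ text: 'hello world', start: 0, end: 1.2, ... })  // speech_final
```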
@@ -3816,12 +4682,12 @@ var AzureSTTAdapter = class extends BaseAdapter {
      const recognizedPhrases = transcriptionData.recognizedPhrases || [];
      const fullText = combinedPhrases.map((phrase) => phrase.display || phrase.lexical).join(" ") || "";
      const words = recognizedPhrases.flatMap(
-       (phrase) => (phrase.nBest?.[0]?.words || []).map((word) => ({
-         text: word.word,
-         start: word.offsetInTicks / 1e7,
+       (phrase) => (phrase.nBest?.[0]?.words || []).map((w) => ({
+         word: w.word,
+         start: w.offsetInTicks / 1e7,
          // Convert ticks to seconds
-         end: (word.offsetInTicks + word.durationInTicks) / 1e7,
-         confidence: word.confidence,
+         end: (w.offsetInTicks + w.durationInTicks) / 1e7,
+         confidence: w.confidence,
          speaker: phrase.speaker !== void 0 ? phrase.speaker.toString() : void 0
        }))
      );
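The `/ 1e7` above converts Azure's tick offsets to seconds: Azure batch transcription reports timing in 100-nanosecond ticks, so 10,000,000 ticks equal one second. A worked example with invented tick values:

```typescript
// A word at offsetInTicks = 23_500_000 with durationInTicks = 4_200_000:
const start = 23_500_000 / 1e7;             // 2.35 s
const end = (23_500_000 + 4_200_000) / 1e7; // 2.77 s
```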
@@ -4102,10 +4968,10 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
      }
      if ("duration" in response && "language" in response) {
        const verboseResponse = response;
-       const words = verboseResponse.words?.map((word) => ({
-         text: word.word,
-         start: word.start,
-         end: word.end,
+       const words = verboseResponse.words?.map((w) => ({
+         word: w.word,
+         start: w.start,
+         end: w.end,
          confidence: void 0
        }));
        const requestId2 = `openai-${Date.now()}`;
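This rename (`text` → `word`) recurs across every adapter and webhook handler in this release. The resulting normalized shape, as far as it can be read off the diff — the interface name here is illustrative; the package's exported type name is not visible in this file:

```typescript
// Illustrative reconstruction of the unified word entry:
interface NormalizedWord {
  word: string;        // was `text` in 0.2.7
  start: number;       // seconds
  end: number;         // seconds
  confidence?: number; // undefined where the provider omits it (Whisper above)
  speaker?: string;    // stringified speaker id from diarizing providers
}
```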
@@ -4371,7 +5237,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
    normalizeResponse(response) {
      const text = response.results.filter((r) => r.type === "word" && r.alternatives).map((r) => r.alternatives[0]?.content || "").join(" ");
      const words = response.results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
-       text: result.alternatives?.[0]?.content || "",
+       word: result.alternatives?.[0]?.content || "",
        start: result.start_time,
        end: result.end_time,
        confidence: result.alternatives?.[0]?.confidence,
@@ -4522,12 +5388,12 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
    /**
     * Convert Gladia WordDTO to unified Word type
     */
-   mapWord(word) {
+   mapWord(w) {
      return {
-       text: word.word,
-       start: word.start,
-       end: word.end,
-       confidence: word.confidence
+       word: w.word,
+       start: w.start,
+       end: w.end,
+       confidence: w.confidence
      };
    }
    /**
@@ -4865,11 +5731,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
          raw: payload
        };
      }
-     const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((word) => ({
-       text: word.word || "",
-       start: word.start || 0,
-       end: word.end || 0,
-       confidence: word.confidence
+     const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((w) => ({
+       word: w.word || "",
+       start: w.start || 0,
+       end: w.end || 0,
+       confidence: w.confidence
      })) : void 0;
      const speakers = response.results.utterances && response.results.utterances.length > 0 ? response.results.utterances.map((utterance) => ({
        id: utterance.speaker?.toString() || "unknown",
@@ -4883,11 +5749,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
        end: utterance.end || 0,
        speaker: utterance.speaker?.toString(),
        confidence: utterance.confidence,
-       words: utterance.words && utterance.words.length > 0 ? utterance.words.map((word) => ({
-         text: word.word || "",
-         start: word.start || 0,
-         end: word.end || 0,
-         confidence: word.confidence
+       words: utterance.words && utterance.words.length > 0 ? utterance.words.map((w) => ({
+         word: w.word || "",
+         start: w.start || 0,
+         end: w.end || 0,
+         confidence: w.confidence
        })) : void 0
      })) : void 0;
      const summary = alternative.summaries?.[0]?.summary;
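A sketch of how one webhook utterance flows through the mapping above; the input values are invented, the field names come from the diff:

```typescript
// Hypothetical utterance from a Deepgram webhook payload:
const utterance = {
  speaker: 0,
  transcript: 'thanks for calling',
  start: 3.1,
  end: 4.6,
  confidence: 0.95,
  words: [{ word: 'thanks', start: 3.1, end: 3.4, confidence: 0.96 }]
};

// Per the mapping above, `speakers` gets an entry with id '0' (stringified),
// and the utterance's words become renamed entries: { word: 'thanks', start: 3.1, ... }.
```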
@@ -5398,6 +6264,9 @@ function createWebhookRouter() {
  }
  export {
    AssemblyAIAdapter,
+   AssemblyAIEncoding,
+   AssemblyAISampleRate,
+   AssemblyAISpeechModel,
    schema_exports2 as AssemblyAITypes,
    AssemblyAIWebhookHandler,
    AzureSTTAdapter,
@@ -5405,8 +6274,18 @@ export {
    BaseAdapter,
    BaseWebhookHandler,
    DeepgramAdapter,
+   ListenV1EncodingParameter as DeepgramEncoding,
+   DeepgramModel,
+   ListenV1RedactParameterOneOfItem as DeepgramRedact,
+   SharedCustomTopicModeParameter as DeepgramTopicMode,
    DeepgramWebhookHandler,
    GladiaAdapter,
+   StreamingSupportedBitDepthEnum as GladiaBitDepth,
+   StreamingSupportedEncodingEnum as GladiaEncoding,
+   TranscriptionLanguageCodeEnum as GladiaLanguage,
+   StreamingSupportedModels as GladiaModel,
+   StreamingSupportedSampleRateEnum as GladiaSampleRate,
+   TranslationLanguageCodeEnum as GladiaTranslationLanguage,
    schema_exports as GladiaTypes,
    GladiaWebhookHandler,
    ListenV1EncodingParameter,