voice-router-dev 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +840 -117
- package/dist/index.d.ts +840 -117
- package/dist/index.js +1388 -496
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1375 -496
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -32,6 +32,9 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
32
32
|
var src_exports = {};
|
|
33
33
|
__export(src_exports, {
|
|
34
34
|
AssemblyAIAdapter: () => AssemblyAIAdapter,
|
|
35
|
+
AssemblyAIEncoding: () => AssemblyAIEncoding,
|
|
36
|
+
AssemblyAISampleRate: () => AssemblyAISampleRate,
|
|
37
|
+
AssemblyAISpeechModel: () => AssemblyAISpeechModel,
|
|
35
38
|
AssemblyAITypes: () => schema_exports2,
|
|
36
39
|
AssemblyAIWebhookHandler: () => AssemblyAIWebhookHandler,
|
|
37
40
|
AzureSTTAdapter: () => AzureSTTAdapter,
|
|
@@ -39,8 +42,18 @@ __export(src_exports, {
|
|
|
39
42
|
BaseAdapter: () => BaseAdapter,
|
|
40
43
|
BaseWebhookHandler: () => BaseWebhookHandler,
|
|
41
44
|
DeepgramAdapter: () => DeepgramAdapter,
|
|
45
|
+
DeepgramEncoding: () => ListenV1EncodingParameter,
|
|
46
|
+
DeepgramModel: () => DeepgramModel,
|
|
47
|
+
DeepgramRedact: () => ListenV1RedactParameterOneOfItem,
|
|
48
|
+
DeepgramTopicMode: () => SharedCustomTopicModeParameter,
|
|
42
49
|
DeepgramWebhookHandler: () => DeepgramWebhookHandler,
|
|
43
50
|
GladiaAdapter: () => GladiaAdapter,
|
|
51
|
+
GladiaBitDepth: () => StreamingSupportedBitDepthEnum,
|
|
52
|
+
GladiaEncoding: () => StreamingSupportedEncodingEnum,
|
|
53
|
+
GladiaLanguage: () => TranscriptionLanguageCodeEnum,
|
|
54
|
+
GladiaModel: () => StreamingSupportedModels,
|
|
55
|
+
GladiaSampleRate: () => StreamingSupportedSampleRateEnum,
|
|
56
|
+
GladiaTranslationLanguage: () => TranslationLanguageCodeEnum,
|
|
44
57
|
GladiaTypes: () => schema_exports,
|
|
45
58
|
GladiaWebhookHandler: () => GladiaWebhookHandler,
|
|
46
59
|
ListenV1EncodingParameter: () => ListenV1EncodingParameter,
|
|
@@ -285,6 +298,312 @@ var ListenV1EncodingParameter = {
|
|
|
285
298
|
g729: "g729"
|
|
286
299
|
};
|
|
287
300
|
|
|
301
|
+
// src/generated/deepgram/schema/listenV1RedactParameterOneOfItem.ts
|
|
302
|
+
var ListenV1RedactParameterOneOfItem = {
|
|
303
|
+
pci: "pci",
|
|
304
|
+
pii: "pii",
|
|
305
|
+
numbers: "numbers"
|
|
306
|
+
};
|
|
307
|
+
|
|
308
|
+
// src/generated/deepgram/schema/sharedCustomTopicModeParameter.ts
|
|
309
|
+
var SharedCustomTopicModeParameter = {
|
|
310
|
+
extended: "extended",
|
|
311
|
+
strict: "strict"
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
// src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
|
|
315
|
+
var StreamingSupportedEncodingEnum = {
|
|
316
|
+
"wav/pcm": "wav/pcm",
|
|
317
|
+
"wav/alaw": "wav/alaw",
|
|
318
|
+
"wav/ulaw": "wav/ulaw"
|
|
319
|
+
};
|
|
320
|
+
|
|
321
|
+
// src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
|
|
322
|
+
var StreamingSupportedSampleRateEnum = {
|
|
323
|
+
NUMBER_8000: 8e3,
|
|
324
|
+
NUMBER_16000: 16e3,
|
|
325
|
+
NUMBER_32000: 32e3,
|
|
326
|
+
NUMBER_44100: 44100,
|
|
327
|
+
NUMBER_48000: 48e3
|
|
328
|
+
};
|
|
329
|
+
|
|
330
|
+
// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
|
|
331
|
+
var StreamingSupportedBitDepthEnum = {
|
|
332
|
+
NUMBER_8: 8,
|
|
333
|
+
NUMBER_16: 16,
|
|
334
|
+
NUMBER_24: 24,
|
|
335
|
+
NUMBER_32: 32
|
|
336
|
+
};
|
|
337
|
+
|
|
338
|
+
// src/generated/gladia/schema/streamingSupportedModels.ts
|
|
339
|
+
var StreamingSupportedModels = {
|
|
340
|
+
"solaria-1": "solaria-1"
|
|
341
|
+
};
|
|
342
|
+
|
|
343
|
+
// src/generated/gladia/schema/transcriptionLanguageCodeEnum.ts
|
|
344
|
+
var TranscriptionLanguageCodeEnum = {
|
|
345
|
+
af: "af",
|
|
346
|
+
am: "am",
|
|
347
|
+
ar: "ar",
|
|
348
|
+
as: "as",
|
|
349
|
+
az: "az",
|
|
350
|
+
ba: "ba",
|
|
351
|
+
be: "be",
|
|
352
|
+
bg: "bg",
|
|
353
|
+
bn: "bn",
|
|
354
|
+
bo: "bo",
|
|
355
|
+
br: "br",
|
|
356
|
+
bs: "bs",
|
|
357
|
+
ca: "ca",
|
|
358
|
+
cs: "cs",
|
|
359
|
+
cy: "cy",
|
|
360
|
+
da: "da",
|
|
361
|
+
de: "de",
|
|
362
|
+
el: "el",
|
|
363
|
+
en: "en",
|
|
364
|
+
es: "es",
|
|
365
|
+
et: "et",
|
|
366
|
+
eu: "eu",
|
|
367
|
+
fa: "fa",
|
|
368
|
+
fi: "fi",
|
|
369
|
+
fo: "fo",
|
|
370
|
+
fr: "fr",
|
|
371
|
+
gl: "gl",
|
|
372
|
+
gu: "gu",
|
|
373
|
+
ha: "ha",
|
|
374
|
+
haw: "haw",
|
|
375
|
+
he: "he",
|
|
376
|
+
hi: "hi",
|
|
377
|
+
hr: "hr",
|
|
378
|
+
ht: "ht",
|
|
379
|
+
hu: "hu",
|
|
380
|
+
hy: "hy",
|
|
381
|
+
id: "id",
|
|
382
|
+
is: "is",
|
|
383
|
+
it: "it",
|
|
384
|
+
ja: "ja",
|
|
385
|
+
jw: "jw",
|
|
386
|
+
ka: "ka",
|
|
387
|
+
kk: "kk",
|
|
388
|
+
km: "km",
|
|
389
|
+
kn: "kn",
|
|
390
|
+
ko: "ko",
|
|
391
|
+
la: "la",
|
|
392
|
+
lb: "lb",
|
|
393
|
+
ln: "ln",
|
|
394
|
+
lo: "lo",
|
|
395
|
+
lt: "lt",
|
|
396
|
+
lv: "lv",
|
|
397
|
+
mg: "mg",
|
|
398
|
+
mi: "mi",
|
|
399
|
+
mk: "mk",
|
|
400
|
+
ml: "ml",
|
|
401
|
+
mn: "mn",
|
|
402
|
+
mr: "mr",
|
|
403
|
+
ms: "ms",
|
|
404
|
+
mt: "mt",
|
|
405
|
+
my: "my",
|
|
406
|
+
ne: "ne",
|
|
407
|
+
nl: "nl",
|
|
408
|
+
nn: "nn",
|
|
409
|
+
no: "no",
|
|
410
|
+
oc: "oc",
|
|
411
|
+
pa: "pa",
|
|
412
|
+
pl: "pl",
|
|
413
|
+
ps: "ps",
|
|
414
|
+
pt: "pt",
|
|
415
|
+
ro: "ro",
|
|
416
|
+
ru: "ru",
|
|
417
|
+
sa: "sa",
|
|
418
|
+
sd: "sd",
|
|
419
|
+
si: "si",
|
|
420
|
+
sk: "sk",
|
|
421
|
+
sl: "sl",
|
|
422
|
+
sn: "sn",
|
|
423
|
+
so: "so",
|
|
424
|
+
sq: "sq",
|
|
425
|
+
sr: "sr",
|
|
426
|
+
su: "su",
|
|
427
|
+
sv: "sv",
|
|
428
|
+
sw: "sw",
|
|
429
|
+
ta: "ta",
|
|
430
|
+
te: "te",
|
|
431
|
+
tg: "tg",
|
|
432
|
+
th: "th",
|
|
433
|
+
tk: "tk",
|
|
434
|
+
tl: "tl",
|
|
435
|
+
tr: "tr",
|
|
436
|
+
tt: "tt",
|
|
437
|
+
uk: "uk",
|
|
438
|
+
ur: "ur",
|
|
439
|
+
uz: "uz",
|
|
440
|
+
vi: "vi",
|
|
441
|
+
yi: "yi",
|
|
442
|
+
yo: "yo",
|
|
443
|
+
zh: "zh"
|
|
444
|
+
};
|
|
445
|
+
|
|
446
|
+
// src/generated/gladia/schema/translationLanguageCodeEnum.ts
|
|
447
|
+
var TranslationLanguageCodeEnum = {
|
|
448
|
+
af: "af",
|
|
449
|
+
am: "am",
|
|
450
|
+
ar: "ar",
|
|
451
|
+
as: "as",
|
|
452
|
+
az: "az",
|
|
453
|
+
ba: "ba",
|
|
454
|
+
be: "be",
|
|
455
|
+
bg: "bg",
|
|
456
|
+
bn: "bn",
|
|
457
|
+
bo: "bo",
|
|
458
|
+
br: "br",
|
|
459
|
+
bs: "bs",
|
|
460
|
+
ca: "ca",
|
|
461
|
+
cs: "cs",
|
|
462
|
+
cy: "cy",
|
|
463
|
+
da: "da",
|
|
464
|
+
de: "de",
|
|
465
|
+
el: "el",
|
|
466
|
+
en: "en",
|
|
467
|
+
es: "es",
|
|
468
|
+
et: "et",
|
|
469
|
+
eu: "eu",
|
|
470
|
+
fa: "fa",
|
|
471
|
+
fi: "fi",
|
|
472
|
+
fo: "fo",
|
|
473
|
+
fr: "fr",
|
|
474
|
+
gl: "gl",
|
|
475
|
+
gu: "gu",
|
|
476
|
+
ha: "ha",
|
|
477
|
+
haw: "haw",
|
|
478
|
+
he: "he",
|
|
479
|
+
hi: "hi",
|
|
480
|
+
hr: "hr",
|
|
481
|
+
ht: "ht",
|
|
482
|
+
hu: "hu",
|
|
483
|
+
hy: "hy",
|
|
484
|
+
id: "id",
|
|
485
|
+
is: "is",
|
|
486
|
+
it: "it",
|
|
487
|
+
ja: "ja",
|
|
488
|
+
jw: "jw",
|
|
489
|
+
ka: "ka",
|
|
490
|
+
kk: "kk",
|
|
491
|
+
km: "km",
|
|
492
|
+
kn: "kn",
|
|
493
|
+
ko: "ko",
|
|
494
|
+
la: "la",
|
|
495
|
+
lb: "lb",
|
|
496
|
+
ln: "ln",
|
|
497
|
+
lo: "lo",
|
|
498
|
+
lt: "lt",
|
|
499
|
+
lv: "lv",
|
|
500
|
+
mg: "mg",
|
|
501
|
+
mi: "mi",
|
|
502
|
+
mk: "mk",
|
|
503
|
+
ml: "ml",
|
|
504
|
+
mn: "mn",
|
|
505
|
+
mr: "mr",
|
|
506
|
+
ms: "ms",
|
|
507
|
+
mt: "mt",
|
|
508
|
+
my: "my",
|
|
509
|
+
ne: "ne",
|
|
510
|
+
nl: "nl",
|
|
511
|
+
nn: "nn",
|
|
512
|
+
no: "no",
|
|
513
|
+
oc: "oc",
|
|
514
|
+
pa: "pa",
|
|
515
|
+
pl: "pl",
|
|
516
|
+
ps: "ps",
|
|
517
|
+
pt: "pt",
|
|
518
|
+
ro: "ro",
|
|
519
|
+
ru: "ru",
|
|
520
|
+
sa: "sa",
|
|
521
|
+
sd: "sd",
|
|
522
|
+
si: "si",
|
|
523
|
+
sk: "sk",
|
|
524
|
+
sl: "sl",
|
|
525
|
+
sn: "sn",
|
|
526
|
+
so: "so",
|
|
527
|
+
sq: "sq",
|
|
528
|
+
sr: "sr",
|
|
529
|
+
su: "su",
|
|
530
|
+
sv: "sv",
|
|
531
|
+
sw: "sw",
|
|
532
|
+
ta: "ta",
|
|
533
|
+
te: "te",
|
|
534
|
+
tg: "tg",
|
|
535
|
+
th: "th",
|
|
536
|
+
tk: "tk",
|
|
537
|
+
tl: "tl",
|
|
538
|
+
tr: "tr",
|
|
539
|
+
tt: "tt",
|
|
540
|
+
uk: "uk",
|
|
541
|
+
ur: "ur",
|
|
542
|
+
uz: "uz",
|
|
543
|
+
vi: "vi",
|
|
544
|
+
wo: "wo",
|
|
545
|
+
yi: "yi",
|
|
546
|
+
yo: "yo",
|
|
547
|
+
zh: "zh"
|
|
548
|
+
};
|
|
549
|
+
|
|
550
|
+
// src/router/streaming-enums.ts
|
|
551
|
+
var DeepgramModel = {
|
|
552
|
+
// Nova 3 models (latest)
|
|
553
|
+
"nova-3": "nova-3",
|
|
554
|
+
"nova-3-general": "nova-3-general",
|
|
555
|
+
"nova-3-medical": "nova-3-medical",
|
|
556
|
+
// Nova 2 models
|
|
557
|
+
"nova-2": "nova-2",
|
|
558
|
+
"nova-2-general": "nova-2-general",
|
|
559
|
+
"nova-2-meeting": "nova-2-meeting",
|
|
560
|
+
"nova-2-finance": "nova-2-finance",
|
|
561
|
+
"nova-2-conversationalai": "nova-2-conversationalai",
|
|
562
|
+
"nova-2-voicemail": "nova-2-voicemail",
|
|
563
|
+
"nova-2-video": "nova-2-video",
|
|
564
|
+
"nova-2-medical": "nova-2-medical",
|
|
565
|
+
"nova-2-drivethru": "nova-2-drivethru",
|
|
566
|
+
"nova-2-automotive": "nova-2-automotive",
|
|
567
|
+
// Nova 1 models
|
|
568
|
+
nova: "nova",
|
|
569
|
+
"nova-general": "nova-general",
|
|
570
|
+
"nova-phonecall": "nova-phonecall",
|
|
571
|
+
"nova-medical": "nova-medical",
|
|
572
|
+
// Enhanced models
|
|
573
|
+
enhanced: "enhanced",
|
|
574
|
+
"enhanced-general": "enhanced-general",
|
|
575
|
+
"enhanced-meeting": "enhanced-meeting",
|
|
576
|
+
"enhanced-phonecall": "enhanced-phonecall",
|
|
577
|
+
"enhanced-finance": "enhanced-finance",
|
|
578
|
+
// Base models
|
|
579
|
+
base: "base",
|
|
580
|
+
meeting: "meeting",
|
|
581
|
+
phonecall: "phonecall",
|
|
582
|
+
finance: "finance",
|
|
583
|
+
conversationalai: "conversationalai",
|
|
584
|
+
voicemail: "voicemail",
|
|
585
|
+
video: "video"
|
|
586
|
+
};
|
|
587
|
+
var AssemblyAIEncoding = {
|
|
588
|
+
/** PCM signed 16-bit little-endian (recommended) */
|
|
589
|
+
pcmS16le: "pcm_s16le",
|
|
590
|
+
/** μ-law (telephony) */
|
|
591
|
+
pcmMulaw: "pcm_mulaw"
|
|
592
|
+
};
|
|
593
|
+
var AssemblyAISpeechModel = {
|
|
594
|
+
/** Optimized for English */
|
|
595
|
+
english: "universal-streaming-english",
|
|
596
|
+
/** Supports 20+ languages */
|
|
597
|
+
multilingual: "universal-streaming-multilingual"
|
|
598
|
+
};
|
|
599
|
+
var AssemblyAISampleRate = {
|
|
600
|
+
rate8000: 8e3,
|
|
601
|
+
rate16000: 16e3,
|
|
602
|
+
rate22050: 22050,
|
|
603
|
+
rate44100: 44100,
|
|
604
|
+
rate48000: 48e3
|
|
605
|
+
};
|
|
606
|
+
|
|
288
607
|
// src/generated/deepgram/schema/speakV1EncodingParameter.ts
|
|
289
608
|
var SpeakV1EncodingParameter = {
|
|
290
609
|
linear16: "linear16",
|
|
@@ -314,30 +633,6 @@ var SpeakV1SampleRateParameter = {
|
|
|
314
633
|
NUMBER_22050: 22050
|
|
315
634
|
};
|
|
316
635
|
|
|
317
|
-
// src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
|
|
318
|
-
var StreamingSupportedEncodingEnum = {
|
|
319
|
-
"wav/pcm": "wav/pcm",
|
|
320
|
-
"wav/alaw": "wav/alaw",
|
|
321
|
-
"wav/ulaw": "wav/ulaw"
|
|
322
|
-
};
|
|
323
|
-
|
|
324
|
-
// src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
|
|
325
|
-
var StreamingSupportedSampleRateEnum = {
|
|
326
|
-
NUMBER_8000: 8e3,
|
|
327
|
-
NUMBER_16000: 16e3,
|
|
328
|
-
NUMBER_32000: 32e3,
|
|
329
|
-
NUMBER_44100: 44100,
|
|
330
|
-
NUMBER_48000: 48e3
|
|
331
|
-
};
|
|
332
|
-
|
|
333
|
-
// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
|
|
334
|
-
var StreamingSupportedBitDepthEnum = {
|
|
335
|
-
NUMBER_8: 8,
|
|
336
|
-
NUMBER_16: 16,
|
|
337
|
-
NUMBER_24: 24,
|
|
338
|
-
NUMBER_32: 32
|
|
339
|
-
};
|
|
340
|
-
|
|
341
636
|
// src/constants/defaults.ts
|
|
342
637
|
var DEFAULT_TIMEOUTS = {
|
|
343
638
|
/** Standard HTTP request timeout for API calls (60 seconds) */
|
|
@@ -1225,11 +1520,6 @@ var StreamingResponseStatus = {
|
|
|
1225
1520
|
error: "error"
|
|
1226
1521
|
};
|
|
1227
1522
|
|
|
1228
|
-
// src/generated/gladia/schema/streamingSupportedModels.ts
|
|
1229
|
-
var StreamingSupportedModels = {
|
|
1230
|
-
"solaria-1": "solaria-1"
|
|
1231
|
-
};
|
|
1232
|
-
|
|
1233
1523
|
// src/generated/gladia/schema/streamingSupportedRegions.ts
|
|
1234
1524
|
var StreamingSupportedRegions = {
|
|
1235
1525
|
"us-west": "us-west",
|
|
@@ -1255,232 +1545,25 @@ var SummaryTypesEnum = {
|
|
|
1255
1545
|
concise: "concise"
|
|
1256
1546
|
};
|
|
1257
1547
|
|
|
1258
|
-
// src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
|
|
1259
|
-
var TranscriptionControllerListV2KindItem = {
|
|
1260
|
-
"pre-recorded": "pre-recorded",
|
|
1261
|
-
live: "live"
|
|
1262
|
-
};
|
|
1263
|
-
|
|
1264
|
-
// src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
|
|
1265
|
-
var TranscriptionControllerListV2StatusItem = {
|
|
1266
|
-
queued: "queued",
|
|
1267
|
-
processing: "processing",
|
|
1268
|
-
done: "done",
|
|
1269
|
-
error: "error"
|
|
1270
|
-
};
|
|
1271
|
-
|
|
1272
|
-
// src/generated/gladia/schema/transcriptionLanguageCodeEnum.ts
|
|
1273
|
-
var TranscriptionLanguageCodeEnum = {
|
|
1274
|
-
af: "af",
|
|
1275
|
-
am: "am",
|
|
1276
|
-
ar: "ar",
|
|
1277
|
-
as: "as",
|
|
1278
|
-
az: "az",
|
|
1279
|
-
ba: "ba",
|
|
1280
|
-
be: "be",
|
|
1281
|
-
bg: "bg",
|
|
1282
|
-
bn: "bn",
|
|
1283
|
-
bo: "bo",
|
|
1284
|
-
br: "br",
|
|
1285
|
-
bs: "bs",
|
|
1286
|
-
ca: "ca",
|
|
1287
|
-
cs: "cs",
|
|
1288
|
-
cy: "cy",
|
|
1289
|
-
da: "da",
|
|
1290
|
-
de: "de",
|
|
1291
|
-
el: "el",
|
|
1292
|
-
en: "en",
|
|
1293
|
-
es: "es",
|
|
1294
|
-
et: "et",
|
|
1295
|
-
eu: "eu",
|
|
1296
|
-
fa: "fa",
|
|
1297
|
-
fi: "fi",
|
|
1298
|
-
fo: "fo",
|
|
1299
|
-
fr: "fr",
|
|
1300
|
-
gl: "gl",
|
|
1301
|
-
gu: "gu",
|
|
1302
|
-
ha: "ha",
|
|
1303
|
-
haw: "haw",
|
|
1304
|
-
he: "he",
|
|
1305
|
-
hi: "hi",
|
|
1306
|
-
hr: "hr",
|
|
1307
|
-
ht: "ht",
|
|
1308
|
-
hu: "hu",
|
|
1309
|
-
hy: "hy",
|
|
1310
|
-
id: "id",
|
|
1311
|
-
is: "is",
|
|
1312
|
-
it: "it",
|
|
1313
|
-
ja: "ja",
|
|
1314
|
-
jw: "jw",
|
|
1315
|
-
ka: "ka",
|
|
1316
|
-
kk: "kk",
|
|
1317
|
-
km: "km",
|
|
1318
|
-
kn: "kn",
|
|
1319
|
-
ko: "ko",
|
|
1320
|
-
la: "la",
|
|
1321
|
-
lb: "lb",
|
|
1322
|
-
ln: "ln",
|
|
1323
|
-
lo: "lo",
|
|
1324
|
-
lt: "lt",
|
|
1325
|
-
lv: "lv",
|
|
1326
|
-
mg: "mg",
|
|
1327
|
-
mi: "mi",
|
|
1328
|
-
mk: "mk",
|
|
1329
|
-
ml: "ml",
|
|
1330
|
-
mn: "mn",
|
|
1331
|
-
mr: "mr",
|
|
1332
|
-
ms: "ms",
|
|
1333
|
-
mt: "mt",
|
|
1334
|
-
my: "my",
|
|
1335
|
-
ne: "ne",
|
|
1336
|
-
nl: "nl",
|
|
1337
|
-
nn: "nn",
|
|
1338
|
-
no: "no",
|
|
1339
|
-
oc: "oc",
|
|
1340
|
-
pa: "pa",
|
|
1341
|
-
pl: "pl",
|
|
1342
|
-
ps: "ps",
|
|
1343
|
-
pt: "pt",
|
|
1344
|
-
ro: "ro",
|
|
1345
|
-
ru: "ru",
|
|
1346
|
-
sa: "sa",
|
|
1347
|
-
sd: "sd",
|
|
1348
|
-
si: "si",
|
|
1349
|
-
sk: "sk",
|
|
1350
|
-
sl: "sl",
|
|
1351
|
-
sn: "sn",
|
|
1352
|
-
so: "so",
|
|
1353
|
-
sq: "sq",
|
|
1354
|
-
sr: "sr",
|
|
1355
|
-
su: "su",
|
|
1356
|
-
sv: "sv",
|
|
1357
|
-
sw: "sw",
|
|
1358
|
-
ta: "ta",
|
|
1359
|
-
te: "te",
|
|
1360
|
-
tg: "tg",
|
|
1361
|
-
th: "th",
|
|
1362
|
-
tk: "tk",
|
|
1363
|
-
tl: "tl",
|
|
1364
|
-
tr: "tr",
|
|
1365
|
-
tt: "tt",
|
|
1366
|
-
uk: "uk",
|
|
1367
|
-
ur: "ur",
|
|
1368
|
-
uz: "uz",
|
|
1369
|
-
vi: "vi",
|
|
1370
|
-
yi: "yi",
|
|
1371
|
-
yo: "yo",
|
|
1372
|
-
zh: "zh"
|
|
1373
|
-
};
|
|
1374
|
-
|
|
1548
|
+
// src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
|
|
1549
|
+
var TranscriptionControllerListV2KindItem = {
|
|
1550
|
+
"pre-recorded": "pre-recorded",
|
|
1551
|
+
live: "live"
|
|
1552
|
+
};
|
|
1553
|
+
|
|
1554
|
+
// src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
|
|
1555
|
+
var TranscriptionControllerListV2StatusItem = {
|
|
1556
|
+
queued: "queued",
|
|
1557
|
+
processing: "processing",
|
|
1558
|
+
done: "done",
|
|
1559
|
+
error: "error"
|
|
1560
|
+
};
|
|
1561
|
+
|
|
1375
1562
|
// src/generated/gladia/schema/transcriptMessageType.ts
|
|
1376
1563
|
var TranscriptMessageType = {
|
|
1377
1564
|
transcript: "transcript"
|
|
1378
1565
|
};
|
|
1379
1566
|
|
|
1380
|
-
// src/generated/gladia/schema/translationLanguageCodeEnum.ts
|
|
1381
|
-
var TranslationLanguageCodeEnum = {
|
|
1382
|
-
af: "af",
|
|
1383
|
-
am: "am",
|
|
1384
|
-
ar: "ar",
|
|
1385
|
-
as: "as",
|
|
1386
|
-
az: "az",
|
|
1387
|
-
ba: "ba",
|
|
1388
|
-
be: "be",
|
|
1389
|
-
bg: "bg",
|
|
1390
|
-
bn: "bn",
|
|
1391
|
-
bo: "bo",
|
|
1392
|
-
br: "br",
|
|
1393
|
-
bs: "bs",
|
|
1394
|
-
ca: "ca",
|
|
1395
|
-
cs: "cs",
|
|
1396
|
-
cy: "cy",
|
|
1397
|
-
da: "da",
|
|
1398
|
-
de: "de",
|
|
1399
|
-
el: "el",
|
|
1400
|
-
en: "en",
|
|
1401
|
-
es: "es",
|
|
1402
|
-
et: "et",
|
|
1403
|
-
eu: "eu",
|
|
1404
|
-
fa: "fa",
|
|
1405
|
-
fi: "fi",
|
|
1406
|
-
fo: "fo",
|
|
1407
|
-
fr: "fr",
|
|
1408
|
-
gl: "gl",
|
|
1409
|
-
gu: "gu",
|
|
1410
|
-
ha: "ha",
|
|
1411
|
-
haw: "haw",
|
|
1412
|
-
he: "he",
|
|
1413
|
-
hi: "hi",
|
|
1414
|
-
hr: "hr",
|
|
1415
|
-
ht: "ht",
|
|
1416
|
-
hu: "hu",
|
|
1417
|
-
hy: "hy",
|
|
1418
|
-
id: "id",
|
|
1419
|
-
is: "is",
|
|
1420
|
-
it: "it",
|
|
1421
|
-
ja: "ja",
|
|
1422
|
-
jw: "jw",
|
|
1423
|
-
ka: "ka",
|
|
1424
|
-
kk: "kk",
|
|
1425
|
-
km: "km",
|
|
1426
|
-
kn: "kn",
|
|
1427
|
-
ko: "ko",
|
|
1428
|
-
la: "la",
|
|
1429
|
-
lb: "lb",
|
|
1430
|
-
ln: "ln",
|
|
1431
|
-
lo: "lo",
|
|
1432
|
-
lt: "lt",
|
|
1433
|
-
lv: "lv",
|
|
1434
|
-
mg: "mg",
|
|
1435
|
-
mi: "mi",
|
|
1436
|
-
mk: "mk",
|
|
1437
|
-
ml: "ml",
|
|
1438
|
-
mn: "mn",
|
|
1439
|
-
mr: "mr",
|
|
1440
|
-
ms: "ms",
|
|
1441
|
-
mt: "mt",
|
|
1442
|
-
my: "my",
|
|
1443
|
-
ne: "ne",
|
|
1444
|
-
nl: "nl",
|
|
1445
|
-
nn: "nn",
|
|
1446
|
-
no: "no",
|
|
1447
|
-
oc: "oc",
|
|
1448
|
-
pa: "pa",
|
|
1449
|
-
pl: "pl",
|
|
1450
|
-
ps: "ps",
|
|
1451
|
-
pt: "pt",
|
|
1452
|
-
ro: "ro",
|
|
1453
|
-
ru: "ru",
|
|
1454
|
-
sa: "sa",
|
|
1455
|
-
sd: "sd",
|
|
1456
|
-
si: "si",
|
|
1457
|
-
sk: "sk",
|
|
1458
|
-
sl: "sl",
|
|
1459
|
-
sn: "sn",
|
|
1460
|
-
so: "so",
|
|
1461
|
-
sq: "sq",
|
|
1462
|
-
sr: "sr",
|
|
1463
|
-
su: "su",
|
|
1464
|
-
sv: "sv",
|
|
1465
|
-
sw: "sw",
|
|
1466
|
-
ta: "ta",
|
|
1467
|
-
te: "te",
|
|
1468
|
-
tg: "tg",
|
|
1469
|
-
th: "th",
|
|
1470
|
-
tk: "tk",
|
|
1471
|
-
tl: "tl",
|
|
1472
|
-
tr: "tr",
|
|
1473
|
-
tt: "tt",
|
|
1474
|
-
uk: "uk",
|
|
1475
|
-
ur: "ur",
|
|
1476
|
-
uz: "uz",
|
|
1477
|
-
vi: "vi",
|
|
1478
|
-
wo: "wo",
|
|
1479
|
-
yi: "yi",
|
|
1480
|
-
yo: "yo",
|
|
1481
|
-
zh: "zh"
|
|
1482
|
-
};
|
|
1483
|
-
|
|
1484
1567
|
// src/generated/gladia/schema/translationMessageType.ts
|
|
1485
1568
|
var TranslationMessageType = {
|
|
1486
1569
|
translation: "translation"
|
|
@@ -2048,7 +2131,7 @@ var GladiaAdapter = class extends BaseAdapter {
|
|
|
2048
2131
|
}))
|
|
2049
2132
|
);
|
|
2050
2133
|
return extractWords(allWords, (item) => ({
|
|
2051
|
-
|
|
2134
|
+
word: item.word.word,
|
|
2052
2135
|
start: item.word.start,
|
|
2053
2136
|
end: item.word.end,
|
|
2054
2137
|
confidence: item.word.confidence,
|
|
@@ -2068,11 +2151,11 @@ var GladiaAdapter = class extends BaseAdapter {
|
|
|
2068
2151
|
end: utterance.end,
|
|
2069
2152
|
speaker: utterance.speaker?.toString(),
|
|
2070
2153
|
confidence: utterance.confidence,
|
|
2071
|
-
words: utterance.words.map((
|
|
2072
|
-
|
|
2073
|
-
start:
|
|
2074
|
-
end:
|
|
2075
|
-
confidence:
|
|
2154
|
+
words: utterance.words.map((w) => ({
|
|
2155
|
+
word: w.word,
|
|
2156
|
+
start: w.start,
|
|
2157
|
+
end: w.end,
|
|
2158
|
+
confidence: w.confidence
|
|
2076
2159
|
}))
|
|
2077
2160
|
}));
|
|
2078
2161
|
}
|
|
@@ -2124,11 +2207,46 @@ var GladiaAdapter = class extends BaseAdapter {
|
|
|
2124
2207
|
* Creates a WebSocket connection to Gladia for streaming transcription.
|
|
2125
2208
|
* First initializes a session via REST API, then connects to WebSocket.
|
|
2126
2209
|
*
|
|
2210
|
+
* Supports all Gladia streaming features:
|
|
2211
|
+
* - Real-time transcription with interim/final results
|
|
2212
|
+
* - Speech detection events (speech_start, speech_end)
|
|
2213
|
+
* - Real-time translation to other languages
|
|
2214
|
+
* - Real-time sentiment analysis
|
|
2215
|
+
* - Real-time named entity recognition
|
|
2216
|
+
* - Post-processing summarization and chapterization
|
|
2217
|
+
* - Audio preprocessing (audio enhancement, speech threshold)
|
|
2218
|
+
* - Custom vocabulary and spelling
|
|
2219
|
+
* - Multi-language code switching
|
|
2220
|
+
*
|
|
2127
2221
|
* @param options - Streaming configuration options
|
|
2222
|
+
* @param options.encoding - Audio encoding (wav/pcm, wav/alaw, wav/ulaw)
|
|
2223
|
+
* @param options.sampleRate - Sample rate (8000, 16000, 32000, 44100, 48000)
|
|
2224
|
+
* @param options.bitDepth - Bit depth (8, 16, 24, 32)
|
|
2225
|
+
* @param options.channels - Number of channels (1-8)
|
|
2226
|
+
* @param options.language - Language code for transcription
|
|
2227
|
+
* @param options.interimResults - Enable partial/interim transcripts
|
|
2228
|
+
* @param options.endpointing - Silence duration to end utterance (0.01-10 seconds)
|
|
2229
|
+
* @param options.maxSilence - Max duration without endpointing (5-60 seconds)
|
|
2230
|
+
* @param options.customVocabulary - Words to boost in recognition
|
|
2231
|
+
* @param options.sentimentAnalysis - Enable real-time sentiment analysis
|
|
2232
|
+
* @param options.entityDetection - Enable named entity recognition
|
|
2233
|
+
* @param options.summarization - Enable post-processing summarization
|
|
2234
|
+
* @param options.gladiaStreaming - Full Gladia streaming options (pre_processing, realtime_processing, post_processing, messages_config)
|
|
2128
2235
|
* @param callbacks - Event callbacks for transcription results
|
|
2236
|
+
* @param callbacks.onTranscript - Interim/final transcript received
|
|
2237
|
+
* @param callbacks.onUtterance - Complete utterance detected
|
|
2238
|
+
* @param callbacks.onSpeechStart - Speech detected (requires messages_config.receive_speech_events)
|
|
2239
|
+
* @param callbacks.onSpeechEnd - Speech ended (requires messages_config.receive_speech_events)
|
|
2240
|
+
* @param callbacks.onTranslation - Translation result (requires translation enabled)
|
|
2241
|
+
* @param callbacks.onSentiment - Sentiment analysis result
|
|
2242
|
+
* @param callbacks.onEntity - Named entity detected
|
|
2243
|
+
* @param callbacks.onSummarization - Summarization completed
|
|
2244
|
+
* @param callbacks.onChapterization - Chapterization completed
|
|
2245
|
+
* @param callbacks.onAudioAck - Audio chunk acknowledged
|
|
2246
|
+
* @param callbacks.onLifecycle - Session lifecycle events
|
|
2129
2247
|
* @returns Promise that resolves with a StreamingSession
|
|
2130
2248
|
*
|
|
2131
|
-
* @example
|
|
2249
|
+
* @example Basic real-time streaming
|
|
2132
2250
|
* ```typescript
|
|
2133
2251
|
* const session = await adapter.transcribeStream({
|
|
2134
2252
|
* encoding: 'wav/pcm',
|
|
@@ -2150,15 +2268,124 @@ var GladiaAdapter = class extends BaseAdapter {
|
|
|
2150
2268
|
* });
|
|
2151
2269
|
*
|
|
2152
2270
|
* // Send audio chunks
|
|
2153
|
-
* const audioChunk = getAudioChunk();
|
|
2271
|
+
* const audioChunk = getAudioChunk();
|
|
2154
2272
|
* await session.sendAudio({ data: audioChunk });
|
|
2155
2273
|
*
|
|
2156
2274
|
* // Close when done
|
|
2157
2275
|
* await session.close();
|
|
2158
2276
|
* ```
|
|
2277
|
+
*
|
|
2278
|
+
* @example Advanced streaming with all features
|
|
2279
|
+
* ```typescript
|
|
2280
|
+
* const session = await adapter.transcribeStream({
|
|
2281
|
+
* encoding: 'wav/pcm',
|
|
2282
|
+
* sampleRate: 16000,
|
|
2283
|
+
* language: 'en',
|
|
2284
|
+
* sentimentAnalysis: true,
|
|
2285
|
+
* entityDetection: true,
|
|
2286
|
+
* summarization: true,
|
|
2287
|
+
* gladiaStreaming: {
|
|
2288
|
+
* pre_processing: {
|
|
2289
|
+
* audio_enhancer: true,
|
|
2290
|
+
* speech_threshold: 0.5
|
|
2291
|
+
* },
|
|
2292
|
+
* realtime_processing: {
|
|
2293
|
+
* translation: true,
|
|
2294
|
+
* translation_config: { target_languages: ['fr', 'es'] }
|
|
2295
|
+
* },
|
|
2296
|
+
* post_processing: {
|
|
2297
|
+
* chapterization: true
|
|
2298
|
+
* },
|
|
2299
|
+
* messages_config: {
|
|
2300
|
+
* receive_speech_events: true,
|
|
2301
|
+
* receive_acknowledgments: true,
|
|
2302
|
+
* receive_lifecycle_events: true
|
|
2303
|
+
* }
|
|
2304
|
+
* }
|
|
2305
|
+
* }, {
|
|
2306
|
+
* onTranscript: (e) => console.log('Transcript:', e.text),
|
|
2307
|
+
* onSpeechStart: (e) => console.log('Speech started at:', e.timestamp),
|
|
2308
|
+
* onSpeechEnd: (e) => console.log('Speech ended at:', e.timestamp),
|
|
2309
|
+
* onTranslation: (e) => console.log(`${e.targetLanguage}: ${e.translatedText}`),
|
|
2310
|
+
* onSentiment: (e) => console.log('Sentiment:', e.sentiment),
|
|
2311
|
+
* onEntity: (e) => console.log(`Entity: ${e.type} - ${e.text}`),
|
|
2312
|
+
* onSummarization: (e) => console.log('Summary:', e.summary),
|
|
2313
|
+
* onChapterization: (e) => console.log('Chapters:', e.chapters),
|
|
2314
|
+
* onAudioAck: (e) => console.log('Audio ack:', e.byteRange),
|
|
2315
|
+
* onLifecycle: (e) => console.log('Lifecycle:', e.eventType)
|
|
2316
|
+
* });
|
|
2317
|
+
* ```
|
|
2159
2318
|
*/
|
|
2160
2319
|
async transcribeStream(options, callbacks) {
|
|
2161
2320
|
this.validateConfig();
|
|
2321
|
+
const streamingRequest = this.buildStreamingRequest(options);
|
|
2322
|
+
const initResponse = await streamingControllerInitStreamingSessionV2(
|
|
2323
|
+
streamingRequest,
|
|
2324
|
+
void 0,
|
|
2325
|
+
// no params
|
|
2326
|
+
this.getAxiosConfig()
|
|
2327
|
+
);
|
|
2328
|
+
const { id, url: wsUrl } = initResponse.data;
|
|
2329
|
+
const ws = new import_ws.default(wsUrl);
|
|
2330
|
+
let sessionStatus = "connecting";
|
|
2331
|
+
setupWebSocketHandlers(ws, callbacks, (status) => {
|
|
2332
|
+
sessionStatus = status;
|
|
2333
|
+
});
|
|
2334
|
+
ws.on("message", (data) => {
|
|
2335
|
+
try {
|
|
2336
|
+
const message = JSON.parse(data.toString());
|
|
2337
|
+
this.handleWebSocketMessage(message, callbacks);
|
|
2338
|
+
} catch (error) {
|
|
2339
|
+
callbacks?.onError?.({
|
|
2340
|
+
code: ERROR_CODES.PARSE_ERROR,
|
|
2341
|
+
message: "Failed to parse WebSocket message",
|
|
2342
|
+
details: error
|
|
2343
|
+
});
|
|
2344
|
+
}
|
|
2345
|
+
});
|
|
2346
|
+
await waitForWebSocketOpen(ws);
|
|
2347
|
+
return {
|
|
2348
|
+
id,
|
|
2349
|
+
provider: this.name,
|
|
2350
|
+
createdAt: /* @__PURE__ */ new Date(),
|
|
2351
|
+
getStatus: () => sessionStatus,
|
|
2352
|
+
sendAudio: async (chunk) => {
|
|
2353
|
+
validateSessionForAudio(sessionStatus, ws.readyState, import_ws.default.OPEN);
|
|
2354
|
+
ws.send(chunk.data);
|
|
2355
|
+
if (chunk.isLast) {
|
|
2356
|
+
ws.send(
|
|
2357
|
+
JSON.stringify({
|
|
2358
|
+
type: "stop_recording"
|
|
2359
|
+
})
|
|
2360
|
+
);
|
|
2361
|
+
}
|
|
2362
|
+
},
|
|
2363
|
+
close: async () => {
|
|
2364
|
+
if (sessionStatus === "closed" || sessionStatus === "closing") {
|
|
2365
|
+
return;
|
|
2366
|
+
}
|
|
2367
|
+
sessionStatus = "closing";
|
|
2368
|
+
if (ws.readyState === import_ws.default.OPEN) {
|
|
2369
|
+
ws.send(
|
|
2370
|
+
JSON.stringify({
|
|
2371
|
+
type: "stop_recording"
|
|
2372
|
+
})
|
|
2373
|
+
);
|
|
2374
|
+
}
|
|
2375
|
+
await closeWebSocket(ws);
|
|
2376
|
+
sessionStatus = "closed";
|
|
2377
|
+
}
|
|
2378
|
+
};
|
|
2379
|
+
}
|
|
2380
|
+
/**
|
|
2381
|
+
* Build streaming request with full type safety from OpenAPI specs
|
|
2382
|
+
*
|
|
2383
|
+
* Maps normalized options to Gladia streaming request format,
|
|
2384
|
+
* including all advanced features like pre-processing, real-time
|
|
2385
|
+
* processing, post-processing, and message configuration.
|
|
2386
|
+
*/
|
|
2387
|
+
buildStreamingRequest(options) {
|
|
2388
|
+
const gladiaOpts = options?.gladiaStreaming || {};
|
|
2162
2389
|
let validatedSampleRate;
|
|
2163
2390
|
if (options?.sampleRate) {
|
|
2164
2391
|
validatedSampleRate = validateEnumValue(
|
|
@@ -2168,112 +2395,376 @@ var GladiaAdapter = class extends BaseAdapter {
|
|
|
2168
2395
|
"Gladia"
|
|
2169
2396
|
);
|
|
2170
2397
|
}
|
|
2398
|
+
let validatedBitDepth;
|
|
2399
|
+
if (options?.bitDepth) {
|
|
2400
|
+
validatedBitDepth = validateEnumValue(
|
|
2401
|
+
options.bitDepth,
|
|
2402
|
+
StreamingSupportedBitDepthEnum,
|
|
2403
|
+
"bit depth",
|
|
2404
|
+
"Gladia"
|
|
2405
|
+
);
|
|
2406
|
+
}
|
|
2171
2407
|
const streamingRequest = {
|
|
2408
|
+
// Spread any direct Gladia streaming options first
|
|
2409
|
+
...gladiaOpts,
|
|
2410
|
+
// Audio format configuration (these are excluded from gladiaStreaming to avoid conflicts)
|
|
2172
2411
|
encoding: options?.encoding ? mapEncodingToProvider(options.encoding, "gladia") : void 0,
|
|
2173
2412
|
sample_rate: validatedSampleRate,
|
|
2413
|
+
bit_depth: validatedBitDepth,
|
|
2174
2414
|
channels: options?.channels,
|
|
2175
|
-
|
|
2176
|
-
model: options?.model
|
|
2415
|
+
// Model and processing
|
|
2416
|
+
model: options?.model ?? gladiaOpts.model,
|
|
2417
|
+
endpointing: options?.endpointing ?? gladiaOpts.endpointing,
|
|
2418
|
+
maximum_duration_without_endpointing: options?.maxSilence ?? gladiaOpts.maximum_duration_without_endpointing
|
|
2177
2419
|
};
|
|
2178
|
-
if (options?.language) {
|
|
2420
|
+
if (options?.language || options?.codeSwitching || gladiaOpts.language_config) {
|
|
2179
2421
|
streamingRequest.language_config = {
|
|
2180
|
-
|
|
2422
|
+
...gladiaOpts.language_config,
|
|
2423
|
+
languages: options?.language ? [options.language] : gladiaOpts.language_config?.languages,
|
|
2424
|
+
code_switching: options?.codeSwitching ?? gladiaOpts.language_config?.code_switching
|
|
2181
2425
|
};
|
|
2182
2426
|
}
|
|
2183
|
-
|
|
2184
|
-
streamingRequest
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
|
|
2188
|
-
)
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
|
|
2196
|
-
|
|
2197
|
-
|
|
2198
|
-
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
|
|
2208
|
-
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2427
|
+
if (gladiaOpts.pre_processing) {
|
|
2428
|
+
streamingRequest.pre_processing = gladiaOpts.pre_processing;
|
|
2429
|
+
}
|
|
2430
|
+
const realtimeProcessing = gladiaOpts.realtime_processing || {};
|
|
2431
|
+
const hasRealtimeOptions = options?.customVocabulary || options?.sentimentAnalysis || options?.entityDetection || realtimeProcessing.translation || realtimeProcessing.custom_vocabulary || realtimeProcessing.custom_spelling || realtimeProcessing.named_entity_recognition || realtimeProcessing.sentiment_analysis;
|
|
2432
|
+
if (hasRealtimeOptions) {
|
|
2433
|
+
streamingRequest.realtime_processing = {
|
|
2434
|
+
...realtimeProcessing,
|
|
2435
|
+
// Custom vocabulary
|
|
2436
|
+
custom_vocabulary: options?.customVocabulary && options.customVocabulary.length > 0 || realtimeProcessing.custom_vocabulary,
|
|
2437
|
+
custom_vocabulary_config: options?.customVocabulary && options.customVocabulary.length > 0 ? {
|
|
2438
|
+
...realtimeProcessing.custom_vocabulary_config,
|
|
2439
|
+
vocabulary: options.customVocabulary
|
|
2440
|
+
} : realtimeProcessing.custom_vocabulary_config,
|
|
2441
|
+
// Sentiment analysis
|
|
2442
|
+
sentiment_analysis: options?.sentimentAnalysis ?? realtimeProcessing.sentiment_analysis,
|
|
2443
|
+
// Named entity recognition
|
|
2444
|
+
named_entity_recognition: options?.entityDetection ?? realtimeProcessing.named_entity_recognition
|
|
2445
|
+
};
|
|
2446
|
+
}
|
|
2447
|
+
const postProcessing = gladiaOpts.post_processing || {};
|
|
2448
|
+
if (options?.summarization || postProcessing.summarization || postProcessing.chapterization) {
|
|
2449
|
+
streamingRequest.post_processing = {
|
|
2450
|
+
...postProcessing,
|
|
2451
|
+
summarization: options?.summarization ?? postProcessing.summarization
|
|
2452
|
+
};
|
|
2453
|
+
}
|
|
2454
|
+
if (gladiaOpts.messages_config) {
|
|
2455
|
+
streamingRequest.messages_config = gladiaOpts.messages_config;
|
|
2456
|
+
} else if (options?.interimResults !== void 0) {
|
|
2457
|
+
streamingRequest.messages_config = {
|
|
2458
|
+
receive_partial_transcripts: options.interimResults,
|
|
2459
|
+
receive_final_transcripts: true
|
|
2460
|
+
};
|
|
2461
|
+
}
|
|
2462
|
+
if (gladiaOpts.callback || gladiaOpts.callback_config) {
|
|
2463
|
+
streamingRequest.callback = gladiaOpts.callback;
|
|
2464
|
+
streamingRequest.callback_config = gladiaOpts.callback_config;
|
|
2465
|
+
}
|
|
2466
|
+
if (gladiaOpts.custom_metadata) {
|
|
2467
|
+
streamingRequest.custom_metadata = gladiaOpts.custom_metadata;
|
|
2468
|
+
}
|
|
2469
|
+
return streamingRequest;
|
|
2470
|
+
}
|
|
2471
|
+
/**
|
|
2472
|
+
* Handle all WebSocket message types from Gladia streaming
|
|
2473
|
+
*
|
|
2474
|
+
* Processes transcript, utterance, speech events, real-time processing
|
|
2475
|
+
* results (translation, sentiment, NER), post-processing results
|
|
2476
|
+
* (summarization, chapterization), acknowledgments, and lifecycle events.
|
|
2477
|
+
*/
|
|
2478
|
+
handleWebSocketMessage(message, callbacks) {
|
|
2479
|
+
const msg = message;
|
|
2480
|
+
const messageType = msg.type;
|
|
2481
|
+
switch (messageType) {
|
|
2482
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2483
|
+
// Transcript events
|
|
2484
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2485
|
+
case "transcript": {
|
|
2486
|
+
const transcriptMessage = message;
|
|
2487
|
+
const messageData = transcriptMessage.data;
|
|
2488
|
+
const utterance = messageData.utterance;
|
|
2489
|
+
callbacks?.onTranscript?.({
|
|
2490
|
+
type: "transcript",
|
|
2491
|
+
text: utterance.text,
|
|
2492
|
+
isFinal: messageData.is_final,
|
|
2493
|
+
confidence: utterance.confidence,
|
|
2494
|
+
language: utterance.language,
|
|
2495
|
+
channel: utterance.channel,
|
|
2496
|
+
speaker: utterance.speaker?.toString(),
|
|
2497
|
+
words: utterance.words.map((w) => ({
|
|
2498
|
+
word: w.word,
|
|
2499
|
+
start: w.start,
|
|
2500
|
+
end: w.end,
|
|
2501
|
+
confidence: w.confidence
|
|
2502
|
+
})),
|
|
2503
|
+
data: message
|
|
2504
|
+
});
|
|
2505
|
+
break;
|
|
2506
|
+
}
|
|
2507
|
+
case "utterance": {
|
|
2508
|
+
const transcriptMessage = message;
|
|
2509
|
+
const messageData = transcriptMessage.data;
|
|
2510
|
+
const utterance = messageData.utterance;
|
|
2511
|
+
callbacks?.onUtterance?.({
|
|
2512
|
+
text: utterance.text,
|
|
2513
|
+
start: utterance.start,
|
|
2514
|
+
end: utterance.end,
|
|
2515
|
+
speaker: utterance.speaker?.toString(),
|
|
2516
|
+
confidence: utterance.confidence,
|
|
2517
|
+
words: utterance.words.map((w) => ({
|
|
2518
|
+
word: w.word,
|
|
2519
|
+
start: w.start,
|
|
2520
|
+
end: w.end,
|
|
2521
|
+
confidence: w.confidence
|
|
2522
|
+
}))
|
|
2523
|
+
});
|
|
2524
|
+
break;
|
|
2525
|
+
}
|
|
2526
|
+
// Post-processing transcripts (final accumulated transcript)
|
|
2527
|
+
case "post_transcript": {
|
|
2528
|
+
const postTranscript = message;
|
|
2529
|
+
callbacks?.onTranscript?.({
|
|
2530
|
+
type: "transcript",
|
|
2531
|
+
text: postTranscript.data?.full_transcript || "",
|
|
2532
|
+
isFinal: true,
|
|
2533
|
+
data: message
|
|
2534
|
+
});
|
|
2535
|
+
break;
|
|
2536
|
+
}
|
|
2537
|
+
case "post_final_transcript": {
|
|
2538
|
+
const postFinal = message;
|
|
2539
|
+
callbacks?.onTranscript?.({
|
|
2540
|
+
type: "transcript",
|
|
2541
|
+
text: postFinal.data?.transcription?.full_transcript || "",
|
|
2542
|
+
isFinal: true,
|
|
2543
|
+
data: message
|
|
2544
|
+
});
|
|
2545
|
+
break;
|
|
2546
|
+
}
|
|
2547
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2548
|
+
// Speech detection events
|
|
2549
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2550
|
+
case "speech_start": {
|
|
2551
|
+
const speechStart = message;
|
|
2552
|
+
const event = {
|
|
2553
|
+
type: "speech_start",
|
|
2554
|
+
timestamp: speechStart.data.time,
|
|
2555
|
+
channel: speechStart.data.channel,
|
|
2556
|
+
sessionId: speechStart.session_id
|
|
2557
|
+
};
|
|
2558
|
+
callbacks?.onSpeechStart?.(event);
|
|
2559
|
+
break;
|
|
2560
|
+
}
|
|
2561
|
+
case "speech_end": {
|
|
2562
|
+
const speechEnd = message;
|
|
2563
|
+
const event = {
|
|
2564
|
+
type: "speech_end",
|
|
2565
|
+
timestamp: speechEnd.data.time,
|
|
2566
|
+
channel: speechEnd.data.channel,
|
|
2567
|
+
sessionId: speechEnd.session_id
|
|
2568
|
+
};
|
|
2569
|
+
callbacks?.onSpeechEnd?.(event);
|
|
2570
|
+
break;
|
|
2571
|
+
}
|
|
2572
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2573
|
+
// Real-time processing events
|
|
2574
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2575
|
+
case "translation": {
|
|
2576
|
+
const translationMsg = message;
|
|
2577
|
+
if (translationMsg.error) {
|
|
2578
|
+
callbacks?.onError?.({
|
|
2579
|
+
code: ERROR_CODES.TRANSCRIPTION_ERROR,
|
|
2580
|
+
message: "Translation failed",
|
|
2581
|
+
details: translationMsg.error
|
|
2582
|
+
});
|
|
2583
|
+
} else if (translationMsg.data) {
|
|
2584
|
+
const event = {
|
|
2585
|
+
utteranceId: translationMsg.data.utterance_id,
|
|
2586
|
+
original: translationMsg.data.utterance.text,
|
|
2587
|
+
targetLanguage: translationMsg.data.target_language,
|
|
2588
|
+
translatedText: translationMsg.data.translated_utterance.text,
|
|
2589
|
+
isFinal: true
|
|
2590
|
+
};
|
|
2591
|
+
callbacks?.onTranslation?.(event);
|
|
2592
|
+
}
|
|
2593
|
+
break;
|
|
2594
|
+
}
|
|
2595
|
+
case "sentiment_analysis": {
|
|
2596
|
+
const sentimentMsg = message;
|
|
2597
|
+
if (sentimentMsg.error) {
|
|
2598
|
+
callbacks?.onError?.({
|
|
2599
|
+
code: ERROR_CODES.TRANSCRIPTION_ERROR,
|
|
2600
|
+
message: "Sentiment analysis failed",
|
|
2601
|
+
details: sentimentMsg.error
|
|
2602
|
+
});
|
|
2603
|
+
} else if (sentimentMsg.data) {
|
|
2604
|
+
for (const result of sentimentMsg.data.results) {
|
|
2605
|
+
const event = {
|
|
2606
|
+
utteranceId: sentimentMsg.data.utterance_id,
|
|
2607
|
+
sentiment: result.sentiment,
|
|
2608
|
+
confidence: void 0
|
|
2609
|
+
// Gladia doesn't provide confidence for sentiment
|
|
2610
|
+
};
|
|
2611
|
+
callbacks?.onSentiment?.(event);
|
|
2612
|
+
}
|
|
2613
|
+
}
|
|
2614
|
+
break;
|
|
2615
|
+
}
|
|
2616
|
+
case "named_entity_recognition": {
|
|
2617
|
+
const nerMsg = message;
|
|
2618
|
+
if (nerMsg.error) {
|
|
2619
|
+
callbacks?.onError?.({
|
|
2620
|
+
code: ERROR_CODES.TRANSCRIPTION_ERROR,
|
|
2621
|
+
message: "Named entity recognition failed",
|
|
2622
|
+
details: nerMsg.error
|
|
2214
2623
|
});
|
|
2215
|
-
} else if (
|
|
2216
|
-
const
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
|
|
2220
|
-
|
|
2221
|
-
|
|
2222
|
-
|
|
2223
|
-
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
text: word.word,
|
|
2227
|
-
start: word.start,
|
|
2228
|
-
end: word.end,
|
|
2229
|
-
confidence: word.confidence
|
|
2230
|
-
}))
|
|
2231
|
-
};
|
|
2232
|
-
callbacks?.onUtterance?.(utteranceData);
|
|
2233
|
-
} else if (message.type === "metadata") {
|
|
2234
|
-
callbacks?.onMetadata?.(message);
|
|
2624
|
+
} else if (nerMsg.data) {
|
|
2625
|
+
for (const entity of nerMsg.data.results) {
|
|
2626
|
+
const event = {
|
|
2627
|
+
utteranceId: nerMsg.data.utterance_id,
|
|
2628
|
+
text: entity.text,
|
|
2629
|
+
type: entity.entity_type,
|
|
2630
|
+
start: entity.start,
|
|
2631
|
+
end: entity.end
|
|
2632
|
+
};
|
|
2633
|
+
callbacks?.onEntity?.(event);
|
|
2634
|
+
}
|
|
2235
2635
|
}
|
|
2236
|
-
|
|
2237
|
-
callbacks?.onError?.({
|
|
2238
|
-
code: ERROR_CODES.PARSE_ERROR,
|
|
2239
|
-
message: "Failed to parse WebSocket message",
|
|
2240
|
-
details: error
|
|
2241
|
-
});
|
|
2636
|
+
break;
|
|
2242
2637
|
}
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
|
|
2247
|
-
|
|
2248
|
-
|
|
2249
|
-
|
|
2250
|
-
|
|
2251
|
-
|
|
2252
|
-
|
|
2253
|
-
if (
|
|
2254
|
-
|
|
2255
|
-
|
|
2256
|
-
|
|
2257
|
-
})
|
|
2258
|
-
);
|
|
2638
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2639
|
+
// Post-processing events
|
|
2640
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2641
|
+
case "post_summarization": {
|
|
2642
|
+
const summaryMsg = message;
|
|
2643
|
+
if (summaryMsg.error) {
|
|
2644
|
+
callbacks?.onSummarization?.({
|
|
2645
|
+
summary: "",
|
|
2646
|
+
error: typeof summaryMsg.error === "string" ? summaryMsg.error : "Summarization failed"
|
|
2647
|
+
});
|
|
2648
|
+
} else if (summaryMsg.data) {
|
|
2649
|
+
callbacks?.onSummarization?.({
|
|
2650
|
+
summary: summaryMsg.data.results
|
|
2651
|
+
});
|
|
2259
2652
|
}
|
|
2260
|
-
|
|
2261
|
-
|
|
2262
|
-
|
|
2263
|
-
|
|
2653
|
+
break;
|
|
2654
|
+
}
|
|
2655
|
+
case "post_chapterization": {
|
|
2656
|
+
const chapterMsg = message;
|
|
2657
|
+
if (chapterMsg.error) {
|
|
2658
|
+
callbacks?.onChapterization?.({
|
|
2659
|
+
chapters: [],
|
|
2660
|
+
error: typeof chapterMsg.error === "string" ? chapterMsg.error : "Chapterization failed"
|
|
2661
|
+
});
|
|
2662
|
+
} else if (chapterMsg.data) {
|
|
2663
|
+
callbacks?.onChapterization?.({
|
|
2664
|
+
chapters: chapterMsg.data.results.map((ch) => ({
|
|
2665
|
+
headline: ch.headline,
|
|
2666
|
+
summary: ch.summary || ch.abstractive_summary || ch.extractive_summary || "",
|
|
2667
|
+
start: ch.start,
|
|
2668
|
+
end: ch.end
|
|
2669
|
+
}))
|
|
2670
|
+
});
|
|
2264
2671
|
}
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2672
|
+
break;
|
|
2673
|
+
}
|
|
2674
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2675
|
+
// Acknowledgment events
|
|
2676
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2677
|
+
case "audio_chunk_ack": {
|
|
2678
|
+
const ackMsg = message;
|
|
2679
|
+
if (ackMsg.error) {
|
|
2680
|
+
callbacks?.onError?.({
|
|
2681
|
+
code: ERROR_CODES.TRANSCRIPTION_ERROR,
|
|
2682
|
+
message: "Audio chunk not acknowledged",
|
|
2683
|
+
details: ackMsg.error
|
|
2684
|
+
});
|
|
2685
|
+
} else if (ackMsg.data) {
|
|
2686
|
+
const event = {
|
|
2687
|
+
byteRange: ackMsg.data.byte_range,
|
|
2688
|
+
timeRange: ackMsg.data.time_range,
|
|
2689
|
+
timestamp: ackMsg.created_at
|
|
2690
|
+
};
|
|
2691
|
+
callbacks?.onAudioAck?.(event);
|
|
2272
2692
|
}
|
|
2273
|
-
|
|
2274
|
-
sessionStatus = "closed";
|
|
2693
|
+
break;
|
|
2275
2694
|
}
|
|
2276
|
-
|
|
2695
|
+
case "stop_recording_ack": {
|
|
2696
|
+
const stopAck = message;
|
|
2697
|
+
if (stopAck.error) {
|
|
2698
|
+
callbacks?.onError?.({
|
|
2699
|
+
code: ERROR_CODES.TRANSCRIPTION_ERROR,
|
|
2700
|
+
message: "Stop recording not acknowledged",
|
|
2701
|
+
details: stopAck.error
|
|
2702
|
+
});
|
|
2703
|
+
}
|
|
2704
|
+
break;
|
|
2705
|
+
}
|
|
2706
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2707
|
+
// Lifecycle events
|
|
2708
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2709
|
+
case "start_session": {
|
|
2710
|
+
const startSession = message;
|
|
2711
|
+
const event = {
|
|
2712
|
+
eventType: "start_session",
|
|
2713
|
+
timestamp: startSession.created_at,
|
|
2714
|
+
sessionId: startSession.session_id
|
|
2715
|
+
};
|
|
2716
|
+
callbacks?.onLifecycle?.(event);
|
|
2717
|
+
break;
|
|
2718
|
+
}
|
|
2719
|
+
case "start_recording": {
|
|
2720
|
+
const startRecording = message;
|
|
2721
|
+
const event = {
|
|
2722
|
+
eventType: "start_recording",
|
|
2723
|
+
timestamp: startRecording.created_at,
|
|
2724
|
+
sessionId: startRecording.session_id
|
|
2725
|
+
};
|
|
2726
|
+
callbacks?.onLifecycle?.(event);
|
|
2727
|
+
break;
|
|
2728
|
+
}
|
|
2729
|
+
case "end_recording": {
|
|
2730
|
+
const endRecording = message;
|
|
2731
|
+
const event = {
|
|
2732
|
+
eventType: "end_recording",
|
|
2733
|
+
timestamp: endRecording.created_at,
|
|
2734
|
+
sessionId: endRecording.session_id
|
|
2735
|
+
};
|
|
2736
|
+
callbacks?.onLifecycle?.(event);
|
|
2737
|
+
break;
|
|
2738
|
+
}
|
|
2739
|
+
case "end_session": {
|
|
2740
|
+
const endSession = message;
|
|
2741
|
+
const event = {
|
|
2742
|
+
eventType: "end_session",
|
|
2743
|
+
timestamp: endSession.created_at,
|
|
2744
|
+
sessionId: endSession.session_id
|
|
2745
|
+
};
|
|
2746
|
+
callbacks?.onLifecycle?.(event);
|
|
2747
|
+
break;
|
|
2748
|
+
}
|
|
2749
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2750
|
+
// Metadata and other events
|
|
2751
|
+
// ─────────────────────────────────────────────────────────────────
|
|
2752
|
+
case "metadata":
|
|
2753
|
+
callbacks?.onMetadata?.(msg);
|
|
2754
|
+
break;
|
|
2755
|
+
case "error": {
|
|
2756
|
+
const errorMsg = msg;
|
|
2757
|
+
callbacks?.onError?.({
|
|
2758
|
+
code: errorMsg.error?.code || ERROR_CODES.TRANSCRIPTION_ERROR,
|
|
2759
|
+
message: errorMsg.error?.message || "Unknown streaming error",
|
|
2760
|
+
details: msg
|
|
2761
|
+
});
|
|
2762
|
+
break;
|
|
2763
|
+
}
|
|
2764
|
+
default:
|
|
2765
|
+
callbacks?.onMetadata?.(msg);
|
|
2766
|
+
break;
|
|
2767
|
+
}
|
|
2277
2768
|
}
|
|
2278
2769
|
};
|
|
2279
2770
|
function createGladiaAdapter(config) {
|
|
@@ -2931,14 +3422,14 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
2931
3422
|
if (!transcript.words || transcript.words.length === 0) {
|
|
2932
3423
|
return void 0;
|
|
2933
3424
|
}
|
|
2934
|
-
return transcript.words.map((
|
|
2935
|
-
|
|
2936
|
-
start:
|
|
3425
|
+
return transcript.words.map((w) => ({
|
|
3426
|
+
word: w.text,
|
|
3427
|
+
start: w.start / 1e3,
|
|
2937
3428
|
// Convert ms to seconds
|
|
2938
|
-
end:
|
|
3429
|
+
end: w.end / 1e3,
|
|
2939
3430
|
// Convert ms to seconds
|
|
2940
|
-
confidence:
|
|
2941
|
-
speaker:
|
|
3431
|
+
confidence: w.confidence,
|
|
3432
|
+
speaker: w.speaker || void 0
|
|
2942
3433
|
}));
|
|
2943
3434
|
}
|
|
2944
3435
|
/**
|
|
@@ -2956,11 +3447,11 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
2956
3447
|
// Convert ms to seconds
|
|
2957
3448
|
speaker: utterance.speaker || void 0,
|
|
2958
3449
|
confidence: utterance.confidence,
|
|
2959
|
-
words: utterance.words.map((
|
|
2960
|
-
|
|
2961
|
-
start:
|
|
2962
|
-
end:
|
|
2963
|
-
confidence:
|
|
3450
|
+
words: utterance.words.map((w) => ({
|
|
3451
|
+
word: w.text,
|
|
3452
|
+
start: w.start / 1e3,
|
|
3453
|
+
end: w.end / 1e3,
|
|
3454
|
+
confidence: w.confidence
|
|
2964
3455
|
}))
|
|
2965
3456
|
}));
|
|
2966
3457
|
}
|
|
@@ -2968,19 +3459,37 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
2968
3459
|
* Stream audio for real-time transcription
|
|
2969
3460
|
*
|
|
2970
3461
|
* Creates a WebSocket connection to AssemblyAI for streaming transcription.
|
|
2971
|
-
*
|
|
3462
|
+
* Uses the v3 Universal Streaming API with full support for all parameters.
|
|
3463
|
+
*
|
|
3464
|
+
* Supports all AssemblyAI streaming features:
|
|
3465
|
+
* - Real-time transcription with interim/final results (Turn events)
|
|
3466
|
+
* - End-of-turn detection tuning (confidence threshold, silence duration)
|
|
3467
|
+
* - Voice Activity Detection (VAD) threshold tuning
|
|
3468
|
+
* - Real-time text formatting
|
|
3469
|
+
* - Profanity filtering
|
|
3470
|
+
* - Custom vocabulary (keyterms)
|
|
3471
|
+
* - Language detection
|
|
3472
|
+
* - Model selection (English or Multilingual)
|
|
3473
|
+
* - Dynamic configuration updates mid-stream
|
|
3474
|
+
* - Force endpoint command
|
|
2972
3475
|
*
|
|
2973
3476
|
* @param options - Streaming configuration options
|
|
3477
|
+
* @param options.sampleRate - Sample rate (8000, 16000, 22050, 44100, 48000)
|
|
3478
|
+
* @param options.encoding - Audio encoding (pcm_s16le, pcm_mulaw)
|
|
3479
|
+
* @param options.assemblyaiStreaming - All AssemblyAI-specific streaming options
|
|
2974
3480
|
* @param callbacks - Event callbacks for transcription results
|
|
2975
|
-
* @
|
|
3481
|
+
* @param callbacks.onTranscript - Interim/final transcript received (Turn event)
|
|
3482
|
+
* @param callbacks.onUtterance - Complete utterance (Turn with end_of_turn=true)
|
|
3483
|
+
* @param callbacks.onMetadata - Session metadata (Begin, Termination events)
|
|
3484
|
+
* @param callbacks.onError - Error occurred
|
|
3485
|
+
* @param callbacks.onClose - Connection closed
|
|
3486
|
+
* @returns Promise that resolves with an extended StreamingSession
|
|
2976
3487
|
*
|
|
2977
|
-
* @example
|
|
3488
|
+
* @example Basic real-time streaming
|
|
2978
3489
|
* ```typescript
|
|
2979
3490
|
* const session = await adapter.transcribeStream({
|
|
2980
|
-
* encoding: 'pcm_s16le',
|
|
2981
3491
|
* sampleRate: 16000,
|
|
2982
|
-
*
|
|
2983
|
-
* interimResults: true
|
|
3492
|
+
* encoding: 'pcm_s16le'
|
|
2984
3493
|
* }, {
|
|
2985
3494
|
* onOpen: () => console.log('Connected'),
|
|
2986
3495
|
* onTranscript: (event) => {
|
|
@@ -2995,21 +3504,50 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
2995
3504
|
* });
|
|
2996
3505
|
*
|
|
2997
3506
|
* // Send audio chunks
|
|
2998
|
-
* const audioChunk = getAudioChunk();
|
|
3507
|
+
* const audioChunk = getAudioChunk();
|
|
2999
3508
|
* await session.sendAudio({ data: audioChunk });
|
|
3000
3509
|
*
|
|
3001
3510
|
* // Close when done
|
|
3002
3511
|
* await session.close();
|
|
3003
3512
|
* ```
|
|
3513
|
+
*
|
|
3514
|
+
* @example Advanced streaming with all features
|
|
3515
|
+
* ```typescript
|
|
3516
|
+
* const session = await adapter.transcribeStream({
|
|
3517
|
+
* sampleRate: 16000,
|
|
3518
|
+
* assemblyaiStreaming: {
|
|
3519
|
+
* speechModel: 'universal-streaming-multilingual',
|
|
3520
|
+
* languageDetection: true,
|
|
3521
|
+
* endOfTurnConfidenceThreshold: 0.7,
|
|
3522
|
+
* minEndOfTurnSilenceWhenConfident: 500,
|
|
3523
|
+
* maxTurnSilence: 15000,
|
|
3524
|
+
* vadThreshold: 0.3,
|
|
3525
|
+
* formatTurns: true,
|
|
3526
|
+
* filterProfanity: true,
|
|
3527
|
+
* keyterms: ['TypeScript', 'JavaScript', 'API'],
|
|
3528
|
+
* inactivityTimeout: 60000
|
|
3529
|
+
* }
|
|
3530
|
+
* }, {
|
|
3531
|
+
* onTranscript: (e) => console.log('Transcript:', e.text),
|
|
3532
|
+
* onMetadata: (m) => console.log('Metadata:', m)
|
|
3533
|
+
* });
|
|
3534
|
+
*
|
|
3535
|
+
* // Update configuration mid-stream
|
|
3536
|
+
* session.updateConfiguration?.({
|
|
3537
|
+
* end_of_turn_confidence_threshold: 0.5,
|
|
3538
|
+
* vad_threshold: 0.2
|
|
3539
|
+
* });
|
|
3540
|
+
*
|
|
3541
|
+
* // Force endpoint detection
|
|
3542
|
+
* session.forceEndpoint?.();
|
|
3543
|
+
* ```
|
|
3004
3544
|
*/
|
|
3005
3545
|
async transcribeStream(options, callbacks) {
|
|
3006
3546
|
this.validateConfig();
|
|
3007
3547
|
if (!this.config?.apiKey) {
|
|
3008
3548
|
throw new Error("API key is required for streaming");
|
|
3009
3549
|
}
|
|
3010
|
-
const
|
|
3011
|
-
const encoding = options?.encoding ? mapEncodingToProvider(options.encoding, "assemblyai") : "pcm_s16le";
|
|
3012
|
-
const wsUrl = `${this.wsBaseUrl}?sample_rate=${sampleRate}&encoding=${encoding}`;
|
|
3550
|
+
const wsUrl = this.buildStreamingUrl(options);
|
|
3013
3551
|
const ws = new import_ws2.default(wsUrl, {
|
|
3014
3552
|
headers: {
|
|
3015
3553
|
Authorization: this.config.apiKey
|
|
@@ -3033,43 +3571,7 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
3033
3571
|
ws.on("message", (data) => {
|
|
3034
3572
|
try {
|
|
3035
3573
|
const message = JSON.parse(data.toString());
|
|
3036
|
-
|
|
3037
|
-
callbacks?.onError?.({
|
|
3038
|
-
code: "API_ERROR",
|
|
3039
|
-
message: message.error
|
|
3040
|
-
});
|
|
3041
|
-
return;
|
|
3042
|
-
}
|
|
3043
|
-
if (message.type === "Begin") {
|
|
3044
|
-
const beginMsg = message;
|
|
3045
|
-
callbacks?.onMetadata?.({
|
|
3046
|
-
sessionId: beginMsg.id,
|
|
3047
|
-
expiresAt: new Date(beginMsg.expires_at).toISOString()
|
|
3048
|
-
});
|
|
3049
|
-
} else if (message.type === "Turn") {
|
|
3050
|
-
const turnMsg = message;
|
|
3051
|
-
callbacks?.onTranscript?.({
|
|
3052
|
-
type: "transcript",
|
|
3053
|
-
text: turnMsg.transcript,
|
|
3054
|
-
isFinal: turnMsg.end_of_turn,
|
|
3055
|
-
confidence: turnMsg.end_of_turn_confidence,
|
|
3056
|
-
words: turnMsg.words.map((word) => ({
|
|
3057
|
-
text: word.text,
|
|
3058
|
-
start: word.start / 1e3,
|
|
3059
|
-
// Convert ms to seconds
|
|
3060
|
-
end: word.end / 1e3,
|
|
3061
|
-
confidence: word.confidence
|
|
3062
|
-
})),
|
|
3063
|
-
data: turnMsg
|
|
3064
|
-
});
|
|
3065
|
-
} else if (message.type === "Termination") {
|
|
3066
|
-
const termMsg = message;
|
|
3067
|
-
callbacks?.onMetadata?.({
|
|
3068
|
-
terminated: true,
|
|
3069
|
-
audioDurationSeconds: termMsg.audio_duration_seconds,
|
|
3070
|
-
sessionDurationSeconds: termMsg.session_duration_seconds
|
|
3071
|
-
});
|
|
3072
|
-
}
|
|
3574
|
+
this.handleWebSocketMessage(message, callbacks);
|
|
3073
3575
|
} catch (error) {
|
|
3074
3576
|
callbacks?.onError?.({
|
|
3075
3577
|
code: "PARSE_ERROR",
|
|
@@ -3121,11 +3623,7 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
3121
3623
|
}
|
|
3122
3624
|
if (chunk.isLast) {
|
|
3123
3625
|
flushAudioBuffer();
|
|
3124
|
-
ws.send(
|
|
3125
|
-
JSON.stringify({
|
|
3126
|
-
terminate_session: true
|
|
3127
|
-
})
|
|
3128
|
-
);
|
|
3626
|
+
ws.send(JSON.stringify({ type: "Terminate" }));
|
|
3129
3627
|
}
|
|
3130
3628
|
},
|
|
3131
3629
|
close: async () => {
|
|
@@ -3135,11 +3633,7 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
3135
3633
|
sessionStatus = "closing";
|
|
3136
3634
|
flushAudioBuffer();
|
|
3137
3635
|
if (ws.readyState === import_ws2.default.OPEN) {
|
|
3138
|
-
ws.send(
|
|
3139
|
-
JSON.stringify({
|
|
3140
|
-
terminate_session: true
|
|
3141
|
-
})
|
|
3142
|
-
);
|
|
3636
|
+
ws.send(JSON.stringify({ type: "Terminate" }));
|
|
3143
3637
|
}
|
|
3144
3638
|
return new Promise((resolve) => {
|
|
3145
3639
|
const timeout = setTimeout(() => {
|
|
@@ -3153,9 +3647,166 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
3153
3647
|
resolve();
|
|
3154
3648
|
});
|
|
3155
3649
|
});
|
|
3650
|
+
},
|
|
3651
|
+
/**
|
|
3652
|
+
* Update streaming configuration mid-session
|
|
3653
|
+
*
|
|
3654
|
+
* Allows changing VAD, end-of-turn, and formatting settings
|
|
3655
|
+
* without restarting the stream.
|
|
3656
|
+
*
|
|
3657
|
+
* @param config - Configuration parameters to update
|
|
3658
|
+
*/
|
|
3659
|
+
updateConfiguration: (config) => {
|
|
3660
|
+
if (ws.readyState !== import_ws2.default.OPEN) {
|
|
3661
|
+
throw new Error("Cannot update configuration: WebSocket is not open");
|
|
3662
|
+
}
|
|
3663
|
+
const updateMsg = {
|
|
3664
|
+
type: "UpdateConfiguration",
|
|
3665
|
+
...config
|
|
3666
|
+
};
|
|
3667
|
+
ws.send(JSON.stringify(updateMsg));
|
|
3668
|
+
},
|
|
3669
|
+
/**
|
|
3670
|
+
* Force endpoint detection
|
|
3671
|
+
*
|
|
3672
|
+
* Immediately triggers end-of-turn, useful for manual control
|
|
3673
|
+
* of turn boundaries (e.g., when user presses a button).
|
|
3674
|
+
*/
|
|
3675
|
+
forceEndpoint: () => {
  // The command can only be delivered over an open socket; fail loudly otherwise.
  const socketIsOpen = ws.readyState === import_ws2.default.OPEN;
  if (!socketIsOpen) {
    throw new Error("Cannot force endpoint: WebSocket is not open");
  }
  // The command carries no payload beyond its type.
  ws.send(JSON.stringify({ type: "ForceEndpoint" }));
}
|
|
3157
3684
|
};
|
|
3158
3685
|
}
|
|
3686
|
+
/**
|
|
3687
|
+
* Build WebSocket URL with all streaming parameters
|
|
3688
|
+
*/
|
|
3689
|
+
buildStreamingUrl(options) {
|
|
3690
|
+
const params = new URLSearchParams();
|
|
3691
|
+
const aaiOpts = options?.assemblyaiStreaming || {};
|
|
3692
|
+
const sampleRate = options?.sampleRate || aaiOpts.sampleRate || 16e3;
|
|
3693
|
+
params.append("sample_rate", String(sampleRate));
|
|
3694
|
+
const encoding = options?.encoding ? mapEncodingToProvider(options.encoding, "assemblyai") : aaiOpts.encoding || "pcm_s16le";
|
|
3695
|
+
params.append("encoding", encoding);
|
|
3696
|
+
if (aaiOpts.speechModel) {
|
|
3697
|
+
params.append("speech_model", aaiOpts.speechModel);
|
|
3698
|
+
}
|
|
3699
|
+
if (aaiOpts.languageDetection) {
|
|
3700
|
+
params.append("language_detection", "true");
|
|
3701
|
+
}
|
|
3702
|
+
if (aaiOpts.endOfTurnConfidenceThreshold !== void 0) {
|
|
3703
|
+
params.append(
|
|
3704
|
+
"end_of_turn_confidence_threshold",
|
|
3705
|
+
String(aaiOpts.endOfTurnConfidenceThreshold)
|
|
3706
|
+
);
|
|
3707
|
+
}
|
|
3708
|
+
if (aaiOpts.minEndOfTurnSilenceWhenConfident !== void 0) {
|
|
3709
|
+
params.append(
|
|
3710
|
+
"min_end_of_turn_silence_when_confident",
|
|
3711
|
+
String(aaiOpts.minEndOfTurnSilenceWhenConfident)
|
|
3712
|
+
);
|
|
3713
|
+
}
|
|
3714
|
+
if (aaiOpts.maxTurnSilence !== void 0) {
|
|
3715
|
+
params.append("max_turn_silence", String(aaiOpts.maxTurnSilence));
|
|
3716
|
+
}
|
|
3717
|
+
if (aaiOpts.vadThreshold !== void 0) {
|
|
3718
|
+
params.append("vad_threshold", String(aaiOpts.vadThreshold));
|
|
3719
|
+
}
|
|
3720
|
+
if (aaiOpts.formatTurns !== void 0) {
|
|
3721
|
+
params.append("format_turns", String(aaiOpts.formatTurns));
|
|
3722
|
+
}
|
|
3723
|
+
if (aaiOpts.filterProfanity) {
|
|
3724
|
+
params.append("filter_profanity", "true");
|
|
3725
|
+
}
|
|
3726
|
+
const keyterms = options?.customVocabulary || aaiOpts.keyterms;
|
|
3727
|
+
if (keyterms && keyterms.length > 0) {
|
|
3728
|
+
keyterms.forEach((term) => params.append("keyterms", term));
|
|
3729
|
+
}
|
|
3730
|
+
if (aaiOpts.keytermsPrompt && aaiOpts.keytermsPrompt.length > 0) {
|
|
3731
|
+
aaiOpts.keytermsPrompt.forEach((prompt) => params.append("keyterms_prompt", prompt));
|
|
3732
|
+
}
|
|
3733
|
+
if (aaiOpts.inactivityTimeout !== void 0) {
|
|
3734
|
+
params.append("inactivity_timeout", String(aaiOpts.inactivityTimeout));
|
|
3735
|
+
}
|
|
3736
|
+
return `${this.wsBaseUrl}?${params.toString()}`;
|
|
3737
|
+
}
|
|
3738
|
+
/**
|
|
3739
|
+
* Handle all WebSocket message types from AssemblyAI streaming
|
|
3740
|
+
*/
|
|
3741
|
+
handleWebSocketMessage(message, callbacks) {
|
|
3742
|
+
if ("error" in message) {
|
|
3743
|
+
callbacks?.onError?.({
|
|
3744
|
+
code: "API_ERROR",
|
|
3745
|
+
message: message.error
|
|
3746
|
+
});
|
|
3747
|
+
return;
|
|
3748
|
+
}
|
|
3749
|
+
const typedMessage = message;
|
|
3750
|
+
switch (typedMessage.type) {
|
|
3751
|
+
case "Begin": {
|
|
3752
|
+
const beginMsg = typedMessage;
|
|
3753
|
+
callbacks?.onMetadata?.({
|
|
3754
|
+
type: "begin",
|
|
3755
|
+
sessionId: beginMsg.id,
|
|
3756
|
+
expiresAt: new Date(beginMsg.expires_at).toISOString()
|
|
3757
|
+
});
|
|
3758
|
+
break;
|
|
3759
|
+
}
|
|
3760
|
+
case "Turn": {
|
|
3761
|
+
const turnMsg = typedMessage;
|
|
3762
|
+
callbacks?.onTranscript?.({
|
|
3763
|
+
type: "transcript",
|
|
3764
|
+
text: turnMsg.transcript,
|
|
3765
|
+
isFinal: turnMsg.end_of_turn,
|
|
3766
|
+
confidence: turnMsg.end_of_turn_confidence,
|
|
3767
|
+
language: turnMsg.language_code,
|
|
3768
|
+
words: turnMsg.words.map((w) => ({
|
|
3769
|
+
word: w.text,
|
|
3770
|
+
start: w.start / 1e3,
|
|
3771
|
+
// Convert ms to seconds
|
|
3772
|
+
end: w.end / 1e3,
|
|
3773
|
+
confidence: w.confidence
|
|
3774
|
+
})),
|
|
3775
|
+
data: turnMsg
|
|
3776
|
+
});
|
|
3777
|
+
if (turnMsg.end_of_turn) {
|
|
3778
|
+
const words = turnMsg.words;
|
|
3779
|
+
const start = words.length > 0 ? words[0].start / 1e3 : 0;
|
|
3780
|
+
const end = words.length > 0 ? words[words.length - 1].end / 1e3 : 0;
|
|
3781
|
+
callbacks?.onUtterance?.({
|
|
3782
|
+
text: turnMsg.transcript,
|
|
3783
|
+
start,
|
|
3784
|
+
end,
|
|
3785
|
+
confidence: turnMsg.end_of_turn_confidence,
|
|
3786
|
+
words: turnMsg.words.map((w) => ({
|
|
3787
|
+
word: w.text,
|
|
3788
|
+
start: w.start / 1e3,
|
|
3789
|
+
end: w.end / 1e3,
|
|
3790
|
+
confidence: w.confidence
|
|
3791
|
+
}))
|
|
3792
|
+
});
|
|
3793
|
+
}
|
|
3794
|
+
break;
|
|
3795
|
+
}
|
|
3796
|
+
case "Termination": {
|
|
3797
|
+
const termMsg = typedMessage;
|
|
3798
|
+
callbacks?.onMetadata?.({
|
|
3799
|
+
type: "termination",
|
|
3800
|
+
audioDurationSeconds: termMsg.audio_duration_seconds,
|
|
3801
|
+
sessionDurationSeconds: termMsg.session_duration_seconds
|
|
3802
|
+
});
|
|
3803
|
+
break;
|
|
3804
|
+
}
|
|
3805
|
+
default:
|
|
3806
|
+
callbacks?.onMetadata?.(message);
|
|
3807
|
+
break;
|
|
3808
|
+
}
|
|
3809
|
+
}
|
|
3159
3810
|
};
|
|
3160
3811
|
function createAssemblyAIAdapter(config) {
|
|
3161
3812
|
const adapter = new AssemblyAIAdapter();
|
|
@@ -3417,11 +4068,11 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
3417
4068
|
return void 0;
|
|
3418
4069
|
}
|
|
3419
4070
|
return alternative.words.map(
|
|
3420
|
-
(
|
|
3421
|
-
|
|
3422
|
-
start:
|
|
3423
|
-
end:
|
|
3424
|
-
confidence:
|
|
4071
|
+
(w) => ({
|
|
4072
|
+
word: w.word || "",
|
|
4073
|
+
start: w.start || 0,
|
|
4074
|
+
end: w.end || 0,
|
|
4075
|
+
confidence: w.confidence,
|
|
3425
4076
|
speaker: void 0
|
|
3426
4077
|
// Speaker info is at utterance level, not word level
|
|
3427
4078
|
})
|
|
@@ -3441,11 +4092,11 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
3441
4092
|
end: utterance.end || 0,
|
|
3442
4093
|
speaker: utterance.speaker?.toString(),
|
|
3443
4094
|
confidence: utterance.confidence,
|
|
3444
|
-
words: utterance.words?.map((
|
|
3445
|
-
|
|
3446
|
-
start:
|
|
3447
|
-
end:
|
|
3448
|
-
confidence:
|
|
4095
|
+
words: utterance.words?.map((w) => ({
|
|
4096
|
+
word: w.word || "",
|
|
4097
|
+
start: w.start || 0,
|
|
4098
|
+
end: w.end || 0,
|
|
4099
|
+
confidence: w.confidence
|
|
3449
4100
|
}))
|
|
3450
4101
|
}));
|
|
3451
4102
|
}
|
|
@@ -3464,11 +4115,44 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
3464
4115
|
* Creates a WebSocket connection to Deepgram for streaming transcription.
|
|
3465
4116
|
* Send audio chunks via session.sendAudio() and receive results via callbacks.
|
|
3466
4117
|
*
|
|
4118
|
+
* Supports all Deepgram streaming features:
|
|
4119
|
+
* - Real-time transcription with interim/final results
|
|
4120
|
+
* - Speech detection events (SpeechStarted, UtteranceEnd)
|
|
4121
|
+
* - Speaker diarization
|
|
4122
|
+
* - Language detection
|
|
4123
|
+
* - Real-time sentiment, entity detection, topics, intents
|
|
4124
|
+
* - Custom vocabulary (keywords, keyterms)
|
|
4125
|
+
* - PII redaction
|
|
4126
|
+
* - Filler words, numerals, measurements, paragraphs
|
|
4127
|
+
* - Profanity filtering
|
|
4128
|
+
* - Dictation mode
|
|
4129
|
+
*
|
|
3467
4130
|
* @param options - Streaming configuration options
|
|
4131
|
+
* @param options.encoding - Audio encoding (linear16, flac, mulaw, opus, speex, g729)
|
|
4132
|
+
* @param options.sampleRate - Sample rate in Hz
|
|
4133
|
+
* @param options.channels - Number of audio channels
|
|
4134
|
+
* @param options.language - Language code for transcription
|
|
4135
|
+
* @param options.model - Model to use (nova-2, nova-3, base, enhanced, etc.)
|
|
4136
|
+
* @param options.diarization - Enable speaker identification
|
|
4137
|
+
* @param options.languageDetection - Auto-detect language
|
|
4138
|
+
* @param options.interimResults - Enable partial transcripts
|
|
4139
|
+
* @param options.summarization - Enable summarization
|
|
4140
|
+
* @param options.sentimentAnalysis - Enable sentiment analysis
|
|
4141
|
+
* @param options.entityDetection - Enable entity detection
|
|
4142
|
+
* @param options.piiRedaction - Enable PII redaction
|
|
4143
|
+
* @param options.customVocabulary - Keywords to boost recognition
|
|
4144
|
+
* @param options.deepgramStreaming - All Deepgram-specific streaming options
|
|
3468
4145
|
* @param callbacks - Event callbacks for transcription results
|
|
4146
|
+
* @param callbacks.onTranscript - Interim/final transcript received
|
|
4147
|
+
* @param callbacks.onUtterance - Complete utterance detected
|
|
4148
|
+
* @param callbacks.onSpeechStart - Speech detected (Deepgram SpeechStarted)
|
|
4149
|
+
* @param callbacks.onSpeechEnd - Speech ended (Deepgram UtteranceEnd)
|
|
4150
|
+
* @param callbacks.onMetadata - Metadata received
|
|
4151
|
+
* @param callbacks.onError - Error occurred
|
|
4152
|
+
* @param callbacks.onClose - Connection closed
|
|
3469
4153
|
* @returns Promise that resolves with a StreamingSession
|
|
3470
4154
|
*
|
|
3471
|
-
* @example
|
|
4155
|
+
* @example Basic real-time streaming
|
|
3472
4156
|
* ```typescript
|
|
3473
4157
|
* const session = await adapter.transcribeStream({
|
|
3474
4158
|
* encoding: 'linear16',
|
|
@@ -3491,32 +4175,47 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
3491
4175
|
* });
|
|
3492
4176
|
*
|
|
3493
4177
|
* // Send audio chunks
|
|
3494
|
-
* const audioChunk = getAudioChunk();
|
|
4178
|
+
* const audioChunk = getAudioChunk();
|
|
3495
4179
|
* await session.sendAudio({ data: audioChunk });
|
|
3496
4180
|
*
|
|
3497
4181
|
* // Close when done
|
|
3498
4182
|
* await session.close();
|
|
3499
4183
|
* ```
|
|
4184
|
+
*
|
|
4185
|
+
* @example Advanced streaming with all features
|
|
4186
|
+
* ```typescript
|
|
4187
|
+
* const session = await adapter.transcribeStream({
|
|
4188
|
+
* encoding: 'linear16',
|
|
4189
|
+
* sampleRate: 16000,
|
|
4190
|
+
* language: 'en',
|
|
4191
|
+
* model: 'nova-3',
|
|
4192
|
+
* diarization: true,
|
|
4193
|
+
* sentimentAnalysis: true,
|
|
4194
|
+
* entityDetection: true,
|
|
4195
|
+
* deepgramStreaming: {
|
|
4196
|
+
* fillerWords: true,
|
|
4197
|
+
* numerals: true,
|
|
4198
|
+
* profanityFilter: true,
|
|
4199
|
+
* topics: true,
|
|
4200
|
+
* intents: true,
|
|
4201
|
+
* customTopic: ['sales', 'support'],
|
|
4202
|
+
* customIntent: ['purchase', 'complaint'],
|
|
4203
|
+
* keyterm: ['TypeScript', 'JavaScript'],
|
|
4204
|
+
* utteranceSplit: 0.8, // seconds of silence; forwarded unchanged as Deepgram `utt_split`
|
|
4205
|
+
* punctuate: true,
|
|
4206
|
+
* smartFormat: true
|
|
4207
|
+
* }
|
|
4208
|
+
* }, {
|
|
4209
|
+
* onTranscript: (e) => console.log('Transcript:', e.text),
|
|
4210
|
+
* onSpeechStart: (e) => console.log('Speech started at:', e.timestamp),
|
|
4211
|
+
* onSpeechEnd: (e) => console.log('Utterance ended'),
|
|
4212
|
+
* onMetadata: (m) => console.log('Metadata:', m)
|
|
4213
|
+
* });
|
|
4214
|
+
* ```
|
|
3500
4215
|
*/
|
|
3501
4216
|
async transcribeStream(options, callbacks) {
|
|
3502
4217
|
this.validateConfig();
|
|
3503
|
-
const
|
|
3504
|
-
if (options?.encoding) params.append("encoding", options.encoding);
|
|
3505
|
-
if (options?.sampleRate) params.append("sample_rate", options.sampleRate.toString());
|
|
3506
|
-
if (options?.channels) params.append("channels", options.channels.toString());
|
|
3507
|
-
if (options?.language) params.append("language", options.language);
|
|
3508
|
-
if (options?.model) params.append("model", options.model);
|
|
3509
|
-
if (options?.languageDetection) params.append("detect_language", "true");
|
|
3510
|
-
if (options?.diarization) params.append("diarize", "true");
|
|
3511
|
-
if (options?.interimResults) params.append("interim_results", "true");
|
|
3512
|
-
if (options?.summarization) params.append("summarize", "true");
|
|
3513
|
-
if (options?.sentimentAnalysis) params.append("sentiment", "true");
|
|
3514
|
-
if (options?.entityDetection) params.append("detect_entities", "true");
|
|
3515
|
-
if (options?.piiRedaction) params.append("redact", "pii");
|
|
3516
|
-
if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
3517
|
-
params.append("keywords", options.customVocabulary.join(","));
|
|
3518
|
-
}
|
|
3519
|
-
const wsUrl = `${this.wsBaseUrl}?${params.toString()}`;
|
|
4218
|
+
const wsUrl = this.buildStreamingUrl(options);
|
|
3520
4219
|
const ws = new import_ws3.default(wsUrl, {
|
|
3521
4220
|
headers: {
|
|
3522
4221
|
Authorization: `Token ${this.config.apiKey}`
|
|
@@ -3531,31 +4230,7 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
3531
4230
|
ws.on("message", (data) => {
|
|
3532
4231
|
try {
|
|
3533
4232
|
const message = JSON.parse(data.toString());
|
|
3534
|
-
|
|
3535
|
-
const channel = message.channel.alternatives[0];
|
|
3536
|
-
if (channel) {
|
|
3537
|
-
const transcript = channel.transcript;
|
|
3538
|
-
const isFinal = message.is_final;
|
|
3539
|
-
const words = channel.words?.map((word) => ({
|
|
3540
|
-
text: word.word,
|
|
3541
|
-
start: word.start,
|
|
3542
|
-
end: word.end,
|
|
3543
|
-
confidence: word.confidence
|
|
3544
|
-
}));
|
|
3545
|
-
callbacks?.onTranscript?.({
|
|
3546
|
-
type: "transcript",
|
|
3547
|
-
text: transcript,
|
|
3548
|
-
isFinal,
|
|
3549
|
-
words,
|
|
3550
|
-
confidence: channel.confidence,
|
|
3551
|
-
data: message
|
|
3552
|
-
});
|
|
3553
|
-
}
|
|
3554
|
-
} else if (message.type === "UtteranceEnd") {
|
|
3555
|
-
callbacks?.onMetadata?.(message);
|
|
3556
|
-
} else if (message.type === "Metadata") {
|
|
3557
|
-
callbacks?.onMetadata?.(message);
|
|
3558
|
-
}
|
|
4233
|
+
this.handleWebSocketMessage(message, callbacks);
|
|
3559
4234
|
} catch (error) {
|
|
3560
4235
|
callbacks?.onError?.({
|
|
3561
4236
|
code: "PARSE_ERROR",
|
|
@@ -3628,6 +4303,210 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
3628
4303
|
}
|
|
3629
4304
|
};
|
|
3630
4305
|
}
|
|
4306
|
+
/**
|
|
4307
|
+
* Build WebSocket URL with all streaming parameters
|
|
4308
|
+
*/
|
|
4309
|
+
buildStreamingUrl(options) {
|
|
4310
|
+
const params = new URLSearchParams();
|
|
4311
|
+
const dgOpts = options?.deepgramStreaming || {};
|
|
4312
|
+
if (options?.encoding || dgOpts.encoding) {
|
|
4313
|
+
params.append("encoding", options?.encoding || dgOpts.encoding);
|
|
4314
|
+
}
|
|
4315
|
+
if (options?.sampleRate || dgOpts.sampleRate) {
|
|
4316
|
+
params.append("sample_rate", String(options?.sampleRate || dgOpts.sampleRate));
|
|
4317
|
+
}
|
|
4318
|
+
if (options?.channels || dgOpts.channels) {
|
|
4319
|
+
params.append("channels", String(options?.channels || dgOpts.channels));
|
|
4320
|
+
}
|
|
4321
|
+
if (options?.language || dgOpts.language) {
|
|
4322
|
+
params.append("language", options?.language || dgOpts.language);
|
|
4323
|
+
}
|
|
4324
|
+
if (options?.model || dgOpts.model) {
|
|
4325
|
+
params.append("model", options?.model || dgOpts.model);
|
|
4326
|
+
}
|
|
4327
|
+
if (dgOpts.version) {
|
|
4328
|
+
params.append("version", dgOpts.version);
|
|
4329
|
+
}
|
|
4330
|
+
if (options?.languageDetection || dgOpts.languageDetection) {
|
|
4331
|
+
params.append("detect_language", "true");
|
|
4332
|
+
}
|
|
4333
|
+
if (options?.diarization || dgOpts.diarization) {
|
|
4334
|
+
params.append("diarize", "true");
|
|
4335
|
+
}
|
|
4336
|
+
if (options?.interimResults || dgOpts.interimResults) {
|
|
4337
|
+
params.append("interim_results", "true");
|
|
4338
|
+
}
|
|
4339
|
+
if (dgOpts.punctuate !== void 0) {
|
|
4340
|
+
params.append("punctuate", String(dgOpts.punctuate));
|
|
4341
|
+
}
|
|
4342
|
+
if (dgOpts.smartFormat !== void 0) {
|
|
4343
|
+
params.append("smart_format", String(dgOpts.smartFormat));
|
|
4344
|
+
}
|
|
4345
|
+
if (dgOpts.fillerWords) {
|
|
4346
|
+
params.append("filler_words", "true");
|
|
4347
|
+
}
|
|
4348
|
+
if (dgOpts.numerals) {
|
|
4349
|
+
params.append("numerals", "true");
|
|
4350
|
+
}
|
|
4351
|
+
if (dgOpts.measurements) {
|
|
4352
|
+
params.append("measurements", "true");
|
|
4353
|
+
}
|
|
4354
|
+
if (dgOpts.paragraphs) {
|
|
4355
|
+
params.append("paragraphs", "true");
|
|
4356
|
+
}
|
|
4357
|
+
if (dgOpts.profanityFilter) {
|
|
4358
|
+
params.append("profanity_filter", "true");
|
|
4359
|
+
}
|
|
4360
|
+
if (dgOpts.dictation) {
|
|
4361
|
+
params.append("dictation", "true");
|
|
4362
|
+
}
|
|
4363
|
+
if (dgOpts.utteranceSplit) {
|
|
4364
|
+
params.append("utt_split", String(dgOpts.utteranceSplit));
|
|
4365
|
+
}
|
|
4366
|
+
if (options?.summarization || dgOpts.summarize) {
|
|
4367
|
+
params.append("summarize", "true");
|
|
4368
|
+
}
|
|
4369
|
+
if (options?.sentimentAnalysis || dgOpts.sentiment) {
|
|
4370
|
+
params.append("sentiment", "true");
|
|
4371
|
+
}
|
|
4372
|
+
if (options?.entityDetection || dgOpts.detectEntities) {
|
|
4373
|
+
params.append("detect_entities", "true");
|
|
4374
|
+
}
|
|
4375
|
+
if (dgOpts.topics) {
|
|
4376
|
+
params.append("topics", "true");
|
|
4377
|
+
}
|
|
4378
|
+
if (dgOpts.customTopic && dgOpts.customTopic.length > 0) {
|
|
4379
|
+
dgOpts.customTopic.forEach((topic) => params.append("custom_topic", topic));
|
|
4380
|
+
}
|
|
4381
|
+
if (dgOpts.customTopicMode) {
|
|
4382
|
+
params.append("custom_topic_mode", dgOpts.customTopicMode);
|
|
4383
|
+
}
|
|
4384
|
+
if (dgOpts.intents) {
|
|
4385
|
+
params.append("intents", "true");
|
|
4386
|
+
}
|
|
4387
|
+
if (dgOpts.customIntent && dgOpts.customIntent.length > 0) {
|
|
4388
|
+
dgOpts.customIntent.forEach((intent) => params.append("custom_intent", intent));
|
|
4389
|
+
}
|
|
4390
|
+
if (dgOpts.customIntentMode) {
|
|
4391
|
+
params.append("custom_intent_mode", dgOpts.customIntentMode);
|
|
4392
|
+
}
|
|
4393
|
+
const keywords = options?.customVocabulary || dgOpts.keywords;
|
|
4394
|
+
if (keywords) {
|
|
4395
|
+
const keywordList = Array.isArray(keywords) ? keywords : [keywords];
|
|
4396
|
+
keywordList.forEach((kw) => params.append("keywords", kw));
|
|
4397
|
+
}
|
|
4398
|
+
if (dgOpts.keyterm && dgOpts.keyterm.length > 0) {
|
|
4399
|
+
dgOpts.keyterm.forEach((term) => params.append("keyterm", term));
|
|
4400
|
+
}
|
|
4401
|
+
if (options?.piiRedaction || dgOpts.redact) {
|
|
4402
|
+
if (Array.isArray(dgOpts.redact)) {
|
|
4403
|
+
dgOpts.redact.forEach((r) => params.append("redact", r));
|
|
4404
|
+
} else if (dgOpts.redact === true || options?.piiRedaction) {
|
|
4405
|
+
params.append("redact", "pii");
|
|
4406
|
+
params.append("redact", "pci");
|
|
4407
|
+
}
|
|
4408
|
+
}
|
|
4409
|
+
if (dgOpts.callback) {
|
|
4410
|
+
params.append("callback", dgOpts.callback);
|
|
4411
|
+
}
|
|
4412
|
+
if (dgOpts.tag && dgOpts.tag.length > 0) {
|
|
4413
|
+
dgOpts.tag.forEach((t) => params.append("tag", t));
|
|
4414
|
+
}
|
|
4415
|
+
if (dgOpts.extra) {
|
|
4416
|
+
params.append("extra", JSON.stringify(dgOpts.extra));
|
|
4417
|
+
}
|
|
4418
|
+
if (options?.endpointing !== void 0 || dgOpts.endpointing !== void 0) {
|
|
4419
|
+
const ep = options?.endpointing ?? dgOpts.endpointing;
|
|
4420
|
+
if (ep === false) {
|
|
4421
|
+
params.append("endpointing", "false");
|
|
4422
|
+
} else if (typeof ep === "number") {
|
|
4423
|
+
params.append("endpointing", String(ep));
|
|
4424
|
+
}
|
|
4425
|
+
}
|
|
4426
|
+
if (dgOpts.vadThreshold !== void 0) {
|
|
4427
|
+
params.append("vad_events", "true");
|
|
4428
|
+
}
|
|
4429
|
+
return `${this.wsBaseUrl}?${params.toString()}`;
|
|
4430
|
+
}
|
|
4431
|
+
/**
|
|
4432
|
+
* Handle all WebSocket message types from Deepgram streaming
|
|
4433
|
+
*/
|
|
4434
|
+
handleWebSocketMessage(message, callbacks) {
|
|
4435
|
+
switch (message.type) {
|
|
4436
|
+
case "Results": {
|
|
4437
|
+
const channel = message.channel.alternatives[0];
|
|
4438
|
+
if (channel && channel.transcript) {
|
|
4439
|
+
callbacks?.onTranscript?.({
|
|
4440
|
+
type: "transcript",
|
|
4441
|
+
text: channel.transcript,
|
|
4442
|
+
isFinal: message.is_final,
|
|
4443
|
+
confidence: channel.confidence,
|
|
4444
|
+
language: message.channel.detected_language,
|
|
4445
|
+
words: channel.words?.map((w) => ({
|
|
4446
|
+
word: w.punctuated_word || w.word,
|
|
4447
|
+
start: w.start,
|
|
4448
|
+
end: w.end,
|
|
4449
|
+
confidence: w.confidence,
|
|
4450
|
+
speaker: w.speaker?.toString()
|
|
4451
|
+
})),
|
|
4452
|
+
data: message
|
|
4453
|
+
});
|
|
4454
|
+
}
|
|
4455
|
+
if (message.speech_final && channel && channel.transcript) {
|
|
4456
|
+
callbacks?.onUtterance?.({
|
|
4457
|
+
text: channel.transcript,
|
|
4458
|
+
start: message.start,
|
|
4459
|
+
end: message.start + message.duration,
|
|
4460
|
+
confidence: channel.confidence,
|
|
4461
|
+
words: channel.words?.map((w) => ({
|
|
4462
|
+
word: w.punctuated_word || w.word,
|
|
4463
|
+
start: w.start,
|
|
4464
|
+
end: w.end,
|
|
4465
|
+
confidence: w.confidence
|
|
4466
|
+
}))
|
|
4467
|
+
});
|
|
4468
|
+
}
|
|
4469
|
+
break;
|
|
4470
|
+
}
|
|
4471
|
+
case "SpeechStarted": {
|
|
4472
|
+
const event = {
|
|
4473
|
+
type: "speech_start",
|
|
4474
|
+
timestamp: message.timestamp,
|
|
4475
|
+
channel: message.channel[0]
|
|
4476
|
+
};
|
|
4477
|
+
callbacks?.onSpeechStart?.(event);
|
|
4478
|
+
break;
|
|
4479
|
+
}
|
|
4480
|
+
case "UtteranceEnd": {
|
|
4481
|
+
const event = {
|
|
4482
|
+
type: "speech_end",
|
|
4483
|
+
timestamp: message.last_word_end,
|
|
4484
|
+
channel: message.channel[0]
|
|
4485
|
+
};
|
|
4486
|
+
callbacks?.onSpeechEnd?.(event);
|
|
4487
|
+
break;
|
|
4488
|
+
}
|
|
4489
|
+
case "Metadata": {
|
|
4490
|
+
callbacks?.onMetadata?.(message);
|
|
4491
|
+
break;
|
|
4492
|
+
}
|
|
4493
|
+
case "Error": {
|
|
4494
|
+
callbacks?.onError?.({
|
|
4495
|
+
code: message.variant || "DEEPGRAM_ERROR",
|
|
4496
|
+
message: message.message || message.description || "Unknown error",
|
|
4497
|
+
details: message
|
|
4498
|
+
});
|
|
4499
|
+
break;
|
|
4500
|
+
}
|
|
4501
|
+
case "CloseStream": {
|
|
4502
|
+
break;
|
|
4503
|
+
}
|
|
4504
|
+
default: {
|
|
4505
|
+
callbacks?.onMetadata?.(message);
|
|
4506
|
+
break;
|
|
4507
|
+
}
|
|
4508
|
+
}
|
|
4509
|
+
}
|
|
3631
4510
|
};
|
|
3632
4511
|
function createDeepgramAdapter(config) {
|
|
3633
4512
|
const adapter = new DeepgramAdapter();
|
|
@@ -3881,12 +4760,12 @@ var AzureSTTAdapter = class extends BaseAdapter {
|
|
|
3881
4760
|
const recognizedPhrases = transcriptionData.recognizedPhrases || [];
|
|
3882
4761
|
const fullText = combinedPhrases.map((phrase) => phrase.display || phrase.lexical).join(" ") || "";
|
|
3883
4762
|
const words = recognizedPhrases.flatMap(
|
|
3884
|
-
(phrase) => (phrase.nBest?.[0]?.words || []).map((
|
|
3885
|
-
|
|
3886
|
-
start:
|
|
4763
|
+
(phrase) => (phrase.nBest?.[0]?.words || []).map((w) => ({
|
|
4764
|
+
word: w.word,
|
|
4765
|
+
start: w.offsetInTicks / 1e7,
|
|
3887
4766
|
// Convert ticks to seconds
|
|
3888
|
-
end: (
|
|
3889
|
-
confidence:
|
|
4767
|
+
end: (w.offsetInTicks + w.durationInTicks) / 1e7,
|
|
4768
|
+
confidence: w.confidence,
|
|
3890
4769
|
speaker: phrase.speaker !== void 0 ? phrase.speaker.toString() : void 0
|
|
3891
4770
|
}))
|
|
3892
4771
|
);
|
|
@@ -4167,10 +5046,10 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
|
|
|
4167
5046
|
}
|
|
4168
5047
|
if ("duration" in response && "language" in response) {
|
|
4169
5048
|
const verboseResponse = response;
|
|
4170
|
-
const words = verboseResponse.words?.map((
|
|
4171
|
-
|
|
4172
|
-
start:
|
|
4173
|
-
end:
|
|
5049
|
+
const words = verboseResponse.words?.map((w) => ({
|
|
5050
|
+
word: w.word,
|
|
5051
|
+
start: w.start,
|
|
5052
|
+
end: w.end,
|
|
4174
5053
|
confidence: void 0
|
|
4175
5054
|
}));
|
|
4176
5055
|
const requestId2 = `openai-${Date.now()}`;
|
|
@@ -4436,7 +5315,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
4436
5315
|
normalizeResponse(response) {
|
|
4437
5316
|
const text = response.results.filter((r) => r.type === "word" && r.alternatives).map((r) => r.alternatives[0]?.content || "").join(" ");
|
|
4438
5317
|
const words = response.results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
|
|
4439
|
-
|
|
5318
|
+
word: result.alternatives?.[0]?.content || "",
|
|
4440
5319
|
start: result.start_time,
|
|
4441
5320
|
end: result.end_time,
|
|
4442
5321
|
confidence: result.alternatives?.[0]?.confidence,
|
|
@@ -4587,12 +5466,12 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
|
|
|
4587
5466
|
/**
|
|
4588
5467
|
* Convert Gladia WordDTO to unified Word type
|
|
4589
5468
|
*/
|
|
4590
|
-
mapWord(
|
|
5469
|
+
mapWord(w) {
|
|
4591
5470
|
return {
|
|
4592
|
-
|
|
4593
|
-
start:
|
|
4594
|
-
end:
|
|
4595
|
-
confidence:
|
|
5471
|
+
word: w.word,
|
|
5472
|
+
start: w.start,
|
|
5473
|
+
end: w.end,
|
|
5474
|
+
confidence: w.confidence
|
|
4596
5475
|
};
|
|
4597
5476
|
}
|
|
4598
5477
|
/**
|
|
@@ -4930,11 +5809,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
|
|
|
4930
5809
|
raw: payload
|
|
4931
5810
|
};
|
|
4932
5811
|
}
|
|
4933
|
-
const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((
|
|
4934
|
-
|
|
4935
|
-
start:
|
|
4936
|
-
end:
|
|
4937
|
-
confidence:
|
|
5812
|
+
const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((w) => ({
|
|
5813
|
+
word: w.word || "",
|
|
5814
|
+
start: w.start || 0,
|
|
5815
|
+
end: w.end || 0,
|
|
5816
|
+
confidence: w.confidence
|
|
4938
5817
|
})) : void 0;
|
|
4939
5818
|
const speakers = response.results.utterances && response.results.utterances.length > 0 ? response.results.utterances.map((utterance) => ({
|
|
4940
5819
|
id: utterance.speaker?.toString() || "unknown",
|
|
@@ -4948,11 +5827,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
|
|
|
4948
5827
|
end: utterance.end || 0,
|
|
4949
5828
|
speaker: utterance.speaker?.toString(),
|
|
4950
5829
|
confidence: utterance.confidence,
|
|
4951
|
-
words: utterance.words && utterance.words.length > 0 ? utterance.words.map((
|
|
4952
|
-
|
|
4953
|
-
start:
|
|
4954
|
-
end:
|
|
4955
|
-
confidence:
|
|
5830
|
+
words: utterance.words && utterance.words.length > 0 ? utterance.words.map((w) => ({
|
|
5831
|
+
word: w.word || "",
|
|
5832
|
+
start: w.start || 0,
|
|
5833
|
+
end: w.end || 0,
|
|
5834
|
+
confidence: w.confidence
|
|
4956
5835
|
})) : void 0
|
|
4957
5836
|
})) : void 0;
|
|
4958
5837
|
const summary = alternative.summaries?.[0]?.summary;
|
|
@@ -5464,6 +6343,9 @@ function createWebhookRouter() {
|
|
|
5464
6343
|
// Annotate the CommonJS export names for ESM import in node:
|
|
5465
6344
|
0 && (module.exports = {
|
|
5466
6345
|
AssemblyAIAdapter,
|
|
6346
|
+
AssemblyAIEncoding,
|
|
6347
|
+
AssemblyAISampleRate,
|
|
6348
|
+
AssemblyAISpeechModel,
|
|
5467
6349
|
AssemblyAITypes,
|
|
5468
6350
|
AssemblyAIWebhookHandler,
|
|
5469
6351
|
AzureSTTAdapter,
|
|
@@ -5471,8 +6353,18 @@ function createWebhookRouter() {
|
|
|
5471
6353
|
BaseAdapter,
|
|
5472
6354
|
BaseWebhookHandler,
|
|
5473
6355
|
DeepgramAdapter,
|
|
6356
|
+
DeepgramEncoding,
|
|
6357
|
+
DeepgramModel,
|
|
6358
|
+
DeepgramRedact,
|
|
6359
|
+
DeepgramTopicMode,
|
|
5474
6360
|
DeepgramWebhookHandler,
|
|
5475
6361
|
GladiaAdapter,
|
|
6362
|
+
GladiaBitDepth,
|
|
6363
|
+
GladiaEncoding,
|
|
6364
|
+
GladiaLanguage,
|
|
6365
|
+
GladiaModel,
|
|
6366
|
+
GladiaSampleRate,
|
|
6367
|
+
GladiaTranslationLanguage,
|
|
5476
6368
|
GladiaTypes,
|
|
5477
6369
|
GladiaWebhookHandler,
|
|
5478
6370
|
ListenV1EncodingParameter,
|