voice-router-dev 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +840 -117
- package/dist/index.d.ts +840 -117
- package/dist/index.js +1388 -496
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1375 -496
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
@@ -220,6 +220,312 @@ var ListenV1EncodingParameter = {
   g729: "g729"
 };
 
+// src/generated/deepgram/schema/listenV1RedactParameterOneOfItem.ts
+var ListenV1RedactParameterOneOfItem = {
+  pci: "pci",
+  pii: "pii",
+  numbers: "numbers"
+};
+
+// src/generated/deepgram/schema/sharedCustomTopicModeParameter.ts
+var SharedCustomTopicModeParameter = {
+  extended: "extended",
+  strict: "strict"
+};
+
+// src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
+var StreamingSupportedEncodingEnum = {
+  "wav/pcm": "wav/pcm",
+  "wav/alaw": "wav/alaw",
+  "wav/ulaw": "wav/ulaw"
+};
+
+// src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
+var StreamingSupportedSampleRateEnum = {
+  NUMBER_8000: 8e3,
+  NUMBER_16000: 16e3,
+  NUMBER_32000: 32e3,
+  NUMBER_44100: 44100,
+  NUMBER_48000: 48e3
+};
+
+// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
+var StreamingSupportedBitDepthEnum = {
+  NUMBER_8: 8,
+  NUMBER_16: 16,
+  NUMBER_24: 24,
+  NUMBER_32: 32
+};
+
+// src/generated/gladia/schema/streamingSupportedModels.ts
+var StreamingSupportedModels = {
+  "solaria-1": "solaria-1"
+};
+
+// src/generated/gladia/schema/transcriptionLanguageCodeEnum.ts
+var TranscriptionLanguageCodeEnum = {
+  af: "af",
+  am: "am",
+  ar: "ar",
+  as: "as",
+  az: "az",
+  ba: "ba",
+  be: "be",
+  bg: "bg",
+  bn: "bn",
+  bo: "bo",
+  br: "br",
+  bs: "bs",
+  ca: "ca",
+  cs: "cs",
+  cy: "cy",
+  da: "da",
+  de: "de",
+  el: "el",
+  en: "en",
+  es: "es",
+  et: "et",
+  eu: "eu",
+  fa: "fa",
+  fi: "fi",
+  fo: "fo",
+  fr: "fr",
+  gl: "gl",
+  gu: "gu",
+  ha: "ha",
+  haw: "haw",
+  he: "he",
+  hi: "hi",
+  hr: "hr",
+  ht: "ht",
+  hu: "hu",
+  hy: "hy",
+  id: "id",
+  is: "is",
+  it: "it",
+  ja: "ja",
+  jw: "jw",
+  ka: "ka",
+  kk: "kk",
+  km: "km",
+  kn: "kn",
+  ko: "ko",
+  la: "la",
+  lb: "lb",
+  ln: "ln",
+  lo: "lo",
+  lt: "lt",
+  lv: "lv",
+  mg: "mg",
+  mi: "mi",
+  mk: "mk",
+  ml: "ml",
+  mn: "mn",
+  mr: "mr",
+  ms: "ms",
+  mt: "mt",
+  my: "my",
+  ne: "ne",
+  nl: "nl",
+  nn: "nn",
+  no: "no",
+  oc: "oc",
+  pa: "pa",
+  pl: "pl",
+  ps: "ps",
+  pt: "pt",
+  ro: "ro",
+  ru: "ru",
+  sa: "sa",
+  sd: "sd",
+  si: "si",
+  sk: "sk",
+  sl: "sl",
+  sn: "sn",
+  so: "so",
+  sq: "sq",
+  sr: "sr",
+  su: "su",
+  sv: "sv",
+  sw: "sw",
+  ta: "ta",
+  te: "te",
+  tg: "tg",
+  th: "th",
+  tk: "tk",
+  tl: "tl",
+  tr: "tr",
+  tt: "tt",
+  uk: "uk",
+  ur: "ur",
+  uz: "uz",
+  vi: "vi",
+  yi: "yi",
+  yo: "yo",
+  zh: "zh"
+};
+
+// src/generated/gladia/schema/translationLanguageCodeEnum.ts
+var TranslationLanguageCodeEnum = {
+  af: "af",
+  am: "am",
+  ar: "ar",
+  as: "as",
+  az: "az",
+  ba: "ba",
+  be: "be",
+  bg: "bg",
+  bn: "bn",
+  bo: "bo",
+  br: "br",
+  bs: "bs",
+  ca: "ca",
+  cs: "cs",
+  cy: "cy",
+  da: "da",
+  de: "de",
+  el: "el",
+  en: "en",
+  es: "es",
+  et: "et",
+  eu: "eu",
+  fa: "fa",
+  fi: "fi",
+  fo: "fo",
+  fr: "fr",
+  gl: "gl",
+  gu: "gu",
+  ha: "ha",
+  haw: "haw",
+  he: "he",
+  hi: "hi",
+  hr: "hr",
+  ht: "ht",
+  hu: "hu",
+  hy: "hy",
+  id: "id",
+  is: "is",
+  it: "it",
+  ja: "ja",
+  jw: "jw",
+  ka: "ka",
+  kk: "kk",
+  km: "km",
+  kn: "kn",
+  ko: "ko",
+  la: "la",
+  lb: "lb",
+  ln: "ln",
+  lo: "lo",
+  lt: "lt",
+  lv: "lv",
+  mg: "mg",
+  mi: "mi",
+  mk: "mk",
+  ml: "ml",
+  mn: "mn",
+  mr: "mr",
+  ms: "ms",
+  mt: "mt",
+  my: "my",
+  ne: "ne",
+  nl: "nl",
+  nn: "nn",
+  no: "no",
+  oc: "oc",
+  pa: "pa",
+  pl: "pl",
+  ps: "ps",
+  pt: "pt",
+  ro: "ro",
+  ru: "ru",
+  sa: "sa",
+  sd: "sd",
+  si: "si",
+  sk: "sk",
+  sl: "sl",
+  sn: "sn",
+  so: "so",
+  sq: "sq",
+  sr: "sr",
+  su: "su",
+  sv: "sv",
+  sw: "sw",
+  ta: "ta",
+  te: "te",
+  tg: "tg",
+  th: "th",
+  tk: "tk",
+  tl: "tl",
+  tr: "tr",
+  tt: "tt",
+  uk: "uk",
+  ur: "ur",
+  uz: "uz",
+  vi: "vi",
+  wo: "wo",
+  yi: "yi",
+  yo: "yo",
+  zh: "zh"
+};
+
+// src/router/streaming-enums.ts
+var DeepgramModel = {
+  // Nova 3 models (latest)
+  "nova-3": "nova-3",
+  "nova-3-general": "nova-3-general",
+  "nova-3-medical": "nova-3-medical",
+  // Nova 2 models
+  "nova-2": "nova-2",
+  "nova-2-general": "nova-2-general",
+  "nova-2-meeting": "nova-2-meeting",
+  "nova-2-finance": "nova-2-finance",
+  "nova-2-conversationalai": "nova-2-conversationalai",
+  "nova-2-voicemail": "nova-2-voicemail",
+  "nova-2-video": "nova-2-video",
+  "nova-2-medical": "nova-2-medical",
+  "nova-2-drivethru": "nova-2-drivethru",
+  "nova-2-automotive": "nova-2-automotive",
+  // Nova 1 models
+  nova: "nova",
+  "nova-general": "nova-general",
+  "nova-phonecall": "nova-phonecall",
+  "nova-medical": "nova-medical",
+  // Enhanced models
+  enhanced: "enhanced",
+  "enhanced-general": "enhanced-general",
+  "enhanced-meeting": "enhanced-meeting",
+  "enhanced-phonecall": "enhanced-phonecall",
+  "enhanced-finance": "enhanced-finance",
+  // Base models
+  base: "base",
+  meeting: "meeting",
+  phonecall: "phonecall",
+  finance: "finance",
+  conversationalai: "conversationalai",
+  voicemail: "voicemail",
+  video: "video"
+};
+var AssemblyAIEncoding = {
+  /** PCM signed 16-bit little-endian (recommended) */
+  pcmS16le: "pcm_s16le",
+  /** μ-law (telephony) */
+  pcmMulaw: "pcm_mulaw"
+};
+var AssemblyAISpeechModel = {
+  /** Optimized for English */
+  english: "universal-streaming-english",
+  /** Supports 20+ languages */
+  multilingual: "universal-streaming-multilingual"
+};
+var AssemblyAISampleRate = {
+  rate8000: 8e3,
+  rate16000: 16e3,
+  rate22050: 22050,
+  rate44100: 44100,
+  rate48000: 48e3
+};
+
 // src/generated/deepgram/schema/speakV1EncodingParameter.ts
 var SpeakV1EncodingParameter = {
   linear16: "linear16",
@@ -249,30 +555,6 @@ var SpeakV1SampleRateParameter = {
   NUMBER_22050: 22050
 };
 
-// src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
-var StreamingSupportedEncodingEnum = {
-  "wav/pcm": "wav/pcm",
-  "wav/alaw": "wav/alaw",
-  "wav/ulaw": "wav/ulaw"
-};
-
-// src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
-var StreamingSupportedSampleRateEnum = {
-  NUMBER_8000: 8e3,
-  NUMBER_16000: 16e3,
-  NUMBER_32000: 32e3,
-  NUMBER_44100: 44100,
-  NUMBER_48000: 48e3
-};
-
-// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
-var StreamingSupportedBitDepthEnum = {
-  NUMBER_8: 8,
-  NUMBER_16: 16,
-  NUMBER_24: 24,
-  NUMBER_32: 32
-};
-
 // src/constants/defaults.ts
 var DEFAULT_TIMEOUTS = {
   /** Standard HTTP request timeout for API calls (60 seconds) */
@@ -1160,11 +1442,6 @@ var StreamingResponseStatus = {
   error: "error"
 };
 
-// src/generated/gladia/schema/streamingSupportedModels.ts
-var StreamingSupportedModels = {
-  "solaria-1": "solaria-1"
-};
-
 // src/generated/gladia/schema/streamingSupportedRegions.ts
 var StreamingSupportedRegions = {
   "us-west": "us-west",
@@ -1190,232 +1467,25 @@ var SummaryTypesEnum = {
   concise: "concise"
 };
 
-// src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
-var TranscriptionControllerListV2KindItem = {
-  "pre-recorded": "pre-recorded",
-  live: "live"
-};
-
-// src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
-var TranscriptionControllerListV2StatusItem = {
-  queued: "queued",
-  processing: "processing",
-  done: "done",
-  error: "error"
-};
-
-// src/generated/gladia/schema/transcriptionLanguageCodeEnum.ts
-var TranscriptionLanguageCodeEnum = {
-  af: "af",
-  am: "am",
-  ar: "ar",
-  as: "as",
-  az: "az",
-  ba: "ba",
-  be: "be",
-  bg: "bg",
-  bn: "bn",
-  bo: "bo",
-  br: "br",
-  bs: "bs",
-  ca: "ca",
-  cs: "cs",
-  cy: "cy",
-  da: "da",
-  de: "de",
-  el: "el",
-  en: "en",
-  es: "es",
-  et: "et",
-  eu: "eu",
-  fa: "fa",
-  fi: "fi",
-  fo: "fo",
-  fr: "fr",
-  gl: "gl",
-  gu: "gu",
-  ha: "ha",
-  haw: "haw",
-  he: "he",
-  hi: "hi",
-  hr: "hr",
-  ht: "ht",
-  hu: "hu",
-  hy: "hy",
-  id: "id",
-  is: "is",
-  it: "it",
-  ja: "ja",
-  jw: "jw",
-  ka: "ka",
-  kk: "kk",
-  km: "km",
-  kn: "kn",
-  ko: "ko",
-  la: "la",
-  lb: "lb",
-  ln: "ln",
-  lo: "lo",
-  lt: "lt",
-  lv: "lv",
-  mg: "mg",
-  mi: "mi",
-  mk: "mk",
-  ml: "ml",
-  mn: "mn",
-  mr: "mr",
-  ms: "ms",
-  mt: "mt",
-  my: "my",
-  ne: "ne",
-  nl: "nl",
-  nn: "nn",
-  no: "no",
-  oc: "oc",
-  pa: "pa",
-  pl: "pl",
-  ps: "ps",
-  pt: "pt",
-  ro: "ro",
-  ru: "ru",
-  sa: "sa",
-  sd: "sd",
-  si: "si",
-  sk: "sk",
-  sl: "sl",
-  sn: "sn",
-  so: "so",
-  sq: "sq",
-  sr: "sr",
-  su: "su",
-  sv: "sv",
-  sw: "sw",
-  ta: "ta",
-  te: "te",
-  tg: "tg",
-  th: "th",
-  tk: "tk",
-  tl: "tl",
-  tr: "tr",
-  tt: "tt",
-  uk: "uk",
-  ur: "ur",
-  uz: "uz",
-  vi: "vi",
-  yi: "yi",
-  yo: "yo",
-  zh: "zh"
-};
-
+// src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
+var TranscriptionControllerListV2KindItem = {
+  "pre-recorded": "pre-recorded",
+  live: "live"
+};
+
+// src/generated/gladia/schema/transcriptionControllerListV2StatusItem.ts
+var TranscriptionControllerListV2StatusItem = {
+  queued: "queued",
+  processing: "processing",
+  done: "done",
+  error: "error"
+};
+
 // src/generated/gladia/schema/transcriptMessageType.ts
 var TranscriptMessageType = {
   transcript: "transcript"
 };
 
-// src/generated/gladia/schema/translationLanguageCodeEnum.ts
-var TranslationLanguageCodeEnum = {
-  af: "af",
-  am: "am",
-  ar: "ar",
-  as: "as",
-  az: "az",
-  ba: "ba",
-  be: "be",
-  bg: "bg",
-  bn: "bn",
-  bo: "bo",
-  br: "br",
-  bs: "bs",
-  ca: "ca",
-  cs: "cs",
-  cy: "cy",
-  da: "da",
-  de: "de",
-  el: "el",
-  en: "en",
-  es: "es",
-  et: "et",
-  eu: "eu",
-  fa: "fa",
-  fi: "fi",
-  fo: "fo",
-  fr: "fr",
-  gl: "gl",
-  gu: "gu",
-  ha: "ha",
-  haw: "haw",
-  he: "he",
-  hi: "hi",
-  hr: "hr",
-  ht: "ht",
-  hu: "hu",
-  hy: "hy",
-  id: "id",
-  is: "is",
-  it: "it",
-  ja: "ja",
-  jw: "jw",
-  ka: "ka",
-  kk: "kk",
-  km: "km",
-  kn: "kn",
-  ko: "ko",
-  la: "la",
-  lb: "lb",
-  ln: "ln",
-  lo: "lo",
-  lt: "lt",
-  lv: "lv",
-  mg: "mg",
-  mi: "mi",
-  mk: "mk",
-  ml: "ml",
-  mn: "mn",
-  mr: "mr",
-  ms: "ms",
-  mt: "mt",
-  my: "my",
-  ne: "ne",
-  nl: "nl",
-  nn: "nn",
-  no: "no",
-  oc: "oc",
-  pa: "pa",
-  pl: "pl",
-  ps: "ps",
-  pt: "pt",
-  ro: "ro",
-  ru: "ru",
-  sa: "sa",
-  sd: "sd",
-  si: "si",
-  sk: "sk",
-  sl: "sl",
-  sn: "sn",
-  so: "so",
-  sq: "sq",
-  sr: "sr",
-  su: "su",
-  sv: "sv",
-  sw: "sw",
-  ta: "ta",
-  te: "te",
-  tg: "tg",
-  th: "th",
-  tk: "tk",
-  tl: "tl",
-  tr: "tr",
-  tt: "tt",
-  uk: "uk",
-  ur: "ur",
-  uz: "uz",
-  vi: "vi",
-  wo: "wo",
-  yi: "yi",
-  yo: "yo",
-  zh: "zh"
-};
-
 // src/generated/gladia/schema/translationMessageType.ts
 var TranslationMessageType = {
   translation: "translation"
@@ -1983,7 +2053,7 @@ var GladiaAdapter = class extends BaseAdapter {
       }))
     );
     return extractWords(allWords, (item) => ({
-      text: item.word.word,
+      word: item.word.word,
       start: item.word.start,
       end: item.word.end,
       confidence: item.word.confidence,
@@ -2003,11 +2073,11 @@ var GladiaAdapter = class extends BaseAdapter {
       end: utterance.end,
       speaker: utterance.speaker?.toString(),
       confidence: utterance.confidence,
-      words: utterance.words.map((word) => ({
-        text: word.word,
-        start: word.start,
-        end: word.end,
-        confidence: word.confidence
+      words: utterance.words.map((w) => ({
+        word: w.word,
+        start: w.start,
+        end: w.end,
+        confidence: w.confidence
       }))
     }));
   }
@@ -2059,11 +2129,46 @@ var GladiaAdapter = class extends BaseAdapter {
    * Creates a WebSocket connection to Gladia for streaming transcription.
    * First initializes a session via REST API, then connects to WebSocket.
    *
+   * Supports all Gladia streaming features:
+   * - Real-time transcription with interim/final results
+   * - Speech detection events (speech_start, speech_end)
+   * - Real-time translation to other languages
+   * - Real-time sentiment analysis
+   * - Real-time named entity recognition
+   * - Post-processing summarization and chapterization
+   * - Audio preprocessing (audio enhancement, speech threshold)
+   * - Custom vocabulary and spelling
+   * - Multi-language code switching
+   *
    * @param options - Streaming configuration options
+   * @param options.encoding - Audio encoding (wav/pcm, wav/alaw, wav/ulaw)
+   * @param options.sampleRate - Sample rate (8000, 16000, 32000, 44100, 48000)
+   * @param options.bitDepth - Bit depth (8, 16, 24, 32)
+   * @param options.channels - Number of channels (1-8)
+   * @param options.language - Language code for transcription
+   * @param options.interimResults - Enable partial/interim transcripts
+   * @param options.endpointing - Silence duration to end utterance (0.01-10 seconds)
+   * @param options.maxSilence - Max duration without endpointing (5-60 seconds)
+   * @param options.customVocabulary - Words to boost in recognition
+   * @param options.sentimentAnalysis - Enable real-time sentiment analysis
+   * @param options.entityDetection - Enable named entity recognition
+   * @param options.summarization - Enable post-processing summarization
+   * @param options.gladiaStreaming - Full Gladia streaming options (pre_processing, realtime_processing, post_processing, messages_config)
    * @param callbacks - Event callbacks for transcription results
+   * @param callbacks.onTranscript - Interim/final transcript received
+   * @param callbacks.onUtterance - Complete utterance detected
+   * @param callbacks.onSpeechStart - Speech detected (requires messages_config.receive_speech_events)
+   * @param callbacks.onSpeechEnd - Speech ended (requires messages_config.receive_speech_events)
+   * @param callbacks.onTranslation - Translation result (requires translation enabled)
+   * @param callbacks.onSentiment - Sentiment analysis result
+   * @param callbacks.onEntity - Named entity detected
+   * @param callbacks.onSummarization - Summarization completed
+   * @param callbacks.onChapterization - Chapterization completed
+   * @param callbacks.onAudioAck - Audio chunk acknowledged
+   * @param callbacks.onLifecycle - Session lifecycle events
    * @returns Promise that resolves with a StreamingSession
    *
-   * @example
+   * @example Basic real-time streaming
    * ```typescript
    * const session = await adapter.transcribeStream({
    *   encoding: 'wav/pcm',
@@ -2085,15 +2190,124 @@ var GladiaAdapter = class extends BaseAdapter {
    * });
    *
    * // Send audio chunks
-   * const audioChunk = getAudioChunk();
+   * const audioChunk = getAudioChunk();
    * await session.sendAudio({ data: audioChunk });
    *
    * // Close when done
    * await session.close();
    * ```
+   *
+   * @example Advanced streaming with all features
+   * ```typescript
+   * const session = await adapter.transcribeStream({
+   *   encoding: 'wav/pcm',
+   *   sampleRate: 16000,
+   *   language: 'en',
+   *   sentimentAnalysis: true,
+   *   entityDetection: true,
+   *   summarization: true,
+   *   gladiaStreaming: {
+   *     pre_processing: {
+   *       audio_enhancer: true,
+   *       speech_threshold: 0.5
+   *     },
+   *     realtime_processing: {
+   *       translation: true,
+   *       translation_config: { target_languages: ['fr', 'es'] }
+   *     },
+   *     post_processing: {
+   *       chapterization: true
+   *     },
+   *     messages_config: {
+   *       receive_speech_events: true,
+   *       receive_acknowledgments: true,
+   *       receive_lifecycle_events: true
+   *     }
+   *   }
+   * }, {
+   *   onTranscript: (e) => console.log('Transcript:', e.text),
+   *   onSpeechStart: (e) => console.log('Speech started at:', e.timestamp),
+   *   onSpeechEnd: (e) => console.log('Speech ended at:', e.timestamp),
+   *   onTranslation: (e) => console.log(`${e.targetLanguage}: ${e.translatedText}`),
+   *   onSentiment: (e) => console.log('Sentiment:', e.sentiment),
+   *   onEntity: (e) => console.log(`Entity: ${e.type} - ${e.text}`),
+   *   onSummarization: (e) => console.log('Summary:', e.summary),
+   *   onChapterization: (e) => console.log('Chapters:', e.chapters),
+   *   onAudioAck: (e) => console.log('Audio ack:', e.byteRange),
+   *   onLifecycle: (e) => console.log('Lifecycle:', e.eventType)
+   * });
+   * ```
    */
   async transcribeStream(options, callbacks) {
     this.validateConfig();
+    const streamingRequest = this.buildStreamingRequest(options);
+    const initResponse = await streamingControllerInitStreamingSessionV2(
+      streamingRequest,
+      void 0,
+      // no params
+      this.getAxiosConfig()
+    );
+    const { id, url: wsUrl } = initResponse.data;
+    const ws = new WebSocket(wsUrl);
+    let sessionStatus = "connecting";
+    setupWebSocketHandlers(ws, callbacks, (status) => {
+      sessionStatus = status;
+    });
+    ws.on("message", (data) => {
+      try {
+        const message = JSON.parse(data.toString());
+        this.handleWebSocketMessage(message, callbacks);
+      } catch (error) {
+        callbacks?.onError?.({
+          code: ERROR_CODES.PARSE_ERROR,
+          message: "Failed to parse WebSocket message",
+          details: error
+        });
+      }
+    });
+    await waitForWebSocketOpen(ws);
+    return {
+      id,
+      provider: this.name,
+      createdAt: /* @__PURE__ */ new Date(),
+      getStatus: () => sessionStatus,
+      sendAudio: async (chunk) => {
+        validateSessionForAudio(sessionStatus, ws.readyState, WebSocket.OPEN);
+        ws.send(chunk.data);
+        if (chunk.isLast) {
+          ws.send(
+            JSON.stringify({
+              type: "stop_recording"
+            })
+          );
+        }
+      },
+      close: async () => {
+        if (sessionStatus === "closed" || sessionStatus === "closing") {
+          return;
+        }
+        sessionStatus = "closing";
+        if (ws.readyState === WebSocket.OPEN) {
+          ws.send(
+            JSON.stringify({
+              type: "stop_recording"
+            })
+          );
+        }
+        await closeWebSocket(ws);
+        sessionStatus = "closed";
+      }
+    };
+  }
+  /**
+   * Build streaming request with full type safety from OpenAPI specs
+   *
+   * Maps normalized options to Gladia streaming request format,
+   * including all advanced features like pre-processing, real-time
+   * processing, post-processing, and message configuration.
+   */
+  buildStreamingRequest(options) {
+    const gladiaOpts = options?.gladiaStreaming || {};
     let validatedSampleRate;
     if (options?.sampleRate) {
       validatedSampleRate = validateEnumValue(
@@ -2103,112 +2317,376 @@ var GladiaAdapter = class extends BaseAdapter {
         "Gladia"
       );
     }
+    let validatedBitDepth;
+    if (options?.bitDepth) {
+      validatedBitDepth = validateEnumValue(
+        options.bitDepth,
+        StreamingSupportedBitDepthEnum,
+        "bit depth",
+        "Gladia"
+      );
+    }
     const streamingRequest = {
+      // Spread any direct Gladia streaming options first
+      ...gladiaOpts,
+      // Audio format configuration (these are excluded from gladiaStreaming to avoid conflicts)
       encoding: options?.encoding ? mapEncodingToProvider(options.encoding, "gladia") : void 0,
       sample_rate: validatedSampleRate,
+      bit_depth: validatedBitDepth,
       channels: options?.channels,
-
-      model: options?.model
+      // Model and processing
+      model: options?.model ?? gladiaOpts.model,
+      endpointing: options?.endpointing ?? gladiaOpts.endpointing,
+      maximum_duration_without_endpointing: options?.maxSilence ?? gladiaOpts.maximum_duration_without_endpointing
     };
-    if (options?.language) {
+    if (options?.language || options?.codeSwitching || gladiaOpts.language_config) {
       streamingRequest.language_config = {
-        languages: [options.language]
+        ...gladiaOpts.language_config,
+        languages: options?.language ? [options.language] : gladiaOpts.language_config?.languages,
+        code_switching: options?.codeSwitching ?? gladiaOpts.language_config?.code_switching
       };
     }
-
-    streamingRequest
-
-
-
-    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if (gladiaOpts.pre_processing) {
+      streamingRequest.pre_processing = gladiaOpts.pre_processing;
+    }
+    const realtimeProcessing = gladiaOpts.realtime_processing || {};
+    const hasRealtimeOptions = options?.customVocabulary || options?.sentimentAnalysis || options?.entityDetection || realtimeProcessing.translation || realtimeProcessing.custom_vocabulary || realtimeProcessing.custom_spelling || realtimeProcessing.named_entity_recognition || realtimeProcessing.sentiment_analysis;
+    if (hasRealtimeOptions) {
+      streamingRequest.realtime_processing = {
+        ...realtimeProcessing,
+        // Custom vocabulary
+        custom_vocabulary: options?.customVocabulary && options.customVocabulary.length > 0 || realtimeProcessing.custom_vocabulary,
+        custom_vocabulary_config: options?.customVocabulary && options.customVocabulary.length > 0 ? {
+          ...realtimeProcessing.custom_vocabulary_config,
+          vocabulary: options.customVocabulary
+        } : realtimeProcessing.custom_vocabulary_config,
+        // Sentiment analysis
+        sentiment_analysis: options?.sentimentAnalysis ?? realtimeProcessing.sentiment_analysis,
+        // Named entity recognition
+        named_entity_recognition: options?.entityDetection ?? realtimeProcessing.named_entity_recognition
+      };
+    }
+    const postProcessing = gladiaOpts.post_processing || {};
+    if (options?.summarization || postProcessing.summarization || postProcessing.chapterization) {
+      streamingRequest.post_processing = {
+        ...postProcessing,
+        summarization: options?.summarization ?? postProcessing.summarization
+      };
+    }
+    if (gladiaOpts.messages_config) {
+      streamingRequest.messages_config = gladiaOpts.messages_config;
+    } else if (options?.interimResults !== void 0) {
+      streamingRequest.messages_config = {
+        receive_partial_transcripts: options.interimResults,
+        receive_final_transcripts: true
+      };
+    }
+    if (gladiaOpts.callback || gladiaOpts.callback_config) {
+      streamingRequest.callback = gladiaOpts.callback;
+      streamingRequest.callback_config = gladiaOpts.callback_config;
+    }
+    if (gladiaOpts.custom_metadata) {
+      streamingRequest.custom_metadata = gladiaOpts.custom_metadata;
+    }
+    return streamingRequest;
+  }
+  /**
+   * Handle all WebSocket message types from Gladia streaming
+   *
+   * Processes transcript, utterance, speech events, real-time processing
+   * results (translation, sentiment, NER), post-processing results
+   * (summarization, chapterization), acknowledgments, and lifecycle events.
+   */
+  handleWebSocketMessage(message, callbacks) {
+    const msg = message;
+    const messageType = msg.type;
+    switch (messageType) {
+      // ─────────────────────────────────────────────────────────────────
+      // Transcript events
+      // ─────────────────────────────────────────────────────────────────
+      case "transcript": {
+        const transcriptMessage = message;
+        const messageData = transcriptMessage.data;
+        const utterance = messageData.utterance;
+        callbacks?.onTranscript?.({
+          type: "transcript",
+          text: utterance.text,
+          isFinal: messageData.is_final,
+          confidence: utterance.confidence,
+          language: utterance.language,
+          channel: utterance.channel,
+          speaker: utterance.speaker?.toString(),
+          words: utterance.words.map((w) => ({
+            word: w.word,
+            start: w.start,
+            end: w.end,
+            confidence: w.confidence
+          })),
+          data: message
+        });
+        break;
+      }
+      case "utterance": {
+        const transcriptMessage = message;
+        const messageData = transcriptMessage.data;
+        const utterance = messageData.utterance;
+        callbacks?.onUtterance?.({
+          text: utterance.text,
+          start: utterance.start,
+          end: utterance.end,
+          speaker: utterance.speaker?.toString(),
+          confidence: utterance.confidence,
+          words: utterance.words.map((w) => ({
+            word: w.word,
+            start: w.start,
+            end: w.end,
+            confidence: w.confidence
+          }))
+        });
+        break;
+      }
+      // Post-processing transcripts (final accumulated transcript)
+      case "post_transcript": {
+        const postTranscript = message;
+        callbacks?.onTranscript?.({
+          type: "transcript",
+          text: postTranscript.data?.full_transcript || "",
+          isFinal: true,
+          data: message
+        });
+        break;
+      }
+      case "post_final_transcript": {
+        const postFinal = message;
+        callbacks?.onTranscript?.({
+          type: "transcript",
+          text: postFinal.data?.transcription?.full_transcript || "",
+          isFinal: true,
+          data: message
+        });
+        break;
+      }
+      // ─────────────────────────────────────────────────────────────────
+      // Speech detection events
+      // ─────────────────────────────────────────────────────────────────
+      case "speech_start": {
+        const speechStart = message;
+        const event = {
+          type: "speech_start",
+          timestamp: speechStart.data.time,
+          channel: speechStart.data.channel,
+          sessionId: speechStart.session_id
+        };
+        callbacks?.onSpeechStart?.(event);
+        break;
+      }
+      case "speech_end": {
+        const speechEnd = message;
+        const event = {
+          type: "speech_end",
+          timestamp: speechEnd.data.time,
+          channel: speechEnd.data.channel,
+          sessionId: speechEnd.session_id
+        };
+        callbacks?.onSpeechEnd?.(event);
+        break;
+      }
+      // ─────────────────────────────────────────────────────────────────
+      // Real-time processing events
+      // ─────────────────────────────────────────────────────────────────
+      case "translation": {
+        const translationMsg = message;
+        if (translationMsg.error) {
+          callbacks?.onError?.({
+            code: ERROR_CODES.TRANSCRIPTION_ERROR,
+            message: "Translation failed",
+            details: translationMsg.error
+          });
+        } else if (translationMsg.data) {
+          const event = {
+            utteranceId: translationMsg.data.utterance_id,
+            original: translationMsg.data.utterance.text,
+            targetLanguage: translationMsg.data.target_language,
+            translatedText: translationMsg.data.translated_utterance.text,
+            isFinal: true
+          };
+          callbacks?.onTranslation?.(event);
+        }
+        break;
+      }
+      case "sentiment_analysis": {
+        const sentimentMsg = message;
+        if (sentimentMsg.error) {
+          callbacks?.onError?.({
+            code: ERROR_CODES.TRANSCRIPTION_ERROR,
+            message: "Sentiment analysis failed",
+            details: sentimentMsg.error
+          });
+        } else if (sentimentMsg.data) {
+          for (const result of sentimentMsg.data.results) {
+            const event = {
+              utteranceId: sentimentMsg.data.utterance_id,
+              sentiment: result.sentiment,
+              confidence: void 0
+              // Gladia doesn't provide confidence for sentiment
+            };
+            callbacks?.onSentiment?.(event);
+          }
+        }
+        break;
+      }
+      case "named_entity_recognition": {
+        const nerMsg = message;
+        if (nerMsg.error) {
+          callbacks?.onError?.({
+            code: ERROR_CODES.TRANSCRIPTION_ERROR,
+            message: "Named entity recognition failed",
+            details: nerMsg.error
           });
-        } else if (
-          const
-
-
-
-
-
-
-
-
-
-            text: word.word,
-            start: word.start,
-            end: word.end,
-            confidence: word.confidence
-          }))
-        };
-        callbacks?.onUtterance?.(utteranceData);
-      } else if (message.type === "metadata") {
-        callbacks?.onMetadata?.(message);
+        } else if (nerMsg.data) {
+          for (const entity of nerMsg.data.results) {
+            const event = {
+              utteranceId: nerMsg.data.utterance_id,
+              text: entity.text,
+              type: entity.entity_type,
+              start: entity.start,
+              end: entity.end
+            };
+            callbacks?.onEntity?.(event);
+          }
         }
-
-        callbacks?.onError?.({
-          code: ERROR_CODES.PARSE_ERROR,
-          message: "Failed to parse WebSocket message",
-          details: error
-        });
+        break;
       }
-
-
-
-
-
-
-
-
-
-        if (
-
-
-
-          })
-        );
+      // ─────────────────────────────────────────────────────────────────
+      // Post-processing events
+      // ─────────────────────────────────────────────────────────────────
+      case "post_summarization": {
+        const summaryMsg = message;
+        if (summaryMsg.error) {
+          callbacks?.onSummarization?.({
+            summary: "",
+            error: typeof summaryMsg.error === "string" ? summaryMsg.error : "Summarization failed"
+          });
+        } else if (summaryMsg.data) {
+          callbacks?.onSummarization?.({
+            summary: summaryMsg.data.results
+          });
         }
-
-
-
-
+        break;
+      }
+      case "post_chapterization": {
+        const chapterMsg = message;
+        if (chapterMsg.error) {
+          callbacks?.onChapterization?.({
+            chapters: [],
+            error: typeof chapterMsg.error === "string" ? chapterMsg.error : "Chapterization failed"
+          });
+        } else if (chapterMsg.data) {
+          callbacks?.onChapterization?.({
+            chapters: chapterMsg.data.results.map((ch) => ({
+              headline: ch.headline,
+              summary: ch.summary || ch.abstractive_summary || ch.extractive_summary || "",
+              start: ch.start,
+              end: ch.end
+            }))
+          });
         }
-
-
-
-
-
-
-
+        break;
+      }
+      // ─────────────────────────────────────────────────────────────────
+      // Acknowledgment events
+      // ─────────────────────────────────────────────────────────────────
+      case "audio_chunk_ack": {
+        const ackMsg = message;
+        if (ackMsg.error) {
+          callbacks?.onError?.({
+            code: ERROR_CODES.TRANSCRIPTION_ERROR,
+            message: "Audio chunk not acknowledged",
+            details: ackMsg.error
+          });
+        } else if (ackMsg.data) {
+          const event = {
+            byteRange: ackMsg.data.byte_range,
+            timeRange: ackMsg.data.time_range,
+            timestamp: ackMsg.created_at
+          };
+          callbacks?.onAudioAck?.(event);
         }
-
-        sessionStatus = "closed";
+        break;
       }
-
+      case "stop_recording_ack": {
+        const stopAck = message;
+        if (stopAck.error) {
+          callbacks?.onError?.({
+            code: ERROR_CODES.TRANSCRIPTION_ERROR,
+            message: "Stop recording not acknowledged",
+            details: stopAck.error
+          });
+        }
+        break;
+      }
+      // ─────────────────────────────────────────────────────────────────
+      // Lifecycle events
+      // ─────────────────────────────────────────────────────────────────
+      case "start_session": {
+        const startSession = message;
+        const event = {
+          eventType: "start_session",
+          timestamp: startSession.created_at,
+          sessionId: startSession.session_id
+        };
+        callbacks?.onLifecycle?.(event);
+        break;
+      }
+      case "start_recording": {
+        const startRecording = message;
+        const event = {
+          eventType: "start_recording",
+          timestamp: startRecording.created_at,
+          sessionId: startRecording.session_id
+        };
+        callbacks?.onLifecycle?.(event);
+        break;
+      }
+      case "end_recording": {
+        const endRecording = message;
+        const event = {
+          eventType: "end_recording",
+          timestamp: endRecording.created_at,
+          sessionId: endRecording.session_id
+        };
+        callbacks?.onLifecycle?.(event);
+        break;
+      }
+      case "end_session": {
+        const endSession = message;
+        const event = {
+          eventType: "end_session",
+          timestamp: endSession.created_at,
+          sessionId: endSession.session_id
+        };
+        callbacks?.onLifecycle?.(event);
+        break;
+      }
+      // ─────────────────────────────────────────────────────────────────
+      // Metadata and other events
+      // ─────────────────────────────────────────────────────────────────
+      case "metadata":
+        callbacks?.onMetadata?.(msg);
+        break;
+      case "error": {
+        const errorMsg = msg;
+        callbacks?.onError?.({
+          code: errorMsg.error?.code || ERROR_CODES.TRANSCRIPTION_ERROR,
+          message: errorMsg.error?.message || "Unknown streaming error",
+          details: msg
+        });
+        break;
+      }
+      default:
+        callbacks?.onMetadata?.(msg);
+        break;
+    }
   }
 };
 function createGladiaAdapter(config) {
@@ -2866,14 +3344,14 @@ var AssemblyAIAdapter = class extends BaseAdapter {
     if (!transcript.words || transcript.words.length === 0) {
       return void 0;
     }
-    return transcript.words.map((word) => ({
-      text: word.text,
-      start: word.start / 1e3,
+    return transcript.words.map((w) => ({
+      word: w.text,
+      start: w.start / 1e3,
       // Convert ms to seconds
-      end: word.end / 1e3,
+      end: w.end / 1e3,
       // Convert ms to seconds
-      confidence: word.confidence,
-      speaker: word.speaker || void 0
+      confidence: w.confidence,
+      speaker: w.speaker || void 0
     }));
   }
   /**
@@ -2891,11 +3369,11 @@ var AssemblyAIAdapter = class extends BaseAdapter {
       // Convert ms to seconds
       speaker: utterance.speaker || void 0,
       confidence: utterance.confidence,
-      words: utterance.words.map((word) => ({
-        text: word.text,
-        start: word.start / 1e3,
-        end: word.end / 1e3,
-        confidence: word.confidence
+      words: utterance.words.map((w) => ({
+        word: w.text,
+        start: w.start / 1e3,
+        end: w.end / 1e3,
+        confidence: w.confidence
       }))
     }));
   }
@@ -2903,19 +3381,37 @@ var AssemblyAIAdapter = class extends BaseAdapter {
    * Stream audio for real-time transcription
    *
    * Creates a WebSocket connection to AssemblyAI for streaming transcription.
-   *
+   * Uses the v3 Universal Streaming API with full support for all parameters.
+   *
+   * Supports all AssemblyAI streaming features:
+   * - Real-time transcription with interim/final results (Turn events)
+   * - End-of-turn detection tuning (confidence threshold, silence duration)
+   * - Voice Activity Detection (VAD) threshold tuning
+   * - Real-time text formatting
+   * - Profanity filtering
+   * - Custom vocabulary (keyterms)
+   * - Language detection
+   * - Model selection (English or Multilingual)
+   * - Dynamic configuration updates mid-stream
+   * - Force endpoint command
    *
    * @param options - Streaming configuration options
+   * @param options.sampleRate - Sample rate (8000, 16000, 22050, 44100, 48000)
+   * @param options.encoding - Audio encoding (pcm_s16le, pcm_mulaw)
+   * @param options.assemblyaiStreaming - All AssemblyAI-specific streaming options
    * @param callbacks - Event callbacks for transcription results
-   * @returns Promise that resolves with a StreamingSession
+   * @param callbacks.onTranscript - Interim/final transcript received (Turn event)
+   * @param callbacks.onUtterance - Complete utterance (Turn with end_of_turn=true)
+   * @param callbacks.onMetadata - Session metadata (Begin, Termination events)
+   * @param callbacks.onError - Error occurred
+   * @param callbacks.onClose - Connection closed
+   * @returns Promise that resolves with an extended StreamingSession
    *
-   * @example
+   * @example Basic real-time streaming
    * ```typescript
    * const session = await adapter.transcribeStream({
-   *   encoding: 'pcm_s16le',
    *   sampleRate: 16000,
-   *
-   *   interimResults: true
+   *   encoding: 'pcm_s16le'
    * }, {
    *   onOpen: () => console.log('Connected'),
    *   onTranscript: (event) => {
@@ -2930,21 +3426,50 @@ var AssemblyAIAdapter = class extends BaseAdapter {
    * });
    *
    * // Send audio chunks
-   * const audioChunk = getAudioChunk();
+   * const audioChunk = getAudioChunk();
    * await session.sendAudio({ data: audioChunk });
    *
    * // Close when done
    * await session.close();
    * ```
+   *
+   * @example Advanced streaming with all features
+   * ```typescript
+   * const session = await adapter.transcribeStream({
+   *   sampleRate: 16000,
+   *   assemblyaiStreaming: {
+   *     speechModel: 'universal-streaming-multilingual',
+   *     languageDetection: true,
+   *     endOfTurnConfidenceThreshold: 0.7,
+   *     minEndOfTurnSilenceWhenConfident: 500,
+   *     maxTurnSilence: 15000,
+   *     vadThreshold: 0.3,
+   *     formatTurns: true,
+   *     filterProfanity: true,
+   *     keyterms: ['TypeScript', 'JavaScript', 'API'],
+   *     inactivityTimeout: 60000
+   *   }
+   * }, {
+   *   onTranscript: (e) => console.log('Transcript:', e.text),
+   *   onMetadata: (m) => console.log('Metadata:', m)
+   * });
+   *
+   * // Update configuration mid-stream
+   * session.updateConfiguration?.({
+   *   end_of_turn_confidence_threshold: 0.5,
+   *   vad_threshold: 0.2
+   * });
+   *
+   * // Force endpoint detection
+   * session.forceEndpoint?.();
+   * ```
    */
   async transcribeStream(options, callbacks) {
     this.validateConfig();
    if (!this.config?.apiKey) {
       throw new Error("API key is required for streaming");
     }
-    const sampleRate = options?.sampleRate || 16e3;
-    const encoding = options?.encoding ? mapEncodingToProvider(options.encoding, "assemblyai") : "pcm_s16le";
-    const wsUrl = `${this.wsBaseUrl}?sample_rate=${sampleRate}&encoding=${encoding}`;
+    const wsUrl = this.buildStreamingUrl(options);
     const ws = new WebSocket2(wsUrl, {
       headers: {
         Authorization: this.config.apiKey
|
|
|
2968
3493
|
ws.on("message", (data) => {
|
|
2969
3494
|
try {
|
|
2970
3495
|
const message = JSON.parse(data.toString());
|
|
2971
|
-
|
|
2972
|
-
callbacks?.onError?.({
|
|
2973
|
-
code: "API_ERROR",
|
|
2974
|
-
message: message.error
|
|
2975
|
-
});
|
|
2976
|
-
return;
|
|
2977
|
-
}
|
|
2978
|
-
if (message.type === "Begin") {
|
|
2979
|
-
const beginMsg = message;
|
|
2980
|
-
callbacks?.onMetadata?.({
|
|
2981
|
-
sessionId: beginMsg.id,
|
|
2982
|
-
expiresAt: new Date(beginMsg.expires_at).toISOString()
|
|
2983
|
-
});
|
|
2984
|
-
} else if (message.type === "Turn") {
|
|
2985
|
-
const turnMsg = message;
|
|
2986
|
-
callbacks?.onTranscript?.({
|
|
2987
|
-
type: "transcript",
|
|
2988
|
-
text: turnMsg.transcript,
|
|
2989
|
-
isFinal: turnMsg.end_of_turn,
|
|
2990
|
-
confidence: turnMsg.end_of_turn_confidence,
|
|
2991
|
-
words: turnMsg.words.map((word) => ({
|
|
2992
|
-
text: word.text,
|
|
2993
|
-
start: word.start / 1e3,
|
|
2994
|
-
// Convert ms to seconds
|
|
2995
|
-
end: word.end / 1e3,
|
|
2996
|
-
confidence: word.confidence
|
|
2997
|
-
})),
|
|
2998
|
-
data: turnMsg
|
|
2999
|
-
});
|
|
3000
|
-
} else if (message.type === "Termination") {
|
|
3001
|
-
const termMsg = message;
|
|
3002
|
-
callbacks?.onMetadata?.({
|
|
3003
|
-
terminated: true,
|
|
3004
|
-
audioDurationSeconds: termMsg.audio_duration_seconds,
|
|
3005
|
-
sessionDurationSeconds: termMsg.session_duration_seconds
|
|
3006
|
-
});
|
|
3007
|
-
}
|
|
3496
|
+
this.handleWebSocketMessage(message, callbacks);
|
|
3008
3497
|
} catch (error) {
|
|
3009
3498
|
callbacks?.onError?.({
|
|
3010
3499
|
code: "PARSE_ERROR",
|
|
@@ -3056,11 +3545,7 @@ var AssemblyAIAdapter = class extends BaseAdapter {
         }
         if (chunk.isLast) {
           flushAudioBuffer();
-          ws.send(
-            JSON.stringify({
-              terminate_session: true
-            })
-          );
+          ws.send(JSON.stringify({ type: "Terminate" }));
         }
       },
       close: async () => {
@@ -3070,11 +3555,7 @@ var AssemblyAIAdapter = class extends BaseAdapter {
         sessionStatus = "closing";
         flushAudioBuffer();
         if (ws.readyState === WebSocket2.OPEN) {
-          ws.send(
-            JSON.stringify({
-              terminate_session: true
-            })
-          );
+          ws.send(JSON.stringify({ type: "Terminate" }));
         }
         return new Promise((resolve) => {
           const timeout = setTimeout(() => {
@@ -3088,9 +3569,166 @@ var AssemblyAIAdapter = class extends BaseAdapter {
             resolve();
           });
         });
+      },
+      /**
+       * Update streaming configuration mid-session
+       *
+       * Allows changing VAD, end-of-turn, and formatting settings
+       * without restarting the stream.
+       *
+       * @param config - Configuration parameters to update
+       */
+      updateConfiguration: (config) => {
+        if (ws.readyState !== WebSocket2.OPEN) {
+          throw new Error("Cannot update configuration: WebSocket is not open");
+        }
+        const updateMsg = {
+          type: "UpdateConfiguration",
+          ...config
+        };
+        ws.send(JSON.stringify(updateMsg));
+      },
+      /**
+       * Force endpoint detection
+       *
+       * Immediately triggers end-of-turn, useful for manual control
+       * of turn boundaries (e.g., when user presses a button).
+       */
+      forceEndpoint: () => {
+        if (ws.readyState !== WebSocket2.OPEN) {
+          throw new Error("Cannot force endpoint: WebSocket is not open");
+        }
+        const forceMsg = {
+          type: "ForceEndpoint"
+        };
+        ws.send(JSON.stringify(forceMsg));
       }
     };
   }
+  /**
+   * Build WebSocket URL with all streaming parameters
+   */
+  buildStreamingUrl(options) {
+    const params = new URLSearchParams();
+    const aaiOpts = options?.assemblyaiStreaming || {};
+    const sampleRate = options?.sampleRate || aaiOpts.sampleRate || 16e3;
+    params.append("sample_rate", String(sampleRate));
+    const encoding = options?.encoding ? mapEncodingToProvider(options.encoding, "assemblyai") : aaiOpts.encoding || "pcm_s16le";
+    params.append("encoding", encoding);
+    if (aaiOpts.speechModel) {
+      params.append("speech_model", aaiOpts.speechModel);
+    }
+    if (aaiOpts.languageDetection) {
+      params.append("language_detection", "true");
+    }
+    if (aaiOpts.endOfTurnConfidenceThreshold !== void 0) {
+      params.append(
+        "end_of_turn_confidence_threshold",
+        String(aaiOpts.endOfTurnConfidenceThreshold)
+      );
+    }
+    if (aaiOpts.minEndOfTurnSilenceWhenConfident !== void 0) {
+      params.append(
+        "min_end_of_turn_silence_when_confident",
+        String(aaiOpts.minEndOfTurnSilenceWhenConfident)
+      );
+    }
+    if (aaiOpts.maxTurnSilence !== void 0) {
+      params.append("max_turn_silence", String(aaiOpts.maxTurnSilence));
+    }
+    if (aaiOpts.vadThreshold !== void 0) {
+      params.append("vad_threshold", String(aaiOpts.vadThreshold));
+    }
+    if (aaiOpts.formatTurns !== void 0) {
+      params.append("format_turns", String(aaiOpts.formatTurns));
+    }
+    if (aaiOpts.filterProfanity) {
+      params.append("filter_profanity", "true");
+    }
+    const keyterms = options?.customVocabulary || aaiOpts.keyterms;
+    if (keyterms && keyterms.length > 0) {
+      keyterms.forEach((term) => params.append("keyterms", term));
+    }
+    if (aaiOpts.keytermsPrompt && aaiOpts.keytermsPrompt.length > 0) {
+      aaiOpts.keytermsPrompt.forEach((prompt) => params.append("keyterms_prompt", prompt));
+    }
+    if (aaiOpts.inactivityTimeout !== void 0) {
+      params.append("inactivity_timeout", String(aaiOpts.inactivityTimeout));
+    }
+    return `${this.wsBaseUrl}?${params.toString()}`;
+  }
+  /**
+   * Handle all WebSocket message types from AssemblyAI streaming
+   */
+  handleWebSocketMessage(message, callbacks) {
+    if ("error" in message) {
+      callbacks?.onError?.({
+        code: "API_ERROR",
+        message: message.error
+      });
+      return;
+    }
+    const typedMessage = message;
+    switch (typedMessage.type) {
+      case "Begin": {
+        const beginMsg = typedMessage;
+        callbacks?.onMetadata?.({
+          type: "begin",
+          sessionId: beginMsg.id,
+          expiresAt: new Date(beginMsg.expires_at).toISOString()
+        });
+        break;
+      }
+      case "Turn": {
+        const turnMsg = typedMessage;
+        callbacks?.onTranscript?.({
+          type: "transcript",
+          text: turnMsg.transcript,
+          isFinal: turnMsg.end_of_turn,
+          confidence: turnMsg.end_of_turn_confidence,
+          language: turnMsg.language_code,
+          words: turnMsg.words.map((w) => ({
+            word: w.text,
+            start: w.start / 1e3,
+            // Convert ms to seconds
+            end: w.end / 1e3,
+            confidence: w.confidence
+          })),
+          data: turnMsg
+        });
+        if (turnMsg.end_of_turn) {
+          const words = turnMsg.words;
+          const start = words.length > 0 ? words[0].start / 1e3 : 0;
+          const end = words.length > 0 ? words[words.length - 1].end / 1e3 : 0;
+          callbacks?.onUtterance?.({
+            text: turnMsg.transcript,
+            start,
+            end,
+            confidence: turnMsg.end_of_turn_confidence,
+            words: turnMsg.words.map((w) => ({
+              word: w.text,
+              start: w.start / 1e3,
+              end: w.end / 1e3,
+              confidence: w.confidence
+            }))
+          });
+        }
+        break;
+      }
+      case "Termination": {
+        const termMsg = typedMessage;
+        callbacks?.onMetadata?.({
+          type: "termination",
+          audioDurationSeconds: termMsg.audio_duration_seconds,
+          sessionDurationSeconds: termMsg.session_duration_seconds
+        });
+        break;
+      }
+      default:
+        callbacks?.onMetadata?.(message);
+        break;
+    }
+  }
 };
 function createAssemblyAIAdapter(config) {
   const adapter = new AssemblyAIAdapter();
|
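The handler above branches on AssemblyAI's v3 streaming frames. Rough shapes inferred from the fields it reads (not the package's generated types; word timestamps are in milliseconds, hence the division by 1e3):

```typescript
interface BeginMessage {
  type: "Begin";
  id: string;          // session id
  expires_at: number;  // epoch timestamp
}
interface TurnWord {
  text: string;
  start: number;       // ms
  end: number;         // ms
  confidence: number;
}
interface TurnMessage {
  type: "Turn";
  transcript: string;
  end_of_turn: boolean;
  end_of_turn_confidence: number;
  language_code?: string;
  words: TurnWord[];
}
interface TerminationMessage {
  type: "Termination";
  audio_duration_seconds: number;
  session_duration_seconds: number;
}
type StreamingMessage = BeginMessage | TurnMessage | TerminationMessage;
```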
@@ -3352,11 +3990,11 @@ var DeepgramAdapter = class extends BaseAdapter {
       return void 0;
     }
     return alternative.words.map(
-      (
-
-        start:
-        end:
-        confidence:
+      (w) => ({
+        word: w.word || "",
+        start: w.start || 0,
+        end: w.end || 0,
+        confidence: w.confidence,
         speaker: void 0
         // Speaker info is at utterance level, not word level
       })
@@ -3376,11 +4014,11 @@ var DeepgramAdapter = class extends BaseAdapter {
       end: utterance.end || 0,
       speaker: utterance.speaker?.toString(),
       confidence: utterance.confidence,
-      words: utterance.words?.map((
-
-        start:
-        end:
-        confidence:
+      words: utterance.words?.map((w) => ({
+        word: w.word || "",
+        start: w.start || 0,
+        end: w.end || 0,
+        confidence: w.confidence
       }))
     }));
   }
@@ -3399,11 +4037,44 @@ var DeepgramAdapter = class extends BaseAdapter {
    * Creates a WebSocket connection to Deepgram for streaming transcription.
    * Send audio chunks via session.sendAudio() and receive results via callbacks.
    *
+   * Supports all Deepgram streaming features:
+   * - Real-time transcription with interim/final results
+   * - Speech detection events (SpeechStarted, UtteranceEnd)
+   * - Speaker diarization
+   * - Language detection
+   * - Real-time sentiment, entity detection, topics, intents
+   * - Custom vocabulary (keywords, keyterms)
+   * - PII redaction
+   * - Filler words, numerals, measurements, paragraphs
+   * - Profanity filtering
+   * - Dictation mode
+   *
    * @param options - Streaming configuration options
+   * @param options.encoding - Audio encoding (linear16, flac, mulaw, opus, speex, g729)
+   * @param options.sampleRate - Sample rate in Hz
+   * @param options.channels - Number of audio channels
+   * @param options.language - Language code for transcription
+   * @param options.model - Model to use (nova-2, nova-3, base, enhanced, etc.)
+   * @param options.diarization - Enable speaker identification
+   * @param options.languageDetection - Auto-detect language
+   * @param options.interimResults - Enable partial transcripts
+   * @param options.summarization - Enable summarization
+   * @param options.sentimentAnalysis - Enable sentiment analysis
+   * @param options.entityDetection - Enable entity detection
+   * @param options.piiRedaction - Enable PII redaction
+   * @param options.customVocabulary - Keywords to boost recognition
+   * @param options.deepgramStreaming - All Deepgram-specific streaming options
    * @param callbacks - Event callbacks for transcription results
+   * @param callbacks.onTranscript - Interim/final transcript received
+   * @param callbacks.onUtterance - Complete utterance detected
+   * @param callbacks.onSpeechStart - Speech detected (Deepgram SpeechStarted)
+   * @param callbacks.onSpeechEnd - Speech ended (Deepgram UtteranceEnd)
+   * @param callbacks.onMetadata - Metadata received
+   * @param callbacks.onError - Error occurred
+   * @param callbacks.onClose - Connection closed
    * @returns Promise that resolves with a StreamingSession
    *
-   * @example
+   * @example Basic real-time streaming
    * ```typescript
    * const session = await adapter.transcribeStream({
    *   encoding: 'linear16',
@@ -3426,32 +4097,47 @@ var DeepgramAdapter = class extends BaseAdapter {
    * });
    *
    * // Send audio chunks
-   * const audioChunk = getAudioChunk();
+   * const audioChunk = getAudioChunk();
    * await session.sendAudio({ data: audioChunk });
    *
    * // Close when done
    * await session.close();
    * ```
+   *
+   * @example Advanced streaming with all features
+   * ```typescript
+   * const session = await adapter.transcribeStream({
+   *   encoding: 'linear16',
+   *   sampleRate: 16000,
+   *   language: 'en',
+   *   model: 'nova-3',
+   *   diarization: true,
+   *   sentimentAnalysis: true,
+   *   entityDetection: true,
+   *   deepgramStreaming: {
+   *     fillerWords: true,
+   *     numerals: true,
+   *     profanityFilter: true,
+   *     topics: true,
+   *     intents: true,
+   *     customTopic: ['sales', 'support'],
+   *     customIntent: ['purchase', 'complaint'],
+   *     keyterm: ['TypeScript', 'JavaScript'],
+   *     utteranceSplit: 800,
+   *     punctuate: true,
+   *     smartFormat: true
+   *   }
+   * }, {
+   *   onTranscript: (e) => console.log('Transcript:', e.text),
+   *   onSpeechStart: (e) => console.log('Speech started at:', e.timestamp),
+   *   onSpeechEnd: (e) => console.log('Utterance ended'),
+   *   onMetadata: (m) => console.log('Metadata:', m)
+   * });
+   * ```
    */
   async transcribeStream(options, callbacks) {
     this.validateConfig();
-    const
-    if (options?.encoding) params.append("encoding", options.encoding);
-    if (options?.sampleRate) params.append("sample_rate", options.sampleRate.toString());
-    if (options?.channels) params.append("channels", options.channels.toString());
-    if (options?.language) params.append("language", options.language);
-    if (options?.model) params.append("model", options.model);
-    if (options?.languageDetection) params.append("detect_language", "true");
-    if (options?.diarization) params.append("diarize", "true");
-    if (options?.interimResults) params.append("interim_results", "true");
-    if (options?.summarization) params.append("summarize", "true");
-    if (options?.sentimentAnalysis) params.append("sentiment", "true");
-    if (options?.entityDetection) params.append("detect_entities", "true");
-    if (options?.piiRedaction) params.append("redact", "pii");
-    if (options?.customVocabulary && options.customVocabulary.length > 0) {
-      params.append("keywords", options.customVocabulary.join(","));
-    }
-    const wsUrl = `${this.wsBaseUrl}?${params.toString()}`;
+    const wsUrl = this.buildStreamingUrl(options);
    const ws = new WebSocket3(wsUrl, {
       headers: {
         Authorization: `Token ${this.config.apiKey}`
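Stripped of the adapter plumbing, the connection setup above amounts to this sketch. It assumes the `ws` package (suggested by the bundled `WebSocket3` import) and Deepgram's public streaming endpoint; the query string stands in for whatever `buildStreamingUrl` produces:

```typescript
import WebSocket from "ws";

// Minimal equivalent of the adapter's socket setup; Deepgram authenticates
// the WebSocket upgrade via an "Authorization: Token <key>" header.
const ws = new WebSocket(
  "wss://api.deepgram.com/v1/listen?encoding=linear16&sample_rate=16000",
  { headers: { Authorization: `Token ${process.env.DEEPGRAM_API_KEY}` } }
);
ws.on("open", () => {
  // Stream raw audio frames here; Deepgram replies with JSON text frames.
});
ws.on("message", (data) => console.log(JSON.parse(data.toString())));
```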
@@ -3466,31 +4152,7 @@ var DeepgramAdapter = class extends BaseAdapter {
     ws.on("message", (data) => {
       try {
         const message = JSON.parse(data.toString());
-
-        const channel = message.channel.alternatives[0];
-        if (channel) {
-          const transcript = channel.transcript;
-          const isFinal = message.is_final;
-          const words = channel.words?.map((word) => ({
-            text: word.word,
-            start: word.start,
-            end: word.end,
-            confidence: word.confidence
-          }));
-          callbacks?.onTranscript?.({
-            type: "transcript",
-            text: transcript,
-            isFinal,
-            words,
-            confidence: channel.confidence,
-            data: message
-          });
-        }
-        } else if (message.type === "UtteranceEnd") {
-          callbacks?.onMetadata?.(message);
-        } else if (message.type === "Metadata") {
-          callbacks?.onMetadata?.(message);
-        }
+        this.handleWebSocketMessage(message, callbacks);
       } catch (error) {
         callbacks?.onError?.({
           code: "PARSE_ERROR",
@@ -3563,6 +4225,210 @@ var DeepgramAdapter = class extends BaseAdapter {
     }
   };
 }
+  /**
+   * Build WebSocket URL with all streaming parameters
+   */
+  buildStreamingUrl(options) {
+    const params = new URLSearchParams();
+    const dgOpts = options?.deepgramStreaming || {};
+    if (options?.encoding || dgOpts.encoding) {
+      params.append("encoding", options?.encoding || dgOpts.encoding);
+    }
+    if (options?.sampleRate || dgOpts.sampleRate) {
+      params.append("sample_rate", String(options?.sampleRate || dgOpts.sampleRate));
+    }
+    if (options?.channels || dgOpts.channels) {
+      params.append("channels", String(options?.channels || dgOpts.channels));
+    }
+    if (options?.language || dgOpts.language) {
+      params.append("language", options?.language || dgOpts.language);
+    }
+    if (options?.model || dgOpts.model) {
+      params.append("model", options?.model || dgOpts.model);
+    }
+    if (dgOpts.version) {
+      params.append("version", dgOpts.version);
+    }
+    if (options?.languageDetection || dgOpts.languageDetection) {
+      params.append("detect_language", "true");
+    }
+    if (options?.diarization || dgOpts.diarization) {
+      params.append("diarize", "true");
+    }
+    if (options?.interimResults || dgOpts.interimResults) {
+      params.append("interim_results", "true");
+    }
+    if (dgOpts.punctuate !== void 0) {
+      params.append("punctuate", String(dgOpts.punctuate));
+    }
+    if (dgOpts.smartFormat !== void 0) {
+      params.append("smart_format", String(dgOpts.smartFormat));
+    }
+    if (dgOpts.fillerWords) {
+      params.append("filler_words", "true");
+    }
+    if (dgOpts.numerals) {
+      params.append("numerals", "true");
+    }
+    if (dgOpts.measurements) {
+      params.append("measurements", "true");
+    }
+    if (dgOpts.paragraphs) {
+      params.append("paragraphs", "true");
+    }
+    if (dgOpts.profanityFilter) {
+      params.append("profanity_filter", "true");
+    }
+    if (dgOpts.dictation) {
+      params.append("dictation", "true");
+    }
+    if (dgOpts.utteranceSplit) {
+      params.append("utt_split", String(dgOpts.utteranceSplit));
+    }
+    if (options?.summarization || dgOpts.summarize) {
+      params.append("summarize", "true");
+    }
+    if (options?.sentimentAnalysis || dgOpts.sentiment) {
+      params.append("sentiment", "true");
+    }
+    if (options?.entityDetection || dgOpts.detectEntities) {
+      params.append("detect_entities", "true");
+    }
+    if (dgOpts.topics) {
+      params.append("topics", "true");
+    }
+    if (dgOpts.customTopic && dgOpts.customTopic.length > 0) {
+      dgOpts.customTopic.forEach((topic) => params.append("custom_topic", topic));
+    }
+    if (dgOpts.customTopicMode) {
+      params.append("custom_topic_mode", dgOpts.customTopicMode);
+    }
+    if (dgOpts.intents) {
+      params.append("intents", "true");
+    }
+    if (dgOpts.customIntent && dgOpts.customIntent.length > 0) {
+      dgOpts.customIntent.forEach((intent) => params.append("custom_intent", intent));
+    }
+    if (dgOpts.customIntentMode) {
+      params.append("custom_intent_mode", dgOpts.customIntentMode);
+    }
+    const keywords = options?.customVocabulary || dgOpts.keywords;
+    if (keywords) {
+      const keywordList = Array.isArray(keywords) ? keywords : [keywords];
+      keywordList.forEach((kw) => params.append("keywords", kw));
+    }
+    if (dgOpts.keyterm && dgOpts.keyterm.length > 0) {
+      dgOpts.keyterm.forEach((term) => params.append("keyterm", term));
+    }
+    if (options?.piiRedaction || dgOpts.redact) {
+      if (Array.isArray(dgOpts.redact)) {
+        dgOpts.redact.forEach((r) => params.append("redact", r));
+      } else if (dgOpts.redact === true || options?.piiRedaction) {
+        params.append("redact", "pii");
+        params.append("redact", "pci");
+      }
+    }
+    if (dgOpts.callback) {
+      params.append("callback", dgOpts.callback);
+    }
+    if (dgOpts.tag && dgOpts.tag.length > 0) {
+      dgOpts.tag.forEach((t) => params.append("tag", t));
+    }
+    if (dgOpts.extra) {
+      params.append("extra", JSON.stringify(dgOpts.extra));
+    }
+    if (options?.endpointing !== void 0 || dgOpts.endpointing !== void 0) {
+      const ep = options?.endpointing ?? dgOpts.endpointing;
+      if (ep === false) {
+        params.append("endpointing", "false");
+      } else if (typeof ep === "number") {
+        params.append("endpointing", String(ep));
+      }
+    }
+    if (dgOpts.vadThreshold !== void 0) {
+      params.append("vad_events", "true");
+    }
+    return `${this.wsBaseUrl}?${params.toString()}`;
+  }
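Tracing the method above line by line for the "Advanced streaming" JSDoc example earlier in this diff, the produced URL ends with the following query string (base URL elided):

```typescript
const url = adapter.buildStreamingUrl({ /* the advanced example options */ });
// url ends with:
// encoding=linear16&sample_rate=16000&language=en&model=nova-3&diarize=true
// &punctuate=true&smart_format=true&filler_words=true&numerals=true
// &profanity_filter=true&utt_split=800&sentiment=true&detect_entities=true
// &topics=true&custom_topic=sales&custom_topic=support&intents=true
// &custom_intent=purchase&custom_intent=complaint
// &keyterm=TypeScript&keyterm=JavaScript
```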
+  /**
+   * Handle all WebSocket message types from Deepgram streaming
+   */
+  handleWebSocketMessage(message, callbacks) {
+    switch (message.type) {
+      case "Results": {
+        const channel = message.channel.alternatives[0];
+        if (channel && channel.transcript) {
+          callbacks?.onTranscript?.({
+            type: "transcript",
+            text: channel.transcript,
+            isFinal: message.is_final,
+            confidence: channel.confidence,
+            language: message.channel.detected_language,
+            words: channel.words?.map((w) => ({
+              word: w.punctuated_word || w.word,
+              start: w.start,
+              end: w.end,
+              confidence: w.confidence,
+              speaker: w.speaker?.toString()
+            })),
+            data: message
+          });
+        }
+        if (message.speech_final && channel && channel.transcript) {
+          callbacks?.onUtterance?.({
+            text: channel.transcript,
+            start: message.start,
+            end: message.start + message.duration,
+            confidence: channel.confidence,
+            words: channel.words?.map((w) => ({
+              word: w.punctuated_word || w.word,
+              start: w.start,
+              end: w.end,
+              confidence: w.confidence
+            }))
+          });
+        }
+        break;
+      }
+      case "SpeechStarted": {
+        const event = {
+          type: "speech_start",
+          timestamp: message.timestamp,
+          channel: message.channel[0]
+        };
+        callbacks?.onSpeechStart?.(event);
+        break;
+      }
+      case "UtteranceEnd": {
+        const event = {
+          type: "speech_end",
+          timestamp: message.last_word_end,
+          channel: message.channel[0]
+        };
+        callbacks?.onSpeechEnd?.(event);
+        break;
+      }
+      case "Metadata": {
+        callbacks?.onMetadata?.(message);
+        break;
+      }
+      case "Error": {
+        callbacks?.onError?.({
+          code: message.variant || "DEEPGRAM_ERROR",
+          message: message.message || message.description || "Unknown error",
+          details: message
+        });
+        break;
+      }
+      case "CloseStream": {
+        break;
+      }
+      default: {
+        callbacks?.onMetadata?.(message);
+        break;
+      }
+    }
+  }
 };
 function createDeepgramAdapter(config) {
   const adapter = new DeepgramAdapter();
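The dispatcher above recognizes Deepgram's documented frame types (Results, SpeechStarted, UtteranceEnd, Metadata, Error, CloseStream) and routes anything else to onMetadata. A rough sketch of the Results frame it consumes; the field names match Deepgram's live API, but the typing is inferred here rather than taken from the package's declarations:

```typescript
interface DeepgramResults {
  type: "Results";
  is_final: boolean;
  speech_final: boolean;
  start: number;    // seconds from stream start
  duration: number; // seconds
  channel: {
    detected_language?: string;
    alternatives: Array<{
      transcript: string;
      confidence: number;
      words?: Array<{
        word: string;
        punctuated_word?: string; // present when punctuation is enabled
        start: number;
        end: number;
        confidence: number;
        speaker?: number;         // present when diarization is enabled
      }>;
    }>;
  };
}
```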
@@ -3816,12 +4682,12 @@ var AzureSTTAdapter = class extends BaseAdapter {
     const recognizedPhrases = transcriptionData.recognizedPhrases || [];
     const fullText = combinedPhrases.map((phrase) => phrase.display || phrase.lexical).join(" ") || "";
     const words = recognizedPhrases.flatMap(
-      (phrase) => (phrase.nBest?.[0]?.words || []).map((
-
-        start:
+      (phrase) => (phrase.nBest?.[0]?.words || []).map((w) => ({
+        word: w.word,
+        start: w.offsetInTicks / 1e7,
         // Convert ticks to seconds
-        end: (
-        confidence:
+        end: (w.offsetInTicks + w.durationInTicks) / 1e7,
+        confidence: w.confidence,
         speaker: phrase.speaker !== void 0 ? phrase.speaker.toString() : void 0
       }))
     );
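Azure reports word timings in ticks of 100 ns, so one second is 10,000,000 ticks and dividing by 1e7 converts to seconds. A worked example:

```typescript
// 1 tick = 100 ns, so 1 s = 10,000,000 ticks.
const offsetInTicks = 23_500_000;  // word starts 2.35 s into the audio
const durationInTicks = 4_800_000; // word lasts 0.48 s
const start = offsetInTicks / 1e7;                   // 2.35
const end = (offsetInTicks + durationInTicks) / 1e7; // 2.83
```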
@@ -4102,10 +4968,10 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
     }
     if ("duration" in response && "language" in response) {
       const verboseResponse = response;
-      const words = verboseResponse.words?.map((
-
-        start:
-        end:
+      const words = verboseResponse.words?.map((w) => ({
+        word: w.word,
+        start: w.start,
+        end: w.end,
         confidence: void 0
       }));
       const requestId2 = `openai-${Date.now()}`;
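All of these word-mapping fixes converge on one unified shape. A sketch inferred from the mappings in this diff; the authoritative declaration lives in dist/index.d.ts:

```typescript
interface Word {
  word: string;
  start: number;       // seconds
  end: number;         // seconds
  confidence?: number; // Whisper's verbose output carries none
  speaker?: string;    // utterance-level for most providers
}
```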
@@ -4371,7 +5237,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
   normalizeResponse(response) {
     const text = response.results.filter((r) => r.type === "word" && r.alternatives).map((r) => r.alternatives[0]?.content || "").join(" ");
     const words = response.results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
-
+      word: result.alternatives?.[0]?.content || "",
       start: result.start_time,
       end: result.end_time,
       confidence: result.alternatives?.[0]?.confidence,
@@ -4522,12 +5388,12 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
   /**
    * Convert Gladia WordDTO to unified Word type
    */
-  mapWord(
+  mapWord(w) {
     return {
-
-      start:
-      end:
-      confidence:
+      word: w.word,
+      start: w.start,
+      end: w.end,
+      confidence: w.confidence
     };
   }
   /**
@@ -4865,11 +5731,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
       raw: payload
     };
   }
-  const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((
-
-    start:
-    end:
-    confidence:
+  const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((w) => ({
+    word: w.word || "",
+    start: w.start || 0,
+    end: w.end || 0,
+    confidence: w.confidence
   })) : void 0;
   const speakers = response.results.utterances && response.results.utterances.length > 0 ? response.results.utterances.map((utterance) => ({
     id: utterance.speaker?.toString() || "unknown",
@@ -4883,11 +5749,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
     end: utterance.end || 0,
     speaker: utterance.speaker?.toString(),
     confidence: utterance.confidence,
-    words: utterance.words && utterance.words.length > 0 ? utterance.words.map((
-
-      start:
-      end:
-      confidence:
+    words: utterance.words && utterance.words.length > 0 ? utterance.words.map((w) => ({
+      word: w.word || "",
+      start: w.start || 0,
+      end: w.end || 0,
+      confidence: w.confidence
     })) : void 0
   })) : void 0;
   const summary = alternative.summaries?.[0]?.summary;
@@ -5398,6 +6264,9 @@ function createWebhookRouter() {
 }
 export {
   AssemblyAIAdapter,
+  AssemblyAIEncoding,
+  AssemblyAISampleRate,
+  AssemblyAISpeechModel,
   schema_exports2 as AssemblyAITypes,
   AssemblyAIWebhookHandler,
   AzureSTTAdapter,
@@ -5405,8 +6274,18 @@ export {
   BaseAdapter,
   BaseWebhookHandler,
   DeepgramAdapter,
+  ListenV1EncodingParameter as DeepgramEncoding,
+  DeepgramModel,
+  ListenV1RedactParameterOneOfItem as DeepgramRedact,
+  SharedCustomTopicModeParameter as DeepgramTopicMode,
   DeepgramWebhookHandler,
   GladiaAdapter,
+  StreamingSupportedBitDepthEnum as GladiaBitDepth,
+  StreamingSupportedEncodingEnum as GladiaEncoding,
+  TranscriptionLanguageCodeEnum as GladiaLanguage,
+  StreamingSupportedModels as GladiaModel,
+  StreamingSupportedSampleRateEnum as GladiaSampleRate,
+  TranslationLanguageCodeEnum as GladiaTranslationLanguage,
   schema_exports as GladiaTypes,
   GladiaWebhookHandler,
   ListenV1EncodingParameter,
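0.3.0 re-exports the generated schema enums under provider-prefixed aliases, so consumers can reference supported values without reaching into the generated modules. A hypothetical consumer:

```typescript
import { DeepgramRedact, GladiaSampleRate } from "voice-router-dev";

// The generated enums are plain const object maps:
GladiaSampleRate.NUMBER_16000; // 16000
DeepgramRedact.pii;            // "pii"
```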