@absolutejs/voice 0.0.20 → 0.0.22-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +884 -4
- package/dist/angular/index.d.ts +1 -0
- package/dist/angular/index.js +759 -3
- package/dist/angular/voice-controller.service.d.ts +27 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/audioConditioning.d.ts +3 -0
- package/dist/client/actions.d.ts +48 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/connection.d.ts +5 -0
- package/dist/client/controller.d.ts +2 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +660 -167
- package/dist/client/index.d.ts +3 -0
- package/dist/client/index.js +991 -6
- package/dist/client/microphone.d.ts +4 -2
- package/dist/correction.d.ts +33 -0
- package/dist/fileStore.d.ts +27 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +3721 -298
- package/dist/ops.d.ts +100 -0
- package/dist/presets.d.ts +13 -0
- package/dist/react/index.d.ts +1 -0
- package/dist/react/index.js +728 -3
- package/dist/react/useVoiceController.d.ts +26 -0
- package/dist/react/useVoiceStream.d.ts +7 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/store.d.ts +2 -2
- package/dist/svelte/index.d.ts +1 -0
- package/dist/svelte/index.js +691 -3
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +93 -2
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +6247 -402
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +92 -2
- package/dist/testing/stt.d.ts +3 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/turnDetection.d.ts +5 -1
- package/dist/turnProfiles.d.ts +6 -0
- package/dist/types.d.ts +487 -10
- package/dist/vue/index.d.ts +1 -0
- package/dist/vue/index.js +750 -3
- package/dist/vue/useVoiceController.d.ts +30 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/fixtures/README.md +9 -0
- package/fixtures/manifest.json +59 -1
- package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
- package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
- package/package.json +135 -1
package/dist/index.js
CHANGED
|
@@ -69,6 +69,61 @@ var __decorateElement = (array, flags, name, decorators, target, extra) => {
|
|
|
69
69
|
return k || __decoratorMetadata(array, target), desc && __defProp(target, name, desc), p ? k ^ 4 ? extra : desc : target;
|
|
70
70
|
};
|
|
71
71
|
|
|
72
|
+
// src/audioConditioning.ts
|
|
73
|
+
// Fallbacks used by resolveAudioConditioningConfig when a caller omits a field.
// All levels are RMS values of samples normalised by 32768 (see computeRms).

// RMS level that conditionAudioChunk divides by the chunk's measured level to pick a gain.
var DEFAULT_TARGET_LEVEL = 0.08;
// Upper bound on the gain derived from the target level.
var DEFAULT_MAX_GAIN = 3;
// Chunks whose RMS falls below this value are treated as gated.
var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
// Multiplier applied to gated chunks (1 is used for chunks above the threshold).
var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
|
|
77
|
+
/**
 * Interprets raw PCM bytes as signed 16-bit samples (platform byte order).
 *
 * A trailing odd byte is ignored because the sample count is
 * `Math.floor(byteLength / 2)`.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio - Raw bytes, either a whole
 *   ArrayBuffer or a view (e.g. Uint8Array) over part of one.
 * @returns {Int16Array} A view over the caller's memory when the data is
 *   2-byte aligned, otherwise a view over a private copy.
 */
var toInt16Array = (audio) => {
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, Math.floor(audio.byteLength / 2));
  }
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio.byteOffset % Int16Array.BYTES_PER_ELEMENT !== 0) {
    // The Int16Array constructor throws a RangeError when the byte offset is
    // not a multiple of 2, which happens for views sliced at odd positions.
    // Copy the relevant bytes into a fresh buffer that starts at offset 0.
    const byteEnd = audio.byteOffset + sampleCount * Int16Array.BYTES_PER_ELEMENT;
    return new Int16Array(audio.buffer.slice(audio.byteOffset, byteEnd));
  }
  return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
};
|
|
83
|
+
/**
 * Root-mean-square level of a run of 16-bit samples, with each sample
 * scaled by 1/32768 before squaring.
 *
 * @param {ArrayLike<number>} samples - Sample values.
 * @returns {number} The RMS level, or 0 when there are no samples.
 */
var computeRms = (samples) => {
  const count = samples.length;
  if (count === 0) {
    return 0;
  }
  let energy = 0;
  for (let position = 0; position < count; position += 1) {
    const unit = samples[position] / 32768;
    energy += unit * unit;
  }
  return Math.sqrt(energy / count);
};
|
|
94
|
+
/**
 * Expands a partial audio-conditioning config into a complete one.
 *
 * @param {object} [config] - Caller-supplied settings; any of `maxGain`,
 *   `noiseGateAttenuation`, `noiseGateThreshold`, `targetLevel` may be omitted.
 * @returns {object | undefined} `undefined` when `config` is falsy or has
 *   `enabled === false`; otherwise an object with `enabled: true` and every
 *   field filled in (null/undefined fields take the module defaults).
 */
var resolveAudioConditioningConfig = (config) => {
  const isDisabled = !config || config.enabled === false;
  if (isDisabled) {
    return undefined;
  }
  // `??` (not `||`) so an explicit 0 — e.g. a fully closing noise gate — is kept.
  const maxGain = config.maxGain ?? DEFAULT_MAX_GAIN;
  const noiseGateAttenuation = config.noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION;
  const noiseGateThreshold = config.noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD;
  const targetLevel = config.targetLevel ?? DEFAULT_TARGET_LEVEL;
  return {
    enabled: true,
    maxGain,
    noiseGateAttenuation,
    noiseGateThreshold,
    targetLevel
  };
};
|
|
106
|
+
/**
 * Applies noise gating and level normalisation to one chunk of 16-bit PCM.
 *
 * The chunk's RMS decides two things: whether the gate factor is
 * `config.noiseGateAttenuation` (RMS below `config.noiseGateThreshold`) or 1,
 * and the gain, which is `config.targetLevel` divided by the gated RMS,
 * capped at `config.maxGain` and floored at 0.25. Each sample is multiplied
 * by gain × gate factor, rounded, and clamped to the int16 range.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio - Raw PCM bytes.
 * @param {object} [config] - Resolved conditioning settings.
 * @returns {ArrayBuffer | ArrayBufferView | Uint8Array} The input unchanged
 *   when `config` is falsy or the chunk holds no samples; otherwise a new
 *   Uint8Array over the conditioned samples.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const samples = toInt16Array(audio);
  if (samples.length === 0) {
    return audio;
  }
  const level = computeRms(samples);
  const isGated = level < config.noiseGateThreshold;
  const gateFactor = isGated ? config.noiseGateAttenuation : 1;
  // Floor the reference level so a silent (or fully gated) chunk cannot divide by zero.
  const referenceLevel = Math.max(level * gateFactor, 0.000001);
  const cappedGain = Math.min(config.maxGain, config.targetLevel / referenceLevel);
  const scale = Math.max(0.25, cappedGain) * gateFactor;
  const conditioned = Int16Array.from(samples, (sample) => {
    const scaled = Math.round(sample * scale);
    return Math.max(-32768, Math.min(32767, scaled));
  });
  return new Uint8Array(conditioned.buffer);
};
|
|
126
|
+
|
|
72
127
|
// src/plugin.ts
|
|
73
128
|
import { Elysia } from "elysia";
|
|
74
129
|
import { resolve } from "path";
|
|
@@ -118,6 +173,10 @@ var defaultMetrics = (input) => {
|
|
|
118
173
|
'<span class="voice-metric-label">Session</span>',
|
|
119
174
|
`<span class="voice-metric-value">${escapeHtml(input.sessionId)}</span>`,
|
|
120
175
|
"</div>",
|
|
176
|
+
input.session?.scenarioId ? `<div class="voice-metric">
|
|
177
|
+
<span class="voice-metric-label">Scenario</span>
|
|
178
|
+
<span class="voice-metric-value">${escapeHtml(input.session.scenarioId)}</span>
|
|
179
|
+
</div>` : "",
|
|
121
180
|
'<div class="voice-metric">',
|
|
122
181
|
'<span class="voice-metric-label">Status</span>',
|
|
123
182
|
`<span class="voice-metric-value">${escapeHtml(input.status)}</span>`,
|
|
@@ -207,24 +266,1224 @@ var resolveLogger = (logger) => ({
|
|
|
207
266
|
...logger
|
|
208
267
|
});
|
|
209
268
|
|
|
269
|
+
// src/turnProfiles.ts
|
|
270
|
+
// Base turn-detection timings keyed by turn profile name. Every profile is
// tagged with the "general" quality profile.
var TURN_PROFILE_DEFAULTS = (() => {
  const generalProfile = (silenceMs, speechThreshold, transcriptStabilityMs) => ({
    qualityProfile: "general",
    silenceMs,
    speechThreshold,
    transcriptStabilityMs
  });
  return {
    balanced: generalProfile(1400, 0.012, 1000),
    fast: generalProfile(700, 0.015, 450),
    "long-form": generalProfile(2200, 0.01, 1500)
  };
})();
|
|
290
|
+
// Timing overrides keyed by quality profile name. resolveTurnDetectionConfig
// prefers these over the turn-profile values; "general" overrides nothing.
var QUALITY_PROFILE_DEFAULTS = (() => {
  const timings = (silenceMs, speechThreshold, transcriptStabilityMs) => ({
    silenceMs,
    speechThreshold,
    transcriptStabilityMs
  });
  return {
    general: {},
    "accent-heavy": timings(1200, 0.01, 1200),
    "noisy-room": timings(2000, 0.02, 1600),
    "short-command": timings(500, 0.016, 420)
  };
})();
|
|
308
|
+
// Profile names resolveTurnDetectionConfig falls back to when the caller's
// config does not name one.
var DEFAULT_TURN_PROFILE = "fast"; // key into TURN_PROFILE_DEFAULTS
var DEFAULT_QUALITY_PROFILE = "general"; // key into QUALITY_PROFILE_DEFAULTS
|
|
310
|
+
/**
 * Resolves turn-detection settings from an optional partial config.
 *
 * For each of `silenceMs`, `speechThreshold` and `transcriptStabilityMs` the
 * first non-nullish value wins, in this order: the caller's config, the
 * quality-profile table, the turn-profile table.
 *
 * @param {object} [config] - May set `profile`, `qualityProfile` and any of
 *   the three timing fields.
 * @returns {object} `{ profile, qualityProfile, silenceMs, speechThreshold,
 *   transcriptStabilityMs }`.
 */
var resolveTurnDetectionConfig = (config) => {
  const profile = config?.profile ?? DEFAULT_TURN_PROFILE;
  const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
  const profileTimings = TURN_PROFILE_DEFAULTS[profile];
  const qualityTimings = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  // Caller value first, then the quality override, then the profile baseline.
  const layered = (field) => config?.[field] ?? qualityTimings[field] ?? profileTimings[field];
  return {
    profile,
    qualityProfile,
    silenceMs: layered("silenceMs"),
    speechThreshold: layered("speechThreshold"),
    transcriptStabilityMs: layered("transcriptStabilityMs")
  };
};
|
|
323
|
+
|
|
324
|
+
// src/presets.ts
|
|
325
|
+
// Raw (unresolved) settings for each named runtime preset; consumed by
// resolveVoiceRuntimePreset. The "default" preset deliberately has no
// audioConditioning entry.
var PRESET_INPUTS = (() => {
  // Each factory returns a fresh object so presets never share nested state.
  const monoCapture = () => ({
    channelCount: 1,
    sampleRateHz: 16000
  });
  const reconnecting = (maxReconnectAttempts, pingInterval) => ({
    maxReconnectAttempts,
    pingInterval,
    reconnect: true
  });
  const conditioning = (maxGain, noiseGateAttenuation, noiseGateThreshold, targetLevel) => ({
    enabled: true,
    maxGain,
    noiseGateAttenuation,
    noiseGateThreshold,
    targetLevel
  });
  const turnDetection = (qualityProfile, profile, overrides = {}) => ({
    qualityProfile,
    profile,
    ...overrides
  });
  return {
    chat: {
      audioConditioning: conditioning(2.5, 0, 0.004, 0.08),
      capture: monoCapture(),
      connection: reconnecting(10, 30000),
      sttLifecycle: "continuous",
      turnDetection: turnDetection("short-command", "balanced")
    },
    default: {
      capture: monoCapture(),
      connection: reconnecting(10, 30000),
      sttLifecycle: "continuous",
      turnDetection: turnDetection("general", "fast")
    },
    dictation: {
      audioConditioning: conditioning(2.25, 0.05, 0.003, 0.08),
      capture: monoCapture(),
      connection: reconnecting(12, 30000),
      sttLifecycle: "continuous",
      turnDetection: turnDetection("accent-heavy", "long-form")
    },
    "guided-intake": {
      audioConditioning: conditioning(2.5, 0, 0.004, 0.08),
      capture: monoCapture(),
      connection: reconnecting(12, 30000),
      sttLifecycle: "turn-scoped",
      turnDetection: turnDetection("accent-heavy", "long-form")
    },
    "noisy-room": {
      audioConditioning: conditioning(3, 0.12, 0.006, 0.085),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: turnDetection("noisy-room", "long-form", {
        silenceMs: 2100,
        speechThreshold: 0.02,
        transcriptStabilityMs: 1650
      })
    },
    "pstn-balanced": {
      audioConditioning: conditioning(2.8, 0.07, 0.005, 0.08),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: turnDetection("noisy-room", "long-form", {
        silenceMs: 660,
        speechThreshold: 0.012,
        transcriptStabilityMs: 300
      })
    },
    "pstn-fast": {
      audioConditioning: conditioning(2.75, 0.06, 0.005, 0.08),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: turnDetection("noisy-room", "long-form", {
        silenceMs: 620,
        speechThreshold: 0.012,
        transcriptStabilityMs: 280
      })
    },
    reliability: {
      audioConditioning: conditioning(2.9, 0.08, 0.005, 0.08),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: turnDetection("noisy-room", "long-form")
    }
  };
})();
|
|
513
|
+
/**
 * Looks up a named preset in PRESET_INPUTS and returns it fully resolved.
 *
 * @param {string} [name="default"] - Preset key.
 * @returns {object} `{ audioConditioning, capture, connection, name,
 *   sttLifecycle, turnDetection }`, where `audioConditioning` is `undefined`
 *   for presets that do not configure it and `connection` is a shallow copy.
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  const source = PRESET_INPUTS[name];
  const audioConditioning = resolveAudioConditioningConfig(source.audioConditioning);
  const capture = {
    channelCount: source.capture?.channelCount ?? 1,
    sampleRateHz: source.capture?.sampleRateHz ?? 16000
  };
  // Copy so callers can tweak the result without touching the preset table.
  const connection = { ...source.connection };
  const sttLifecycle = source.sttLifecycle ?? "continuous";
  const turnDetection = resolveTurnDetectionConfig(source.turnDetection);
  return {
    audioConditioning,
    capture,
    connection,
    name,
    sttLifecycle,
    turnDetection
  };
};
|
|
529
|
+
|
|
530
|
+
// src/ops.ts
|
|
531
|
+
/**
 * Returns a copy of `task` with `entry` appended to its history.
 *
 * @param {object} task - Task to extend; not mutated. A missing `history`
 *   is treated as empty.
 * @param {object} entry - History entry; `at` defaults to the current time
 *   when null/undefined.
 * @returns {object} New task whose `updatedAt` is set to the current time.
 */
var ensureTaskHistory = (task, entry) => {
  const previousEntries = task.history ?? [];
  const stampedEntry = {
    ...entry,
    at: entry.at ?? Date.now()
  };
  return {
    ...task,
    history: [...previousEntries, stampedEntry],
    updatedAt: Date.now()
  };
};
|
|
542
|
+
/**
 * Returns a shallow copy of `task` whose `id` is set (or replaced) by `id`.
 *
 * @param {*} id - Identifier to attach.
 * @param {object} task - Source task; not mutated.
 * @returns {object} The copy carrying the given id.
 */
var withVoiceOpsTaskId = (id, task) => {
  const identified = { ...task, id };
  return identified;
};
|
|
546
|
+
/**
 * Returns a shallow copy of `event` whose `id` is set (or replaced) by `id`.
 *
 * @param {*} id - Identifier to attach.
 * @param {object} event - Source event; not mutated.
 * @returns {object} The copy carrying the given id.
 */
var withVoiceIntegrationEventId = (id, event) => {
  const identified = { ...event, id };
  return identified;
};
|
|
550
|
+
/**
 * Derives a follow-up ops task from a call review, based on its outcome.
 *
 * Outcomes "voicemail", "no-answer", "escalated", "transferred" and "failed"
 * each yield a task; any other outcome yields `null`.
 *
 * @param {object} review - Review with `id`, `summary.outcome`, and optional
 *   `generatedAt` and `postCall` (`summary`, `target`, `recommendedAction`).
 * @returns {object | null} An open task with id `${review.id}:ops`, a single
 *   "created" history entry, and outcome-specific `description`, `kind` and
 *   `title`; or `null` when the outcome needs no task.
 */
var buildVoiceOpsTaskFromReview = (review) => {
  const createdAt = review.generatedAt ?? Date.now();
  const postCall = review.postCall;
  const outcome = review.summary.outcome;
  const target = postCall?.target;
  const base = {
    createdAt,
    history: [
      {
        actor: "system",
        at: createdAt,
        detail: postCall?.summary,
        type: "created"
      }
    ],
    id: `${review.id}:ops`,
    intakeId: review.id,
    outcome,
    recommendedAction: postCall?.recommendedAction ?? "Review the voice artifact and decide the next operator action.",
    reviewId: review.id,
    status: "open",
    target,
    updatedAt: createdAt
  };
  // Titles are thunks so the target is only interpolated for the matched outcome.
  const variants = new Map([
    ["voicemail", {
      fallbackDescription: "Caller reached voicemail and needs a callback follow-up.",
      kind: "callback",
      title: () => (target ? `Call back voicemail from ${target}` : "Call back voicemail lead")
    }],
    ["no-answer", {
      fallbackDescription: "Live contact was not established and should be retried.",
      kind: "callback",
      title: () => "Retry no-answer call"
    }],
    ["escalated", {
      fallbackDescription: "The automated path escalated this call for human review.",
      kind: "escalation",
      title: () => "Review escalated call"
    }],
    ["transferred", {
      fallbackDescription: "The call was transferred and should be verified downstream.",
      kind: "transfer-check",
      title: () => (target ? `Verify transfer to ${target}` : "Verify call transfer")
    }],
    ["failed", {
      fallbackDescription: "The call failed and needs operator review before retry.",
      kind: "retry-review",
      title: () => "Inspect failed call before retry"
    }]
  ]);
  const variant = variants.get(outcome);
  if (variant === undefined) {
    return null;
  }
  return {
    ...base,
    description: postCall?.summary ?? variant.fallbackDescription,
    kind: variant.kind,
    title: variant.title()
  };
};
|
|
611
|
+
/**
 * Assigns a task to an owner and records the assignment in its history.
 *
 * @param {object} task - Task to assign; not mutated.
 * @param {string} owner - Assignee name; trimmed, and "ops" is used when the
 *   trimmed name is empty.
 * @param {object} [input] - Optional `actor` (defaults to the assignee) and
 *   `at` timestamp for the history entry.
 * @returns {object} Updated task with `assignee` set and an "assigned" entry appended.
 */
var assignVoiceOpsTask = (task, owner, input = {}) => {
  const assignee = owner.trim() || "ops";
  const assignedTask = { ...task, assignee };
  const historyEntry = {
    actor: input.actor ?? assignee,
    at: input.at,
    detail: `Assigned to ${assignee}`,
    type: "assigned"
  };
  return ensureTaskHistory(assignedTask, historyEntry);
};
|
|
623
|
+
/**
 * Marks a task as in progress and records a "started" history entry.
 *
 * @param {object} task - Task to start; not mutated.
 * @param {object} [input] - Optional `actor` (defaults to the task's assignee,
 *   then "ops"), `at` timestamp, and `detail` (defaults to "Work started").
 * @returns {object} Updated task with `status: "in-progress"`.
 */
var startVoiceOpsTask = (task, input = {}) => {
  const startedTask = { ...task, status: "in-progress" };
  const historyEntry = {
    actor: input.actor ?? task.assignee ?? "ops",
    at: input.at,
    detail: input.detail ?? "Work started",
    type: "started"
  };
  return ensureTaskHistory(startedTask, historyEntry);
};
|
|
632
|
+
/**
 * Marks a task as done and records a "completed" history entry.
 *
 * @param {object} task - Task to complete; not mutated.
 * @param {object} [input] - Optional `actor` (defaults to the task's assignee,
 *   then "ops"), `at` timestamp, and `detail` (defaults to "Marked done").
 * @returns {object} Updated task with `status: "done"`.
 */
var completeVoiceOpsTask = (task, input = {}) => {
  const completedTask = { ...task, status: "done" };
  const historyEntry = {
    actor: input.actor ?? task.assignee ?? "ops",
    at: input.at,
    detail: input.detail ?? "Marked done",
    type: "completed"
  };
  return ensureTaskHistory(completedTask, historyEntry);
};
|
|
641
|
+
/**
 * Returns a task to the open state and records a "reopened" history entry.
 *
 * @param {object} task - Task to reopen; not mutated.
 * @param {object} [input] - Optional `actor` (defaults to the task's assignee,
 *   then "ops"), `at` timestamp, and `detail` (defaults to "Task reopened").
 * @returns {object} Updated task with `status: "open"`.
 */
var reopenVoiceOpsTask = (task, input = {}) => {
  const reopenedTask = { ...task, status: "open" };
  const historyEntry = {
    actor: input.actor ?? task.assignee ?? "ops",
    at: input.at,
    detail: input.detail ?? "Task reopened",
    type: "reopened"
  };
  return ensureTaskHistory(reopenedTask, historyEntry);
};
|
|
650
|
+
/**
 * Returns the tasks ordered newest first by `createdAt`.
 *
 * @param {Iterable<object>} tasks - Tasks to order; the input is not mutated.
 * @returns {object[]} A new array sorted by descending `createdAt`.
 */
var listVoiceOpsTasks = (tasks) => {
  const newestFirst = (left, right) => right.createdAt - left.createdAt;
  const copy = [...tasks];
  copy.sort(newestFirst);
  return copy;
};
|
|
651
|
+
/**
 * Aggregates a list of ops tasks into status counts and ranked breakdowns.
 *
 * @param {object[]} tasks - Tasks with `status`, `kind` and optional
 *   `outcome`, `target`, `assignee`.
 * @returns {object} `{ byKind, byOutcome, done, inProgress, open,
 *   topAssignees, topTargets, total }`. The four breakdowns are
 *   `[key, count]` pairs sorted by descending count. Tasks whose status is
 *   not "open", "in-progress" or "done" count only towards `total`.
 */
var summarizeVoiceOpsTasks = (tasks) => {
  const kindCounts = new Map();
  const outcomeCounts = new Map();
  const assigneeCounts = new Map();
  const targetCounts = new Map();
  let open = 0;
  let inProgress = 0;
  let done = 0;
  const bump = (counts, key) => {
    counts.set(key, (counts.get(key) ?? 0) + 1);
  };
  const ranked = (counts) => [...counts.entries()].sort((left, right) => right[1] - left[1]);
  for (const task of tasks) {
    switch (task.status) {
      case "open":
        open += 1;
        break;
      case "in-progress":
        inProgress += 1;
        break;
      case "done":
        done += 1;
        break;
      default:
        break;
    }
    bump(kindCounts, task.kind);
    if (task.outcome) {
      bump(outcomeCounts, task.outcome);
    }
    if (task.target) {
      bump(targetCounts, task.target);
    }
    if (task.assignee) {
      bump(assigneeCounts, task.assignee);
    }
  }
  return {
    byKind: ranked(kindCounts),
    byOutcome: ranked(outcomeCounts),
    done,
    inProgress,
    open,
    topAssignees: ranked(assigneeCounts),
    topTargets: ranked(targetCounts),
    total: tasks.length
  };
};
|
|
692
|
+
/**
 * Builds an integration event envelope.
 *
 * @param {string} type - Event type name.
 * @param {*} payload - Event payload, stored as given.
 * @param {object} [input] - Optional `createdAt` (defaults to the current
 *   time) and `id` (defaults to `crypto.randomUUID()`).
 * @returns {object} `{ createdAt, id, payload, type }`.
 */
var createVoiceIntegrationEvent = (type, payload, input = {}) => {
  const createdAt = input.createdAt ?? Date.now();
  const id = input.id ?? crypto.randomUUID();
  return { createdAt, id, payload, type };
};
|
|
698
|
+
/**
 * Builds the "call.completed" integration event for a finished session.
 *
 * @param {object} input - Must carry `session` (with `id`, `status`, `turns`
 *   and optional `call`, `scenarioId`); may carry `disposition` and
 *   `sessionSummary`.
 * @returns {object} Event with id `${session.id}:call.completed`. The
 *   payload's `disposition` falls back to `session.call.disposition`.
 */
var createVoiceCallCompletedEvent = (input) => {
  const payload = {
    call: input.session.call,
    disposition: input.disposition ?? input.session.call?.disposition,
    scenarioId: input.session.scenarioId,
    sessionId: input.session.id,
    sessionSummary: input.sessionSummary,
    status: input.session.status,
    turnCount: input.session.turns.length
  };
  const eventId = `${input.session.id}:call.completed`;
  return createVoiceIntegrationEvent("call.completed", payload, { id: eventId });
};
|
|
709
|
+
/**
 * Builds the "review.saved" integration event for a stored call review.
 *
 * @param {object} review - Review with `id`, `title`, `summary`
 *   (`elapsedMs`, `firstTurnLatencyMs`, `outcome`) and optional `postCall`.
 * @returns {object} Event with id `${review.id}:review.saved`.
 */
var createVoiceReviewSavedEvent = (review) => {
  const payload = {
    elapsedMs: review.summary.elapsedMs,
    firstTurnLatencyMs: review.summary.firstTurnLatencyMs,
    outcome: review.summary.outcome,
    postCall: review.postCall,
    reviewId: review.id,
    title: review.title
  };
  const eventId = `${review.id}:review.saved`;
  return createVoiceIntegrationEvent("review.saved", payload, { id: eventId });
};
|
|
719
|
+
/**
 * Builds the "task.created" integration event for a new ops task.
 *
 * @param {object} task - Ops task to describe.
 * @returns {object} Event with id `${task.id}:task.created:${task.updatedAt}`
 *   whose payload mirrors the task's assignee, kind, outcome,
 *   recommendedAction, reviewId, status, target, id (as `taskId`) and title.
 */
var createVoiceTaskCreatedEvent = (task) => {
  const payload = {
    assignee: task.assignee,
    kind: task.kind,
    outcome: task.outcome,
    recommendedAction: task.recommendedAction,
    reviewId: task.reviewId,
    status: task.status,
    target: task.target,
    taskId: task.id,
    title: task.title
  };
  const eventId = `${task.id}:task.created:${task.updatedAt}`;
  return createVoiceIntegrationEvent("task.created", payload, { id: eventId });
};
|
|
732
|
+
/**
 * Builds the "task.updated" integration event for a changed ops task.
 *
 * @param {object} task - Ops task to describe.
 * @returns {object} Event with id `${task.id}:task.updated:${task.updatedAt}`.
 *   Unlike the "task.created" payload, this one also carries `history` and
 *   `updatedAt`.
 */
var createVoiceTaskUpdatedEvent = (task) => {
  const payload = {
    assignee: task.assignee,
    history: task.history,
    kind: task.kind,
    outcome: task.outcome,
    recommendedAction: task.recommendedAction,
    reviewId: task.reviewId,
    status: task.status,
    target: task.target,
    taskId: task.id,
    title: task.title,
    updatedAt: task.updatedAt
  };
  const eventId = `${task.id}:task.updated:${task.updatedAt}`;
  return createVoiceIntegrationEvent("task.updated", payload, { id: eventId });
};
|
|
747
|
+
|
|
748
|
+
// src/testing/review.ts
|
|
749
|
+
/**
 * Rounds a numeric metric to two decimal places.
 *
 * @param {*} value - Candidate metric.
 * @returns {number | undefined} The rounded number, or `undefined` when
 *   `value` is not of type number.
 */
var roundMetric = (value) => {
  if (typeof value !== "number") {
    return undefined;
  }
  return Math.round(value * 100) / 100;
};
|
|
750
|
+
/**
 * Formats a labelled metric as `"<label>: <rounded value><unit>"`.
 *
 * @param {string} label - Metric label.
 * @param {*} value - Metric value.
 * @param {string} [unit="ms"] - Suffix appended directly after the value.
 * @returns {string | undefined} The formatted text, or `undefined` when
 *   `value` is not of type number.
 */
var formatMetric = (label, value, unit = "ms") => {
  if (typeof value !== "number") {
    return undefined;
  }
  return `${label}: ${roundMetric(value)}${unit}`;
};
|
|
751
|
+
/**
 * Finds the first timeline entry with the given event name.
 *
 * @param {object[]} timeline - Entries with `event` and `source`.
 * @param {string} event - Event name to match.
 * @param {string} [source] - When provided, the entry's `source` must match too.
 * @returns {object | undefined} The first matching entry, if any.
 */
var findTimelineEvent = (timeline, event, source) => {
  const anySource = source === undefined;
  for (const entry of timeline) {
    if (entry.event !== event) {
      continue;
    }
    if (anySource || entry.source === source) {
      return entry;
    }
  }
  return undefined;
};
|
|
752
|
+
/**
 * Renders one timeline entry as a single text row.
 *
 * The row always starts with `- <atMs>ms [<source>] <event>`; the quoted
 * text, `reason=`, `bytes=`, `confidence=` (rounded) and `name=` fragments
 * are appended, in that order, only when the entry carries them.
 *
 * @param {object} entry - Timeline entry.
 * @returns {string} Space-separated row.
 */
var formatTimelineText = (entry) => {
  const fixed = [`- ${entry.atMs}ms`, `[${entry.source}]`, entry.event];
  const optional = [
    entry.text ? `"${entry.text}"` : undefined,
    entry.reason ? `reason=${entry.reason}` : undefined,
    // Numeric checks (not truthiness) so a value of 0 is still printed.
    typeof entry.bytes === "number" ? `bytes=${entry.bytes}` : undefined,
    typeof entry.confidence === "number" ? `confidence=${roundMetric(entry.confidence)}` : undefined,
    entry.name ? `name=${entry.name}` : undefined
  ];
  const present = optional.filter((fragment) => fragment !== undefined);
  return [...fixed, ...present].join(" ");
};
|
|
771
|
+
/**
 * Tells whether a timeline entry is one of the per-chunk traffic events:
 * "inbound-media", "inbound-silence-pad", "stt-send" or "tts-audio".
 *
 * @param {object} entry - Timeline entry with an `event` name.
 * @returns {boolean} True for the four traffic event names.
 */
var isLowSignalTimelineEvent = (entry) => {
  const trafficEvents = ["inbound-media", "inbound-silence-pad", "stt-send", "tts-audio"];
  return trafficEvents.includes(entry.event);
};
|
|
772
|
+
/**
 * Totals the per-chunk traffic events in a timeline, grouped by kind.
 *
 * @param {object[]} timeline - Entries with `event` and optional numeric
 *   `bytes` and `chunkDurationMs`.
 * @returns {object[]} One `{ audioMs, bytes, count, label }` record per
 *   traffic kind that occurs, in order of first appearance. Non-numeric
 *   `bytes`/`chunkDurationMs` contribute 0; other events are ignored.
 */
var summarizeTimelineTraffic = (timeline) => {
  const labelsByEvent = new Map([
    ["inbound-media", "inbound media chunks"],
    ["inbound-silence-pad", "inbound silence padding"],
    ["stt-send", "STT audio sends"],
    ["tts-audio", "post-first TTS audio chunks"]
  ]);
  const numberOrZero = (value) => (typeof value === "number" ? value : 0);
  const totals = new Map();
  for (const entry of timeline) {
    const label = labelsByEvent.get(entry.event);
    if (!label) {
      continue;
    }
    let record = totals.get(label);
    if (record === undefined || record === null) {
      record = { audioMs: 0, bytes: 0, count: 0, label };
      totals.set(label, record);
    }
    record.count += 1;
    record.bytes += numberOrZero(entry.bytes);
    record.audioMs = (record.audioMs ?? 0) + numberOrZero(entry.chunkDurationMs);
  }
  return Array.from(totals.values());
};
|
|
792
|
+
/**
 * Renders a timeline as text rows, collapsing runs of chunk-level events.
 *
 * Consecutive entries that share the same `event` and `source` are merged
 * into one `- <start>-<end>ms [<source>] <event> x<count>` row when the
 * event is a traffic event (per isLowSignalTimelineEvent) or a "media"
 * event from source "twilio". Summed `bytes` and `chunkDurationMs` are
 * appended only when greater than zero. All other entries are rendered one
 * per row with formatTimelineText. Processing stops at the first falsy entry.
 *
 * @param {object[]} timeline - Timeline entries in display order.
 * @returns {string[]} The rendered rows.
 */
var compactTimeline = (timeline) => {
  const rows = [];
  const numberOrZero = (value) => (typeof value === "number" ? value : 0);
  let start = 0;
  while (start < timeline.length) {
    const head = timeline[start];
    if (!head) {
      break;
    }
    const isTwilioMedia = head.event === "media" && head.source === "twilio";
    if (!(isLowSignalTimelineEvent(head) || isTwilioMedia)) {
      rows.push(formatTimelineText(head));
      start += 1;
      continue;
    }
    // Extend the run while the following entries repeat the same event and source.
    let last = start;
    let byteTotal = numberOrZero(head.bytes);
    let audioTotalMs = numberOrZero(head.chunkDurationMs);
    for (let probe = start + 1; probe < timeline.length; probe += 1) {
      const candidate = timeline[probe];
      if (!candidate) {
        break;
      }
      if (candidate.event !== head.event || candidate.source !== head.source) {
        break;
      }
      byteTotal += numberOrZero(candidate.bytes);
      audioTotalMs += numberOrZero(candidate.chunkDurationMs);
      last = probe;
    }
    const runEndMs = timeline[last]?.atMs ?? head.atMs;
    const runLength = last - start + 1;
    const fragments = [
      `- ${head.atMs}-${runEndMs}ms`,
      `[${head.source}]`,
      `${head.event} x${runLength}`
    ];
    if (byteTotal > 0) {
      fragments.push(`bytes=${byteTotal}`);
    }
    if (audioTotalMs > 0) {
      fragments.push(`audio=${roundMetric(audioTotalMs)}ms`);
    }
    rows.push(fragments.join(" "));
    start = last + 1;
  }
  return rows;
};
|
|
840
|
+
/**
 * Returns a shallow copy of a review artifact whose `id` is set (or replaced) by `id`.
 *
 * @param {*} id - Identifier to attach.
 * @param {object} artifact - Source artifact; not mutated.
 * @returns {object} The copy carrying the given id.
 */
var withVoiceCallReviewId = (id, artifact) => {
  const identified = { ...artifact, id };
  return identified;
};
|
|
844
|
+
/**
 * Converts a live telephony benchmark report into a call-review artifact.
 *
 * Only the first fixture result is used. The report's trace is copied and
 * sorted by `atMs`, then mined for a latency breakdown (negative or
 * incomputable spans are dropped) and for notes about the first partial and
 * the last non-empty STT text.
 *
 * @param {object} report - Report with `fixtures`, and optional `trace`,
 *   `variant`, `ttsConfig`, `turnDetectionConfig`, `generatedAt`.
 * @param {object} [options] - Optional `preset` and `path`, copied through.
 * @returns {object} The review artifact (config, errors, latencyBreakdown,
 *   notes, summary, timeline, transcript, ...).
 * @throws {Error} When the report has no fixture results.
 */
var createVoiceCallReviewFromLiveTelephonyReport = (report, options = {}) => {
  const fixture = report.fixtures?.[0];
  if (!fixture) {
    throw new Error("Live telephony review requires at least one fixture result.");
  }
  const timeline = [...report.trace ?? []].sort((left, right) => left.atMs - right.atMs);
  const firstPartial = findTimelineEvent(timeline, "partial", "stt");
  const commitEvent = findTimelineEvent(timeline, "commit", "turn");
  const firstTtsAudio = findTimelineEvent(timeline, "tts-first-audio", "benchmark");
  const firstOutboundMedia = findTimelineEvent(timeline, "media", "twilio");
  const bargeInEvent = findTimelineEvent(timeline, "barge-in", "benchmark");
  const clearEvent = findTimelineEvent(timeline, "clear", "twilio");

  // Walk backwards to find the most recent STT partial/final with real text.
  let lastSttText;
  for (let position = timeline.length - 1; position >= 0; position -= 1) {
    const entry = timeline[position];
    const isSttText = entry.source === "stt" && (entry.event === "partial" || entry.event === "final");
    if (isSttText && typeof entry.text === "string" && entry.text.length > 0) {
      lastSttText = entry.text;
      break;
    }
  }

  const timeOf = (entry) => (typeof entry?.atMs === "number" ? entry.atMs : undefined);
  const between = (label, from, to) => {
    const fromMs = timeOf(from);
    const toMs = timeOf(to);
    if (fromMs === undefined || toMs === undefined) {
      return undefined;
    }
    return { label, valueMs: toMs - fromMs };
  };
  const firstPartialMs = timeOf(firstPartial);
  const latencyCandidates = [
    firstPartialMs === undefined ? undefined : { label: "start to first partial", valueMs: firstPartialMs },
    between("first partial to commit", firstPartial, commitEvent),
    between("commit to first TTS audio", commitEvent, firstTtsAudio),
    between("commit to first outbound media", commitEvent, firstOutboundMedia),
    between("barge-in to clear", bargeInEvent, clearEvent)
  ];
  // Drop spans that could not be computed or that run backwards in time.
  const latencyBreakdown = latencyCandidates.filter((span) => span !== undefined && span.valueMs >= 0);

  const noteCandidates = [
    report.variant?.description,
    firstPartial?.text ? `First partial: "${firstPartial.text}"` : undefined,
    lastSttText ? `Last STT text: "${lastSttText}"` : undefined
  ];
  const notes = noteCandidates.filter((note) => typeof note === "string" && note.length > 0);

  const stt = report.variant
    ? {
        description: report.variant.description,
        id: report.variant.id,
        model: report.variant.model
      }
    : undefined;
  const summary = {
    clearLatencyMs: roundMetric(fixture.clearLatencyMs),
    elapsedMs: roundMetric(fixture.elapsedMs),
    firstOutboundMediaLatencyMs: roundMetric(fixture.firstOutboundMediaLatencyMs),
    firstTurnLatencyMs: roundMetric(fixture.firstTurnLatencyMs),
    markLatencyMs: roundMetric(fixture.markLatencyMs),
    outboundMediaCount: fixture.outboundMediaCount,
    pass: fixture.passes,
    termRecall: roundMetric(fixture.termRecall),
    turnCount: fixture.turnCount,
    wordErrorRate: roundMetric(fixture.wordErrorRate)
  };
  return {
    config: {
      preset: options.preset,
      stt,
      tts: report.ttsConfig,
      turnDetection: report.turnDetectionConfig
    },
    errors: fixture.errors ?? [],
    expectedText: fixture.expectedText,
    fixtureId: fixture.fixtureId,
    generatedAt: report.generatedAt,
    latencyBreakdown,
    notes,
    path: options.path,
    summary,
    title: fixture.title ?? "Voice Call Review",
    timeline,
    transcript: {
      actual: fixture.actualText,
      expected: fixture.expectedText
    }
  };
};
|
|
922
|
+
/**
 * Extracts a human-readable message from an arbitrary thrown value.
 *
 * @param {*} error - A string, an Error, or anything else.
 * @returns {string} The string itself or the Error's message when it is
 *   non-blank; otherwise "Unknown call error".
 */
var toErrorMessage = (error) => {
  const hasContent = (text) => text.trim().length > 0;
  if (typeof error === "string" && hasContent(error)) {
    return error;
  }
  if (error instanceof Error && hasContent(error.message)) {
    return error.message;
  }
  return "Unknown call error";
};
|
|
931
|
+
/**
 * Creates an in-memory recorder that collects timeline entries for one call and
 * turns them into a review artifact.
 *
 * Timestamps are stored as milliseconds elapsed since the recorder was created
 * (clamped at 0), using `options.now` when provided and `Date.now` otherwise.
 *
 * @param {object} [options]
 * @param {() => number} [options.now] - Clock override.
 * @param {unknown} [options.config] - Copied verbatim into the artifact.
 * @param {string} [options.fixtureId] - Copied verbatim into the artifact.
 * @param {string} [options.path] - Copied verbatim into the artifact.
 * @param {string} [options.title] - Artifact title; defaults to "Voice Call Review".
 * @returns {{
 *   finalize: () => object,
 *   recordError: (error: unknown) => void,
 *   recordTwilioInbound: (input: object) => void,
 *   recordTwilioOutbound: (input: object) => void,
 *   recordVoiceMessage: (message: object) => void
 * }}
 */
var createVoiceCallReviewRecorder = (options = {}) => {
  const now = options.now ?? (() => Date.now());
  const startedAt = now();
  const errors = [];
  const timeline = [];
  const committedTurns = [];
  const committedTurnIds = new Set();
  // Remembers whether a "tts-first-audio" entry was already recorded so each
  // audio chunk does not have to rescan the whole timeline.
  let hasFirstTtsAudio = false;

  const push = (source, event, fields = {}) => {
    const entry = {
      atMs: Math.max(0, now() - startedAt),
      event,
      source,
      ...fields
    };
    if (entry.event === "tts-first-audio") {
      hasFirstTtsAudio = true;
    }
    timeline.push(entry);
  };

  // Records a failure both in the error list and on the timeline.
  const recordFailure = (message) => {
    errors.push(message);
    push("turn", "error", { reason: message });
  };

  // Inbound and outbound Twilio events are stored identically (source "twilio").
  // NOTE(review): nothing here separates the two directions other than the
  // caller-supplied `track`/`event` values — confirm that inbound "media" events
  // are not meant to be excluded from the outbound-media metrics in finalize().
  const recordTwilio = (input) => {
    push("twilio", input.event, {
      bytes: input.bytes,
      chunkDurationMs: input.chunkDurationMs,
      name: input.name,
      reason: input.reason,
      text: input.text,
      track: input.track
    });
  };

  // Decoded size of a base64 string; trailing "=" padding does not carry data.
  const decodedBase64ByteLength = (chunkBase64) => {
    let padding = 0;
    if (chunkBase64.endsWith("==")) {
      padding = 2;
    } else if (chunkBase64.endsWith("=")) {
      padding = 1;
    }
    return Math.max(0, Math.floor(chunkBase64.length * 3 / 4) - padding);
  };

  // Builds a "from -> to" latency entry when both events carry a numeric atMs.
  const span = (label, from, to) => typeof from?.atMs === "number" && typeof to?.atMs === "number" ? { label, valueMs: to.atMs - from.atMs } : undefined;

  const finalize = () => {
    const sortedTimeline = [...timeline].sort((left, right) => left.atMs - right.atMs);
    const firstPartial = findTimelineEvent(sortedTimeline, "partial", "stt");
    const commitEvent = findTimelineEvent(sortedTimeline, "commit", "turn");
    const firstTtsAudio = findTimelineEvent(sortedTimeline, "tts-first-audio", "benchmark");
    const firstOutboundMedia = findTimelineEvent(sortedTimeline, "media", "twilio");
    const bargeInEvent = findTimelineEvent(sortedTimeline, "barge-in", "benchmark");
    const clearEvent = findTimelineEvent(sortedTimeline, "clear", "twilio");
    const markEvent = findTimelineEvent(sortedTimeline, "mark", "twilio");
    const elapsedMs = sortedTimeline.at(-1)?.atMs ?? 0;
    const lastSttText = [...sortedTimeline].reverse().find((entry) => entry.source === "stt" && (entry.event === "partial" || entry.event === "final") && typeof entry.text === "string" && entry.text.length > 0)?.text ?? undefined;
    const latencyBreakdown = [
      typeof firstPartial?.atMs === "number" ? { label: "start to first partial", valueMs: firstPartial.atMs } : undefined,
      span("first partial to commit", firstPartial, commitEvent),
      span("commit to first TTS audio", commitEvent, firstTtsAudio),
      span("commit to first outbound media", commitEvent, firstOutboundMedia),
      span("barge-in to clear", bargeInEvent, clearEvent)
    ].filter((value) => value !== undefined && value.valueMs >= 0);
    return {
      config: options.config,
      // Snapshot: later recordError() calls must not alter an artifact whose
      // `pass` flag was computed from the error list at this moment.
      errors: [...errors],
      fixtureId: options.fixtureId,
      generatedAt: now(),
      latencyBreakdown,
      notes: [
        firstPartial?.text ? `First partial: "${firstPartial.text}"` : undefined,
        lastSttText ? `Last STT text: "${lastSttText}"` : undefined
      ].filter((value) => typeof value === "string"),
      path: options.path,
      summary: {
        clearLatencyMs: roundMetric(typeof clearEvent?.atMs === "number" && typeof bargeInEvent?.atMs === "number" ? clearEvent.atMs - bargeInEvent.atMs : undefined),
        elapsedMs: roundMetric(elapsedMs),
        firstOutboundMediaLatencyMs: roundMetric(firstOutboundMedia?.atMs),
        firstTurnLatencyMs: roundMetric(commitEvent?.atMs),
        markLatencyMs: roundMetric(markEvent?.atMs),
        outboundMediaCount: sortedTimeline.filter((entry) => entry.source === "twilio" && entry.event === "media").length,
        pass: errors.length === 0,
        turnCount: committedTurns.length
      },
      title: options.title ?? "Voice Call Review",
      timeline: sortedTimeline,
      transcript: {
        actual: committedTurns.join(" ").trim()
      }
    };
  };

  const recordVoiceMessage = (message) => {
    switch (message.type) {
      case "partial":
      case "final":
        push("stt", message.type, {
          confidence: message.transcript.confidence,
          text: message.transcript.text
        });
        return;
      case "assistant":
        push("turn", "assistant", { text: message.text });
        return;
      case "audio":
        // Only the first audio chunk is labelled "tts-first-audio".
        push("benchmark", hasFirstTtsAudio ? "tts-audio" : "tts-first-audio", {
          bytes: decodedBase64ByteLength(message.chunkBase64)
        });
        return;
      case "turn":
        // A turn id is committed at most once; repeats are ignored.
        if (committedTurnIds.has(message.turn.id)) {
          return;
        }
        committedTurnIds.add(message.turn.id);
        committedTurns.push(message.turn.text);
        push("turn", "commit", {
          confidence: message.turn.quality?.averageConfidence,
          text: message.turn.text
        });
        return;
      case "error":
        recordFailure(message.message);
        return;
      case "complete":
        push("turn", "complete", { text: message.sessionId });
        return;
      case "session":
        push("turn", "session", {
          reason: message.status,
          text: message.sessionId
        });
        return;
      case "pong":
        push("benchmark", "pong");
        return;
    }
  };

  return {
    finalize,
    recordError: (error) => {
      recordFailure(toErrorMessage(error));
    },
    recordTwilioInbound: recordTwilio,
    recordTwilioOutbound: recordTwilio,
    recordVoiceMessage
  };
};
|
|
1089
|
+
/**
 * Renders the "Config" Markdown section as a fenced JSON block.
 *
 * @param {unknown} config - Configuration to serialize; falsy values produce no section.
 * @returns {string} The Markdown section, or "" when `config` is falsy.
 */
var renderConfigSection = (config) => {
  if (!config) {
    return "";
  }
  const serialized = JSON.stringify(config, null, 2);
  return `## Config\n\n\`\`\`json\n${serialized}\n\`\`\``;
};
|
|
1102
|
+
/**
 * Renders the "Timeline" Markdown section.
 *
 * Entries flagged by `isLowSignalTimelineEvent` are dropped; the remainder is
 * turned into lines by `compactTimeline`.
 *
 * @param {Array<object>} timeline - Timeline entries.
 * @returns {string} The section, with a placeholder line when nothing remains.
 */
var renderTimeline = (timeline) => {
  const significant = timeline.filter((entry) => !isLowSignalTimelineEvent(entry));
  const body = significant.length === 0 ? ["_No timeline events captured._"] : compactTimeline(significant);
  return ["## Timeline", "", ...body].join("\n");
};
|
|
1113
|
+
/**
 * Renders the "Transport Summary" Markdown section from the traffic summaries
 * produced by `summarizeTimelineTraffic`.
 *
 * Each line shows the label and count, followed by the byte total and audio
 * duration when those are greater than zero.
 *
 * @param {Array<object>} timeline - Timeline entries.
 * @returns {string} The section, or "" when there are no summaries.
 */
var renderTransportSummary = (timeline) => {
  const summaries = summarizeTimelineTraffic(timeline);
  if (summaries.length === 0) {
    return "";
  }
  const lines = summaries.map((summary) => {
    const details = [
      `- ${summary.label}: ${summary.count}`,
      summary.bytes > 0 ? `${summary.bytes} bytes` : undefined,
      (summary.audioMs ?? 0) > 0 ? `${roundMetric(summary.audioMs)}ms audio` : undefined
    ];
    return details.filter((detail) => detail !== undefined).join(", ");
  });
  return ["## Transport Summary", "", ...lines].join("\n");
};
|
|
1134
|
+
/**
 * Renders the "Latency Breakdown" Markdown section.
 *
 * @param {Array<{label: string, valueMs: number}>} breakdown - Latency entries.
 * @returns {string} One bullet per entry (value passed through `roundMetric`),
 *   or "" when the list is empty.
 */
var renderLatencyBreakdown = (breakdown) => {
  if (breakdown.length === 0) {
    return "";
  }
  const bullets = [];
  for (const { label, valueMs } of breakdown) {
    bullets.push(`- ${label}: ${roundMetric(valueMs)}ms`);
  }
  return ["## Latency Breakdown", "", ...bullets].join("\n");
};
|
|
1145
|
+
/**
 * Renders a voice call review artifact as a Markdown document.
 *
 * Sections appear in this order: title, source/fixture lines, Summary,
 * Transcript, Notes, Latency Breakdown, Transport Summary, Errors, Config,
 * Timeline. A section that renders to "" still contributes one empty line
 * (the same output the previous implementation produced).
 *
 * The config section is rendered once and reused; previously it was rendered
 * twice, serializing the config twice per document.
 *
 * @param {object} artifact - Review artifact (title, summary, transcript, notes,
 *   errors, latencyBreakdown, timeline, and optional path/fixtureId/config).
 * @returns {string} The Markdown document.
 */
var renderVoiceCallReviewMarkdown = (artifact) => {
  const { summary } = artifact;
  const isText = (value) => typeof value === "string";
  // Plain "- label: value" bullet, emitted only for numeric values.
  const numericBullet = (label, value) => typeof value === "number" ? `- ${label}: ${value}` : undefined;
  // Heading plus bullet list, or "" when there is nothing to list.
  const bulletSection = (heading, items) => items.length ? [heading, "", ...items.map((item) => `- ${item}`)].join("\n") : "";
  // A section followed by a blank separator line only when the section is non-empty.
  const withSpacer = (section) => [section, section ? "" : undefined];

  const summaryLines = [
    `- pass: ${summary.pass ? "yes" : "no"}`,
    formatMetric("first turn", summary.firstTurnLatencyMs),
    formatMetric("first outbound media", summary.firstOutboundMediaLatencyMs),
    formatMetric("mark", summary.markLatencyMs),
    formatMetric("clear", summary.clearLatencyMs),
    formatMetric("elapsed", summary.elapsedMs),
    numericBullet("word error rate", summary.wordErrorRate),
    numericBullet("term recall", summary.termRecall),
    numericBullet("turn count", summary.turnCount),
    numericBullet("outbound media count", summary.outboundMediaCount)
  ].filter(isText);
  const notes = bulletSection("## Notes", artifact.notes);
  const errors = bulletSection("## Errors", artifact.errors);
  const latency = renderLatencyBreakdown(artifact.latencyBreakdown);
  const transportSummary = renderTransportSummary(artifact.timeline);
  const config = renderConfigSection(artifact.config);

  return [
    `# ${artifact.title}`,
    "",
    artifact.path ? `Source: \`${artifact.path}\`` : undefined,
    artifact.fixtureId ? `Fixture: \`${artifact.fixtureId}\`` : undefined,
    "",
    "## Summary",
    "",
    ...summaryLines,
    "",
    "## Transcript",
    "",
    `- expected: ${artifact.transcript.expected ?? "_n/a_"}`,
    `- actual: ${artifact.transcript.actual}`,
    "",
    ...withSpacer(notes),
    ...withSpacer(latency),
    ...withSpacer(transportSummary),
    ...withSpacer(errors),
    ...withSpacer(config),
    renderTimeline(artifact.timeline)
  ].filter(isText).join("\n");
};
|
|
1193
|
+
/**
 * Escapes the five HTML-significant characters so a string can be embedded in
 * element content or a quoted attribute value.
 *
 * "&" is replaced first so the ampersands introduced by the later replacements
 * are not escaped a second time.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The escaped text.
 */
var escapeHtml2 = (value) => value
  .replaceAll("&", "&amp;")
  .replaceAll("<", "&lt;")
  .replaceAll(">", "&gt;")
  .replaceAll('"', "&quot;")
  .replaceAll("'", "&#39;");
|
|
1194
|
+
/**
 * Renders a voice call review artifact as a standalone HTML page.
 *
 * All artifact text is passed through `escapeHtml2` before interpolation.
 *
 * Fixes relative to the previous implementation:
 * - a missing latency metric is shown as "n/a" instead of "n/ams";
 * - transport rows read "3 events, ..." instead of "3, events, ...".
 *
 * @param {object} artifact - Review artifact (title, summary, transcript, notes,
 *   latencyBreakdown, timeline, optional config).
 * @returns {string} The HTML document.
 */
var renderVoiceCallReviewHTML = (artifact) => {
  // Appends the unit only when a value is actually present.
  const latencyValue = (value) => value === undefined || value === null ? "n/a" : `${value}ms`;
  const notes = artifact.notes.map((note) => `<li>${escapeHtml2(note)}</li>`).join("");
  const latency = artifact.latencyBreakdown.map((entry) => `<li><strong>${escapeHtml2(entry.label)}:</strong> ${roundMetric(entry.valueMs)}ms</li>`).join("");
  const transport = summarizeTimelineTraffic(artifact.timeline).map((summary) => {
    const parts = [`${summary.count} events`];
    if (summary.bytes > 0) {
      parts.push(`${summary.bytes} bytes`);
    }
    if ((summary.audioMs ?? 0) > 0) {
      parts.push(`${roundMetric(summary.audioMs)}ms audio`);
    }
    return `<li><strong>${escapeHtml2(summary.label)}:</strong> ${escapeHtml2(parts.join(", "))}</li>`;
  }).join("");
  // compactTimeline lines are stripped of a leading "- " before being wrapped in <li>.
  const timeline = compactTimeline(artifact.timeline.filter((entry) => !isLowSignalTimelineEvent(entry))).map((line) => `<li>${escapeHtml2(line.replace(/^- /u, ""))}</li>`).join("");
  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>${escapeHtml2(artifact.title)}</title>
<style>
:root { color-scheme: dark; }
body { font-family: ui-sans-serif, system-ui, sans-serif; margin: 0; padding: 24px; background: #0b0d10; color: #f4f4f5; }
main { max-width: 980px; margin: 0 auto; display: grid; gap: 16px; }
section { background: #13161b; border: 1px solid #232833; border-radius: 16px; padding: 18px; }
h1, h2 { margin: 0 0 12px; }
ul { margin: 0; padding-left: 20px; display: grid; gap: 8px; }
code, pre { font-family: ui-monospace, SFMono-Regular, monospace; }
pre { white-space: pre-wrap; overflow-wrap: anywhere; background: #0f1217; border-radius: 12px; padding: 14px; border: 1px solid #232833; }
.grid { display: grid; gap: 16px; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); }
.metric { display: grid; gap: 4px; }
.label { color: #a1a1aa; font-size: 0.82rem; text-transform: uppercase; letter-spacing: 0.08em; }
.value { font-size: 1.05rem; }
</style>
</head>
<body>
<main>
<section>
<h1>${escapeHtml2(artifact.title)}</h1>
<div class="grid">
<div class="metric"><div class="label">Pass</div><div class="value">${artifact.summary.pass ? "yes" : "no"}</div></div>
<div class="metric"><div class="label">First Turn</div><div class="value">${latencyValue(artifact.summary.firstTurnLatencyMs)}</div></div>
<div class="metric"><div class="label">First Outbound Media</div><div class="value">${latencyValue(artifact.summary.firstOutboundMediaLatencyMs)}</div></div>
<div class="metric"><div class="label">Turn Count</div><div class="value">${artifact.summary.turnCount ?? "n/a"}</div></div>
</div>
</section>
<section>
<h2>Transcript</h2>
<ul>
<li><strong>Expected:</strong> ${escapeHtml2(artifact.transcript.expected ?? "n/a")}</li>
<li><strong>Actual:</strong> ${escapeHtml2(artifact.transcript.actual || "n/a")}</li>
</ul>
</section>
<section>
<h2>Notes</h2>
<ul>${notes || "<li>No notes.</li>"}</ul>
</section>
<section>
<h2>Latency Breakdown</h2>
<ul>${latency || "<li>No latency data.</li>"}</ul>
</section>
<section>
<h2>Transport Summary</h2>
<ul>${transport || "<li>No transport data.</li>"}</ul>
</section>
<section>
<h2>Timeline</h2>
<ul>${timeline || "<li>No timeline events.</li>"}</ul>
</section>
<section>
<h2>Config</h2>
<pre>${escapeHtml2(JSON.stringify(artifact.config ?? {}, null, 2))}</pre>
</section>
</main>
</body>
</html>`;
};
|
|
1271
|
+
|
|
1272
|
+
// src/runtimeOps.ts
|
|
1273
|
+
/**
 * Builds the default review title for a session.
 *
 * @param {{id: string, scenarioId?: string}} session
 * @returns {string} "Voice call review: " followed by the scenario id when it is
 *   truthy, otherwise by the session id.
 */
var defaultReviewTitle = (session) => {
  const subject = session.scenarioId ? session.scenarioId : session.id;
  return `Voice call review: ${subject}`;
};
|
|
1274
|
+
/**
 * Produces the default post-call summary for a call disposition.
 *
 * @param {{disposition?: string, reason?: string, target?: string}} input
 * @returns {{label: string, recommendedAction: string, reason: (string|undefined),
 *   summary: string, target?: (string|undefined)}} Summary for the disposition.
 *   "completed" and any unrecognized disposition yield the "Completed" summary.
 *   Only the "transferred" summary carries a `target` key.
 */
var buildDefaultPostCallSummary = (input) => {
  const { disposition, reason, target } = input;
  // Every summary shares this shape and key order; `reason` is always echoed back.
  const outcome = (label, recommendedAction, summary) => ({
    label,
    recommendedAction,
    reason,
    summary
  });

  if (disposition === "transferred") {
    const recommendedAction = target ? `Confirm the handoff to ${target} completed successfully.` : "Confirm the transfer completed successfully.";
    const summary = target ? `The call was transferred to ${target}.` : "The call was transferred.";
    return { ...outcome("Transferred", recommendedAction, summary), target };
  }
  if (disposition === "escalated") {
    return outcome(
      "Escalated",
      "Review the escalated call and route it to a human operator.",
      reason ? `The call escalated because ${reason}.` : "The call escalated for operator review."
    );
  }
  if (disposition === "voicemail") {
    return outcome("Voicemail", "Queue a callback follow-up for this caller.", "The call reached voicemail and needs a callback.");
  }
  if (disposition === "no-answer") {
    return outcome("No Answer", "Retry the call or create a callback task.", "The call did not reach a live respondent.");
  }
  if (disposition === "failed") {
    return outcome(
      "Failed",
      "Inspect the call review before retrying this flow.",
      reason ? `The call failed because ${reason}.` : "The call failed before a successful completion."
    );
  }
  if (disposition === "closed") {
    return outcome("Closed", "Inspect the review if this early closure was unexpected.", "The call closed before an explicit completion.");
  }
  // "completed" and anything unrecognized.
  return outcome("Completed", "No follow-up action is required.", "The call completed successfully.");
};
|
|
1329
|
+
/**
 * Builds a review artifact directly from a stored session.
 *
 * @param {object} input
 * @param {object} input.session - Session providing `id`, `createdAt`, `turns`,
 *   and optionally `lastActivityAt`, `scenarioId` and `call.events`.
 * @param {string} [input.disposition] - Call outcome; "failed" marks the review as not passing.
 * @param {string} [input.reason] - Reason; becomes the single error when the call failed.
 * @param {string} [input.target] - Forwarded to the post-call summary.
 * @param {number} [input.generatedAt] - Timestamp override; defaults to `Date.now()`.
 * @returns {object} The review artifact.
 */
var createVoiceCallReviewFromSession = (input) => {
  const { session, disposition, reason, target } = input;
  const generatedAt = input.generatedAt ?? Date.now();
  const actual = session.turns.map((turn) => turn.text).join(" ").trim();
  // Falls back to the generation time when the session has no recorded last activity.
  const elapsedMs = (session.lastActivityAt ?? generatedAt) - session.createdAt;

  const errors = disposition === "failed" && reason ? [reason] : [];
  const latencyBreakdown = [];
  if (typeof elapsedMs === "number" && elapsedMs >= 0) {
    latencyBreakdown.push({ label: "Session elapsed", valueMs: elapsedMs });
  }
  // Call events become timeline entries offset from session creation (never negative).
  const toTimelineEntry = (event) => ({
    atMs: Math.max(0, event.at - session.createdAt),
    event: `call-${event.type}`,
    reason: event.reason,
    source: "turn",
    text: event.target ?? event.disposition,
    track: event.target
  });

  return {
    errors,
    generatedAt,
    latencyBreakdown,
    notes: [],
    postCall: buildDefaultPostCallSummary({ disposition, reason, target }),
    summary: {
      elapsedMs: elapsedMs >= 0 ? elapsedMs : undefined,
      outcome: disposition,
      pass: disposition !== "failed",
      turnCount: session.turns.length
    },
    title: defaultReviewTitle(session),
    timeline: session.call?.events.map(toTimelineEntry) ?? [],
    transcript: { actual }
  };
};
|
|
1368
|
+
/**
 * Ensures a review has an id before it is stored.
 *
 * @param {string} sessionId - Used to derive the id `${sessionId}:review`.
 * @param {object} review - Review artifact, possibly without an id.
 * @returns {object} `review` itself when it already has a non-empty string id;
 *   otherwise the result of `withVoiceCallReviewId`.
 */
var asStoredReview = (sessionId, review) => {
  const alreadyIdentified = typeof review.id === "string" && review.id.length > 0;
  return alreadyIdentified ? review : withVoiceCallReviewId(`${sessionId}:review`, review);
};
|
|
1374
|
+
/**
 * Ensures an ops task has an id before it is stored.
 *
 * @param {{id: string}} review - The stored review the task belongs to.
 * @param {object} task - Task, possibly without an id.
 * @returns {object} `task` itself when it already has a non-empty string id;
 *   otherwise the result of `withVoiceOpsTaskId` using `${review.id}:ops`.
 */
var asStoredTask = (review, task) => {
  const alreadyIdentified = "id" in task && typeof task.id === "string" && task.id.length > 0;
  return alreadyIdentified ? task : withVoiceOpsTaskId(`${review.id}:ops`, task);
};
|
|
1380
|
+
/**
 * Persists a runtime event (when an event store is configured) and then
 * notifies the optional `onEvent` hook.
 *
 * @param {object} input
 * @param {object} input.config - Runtime ops config; `events` and `onEvent` are optional.
 * @param {{id: string}} input.event - Event to store under its id and hand to the hook.
 * @param {unknown} input.api - Passed through to the hook.
 * @param {unknown} input.context - Passed through to the hook.
 * @param {unknown} input.session - Passed through to the hook.
 * @returns {Promise<void>}
 */
var emitRuntimeEvent = async (input) => {
  const { api, config, context, event, session } = input;
  // Storage is awaited first, so the hook runs after the event has been saved.
  await config.events?.set(event.id, event);
  await config.onEvent?.({ api, context, event, session });
};
|
|
1389
|
+
/**
 * Runs the post-call bookkeeping for a finished session: builds and stores a
 * review, optionally derives and stores an ops task, and emits the matching
 * runtime events (review saved, task created, call completed — in that order).
 *
 * @param {object} input
 * @param {object} [input.config] - Runtime ops config; nothing happens without it.
 * @param {object} input.session - The finished session.
 * @param {string} [input.disposition]
 * @param {string} [input.reason]
 * @param {string} [input.target]
 * @param {unknown} [input.metadata] - Forwarded to `config.buildReview`.
 * @param {unknown} [input.api]
 * @param {unknown} [input.context]
 * @returns {Promise<{review: (object|undefined), task: (object|undefined)}|undefined>}
 *   `undefined` when no config is supplied.
 */
var recordVoiceRuntimeOps = async (input) => {
  const { api, config, context, disposition, reason, session, target } = input;
  if (!config) {
    return;
  }
  const emit = (event) => emitRuntimeEvent({ api, config, context, event, session });

  // Result of the most recent turn, handed to a custom review builder.
  const result = session.turns.at(-1)?.result;
  const customReview = await config.buildReview?.({
    api,
    context,
    disposition,
    metadata: input.metadata,
    reason,
    result,
    session,
    target
  });
  // The session-derived review is used only when the custom builder yields null/undefined.
  const reviewCandidate = customReview ?? createVoiceCallReviewFromSession({ disposition, reason, session, target });
  const review = reviewCandidate ? asStoredReview(session.id, reviewCandidate) : undefined;

  let task;
  if (review) {
    await config.reviews?.set(review.id, review);
    await emit(createVoiceReviewSavedEvent(review));

    const customTask = await config.createTaskFromReview?.({ api, context, disposition, review, session });
    const taskCandidate = customTask ?? buildVoiceOpsTaskFromReview(review) ?? undefined;
    if (taskCandidate) {
      task = asStoredTask(review, taskCandidate);
      await config.tasks?.set(task.id, task);
      await emit(createVoiceTaskCreatedEvent(task));
    }
  }

  await emit(createVoiceCallCompletedEvent({ disposition, session }));
  return { review, task };
};
|
|
1456
|
+
|
|
210
1457
|
// src/store.ts
|
|
211
1458
|
/**
 * Generates a new identifier.
 *
 * @returns {string} The value produced by `crypto.randomUUID()`.
 */
var createId = () => {
  return crypto.randomUUID();
};
|
|
212
|
-
/**
 * Creates a fresh, active voice session record.
 *
 * @param {string} id - Session id.
 * @param {string} [scenarioId] - Optional scenario id stored on the record.
 * @returns {object} A new record stamped with the current time, an empty
 *   current turn, no transcripts or turns, and an empty last-committed-turn marker.
 */
var createVoiceSessionRecord = (id, scenarioId) => {
  // In-progress turn state; the timing fields start unset.
  const currentTurn = {
    finalText: "",
    lastSpeechAt: undefined,
    lastTranscriptAt: undefined,
    partialEndedAt: undefined,
    partialStartedAt: undefined,
    partialText: "",
    silenceStartedAt: undefined,
    transcripts: []
  };
  // Placeholder describing "no turn committed yet".
  const lastCommittedTurn = {
    committedAt: 0,
    signature: "",
    text: "",
    transcriptIds: []
  };
  return {
    committedTurnIds: [],
    createdAt: Date.now(),
    currentTurn,
    id,
    scenarioId,
    reconnect: { attempts: 0 },
    status: "active",
    transcripts: [],
    turns: [],
    lastCommittedTurn
  };
};
|
|
226
|
-
/**
 * Builds a brand-new session record for `id` while carrying over the metadata
 * of an existing record.
 *
 * @param {string} id - Session id.
 * @param {{metadata?: unknown}} [existing] - Previous record, if any.
 * @param {string} [scenarioId] - Scenario id for the new record.
 * @returns {object} A fresh record whose `metadata` is `existing?.metadata`.
 */
var resetVoiceSessionRecord = (id, existing, scenarioId) => {
  const fresh = createVoiceSessionRecord(id, scenarioId);
  return { ...fresh, metadata: existing?.metadata };
};
|
|
230
1489
|
var toVoiceSessionSummary = (session) => ({
|
|
@@ -235,6 +1494,9 @@ var toVoiceSessionSummary = (session) => ({
|
|
|
235
1494
|
turnCount: session.turns.length
|
|
236
1495
|
});
|
|
237
1496
|
|
|
1497
|
+
// src/session.ts
|
|
1498
|
+
import { Buffer } from "buffer";
|
|
1499
|
+
|
|
238
1500
|
// src/turnDetection.ts
|
|
239
1501
|
var DEFAULT_SILENCE_MS = 700;
|
|
240
1502
|
var DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
@@ -261,6 +1523,64 @@ var measureAudioLevel = (audio) => {
|
|
|
261
1523
|
return Math.sqrt(sumSquares / samples.length);
|
|
262
1524
|
};
|
|
263
1525
|
/**
 * Trims a string and collapses every run of whitespace into a single space.
 *
 * @param {string} value
 * @returns {string}
 */
var normalizeText = (value) => {
  const pieces = value.trim().split(/\s+/);
  return pieces.join(" ");
};
|
|
1526
|
+
/**
 * Counts the pieces of a string separated by single spaces.
 *
 * @param {string} value - Text to count; consecutive spaces yield empty pieces
 *   that are counted too.
 * @returns {number} 0 for an empty string, otherwise the number of pieces.
 */
var countWords = (value) => {
  if (!(value.length > 0)) {
    return 0;
  }
  return value.split(" ").length;
};
|
|
1527
|
+
/**
 * Chooses between the transcript text held so far and a newly received one.
 *
 * Both inputs are normalized first. The new text wins when the current text is
 * empty, when it contains the current text, when it has more words, or when it
 * has the same number of words but more characters. In every other case the
 * current text is kept.
 *
 * @param {string} currentText
 * @param {string} nextText
 * @returns {string} The preferred (normalized) text.
 */
var selectPreferredTranscriptText = (currentText, nextText) => {
  const current = normalizeText(currentText);
  const next = normalizeText(nextText);
  if (!current || !next) {
    // At most one side has content: return whichever is non-empty.
    return current ? current : next;
  }
  if (current === next || current.includes(next)) {
    return current;
  }
  if (next.includes(current)) {
    return next;
  }
  const wordDelta = countWords(next) - countWords(current);
  const nextIsRicher = wordDelta > 0 || (wordDelta === 0 && next.length > current.length);
  return nextIsRicher ? next : current;
};
|
|
1550
|
+
/**
 * Appends `nextText` to `currentText`, removing the longest run of words that
 * ends the current text and also begins the next text.
 *
 * @param {string} currentText
 * @param {string} nextText
 * @returns {string} The merged, normalized text. When either side is empty the
 *   other side is returned as-is (normalized).
 */
var mergeSequentialTranscriptText = (currentText, nextText) => {
  const current = normalizeText(currentText);
  const next = normalizeText(nextText);
  if (!current) {
    return next;
  }
  if (!next) {
    return current;
  }
  const currentWords = current.split(" ");
  const nextWords = next.split(" ");
  // True when the last `size` words of current equal the first `size` words of next.
  const overlapsBy = (size) => {
    const offset = currentWords.length - size;
    return nextWords.slice(0, size).every((word, index) => word === currentWords[offset + index]);
  };
  let overlap = Math.min(currentWords.length, nextWords.length);
  while (overlap > 0 && !overlapsBy(overlap)) {
    overlap -= 1;
  }
  if (overlap === 0) {
    return `${current} ${next}`.trim();
  }
  return currentWords.concat(nextWords.slice(overlap)).join(" ");
};
|
|
1571
|
+
/**
 * Counts how many leading words two texts share after normalization.
 *
 * @param {string} currentText
 * @param {string} nextText
 * @returns {number} Number of identical words at the start of both texts.
 */
var countCommonPrefixWords = (currentText, nextText) => {
  const toWords = (text) => normalizeText(text).split(" ").filter(Boolean);
  const currentWords = toWords(currentText);
  const nextWords = toWords(nextText);
  // Index of the first position where the texts diverge or the next text runs out.
  const divergence = currentWords.findIndex((word, index) => index >= nextWords.length || word !== nextWords[index]);
  return divergence === -1 ? currentWords.length : divergence;
};
|
|
264
1584
|
var mergeTranscriptTexts = (transcripts) => {
|
|
265
1585
|
const merged = [];
|
|
266
1586
|
for (const transcript of transcripts) {
|
|
@@ -284,31 +1604,195 @@ var mergeTranscriptTexts = (transcripts) => {
|
|
|
284
1604
|
}
|
|
285
1605
|
return merged.join(" ").trim();
|
|
286
1606
|
};
|
|
287
|
-
/**
 * Builds the text of the current turn from its final transcripts and the
 * latest partial text.
 *
 * When the partial started at least 250 ms after the last timed final
 * transcript ended and shares no leading words with the merged final text, the
 * partial is appended to the final text. Otherwise the preferred of the two
 * texts is returned.
 *
 * @param {Array<object>} transcripts - Final transcripts; `endedAtMs` is read when numeric.
 * @param {string} partialText - Latest partial transcript text.
 * @param {{partialStartedAtMs?: number}} [options]
 * @returns {string} The turn text.
 */
var buildTurnText = (transcripts, partialText, options = {}) => {
  const minimumGapMs = 250;
  const finalText = mergeTranscriptTexts(transcripts);
  const nextPartial = normalizeText(partialText);
  // Most recent transcript that carries a numeric end time.
  const lastTimedTranscript = [...transcripts].reverse().find((transcript) => typeof transcript.endedAtMs === "number");
  const lastFinalEndedAtMs = lastTimedTranscript?.endedAtMs;

  const hasBothTexts = Boolean(finalText) && Boolean(nextPartial);
  const gapMs = hasBothTexts && typeof lastFinalEndedAtMs === "number" && typeof options.partialStartedAtMs === "number"
    ? options.partialStartedAtMs - lastFinalEndedAtMs
    : undefined;
  if (gapMs !== undefined && gapMs >= minimumGapMs && countCommonPrefixWords(finalText, nextPartial) === 0) {
    return mergeSequentialTranscriptText(finalText, nextPartial);
  }
  return selectPreferredTranscriptText(finalText, nextPartial);
};
|
|
294
1616
|
|
|
295
1617
|
// src/session.ts
|
|
296
1618
|
// Reconnect defaults.
var DEFAULT_RECONNECT_TIMEOUT = 30_000;
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;

// Transcript / duplicate-turn timing defaults (names ending in _MS are milliseconds).
var DEFAULT_TRANSCRIPT_STABILITY_MS = 450;
var DEFAULT_DUPLICATE_TURN_WINDOW_MS = 5_000;

// Fallback pass defaults.
var DEFAULT_FALLBACK_REPLAY_MS = 8_000;
var DEFAULT_FALLBACK_SETTLE_MS = 220;
var DEFAULT_FALLBACK_COMPLETION_TIMEOUT_MS = 2_500;
var DEFAULT_FALLBACK_CONFIDENCE_THRESHOLD = 0.6;
var DEFAULT_FALLBACK_MIN_TEXT_LENGTH = 2;
var DEFAULT_FALLBACK_MAX_ATTEMPTS_PER_TURN = 1;

// Margins used when comparing fallback and primary results.
var FALLBACK_CONFIDENCE_SELECTION_DELTA = 0.05;
var FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO = 0.12;

// Vendor commit timing.
var EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS = 200;
var MAX_VENDOR_COMMIT_GRACE_MS = 1_200;

// Default audio format: mono, raw 16-bit little-endian PCM at 16 kHz.
var DEFAULT_FORMAT = {
  channels: 1,
  container: "raw",
  encoding: "pcm_s16le",
  sampleRateHz: 16_000
};
|
|
298
1638
|
/**
 * Coerces any value into an Error.
 *
 * @param {unknown} value
 * @returns {Error} `value` itself when it already is an Error; otherwise a new
 *   Error whose message is `String(value)`.
 */
var toError = (value) => {
  if (value instanceof Error) {
    return value;
  }
  return new Error(String(value));
};
|
|
299
1639
|
/**
 * Creates the state object for a turn that has not received any input yet.
 *
 * @returns {object} Empty texts, unset timing fields and a new, empty
 *   transcripts array.
 */
var createEmptyCurrentTurn = () => {
  const notYetObserved = undefined;
  return {
    finalText: "",
    lastSpeechAt: notYetObserved,
    lastTranscriptAt: notYetObserved,
    partialEndedAt: notYetObserved,
    partialStartedAt: notYetObserved,
    partialText: "",
    silenceStartedAt: notYetObserved,
    transcripts: []
  };
};
|
|
304
1649
|
/**
 * Makes a shallow copy of a transcript object.
 *
 * @param {object} transcript
 * @returns {object} A new object with the same own enumerable properties;
 *   nested values are shared with the original.
 */
var cloneTranscript = (transcript) => {
  const copy = { ...transcript };
  return copy;
};
|
|
305
|
-
var
|
|
306
|
-
|
|
307
|
-
|
|
1650
|
+
/**
 * Encodes a binary chunk as a base64 string.
 *
 * @param {Uint8Array|ArrayBuffer|Array<number>|string} chunk - Anything accepted by `Buffer.from`.
 * @returns {string} Base64 text.
 */
var encodeBase64 = (chunk) => {
  const bytes = Buffer.from(chunk);
  return bytes.toString("base64");
};
|
|
1651
|
+
/**
 * Counts whitespace-separated words in a string.
 *
 * @param {string} text
 * @returns {number} Number of non-whitespace runs; 0 for blank text.
 */
var countWords2 = (text) => {
  const words = text.match(/\S+/g);
  return words === null ? 0 : words.length;
};
|
|
1652
|
+
/**
 * Trims a string and collapses each run of whitespace into one space.
 *
 * @param {string} text
 * @returns {string}
 */
var normalizeText2 = (text) => {
  const trimmed = text.trim();
  return trimmed.replaceAll(/\s+/g, " ");
};
|
|
1653
|
+
/**
 * Converts a chunk's byte length into a duration, assuming the chunk is in
 * `DEFAULT_FORMAT` with 2 bytes per sample per channel.
 *
 * @param {{byteLength: number}} chunk
 * @returns {number} Duration in milliseconds.
 */
var getAudioChunkDurationMs = (chunk) => {
  const bytesPerSecond = DEFAULT_FORMAT.sampleRateHz * DEFAULT_FORMAT.channels * 2;
  const seconds = chunk.byteLength / bytesPerSecond;
  return seconds * 1000;
};
|
|
1654
|
+
/**
 * Sums the durations of a list of audio chunks.
 *
 * @param {Array<{byteLength: number}>} chunks
 * @returns {number} Total duration in milliseconds (0 for an empty list).
 */
var getBufferedAudioDurationMs = (chunks) => {
  let totalMs = 0;
  chunks.forEach((chunk) => {
    totalMs += getAudioChunkDurationMs(chunk);
  });
  return totalMs;
};
|
|
1655
|
+
/**
 * Averages the numeric `confidence` values of a list of transcripts.
 *
 * @param {Iterable<{confidence?: number}>} transcripts - Entries without a numeric confidence are ignored.
 * @returns {number} The mean confidence, or 0 when no entry has one.
 */
var calculateMeanConfidence = (transcripts) => {
  const samples = [];
  for (const { confidence } of transcripts) {
    if (typeof confidence === "number") {
      samples.push(confidence);
    }
  }
  if (samples.length === 0) {
    return 0;
  }
  return samples.reduce((sum, confidence) => sum + confidence, 0) / samples.length;
};
|
|
1669
|
+
/**
 * Summarizes the quality of the transcripts selected for a turn.
 *
 * @param {Array<{confidence?: number, isFinal?: boolean}>} transcripts - Selected transcripts.
 * @param {unknown} source - Stored as `source`.
 * @param {unknown} fallbackUsed - Stored as `fallbackUsed`.
 * @param {unknown} fallbackDiagnostics - Stored as `fallback`.
 * @param {unknown} correctionDiagnostics - Stored as `correction`.
 * @param {unknown} costEstimate - Stored as `cost`.
 * @returns {object} Counts of final/partial/selected transcripts, the number of
 *   confidence samples and their average (`undefined` without samples), plus
 *   the pass-through values.
 */
var createTurnQuality = (transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics, costEstimate) => {
  let confidenceSum = 0;
  let confidenceSampleCount = 0;
  let finalTranscriptCount = 0;
  let partialTranscriptCount = 0;
  for (const transcript of transcripts) {
    if (typeof transcript.confidence === "number") {
      confidenceSum += transcript.confidence;
      confidenceSampleCount += 1;
    }
    if (transcript.isFinal) {
      finalTranscriptCount += 1;
    } else {
      partialTranscriptCount += 1;
    }
  }
  return {
    averageConfidence: confidenceSampleCount > 0 ? confidenceSum / confidenceSampleCount : undefined,
    confidenceSampleCount,
    correction: correctionDiagnostics,
    cost: costEstimate,
    fallback: fallbackDiagnostics,
    fallbackUsed,
    finalTranscriptCount,
    partialTranscriptCount,
    selectedTranscriptCount: transcripts.length,
    source
  };
};
|
|
1685
|
+
/**
 * Estimates the relative cost of a turn from the audio sent to the primary
 * pass and the audio replayed to the fallback pass.
 *
 * Durations that are missing, NaN, or negative are treated as 0. Previously
 * `Math.max(0, undefined)` produced NaN, which poisoned every output field.
 *
 * @param {object} input
 * @param {number} input.primaryAudioMs - Audio sent to the primary pass, in ms.
 * @param {number} input.fallbackReplayAudioMs - Audio replayed to the fallback, in ms.
 * @param {number} input.fallbackAttemptCount - Copied to the result unchanged.
 * @param {number} [input.primaryPassCostUnit=1] - Cost units per primary minute.
 * @param {number} [input.fallbackPassCostUnit] - Cost units per fallback
 *   minute; defaults to the primary cost unit.
 * @returns {object} Clamped durations, their total, and the estimated cost.
 */
var createTurnCostEstimate = (input) => {
  // NaN > 0 is false, so non-numeric input collapses to 0 like negatives do.
  const toNonNegativeMs = (value) => {
    const ms = Number(value);
    return ms > 0 ? ms : 0;
  };
  const primaryAudioMs = toNonNegativeMs(input.primaryAudioMs);
  const fallbackReplayAudioMs = toNonNegativeMs(input.fallbackReplayAudioMs);
  const primaryMinutes = primaryAudioMs / 60000;
  const fallbackMinutes = fallbackReplayAudioMs / 60000;
  const primaryCostUnit = input.primaryPassCostUnit ?? 1;
  const fallbackCostUnit = input.fallbackPassCostUnit ?? primaryCostUnit;
  return {
    estimatedRelativeCostUnits: primaryMinutes * primaryCostUnit + fallbackMinutes * fallbackCostUnit,
    fallbackAttemptCount: input.fallbackAttemptCount,
    fallbackReplayAudioMs,
    primaryAudioMs,
    totalBillableAudioMs: primaryAudioMs + fallbackReplayAudioMs
  };
};
|
|
1698
|
+
/**
 * Normalizes text for turn correction; delegates to normalizeText2.
 * @param {string} text - Text to normalize.
 * @returns {string} Whatever normalizeText2 returns for `text`.
 */
var normalizeCorrectionText = (text) => {
  return normalizeText2(text);
};
|
|
1699
|
+
/**
 * Decides whether the fallback STT pass should run for a primary candidate.
 *
 * Triggers:
 *  - "always": always run.
 *  - "empty-turn": run when the word count is below `config.minTextLength`.
 *  - "low-confidence": run when the mean confidence is above 0 and below
 *    `config.confidenceThreshold` (a mean of 0 never triggers).
 *  - anything else: run when either of the two conditions above holds.
 *
 * @param {{ text: string, transcripts: Array<object> }} candidate - Primary result.
 * @param {{ trigger: string, minTextLength: number, confidenceThreshold: number }} config
 * @returns {boolean} True when the fallback should be attempted.
 */
var isFallbackNeeded = (candidate, config) => {
  const wordCount = countWords2(normalizeText2(candidate.text));
  const isTooShort = () => wordCount < config.minTextLength;
  const isLowConfidence = () => {
    const meanConfidence = calculateMeanConfidence(candidate.transcripts);
    return meanConfidence > 0 && meanConfidence < config.confidenceThreshold;
  };
  switch (config.trigger) {
    case "always":
      return true;
    case "empty-turn":
      return isTooShort();
    case "low-confidence":
      return isLowConfidence();
    default:
      return isLowConfidence() || isTooShort();
  }
};
|
|
1714
|
+
/**
 * Picks between the primary candidate and the fallback candidate.
 *
 * Rules are applied in order; the first match wins:
 *  1. empty fallback text  -> primary ("fallback-empty")
 *  2. empty primary text   -> fallback ("primary-empty")
 *  3. word counts differ by at least the margin ratio -> the longer one
 *     ("word-count-margin")
 *  4. fallback confidence exceeds primary by more than the delta -> fallback
 *     ("confidence-margin")
 *  5. primary confidence exceeds fallback by more than the delta -> primary
 *     ("kept-primary")
 *  6. fallback has more words -> fallback ("word-count-tiebreak")
 *  7. otherwise -> primary ("kept-primary")
 *
 * @param {{ text: string, wordCount: number, confidence: number }} candidate - Primary.
 * @param {{ text: string, wordCount: number, confidence: number }} fallback - Fallback.
 * @returns {{ reason: string, winner: object }} The chosen candidate and why.
 */
var selectBetterTurnText = (candidate, fallback) => {
  const decide = (reason, winner) => ({ reason, winner });
  if (!fallback.text) {
    return decide("fallback-empty", candidate);
  }
  if (!candidate.text) {
    return decide("primary-empty", fallback);
  }
  const wordCountDelta = fallback.wordCount - candidate.wordCount;
  const largestWordCount = Math.max(candidate.wordCount, fallback.wordCount, 1);
  const wordCountDeltaRatio = Math.abs(wordCountDelta) / largestWordCount;
  if (wordCountDelta !== 0 && wordCountDeltaRatio >= FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO) {
    return decide("word-count-margin", wordCountDelta > 0 ? fallback : candidate);
  }
  if (fallback.confidence > candidate.confidence + FALLBACK_CONFIDENCE_SELECTION_DELTA) {
    return decide("confidence-margin", fallback);
  }
  if (candidate.confidence > fallback.confidence + FALLBACK_CONFIDENCE_SELECTION_DELTA) {
    return decide("kept-primary", candidate);
  }
  if (fallback.wordCount > candidate.wordCount) {
    return decide("word-count-tiebreak", fallback);
  }
  return decide("kept-primary", candidate);
};
|
|
1759
|
+
/**
 * Replaces `session.turns` with a new array in which the turn matching
 * `turnId` is a copy carrying the updated assistant text and result.
 * Fields not supplied in `input` (null/undefined) keep the turn's old value.
 * @param {{ turns: Array<object> }} session - Session to update in place.
 * @param {*} turnId - Id of the turn to update.
 * @param {{ assistantText?: *, result?: * }} input - New values.
 * @returns {void}
 */
var setTurnResult = (session, turnId, input) => {
  const applyResult = (turn) => {
    if (turn.id !== turnId) {
      return turn;
    }
    return {
      ...turn,
      assistantText: input.assistantText ?? turn.assistantText,
      result: input.result ?? turn.result
    };
  };
  session.turns = session.turns.map((turn) => applyResult(turn));
};
|
|
1766
|
+
/**
 * Returns `session.call`, creating it first when it is null or undefined.
 * A newly created state has no events and uses `session.createdAt` for both
 * `startedAt` and `lastEventAt`.
 * @param {object} session - Session whose call lifecycle state is needed.
 * @returns {object} The (possibly new) `session.call` object.
 */
var ensureCallLifecycleState = (session) => {
  const startedAt = session.createdAt;
  if (session.call === undefined || session.call === null) {
    session.call = {
      events: [],
      lastEventAt: startedAt,
      startedAt
    };
  }
  return session.call;
};
|
|
1775
|
+
/**
 * Appends a timestamped event to the session's call lifecycle state
 * (creating that state if needed) and updates `lastEventAt`. An event of
 * type "end" also records the disposition and `endedAt`.
 * @param {object} session - Session to update in place.
 * @param {{ type: string, disposition?: *, metadata?: *, reason?: *, target?: * }} input
 * @returns {object} The session's call lifecycle state.
 */
var pushCallLifecycleEvent = (session, input) => {
  const lifecycle = ensureCallLifecycleState(session);
  const at = Date.now();
  const { disposition, metadata, reason, target, type } = input;
  const event = { at, disposition, metadata, reason, target, type };
  // Replace rather than push so previously handed-out arrays are not mutated.
  lifecycle.events = [...lifecycle.events, event];
  lifecycle.lastEventAt = at;
  if (type === "end") {
    lifecycle.disposition = disposition;
    lifecycle.endedAt = at;
  }
  return lifecycle;
};
|
|
312
1796
|
var createVoiceSession = (options) => {
|
|
313
1797
|
const logger = resolveLogger(options.logger);
|
|
314
1798
|
const reconnect = {
|
|
@@ -318,18 +1802,74 @@ var createVoiceSession = (options) => {
|
|
|
318
1802
|
};
|
|
319
1803
|
const turnDetection = {
|
|
320
1804
|
silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS,
|
|
321
|
-
speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD
|
|
1805
|
+
speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD,
|
|
1806
|
+
transcriptStabilityMs: options.turnDetection.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS
|
|
322
1807
|
};
|
|
1808
|
+
const sttFallback = options.sttFallback ? {
|
|
1809
|
+
adapter: options.sttFallback.adapter,
|
|
1810
|
+
completionTimeoutMs: options.sttFallback.completionTimeoutMs ?? DEFAULT_FALLBACK_COMPLETION_TIMEOUT_MS,
|
|
1811
|
+
confidenceThreshold: options.sttFallback.confidenceThreshold ?? DEFAULT_FALLBACK_CONFIDENCE_THRESHOLD,
|
|
1812
|
+
maxAttemptsPerTurn: options.sttFallback.maxAttemptsPerTurn ?? DEFAULT_FALLBACK_MAX_ATTEMPTS_PER_TURN,
|
|
1813
|
+
minTextLength: options.sttFallback.minTextLength ?? DEFAULT_FALLBACK_MIN_TEXT_LENGTH,
|
|
1814
|
+
replayWindowMs: options.sttFallback.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS,
|
|
1815
|
+
settleMs: options.sttFallback.settleMs ?? DEFAULT_FALLBACK_SETTLE_MS,
|
|
1816
|
+
trigger: options.sttFallback.trigger ?? "empty-or-low-confidence"
|
|
1817
|
+
} : undefined;
|
|
1818
|
+
const phraseHints = options.phraseHints ?? [];
|
|
1819
|
+
const lexicon = options.lexicon ?? [];
|
|
323
1820
|
let socket = options.socket;
|
|
324
1821
|
let sttSession = null;
|
|
1822
|
+
let ttsSession = null;
|
|
1823
|
+
let ttsSessionPromise = null;
|
|
325
1824
|
let silenceTimer = null;
|
|
1825
|
+
let pendingCommitReason = null;
|
|
326
1826
|
let speechDetected = false;
|
|
1827
|
+
let operationQueue = Promise.resolve();
|
|
1828
|
+
let adapterGenerationCounter = 0;
|
|
1829
|
+
let activeAdapterGeneration = 0;
|
|
1830
|
+
let activeTTSTurnId;
|
|
1831
|
+
const currentTurnAudio = [];
|
|
1832
|
+
let fallbackAttemptsForCurrentTurn = 0;
|
|
1833
|
+
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
1834
|
+
// Drops buffered audio entries from the front of `currentTurnAudio` that were
// recorded before the replay window (configured fallback window, or the
// default when no fallback is configured).
const pruneTurnAudio = () => {
  const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
  const cutoffAt = Date.now() - replayWindowMs;
  // First entry that is NOT older than the cutoff; everything before it is stale.
  const firstKeptIndex = currentTurnAudio.findIndex((entry) => !(entry.recordedAt < cutoffAt));
  const staleCount = firstKeptIndex === -1 ? currentTurnAudio.length : firstKeptIndex;
  if (staleCount > 0) {
    currentTurnAudio.splice(0, staleCount);
  }
};
|
|
1845
|
+
// Stores a private byte copy of the incoming audio (ArrayBuffer or a view over
// one) with its arrival time, then prunes entries outside the replay window.
const pushTurnAudio = (audio) => {
  let copiedBytes;
  if (audio instanceof ArrayBuffer) {
    copiedBytes = audio.slice(0);
  } else {
    // Copy only the region the view covers, not the whole backing buffer.
    copiedBytes = audio.buffer.slice(audio.byteOffset, audio.byteOffset + audio.byteLength);
  }
  const entry = {
    chunk: new Uint8Array(copiedBytes),
    recordedAt: Date.now()
  };
  currentTurnAudio.push(entry);
  pruneTurnAudio();
};
|
|
1853
|
+
// Returns the buffered audio chunks still inside the replay window, or an
// empty list when no fallback adapter is configured.
const getFallbackWindowAudio = () => {
  if (sttFallback?.adapter) {
    pruneTurnAudio();
    return currentTurnAudio.map(({ chunk }) => chunk);
  }
  return [];
};
|
|
327
1860
|
// Cancels the pending commit timer, if any, and forgets why it was scheduled.
// Does nothing (including leaving pendingCommitReason alone) when no timer is set.
const clearSilenceTimer = () => {
  if (silenceTimer) {
    clearTimeout(silenceTimer);
    silenceTimer = null;
    pendingCommitReason = null;
  }
};
|
|
1868
|
+
// Delay before committing a vendor-signalled end of turn. When either the
// silence or the transcript-stability setting is below the extended threshold
// the stability window is used as-is; otherwise the delay is stretched to
// twice the silence window, capped by MAX_VENDOR_COMMIT_GRACE_MS and never
// shorter than the stability window.
const getVendorCommitDelayMs = () => {
  const { silenceMs, transcriptStabilityMs } = turnDetection;
  const isBelowExtendedThreshold = (value) => value < EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS;
  if (isBelowExtendedThreshold(silenceMs) || isBelowExtendedThreshold(transcriptStabilityMs)) {
    return transcriptStabilityMs;
  }
  const cappedGraceMs = Math.min(MAX_VENDOR_COMMIT_GRACE_MS, silenceMs * 2);
  return Math.max(transcriptStabilityMs, cappedGraceMs);
};
|
|
334
1874
|
const send = async (message) => {
|
|
335
1875
|
try {
|
|
@@ -349,12 +1889,28 @@ var createVoiceSession = (options) => {
|
|
|
349
1889
|
await options.store.set(options.id, session);
|
|
350
1890
|
return session;
|
|
351
1891
|
};
|
|
1892
|
+
// Chains `operation` onto the session's operation queue so operations run one
// after another. The caller receives the operation's own promise (including
// its rejection); the queue itself continues regardless of the outcome.
const runSerial = (phase, operation) => {
  const runOperation = async () => {
    logger.debug("voice session operation", {
      phase,
      sessionId: options.id
    });
    return await operation();
  };
  const result = operationQueue.then(runOperation);
  // The queue tail must never reject, or later operations would be skipped.
  const settle = () => {
    return;
  };
  operationQueue = result.then(settle, settle);
  return result;
};
|
|
352
1907
|
const closeAdapter = async (reason) => {
|
|
353
1908
|
if (!sttSession) {
|
|
354
1909
|
return;
|
|
355
1910
|
}
|
|
356
1911
|
const activeSession = sttSession;
|
|
357
1912
|
sttSession = null;
|
|
1913
|
+
activeAdapterGeneration = 0;
|
|
358
1914
|
try {
|
|
359
1915
|
await activeSession.close(reason);
|
|
360
1916
|
} catch (error) {
|
|
@@ -364,13 +1920,255 @@ var createVoiceSession = (options) => {
|
|
|
364
1920
|
});
|
|
365
1921
|
}
|
|
366
1922
|
};
|
|
367
|
-
const
|
|
368
|
-
|
|
1923
|
+
// Resets all TTS session state and closes the active TTS session, if there is
// one. A failing close is logged as a warning and not rethrown.
const closeTTSSession = async (reason) => {
  const sessionToClose = ttsSession;
  // Clear state before awaiting so nothing reuses the session mid-close.
  ttsSession = null;
  ttsSessionPromise = null;
  activeTTSTurnId = undefined;
  if (sessionToClose) {
    try {
      await sessionToClose.close(reason);
    } catch (error) {
      logger.warn("voice tts adapter close failed", {
        error: toError(error).message,
        reason,
        sessionId: options.id
      });
    }
  }
};
|
|
1941
|
+
// Schedules `api.commitTurn(reason)` after `delayMs`. With `reset` (default)
// any pending timer is cancelled first; without it an existing timer is left
// untouched and nothing new is scheduled.
const scheduleTurnCommit = (delayMs, reason, reset = true) => {
  if (reset) {
    clearSilenceTimer();
  } else if (silenceTimer) {
    return;
  }
  pendingCommitReason = reason;
  const fireCommit = () => {
    silenceTimer = null;
    pendingCommitReason = null;
    api.commitTurn(reason);
  };
  silenceTimer = setTimeout(fireCommit, delayMs);
};
|
|
1955
|
+
// Schedules a "silence" turn commit; the delay defaults to the configured
// silence window.
const scheduleSilenceCommit = (delayMs = turnDetection.silenceMs, reset = true) => {
  return scheduleTurnCommit(delayMs, "silence", reset);
};
|
|
1956
|
+
// Handles a request to commit the current turn.
//  - No turn text yet: ignored.
//  - "vendor": always deferred by the vendor commit delay.
//  - Any reason other than "manual": deferred until the last transcript has
//    been stable for `transcriptStabilityMs`.
//  - Otherwise: committed immediately.
const requestTurnCommit = async (reason) => {
  const { currentTurn } = await readSession();
  const text = buildTurnText(currentTurn.transcripts, currentTurn.partialText, {
    partialEndedAtMs: currentTurn.partialEndedAt,
    partialStartedAtMs: currentTurn.partialStartedAt
  });
  if (!text) {
    return;
  }
  let transcriptStabilityAge;
  if (currentTurn.lastTranscriptAt !== undefined) {
    transcriptStabilityAge = Date.now() - currentTurn.lastTranscriptAt;
  }
  if (reason === "vendor") {
    scheduleTurnCommit(getVendorCommitDelayMs(), reason);
    return;
  }
  const isStillSettling = reason !== "manual" && typeof transcriptStabilityAge === "number" && transcriptStabilityAge < turnDetection.transcriptStabilityMs;
  if (isStillSettling) {
    // Wait out the remainder of the stability window, then retry.
    scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason);
    return;
  }
  await commitTurnInternal(reason);
};
|
|
1976
|
+
// Marks the session as failed (once), records an "end" lifecycle event with
// disposition "failed" if the call has not ended, notifies the client, closes
// the TTS and STT sessions, resets per-turn state and invokes the route's
// onError and onCallEnd hooks. A session already marked failed is left alone.
const failInternal = async (error) => {
  clearSilenceTimer();
  const resolvedError = toError(error);
  let didFail = false;
  const session = await writeSession((draft) => {
    if (draft.status !== "failed") {
      didFail = true;
      draft.lastActivityAt = Date.now();
      draft.status = "failed";
      if (!draft.call?.endedAt) {
        pushCallLifecycleEvent(draft, {
          disposition: "failed",
          reason: resolvedError.message,
          type: "end"
        });
      }
    }
  });
  if (!didFail) {
    return;
  }
  await send({
    message: resolvedError.message,
    recoverable: false,
    type: "error"
  });
  await closeTTSSession("failed");
  await closeAdapter("failed");
  speechDetected = false;
  rewindFallbackTurnAudio();
  const hookBase = {
    api,
    context: options.context
  };
  await options.route.onError?.({
    ...hookBase,
    error: resolvedError,
    session,
    sessionId: options.id
  });
  await options.route.onCallEnd?.({
    ...hookBase,
    disposition: "failed",
    reason: resolvedError.message,
    session
  });
};
|
|
2022
|
+
// Ends the session with the given disposition (default "completed").
//
// result - optional result stored on the last turn, if any turn exists.
// input  - optional { disposition, invokeOnComplete, metadata, reason, target }.
//
// A session whose status is already "completed" or "failed" is left untouched
// and no hooks run. Otherwise the session is marked "completed" regardless of
// disposition, an "end" lifecycle event is recorded if the call has not ended,
// the client is notified, the TTS/STT sessions are closed, per-turn state is
// reset, and the disposition-specific hook, onComplete (when requested) and
// onCallEnd are invoked in that order.
const completeInternal = async (result, input = {}) => {
  clearSilenceTimer();
  const disposition = input.disposition ?? "completed";
  // `===` binds tighter than `??`: onComplete runs by default only for the
  // plain "completed" disposition unless the caller says otherwise.
  const shouldInvokeOnComplete = input.invokeOnComplete ?? disposition === "completed";
  let didComplete = false;
  const session = await writeSession((currentSession) => {
    // Completing is a one-shot transition; terminal sessions are not touched.
    if (currentSession.status === "completed" || currentSession.status === "failed") {
      return;
    }
    didComplete = true;
    currentSession.lastActivityAt = Date.now();
    currentSession.status = "completed";
    if (result !== undefined && currentSession.turns.length > 0) {
      const lastTurn = currentSession.turns.at(-1);
      if (lastTurn) {
        setTurnResult(currentSession, lastTurn.id, {
          result
        });
      }
    }
    // Only one "end" event per call.
    if (!currentSession.call?.endedAt) {
      pushCallLifecycleEvent(currentSession, {
        disposition,
        metadata: input.metadata,
        reason: input.reason,
        target: input.target,
        type: "end"
      });
    }
  });
  if (!didComplete) {
    return;
  }
  await send({
    sessionId: options.id,
    type: "complete"
  });
  await closeTTSSession("complete");
  await closeAdapter("complete");
  speechDetected = false;
  rewindFallbackTurnAudio();
  // Disposition-specific hooks. Transfer needs a target and escalation needs
  // a reason, otherwise the hook is skipped.
  if (disposition === "transferred" && input.target) {
    await options.route.onTransfer?.({
      api,
      context: options.context,
      metadata: input.metadata,
      reason: input.reason,
      session,
      target: input.target
    });
  }
  if (disposition === "escalated" && input.reason) {
    await options.route.onEscalation?.({
      api,
      context: options.context,
      metadata: input.metadata,
      reason: input.reason,
      session
    });
  }
  if (disposition === "voicemail") {
    await options.route.onVoicemail?.({
      api,
      context: options.context,
      metadata: input.metadata,
      session
    });
  }
  if (disposition === "no-answer") {
    await options.route.onNoAnswer?.({
      api,
      context: options.context,
      metadata: input.metadata,
      session
    });
  }
  if (shouldInvokeOnComplete) {
    // onComplete is called unconditionally (no optional call), unlike the other hooks.
    await options.route.onComplete({
      api,
      context: options.context,
      session
    });
  }
  await options.route.onCallEnd?.({
    api,
    context: options.context,
    disposition,
    metadata: input.metadata,
    reason: input.reason,
    session,
    target: input.target
  });
};
|
|
2115
|
+
// Records a "transfer" lifecycle event and then completes the session with
// disposition "transferred" (onComplete is not invoked).
const transferInternal = async (input) => {
  const { metadata, reason, result, target } = input;
  await writeSession((draft) => {
    pushCallLifecycleEvent(draft, {
      metadata,
      reason,
      target,
      type: "transfer"
    });
  });
  await completeInternal(result, {
    disposition: "transferred",
    invokeOnComplete: false,
    metadata,
    reason,
    target
  });
};
|
|
2132
|
+
// Records an "escalation" lifecycle event and then completes the session with
// disposition "escalated" (onComplete is not invoked).
const escalateInternal = async (input) => {
  const { metadata, reason, result } = input;
  await writeSession((draft) => {
    pushCallLifecycleEvent(draft, {
      metadata,
      reason,
      type: "escalation"
    });
  });
  await completeInternal(result, {
    disposition: "escalated",
    invokeOnComplete: false,
    metadata,
    reason
  });
};
|
|
2147
|
+
// Records a "no-answer" lifecycle event and then completes the session with
// disposition "no-answer" (onComplete is not invoked). `input` is optional.
const markNoAnswerInternal = async (input) => {
  const metadata = input?.metadata;
  const result = input?.result;
  await writeSession((draft) => {
    pushCallLifecycleEvent(draft, {
      metadata,
      type: "no-answer"
    });
  });
  await completeInternal(result, {
    disposition: "no-answer",
    invokeOnComplete: false,
    metadata
  });
};
|
|
2160
|
+
// Records a "voicemail" lifecycle event and then completes the session with
// disposition "voicemail" (onComplete is not invoked). `input` is optional.
const markVoicemailInternal = async (input) => {
  const metadata = input?.metadata;
  const result = input?.result;
  await writeSession((draft) => {
    pushCallLifecycleEvent(draft, {
      metadata,
      type: "voicemail"
    });
  });
  await completeInternal(result, {
    disposition: "voicemail",
    invokeOnComplete: false,
    metadata
  });
};
|
|
375
2173
|
const handleError = async (event) => {
|
|
376
2174
|
await send({
|
|
@@ -379,87 +2177,462 @@ var createVoiceSession = (options) => {
|
|
|
379
2177
|
type: "error"
|
|
380
2178
|
});
|
|
381
2179
|
if (!event.recoverable) {
|
|
382
|
-
await
|
|
2180
|
+
await failInternal(event.error);
|
|
383
2181
|
}
|
|
384
2182
|
};
|
|
385
2183
|
// Reacts to the STT adapter closing. An explicitly non-recoverable close
// fails the session; any other close just shuts the adapter down, using the
// provider's reason when one was given.
const handleClose = async (event) => {
  if (event.recoverable === false) {
    await failInternal(new Error(event.reason ?? "Speech-to-text session closed"));
    return;
  }
  const closeReason = event.reason ? event.reason : "provider stream closed";
  await closeAdapter(closeReason);
};
|
|
2194
|
+
// Resets the per-turn fallback bookkeeping: empties the buffered audio (in
// place) and zeroes the attempt and replayed-audio counters.
const rewindFallbackTurnAudio = () => {
  currentTurnAudio.splice(0, currentTurnAudio.length);
  fallbackReplayAudioMsForCurrentTurn = 0;
  fallbackAttemptsForCurrentTurn = 0;
};
|
|
2199
|
+
// Replays the buffered turn audio through the fallback STT adapter and picks
// the better of the primary and fallback transcriptions.
//
// primaryText        - text produced by the primary pass.
// primaryTranscripts - transcripts produced by the primary pass.
//
// Returns null when no fallback is configured, the per-turn attempt limit is
// reached, the trigger does not apply, there is no buffered audio, the adapter
// cannot be opened, or the fallback produced no transcripts. Otherwise returns
// { diagnostics, fallbackUsed, source, text, transcripts } with every returned
// transcript marked final.
//
// Fixes relative to the previous version:
//  - `diagnostics.selected` now always equals `fallbackUsed`. It used to be
//    computed by text equality with the fallback, so identical primary and
//    fallback texts reported `selected: true` while the primary was returned.
//  - Removed an unreachable placeholder-transcript branch (its guard compared
//    the fallback text with itself).
const runFallbackTranscription = async (primaryText, primaryTranscripts) => {
  if (!sttFallback?.adapter || fallbackAttemptsForCurrentTurn >= sttFallback.maxAttemptsPerTurn) {
    return null;
  }
  const candidate = {
    text: primaryText,
    transcripts: primaryTranscripts
  };
  if (!isFallbackNeeded(candidate, sttFallback)) {
    return null;
  }
  // The attempt is counted even if there turns out to be no audio to replay.
  fallbackAttemptsForCurrentTurn += 1;
  const replayAudio = getFallbackWindowAudio();
  if (replayAudio.length === 0) {
    return null;
  }
  let fallbackSession = null;
  const fallbackTranscripts = [];
  let fallbackClosed = false;
  let fallbackEndOfTurnReceived = false;
  let fallbackFinalReceived = false;
  let lastFallbackTranscriptAt = 0;
  try {
    fallbackSession = await sttFallback.adapter.open({
      format: DEFAULT_FORMAT,
      languageStrategy: options.languageStrategy,
      lexicon,
      phraseHints,
      sessionId: `${options.id}:fallback:${fallbackAttemptsForCurrentTurn}`
    });
  } catch (error) {
    logger.warn("voice stt fallback open failed", {
      error: toError(error).message,
      sessionId: options.id
    });
    return null;
  }
  // Both partial and final transcripts are collected for the fallback text.
  const unsubscribers = [
    fallbackSession.on("final", ({ transcript }) => {
      fallbackFinalReceived = true;
      lastFallbackTranscriptAt = Date.now();
      fallbackTranscripts.push(cloneTranscript(transcript));
    }),
    fallbackSession.on("partial", ({ transcript }) => {
      lastFallbackTranscriptAt = Date.now();
      fallbackTranscripts.push(cloneTranscript(transcript));
    }),
    fallbackSession.on("endOfTurn", () => {
      fallbackEndOfTurnReceived = true;
    }),
    fallbackSession.on("error", (event) => {
      logger.warn("voice stt fallback error", {
        error: toError(event.error).message,
        sessionId: options.id
      });
    }),
    fallbackSession.on("close", () => {
      fallbackClosed = true;
    })
  ];
  const closeFallback = async (reason) => {
    if (!fallbackSession) {
      return;
    }
    try {
      await fallbackSession.close(reason);
    } catch (error) {
      logger.warn("voice stt fallback close failed", {
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      fallbackSession = null;
    }
  };
  try {
    for (const chunk of replayAudio) {
      await fallbackSession.send(chunk);
    }
    const replayDurationMs = getBufferedAudioDurationMs(replayAudio);
    fallbackReplayAudioMsForCurrentTurn += replayDurationMs;
    // Wait at least the configured timeout; longer replays get up to 4s.
    const completionTimeoutMs = Math.max(sttFallback.completionTimeoutMs, Math.min(4000, Math.max(sttFallback.settleMs * 4, Math.round(replayDurationMs * 0.18))));
    const waitStartedAt = Date.now();
    while (Date.now() - waitStartedAt < completionTimeoutMs) {
      const idleMs = lastFallbackTranscriptAt > 0 ? Date.now() - lastFallbackTranscriptAt : Date.now() - waitStartedAt;
      const hasSettled = idleMs >= sttFallback.settleMs;
      if (fallbackEndOfTurnReceived && hasSettled) {
        break;
      }
      if (fallbackFinalReceived && hasSettled) {
        break;
      }
      if (fallbackClosed && (lastFallbackTranscriptAt === 0 || hasSettled)) {
        break;
      }
      await Bun.sleep(Math.min(75, Math.max(25, sttFallback.settleMs / 2)));
    }
  } catch (error) {
    // Best effort: whatever transcripts arrived before the failure are still used.
    logger.warn("voice stt fallback failed", {
      error: toError(error).message,
      sessionId: options.id
    });
  } finally {
    await closeFallback("fallback-complete");
    for (const unsubscribe of unsubscribers) {
      unsubscribe();
    }
  }
  if (fallbackTranscripts.length === 0) {
    return null;
  }
  const fallbackText = buildTurnText(fallbackTranscripts, "", {});
  const fallbackCandidate = {
    confidence: calculateMeanConfidence(fallbackTranscripts),
    text: fallbackText,
    wordCount: countWords2(normalizeText2(fallbackText))
  };
  const primaryCandidate = {
    confidence: calculateMeanConfidence(primaryTranscripts),
    text: primaryText,
    wordCount: countWords2(normalizeText2(primaryText))
  };
  const selection = selectBetterTurnText(primaryCandidate, fallbackCandidate);
  // The fallback only counts as used when it actually changes the text.
  const fallbackUsed = selection.winner.text !== primaryCandidate.text;
  const markFinal = (transcript) => ({
    ...transcript,
    isFinal: true
  });
  const diagnostics = {
    attempted: true,
    fallbackConfidence: fallbackCandidate.confidence,
    fallbackText: fallbackCandidate.text,
    fallbackWordCount: fallbackCandidate.wordCount,
    primaryConfidence: primaryCandidate.confidence,
    primaryText,
    primaryWordCount: primaryCandidate.wordCount,
    selected: fallbackUsed,
    selectionReason: selection.reason,
    trigger: sttFallback.trigger
  };
  if (!fallbackUsed) {
    return {
      diagnostics,
      fallbackUsed: false,
      source: "primary",
      text: primaryText,
      transcripts: primaryTranscripts.map(markFinal)
    };
  }
  return {
    diagnostics,
    fallbackUsed: true,
    source: "fallback",
    text: selection.winner.text,
    transcripts: fallbackTranscripts.map(markFinal)
  };
};
|
|
2358
|
+
// Returns the ids of the final transcripts, or the ids of all transcripts
// when none of them is final.
const getFinalTranscriptIds = (transcripts) => {
  const finalIds = [];
  const allIds = [];
  for (const transcript of transcripts) {
    allIds.push(transcript.id);
    if (transcript.isFinal) {
      finalIds.push(transcript.id);
    }
  }
  return finalIds.length > 0 ? finalIds : allIds;
};
|
|
2363
|
+
// Runs the route's optional `correctTurn` hook on the turn text.
//
// input - { text, transcripts, session, fallbackDiagnostics }.
//
// Returns undefined when the route has no `correctTurn` hook. Otherwise
// returns { diagnostics, text }, where `text` is the normalized corrected text
// or the original text when the hook yields nothing usable. The hook may
// return a string, an object ({ text, metadata, provider, reason }), or
// nothing.
//
// Fix: `typeof null === "object"`, so a hook returning `null` used to throw on
// `result.metadata`. Null is now treated as "no details".
const runTurnCorrection = async (input) => {
  if (!options.route.correctTurn) {
    return;
  }
  const originalText = input.text;
  const result = await options.route.correctTurn({
    api,
    context: options.context,
    fallback: input.fallbackDiagnostics,
    lexicon,
    phraseHints,
    session: input.session,
    text: originalText,
    // Hand the hook copies so it cannot mutate the session's transcripts.
    transcripts: input.transcripts.map(cloneTranscript)
  });
  const details = typeof result === "object" && result !== null ? result : undefined;
  const nextText = typeof result === "string" ? result : typeof result?.text === "string" ? result.text : originalText;
  const correctedText = normalizeCorrectionText(nextText);
  const normalizedOriginal = normalizeCorrectionText(originalText);
  const hasCorrection = correctedText.length > 0;
  return {
    diagnostics: {
      attempted: true,
      changed: hasCorrection && correctedText !== normalizedOriginal,
      correctedText: hasCorrection ? correctedText : normalizedOriginal,
      metadata: details?.metadata,
      originalText,
      provider: details?.provider,
      reason: details?.reason
    },
    text: hasCorrection ? correctedText : originalText
  };
};
|
|
2394
|
+
// Makes sure `session.lastCommittedTurn` exists, seeding it with an empty
// record (committedAt 0, blank signature/text, no transcript ids) when it is
// missing. Returns the same session object.
const ensureCommittedTurnGuard = (session) => {
  session.lastCommittedTurn ||= {
    committedAt: 0,
    signature: "",
    text: "",
    transcriptIds: []
  };
  return session;
};
|
|
2405
|
+
// Builds a "<normalized text>|<id,id,...>" signature for a turn. The ids come
// from `transcriptIdsOverride` when given, otherwise from the current turn's
// transcripts.
const buildTurnSignature = (session, finalText, transcriptIdsOverride) => {
  const transcriptIds = transcriptIdsOverride ?? getFinalTranscriptIds(session.currentTurn.transcripts);
  const normalizedText = normalizeText2(finalText);
  const joinedIds = transcriptIds.join(",");
  return `${normalizedText}|${joinedIds}`;
};
|
|
2409
|
+
// Decides whether committing `finalText` now would repeat the last commit.
// Only commits inside DEFAULT_DUPLICATE_TURN_WINDOW_MS of the previous one can
// be duplicates. Within that window a commit is a duplicate when
//  - the normalized text is unchanged and no audio arrived since the last
//    commit, or
//  - the turn signature is unchanged and every final transcript was already
//    part of the last commit.
const isDuplicateTurnCommit = (session, finalText) => {
  const signature = buildTurnSignature(session, finalText);
  const previous = session.lastCommittedTurn;
  const previousText = normalizeText2(previous?.text ?? "");
  const currentText = normalizeText2(finalText);
  const withinWindow = previous && previous.committedAt > 0 && Date.now() - previous.committedAt < DEFAULT_DUPLICATE_TURN_WINDOW_MS;
  if (!withinWindow) {
    return false;
  }
  const lastAudioAt = session.currentTurn.lastAudioAt ?? 0;
  const previousCommittedAt = previous?.committedAt ?? 0;
  if (currentText === previousText && lastAudioAt <= previousCommittedAt) {
    return true;
  }
  const previousSignature = previous?.signature ?? "";
  if (signature !== previousSignature) {
    return false;
  }
  const committedIds = new Set(previous?.transcriptIds ?? []);
  return session.currentTurn.transcripts.every((transcript) => !transcript.isFinal || committedIds.has(transcript.id));
};
|
|
2431
|
+
// Records the just-committed turn on the session (time, signature, normalized
// text and transcript ids) so later commits can be checked for duplicates.
const markTurnCommitted = (session, finalText, committedTranscripts) => {
  const committedIds = getFinalTranscriptIds(committedTranscripts);
  const previous = session.lastCommittedTurn ?? {};
  session.lastCommittedTurn = {
    ...previous,
    committedAt: Date.now(),
    signature: buildTurnSignature(session, finalText, committedIds),
    text: normalizeText2(finalText),
    transcriptIds: committedIds
  };
};
|
|
390
2440
|
/**
 * Applies a partial transcript to the current turn, re-arms a pending vendor
 * commit if one is scheduled, and forwards the partial to the client.
 *
 * @param transcript Partial transcript; `text`, `startedAtMs` and `endedAtMs` are read.
 */
const handlePartial = async (transcript) => {
  await writeSession((record) => {
    const turn = record.currentTurn;
    // Keep the previous partial bounds when the new transcript carries none.
    const startedAt = transcript.startedAtMs ?? turn.partialStartedAt;
    const endedAt = transcript.endedAtMs ?? turn.partialEndedAt;
    const preferredText = selectPreferredTranscriptText(turn.partialText, transcript.text);
    turn.lastTranscriptAt = Date.now();
    turn.partialStartedAt = startedAt;
    turn.partialEndedAt = endedAt;
    turn.partialText = buildTurnText(turn.transcripts, preferredText, {
      partialEndedAtMs: endedAt,
      partialStartedAtMs: startedAt
    });
    record.lastActivityAt = Date.now();
    record.status = "active";
  });
  // A vendor-triggered commit is already pending: schedule it again with the vendor delay.
  const vendorCommitPending = silenceTimer && pendingCommitReason === "vendor";
  if (vendorCommitPending) {
    scheduleTurnCommit(getVendorCommitDelayMs(), "vendor");
  }
  await send({
    transcript,
    type: "partial"
  });
};
|
|
402
2463
|
/**
 * Records a final transcript on the session (once per transcript id),
 * refreshes the turn's final text, re-arms a pending vendor commit if one is
 * scheduled, and forwards the transcript to the client.
 *
 * @param transcript Final transcript; identified by `transcript.id`.
 */
const handleFinal = async (transcript) => {
  await writeSession((record) => {
    const turn = record.currentTurn;
    const isNewTranscript = !turn.transcripts.some((existing) => existing.id === transcript.id);
    if (isNewTranscript) {
      // Each list receives its own clone of the transcript.
      turn.transcripts = [
        ...turn.transcripts,
        cloneTranscript(transcript)
      ];
      record.transcripts = [
        ...record.transcripts,
        cloneTranscript(transcript)
      ];
    }
    turn.finalText = buildTurnText(turn.transcripts, turn.partialText, {
      partialEndedAtMs: turn.partialEndedAt,
      partialStartedAtMs: turn.partialStartedAt
    });
    turn.lastTranscriptAt = Date.now();
    record.lastActivityAt = Date.now();
    record.status = "active";
  });
  // A vendor-triggered commit is already pending: schedule it again with the vendor delay.
  const vendorCommitPending = silenceTimer && pendingCommitReason === "vendor";
  if (vendorCommitPending) {
    scheduleTurnCommit(getVendorCommitDelayMs(), "vendor");
  }
  await send({
    transcript,
    type: "final"
  });
};
|
|
2492
|
+
/**
 * Re-arms the silence commit for a session whose current turn already has
 * text. With no pending text it only clears `speechDetected`.
 *
 * @param session Session record whose `currentTurn` is inspected.
 */
const resumePendingTurnCommit = (session) => {
  const turn = session.currentTurn;
  const pendingText = buildTurnText(turn.transcripts, turn.partialText, {
    partialEndedAtMs: turn.partialEndedAt,
    partialStartedAtMs: turn.partialStartedAt
  });
  speechDetected = Boolean(pendingText);
  if (!speechDetected) {
    return;
  }
  // Time since silence began, else since the last speech, else zero.
  let audioAge = 0;
  if (turn.silenceStartedAt !== undefined) {
    audioAge = Date.now() - turn.silenceStartedAt;
  } else if (turn.lastSpeechAt !== undefined) {
    audioAge = Date.now() - turn.lastSpeechAt;
  }
  // With no transcript timestamp the stability window counts as already elapsed.
  let transcriptAge = turnDetection.transcriptStabilityMs;
  if (turn.lastTranscriptAt !== undefined) {
    transcriptAge = Date.now() - turn.lastTranscriptAt;
  }
  const remainingSilenceMs = turnDetection.silenceMs - audioAge;
  const remainingStabilityMs = turnDetection.transcriptStabilityMs - transcriptAge;
  scheduleSilenceCommit(Math.max(0, remainingSilenceMs, remainingStabilityMs));
};
|
|
425
2507
|
/**
 * Returns the open STT session, opening one and wiring its event handlers if
 * none exists yet.
 *
 * Each opened session gets a generation number; queued events from a session
 * whose generation is no longer the active one are dropped.
 *
 * @returns The open STT session.
 */
const ensureAdapter = async () => {
  if (sttSession) {
    return sttSession;
  }
  const openedSession = await options.stt.open({
    format: DEFAULT_FORMAT,
    languageStrategy: options.languageStrategy,
    lexicon,
    phraseHints,
    sessionId: options.id
  });
  const generation = ++adapterGenerationCounter;
  sttSession = openedSession;
  activeAdapterGeneration = generation;
  // Queue the handler on the serial runner; skip it if this adapter was superseded.
  const runAdapterEvent = (phase, handler) => {
    runSerial(phase, async () => {
      if (activeAdapterGeneration === generation) {
        await handler();
      }
    });
  };
  const onPartial = ({ transcript }) => {
    runAdapterEvent("adapter.partial", () => handlePartial(transcript));
  };
  const onFinal = ({ transcript }) => {
    runAdapterEvent("adapter.final", () => handleFinal(transcript));
  };
  const onEndOfTurn = ({ reason }) => {
    runAdapterEvent("adapter.endOfTurn", async () => {
      clearSilenceTimer();
      await requestTurnCommit(reason);
    });
  };
  const onError = (event) => {
    runAdapterEvent("adapter.error", () => handleError(event));
  };
  const onClose = (event) => {
    runAdapterEvent("adapter.close", () => handleClose(event));
  };
  openedSession.on("partial", onPartial);
  openedSession.on("final", onFinal);
  openedSession.on("endOfTurn", onEndOfTurn);
  openedSession.on("error", onError);
  openedSession.on("close", onClose);
  return openedSession;
};
|
|
2549
|
+
/**
 * Returns the TTS session, opening it on first use.
 *
 * Resolves to `null` when no TTS adapter is configured. While an open is in
 * flight, callers receive the same pending promise. A failed open clears
 * that promise so a later call can try again.
 *
 * @returns The TTS session, or `null` without a TTS adapter.
 */
const ensureTTSSession = async () => {
  const ttsAdapter = options.tts;
  if (!ttsAdapter) {
    return null;
  }
  if (ttsSession) {
    return ttsSession;
  }
  if (ttsSessionPromise) {
    return ttsSessionPromise;
  }
  // Copy the chunk's bytes into a fresh Uint8Array, whatever buffer type arrived.
  const copyChunkBytes = (chunk) => {
    if (chunk instanceof Uint8Array) {
      return new Uint8Array(chunk);
    }
    if (chunk instanceof ArrayBuffer) {
      return new Uint8Array(chunk.slice(0));
    }
    return new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
  };
  const openSession = async () => {
    const openedSession = await ttsAdapter.open({
      lexicon,
      sessionId: options.id
    });
    ttsSession = openedSession;
    // Events from a session that is no longer the active one are ignored.
    const isActiveSession = () => ttsSession === openedSession;
    openedSession.on("audio", ({ chunk, format, receivedAt }) => {
      runSerial("tts.audio", async () => {
        if (!isActiveSession()) {
          return;
        }
        const normalizedChunk = copyChunkBytes(chunk);
        await send({
          chunkBase64: encodeBase64(normalizedChunk),
          format,
          receivedAt,
          turnId: activeTTSTurnId,
          type: "audio"
        });
      });
    });
    openedSession.on("error", (event) => {
      runSerial("tts.error", async () => {
        if (!isActiveSession()) {
          return;
        }
        await send({
          message: toError(event.error).message,
          recoverable: event.recoverable,
          type: "error"
        });
      });
    });
    openedSession.on("close", () => {
      runSerial("tts.close", async () => {
        if (!isActiveSession()) {
          return;
        }
        ttsSession = null;
        ttsSessionPromise = null;
        activeTTSTurnId = undefined;
      });
    });
    return openedSession;
  };
  ttsSessionPromise = openSession().catch((error) => {
    ttsSessionPromise = null;
    throw error;
  });
  return ttsSessionPromise;
};
|
|
2609
|
+
/**
 * Starts opening the TTS session in the background so it is ready before the
 * first assistant reply. Does nothing when TTS is not configured or a session
 * is already open or opening. A failure is logged as a warning, not thrown.
 */
const warmTTSSession = () => {
  const alreadyAvailable = ttsSession || ttsSessionPromise;
  if (!options.tts || alreadyAvailable) {
    return;
  }
  const reportFailure = (error) => {
    logger.warn("voice tts prewarm failed", {
      error: toError(error).message,
      sessionId: options.id
    });
  };
  ensureTTSSession().catch(reportFailure);
};
|
|
456
2620
|
const completeTurn = async (session, turn) => {
|
|
457
|
-
const
|
|
2621
|
+
const committedOutput = await options.route.onTurn({
|
|
458
2622
|
api,
|
|
459
2623
|
context: options.context,
|
|
460
2624
|
session,
|
|
461
2625
|
turn
|
|
462
2626
|
});
|
|
2627
|
+
const output = {
|
|
2628
|
+
assistantText: committedOutput?.assistantText,
|
|
2629
|
+
complete: committedOutput?.complete,
|
|
2630
|
+
escalate: committedOutput?.escalate,
|
|
2631
|
+
noAnswer: committedOutput?.noAnswer,
|
|
2632
|
+
result: committedOutput?.result,
|
|
2633
|
+
transfer: committedOutput?.transfer,
|
|
2634
|
+
voicemail: committedOutput?.voicemail
|
|
2635
|
+
};
|
|
463
2636
|
if (output?.assistantText) {
|
|
464
2637
|
await writeSession((currentSession) => {
|
|
465
2638
|
setTurnResult(currentSession, turn.id, {
|
|
@@ -471,7 +2644,20 @@ var createVoiceSession = (options) => {
|
|
|
471
2644
|
turnId: turn.id,
|
|
472
2645
|
type: "assistant"
|
|
473
2646
|
});
|
|
474
|
-
|
|
2647
|
+
try {
|
|
2648
|
+
const activeTTSSession = await ensureTTSSession();
|
|
2649
|
+
if (activeTTSSession) {
|
|
2650
|
+
activeTTSTurnId = turn.id;
|
|
2651
|
+
await activeTTSSession.send(output.assistantText);
|
|
2652
|
+
}
|
|
2653
|
+
} catch (error) {
|
|
2654
|
+
logger.warn("voice tts send failed", {
|
|
2655
|
+
error: toError(error).message,
|
|
2656
|
+
sessionId: options.id,
|
|
2657
|
+
turnId: turn.id
|
|
2658
|
+
});
|
|
2659
|
+
}
|
|
2660
|
+
}
|
|
475
2661
|
if (output?.result !== undefined) {
|
|
476
2662
|
await writeSession((currentSession) => {
|
|
477
2663
|
setTurnResult(currentSession, turn.id, {
|
|
@@ -479,208 +2665,358 @@ var createVoiceSession = (options) => {
|
|
|
479
2665
|
});
|
|
480
2666
|
});
|
|
481
2667
|
}
|
|
2668
|
+
if (output?.transfer) {
|
|
2669
|
+
await transferInternal({
|
|
2670
|
+
metadata: output.transfer.metadata,
|
|
2671
|
+
reason: output.transfer.reason,
|
|
2672
|
+
result: output.result,
|
|
2673
|
+
target: output.transfer.target
|
|
2674
|
+
});
|
|
2675
|
+
return;
|
|
2676
|
+
}
|
|
2677
|
+
if (output?.escalate) {
|
|
2678
|
+
await escalateInternal({
|
|
2679
|
+
metadata: output.escalate.metadata,
|
|
2680
|
+
reason: output.escalate.reason,
|
|
2681
|
+
result: output.result
|
|
2682
|
+
});
|
|
2683
|
+
return;
|
|
2684
|
+
}
|
|
2685
|
+
if (output?.voicemail) {
|
|
2686
|
+
await markVoicemailInternal({
|
|
2687
|
+
metadata: output.voicemail.metadata,
|
|
2688
|
+
result: output.result
|
|
2689
|
+
});
|
|
2690
|
+
return;
|
|
2691
|
+
}
|
|
2692
|
+
if (output?.noAnswer) {
|
|
2693
|
+
await markNoAnswerInternal({
|
|
2694
|
+
metadata: output.noAnswer.metadata,
|
|
2695
|
+
result: output.result
|
|
2696
|
+
});
|
|
2697
|
+
return;
|
|
2698
|
+
}
|
|
482
2699
|
if (output?.complete) {
|
|
483
|
-
await
|
|
2700
|
+
await completeInternal(output.result);
|
|
484
2701
|
}
|
|
485
2702
|
};
|
|
486
|
-
const
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
2703
|
+
/**
 * Commits the current turn: builds its text, runs the fallback-transcription
 * and correction passes, skips empty or duplicate turns, defers when the
 * transcript is not yet stable, then stores the turn, reports cost, notifies
 * the client and hands the turn to `completeTurn`.
 *
 * @param reason Why the commit was requested. `"manual"` bypasses the
 *               transcript-stability deferral.
 */
const commitTurnInternal = async (reason = "manual") => {
  clearSilenceTimer();
  const session = await readSession();
  if (session.status === "completed" || session.status === "failed") {
    return;
  }
  const pendingTurn = session.currentTurn;
  const text = buildTurnText(pendingTurn.transcripts, pendingTurn.partialText, {
    partialEndedAtMs: pendingTurn.partialEndedAt,
    partialStartedAtMs: pendingTurn.partialStartedAt
  });
  let transcripts = pendingTurn.transcripts.length ? pendingTurn.transcripts.map(cloneTranscript) : [];
  let finalText = text;
  // Used when a turn has text but no transcript objects to attach.
  const placeholderTranscripts = () => [
    {
      id: createId(),
      isFinal: false,
      text: finalText
    }
  ];
  // Measured before the fallback/correction passes run.
  let transcriptStabilityAge;
  if (pendingTurn.lastTranscriptAt !== undefined) {
    transcriptStabilityAge = Date.now() - pendingTurn.lastTranscriptAt;
  }
  const fallbackSelection = await runFallbackTranscription(text, pendingTurn.transcripts);
  const source = fallbackSelection?.source ?? "primary";
  const fallbackUsed = fallbackSelection?.fallbackUsed ?? false;
  const fallbackDiagnostics = fallbackSelection?.diagnostics;
  if (fallbackSelection) {
    finalText = fallbackSelection.text;
    if (fallbackSelection.transcripts.length) {
      transcripts = fallbackSelection.transcripts.map(cloneTranscript);
    } else if (!transcripts.length) {
      transcripts = placeholderTranscripts();
    }
    if (fallbackSelection.fallbackUsed) {
      logger.info("voice fallback turn selected", {
        reason,
        sessionId: options.id,
        text: finalText
      });
    }
  }
  const correctionSelection = await runTurnCorrection({
    fallbackDiagnostics,
    fallbackUsed,
    session,
    source,
    text: finalText,
    transcripts
  });
  const correctionDiagnostics = correctionSelection?.diagnostics;
  if (correctionSelection) {
    finalText = correctionSelection.text;
  }
  if (!finalText) {
    return;
  }
  if (isDuplicateTurnCommit(session, finalText)) {
    logger.debug("voice turn commit deduped", {
      reason,
      sessionId: options.id
    });
    return;
  }
  // Non-manual commits wait until the transcript has been stable long enough.
  const stabilityPending = typeof transcriptStabilityAge === "number" && transcriptStabilityAge < turnDetection.transcriptStabilityMs;
  if (stabilityPending && reason !== "manual") {
    scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason, false);
    return;
  }
  const costEstimate = createTurnCostEstimate({
    fallbackAttemptCount: fallbackAttemptsForCurrentTurn,
    fallbackPassCostUnit: options.costTelemetry?.fallbackPassCostUnit,
    fallbackReplayAudioMs: fallbackReplayAudioMsForCurrentTurn,
    primaryAudioMs: getBufferedAudioDurationMs(currentTurnAudio.map((audio) => audio.chunk)),
    primaryPassCostUnit: options.costTelemetry?.primaryPassCostUnit
  });
  const turn = {
    committedAt: Date.now(),
    id: createId(),
    text: finalText,
    quality: createTurnQuality(transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics, costEstimate),
    transcripts: transcripts.length > 0 ? transcripts : placeholderTranscripts()
  };
  const updatedSession = await writeSession((record) => {
    record.committedTurnIds = [
      ...record.committedTurnIds,
      turn.id
    ];
    record.currentTurn = createEmptyCurrentTurn();
    record.lastActivityAt = Date.now();
    record.status = "active";
    record.turns = [...record.turns, turn];
    markTurnCommitted(record, finalText, transcripts);
  });
  speechDetected = false;
  rewindFallbackTurnAudio();
  logger.info("voice turn committed", {
    reason,
    sessionId: options.id,
    turnId: turn.id
  });
  await options.costTelemetry?.onTurnCost?.({
    api,
    context: options.context,
    estimate: costEstimate,
    session: updatedSession,
    turn
  });
  await send({
    turn,
    type: "turn"
  });
  // With a turn-scoped STT lifecycle the adapter is closed after every committed turn.
  if (options.sttLifecycle === "turn-scoped") {
    await closeAdapter("turn-commit");
  }
  await completeTurn(updatedSession, turn);
};
|
|
2817
|
+
/**
 * Attaches `nextSocket` to this voice session and loads, resets or creates
 * the stored session record.
 *
 * Flow: load the stored record (or create one); reset it when the requested
 * scenario differs from the stored one; apply the reconnect policy when the
 * stored record is "reconnecting"; persist the record and send it to the
 * client; fire `onCallStart`/`onSession` for a fresh or reset session; then
 * resume any pending turn commit, open the STT adapter and pre-warm TTS.
 *
 * @param nextSocket Socket that becomes the session's active socket.
 */
const connectInternal = async (nextSocket) => {
  socket = nextSocket;
  const existingSession = await options.store.get(options.id);
  // Decide this BEFORE scenarioId is written below. `session` is the same
  // object as `existingSession` when a stored record exists, so comparing
  // after the write always saw equal ids and the reset could never run.
  const scenarioChanged = Boolean(existingSession?.scenarioId && options.scenarioId && existingSession.scenarioId !== options.scenarioId);
  let session = existingSession ?? createVoiceSessionRecord(options.id, options.scenarioId);
  if (options.scenarioId && session.scenarioId !== options.scenarioId) {
    session.scenarioId = options.scenarioId;
  }
  ensureCommittedTurnGuard(session);
  let shouldFireOnSession = !existingSession;
  if (scenarioChanged) {
    session = resetVoiceSessionRecord(options.id, existingSession, options.scenarioId);
    shouldFireOnSession = true;
  }
  rewindFallbackTurnAudio();
  if (existingSession?.status === "reconnecting") {
    const nextAttempts = existingSession.reconnect.attempts + 1;
    const reconnectExpired = existingSession.reconnect.lastDisconnectAt !== undefined && Date.now() - existingSession.reconnect.lastDisconnectAt > reconnect.timeout;
    const tooManyAttempts = nextAttempts > reconnect.maxAttempts;
    const policyExhausted = reconnectExpired || tooManyAttempts;
    if (reconnect.strategy === "fail" && policyExhausted) {
      await failInternal(new Error("Voice session reconnect policy exhausted"));
      return;
    }
    if (reconnect.strategy === "restart" && policyExhausted) {
      session = resetVoiceSessionRecord(options.id, existingSession, options.scenarioId);
      shouldFireOnSession = true;
    } else if (!scenarioChanged) {
      // Resume the stored session. Skipped after a scenario change so the
      // freshly reset record is not replaced by the old one.
      session = {
        ...existingSession,
        reconnect: {
          ...existingSession.reconnect,
          attempts: nextAttempts
        },
        status: "active"
      };
    }
  }
  if (shouldFireOnSession) {
    pushCallLifecycleEvent(session, {
      type: "start"
    });
  }
  await options.store.set(options.id, session);
  await send({
    sessionId: options.id,
    status: session.status,
    scenarioId: session.scenarioId,
    type: "session"
  });
  if (shouldFireOnSession) {
    await options.route.onCallStart?.({
      api,
      context: options.context,
      session
    });
    await options.route.onSession?.({
      api,
      context: options.context,
      session
    });
  }
  // A completed session only tells the client it is complete; no adapters are opened.
  if (session.status === "completed") {
    await send({
      sessionId: options.id,
      type: "complete"
    });
    return;
  }
  resumePendingTurnCommit(session);
  await ensureAdapter();
  warmTTSSession();
};
|
|
2888
|
+
/**
 * Handles a socket disconnect: stops timers, closes the TTS and STT sessions,
 * then either fails the session (reconnect strategy "fail") or marks it as
 * "reconnecting".
 *
 * @param event Optional disconnect event; `event.reason` is forwarded to the
 *              close calls and used as the failure message when present.
 */
const disconnectInternal = async (event) => {
  const reason = event?.reason;
  clearSilenceTimer();
  await closeTTSSession(reason);
  await closeAdapter(reason);
  rewindFallbackTurnAudio();
  if (reconnect.strategy === "fail") {
    await failInternal(new Error(reason ?? "Voice socket disconnected"));
    return;
  }
  await writeSession((session) => {
    // Completed and failed sessions keep their status.
    const isTerminal = session.status === "completed" || session.status === "failed";
    if (!isTerminal) {
      session.lastActivityAt = Date.now();
      session.reconnect.lastDisconnectAt = Date.now();
      session.status = "reconnecting";
    }
  });
  speechDetected = false;
};
|
|
2907
|
+
/**
 * Processes one incoming audio chunk: conditions it, updates the turn's
 * speech/silence timestamps, buffers it when it belongs to speech, arms or
 * clears the silence timer, and forwards it to the STT adapter.
 *
 * Audio for a completed or failed session is ignored.
 *
 * @param audio Raw audio chunk received from the client.
 */
const receiveAudioInternal = async (audio) => {
  const session = await readSession();
  if (session.status === "completed" || session.status === "failed") {
    return;
  }
  const adapter = await ensureAdapter();
  const conditionedAudio = conditionAudioChunk(audio, options.audioConditioning);
  const audioLevel = measureAudioLevel(conditionedAudio);
  const isSpeech = audioLevel >= turnDetection.speechThreshold;
  // Decided before the session write, from the speech state at that moment.
  const shouldStoreAudio = speechDetected || isSpeech;
  await writeSession((record) => {
    const turn = record.currentTurn;
    turn.lastAudioAt = Date.now();
    record.lastActivityAt = Date.now();
    record.status = "active";
    if (isSpeech) {
      turn.lastSpeechAt = Date.now();
      turn.silenceStartedAt = undefined;
    } else if (speechDetected && turn.silenceStartedAt === undefined) {
      // First quiet chunk after speech marks where the silence began.
      turn.silenceStartedAt = Date.now();
    }
  });
  if (shouldStoreAudio) {
    pushTurnAudio(conditionedAudio);
  }
  if (isSpeech) {
    speechDetected = true;
    clearSilenceTimer();
  } else if (speechDetected) {
    // Quiet chunk after speech: arm the silence commit only if the turn has text.
    const latest = await readSession();
    const turnText = buildTurnText(latest.currentTurn.transcripts, latest.currentTurn.partialText, {
      partialEndedAtMs: latest.currentTurn.partialEndedAt,
      partialStartedAtMs: latest.currentTurn.partialStartedAt
    });
    if (Boolean(turnText)) {
      scheduleSilenceCommit(turnDetection.silenceMs, false);
    }
  }
  await adapter.send(conditionedAudio);
};
|
|
2945
|
+
// Runs `task` on the serial runner under `phase` and resolves with no value.
const runSerialVoid = (phase, task) => runSerial(phase, async () => {
  await task();
});
/**
 * Public session API. Every method body is queued through `runSerial` under
 * its own phase name; apart from `close` and `snapshot`, each method delegates
 * to the matching `*Internal` function.
 */
const api = {
  id: options.id,
  // Marks a still-open session as completed with disposition "closed", shuts
  // down TTS/STT and the socket, then reports the call end.
  close: async (reason) => {
    await runSerial("api.close", async () => {
      const session = await writeSession((record) => {
        const isOpen = record.status !== "completed" && record.status !== "failed" && !record.call?.endedAt;
        if (isOpen) {
          record.lastActivityAt = Date.now();
          record.status = "completed";
          pushCallLifecycleEvent(record, {
            disposition: "closed",
            reason,
            type: "end"
          });
        }
      });
      clearSilenceTimer();
      await closeTTSSession(reason);
      await closeAdapter(reason);
      await Promise.resolve(socket.close(1000, reason));
      // NOTE(review): this check reads the stored record, so a repeated close()
      // on a session already ended as "closed" appears to fire onCallEnd again;
      // confirm whether that is intended.
      const endedAsClosed = session.call?.endedAt && session.call.disposition === "closed";
      if (endedAsClosed) {
        await options.route.onCallEnd?.({
          api,
          context: options.context,
          disposition: "closed",
          reason,
          session
        });
      }
    });
  },
  commitTurn: async (reason = "manual") => runSerialVoid("api.commitTurn", () => commitTurnInternal(reason)),
  complete: async (result) => runSerialVoid("api.complete", () => completeInternal(result)),
  connect: async (nextSocket) => runSerialVoid("api.connect", () => connectInternal(nextSocket)),
  disconnect: async (event) => runSerialVoid("api.disconnect", () => disconnectInternal(event)),
  fail: async (error) => runSerialVoid("api.fail", () => failInternal(error)),
  escalate: async (input) => runSerialVoid("api.escalate", () => escalateInternal(input)),
  markNoAnswer: async (input) => runSerialVoid("api.markNoAnswer", () => markNoAnswerInternal(input)),
  markVoicemail: async (input) => runSerialVoid("api.markVoicemail", () => markVoicemailInternal(input)),
  receiveAudio: async (audio) => runSerialVoid("api.receiveAudio", () => receiveAudioInternal(audio)),
  transfer: async (input) => runSerialVoid("api.transfer", () => transferInternal(input)),
  // Queues a read of the session record and returns what the runner resolves with.
  snapshot: async () => runSerial("api.snapshot", async () => readSession())
};
|
|
680
3007
|
return api;
|
|
681
3008
|
};
|
|
682
3009
|
|
|
683
3010
|
// src/plugin.ts
|
|
3011
|
+
/**
 * Extracts the scenario id from a query object.
 *
 * `scenarioId` takes precedence over `mode`. A value counts only when it is a
 * string that is non-empty after trimming, and the trimmed string is returned.
 *
 * @param query Query object; may be null or undefined.
 * @returns The trimmed scenario id, or `null` when neither key qualifies.
 */
var resolveQueryScenario = (query) => {
  for (const key of ["scenarioId", "mode"]) {
    const candidate = query?.[key];
    if (typeof candidate !== "string") {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return null;
};
|
|
684
3020
|
var HTMX_BOOTSTRAP_DIST_CANDIDATES = [
|
|
685
3021
|
resolve(import.meta.dir, "client", "htmxBootstrap.js"),
|
|
686
3022
|
resolve(import.meta.dir, "..", "dist", "client", "htmxBootstrap.js")
|
|
@@ -727,6 +3063,21 @@ ${log}` : ""}`);
|
|
|
727
3063
|
};
|
|
728
3064
|
})();
|
|
729
3065
|
/**
 * Tells whether `value` is a non-null object that is an ArrayBuffer view
 * (a typed array or a DataView).
 *
 * @param value Any value.
 * @returns `true` only for ArrayBuffer views.
 */
var isArrayBufferView = (value) => {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return ArrayBuffer.isView(value);
};
|
|
3066
|
+
/**
 * Fills in defaults for an STT fallback configuration.
 *
 * A default applies only when the corresponding option is null or undefined,
 * so explicit values such as `0` are kept.
 *
 * @param config User-supplied fallback config, or a falsy value for "none".
 * @returns The config with every option resolved, or `undefined` when no
 *          config was given.
 */
var resolveSTTFallbackConfig = (config) => {
  if (!config) {
    return undefined;
  }
  const resolved = { adapter: config.adapter };
  resolved.completionTimeoutMs = config.completionTimeoutMs ?? 2500;
  resolved.confidenceThreshold = config.confidenceThreshold ?? 0.6;
  resolved.maxAttemptsPerTurn = config.maxAttemptsPerTurn ?? 1;
  resolved.minTextLength = config.minTextLength ?? 2;
  resolved.replayWindowMs = config.replayWindowMs ?? 8000;
  resolved.settleMs = config.settleMs ?? 220;
  resolved.trigger = config.trigger ?? "empty-or-low-confidence";
  return resolved;
};
|
|
730
3081
|
var isVoiceClientMessage = (value) => {
|
|
731
3082
|
if (!value || typeof value !== "object" || !("type" in value)) {
|
|
732
3083
|
return false;
|
|
@@ -739,7 +3090,7 @@ var isVoiceClientMessage = (value) => {
|
|
|
739
3090
|
case "ping":
|
|
740
3091
|
return true;
|
|
741
3092
|
case "start":
|
|
742
|
-
return !("sessionId" in value) || typeof value.sessionId === "string";
|
|
3093
|
+
return (!("sessionId" in value) || typeof value.sessionId === "string") && (!("scenarioId" in value) || typeof value.scenarioId === "string");
|
|
743
3094
|
default:
|
|
744
3095
|
return false;
|
|
745
3096
|
}
|
|
@@ -759,14 +3110,16 @@ var parseClientMessage = (raw) => {
|
|
|
759
3110
|
return null;
|
|
760
3111
|
};
|
|
761
3112
|
var resolveSessionId = (runtime, ws) => {
|
|
762
|
-
const existing = runtime.socketSessions.get(ws);
|
|
763
|
-
if (existing) {
|
|
764
|
-
return existing;
|
|
765
|
-
}
|
|
766
3113
|
const query = ws.data && typeof ws.data === "object" && "query" in ws.data ? ws.data.query : undefined;
|
|
767
|
-
const
|
|
768
|
-
|
|
769
|
-
|
|
3114
|
+
const existing = runtime.socketSessions.get(ws);
|
|
3115
|
+
const providedSessionId = typeof query?.sessionId === "string" && query.sessionId.trim() ? query.sessionId.trim() : existing?.sessionId ?? createId();
|
|
3116
|
+
const scenarioId = resolveQueryScenario(query) ?? existing?.scenarioId ?? null;
|
|
3117
|
+
const resolved = {
|
|
3118
|
+
sessionId: providedSessionId,
|
|
3119
|
+
scenarioId
|
|
3120
|
+
};
|
|
3121
|
+
runtime.socketSessions.set(ws, resolved);
|
|
3122
|
+
return resolved;
|
|
770
3123
|
};
|
|
771
3124
|
var toAudioChunk = (raw) => {
|
|
772
3125
|
if (raw instanceof ArrayBuffer) {
|
|
@@ -792,6 +3145,55 @@ var normalizeOnTurn = (handler) => {
|
|
|
792
3145
|
}
|
|
793
3146
|
return handler;
|
|
794
3147
|
};
|
|
3148
|
+
/**
 * Derives the per-session option bundle from the plugin configuration.
 *
 * Values set explicitly on `config` win; otherwise the resolved runtime preset
 * supplies audio conditioning, STT lifecycle and turn-detection settings.
 *
 * @param {object} config - Plugin configuration passed to `voice(...)`.
 * @returns {object} Resolved `audioConditioning`, `costTelemetry`, `sttFallback`,
 *   `logger`, `reconnect`, `sttLifecycle` and `turnDetection` settings.
 */
var resolveSessionOptions = (config) => {
  const preset = resolveVoiceRuntimePreset(config.preset);

  // Only `undefined` defers to the preset; any other value is resolved as given.
  const audioConditioning = config.audioConditioning === undefined
    ? preset.audioConditioning
    : resolveAudioConditioningConfig(config.audioConditioning);

  const sttFallback = resolveSTTFallbackConfig(config.sttFallback);

  const reconnectOverrides = config.reconnect;
  const reconnect = {
    maxAttempts: reconnectOverrides?.maxAttempts ?? 10,
    strategy: reconnectOverrides?.strategy ?? "resume-last-turn",
    timeout: reconnectOverrides?.timeout ?? 30000
  };

  // Caller-provided turn-detection fields override the preset field by field.
  const turnDetection = resolveTurnDetectionConfig({
    ...preset.turnDetection,
    ...config.turnDetection
  });

  return {
    audioConditioning,
    costTelemetry: config.costTelemetry,
    sttFallback,
    logger: config.logger,
    reconnect,
    sttLifecycle: config.sttLifecycle ?? preset.sttLifecycle,
    turnDetection
  };
};
|
|
3167
|
+
/**
 * Cleans a caller-supplied list of phrase hints.
 *
 * Each surviving hint keeps its own properties, has `text` trimmed, and has
 * `aliases` reduced to the non-blank string entries. Hints whose text is blank
 * are dropped. Malformed input (a non-array list, a null entry, a non-string
 * `text`, a non-array `aliases`) is skipped or ignored instead of raising a
 * TypeError, matching the tolerance already applied to individual aliases.
 *
 * @param {Array<object> | null | undefined} hints - Raw phrase hints.
 * @returns {Array<object>} Normalized hints; empty when nothing usable was given.
 */
var normalizePhraseHints = (hints) => {
  if (!Array.isArray(hints)) {
    return [];
  }
  const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
  const normalized = [];
  for (const hint of hints) {
    if (!hint || typeof hint.text !== "string") {
      continue;
    }
    const text = hint.text.trim();
    if (text.length === 0) {
      continue;
    }
    normalized.push({
      ...hint,
      // The `aliases` key is always present (possibly undefined), as before.
      aliases: Array.isArray(hint.aliases) ? hint.aliases.filter(isNonBlankString) : undefined,
      text
    });
  }
  return normalized;
};
|
|
3172
|
+
/**
 * Cleans a caller-supplied pronunciation lexicon.
 *
 * Each surviving entry keeps its own properties, has `text` trimmed, has
 * `aliases` reduced to the non-blank string entries, and has `language` /
 * `pronunciation` trimmed (or set to `undefined` when blank or not a string).
 * Entries whose text is blank are dropped. Malformed input (a non-array list, a
 * null entry, a non-string `text`, a non-array `aliases`) is skipped or ignored
 * instead of raising a TypeError.
 *
 * @param {Array<object> | null | undefined} entries - Raw lexicon entries.
 * @returns {Array<object>} Normalized entries; empty when nothing usable was given.
 */
var normalizeLexicon = (entries) => {
  if (!Array.isArray(entries)) {
    return [];
  }
  const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
  const trimmedOrUndefined = (value) => isNonBlankString(value) ? value.trim() : undefined;
  const normalized = [];
  for (const entry of entries) {
    if (!entry || typeof entry.text !== "string") {
      continue;
    }
    const text = entry.text.trim();
    if (text.length === 0) {
      continue;
    }
    normalized.push({
      ...entry,
      aliases: Array.isArray(entry.aliases) ? entry.aliases.filter(isNonBlankString) : undefined,
      language: trimmedOrUndefined(entry.language),
      pronunciation: trimmedOrUndefined(entry.pronunciation),
      text
    });
  }
  return normalized;
};
|
|
3179
|
+
/**
 * Produces the normalized phrase hints for a session.
 *
 * `config.phraseHints` may be absent, a static list, or a (possibly async)
 * function that receives `input` and returns a list.
 *
 * @param {object} config - Plugin configuration.
 * @param {object} input - Value forwarded to a functional `phraseHints`.
 * @returns {Promise<Array<object>>} Normalized hints; empty when none are configured.
 */
var resolvePhraseHints = async (config, input) => {
  const source = config.phraseHints;
  if (!source) {
    return [];
  }
  // Invoke through `config` so a functional source keeps its receiver.
  const rawHints = typeof source === "function" ? await config.phraseHints(input) : source;
  return normalizePhraseHints(rawHints);
};
|
|
3188
|
+
/**
 * Produces the normalized lexicon for a session.
 *
 * `config.lexicon` may be absent, a static list, or a (possibly async) function
 * that receives `input` and returns a list.
 *
 * @param {object} config - Plugin configuration.
 * @param {object} input - Value forwarded to a functional `lexicon`.
 * @returns {Promise<Array<object>>} Normalized entries; empty when none are configured.
 */
var resolveLexicon = async (config, input) => {
  const source = config.lexicon;
  if (!source) {
    return [];
  }
  // Invoke through `config` so a functional source keeps its receiver.
  const rawEntries = typeof source === "function" ? await config.lexicon(input) : source;
  return normalizeLexicon(rawEntries);
};
|
|
795
3197
|
var voice = (config) => {
|
|
796
3198
|
const runtime = {
|
|
797
3199
|
activeSessions: new Map,
|
|
@@ -799,11 +3201,79 @@ var voice = (config) => {
|
|
|
799
3201
|
socketSessions: new WeakMap
|
|
800
3202
|
};
|
|
801
3203
|
const onTurn = normalizeOnTurn(config.onTurn);
|
|
3204
|
+
const sessionOptions = resolveSessionOptions(config);
|
|
802
3205
|
const htmxOptions = config.htmx && typeof config.htmx === "object" ? config.htmx : undefined;
|
|
803
3206
|
const htmxRoute = htmxOptions?.route ?? `${config.path}/htmx/session`;
|
|
804
3207
|
const htmxBootstrapRoute = htmxOptions?.bootstrapRoute ?? `${config.path}/htmx/bootstrap.js`;
|
|
805
3208
|
const htmxRenderers = resolveVoiceHTMXRenderers(config.htmx && config.htmx !== true ? config.htmx : undefined);
|
|
806
3209
|
const htmxTargets = resolveVoiceHTMXTargets(htmxOptions?.targets);
|
|
3210
|
+
const createManagedSession = async (ws, sessionId, scenarioId) => {
|
|
3211
|
+
const context = ws.data;
|
|
3212
|
+
const phraseHints = await resolvePhraseHints(config, {
|
|
3213
|
+
context,
|
|
3214
|
+
scenarioId,
|
|
3215
|
+
sessionId
|
|
3216
|
+
});
|
|
3217
|
+
const lexicon = await resolveLexicon(config, {
|
|
3218
|
+
context,
|
|
3219
|
+
scenarioId,
|
|
3220
|
+
sessionId
|
|
3221
|
+
});
|
|
3222
|
+
return createVoiceSession({
|
|
3223
|
+
audioConditioning: sessionOptions.audioConditioning,
|
|
3224
|
+
context,
|
|
3225
|
+
id: sessionId,
|
|
3226
|
+
languageStrategy: config.languageStrategy,
|
|
3227
|
+
lexicon,
|
|
3228
|
+
logger: sessionOptions.logger,
|
|
3229
|
+
phraseHints,
|
|
3230
|
+
reconnect: sessionOptions.reconnect,
|
|
3231
|
+
route: {
|
|
3232
|
+
correctTurn: config.correctTurn,
|
|
3233
|
+
onCallEnd: async (input) => {
|
|
3234
|
+
let hookError;
|
|
3235
|
+
try {
|
|
3236
|
+
await config.onCallEnd?.(input);
|
|
3237
|
+
} catch (error) {
|
|
3238
|
+
hookError = error;
|
|
3239
|
+
}
|
|
3240
|
+
try {
|
|
3241
|
+
await recordVoiceRuntimeOps({
|
|
3242
|
+
api: input.api,
|
|
3243
|
+
config: config.ops,
|
|
3244
|
+
context: input.context,
|
|
3245
|
+
disposition: input.disposition,
|
|
3246
|
+
metadata: input.metadata,
|
|
3247
|
+
reason: input.reason,
|
|
3248
|
+
session: input.session,
|
|
3249
|
+
target: input.target
|
|
3250
|
+
});
|
|
3251
|
+
} finally {
|
|
3252
|
+
if (hookError) {
|
|
3253
|
+
throw hookError;
|
|
3254
|
+
}
|
|
3255
|
+
}
|
|
3256
|
+
},
|
|
3257
|
+
onCallStart: config.onCallStart,
|
|
3258
|
+
onComplete: config.onComplete,
|
|
3259
|
+
onEscalation: config.onEscalation,
|
|
3260
|
+
onError: config.onError,
|
|
3261
|
+
onNoAnswer: config.onNoAnswer,
|
|
3262
|
+
onSession: config.onSession,
|
|
3263
|
+
onTransfer: config.onTransfer,
|
|
3264
|
+
onTurn,
|
|
3265
|
+
onVoicemail: config.onVoicemail
|
|
3266
|
+
},
|
|
3267
|
+
scenarioId,
|
|
3268
|
+
socket: createSocketAdapter(ws),
|
|
3269
|
+
store: config.session,
|
|
3270
|
+
stt: config.stt,
|
|
3271
|
+
sttFallback: sessionOptions.sttFallback,
|
|
3272
|
+
sttLifecycle: sessionOptions.sttLifecycle,
|
|
3273
|
+
tts: config.tts,
|
|
3274
|
+
turnDetection: sessionOptions.turnDetection
|
|
3275
|
+
});
|
|
3276
|
+
};
|
|
807
3277
|
const htmxRoutes = () => {
|
|
808
3278
|
if (!config.htmx) {
|
|
809
3279
|
return new Elysia;
|
|
@@ -833,12 +3303,12 @@ var voice = (config) => {
|
|
|
833
3303
|
};
|
|
834
3304
|
return new Elysia({ name: "absolutejs-voice" }).ws(config.path, {
|
|
835
3305
|
close: async (ws, code, reason) => {
|
|
836
|
-
const
|
|
837
|
-
if (!
|
|
3306
|
+
const socketState = runtime.socketSessions.get(ws);
|
|
3307
|
+
if (!socketState) {
|
|
838
3308
|
return;
|
|
839
3309
|
}
|
|
840
|
-
const session = runtime.activeSessions.get(sessionId);
|
|
841
|
-
runtime.activeSessions.delete(sessionId);
|
|
3310
|
+
const session = runtime.activeSessions.get(socketState.sessionId);
|
|
3311
|
+
runtime.activeSessions.delete(socketState.sessionId);
|
|
842
3312
|
if (session) {
|
|
843
3313
|
await session.disconnect({
|
|
844
3314
|
code,
|
|
@@ -849,8 +3319,8 @@ var voice = (config) => {
|
|
|
849
3319
|
}
|
|
850
3320
|
},
|
|
851
3321
|
message: async (ws, raw) => {
|
|
852
|
-
const
|
|
853
|
-
const current = runtime.activeSessions.get(sessionId);
|
|
3322
|
+
const sessionState = resolveSessionId(runtime, ws);
|
|
3323
|
+
const current = runtime.activeSessions.get(sessionState.sessionId);
|
|
854
3324
|
const message = parseClientMessage(raw);
|
|
855
3325
|
if (message) {
|
|
856
3326
|
if (message.type === "ping") {
|
|
@@ -861,10 +3331,27 @@ var voice = (config) => {
|
|
|
861
3331
|
}
|
|
862
3332
|
if (message.type === "close" && current) {
|
|
863
3333
|
await current.close(message.reason);
|
|
864
|
-
runtime.activeSessions.delete(sessionId);
|
|
3334
|
+
runtime.activeSessions.delete(sessionState.sessionId);
|
|
865
3335
|
}
|
|
866
|
-
if (message.type === "start" && message.sessionId && message.sessionId !== sessionId) {
|
|
867
|
-
runtime.
|
|
3336
|
+
if (message.type === "start" && message.sessionId && message.sessionId !== sessionState.sessionId) {
|
|
3337
|
+
const currentSession = runtime.activeSessions.get(sessionState.sessionId);
|
|
3338
|
+
if (currentSession) {
|
|
3339
|
+
await currentSession.close("session-switch");
|
|
3340
|
+
runtime.activeSessions.delete(sessionState.sessionId);
|
|
3341
|
+
}
|
|
3342
|
+
sessionState.sessionId = message.sessionId;
|
|
3343
|
+
runtime.socketSessions.set(ws, {
|
|
3344
|
+
...sessionState,
|
|
3345
|
+
sessionId: message.sessionId,
|
|
3346
|
+
scenarioId: sessionState.scenarioId
|
|
3347
|
+
});
|
|
3348
|
+
}
|
|
3349
|
+
if (message.type === "start" && message.scenarioId) {
|
|
3350
|
+
sessionState.scenarioId = message.scenarioId;
|
|
3351
|
+
runtime.socketSessions.set(ws, {
|
|
3352
|
+
...sessionState,
|
|
3353
|
+
scenarioId: message.scenarioId
|
|
3354
|
+
});
|
|
868
3355
|
}
|
|
869
3356
|
return;
|
|
870
3357
|
}
|
|
@@ -872,70 +3359,191 @@ var voice = (config) => {
|
|
|
872
3359
|
if (!audio) {
|
|
873
3360
|
return;
|
|
874
3361
|
}
|
|
875
|
-
const session = current ??
|
|
876
|
-
context: ws.data,
|
|
877
|
-
id: sessionId,
|
|
878
|
-
logger: config.logger,
|
|
879
|
-
reconnect: {
|
|
880
|
-
maxAttempts: config.reconnect?.maxAttempts ?? 10,
|
|
881
|
-
strategy: config.reconnect?.strategy ?? "resume-last-turn",
|
|
882
|
-
timeout: config.reconnect?.timeout ?? 30000
|
|
883
|
-
},
|
|
884
|
-
route: {
|
|
885
|
-
onComplete: config.onComplete,
|
|
886
|
-
onError: config.onError,
|
|
887
|
-
onSession: config.onSession,
|
|
888
|
-
onTurn
|
|
889
|
-
},
|
|
890
|
-
socket: createSocketAdapter(ws),
|
|
891
|
-
store: config.session,
|
|
892
|
-
stt: config.stt,
|
|
893
|
-
turnDetection: {
|
|
894
|
-
silenceMs: config.turnDetection?.silenceMs ?? 700,
|
|
895
|
-
speechThreshold: config.turnDetection?.speechThreshold ?? 0.015
|
|
896
|
-
}
|
|
897
|
-
});
|
|
3362
|
+
const session = current ?? await createManagedSession(ws, sessionState.sessionId, sessionState.scenarioId ?? undefined);
|
|
898
3363
|
if (!current) {
|
|
899
|
-
runtime.activeSessions.set(sessionId, session);
|
|
3364
|
+
runtime.activeSessions.set(sessionState.sessionId, session);
|
|
900
3365
|
await session.connect(createSocketAdapter(ws));
|
|
901
3366
|
}
|
|
902
3367
|
await session.receiveAudio(audio);
|
|
903
3368
|
},
|
|
904
3369
|
open: async (ws) => {
|
|
905
|
-
const
|
|
906
|
-
const existing = runtime.activeSessions.get(sessionId);
|
|
3370
|
+
const sessionState = resolveSessionId(runtime, ws);
|
|
3371
|
+
const existing = runtime.activeSessions.get(sessionState.sessionId);
|
|
907
3372
|
if (existing) {
|
|
908
3373
|
await existing.close("superseded");
|
|
909
|
-
runtime.activeSessions.delete(sessionId);
|
|
910
|
-
}
|
|
911
|
-
const session =
|
|
912
|
-
|
|
913
|
-
id: sessionId,
|
|
914
|
-
logger: config.logger,
|
|
915
|
-
reconnect: {
|
|
916
|
-
maxAttempts: config.reconnect?.maxAttempts ?? 10,
|
|
917
|
-
strategy: config.reconnect?.strategy ?? "resume-last-turn",
|
|
918
|
-
timeout: config.reconnect?.timeout ?? 30000
|
|
919
|
-
},
|
|
920
|
-
route: {
|
|
921
|
-
onComplete: config.onComplete,
|
|
922
|
-
onError: config.onError,
|
|
923
|
-
onSession: config.onSession,
|
|
924
|
-
onTurn
|
|
925
|
-
},
|
|
926
|
-
socket: createSocketAdapter(ws),
|
|
927
|
-
store: config.session,
|
|
928
|
-
stt: config.stt,
|
|
929
|
-
turnDetection: {
|
|
930
|
-
silenceMs: config.turnDetection?.silenceMs ?? 700,
|
|
931
|
-
speechThreshold: config.turnDetection?.speechThreshold ?? 0.015
|
|
932
|
-
}
|
|
933
|
-
});
|
|
934
|
-
runtime.activeSessions.set(sessionId, session);
|
|
3374
|
+
runtime.activeSessions.delete(sessionState.sessionId);
|
|
3375
|
+
}
|
|
3376
|
+
const session = await createManagedSession(ws, sessionState.sessionId, sessionState.scenarioId ?? undefined);
|
|
3377
|
+
runtime.activeSessions.set(sessionState.sessionId, session);
|
|
935
3378
|
await session.connect(createSocketAdapter(ws));
|
|
936
3379
|
}
|
|
937
3380
|
}).use(htmxRoutes());
|
|
938
3381
|
};
|
|
3382
|
+
// src/fileStore.ts
|
|
3383
|
+
import { mkdir, readFile, readdir, rename, rm, writeFile } from "fs/promises";
|
|
3384
|
+
import { join } from "path";
|
|
3385
|
+
/**
 * Lists the `.json` regular files directly inside `directory`.
 *
 * @param {string} directory - Directory to scan (not recursive).
 * @returns {Promise<string[]>} Paths joined onto `directory`; an empty list when
 *   the directory does not exist.
 * @throws Any `readdir` failure other than `ENOENT`.
 */
var listJsonFiles = async (directory) => {
  try {
    const jsonPaths = [];
    for (const entry of await readdir(directory, { withFileTypes: true })) {
      if (entry.isFile() && entry.name.endsWith(".json")) {
        jsonPaths.push(join(directory, entry.name));
      }
    }
    return jsonPaths;
  } catch (error) {
    // A store directory that was never written to is simply empty.
    if (error.code !== "ENOENT") {
      throw error;
    }
    return [];
  }
};
|
|
3398
|
+
/**
 * Maps a store id to a file name: the URI-component-encoded id plus `.json`.
 *
 * @param {string} id - Record identifier.
 * @returns {string} File name for the record.
 */
var encodeStoreId = (id) => {
  const encodedId = encodeURIComponent(id);
  return `${encodedId}.json`;
};

/**
 * Builds the path of the file that holds record `id` inside `directory`.
 *
 * @param {string} directory - Store directory.
 * @param {string} id - Record identifier.
 * @returns {string} Joined file path.
 */
var resolveFilePath = (directory, id) => {
  return join(directory, encodeStoreId(id));
};

/**
 * Reads a UTF-8 file and parses it as JSON.
 *
 * @param {string} path - File to read.
 * @returns {Promise<unknown>} Parsed contents.
 * @throws The `readFile` error (e.g. `ENOENT`) or the `JSON.parse` SyntaxError.
 */
var readJsonFile = async (path) => {
  const contents = await readFile(path, "utf8");
  return JSON.parse(contents);
};
|
|
3401
|
+
var writeJsonFile = async (path, value, options) => {
|
|
3402
|
+
await mkdir(options.directory, {
|
|
3403
|
+
recursive: true
|
|
3404
|
+
});
|
|
3405
|
+
const tempPath = `${path}.${crypto.randomUUID()}.tmp`;
|
|
3406
|
+
await writeFile(tempPath, JSON.stringify(value, null, options.pretty === false ? undefined : 2));
|
|
3407
|
+
await rename(tempPath, path);
|
|
3408
|
+
};
|
|
3409
|
+
/**
 * Creates a session store that keeps one JSON file per session id inside
 * `options.directory`.
 *
 * @param {{ directory: string, pretty?: boolean }} options - Store location and
 *   JSON formatting, forwarded to `writeJsonFile`.
 * @returns {{ get: Function, getOrCreate: Function, list: Function, remove: Function, set: Function }}
 *   Async store operations.
 */
var createVoiceFileSessionStore = (options) => {
  const pathFor = (id) => resolveFilePath(options.directory, id);

  /** Reads a session; resolves to `undefined` when its file does not exist. */
  const get = async (id) => {
    const path = pathFor(id);
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error.code !== "ENOENT") {
        throw error;
      }
      return undefined;
    }
  };

  /** Returns the stored session, or creates and persists a fresh record. */
  const getOrCreate = async (id) => {
    const stored = await get(id);
    if (stored) {
      return stored;
    }
    const created = createVoiceSessionRecord(id);
    await writeJsonFile(pathFor(id), created, options);
    return created;
  };

  /** Overwrites the session file for `id`. */
  const set = async (id, value) => {
    await writeJsonFile(pathFor(id), value, options);
  };

  /** Summarizes every stored session, most recently active first. */
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    const records = await Promise.all(files.map((file) => readJsonFile(file)));
    // Sessions without recorded activity are ordered by creation time instead.
    const activityOf = (summary) => summary.lastActivityAt ?? summary.createdAt;
    return records
      .map((record) => toVoiceSessionSummary(record))
      .sort((first, second) => activityOf(second) - activityOf(first));
  };

  /** Deletes the session file; a missing file is not an error (`force`). */
  const remove = async (id) => {
    await rm(pathFor(id), { force: true });
  };

  return { get, getOrCreate, list, remove, set };
};
|
|
3445
|
+
/**
 * Creates a call-review store that keeps one JSON file per review id inside
 * `options.directory`.
 *
 * @param {{ directory: string, pretty?: boolean }} options - Store location and
 *   JSON formatting, forwarded to `writeJsonFile`.
 * @returns {{ get: Function, list: Function, remove: Function, set: Function }}
 *   Async store operations.
 */
var createVoiceFileReviewStore = (options) => {
  const pathFor = (id) => resolveFilePath(options.directory, id);

  /** Reads a review; resolves to `undefined` when its file does not exist. */
  const get = async (id) => {
    const path = pathFor(id);
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error.code !== "ENOENT") {
        throw error;
      }
      return undefined;
    }
  };

  /** Loads every stored review, newest `generatedAt` first (missing counts as 0). */
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    const reviews = await Promise.all(files.map((file) => readJsonFile(file)));
    const generatedAtOf = (review) => review.generatedAt ?? 0;
    return reviews.sort((left, right) => generatedAtOf(right) - generatedAtOf(left));
  };

  /** Persists the artifact after stamping it with `id`. */
  const set = async (id, artifact) => {
    await writeJsonFile(pathFor(id), withVoiceCallReviewId(id, artifact), options);
  };

  /** Deletes the review file; a missing file is not an error (`force`). */
  const remove = async (id) => {
    await rm(pathFor(id), { force: true });
  };

  return { get, list, remove, set };
};
|
|
3472
|
+
/**
 * Creates an ops-task store that keeps one JSON file per task id inside
 * `options.directory`.
 *
 * @param {{ directory: string, pretty?: boolean }} options - Store location and
 *   JSON formatting, forwarded to `writeJsonFile`.
 * @returns {{ get: Function, list: Function, remove: Function, set: Function }}
 *   Async store operations.
 */
var createVoiceFileTaskStore = (options) => {
  const pathFor = (id) => resolveFilePath(options.directory, id);

  /** Reads a task; resolves to `undefined` when its file does not exist. */
  const get = async (id) => {
    const path = pathFor(id);
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };

  /** Loads every stored task, newest `createdAt` first. */
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    const tasks = await Promise.all(files.map((file) => readJsonFile(file)));
    // A missing timestamp counts as 0 (same convention as the review store) so
    // the comparator never returns NaN, which would make the order unspecified.
    return tasks.sort((left, right) => (right.createdAt ?? 0) - (left.createdAt ?? 0));
  };

  /** Persists the task after stamping it with `id`. */
  const set = async (id, task) => {
    await writeJsonFile(pathFor(id), withVoiceOpsTaskId(id, task), options);
  };

  /** Deletes the task file; a missing file is not an error (`force`). */
  const remove = async (id) => {
    await rm(pathFor(id), {
      force: true
    });
  };

  return { get, list, remove, set };
};
|
|
3499
|
+
/**
 * Creates an integration-event store that keeps one JSON file per event id
 * inside `options.directory`.
 *
 * @param {{ directory: string, pretty?: boolean }} options - Store location and
 *   JSON formatting, forwarded to `writeJsonFile`.
 * @returns {{ get: Function, list: Function, remove: Function, set: Function }}
 *   Async store operations.
 */
var createVoiceFileIntegrationEventStore = (options) => {
  const pathFor = (id) => resolveFilePath(options.directory, id);

  /** Reads an event; resolves to `undefined` when its file does not exist. */
  const get = async (id) => {
    const path = pathFor(id);
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };

  /** Loads every stored event, newest `createdAt` first. */
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    const events = await Promise.all(files.map((file) => readJsonFile(file)));
    // A missing timestamp counts as 0 (same convention as the review store) so
    // the comparator never returns NaN, which would make the order unspecified.
    return events.sort((left, right) => (right.createdAt ?? 0) - (left.createdAt ?? 0));
  };

  /** Persists the event after stamping it with `id`. */
  const set = async (id, event) => {
    await writeJsonFile(pathFor(id), withVoiceIntegrationEventId(id, event), options);
  };

  /** Deletes the event file; a missing file is not an error (`force`). */
  const remove = async (id) => {
    await rm(pathFor(id), {
      force: true
    });
  };

  return { get, list, remove, set };
};
|
|
3526
|
+
/**
 * Builds the four file-backed stores under a common root directory, each in its
 * own subdirectory: `events`, `reviews`, `sessions` and `tasks`.
 *
 * @param {{ directory: string, pretty?: boolean }} options - Root directory plus
 *   options shared by every store.
 * @returns {{ events: object, reviews: object, session: object, tasks: object }}
 *   The created stores.
 */
var createVoiceFileRuntimeStorage = (options) => {
  // Same options, with `directory` narrowed to one subdirectory of the root.
  const scopedTo = (subdirectory) => ({
    ...options,
    directory: join(options.directory, subdirectory)
  });
  const events = createVoiceFileIntegrationEventStore(scopedTo("events"));
  const reviews = createVoiceFileReviewStore(scopedTo("reviews"));
  const session = createVoiceFileSessionStore(scopedTo("sessions"));
  const tasks = createVoiceFileTaskStore(scopedTo("tasks"));
  return { events, reviews, session, tasks };
};
|
|
3544
|
+
/** Returns `artifact` as processed by `withVoiceCallReviewId` for `id`. */
var createStoredVoiceCallReviewArtifact = (id, artifact) => {
  return withVoiceCallReviewId(id, artifact);
};

/** Returns `task` as processed by `withVoiceOpsTaskId` for `id`. */
var createStoredVoiceOpsTask = (id, task) => {
  return withVoiceOpsTaskId(id, task);
};

/** Returns `event` as processed by `withVoiceIntegrationEventId` for `id`. */
var createStoredVoiceIntegrationEvent = (id, event) => {
  return withVoiceIntegrationEventId(id, event);
};
|
|
939
3547
|
// src/memoryStore.ts
|
|
940
3548
|
var createVoiceMemoryStore = () => {
|
|
941
3549
|
const sessions = new Map;
|
|
@@ -957,10 +3565,825 @@ var createVoiceMemoryStore = () => {
|
|
|
957
3565
|
};
|
|
958
3566
|
return { get, getOrCreate, list, remove, set };
|
|
959
3567
|
};
|
|
3568
|
+
// src/correction.ts
|
|
3569
|
+
/**
 * Backslash-escapes every regular-expression syntax character in `value` so it
 * can be embedded in a pattern and matched literally.
 */
var escapeRegExp = (value) => {
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
};

/**
 * Builds a global, case-insensitive, Unicode-aware matcher for `alias` that
 * only matches when the alias is not directly preceded or followed by a
 * letter, a digit or an apostrophe.
 */
var buildAliasMatcher = (alias) => {
  const literalAlias = escapeRegExp(alias);
  return new RegExp(`(?<![\\p{L}\\p{N}'])${literalAlias}(?![\\p{L}\\p{N}'])`, "giu");
};

/** Matches one word: a run of letters, digits and apostrophes. */
var WORD_PATTERN = /[\p{L}\p{N}']+/gu;

/**
 * Canonical form used for text comparison: lower-cased, with everything except
 * letters, digits, whitespace and apostrophes turned into spaces, whitespace
 * runs collapsed to a single space, and both ends trimmed.
 */
var normalizeComparableText = (value) => {
  const lowered = value.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s']/gu, " ");
  const singleSpaced = withoutPunctuation.replace(/\s+/g, " ");
  return singleSpaced.trim();
};

/** Canonical comparison form of a domain term (same as `normalizeComparableText`). */
var normalizeDomainTerm = (value) => {
  return normalizeComparableText(value);
};
|
|
3574
|
+
/**
 * Splits `value` into word tokens (per `WORD_PATTERN`) and records where each
 * token sits in the original string.
 *
 * @param {string} value - Text to tokenize.
 * @returns {Array<{ end: number, start: number, text: string }>} Tokens in order
 *   of appearance; `end` is exclusive.
 */
var tokenizeWithIndices = (value) => {
  const tokens = [];
  for (const { 0: text, index } of value.matchAll(WORD_PATTERN)) {
    const start = index ?? -1;
    // Skip a match that reports no position.
    if (start >= 0) {
      tokens.push({ end: start + text.length, start, text });
    }
  }
  return tokens;
};
|
|
3591
|
+
/**
 * Computes the Levenshtein edit distance between two strings, comparing them
 * one string index (UTF-16 code unit) at a time.
 *
 * @param {string} left - First string.
 * @param {string} right - Second string.
 * @returns {number} Minimum number of single-unit insertions, deletions and
 *   substitutions that turn `left` into `right`.
 */
var levenshteinDistance = (left, right) => {
  if (left === right) {
    return 0;
  }
  if (left.length === 0) {
    return right.length;
  }
  if (right.length === 0) {
    return left.length;
  }
  const width = right.length + 1;
  // Two rolling rows of the DP table; they swap roles after every row.
  let previousRow = Array.from({ length: width }, (_, column) => column);
  let currentRow = new Array(width);
  for (let row = 1; row <= left.length; row += 1) {
    currentRow[0] = row;
    for (let column = 1; column < width; column += 1) {
      const substitutionCost = left[row - 1] === right[column - 1] ? 0 : 1;
      const insertion = currentRow[column - 1] + 1;
      const deletion = previousRow[column] + 1;
      const substitution = previousRow[column - 1] + substitutionCost;
      currentRow[column] = Math.min(insertion, deletion, substitution);
    }
    [previousRow, currentRow] = [currentRow, previousRow];
  }
  return previousRow[right.length];
};
|
|
3615
|
+
/**
 * Maps a risk tier to the largest normalized edit-distance score that still
 * counts as a fuzzy alias match.
 *
 * @param {string} riskTier - `"safe"`, `"balanced"` or `"risky"`.
 * @returns {number} `0.14` for balanced, `0.2` for risky, and `-1` otherwise.
 *   A negative threshold can never be met because scores are non-negative, so
 *   it disables fuzzy matching.
 */
var resolveFuzzyThreshold = (riskTier) => {
  switch (riskTier) {
    case "balanced":
      return 0.14;
    case "risky":
      return 0.2;
    case "safe":
    default:
      // An unrecognized tier used to yield `undefined`; any `score > undefined`
      // comparison is false, so callers would accept every candidate. Treat
      // unknown tiers like "safe" instead.
      return -1;
  }
};
|
|
3625
|
+
/**
 * Decides whether `alias` is long enough for fuzzy matching under `riskTier`.
 *
 * @param {string} alias - Alias to evaluate.
 * @param {string} riskTier - `"safe"` never allows fuzzy matching; `"balanced"`
 *   requires at least three words; any other tier requires at least two.
 * @returns {boolean} Whether fuzzy matching may be attempted for this alias.
 */
var canUseTieredFuzzyAlias = (alias, riskTier) => {
  if (riskTier === "safe") {
    return false;
  }
  const words = normalizeComparableText(alias).split(" ").filter((word) => word.length > 0);
  const requiredWords = riskTier === "balanced" ? 3 : 2;
  return words.length >= requiredWords;
};
|
|
3632
|
+
/**
 * Searches `text` for the run of words that is closest to `alias`.
 *
 * Candidate windows are one word shorter than the alias up to one word longer,
 * must start with the alias's first word (after normalization), and are scored
 * as Levenshtein distance divided by the longer normalized length. Windows that
 * score above the tier threshold are rejected. The lowest score wins; ties go
 * to the longer span of original text.
 *
 * @param {string} text - Text to search.
 * @param {string} alias - Multi-word alias; single-word aliases never match.
 * @param {string} riskTier - Tier passed to `resolveFuzzyThreshold`.
 * @returns {{ end: number, score: number, start: number } | undefined} Character
 *   range (`end` exclusive) and score of the best window, if any qualifies.
 */
var findFuzzyAliasMatch = (text, alias, riskTier) => {
  // The threshold is loop-invariant, so resolve it once. Scores are never
  // negative, so a negative or non-numeric threshold means nothing can match.
  const threshold = resolveFuzzyThreshold(riskTier);
  if (typeof threshold !== "number" || Number.isNaN(threshold) || threshold < 0) {
    return;
  }
  const tokens = tokenizeWithIndices(text);
  const aliasTokens = normalizeComparableText(alias).split(" ").filter((token) => token.length > 0);
  if (tokens.length === 0 || aliasTokens.length < 2) {
    return;
  }
  const minWindowLength = Math.max(1, aliasTokens.length - 1);
  const maxWindowLength = Math.min(tokens.length, aliasTokens.length + 1);
  const normalizedAlias = aliasTokens.join(" ");
  const normalizedAliasFirstToken = aliasTokens[0] ?? "";
  let bestMatch;
  for (let startIndex = 0; startIndex < tokens.length; startIndex += 1) {
    for (let windowLength = minWindowLength; windowLength <= maxWindowLength; windowLength += 1) {
      const endIndex = startIndex + windowLength - 1;
      if (endIndex >= tokens.length) {
        // Longer windows from this start would overrun the text as well.
        break;
      }
      const windowTokens = tokens.slice(startIndex, endIndex + 1);
      const normalizedWindow = normalizeComparableText(windowTokens.map((token) => token.text).join(" "));
      if (!normalizedWindow) {
        continue;
      }
      // Anchor on the first word to keep unrelated phrases from matching.
      const [windowFirstToken] = normalizedWindow.split(" ");
      if (windowFirstToken !== normalizedAliasFirstToken) {
        continue;
      }
      const distance = levenshteinDistance(normalizedWindow, normalizedAlias);
      const denominator = Math.max(normalizedWindow.length, normalizedAlias.length);
      const score = denominator > 0 ? distance / denominator : 0;
      if (score > threshold) {
        continue;
      }
      const candidate = {
        end: windowTokens[windowTokens.length - 1].end,
        score,
        start: windowTokens[0].start
      };
      const isBetter = !bestMatch
        || candidate.score < bestMatch.score
        || (candidate.score === bestMatch.score && candidate.end - candidate.start > bestMatch.end - bestMatch.start);
      if (isBetter) {
        bestMatch = candidate;
      }
    }
  }
  return bestMatch;
};
|
|
3676
|
+
/**
 * Returns the hint's aliases trimmed, with blank ones removed, ordered longest
 * first.
 *
 * @param {{ aliases?: string[] | null }} hint - Phrase hint.
 * @returns {string[]} A new array; empty when the hint has no aliases.
 */
var normalizeHintAliases = (hint) => {
  const cleaned = [];
  for (const alias of hint.aliases ?? []) {
    const trimmed = alias.trim();
    if (trimmed.length > 0) {
      cleaned.push(trimmed);
    }
  }
  cleaned.sort((left, right) => right.length - left.length);
  return cleaned;
};
|
|
3677
|
+
/**
 * Applies phrase-hint corrections to `text` using the `"risky"` tier.
 *
 * @param {string} text - Text to correct.
 * @param {Array<object>} phraseHints - Hints whose aliases should be replaced.
 * @returns {{ changed: boolean, matches: Array<object>, text: string }} The
 *   result of `applyRiskTieredPhraseHintCorrections`.
 */
var applyPhraseHintCorrections = (text, phraseHints) => applyRiskTieredPhraseHintCorrections(text, phraseHints, { riskTier: "risky" });
|
|
3682
|
+
/**
 * Rewrites occurrences of hint aliases in `text` with the hint's canonical text.
 *
 * For each hint, aliases are tried longest first. An exact (bounded,
 * case-insensitive) alias match replaces every occurrence; otherwise, when the
 * tier permits it for that alias, the single best fuzzy match is replaced. Only
 * the first alias that matches is applied per hint.
 *
 * @param {string} text - Text to correct.
 * @param {Array<{ text: string, aliases?: string[] }>} phraseHints - Hints to apply, in order.
 * @param {{ riskTier?: string }} [options] - `riskTier` defaults to `"safe"`.
 * @returns {{ changed: boolean, matches: Array<{ alias: string, hint: object }>, text: string }}
 *   Corrected text, the alias/hint pairs that were applied, and whether the
 *   text differs from the input.
 */
var applyRiskTieredPhraseHintCorrections = (text, phraseHints, options = {}) => {
  const riskTier = options.riskTier ?? "safe";
  let corrected = text;
  const matches = [];
  for (const hint of phraseHints) {
    for (const alias of normalizeHintAliases(hint)) {
      const matcher = buildAliasMatcher(alias);
      if (matcher.test(corrected)) {
        // Use a replacer function so the hint text is inserted literally: a
        // replacement *string* would expand `$&`, `$$`, "$`" and `$'`.
        // (`replace` resets `lastIndex` for global patterns, so the preceding
        // `test` call does not affect it.)
        corrected = corrected.replace(matcher, () => hint.text);
        matches.push({
          alias,
          hint
        });
        break;
      }
      if (!canUseTieredFuzzyAlias(alias, riskTier)) {
        continue;
      }
      const fuzzyMatch = findFuzzyAliasMatch(corrected, alias, riskTier);
      if (!fuzzyMatch) {
        continue;
      }
      // Splice the canonical text over the matched character range.
      corrected = `${corrected.slice(0, fuzzyMatch.start)}${hint.text}${corrected.slice(fuzzyMatch.end)}`;
      matches.push({
        alias,
        hint
      });
      break;
    }
  }
  return {
    changed: corrected !== text,
    matches,
    text: corrected
  };
};
|
|
3718
|
+
/**
 * Removes aliases that normalize to an empty string or to a form already seen,
 * keeping the first spelling of each and preserving input order.
 *
 * @param {string[]} aliases - Candidate aliases.
 * @returns {string[]} De-duplicated aliases in their original spelling.
 */
var dedupeAliases = (aliases) => {
  // Map preserves insertion order, so values come back in first-seen order.
  const firstSpellingByKey = new Map();
  for (const alias of aliases) {
    const key = normalizeDomainTerm(alias);
    if (key && !firstSpellingByKey.has(key)) {
      firstSpellingByKey.set(key, alias);
    }
  }
  return [...firstSpellingByKey.values()];
};
|
|
3731
|
+
/**
 * Judges whether an alias is distinctive enough to be used at the stricter
 * tiers: its normalized form must be at least 4 characters long and either
 * contain two or more words or be at least 7 characters long.
 *
 * @param {string} alias - Alias to evaluate.
 * @returns {boolean} Whether the alias passes the check.
 */
var isSafeAlias = (alias) => {
  const normalized = normalizeDomainTerm(alias);
  if (normalized.length < 4) {
    return false;
  }
  if (normalized.length >= 7) {
    return true;
  }
  const wordCount = normalized.split(" ").filter((word) => word.length > 0).length;
  return wordCount >= 2;
};
|
|
3739
|
+
/**
 * Converts domain terms into phrase hints, filtering aliases by risk tier.
 *
 * Terms whose normalized text is empty or repeats an earlier term are skipped.
 * Aliases are de-duplicated, then kept according to the tier: `"risky"` keeps
 * all; `"balanced"` keeps aliases that pass `isSafeAlias` or normalize to the
 * term itself; any other tier keeps only aliases that pass `isSafeAlias`.
 *
 * @param {Array<object>} terms - Domain terms (`text`, optional `aliases`, `boost`, `metadata`).
 * @param {{ riskTier?: string }} [options] - `riskTier` defaults to `"safe"`.
 * @returns {Array<object>} Hints with `aliases` (or `undefined` when none
 *   remain), `boost`, `metadata` and the term's original `text`.
 */
var createDomainPhraseHints = (terms, options = {}) => {
  const riskTier = options.riskTier ?? "safe";
  const seenTerms = new Set();
  const hints = [];
  for (const term of terms) {
    const termKey = normalizeDomainTerm(term.text);
    if (!termKey || seenTerms.has(termKey)) {
      continue;
    }
    seenTerms.add(termKey);
    const keepAlias = (alias) => {
      switch (riskTier) {
        case "risky":
          return true;
        case "balanced":
          return isSafeAlias(alias) || normalizeDomainTerm(alias) === termKey;
        default:
          return isSafeAlias(alias);
      }
    };
    const keptAliases = dedupeAliases(term.aliases ?? []).filter(keepAlias);
    hints.push({
      aliases: keptAliases.length > 0 ? keptAliases : undefined,
      boost: term.boost,
      metadata: term.metadata,
      text: term.text
    });
  }
  return hints;
};
|
|
3768
|
+
/**
 * Converts domain terms into lexicon entries.
 *
 * Terms whose normalized text is empty or repeats an earlier term are skipped;
 * aliases are de-duplicated but not otherwise filtered.
 *
 * @param {Array<object>} terms - Domain terms (`text`, optional `aliases`,
 *   `language`, `metadata`, `pronunciation`).
 * @returns {Array<object>} Lexicon entries in input order.
 */
var createDomainLexicon = (terms) => {
  const seenTerms = new Set();
  const entries = [];
  for (const term of terms) {
    const termKey = normalizeDomainTerm(term.text);
    if (!termKey || seenTerms.has(termKey)) {
      continue;
    }
    seenTerms.add(termKey);
    const { language, metadata, pronunciation, text } = term;
    entries.push({
      aliases: dedupeAliases(term.aliases ?? []),
      language,
      metadata,
      pronunciation,
      text
    });
  }
  return entries;
};
|
|
3787
|
+
/**
 * Averages the numeric `confidence` values found on a list of transcripts.
 *
 * Only finite numbers are counted: a `NaN` or infinite confidence would
 * otherwise poison the mean, and every comparison against the result would
 * then be false. Nullish input and nullish entries are ignored.
 *
 * @param {Array<{ confidence?: number } | null | undefined> | null | undefined} transcripts
 *   Transcripts to average.
 * @returns {number | undefined} Mean confidence, or `undefined` when no
 *   transcript carries a usable confidence.
 */
var averageTranscriptConfidence = (transcripts) => {
  let total = 0;
  let count = 0;
  for (const transcript of transcripts ?? []) {
    const confidence = transcript?.confidence;
    if (typeof confidence === "number" && Number.isFinite(confidence)) {
      total += confidence;
      count += 1;
    }
  }
  return count > 0 ? total / count : undefined;
};
|
|
3791
|
+
/**
 * Creates a turn-correction handler that rewrites text using phrase hints.
 *
 * @param {{ provider?: string, reason?: string }} [options] - Labels attached to
 *   each correction; default to `"@absolutejs/voice"` and `"phrase-hint-correction"`.
 * @returns {Function} Async handler taking `{ phraseHints, text }`. It resolves
 *   to `undefined` when nothing changed, otherwise to
 *   `{ metadata, provider, reason, text }`.
 */
var createPhraseHintCorrectionHandler = (options = {}) => {
  const provider = options.provider ?? "@absolutejs/voice";
  const reason = options.reason ?? "phrase-hint-correction";
  // Metadata is only attached when at least one alias/hint pair was applied.
  const describeMatches = (matches) => {
    if (matches.length === 0) {
      return undefined;
    }
    return {
      matchedAliases: matches.map((match) => match.alias),
      matchedHints: matches.map((match) => match.hint.text)
    };
  };
  return async ({ phraseHints, text }) => {
    const { changed, matches, text: correctedText } = applyPhraseHintCorrections(text, phraseHints);
    if (!changed) {
      return;
    }
    return {
      metadata: describeMatches(matches),
      provider,
      reason,
      text: correctedText
    };
  };
};
|
|
3810
|
+
/**
 * Projects lexicon entries onto the phrase-hint shape, keeping only `aliases`,
 * `metadata` and `text`.
 *
 * @param {Array<object>} lexicon - Lexicon entries.
 * @returns {Array<{ aliases: unknown, metadata: unknown, text: unknown }>} One
 *   hint per entry, in order.
 */
var lexiconToPhraseHints = (lexicon) => {
  return lexicon.map(({ aliases, metadata, text }) => ({ aliases, metadata, text }));
};
|
|
3815
|
+
/**
 * Applies lexicon-based corrections to `text` by treating every lexicon entry
 * as a phrase hint.
 *
 * @param {string} text - Text to correct.
 * @param {Array<object>} lexicon - Lexicon entries.
 * @returns {{ changed: boolean, matches: Array<object>, text: string }} The
 *   result of `applyPhraseHintCorrections`.
 */
var applyLexiconCorrections = (text, lexicon) => {
  const hints = lexiconToPhraseHints(lexicon);
  return applyPhraseHintCorrections(text, hints);
};
|
|
3816
|
+
/**
 * Creates a turn-correction handler that rewrites text using the lexicon.
 *
 * @param {{ provider?: string, reason?: string }} [options] - Labels attached to
 *   each correction; default to `"@absolutejs/voice"` and `"lexicon-correction"`.
 * @returns {Function} Async handler taking `{ lexicon, text }`. It resolves to
 *   `undefined` when nothing changed, otherwise to
 *   `{ metadata, provider, reason, text }`.
 */
var createLexiconCorrectionHandler = (options = {}) => {
  const provider = options.provider ?? "@absolutejs/voice";
  const reason = options.reason ?? "lexicon-correction";
  // Metadata is only attached when at least one alias/hint pair was applied.
  const describeMatches = (matches) => {
    if (matches.length === 0) {
      return undefined;
    }
    return {
      matchedAliases: matches.map((match) => match.alias),
      matchedHints: matches.map((match) => match.hint.text)
    };
  };
  return async ({ lexicon, text }) => {
    const { changed, matches, text: correctedText } = applyLexiconCorrections(text, lexicon);
    if (!changed) {
      return;
    }
    return {
      metadata: describeMatches(matches),
      provider,
      reason,
      text: correctedText
    };
  };
};
|
|
3835
|
+
/**
 * Creates a turn-correction handler that only corrects turns whose average
 * transcript confidence is not above a ceiling, using phrase hints and the
 * lexicon together.
 *
 * @param {{ provider?: string, reason?: string, riskTier?: string, maxAverageConfidence?: number }} [options]
 *   Defaults: provider `"@absolutejs/voice"`, reason `"risky-turn-correction"`,
 *   riskTier `"balanced"`, maxAverageConfidence `0.92`.
 * @returns {Function} Async handler taking `{ lexicon, phraseHints, text, transcripts }`.
 *   It resolves to `undefined` when the turn is confident enough or nothing
 *   changed, otherwise to `{ metadata, provider, reason, text }`.
 */
var createRiskyTurnCorrectionHandler = (options = {}) => {
  const provider = options.provider ?? "@absolutejs/voice";
  const reason = options.reason ?? "risky-turn-correction";
  const riskTier = options.riskTier ?? "balanced";
  const maxAverageConfidence = options.maxAverageConfidence ?? 0.92;
  return async ({ lexicon, phraseHints, text, transcripts }) => {
    const averageConfidence = averageTranscriptConfidence(transcripts);
    // Turns with no confidence data at all are still eligible for correction.
    const isConfident = averageConfidence !== undefined && averageConfidence > maxAverageConfidence;
    if (isConfident) {
      return;
    }
    // Explicit phrase hints are applied before lexicon-derived ones.
    const combinedHints = [...phraseHints, ...lexiconToPhraseHints(lexicon)];
    const outcome = applyRiskTieredPhraseHintCorrections(text, combinedHints, { riskTier });
    if (!outcome.changed) {
      return;
    }
    const matchedAliases = outcome.matches.map((match) => match.alias);
    const matchedHints = outcome.matches.map((match) => match.hint.text);
    return {
      metadata: { averageConfidence, matchedAliases, matchedHints, riskTier },
      provider,
      reason,
      text: outcome.text
    };
  };
};
|
|
3865
|
+
|
|
3866
|
+
// src/routing.ts
|
|
3867
|
+
/**
 * Maps a routing goal to an STT routing strategy description.
 *
 * @param {string} [goal="best"] "low-cost" selects the single-pass,
 *   no-correction path; any other value selects the corrected path.
 * @returns {{ benchmarkSessionTarget: string, correctionMode: string, goal: string,
 *   notes: string[], preset: string, sttLifecycle: string }} A fresh strategy
 *   object that echoes `goal` back.
 */
var resolveVoiceSTTRoutingStrategy = (goal = "best") => {
  if (goal !== "low-cost") {
    return {
      benchmarkSessionTarget: "deepgram-corrected",
      correctionMode: "generic",
      goal,
      notes: [
        "Uses the current best in-package path: Deepgram Flux with generic deterministic correction.",
        "Optimized for accuracy and robustness rather than minimum processing cost."
      ],
      preset: "reliability",
      sttLifecycle: "continuous"
    };
  }
  return {
    benchmarkSessionTarget: "deepgram-flux",
    correctionMode: "none",
    goal,
    notes: [
      "Uses the cheapest in-package path: one primary STT pass with no correction hook.",
      "Good for baseline throughput and lower post-processing overhead."
    ],
    preset: "default",
    sttLifecycle: "turn-scoped"
  };
};
|
|
3893
|
+
/**
 * Picks the correction handler for a routing correction mode.
 *
 * @param {string} [mode="generic"] "none" yields no handler, "risky-turn" the
 *   risky-turn handler, and anything else the phrase-hint handler.
 * @returns {Function|undefined} A handler built with default options, or
 *   `undefined` for "none".
 */
var createVoiceSTTRoutingCorrectionHandler = (mode = "generic") => {
  switch (mode) {
    case "none":
      return undefined;
    case "risky-turn":
      return createRiskyTurnCorrectionHandler();
    default:
      return createPhraseHintCorrectionHandler();
  }
};
|
|
3902
|
+
// src/telephony/twilio.ts
|
|
3903
|
+
import { Buffer as Buffer2 } from "buffer";
|
|
3904
|
+
// Sample rate (Hz) used for the Twilio-side mu-law audio: the source rate when
// inbound payloads are resampled and the target rate for outbound payloads.
var TWILIO_MULAW_SAMPLE_RATE = 8000;
|
|
3905
|
+
// Sample rate (Hz) that inbound Twilio audio is resampled to before it is
// handed to the voice session as 16-bit PCM.
var VOICE_PCM_SAMPLE_RATE = 16000;
|
|
3906
|
+
/**
 * Escapes the five XML-significant characters so `value` can be embedded in
 * element content or a quoted attribute.
 *
 * The ampersand is replaced first; otherwise the `&` introduced by the other
 * replacements would be escaped a second time.
 *
 * @param {string} value Raw text.
 * @returns {string} Text with `&`, `"`, `'`, `<` and `>` replaced by their
 *   predefined XML entities.
 */
var escapeXml = (value) => value
  .replaceAll("&", "&amp;")
  .replaceAll('"', "&quot;")
  .replaceAll("'", "&apos;")
  .replaceAll("<", "&lt;")
  .replaceAll(">", "&gt;");
|
|
3907
|
+
/**
 * Adapts an `onTurn` handler to the single-argument calling convention.
 *
 * A handler declaring more than one parameter is treated as the positional
 * form `(session, turn, api, context)` and wrapped; any other handler is
 * returned as-is.
 *
 * @param {Function} handler User-supplied turn handler.
 * @returns {Function} Handler accepting one `{ context, session, turn, api }` object.
 */
var normalizeOnTurn2 = (handler) => {
  if (!(handler.length > 1)) {
    return handler;
  }
  return async (input) => {
    const { context, session, turn, api } = input;
    return handler(session, turn, api, context);
  };
};
|
|
3914
|
+
/**
 * Fills in defaults for an STT fallback configuration.
 *
 * @param {object} [config] Partial fallback settings; a falsy value disables fallback.
 * @returns {object|undefined} `undefined` when `config` is falsy, otherwise a
 *   new object in which every null/undefined tuning value has been replaced by
 *   its default. `adapter` is passed through unchanged.
 */
var resolveSTTFallbackConfig2 = (config) => {
  if (config) {
    const {
      adapter,
      completionTimeoutMs,
      confidenceThreshold,
      maxAttemptsPerTurn,
      minTextLength,
      replayWindowMs,
      settleMs,
      trigger
    } = config;
    return {
      adapter,
      completionTimeoutMs: completionTimeoutMs ?? 2500,
      confidenceThreshold: confidenceThreshold ?? 0.6,
      maxAttemptsPerTurn: maxAttemptsPerTurn ?? 1,
      minTextLength: minTextLength ?? 2,
      replayWindowMs: replayWindowMs ?? 8000,
      settleMs: settleMs ?? 220,
      trigger: trigger ?? "empty-or-low-confidence"
    };
  }
  return undefined;
};
|
|
3929
|
+
/**
 * Cleans up a list of phrase hints.
 *
 * Each hint is copied with its `text` trimmed and its `aliases` (when present)
 * reduced to non-blank strings; hints whose trimmed text is empty are dropped.
 *
 * @param {Array<object>} [hints] Raw hints; null/undefined is treated as empty.
 * @returns {Array<object>} New array of normalized hint copies.
 */
var normalizePhraseHints2 = (hints) => {
  const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
  return (hints ?? []).flatMap((hint) => {
    const cleaned = {
      ...hint,
      aliases: hint.aliases?.filter(isNonBlankString),
      text: hint.text.trim()
    };
    return cleaned.text.length > 0 ? [cleaned] : [];
  });
};
|
|
3934
|
+
/**
 * Cleans up a list of lexicon entries.
 *
 * Each entry is copied with `text` trimmed, `aliases` (when present) reduced
 * to non-blank strings, and `language` / `pronunciation` trimmed or set to
 * `undefined` when they are not non-blank strings. Entries whose trimmed text
 * is empty are dropped.
 *
 * @param {Array<object>} [entries] Raw entries; null/undefined is treated as empty.
 * @returns {Array<object>} New array of normalized entry copies.
 */
var normalizeLexicon2 = (entries) => {
  const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
  const trimmedOrUndefined = (value) => (isNonBlankString(value) ? value.trim() : undefined);
  return (entries ?? []).flatMap((entry) => {
    const cleaned = {
      ...entry,
      aliases: entry.aliases?.filter(isNonBlankString),
      language: trimmedOrUndefined(entry.language),
      pronunciation: trimmedOrUndefined(entry.pronunciation),
      text: entry.text.trim()
    };
    return cleaned.text.length > 0 ? [cleaned] : [];
  });
};
|
|
3941
|
+
/**
 * Rounds a number and limits it to the signed 16-bit range.
 *
 * @param {number} value Sample value, possibly fractional or out of range.
 * @returns {number} Rounded value within [-32768, 32767].
 */
var clamp16 = (value) => {
  const rounded = Math.round(value);
  if (rounded > 32767) {
    return 32767;
  }
  if (rounded < -32768) {
    return -32768;
  }
  return rounded;
};
|
|
3942
|
+
/**
 * Resamples 16-bit samples to another rate by linear interpolation.
 *
 * @param {Int16Array|number[]} input Source samples.
 * @param {number} inputRate Source sample rate.
 * @param {number} outputRate Target sample rate.
 * @returns {Int16Array} A new array: empty for empty input, a copy when the
 *   rates are equal, otherwise at least one interpolated sample.
 */
var linearResample = (input, inputRate, outputRate) => {
  const sourceLength = input.length;
  if (sourceLength === 0) {
    return new Int16Array(0);
  }
  if (inputRate === outputRate) {
    return new Int16Array(input);
  }
  const targetLength = Math.max(1, Math.round(sourceLength * outputRate / inputRate));
  const resampled = new Int16Array(targetLength);
  const step = inputRate / outputRate;
  const lastSourceIndex = sourceLength - 1;
  for (let target = 0; target < targetLength; target += 1) {
    const position = target * step;
    const lower = Math.floor(position);
    // Both neighbours are pinned to the final sample near the end of the input.
    const upper = Math.min(lastSourceIndex, lower + 1);
    const fraction = position - lower;
    const lowerSample = input[Math.min(lower, lastSourceIndex)] ?? 0;
    const upperSample = input[upper] ?? lowerSample;
    resampled[target] = clamp16(lowerSample + (upperSample - lowerSample) * fraction);
  }
  return resampled;
};
|
|
3963
|
+
// Bias added to a sample's magnitude before the mu-law exponent search when
// encoding, and removed again when decoding.
var MULAW_BIAS = 132;
|
|
3964
|
+
// Largest magnitude accepted by the mu-law encoder; larger values are clipped
// to this before the bias is added (32635 + 132 = 32767).
var MULAW_CLIP = 32635;
|
|
3965
|
+
/**
 * Encodes one linear PCM sample as an 8-bit mu-law byte.
 *
 * @param {number} sample Linear sample; clamped to 16 bits first.
 * @returns {number} Encoded byte (0-255), stored bit-inverted.
 */
var encodeMulawSample = (sample) => {
  const clamped = clamp16(sample);
  const isNegative = clamped < 0;
  const signBit = isNegative ? 128 : 0;
  const biased = Math.min(MULAW_CLIP, isNegative ? -clamped : clamped) + MULAW_BIAS;
  // Walk down from bit 14 until the highest set bit is found; each step lowers
  // the segment (exponent) by one, stopping at zero.
  let exponent = 7;
  let mask = 16384;
  while (exponent > 0 && (biased & mask) === 0) {
    exponent -= 1;
    mask >>= 1;
  }
  const mantissa = (biased >> (exponent + 3)) & 15;
  return ~(signBit | (exponent << 4) | mantissa) & 255;
};
|
|
3981
|
+
/**
 * Decodes one 8-bit mu-law byte to a linear sample.
 *
 * @param {number} value Encoded byte; it is bit-inverted before its fields are read.
 * @returns {number} Linear sample, negated when the sign bit is set.
 */
var decodeMulawSample = (value) => {
  const inverted = ~value & 255;
  const isNegative = (inverted & 128) !== 0;
  const exponent = (inverted >> 4) & 7;
  const mantissa = inverted & 15;
  const magnitude = (((mantissa << 3) + MULAW_BIAS) << exponent) - MULAW_BIAS;
  return isNegative ? -magnitude : magnitude;
};
|
|
3990
|
+
/**
 * Serializes 16-bit samples as little-endian bytes.
 *
 * @param {Int16Array|number[]} samples Samples to serialize; missing elements count as 0.
 * @returns {Uint8Array} Two bytes per sample, low byte first.
 */
var int16ArrayToBytes = (samples) => {
  const buffer = new ArrayBuffer(samples.length * 2);
  const writer = new DataView(buffer);
  let byteOffset = 0;
  for (let position = 0; position < samples.length; position += 1) {
    writer.setInt16(byteOffset, samples[position] ?? 0, true);
    byteOffset += 2;
  }
  return new Uint8Array(buffer);
};
|
|
3998
|
+
/**
 * Reads little-endian 16-bit samples out of a byte view.
 *
 * @param {Uint8Array} bytes Byte view; its `byteOffset` and `byteLength` are
 *   honoured, and a trailing odd byte is ignored.
 * @returns {Int16Array} One sample per complete byte pair.
 */
var bytesToInt16Array = (bytes) => {
  const pairCount = Math.floor(bytes.byteLength / 2);
  const samples = new Int16Array(pairCount);
  const reader = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  let position = 0;
  while (position < pairCount) {
    samples[position] = reader.getInt16(position * 2, true);
    position += 1;
  }
  return samples;
};
|
|
4007
|
+
/**
 * Decodes a base64 mu-law payload into linear 16-bit samples.
 *
 * @param {string} payload Base64 text holding one mu-law byte per sample.
 * @returns {Int16Array} One decoded sample per payload byte.
 */
var decodeTwilioMulawBase64 = (payload) => {
  const encoded = Uint8Array.from(Buffer2.from(payload, "base64"));
  return Int16Array.from(encoded, (byte) => decodeMulawSample(byte ?? 0));
};
|
|
4015
|
+
/**
 * Encodes linear 16-bit samples as a base64 mu-law payload.
 *
 * @param {Int16Array|number[]} samples Linear samples; missing elements count as 0.
 * @returns {string} Base64 text holding one mu-law byte per sample.
 */
var encodeTwilioMulawBase64 = (samples) => {
  const encoded = Uint8Array.from(samples, (sample) => encodeMulawSample(sample ?? 0));
  return Buffer2.from(encoded).toString("base64");
};
|
|
4022
|
+
/**
 * Converts an inbound Twilio media payload into little-endian PCM16 bytes.
 *
 * The payload is mu-law decoded, resampled from TWILIO_MULAW_SAMPLE_RATE to
 * VOICE_PCM_SAMPLE_RATE, then serialized.
 *
 * @param {string} payload Base64 mu-law payload.
 * @returns {Uint8Array} PCM16 bytes at the voice sample rate.
 */
var transcodeTwilioInboundPayloadToPCM16 = (payload) => {
  const decoded = decodeTwilioMulawBase64(payload);
  return int16ArrayToBytes(
    linearResample(decoded, TWILIO_MULAW_SAMPLE_RATE, VOICE_PCM_SAMPLE_RATE)
  );
};
|
|
4027
|
+
/**
 * Converts an outbound audio chunk into a base64 mu-law payload for Twilio.
 *
 * @param {Uint8Array} chunk Raw audio bytes.
 * @param {{ container: string, encoding: string, channels: number, sampleRateHz: number }} format
 *   Description of `chunk`.
 * @returns {string} Base64 mu-law payload at TWILIO_MULAW_SAMPLE_RATE.
 * @throws {Error} When the chunk is neither pass-through mu-law nor `pcm_s16le`.
 */
var transcodePCMToTwilioOutboundPayload = (chunk, format) => {
  // Raw mono mu-law already at the telephony rate needs no transcoding.
  const isTelephonyMulaw = format.container === "raw" && format.encoding === "mulaw" && format.channels === 1 && format.sampleRateHz === TWILIO_MULAW_SAMPLE_RATE;
  if (isTelephonyMulaw) {
    return Buffer2.from(chunk).toString("base64");
  }
  if (format.encoding !== "pcm_s16le") {
    throw new Error(`Unsupported outbound telephony audio format: ${format.container}/${format.encoding}`);
  }
  const interleaved = bytesToInt16Array(chunk);
  let mono = interleaved;
  if (format.channels !== 1) {
    // Any non-mono input is averaged pairwise, i.e. treated as two-channel frames.
    const frameCount = Math.floor(interleaved.length / 2);
    mono = new Int16Array(frameCount);
    for (let frame = 0; frame < frameCount; frame += 1) {
      const left = interleaved[frame * 2] ?? 0;
      const right = interleaved[frame * 2 + 1] ?? 0;
      mono[frame] = clamp16((left + right) / 2);
    }
  }
  return encodeTwilioMulawBase64(
    linearResample(mono, format.sampleRateHz, TWILIO_MULAW_SAMPLE_RATE)
  );
};
|
|
4043
|
+
/**
 * Normalizes an incoming Twilio websocket message.
 *
 * @param {string|object} raw JSON text or an already-parsed message.
 * @returns {*} The parsed JSON for string input; any other value unchanged.
 * @throws {SyntaxError} When `raw` is a string that is not valid JSON.
 */
var parseTwilioMessage = (raw) => (typeof raw === "string" ? JSON.parse(raw) : raw);
|
|
4049
|
+
/**
 * Wraps a Twilio media-stream socket in the `{ close, send }` shape used by
 * the voice session, translating voice messages into Twilio frames.
 *
 * @param {{ close: Function, send: Function }} socket Underlying Twilio socket.
 * @param {Function} getState Returns the current bridge state (stream/call
 *   ids, optional review recorder and `onVoiceMessage` callback).
 * @returns {{ close: Function, send: Function }} Async adapter methods.
 */
var createTwilioSocketAdapter = (socket, getState) => ({
  async close(code, reason) {
    await Promise.resolve(socket.close(code, reason));
  },
  async send(data) {
    // Only JSON text messages are handled; anything else is ignored.
    if (typeof data !== "string") {
      return;
    }
    const state = getState();
    const message = JSON.parse(data);
    state.reviewRecorder?.recordVoiceMessage(message);
    await Promise.resolve(state.onVoiceMessage?.({
      callSid: state.callSid ?? undefined,
      message,
      sessionId: state.sessionId ?? "",
      streamSid: state.streamSid ?? undefined
    }));
    // Nothing can be sent to Twilio until a stream id is known.
    if (!state.streamSid) {
      return;
    }
    if (message.type === "audio") {
      const audioBytes = Uint8Array.from(Buffer2.from(message.chunkBase64, "base64"));
      const payload = transcodePCMToTwilioOutboundPayload(audioBytes, message.format);
      state.hasOutboundAudioSinceLastInbound = true;
      state.reviewRecorder?.recordTwilioOutbound({
        bytes: payload.length,
        event: "media",
        track: "outbound"
      });
      const mediaFrame = {
        event: "media",
        media: {
          payload
        },
        streamSid: state.streamSid
      };
      await Promise.resolve(socket.send(JSON.stringify(mediaFrame)));
      return;
    }
    const isAssistantTurn = message.type === "assistant" && message.turnId;
    if (!isAssistantTurn) {
      return;
    }
    // Assistant messages become a named mark frame.
    state.reviewRecorder?.recordTwilioOutbound({
      event: "mark",
      name: `assistant:${message.turnId}`
    });
    const markFrame = {
      event: "mark",
      mark: {
        name: `assistant:${message.turnId}`
      },
      streamSid: state.streamSid
    };
    await Promise.resolve(socket.send(JSON.stringify(markFrame)));
  }
});
|
|
4101
|
+
/**
 * Renders the TwiML document that connects a call to a media stream.
 *
 * @param {object} options
 * @param {string} options.streamUrl Websocket URL for the `<Stream>` element.
 * @param {string} [options.track] Optional `track` attribute.
 * @param {string} [options.streamName] Optional `name` attribute.
 * @param {Object<string, *>} [options.parameters] Custom parameters; entries
 *   whose value is `undefined` are skipped, all others are stringified.
 * @returns {string} TwiML with every interpolated value passed through `escapeXml`.
 */
var createTwilioVoiceResponse = (options) => {
  const parameterElements = Object.entries(options.parameters ?? {})
    .filter(([, value]) => value !== undefined)
    .map(([name, value]) => `<Parameter name="${escapeXml(name)}" value="${escapeXml(String(value))}" />`)
    .join("");
  const urlAttribute = ` url="${escapeXml(options.streamUrl)}"`;
  const trackAttribute = options.track ? ` track="${escapeXml(options.track)}"` : "";
  const nameAttribute = options.streamName ? ` name="${escapeXml(options.streamName)}"` : "";
  return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream${urlAttribute}${trackAttribute}${nameAttribute}>${parameterElements}</Stream></Connect></Response>`;
};
|
|
4105
|
+
/**
 * Bridges a Twilio Media Streams socket to a voice session.
 *
 * The voice session is created lazily on the first `start` or `media`
 * message. Creation awaits the lexicon and phrase-hint resolvers, so
 * overlapping messages share a single in-flight creation promise instead of
 * each building their own session.
 *
 * @param {{ close: Function, send: Function }} socket Twilio websocket.
 * @param {object} options Bridge configuration: STT/TTS adapters, `onTurn`
 *   and the other route callbacks, optional preset, turn detection, audio
 *   conditioning, reconnect, lexicon / phrase hints (values or resolver
 *   functions), review recording and initial session / scenario ids.
 * @returns {{ close: Function, getSessionId: Function, getStreamSid: Function,
 *   handleMessage: Function }} Bridge controls.
 */
var createTwilioMediaStreamBridge = (socket, options) => {
  const runtimePreset = resolveVoiceRuntimePreset(options.preset);
  // Explicit turn-detection options override the preset's values.
  const turnDetection = resolveTurnDetectionConfig({
    ...runtimePreset.turnDetection,
    ...options.turnDetection
  });
  const audioConditioning = options.audioConditioning !== undefined ? resolveAudioConditioningConfig(options.audioConditioning) : runtimePreset.audioConditioning;
  const logger = resolveLogger(options.logger);
  const reconnect = {
    maxAttempts: options.reconnect?.maxAttempts ?? 10,
    strategy: options.reconnect?.strategy ?? "resume-last-turn",
    timeout: options.reconnect?.timeout ?? 30000
  };
  const bridgeState = {
    callSid: null,
    hasOutboundAudioSinceLastInbound: false,
    onVoiceMessage: options.onVoiceMessage,
    reviewRecorder: options.review ? createVoiceCallReviewRecorder({
      config: options.review.config ?? {
        preset: options.preset,
        stt: {
          kind: options.stt.kind
        },
        tts: options.tts ? {
          kind: options.tts.kind
        } : undefined,
        turnDetection
      },
      fixtureId: options.review.fixtureId,
      path: options.review.path,
      title: options.review.title
    }) : undefined,
    scenarioId: options.scenarioId ?? null,
    sessionId: options.sessionId ?? null,
    streamSid: null
  };
  let sessionHandle = null;
  // In-flight session creation, shared by overlapping ensureSession() calls.
  let pendingSession = null;
  let reviewArtifactDelivered = false;
  const resolveLexicon2 = async () => {
    if (typeof options.lexicon === "function") {
      return normalizeLexicon2(await options.lexicon({
        context: options.context,
        scenarioId: bridgeState.scenarioId ?? undefined,
        sessionId: bridgeState.sessionId ?? ""
      }) ?? []);
    }
    return normalizeLexicon2(options.lexicon);
  };
  const resolvePhraseHints2 = async () => {
    if (typeof options.phraseHints === "function") {
      return normalizePhraseHints2(await options.phraseHints({
        context: options.context,
        scenarioId: bridgeState.scenarioId ?? undefined,
        sessionId: bridgeState.sessionId ?? ""
      }) ?? []);
    }
    return normalizePhraseHints2(options.phraseHints);
  };
  // Builds the voice session and stores it in `sessionHandle`.
  const createSession = async () => {
    // A session id is generated when neither the options nor Twilio supplied one.
    bridgeState.sessionId ??= `phone-${Date.now().toString(36)}`;
    const lexicon = await resolveLexicon2();
    const phraseHints = await resolvePhraseHints2();
    const normalizedOnTurn = normalizeOnTurn2(options.onTurn);
    const route = {
      correctTurn: options.correctTurn,
      onComplete: options.onComplete,
      onError: options.onError,
      onSession: options.onSession,
      onTurn: async (input) => {
        // Record the turn, and any assistant reply, before handing the result back.
        bridgeState.reviewRecorder?.recordVoiceMessage({
          type: "turn",
          turn: input.turn
        });
        const result = await normalizedOnTurn(input);
        if (result?.assistantText) {
          bridgeState.reviewRecorder?.recordVoiceMessage({
            type: "assistant",
            text: result.assistantText,
            turnId: input.turn.id
          });
        }
        return result;
      }
    };
    const voiceSocket = createTwilioSocketAdapter(socket, () => bridgeState);
    sessionHandle = createVoiceSession({
      audioConditioning,
      context: options.context,
      costTelemetry: options.costTelemetry,
      id: bridgeState.sessionId,
      languageStrategy: options.languageStrategy,
      lexicon,
      logger,
      phraseHints,
      reconnect,
      route,
      scenarioId: bridgeState.scenarioId ?? undefined,
      socket: voiceSocket,
      store: options.session,
      stt: options.stt,
      sttFallback: resolveSTTFallbackConfig2(options.sttFallback),
      sttLifecycle: options.sttLifecycle ?? runtimePreset.sttLifecycle,
      tts: options.tts,
      turnDetection
    });
    return sessionHandle;
  };
  const ensureSession = async () => {
    if (sessionHandle) {
      return sessionHandle;
    }
    // Clearing the pending promise on settle lets a later message retry after
    // a failed creation; after a success `sessionHandle` short-circuits above.
    pendingSession ??= createSession().finally(() => {
      pendingSession = null;
    });
    return pendingSession;
  };
  return {
    close: async (reason) => {
      await sessionHandle?.close(reason);
      // The review artifact is delivered at most once per bridge.
      if (bridgeState.reviewRecorder && options.review?.onArtifact && !reviewArtifactDelivered) {
        reviewArtifactDelivered = true;
        await Promise.resolve(options.review.onArtifact(bridgeState.reviewRecorder.finalize()));
      }
    },
    getSessionId: () => bridgeState.sessionId,
    getStreamSid: () => bridgeState.streamSid,
    handleMessage: async (raw) => {
      const message = parseTwilioMessage(raw);
      switch (message.event) {
        case "connected":
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "connected"
          });
          return;
        case "start": {
          bridgeState.streamSid = message.start.streamSid;
          bridgeState.callSid = message.start.callSid ?? null;
          // Non-blank custom parameters from Twilio override the configured ids.
          bridgeState.sessionId = message.start.customParameters?.sessionId?.trim() || bridgeState.sessionId;
          bridgeState.scenarioId = message.start.customParameters?.scenarioId?.trim() || bridgeState.scenarioId;
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "start",
            reason: message.start.callSid,
            text: bridgeState.sessionId ?? undefined
          });
          await ensureSession();
          return;
        }
        case "media": {
          const activeSession = await ensureSession();
          bridgeState.reviewRecorder?.recordTwilioInbound({
            bytes: message.media.payload.length,
            event: "media",
            track: message.media.track
          });
          // Inbound audio arriving after outbound audio was sent triggers a
          // "clear" frame, unless clearOnInboundMedia is false.
          if (options.clearOnInboundMedia !== false && bridgeState.hasOutboundAudioSinceLastInbound && bridgeState.streamSid) {
            bridgeState.reviewRecorder?.recordTwilioOutbound({
              event: "clear"
            });
            await Promise.resolve(socket.send(JSON.stringify({
              event: "clear",
              streamSid: bridgeState.streamSid
            })));
          }
          bridgeState.hasOutboundAudioSinceLastInbound = false;
          await activeSession.receiveAudio(transcodeTwilioInboundPayloadToPCM16(message.media.payload));
          return;
        }
        case "mark":
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "mark",
            name: message.mark?.name
          });
          return;
        case "stop":
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "stop",
            reason: message.stop?.callSid
          });
          await sessionHandle?.close("twilio-stop");
          return;
      }
    }
  };
};
|
|
4283
|
+
// src/telephony/response.ts
|
|
4284
|
+
/**
 * Collapses every run of whitespace to a single space and trims the ends.
 *
 * @param {string} value Text to tidy.
 * @returns {string} Single-spaced text without leading or trailing whitespace.
 */
var normalizeWhitespace = (value) => {
  const singleSpaced = value.replace(/\s+/g, " ");
  return singleSpaced.trim();
};
|
|
4285
|
+
// Word limit applied to the lead clause of a telephony reply when the caller
// does not pass `maxWords`.
var DEFAULT_MAX_WORDS = 12;
|
|
4286
|
+
// Matches the whitespace that follows `,`, `.`, `;`, `!` or `?`. The lookbehind
// keeps the punctuation attached to the preceding clause when splitting.
var CLAUSE_BOUNDARY_PATTERN = /(?<=[,.;!?])\s+/u;
|
|
4287
|
+
/**
 * Limits text to its first `maxWords` whitespace-separated words.
 *
 * @param {string} text Text to shorten.
 * @param {number} maxWords Word limit; a non-finite or non-positive value disables it.
 * @returns {string} The original text when it fits or the limit is disabled,
 *   otherwise the first words joined with single spaces.
 */
var clampWords = (text, maxWords) => {
  const limitEnabled = Number.isFinite(maxWords) && maxWords > 0;
  if (!limitEnabled) {
    return text;
  }
  const tokens = text.split(/\s+/u).filter(Boolean);
  return tokens.length > maxWords ? tokens.slice(0, maxWords).join(" ") : text;
};
|
|
4297
|
+
/**
 * Limits text to at most `maxChars` characters.
 *
 * @param {string} text Text to shorten.
 * @param {number} [maxChars] Character limit; a missing, non-finite, zero or
 *   negative value disables it.
 * @returns {string} The original text when it fits or the limit is disabled,
 *   otherwise the trimmed leading slice.
 */
var clampChars = (text, maxChars) => {
  const limitDisabled = !Number.isFinite(maxChars) || !maxChars || maxChars <= 0;
  if (limitDisabled || text.length <= maxChars) {
    return text;
  }
  const truncated = text.slice(0, maxChars);
  return truncated.trim();
};
|
|
4306
|
+
/**
 * Appends a period unless the text already ends with `.`, `!` or `?`.
 *
 * @param {string} text Text to finish.
 * @returns {string} Falsy input unchanged; otherwise text with terminal punctuation.
 */
var ensureTerminalPunctuation = (text) => {
  if (!text) {
    return text;
  }
  const alreadyTerminated = /[.!?]$/u.test(text);
  if (alreadyTerminated) {
    return text;
  }
  return `${text}.`;
};
|
|
4312
|
+
/**
 * Returns the first clause of a piece of text.
 *
 * A colon within the first 24 characters (and not at the very end) is treated
 * as a label separator: everything up to and including it is dropped. The
 * remainder is split on CLAUSE_BOUNDARY_PATTERN and the first piece returned.
 *
 * @param {string} text Text to reduce.
 * @returns {string} The leading clause, or the whitespace-normalized text
 *   when it is empty.
 */
var extractLeadClause = (text) => {
  const normalized = normalizeWhitespace(text);
  if (!normalized) {
    return normalized;
  }
  let body = normalized;
  const colonIndex = normalized.indexOf(":");
  const hasLeadingLabel = colonIndex >= 0 && colonIndex < 24 && colonIndex < normalized.length - 1;
  if (hasLeadingLabel) {
    body = normalizeWhitespace(normalized.slice(colonIndex + 1));
  }
  const [firstClause] = body.split(CLAUSE_BOUNDARY_PATTERN).filter(Boolean);
  return firstClause ?? body;
};
|
|
4322
|
+
/**
 * Shapes assistant text for a phone call.
 *
 * In "full" mode the normalized text is only character-limited. In the
 * default "lead-clause" mode the first clause is extracted, limited by words
 * and characters, re-normalized and given terminal punctuation.
 *
 * @param {string} text Assistant text.
 * @param {{ mode?: string, maxWords?: number, maxChars?: number }} [options]
 *   `maxWords` defaults to DEFAULT_MAX_WORDS.
 * @returns {string} Shaped text; whitespace-only input yields the empty
 *   normalized text.
 */
var shapeTelephonyAssistantText = (text, options = {}) => {
  const normalized = normalizeWhitespace(text);
  if (!normalized) {
    return normalized;
  }
  const mode = options.mode ?? "lead-clause";
  if (mode === "full") {
    return clampChars(normalized, options.maxChars);
  }
  const leadClause = extractLeadClause(normalized);
  const wordLimit = options.maxWords ?? DEFAULT_MAX_WORDS;
  const shortened = clampChars(clampWords(leadClause, wordLimit), options.maxChars);
  return ensureTerminalPunctuation(normalizeWhitespace(shortened));
};
|
|
960
4335
|
export {
|
|
4336
|
+
withVoiceOpsTaskId,
|
|
4337
|
+
withVoiceIntegrationEventId,
|
|
961
4338
|
voice,
|
|
4339
|
+
transcodeTwilioInboundPayloadToPCM16,
|
|
4340
|
+
transcodePCMToTwilioOutboundPayload,
|
|
4341
|
+
summarizeVoiceOpsTasks,
|
|
4342
|
+
startVoiceOpsTask,
|
|
4343
|
+
shapeTelephonyAssistantText,
|
|
4344
|
+
resolveVoiceSTTRoutingStrategy,
|
|
4345
|
+
resolveVoiceRuntimePreset,
|
|
4346
|
+
resolveTurnDetectionConfig,
|
|
4347
|
+
resolveAudioConditioningConfig,
|
|
4348
|
+
reopenVoiceOpsTask,
|
|
4349
|
+
renderVoiceCallReviewMarkdown,
|
|
4350
|
+
renderVoiceCallReviewHTML,
|
|
4351
|
+
recordVoiceRuntimeOps,
|
|
4352
|
+
listVoiceOpsTasks,
|
|
4353
|
+
encodeTwilioMulawBase64,
|
|
4354
|
+
decodeTwilioMulawBase64,
|
|
4355
|
+
createVoiceTaskUpdatedEvent,
|
|
4356
|
+
createVoiceTaskCreatedEvent,
|
|
962
4357
|
createVoiceSessionRecord,
|
|
963
4358
|
createVoiceSession,
|
|
4359
|
+
createVoiceSTTRoutingCorrectionHandler,
|
|
4360
|
+
createVoiceReviewSavedEvent,
|
|
964
4361
|
createVoiceMemoryStore,
|
|
965
|
-
|
|
4362
|
+
createVoiceIntegrationEvent,
|
|
4363
|
+
createVoiceFileTaskStore,
|
|
4364
|
+
createVoiceFileSessionStore,
|
|
4365
|
+
createVoiceFileRuntimeStorage,
|
|
4366
|
+
createVoiceFileReviewStore,
|
|
4367
|
+
createVoiceFileIntegrationEventStore,
|
|
4368
|
+
createVoiceCallReviewRecorder,
|
|
4369
|
+
createVoiceCallReviewFromSession,
|
|
4370
|
+
createVoiceCallReviewFromLiveTelephonyReport,
|
|
4371
|
+
createVoiceCallCompletedEvent,
|
|
4372
|
+
createTwilioVoiceResponse,
|
|
4373
|
+
createTwilioMediaStreamBridge,
|
|
4374
|
+
createStoredVoiceOpsTask,
|
|
4375
|
+
createStoredVoiceIntegrationEvent,
|
|
4376
|
+
createStoredVoiceCallReviewArtifact,
|
|
4377
|
+
createRiskyTurnCorrectionHandler,
|
|
4378
|
+
createPhraseHintCorrectionHandler,
|
|
4379
|
+
createId,
|
|
4380
|
+
createDomainPhraseHints,
|
|
4381
|
+
createDomainLexicon,
|
|
4382
|
+
conditionAudioChunk,
|
|
4383
|
+
completeVoiceOpsTask,
|
|
4384
|
+
buildVoiceOpsTaskFromReview,
|
|
4385
|
+
assignVoiceOpsTask,
|
|
4386
|
+
applyRiskTieredPhraseHintCorrections,
|
|
4387
|
+
applyPhraseHintCorrections,
|
|
4388
|
+
TURN_PROFILE_DEFAULTS
|
|
966
4389
|
};
|