@absolutejs/voice 0.0.21 → 0.0.22-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +499 -2
- package/dist/angular/index.js +90 -0
- package/dist/angular/voice-controller.service.d.ts +6 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/client/actions.d.ts +41 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +84 -0
- package/dist/client/index.d.ts +2 -0
- package/dist/client/index.js +507 -5
- package/dist/correction.d.ts +18 -1
- package/dist/fileStore.d.ts +27 -0
- package/dist/index.d.ts +12 -1
- package/dist/index.js +2425 -33
- package/dist/ops.d.ts +100 -0
- package/dist/react/index.js +86 -0
- package/dist/react/useVoiceController.d.ts +6 -0
- package/dist/react/useVoiceStream.d.ts +6 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/svelte/index.js +84 -0
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +59 -4
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +4940 -307
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +25 -0
- package/dist/testing/stt.d.ts +2 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/types.d.ts +290 -3
- package/dist/vue/index.js +90 -0
- package/dist/vue/useVoiceController.d.ts +11 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/package.json +115 -1
package/dist/index.js
CHANGED
|
@@ -434,6 +434,58 @@ var PRESET_INPUTS = {
|
|
|
434
434
|
transcriptStabilityMs: 1650
|
|
435
435
|
}
|
|
436
436
|
},
|
|
437
|
+
"pstn-balanced": {
|
|
438
|
+
audioConditioning: {
|
|
439
|
+
enabled: true,
|
|
440
|
+
maxGain: 2.8,
|
|
441
|
+
noiseGateAttenuation: 0.07,
|
|
442
|
+
noiseGateThreshold: 0.005,
|
|
443
|
+
targetLevel: 0.08
|
|
444
|
+
},
|
|
445
|
+
capture: {
|
|
446
|
+
channelCount: 1,
|
|
447
|
+
sampleRateHz: 16000
|
|
448
|
+
},
|
|
449
|
+
connection: {
|
|
450
|
+
maxReconnectAttempts: 14,
|
|
451
|
+
pingInterval: 45000,
|
|
452
|
+
reconnect: true
|
|
453
|
+
},
|
|
454
|
+
sttLifecycle: "continuous",
|
|
455
|
+
turnDetection: {
|
|
456
|
+
qualityProfile: "noisy-room",
|
|
457
|
+
profile: "long-form",
|
|
458
|
+
silenceMs: 660,
|
|
459
|
+
speechThreshold: 0.012,
|
|
460
|
+
transcriptStabilityMs: 300
|
|
461
|
+
}
|
|
462
|
+
},
|
|
463
|
+
"pstn-fast": {
|
|
464
|
+
audioConditioning: {
|
|
465
|
+
enabled: true,
|
|
466
|
+
maxGain: 2.75,
|
|
467
|
+
noiseGateAttenuation: 0.06,
|
|
468
|
+
noiseGateThreshold: 0.005,
|
|
469
|
+
targetLevel: 0.08
|
|
470
|
+
},
|
|
471
|
+
capture: {
|
|
472
|
+
channelCount: 1,
|
|
473
|
+
sampleRateHz: 16000
|
|
474
|
+
},
|
|
475
|
+
connection: {
|
|
476
|
+
maxReconnectAttempts: 14,
|
|
477
|
+
pingInterval: 45000,
|
|
478
|
+
reconnect: true
|
|
479
|
+
},
|
|
480
|
+
sttLifecycle: "continuous",
|
|
481
|
+
turnDetection: {
|
|
482
|
+
qualityProfile: "noisy-room",
|
|
483
|
+
profile: "long-form",
|
|
484
|
+
silenceMs: 620,
|
|
485
|
+
speechThreshold: 0.012,
|
|
486
|
+
transcriptStabilityMs: 280
|
|
487
|
+
}
|
|
488
|
+
},
|
|
437
489
|
reliability: {
|
|
438
490
|
audioConditioning: {
|
|
439
491
|
enabled: true,
|
|
@@ -475,6 +527,933 @@ var resolveVoiceRuntimePreset = (name = "default") => {
|
|
|
475
527
|
};
|
|
476
528
|
};
|
|
477
529
|
|
|
530
|
+
// src/ops.ts
|
|
531
|
+
/**
 * Returns a copy of `task` with `entry` appended to its history and
 * `updatedAt` refreshed. The input task is not mutated.
 *
 * @param {object} task - Task record; `task.history` may be missing.
 * @param {object} entry - History entry; `entry.at` defaults to the current time.
 * @returns {object} New task object with the extended history.
 */
var ensureTaskHistory = (task, entry) => {
  // Read the clock once so a defaulted entry timestamp and `updatedAt`
  // can never disagree by a millisecond.
  const now = Date.now();
  return {
    ...task,
    history: [
      ...task.history ?? [],
      {
        ...entry,
        at: entry.at ?? now
      }
    ],
    updatedAt: now
  };
};
|
|
542
|
+
/**
 * Returns a shallow copy of `task` whose `id` is set to `id`.
 *
 * @param {*} id - Identifier to stamp onto the copy.
 * @param {object} task - Source task; left untouched.
 * @returns {object} Copy of the task carrying the given id.
 */
var withVoiceOpsTaskId = (id, task) => {
  const stamped = { ...task };
  stamped.id = id;
  return stamped;
};
|
|
546
|
+
/**
 * Returns a shallow copy of `event` whose `id` is set to `id`.
 *
 * @param {*} id - Identifier to stamp onto the copy.
 * @param {object} event - Source event; left untouched.
 * @returns {object} Copy of the event carrying the given id.
 */
var withVoiceIntegrationEventId = (id, event) => {
  const stamped = { ...event };
  stamped.id = id;
  return stamped;
};
|
|
550
|
+
/**
 * Derives an ops follow-up task from a call review, keyed on
 * `review.summary.outcome`.
 *
 * Outcomes "voicemail", "no-answer", "escalated", "transferred" and "failed"
 * produce a task; any other outcome yields `null`.
 *
 * @param {object} review - Review artifact; reads `id`, `generatedAt`,
 *   `summary.outcome` and the optional `postCall` fields.
 * @returns {object|null} Open task in its initial state, or `null`.
 */
var buildVoiceOpsTaskFromReview = (review) => {
  const createdAt = review.generatedAt ?? Date.now();
  const postCallSummary = review.postCall?.summary;
  const postCallTarget = review.postCall?.target;
  const outcome = review.summary.outcome;

  // Per-outcome wording; everything else about the task is shared.
  const variants = new Map([
    ["voicemail", {
      fallbackDescription: "Caller reached voicemail and needs a callback follow-up.",
      kind: "callback",
      titleFor: (target) => target ? `Call back voicemail from ${target}` : "Call back voicemail lead"
    }],
    ["no-answer", {
      fallbackDescription: "Live contact was not established and should be retried.",
      kind: "callback",
      titleFor: () => "Retry no-answer call"
    }],
    ["escalated", {
      fallbackDescription: "The automated path escalated this call for human review.",
      kind: "escalation",
      titleFor: () => "Review escalated call"
    }],
    ["transferred", {
      fallbackDescription: "The call was transferred and should be verified downstream.",
      kind: "transfer-check",
      titleFor: (target) => target ? `Verify transfer to ${target}` : "Verify call transfer"
    }],
    ["failed", {
      fallbackDescription: "The call failed and needs operator review before retry.",
      kind: "retry-review",
      titleFor: () => "Inspect failed call before retry"
    }]
  ]);

  const variant = variants.get(outcome);
  if (variant === undefined) {
    return null;
  }

  return {
    createdAt,
    history: [
      {
        actor: "system",
        at: createdAt,
        detail: postCallSummary,
        type: "created"
      }
    ],
    id: `${review.id}:ops`,
    intakeId: review.id,
    outcome,
    recommendedAction: review.postCall?.recommendedAction ?? "Review the voice artifact and decide the next operator action.",
    reviewId: review.id,
    status: "open",
    target: postCallTarget,
    updatedAt: createdAt,
    description: postCallSummary ?? variant.fallbackDescription,
    kind: variant.kind,
    title: variant.titleFor(postCallTarget)
  };
};
|
|
611
|
+
/**
 * Assigns `task` to `owner` and records an "assigned" history entry.
 *
 * @param {object} task - Task to assign; not mutated.
 * @param {string} owner - Assignee name; a blank value falls back to "ops".
 * @param {object} [input] - Optional `actor` and `at` overrides for the entry.
 * @returns {object} Updated task copy.
 */
var assignVoiceOpsTask = (task, owner, input = {}) => {
  // `||` is deliberate: an empty string after trimming must fall back too.
  const assignee = owner.trim() || "ops";
  const reassigned = { ...task, assignee };
  const entry = {
    actor: input.actor ?? assignee,
    at: input.at,
    detail: `Assigned to ${assignee}`,
    type: "assigned"
  };
  return ensureTaskHistory(reassigned, entry);
};
|
|
623
|
+
/**
 * Marks `task` as "in-progress" and records a "started" history entry.
 *
 * @param {object} task - Task to start; not mutated.
 * @param {object} [input] - Optional `actor`, `at` and `detail` overrides.
 * @returns {object} Updated task copy.
 */
var startVoiceOpsTask = (task, input = {}) => {
  const started = { ...task, status: "in-progress" };
  const entry = {
    actor: input.actor ?? task.assignee ?? "ops",
    at: input.at,
    detail: input.detail ?? "Work started",
    type: "started"
  };
  return ensureTaskHistory(started, entry);
};
|
|
632
|
+
/**
 * Marks `task` as "done" and records a "completed" history entry.
 *
 * @param {object} task - Task to complete; not mutated.
 * @param {object} [input] - Optional `actor`, `at` and `detail` overrides.
 * @returns {object} Updated task copy.
 */
var completeVoiceOpsTask = (task, input = {}) => {
  const finished = { ...task, status: "done" };
  const entry = {
    actor: input.actor ?? task.assignee ?? "ops",
    at: input.at,
    detail: input.detail ?? "Marked done",
    type: "completed"
  };
  return ensureTaskHistory(finished, entry);
};
|
|
641
|
+
/**
 * Moves `task` back to "open" and records a "reopened" history entry.
 *
 * @param {object} task - Task to reopen; not mutated.
 * @param {object} [input] - Optional `actor`, `at` and `detail` overrides.
 * @returns {object} Updated task copy.
 */
var reopenVoiceOpsTask = (task, input = {}) => {
  const reopened = { ...task, status: "open" };
  const entry = {
    actor: input.actor ?? task.assignee ?? "ops",
    at: input.at,
    detail: input.detail ?? "Task reopened",
    type: "reopened"
  };
  return ensureTaskHistory(reopened, entry);
};
|
|
650
|
+
/**
 * Returns a new array of tasks ordered newest-first by `createdAt`.
 * The input collection is not reordered.
 *
 * @param {Iterable<object>} tasks - Tasks to order.
 * @returns {object[]} Sorted copy.
 */
var listVoiceOpsTasks = (tasks) => {
  const newestFirst = (left, right) => right.createdAt - left.createdAt;
  const ordered = [...tasks];
  ordered.sort(newestFirst);
  return ordered;
};
|
|
651
|
+
/**
 * Aggregates a task list into status counts plus frequency tables.
 *
 * Each table (`byKind`, `byOutcome`, `topAssignees`, `topTargets`) is an
 * array of `[key, count]` pairs sorted by count, highest first. Outcome,
 * target and assignee are only tallied when truthy; kind is always tallied.
 *
 * @param {object[]} tasks - Tasks to summarize.
 * @returns {object} Counts for `open`, `inProgress`, `done`, `total` and the tables.
 */
var summarizeVoiceOpsTasks = (tasks) => {
  const total = tasks.length;
  const kinds = new Map();
  const outcomes = new Map();
  const assignees = new Map();
  const targets = new Map();
  let open = 0;
  let inProgress = 0;
  let done = 0;

  const bump = (counter, key) => {
    counter.set(key, (counter.get(key) ?? 0) + 1);
  };
  const ranked = (counter) => [...counter.entries()].sort((left, right) => right[1] - left[1]);

  for (const task of tasks) {
    switch (task.status) {
      case "open":
        open += 1;
        break;
      case "in-progress":
        inProgress += 1;
        break;
      case "done":
        done += 1;
        break;
      default:
        break;
    }
    bump(kinds, task.kind);
    if (task.outcome) {
      bump(outcomes, task.outcome);
    }
    if (task.target) {
      bump(targets, task.target);
    }
    if (task.assignee) {
      bump(assignees, task.assignee);
    }
  }

  return {
    byKind: ranked(kinds),
    byOutcome: ranked(outcomes),
    done,
    inProgress,
    open,
    topAssignees: ranked(assignees),
    topTargets: ranked(targets),
    total
  };
};
|
|
692
|
+
/**
 * Builds an integration event envelope.
 *
 * @param {string} type - Event type, e.g. "call.completed".
 * @param {*} payload - Event payload, stored as given.
 * @param {object} [input] - Optional `createdAt` and `id`; they default to
 *   the current time and a random UUID respectively.
 * @returns {{createdAt: *, id: *, payload: *, type: string}} The event.
 */
var createVoiceIntegrationEvent = (type, payload, input = {}) => {
  const createdAt = input.createdAt ?? Date.now();
  const id = input.id ?? crypto.randomUUID();
  return { createdAt, id, payload, type };
};
|
|
698
|
+
/**
 * Builds the "call.completed" integration event for a finished session.
 * The event id is `<session id>:call.completed`.
 *
 * @param {object} input - Carries `session`, plus optional `disposition`
 *   (falls back to `session.call.disposition`) and `sessionSummary`.
 * @returns {object} The integration event.
 */
var createVoiceCallCompletedEvent = (input) => {
  const payload = {
    call: input.session.call,
    disposition: input.disposition ?? input.session.call?.disposition,
    scenarioId: input.session.scenarioId,
    sessionId: input.session.id,
    sessionSummary: input.sessionSummary,
    status: input.session.status,
    turnCount: input.session.turns.length
  };
  const eventId = `${input.session.id}:call.completed`;
  return createVoiceIntegrationEvent("call.completed", payload, { id: eventId });
};
|
|
709
|
+
/**
 * Builds the "review.saved" integration event for a stored call review.
 * The event id is `<review id>:review.saved`.
 *
 * @param {object} review - Review artifact; reads `id`, `title`, `postCall`
 *   and selected `summary` fields.
 * @returns {object} The integration event.
 */
var createVoiceReviewSavedEvent = (review) => {
  const payload = {
    elapsedMs: review.summary.elapsedMs,
    firstTurnLatencyMs: review.summary.firstTurnLatencyMs,
    outcome: review.summary.outcome,
    postCall: review.postCall,
    reviewId: review.id,
    title: review.title
  };
  const eventId = `${review.id}:review.saved`;
  return createVoiceIntegrationEvent("review.saved", payload, { id: eventId });
};
|
|
719
|
+
/**
 * Builds the "task.created" integration event for an ops task.
 * The event id is `<task id>:task.created:<task updatedAt>`.
 *
 * @param {object} task - Ops task to describe.
 * @returns {object} The integration event.
 */
var createVoiceTaskCreatedEvent = (task) => {
  const payload = {
    assignee: task.assignee,
    kind: task.kind,
    outcome: task.outcome,
    recommendedAction: task.recommendedAction,
    reviewId: task.reviewId,
    status: task.status,
    target: task.target,
    taskId: task.id,
    title: task.title
  };
  const eventId = `${task.id}:task.created:${task.updatedAt}`;
  return createVoiceIntegrationEvent("task.created", payload, { id: eventId });
};
|
|
732
|
+
/**
 * Builds the "task.updated" integration event for an ops task, including
 * its full history and `updatedAt`.
 * The event id is `<task id>:task.updated:<task updatedAt>`.
 *
 * @param {object} task - Ops task to describe.
 * @returns {object} The integration event.
 */
var createVoiceTaskUpdatedEvent = (task) => {
  const payload = {
    assignee: task.assignee,
    history: task.history,
    kind: task.kind,
    outcome: task.outcome,
    recommendedAction: task.recommendedAction,
    reviewId: task.reviewId,
    status: task.status,
    target: task.target,
    taskId: task.id,
    title: task.title,
    updatedAt: task.updatedAt
  };
  const eventId = `${task.id}:task.updated:${task.updatedAt}`;
  return createVoiceIntegrationEvent("task.updated", payload, { id: eventId });
};
|
|
747
|
+
|
|
748
|
+
// src/testing/review.ts
|
|
749
|
+
/**
 * Rounds a numeric metric to two decimal places.
 *
 * @param {*} value - Candidate metric.
 * @returns {number|undefined} Rounded number, or `undefined` for non-numbers.
 */
var roundMetric = (value) => {
  if (typeof value !== "number") {
    return undefined;
  }
  return Math.round(value * 100) / 100;
};
|
|
750
|
+
/**
 * Formats a labelled metric as `<label>: <rounded value><unit>`.
 *
 * @param {string} label - Metric label.
 * @param {*} value - Metric value; non-numbers produce no output.
 * @param {string} [unit="ms"] - Unit suffix.
 * @returns {string|undefined} Formatted text, or `undefined` for non-numbers.
 */
var formatMetric = (label, value, unit = "ms") => {
  if (typeof value !== "number") {
    return undefined;
  }
  return `${label}: ${roundMetric(value)}${unit}`;
};
|
|
751
|
+
/**
 * Finds the first timeline entry with the given event name, optionally
 * restricted to one source.
 *
 * @param {object[]} timeline - Entries to search, in order.
 * @param {string} event - Event name to match.
 * @param {string} [source] - When given, the entry's source must match too.
 * @returns {object|undefined} First matching entry, if any.
 */
var findTimelineEvent = (timeline, event, source) => {
  const matches = (entry) => {
    if (entry.event !== event) {
      return false;
    }
    return source === undefined || entry.source === source;
  };
  return timeline.find(matches);
};
|
|
752
|
+
/**
 * Renders one timeline entry as a single Markdown bullet line.
 *
 * The line always starts with the offset, source and event name. The
 * `text`, `reason`, `bytes`, `confidence` and `name` details follow in that
 * order, each only when present.
 *
 * @param {object} entry - Timeline entry.
 * @returns {string} Space-separated bullet line.
 */
var formatTimelineText = (entry) => {
  const details = [
    entry.text ? `"${entry.text}"` : null,
    entry.reason ? `reason=${entry.reason}` : null,
    typeof entry.bytes === "number" ? `bytes=${entry.bytes}` : null,
    typeof entry.confidence === "number" ? `confidence=${roundMetric(entry.confidence)}` : null,
    entry.name ? `name=${entry.name}` : null
  ];
  const segments = [
    `- ${entry.atMs}ms`,
    `[${entry.source}]`,
    entry.event,
    ...details.filter((detail) => detail !== null)
  ];
  return segments.join(" ");
};
|
|
771
|
+
/**
 * Tells whether an entry is one of the high-volume transport events
 * ("inbound-media", "inbound-silence-pad", "stt-send", "tts-audio").
 *
 * @param {object} entry - Timeline entry.
 * @returns {boolean} `true` for the events listed above.
 */
var isLowSignalTimelineEvent = (entry) => {
  switch (entry.event) {
    case "inbound-media":
    case "inbound-silence-pad":
    case "stt-send":
    case "tts-audio":
      return true;
    default:
      return false;
  }
};
|
|
772
|
+
/**
 * Totals the high-volume transport events of a timeline per category.
 *
 * Only "inbound-media", "inbound-silence-pad", "stt-send" and "tts-audio"
 * entries are counted. Each category reports its event count plus summed
 * `bytes` and `chunkDurationMs` (as `audioMs`); non-numeric values add 0.
 *
 * @param {object[]} timeline - Timeline entries.
 * @returns {Array<{audioMs: number, bytes: number, count: number, label: string}>}
 *   One summary per category, in order of first appearance.
 */
var summarizeTimelineTraffic = (timeline) => {
  const labelByEvent = new Map([
    ["inbound-media", "inbound media chunks"],
    ["inbound-silence-pad", "inbound silence padding"],
    ["stt-send", "STT audio sends"],
    ["tts-audio", "post-first TTS audio chunks"]
  ]);
  const numberOrZero = (value) => typeof value === "number" ? value : 0;
  const totals = new Map();

  for (const entry of timeline) {
    const label = labelByEvent.get(entry.event);
    if (label === undefined) {
      continue;
    }
    let bucket = totals.get(label);
    if (bucket === undefined) {
      bucket = { audioMs: 0, bytes: 0, count: 0, label };
      totals.set(label, bucket);
    }
    bucket.count += 1;
    bucket.bytes += numberOrZero(entry.bytes);
    bucket.audioMs += numberOrZero(entry.chunkDurationMs);
  }

  return [...totals.values()];
};
|
|
792
|
+
/**
 * Renders a timeline as Markdown bullet lines, collapsing runs of
 * consecutive burst events into a single line.
 *
 * A burst event is a low-signal transport event or a Twilio "media" event.
 * A run is a sequence of adjacent entries sharing the same event and source;
 * its line shows the time range, the repeat count and, when positive, the
 * summed bytes and audio duration. Other entries are rendered one per line.
 *
 * @param {object[]} timeline - Timeline entries in display order.
 * @returns {string[]} Rendered lines.
 */
var compactTimeline = (timeline) => {
  const numberOrZero = (value) => typeof value === "number" ? value : 0;
  const lines = [];
  let cursor = 0;

  while (cursor < timeline.length) {
    const head = timeline[cursor];
    if (!head) {
      break;
    }

    const collapsible = isLowSignalTimelineEvent(head) || (head.event === "media" && head.source === "twilio");
    if (!collapsible) {
      lines.push(formatTimelineText(head));
      cursor += 1;
      continue;
    }

    // Extend the run while the next entry repeats the same event/source pair.
    let last = cursor;
    let byteTotal = numberOrZero(head.bytes);
    let audioTotalMs = numberOrZero(head.chunkDurationMs);
    while (last + 1 < timeline.length) {
      const candidate = timeline[last + 1];
      if (!candidate) {
        break;
      }
      const continuesRun = candidate.event === head.event && candidate.source === head.source;
      if (!continuesRun) {
        break;
      }
      byteTotal += numberOrZero(candidate.bytes);
      audioTotalMs += numberOrZero(candidate.chunkDurationMs);
      last += 1;
    }

    const runEndAt = timeline[last]?.atMs ?? head.atMs;
    const runLength = last - cursor + 1;
    const segments = [
      `- ${head.atMs}-${runEndAt}ms`,
      `[${head.source}]`,
      `${head.event} x${runLength}`
    ];
    if (byteTotal > 0) {
      segments.push(`bytes=${byteTotal}`);
    }
    if (audioTotalMs > 0) {
      segments.push(`audio=${roundMetric(audioTotalMs)}ms`);
    }
    lines.push(segments.join(" "));
    cursor = last + 1;
  }

  return lines;
};
|
|
840
|
+
/**
 * Returns a shallow copy of `artifact` whose `id` is set to `id`.
 *
 * @param {*} id - Identifier to stamp onto the copy.
 * @param {object} artifact - Source review artifact; left untouched.
 * @returns {object} Copy of the artifact carrying the given id.
 */
var withVoiceCallReviewId = (id, artifact) => {
  const stamped = { ...artifact };
  stamped.id = id;
  return stamped;
};
|
|
844
|
+
/**
 * Converts a live telephony benchmark report into a call review artifact.
 *
 * Only the first fixture result is used. The trace is sorted by `atMs` and
 * mined for a latency breakdown; negative or non-numeric spans are dropped.
 *
 * @param {object} report - Report with `fixtures`, and optionally `trace`,
 *   `variant`, `ttsConfig`, `turnDetectionConfig` and `generatedAt`.
 * @param {object} [options] - Optional `preset` and `path` to record.
 * @returns {object} Review artifact (config, summary, timeline, transcript…).
 * @throws {Error} When the report has no fixture result.
 */
var createVoiceCallReviewFromLiveTelephonyReport = (report, options = {}) => {
  const fixture = report.fixtures?.[0];
  if (!fixture) {
    throw new Error("Live telephony review requires at least one fixture result.");
  }

  const timeline = [...report.trace ?? []].sort((left, right) => left.atMs - right.atMs);
  const locate = (event, source) => findTimelineEvent(timeline, event, source);
  const firstPartial = locate("partial", "stt");
  const commitEvent = locate("commit", "turn");
  const firstTtsAudio = locate("tts-first-audio", "benchmark");
  const firstOutboundMedia = locate("media", "twilio");
  const bargeInEvent = locate("barge-in", "benchmark");
  const clearEvent = locate("clear", "twilio");

  const isSttTextEntry = (entry) => entry.source === "stt" && (entry.event === "partial" || entry.event === "final") && typeof entry.text === "string" && entry.text.length > 0;
  const lastSttText = [...timeline].reverse().find(isSttTextEntry)?.text;

  // Span between two optional events; undefined unless both carry a numeric offset.
  const span = (label, from, to) => {
    if (typeof from?.atMs !== "number" || typeof to?.atMs !== "number") {
      return undefined;
    }
    return { label, valueMs: to.atMs - from.atMs };
  };
  const latencyBreakdown = [
    typeof firstPartial?.atMs === "number" ? { label: "start to first partial", valueMs: firstPartial.atMs } : undefined,
    span("first partial to commit", firstPartial, commitEvent),
    span("commit to first TTS audio", commitEvent, firstTtsAudio),
    span("commit to first outbound media", commitEvent, firstOutboundMedia),
    span("barge-in to clear", bargeInEvent, clearEvent)
  ].filter((value) => value !== undefined && value.valueMs >= 0);

  const notes = [
    report.variant?.description,
    firstPartial?.text ? `First partial: "${firstPartial.text}"` : undefined,
    lastSttText ? `Last STT text: "${lastSttText}"` : undefined
  ].filter((value) => typeof value === "string" && value.length > 0);

  const sttConfig = report.variant ? {
    description: report.variant.description,
    id: report.variant.id,
    model: report.variant.model
  } : undefined;

  const summary = {
    clearLatencyMs: roundMetric(fixture.clearLatencyMs),
    elapsedMs: roundMetric(fixture.elapsedMs),
    firstOutboundMediaLatencyMs: roundMetric(fixture.firstOutboundMediaLatencyMs),
    firstTurnLatencyMs: roundMetric(fixture.firstTurnLatencyMs),
    markLatencyMs: roundMetric(fixture.markLatencyMs),
    outboundMediaCount: fixture.outboundMediaCount,
    pass: fixture.passes,
    termRecall: roundMetric(fixture.termRecall),
    turnCount: fixture.turnCount,
    wordErrorRate: roundMetric(fixture.wordErrorRate)
  };

  return {
    config: {
      preset: options.preset,
      stt: sttConfig,
      tts: report.ttsConfig,
      turnDetection: report.turnDetectionConfig
    },
    errors: fixture.errors ?? [],
    expectedText: fixture.expectedText,
    fixtureId: fixture.fixtureId,
    generatedAt: report.generatedAt,
    latencyBreakdown,
    notes,
    path: options.path,
    summary,
    title: fixture.title ?? "Voice Call Review",
    timeline,
    transcript: {
      actual: fixture.actualText,
      expected: fixture.expectedText
    }
  };
};
|
|
922
|
+
/**
 * Extracts a human-readable message from an arbitrary thrown value.
 *
 * @param {*} error - A string, an `Error`, or anything else.
 * @returns {string} The non-blank string itself, the non-blank
 *   `Error#message`, or "Unknown call error" otherwise.
 */
var toErrorMessage = (error) => {
  const isNonBlank = (text) => text.trim().length > 0;
  if (typeof error === "string") {
    if (isNonBlank(error)) {
      return error;
    }
  }
  if (error instanceof Error) {
    if (isNonBlank(error.message)) {
      return error.message;
    }
  }
  return "Unknown call error";
};
|
|
931
|
+
/**
 * Creates a recorder that accumulates a call timeline and turns it into a
 * review artifact on demand.
 *
 * Every recorded event is stamped with its offset in milliseconds from the
 * moment the recorder was created (clamped at 0).
 *
 * @param {object} [options] - Optional `now` clock function, plus `config`,
 *   `fixtureId`, `path` and `title`, which are copied into the artifact.
 * @returns {object} Recorder exposing `finalize`, `recordError`,
 *   `recordTwilioInbound`, `recordTwilioOutbound` and `recordVoiceMessage`.
 */
var createVoiceCallReviewRecorder = (options = {}) => {
  const now = options.now ?? (() => Date.now());
  const startedAt = now();
  const errors = [];
  const timeline = [];
  const committedTurns = [];
  const committedTurnIds = new Set;
  // Tracked as events are pushed so each audio chunk does not rescan the timeline.
  let sawFirstTtsAudio = false;

  const push = (source, event, fields = {}) => {
    if (event === "tts-first-audio") {
      sawFirstTtsAudio = true;
    }
    timeline.push({
      atMs: Math.max(0, now() - startedAt),
      event,
      source,
      ...fields
    });
  };

  // Decoded size of a base64 string. Trailing "=" padding carries no data,
  // so it is excluded before applying the 4-chars-to-3-bytes ratio.
  const decodedBase64ByteLength = (encoded) => {
    let dataLength = encoded.length;
    while (dataLength > 0 && encoded[dataLength - 1] === "=") {
      dataLength -= 1;
    }
    return Math.floor(dataLength * 3 / 4);
  };

  // Inbound and outbound Twilio frames are recorded identically.
  const recordTwilio = (input) => {
    push("twilio", input.event, {
      bytes: input.bytes,
      chunkDurationMs: input.chunkDurationMs,
      name: input.name,
      reason: input.reason,
      text: input.text,
      track: input.track
    });
  };

  // Span between two optional events; undefined unless both carry a numeric offset.
  const span = (label, from, to) => {
    if (typeof from?.atMs !== "number" || typeof to?.atMs !== "number") {
      return undefined;
    }
    return { label, valueMs: to.atMs - from.atMs };
  };

  return {
    /**
     * Builds the review artifact from everything recorded so far.
     * The recorder stays usable; `errors` in the result is a snapshot.
     */
    finalize: () => {
      const sortedTimeline = [...timeline].sort((left, right) => left.atMs - right.atMs);
      const firstPartial = findTimelineEvent(sortedTimeline, "partial", "stt");
      const commitEvent = findTimelineEvent(sortedTimeline, "commit", "turn");
      const firstTtsAudio = findTimelineEvent(sortedTimeline, "tts-first-audio", "benchmark");
      const firstOutboundMedia = findTimelineEvent(sortedTimeline, "media", "twilio");
      const bargeInEvent = findTimelineEvent(sortedTimeline, "barge-in", "benchmark");
      const clearEvent = findTimelineEvent(sortedTimeline, "clear", "twilio");
      const markEvent = findTimelineEvent(sortedTimeline, "mark", "twilio");
      const elapsedMs = sortedTimeline.at(-1)?.atMs ?? 0;
      const lastSttText = [...sortedTimeline].reverse().find((entry) => entry.source === "stt" && (entry.event === "partial" || entry.event === "final") && typeof entry.text === "string" && entry.text.length > 0)?.text ?? undefined;
      const latencyBreakdown = [
        typeof firstPartial?.atMs === "number" ? {
          label: "start to first partial",
          valueMs: firstPartial.atMs
        } : undefined,
        span("first partial to commit", firstPartial, commitEvent),
        span("commit to first TTS audio", commitEvent, firstTtsAudio),
        span("commit to first outbound media", commitEvent, firstOutboundMedia),
        span("barge-in to clear", bargeInEvent, clearEvent)
      ].filter((value) => value !== undefined && value.valueMs >= 0);
      return {
        config: options.config,
        // Copy so errors recorded after finalize() cannot alter this artifact.
        errors: [...errors],
        fixtureId: options.fixtureId,
        generatedAt: now(),
        latencyBreakdown,
        notes: [
          firstPartial?.text ? `First partial: "${firstPartial.text}"` : undefined,
          lastSttText ? `Last STT text: "${lastSttText}"` : undefined
        ].filter((value) => typeof value === "string"),
        path: options.path,
        summary: {
          clearLatencyMs: roundMetric(typeof clearEvent?.atMs === "number" && typeof bargeInEvent?.atMs === "number" ? clearEvent.atMs - bargeInEvent.atMs : undefined),
          elapsedMs: roundMetric(elapsedMs),
          firstOutboundMediaLatencyMs: roundMetric(firstOutboundMedia?.atMs),
          firstTurnLatencyMs: roundMetric(commitEvent?.atMs),
          markLatencyMs: roundMetric(markEvent?.atMs),
          outboundMediaCount: sortedTimeline.filter((entry) => entry.source === "twilio" && entry.event === "media").length,
          pass: errors.length === 0,
          turnCount: committedTurns.length
        },
        title: options.title ?? "Voice Call Review",
        timeline: sortedTimeline,
        transcript: {
          actual: committedTurns.join(" ").trim()
        }
      };
    },
    /** Records an arbitrary thrown value as a call error. */
    recordError: (error) => {
      const message = toErrorMessage(error);
      errors.push(message);
      push("turn", "error", {
        reason: message
      });
    },
    /** Records a Twilio frame received from the call. */
    recordTwilioInbound: (input) => {
      recordTwilio(input);
    },
    /** Records a Twilio frame sent to the call. */
    recordTwilioOutbound: (input) => {
      recordTwilio(input);
    },
    /**
     * Records a voice runtime message. Unknown message types are ignored,
     * and a turn id is only committed once.
     */
    recordVoiceMessage: (message) => {
      switch (message.type) {
        case "partial":
        case "final":
          push("stt", message.type, {
            confidence: message.transcript.confidence,
            text: message.transcript.text
          });
          return;
        case "assistant":
          push("turn", "assistant", {
            text: message.text
          });
          return;
        case "audio":
          // The first chunk gets its own event name so latency can be measured to it.
          push("benchmark", sawFirstTtsAudio ? "tts-audio" : "tts-first-audio", {
            bytes: decodedBase64ByteLength(message.chunkBase64)
          });
          return;
        case "turn":
          if (committedTurnIds.has(message.turn.id)) {
            return;
          }
          committedTurnIds.add(message.turn.id);
          committedTurns.push(message.turn.text);
          push("turn", "commit", {
            confidence: message.turn.quality?.averageConfidence,
            text: message.turn.text
          });
          return;
        case "error":
          errors.push(message.message);
          push("turn", "error", {
            reason: message.message
          });
          return;
        case "complete":
          push("turn", "complete", {
            text: message.sessionId
          });
          return;
        case "session":
          push("turn", "session", {
            reason: message.status,
            text: message.sessionId
          });
          return;
        case "pong":
          push("benchmark", "pong");
          return;
      }
    }
  };
};
|
|
1089
|
+
/**
 * Renders the review config as a Markdown section with a fenced JSON block.
 *
 * @param {*} config - Config to print; any falsy value renders nothing.
 * @returns {string} Markdown section, or "" when there is no config.
 */
var renderConfigSection = (config) => {
  if (!config) {
    return "";
  }
  const fence = "```";
  const sectionLines = [
    "## Config",
    "",
    `${fence}json`,
    JSON.stringify(config, null, 2),
    fence
  ];
  return sectionLines.join("\n");
};
|
|
1102
|
+
/**
 * Renders the Markdown "Timeline" section, leaving out low-signal
 * transport events.
 *
 * @param {object[]} timeline - Timeline entries.
 * @returns {string} Section with one bullet per line, or a placeholder
 *   when nothing remains after filtering.
 */
var renderTimeline = (timeline) => {
  const heading = "## Timeline";
  const visibleEntries = timeline.filter((entry) => !isLowSignalTimelineEvent(entry));
  if (visibleEntries.length === 0) {
    return `${heading}\n\n_No timeline events captured._`;
  }
  return [heading, "", ...compactTimeline(visibleEntries)].join("\n");
};
|
|
1113
|
+
/**
 * Renders the Markdown "Transport Summary" section from the timeline's
 * high-volume transport events.
 *
 * @param {object[]} timeline - Timeline entries.
 * @returns {string} Section with one bullet per traffic category, or ""
 *   when there is no such traffic.
 */
var renderTransportSummary = (timeline) => {
  const categories = summarizeTimelineTraffic(timeline);
  if (categories.length === 0) {
    return "";
  }
  const sectionLines = ["## Transport Summary", ""];
  for (const category of categories) {
    const segments = [`- ${category.label}: ${category.count}`];
    if (category.bytes > 0) {
      segments.push(`${category.bytes} bytes`);
    }
    if ((category.audioMs ?? 0) > 0) {
      segments.push(`${roundMetric(category.audioMs)}ms audio`);
    }
    sectionLines.push(segments.join(", "));
  }
  return sectionLines.join("\n");
};
|
|
1134
|
+
/**
 * Renders the Markdown "Latency Breakdown" section.
 *
 * @param {Array<{label: string, valueMs: number}>} breakdown - Latency spans.
 * @returns {string} Section with one bullet per span, or "" when empty.
 */
var renderLatencyBreakdown = (breakdown) => {
  if (breakdown.length === 0) {
    return "";
  }
  const sectionLines = ["## Latency Breakdown", ""];
  for (const entry of breakdown) {
    sectionLines.push(`- ${entry.label}: ${roundMetric(entry.valueMs)}ms`);
  }
  return sectionLines.join("\n");
};
|
|
1145
|
+
/**
 * Renders a call review artifact as a Markdown document.
 *
 * Sections appear in this order: title, source/fixture, Summary, Transcript,
 * then Notes, Latency Breakdown, Transport Summary, Errors and Config when
 * they have content, and finally the Timeline.
 *
 * @param {object} artifact - Review artifact with `title`, `summary`,
 *   `transcript`, `notes`, `errors`, `latencyBreakdown`, `timeline` and
 *   optional `path`, `fixtureId` and `config`.
 * @returns {string} Markdown text.
 */
var renderVoiceCallReviewMarkdown = (artifact) => {
  // formatMetric() returns bare "label: value" text; prefix it so every
  // summary line is its own list item like the literal bullets below.
  const bullet = (text) => typeof text === "string" ? `- ${text}` : undefined;
  const summaryLines = [
    `- pass: ${artifact.summary.pass ? "yes" : "no"}`,
    bullet(formatMetric("first turn", artifact.summary.firstTurnLatencyMs)),
    bullet(formatMetric("first outbound media", artifact.summary.firstOutboundMediaLatencyMs)),
    bullet(formatMetric("mark", artifact.summary.markLatencyMs)),
    bullet(formatMetric("clear", artifact.summary.clearLatencyMs)),
    bullet(formatMetric("elapsed", artifact.summary.elapsedMs)),
    typeof artifact.summary.wordErrorRate === "number" ? `- word error rate: ${artifact.summary.wordErrorRate}` : undefined,
    typeof artifact.summary.termRecall === "number" ? `- term recall: ${artifact.summary.termRecall}` : undefined,
    typeof artifact.summary.turnCount === "number" ? `- turn count: ${artifact.summary.turnCount}` : undefined,
    typeof artifact.summary.outboundMediaCount === "number" ? `- outbound media count: ${artifact.summary.outboundMediaCount}` : undefined
  ].filter((value) => typeof value === "string");
  const notes = artifact.notes.length ? ["## Notes", "", ...artifact.notes.map((note) => `- ${note}`)].join("\n") : "";
  const errors = artifact.errors.length ? ["## Errors", "", ...artifact.errors.map((error) => `- ${error}`)].join("\n") : "";
  const latency = renderLatencyBreakdown(artifact.latencyBreakdown);
  const transportSummary = renderTransportSummary(artifact.timeline);
  // Rendered once and reused for both the section and its trailing spacer.
  const config = renderConfigSection(artifact.config);
  return [
    `# ${artifact.title}`,
    "",
    artifact.path ? `Source: \`${artifact.path}\`` : undefined,
    artifact.fixtureId ? `Fixture: \`${artifact.fixtureId}\`` : undefined,
    "",
    "## Summary",
    "",
    ...summaryLines,
    "",
    "## Transcript",
    "",
    `- expected: ${artifact.transcript.expected ?? "_n/a_"}`,
    // Same placeholder as `expected`, instead of printing "undefined".
    `- actual: ${artifact.transcript.actual ?? "_n/a_"}`,
    "",
    notes,
    notes ? "" : undefined,
    latency,
    latency ? "" : undefined,
    transportSummary,
    transportSummary ? "" : undefined,
    errors,
    errors ? "" : undefined,
    config,
    config ? "" : undefined,
    renderTimeline(artifact.timeline)
  ].filter((value) => typeof value === "string").join("\n");
};
|
|
1193
|
+
/**
 * Escape the five HTML-significant characters in a string so it can be placed
 * in element content or a quoted attribute.
 *
 * The ampersand is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
var escapeHtml2 = (value) => value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
|
|
1194
|
+
/**
 * Render a voice call review artifact as a standalone HTML document.
 *
 * Text taken from the artifact (title, notes, labels, transcript, timeline
 * lines, config JSON) is passed through `escapeHtml2` before interpolation.
 * Empty lists are replaced by a single "No …" placeholder item.
 *
 * @param {object} artifact - Review artifact; reads `title`, `summary`,
 *   `transcript`, `notes`, `latencyBreakdown`, `timeline` and `config`.
 * @returns {string} The complete HTML document.
 */
var renderVoiceCallReviewHTML = (artifact) => {
  // Append the unit only when a value exists, so a missing latency reads
  // "n/a" instead of "n/ams".
  const formatLatency = (value) => value === undefined || value === null ? "n/a" : `${value}ms`;
  const notes = artifact.notes.map((note) => `<li>${escapeHtml2(note)}</li>`).join("");
  const latency = artifact.latencyBreakdown.map((entry) => `<li><strong>${escapeHtml2(entry.label)}:</strong> ${roundMetric(entry.valueMs)}ms</li>`).join("");
  const transport = summarizeTimelineTraffic(artifact.timeline).map((summary) => {
    const parts = [`${summary.count}`, "events"];
    if (summary.bytes > 0) {
      parts.push(`${summary.bytes} bytes`);
    }
    if ((summary.audioMs ?? 0) > 0) {
      parts.push(`${roundMetric(summary.audioMs)}ms audio`);
    }
    return `<li><strong>${escapeHtml2(summary.label)}:</strong> ${escapeHtml2(parts.join(", "))}</li>`;
  }).join("");
  // Low-signal events are dropped and the leading "- " bullet marker of each
  // compacted line is removed, since the line is wrapped in an <li> here.
  const timeline = compactTimeline(artifact.timeline.filter((entry) => !isLowSignalTimelineEvent(entry))).map((line) => `<li>${escapeHtml2(line.replace(/^- /u, ""))}</li>`).join("");
  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>${escapeHtml2(artifact.title)}</title>
<style>
:root { color-scheme: dark; }
body { font-family: ui-sans-serif, system-ui, sans-serif; margin: 0; padding: 24px; background: #0b0d10; color: #f4f4f5; }
main { max-width: 980px; margin: 0 auto; display: grid; gap: 16px; }
section { background: #13161b; border: 1px solid #232833; border-radius: 16px; padding: 18px; }
h1, h2 { margin: 0 0 12px; }
ul { margin: 0; padding-left: 20px; display: grid; gap: 8px; }
code, pre { font-family: ui-monospace, SFMono-Regular, monospace; }
pre { white-space: pre-wrap; overflow-wrap: anywhere; background: #0f1217; border-radius: 12px; padding: 14px; border: 1px solid #232833; }
.grid { display: grid; gap: 16px; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); }
.metric { display: grid; gap: 4px; }
.label { color: #a1a1aa; font-size: 0.82rem; text-transform: uppercase; letter-spacing: 0.08em; }
.value { font-size: 1.05rem; }
</style>
</head>
<body>
<main>
<section>
<h1>${escapeHtml2(artifact.title)}</h1>
<div class="grid">
<div class="metric"><div class="label">Pass</div><div class="value">${artifact.summary.pass ? "yes" : "no"}</div></div>
<div class="metric"><div class="label">First Turn</div><div class="value">${formatLatency(artifact.summary.firstTurnLatencyMs)}</div></div>
<div class="metric"><div class="label">First Outbound Media</div><div class="value">${formatLatency(artifact.summary.firstOutboundMediaLatencyMs)}</div></div>
<div class="metric"><div class="label">Turn Count</div><div class="value">${artifact.summary.turnCount ?? "n/a"}</div></div>
</div>
</section>
<section>
<h2>Transcript</h2>
<ul>
<li><strong>Expected:</strong> ${escapeHtml2(artifact.transcript.expected ?? "n/a")}</li>
<li><strong>Actual:</strong> ${escapeHtml2(artifact.transcript.actual || "n/a")}</li>
</ul>
</section>
<section>
<h2>Notes</h2>
<ul>${notes || "<li>No notes.</li>"}</ul>
</section>
<section>
<h2>Latency Breakdown</h2>
<ul>${latency || "<li>No latency data.</li>"}</ul>
</section>
<section>
<h2>Transport Summary</h2>
<ul>${transport || "<li>No transport data.</li>"}</ul>
</section>
<section>
<h2>Timeline</h2>
<ul>${timeline || "<li>No timeline events.</li>"}</ul>
</section>
<section>
<h2>Config</h2>
<pre>${escapeHtml2(JSON.stringify(artifact.config ?? {}, null, 2))}</pre>
</section>
</main>
</body>
</html>`;
};
|
|
1271
|
+
|
|
1272
|
+
// src/runtimeOps.ts
|
|
1273
|
+
/**
 * Build the default review title for a session.
 *
 * @param {{scenarioId?: string, id: string}} session - Session record.
 * @returns {string} "Voice call review: " followed by the scenario id when it
 *   is truthy, otherwise by the session id.
 */
var defaultReviewTitle = (session) => {
  const subject = session.scenarioId || session.id;
  return `Voice call review: ${subject}`;
};
|
|
1274
|
+
/**
 * Build the default post-call summary for a call disposition.
 *
 * @param {{disposition?: string, reason?: string, target?: string}} input
 * @returns {{label: string, recommendedAction: string, reason: (string|undefined), summary: string, target?: string}}
 *   The summary for the disposition. Only the "transferred" summary carries a
 *   `target` key. "completed" and any unrecognized disposition yield the
 *   "Completed" summary.
 */
var buildDefaultPostCallSummary = (input) => {
  const { disposition, reason, target } = input;
  const completed = () => ({
    label: "Completed",
    recommendedAction: "No follow-up action is required.",
    reason,
    summary: "The call completed successfully."
  });
  // A Map (rather than a plain object) so inherited keys such as
  // "constructor" can never be mistaken for a known disposition.
  const builders = new Map([
    ["transferred", () => ({
      label: "Transferred",
      recommendedAction: target ? `Confirm the handoff to ${target} completed successfully.` : "Confirm the transfer completed successfully.",
      reason,
      summary: target ? `The call was transferred to ${target}.` : "The call was transferred.",
      target
    })],
    ["escalated", () => ({
      label: "Escalated",
      recommendedAction: "Review the escalated call and route it to a human operator.",
      reason,
      summary: reason ? `The call escalated because ${reason}.` : "The call escalated for operator review."
    })],
    ["voicemail", () => ({
      label: "Voicemail",
      recommendedAction: "Queue a callback follow-up for this caller.",
      reason,
      summary: "The call reached voicemail and needs a callback."
    })],
    ["no-answer", () => ({
      label: "No Answer",
      recommendedAction: "Retry the call or create a callback task.",
      reason,
      summary: "The call did not reach a live respondent."
    })],
    ["failed", () => ({
      label: "Failed",
      recommendedAction: "Inspect the call review before retrying this flow.",
      reason,
      summary: reason ? `The call failed because ${reason}.` : "The call failed before a successful completion."
    })],
    ["closed", () => ({
      label: "Closed",
      recommendedAction: "Inspect the review if this early closure was unexpected.",
      reason,
      summary: "The call closed before an explicit completion."
    })]
  ]);
  const build = builders.get(disposition) ?? completed;
  return build();
};
|
|
1329
|
+
/**
 * Derive a default call review artifact from a session record.
 *
 * @param {object} input
 * @param {object} input.session - Session; reads `id`/`scenarioId` (via the
 *   title helper), `createdAt`, `lastActivityAt`, `turns` and `call?.events`.
 * @param {string} [input.disposition] - Call outcome; "failed" marks the
 *   review as not passing.
 * @param {string} [input.reason] - Recorded as the single error when the
 *   disposition is "failed".
 * @param {string} [input.target] - Forwarded to the post-call summary.
 * @param {number} [input.generatedAt] - Timestamp; defaults to `Date.now()`.
 * @returns {object} Review with errors, latency breakdown, post-call summary,
 *   summary metrics, title, timeline and the joined turn text as transcript.
 */
var createVoiceCallReviewFromSession = (input) => {
  const { disposition, reason, session, target } = input;
  const generatedAt = input.generatedAt ?? Date.now();
  const spokenText = session.turns.map((turn) => turn.text).join(" ").trim();
  // Elapsed time runs from creation to the last activity, or to the
  // generation time when no activity timestamp was recorded.
  const elapsedMs = (session.lastActivityAt ?? generatedAt) - session.createdAt;

  // Call lifecycle events become timeline entries offset from session start;
  // offsets are clamped so they are never negative.
  const toTimelineEntry = (event) => ({
    atMs: Math.max(0, event.at - session.createdAt),
    event: `call-${event.type}`,
    reason: event.reason,
    source: "turn",
    text: event.target ?? event.disposition,
    track: event.target
  });

  const latencyBreakdown = [];
  if (typeof elapsedMs === "number" && elapsedMs >= 0) {
    latencyBreakdown.push({
      label: "Session elapsed",
      valueMs: elapsedMs
    });
  }
  const errors = disposition === "failed" && reason ? [reason] : [];

  return {
    errors,
    generatedAt,
    latencyBreakdown,
    notes: [],
    postCall: buildDefaultPostCallSummary({
      disposition,
      reason,
      target
    }),
    summary: {
      elapsedMs: elapsedMs >= 0 ? elapsedMs : undefined,
      outcome: disposition,
      pass: disposition !== "failed",
      turnCount: session.turns.length
    },
    title: defaultReviewTitle(session),
    timeline: session.call?.events.map((event) => toTimelineEntry(event)) ?? [],
    transcript: {
      actual: spokenText
    }
  };
};
|
|
1368
|
+
/**
 * Ensure a review carries an id before it is stored.
 *
 * @param {string} sessionId - Session the review belongs to.
 * @param {object} review - Review candidate.
 * @returns {object} The same review when it already has a non-empty string
 *   `id`; otherwise the result of `withVoiceCallReviewId` called with the id
 *   `"<sessionId>:review"`.
 */
var asStoredReview = (sessionId, review) => {
  const hasId = typeof review.id === "string" && review.id.length > 0;
  return hasId ? review : withVoiceCallReviewId(`${sessionId}:review`, review);
};
|
|
1374
|
+
/**
 * Ensure an ops task carries an id before it is stored.
 *
 * @param {{id: string}} review - Stored review the task was derived from.
 * @param {object} task - Task candidate.
 * @returns {object} The same task when it already has a non-empty string
 *   `id`; otherwise the result of `withVoiceOpsTaskId` called with the id
 *   `"<review.id>:ops"`.
 */
var asStoredTask = (review, task) => {
  const hasId = "id" in task && typeof task.id === "string" && task.id.length > 0;
  return hasId ? task : withVoiceOpsTaskId(`${review.id}:ops`, task);
};
|
|
1380
|
+
/**
 * Persist a runtime event and notify the configured listener.
 *
 * The event is first written to `config.events` (keyed by `event.id`) when
 * that store exists, then passed to `config.onEvent` when it is defined. Both
 * steps are awaited in that order.
 *
 * @param {{api: *, config: object, context: *, event: {id: string}, session: *}} input
 * @returns {Promise<void>}
 */
var emitRuntimeEvent = async (input) => {
  const { api, config, context, event, session } = input;
  await config.events?.set(event.id, event);
  await config.onEvent?.({ api, context, event, session });
};
|
|
1389
|
+
/**
 * Record the operational artifacts for a finished call: a review, an optional
 * follow-up task, and the matching runtime events.
 *
 * Steps, in order:
 *  1. Build a review with `config.buildReview`, falling back to
 *     `createVoiceCallReviewFromSession` when it is absent or returns nullish.
 *  2. Store the review and emit a "review saved" event.
 *  3. Build a task with `config.createTaskFromReview`, falling back to
 *     `buildVoiceOpsTaskFromReview`; when a task results, store it and emit a
 *     "task created" event.
 *  4. Emit the "call completed" event.
 *
 * @param {object} input - Reads `api`, `config`, `context`, `disposition`,
 *   `metadata`, `reason`, `session` and `target`.
 * @returns {Promise<{review: (object|undefined), task: (object|undefined)}|undefined>}
 *   `undefined` when no config is supplied; otherwise the stored review/task.
 */
var recordVoiceRuntimeOps = async (input) => {
  const { api, config, context, disposition, session } = input;
  if (!config) {
    return;
  }
  const publish = (event) => emitRuntimeEvent({
    api,
    config,
    context,
    event,
    session
  });

  const lastTurnResult = session.turns.at(-1)?.result;
  const customReview = await config.buildReview?.({
    api,
    context,
    disposition,
    metadata: input.metadata,
    reason: input.reason,
    result: lastTurnResult,
    session,
    target: input.target
  });
  const reviewCandidate = customReview ?? createVoiceCallReviewFromSession({
    disposition,
    reason: input.reason,
    session,
    target: input.target
  });
  const review = reviewCandidate ? asStoredReview(session.id, reviewCandidate) : undefined;

  let task;
  if (review) {
    await config.reviews?.set(review.id, review);
    await publish(createVoiceReviewSavedEvent(review));

    const customTask = await config.createTaskFromReview?.({
      api,
      context,
      disposition,
      review,
      session
    });
    const taskCandidate = customTask ?? buildVoiceOpsTaskFromReview(review) ?? undefined;
    if (taskCandidate) {
      task = asStoredTask(review, taskCandidate);
      await config.tasks?.set(task.id, task);
      await publish(createVoiceTaskCreatedEvent(task));
    }
  }

  await publish(createVoiceCallCompletedEvent({
    disposition,
    session
  }));
  return {
    review,
    task
  };
};
|
|
1456
|
+
|
|
478
1457
|
// src/store.ts
|
|
479
1458
|
var createId = () => crypto.randomUUID();
|
|
480
1459
|
var createVoiceSessionRecord = (id, scenarioId) => ({
|
|
@@ -515,6 +1494,9 @@ var toVoiceSessionSummary = (session) => ({
|
|
|
515
1494
|
turnCount: session.turns.length
|
|
516
1495
|
});
|
|
517
1496
|
|
|
1497
|
+
// src/session.ts
|
|
1498
|
+
import { Buffer } from "buffer";
|
|
1499
|
+
|
|
518
1500
|
// src/turnDetection.ts
|
|
519
1501
|
var DEFAULT_SILENCE_MS = 700;
|
|
520
1502
|
var DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
@@ -560,6 +1542,9 @@ var selectPreferredTranscriptText = (currentText, nextText) => {
|
|
|
560
1542
|
if (countWords(next) > countWords(current)) {
|
|
561
1543
|
return next;
|
|
562
1544
|
}
|
|
1545
|
+
if (countWords(next) === countWords(current) && next.length > current.length) {
|
|
1546
|
+
return next;
|
|
1547
|
+
}
|
|
563
1548
|
return current;
|
|
564
1549
|
};
|
|
565
1550
|
var mergeSequentialTranscriptText = (currentText, nextText) => {
|
|
@@ -642,6 +1627,8 @@ var DEFAULT_FALLBACK_MAX_ATTEMPTS_PER_TURN = 1;
|
|
|
642
1627
|
var DEFAULT_DUPLICATE_TURN_WINDOW_MS = 5000;
|
|
643
1628
|
var FALLBACK_CONFIDENCE_SELECTION_DELTA = 0.05;
|
|
644
1629
|
var FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO = 0.12;
|
|
1630
|
+
// Threshold in milliseconds used by getVendorCommitDelayMs: when either the
// configured silence window or the transcript-stability window is below this
// value, the vendor commit delay is just the transcript-stability window.
var EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS = 200;
|
|
1631
|
+
// Cap in milliseconds that getVendorCommitDelayMs applies to the doubled
// silence window; the transcript-stability window can still exceed it.
var MAX_VENDOR_COMMIT_GRACE_MS = 1200;
|
|
645
1632
|
var DEFAULT_FORMAT = {
|
|
646
1633
|
channels: 1,
|
|
647
1634
|
container: "raw",
|
|
@@ -660,6 +1647,7 @@ var createEmptyCurrentTurn = () => ({
|
|
|
660
1647
|
transcripts: []
|
|
661
1648
|
});
|
|
662
1649
|
/**
 * Make a shallow copy of a transcript object.
 *
 * @param {object} transcript - Transcript to copy.
 * @returns {object} A new object with the same own enumerable properties;
 *   nested values are shared with the original.
 */
var cloneTranscript = (transcript) => {
  const copy = { ...transcript };
  return copy;
};
|
|
1650
|
+
/**
 * Encode a chunk as a base64 string.
 *
 * @param {*} chunk - Any value accepted by `Buffer.from`.
 * @returns {string} Base64 text of the chunk's bytes.
 */
var encodeBase64 = (chunk) => {
  const bytes = Buffer.from(chunk);
  return bytes.toString("base64");
};
|
|
663
1651
|
/**
 * Count whitespace-separated words in a string.
 *
 * @param {string} text - Input text.
 * @returns {number} Number of non-empty tokens; 0 for blank input.
 */
var countWords2 = (text) => {
  const tokens = text.trim().split(/\s+/);
  // Splitting an empty string yields [""], so empty tokens are discarded.
  const words = tokens.filter((token) => token.length > 0);
  return words.length;
};
|
|
664
1652
|
/**
 * Trim a string and collapse each run of whitespace into one space.
 *
 * @param {string} text - Input text.
 * @returns {string} The normalized text.
 */
var normalizeText2 = (text) => {
  const pieces = text.trim().split(/\s+/);
  return pieces.join(" ");
};
|
|
665
1653
|
/**
 * Estimate the duration of an audio chunk from its size, using the sample
 * rate and channel count of `DEFAULT_FORMAT`.
 *
 * @param {{byteLength: number}} chunk - Audio chunk.
 * @returns {number} Duration in milliseconds.
 */
var getAudioChunkDurationMs = (chunk) => {
  // The factor 2 is presumably bytes per sample (16-bit PCM) — TODO confirm.
  const bytesPerSecond = DEFAULT_FORMAT.sampleRateHz * DEFAULT_FORMAT.channels * 2;
  return chunk.byteLength / bytesPerSecond * 1000;
};
|
|
@@ -678,13 +1666,14 @@ var calculateMeanConfidence = (transcripts) => {
|
|
|
678
1666
|
}
|
|
679
1667
|
return sum / total;
|
|
680
1668
|
};
|
|
681
|
-
var createTurnQuality = (transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics) => {
|
|
1669
|
+
var createTurnQuality = (transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics, costEstimate) => {
|
|
682
1670
|
const sampledTranscripts = transcripts.filter((transcript) => typeof transcript.confidence === "number");
|
|
683
1671
|
const confidenceSampleCount = sampledTranscripts.length;
|
|
684
1672
|
return {
|
|
685
1673
|
averageConfidence: confidenceSampleCount > 0 ? sampledTranscripts.reduce((sum, transcript) => sum + transcript.confidence, 0) / confidenceSampleCount : undefined,
|
|
686
1674
|
confidenceSampleCount,
|
|
687
1675
|
correction: correctionDiagnostics,
|
|
1676
|
+
cost: costEstimate,
|
|
688
1677
|
fallback: fallbackDiagnostics,
|
|
689
1678
|
fallbackUsed,
|
|
690
1679
|
finalTranscriptCount: transcripts.filter((transcript) => transcript.isFinal).length,
|
|
@@ -693,6 +1682,19 @@ var createTurnQuality = (transcripts, source, fallbackUsed, fallbackDiagnostics,
|
|
|
693
1682
|
source
|
|
694
1683
|
};
|
|
695
1684
|
};
|
|
1685
|
+
/**
 * Estimate the relative cost of a turn from its audio durations.
 *
 * Negative durations are clamped to zero. The cost is
 * `minutes * unit` for the primary pass plus the same for the fallback
 * replay; the primary unit defaults to 1 and the fallback unit defaults to
 * the primary unit.
 *
 * @param {object} input
 * @param {number} input.primaryAudioMs - Audio sent through the primary pass.
 * @param {number} input.fallbackReplayAudioMs - Audio replayed to the fallback.
 * @param {number} [input.fallbackAttemptCount] - Copied through unchanged.
 * @param {number} [input.primaryPassCostUnit] - Cost per minute, primary pass.
 * @param {number} [input.fallbackPassCostUnit] - Cost per minute, fallback pass.
 * @returns {{estimatedRelativeCostUnits: number, fallbackAttemptCount: *, fallbackReplayAudioMs: number, primaryAudioMs: number, totalBillableAudioMs: number}}
 */
var createTurnCostEstimate = (input) => {
  const MS_PER_MINUTE = 60000;
  const primaryAudioMs = Math.max(0, input.primaryAudioMs);
  const fallbackReplayAudioMs = Math.max(0, input.fallbackReplayAudioMs);
  const primaryRate = input.primaryPassCostUnit ?? 1;
  const fallbackRate = input.fallbackPassCostUnit ?? primaryRate;
  const primaryCost = primaryAudioMs / MS_PER_MINUTE * primaryRate;
  const fallbackCost = fallbackReplayAudioMs / MS_PER_MINUTE * fallbackRate;
  return {
    estimatedRelativeCostUnits: primaryCost + fallbackCost,
    fallbackAttemptCount: input.fallbackAttemptCount,
    fallbackReplayAudioMs,
    primaryAudioMs,
    totalBillableAudioMs: primaryAudioMs + fallbackReplayAudioMs
  };
};
|
|
696
1698
|
/**
 * Normalize text for correction handling by delegating to `normalizeText2`.
 *
 * @param {string} text - Input text.
 * @returns {string} Whatever `normalizeText2` returns for the text.
 */
var normalizeCorrectionText = (text) => {
  return normalizeText2(text);
};
|
|
697
1699
|
var isFallbackNeeded = (candidate, config) => {
|
|
698
1700
|
const trimmed = normalizeText2(candidate.text);
|
|
@@ -761,6 +1763,36 @@ var setTurnResult = (session, turnId, input) => {
|
|
|
761
1763
|
result: input.result ?? turn.result
|
|
762
1764
|
} : turn);
|
|
763
1765
|
};
|
|
1766
|
+
/**
 * Return the session's call lifecycle state, creating it on first use.
 *
 * A new state starts with no events and uses the session's `createdAt` for
 * both `startedAt` and `lastEventAt`. The session is mutated in place.
 *
 * @param {{createdAt: number, call?: object}} session - Session record.
 * @returns {object} The (possibly newly created) `session.call` object.
 */
var ensureCallLifecycleState = (session) => {
  if (session.call === undefined || session.call === null) {
    const startedAt = session.createdAt;
    session.call = {
      events: [],
      lastEventAt: startedAt,
      startedAt
    };
  }
  return session.call;
};
|
|
1775
|
+
/**
 * Append a lifecycle event to the session's call state.
 *
 * The event is stamped with the current time, which also becomes
 * `lastEventAt`. An event of type "end" additionally records the disposition
 * and `endedAt` on the lifecycle. The events array is replaced with a new
 * array rather than mutated.
 *
 * @param {object} session - Session whose call state is updated in place.
 * @param {{type: string, disposition?: string, metadata?: *, reason?: string, target?: string}} input
 * @returns {object} The updated lifecycle state.
 */
var pushCallLifecycleEvent = (session, input) => {
  const lifecycle = ensureCallLifecycleState(session);
  const at = Date.now();
  const entry = {
    at,
    disposition: input.disposition,
    metadata: input.metadata,
    reason: input.reason,
    target: input.target,
    type: input.type
  };
  lifecycle.events = [...lifecycle.events, entry];
  lifecycle.lastEventAt = at;
  const isEnd = input.type === "end";
  if (isEnd) {
    lifecycle.disposition = input.disposition;
    lifecycle.endedAt = at;
  }
  return lifecycle;
};
|
|
764
1796
|
var createVoiceSession = (options) => {
|
|
765
1797
|
const logger = resolveLogger(options.logger);
|
|
766
1798
|
const reconnect = {
|
|
@@ -784,15 +1816,21 @@ var createVoiceSession = (options) => {
|
|
|
784
1816
|
trigger: options.sttFallback.trigger ?? "empty-or-low-confidence"
|
|
785
1817
|
} : undefined;
|
|
786
1818
|
const phraseHints = options.phraseHints ?? [];
|
|
1819
|
+
const lexicon = options.lexicon ?? [];
|
|
787
1820
|
let socket = options.socket;
|
|
788
1821
|
let sttSession = null;
|
|
1822
|
+
let ttsSession = null;
|
|
1823
|
+
let ttsSessionPromise = null;
|
|
789
1824
|
let silenceTimer = null;
|
|
1825
|
+
let pendingCommitReason = null;
|
|
790
1826
|
let speechDetected = false;
|
|
791
1827
|
let operationQueue = Promise.resolve();
|
|
792
1828
|
let adapterGenerationCounter = 0;
|
|
793
1829
|
let activeAdapterGeneration = 0;
|
|
1830
|
+
let activeTTSTurnId;
|
|
794
1831
|
const currentTurnAudio = [];
|
|
795
1832
|
let fallbackAttemptsForCurrentTurn = 0;
|
|
1833
|
+
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
796
1834
|
const pruneTurnAudio = () => {
|
|
797
1835
|
const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
|
|
798
1836
|
const cutoffAt = Date.now() - replayWindowMs;
|
|
@@ -825,6 +1863,13 @@ var createVoiceSession = (options) => {
|
|
|
825
1863
|
}
|
|
826
1864
|
clearTimeout(silenceTimer);
|
|
827
1865
|
silenceTimer = null;
|
|
1866
|
+
pendingCommitReason = null;
|
|
1867
|
+
};
|
|
1868
|
+
/**
 * Compute how long to wait before committing a vendor-signalled turn end.
 *
 * When either the silence window or the transcript-stability window is below
 * the extended-commit threshold, the stability window is used as is.
 * Otherwise the delay is the larger of the stability window and twice the
 * silence window capped at the maximum grace period.
 *
 * @returns {number} Delay in milliseconds.
 */
const getVendorCommitDelayMs = () => {
  const { silenceMs, transcriptStabilityMs } = turnDetection;
  const threshold = EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS;
  const usesShortWindow = silenceMs < threshold || transcriptStabilityMs < threshold;
  if (usesShortWindow) {
    return transcriptStabilityMs;
  }
  const cappedGraceMs = Math.min(MAX_VENDOR_COMMIT_GRACE_MS, silenceMs * 2);
  return Math.max(transcriptStabilityMs, cappedGraceMs);
};
|
|
829
1874
|
const send = async (message) => {
|
|
830
1875
|
try {
|
|
@@ -875,6 +1920,24 @@ var createVoiceSession = (options) => {
|
|
|
875
1920
|
});
|
|
876
1921
|
}
|
|
877
1922
|
};
|
|
1923
|
+
/**
 * Close the active TTS adapter session, if any.
 *
 * The TTS bookkeeping (`ttsSession`, `ttsSessionPromise`, `activeTTSTurnId`)
 * is cleared before the adapter is closed. A failure while closing is logged
 * as a warning and not rethrown.
 *
 * @param {string} reason - Passed to the adapter's `close` and to the log.
 * @returns {Promise<void>}
 */
const closeTTSSession = async (reason) => {
  const sessionToClose = ttsSession;
  ttsSession = null;
  ttsSessionPromise = null;
  activeTTSTurnId = undefined;
  if (sessionToClose) {
    try {
      await sessionToClose.close(reason);
    } catch (closeError) {
      logger.warn("voice tts adapter close failed", {
        error: toError(closeError).message,
        reason,
        sessionId: options.id
      });
    }
  }
};
|
|
878
1941
|
const scheduleTurnCommit = (delayMs, reason, reset = true) => {
|
|
879
1942
|
if (!reset && silenceTimer) {
|
|
880
1943
|
return;
|
|
@@ -882,8 +1945,10 @@ var createVoiceSession = (options) => {
|
|
|
882
1945
|
if (reset) {
|
|
883
1946
|
clearSilenceTimer();
|
|
884
1947
|
}
|
|
1948
|
+
pendingCommitReason = reason;
|
|
885
1949
|
silenceTimer = setTimeout(() => {
|
|
886
1950
|
silenceTimer = null;
|
|
1951
|
+
pendingCommitReason = null;
|
|
887
1952
|
api.commitTurn(reason);
|
|
888
1953
|
}, delayMs);
|
|
889
1954
|
};
|
|
@@ -898,6 +1963,10 @@ var createVoiceSession = (options) => {
|
|
|
898
1963
|
return;
|
|
899
1964
|
}
|
|
900
1965
|
const transcriptStabilityAge = session.currentTurn.lastTranscriptAt !== undefined ? Date.now() - session.currentTurn.lastTranscriptAt : undefined;
|
|
1966
|
+
if (reason === "vendor") {
|
|
1967
|
+
scheduleTurnCommit(getVendorCommitDelayMs(), reason);
|
|
1968
|
+
return;
|
|
1969
|
+
}
|
|
901
1970
|
if (reason !== "manual" && typeof transcriptStabilityAge === "number" && transcriptStabilityAge < turnDetection.transcriptStabilityMs) {
|
|
902
1971
|
scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason);
|
|
903
1972
|
return;
|
|
@@ -906,16 +1975,32 @@ var createVoiceSession = (options) => {
|
|
|
906
1975
|
};
|
|
907
1976
|
const failInternal = async (error) => {
|
|
908
1977
|
clearSilenceTimer();
|
|
1978
|
+
let didFail = false;
|
|
909
1979
|
const session = await writeSession((currentSession) => {
|
|
1980
|
+
if (currentSession.status === "failed") {
|
|
1981
|
+
return;
|
|
1982
|
+
}
|
|
1983
|
+
didFail = true;
|
|
910
1984
|
currentSession.lastActivityAt = Date.now();
|
|
911
1985
|
currentSession.status = "failed";
|
|
1986
|
+
if (!currentSession.call?.endedAt) {
|
|
1987
|
+
pushCallLifecycleEvent(currentSession, {
|
|
1988
|
+
disposition: "failed",
|
|
1989
|
+
reason: toError(error).message,
|
|
1990
|
+
type: "end"
|
|
1991
|
+
});
|
|
1992
|
+
}
|
|
912
1993
|
});
|
|
913
|
-
|
|
1994
|
+
if (!didFail) {
|
|
1995
|
+
return;
|
|
1996
|
+
}
|
|
1997
|
+
const resolvedError = toError(error);
|
|
914
1998
|
await send({
|
|
915
1999
|
message: resolvedError.message,
|
|
916
2000
|
recoverable: false,
|
|
917
2001
|
type: "error"
|
|
918
2002
|
});
|
|
2003
|
+
await closeTTSSession("failed");
|
|
919
2004
|
await closeAdapter("failed");
|
|
920
2005
|
speechDetected = false;
|
|
921
2006
|
rewindFallbackTurnAudio();
|
|
@@ -926,13 +2011,24 @@ var createVoiceSession = (options) => {
|
|
|
926
2011
|
session,
|
|
927
2012
|
sessionId: options.id
|
|
928
2013
|
});
|
|
2014
|
+
await options.route.onCallEnd?.({
|
|
2015
|
+
api,
|
|
2016
|
+
context: options.context,
|
|
2017
|
+
disposition: "failed",
|
|
2018
|
+
reason: resolvedError.message,
|
|
2019
|
+
session
|
|
2020
|
+
});
|
|
929
2021
|
};
|
|
930
|
-
const completeInternal = async (result) => {
|
|
2022
|
+
const completeInternal = async (result, input = {}) => {
|
|
931
2023
|
clearSilenceTimer();
|
|
2024
|
+
const disposition = input.disposition ?? "completed";
|
|
2025
|
+
const shouldInvokeOnComplete = input.invokeOnComplete ?? disposition === "completed";
|
|
2026
|
+
let didComplete = false;
|
|
932
2027
|
const session = await writeSession((currentSession) => {
|
|
933
|
-
if (currentSession.status === "completed") {
|
|
2028
|
+
if (currentSession.status === "completed" || currentSession.status === "failed") {
|
|
934
2029
|
return;
|
|
935
2030
|
}
|
|
2031
|
+
didComplete = true;
|
|
936
2032
|
currentSession.lastActivityAt = Date.now();
|
|
937
2033
|
currentSession.status = "completed";
|
|
938
2034
|
if (result !== undefined && currentSession.turns.length > 0) {
|
|
@@ -943,18 +2039,135 @@ var createVoiceSession = (options) => {
|
|
|
943
2039
|
});
|
|
944
2040
|
}
|
|
945
2041
|
}
|
|
2042
|
+
if (!currentSession.call?.endedAt) {
|
|
2043
|
+
pushCallLifecycleEvent(currentSession, {
|
|
2044
|
+
disposition,
|
|
2045
|
+
metadata: input.metadata,
|
|
2046
|
+
reason: input.reason,
|
|
2047
|
+
target: input.target,
|
|
2048
|
+
type: "end"
|
|
2049
|
+
});
|
|
2050
|
+
}
|
|
946
2051
|
});
|
|
2052
|
+
if (!didComplete) {
|
|
2053
|
+
return;
|
|
2054
|
+
}
|
|
947
2055
|
await send({
|
|
948
2056
|
sessionId: options.id,
|
|
949
2057
|
type: "complete"
|
|
950
2058
|
});
|
|
2059
|
+
await closeTTSSession("complete");
|
|
951
2060
|
await closeAdapter("complete");
|
|
952
2061
|
speechDetected = false;
|
|
953
2062
|
rewindFallbackTurnAudio();
|
|
954
|
-
|
|
2063
|
+
if (disposition === "transferred" && input.target) {
|
|
2064
|
+
await options.route.onTransfer?.({
|
|
2065
|
+
api,
|
|
2066
|
+
context: options.context,
|
|
2067
|
+
metadata: input.metadata,
|
|
2068
|
+
reason: input.reason,
|
|
2069
|
+
session,
|
|
2070
|
+
target: input.target
|
|
2071
|
+
});
|
|
2072
|
+
}
|
|
2073
|
+
if (disposition === "escalated" && input.reason) {
|
|
2074
|
+
await options.route.onEscalation?.({
|
|
2075
|
+
api,
|
|
2076
|
+
context: options.context,
|
|
2077
|
+
metadata: input.metadata,
|
|
2078
|
+
reason: input.reason,
|
|
2079
|
+
session
|
|
2080
|
+
});
|
|
2081
|
+
}
|
|
2082
|
+
if (disposition === "voicemail") {
|
|
2083
|
+
await options.route.onVoicemail?.({
|
|
2084
|
+
api,
|
|
2085
|
+
context: options.context,
|
|
2086
|
+
metadata: input.metadata,
|
|
2087
|
+
session
|
|
2088
|
+
});
|
|
2089
|
+
}
|
|
2090
|
+
if (disposition === "no-answer") {
|
|
2091
|
+
await options.route.onNoAnswer?.({
|
|
2092
|
+
api,
|
|
2093
|
+
context: options.context,
|
|
2094
|
+
metadata: input.metadata,
|
|
2095
|
+
session
|
|
2096
|
+
});
|
|
2097
|
+
}
|
|
2098
|
+
if (shouldInvokeOnComplete) {
|
|
2099
|
+
await options.route.onComplete({
|
|
2100
|
+
api,
|
|
2101
|
+
context: options.context,
|
|
2102
|
+
session
|
|
2103
|
+
});
|
|
2104
|
+
}
|
|
2105
|
+
await options.route.onCallEnd?.({
|
|
955
2106
|
api,
|
|
956
2107
|
context: options.context,
|
|
957
|
-
|
|
2108
|
+
disposition,
|
|
2109
|
+
metadata: input.metadata,
|
|
2110
|
+
reason: input.reason,
|
|
2111
|
+
session,
|
|
2112
|
+
target: input.target
|
|
2113
|
+
});
|
|
2114
|
+
};
|
|
2115
|
+
/**
 * Record a transfer on the session and finish the call as "transferred".
 *
 * A "transfer" lifecycle event is written first; the call is then completed
 * with `invokeOnComplete: false`.
 *
 * @param {{target: string, reason?: string, metadata?: *, result?: *}} input
 * @returns {Promise<void>}
 */
const transferInternal = async (input) => {
  const { metadata, reason, result, target } = input;
  await writeSession((currentSession) => {
    pushCallLifecycleEvent(currentSession, {
      metadata,
      reason,
      target,
      type: "transfer"
    });
  });
  const completion = {
    disposition: "transferred",
    invokeOnComplete: false,
    metadata,
    reason,
    target
  };
  await completeInternal(result, completion);
};
|
|
2132
|
+
/**
 * Record an escalation on the session and finish the call as "escalated".
 *
 * An "escalation" lifecycle event is written first; the call is then
 * completed with `invokeOnComplete: false`.
 *
 * @param {{reason: string, metadata?: *, result?: *}} input
 * @returns {Promise<void>}
 */
const escalateInternal = async (input) => {
  const { metadata, reason, result } = input;
  await writeSession((currentSession) => {
    pushCallLifecycleEvent(currentSession, {
      metadata,
      reason,
      type: "escalation"
    });
  });
  const completion = {
    disposition: "escalated",
    invokeOnComplete: false,
    metadata,
    reason
  };
  await completeInternal(result, completion);
};
|
|
2147
|
+
/**
 * Record a "no-answer" lifecycle event and finish the call as "no-answer".
 *
 * The call is completed with `invokeOnComplete: false`.
 *
 * @param {{metadata?: *, result?: *}} [input] - Optional details.
 * @returns {Promise<void>}
 */
const markNoAnswerInternal = async (input) => {
  const metadata = input?.metadata;
  const result = input?.result;
  await writeSession((currentSession) => {
    pushCallLifecycleEvent(currentSession, {
      metadata,
      type: "no-answer"
    });
  });
  const completion = {
    disposition: "no-answer",
    invokeOnComplete: false,
    metadata
  };
  await completeInternal(result, completion);
};
|
|
2160
|
+
/**
 * Record a "voicemail" lifecycle event and finish the call as "voicemail".
 *
 * The call is completed with `invokeOnComplete: false`.
 *
 * @param {{metadata?: *, result?: *}} [input] - Optional details.
 * @returns {Promise<void>}
 */
const markVoicemailInternal = async (input) => {
  const metadata = input?.metadata;
  const result = input?.result;
  await writeSession((currentSession) => {
    pushCallLifecycleEvent(currentSession, {
      metadata,
      type: "voicemail"
    });
  });
  const completion = {
    disposition: "voicemail",
    invokeOnComplete: false,
    metadata
  };
  await completeInternal(result, completion);
};
|
|
960
2173
|
const handleError = async (event) => {
|
|
@@ -980,6 +2193,7 @@ var createVoiceSession = (options) => {
|
|
|
980
2193
|
};
|
|
981
2194
|
/**
 * Reset the per-turn fallback state: empty the buffered turn audio in place
 * and zero the fallback attempt and replayed-audio counters.
 */
const rewindFallbackTurnAudio = () => {
  // Emptied in place because the buffer is a const binding held elsewhere in
  // the enclosing scope.
  currentTurnAudio.splice(0, currentTurnAudio.length);
  fallbackReplayAudioMsForCurrentTurn = 0;
  fallbackAttemptsForCurrentTurn = 0;
};
|
|
985
2199
|
const runFallbackTranscription = async (primaryText, primaryTranscripts) => {
|
|
@@ -1007,6 +2221,8 @@ var createVoiceSession = (options) => {
|
|
|
1007
2221
|
try {
|
|
1008
2222
|
fallbackSession = await sttFallback.adapter.open({
|
|
1009
2223
|
format: DEFAULT_FORMAT,
|
|
2224
|
+
languageStrategy: options.languageStrategy,
|
|
2225
|
+
lexicon,
|
|
1010
2226
|
phraseHints,
|
|
1011
2227
|
sessionId: `${options.id}:fallback:${fallbackAttemptsForCurrentTurn}`
|
|
1012
2228
|
});
|
|
@@ -1060,6 +2276,7 @@ var createVoiceSession = (options) => {
|
|
|
1060
2276
|
await fallbackSession.send(chunk);
|
|
1061
2277
|
}
|
|
1062
2278
|
const replayDurationMs = getBufferedAudioDurationMs(replayAudio);
|
|
2279
|
+
fallbackReplayAudioMsForCurrentTurn += replayDurationMs;
|
|
1063
2280
|
const completionTimeoutMs = Math.max(sttFallback.completionTimeoutMs, Math.min(4000, Math.max(sttFallback.settleMs * 4, Math.round(replayDurationMs * 0.18))));
|
|
1064
2281
|
const waitStartedAt = Date.now();
|
|
1065
2282
|
while (Date.now() - waitStartedAt < completionTimeoutMs) {
|
|
@@ -1152,6 +2369,7 @@ var createVoiceSession = (options) => {
|
|
|
1152
2369
|
api,
|
|
1153
2370
|
context: options.context,
|
|
1154
2371
|
fallback: input.fallbackDiagnostics,
|
|
2372
|
+
lexicon,
|
|
1155
2373
|
phraseHints,
|
|
1156
2374
|
session: input.session,
|
|
1157
2375
|
text: originalText,
|
|
@@ -1220,46 +2438,52 @@ var createVoiceSession = (options) => {
|
|
|
1220
2438
|
};
|
|
1221
2439
|
};
|
|
1222
2440
|
const handlePartial = async (transcript) => {
|
|
1223
|
-
await writeSession((
|
|
1224
|
-
const nextPartialStartedAt = transcript.startedAtMs ??
|
|
1225
|
-
const nextPartialEndedAt = transcript.endedAtMs ??
|
|
1226
|
-
const preferredPartial = selectPreferredTranscriptText(
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
2441
|
+
const session = await writeSession((session2) => {
|
|
2442
|
+
const nextPartialStartedAt = transcript.startedAtMs ?? session2.currentTurn.partialStartedAt;
|
|
2443
|
+
const nextPartialEndedAt = transcript.endedAtMs ?? session2.currentTurn.partialEndedAt;
|
|
2444
|
+
const preferredPartial = selectPreferredTranscriptText(session2.currentTurn.partialText, transcript.text);
|
|
2445
|
+
session2.currentTurn.lastTranscriptAt = Date.now();
|
|
2446
|
+
session2.currentTurn.partialStartedAt = nextPartialStartedAt;
|
|
2447
|
+
session2.currentTurn.partialEndedAt = nextPartialEndedAt;
|
|
2448
|
+
session2.currentTurn.partialText = buildTurnText(session2.currentTurn.transcripts, preferredPartial, {
|
|
1231
2449
|
partialEndedAtMs: nextPartialEndedAt,
|
|
1232
2450
|
partialStartedAtMs: nextPartialStartedAt
|
|
1233
2451
|
});
|
|
1234
|
-
|
|
1235
|
-
|
|
2452
|
+
session2.lastActivityAt = Date.now();
|
|
2453
|
+
session2.status = "active";
|
|
1236
2454
|
});
|
|
2455
|
+
if (silenceTimer && pendingCommitReason === "vendor") {
|
|
2456
|
+
scheduleTurnCommit(getVendorCommitDelayMs(), "vendor");
|
|
2457
|
+
}
|
|
1237
2458
|
await send({
|
|
1238
2459
|
transcript,
|
|
1239
2460
|
type: "partial"
|
|
1240
2461
|
});
|
|
1241
2462
|
};
|
|
1242
2463
|
const handleFinal = async (transcript) => {
|
|
1243
|
-
await writeSession((
|
|
1244
|
-
const alreadyPresent =
|
|
2464
|
+
const session = await writeSession((session2) => {
|
|
2465
|
+
const alreadyPresent = session2.currentTurn.transcripts.some((existing) => existing.id === transcript.id);
|
|
1245
2466
|
if (!alreadyPresent) {
|
|
1246
|
-
|
|
1247
|
-
...
|
|
2467
|
+
session2.currentTurn.transcripts = [
|
|
2468
|
+
...session2.currentTurn.transcripts,
|
|
1248
2469
|
cloneTranscript(transcript)
|
|
1249
2470
|
];
|
|
1250
|
-
|
|
1251
|
-
...
|
|
2471
|
+
session2.transcripts = [
|
|
2472
|
+
...session2.transcripts,
|
|
1252
2473
|
cloneTranscript(transcript)
|
|
1253
2474
|
];
|
|
1254
2475
|
}
|
|
1255
|
-
|
|
1256
|
-
partialEndedAtMs:
|
|
1257
|
-
partialStartedAtMs:
|
|
2476
|
+
session2.currentTurn.finalText = buildTurnText(session2.currentTurn.transcripts, session2.currentTurn.partialText, {
|
|
2477
|
+
partialEndedAtMs: session2.currentTurn.partialEndedAt,
|
|
2478
|
+
partialStartedAtMs: session2.currentTurn.partialStartedAt
|
|
1258
2479
|
});
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
2480
|
+
session2.currentTurn.lastTranscriptAt = Date.now();
|
|
2481
|
+
session2.lastActivityAt = Date.now();
|
|
2482
|
+
session2.status = "active";
|
|
1262
2483
|
});
|
|
2484
|
+
if (silenceTimer && pendingCommitReason === "vendor") {
|
|
2485
|
+
scheduleTurnCommit(getVendorCommitDelayMs(), "vendor");
|
|
2486
|
+
}
|
|
1263
2487
|
await send({
|
|
1264
2488
|
transcript,
|
|
1265
2489
|
type: "final"
|
|
@@ -1286,6 +2510,8 @@ var createVoiceSession = (options) => {
|
|
|
1286
2510
|
}
|
|
1287
2511
|
const openedSession = await options.stt.open({
|
|
1288
2512
|
format: DEFAULT_FORMAT,
|
|
2513
|
+
languageStrategy: options.languageStrategy,
|
|
2514
|
+
lexicon,
|
|
1289
2515
|
phraseHints,
|
|
1290
2516
|
sessionId: options.id
|
|
1291
2517
|
});
|
|
@@ -1320,13 +2546,93 @@ var createVoiceSession = (options) => {
|
|
|
1320
2546
|
});
|
|
1321
2547
|
return openedSession;
|
|
1322
2548
|
};
|
|
2549
|
+
/**
 * Lazily opens the TTS adapter session for this voice session and memoizes it.
 *
 * Returns `null` when no TTS adapter is configured. Otherwise returns the
 * already-open session, or the in-flight open promise so concurrent callers
 * share a single `open()` call. A failed open clears the memoized promise so a
 * later call can retry.
 */
const ensureTTSSession = async () => {
  const adapter = options.tts;
  if (!adapter) {
    return null;
  }
  const existing = ttsSession || ttsSessionPromise;
  if (existing) {
    return existing;
  }
  // Copies the chunk's bytes into a fresh Uint8Array whatever view type it arrived as.
  const copyChunkBytes = (chunk) => {
    if (chunk instanceof Uint8Array) {
      return new Uint8Array(chunk);
    }
    if (chunk instanceof ArrayBuffer) {
      return new Uint8Array(chunk.slice(0));
    }
    const { buffer, byteOffset, byteLength } = chunk;
    return new Uint8Array(buffer.slice(byteOffset, byteOffset + byteLength));
  };
  const openAndWire = async () => {
    const opened = await adapter.open({
      lexicon,
      sessionId: options.id
    });
    ttsSession = opened;
    // Events from a session that is no longer the active one are ignored.
    const isCurrent = () => ttsSession === opened;
    opened.on("audio", ({ chunk, format, receivedAt }) => {
      runSerial("tts.audio", async () => {
        if (!isCurrent()) {
          return;
        }
        const bytes = copyChunkBytes(chunk);
        await send({
          chunkBase64: encodeBase64(bytes),
          format,
          receivedAt,
          turnId: activeTTSTurnId,
          type: "audio"
        });
      });
    });
    opened.on("error", (event) => {
      runSerial("tts.error", async () => {
        if (!isCurrent()) {
          return;
        }
        await send({
          message: toError(event.error).message,
          recoverable: event.recoverable,
          type: "error"
        });
      });
    });
    opened.on("close", () => {
      runSerial("tts.close", async () => {
        if (!isCurrent()) {
          return;
        }
        ttsSession = null;
        ttsSessionPromise = null;
        activeTTSTurnId = undefined;
      });
    });
    return opened;
  };
  ttsSessionPromise = openAndWire().catch((error) => {
    ttsSessionPromise = null;
    throw error;
  });
  return ttsSessionPromise;
};
|
|
2609
|
+
// Fire-and-forget prewarm of the TTS session. Does nothing when no TTS adapter
// is configured or a session is already open / opening; a failed prewarm is
// logged as a warning instead of being propagated.
const warmTTSSession = () => {
  const hasAdapter = Boolean(options.tts);
  const alreadyOpening = Boolean(ttsSession || ttsSessionPromise);
  if (!hasAdapter || alreadyOpening) {
    return;
  }
  const reportFailure = (error) => {
    logger.warn("voice tts prewarm failed", {
      error: toError(error).message,
      sessionId: options.id
    });
  };
  ensureTTSSession().catch(reportFailure);
};
|
|
1323
2620
|
const completeTurn = async (session, turn) => {
|
|
1324
|
-
const
|
|
2621
|
+
const committedOutput = await options.route.onTurn({
|
|
1325
2622
|
api,
|
|
1326
2623
|
context: options.context,
|
|
1327
2624
|
session,
|
|
1328
2625
|
turn
|
|
1329
2626
|
});
|
|
2627
|
+
const output = {
|
|
2628
|
+
assistantText: committedOutput?.assistantText,
|
|
2629
|
+
complete: committedOutput?.complete,
|
|
2630
|
+
escalate: committedOutput?.escalate,
|
|
2631
|
+
noAnswer: committedOutput?.noAnswer,
|
|
2632
|
+
result: committedOutput?.result,
|
|
2633
|
+
transfer: committedOutput?.transfer,
|
|
2634
|
+
voicemail: committedOutput?.voicemail
|
|
2635
|
+
};
|
|
1330
2636
|
if (output?.assistantText) {
|
|
1331
2637
|
await writeSession((currentSession) => {
|
|
1332
2638
|
setTurnResult(currentSession, turn.id, {
|
|
@@ -1338,6 +2644,19 @@ var createVoiceSession = (options) => {
|
|
|
1338
2644
|
turnId: turn.id,
|
|
1339
2645
|
type: "assistant"
|
|
1340
2646
|
});
|
|
2647
|
+
try {
|
|
2648
|
+
const activeTTSSession = await ensureTTSSession();
|
|
2649
|
+
if (activeTTSSession) {
|
|
2650
|
+
activeTTSTurnId = turn.id;
|
|
2651
|
+
await activeTTSSession.send(output.assistantText);
|
|
2652
|
+
}
|
|
2653
|
+
} catch (error) {
|
|
2654
|
+
logger.warn("voice tts send failed", {
|
|
2655
|
+
error: toError(error).message,
|
|
2656
|
+
sessionId: options.id,
|
|
2657
|
+
turnId: turn.id
|
|
2658
|
+
});
|
|
2659
|
+
}
|
|
1341
2660
|
}
|
|
1342
2661
|
if (output?.result !== undefined) {
|
|
1343
2662
|
await writeSession((currentSession) => {
|
|
@@ -1346,6 +2665,37 @@ var createVoiceSession = (options) => {
|
|
|
1346
2665
|
});
|
|
1347
2666
|
});
|
|
1348
2667
|
}
|
|
2668
|
+
if (output?.transfer) {
|
|
2669
|
+
await transferInternal({
|
|
2670
|
+
metadata: output.transfer.metadata,
|
|
2671
|
+
reason: output.transfer.reason,
|
|
2672
|
+
result: output.result,
|
|
2673
|
+
target: output.transfer.target
|
|
2674
|
+
});
|
|
2675
|
+
return;
|
|
2676
|
+
}
|
|
2677
|
+
if (output?.escalate) {
|
|
2678
|
+
await escalateInternal({
|
|
2679
|
+
metadata: output.escalate.metadata,
|
|
2680
|
+
reason: output.escalate.reason,
|
|
2681
|
+
result: output.result
|
|
2682
|
+
});
|
|
2683
|
+
return;
|
|
2684
|
+
}
|
|
2685
|
+
if (output?.voicemail) {
|
|
2686
|
+
await markVoicemailInternal({
|
|
2687
|
+
metadata: output.voicemail.metadata,
|
|
2688
|
+
result: output.result
|
|
2689
|
+
});
|
|
2690
|
+
return;
|
|
2691
|
+
}
|
|
2692
|
+
if (output?.noAnswer) {
|
|
2693
|
+
await markNoAnswerInternal({
|
|
2694
|
+
metadata: output.noAnswer.metadata,
|
|
2695
|
+
result: output.result
|
|
2696
|
+
});
|
|
2697
|
+
return;
|
|
2698
|
+
}
|
|
1349
2699
|
if (output?.complete) {
|
|
1350
2700
|
await completeInternal(output.result);
|
|
1351
2701
|
}
|
|
@@ -1410,11 +2760,18 @@ var createVoiceSession = (options) => {
|
|
|
1410
2760
|
scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason, false);
|
|
1411
2761
|
return;
|
|
1412
2762
|
}
|
|
2763
|
+
const costEstimate = createTurnCostEstimate({
|
|
2764
|
+
fallbackAttemptCount: fallbackAttemptsForCurrentTurn,
|
|
2765
|
+
fallbackPassCostUnit: options.costTelemetry?.fallbackPassCostUnit,
|
|
2766
|
+
fallbackReplayAudioMs: fallbackReplayAudioMsForCurrentTurn,
|
|
2767
|
+
primaryAudioMs: getBufferedAudioDurationMs(currentTurnAudio.map((audio) => audio.chunk)),
|
|
2768
|
+
primaryPassCostUnit: options.costTelemetry?.primaryPassCostUnit
|
|
2769
|
+
});
|
|
1413
2770
|
const turn = {
|
|
1414
2771
|
committedAt: Date.now(),
|
|
1415
2772
|
id: createId(),
|
|
1416
2773
|
text: finalText,
|
|
1417
|
-
quality: createTurnQuality(transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics),
|
|
2774
|
+
quality: createTurnQuality(transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics, costEstimate),
|
|
1418
2775
|
transcripts: transcripts.length > 0 ? transcripts : [
|
|
1419
2776
|
{
|
|
1420
2777
|
id: createId(),
|
|
@@ -1441,6 +2798,13 @@ var createVoiceSession = (options) => {
|
|
|
1441
2798
|
sessionId: options.id,
|
|
1442
2799
|
turnId: turn.id
|
|
1443
2800
|
});
|
|
2801
|
+
await options.costTelemetry?.onTurnCost?.({
|
|
2802
|
+
api,
|
|
2803
|
+
context: options.context,
|
|
2804
|
+
estimate: costEstimate,
|
|
2805
|
+
session: updatedSession,
|
|
2806
|
+
turn
|
|
2807
|
+
});
|
|
1444
2808
|
await send({
|
|
1445
2809
|
turn,
|
|
1446
2810
|
type: "turn"
|
|
@@ -1486,6 +2850,11 @@ var createVoiceSession = (options) => {
|
|
|
1486
2850
|
};
|
|
1487
2851
|
}
|
|
1488
2852
|
}
|
|
2853
|
+
if (shouldFireOnSession) {
|
|
2854
|
+
pushCallLifecycleEvent(session, {
|
|
2855
|
+
type: "start"
|
|
2856
|
+
});
|
|
2857
|
+
}
|
|
1489
2858
|
await options.store.set(options.id, session);
|
|
1490
2859
|
await send({
|
|
1491
2860
|
sessionId: options.id,
|
|
@@ -1494,6 +2863,11 @@ var createVoiceSession = (options) => {
|
|
|
1494
2863
|
type: "session"
|
|
1495
2864
|
});
|
|
1496
2865
|
if (shouldFireOnSession) {
|
|
2866
|
+
await options.route.onCallStart?.({
|
|
2867
|
+
api,
|
|
2868
|
+
context: options.context,
|
|
2869
|
+
session
|
|
2870
|
+
});
|
|
1497
2871
|
await options.route.onSession?.({
|
|
1498
2872
|
api,
|
|
1499
2873
|
context: options.context,
|
|
@@ -1509,9 +2883,11 @@ var createVoiceSession = (options) => {
|
|
|
1509
2883
|
}
|
|
1510
2884
|
resumePendingTurnCommit(session);
|
|
1511
2885
|
await ensureAdapter();
|
|
2886
|
+
warmTTSSession();
|
|
1512
2887
|
};
|
|
1513
2888
|
const disconnectInternal = async (event) => {
|
|
1514
2889
|
clearSilenceTimer();
|
|
2890
|
+
await closeTTSSession(event?.reason);
|
|
1515
2891
|
await closeAdapter(event?.reason);
|
|
1516
2892
|
rewindFallbackTurnAudio();
|
|
1517
2893
|
if (reconnect.strategy === "fail") {
|
|
@@ -1570,9 +2946,30 @@ var createVoiceSession = (options) => {
|
|
|
1570
2946
|
id: options.id,
|
|
1571
2947
|
close: async (reason) => {
|
|
1572
2948
|
await runSerial("api.close", async () => {
|
|
2949
|
+
const session = await writeSession((currentSession) => {
|
|
2950
|
+
if (currentSession.status !== "completed" && currentSession.status !== "failed" && !currentSession.call?.endedAt) {
|
|
2951
|
+
currentSession.lastActivityAt = Date.now();
|
|
2952
|
+
currentSession.status = "completed";
|
|
2953
|
+
pushCallLifecycleEvent(currentSession, {
|
|
2954
|
+
disposition: "closed",
|
|
2955
|
+
reason,
|
|
2956
|
+
type: "end"
|
|
2957
|
+
});
|
|
2958
|
+
}
|
|
2959
|
+
});
|
|
1573
2960
|
clearSilenceTimer();
|
|
2961
|
+
await closeTTSSession(reason);
|
|
1574
2962
|
await closeAdapter(reason);
|
|
1575
2963
|
await Promise.resolve(socket.close(1000, reason));
|
|
2964
|
+
if (session.call?.endedAt && session.call.disposition === "closed") {
|
|
2965
|
+
await options.route.onCallEnd?.({
|
|
2966
|
+
api,
|
|
2967
|
+
context: options.context,
|
|
2968
|
+
disposition: "closed",
|
|
2969
|
+
reason,
|
|
2970
|
+
session
|
|
2971
|
+
});
|
|
2972
|
+
}
|
|
1576
2973
|
});
|
|
1577
2974
|
},
|
|
1578
2975
|
commitTurn: async (reason = "manual") => runSerial("api.commitTurn", async () => {
|
|
@@ -1590,9 +2987,21 @@ var createVoiceSession = (options) => {
|
|
|
1590
2987
|
fail: async (error) => runSerial("api.fail", async () => {
|
|
1591
2988
|
await failInternal(error);
|
|
1592
2989
|
}),
|
|
2990
|
+
escalate: async (input) => runSerial("api.escalate", async () => {
|
|
2991
|
+
await escalateInternal(input);
|
|
2992
|
+
}),
|
|
2993
|
+
markNoAnswer: async (input) => runSerial("api.markNoAnswer", async () => {
|
|
2994
|
+
await markNoAnswerInternal(input);
|
|
2995
|
+
}),
|
|
2996
|
+
markVoicemail: async (input) => runSerial("api.markVoicemail", async () => {
|
|
2997
|
+
await markVoicemailInternal(input);
|
|
2998
|
+
}),
|
|
1593
2999
|
receiveAudio: async (audio) => runSerial("api.receiveAudio", async () => {
|
|
1594
3000
|
await receiveAudioInternal(audio);
|
|
1595
3001
|
}),
|
|
3002
|
+
transfer: async (input) => runSerial("api.transfer", async () => {
|
|
3003
|
+
await transferInternal(input);
|
|
3004
|
+
}),
|
|
1596
3005
|
snapshot: async () => runSerial("api.snapshot", async () => readSession())
|
|
1597
3006
|
};
|
|
1598
3007
|
return api;
|
|
@@ -1740,6 +3149,7 @@ var resolveSessionOptions = (config) => {
|
|
|
1740
3149
|
const preset = resolveVoiceRuntimePreset(config.preset);
|
|
1741
3150
|
return {
|
|
1742
3151
|
audioConditioning: config.audioConditioning !== undefined ? resolveAudioConditioningConfig(config.audioConditioning) : preset.audioConditioning,
|
|
3152
|
+
costTelemetry: config.costTelemetry,
|
|
1743
3153
|
sttFallback: resolveSTTFallbackConfig(config.sttFallback),
|
|
1744
3154
|
logger: config.logger,
|
|
1745
3155
|
reconnect: {
|
|
@@ -1759,6 +3169,13 @@ var normalizePhraseHints = (hints) => (hints ?? []).map((hint) => ({
|
|
|
1759
3169
|
aliases: hint.aliases?.filter((value) => typeof value === "string" && value.trim().length > 0),
|
|
1760
3170
|
text: hint.text.trim()
|
|
1761
3171
|
})).filter((hint) => hint.text.length > 0);
|
|
3172
|
+
/**
 * Normalizes user-supplied lexicon entries.
 *
 * - `text`, `language` and `pronunciation` are trimmed; blank or non-string
 *   `language` / `pronunciation` become `undefined`.
 * - `aliases` keeps only non-blank string values (values are not trimmed).
 * - Entries whose `text` is missing, not a string, or blank are dropped, as are
 *   null/undefined entries, instead of throwing a TypeError.
 *
 * @param {Array<object>|null|undefined} entries Raw lexicon entries.
 * @returns {Array<object>} Normalized entries, in input order.
 */
var normalizeLexicon = (entries) => {
  const trimmedOrUndefined = (value) => {
    if (typeof value !== "string") {
      return undefined;
    }
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  };
  const normalized = [];
  for (const entry of entries ?? []) {
    const text = trimmedOrUndefined(entry?.text);
    if (text === undefined) {
      // Nothing usable to match or pronounce; skip rather than crash.
      continue;
    }
    normalized.push({
      ...entry,
      aliases: entry.aliases?.filter((value) => typeof value === "string" && value.trim().length > 0),
      language: trimmedOrUndefined(entry.language),
      pronunciation: trimmedOrUndefined(entry.pronunciation),
      text
    });
  }
  return normalized;
};
|
|
1762
3179
|
var resolvePhraseHints = async (config, input) => {
|
|
1763
3180
|
if (!config.phraseHints) {
|
|
1764
3181
|
return [];
|
|
@@ -1768,6 +3185,15 @@ var resolvePhraseHints = async (config, input) => {
|
|
|
1768
3185
|
}
|
|
1769
3186
|
return normalizePhraseHints(config.phraseHints);
|
|
1770
3187
|
};
|
|
3188
|
+
/**
 * Resolves the configured lexicon for a session.
 *
 * `config.lexicon` may be absent (yields `[]`), a static list, or a resolver
 * function that is called with `input` and awaited. The result is passed
 * through `normalizeLexicon`.
 */
var resolveLexicon = async (config, input) => {
  const { lexicon } = config;
  if (!lexicon) {
    return [];
  }
  const entries = typeof lexicon === "function" ? await config.lexicon(input) : lexicon;
  return normalizeLexicon(entries);
};
|
|
1771
3197
|
var voice = (config) => {
|
|
1772
3198
|
const runtime = {
|
|
1773
3199
|
activeSessions: new Map,
|
|
@@ -1788,19 +3214,55 @@ var voice = (config) => {
|
|
|
1788
3214
|
scenarioId,
|
|
1789
3215
|
sessionId
|
|
1790
3216
|
});
|
|
3217
|
+
const lexicon = await resolveLexicon(config, {
|
|
3218
|
+
context,
|
|
3219
|
+
scenarioId,
|
|
3220
|
+
sessionId
|
|
3221
|
+
});
|
|
1791
3222
|
return createVoiceSession({
|
|
1792
3223
|
audioConditioning: sessionOptions.audioConditioning,
|
|
1793
3224
|
context,
|
|
1794
3225
|
id: sessionId,
|
|
3226
|
+
languageStrategy: config.languageStrategy,
|
|
3227
|
+
lexicon,
|
|
1795
3228
|
logger: sessionOptions.logger,
|
|
1796
3229
|
phraseHints,
|
|
1797
3230
|
reconnect: sessionOptions.reconnect,
|
|
1798
3231
|
route: {
|
|
1799
3232
|
correctTurn: config.correctTurn,
|
|
3233
|
+
onCallEnd: async (input) => {
|
|
3234
|
+
let hookError;
|
|
3235
|
+
try {
|
|
3236
|
+
await config.onCallEnd?.(input);
|
|
3237
|
+
} catch (error) {
|
|
3238
|
+
hookError = error;
|
|
3239
|
+
}
|
|
3240
|
+
try {
|
|
3241
|
+
await recordVoiceRuntimeOps({
|
|
3242
|
+
api: input.api,
|
|
3243
|
+
config: config.ops,
|
|
3244
|
+
context: input.context,
|
|
3245
|
+
disposition: input.disposition,
|
|
3246
|
+
metadata: input.metadata,
|
|
3247
|
+
reason: input.reason,
|
|
3248
|
+
session: input.session,
|
|
3249
|
+
target: input.target
|
|
3250
|
+
});
|
|
3251
|
+
} finally {
|
|
3252
|
+
if (hookError) {
|
|
3253
|
+
throw hookError;
|
|
3254
|
+
}
|
|
3255
|
+
}
|
|
3256
|
+
},
|
|
3257
|
+
onCallStart: config.onCallStart,
|
|
1800
3258
|
onComplete: config.onComplete,
|
|
3259
|
+
onEscalation: config.onEscalation,
|
|
1801
3260
|
onError: config.onError,
|
|
3261
|
+
onNoAnswer: config.onNoAnswer,
|
|
1802
3262
|
onSession: config.onSession,
|
|
1803
|
-
|
|
3263
|
+
onTransfer: config.onTransfer,
|
|
3264
|
+
onTurn,
|
|
3265
|
+
onVoicemail: config.onVoicemail
|
|
1804
3266
|
},
|
|
1805
3267
|
scenarioId,
|
|
1806
3268
|
socket: createSocketAdapter(ws),
|
|
@@ -1808,6 +3270,7 @@ var voice = (config) => {
|
|
|
1808
3270
|
stt: config.stt,
|
|
1809
3271
|
sttFallback: sessionOptions.sttFallback,
|
|
1810
3272
|
sttLifecycle: sessionOptions.sttLifecycle,
|
|
3273
|
+
tts: config.tts,
|
|
1811
3274
|
turnDetection: sessionOptions.turnDetection
|
|
1812
3275
|
});
|
|
1813
3276
|
};
|
|
@@ -1916,6 +3379,171 @@ var voice = (config) => {
|
|
|
1916
3379
|
}
|
|
1917
3380
|
}).use(htmxRoutes());
|
|
1918
3381
|
};
|
|
3382
|
+
// src/fileStore.ts
|
|
3383
|
+
import { mkdir, readFile, readdir, rename, rm, writeFile } from "fs/promises";
|
|
3384
|
+
import { join } from "path";
|
|
3385
|
+
/**
 * Lists the full paths of the regular `*.json` files directly inside `directory`.
 * A directory that does not exist (ENOENT) yields an empty list; any other
 * error is rethrown.
 */
var listJsonFiles = async (directory) => {
  try {
    const entries = await readdir(directory, { withFileTypes: true });
    const paths = [];
    for (const entry of entries) {
      if (entry.isFile() && entry.name.endsWith(".json")) {
        paths.push(join(directory, entry.name));
      }
    }
    return paths;
  } catch (error) {
    if (error.code !== "ENOENT") {
      throw error;
    }
    return [];
  }
};
|
|
3398
|
+
// Turns a store id into a flat file name: the id is URI-component-escaped
// (so "/" cannot introduce sub-paths) and suffixed with ".json".
var encodeStoreId = (id) => {
  const safeName = encodeURIComponent(id);
  return `${safeName}.json`;
};
// Full path of the JSON file that backs `id` inside `directory`.
var resolveFilePath = (directory, id) => {
  const fileName = encodeStoreId(id);
  return join(directory, fileName);
};
// Reads `path` as UTF-8 text and parses it as JSON.
var readJsonFile = async (path) => {
  const raw = await readFile(path, "utf8");
  return JSON.parse(raw);
};
|
|
3401
|
+
/**
 * Writes `value` as JSON to `path` by writing a uniquely named temp file next
 * to the target and renaming it over the target.
 *
 * @param {string} path Destination file path.
 * @param {unknown} value JSON-serializable value.
 * @param {{ directory: string, pretty?: boolean }} options `directory` is
 *   created recursively first; output is indented by 2 unless `pretty === false`.
 * @throws Rethrows any write/rename error after removing the temp file.
 */
var writeJsonFile = async (path, value, options) => {
  await mkdir(options.directory, {
    recursive: true
  });
  const tempPath = `${path}.${crypto.randomUUID()}.tmp`;
  const indent = options.pretty === false ? undefined : 2;
  try {
    await writeFile(tempPath, JSON.stringify(value, null, indent));
    await rename(tempPath, path);
  } catch (error) {
    // Do not leave an orphaned temp file behind. Cleanup is best-effort: the
    // original failure is the error the caller needs to see.
    await rm(tempPath, { force: true }).catch(() => {});
    throw error;
  }
};
|
|
3409
|
+
/**
 * Session store backed by one JSON file per session id inside `options.directory`.
 *
 * @param {{ directory: string, pretty?: boolean }} options Passed through to `writeJsonFile`.
 * @returns {{ get, getOrCreate, list, remove, set }}
 */
var createVoiceFileSessionStore = (options) => {
  // Reads a JSON file, mapping "file does not exist" to `undefined`.
  const readIfPresent = async (path) => {
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  const get = async (id) => readIfPresent(resolveFilePath(options.directory, id));
  // Returns the stored session, or creates, persists and returns a fresh record.
  const getOrCreate = async (id) => {
    const existing = await get(id);
    if (existing) {
      return existing;
    }
    const session = createVoiceSessionRecord(id);
    await writeJsonFile(resolveFilePath(options.directory, id), session, options);
    return session;
  };
  const set = async (id, value) => {
    await writeJsonFile(resolveFilePath(options.directory, id), value, options);
  };
  // Summaries of all stored sessions, most recently active first.
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    const loaded = await Promise.all(files.map((file) => readIfPresent(file)));
    // A file can be removed between the directory scan and the read; treat it
    // as absent (like `get`) instead of failing the whole listing.
    return loaded.filter((session) => session !== undefined).map((session) => toVoiceSessionSummary(session)).sort((first, second) => (second.lastActivityAt ?? second.createdAt) - (first.lastActivityAt ?? first.createdAt));
  };
  const remove = async (id) => {
    await rm(resolveFilePath(options.directory, id), {
      force: true
    });
  };
  return { get, getOrCreate, list, remove, set };
};
|
|
3445
|
+
/**
 * Call-review store backed by one JSON file per review id inside `options.directory`.
 *
 * @param {{ directory: string, pretty?: boolean }} options Passed through to `writeJsonFile`.
 * @returns {{ get, list, remove, set }}
 */
var createVoiceFileReviewStore = (options) => {
  // Reads a JSON file, mapping "file does not exist" to `undefined`.
  const readIfPresent = async (path) => {
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  const get = async (id) => readIfPresent(resolveFilePath(options.directory, id));
  // All stored reviews, newest `generatedAt` first (missing timestamps sort as 0).
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    const loaded = await Promise.all(files.map((file) => readIfPresent(file)));
    // A file can be removed between the directory scan and the read; treat it
    // as absent (like `get`) instead of failing the whole listing.
    return loaded.filter((review) => review !== undefined).sort((left, right) => (right.generatedAt ?? 0) - (left.generatedAt ?? 0));
  };
  const set = async (id, artifact) => {
    await writeJsonFile(resolveFilePath(options.directory, id), withVoiceCallReviewId(id, artifact), options);
  };
  const remove = async (id) => {
    await rm(resolveFilePath(options.directory, id), {
      force: true
    });
  };
  return { get, list, remove, set };
};
|
|
3472
|
+
/**
 * Ops-task store backed by one JSON file per task id inside `options.directory`.
 *
 * @param {{ directory: string, pretty?: boolean }} options Passed through to `writeJsonFile`.
 * @returns {{ get, list, remove, set }}
 */
var createVoiceFileTaskStore = (options) => {
  // Reads a JSON file, mapping "file does not exist" to `undefined`.
  const readIfPresent = async (path) => {
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  const get = async (id) => readIfPresent(resolveFilePath(options.directory, id));
  // All stored tasks, newest `createdAt` first.
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    const loaded = await Promise.all(files.map((file) => readIfPresent(file)));
    // A file can be removed between the directory scan and the read; treat it
    // as absent (like `get`) instead of failing the whole listing.
    return loaded.filter((task) => task !== undefined).sort((left, right) => right.createdAt - left.createdAt);
  };
  const set = async (id, task) => {
    await writeJsonFile(resolveFilePath(options.directory, id), withVoiceOpsTaskId(id, task), options);
  };
  const remove = async (id) => {
    await rm(resolveFilePath(options.directory, id), {
      force: true
    });
  };
  return { get, list, remove, set };
};
|
|
3499
|
+
/**
 * Integration-event store backed by one JSON file per event id inside `options.directory`.
 *
 * @param {{ directory: string, pretty?: boolean }} options Passed through to `writeJsonFile`.
 * @returns {{ get, list, remove, set }}
 */
var createVoiceFileIntegrationEventStore = (options) => {
  // Reads a JSON file, mapping "file does not exist" to `undefined`.
  const readIfPresent = async (path) => {
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  const get = async (id) => readIfPresent(resolveFilePath(options.directory, id));
  // All stored events, newest `createdAt` first.
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    const loaded = await Promise.all(files.map((file) => readIfPresent(file)));
    // A file can be removed between the directory scan and the read; treat it
    // as absent (like `get`) instead of failing the whole listing.
    return loaded.filter((event) => event !== undefined).sort((left, right) => right.createdAt - left.createdAt);
  };
  const set = async (id, event) => {
    await writeJsonFile(resolveFilePath(options.directory, id), withVoiceIntegrationEventId(id, event), options);
  };
  const remove = async (id) => {
    await rm(resolveFilePath(options.directory, id), {
      force: true
    });
  };
  return { get, list, remove, set };
};
|
|
3526
|
+
/**
 * Builds the four file-backed stores used by the runtime, each rooted in its
 * own sub-directory of `options.directory` ("events", "reviews", "sessions",
 * "tasks"). All other options are forwarded unchanged to every store.
 */
var createVoiceFileRuntimeStorage = (options) => {
  const scopedTo = (subdirectory) => ({
    ...options,
    directory: join(options.directory, subdirectory)
  });
  return {
    events: createVoiceFileIntegrationEventStore(scopedTo("events")),
    reviews: createVoiceFileReviewStore(scopedTo("reviews")),
    session: createVoiceFileSessionStore(scopedTo("sessions")),
    tasks: createVoiceFileTaskStore(scopedTo("tasks"))
  };
};
|
|
3544
|
+
// Public wrappers that delegate to the matching with*Id helper, passing
// `id` and the record through unchanged.
var createStoredVoiceCallReviewArtifact = (id, artifact) => {
  return withVoiceCallReviewId(id, artifact);
};
var createStoredVoiceOpsTask = (id, task) => {
  return withVoiceOpsTaskId(id, task);
};
var createStoredVoiceIntegrationEvent = (id, event) => {
  return withVoiceIntegrationEventId(id, event);
};
|
|
1919
3547
|
// src/memoryStore.ts
|
|
1920
3548
|
var createVoiceMemoryStore = () => {
|
|
1921
3549
|
const sessions = new Map;
|
|
@@ -1939,21 +3567,146 @@ var createVoiceMemoryStore = () => {
|
|
|
1939
3567
|
};
|
|
1940
3568
|
// src/correction.ts
|
|
1941
3569
|
// Backslash-escapes every regular-expression syntax character in `value` so it
// can be embedded in a pattern as literal text.
var escapeRegExp = (value) => {
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
};
// Builds a fresh global, case-insensitive, Unicode-aware matcher for `alias`
// that only matches when the alias is not directly preceded or followed by a
// letter, a number or an apostrophe.
var buildAliasMatcher = (alias) => {
  const literal = escapeRegExp(alias);
  return new RegExp(`(?<![\\p{L}\\p{N}'])${literal}(?![\\p{L}\\p{N}'])`, "giu");
};
|
|
3571
|
+
// A "word" is a run of Unicode letters, numbers and apostrophes.
var WORD_PATTERN = /[\p{L}\p{N}']+/gu;
// Lower-cases `value`, replaces every character that is not a letter, number,
// whitespace or apostrophe with a space, collapses whitespace runs to a single
// space and trims the ends.
var normalizeComparableText = (value) => {
  const lowered = value.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s']/gu, " ");
  const collapsed = withoutPunctuation.replace(/\s+/g, " ");
  return collapsed.trim();
};
// Domain terms are compared using the same normalization as free text.
var normalizeDomainTerm = (value) => {
  return normalizeComparableText(value);
};
|
|
3574
|
+
/**
 * Splits `value` into word tokens (per WORD_PATTERN) and records where each
 * one sits in the original string.
 *
 * @returns {Array<{ start: number, end: number, text: string }>} `end` is
 *   exclusive (`start + text.length`).
 */
var tokenizeWithIndices = (value) => {
  return Array.from(value.matchAll(WORD_PATTERN))
    .filter((match) => (match.index ?? -1) >= 0)
    .map((match) => {
      const [text] = match;
      return {
        end: match.index + text.length,
        start: match.index,
        text
      };
    });
};
|
|
3591
|
+
/**
 * Levenshtein edit distance between two strings (unit cost for insertion,
 * deletion and substitution), computed with two rolling rows. Comparison is
 * per string index, i.e. per UTF-16 code unit.
 */
var levenshteinDistance = (left, right) => {
  if (left === right) {
    return 0;
  }
  if (left.length === 0 || right.length === 0) {
    // One side is empty: the distance is the length of the other side.
    return Math.max(left.length, right.length);
  }
  const width = right.length + 1;
  let previousRow = Array.from({ length: width }, (_, column) => column);
  let currentRow = new Array(width);
  for (let row = 1; row <= left.length; row += 1) {
    const leftUnit = left[row - 1];
    currentRow[0] = row;
    for (let column = 1; column < width; column += 1) {
      const insertion = currentRow[column - 1] + 1;
      const deletion = previousRow[column] + 1;
      const substitution = previousRow[column - 1] + (leftUnit === right[column - 1] ? 0 : 1);
      currentRow[column] = Math.min(insertion, deletion, substitution);
    }
    // The finished row becomes "previous"; the old one is reused as scratch.
    [previousRow, currentRow] = [currentRow, previousRow];
  }
  return previousRow[right.length];
};
|
|
3615
|
+
/**
 * Maximum normalized edit distance (distance / longer length) at which a fuzzy
 * alias match is accepted for the given risk tier.
 *
 * Returns -1 for "safe", which no score can satisfy because scores are never
 * negative, so fuzzy matching is effectively off. Unrecognized tiers also get
 * -1: without a default the function returned `undefined`, and a
 * `score > undefined` check is always false, which would accept every candidate.
 *
 * @param {string} riskTier "safe" | "balanced" | "risky"
 * @returns {number}
 */
var resolveFuzzyThreshold = (riskTier) => {
  switch (riskTier) {
    case "balanced":
      return 0.14;
    case "risky":
      return 0.2;
    case "safe":
    default:
      return -1;
  }
};
|
|
3625
|
+
/**
 * Whether `alias` may be fuzzy-matched under `riskTier`: never for "safe";
 * for "balanced" the normalized alias needs at least three words; for any
 * other tier at least two.
 */
var canUseTieredFuzzyAlias = (alias, riskTier) => {
  if (riskTier === "safe") {
    return false;
  }
  const minimumTokens = riskTier === "balanced" ? 3 : 2;
  const words = normalizeComparableText(alias).split(" ").filter((word) => word.length > 0);
  return words.length >= minimumTokens;
};
|
|
3632
|
+
/**
 * Finds the span of `text` that best fuzzy-matches a multi-word `alias`.
 *
 * Candidate windows are runs of consecutive word tokens whose length is within
 * one token of the alias length and whose first (normalized) token equals the
 * alias's first token. A window qualifies when its normalized edit distance to
 * the alias (distance / longer length) does not exceed the tier's threshold.
 * The lowest score wins; ties go to the longer character span.
 *
 * @param {string} text Text to search.
 * @param {string} alias Alias to look for; single-word aliases never match.
 * @param {string} riskTier Tier passed to `resolveFuzzyThreshold`.
 * @returns {{ start: number, end: number, score: number } | undefined}
 *   Character offsets into `text` (`end` exclusive), or `undefined`.
 */
var findFuzzyAliasMatch = (text, alias, riskTier) => {
  // The threshold does not depend on the window, so resolve it once. Scores
  // are never negative, so a negative or non-numeric threshold can never be
  // satisfied; bail out before doing any work.
  const threshold = resolveFuzzyThreshold(riskTier);
  if (!(threshold >= 0)) {
    return;
  }
  const tokens = tokenizeWithIndices(text);
  const aliasTokens = normalizeComparableText(alias).split(" ").filter((token) => token.length > 0);
  if (tokens.length === 0 || aliasTokens.length < 2) {
    return;
  }
  const minWindowLength = Math.max(1, aliasTokens.length - 1);
  const maxWindowLength = Math.min(tokens.length, aliasTokens.length + 1);
  const normalizedAlias = aliasTokens.join(" ");
  const normalizedAliasFirstToken = aliasTokens[0] ?? "";
  let bestMatch;
  for (let startIndex = 0; startIndex < tokens.length; startIndex += 1) {
    for (let windowLength = minWindowLength; windowLength <= maxWindowLength; windowLength += 1) {
      const endIndex = startIndex + windowLength - 1;
      if (endIndex >= tokens.length) {
        // Longer windows from this start would overrun the text as well.
        break;
      }
      const windowTokens = tokens.slice(startIndex, endIndex + 1);
      const normalizedWindow = normalizeComparableText(windowTokens.map((token) => token.text).join(" "));
      if (!normalizedWindow) {
        continue;
      }
      const [windowFirstToken] = normalizedWindow.split(" ");
      if (windowFirstToken !== normalizedAliasFirstToken) {
        continue;
      }
      const distance = levenshteinDistance(normalizedWindow, normalizedAlias);
      const denominator = Math.max(normalizedWindow.length, normalizedAlias.length);
      const score = denominator > 0 ? distance / denominator : 0;
      if (score > threshold) {
        continue;
      }
      const candidate = {
        end: windowTokens[windowTokens.length - 1].end,
        score,
        start: windowTokens[0].start
      };
      const isBetter = !bestMatch || candidate.score < bestMatch.score || candidate.score === bestMatch.score && candidate.end - candidate.start > bestMatch.end - bestMatch.start;
      if (isBetter) {
        bestMatch = candidate;
      }
    }
  }
  return bestMatch;
};
|
|
1942
3676
|
// Trimmed, non-empty aliases of `hint`, longest first so that longer aliases
// are tried before shorter ones.
var normalizeHintAliases = (hint) => {
  const cleaned = [];
  for (const alias of hint.aliases ?? []) {
    const trimmed = alias.trim();
    if (trimmed.length > 0) {
      cleaned.push(trimmed);
    }
  }
  return cleaned.sort((left, right) => right.length - left.length);
};
|
|
1943
3677
|
// Legacy entry point: applies phrase-hint corrections using the "risky" tier
// of applyRiskTieredPhraseHintCorrections.
var applyPhraseHintCorrections = (text, phraseHints) => {
  const legacyOptions = { riskTier: "risky" };
  return applyRiskTieredPhraseHintCorrections(text, phraseHints, legacyOptions);
};
|
|
3682
|
+
var applyRiskTieredPhraseHintCorrections = (text, phraseHints, options = {}) => {
|
|
3683
|
+
const riskTier = options.riskTier ?? "safe";
|
|
1944
3684
|
let corrected = text;
|
|
1945
3685
|
const matches = [];
|
|
1946
3686
|
for (const hint of phraseHints) {
|
|
1947
3687
|
for (const alias of normalizeHintAliases(hint)) {
|
|
1948
|
-
const matcher =
|
|
3688
|
+
const matcher = buildAliasMatcher(alias);
|
|
1949
3689
|
if (!matcher.test(corrected)) {
|
|
1950
|
-
|
|
3690
|
+
if (!canUseTieredFuzzyAlias(alias, riskTier)) {
|
|
3691
|
+
continue;
|
|
3692
|
+
}
|
|
3693
|
+
const fuzzyMatch = findFuzzyAliasMatch(corrected, alias, riskTier);
|
|
3694
|
+
if (!fuzzyMatch) {
|
|
3695
|
+
continue;
|
|
3696
|
+
}
|
|
3697
|
+
corrected = `${corrected.slice(0, fuzzyMatch.start)}${hint.text}${corrected.slice(fuzzyMatch.end)}`;
|
|
3698
|
+
matches.push({
|
|
3699
|
+
alias,
|
|
3700
|
+
hint
|
|
3701
|
+
});
|
|
3702
|
+
break;
|
|
1951
3703
|
}
|
|
1952
3704
|
corrected = corrected.replace(matcher, hint.text);
|
|
1953
3705
|
matches.push({
|
|
1954
3706
|
alias,
|
|
1955
3707
|
hint
|
|
1956
3708
|
});
|
|
3709
|
+
break;
|
|
1957
3710
|
}
|
|
1958
3711
|
}
|
|
1959
3712
|
return {
|
|
@@ -1962,6 +3715,79 @@ var applyPhraseHintCorrections = (text, phraseHints) => {
|
|
|
1962
3715
|
text: corrected
|
|
1963
3716
|
};
|
|
1964
3717
|
};
|
|
3718
|
+
/**
 * Removes aliases that normalize to the same domain term.
 *
 * The first alias for each normalized key wins and is returned in its original
 * (un-normalized) spelling. Aliases that normalize to an empty value are dropped.
 *
 * @param {Iterable<string>} aliases - Candidate aliases in priority order.
 * @returns {string[]} Aliases with normalized duplicates removed.
 */
var dedupeAliases = (aliases) => {
  const seenKeys = new Set();
  return [...aliases].filter((alias) => {
    const key = normalizeDomainTerm(alias);
    if (!key || seenKeys.has(key)) {
      return false;
    }
    seenKeys.add(key);
    return true;
  });
};
|
|
3731
|
+
/**
 * Decides whether an alias counts as "safe".
 *
 * After normalization an alias is safe when it is at least 4 characters long
 * and either has 7+ characters or consists of two or more words.
 *
 * @param {string} alias - Alias to evaluate.
 * @returns {boolean} True when the alias passes the length/word-count rule.
 */
var isSafeAlias = (alias) => {
  const normalized = normalizeDomainTerm(alias);
  if (normalized.length < 4) {
    return false;
  }
  if (normalized.length >= 7) {
    return true;
  }
  const wordCount = normalized.split(" ").filter((word) => word.length > 0).length;
  return wordCount >= 2;
};
|
|
3739
|
+
/**
 * Builds phrase hints from domain terms, filtering aliases by risk tier.
 *
 * Terms whose normalized text is empty or already emitted are skipped.
 * Alias filtering per tier:
 *   - "risky":    every de-duplicated alias is kept;
 *   - "balanced": safe aliases, plus aliases that normalize to the term itself;
 *   - otherwise:  only safe aliases (the default tier is "safe").
 *
 * @param {Iterable<object>} terms - Domain terms (text, aliases, boost, metadata).
 * @param {{ riskTier?: string }} [options] - Optional risk tier selection.
 * @returns {Array<object>} Hints; `aliases` is undefined when none survive.
 */
var createDomainPhraseHints = (terms, options = {}) => {
  const riskTier = options.riskTier ?? "safe";
  const emittedTerms = new Set();
  const hints = [];
  for (const term of terms) {
    const termKey = normalizeDomainTerm(term.text);
    if (!termKey || emittedTerms.has(termKey)) {
      continue;
    }
    const keepAlias = (alias) => {
      switch (riskTier) {
        case "risky":
          return true;
        case "balanced":
          return isSafeAlias(alias) || normalizeDomainTerm(alias) === termKey;
        default:
          return isSafeAlias(alias);
      }
    };
    const aliases = dedupeAliases(term.aliases ?? []).filter(keepAlias);
    hints.push({
      aliases: aliases.length > 0 ? aliases : undefined,
      boost: term.boost,
      metadata: term.metadata,
      text: term.text
    });
    emittedTerms.add(termKey);
  }
  return hints;
};
|
|
3768
|
+
/**
 * Builds lexicon entries from domain terms.
 *
 * Terms whose normalized text is empty, or was already emitted, are skipped.
 * Each entry carries its de-duplicated aliases together with the term's
 * language, metadata, pronunciation and original text.
 *
 * @param {Iterable<object>} terms - Domain terms to convert.
 * @returns {Array<object>} One lexicon entry per distinct normalized term.
 */
var createDomainLexicon = (terms) => {
  const emittedTerms = new Set();
  const lexicon = [];
  for (const term of terms) {
    const termKey = normalizeDomainTerm(term.text);
    const isDuplicateOrEmpty = !termKey || emittedTerms.has(termKey);
    if (isDuplicateOrEmpty) {
      continue;
    }
    const aliases = dedupeAliases(term.aliases ?? []);
    lexicon.push({
      aliases,
      language: term.language,
      metadata: term.metadata,
      pronunciation: term.pronunciation,
      text: term.text
    });
    emittedTerms.add(termKey);
  }
  return lexicon;
};
|
|
3787
|
+
/**
 * Averages the numeric confidence values of a list of transcripts.
 *
 * Only finite numbers take part in the average: `NaN` and infinities are
 * ignored, because a single such value would otherwise turn the whole average
 * into `NaN`/`Infinity` and defeat any threshold comparison made on it.
 * A missing (null/undefined) transcript list is treated as empty.
 *
 * @param {Array<{ confidence?: number }> | null | undefined} transcripts
 * @returns {number | undefined} Mean confidence, or undefined when no
 *   transcript carries a usable confidence value.
 */
var averageTranscriptConfidence = (transcripts) => {
  let total = 0;
  let count = 0;
  for (const transcript of transcripts ?? []) {
    const confidence = transcript?.confidence;
    if (typeof confidence === "number" && Number.isFinite(confidence)) {
      total += confidence;
      count += 1;
    }
  }
  return count > 0 ? total / count : undefined;
};
|
|
1965
3791
|
var createPhraseHintCorrectionHandler = (options = {}) => {
|
|
1966
3792
|
const provider = options.provider ?? "@absolutejs/voice";
|
|
1967
3793
|
const reason = options.reason ?? "phrase-hint-correction";
|
|
@@ -1981,17 +3807,583 @@ var createPhraseHintCorrectionHandler = (options = {}) => {
|
|
|
1981
3807
|
};
|
|
1982
3808
|
};
|
|
1983
3809
|
};
|
|
3810
|
+
/**
 * Projects lexicon entries onto the phrase-hint shape.
 *
 * Only `aliases`, `metadata` and `text` are carried over; other entry fields
 * (such as language or pronunciation) are not part of the returned hints.
 *
 * @param {Array<object>} lexicon - Lexicon entries.
 * @returns {Array<{ aliases: any, metadata: any, text: any }>} Phrase hints.
 */
var lexiconToPhraseHints = (lexicon) => {
  const toPhraseHint = ({ aliases, metadata, text }) => ({ aliases, metadata, text });
  return lexicon.map((entry) => toPhraseHint(entry));
};
|
|
3815
|
+
/**
 * Applies lexicon-driven corrections to a transcript.
 *
 * The lexicon is first converted into phrase hints and then handed to
 * applyPhraseHintCorrections.
 *
 * @param {string} text - Transcript text to correct.
 * @param {Array<object>} lexicon - Lexicon entries.
 * @returns {object} Result of applyPhraseHintCorrections.
 */
var applyLexiconCorrections = (text, lexicon) => {
  const hintsFromLexicon = lexiconToPhraseHints(lexicon);
  return applyPhraseHintCorrections(text, hintsFromLexicon);
};
|
|
3816
|
+
/**
 * Creates a turn-correction handler backed by the lexicon.
 *
 * The returned async handler runs applyLexiconCorrections on the turn text and
 * resolves to undefined when nothing changed. Otherwise it resolves to the
 * corrected text plus provider/reason labels and, when matches exist, the
 * matched aliases and hint texts as metadata.
 *
 * @param {{ provider?: string, reason?: string }} [options] - Label overrides.
 * @returns {(input: { lexicon: Array<object>, text: string }) => Promise<object | undefined>}
 */
var createLexiconCorrectionHandler = (options = {}) => {
  const provider = options.provider ?? "@absolutejs/voice";
  const reason = options.reason ?? "lexicon-correction";
  return async ({ lexicon, text }) => {
    const correction = applyLexiconCorrections(text, lexicon);
    if (!correction.changed) {
      return undefined;
    }
    let metadata;
    if (correction.matches.length > 0) {
      metadata = {
        matchedAliases: correction.matches.map(({ alias }) => alias),
        matchedHints: correction.matches.map(({ hint }) => hint.text)
      };
    }
    return {
      metadata,
      provider,
      reason,
      text: correction.text
    };
  };
};
|
|
3835
|
+
/**
 * Creates a correction handler that only acts on low-confidence turns.
 *
 * The returned async handler:
 *   1. averages the transcripts' confidence and bails out (undefined) when
 *      that average is defined and above `maxAverageConfidence`;
 *   2. otherwise applies risk-tiered corrections using the phrase hints
 *      followed by hints derived from the lexicon;
 *   3. resolves to undefined when the text did not change, or to the
 *      corrected text with metadata describing the matches.
 *
 * @param {object} [options]
 * @param {string} [options.provider] - Defaults to "@absolutejs/voice".
 * @param {string} [options.reason] - Defaults to "risky-turn-correction".
 * @param {string} [options.riskTier] - Defaults to "balanced".
 * @param {number} [options.maxAverageConfidence] - Defaults to 0.92.
 * @returns {(input: object) => Promise<object | undefined>} Correction handler.
 */
var createRiskyTurnCorrectionHandler = (options = {}) => {
  const provider = options.provider ?? "@absolutejs/voice";
  const reason = options.reason ?? "risky-turn-correction";
  const riskTier = options.riskTier ?? "balanced";
  const maxAverageConfidence = options.maxAverageConfidence ?? 0.92;
  return async ({ lexicon, phraseHints, text, transcripts }) => {
    const averageConfidence = averageTranscriptConfidence(transcripts);
    const isConfidentEnough = averageConfidence !== undefined && averageConfidence > maxAverageConfidence;
    if (isConfidentEnough) {
      return undefined;
    }
    const combinedHints = [...phraseHints, ...lexiconToPhraseHints(lexicon)];
    const correction = applyRiskTieredPhraseHintCorrections(text, combinedHints, { riskTier });
    if (!correction.changed) {
      return undefined;
    }
    const metadata = {
      averageConfidence,
      matchedAliases: correction.matches.map(({ alias }) => alias),
      matchedHints: correction.matches.map(({ hint }) => hint.text),
      riskTier
    };
    return {
      metadata,
      provider,
      reason,
      text: correction.text
    };
  };
};
|
|
3865
|
+
|
|
3866
|
+
// src/routing.ts
|
|
3867
|
+
/**
 * Maps a routing goal onto a concrete STT routing strategy.
 *
 * "low-cost" selects a single STT pass with no correction hook; every other
 * goal (default "best") selects the corrected Deepgram path. The goal passed
 * in is echoed back on the returned strategy.
 *
 * @param {string} [goal="best"] - Routing goal.
 * @returns {object} Strategy describing benchmark target, correction mode,
 *   notes, runtime preset and STT lifecycle.
 */
var resolveVoiceSTTRoutingStrategy = (goal = "best") => {
  const wantsLowCost = goal === "low-cost";
  return wantsLowCost ? {
    benchmarkSessionTarget: "deepgram-flux",
    correctionMode: "none",
    goal,
    notes: [
      "Uses the cheapest in-package path: one primary STT pass with no correction hook.",
      "Good for baseline throughput and lower post-processing overhead."
    ],
    preset: "default",
    sttLifecycle: "turn-scoped"
  } : {
    benchmarkSessionTarget: "deepgram-corrected",
    correctionMode: "generic",
    goal,
    notes: [
      "Uses the current best in-package path: Deepgram Flux with generic deterministic correction.",
      "Optimized for accuracy and robustness rather than minimum processing cost."
    ],
    preset: "reliability",
    sttLifecycle: "continuous"
  };
};
|
|
3893
|
+
/**
 * Picks the correction handler that matches a routing correction mode.
 *
 * @param {string} [mode="generic"] - "none", "risky-turn", or anything else.
 * @returns {Function | undefined} undefined for "none"; a risky-turn handler
 *   for "risky-turn"; a phrase-hint handler for every other mode.
 */
var createVoiceSTTRoutingCorrectionHandler = (mode = "generic") => {
  switch (mode) {
    case "none":
      return undefined;
    case "risky-turn":
      return createRiskyTurnCorrectionHandler();
    default:
      return createPhraseHintCorrectionHandler();
  }
};
|
|
3902
|
+
// src/telephony/twilio.ts
|
|
3903
|
+
import { Buffer as Buffer2 } from "buffer";
|
|
3904
|
+
var TWILIO_MULAW_SAMPLE_RATE = 8000;
|
|
3905
|
+
var VOICE_PCM_SAMPLE_RATE = 16000;
|
|
3906
|
+
/**
 * Escapes the five XML special characters so a value can be embedded safely
 * in element content or in a quoted attribute.
 *
 * Ampersand, double quote, apostrophe, less-than and greater-than are replaced
 * by their predefined XML entity references (amp, quot, apos, lt, gt). The
 * escape runs in a single pass, so an ampersand produced by one replacement is
 * never escaped a second time. Entity references are assembled from their
 * names rather than written out literally.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with XML special characters replaced by entities.
 */
var escapeXml = (value) => {
  const entityNames = {
    "&": "amp",
    '"': "quot",
    "'": "apos",
    "<": "lt",
    ">": "gt"
  };
  return value.replace(/[&"'<>]/g, (character) => `&${entityNames[character]};`);
};
|
|
3907
|
+
/**
 * Adapts an onTurn handler to the single-argument calling convention.
 *
 * Handlers declaring at most one parameter are returned untouched. Handlers
 * declaring more are treated as positional `(session, turn, api, context)`
 * handlers and wrapped so they can be called with one input object.
 *
 * @param {Function} handler - onTurn handler in either calling style.
 * @returns {Function} Handler accepting `{ context, session, turn, api }`.
 */
var normalizeOnTurn2 = (handler) => {
  const acceptsInputObject = handler.length <= 1;
  if (acceptsInputObject) {
    return handler;
  }
  return async (input) => {
    const { context, session, turn, api } = input;
    return handler(session, turn, api, context);
  };
};
|
|
3914
|
+
/**
 * Fills in defaults for an STT fallback configuration.
 *
 * A falsy config yields undefined. Otherwise the adapter is passed through and
 * every tuning field that is null/undefined receives its default; explicit
 * values (including 0) are preserved.
 *
 * @param {object | null | undefined} config - User-supplied fallback config.
 * @returns {object | undefined} Fully populated config, or undefined.
 */
var resolveSTTFallbackConfig2 = (config) => {
  if (!config) {
    return undefined;
  }
  const fallbackDefaults = {
    completionTimeoutMs: 2500,
    confidenceThreshold: 0.6,
    maxAttemptsPerTurn: 1,
    minTextLength: 2,
    replayWindowMs: 8000,
    settleMs: 220,
    trigger: "empty-or-low-confidence"
  };
  const resolved = { adapter: config.adapter };
  for (const [field, defaultValue] of Object.entries(fallbackDefaults)) {
    resolved[field] = config[field] ?? defaultValue;
  }
  return resolved;
};
|
|
3929
|
+
/**
 * Cleans up a list of phrase hints.
 *
 * Each hint is copied with its text trimmed and its aliases (when present)
 * reduced to non-blank strings; hints whose trimmed text is empty are dropped.
 * A missing list is treated as empty.
 *
 * @param {Array<object> | null | undefined} hints - Raw phrase hints.
 * @returns {Array<object>} Normalized hints with non-empty text.
 */
var normalizePhraseHints2 = (hints) => {
  const isUsableAlias = (value) => typeof value === "string" && value.trim().length > 0;
  return (hints ?? []).flatMap((hint) => {
    const normalized = {
      ...hint,
      aliases: hint.aliases?.filter(isUsableAlias),
      text: hint.text.trim()
    };
    return normalized.text.length > 0 ? [normalized] : [];
  });
};
|
|
3934
|
+
/**
 * Cleans up a list of lexicon entries.
 *
 * Each entry is copied with trimmed text, aliases (when present) reduced to
 * non-blank strings, and `language`/`pronunciation` either trimmed or set to
 * undefined when they are not non-blank strings. Entries whose trimmed text is
 * empty are dropped. A missing list is treated as empty.
 *
 * @param {Array<object> | null | undefined} entries - Raw lexicon entries.
 * @returns {Array<object>} Normalized entries with non-empty text.
 */
var normalizeLexicon2 = (entries) => {
  const isUsableAlias = (value) => typeof value === "string" && value.trim().length > 0;
  const trimmedOrUndefined = (value) => {
    if (typeof value === "string" && value.trim().length > 0) {
      return value.trim();
    }
    return undefined;
  };
  return (entries ?? []).flatMap((entry) => {
    const normalized = {
      ...entry,
      aliases: entry.aliases?.filter(isUsableAlias),
      language: trimmedOrUndefined(entry.language),
      pronunciation: trimmedOrUndefined(entry.pronunciation),
      text: entry.text.trim()
    };
    return normalized.text.length > 0 ? [normalized] : [];
  });
};
|
|
3941
|
+
/**
 * Rounds a number to the nearest integer and limits it to the signed 16-bit
 * range [-32768, 32767].
 *
 * @param {number} value - Sample value, possibly fractional or out of range.
 * @returns {number} Rounded value clamped to the int16 range.
 */
var clamp16 = (value) => {
  const rounded = Math.round(value);
  if (rounded > 32767) {
    return 32767;
  }
  if (rounded < -32768) {
    return -32768;
  }
  return rounded;
};
|
|
3942
|
+
/**
 * Resamples 16-bit PCM samples with linear interpolation.
 *
 * Empty input yields an empty array; equal rates yield a copy of the input.
 * Otherwise each output sample is interpolated between the two neighbouring
 * source samples, with the right neighbour clamped to the last source sample.
 *
 * @param {Int16Array | number[]} input - Source samples.
 * @param {number} inputRate - Sample rate of `input`.
 * @param {number} outputRate - Desired sample rate.
 * @returns {Int16Array} Resampled samples (always a new array).
 */
var linearResample = (input, inputRate, outputRate) => {
  const sourceLength = input.length;
  if (sourceLength === 0) {
    return new Int16Array(0);
  }
  if (inputRate === outputRate) {
    return new Int16Array(input);
  }
  const lastSourceIndex = sourceLength - 1;
  const targetLength = Math.max(1, Math.round(sourceLength * outputRate / inputRate));
  const resampled = new Int16Array(targetLength);
  // Distance travelled in the source for every output sample.
  const sourceStep = inputRate / outputRate;
  for (let targetIndex = 0; targetIndex < targetLength; targetIndex += 1) {
    const position = targetIndex * sourceStep;
    const floorIndex = Math.floor(position);
    const fraction = position - floorIndex;
    const leftSample = input[Math.min(floorIndex, lastSourceIndex)] ?? 0;
    const rightSample = input[Math.min(lastSourceIndex, floorIndex + 1)] ?? leftSample;
    resampled[targetIndex] = clamp16(leftSample + (rightSample - leftSample) * fraction);
  }
  return resampled;
};
|
|
3963
|
+
var MULAW_BIAS = 132;
|
|
3964
|
+
var MULAW_CLIP = 32635;
|
|
3965
|
+
/**
 * Encodes one linear PCM sample as an 8-bit mu-law byte.
 *
 * Steps: clamp to int16, split off the sign, cap the magnitude at MULAW_CLIP,
 * add MULAW_BIAS, locate the highest set bit (the exponent/segment), take the
 * four bits below it as the mantissa, then invert all bits of the result.
 *
 * @param {number} sample - Linear PCM sample.
 * @returns {number} Mu-law byte in the range 0..255.
 */
var encodeMulawSample = (sample) => {
  const clamped = clamp16(sample);
  const isNegative = clamped < 0;
  const signBit = isNegative ? 128 : 0;
  const biased = Math.min(MULAW_CLIP, isNegative ? -clamped : clamped) + MULAW_BIAS;
  // Walk down from bit 14 until the highest set bit is found.
  let exponent = 7;
  let probeBit = 16384;
  while ((biased & probeBit) === 0 && exponent > 0) {
    exponent -= 1;
    probeBit >>= 1;
  }
  const mantissa = (biased >> (exponent + 3)) & 15;
  return ~(signBit | (exponent << 4) | mantissa) & 255;
};
|
|
3981
|
+
/**
 * Decodes one 8-bit mu-law byte into a linear PCM sample.
 *
 * The byte is bit-inverted, split into sign, 3-bit exponent and 4-bit
 * mantissa, and the magnitude is rebuilt as
 * `(((mantissa << 3) + MULAW_BIAS) << exponent) - MULAW_BIAS`.
 *
 * @param {number} value - Mu-law encoded byte.
 * @returns {number} Linear sample, negated when the sign bit is set.
 */
var decodeMulawSample = (value) => {
  const inverted = ~value & 255;
  const isNegative = (inverted & 128) !== 0;
  const exponent = (inverted >> 4) & 7;
  const mantissa = inverted & 15;
  const magnitude = (((mantissa << 3) + MULAW_BIAS) << exponent) - MULAW_BIAS;
  return isNegative ? -magnitude : magnitude;
};
|
|
3990
|
+
/**
 * Serializes 16-bit samples into little-endian bytes.
 *
 * @param {Int16Array | number[]} samples - Samples to serialize; missing
 *   elements are written as 0.
 * @returns {Uint8Array} Two bytes per sample, low byte first.
 */
var int16ArrayToBytes = (samples) => {
  const sampleCount = samples.length;
  const bytes = new Uint8Array(sampleCount * 2);
  const writer = new DataView(bytes.buffer);
  let byteOffset = 0;
  for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
    writer.setInt16(byteOffset, samples[sampleIndex] ?? 0, true);
    byteOffset += 2;
  }
  return bytes;
};
|
|
3998
|
+
/**
 * Reads little-endian 16-bit samples out of a byte view.
 *
 * The view's own byteOffset/byteLength are honoured, so sub-arrays of a larger
 * buffer decode correctly. A trailing odd byte is ignored.
 *
 * @param {Uint8Array} bytes - Little-endian PCM bytes.
 * @returns {Int16Array} Decoded samples.
 */
var bytesToInt16Array = (bytes) => {
  const reader = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const sampleCount = Math.floor(bytes.byteLength / 2);
  return Int16Array.from({ length: sampleCount }, (_, sampleIndex) => reader.getInt16(sampleIndex * 2, true));
};
|
|
4007
|
+
/**
 * Decodes a base64 mu-law payload into linear PCM samples.
 *
 * @param {string} payload - Base64 text holding one mu-law byte per sample.
 * @returns {Int16Array} One decoded sample per payload byte.
 */
var decodeTwilioMulawBase64 = (payload) => {
  const mulawBytes = Uint8Array.from(Buffer2.from(payload, "base64"));
  return Int16Array.from(mulawBytes, (mulawByte) => decodeMulawSample(mulawByte ?? 0));
};
|
|
4015
|
+
/**
 * Encodes linear PCM samples as a base64 mu-law payload.
 *
 * @param {Int16Array | number[]} samples - Linear samples; missing elements
 *   are encoded as 0.
 * @returns {string} Base64 text holding one mu-law byte per sample.
 */
var encodeTwilioMulawBase64 = (samples) => {
  const mulawBytes = new Uint8Array(samples.length);
  mulawBytes.forEach((_, sampleIndex) => {
    mulawBytes[sampleIndex] = encodeMulawSample(samples[sampleIndex] ?? 0);
  });
  return Buffer2.from(mulawBytes).toString("base64");
};
|
|
4022
|
+
/**
 * Converts an inbound base64 mu-law payload into little-endian PCM16 bytes.
 *
 * Pipeline: decode mu-law, resample from TWILIO_MULAW_SAMPLE_RATE to
 * VOICE_PCM_SAMPLE_RATE, then serialize the samples to bytes.
 *
 * @param {string} payload - Base64 mu-law payload.
 * @returns {Uint8Array} PCM16 little-endian bytes at the voice sample rate.
 */
var transcodeTwilioInboundPayloadToPCM16 = (payload) => int16ArrayToBytes(
  linearResample(decodeTwilioMulawBase64(payload), TWILIO_MULAW_SAMPLE_RATE, VOICE_PCM_SAMPLE_RATE)
);
|
|
4027
|
+
/**
 * Converts an outbound audio chunk into a base64 mu-law payload.
 *
 * Audio that already is raw mono mu-law at TWILIO_MULAW_SAMPLE_RATE is only
 * base64-encoded. Otherwise the chunk must be `pcm_s16le`: it is decoded,
 * reduced to mono when `channels` is not 1 (by averaging consecutive sample
 * pairs), resampled to the telephony rate and mu-law encoded.
 *
 * @param {Uint8Array} chunk - Audio bytes.
 * @param {{ container: string, encoding: string, channels: number, sampleRateHz: number }} format
 * @returns {string} Base64 mu-law payload.
 * @throws {Error} When the encoding is neither pass-through mu-law nor pcm_s16le.
 */
var transcodePCMToTwilioOutboundPayload = (chunk, format) => {
  const isAlreadyTelephonyAudio = format.container === "raw" && format.encoding === "mulaw" && format.channels === 1 && format.sampleRateHz === TWILIO_MULAW_SAMPLE_RATE;
  if (isAlreadyTelephonyAudio) {
    return Buffer2.from(chunk).toString("base64");
  }
  if (format.encoding !== "pcm_s16le") {
    throw new Error(`Unsupported outbound telephony audio format: ${format.container}/${format.encoding}`);
  }
  const pcm = bytesToInt16Array(chunk);
  let mono = pcm;
  if (format.channels !== 1) {
    // Average each pair of interleaved samples into one mono sample.
    const frameCount = Math.floor(pcm.length / 2);
    mono = new Int16Array(frameCount);
    for (let frameIndex = 0; frameIndex < frameCount; frameIndex += 1) {
      const left = pcm[frameIndex * 2] ?? 0;
      const right = pcm[frameIndex * 2 + 1] ?? 0;
      mono[frameIndex] = clamp16((left + right) / 2);
    }
  }
  const telephony = linearResample(mono, format.sampleRateHz, TWILIO_MULAW_SAMPLE_RATE);
  return encodeTwilioMulawBase64(telephony);
};
|
|
4043
|
+
/**
 * Turns a raw socket message into a message object.
 *
 * Strings are parsed as JSON; anything else is assumed to be parsed already
 * and is returned as-is.
 *
 * @param {string | object} raw - Raw message.
 * @returns {object} Parsed message.
 * @throws {SyntaxError} When `raw` is a string that is not valid JSON.
 */
var parseTwilioMessage = (raw) => (typeof raw === "string" ? JSON.parse(raw) : raw);
|
|
4049
|
+
/**
 * Wraps a Twilio media-stream socket so it can be used as a voice socket.
 *
 * `send` accepts JSON-encoded voice messages (non-string data is ignored).
 * For each message it records the message on the review recorder, notifies
 * `onVoiceMessage`, and - once a stream SID is known - forwards:
 *   - "audio" messages as Twilio "media" frames (transcoded to mu-law);
 *   - "assistant" messages that carry a turnId as Twilio "mark" frames.
 *
 * @param {{ send: Function, close: Function }} socket - Underlying socket.
 * @param {() => object} getState - Returns the current bridge state.
 * @returns {{ close: Function, send: Function }} Voice socket adapter.
 */
var createTwilioSocketAdapter = (socket, getState) => {
  // Serializes a Twilio frame and waits for the socket to accept it.
  const sendFrame = (frame) => Promise.resolve(socket.send(JSON.stringify(frame)));

  const close = async (code, reason) => {
    await Promise.resolve(socket.close(code, reason));
  };

  const send = async (data) => {
    if (typeof data !== "string") {
      return;
    }
    const state = getState();
    const message = JSON.parse(data);
    state.reviewRecorder?.recordVoiceMessage(message);
    await Promise.resolve(state.onVoiceMessage?.({
      callSid: state.callSid ?? undefined,
      message,
      sessionId: state.sessionId ?? "",
      streamSid: state.streamSid ?? undefined
    }));
    // Nothing can be forwarded until the stream has been identified.
    if (!state.streamSid) {
      return;
    }
    if (message.type === "audio") {
      const audioBytes = Uint8Array.from(Buffer2.from(message.chunkBase64, "base64"));
      const payload = transcodePCMToTwilioOutboundPayload(audioBytes, message.format);
      state.hasOutboundAudioSinceLastInbound = true;
      state.reviewRecorder?.recordTwilioOutbound({
        bytes: payload.length,
        event: "media",
        track: "outbound"
      });
      await sendFrame({
        event: "media",
        media: { payload },
        streamSid: state.streamSid
      });
      return;
    }
    if (message.type === "assistant" && message.turnId) {
      const markName = `assistant:${message.turnId}`;
      state.reviewRecorder?.recordTwilioOutbound({
        event: "mark",
        name: markName
      });
      await sendFrame({
        event: "mark",
        mark: { name: markName },
        streamSid: state.streamSid
      });
    }
  };

  return { close, send };
};
|
|
4101
|
+
/**
 * Renders the TwiML document that connects a call to a media stream.
 *
 * Produces `<Response><Connect><Stream ...>` with optional `track` and `name`
 * attributes and one `<Parameter>` child per defined custom parameter. All
 * interpolated values are passed through escapeXml.
 *
 * @param {object} options
 * @param {string} options.streamUrl - URL of the media stream endpoint.
 * @param {string} [options.track] - Optional track attribute.
 * @param {string} [options.streamName] - Optional stream name attribute.
 * @param {Record<string, unknown>} [options.parameters] - Custom parameters;
 *   entries whose value is undefined are omitted.
 * @returns {string} TwiML XML document.
 */
var createTwilioVoiceResponse = (options) => {
  const parameterTags = [];
  for (const [name, value] of Object.entries(options.parameters ?? {})) {
    if (value === undefined) {
      continue;
    }
    parameterTags.push(`<Parameter name="${escapeXml(name)}" value="${escapeXml(String(value))}" />`);
  }
  const parameters = parameterTags.join("");
  const urlAttribute = ` url="${escapeXml(options.streamUrl)}"`;
  const trackAttribute = options.track ? ` track="${escapeXml(options.track)}"` : "";
  const nameAttribute = options.streamName ? ` name="${escapeXml(options.streamName)}"` : "";
  return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream${urlAttribute}${trackAttribute}${nameAttribute}>${parameters}</Stream></Connect></Response>`;
};
|
|
4105
|
+
/**
 * Bridges a Twilio media-stream socket to a voice session.
 *
 * The bridge lazily creates one voice session (on the first "start" or
 * "media" event), feeds inbound audio into it after transcoding to PCM16, and
 * relays the session's outbound messages back to Twilio through
 * createTwilioSocketAdapter. When `options.review` is set, inbound/outbound
 * traffic is also recorded and the review artifact is delivered once on close.
 *
 * Session creation awaits the lexicon and phrase-hint resolvers, and socket
 * message handlers are typically invoked without waiting for the previous one
 * to finish. The in-flight creation is therefore shared: overlapping
 * `handleMessage` calls (for example "start" immediately followed by "media")
 * all await the same promise instead of each creating their own session.
 *
 * @param {{ send: Function, close: Function }} socket - Twilio socket.
 * @param {object} options - Bridge options (stt, tts, onTurn, preset, review,
 *   lexicon, phraseHints, reconnect, ... as read below).
 * @returns {{ close: Function, getSessionId: Function, getStreamSid: Function, handleMessage: Function }}
 */
var createTwilioMediaStreamBridge = (socket, options) => {
  const runtimePreset = resolveVoiceRuntimePreset(options.preset);
  const turnDetection = resolveTurnDetectionConfig({
    ...runtimePreset.turnDetection,
    ...options.turnDetection
  });
  const audioConditioning = options.audioConditioning !== undefined ? resolveAudioConditioningConfig(options.audioConditioning) : runtimePreset.audioConditioning;
  const logger = resolveLogger(options.logger);
  const reconnect = {
    maxAttempts: options.reconnect?.maxAttempts ?? 10,
    strategy: options.reconnect?.strategy ?? "resume-last-turn",
    timeout: options.reconnect?.timeout ?? 30000
  };
  const bridgeState = {
    callSid: null,
    hasOutboundAudioSinceLastInbound: false,
    onVoiceMessage: options.onVoiceMessage,
    reviewRecorder: options.review ? createVoiceCallReviewRecorder({
      config: options.review.config ?? {
        preset: options.preset,
        stt: {
          kind: options.stt.kind
        },
        tts: options.tts ? {
          kind: options.tts.kind
        } : undefined,
        turnDetection
      },
      fixtureId: options.review.fixtureId,
      path: options.review.path,
      title: options.review.title
    }) : undefined,
    scenarioId: options.scenarioId ?? null,
    sessionId: options.sessionId ?? null,
    streamSid: null
  };
  let sessionHandle = null;
  // Promise of a session that is currently being created, if any.
  let pendingSession = null;
  let reviewArtifactDelivered = false;
  const resolveLexicon2 = async () => {
    if (typeof options.lexicon === "function") {
      return normalizeLexicon2(await options.lexicon({
        context: options.context,
        scenarioId: bridgeState.scenarioId ?? undefined,
        sessionId: bridgeState.sessionId ?? ""
      }) ?? []);
    }
    return normalizeLexicon2(options.lexicon);
  };
  const resolvePhraseHints2 = async () => {
    if (typeof options.phraseHints === "function") {
      return normalizePhraseHints2(await options.phraseHints({
        context: options.context,
        scenarioId: bridgeState.scenarioId ?? undefined,
        sessionId: bridgeState.sessionId ?? ""
      }) ?? []);
    }
    return normalizePhraseHints2(options.phraseHints);
  };
  // Builds the voice session; callers must go through ensureSession.
  const createSession = async () => {
    bridgeState.sessionId ??= `phone-${Date.now().toString(36)}`;
    const lexicon = await resolveLexicon2();
    const phraseHints = await resolvePhraseHints2();
    const normalizedOnTurn = normalizeOnTurn2(options.onTurn);
    const route = {
      correctTurn: options.correctTurn,
      onComplete: options.onComplete,
      onError: options.onError,
      onSession: options.onSession,
      onTurn: async (input) => {
        bridgeState.reviewRecorder?.recordVoiceMessage({
          type: "turn",
          turn: input.turn
        });
        const result = await normalizedOnTurn(input);
        if (result?.assistantText) {
          bridgeState.reviewRecorder?.recordVoiceMessage({
            type: "assistant",
            text: result.assistantText,
            turnId: input.turn.id
          });
        }
        return result;
      }
    };
    const voiceSocket = createTwilioSocketAdapter(socket, () => bridgeState);
    sessionHandle = createVoiceSession({
      audioConditioning,
      context: options.context,
      costTelemetry: options.costTelemetry,
      id: bridgeState.sessionId,
      languageStrategy: options.languageStrategy,
      lexicon,
      logger,
      phraseHints,
      reconnect,
      route,
      scenarioId: bridgeState.scenarioId ?? undefined,
      socket: voiceSocket,
      store: options.session,
      stt: options.stt,
      sttFallback: resolveSTTFallbackConfig2(options.sttFallback),
      sttLifecycle: options.sttLifecycle ?? runtimePreset.sttLifecycle,
      tts: options.tts,
      turnDetection
    });
    return sessionHandle;
  };
  // Returns the session, creating it at most once even under overlapping calls.
  const ensureSession = async () => {
    if (sessionHandle) {
      return sessionHandle;
    }
    // Clearing the slot on settle lets a later call retry after a failure.
    pendingSession ??= createSession().finally(() => {
      pendingSession = null;
    });
    return pendingSession;
  };
  return {
    close: async (reason) => {
      await sessionHandle?.close(reason);
      if (bridgeState.reviewRecorder && options.review?.onArtifact && !reviewArtifactDelivered) {
        reviewArtifactDelivered = true;
        await Promise.resolve(options.review.onArtifact(bridgeState.reviewRecorder.finalize()));
      }
    },
    getSessionId: () => bridgeState.sessionId,
    getStreamSid: () => bridgeState.streamSid,
    handleMessage: async (raw) => {
      const message = parseTwilioMessage(raw);
      switch (message.event) {
        case "connected":
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "connected"
          });
          return;
        case "start": {
          bridgeState.streamSid = message.start.streamSid;
          bridgeState.callSid = message.start.callSid ?? null;
          // Custom parameters override the configured ids only when non-blank.
          bridgeState.sessionId = message.start.customParameters?.sessionId?.trim() || bridgeState.sessionId;
          bridgeState.scenarioId = message.start.customParameters?.scenarioId?.trim() || bridgeState.scenarioId;
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "start",
            reason: message.start.callSid,
            text: bridgeState.sessionId ?? undefined
          });
          await ensureSession();
          return;
        }
        case "media": {
          const activeSession = await ensureSession();
          bridgeState.reviewRecorder?.recordTwilioInbound({
            bytes: message.media.payload.length,
            event: "media",
            track: message.media.track
          });
          // Inbound media after outbound audio triggers a Twilio "clear" frame.
          if (options.clearOnInboundMedia !== false && bridgeState.hasOutboundAudioSinceLastInbound && bridgeState.streamSid) {
            bridgeState.reviewRecorder?.recordTwilioOutbound({
              event: "clear"
            });
            await Promise.resolve(socket.send(JSON.stringify({
              event: "clear",
              streamSid: bridgeState.streamSid
            })));
          }
          bridgeState.hasOutboundAudioSinceLastInbound = false;
          await activeSession.receiveAudio(transcodeTwilioInboundPayloadToPCM16(message.media.payload));
          return;
        }
        case "mark":
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "mark",
            name: message.mark?.name
          });
          return;
        case "stop":
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "stop",
            reason: message.stop?.callSid
          });
          await sessionHandle?.close("twilio-stop");
          return;
      }
    }
  };
};
|
|
4283
|
+
// src/telephony/response.ts
|
|
4284
|
+
/**
 * Collapses every run of whitespace into a single space and removes leading
 * and trailing whitespace.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} Text with single-space separators and no outer whitespace.
 */
var normalizeWhitespace = (value) => {
  const withoutOuterWhitespace = value.trim();
  return withoutOuterWhitespace.replace(/\s+/g, " ");
};
|
|
4285
|
+
var DEFAULT_MAX_WORDS = 12;
|
|
4286
|
+
var CLAUSE_BOUNDARY_PATTERN = /(?<=[,.;!?])\s+/u;
|
|
4287
|
+
/**
 * Limits text to its first `maxWords` whitespace-separated words.
 *
 * The text is returned unchanged when the limit is not a positive finite
 * number or when the text already fits; otherwise the kept words are joined
 * with single spaces.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxWords - Maximum number of words to keep.
 * @returns {string} Original or truncated text.
 */
var clampWords = (text, maxWords) => {
  const hasUsableLimit = Number.isFinite(maxWords) && maxWords > 0;
  if (!hasUsableLimit) {
    return text;
  }
  const words = text.split(/\s+/u).filter((word) => word.length > 0);
  return words.length > maxWords ? words.slice(0, maxWords).join(" ") : text;
};
|
|
4297
|
+
/**
 * Limits text to at most `maxChars` UTF-16 code units.
 *
 * The text is returned unchanged when the limit is missing, not finite, or not
 * positive, or when the text already fits. When truncating, the cut never
 * falls between the two halves of a surrogate pair: a character that would be
 * split is dropped entirely rather than leaving a lone high surrogate, which
 * is invalid text. Whitespace left at either end of the truncated text is
 * trimmed.
 *
 * @param {string} text - Text to shorten.
 * @param {number} [maxChars] - Maximum length in UTF-16 code units.
 * @returns {string} Original or truncated text.
 */
var clampChars = (text, maxChars) => {
  if (!Number.isFinite(maxChars) || maxChars <= 0) {
    return text;
  }
  if (text.length <= maxChars) {
    return text;
  }
  let end = Math.trunc(maxChars);
  if (end > 0) {
    const lastKept = text.charCodeAt(end - 1);
    const firstDropped = text.charCodeAt(end);
    const splitsSurrogatePair = lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (splitsSurrogatePair) {
      end -= 1;
    }
  }
  return text.slice(0, end).trim();
};
|
|
4306
|
+
/**
 * Makes sure text ends with sentence-final punctuation.
 *
 * Text already ending in `.`, `!` or `?` is returned unchanged. Otherwise any
 * trailing clause punctuation (`,`, `;`, `:`) and whitespace is removed before
 * a period is appended, so a clause cut at a comma becomes "Sure." rather than
 * "Sure,.". Empty text, and text consisting only of such punctuation, is
 * returned unchanged.
 *
 * @param {string} text - Text to terminate.
 * @returns {string} Text ending in `.`, `!` or `?` whenever it has content.
 */
var ensureTerminalPunctuation = (text) => {
  if (!text) {
    return text;
  }
  const endsSentence = (value) => /[.!?]$/u.test(value);
  if (endsSentence(text)) {
    return text;
  }
  const stem = text.replace(/[\s,;:]+$/u, "");
  if (!stem) {
    return text;
  }
  return endsSentence(stem) ? stem : `${stem}.`;
};
|
|
4312
|
+
/**
 * Extracts the leading clause of a piece of text.
 *
 * After whitespace normalization, a short leading label (a colon within the
 * first 24 characters that is not the final character) is dropped. The rest is
 * split at CLAUSE_BOUNDARY_PATTERN and the first non-empty clause is returned,
 * falling back to the whole body when the split yields nothing.
 *
 * @param {string} text - Source text.
 * @returns {string} Leading clause, or the normalized text when it is empty.
 */
var extractLeadClause = (text) => {
  const normalized = normalizeWhitespace(text);
  if (!normalized) {
    return normalized;
  }
  const colonIndex = normalized.indexOf(":");
  const hasLeadingLabel = colonIndex >= 0 && colonIndex < 24 && colonIndex < normalized.length - 1;
  const body = hasLeadingLabel ? normalizeWhitespace(normalized.slice(colonIndex + 1)) : normalized;
  const [leadClause] = body.split(CLAUSE_BOUNDARY_PATTERN).filter(Boolean);
  return leadClause ?? body;
};
|
|
4322
|
+
/**
 * Shapes assistant text for delivery over a phone call.
 *
 * In "full" mode the normalized text is only limited by `maxChars`. In the
 * default "lead-clause" mode the leading clause is extracted, limited to
 * `maxWords` (default DEFAULT_MAX_WORDS) and `maxChars`, re-normalized and
 * given terminal punctuation. Empty input is returned as the empty normalized
 * text.
 *
 * @param {string} text - Assistant text.
 * @param {{ mode?: string, maxWords?: number, maxChars?: number }} [options]
 * @returns {string} Shaped text.
 */
var shapeTelephonyAssistantText = (text, options = {}) => {
  const normalized = normalizeWhitespace(text);
  if (!normalized) {
    return normalized;
  }
  const mode = options.mode ?? "lead-clause";
  if (mode === "full") {
    return clampChars(normalized, options.maxChars);
  }
  const wordLimited = clampWords(extractLeadClause(normalized), options.maxWords ?? DEFAULT_MAX_WORDS);
  const charLimited = clampChars(wordLimited, options.maxChars);
  return ensureTerminalPunctuation(normalizeWhitespace(charLimited));
};
|
|
1984
4335
|
export {
|
|
4336
|
+
withVoiceOpsTaskId,
|
|
4337
|
+
withVoiceIntegrationEventId,
|
|
1985
4338
|
voice,
|
|
4339
|
+
transcodeTwilioInboundPayloadToPCM16,
|
|
4340
|
+
transcodePCMToTwilioOutboundPayload,
|
|
4341
|
+
summarizeVoiceOpsTasks,
|
|
4342
|
+
startVoiceOpsTask,
|
|
4343
|
+
shapeTelephonyAssistantText,
|
|
4344
|
+
resolveVoiceSTTRoutingStrategy,
|
|
1986
4345
|
resolveVoiceRuntimePreset,
|
|
1987
4346
|
resolveTurnDetectionConfig,
|
|
1988
4347
|
resolveAudioConditioningConfig,
|
|
4348
|
+
reopenVoiceOpsTask,
|
|
4349
|
+
renderVoiceCallReviewMarkdown,
|
|
4350
|
+
renderVoiceCallReviewHTML,
|
|
4351
|
+
recordVoiceRuntimeOps,
|
|
4352
|
+
listVoiceOpsTasks,
|
|
4353
|
+
encodeTwilioMulawBase64,
|
|
4354
|
+
decodeTwilioMulawBase64,
|
|
4355
|
+
createVoiceTaskUpdatedEvent,
|
|
4356
|
+
createVoiceTaskCreatedEvent,
|
|
1989
4357
|
createVoiceSessionRecord,
|
|
1990
4358
|
createVoiceSession,
|
|
4359
|
+
createVoiceSTTRoutingCorrectionHandler,
|
|
4360
|
+
createVoiceReviewSavedEvent,
|
|
1991
4361
|
createVoiceMemoryStore,
|
|
4362
|
+
createVoiceIntegrationEvent,
|
|
4363
|
+
createVoiceFileTaskStore,
|
|
4364
|
+
createVoiceFileSessionStore,
|
|
4365
|
+
createVoiceFileRuntimeStorage,
|
|
4366
|
+
createVoiceFileReviewStore,
|
|
4367
|
+
createVoiceFileIntegrationEventStore,
|
|
4368
|
+
createVoiceCallReviewRecorder,
|
|
4369
|
+
createVoiceCallReviewFromSession,
|
|
4370
|
+
createVoiceCallReviewFromLiveTelephonyReport,
|
|
4371
|
+
createVoiceCallCompletedEvent,
|
|
4372
|
+
createTwilioVoiceResponse,
|
|
4373
|
+
createTwilioMediaStreamBridge,
|
|
4374
|
+
createStoredVoiceOpsTask,
|
|
4375
|
+
createStoredVoiceIntegrationEvent,
|
|
4376
|
+
createStoredVoiceCallReviewArtifact,
|
|
4377
|
+
createRiskyTurnCorrectionHandler,
|
|
1992
4378
|
createPhraseHintCorrectionHandler,
|
|
1993
4379
|
createId,
|
|
4380
|
+
createDomainPhraseHints,
|
|
4381
|
+
createDomainLexicon,
|
|
1994
4382
|
conditionAudioChunk,
|
|
4383
|
+
completeVoiceOpsTask,
|
|
4384
|
+
buildVoiceOpsTaskFromReview,
|
|
4385
|
+
assignVoiceOpsTask,
|
|
4386
|
+
applyRiskTieredPhraseHintCorrections,
|
|
1995
4387
|
applyPhraseHintCorrections,
|
|
1996
4388
|
TURN_PROFILE_DEFAULTS
|
|
1997
4389
|
};
|