@absolutejs/voice 0.0.21 → 0.0.22-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +499 -2
  2. package/dist/angular/index.js +90 -0
  3. package/dist/angular/voice-controller.service.d.ts +6 -0
  4. package/dist/angular/voice-stream.service.d.ts +6 -0
  5. package/dist/client/actions.d.ts +41 -0
  6. package/dist/client/audioPlayer.d.ts +40 -0
  7. package/dist/client/duplex.d.ts +3 -0
  8. package/dist/client/htmxBootstrap.js +84 -0
  9. package/dist/client/index.d.ts +2 -0
  10. package/dist/client/index.js +507 -5
  11. package/dist/correction.d.ts +18 -1
  12. package/dist/fileStore.d.ts +27 -0
  13. package/dist/index.d.ts +12 -1
  14. package/dist/index.js +2425 -33
  15. package/dist/ops.d.ts +100 -0
  16. package/dist/react/index.js +86 -0
  17. package/dist/react/useVoiceController.d.ts +6 -0
  18. package/dist/react/useVoiceStream.d.ts +6 -0
  19. package/dist/routing.d.ts +3 -0
  20. package/dist/runtimeOps.d.ts +23 -0
  21. package/dist/svelte/index.js +84 -0
  22. package/dist/telephony/response.d.ts +7 -0
  23. package/dist/telephony/twilio.d.ts +116 -0
  24. package/dist/testing/benchmark.d.ts +59 -4
  25. package/dist/testing/corrected.d.ts +41 -0
  26. package/dist/testing/duplex.d.ts +59 -0
  27. package/dist/testing/fixtures.d.ts +18 -2
  28. package/dist/testing/index.d.ts +5 -0
  29. package/dist/testing/index.js +4940 -307
  30. package/dist/testing/review.d.ts +143 -0
  31. package/dist/testing/sessionBenchmark.d.ts +25 -0
  32. package/dist/testing/stt.d.ts +2 -1
  33. package/dist/testing/telephony.d.ts +70 -0
  34. package/dist/testing/tts.d.ts +73 -0
  35. package/dist/types.d.ts +290 -3
  36. package/dist/vue/index.js +90 -0
  37. package/dist/vue/useVoiceController.d.ts +11 -0
  38. package/dist/vue/useVoiceStream.d.ts +11 -0
  39. package/package.json +115 -1
package/dist/index.js CHANGED
@@ -434,6 +434,58 @@ var PRESET_INPUTS = {
434
434
  transcriptStabilityMs: 1650
435
435
  }
436
436
  },
437
+ "pstn-balanced": {
438
+ audioConditioning: {
439
+ enabled: true,
440
+ maxGain: 2.8,
441
+ noiseGateAttenuation: 0.07,
442
+ noiseGateThreshold: 0.005,
443
+ targetLevel: 0.08
444
+ },
445
+ capture: {
446
+ channelCount: 1,
447
+ sampleRateHz: 16000
448
+ },
449
+ connection: {
450
+ maxReconnectAttempts: 14,
451
+ pingInterval: 45000,
452
+ reconnect: true
453
+ },
454
+ sttLifecycle: "continuous",
455
+ turnDetection: {
456
+ qualityProfile: "noisy-room",
457
+ profile: "long-form",
458
+ silenceMs: 660,
459
+ speechThreshold: 0.012,
460
+ transcriptStabilityMs: 300
461
+ }
462
+ },
463
+ "pstn-fast": {
464
+ audioConditioning: {
465
+ enabled: true,
466
+ maxGain: 2.75,
467
+ noiseGateAttenuation: 0.06,
468
+ noiseGateThreshold: 0.005,
469
+ targetLevel: 0.08
470
+ },
471
+ capture: {
472
+ channelCount: 1,
473
+ sampleRateHz: 16000
474
+ },
475
+ connection: {
476
+ maxReconnectAttempts: 14,
477
+ pingInterval: 45000,
478
+ reconnect: true
479
+ },
480
+ sttLifecycle: "continuous",
481
+ turnDetection: {
482
+ qualityProfile: "noisy-room",
483
+ profile: "long-form",
484
+ silenceMs: 620,
485
+ speechThreshold: 0.012,
486
+ transcriptStabilityMs: 280
487
+ }
488
+ },
437
489
  reliability: {
438
490
  audioConditioning: {
439
491
  enabled: true,
@@ -475,6 +527,933 @@ var resolveVoiceRuntimePreset = (name = "default") => {
475
527
  };
476
528
  };
477
529
 
530
+ // src/ops.ts
531
/**
 * Returns a copy of `task` with `entry` appended to its history and
 * `updatedAt` refreshed. The input task and its history array are not mutated.
 *
 * @param {object} task - Task record; `task.history` may be missing.
 * @param {object} entry - History entry; `entry.at` defaults to the current time.
 * @returns {object} New task object with the extended history.
 */
var ensureTaskHistory = (task, entry) => {
  // Read the clock once so a defaulted entry timestamp and `updatedAt`
  // can never disagree by a tick.
  const now = Date.now();
  return {
    ...task,
    history: [
      ...(task.history ?? []),
      {
        ...entry,
        at: entry.at ?? now
      }
    ],
    updatedAt: now
  };
};
542
/**
 * Produces a shallow copy of `task` whose `id` is the supplied value.
 * The original task object is left untouched.
 */
var withVoiceOpsTaskId = (id, task) => {
  const identified = { ...task };
  identified.id = id;
  return identified;
};
546
/**
 * Produces a shallow copy of `event` whose `id` is the supplied value.
 * The original event object is left untouched.
 */
var withVoiceIntegrationEventId = (id, event) => {
  const identified = { ...event };
  identified.id = id;
  return identified;
};
550
/**
 * Derives a follow-up ops task from a call review.
 *
 * Only the outcomes "voicemail", "no-answer", "escalated", "transferred" and
 * "failed" yield a task; every other outcome returns `null`.
 *
 * @param {object} review - Review with `id`, `summary.outcome`, and optional
 *   `generatedAt` / `postCall` fields.
 * @returns {object|null} Open task seeded with a "created" history entry, or null.
 */
var buildVoiceOpsTaskFromReview = (review) => {
  const postCall = review.postCall;
  const target = postCall?.target;
  const outcome = review.summary.outcome;
  const createdAt = review.generatedAt ?? Date.now();

  // Per-outcome task flavour: fallback description, task kind and title.
  const blueprints = new Map([
    ["voicemail", {
      fallback: "Caller reached voicemail and needs a callback follow-up.",
      kind: "callback",
      title: target ? `Call back voicemail from ${target}` : "Call back voicemail lead"
    }],
    ["no-answer", {
      fallback: "Live contact was not established and should be retried.",
      kind: "callback",
      title: "Retry no-answer call"
    }],
    ["escalated", {
      fallback: "The automated path escalated this call for human review.",
      kind: "escalation",
      title: "Review escalated call"
    }],
    ["transferred", {
      fallback: "The call was transferred and should be verified downstream.",
      kind: "transfer-check",
      title: target ? `Verify transfer to ${target}` : "Verify call transfer"
    }],
    ["failed", {
      fallback: "The call failed and needs operator review before retry.",
      kind: "retry-review",
      title: "Inspect failed call before retry"
    }]
  ]);

  const blueprint = blueprints.get(outcome);
  if (blueprint === undefined) {
    return null;
  }

  return {
    createdAt,
    history: [
      {
        actor: "system",
        at: createdAt,
        detail: postCall?.summary,
        type: "created"
      }
    ],
    id: `${review.id}:ops`,
    intakeId: review.id,
    outcome,
    recommendedAction: postCall?.recommendedAction ?? "Review the voice artifact and decide the next operator action.",
    reviewId: review.id,
    status: "open",
    target,
    updatedAt: createdAt,
    description: postCall?.summary ?? blueprint.fallback,
    kind: blueprint.kind,
    title: blueprint.title
  };
};
611
/**
 * Assigns a task to an owner and records an "assigned" history entry.
 *
 * @param {object} task - Task to assign (not mutated).
 * @param {string} owner - Assignee name. Blank, nullish or non-string values
 *   fall back to "ops" instead of throwing.
 * @param {{actor?: string, at?: number}} [input] - Optional actor/timestamp
 *   for the history entry; the actor defaults to the normalized owner.
 * @returns {object} Result of `ensureTaskHistory` for the updated task.
 */
var assignVoiceOpsTask = (task, owner, input = {}) => {
  // Guard the trim: the "ops" fallback should also cover a missing owner.
  const trimmedOwner = typeof owner === "string" ? owner.trim() : "";
  const normalizedOwner = trimmedOwner || "ops";
  return ensureTaskHistory({
    ...task,
    assignee: normalizedOwner
  }, {
    actor: input.actor ?? normalizedOwner,
    at: input.at,
    detail: `Assigned to ${normalizedOwner}`,
    type: "assigned"
  });
};
623
/**
 * Moves a task to "in-progress" and records a "started" history entry.
 * The actor defaults to the task's assignee, then to "ops".
 */
var startVoiceOpsTask = (task, input = {}) => {
  const inProgressTask = { ...task, status: "in-progress" };
  const historyEntry = {
    actor: input.actor ?? task.assignee ?? "ops",
    at: input.at,
    detail: input.detail ?? "Work started",
    type: "started"
  };
  return ensureTaskHistory(inProgressTask, historyEntry);
};
632
/**
 * Marks a task "done" and records a "completed" history entry.
 * The actor defaults to the task's assignee, then to "ops".
 */
var completeVoiceOpsTask = (task, input = {}) => {
  const finishedTask = { ...task, status: "done" };
  const historyEntry = {
    actor: input.actor ?? task.assignee ?? "ops",
    at: input.at,
    detail: input.detail ?? "Marked done",
    type: "completed"
  };
  return ensureTaskHistory(finishedTask, historyEntry);
};
641
/**
 * Returns a task to "open" and records a "reopened" history entry.
 * The actor defaults to the task's assignee, then to "ops".
 */
var reopenVoiceOpsTask = (task, input = {}) => {
  const reopenedTask = { ...task, status: "open" };
  const historyEntry = {
    actor: input.actor ?? task.assignee ?? "ops",
    at: input.at,
    detail: input.detail ?? "Task reopened",
    type: "reopened"
  };
  return ensureTaskHistory(reopenedTask, historyEntry);
};
650
/**
 * Returns a new array of tasks ordered newest-first by `createdAt`.
 * The input collection is not reordered.
 */
var listVoiceOpsTasks = (tasks) => {
  const newestFirst = (left, right) => right.createdAt - left.createdAt;
  return Array.from(tasks).sort(newestFirst);
};
651
/**
 * Aggregates a task list into status counters plus `[key, count]` rankings
 * (highest count first) by kind, outcome, assignee and target.
 *
 * Kind is always counted; outcome, target and assignee are counted only when
 * truthy. Statuses other than "open", "in-progress" and "done" count only
 * toward `total`.
 */
var summarizeVoiceOpsTasks = (tasks) => {
  const kinds = new Map();
  const outcomes = new Map();
  const assignees = new Map();
  const targets = new Map();
  let done = 0;
  let inProgress = 0;
  let open = 0;

  const bump = (counts, key) => {
    counts.set(key, (counts.get(key) ?? 0) + 1);
  };
  const ranked = (counts) => Array.from(counts).sort(([, a], [, b]) => b - a);

  for (const task of tasks) {
    switch (task.status) {
      case "open":
        open += 1;
        break;
      case "in-progress":
        inProgress += 1;
        break;
      case "done":
        done += 1;
        break;
      default:
        break;
    }
    bump(kinds, task.kind);
    if (task.outcome) {
      bump(outcomes, task.outcome);
    }
    if (task.target) {
      bump(targets, task.target);
    }
    if (task.assignee) {
      bump(assignees, task.assignee);
    }
  }

  return {
    byKind: ranked(kinds),
    byOutcome: ranked(outcomes),
    done,
    inProgress,
    open,
    topAssignees: ranked(assignees),
    topTargets: ranked(targets),
    total: tasks.length
  };
};
692
/**
 * Builds an integration event envelope.
 *
 * @param {string} type - Event type.
 * @param {*} payload - Event payload, stored as-is.
 * @param {{createdAt?: number, id?: string}} [input] - Overrides; `createdAt`
 *   defaults to the current time and `id` to a random UUID.
 */
var createVoiceIntegrationEvent = (type, payload, input = {}) => {
  const createdAt = input.createdAt ?? Date.now();
  const id = input.id ?? crypto.randomUUID();
  return { createdAt, id, payload, type };
};
698
/**
 * Builds the "call.completed" integration event for a finished session.
 * The event id is `<session id>:call.completed`; the disposition falls back
 * to the one stored on `session.call`.
 */
var createVoiceCallCompletedEvent = (input) => {
  const { session } = input;
  const payload = {
    call: session.call,
    disposition: input.disposition ?? session.call?.disposition,
    scenarioId: session.scenarioId,
    sessionId: session.id,
    sessionSummary: input.sessionSummary,
    status: session.status,
    turnCount: session.turns.length
  };
  const overrides = { id: `${session.id}:call.completed` };
  return createVoiceIntegrationEvent("call.completed", payload, overrides);
};
709
/**
 * Builds the "review.saved" integration event for a stored call review.
 * The event id is `<review id>:review.saved`.
 */
var createVoiceReviewSavedEvent = (review) => {
  const { summary } = review;
  const payload = {
    elapsedMs: summary.elapsedMs,
    firstTurnLatencyMs: summary.firstTurnLatencyMs,
    outcome: summary.outcome,
    postCall: review.postCall,
    reviewId: review.id,
    title: review.title
  };
  const overrides = { id: `${review.id}:review.saved` };
  return createVoiceIntegrationEvent("review.saved", payload, overrides);
};
719
/**
 * Builds the "task.created" integration event for an ops task.
 * The event id is `<task id>:task.created:<task updatedAt>`.
 */
var createVoiceTaskCreatedEvent = (task) => {
  const payload = {
    assignee: task.assignee,
    kind: task.kind,
    outcome: task.outcome,
    recommendedAction: task.recommendedAction,
    reviewId: task.reviewId,
    status: task.status,
    target: task.target,
    taskId: task.id,
    title: task.title
  };
  const overrides = { id: `${task.id}:task.created:${task.updatedAt}` };
  return createVoiceIntegrationEvent("task.created", payload, overrides);
};
732
/**
 * Builds the "task.updated" integration event for an ops task, including its
 * history and `updatedAt`. The event id is
 * `<task id>:task.updated:<task updatedAt>`.
 */
var createVoiceTaskUpdatedEvent = (task) => {
  const payload = {
    assignee: task.assignee,
    history: task.history,
    kind: task.kind,
    outcome: task.outcome,
    recommendedAction: task.recommendedAction,
    reviewId: task.reviewId,
    status: task.status,
    target: task.target,
    taskId: task.id,
    title: task.title,
    updatedAt: task.updatedAt
  };
  const overrides = { id: `${task.id}:task.updated:${task.updatedAt}` };
  return createVoiceIntegrationEvent("task.updated", payload, overrides);
};
747
+
748
+ // src/testing/review.ts
749
/**
 * Rounds a numeric metric to two decimal places.
 * Returns `undefined` for anything that is not of type "number".
 */
var roundMetric = (value) => {
  if (typeof value !== "number") {
    return undefined;
  }
  return Math.round(value * 100) / 100;
};
750
/**
 * Formats `label: <rounded value><unit>` for numeric values (unit defaults to
 * "ms"). Returns `undefined` when the value is not of type "number".
 */
var formatMetric = (label, value, unit = "ms") => {
  if (typeof value !== "number") {
    return undefined;
  }
  const rounded = roundMetric(value);
  return `${label}: ${rounded}${unit}`;
};
751
/**
 * Returns the first timeline entry with the given `event` name, optionally
 * restricted to a `source`. Returns `undefined` when nothing matches.
 */
var findTimelineEvent = (timeline, event, source) => {
  for (const entry of timeline) {
    if (entry.event !== event) {
      continue;
    }
    if (source === undefined || entry.source === source) {
      return entry;
    }
  }
  return undefined;
};
752
/**
 * Renders one timeline entry as a single bullet line:
 * `- <atMs>ms [<source>] <event>` followed by whichever optional details are
 * present (quoted text, reason, bytes, rounded confidence, name).
 */
var formatTimelineText = (entry) => {
  const segments = [
    `- ${entry.atMs}ms`,
    `[${entry.source}]`,
    entry.event,
    ...(entry.text ? [`"${entry.text}"`] : []),
    ...(entry.reason ? [`reason=${entry.reason}`] : []),
    ...(typeof entry.bytes === "number" ? [`bytes=${entry.bytes}`] : []),
    ...(typeof entry.confidence === "number" ? [`confidence=${roundMetric(entry.confidence)}`] : []),
    ...(entry.name ? [`name=${entry.name}`] : [])
  ];
  return segments.join(" ");
};
771
/**
 * Tells whether a timeline entry is one of the high-volume audio transport
 * events: "inbound-media", "inbound-silence-pad", "stt-send" or "tts-audio".
 */
var isLowSignalTimelineEvent = (entry) => {
  const lowSignalEvents = ["inbound-media", "inbound-silence-pad", "stt-send", "tts-audio"];
  return lowSignalEvents.includes(entry.event);
};
772
/**
 * Totals the audio transport events of a timeline per category.
 *
 * @param {Array<object>} timeline - Timeline entries.
 * @returns {Array<{audioMs: number, bytes: number, count: number, label: string}>}
 *   One summary per category seen, in first-seen order. Non-numeric `bytes` /
 *   `chunkDurationMs` values contribute zero.
 */
var summarizeTimelineTraffic = (timeline) => {
  const labelFor = (event) => {
    switch (event) {
      case "inbound-media":
        return "inbound media chunks";
      case "inbound-silence-pad":
        return "inbound silence padding";
      case "stt-send":
        return "STT audio sends";
      case "tts-audio":
        return "post-first TTS audio chunks";
      default:
        return undefined;
    }
  };
  const numeric = (value) => (typeof value === "number" ? value : 0);

  const byLabel = new Map();
  for (const entry of timeline) {
    const label = labelFor(entry.event);
    if (!label) {
      continue;
    }
    let bucket = byLabel.get(label);
    if (!bucket) {
      bucket = { audioMs: 0, bytes: 0, count: 0, label };
      byLabel.set(label, bucket);
    }
    bucket.count += 1;
    bucket.bytes += numeric(entry.bytes);
    bucket.audioMs += numeric(entry.chunkDurationMs);
  }
  return Array.from(byLabel.values());
};
792
/**
 * Converts a timeline into display rows, collapsing runs of consecutive
 * burst events into a single `<event> x<count>` row.
 *
 * A burst event is any low-signal transport event, or a "media" event from
 * the "twilio" source. A run extends while the following entries share the
 * same event and source; its row reports the start-end time range plus the
 * summed bytes and audio duration when they are positive. All other entries
 * are rendered individually. Processing stops at the first falsy entry.
 */
var compactTimeline = (timeline) => {
  const numeric = (value) => (typeof value === "number" ? value : 0);
  const rows = [];
  let start = 0;
  while (start < timeline.length) {
    const head = timeline[start];
    if (!head) {
      break;
    }
    const collapsible = isLowSignalTimelineEvent(head) || (head.event === "media" && head.source === "twilio");
    if (!collapsible) {
      rows.push(formatTimelineText(head));
      start += 1;
      continue;
    }

    // Extend the run over every directly following entry of the same kind.
    let end = start;
    let bytes = numeric(head.bytes);
    let audioMs = numeric(head.chunkDurationMs);
    for (;;) {
      const follower = end + 1 < timeline.length ? timeline[end + 1] : undefined;
      if (!follower || follower.event !== head.event || follower.source !== head.source) {
        break;
      }
      bytes += numeric(follower.bytes);
      audioMs += numeric(follower.chunkDurationMs);
      end += 1;
    }

    const finishedAt = timeline[end]?.atMs ?? head.atMs;
    const segments = [
      `- ${head.atMs}-${finishedAt}ms`,
      `[${head.source}]`,
      `${head.event} x${end - start + 1}`
    ];
    if (bytes > 0) {
      segments.push(`bytes=${bytes}`);
    }
    if (audioMs > 0) {
      segments.push(`audio=${roundMetric(audioMs)}ms`);
    }
    rows.push(segments.join(" "));
    start = end + 1;
  }
  return rows;
};
840
/**
 * Produces a shallow copy of a review artifact whose `id` is the supplied
 * value. The original artifact is left untouched.
 */
var withVoiceCallReviewId = (id, artifact) => {
  const identified = { ...artifact };
  identified.id = id;
  return identified;
};
844
/**
 * Converts a live telephony benchmark report into a call review artifact.
 *
 * Only the first fixture result is used. The trace is sorted by `atMs`, and
 * latency spans are derived from the first matching milestone events; spans
 * with a missing endpoint or a negative duration are dropped.
 *
 * @param {object} report - Report with `fixtures`, and optional `trace`,
 *   `variant`, `ttsConfig`, `turnDetectionConfig`, `generatedAt`.
 * @param {{preset?: *, path?: string}} [options] - Extra metadata copied
 *   onto the artifact.
 * @returns {object} Review artifact.
 * @throws {Error} When the report contains no fixture result.
 */
var createVoiceCallReviewFromLiveTelephonyReport = (report, options = {}) => {
  const fixture = report.fixtures?.[0];
  if (!fixture) {
    throw new Error("Live telephony review requires at least one fixture result.");
  }

  const timeline = Array.from(report.trace ?? []).sort((left, right) => left.atMs - right.atMs);
  const firstPartial = findTimelineEvent(timeline, "partial", "stt");
  const commitEvent = findTimelineEvent(timeline, "commit", "turn");
  const firstTtsAudio = findTimelineEvent(timeline, "tts-first-audio", "benchmark");
  const firstOutboundMedia = findTimelineEvent(timeline, "media", "twilio");
  const bargeInEvent = findTimelineEvent(timeline, "barge-in", "benchmark");
  const clearEvent = findTimelineEvent(timeline, "clear", "twilio");

  // Walk backwards to find the most recent non-empty STT transcript text.
  let lastSttText;
  for (let index = timeline.length - 1; index >= 0; index -= 1) {
    const entry = timeline[index];
    const isSttTranscript = entry.source === "stt" && (entry.event === "partial" || entry.event === "final");
    if (isSttTranscript && typeof entry.text === "string" && entry.text.length > 0) {
      lastSttText = entry.text;
      break;
    }
  }

  const hasTime = (event) => typeof event?.atMs === "number";
  const span = (label, from, to) => (hasTime(from) && hasTime(to) ? { label, valueMs: to.atMs - from.atMs } : undefined);
  const latencyBreakdown = [
    hasTime(firstPartial) ? { label: "start to first partial", valueMs: firstPartial.atMs } : undefined,
    span("first partial to commit", firstPartial, commitEvent),
    span("commit to first TTS audio", commitEvent, firstTtsAudio),
    span("commit to first outbound media", commitEvent, firstOutboundMedia),
    span("barge-in to clear", bargeInEvent, clearEvent)
  ].filter((value) => value !== undefined && value.valueMs >= 0);

  const notes = [
    report.variant?.description,
    firstPartial?.text ? `First partial: "${firstPartial.text}"` : undefined,
    lastSttText ? `Last STT text: "${lastSttText}"` : undefined
  ].filter((value) => typeof value === "string" && value.length > 0);

  const variant = report.variant;
  const sttConfig = variant
    ? { description: variant.description, id: variant.id, model: variant.model }
    : undefined;

  return {
    config: {
      preset: options.preset,
      stt: sttConfig,
      tts: report.ttsConfig,
      turnDetection: report.turnDetectionConfig
    },
    errors: fixture.errors ?? [],
    expectedText: fixture.expectedText,
    fixtureId: fixture.fixtureId,
    generatedAt: report.generatedAt,
    latencyBreakdown,
    notes,
    path: options.path,
    summary: {
      clearLatencyMs: roundMetric(fixture.clearLatencyMs),
      elapsedMs: roundMetric(fixture.elapsedMs),
      firstOutboundMediaLatencyMs: roundMetric(fixture.firstOutboundMediaLatencyMs),
      firstTurnLatencyMs: roundMetric(fixture.firstTurnLatencyMs),
      markLatencyMs: roundMetric(fixture.markLatencyMs),
      outboundMediaCount: fixture.outboundMediaCount,
      pass: fixture.passes,
      termRecall: roundMetric(fixture.termRecall),
      turnCount: fixture.turnCount,
      wordErrorRate: roundMetric(fixture.wordErrorRate)
    },
    title: fixture.title ?? "Voice Call Review",
    timeline,
    transcript: {
      actual: fixture.actualText,
      expected: fixture.expectedText
    }
  };
};
922
/**
 * Extracts a human-readable message from a thrown value: a non-blank string
 * is returned as-is, an `Error` with a non-blank message yields that message,
 * and anything else becomes "Unknown call error".
 */
var toErrorMessage = (error) => {
  const hasContent = (text) => text.trim().length > 0;
  if (typeof error === "string") {
    if (hasContent(error)) {
      return error;
    }
  } else if (error instanceof Error && hasContent(error.message)) {
    return error.message;
  }
  return "Unknown call error";
};
931
/**
 * Creates an in-memory recorder that turns live call traffic into a call
 * review artifact.
 *
 * Every recorded event is stamped with the milliseconds elapsed since the
 * recorder was created (clamped at zero).
 *
 * @param {object} [options]
 * @param {() => number} [options.now] - Clock returning milliseconds;
 *   defaults to `Date.now`.
 * @param {*} [options.config] - Copied onto the artifact as `config`.
 * @param {string} [options.fixtureId] - Copied onto the artifact.
 * @param {string} [options.path] - Copied onto the artifact.
 * @param {string} [options.title] - Artifact title; defaults to
 *   "Voice Call Review".
 * @returns {{finalize: Function, recordError: Function,
 *   recordTwilioInbound: Function, recordTwilioOutbound: Function,
 *   recordVoiceMessage: Function}} Recorder API.
 */
var createVoiceCallReviewRecorder = (options = {}) => {
  const now = options.now ?? (() => Date.now());
  const startedAt = now();
  const errors = [];
  const timeline = [];
  const committedTurns = [];
  const committedTurnIds = new Set();
  // Tracks whether a "tts-first-audio" event has been recorded, so each audio
  // chunk does not rescan the whole timeline.
  let hasFirstTtsAudio = false;

  const push = (source, event, fields = {}) => {
    if (event === "tts-first-audio") {
      hasFirstTtsAudio = true;
    }
    timeline.push({
      atMs: Math.max(0, now() - startedAt),
      event,
      source,
      ...fields
    });
  };

  // Decoded size of a base64 chunk; trailing "=" padding carries no data.
  const decodedByteLength = (base64) => {
    const padding = base64.endsWith("==") ? 2 : base64.endsWith("=") ? 1 : 0;
    return Math.max(0, Math.floor(base64.length * 3 / 4) - padding);
  };

  // Twilio frames are recorded identically in both directions.
  const pushTwilio = (input) => {
    push("twilio", input.event, {
      bytes: input.bytes,
      chunkDurationMs: input.chunkDurationMs,
      name: input.name,
      reason: input.reason,
      text: input.text,
      track: input.track
    });
  };

  return {
    /** Builds the review artifact from everything recorded so far. */
    finalize: () => {
      const sortedTimeline = [...timeline].sort((left, right) => left.atMs - right.atMs);
      const firstPartial = findTimelineEvent(sortedTimeline, "partial", "stt");
      const commitEvent = findTimelineEvent(sortedTimeline, "commit", "turn");
      const firstTtsAudio = findTimelineEvent(sortedTimeline, "tts-first-audio", "benchmark");
      const firstOutboundMedia = findTimelineEvent(sortedTimeline, "media", "twilio");
      const bargeInEvent = findTimelineEvent(sortedTimeline, "barge-in", "benchmark");
      const clearEvent = findTimelineEvent(sortedTimeline, "clear", "twilio");
      const markEvent = findTimelineEvent(sortedTimeline, "mark", "twilio");
      const elapsedMs = sortedTimeline.at(-1)?.atMs ?? 0;
      const lastSttText = [...sortedTimeline].reverse().find((entry) => entry.source === "stt" && (entry.event === "partial" || entry.event === "final") && typeof entry.text === "string" && entry.text.length > 0)?.text ?? undefined;
      const latencyBreakdown = [
        typeof firstPartial?.atMs === "number" ? {
          label: "start to first partial",
          valueMs: firstPartial.atMs
        } : undefined,
        typeof firstPartial?.atMs === "number" && typeof commitEvent?.atMs === "number" ? {
          label: "first partial to commit",
          valueMs: commitEvent.atMs - firstPartial.atMs
        } : undefined,
        typeof commitEvent?.atMs === "number" && typeof firstTtsAudio?.atMs === "number" ? {
          label: "commit to first TTS audio",
          valueMs: firstTtsAudio.atMs - commitEvent.atMs
        } : undefined,
        typeof commitEvent?.atMs === "number" && typeof firstOutboundMedia?.atMs === "number" ? {
          label: "commit to first outbound media",
          valueMs: firstOutboundMedia.atMs - commitEvent.atMs
        } : undefined,
        typeof bargeInEvent?.atMs === "number" && typeof clearEvent?.atMs === "number" ? {
          label: "barge-in to clear",
          valueMs: clearEvent.atMs - bargeInEvent.atMs
        } : undefined
      ].filter((value) => value !== undefined && value.valueMs >= 0);
      return {
        config: options.config,
        // Snapshot: errors recorded later must not alter this artifact.
        errors: [...errors],
        fixtureId: options.fixtureId,
        generatedAt: now(),
        latencyBreakdown,
        notes: [
          firstPartial?.text ? `First partial: "${firstPartial.text}"` : undefined,
          lastSttText ? `Last STT text: "${lastSttText}"` : undefined
        ].filter((value) => typeof value === "string"),
        path: options.path,
        summary: {
          clearLatencyMs: roundMetric(typeof clearEvent?.atMs === "number" && typeof bargeInEvent?.atMs === "number" ? clearEvent.atMs - bargeInEvent.atMs : undefined),
          elapsedMs: roundMetric(elapsedMs),
          firstOutboundMediaLatencyMs: roundMetric(firstOutboundMedia?.atMs),
          firstTurnLatencyMs: roundMetric(commitEvent?.atMs),
          markLatencyMs: roundMetric(markEvent?.atMs),
          outboundMediaCount: sortedTimeline.filter((entry) => entry.source === "twilio" && entry.event === "media").length,
          pass: errors.length === 0,
          turnCount: committedTurns.length
        },
        title: options.title ?? "Voice Call Review",
        timeline: sortedTimeline,
        transcript: {
          actual: committedTurns.join(" ").trim()
        }
      };
    },
    /** Records an arbitrary thrown value as a call error. */
    recordError: (error) => {
      const message = toErrorMessage(error);
      errors.push(message);
      push("turn", "error", {
        reason: message
      });
    },
    /** Records a Twilio frame received from the caller side. */
    recordTwilioInbound: (input) => {
      pushTwilio(input);
    },
    /** Records a Twilio frame sent toward the caller. */
    recordTwilioOutbound: (input) => {
      pushTwilio(input);
    },
    /** Records a voice-session message; unknown message types are ignored. */
    recordVoiceMessage: (message) => {
      switch (message.type) {
        case "partial":
        case "final":
          push("stt", message.type, {
            confidence: message.transcript.confidence,
            text: message.transcript.text
          });
          return;
        case "assistant":
          push("turn", "assistant", {
            text: message.text
          });
          return;
        case "audio":
          push("benchmark", hasFirstTtsAudio ? "tts-audio" : "tts-first-audio", {
            bytes: decodedByteLength(message.chunkBase64)
          });
          return;
        case "turn":
          // A turn may be announced more than once; commit it only once.
          if (committedTurnIds.has(message.turn.id)) {
            return;
          }
          committedTurnIds.add(message.turn.id);
          committedTurns.push(message.turn.text);
          push("turn", "commit", {
            confidence: message.turn.quality?.averageConfidence,
            text: message.turn.text
          });
          return;
        case "error":
          errors.push(message.message);
          push("turn", "error", {
            reason: message.message
          });
          return;
        case "complete":
          push("turn", "complete", {
            text: message.sessionId
          });
          return;
        case "session":
          push("turn", "session", {
            reason: message.status,
            text: message.sessionId
          });
          return;
        case "pong":
          push("benchmark", "pong");
          return;
      }
    }
  };
};
1089
/**
 * Renders the "## Config" markdown section as a fenced, pretty-printed JSON
 * block. Returns an empty string when no config is given.
 */
var renderConfigSection = (config) => {
  if (!config) {
    return "";
  }
  const fence = "```";
  const lines = ["## Config", "", `${fence}json`, JSON.stringify(config, null, 2), fence];
  return lines.join("\n");
};
1102
/**
 * Renders the "## Timeline" markdown section from the entries that are not
 * low-signal transport events. When none remain, a placeholder line is
 * emitted instead of rows.
 */
var renderTimeline = (timeline) => {
  const heading = "## Timeline";
  const focused = timeline.filter((entry) => !isLowSignalTimelineEvent(entry));
  if (focused.length === 0) {
    return `${heading}\n\n_No timeline events captured._`;
  }
  return [heading, ""].concat(compactTimeline(focused)).join("\n");
};
1113
/**
 * Renders the "## Transport Summary" markdown section: one bullet per traffic
 * category with its event count, plus total bytes and audio duration when
 * those are positive. Returns an empty string when there is no such traffic.
 */
var renderTransportSummary = (timeline) => {
  const summaries = summarizeTimelineTraffic(timeline);
  if (summaries.length === 0) {
    return "";
  }
  const bullets = summaries.map(({ label, count, bytes, audioMs }) => {
    const details = [`- ${label}: ${count}`];
    if (bytes > 0) {
      details.push(`${bytes} bytes`);
    }
    if ((audioMs ?? 0) > 0) {
      details.push(`${roundMetric(audioMs)}ms audio`);
    }
    return details.join(", ");
  });
  return ["## Transport Summary", "", ...bullets].join("\n");
};
1134
/**
 * Renders the "## Latency Breakdown" markdown section with one bullet per
 * span, values rounded to two decimals. Returns an empty string for an empty
 * breakdown.
 */
var renderLatencyBreakdown = (breakdown) => {
  if (breakdown.length === 0) {
    return "";
  }
  const bullets = breakdown.map(({ label, valueMs }) => `- ${label}: ${roundMetric(valueMs)}ms`);
  return ["## Latency Breakdown", "", ...bullets].join("\n");
};
1145
/**
 * Renders a call review artifact as a markdown document: title, optional
 * source/fixture lines, summary bullets, transcript, then the optional notes,
 * latency, transport, errors and config sections, and finally the timeline.
 *
 * @param {object} artifact - Review artifact with `title`, `summary`,
 *   `transcript`, `notes`, `errors`, `latencyBreakdown`, `timeline` and
 *   optional `path`, `fixtureId`, `config`.
 * @returns {string} Markdown text.
 */
var renderVoiceCallReviewMarkdown = (artifact) => {
  const { summary } = artifact;

  // formatMetric yields "label: value" without a list marker. Without the
  // bullet the metric lines fold into the preceding "- pass" item when the
  // markdown is rendered, so add it here.
  const metricBullet = (label, value) => {
    const text = formatMetric(label, value);
    if (typeof text !== "string") {
      return undefined;
    }
    return text.startsWith("- ") ? text : `- ${text}`;
  };

  const summaryLines = [
    `- pass: ${summary.pass ? "yes" : "no"}`,
    metricBullet("first turn", summary.firstTurnLatencyMs),
    metricBullet("first outbound media", summary.firstOutboundMediaLatencyMs),
    metricBullet("mark", summary.markLatencyMs),
    metricBullet("clear", summary.clearLatencyMs),
    metricBullet("elapsed", summary.elapsedMs),
    typeof summary.wordErrorRate === "number" ? `- word error rate: ${summary.wordErrorRate}` : undefined,
    typeof summary.termRecall === "number" ? `- term recall: ${summary.termRecall}` : undefined,
    typeof summary.turnCount === "number" ? `- turn count: ${summary.turnCount}` : undefined,
    typeof summary.outboundMediaCount === "number" ? `- outbound media count: ${summary.outboundMediaCount}` : undefined
  ].filter((value) => typeof value === "string");

  const notes = artifact.notes.length ? ["## Notes", "", ...artifact.notes.map((note) => `- ${note}`)].join("\n") : "";
  const errors = artifact.errors.length ? ["## Errors", "", ...artifact.errors.map((error) => `- ${error}`)].join("\n") : "";
  const latency = renderLatencyBreakdown(artifact.latencyBreakdown);
  const transportSummary = renderTransportSummary(artifact.timeline);
  // Render the config section once and reuse it for the spacer decision.
  const configSection = renderConfigSection(artifact.config);

  return [
    `# ${artifact.title}`,
    "",
    artifact.path ? `Source: \`${artifact.path}\`` : undefined,
    artifact.fixtureId ? `Fixture: \`${artifact.fixtureId}\`` : undefined,
    "",
    "## Summary",
    "",
    ...summaryLines,
    "",
    "## Transcript",
    "",
    `- expected: ${artifact.transcript.expected ?? "_n/a_"}`,
    `- actual: ${artifact.transcript.actual}`,
    "",
    notes,
    notes ? "" : undefined,
    latency,
    latency ? "" : undefined,
    transportSummary,
    transportSummary ? "" : undefined,
    errors,
    errors ? "" : undefined,
    configSection,
    configSection ? "" : undefined,
    renderTimeline(artifact.timeline)
  ].filter((value) => typeof value === "string").join("\n");
};
1193
/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) in a
 * string so it can be embedded in element content or attribute values.
 */
var escapeHtml2 = (value) => {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  return value.replace(/[&<>"']/gu, (character) => entities[character]);
};
1194
/**
 * Renders a call review artifact as a standalone, dark-themed HTML page with
 * summary metrics, transcript, notes, latency breakdown, transport summary,
 * timeline and config sections.
 *
 * All artifact-derived values are HTML-escaped. A missing latency metric is
 * shown as "n/a" with no unit suffix.
 *
 * @param {object} artifact - Review artifact with `title`, `summary`,
 *   `transcript`, `notes`, `latencyBreakdown`, `timeline` and optional
 *   `config`.
 * @returns {string} Complete HTML document.
 */
var renderVoiceCallReviewHTML = (artifact) => {
  const isMissing = (value) => value === undefined || value === null;
  // Append the unit only when there is a value, so a missing metric reads
  // "n/a" rather than "n/ams".
  const metricMs = (value) => (isMissing(value) ? "n/a" : `${escapeHtml2(String(value))}ms`);
  const metricPlain = (value) => (isMissing(value) ? "n/a" : escapeHtml2(String(value)));

  const notes = artifact.notes.map((note) => `<li>${escapeHtml2(note)}</li>`).join("");
  const latency = artifact.latencyBreakdown.map((entry) => `<li><strong>${escapeHtml2(entry.label)}:</strong> ${roundMetric(entry.valueMs)}ms</li>`).join("");
  const transport = summarizeTimelineTraffic(artifact.timeline).map((summary) => {
    const parts = [`${summary.count}`, "events"];
    if (summary.bytes > 0) {
      parts.push(`${summary.bytes} bytes`);
    }
    if ((summary.audioMs ?? 0) > 0) {
      parts.push(`${roundMetric(summary.audioMs)}ms audio`);
    }
    return `<li><strong>${escapeHtml2(summary.label)}:</strong> ${escapeHtml2(parts.join(", "))}</li>`;
  }).join("");
  const timeline = compactTimeline(artifact.timeline.filter((entry) => !isLowSignalTimelineEvent(entry))).map((line) => `<li>${escapeHtml2(line.replace(/^- /u, ""))}</li>`).join("");
  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>${escapeHtml2(artifact.title)}</title>
<style>
:root { color-scheme: dark; }
body { font-family: ui-sans-serif, system-ui, sans-serif; margin: 0; padding: 24px; background: #0b0d10; color: #f4f4f5; }
main { max-width: 980px; margin: 0 auto; display: grid; gap: 16px; }
section { background: #13161b; border: 1px solid #232833; border-radius: 16px; padding: 18px; }
h1, h2 { margin: 0 0 12px; }
ul { margin: 0; padding-left: 20px; display: grid; gap: 8px; }
code, pre { font-family: ui-monospace, SFMono-Regular, monospace; }
pre { white-space: pre-wrap; overflow-wrap: anywhere; background: #0f1217; border-radius: 12px; padding: 14px; border: 1px solid #232833; }
.grid { display: grid; gap: 16px; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); }
.metric { display: grid; gap: 4px; }
.label { color: #a1a1aa; font-size: 0.82rem; text-transform: uppercase; letter-spacing: 0.08em; }
.value { font-size: 1.05rem; }
</style>
</head>
<body>
<main>
<section>
<h1>${escapeHtml2(artifact.title)}</h1>
<div class="grid">
<div class="metric"><div class="label">Pass</div><div class="value">${artifact.summary.pass ? "yes" : "no"}</div></div>
<div class="metric"><div class="label">First Turn</div><div class="value">${metricMs(artifact.summary.firstTurnLatencyMs)}</div></div>
<div class="metric"><div class="label">First Outbound Media</div><div class="value">${metricMs(artifact.summary.firstOutboundMediaLatencyMs)}</div></div>
<div class="metric"><div class="label">Turn Count</div><div class="value">${metricPlain(artifact.summary.turnCount)}</div></div>
</div>
</section>
<section>
<h2>Transcript</h2>
<ul>
<li><strong>Expected:</strong> ${escapeHtml2(artifact.transcript.expected ?? "n/a")}</li>
<li><strong>Actual:</strong> ${escapeHtml2(artifact.transcript.actual || "n/a")}</li>
</ul>
</section>
<section>
<h2>Notes</h2>
<ul>${notes || "<li>No notes.</li>"}</ul>
</section>
<section>
<h2>Latency Breakdown</h2>
<ul>${latency || "<li>No latency data.</li>"}</ul>
</section>
<section>
<h2>Transport Summary</h2>
<ul>${transport || "<li>No transport data.</li>"}</ul>
</section>
<section>
<h2>Timeline</h2>
<ul>${timeline || "<li>No timeline events.</li>"}</ul>
</section>
<section>
<h2>Config</h2>
<pre>${escapeHtml2(JSON.stringify(artifact.config ?? {}, null, 2))}</pre>
</section>
</main>
</body>
</html>`;
};
1271
+
1272
+ // src/runtimeOps.ts
1273
/**
 * Builds the default title for a call review.
 *
 * The title names the session's `scenarioId` when that value is truthy and
 * falls back to the session `id` otherwise.
 *
 * @param {object} session - Session record; `scenarioId` and `id` are read.
 * @returns {string} Title of the form `Voice call review: <subject>`.
 */
var defaultReviewTitle = (session) => {
  const subject = session.scenarioId ? session.scenarioId : session.id;
  return `Voice call review: ${subject}`;
};
1274
/**
 * Produces the default post-call summary for a call disposition.
 *
 * Every summary carries `label`, `recommendedAction`, `reason` (copied from the
 * input, possibly undefined) and `summary`. The "transferred" summary also
 * carries `target`. Any disposition that is not explicitly handled, including
 * "completed", yields the "Completed" summary.
 *
 * @param {object} input - Reads `disposition`, `reason` and `target`.
 * @returns {object} The summary record for the disposition.
 */
var buildDefaultPostCallSummary = (input) => {
  const { disposition, reason, target } = input;
  // Shared shape for every disposition; key order matches the emitted records.
  const describe = (label, recommendedAction, summary) => ({
    label,
    recommendedAction,
    reason,
    summary
  });
  if (disposition === "transferred") {
    return {
      ...describe(
        "Transferred",
        target ? `Confirm the handoff to ${target} completed successfully.` : "Confirm the transfer completed successfully.",
        target ? `The call was transferred to ${target}.` : "The call was transferred."
      ),
      target
    };
  }
  if (disposition === "escalated") {
    return describe(
      "Escalated",
      "Review the escalated call and route it to a human operator.",
      reason ? `The call escalated because ${reason}.` : "The call escalated for operator review."
    );
  }
  if (disposition === "voicemail") {
    return describe(
      "Voicemail",
      "Queue a callback follow-up for this caller.",
      "The call reached voicemail and needs a callback."
    );
  }
  if (disposition === "no-answer") {
    return describe(
      "No Answer",
      "Retry the call or create a callback task.",
      "The call did not reach a live respondent."
    );
  }
  if (disposition === "failed") {
    return describe(
      "Failed",
      "Inspect the call review before retrying this flow.",
      reason ? `The call failed because ${reason}.` : "The call failed before a successful completion."
    );
  }
  if (disposition === "closed") {
    return describe(
      "Closed",
      "Inspect the review if this early closure was unexpected.",
      "The call closed before an explicit completion."
    );
  }
  // "completed" and any unrecognised disposition share the same record.
  return describe(
    "Completed",
    "No follow-up action is required.",
    "The call completed successfully."
  );
};
1329
/**
 * Derives a call review record from a stored session.
 *
 * The review contains the joined turn text as the actual transcript, a single
 * "Session elapsed" latency entry when the elapsed time is a non-negative
 * number, the default post-call summary for the disposition, and a timeline
 * built from the session's call lifecycle events (empty when the session has
 * no `call` state).
 *
 * @param {object} input - Reads `session`, `disposition`, `reason`, `target`
 *   and the optional `generatedAt` timestamp (defaults to `Date.now()`).
 * @returns {object} The review record.
 */
var createVoiceCallReviewFromSession = (input) => {
  const { session } = input;
  const generatedAt = input.generatedAt ?? Date.now();
  const actual = session.turns.map((turn) => turn.text).join(" ").trim();
  // Elapsed time runs from creation to the last activity, or to generation
  // time when no activity timestamp is recorded.
  const elapsedMs = (session.lastActivityAt ?? generatedAt) - session.createdAt;
  const isFailure = input.disposition === "failed";

  const latencyBreakdown = [];
  if (typeof elapsedMs === "number" && elapsedMs >= 0) {
    latencyBreakdown.push({
      label: "Session elapsed",
      valueMs: elapsedMs
    });
  }

  // Lifecycle event timestamps are made relative to session creation and
  // clamped so they are never negative.
  const toTimelineEntry = (event) => ({
    atMs: Math.max(0, event.at - session.createdAt),
    event: `call-${event.type}`,
    reason: event.reason,
    source: "turn",
    text: event.target ?? event.disposition,
    track: event.target
  });

  return {
    errors: isFailure && input.reason ? [input.reason] : [],
    generatedAt,
    latencyBreakdown,
    notes: [],
    postCall: buildDefaultPostCallSummary({
      disposition: input.disposition,
      reason: input.reason,
      target: input.target
    }),
    summary: {
      elapsedMs: elapsedMs >= 0 ? elapsedMs : undefined,
      outcome: input.disposition,
      pass: !isFailure,
      turnCount: session.turns.length
    },
    title: defaultReviewTitle(session),
    timeline: session.call?.events.map((event) => toTimelineEntry(event)) ?? [],
    transcript: {
      actual
    }
  };
};
1368
/**
 * Ensures a review has an id before it is stored.
 *
 * A review whose `id` is a non-empty string is returned unchanged; otherwise
 * the review is passed to `withVoiceCallReviewId` with the id
 * `<sessionId>:review`.
 *
 * @param {string} sessionId - Session id used to derive the fallback id.
 * @param {object} review - Review candidate.
 * @returns {object} The review, carrying an id.
 */
var asStoredReview = (sessionId, review) => {
  const hasId = typeof review.id === "string" && review.id.length > 0;
  return hasId ? review : withVoiceCallReviewId(`${sessionId}:review`, review);
};
1374
/**
 * Ensures an ops task has an id before it is stored.
 *
 * A task that already has a non-empty string `id` is returned unchanged;
 * otherwise the task is passed to `withVoiceOpsTaskId` with the id
 * `<review.id>:ops`.
 *
 * @param {object} review - Stored review the task was derived from.
 * @param {object} task - Task candidate.
 * @returns {object} The task, carrying an id.
 */
var asStoredTask = (review, task) => {
  const hasId = "id" in task && typeof task.id === "string" && task.id.length > 0;
  if (!hasId) {
    return withVoiceOpsTaskId(`${review.id}:ops`, task);
  }
  return task;
};
1380
/**
 * Persists a runtime event and then notifies the configured listener.
 *
 * The event is first written to `config.events` (keyed by `event.id`) when
 * that store is configured, then handed to `config.onEvent` when that hook is
 * configured. The two steps are awaited in that order.
 *
 * @param {object} input - Reads `config`, `event`, `api`, `context`, `session`.
 * @returns {Promise<void>}
 */
var emitRuntimeEvent = async (input) => {
  const { api, config, context, event, session } = input;
  await config.events?.set(event.id, event);
  await config.onEvent?.({ api, context, event, session });
};
1389
/**
 * Records the ops artifacts for a finished call.
 *
 * Does nothing (resolves to undefined) when no ops `config` is supplied.
 * Otherwise it:
 *   1. builds a review via `config.buildReview`, falling back to
 *      `createVoiceCallReviewFromSession`, stores it and emits a
 *      review-saved event;
 *   2. builds a task via `config.createTaskFromReview`, falling back to
 *      `buildVoiceOpsTaskFromReview`, and when one exists stores it and emits
 *      a task-created event;
 *   3. emits a call-completed event.
 *
 * @param {object} input - Reads `config`, `api`, `context`, `session`,
 *   `disposition`, `metadata`, `reason` and `target`.
 * @returns {Promise<{review: object|undefined, task: object|undefined}|undefined>}
 */
var recordVoiceRuntimeOps = async (input) => {
  if (!input.config) {
    return;
  }

  // All three events share the same envelope; only the event payload differs.
  const emit = (event) => emitRuntimeEvent({
    api: input.api,
    config: input.config,
    context: input.context,
    event,
    session: input.session
  });

  // The result of the most recent turn, if any, is offered to buildReview.
  const result = input.session.turns.at(-1)?.result;
  const customReview = await input.config.buildReview?.({
    api: input.api,
    context: input.context,
    disposition: input.disposition,
    metadata: input.metadata,
    reason: input.reason,
    result,
    session: input.session,
    target: input.target
  });
  const reviewCandidate = customReview ?? createVoiceCallReviewFromSession({
    disposition: input.disposition,
    reason: input.reason,
    session: input.session,
    target: input.target
  });
  const review = reviewCandidate ? asStoredReview(input.session.id, reviewCandidate) : undefined;

  let task;
  if (review) {
    await input.config.reviews?.set(review.id, review);
    await emit(createVoiceReviewSavedEvent(review));

    const customTask = await input.config.createTaskFromReview?.({
      api: input.api,
      context: input.context,
      disposition: input.disposition,
      review,
      session: input.session
    });
    const taskCandidate = customTask ?? buildVoiceOpsTaskFromReview(review) ?? undefined;
    if (taskCandidate) {
      task = asStoredTask(review, taskCandidate);
      await input.config.tasks?.set(task.id, task);
      await emit(createVoiceTaskCreatedEvent(task));
    }
  }

  await emit(createVoiceCallCompletedEvent({
    disposition: input.disposition,
    session: input.session
  }));

  return {
    review,
    task
  };
};
1456
+
478
1457
  // src/store.ts
479
1458
  var createId = () => crypto.randomUUID();
480
1459
  var createVoiceSessionRecord = (id, scenarioId) => ({
@@ -515,6 +1494,9 @@ var toVoiceSessionSummary = (session) => ({
515
1494
  turnCount: session.turns.length
516
1495
  });
517
1496
 
1497
+ // src/session.ts
1498
+ import { Buffer } from "buffer";
1499
+
518
1500
  // src/turnDetection.ts
519
1501
  var DEFAULT_SILENCE_MS = 700;
520
1502
  var DEFAULT_SPEECH_THRESHOLD = 0.015;
@@ -560,6 +1542,9 @@ var selectPreferredTranscriptText = (currentText, nextText) => {
560
1542
  if (countWords(next) > countWords(current)) {
561
1543
  return next;
562
1544
  }
1545
+ if (countWords(next) === countWords(current) && next.length > current.length) {
1546
+ return next;
1547
+ }
563
1548
  return current;
564
1549
  };
565
1550
  var mergeSequentialTranscriptText = (currentText, nextText) => {
@@ -642,6 +1627,8 @@ var DEFAULT_FALLBACK_MAX_ATTEMPTS_PER_TURN = 1;
642
1627
  var DEFAULT_DUPLICATE_TURN_WINDOW_MS = 5000;
643
1628
  var FALLBACK_CONFIDENCE_SELECTION_DELTA = 0.05;
644
1629
  var FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO = 0.12;
1630
+ var EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS = 200;
1631
+ var MAX_VENDOR_COMMIT_GRACE_MS = 1200;
645
1632
  var DEFAULT_FORMAT = {
646
1633
  channels: 1,
647
1634
  container: "raw",
@@ -660,6 +1647,7 @@ var createEmptyCurrentTurn = () => ({
660
1647
  transcripts: []
661
1648
  });
662
1649
  var cloneTranscript = (transcript) => ({ ...transcript });
1650
+ var encodeBase64 = (chunk) => Buffer.from(chunk).toString("base64");
663
1651
  var countWords2 = (text) => text.trim().split(/\s+/).filter(Boolean).length;
664
1652
  var normalizeText2 = (text) => text.trim().replace(/\s+/g, " ");
665
1653
  var getAudioChunkDurationMs = (chunk) => chunk.byteLength / (DEFAULT_FORMAT.sampleRateHz * DEFAULT_FORMAT.channels * 2) * 1000;
@@ -678,13 +1666,14 @@ var calculateMeanConfidence = (transcripts) => {
678
1666
  }
679
1667
  return sum / total;
680
1668
  };
681
- var createTurnQuality = (transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics) => {
1669
+ var createTurnQuality = (transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics, costEstimate) => {
682
1670
  const sampledTranscripts = transcripts.filter((transcript) => typeof transcript.confidence === "number");
683
1671
  const confidenceSampleCount = sampledTranscripts.length;
684
1672
  return {
685
1673
  averageConfidence: confidenceSampleCount > 0 ? sampledTranscripts.reduce((sum, transcript) => sum + transcript.confidence, 0) / confidenceSampleCount : undefined,
686
1674
  confidenceSampleCount,
687
1675
  correction: correctionDiagnostics,
1676
+ cost: costEstimate,
688
1677
  fallback: fallbackDiagnostics,
689
1678
  fallbackUsed,
690
1679
  finalTranscriptCount: transcripts.filter((transcript) => transcript.isFinal).length,
@@ -693,6 +1682,19 @@ var createTurnQuality = (transcripts, source, fallbackUsed, fallbackDiagnostics,
693
1682
  source
694
1683
  };
695
1684
  };
1685
/**
 * Estimates the relative speech-to-text cost of a single turn.
 *
 * Audio durations are clamped to be non-negative, and a missing or non-finite
 * duration is treated as 0 so that NaN never reaches the reported figures.
 * Cost is expressed in "relative units": minutes of audio multiplied by a
 * per-pass cost unit. The primary unit defaults to 1 and the fallback unit
 * defaults to the primary unit.
 *
 * @param {object} input
 * @param {number} input.primaryAudioMs - Audio sent through the primary pass.
 * @param {number} input.fallbackReplayAudioMs - Audio replayed to the fallback.
 * @param {number} input.fallbackAttemptCount - Copied through unchanged.
 * @param {number} [input.primaryPassCostUnit] - Cost unit per primary minute.
 * @param {number} [input.fallbackPassCostUnit] - Cost unit per fallback minute.
 * @returns {object} The estimate, including `totalBillableAudioMs`.
 */
var createTurnCostEstimate = (input) => {
  const MS_PER_MINUTE = 60000;
  // Clamp once; Math.max(0, NaN) is NaN, so non-finite values are zeroed first.
  const toBillableMs = (value) => Number.isFinite(value) ? Math.max(0, value) : 0;
  const primaryAudioMs = toBillableMs(input.primaryAudioMs);
  const fallbackReplayAudioMs = toBillableMs(input.fallbackReplayAudioMs);
  const primaryCostUnit = input.primaryPassCostUnit ?? 1;
  const fallbackCostUnit = input.fallbackPassCostUnit ?? primaryCostUnit;
  return {
    estimatedRelativeCostUnits: primaryAudioMs / MS_PER_MINUTE * primaryCostUnit + fallbackReplayAudioMs / MS_PER_MINUTE * fallbackCostUnit,
    fallbackAttemptCount: input.fallbackAttemptCount,
    fallbackReplayAudioMs,
    primaryAudioMs,
    totalBillableAudioMs: primaryAudioMs + fallbackReplayAudioMs
  };
};
696
1698
  var normalizeCorrectionText = (text) => normalizeText2(text);
697
1699
  var isFallbackNeeded = (candidate, config) => {
698
1700
  const trimmed = normalizeText2(candidate.text);
@@ -761,6 +1763,36 @@ var setTurnResult = (session, turnId, input) => {
761
1763
  result: input.result ?? turn.result
762
1764
  } : turn);
763
1765
  };
1766
/**
 * Returns the session's call lifecycle state, creating it on first use.
 *
 * When `session.call` is null or undefined it is initialised with an empty
 * event list and with both `startedAt` and `lastEventAt` set to the session's
 * `createdAt`. The session object is mutated in place.
 *
 * @param {object} session - Session record.
 * @returns {object} The (possibly newly created) `session.call` object.
 */
var ensureCallLifecycleState = (session) => {
  const startedAt = session.createdAt;
  if (session.call === null || session.call === undefined) {
    session.call = {
      events: [],
      lastEventAt: startedAt,
      startedAt
    };
  }
  return session.call;
};
1775
/**
 * Appends a call lifecycle event to the session, stamped with the current time.
 *
 * The events array is replaced with a new array rather than mutated, and
 * `lastEventAt` is advanced to the event time. An event of type "end"
 * additionally records the call's `disposition` and `endedAt`.
 *
 * @param {object} session - Session record; its `call` state is created if missing.
 * @param {object} input - Reads `type`, `disposition`, `metadata`, `reason`, `target`.
 * @returns {object} The session's call lifecycle state.
 */
var pushCallLifecycleEvent = (session, input) => {
  const lifecycle = ensureCallLifecycleState(session);
  const at = Date.now();
  const entry = {
    at,
    disposition: input.disposition,
    metadata: input.metadata,
    reason: input.reason,
    target: input.target,
    type: input.type
  };
  lifecycle.events = [...lifecycle.events, entry];
  lifecycle.lastEventAt = at;
  const endsCall = input.type === "end";
  if (endsCall) {
    lifecycle.disposition = input.disposition;
    lifecycle.endedAt = at;
  }
  return lifecycle;
};
764
1796
  var createVoiceSession = (options) => {
765
1797
  const logger = resolveLogger(options.logger);
766
1798
  const reconnect = {
@@ -784,15 +1816,21 @@ var createVoiceSession = (options) => {
784
1816
  trigger: options.sttFallback.trigger ?? "empty-or-low-confidence"
785
1817
  } : undefined;
786
1818
  const phraseHints = options.phraseHints ?? [];
1819
+ const lexicon = options.lexicon ?? [];
787
1820
  let socket = options.socket;
788
1821
  let sttSession = null;
1822
+ let ttsSession = null;
1823
+ let ttsSessionPromise = null;
789
1824
  let silenceTimer = null;
1825
+ let pendingCommitReason = null;
790
1826
  let speechDetected = false;
791
1827
  let operationQueue = Promise.resolve();
792
1828
  let adapterGenerationCounter = 0;
793
1829
  let activeAdapterGeneration = 0;
1830
+ let activeTTSTurnId;
794
1831
  const currentTurnAudio = [];
795
1832
  let fallbackAttemptsForCurrentTurn = 0;
1833
+ let fallbackReplayAudioMsForCurrentTurn = 0;
796
1834
  const pruneTurnAudio = () => {
797
1835
  const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
798
1836
  const cutoffAt = Date.now() - replayWindowMs;
@@ -825,6 +1863,13 @@ var createVoiceSession = (options) => {
825
1863
  }
826
1864
  clearTimeout(silenceTimer);
827
1865
  silenceTimer = null;
1866
+ pendingCommitReason = null;
1867
+ };
1868
/**
 * Computes how long to wait before committing a vendor-signalled turn end.
 *
 * When either `silenceMs` or `transcriptStabilityMs` is below
 * EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS the delay is just
 * `transcriptStabilityMs`. Otherwise the delay is twice `silenceMs`, capped at
 * MAX_VENDOR_COMMIT_GRACE_MS, but never less than `transcriptStabilityMs`.
 *
 * @returns {number} Delay in milliseconds.
 */
const getVendorCommitDelayMs = () => {
  const { silenceMs, transcriptStabilityMs } = turnDetection;
  const isBelowThreshold = (valueMs) => valueMs < EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS;
  if (isBelowThreshold(silenceMs) || isBelowThreshold(transcriptStabilityMs)) {
    return transcriptStabilityMs;
  }
  const cappedGraceMs = Math.min(MAX_VENDOR_COMMIT_GRACE_MS, silenceMs * 2);
  return Math.max(transcriptStabilityMs, cappedGraceMs);
};
829
1874
  const send = async (message) => {
830
1875
  try {
@@ -875,6 +1920,24 @@ var createVoiceSession = (options) => {
875
1920
  });
876
1921
  }
877
1922
  };
1923
/**
 * Detaches and closes the active TTS session, if there is one.
 *
 * The shared TTS state (`ttsSession`, `ttsSessionPromise`, `activeTTSTurnId`)
 * is cleared before the adapter is closed. A failure while closing is logged
 * as a warning and not rethrown.
 *
 * @param {string} reason - Passed to the adapter's `close` and to the log entry.
 * @returns {Promise<void>}
 */
const closeTTSSession = async (reason) => {
  const sessionToClose = ttsSession;
  ttsSession = null;
  ttsSessionPromise = null;
  activeTTSTurnId = undefined;
  if (sessionToClose) {
    try {
      await sessionToClose.close(reason);
    } catch (closeError) {
      logger.warn("voice tts adapter close failed", {
        error: toError(closeError).message,
        reason,
        sessionId: options.id
      });
    }
  }
};
878
1941
  const scheduleTurnCommit = (delayMs, reason, reset = true) => {
879
1942
  if (!reset && silenceTimer) {
880
1943
  return;
@@ -882,8 +1945,10 @@ var createVoiceSession = (options) => {
882
1945
  if (reset) {
883
1946
  clearSilenceTimer();
884
1947
  }
1948
+ pendingCommitReason = reason;
885
1949
  silenceTimer = setTimeout(() => {
886
1950
  silenceTimer = null;
1951
+ pendingCommitReason = null;
887
1952
  api.commitTurn(reason);
888
1953
  }, delayMs);
889
1954
  };
@@ -898,6 +1963,10 @@ var createVoiceSession = (options) => {
898
1963
  return;
899
1964
  }
900
1965
  const transcriptStabilityAge = session.currentTurn.lastTranscriptAt !== undefined ? Date.now() - session.currentTurn.lastTranscriptAt : undefined;
1966
+ if (reason === "vendor") {
1967
+ scheduleTurnCommit(getVendorCommitDelayMs(), reason);
1968
+ return;
1969
+ }
901
1970
  if (reason !== "manual" && typeof transcriptStabilityAge === "number" && transcriptStabilityAge < turnDetection.transcriptStabilityMs) {
902
1971
  scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason);
903
1972
  return;
@@ -906,16 +1975,32 @@ var createVoiceSession = (options) => {
906
1975
  };
907
1976
  const failInternal = async (error) => {
908
1977
  clearSilenceTimer();
1978
+ let didFail = false;
909
1979
  const session = await writeSession((currentSession) => {
1980
+ if (currentSession.status === "failed") {
1981
+ return;
1982
+ }
1983
+ didFail = true;
910
1984
  currentSession.lastActivityAt = Date.now();
911
1985
  currentSession.status = "failed";
1986
+ if (!currentSession.call?.endedAt) {
1987
+ pushCallLifecycleEvent(currentSession, {
1988
+ disposition: "failed",
1989
+ reason: toError(error).message,
1990
+ type: "end"
1991
+ });
1992
+ }
912
1993
  });
913
- const resolvedError = toError(error);
1994
+ if (!didFail) {
1995
+ return;
1996
+ }
1997
+ const resolvedError = toError(error);
914
1998
  await send({
915
1999
  message: resolvedError.message,
916
2000
  recoverable: false,
917
2001
  type: "error"
918
2002
  });
2003
+ await closeTTSSession("failed");
919
2004
  await closeAdapter("failed");
920
2005
  speechDetected = false;
921
2006
  rewindFallbackTurnAudio();
@@ -926,13 +2011,24 @@ var createVoiceSession = (options) => {
926
2011
  session,
927
2012
  sessionId: options.id
928
2013
  });
2014
+ await options.route.onCallEnd?.({
2015
+ api,
2016
+ context: options.context,
2017
+ disposition: "failed",
2018
+ reason: resolvedError.message,
2019
+ session
2020
+ });
929
2021
  };
930
- const completeInternal = async (result) => {
2022
+ const completeInternal = async (result, input = {}) => {
931
2023
  clearSilenceTimer();
2024
+ const disposition = input.disposition ?? "completed";
2025
+ const shouldInvokeOnComplete = input.invokeOnComplete ?? disposition === "completed";
2026
+ let didComplete = false;
932
2027
  const session = await writeSession((currentSession) => {
933
- if (currentSession.status === "completed") {
2028
+ if (currentSession.status === "completed" || currentSession.status === "failed") {
934
2029
  return;
935
2030
  }
2031
+ didComplete = true;
936
2032
  currentSession.lastActivityAt = Date.now();
937
2033
  currentSession.status = "completed";
938
2034
  if (result !== undefined && currentSession.turns.length > 0) {
@@ -943,18 +2039,135 @@ var createVoiceSession = (options) => {
943
2039
  });
944
2040
  }
945
2041
  }
2042
+ if (!currentSession.call?.endedAt) {
2043
+ pushCallLifecycleEvent(currentSession, {
2044
+ disposition,
2045
+ metadata: input.metadata,
2046
+ reason: input.reason,
2047
+ target: input.target,
2048
+ type: "end"
2049
+ });
2050
+ }
946
2051
  });
2052
+ if (!didComplete) {
2053
+ return;
2054
+ }
947
2055
  await send({
948
2056
  sessionId: options.id,
949
2057
  type: "complete"
950
2058
  });
2059
+ await closeTTSSession("complete");
951
2060
  await closeAdapter("complete");
952
2061
  speechDetected = false;
953
2062
  rewindFallbackTurnAudio();
954
- await options.route.onComplete({
2063
+ if (disposition === "transferred" && input.target) {
2064
+ await options.route.onTransfer?.({
2065
+ api,
2066
+ context: options.context,
2067
+ metadata: input.metadata,
2068
+ reason: input.reason,
2069
+ session,
2070
+ target: input.target
2071
+ });
2072
+ }
2073
+ if (disposition === "escalated" && input.reason) {
2074
+ await options.route.onEscalation?.({
2075
+ api,
2076
+ context: options.context,
2077
+ metadata: input.metadata,
2078
+ reason: input.reason,
2079
+ session
2080
+ });
2081
+ }
2082
+ if (disposition === "voicemail") {
2083
+ await options.route.onVoicemail?.({
2084
+ api,
2085
+ context: options.context,
2086
+ metadata: input.metadata,
2087
+ session
2088
+ });
2089
+ }
2090
+ if (disposition === "no-answer") {
2091
+ await options.route.onNoAnswer?.({
2092
+ api,
2093
+ context: options.context,
2094
+ metadata: input.metadata,
2095
+ session
2096
+ });
2097
+ }
2098
+ if (shouldInvokeOnComplete) {
2099
+ await options.route.onComplete({
2100
+ api,
2101
+ context: options.context,
2102
+ session
2103
+ });
2104
+ }
2105
+ await options.route.onCallEnd?.({
955
2106
  api,
956
2107
  context: options.context,
957
- session
2108
+ disposition,
2109
+ metadata: input.metadata,
2110
+ reason: input.reason,
2111
+ session,
2112
+ target: input.target
2113
+ });
2114
+ };
2115
/**
 * Records a "transfer" lifecycle event and then ends the call as transferred.
 *
 * The call is completed with disposition "transferred" and with
 * `invokeOnComplete` set to false.
 *
 * @param {object} input - Reads `metadata`, `reason`, `target` and `result`.
 * @returns {Promise<void>}
 */
const transferInternal = async (input) => {
  const { metadata, reason, result, target } = input;
  await writeSession((currentSession) => {
    pushCallLifecycleEvent(currentSession, {
      metadata,
      reason,
      target,
      type: "transfer"
    });
  });
  await completeInternal(result, {
    disposition: "transferred",
    invokeOnComplete: false,
    metadata,
    reason,
    target
  });
};
2132
/**
 * Records an "escalation" lifecycle event and then ends the call as escalated.
 *
 * The call is completed with disposition "escalated" and with
 * `invokeOnComplete` set to false.
 *
 * @param {object} input - Reads `metadata`, `reason` and `result`.
 * @returns {Promise<void>}
 */
const escalateInternal = async (input) => {
  const { metadata, reason, result } = input;
  await writeSession((currentSession) => {
    pushCallLifecycleEvent(currentSession, {
      metadata,
      reason,
      type: "escalation"
    });
  });
  await completeInternal(result, {
    disposition: "escalated",
    invokeOnComplete: false,
    metadata,
    reason
  });
};
2147
/**
 * Records a "no-answer" lifecycle event and then ends the call as unanswered.
 *
 * The call is completed with disposition "no-answer" and with
 * `invokeOnComplete` set to false.
 *
 * @param {object} [input] - Optional; `metadata` and `result` are read when present.
 * @returns {Promise<void>}
 */
const markNoAnswerInternal = async (input) => {
  const metadata = input?.metadata;
  const result = input?.result;
  await writeSession((currentSession) => {
    pushCallLifecycleEvent(currentSession, {
      metadata,
      type: "no-answer"
    });
  });
  await completeInternal(result, {
    disposition: "no-answer",
    invokeOnComplete: false,
    metadata
  });
};
2160
/**
 * Records a "voicemail" lifecycle event and then ends the call as voicemail.
 *
 * The call is completed with disposition "voicemail" and with
 * `invokeOnComplete` set to false.
 *
 * @param {object} [input] - Optional; `metadata` and `result` are read when present.
 * @returns {Promise<void>}
 */
const markVoicemailInternal = async (input) => {
  const metadata = input?.metadata;
  const result = input?.result;
  await writeSession((currentSession) => {
    pushCallLifecycleEvent(currentSession, {
      metadata,
      type: "voicemail"
    });
  });
  await completeInternal(result, {
    disposition: "voicemail",
    invokeOnComplete: false,
    metadata
  });
};
960
2173
  const handleError = async (event) => {
@@ -980,6 +2193,7 @@ var createVoiceSession = (options) => {
980
2193
  };
981
2194
  const rewindFallbackTurnAudio = () => {
982
2195
  fallbackAttemptsForCurrentTurn = 0;
2196
+ fallbackReplayAudioMsForCurrentTurn = 0;
983
2197
  currentTurnAudio.length = 0;
984
2198
  };
985
2199
  const runFallbackTranscription = async (primaryText, primaryTranscripts) => {
@@ -1007,6 +2221,8 @@ var createVoiceSession = (options) => {
1007
2221
  try {
1008
2222
  fallbackSession = await sttFallback.adapter.open({
1009
2223
  format: DEFAULT_FORMAT,
2224
+ languageStrategy: options.languageStrategy,
2225
+ lexicon,
1010
2226
  phraseHints,
1011
2227
  sessionId: `${options.id}:fallback:${fallbackAttemptsForCurrentTurn}`
1012
2228
  });
@@ -1060,6 +2276,7 @@ var createVoiceSession = (options) => {
1060
2276
  await fallbackSession.send(chunk);
1061
2277
  }
1062
2278
  const replayDurationMs = getBufferedAudioDurationMs(replayAudio);
2279
+ fallbackReplayAudioMsForCurrentTurn += replayDurationMs;
1063
2280
  const completionTimeoutMs = Math.max(sttFallback.completionTimeoutMs, Math.min(4000, Math.max(sttFallback.settleMs * 4, Math.round(replayDurationMs * 0.18))));
1064
2281
  const waitStartedAt = Date.now();
1065
2282
  while (Date.now() - waitStartedAt < completionTimeoutMs) {
@@ -1152,6 +2369,7 @@ var createVoiceSession = (options) => {
1152
2369
  api,
1153
2370
  context: options.context,
1154
2371
  fallback: input.fallbackDiagnostics,
2372
+ lexicon,
1155
2373
  phraseHints,
1156
2374
  session: input.session,
1157
2375
  text: originalText,
@@ -1220,46 +2438,52 @@ var createVoiceSession = (options) => {
1220
2438
  };
1221
2439
  };
1222
2440
  const handlePartial = async (transcript) => {
1223
- await writeSession((session) => {
1224
- const nextPartialStartedAt = transcript.startedAtMs ?? session.currentTurn.partialStartedAt;
1225
- const nextPartialEndedAt = transcript.endedAtMs ?? session.currentTurn.partialEndedAt;
1226
- const preferredPartial = selectPreferredTranscriptText(session.currentTurn.partialText, transcript.text);
1227
- session.currentTurn.lastTranscriptAt = Date.now();
1228
- session.currentTurn.partialStartedAt = nextPartialStartedAt;
1229
- session.currentTurn.partialEndedAt = nextPartialEndedAt;
1230
- session.currentTurn.partialText = buildTurnText(session.currentTurn.transcripts, preferredPartial, {
2441
+ const session = await writeSession((session2) => {
2442
+ const nextPartialStartedAt = transcript.startedAtMs ?? session2.currentTurn.partialStartedAt;
2443
+ const nextPartialEndedAt = transcript.endedAtMs ?? session2.currentTurn.partialEndedAt;
2444
+ const preferredPartial = selectPreferredTranscriptText(session2.currentTurn.partialText, transcript.text);
2445
+ session2.currentTurn.lastTranscriptAt = Date.now();
2446
+ session2.currentTurn.partialStartedAt = nextPartialStartedAt;
2447
+ session2.currentTurn.partialEndedAt = nextPartialEndedAt;
2448
+ session2.currentTurn.partialText = buildTurnText(session2.currentTurn.transcripts, preferredPartial, {
1231
2449
  partialEndedAtMs: nextPartialEndedAt,
1232
2450
  partialStartedAtMs: nextPartialStartedAt
1233
2451
  });
1234
- session.lastActivityAt = Date.now();
1235
- session.status = "active";
2452
+ session2.lastActivityAt = Date.now();
2453
+ session2.status = "active";
1236
2454
  });
2455
+ if (silenceTimer && pendingCommitReason === "vendor") {
2456
+ scheduleTurnCommit(getVendorCommitDelayMs(), "vendor");
2457
+ }
1237
2458
  await send({
1238
2459
  transcript,
1239
2460
  type: "partial"
1240
2461
  });
1241
2462
  };
1242
2463
  const handleFinal = async (transcript) => {
1243
- await writeSession((session) => {
1244
- const alreadyPresent = session.currentTurn.transcripts.some((existing) => existing.id === transcript.id);
2464
+ const session = await writeSession((session2) => {
2465
+ const alreadyPresent = session2.currentTurn.transcripts.some((existing) => existing.id === transcript.id);
1245
2466
  if (!alreadyPresent) {
1246
- session.currentTurn.transcripts = [
1247
- ...session.currentTurn.transcripts,
2467
+ session2.currentTurn.transcripts = [
2468
+ ...session2.currentTurn.transcripts,
1248
2469
  cloneTranscript(transcript)
1249
2470
  ];
1250
- session.transcripts = [
1251
- ...session.transcripts,
2471
+ session2.transcripts = [
2472
+ ...session2.transcripts,
1252
2473
  cloneTranscript(transcript)
1253
2474
  ];
1254
2475
  }
1255
- session.currentTurn.finalText = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText, {
1256
- partialEndedAtMs: session.currentTurn.partialEndedAt,
1257
- partialStartedAtMs: session.currentTurn.partialStartedAt
2476
+ session2.currentTurn.finalText = buildTurnText(session2.currentTurn.transcripts, session2.currentTurn.partialText, {
2477
+ partialEndedAtMs: session2.currentTurn.partialEndedAt,
2478
+ partialStartedAtMs: session2.currentTurn.partialStartedAt
1258
2479
  });
1259
- session.currentTurn.lastTranscriptAt = Date.now();
1260
- session.lastActivityAt = Date.now();
1261
- session.status = "active";
2480
+ session2.currentTurn.lastTranscriptAt = Date.now();
2481
+ session2.lastActivityAt = Date.now();
2482
+ session2.status = "active";
1262
2483
  });
2484
+ if (silenceTimer && pendingCommitReason === "vendor") {
2485
+ scheduleTurnCommit(getVendorCommitDelayMs(), "vendor");
2486
+ }
1263
2487
  await send({
1264
2488
  transcript,
1265
2489
  type: "final"
@@ -1286,6 +2510,8 @@ var createVoiceSession = (options) => {
1286
2510
  }
1287
2511
  const openedSession = await options.stt.open({
1288
2512
  format: DEFAULT_FORMAT,
2513
+ languageStrategy: options.languageStrategy,
2514
+ lexicon,
1289
2515
  phraseHints,
1290
2516
  sessionId: options.id
1291
2517
  });
@@ -1320,13 +2546,93 @@ var createVoiceSession = (options) => {
1320
2546
  });
1321
2547
  return openedSession;
1322
2548
  };
2549
/**
 * Returns the TTS session for this voice session, opening it on first use.
 *
 * Resolves to null when no TTS adapter is configured. An already-open session
 * is returned directly, and an open that is still in flight is shared through
 * `ttsSessionPromise`. A failed open clears the in-flight promise and rethrows.
 *
 * Once opened, the session's "audio", "error" and "close" events are handled
 * through `runSerial`. Audio and error events are ignored, and close does not
 * clear state, once this session is no longer the current `ttsSession`.
 *
 * @returns {Promise<object|null>} The TTS session, or null without an adapter.
 */
const ensureTTSSession = async () => {
  const ttsAdapter = options.tts;
  if (!ttsAdapter) {
    return null;
  }
  if (ttsSession) {
    return ttsSession;
  }
  if (ttsSessionPromise) {
    return ttsSessionPromise;
  }

  // Copies the incoming chunk into a fresh Uint8Array, whichever of the three
  // handled shapes (Uint8Array, ArrayBuffer, other buffer view) it arrives in.
  const toOwnedBytes = (chunk) => {
    if (chunk instanceof Uint8Array) {
      return new Uint8Array(chunk);
    }
    if (chunk instanceof ArrayBuffer) {
      return new Uint8Array(chunk.slice(0));
    }
    return new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
  };

  const openAndWire = async () => {
    const openedSession = await ttsAdapter.open({
      lexicon,
      sessionId: options.id
    });
    ttsSession = openedSession;
    const isCurrent = () => ttsSession === openedSession;

    openedSession.on("audio", ({ chunk, format, receivedAt }) => {
      runSerial("tts.audio", async () => {
        if (!isCurrent()) {
          return;
        }
        const normalizedChunk = toOwnedBytes(chunk);
        await send({
          chunkBase64: encodeBase64(normalizedChunk),
          format,
          receivedAt,
          turnId: activeTTSTurnId,
          type: "audio"
        });
      });
    });

    openedSession.on("error", (event) => {
      runSerial("tts.error", async () => {
        if (!isCurrent()) {
          return;
        }
        await send({
          message: toError(event.error).message,
          recoverable: event.recoverable,
          type: "error"
        });
      });
    });

    openedSession.on("close", () => {
      runSerial("tts.close", async () => {
        if (isCurrent()) {
          ttsSession = null;
          ttsSessionPromise = null;
          activeTTSTurnId = undefined;
        }
      });
    });

    return openedSession;
  };

  ttsSessionPromise = openAndWire().catch((error) => {
    // Allow a later call to retry the open.
    ttsSessionPromise = null;
    throw error;
  });
  return ttsSessionPromise;
};
2609
/**
 * Starts opening the TTS session in the background without awaiting it.
 *
 * Does nothing when no TTS adapter is configured, or when a session is
 * already open or already being opened. A failed open is logged as a warning.
 *
 * @returns {void}
 */
const warmTTSSession = () => {
  const nothingToWarm = !options.tts || ttsSession || ttsSessionPromise;
  if (nothingToWarm) {
    return;
  }
  ensureTTSSession().catch((warmError) => {
    logger.warn("voice tts prewarm failed", {
      error: toError(warmError).message,
      sessionId: options.id
    });
  });
};
1323
2620
  const completeTurn = async (session, turn) => {
1324
- const output = await options.route.onTurn({
2621
+ const committedOutput = await options.route.onTurn({
1325
2622
  api,
1326
2623
  context: options.context,
1327
2624
  session,
1328
2625
  turn
1329
2626
  });
2627
+ const output = {
2628
+ assistantText: committedOutput?.assistantText,
2629
+ complete: committedOutput?.complete,
2630
+ escalate: committedOutput?.escalate,
2631
+ noAnswer: committedOutput?.noAnswer,
2632
+ result: committedOutput?.result,
2633
+ transfer: committedOutput?.transfer,
2634
+ voicemail: committedOutput?.voicemail
2635
+ };
1330
2636
  if (output?.assistantText) {
1331
2637
  await writeSession((currentSession) => {
1332
2638
  setTurnResult(currentSession, turn.id, {
@@ -1338,6 +2644,19 @@ var createVoiceSession = (options) => {
1338
2644
  turnId: turn.id,
1339
2645
  type: "assistant"
1340
2646
  });
2647
+ try {
2648
+ const activeTTSSession = await ensureTTSSession();
2649
+ if (activeTTSSession) {
2650
+ activeTTSTurnId = turn.id;
2651
+ await activeTTSSession.send(output.assistantText);
2652
+ }
2653
+ } catch (error) {
2654
+ logger.warn("voice tts send failed", {
2655
+ error: toError(error).message,
2656
+ sessionId: options.id,
2657
+ turnId: turn.id
2658
+ });
2659
+ }
1341
2660
  }
1342
2661
  if (output?.result !== undefined) {
1343
2662
  await writeSession((currentSession) => {
@@ -1346,6 +2665,37 @@ var createVoiceSession = (options) => {
1346
2665
  });
1347
2666
  });
1348
2667
  }
2668
+ if (output?.transfer) {
2669
+ await transferInternal({
2670
+ metadata: output.transfer.metadata,
2671
+ reason: output.transfer.reason,
2672
+ result: output.result,
2673
+ target: output.transfer.target
2674
+ });
2675
+ return;
2676
+ }
2677
+ if (output?.escalate) {
2678
+ await escalateInternal({
2679
+ metadata: output.escalate.metadata,
2680
+ reason: output.escalate.reason,
2681
+ result: output.result
2682
+ });
2683
+ return;
2684
+ }
2685
+ if (output?.voicemail) {
2686
+ await markVoicemailInternal({
2687
+ metadata: output.voicemail.metadata,
2688
+ result: output.result
2689
+ });
2690
+ return;
2691
+ }
2692
+ if (output?.noAnswer) {
2693
+ await markNoAnswerInternal({
2694
+ metadata: output.noAnswer.metadata,
2695
+ result: output.result
2696
+ });
2697
+ return;
2698
+ }
1349
2699
  if (output?.complete) {
1350
2700
  await completeInternal(output.result);
1351
2701
  }
@@ -1410,11 +2760,18 @@ var createVoiceSession = (options) => {
1410
2760
  scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason, false);
1411
2761
  return;
1412
2762
  }
2763
+ const costEstimate = createTurnCostEstimate({
2764
+ fallbackAttemptCount: fallbackAttemptsForCurrentTurn,
2765
+ fallbackPassCostUnit: options.costTelemetry?.fallbackPassCostUnit,
2766
+ fallbackReplayAudioMs: fallbackReplayAudioMsForCurrentTurn,
2767
+ primaryAudioMs: getBufferedAudioDurationMs(currentTurnAudio.map((audio) => audio.chunk)),
2768
+ primaryPassCostUnit: options.costTelemetry?.primaryPassCostUnit
2769
+ });
1413
2770
  const turn = {
1414
2771
  committedAt: Date.now(),
1415
2772
  id: createId(),
1416
2773
  text: finalText,
1417
- quality: createTurnQuality(transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics),
2774
+ quality: createTurnQuality(transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics, costEstimate),
1418
2775
  transcripts: transcripts.length > 0 ? transcripts : [
1419
2776
  {
1420
2777
  id: createId(),
@@ -1441,6 +2798,13 @@ var createVoiceSession = (options) => {
1441
2798
  sessionId: options.id,
1442
2799
  turnId: turn.id
1443
2800
  });
2801
+ await options.costTelemetry?.onTurnCost?.({
2802
+ api,
2803
+ context: options.context,
2804
+ estimate: costEstimate,
2805
+ session: updatedSession,
2806
+ turn
2807
+ });
1444
2808
  await send({
1445
2809
  turn,
1446
2810
  type: "turn"
@@ -1486,6 +2850,11 @@ var createVoiceSession = (options) => {
1486
2850
  };
1487
2851
  }
1488
2852
  }
2853
+ if (shouldFireOnSession) {
2854
+ pushCallLifecycleEvent(session, {
2855
+ type: "start"
2856
+ });
2857
+ }
1489
2858
  await options.store.set(options.id, session);
1490
2859
  await send({
1491
2860
  sessionId: options.id,
@@ -1494,6 +2863,11 @@ var createVoiceSession = (options) => {
1494
2863
  type: "session"
1495
2864
  });
1496
2865
  if (shouldFireOnSession) {
2866
+ await options.route.onCallStart?.({
2867
+ api,
2868
+ context: options.context,
2869
+ session
2870
+ });
1497
2871
  await options.route.onSession?.({
1498
2872
  api,
1499
2873
  context: options.context,
@@ -1509,9 +2883,11 @@ var createVoiceSession = (options) => {
1509
2883
  }
1510
2884
  resumePendingTurnCommit(session);
1511
2885
  await ensureAdapter();
2886
+ warmTTSSession();
1512
2887
  };
1513
2888
  const disconnectInternal = async (event) => {
1514
2889
  clearSilenceTimer();
2890
+ await closeTTSSession(event?.reason);
1515
2891
  await closeAdapter(event?.reason);
1516
2892
  rewindFallbackTurnAudio();
1517
2893
  if (reconnect.strategy === "fail") {
@@ -1570,9 +2946,30 @@ var createVoiceSession = (options) => {
1570
2946
  id: options.id,
1571
2947
  close: async (reason) => {
1572
2948
  await runSerial("api.close", async () => {
2949
+ const session = await writeSession((currentSession) => {
2950
+ if (currentSession.status !== "completed" && currentSession.status !== "failed" && !currentSession.call?.endedAt) {
2951
+ currentSession.lastActivityAt = Date.now();
2952
+ currentSession.status = "completed";
2953
+ pushCallLifecycleEvent(currentSession, {
2954
+ disposition: "closed",
2955
+ reason,
2956
+ type: "end"
2957
+ });
2958
+ }
2959
+ });
1573
2960
  clearSilenceTimer();
2961
+ await closeTTSSession(reason);
1574
2962
  await closeAdapter(reason);
1575
2963
  await Promise.resolve(socket.close(1000, reason));
2964
+ if (session.call?.endedAt && session.call.disposition === "closed") {
2965
+ await options.route.onCallEnd?.({
2966
+ api,
2967
+ context: options.context,
2968
+ disposition: "closed",
2969
+ reason,
2970
+ session
2971
+ });
2972
+ }
1576
2973
  });
1577
2974
  },
1578
2975
  commitTurn: async (reason = "manual") => runSerial("api.commitTurn", async () => {
@@ -1590,9 +2987,21 @@ var createVoiceSession = (options) => {
1590
2987
  fail: async (error) => runSerial("api.fail", async () => {
1591
2988
  await failInternal(error);
1592
2989
  }),
2990
+ escalate: async (input) => runSerial("api.escalate", async () => {
2991
+ await escalateInternal(input);
2992
+ }),
2993
+ markNoAnswer: async (input) => runSerial("api.markNoAnswer", async () => {
2994
+ await markNoAnswerInternal(input);
2995
+ }),
2996
+ markVoicemail: async (input) => runSerial("api.markVoicemail", async () => {
2997
+ await markVoicemailInternal(input);
2998
+ }),
1593
2999
  receiveAudio: async (audio) => runSerial("api.receiveAudio", async () => {
1594
3000
  await receiveAudioInternal(audio);
1595
3001
  }),
3002
+ transfer: async (input) => runSerial("api.transfer", async () => {
3003
+ await transferInternal(input);
3004
+ }),
1596
3005
  snapshot: async () => runSerial("api.snapshot", async () => readSession())
1597
3006
  };
1598
3007
  return api;
@@ -1740,6 +3149,7 @@ var resolveSessionOptions = (config) => {
1740
3149
  const preset = resolveVoiceRuntimePreset(config.preset);
1741
3150
  return {
1742
3151
  audioConditioning: config.audioConditioning !== undefined ? resolveAudioConditioningConfig(config.audioConditioning) : preset.audioConditioning,
3152
+ costTelemetry: config.costTelemetry,
1743
3153
  sttFallback: resolveSTTFallbackConfig(config.sttFallback),
1744
3154
  logger: config.logger,
1745
3155
  reconnect: {
@@ -1759,6 +3169,13 @@ var normalizePhraseHints = (hints) => (hints ?? []).map((hint) => ({
1759
3169
  aliases: hint.aliases?.filter((value) => typeof value === "string" && value.trim().length > 0),
1760
3170
  text: hint.text.trim()
1761
3171
  })).filter((hint) => hint.text.length > 0);
3172
/**
 * Normalizes caller-supplied lexicon entries.
 *
 * - Entries that are null/undefined or whose `text` is not a string are dropped
 *   instead of throwing, matching the defensive type checks already applied to
 *   the other fields.
 * - `text`, `language` and `pronunciation` are trimmed; blank `language` /
 *   `pronunciation` values become `undefined`.
 * - Non-string or blank aliases are removed (kept aliases are left untrimmed,
 *   consistent with how phrase-hint aliases are normalized in this file).
 * - Entries whose trimmed `text` is empty are removed.
 *
 * @param entries Lexicon entries, or null/undefined for "none".
 * @returns A new array of normalized entries.
 */
var normalizeLexicon = (entries) => (entries ?? [])
  .filter((entry) => typeof entry?.text === "string")
  .map((entry) => ({
    ...entry,
    aliases: Array.isArray(entry.aliases)
      ? entry.aliases.filter((value) => typeof value === "string" && value.trim().length > 0)
      : undefined,
    language: typeof entry.language === "string" && entry.language.trim().length > 0
      ? entry.language.trim()
      : undefined,
    pronunciation: typeof entry.pronunciation === "string" && entry.pronunciation.trim().length > 0
      ? entry.pronunciation.trim()
      : undefined,
    text: entry.text.trim()
  }))
  .filter((entry) => entry.text.length > 0);
1762
3179
  var resolvePhraseHints = async (config, input) => {
1763
3180
  if (!config.phraseHints) {
1764
3181
  return [];
@@ -1768,6 +3185,15 @@ var resolvePhraseHints = async (config, input) => {
1768
3185
  }
1769
3186
  return normalizePhraseHints(config.phraseHints);
1770
3187
  };
3188
+ var resolveLexicon = async (config, input) => {
3189
+ if (!config.lexicon) {
3190
+ return [];
3191
+ }
3192
+ if (typeof config.lexicon === "function") {
3193
+ return normalizeLexicon(await config.lexicon(input));
3194
+ }
3195
+ return normalizeLexicon(config.lexicon);
3196
+ };
1771
3197
  var voice = (config) => {
1772
3198
  const runtime = {
1773
3199
  activeSessions: new Map,
@@ -1788,19 +3214,55 @@ var voice = (config) => {
1788
3214
  scenarioId,
1789
3215
  sessionId
1790
3216
  });
3217
+ const lexicon = await resolveLexicon(config, {
3218
+ context,
3219
+ scenarioId,
3220
+ sessionId
3221
+ });
1791
3222
  return createVoiceSession({
1792
3223
  audioConditioning: sessionOptions.audioConditioning,
1793
3224
  context,
1794
3225
  id: sessionId,
3226
+ languageStrategy: config.languageStrategy,
3227
+ lexicon,
1795
3228
  logger: sessionOptions.logger,
1796
3229
  phraseHints,
1797
3230
  reconnect: sessionOptions.reconnect,
1798
3231
  route: {
1799
3232
  correctTurn: config.correctTurn,
3233
+ onCallEnd: async (input) => {
3234
+ let hookError;
3235
+ try {
3236
+ await config.onCallEnd?.(input);
3237
+ } catch (error) {
3238
+ hookError = error;
3239
+ }
3240
+ try {
3241
+ await recordVoiceRuntimeOps({
3242
+ api: input.api,
3243
+ config: config.ops,
3244
+ context: input.context,
3245
+ disposition: input.disposition,
3246
+ metadata: input.metadata,
3247
+ reason: input.reason,
3248
+ session: input.session,
3249
+ target: input.target
3250
+ });
3251
+ } finally {
3252
+ if (hookError) {
3253
+ throw hookError;
3254
+ }
3255
+ }
3256
+ },
3257
+ onCallStart: config.onCallStart,
1800
3258
  onComplete: config.onComplete,
3259
+ onEscalation: config.onEscalation,
1801
3260
  onError: config.onError,
3261
+ onNoAnswer: config.onNoAnswer,
1802
3262
  onSession: config.onSession,
1803
- onTurn
3263
+ onTransfer: config.onTransfer,
3264
+ onTurn,
3265
+ onVoicemail: config.onVoicemail
1804
3266
  },
1805
3267
  scenarioId,
1806
3268
  socket: createSocketAdapter(ws),
@@ -1808,6 +3270,7 @@ var voice = (config) => {
1808
3270
  stt: config.stt,
1809
3271
  sttFallback: sessionOptions.sttFallback,
1810
3272
  sttLifecycle: sessionOptions.sttLifecycle,
3273
+ tts: config.tts,
1811
3274
  turnDetection: sessionOptions.turnDetection
1812
3275
  });
1813
3276
  };
@@ -1916,6 +3379,171 @@ var voice = (config) => {
1916
3379
  }
1917
3380
  }).use(htmxRoutes());
1918
3381
  };
3382
+ // src/fileStore.ts
3383
+ import { mkdir, readFile, readdir, rename, rm, writeFile } from "fs/promises";
3384
+ import { join } from "path";
3385
+ var listJsonFiles = async (directory) => {
3386
+ try {
3387
+ const entries = await readdir(directory, {
3388
+ withFileTypes: true
3389
+ });
3390
+ return entries.filter((entry) => entry.isFile() && entry.name.endsWith(".json")).map((entry) => join(directory, entry.name));
3391
+ } catch (error) {
3392
+ if (error.code === "ENOENT") {
3393
+ return [];
3394
+ }
3395
+ throw error;
3396
+ }
3397
+ };
3398
+ var encodeStoreId = (id) => `${encodeURIComponent(id)}.json`;
3399
+ var resolveFilePath = (directory, id) => join(directory, encodeStoreId(id));
3400
+ var readJsonFile = async (path) => JSON.parse(await readFile(path, "utf8"));
3401
/**
 * Serializes `value` as JSON and writes it to `path` via a temp file + rename,
 * so readers never observe a partially written file.
 *
 * @param path Final destination of the JSON file.
 * @param value Value to serialize.
 * @param options `directory` is created (recursively) before writing;
 *   `pretty === false` disables the default 2-space indentation.
 * @throws Re-throws any serialization, write or rename error after removing the
 *   temporary file, so failed writes do not accumulate `*.tmp` files.
 */
var writeJsonFile = async (path, value, options) => {
  await mkdir(options.directory, {
    recursive: true
  });
  // A unique suffix keeps concurrent writers to the same path from sharing a temp file.
  const tempPath = `${path}.${crypto.randomUUID()}.tmp`;
  try {
    await writeFile(tempPath, JSON.stringify(value, null, options.pretty === false ? undefined : 2));
    await rename(tempPath, path);
  } catch (error) {
    // Best-effort cleanup: a failure to delete must not mask the original error.
    await rm(tempPath, { force: true }).catch(() => {});
    throw error;
  }
};
3409
/**
 * Creates a session store that keeps one JSON file per session inside
 * `options.directory` (file names come from `resolveFilePath`).
 *
 * @param options Store options; `directory` is required and the whole object is
 *   forwarded to `writeJsonFile`.
 * @returns `{ get, getOrCreate, list, remove, set }`.
 */
var createVoiceFileSessionStore = (options) => {
  // Reads one JSON file; a missing file yields undefined, any other error propagates.
  const readIfPresent = async (path) => {
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error?.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  // Resolves to the stored session, or undefined when no file exists for `id`.
  const get = async (id) => readIfPresent(resolveFilePath(options.directory, id));
  // Returns the stored session, creating and persisting a fresh record when absent.
  const getOrCreate = async (id) => {
    const existing = await get(id);
    if (existing) {
      return existing;
    }
    const session = createVoiceSessionRecord(id);
    await writeJsonFile(resolveFilePath(options.directory, id), session, options);
    return session;
  };
  const set = async (id, value) => {
    await writeJsonFile(resolveFilePath(options.directory, id), value, options);
  };
  // Lists session summaries, most recently active first.
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    // A file can be removed between the directory scan and the read; treat it as
    // absent instead of failing the whole listing.
    const loaded = await Promise.all(files.map((file) => readIfPresent(file)));
    return loaded
      .filter((session) => session !== undefined)
      .map((session) => toVoiceSessionSummary(session))
      .sort((first, second) => (second.lastActivityAt ?? second.createdAt) - (first.lastActivityAt ?? first.createdAt));
  };
  // `force: true` makes removing a non-existent session a no-op.
  const remove = async (id) => {
    await rm(resolveFilePath(options.directory, id), {
      force: true
    });
  };
  return { get, getOrCreate, list, remove, set };
};
3445
/**
 * Creates a call-review store that keeps one JSON file per review artifact
 * inside `options.directory`.
 *
 * @param options Store options; `directory` is required and the whole object is
 *   forwarded to `writeJsonFile`.
 * @returns `{ get, list, remove, set }`.
 */
var createVoiceFileReviewStore = (options) => {
  // Reads one JSON file; a missing file yields undefined, any other error propagates.
  const readIfPresent = async (path) => {
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error?.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  // Resolves to the stored artifact, or undefined when no file exists for `id`.
  const get = async (id) => readIfPresent(resolveFilePath(options.directory, id));
  // Lists artifacts newest first; artifacts without `generatedAt` sort as 0.
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    // A file can be removed between the directory scan and the read; treat it as
    // absent instead of failing the whole listing.
    const loaded = await Promise.all(files.map((file) => readIfPresent(file)));
    return loaded
      .filter((review) => review !== undefined)
      .sort((left, right) => (right.generatedAt ?? 0) - (left.generatedAt ?? 0));
  };
  // The artifact is passed through `withVoiceCallReviewId` with `id` before being written.
  const set = async (id, artifact) => {
    await writeJsonFile(resolveFilePath(options.directory, id), withVoiceCallReviewId(id, artifact), options);
  };
  // `force: true` makes removing a non-existent artifact a no-op.
  const remove = async (id) => {
    await rm(resolveFilePath(options.directory, id), {
      force: true
    });
  };
  return { get, list, remove, set };
};
3472
/**
 * Creates an ops-task store that keeps one JSON file per task inside
 * `options.directory`.
 *
 * @param options Store options; `directory` is required and the whole object is
 *   forwarded to `writeJsonFile`.
 * @returns `{ get, list, remove, set }`.
 */
var createVoiceFileTaskStore = (options) => {
  // Reads one JSON file; a missing file yields undefined, any other error propagates.
  const readIfPresent = async (path) => {
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error?.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  // Resolves to the stored task, or undefined when no file exists for `id`.
  const get = async (id) => readIfPresent(resolveFilePath(options.directory, id));
  // Lists tasks ordered by `createdAt`, newest first.
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    // A file can be removed between the directory scan and the read; treat it as
    // absent instead of failing the whole listing.
    const loaded = await Promise.all(files.map((file) => readIfPresent(file)));
    return loaded
      .filter((task) => task !== undefined)
      .sort((left, right) => right.createdAt - left.createdAt);
  };
  // The task is passed through `withVoiceOpsTaskId` with `id` before being written.
  const set = async (id, task) => {
    await writeJsonFile(resolveFilePath(options.directory, id), withVoiceOpsTaskId(id, task), options);
  };
  // `force: true` makes removing a non-existent task a no-op.
  const remove = async (id) => {
    await rm(resolveFilePath(options.directory, id), {
      force: true
    });
  };
  return { get, list, remove, set };
};
3499
/**
 * Creates an integration-event store that keeps one JSON file per event inside
 * `options.directory`.
 *
 * @param options Store options; `directory` is required and the whole object is
 *   forwarded to `writeJsonFile`.
 * @returns `{ get, list, remove, set }`.
 */
var createVoiceFileIntegrationEventStore = (options) => {
  // Reads one JSON file; a missing file yields undefined, any other error propagates.
  const readIfPresent = async (path) => {
    try {
      return await readJsonFile(path);
    } catch (error) {
      if (error?.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  // Resolves to the stored event, or undefined when no file exists for `id`.
  const get = async (id) => readIfPresent(resolveFilePath(options.directory, id));
  // Lists events ordered by `createdAt`, newest first.
  const list = async () => {
    const files = await listJsonFiles(options.directory);
    // A file can be removed between the directory scan and the read; treat it as
    // absent instead of failing the whole listing.
    const loaded = await Promise.all(files.map((file) => readIfPresent(file)));
    return loaded
      .filter((event) => event !== undefined)
      .sort((left, right) => right.createdAt - left.createdAt);
  };
  // The event is passed through `withVoiceIntegrationEventId` with `id` before being written.
  const set = async (id, event) => {
    await writeJsonFile(resolveFilePath(options.directory, id), withVoiceIntegrationEventId(id, event), options);
  };
  // `force: true` makes removing a non-existent event a no-op.
  const remove = async (id) => {
    await rm(resolveFilePath(options.directory, id), {
      force: true
    });
  };
  return { get, list, remove, set };
};
3526
+ var createVoiceFileRuntimeStorage = (options) => ({
3527
+ events: createVoiceFileIntegrationEventStore({
3528
+ ...options,
3529
+ directory: join(options.directory, "events")
3530
+ }),
3531
+ reviews: createVoiceFileReviewStore({
3532
+ ...options,
3533
+ directory: join(options.directory, "reviews")
3534
+ }),
3535
+ session: createVoiceFileSessionStore({
3536
+ ...options,
3537
+ directory: join(options.directory, "sessions")
3538
+ }),
3539
+ tasks: createVoiceFileTaskStore({
3540
+ ...options,
3541
+ directory: join(options.directory, "tasks")
3542
+ })
3543
+ });
3544
+ var createStoredVoiceCallReviewArtifact = (id, artifact) => withVoiceCallReviewId(id, artifact);
3545
+ var createStoredVoiceOpsTask = (id, task) => withVoiceOpsTaskId(id, task);
3546
+ var createStoredVoiceIntegrationEvent = (id, event) => withVoiceIntegrationEventId(id, event);
1919
3547
  // src/memoryStore.ts
1920
3548
  var createVoiceMemoryStore = () => {
1921
3549
  const sessions = new Map;
@@ -1939,21 +3567,146 @@ var createVoiceMemoryStore = () => {
1939
3567
  };
1940
3568
  // src/correction.ts
1941
3569
  var escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
3570
+ var buildAliasMatcher = (alias) => new RegExp(`(?<![\\p{L}\\p{N}'])${escapeRegExp(alias)}(?![\\p{L}\\p{N}'])`, "giu");
3571
+ var WORD_PATTERN = /[\p{L}\p{N}']+/gu;
3572
+ var normalizeComparableText = (value) => value.toLowerCase().replace(/[^\p{L}\p{N}\s']/gu, " ").replace(/\s+/g, " ").trim();
3573
+ var normalizeDomainTerm = (value) => normalizeComparableText(value);
3574
+ var tokenizeWithIndices = (value) => {
3575
+ const matches = value.matchAll(WORD_PATTERN);
3576
+ const tokens = [];
3577
+ for (const match of matches) {
3578
+ const token = match[0];
3579
+ const start = match.index ?? -1;
3580
+ if (start < 0) {
3581
+ continue;
3582
+ }
3583
+ tokens.push({
3584
+ end: start + token.length,
3585
+ start,
3586
+ text: token
3587
+ });
3588
+ }
3589
+ return tokens;
3590
+ };
3591
+ var levenshteinDistance = (left, right) => {
3592
+ if (left === right) {
3593
+ return 0;
3594
+ }
3595
+ if (left.length === 0) {
3596
+ return right.length;
3597
+ }
3598
+ if (right.length === 0) {
3599
+ return left.length;
3600
+ }
3601
+ const previous = Array.from({ length: right.length + 1 }, (_, index) => index);
3602
+ const current = new Array(right.length + 1);
3603
+ for (let leftIndex = 1;leftIndex <= left.length; leftIndex += 1) {
3604
+ current[0] = leftIndex;
3605
+ for (let rightIndex = 1;rightIndex <= right.length; rightIndex += 1) {
3606
+ const cost = left[leftIndex - 1] === right[rightIndex - 1] ? 0 : 1;
3607
+ current[rightIndex] = Math.min(current[rightIndex - 1] + 1, previous[rightIndex] + 1, previous[rightIndex - 1] + cost);
3608
+ }
3609
+ for (let rightIndex = 0;rightIndex <= right.length; rightIndex += 1) {
3610
+ previous[rightIndex] = current[rightIndex];
3611
+ }
3612
+ }
3613
+ return previous[right.length];
3614
+ };
3615
/**
 * Maps a correction risk tier to the maximum normalized edit distance
 * (distance / longer length) a fuzzy alias match may have.
 *
 * @param riskTier "safe" | "balanced" | "risky".
 * @returns The threshold; -1 means "never accept a fuzzy match" because a
 *   normalized distance is never negative.
 */
var resolveFuzzyThreshold = (riskTier) => {
  switch (riskTier) {
    case "balanced":
      return 0.14;
    case "risky":
      return 0.2;
    case "safe":
    default:
      // Unknown tiers fall back to the strictest setting. Returning undefined
      // would make every `score > threshold` check false and accept any window.
      return -1;
  }
};
3625
+ var canUseTieredFuzzyAlias = (alias, riskTier) => {
3626
+ if (riskTier === "safe") {
3627
+ return false;
3628
+ }
3629
+ const tokenCount = normalizeComparableText(alias).split(" ").filter((token) => token.length > 0).length;
3630
+ return riskTier === "balanced" ? tokenCount >= 3 : tokenCount >= 2;
3631
+ };
3632
/**
 * Finds the span of `text` that most closely resembles a multi-word `alias`.
 *
 * Candidate windows of (alias word count - 1) to (alias word count + 1) tokens
 * are compared against the normalized alias by Levenshtein distance divided by
 * the longer string's length. A window is only considered when its first token
 * equals the alias's first token.
 *
 * @param text Text to search.
 * @param alias Alias to look for; aliases with fewer than two words never match.
 * @param riskTier Tier passed to `resolveFuzzyThreshold`.
 * @returns `{ start, end, score }` (character offsets into `text`) for the
 *   lowest-scoring window, ties going to the longer span; undefined when no
 *   window qualifies.
 */
var findFuzzyAliasMatch = (text, alias, riskTier) => {
  // Resolved once: the threshold does not change per window. A negative or
  // missing threshold means fuzzy matching is disabled for this tier.
  const threshold = resolveFuzzyThreshold(riskTier);
  if (!(threshold >= 0)) {
    return;
  }
  const tokens = tokenizeWithIndices(text);
  const aliasTokens = normalizeComparableText(alias).split(" ").filter((token) => token.length > 0);
  if (tokens.length === 0 || aliasTokens.length < 2) {
    return;
  }
  const minWindowLength = Math.max(1, aliasTokens.length - 1);
  const maxWindowLength = Math.min(tokens.length, aliasTokens.length + 1);
  const normalizedAlias = aliasTokens.join(" ");
  const normalizedAliasFirstToken = aliasTokens[0] ?? "";
  let bestMatch;
  for (let startIndex = 0; startIndex < tokens.length; startIndex += 1) {
    for (let windowLength = minWindowLength; windowLength <= maxWindowLength; windowLength += 1) {
      const endIndex = startIndex + windowLength - 1;
      if (endIndex >= tokens.length) {
        // Longer windows from this start would overrun the text as well.
        break;
      }
      const windowTokens = tokens.slice(startIndex, endIndex + 1);
      const normalizedWindow = normalizeComparableText(windowTokens.map((token) => token.text).join(" "));
      if (!normalizedWindow) {
        continue;
      }
      const [windowFirstToken] = normalizedWindow.split(" ");
      if (windowFirstToken !== normalizedAliasFirstToken) {
        continue;
      }
      const distance = levenshteinDistance(normalizedWindow, normalizedAlias);
      const denominator = Math.max(normalizedWindow.length, normalizedAlias.length);
      const score = denominator > 0 ? distance / denominator : 0;
      if (score > threshold) {
        continue;
      }
      const candidate = {
        end: windowTokens[windowTokens.length - 1].end,
        score,
        start: windowTokens[0].start
      };
      const isBetter = !bestMatch
        || candidate.score < bestMatch.score
        || (candidate.score === bestMatch.score && candidate.end - candidate.start > bestMatch.end - bestMatch.start);
      if (isBetter) {
        bestMatch = candidate;
      }
    }
  }
  return bestMatch;
};
1942
3676
  var normalizeHintAliases = (hint) => (hint.aliases ?? []).map((alias) => alias.trim()).filter((alias) => alias.length > 0).sort((left, right) => right.length - left.length);
1943
3677
  var applyPhraseHintCorrections = (text, phraseHints) => {
3678
+ return applyRiskTieredPhraseHintCorrections(text, phraseHints, {
3679
+ riskTier: "risky"
3680
+ });
3681
+ };
3682
+ var applyRiskTieredPhraseHintCorrections = (text, phraseHints, options = {}) => {
3683
+ const riskTier = options.riskTier ?? "safe";
1944
3684
  let corrected = text;
1945
3685
  const matches = [];
1946
3686
  for (const hint of phraseHints) {
1947
3687
  for (const alias of normalizeHintAliases(hint)) {
1948
- const matcher = new RegExp(`\\b${escapeRegExp(alias)}\\b`, "gi");
3688
+ const matcher = buildAliasMatcher(alias);
1949
3689
  if (!matcher.test(corrected)) {
1950
- continue;
3690
+ if (!canUseTieredFuzzyAlias(alias, riskTier)) {
3691
+ continue;
3692
+ }
3693
+ const fuzzyMatch = findFuzzyAliasMatch(corrected, alias, riskTier);
3694
+ if (!fuzzyMatch) {
3695
+ continue;
3696
+ }
3697
+ corrected = `${corrected.slice(0, fuzzyMatch.start)}${hint.text}${corrected.slice(fuzzyMatch.end)}`;
3698
+ matches.push({
3699
+ alias,
3700
+ hint
3701
+ });
3702
+ break;
1951
3703
  }
1952
3704
  corrected = corrected.replace(matcher, hint.text);
1953
3705
  matches.push({
1954
3706
  alias,
1955
3707
  hint
1956
3708
  });
3709
+ break;
1957
3710
  }
1958
3711
  }
1959
3712
  return {
@@ -1962,6 +3715,79 @@ var applyPhraseHintCorrections = (text, phraseHints) => {
1962
3715
  text: corrected
1963
3716
  };
1964
3717
  };
3718
+ var dedupeAliases = (aliases) => {
3719
+ const seen = new Set;
3720
+ const deduped = [];
3721
+ for (const alias of aliases) {
3722
+ const normalized = normalizeDomainTerm(alias);
3723
+ if (!normalized || seen.has(normalized)) {
3724
+ continue;
3725
+ }
3726
+ seen.add(normalized);
3727
+ deduped.push(alias);
3728
+ }
3729
+ return deduped;
3730
+ };
3731
+ var isSafeAlias = (alias) => {
3732
+ const normalized = normalizeDomainTerm(alias);
3733
+ if (normalized.length < 4) {
3734
+ return false;
3735
+ }
3736
+ const tokens = normalized.split(" ").filter((token) => token.length > 0);
3737
+ return tokens.length >= 2 || normalized.length >= 7;
3738
+ };
3739
+ var createDomainPhraseHints = (terms, options = {}) => {
3740
+ const riskTier = options.riskTier ?? "safe";
3741
+ const hints = [];
3742
+ const seen = new Set;
3743
+ for (const term of terms) {
3744
+ const normalizedText = normalizeDomainTerm(term.text);
3745
+ if (!normalizedText || seen.has(normalizedText)) {
3746
+ continue;
3747
+ }
3748
+ const candidateAliases = dedupeAliases(term.aliases ?? []);
3749
+ const aliases = candidateAliases.filter((alias) => {
3750
+ if (riskTier === "risky") {
3751
+ return true;
3752
+ }
3753
+ if (riskTier === "balanced") {
3754
+ return isSafeAlias(alias) || normalizeDomainTerm(alias) === normalizedText;
3755
+ }
3756
+ return isSafeAlias(alias);
3757
+ });
3758
+ hints.push({
3759
+ aliases: aliases.length > 0 ? aliases : undefined,
3760
+ boost: term.boost,
3761
+ metadata: term.metadata,
3762
+ text: term.text
3763
+ });
3764
+ seen.add(normalizedText);
3765
+ }
3766
+ return hints;
3767
+ };
3768
+ var createDomainLexicon = (terms) => {
3769
+ const entries = [];
3770
+ const seen = new Set;
3771
+ for (const term of terms) {
3772
+ const normalizedText = normalizeDomainTerm(term.text);
3773
+ if (!normalizedText || seen.has(normalizedText)) {
3774
+ continue;
3775
+ }
3776
+ entries.push({
3777
+ aliases: dedupeAliases(term.aliases ?? []),
3778
+ language: term.language,
3779
+ metadata: term.metadata,
3780
+ pronunciation: term.pronunciation,
3781
+ text: term.text
3782
+ });
3783
+ seen.add(normalizedText);
3784
+ }
3785
+ return entries;
3786
+ };
3787
/**
 * Computes the mean `confidence` across transcripts.
 *
 * Only finite numeric confidences are counted: a `NaN` or infinite value would
 * otherwise turn the mean into `NaN`/`Infinity` and make comparisons against a
 * confidence ceiling meaningless.
 *
 * @param transcripts Transcript-like objects; null/undefined is treated as empty.
 * @returns The arithmetic mean, or undefined when no usable confidence exists.
 */
var averageTranscriptConfidence = (transcripts) => {
  const confidences = (transcripts ?? [])
    .map((transcript) => transcript?.confidence)
    .filter((value) => Number.isFinite(value));
  if (confidences.length === 0) {
    return undefined;
  }
  const total = confidences.reduce((sum, value) => sum + value, 0);
  return total / confidences.length;
};
1965
3791
  var createPhraseHintCorrectionHandler = (options = {}) => {
1966
3792
  const provider = options.provider ?? "@absolutejs/voice";
1967
3793
  const reason = options.reason ?? "phrase-hint-correction";
@@ -1981,17 +3807,583 @@ var createPhraseHintCorrectionHandler = (options = {}) => {
1981
3807
  };
1982
3808
  };
1983
3809
  };
3810
+ var lexiconToPhraseHints = (lexicon) => lexicon.map((entry) => ({
3811
+ aliases: entry.aliases,
3812
+ metadata: entry.metadata,
3813
+ text: entry.text
3814
+ }));
3815
+ var applyLexiconCorrections = (text, lexicon) => applyPhraseHintCorrections(text, lexiconToPhraseHints(lexicon));
3816
+ var createLexiconCorrectionHandler = (options = {}) => {
3817
+ const provider = options.provider ?? "@absolutejs/voice";
3818
+ const reason = options.reason ?? "lexicon-correction";
3819
+ return async ({ lexicon, text }) => {
3820
+ const result = applyLexiconCorrections(text, lexicon);
3821
+ if (!result.changed) {
3822
+ return;
3823
+ }
3824
+ return {
3825
+ metadata: result.matches.length > 0 ? {
3826
+ matchedAliases: result.matches.map((match) => match.alias),
3827
+ matchedHints: result.matches.map((match) => match.hint.text)
3828
+ } : undefined,
3829
+ provider,
3830
+ reason,
3831
+ text: result.text
3832
+ };
3833
+ };
3834
+ };
3835
/**
 * Builds a turn-correction handler that applies risk-tiered phrase-hint and
 * lexicon corrections, but only to turns whose average transcript confidence is
 * at or below a ceiling (or unknown).
 *
 * @param options.provider Provider label on the result (default "@absolutejs/voice").
 * @param options.reason Reason label on the result (default "risky-turn-correction").
 * @param options.riskTier Tier used for correction (default "balanced").
 * @param options.maxAverageConfidence Turns averaging above this are left alone (default 0.92).
 * @returns An async handler resolving to `{ metadata, provider, reason, text }`
 *   when the text changed, otherwise undefined.
 */
var createRiskyTurnCorrectionHandler = (options = {}) => {
  const provider = options.provider ?? "@absolutejs/voice";
  const reason = options.reason ?? "risky-turn-correction";
  const riskTier = options.riskTier ?? "balanced";
  const maxAverageConfidence = options.maxAverageConfidence ?? 0.92;
  return async ({ lexicon, phraseHints, text, transcripts }) => {
    // Missing collections are treated as empty so the handler can be called
    // with only the inputs a caller actually has.
    const averageConfidence = averageTranscriptConfidence(transcripts ?? []);
    if (averageConfidence !== undefined && averageConfidence > maxAverageConfidence) {
      return;
    }
    const hints = [
      ...(phraseHints ?? []),
      ...lexiconToPhraseHints(lexicon ?? [])
    ];
    const result = applyRiskTieredPhraseHintCorrections(text, hints, { riskTier });
    if (!result.changed) {
      return;
    }
    return {
      metadata: {
        averageConfidence,
        matchedAliases: result.matches.map((match) => match.alias),
        matchedHints: result.matches.map((match) => match.hint.text),
        riskTier
      },
      provider,
      reason,
      text: result.text
    };
  };
};
3865
+
3866
+ // src/routing.ts
3867
+ var resolveVoiceSTTRoutingStrategy = (goal = "best") => {
3868
+ if (goal === "low-cost") {
3869
+ return {
3870
+ benchmarkSessionTarget: "deepgram-flux",
3871
+ correctionMode: "none",
3872
+ goal,
3873
+ notes: [
3874
+ "Uses the cheapest in-package path: one primary STT pass with no correction hook.",
3875
+ "Good for baseline throughput and lower post-processing overhead."
3876
+ ],
3877
+ preset: "default",
3878
+ sttLifecycle: "turn-scoped"
3879
+ };
3880
+ }
3881
+ return {
3882
+ benchmarkSessionTarget: "deepgram-corrected",
3883
+ correctionMode: "generic",
3884
+ goal,
3885
+ notes: [
3886
+ "Uses the current best in-package path: Deepgram Flux with generic deterministic correction.",
3887
+ "Optimized for accuracy and robustness rather than minimum processing cost."
3888
+ ],
3889
+ preset: "reliability",
3890
+ sttLifecycle: "continuous"
3891
+ };
3892
+ };
3893
+ var createVoiceSTTRoutingCorrectionHandler = (mode = "generic") => {
3894
+ if (mode === "none") {
3895
+ return;
3896
+ }
3897
+ if (mode === "risky-turn") {
3898
+ return createRiskyTurnCorrectionHandler();
3899
+ }
3900
+ return createPhraseHintCorrectionHandler();
3901
+ };
3902
+ // src/telephony/twilio.ts
3903
+ import { Buffer as Buffer2 } from "buffer";
3904
+ var TWILIO_MULAW_SAMPLE_RATE = 8000;
3905
+ var VOICE_PCM_SAMPLE_RATE = 16000;
3906
+ var escapeXml = (value) => value.replaceAll("&", "&amp;").replaceAll('"', "&quot;").replaceAll("'", "&apos;").replaceAll("<", "&lt;").replaceAll(">", "&gt;");
3907
+ var normalizeOnTurn2 = (handler) => {
3908
+ if (handler.length > 1) {
3909
+ const directHandler = handler;
3910
+ return async ({ context, session, turn, api }) => directHandler(session, turn, api, context);
3911
+ }
3912
+ return handler;
3913
+ };
3914
+ var resolveSTTFallbackConfig2 = (config) => {
3915
+ if (!config) {
3916
+ return;
3917
+ }
3918
+ return {
3919
+ adapter: config.adapter,
3920
+ completionTimeoutMs: config.completionTimeoutMs ?? 2500,
3921
+ confidenceThreshold: config.confidenceThreshold ?? 0.6,
3922
+ maxAttemptsPerTurn: config.maxAttemptsPerTurn ?? 1,
3923
+ minTextLength: config.minTextLength ?? 2,
3924
+ replayWindowMs: config.replayWindowMs ?? 8000,
3925
+ settleMs: config.settleMs ?? 220,
3926
+ trigger: config.trigger ?? "empty-or-low-confidence"
3927
+ };
3928
+ };
3929
+ var normalizePhraseHints2 = (hints) => (hints ?? []).map((hint) => ({
3930
+ ...hint,
3931
+ aliases: hint.aliases?.filter((value) => typeof value === "string" && value.trim().length > 0),
3932
+ text: hint.text.trim()
3933
+ })).filter((hint) => hint.text.length > 0);
3934
/**
 * Normalizes caller-supplied lexicon entries for the telephony bridge.
 *
 * - Entries that are null/undefined or whose `text` is not a string are dropped
 *   instead of throwing, matching the defensive type checks already applied to
 *   the other fields.
 * - `text`, `language` and `pronunciation` are trimmed; blank `language` /
 *   `pronunciation` values become `undefined`.
 * - Non-string or blank aliases are removed (kept aliases are left untrimmed).
 * - Entries whose trimmed `text` is empty are removed.
 *
 * @param entries Lexicon entries, or null/undefined for "none".
 * @returns A new array of normalized entries.
 */
var normalizeLexicon2 = (entries) => (entries ?? [])
  .filter((entry) => typeof entry?.text === "string")
  .map((entry) => ({
    ...entry,
    aliases: Array.isArray(entry.aliases)
      ? entry.aliases.filter((value) => typeof value === "string" && value.trim().length > 0)
      : undefined,
    language: typeof entry.language === "string" && entry.language.trim().length > 0
      ? entry.language.trim()
      : undefined,
    pronunciation: typeof entry.pronunciation === "string" && entry.pronunciation.trim().length > 0
      ? entry.pronunciation.trim()
      : undefined,
    text: entry.text.trim()
  }))
  .filter((entry) => entry.text.length > 0);
3941
+ var clamp16 = (value) => Math.max(-32768, Math.min(32767, Math.round(value)));
3942
+ var linearResample = (input, inputRate, outputRate) => {
3943
+ if (input.length === 0) {
3944
+ return new Int16Array(0);
3945
+ }
3946
+ if (inputRate === outputRate) {
3947
+ return new Int16Array(input);
3948
+ }
3949
+ const outputLength = Math.max(1, Math.round(input.length * outputRate / inputRate));
3950
+ const output = new Int16Array(outputLength);
3951
+ const ratio = inputRate / outputRate;
3952
+ for (let index = 0;index < outputLength; index += 1) {
3953
+ const sourcePosition = index * ratio;
3954
+ const leftIndex = Math.floor(sourcePosition);
3955
+ const rightIndex = Math.min(input.length - 1, leftIndex + 1);
3956
+ const blend = sourcePosition - leftIndex;
3957
+ const left = input[Math.min(leftIndex, input.length - 1)] ?? 0;
3958
+ const right = input[rightIndex] ?? left;
3959
+ output[index] = clamp16(left + (right - left) * blend);
3960
+ }
3961
+ return output;
3962
+ };
3963
+ var MULAW_BIAS = 132;
3964
+ var MULAW_CLIP = 32635;
3965
+ var encodeMulawSample = (sample) => {
3966
+ let value = clamp16(sample);
3967
+ let sign = 0;
3968
+ if (value < 0) {
3969
+ sign = 128;
3970
+ value = -value;
3971
+ }
3972
+ value = Math.min(MULAW_CLIP, value);
3973
+ value += MULAW_BIAS;
3974
+ let exponent = 7;
3975
+ for (let bit = 16384;(value & bit) === 0 && exponent > 0; bit >>= 1) {
3976
+ exponent -= 1;
3977
+ }
3978
+ const mantissa = value >> exponent + 3 & 15;
3979
+ return ~(sign | exponent << 4 | mantissa) & 255;
3980
+ };
3981
+ var decodeMulawSample = (value) => {
3982
+ const normalized = ~value & 255;
3983
+ const sign = normalized & 128;
3984
+ const exponent = normalized >> 4 & 7;
3985
+ const mantissa = normalized & 15;
3986
+ let sample = (mantissa << 3) + MULAW_BIAS << exponent;
3987
+ sample -= MULAW_BIAS;
3988
+ return sign ? -sample : sample;
3989
+ };
3990
+ var int16ArrayToBytes = (samples) => {
3991
+ const output = new Uint8Array(samples.length * 2);
3992
+ const view = new DataView(output.buffer);
3993
+ for (let index = 0;index < samples.length; index += 1) {
3994
+ view.setInt16(index * 2, samples[index] ?? 0, true);
3995
+ }
3996
+ return output;
3997
+ };
3998
/**
 * Reads little-endian 16-bit samples out of a byte view.
 *
 * @param {Uint8Array} bytes - Source bytes; its byteOffset/byteLength window is
 *   respected, and a trailing odd byte is ignored.
 * @returns {Int16Array} One sample per complete byte pair.
 */
var bytesToInt16Array = (bytes) => {
  const pairCount = Math.floor(bytes.byteLength / 2);
  const reader = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  return Int16Array.from({ length: pairCount }, (_, pairIndex) => reader.getInt16(pairIndex * 2, true));
};
4007
/**
 * Decodes a base64 string of mu-law bytes into linear samples.
 *
 * @param {string} payload - Base64 text, one mu-law byte per sample.
 * @returns {Int16Array} One decoded sample per input byte.
 */
var decodeTwilioMulawBase64 = (payload) => {
  const companded = Buffer2.from(payload, "base64");
  return Int16Array.from(companded, (byte) => decodeMulawSample(byte));
};
4015
/**
 * Encodes linear samples as mu-law bytes and returns them as base64 text.
 *
 * @param {ArrayLike<number>} samples - Linear samples; missing entries are encoded as 0.
 * @returns {string} Base64 string carrying one mu-law byte per sample.
 */
var encodeTwilioMulawBase64 = (samples) => {
  const companded = Uint8Array.from(
    { length: samples.length },
    (_, position) => encodeMulawSample(samples[position] ?? 0)
  );
  return Buffer2.from(companded).toString("base64");
};
4022
/**
 * Converts an inbound base64 mu-law payload into little-endian 16-bit PCM bytes.
 * The decoded samples are resampled from TWILIO_MULAW_SAMPLE_RATE to
 * VOICE_PCM_SAMPLE_RATE before being serialized.
 *
 * @param {string} payload - Base64 mu-law audio.
 * @returns {Uint8Array} PCM bytes at the voice pipeline's sample rate.
 */
var transcodeTwilioInboundPayloadToPCM16 = (payload) => int16ArrayToBytes(
  linearResample(
    decodeTwilioMulawBase64(payload),
    TWILIO_MULAW_SAMPLE_RATE,
    VOICE_PCM_SAMPLE_RATE
  )
);
4027
/**
 * Converts an outbound audio chunk into the base64 mu-law payload sent to Twilio.
 *
 * @param {Uint8Array} chunk - Raw audio bytes.
 * @param {{container: string, encoding: string, channels: number, sampleRateHz: number}} format
 *   Description of `chunk`.
 * @returns {string} Base64 mu-law audio at TWILIO_MULAW_SAMPLE_RATE.
 * @throws {Error} When the encoding is neither pass-through mu-law nor `pcm_s16le`.
 */
var transcodePCMToTwilioOutboundPayload = (chunk, format) => {
  // Audio already in Twilio's wire format is forwarded without re-encoding.
  const isAlreadyTelephonyAudio = format.container === "raw" && format.encoding === "mulaw" && format.channels === 1 && format.sampleRateHz === TWILIO_MULAW_SAMPLE_RATE;
  if (isAlreadyTelephonyAudio) {
    return Buffer2.from(chunk).toString("base64");
  }
  if (format.encoding !== "pcm_s16le") {
    throw new Error(`Unsupported outbound telephony audio format: ${format.container}/${format.encoding}`);
  }
  const interleaved = bytesToInt16Array(chunk);
  let mono = interleaved;
  if (format.channels !== 1) {
    // Any non-mono input is treated as interleaved left/right pairs and averaged.
    const frameCount = Math.floor(interleaved.length / 2);
    mono = new Int16Array(frameCount);
    for (let frame = 0; frame < frameCount; frame += 1) {
      const leftSample = interleaved[frame * 2] ?? 0;
      const rightSample = interleaved[frame * 2 + 1] ?? 0;
      mono[frame] = clamp16((leftSample + rightSample) / 2);
    }
  }
  return encodeTwilioMulawBase64(linearResample(mono, format.sampleRateHz, TWILIO_MULAW_SAMPLE_RATE));
};
4043
/**
 * Normalizes an incoming socket frame into a message object.
 *
 * @param {string|object} raw - JSON text, or an already-parsed message.
 * @returns {object} The parsed message; non-string input is returned untouched.
 * @throws {SyntaxError} When `raw` is a string that is not valid JSON.
 */
var parseTwilioMessage = (raw) => typeof raw === "string" ? JSON.parse(raw) : raw;
4049
/**
 * Wraps a Twilio media-stream socket in the `{ close, send }` shape that is
 * handed to the voice session as its socket.
 *
 * Outgoing voice messages are reported to the review recorder and the
 * `onVoiceMessage` hook, then translated into Twilio frames: `audio` messages
 * become `media` frames and `assistant` messages with a turn id become `mark`
 * frames. Nothing is written to Twilio until a stream SID is known.
 *
 * @param {{send: Function, close: Function}} socket - Underlying Twilio socket.
 * @param {() => object} getState - Returns the bridge's current mutable state.
 * @returns {{close: Function, send: Function}} Socket adapter.
 */
var createTwilioSocketAdapter = (socket, getState) => {
  const sendFrame = (frame) => Promise.resolve(socket.send(JSON.stringify(frame)));

  const close = async (code, reason) => {
    await Promise.resolve(socket.close(code, reason));
  };

  const send = async (data) => {
    // Only JSON text frames are understood; anything else is dropped.
    if (typeof data !== "string") {
      return;
    }
    const state = getState();
    const message = JSON.parse(data);
    state.reviewRecorder?.recordVoiceMessage(message);
    const voiceEvent = {
      callSid: state.callSid ?? undefined,
      message,
      sessionId: state.sessionId ?? "",
      streamSid: state.streamSid ?? undefined
    };
    await Promise.resolve(state.onVoiceMessage?.(voiceEvent));
    const { streamSid } = state;
    if (!streamSid) {
      return;
    }
    if (message.type === "audio") {
      const audioBytes = Uint8Array.from(Buffer2.from(message.chunkBase64, "base64"));
      const payload = transcodePCMToTwilioOutboundPayload(audioBytes, message.format);
      // Lets the bridge know there may be queued playback to clear on the next inbound media frame.
      state.hasOutboundAudioSinceLastInbound = true;
      state.reviewRecorder?.recordTwilioOutbound({
        bytes: payload.length,
        event: "media",
        track: "outbound"
      });
      await sendFrame({
        event: "media",
        media: {
          payload
        },
        streamSid
      });
      return;
    }
    if (message.type !== "assistant" || !message.turnId) {
      return;
    }
    const markName = `assistant:${message.turnId}`;
    state.reviewRecorder?.recordTwilioOutbound({
      event: "mark",
      name: markName
    });
    await sendFrame({
      event: "mark",
      mark: {
        name: markName
      },
      streamSid
    });
  };

  return { close, send };
};
4101
/**
 * Renders the TwiML document that connects a call to a media stream.
 *
 * @param {object} options
 * @param {string} options.streamUrl - Value of the `<Stream url>` attribute.
 * @param {string} [options.track] - Optional `track` attribute.
 * @param {string} [options.streamName] - Optional `name` attribute.
 * @param {Object<string, unknown>} [options.parameters] - Custom parameters;
 *   entries whose value is `undefined` are omitted.
 * @returns {string} XML text with all interpolated values passed through `escapeXml`.
 */
var createTwilioVoiceResponse = (options) => {
  const parameterTags = [];
  for (const [name, value] of Object.entries(options.parameters ?? {})) {
    if (value !== undefined) {
      parameterTags.push(`<Parameter name="${escapeXml(name)}" value="${escapeXml(String(value))}" />`);
    }
  }
  const urlAttribute = ` url="${escapeXml(options.streamUrl)}"`;
  const trackAttribute = options.track ? ` track="${escapeXml(options.track)}"` : "";
  const nameAttribute = options.streamName ? ` name="${escapeXml(options.streamName)}"` : "";
  return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream${urlAttribute}${trackAttribute}${nameAttribute}>${parameterTags.join("")}</Stream></Connect></Response>`;
};
4105
/**
 * Creates a bridge that feeds Twilio Media Streams socket messages into a
 * voice session and relays the session's output back over the same socket.
 *
 * The voice session is created lazily on the first `start` or `media` message.
 * Session creation awaits the lexicon / phrase-hint resolvers, so the in-flight
 * creation is shared between overlapping `handleMessage` calls; otherwise a
 * `media` frame handled while `start` is still resolving would create a second
 * session.
 *
 * @param {{send: Function, close: Function}} socket - Twilio media-stream socket.
 * @param {object} options - Bridge configuration (preset, stt, tts, review,
 *   lexicon, phraseHints, turn callbacks, ...), forwarded to `createVoiceSession`.
 * @returns {{
 *   close: (reason?: string) => Promise<void>,
 *   getSessionId: () => (string|null),
 *   getStreamSid: () => (string|null),
 *   handleMessage: (raw: string|object) => Promise<void>
 * }} Bridge handle.
 */
var createTwilioMediaStreamBridge = (socket, options) => {
  const runtimePreset = resolveVoiceRuntimePreset(options.preset);
  // Explicit turn-detection options override the preset's values key by key.
  const turnDetection = resolveTurnDetectionConfig({
    ...runtimePreset.turnDetection,
    ...options.turnDetection
  });
  const audioConditioning = options.audioConditioning !== undefined ? resolveAudioConditioningConfig(options.audioConditioning) : runtimePreset.audioConditioning;
  const logger = resolveLogger(options.logger);
  const reconnect = {
    maxAttempts: options.reconnect?.maxAttempts ?? 10,
    strategy: options.reconnect?.strategy ?? "resume-last-turn",
    timeout: options.reconnect?.timeout ?? 30000
  };
  // Mutable state shared with the socket adapter through a getter.
  const bridgeState = {
    callSid: null,
    hasOutboundAudioSinceLastInbound: false,
    onVoiceMessage: options.onVoiceMessage,
    reviewRecorder: options.review ? createVoiceCallReviewRecorder({
      config: options.review.config ?? {
        preset: options.preset,
        stt: {
          kind: options.stt.kind
        },
        tts: options.tts ? {
          kind: options.tts.kind
        } : undefined,
        turnDetection
      },
      fixtureId: options.review.fixtureId,
      path: options.review.path,
      title: options.review.title
    }) : undefined,
    scenarioId: options.scenarioId ?? null,
    sessionId: options.sessionId ?? null,
    streamSid: null
  };
  let sessionHandle = null;
  // In-flight session creation, shared by concurrent ensureSession() callers.
  let pendingSession = null;
  let reviewArtifactDelivered = false;
  const resolveLexicon2 = async () => {
    if (typeof options.lexicon === "function") {
      return normalizeLexicon2(await options.lexicon({
        context: options.context,
        scenarioId: bridgeState.scenarioId ?? undefined,
        sessionId: bridgeState.sessionId ?? ""
      }) ?? []);
    }
    return normalizeLexicon2(options.lexicon);
  };
  const resolvePhraseHints2 = async () => {
    if (typeof options.phraseHints === "function") {
      return normalizePhraseHints2(await options.phraseHints({
        context: options.context,
        scenarioId: bridgeState.scenarioId ?? undefined,
        sessionId: bridgeState.sessionId ?? ""
      }) ?? []);
    }
    return normalizePhraseHints2(options.phraseHints);
  };
  // Builds the voice session; must only run once at a time (see ensureSession).
  const startSession = async () => {
    // Fall back to a generated id when neither options nor the start frame supplied one.
    bridgeState.sessionId ??= `phone-${Date.now().toString(36)}`;
    const lexicon = await resolveLexicon2();
    const phraseHints = await resolvePhraseHints2();
    const normalizedOnTurn = normalizeOnTurn2(options.onTurn);
    const route = {
      correctTurn: options.correctTurn,
      onComplete: options.onComplete,
      onError: options.onError,
      onSession: options.onSession,
      // Wraps the caller's onTurn so the turn and any assistant reply are recorded for review.
      onTurn: async (input) => {
        bridgeState.reviewRecorder?.recordVoiceMessage({
          type: "turn",
          turn: input.turn
        });
        const result = await normalizedOnTurn(input);
        if (result?.assistantText) {
          bridgeState.reviewRecorder?.recordVoiceMessage({
            type: "assistant",
            text: result.assistantText,
            turnId: input.turn.id
          });
        }
        return result;
      }
    };
    const voiceSocket = createTwilioSocketAdapter(socket, () => bridgeState);
    sessionHandle = createVoiceSession({
      audioConditioning,
      context: options.context,
      costTelemetry: options.costTelemetry,
      id: bridgeState.sessionId,
      languageStrategy: options.languageStrategy,
      lexicon,
      logger,
      phraseHints,
      reconnect,
      route,
      scenarioId: bridgeState.scenarioId ?? undefined,
      socket: voiceSocket,
      store: options.session,
      stt: options.stt,
      sttFallback: resolveSTTFallbackConfig2(options.sttFallback),
      sttLifecycle: options.sttLifecycle ?? runtimePreset.sttLifecycle,
      tts: options.tts,
      turnDetection
    });
    return sessionHandle;
  };
  const ensureSession = async () => {
    if (sessionHandle) {
      return sessionHandle;
    }
    // Reuse a creation that is already under way; clear the slot when it
    // settles so a failed attempt can be retried by a later message.
    pendingSession ??= startSession().finally(() => {
      pendingSession = null;
    });
    return pendingSession;
  };
  return {
    close: async (reason) => {
      await sessionHandle?.close(reason);
      // Deliver the review artifact at most once, even if close() is called repeatedly.
      if (bridgeState.reviewRecorder && options.review?.onArtifact && !reviewArtifactDelivered) {
        reviewArtifactDelivered = true;
        await Promise.resolve(options.review.onArtifact(bridgeState.reviewRecorder.finalize()));
      }
    },
    getSessionId: () => bridgeState.sessionId,
    getStreamSid: () => bridgeState.streamSid,
    handleMessage: async (raw) => {
      const message = parseTwilioMessage(raw);
      switch (message.event) {
        case "connected":
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "connected"
          });
          return;
        case "start": {
          bridgeState.streamSid = message.start.streamSid;
          bridgeState.callSid = message.start.callSid ?? null;
          // Custom parameters from the start frame override the ids given in options when non-blank.
          bridgeState.sessionId = message.start.customParameters?.sessionId?.trim() || bridgeState.sessionId;
          bridgeState.scenarioId = message.start.customParameters?.scenarioId?.trim() || bridgeState.scenarioId;
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "start",
            reason: message.start.callSid,
            text: bridgeState.sessionId ?? undefined
          });
          await ensureSession();
          return;
        }
        case "media": {
          const activeSession = await ensureSession();
          bridgeState.reviewRecorder?.recordTwilioInbound({
            bytes: message.media.payload.length,
            event: "media",
            track: message.media.track
          });
          // Inbound audio after outbound audio was sent: ask Twilio to clear
          // its buffered playback unless the caller opted out.
          if (options.clearOnInboundMedia !== false && bridgeState.hasOutboundAudioSinceLastInbound && bridgeState.streamSid) {
            bridgeState.reviewRecorder?.recordTwilioOutbound({
              event: "clear"
            });
            await Promise.resolve(socket.send(JSON.stringify({
              event: "clear",
              streamSid: bridgeState.streamSid
            })));
          }
          bridgeState.hasOutboundAudioSinceLastInbound = false;
          await activeSession.receiveAudio(transcodeTwilioInboundPayloadToPCM16(message.media.payload));
          return;
        }
        case "mark":
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "mark",
            name: message.mark?.name
          });
          return;
        case "stop":
          bridgeState.reviewRecorder?.recordTwilioInbound({
            event: "stop",
            reason: message.stop?.callSid
          });
          await sessionHandle?.close("twilio-stop");
          return;
      }
    }
  };
};
4283
+ // src/telephony/response.ts
4284
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param {string} value - Text to tidy.
 * @returns {string} Single-spaced, trimmed text.
 */
var normalizeWhitespace = (value) => {
  const singleSpaced = value.replace(/\s+/g, " ");
  return singleSpaced.trim();
};
// Word limit applied to shaped telephony text when the caller does not pass `maxWords`.
var DEFAULT_MAX_WORDS = 12;
// Splits on whitespace that directly follows one of , . ; ! ? (the punctuation stays with the clause).
var CLAUSE_BOUNDARY_PATTERN = /(?<=[,.;!?])\s+/u;
4287
/**
 * Limits text to its first `maxWords` whitespace-separated words.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxWords - Word limit; non-finite or non-positive values disable the limit.
 * @returns {string} The original text when it fits (or no limit applies),
 *   otherwise the kept words joined by single spaces.
 */
var clampWords = (text, maxWords) => {
  const limitApplies = Number.isFinite(maxWords) && maxWords > 0;
  if (!limitApplies) {
    return text;
  }
  const tokens = text.split(/\s+/u).filter((token) => token.length > 0);
  return tokens.length > maxWords ? tokens.slice(0, maxWords).join(" ") : text;
};
4297
/**
 * Limits text to at most `maxChars` UTF-16 code units, trimming the result.
 *
 * @param {string} text - Text to shorten.
 * @param {number} [maxChars] - Length limit; missing, non-finite, zero or
 *   negative values disable the limit.
 * @returns {string} The original text when it fits (or no limit applies),
 *   otherwise the trimmed prefix.
 */
var clampChars = (text, maxChars) => {
  const limitApplies = Number.isFinite(maxChars) && maxChars > 0;
  if (!limitApplies || text.length <= maxChars) {
    return text;
  }
  return text.slice(0, maxChars).trim();
};
4306
/**
 * Makes sure non-empty text ends with sentence-final punctuation.
 *
 * Text that already ends in `.`, `!` or `?` is returned unchanged. Otherwise any
 * trailing commas, semicolons, colons and whitespace are removed before a
 * period is appended, so a clause cut at a comma ("Sure,") becomes "Sure."
 * rather than "Sure,.".
 *
 * @param {string} text - Text to terminate.
 * @returns {string} Terminated text; empty input (or input made only of the
 *   stripped characters) yields an empty string.
 */
var ensureTerminalPunctuation = (text) => {
  if (!text) {
    return text;
  }
  if (/[.!?]$/u.test(text)) {
    return text;
  }
  // Drop dangling non-terminal punctuation so it is not followed by a period.
  const stem = text.replace(/[\s,;:]+$/u, "");
  return stem ? `${stem}.` : stem;
};
4312
/**
 * Returns the first clause of the text after whitespace normalization.
 *
 * A colon within the first 24 characters (and not at the very end) is treated
 * as ending a short label, and only the text after it is considered.
 *
 * @param {string} text - Source text.
 * @returns {string} The first clause (its trailing punctuation is kept), or an
 *   empty string for blank input.
 */
var extractLeadClause = (text) => {
  const collapsed = normalizeWhitespace(text);
  if (!collapsed) {
    return collapsed;
  }
  const colonAt = collapsed.indexOf(":");
  const hasLeadingLabel = colonAt >= 0 && colonAt < 24 && colonAt < collapsed.length - 1;
  const body = hasLeadingLabel ? normalizeWhitespace(collapsed.slice(colonAt + 1)) : collapsed;
  const [firstClause] = body.split(CLAUSE_BOUNDARY_PATTERN).filter(Boolean);
  return firstClause ?? body;
};
4322
/**
 * Shapes assistant text for playback over a phone call.
 *
 * In `"full"` mode the normalized text is only limited by `maxChars`. In the
 * default `"lead-clause"` mode the first clause is taken, limited by word and
 * character counts, and given terminal punctuation.
 *
 * @param {string} text - Assistant text.
 * @param {{mode?: string, maxWords?: number, maxChars?: number}} [options]
 *   `maxWords` defaults to DEFAULT_MAX_WORDS; `maxChars` is unlimited when omitted.
 * @returns {string} Shaped text, or an empty string for blank input.
 */
var shapeTelephonyAssistantText = (text, options = {}) => {
  const collapsed = normalizeWhitespace(text);
  if (!collapsed) {
    return collapsed;
  }
  const mode = options.mode ?? "lead-clause";
  if (mode === "full") {
    return clampChars(collapsed, options.maxChars);
  }
  const wordLimited = clampWords(extractLeadClause(collapsed), options.maxWords ?? DEFAULT_MAX_WORDS);
  const charLimited = clampChars(wordLimited, options.maxChars);
  return ensureTerminalPunctuation(normalizeWhitespace(charLimited));
};
1984
4335
  export {
4336
+ withVoiceOpsTaskId,
4337
+ withVoiceIntegrationEventId,
1985
4338
  voice,
4339
+ transcodeTwilioInboundPayloadToPCM16,
4340
+ transcodePCMToTwilioOutboundPayload,
4341
+ summarizeVoiceOpsTasks,
4342
+ startVoiceOpsTask,
4343
+ shapeTelephonyAssistantText,
4344
+ resolveVoiceSTTRoutingStrategy,
1986
4345
  resolveVoiceRuntimePreset,
1987
4346
  resolveTurnDetectionConfig,
1988
4347
  resolveAudioConditioningConfig,
4348
+ reopenVoiceOpsTask,
4349
+ renderVoiceCallReviewMarkdown,
4350
+ renderVoiceCallReviewHTML,
4351
+ recordVoiceRuntimeOps,
4352
+ listVoiceOpsTasks,
4353
+ encodeTwilioMulawBase64,
4354
+ decodeTwilioMulawBase64,
4355
+ createVoiceTaskUpdatedEvent,
4356
+ createVoiceTaskCreatedEvent,
1989
4357
  createVoiceSessionRecord,
1990
4358
  createVoiceSession,
4359
+ createVoiceSTTRoutingCorrectionHandler,
4360
+ createVoiceReviewSavedEvent,
1991
4361
  createVoiceMemoryStore,
4362
+ createVoiceIntegrationEvent,
4363
+ createVoiceFileTaskStore,
4364
+ createVoiceFileSessionStore,
4365
+ createVoiceFileRuntimeStorage,
4366
+ createVoiceFileReviewStore,
4367
+ createVoiceFileIntegrationEventStore,
4368
+ createVoiceCallReviewRecorder,
4369
+ createVoiceCallReviewFromSession,
4370
+ createVoiceCallReviewFromLiveTelephonyReport,
4371
+ createVoiceCallCompletedEvent,
4372
+ createTwilioVoiceResponse,
4373
+ createTwilioMediaStreamBridge,
4374
+ createStoredVoiceOpsTask,
4375
+ createStoredVoiceIntegrationEvent,
4376
+ createStoredVoiceCallReviewArtifact,
4377
+ createRiskyTurnCorrectionHandler,
1992
4378
  createPhraseHintCorrectionHandler,
1993
4379
  createId,
4380
+ createDomainPhraseHints,
4381
+ createDomainLexicon,
1994
4382
  conditionAudioChunk,
4383
+ completeVoiceOpsTask,
4384
+ buildVoiceOpsTaskFromReview,
4385
+ assignVoiceOpsTask,
4386
+ applyRiskTieredPhraseHintCorrections,
1995
4387
  applyPhraseHintCorrections,
1996
4388
  TURN_PROFILE_DEFAULTS
1997
4389
  };