@absolutejs/voice 0.0.19 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +387 -4
  2. package/dist/angular/index.d.ts +1 -0
  3. package/dist/angular/index.js +669 -3
  4. package/dist/angular/voice-controller.service.d.ts +21 -0
  5. package/dist/audioConditioning.d.ts +3 -0
  6. package/dist/client/actions.d.ts +7 -0
  7. package/dist/client/connection.d.ts +5 -0
  8. package/dist/client/controller.d.ts +2 -0
  9. package/dist/client/htmxBootstrap.js +576 -167
  10. package/dist/client/index.d.ts +1 -0
  11. package/dist/client/index.js +486 -3
  12. package/dist/client/microphone.d.ts +4 -2
  13. package/dist/correction.d.ts +16 -0
  14. package/dist/index.d.ts +4 -0
  15. package/dist/index.js +1314 -283
  16. package/dist/presets.d.ts +13 -0
  17. package/dist/react/index.d.ts +1 -0
  18. package/dist/react/index.js +642 -3
  19. package/dist/react/useVoiceController.d.ts +20 -0
  20. package/dist/react/useVoiceStream.d.ts +1 -0
  21. package/dist/store.d.ts +2 -2
  22. package/dist/svelte/index.d.ts +1 -0
  23. package/dist/svelte/index.js +607 -3
  24. package/dist/testing/benchmark.d.ts +36 -0
  25. package/dist/testing/fixtures.d.ts +1 -0
  26. package/dist/testing/index.d.ts +2 -0
  27. package/dist/testing/index.js +1975 -4
  28. package/dist/testing/resilience.d.ts +20 -0
  29. package/dist/testing/sessionBenchmark.d.ts +126 -0
  30. package/dist/testing/stt.d.ts +1 -0
  31. package/dist/turnDetection.d.ts +5 -1
  32. package/dist/turnProfiles.d.ts +6 -0
  33. package/dist/types.d.ts +198 -8
  34. package/dist/vue/index.d.ts +1 -0
  35. package/dist/vue/index.js +660 -3
  36. package/dist/vue/useVoiceController.d.ts +19 -0
  37. package/fixtures/README.md +24 -0
  38. package/fixtures/manifest.json +127 -0
  39. package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
  40. package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
  41. package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
  42. package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
  43. package/fixtures/pcm/multiturn-three-mixed.pcm +0 -0
  44. package/fixtures/pcm/multiturn-two-clean.pcm +0 -0
  45. package/fixtures/pcm/stella-bulgaria-bulgarian20.pcm +0 -0
  46. package/fixtures/pcm/stella-jamaica-jamaican-creole-english1.pcm +0 -0
  47. package/fixtures/pcm/stella-liberia-liberian-pidgin-english2.pcm +0 -0
  48. package/fixtures/pcm/stella-sierra-leone-krio5.pcm +0 -0
  49. package/package.json +25 -1
@@ -95,6 +95,61 @@ var measureAudioLevel = (audio) => {
95
95
  return Math.sqrt(sumSquares / samples.length);
96
96
  };
97
97
  var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
98
/**
 * Counts the whitespace-delimited words in `value`.
 *
 * Splitting on a literal single space miscounts any input that is not already
 * normalized (repeated, leading or trailing whitespace produce empty tokens),
 * so the text is trimmed, split on whitespace runs, and empty tokens dropped.
 * For already-normalized text the result is unchanged.
 *
 * @param {string} value - Text to count words in.
 * @returns {number} Number of words; 0 for empty or whitespace-only text.
 */
var countWords = (value) => value.trim().split(/\s+/).filter(Boolean).length;
99
/**
 * Picks which of two transcript texts to keep.
 *
 * Both inputs are normalized first. An empty side loses to the other side;
 * if one text contains the other as a substring the containing text wins;
 * otherwise the text with strictly more words wins, with ties going to the
 * current text.
 *
 * @param {string} currentText - Text held so far.
 * @param {string} nextText - Newly received text.
 * @returns {string} The preferred, normalized text.
 */
var selectPreferredTranscriptText = (currentText, nextText) => {
  const held = normalizeText(currentText);
  const incoming = normalizeText(nextText);
  if (!held || !incoming) {
    // At most one side has content; return whichever it is.
    return held || incoming;
  }
  // `includes` also covers the case where both texts are equal.
  if (held.includes(incoming)) {
    return held;
  }
  if (incoming.includes(held)) {
    return incoming;
  }
  return countWords(incoming) > countWords(held) ? incoming : held;
};
119
/**
 * Appends `nextText` to `currentText`, removing the longest run of words that
 * is both a suffix of the current text and a prefix of the next text.
 *
 * @param {string} currentText - Text spoken first.
 * @param {string} nextText - Text spoken afterwards.
 * @returns {string} The merged, normalized text.
 */
var mergeSequentialTranscriptText = (currentText, nextText) => {
  const head = normalizeText(currentText);
  const tail = normalizeText(nextText);
  if (!head) {
    return tail;
  }
  if (!tail) {
    return head;
  }
  const headWords = head.split(" ");
  const tailWords = tail.split(" ");
  // Try the largest possible overlap first and shrink until one matches.
  let overlap = Math.min(headWords.length, tailWords.length);
  while (overlap > 0) {
    const suffix = headWords.slice(headWords.length - overlap);
    const suffixMatchesPrefix = suffix.every((word, position) => word === tailWords[position]);
    if (suffixMatchesPrefix) {
      return headWords.concat(tailWords.slice(overlap)).join(" ");
    }
    overlap -= 1;
  }
  return `${head} ${tail}`.trim();
};
140
/**
 * Counts how many leading words two texts share, comparing word by word
 * after normalization.
 *
 * @param {string} currentText - First text.
 * @param {string} nextText - Second text.
 * @returns {number} Length of the common word prefix.
 */
var countCommonPrefixWords = (currentText, nextText) => {
  const toWords = (text) => normalizeText(text).split(" ").filter(Boolean);
  const left = toWords(currentText);
  const right = toWords(nextText);
  // First position where the texts diverge or the second text runs out.
  const firstMismatch = left.findIndex((word, position) => position >= right.length || word !== right[position]);
  return firstMismatch === -1 ? left.length : firstMismatch;
};
98
153
  var mergeTranscriptTexts = (transcripts) => {
99
154
  const merged = [];
100
155
  for (const transcript of transcripts) {
@@ -118,12 +173,14 @@ var mergeTranscriptTexts = (transcripts) => {
118
173
  }
119
174
  return merged.join(" ").trim();
120
175
  };
121
/**
 * Builds the text of the current turn from its final transcripts and the
 * in-flight partial text.
 *
 * When both final and partial text exist, the partial started at least 250 ms
 * after the last final transcript that carries a numeric `endedAtMs`, and the
 * two texts share no leading words, the partial is appended to the final text.
 * In every other case the preferred of the two texts is returned.
 *
 * @param {Array<object>} transcripts - Final transcripts of the turn.
 * @param {string} partialText - Latest partial text.
 * @param {{ partialStartedAtMs?: number }} [options] - Timing of the partial.
 * @returns {string} The turn text.
 */
var buildTurnText = (transcripts, partialText, options = {}) => {
  const finalText = mergeTranscriptTexts(transcripts);
  const nextPartial = normalizeText(partialText);
  const timedTranscripts = [...transcripts].filter((transcript) => typeof transcript.endedAtMs === "number");
  const lastFinalEndedAtMs = timedTranscripts.at(-1)?.endedAtMs;
  if (!finalText || !nextPartial) {
    return selectPreferredTranscriptText(finalText, nextPartial);
  }
  if (typeof lastFinalEndedAtMs !== "number" || typeof options.partialStartedAtMs !== "number") {
    return selectPreferredTranscriptText(finalText, nextPartial);
  }
  const gapMs = options.partialStartedAtMs - lastFinalEndedAtMs;
  if (gapMs >= 250 && countCommonPrefixWords(finalText, nextPartial) === 0) {
    return mergeSequentialTranscriptText(finalText, nextPartial);
  }
  return selectPreferredTranscriptText(finalText, nextPartial);
};
128
185
 
129
186
  // src/testing/accuracy.ts
@@ -204,6 +261,7 @@ var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
204
261
  const settleMs = options.settleMs ?? 500;
205
262
  const waitForRealtimeMs = options.waitForRealtimeMs ?? 0;
206
263
  let lastActivityAt = Date.now();
264
+ let speechEndedAt = startedAt;
207
265
  const markActive = () => {
208
266
  lastActivityAt = Date.now();
209
267
  };
@@ -240,12 +298,15 @@ var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
240
298
  const realtimeDelayMs = waitForRealtimeMs > 0 ? waitForRealtimeMs : chunkDurationMs;
241
299
  for (const chunk of chunks) {
242
300
  await session.send(chunk);
301
+ markActive();
243
302
  await Bun.sleep(realtimeDelayMs);
244
303
  }
304
+ speechEndedAt = Date.now();
245
305
  if (tailPaddingMs > 0) {
246
306
  const tailBytes = Math.max(2, Math.floor(bytesPerMillisecond * tailPaddingMs));
247
307
  for (const chunk of chunkAudio(createSilence(tailBytes), bytesPerChunk)) {
248
308
  await session.send(chunk);
309
+ markActive();
249
310
  await Bun.sleep(realtimeDelayMs);
250
311
  }
251
312
  }
@@ -265,11 +326,30 @@ var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
265
326
  finalEvents,
266
327
  finalText,
267
328
  partialEvents,
329
+ speechEndedAt,
268
330
  startedAt
269
331
  };
270
332
  };
271
333
 
272
334
  // src/testing/benchmark.ts
335
/**
 * Classifies a fixture into an environment group from its tags.
 *
 * Accent tags: "accent", "speech-accent-archive". Noise tags: "noisy",
 * "synthetic-noise", "stress". Accent and noise take precedence over "clean".
 *
 * @param {{ tags?: string[] }} fixture - Fixture whose tags are inspected.
 * @returns {"accent-noisy"|"accent"|"noisy"|"clean"|"other"} The group name.
 */
var resolveFixtureEnvironment = (fixture) => {
  const tagSet = new Set(fixture.tags ?? []);
  const hasAny = (...names) => names.some((name) => tagSet.has(name));
  const accented = hasAny("accent", "speech-accent-archive");
  const noisy = hasAny("noisy", "synthetic-noise", "stress");
  if (accented) {
    return noisy ? "accent-noisy" : "accent";
  }
  if (noisy) {
    return "noisy";
  }
  return tagSet.has("clean") ? "clean" : "other";
};
273
353
  var normalizeBenchmarkText = (value) => value.toLowerCase().replace(/[^\p{L}\p{N}\s']/gu, " ").replace(/\s+/g, " ").trim();
274
354
  var scoreExpectedTerms = (actualText, expectedTerms) => {
275
355
  const normalizedActual = normalizeBenchmarkText(actualText);
@@ -300,10 +380,46 @@ var roundMetric = (value, digits = 4) => {
300
380
  const factor = 10 ** digits;
301
381
  return Math.round(value * factor) / factor;
302
382
  };
383
/**
 * Aggregates per-fixture benchmark results into one summary per `group`,
 * sorted by group name.
 *
 * @param {Array<object>} fixtures - Fixture results carrying `group`,
 *   `passes`, `accuracy.wordErrorRate`, `expectedTerms.recall`, `elapsedMs`,
 *   `errorCount` and `fragmentationCount`.
 * @returns {Array<object>} One summary object per group.
 */
var calculateGroupSummary = (fixtures) => {
  // Bucket results by group, keeping first-seen order of the groups.
  const byGroup = new Map();
  for (const fixture of fixtures) {
    const bucket = byGroup.get(fixture.group);
    if (bucket) {
      bucket.push(fixture);
    } else {
      byGroup.set(fixture.group, [fixture]);
    }
  }
  const summaries = [];
  for (const [group, results] of byGroup) {
    const fixtureCount = results.length;
    const passCount = results.filter((result) => result.passes).length;
    const meanWordErrorRate = average(results.map((result) => result.accuracy.wordErrorRate)) ?? 0;
    const meanTermRecall = average(results.map((result) => result.expectedTerms.recall)) ?? 0;
    const meanElapsedMs = average(results.map((result) => result.elapsedMs));
    const wordAccuracy = 1 - meanWordErrorRate;
    summaries.push({
      averageElapsedMs: roundMetric(meanElapsedMs, 2) ?? 0,
      averageTermRecall: roundMetric(meanTermRecall) ?? 0,
      averageWordErrorRate: roundMetric(meanWordErrorRate) ?? 0,
      fixturesWithErrors: results.filter((result) => result.errorCount > 0).length,
      fixturesWithFragments: results.filter((result) => result.fragmentationCount > 0).length,
      fixtureCount,
      group,
      passCount,
      passRate: fixtureCount > 0 ? roundMetric(passCount / fixtureCount) ?? 0 : 0,
      wordAccuracyRate: roundMetric(wordAccuracy) ?? 0
    });
  }
  summaries.sort((left, right) => left.group.localeCompare(right.group));
  return summaries;
};
303
411
  var toFixtureBenchmarkResult = (fixture, result, elapsedMs) => {
412
+ const toPostSpeechLatency = (timestamp) => {
413
+ if (typeof timestamp !== "number") {
414
+ return;
415
+ }
416
+ return Math.max(0, timestamp - result.speechEndedAt);
417
+ };
304
418
  const timeToFirstPartialMs = result.partialEvents[0] ? result.partialEvents[0].receivedAt - result.startedAt : undefined;
305
419
  const timeToFirstFinalMs = result.finalEvents[0] ? result.finalEvents[0].receivedAt - result.startedAt : undefined;
306
420
  const timeToEndOfTurnMs = result.endOfTurnEvents[0] ? result.endOfTurnEvents[0].receivedAt - result.startedAt : undefined;
421
+ const postSpeechTimeToFirstFinalMs = toPostSpeechLatency(result.finalEvents[0]?.receivedAt);
422
+ const postSpeechTimeToEndOfTurnMs = toPostSpeechLatency(result.endOfTurnEvents[0]?.receivedAt);
307
423
  const expectedTerms = scoreExpectedTerms(result.finalText, fixture.expectedTerms);
308
424
  return {
309
425
  accuracy: result.accuracy,
@@ -317,8 +433,11 @@ var toFixtureBenchmarkResult = (fixture, result, elapsedMs) => {
317
433
  finalText: result.finalText,
318
434
  fixtureId: fixture.id,
319
435
  fragmentationCount: Math.max(0, result.finalEvents.length - 1),
436
+ group: resolveFixtureEnvironment(fixture),
320
437
  passes: result.errorEvents.length === 0 && result.finalText.trim().length > 0 && result.accuracy.passesThreshold,
321
438
  partialCount: result.partialEvents.length,
439
+ postSpeechTimeToEndOfTurnMs,
440
+ postSpeechTimeToFirstFinalMs,
322
441
  tags: fixture.tags ?? [],
323
442
  timeToEndOfTurnMs,
324
443
  timeToFirstFinalMs,
@@ -336,6 +455,8 @@ var summarizeSTTBenchmark = (adapterId, fixtures) => {
336
455
  averageEndOfTurnCount: roundMetric(average(fixtures.map((fixture) => fixture.endOfTurnCount)), 2) ?? 0,
337
456
  averageFinalCount: roundMetric(average(fixtures.map((fixture) => fixture.finalCount)), 2) ?? 0,
338
457
  averageTermRecall: roundMetric(average(fixtures.map((fixture) => fixture.expectedTerms.recall))) ?? 0,
458
+ averagePostSpeechTimeToEndOfTurnMs: roundMetric(average(fixtures.map((fixture) => fixture.postSpeechTimeToEndOfTurnMs)), 2),
459
+ averagePostSpeechTimeToFirstFinalMs: roundMetric(average(fixtures.map((fixture) => fixture.postSpeechTimeToFirstFinalMs)), 2),
339
460
  averageTimeToEndOfTurnMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToEndOfTurnMs)), 2),
340
461
  averageTimeToFirstFinalMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToFirstFinalMs)), 2),
341
462
  averageTimeToFirstPartialMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToFirstPartialMs)), 2),
@@ -343,12 +464,51 @@ var summarizeSTTBenchmark = (adapterId, fixtures) => {
343
464
  fixtureCount,
344
465
  fixturesWithErrors: fixtures.filter((fixture) => fixture.errorCount > 0).length,
345
466
  fixturesWithFragmentation: fixtures.filter((fixture) => fixture.fragmentationCount > 0).length,
467
+ groupSummaries: calculateGroupSummary(fixtures),
346
468
  passCount,
347
469
  passRate: fixtureCount > 0 ? roundMetric(passCount / fixtureCount) ?? 0 : 0,
348
470
  totalErrorCount: fixtures.reduce((sum, fixture) => sum + fixture.errorCount, 0),
349
471
  wordAccuracyRate: fixtureCount > 0 ? roundMetric(1 - (average(fixtures.map((fixture) => fixture.accuracy.wordErrorRate)) ?? 0)) ?? 0 : 0
350
472
  };
351
473
  };
474
/**
 * Checks a benchmark report against optional acceptance thresholds and
 * computes a weighted score.
 *
 * Overall thresholds (`overallPassRate`, `termRecall`, `wordAccuracyRate`)
 * and per-group thresholds (`groupPassRate[group].passRate` /
 * `.wordAccuracyRate`) each add a failure message when the report is below
 * them. Groups in the report without a configured threshold are skipped.
 * The score is 0.45 * passRate + 0.35 * wordAccuracyRate
 * + 0.2 * averageTermRecall, rounded to 3 digits.
 *
 * @param {object} report - Report with `adapterId` and `summary`.
 * @param {object} [thresholds] - Acceptance thresholds; all optional.
 * @returns {{ adapterId: *, failures: string[], passed: boolean, score: number }}
 */
var evaluateSTTBenchmarkAcceptance = (report, thresholds = {}) => {
  const { summary } = report;
  const failures = [];
  const asPercent = (ratio) => `${(ratio * 100).toFixed(2)}%`;
  const isBelow = (actual, minimum) => minimum !== undefined && actual < minimum;

  if (isBelow(summary.passRate, thresholds.overallPassRate)) {
    failures.push(`overall passRate ${asPercent(summary.passRate)} below ${asPercent(thresholds.overallPassRate)}`);
  }
  if (isBelow(summary.averageTermRecall, thresholds.termRecall)) {
    failures.push(`overall term recall ${summary.averageTermRecall.toFixed(4)} below ${thresholds.termRecall.toFixed(4)}`);
  }
  if (isBelow(summary.wordAccuracyRate, thresholds.wordAccuracyRate)) {
    failures.push(`overall word accuracy ${asPercent(summary.wordAccuracyRate)} below ${asPercent(thresholds.wordAccuracyRate)}`);
  }

  const groupThresholds = thresholds.groupPassRate;
  if (groupThresholds) {
    for (const groupSummary of summary.groupSummaries) {
      const { group } = groupSummary;
      const groupThreshold = groupThresholds[group];
      if (!groupThreshold) {
        continue;
      }
      if (isBelow(groupSummary.passRate, groupThreshold.passRate)) {
        failures.push(`${group} passRate ${asPercent(groupSummary.passRate)} below ${asPercent(groupThreshold.passRate)}`);
      }
      if (isBelow(groupSummary.wordAccuracyRate, groupThreshold.wordAccuracyRate)) {
        failures.push(`${group} wordAccuracy ${asPercent(groupSummary.wordAccuracyRate)} below ${asPercent(groupThreshold.wordAccuracyRate)}`);
      }
    }
  }

  const weighted = summary.passRate * 0.45 + summary.wordAccuracyRate * 0.35 + summary.averageTermRecall * 0.2;
  return {
    adapterId: report.adapterId,
    failures,
    passed: failures.length === 0,
    score: roundMetric(weighted, 3) ?? 0
  };
};
352
512
  var compareSTTBenchmarks = (reports) => {
353
513
  const entries = reports.map((report) => ({
354
514
  adapterId: report.adapterId,
@@ -434,13 +594,1824 @@ var loadVoiceTestFixtures = async (fixtureDirectory) => {
434
594
  };
435
595
  }));
436
596
  };
597
+ // src/store.ts
598
/**
 * Generates a new identifier using `crypto.randomUUID()`.
 *
 * @returns {string} A random UUID string.
 */
var createId = () => {
  const id = crypto.randomUUID();
  return id;
};
599
/**
 * Creates a fresh session record: status "active", an empty current turn,
 * no transcripts or turns, zero reconnect attempts, and an empty
 * last-committed-turn marker. `createdAt` is the current time.
 *
 * @param {string} id - Session identifier.
 * @param {string} [scenarioId] - Scenario identifier stored on the record.
 * @returns {object} The new session record.
 */
var createVoiceSessionRecord = (id, scenarioId) => {
  const currentTurn = {
    finalText: "",
    lastSpeechAt: undefined,
    lastTranscriptAt: undefined,
    partialEndedAt: undefined,
    partialStartedAt: undefined,
    partialText: "",
    silenceStartedAt: undefined,
    transcripts: []
  };
  const lastCommittedTurn = {
    committedAt: 0,
    signature: "",
    text: "",
    transcriptIds: []
  };
  return {
    committedTurnIds: [],
    createdAt: Date.now(),
    currentTurn,
    id,
    scenarioId,
    reconnect: { attempts: 0 },
    status: "active",
    transcripts: [],
    turns: [],
    lastCommittedTurn
  };
};
625
/**
 * Builds a brand-new session record for `id`, carrying over only the
 * `metadata` of the existing record (if any).
 *
 * @param {string} id - Session identifier.
 * @param {object} [existing] - Previous record whose metadata is preserved.
 * @param {string} [scenarioId] - Scenario identifier for the new record.
 * @returns {object} The reset session record.
 */
var resetVoiceSessionRecord = (id, existing, scenarioId) => {
  const fresh = createVoiceSessionRecord(id, scenarioId);
  fresh.metadata = existing?.metadata;
  return fresh;
};
629
/**
 * Projects a session record onto its summary: creation time, id, last
 * activity time, status, and the number of turns.
 *
 * @param {object} session - Full session record.
 * @returns {{ createdAt: *, id: *, lastActivityAt: *, status: *, turnCount: number }}
 */
var toVoiceSessionSummary = (session) => {
  const { createdAt, id, lastActivityAt, status, turns } = session;
  return { createdAt, id, lastActivityAt, status, turnCount: turns.length };
};
636
+
637
+ // src/memoryStore.ts
638
/**
 * Creates a session store backed by a `Map` held in this closure.
 *
 * All operations are async. `list` returns session summaries ordered by most
 * recent activity first, using `createdAt` when a session has no
 * `lastActivityAt`.
 *
 * @returns {{ get: Function, getOrCreate: Function, list: Function, remove: Function, set: Function }}
 */
var createVoiceMemoryStore = () => {
  const records = new Map();
  const activityOf = (summary) => summary.lastActivityAt ?? summary.createdAt;
  const store = {
    get: async (id) => records.get(id),
    getOrCreate: async (id) => {
      const found = records.get(id);
      if (found) {
        return found;
      }
      const created = createVoiceSessionRecord(id);
      records.set(id, created);
      return created;
    },
    list: async () => {
      const summaries = [];
      for (const record of records.values()) {
        summaries.push(toVoiceSessionSummary(record));
      }
      return summaries.sort((first, second) => activityOf(second) - activityOf(first));
    },
    remove: async (id) => {
      records.delete(id);
    },
    set: async (id, value) => {
      records.set(id, value);
    }
  };
  return store;
};
658
+
659
+ // src/audioConditioning.ts
660
+ var DEFAULT_TARGET_LEVEL = 0.08; // fallback for config.targetLevel; conditionAudioChunk divides it by the chunk's (gated) RMS to get the gain
660
+ var DEFAULT_MAX_GAIN = 3; // fallback for config.maxGain; upper bound on the computed gain
661
+ var DEFAULT_NOISE_GATE_THRESHOLD = 0.006; // fallback for config.noiseGateThreshold; chunks whose RMS is below it are gated
662
+ var DEFAULT_NOISE_GATE_ATTENUATION = 0.15; // fallback for config.noiseGateAttenuation; factor applied to gated chunks
664
/**
 * Views raw audio bytes as 16-bit samples in platform byte order.
 *
 * A trailing odd byte is ignored. An `Int16Array` view must start at an even
 * byte offset, so a byte view whose `byteOffset` is odd (for example a
 * `subarray` taken at an odd position) cannot be viewed in place; constructing
 * the view would throw a `RangeError`. In that case the bytes are copied into
 * a fresh, aligned buffer. Aligned inputs are still returned as zero-copy
 * views.
 *
 * @param {ArrayBuffer|ArrayBufferView} audio - Raw PCM bytes.
 * @returns {Int16Array} The samples (a view when aligned, a copy otherwise).
 */
var toInt16Array = (audio) => {
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, sampleCount);
  }
  if (audio.byteOffset % 2 === 0) {
    return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
  }
  // Misaligned view: copy just the whole samples into an aligned buffer.
  const alignedCopy = audio.buffer.slice(audio.byteOffset, audio.byteOffset + sampleCount * 2);
  return new Int16Array(alignedCopy);
};
670
/**
 * Computes the root-mean-square level of 16-bit samples, with each sample
 * scaled by 1/32768 before squaring.
 *
 * @param {Int16Array} samples - Samples to measure.
 * @returns {number} The RMS level; 0 for an empty input.
 */
var computeRms = (samples) => {
  const count = samples.length;
  if (count === 0) {
    return 0;
  }
  let energy = 0;
  for (let position = 0; position < count; position += 1) {
    const unit = samples[position] / 32768;
    energy += unit * unit;
  }
  return Math.sqrt(energy / count);
};
681
/**
 * Resolves user-supplied audio-conditioning options into a complete config.
 *
 * @param {object} [config] - Partial options; may be omitted.
 * @returns {object|undefined} The config with defaults filled in, or
 *   `undefined` when `config` is missing or has `enabled === false`.
 */
var resolveAudioConditioningConfig = (config) => {
  const disabled = !config || config.enabled === false;
  if (disabled) {
    return undefined;
  }
  const { maxGain, noiseGateAttenuation, noiseGateThreshold, targetLevel } = config;
  return {
    enabled: true,
    maxGain: maxGain ?? DEFAULT_MAX_GAIN,
    noiseGateAttenuation: noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
    noiseGateThreshold: noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
    targetLevel: targetLevel ?? DEFAULT_TARGET_LEVEL
  };
};
693
/**
 * Applies a noise gate and level normalization to one chunk of 16-bit audio.
 *
 * The chunk's RMS decides whether it is gated (RMS below
 * `config.noiseGateThreshold`). The gain is `targetLevel` divided by the
 * gated RMS, capped at `config.maxGain`, floored at 0.25, then multiplied by
 * the gate factor. Scaled samples are rounded and clamped to the int16 range.
 *
 * @param {ArrayBuffer|ArrayBufferView} audio - Raw audio chunk.
 * @param {object} [config] - Resolved conditioning config.
 * @returns {*} `audio` unchanged when there is no config or no samples;
 *   otherwise a new `Uint8Array` over the conditioned samples.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const samples = toInt16Array(audio);
  if (samples.length === 0) {
    return audio;
  }
  const level = computeRms(samples);
  const gate = level < config.noiseGateThreshold ? config.noiseGateAttenuation : 1;
  // Floor the reference level so a silent chunk never divides by zero.
  const reference = Math.max(level * gate, 0.000001);
  const boundedGain = Math.min(config.maxGain, config.targetLevel / reference);
  const scale = Math.max(0.25, boundedGain) * gate;
  const conditioned = Int16Array.from(samples, (sample) => {
    const scaled = Math.round(sample * scale);
    return Math.min(32767, Math.max(-32768, scaled));
  });
  return new Uint8Array(conditioned.buffer);
};
713
+
714
+ // src/logger.ts
715
/** Function that does nothing; used for every unconfigured log level. */
var noop = () => {};

/**
 * Creates a logger whose four levels all discard their input.
 *
 * @returns {{ debug: Function, error: Function, info: Function, warn: Function }}
 */
var createNoopLogger = () => ({
  debug: noop,
  error: noop,
  info: noop,
  warn: noop
});

/**
 * Produces a logger that is safe to call on every level.
 *
 * Object spread copies only own enumerable properties, so spreading a
 * class-based logger would drop methods defined on its prototype and silently
 * replace them with no-ops. Each level is therefore looked up on the supplied
 * logger directly and bound to it; a level that is missing or not a function
 * falls back to the no-op. Any other own properties of `logger` are still
 * copied onto the result.
 *
 * @param {object} [logger] - Partial or complete logger; may be omitted.
 * @returns {object} Logger with callable `debug`, `error`, `info`, `warn`.
 */
var resolveLogger = (logger) => {
  const defaults = createNoopLogger();
  const resolved = { ...defaults, ...logger };
  for (const level of Object.keys(defaults)) {
    const method = logger?.[level];
    resolved[level] = typeof method === "function" ? method.bind(logger) : noop;
  }
  return resolved;
};
726
+
727
+ // src/session.ts
728
+ var DEFAULT_RECONNECT_TIMEOUT = 30000; // fallback for options.reconnect.timeout
729
+ var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10; // fallback for options.reconnect.maxAttempts
730
+ var DEFAULT_TRANSCRIPT_STABILITY_MS = 450; // fallback for turnDetection.transcriptStabilityMs; non-manual commits wait until the last transcript is at least this old
731
+ var DEFAULT_FALLBACK_REPLAY_MS = 8000; // fallback for sttFallback.replayWindowMs; buffered turn audio older than this is pruned
732
+ var DEFAULT_FALLBACK_SETTLE_MS = 220; // fallback for sttFallback.settleMs
733
+ var DEFAULT_FALLBACK_COMPLETION_TIMEOUT_MS = 2500; // fallback for sttFallback.completionTimeoutMs
734
+ var DEFAULT_FALLBACK_CONFIDENCE_THRESHOLD = 0.6; // fallback for sttFallback.confidenceThreshold; mean confidence below it counts as low
735
+ var DEFAULT_FALLBACK_MIN_TEXT_LENGTH = 2; // fallback for sttFallback.minTextLength; isFallbackNeeded compares it against the word count
736
+ var DEFAULT_FALLBACK_MAX_ATTEMPTS_PER_TURN = 1; // fallback for sttFallback.maxAttemptsPerTurn
737
+ var DEFAULT_DUPLICATE_TURN_WINDOW_MS = 5000; // NOTE(review): not referenced in the visible part of the file; presumably a duplicate-turn window — confirm
738
+ var FALLBACK_CONFIDENCE_SELECTION_DELTA = 0.05; // confidence lead one side needs in selectBetterTurnText to win on confidence
739
+ var FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO = 0.12; // relative word-count difference at which selectBetterTurnText picks the longer text outright
740
+ var DEFAULT_FORMAT = { // audio format passed when opening the fallback adapter and used to derive chunk durations
741
+ channels: 1,
742
+ container: "raw",
743
+ encoding: "pcm_s16le",
744
+ sampleRateHz: 16000
745
+ };
746
/**
 * Coerces any thrown value into an `Error`.
 *
 * @param {*} value - Thrown or rejected value.
 * @returns {Error} `value` itself when it already is an `Error`; otherwise a
 *   new `Error` whose message is `String(value)`.
 */
var toError = (value) => {
  if (value instanceof Error) {
    return value;
  }
  return new Error(String(value));
};
747
/**
 * Creates the state of a turn that has not received anything yet: empty
 * texts, no timestamps, and a new empty transcript list.
 *
 * @returns {object} A fresh current-turn object.
 */
var createEmptyCurrentTurn = () => {
  const emptyTurn = {
    finalText: "",
    lastSpeechAt: undefined,
    lastTranscriptAt: undefined,
    partialEndedAt: undefined,
    partialStartedAt: undefined,
    partialText: "",
    silenceStartedAt: undefined,
    transcripts: []
  };
  return emptyTurn;
};
757
/**
 * Makes a shallow copy of a transcript; nested values are shared.
 *
 * @param {object} transcript - Transcript to copy.
 * @returns {object} A new object with the same own enumerable properties.
 */
var cloneTranscript = (transcript) => {
  const copy = { ...transcript };
  return copy;
};
758
/**
 * Counts whitespace-delimited words in `text`.
 *
 * @param {string} text - Text to count words in.
 * @returns {number} Number of words; 0 for empty or whitespace-only text.
 */
var countWords2 = (text) => {
  // Each run of non-whitespace characters is one word.
  const words = text.match(/\S+/g) ?? [];
  return words.length;
};
759
/**
 * Trims `text` and collapses every whitespace run into one space.
 *
 * @param {string} text - Raw text.
 * @returns {string} The normalized text.
 */
var normalizeText2 = (text) => {
  const words = text.split(/\s+/).filter(Boolean);
  return words.join(" ");
};
760
/**
 * Converts a chunk's byte length into milliseconds using `DEFAULT_FORMAT`
 * (sample rate × channels × 2 bytes per sample).
 *
 * @param {{ byteLength: number }} chunk - Audio chunk.
 * @returns {number} Duration of the chunk in milliseconds.
 */
var getAudioChunkDurationMs = (chunk) => {
  const bytesPerSecond = DEFAULT_FORMAT.sampleRateHz * DEFAULT_FORMAT.channels * 2;
  return chunk.byteLength / bytesPerSecond * 1000;
};
761
/**
 * Sums the durations of all buffered audio chunks.
 *
 * @param {Array<{ byteLength: number }>} chunks - Buffered chunks.
 * @returns {number} Total duration in milliseconds; 0 when there are none.
 */
var getBufferedAudioDurationMs = (chunks) => {
  let totalMs = 0;
  for (const chunk of chunks) {
    totalMs += getAudioChunkDurationMs(chunk);
  }
  return totalMs;
};
762
/**
 * Averages the numeric `confidence` values of the given transcripts.
 * Transcripts whose confidence is not a number are ignored.
 *
 * @param {Array<{ confidence?: number }>} transcripts - Transcripts to average.
 * @returns {number} The mean confidence, or 0 when no transcript has one.
 */
var calculateMeanConfidence = (transcripts) => {
  const scores = [];
  for (const transcript of transcripts) {
    if (typeof transcript.confidence === "number") {
      scores.push(transcript.confidence);
    }
  }
  if (scores.length === 0) {
    return 0;
  }
  let sum = 0;
  for (const score of scores) {
    sum += score;
  }
  return sum / scores.length;
};
776
/**
 * Builds the quality record attached to a turn.
 *
 * @param {Array<object>} transcripts - Transcripts selected for the turn.
 * @param {*} source - Stored as `source`.
 * @param {*} fallbackUsed - Stored as `fallbackUsed`.
 * @param {*} fallbackDiagnostics - Stored as `fallback`.
 * @param {*} correctionDiagnostics - Stored as `correction`.
 * @returns {object} Counts of final/partial/selected transcripts, the number
 *   of transcripts with a numeric confidence, and their average
 *   (`undefined` when there are none), plus the pass-through fields.
 */
var createTurnQuality = (transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics) => {
  let confidenceSum = 0;
  let confidenceSampleCount = 0;
  let finalTranscriptCount = 0;
  let partialTranscriptCount = 0;
  transcripts.forEach((transcript) => {
    if (typeof transcript.confidence === "number") {
      confidenceSum += transcript.confidence;
      confidenceSampleCount += 1;
    }
    if (transcript.isFinal) {
      finalTranscriptCount += 1;
    } else {
      partialTranscriptCount += 1;
    }
  });
  let averageConfidence;
  if (confidenceSampleCount > 0) {
    averageConfidence = confidenceSum / confidenceSampleCount;
  }
  return {
    averageConfidence,
    confidenceSampleCount,
    correction: correctionDiagnostics,
    fallback: fallbackDiagnostics,
    fallbackUsed,
    finalTranscriptCount,
    partialTranscriptCount,
    selectedTranscriptCount: transcripts.length,
    source
  };
};
791
/**
 * Normalizes correction text by delegating to `normalizeText2`.
 *
 * @param {string} text - Raw correction text.
 * @returns {string} The normalized text.
 */
var normalizeCorrectionText = (text) => {
  return normalizeText2(text);
};
792
/**
 * Decides whether the fallback transcription should run for a candidate turn.
 *
 * - "always": always run.
 * - "empty-turn": run when the word count is below `config.minTextLength`.
 * - "low-confidence": run when the mean confidence is above 0 and below
 *   `config.confidenceThreshold`.
 * - any other trigger: run when either of the two conditions above holds.
 *
 * @param {{ text: string, transcripts: Array<object> }} candidate - Primary result.
 * @param {{ trigger: string, minTextLength: number, confidenceThreshold: number }} config
 * @returns {boolean} Whether the fallback is needed.
 */
var isFallbackNeeded = (candidate, config) => {
  const wordCount = countWords2(normalizeText2(candidate.text));
  const isTooShort = () => wordCount < config.minTextLength;
  const isLowConfidence = () => {
    const meanConfidence = calculateMeanConfidence(candidate.transcripts);
    // A mean of 0 means "no confidence data", which never triggers fallback.
    return meanConfidence > 0 && meanConfidence < config.confidenceThreshold;
  };
  switch (config.trigger) {
    case "always":
      return true;
    case "empty-turn":
      return isTooShort();
    case "low-confidence":
      return isLowConfidence();
    default:
      return isLowConfidence() || isTooShort();
  }
};
807
/**
 * Chooses between the primary candidate and the fallback result for a turn.
 *
 * Rules, applied in order: an empty side loses; a word-count difference of at
 * least `FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO` (relative to the larger
 * count) picks the longer text; a confidence lead greater than
 * `FALLBACK_CONFIDENCE_SELECTION_DELTA` picks the more confident side; a
 * longer fallback wins the tie-break; otherwise the primary is kept.
 *
 * @param {{ text: string, wordCount: number, confidence: number }} candidate - Primary result.
 * @param {{ text: string, wordCount: number, confidence: number }} fallback - Fallback result.
 * @returns {{ reason: string, winner: object }} The winner and why it won.
 */
var selectBetterTurnText = (candidate, fallback) => {
  const decide = (reason, winner) => ({ reason, winner });
  if (!fallback.text) {
    return decide("fallback-empty", candidate);
  }
  if (!candidate.text) {
    return decide("primary-empty", fallback);
  }
  const wordCountDelta = fallback.wordCount - candidate.wordCount;
  const largestWordCount = Math.max(candidate.wordCount, fallback.wordCount, 1);
  const relativeDelta = Math.abs(wordCountDelta) / largestWordCount;
  if (relativeDelta >= FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO && wordCountDelta !== 0) {
    return decide("word-count-margin", wordCountDelta > 0 ? fallback : candidate);
  }
  if (fallback.confidence > candidate.confidence + FALLBACK_CONFIDENCE_SELECTION_DELTA) {
    return decide("confidence-margin", fallback);
  }
  if (candidate.confidence > fallback.confidence + FALLBACK_CONFIDENCE_SELECTION_DELTA) {
    return decide("kept-primary", candidate);
  }
  if (fallback.wordCount > candidate.wordCount) {
    return decide("word-count-tiebreak", fallback);
  }
  return decide("kept-primary", candidate);
};
852
/**
 * Replaces `session.turns` with a new array in which the turn matching
 * `turnId` has its `assistantText` and `result` updated. A field that is
 * null or undefined in `input` keeps the turn's existing value. Other turns
 * are carried over as the same objects.
 *
 * @param {{ turns: Array<object> }} session - Session to update (mutated).
 * @param {*} turnId - Id of the turn to update.
 * @param {{ assistantText?: *, result?: * }} input - New values.
 * @returns {void}
 */
var setTurnResult = (session, turnId, input) => {
  const patchTurn = (turn) => {
    if (turn.id !== turnId) {
      return turn;
    }
    return {
      ...turn,
      assistantText: input.assistantText ?? turn.assistantText,
      result: input.result ?? turn.result
    };
  };
  session.turns = session.turns.map(patchTurn);
};
859
+ var createVoiceSession = (options) => {
860
+ const logger = resolveLogger(options.logger);
861
+ const reconnect = {
862
+ maxAttempts: options.reconnect.maxAttempts ?? DEFAULT_MAX_RECONNECT_ATTEMPTS,
863
+ strategy: options.reconnect.strategy ?? "resume-last-turn",
864
+ timeout: options.reconnect.timeout ?? DEFAULT_RECONNECT_TIMEOUT
865
+ };
866
+ const turnDetection = {
867
+ silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS,
868
+ speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD,
869
+ transcriptStabilityMs: options.turnDetection.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS
870
+ };
871
+ const sttFallback = options.sttFallback ? {
872
+ adapter: options.sttFallback.adapter,
873
+ completionTimeoutMs: options.sttFallback.completionTimeoutMs ?? DEFAULT_FALLBACK_COMPLETION_TIMEOUT_MS,
874
+ confidenceThreshold: options.sttFallback.confidenceThreshold ?? DEFAULT_FALLBACK_CONFIDENCE_THRESHOLD,
875
+ maxAttemptsPerTurn: options.sttFallback.maxAttemptsPerTurn ?? DEFAULT_FALLBACK_MAX_ATTEMPTS_PER_TURN,
876
+ minTextLength: options.sttFallback.minTextLength ?? DEFAULT_FALLBACK_MIN_TEXT_LENGTH,
877
+ replayWindowMs: options.sttFallback.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS,
878
+ settleMs: options.sttFallback.settleMs ?? DEFAULT_FALLBACK_SETTLE_MS,
879
+ trigger: options.sttFallback.trigger ?? "empty-or-low-confidence"
880
+ } : undefined;
881
+ const phraseHints = options.phraseHints ?? [];
882
+ let socket = options.socket;
883
+ let sttSession = null;
884
+ let silenceTimer = null;
885
+ let speechDetected = false;
886
+ let operationQueue = Promise.resolve();
887
+ let adapterGenerationCounter = 0;
888
+ let activeAdapterGeneration = 0;
889
+ const currentTurnAudio = [];
890
+ let fallbackAttemptsForCurrentTurn = 0;
891
+ const pruneTurnAudio = () => {
892
+ const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
893
+ const cutoffAt = Date.now() - replayWindowMs;
894
+ let index = 0;
895
+ while (index < currentTurnAudio.length && currentTurnAudio[index].recordedAt < cutoffAt) {
896
+ index += 1;
897
+ }
898
+ if (index > 0) {
899
+ currentTurnAudio.splice(0, index);
900
+ }
901
+ };
902
+ const pushTurnAudio = (audio) => {
903
+ const chunk = audio instanceof ArrayBuffer ? new Uint8Array(audio.slice(0)) : new Uint8Array(audio.buffer.slice(audio.byteOffset, audio.byteOffset + audio.byteLength));
904
+ currentTurnAudio.push({
905
+ chunk,
906
+ recordedAt: Date.now()
907
+ });
908
+ pruneTurnAudio();
909
+ };
910
+ const getFallbackWindowAudio = () => {
911
+ if (!sttFallback?.adapter) {
912
+ return [];
913
+ }
914
+ pruneTurnAudio();
915
+ return currentTurnAudio.map((audio) => audio.chunk);
916
+ };
917
+ const clearSilenceTimer = () => {
918
+ if (!silenceTimer) {
919
+ return;
920
+ }
921
+ clearTimeout(silenceTimer);
922
+ silenceTimer = null;
923
+ };
924
+ const send = async (message) => {
925
+ try {
926
+ await Promise.resolve(socket.send(JSON.stringify(message)));
927
+ } catch (error) {
928
+ logger.warn("voice socket send failed", {
929
+ error: toError(error).message,
930
+ sessionId: options.id,
931
+ type: message.type
932
+ });
933
+ }
934
+ };
935
+ const readSession = async () => options.store.getOrCreate(options.id);
936
+ const writeSession = async (mutate) => {
937
+ const session = await options.store.getOrCreate(options.id);
938
+ mutate(session);
939
+ await options.store.set(options.id, session);
940
+ return session;
941
+ };
942
+ const runSerial = (phase, operation) => {
943
+ const result = operationQueue.then(async () => {
944
+ logger.debug("voice session operation", {
945
+ phase,
946
+ sessionId: options.id
947
+ });
948
+ return await operation();
949
+ });
950
+ operationQueue = result.then(() => {
951
+ return;
952
+ }, () => {
953
+ return;
954
+ });
955
+ return result;
956
+ };
957
+ const closeAdapter = async (reason) => {
958
+ if (!sttSession) {
959
+ return;
960
+ }
961
+ const activeSession = sttSession;
962
+ sttSession = null;
963
+ activeAdapterGeneration = 0;
964
+ try {
965
+ await activeSession.close(reason);
966
+ } catch (error) {
967
+ logger.warn("voice stt close failed", {
968
+ error: toError(error).message,
969
+ sessionId: options.id
970
+ });
971
+ }
972
+ };
973
+ const scheduleTurnCommit = (delayMs, reason, reset = true) => {
974
+ if (!reset && silenceTimer) {
975
+ return;
976
+ }
977
+ if (reset) {
978
+ clearSilenceTimer();
979
+ }
980
+ silenceTimer = setTimeout(() => {
981
+ silenceTimer = null;
982
+ api.commitTurn(reason);
983
+ }, delayMs);
984
+ };
985
+ const scheduleSilenceCommit = (delayMs = turnDetection.silenceMs, reset = true) => scheduleTurnCommit(delayMs, "silence", reset);
986
+ const requestTurnCommit = async (reason) => {
987
+ const session = await readSession();
988
+ const text = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText, {
989
+ partialEndedAtMs: session.currentTurn.partialEndedAt,
990
+ partialStartedAtMs: session.currentTurn.partialStartedAt
991
+ });
992
+ if (!text) {
993
+ return;
994
+ }
995
+ const transcriptStabilityAge = session.currentTurn.lastTranscriptAt !== undefined ? Date.now() - session.currentTurn.lastTranscriptAt : undefined;
996
+ if (reason !== "manual" && typeof transcriptStabilityAge === "number" && transcriptStabilityAge < turnDetection.transcriptStabilityMs) {
997
+ scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason);
998
+ return;
999
+ }
1000
+ await commitTurnInternal(reason);
1001
+ };
1002
+ const failInternal = async (error) => {
1003
+ clearSilenceTimer();
1004
+ const session = await writeSession((currentSession) => {
1005
+ currentSession.lastActivityAt = Date.now();
1006
+ currentSession.status = "failed";
1007
+ });
1008
+ const resolvedError = toError(error);
1009
+ await send({
1010
+ message: resolvedError.message,
1011
+ recoverable: false,
1012
+ type: "error"
1013
+ });
1014
+ await closeAdapter("failed");
1015
+ speechDetected = false;
1016
+ rewindFallbackTurnAudio();
1017
+ await options.route.onError?.({
1018
+ api,
1019
+ context: options.context,
1020
+ error: resolvedError,
1021
+ session,
1022
+ sessionId: options.id
1023
+ });
1024
+ };
1025
/**
 * Marks the session as completed and runs the completion side effects.
 *
 * The stored record is only modified when it is not already "completed"; the
 * "complete" frame, adapter shutdown and `route.onComplete` call happen on
 * every invocation.
 *
 * @param {unknown} [result] - Optional result attached to the last stored turn.
 * @returns {Promise<void>}
 */
const completeInternal = async (result) => {
  clearSilenceTimer();
  const completedSession = await writeSession((record) => {
    if (record.status === "completed") {
      return;
    }
    record.lastActivityAt = Date.now();
    record.status = "completed";
    if (result === undefined) {
      return;
    }
    // Attach the result to the most recent turn, if there is one.
    const finalTurn = record.turns.at(-1);
    if (finalTurn) {
      setTurnResult(record, finalTurn.id, { result });
    }
  });
  await send({
    sessionId: options.id,
    type: "complete"
  });
  await closeAdapter("complete");
  speechDetected = false;
  rewindFallbackTurnAudio();
  await options.route.onComplete({
    api,
    context: options.context,
    session: completedSession
  });
};
1055
/**
 * Handles an "error" event coming from the speech-to-text adapter.
 *
 * Recoverable errors are only forwarded to the client. Non-recoverable errors
 * are delegated to `failInternal`, which sends the (non-recoverable) error
 * frame itself; sending one here as well would deliver the same error to the
 * client twice.
 *
 * @param {{ error: unknown, recoverable?: boolean }} event - Adapter error event.
 * @returns {Promise<void>}
 */
const handleError = async (event) => {
  if (!event.recoverable) {
    await failInternal(event.error);
    return;
  }
  await send({
    message: event.error.message,
    recoverable: event.recoverable,
    type: "error"
  });
};
1065
/**
 * Handles a "close" event coming from the speech-to-text adapter.
 *
 * An explicitly non-recoverable close (`recoverable === false`) fails the
 * session; any other close just closes the adapter, using a generic reason
 * when the event carries none.
 *
 * @param {{ reason?: string, recoverable?: boolean }} event - Adapter close event.
 * @returns {Promise<void>}
 */
const handleClose = async (event) => {
  const { reason, recoverable } = event;
  if (recoverable === false) {
    await failInternal(new Error(reason ?? "Speech-to-text session closed"));
    return;
  }
  await closeAdapter(reason || "provider stream closed");
};
1076
/**
 * Resets the per-turn fallback state: empties the buffered turn audio in place
 * and sets the fallback attempt counter back to zero.
 */
const rewindFallbackTurnAudio = () => {
  currentTurnAudio.length = 0;
  fallbackAttemptsForCurrentTurn = 0;
};
1080
/**
 * Replays the buffered turn audio through the fallback STT adapter and decides
 * whether the fallback transcript should replace the primary one.
 *
 * Returns `null` when no fallback is configured, the per-turn attempt budget is
 * used up, `isFallbackNeeded` says no, there is no audio to replay, the fallback
 * adapter cannot be opened, or it produced no transcripts. Failures while
 * streaming to the fallback adapter are logged and do not throw.
 *
 * @param {string} primaryText - Turn text produced by the primary adapter.
 * @param {Array<object>} primaryTranscripts - Transcripts behind `primaryText`.
 * @returns {Promise<null | {
 *   diagnostics: object,
 *   fallbackUsed: boolean,
 *   source: "primary" | "fallback",
 *   text: string,
 *   transcripts: Array<object>
 * }>}
 */
const runFallbackTranscription = async (primaryText, primaryTranscripts) => {
  if (!sttFallback?.adapter || fallbackAttemptsForCurrentTurn >= sttFallback.maxAttemptsPerTurn) {
    return null;
  }
  const candidate = {
    text: primaryText,
    transcripts: primaryTranscripts
  };
  if (!isFallbackNeeded(candidate, sttFallback)) {
    return null;
  }
  // The attempt is counted before the audio check, so an empty replay window
  // still consumes one attempt for the current turn.
  fallbackAttemptsForCurrentTurn += 1;
  const replayAudio = getFallbackWindowAudio();
  if (replayAudio.length === 0) {
    return null;
  }
  let fallbackSession = null;
  const fallbackTranscripts = [];
  let fallbackClosed = false;
  let fallbackEndOfTurnReceived = false;
  let fallbackFinalReceived = false;
  let lastFallbackTranscriptAt = 0;
  try {
    fallbackSession = await sttFallback.adapter.open({
      format: DEFAULT_FORMAT,
      phraseHints,
      sessionId: `${options.id}:fallback:${fallbackAttemptsForCurrentTurn}`
    });
  } catch (error) {
    logger.warn("voice stt fallback open failed", {
      error: toError(error).message,
      sessionId: options.id
    });
    return null;
  }
  // Both partial and final transcripts are collected; each `on` call is
  // expected to return an unsubscribe function.
  const unsubscribers = [
    fallbackSession.on("final", ({ transcript }) => {
      fallbackFinalReceived = true;
      lastFallbackTranscriptAt = Date.now();
      fallbackTranscripts.push(cloneTranscript(transcript));
    }),
    fallbackSession.on("partial", ({ transcript }) => {
      lastFallbackTranscriptAt = Date.now();
      fallbackTranscripts.push(cloneTranscript(transcript));
    }),
    fallbackSession.on("endOfTurn", () => {
      fallbackEndOfTurnReceived = true;
    }),
    fallbackSession.on("error", (event) => {
      logger.warn("voice stt fallback error", {
        error: toError(event.error).message,
        sessionId: options.id
      });
    }),
    fallbackSession.on("close", () => {
      fallbackClosed = true;
    })
  ];
  // Best-effort close: a failing close is logged, never rethrown.
  const closeFallback = async (reason) => {
    if (!fallbackSession) {
      return;
    }
    try {
      await fallbackSession.close(reason);
    } catch (error) {
      logger.warn("voice stt fallback close failed", {
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      fallbackSession = null;
    }
  };
  try {
    for (const chunk of replayAudio) {
      await fallbackSession.send(chunk);
    }
    const replayDurationMs = getBufferedAudioDurationMs(replayAudio);
    // Wait at least the configured timeout; longer replays get more time
    // (18% of their duration or 4x settleMs), capped at 4 seconds.
    const completionTimeoutMs = Math.max(
      sttFallback.completionTimeoutMs,
      Math.min(4000, Math.max(sttFallback.settleMs * 4, Math.round(replayDurationMs * 0.18)))
    );
    const waitStartedAt = Date.now();
    while (Date.now() - waitStartedAt < completionTimeoutMs) {
      const idleMs = lastFallbackTranscriptAt > 0 ? Date.now() - lastFallbackTranscriptAt : Date.now() - waitStartedAt;
      const settled = idleMs >= sttFallback.settleMs;
      if (fallbackEndOfTurnReceived && settled) {
        break;
      }
      if (fallbackFinalReceived && settled) {
        break;
      }
      if (fallbackClosed && (lastFallbackTranscriptAt === 0 || settled)) {
        break;
      }
      await Bun.sleep(Math.min(75, Math.max(25, sttFallback.settleMs / 2)));
    }
  } catch (error) {
    logger.warn("voice stt fallback failed", {
      error: toError(error).message,
      sessionId: options.id
    });
  } finally {
    await closeFallback("fallback-complete");
    for (const unsubscribe of unsubscribers) {
      unsubscribe();
    }
  }
  if (fallbackTranscripts.length === 0) {
    return null;
  }
  const fallbackText = buildTurnText(fallbackTranscripts, "", {});
  const fallbackCandidate = {
    confidence: calculateMeanConfidence(fallbackTranscripts),
    text: fallbackText,
    wordCount: countWords2(normalizeText2(fallbackText))
  };
  const primaryCandidate = {
    confidence: calculateMeanConfidence(primaryTranscripts),
    text: primaryText,
    wordCount: countWords2(normalizeText2(primaryText))
  };
  const selection = selectBetterTurnText(primaryCandidate, fallbackCandidate);
  // One flag drives both the diagnostics and the returned source, so
  // `diagnostics.selected` can no longer disagree with `fallbackUsed` when
  // both candidates carry identical text (the primary wins in that case).
  const fallbackSelected = selection.winner.text !== primaryCandidate.text;
  const diagnostics = {
    attempted: true,
    fallbackConfidence: fallbackCandidate.confidence,
    fallbackText: fallbackCandidate.text,
    fallbackWordCount: fallbackCandidate.wordCount,
    primaryConfidence: primaryCandidate.confidence,
    primaryText,
    primaryWordCount: primaryCandidate.wordCount,
    selected: fallbackSelected,
    selectionReason: selection.reason,
    trigger: sttFallback.trigger
  };
  if (!fallbackSelected) {
    return {
      diagnostics,
      fallbackUsed: false,
      source: "primary",
      text: primaryText,
      transcripts: primaryTranscripts.map((transcript) => ({
        ...transcript,
        isFinal: true
      }))
    };
  }
  // fallbackTranscripts is non-empty here (checked above), so no placeholder
  // transcript is ever needed.
  return {
    diagnostics,
    fallbackUsed: true,
    source: "fallback",
    text: selection.winner.text,
    transcripts: fallbackTranscripts.map((transcript) => ({
      ...transcript,
      isFinal: true
    }))
  };
};
1236
/**
 * Returns the ids of the final transcripts, or the ids of all transcripts when
 * none of them is final.
 *
 * @param {Array<{ id: string, isFinal?: boolean }>} transcripts
 * @returns {string[]}
 */
const getFinalTranscriptIds = (transcripts) => {
  const idsOfFinals = transcripts.flatMap((entry) => (entry.isFinal ? [entry.id] : []));
  if (idsOfFinals.length > 0) {
    return idsOfFinals;
  }
  return transcripts.map((entry) => entry.id);
};
1241
/**
 * Runs the optional `route.correctTurn` hook on a turn about to be committed.
 *
 * The hook may return a string (the corrected text), an object with optional
 * `text`, `metadata`, `provider` and `reason` fields, or nothing. A `null`
 * result is treated like "no correction" rather than dereferenced
 * (`typeof null` is "object").
 *
 * @param {{ text: string, transcripts: Array<object>, session: object, fallbackDiagnostics?: object }} input
 * @returns {Promise<undefined | { diagnostics: object, text: string }>}
 *   `undefined` when no hook is configured; otherwise the text to commit plus
 *   diagnostics describing the correction.
 */
const runTurnCorrection = async (input) => {
  if (!options.route.correctTurn) {
    return;
  }
  const originalText = input.text;
  const result = await options.route.correctTurn({
    api,
    context: options.context,
    fallback: input.fallbackDiagnostics,
    phraseHints,
    session: input.session,
    text: originalText,
    transcripts: input.transcripts.map(cloneTranscript)
  });
  // Only a real (non-null) object can carry metadata/provider/reason.
  const details = result !== null && typeof result === "object" ? result : undefined;
  const nextText = typeof result === "string" ? result : typeof result?.text === "string" ? result.text : originalText;
  const correctedText = normalizeCorrectionText(nextText);
  const normalizedOriginal = normalizeCorrectionText(originalText);
  const hasCorrection = correctedText.length > 0;
  return {
    diagnostics: {
      attempted: true,
      changed: hasCorrection && correctedText !== normalizedOriginal,
      correctedText: hasCorrection ? correctedText : normalizedOriginal,
      metadata: details?.metadata,
      originalText,
      provider: details?.provider,
      reason: details?.reason
    },
    // An empty correction never wipes the turn: fall back to the original text.
    text: hasCorrection ? correctedText : originalText
  };
};
1271
/**
 * Makes sure `session.lastCommittedTurn` exists, installing an empty guard
 * record (no commit time, signature, text or transcript ids) when it is missing.
 *
 * @param {object} session - Session record; mutated in place.
 * @returns {object} The same session object.
 */
const ensureCommittedTurnGuard = (session) => {
  session.lastCommittedTurn ||= {
    committedAt: 0,
    signature: "",
    text: "",
    transcriptIds: []
  };
  return session;
};
1282
/**
 * Builds the dedupe signature of a turn: the normalized text, a "|" separator
 * and the comma-joined transcript ids.
 *
 * @param {object} session - Session whose current turn supplies the ids by default.
 * @param {string} finalText - Turn text to normalize.
 * @param {string[]} [transcriptIdsOverride] - Ids to use instead of the current turn's.
 * @returns {string}
 */
const buildTurnSignature = (session, finalText, transcriptIdsOverride) => {
  const ids = transcriptIdsOverride ?? getFinalTranscriptIds(session.currentTurn.transcripts);
  const normalizedText = normalizeText2(finalText);
  const joinedIds = ids.join(",");
  return `${normalizedText}|${joinedIds}`;
};
1286
/**
 * Decides whether committing `finalText` now would repeat the last committed turn.
 *
 * A commit is a duplicate only when the previous commit is recent (inside
 * DEFAULT_DUPLICATE_TURN_WINDOW_MS) and either
 *  - the normalized text is the same and no audio arrived since that commit, or
 *  - the signature matches and every final transcript of the current turn was
 *    already part of the previous commit.
 *
 * @param {object} session - Session holding `currentTurn` and `lastCommittedTurn`.
 * @param {string} finalText - Text about to be committed.
 * @returns {boolean}
 */
const isDuplicateTurnCommit = (session, finalText) => {
  const currentSignature = buildTurnSignature(session, finalText);
  const previous = session.lastCommittedTurn;
  const previousCommittedAt = previous?.committedAt ?? 0;
  const previousText = normalizeText2(previous?.text ?? "");
  const sameText = normalizeText2(finalText) === previousText;
  const noAudioSinceCommit = (session.currentTurn.lastAudioAt ?? 0) <= previousCommittedAt;
  const withinWindow = previousCommittedAt > 0 && Date.now() - previousCommittedAt < DEFAULT_DUPLICATE_TURN_WINDOW_MS;
  if (!withinWindow) {
    return false;
  }
  if (sameText && noAudioSinceCommit) {
    return true;
  }
  if (currentSignature !== (previous?.signature ?? "")) {
    return false;
  }
  const alreadyCommittedIds = new Set(previous?.transcriptIds ?? []);
  return session.currentTurn.transcripts.every(
    (transcript) => !transcript.isFinal || alreadyCommittedIds.has(transcript.id)
  );
};
1308
/**
 * Records the turn that was just committed on `session.lastCommittedTurn`
 * (commit time, signature, normalized text and transcript ids), keeping any
 * other fields already stored there.
 *
 * @param {object} session - Session record; mutated in place.
 * @param {string} finalText - Committed turn text.
 * @param {Array<object>} committedTranscripts - Transcripts of the committed turn.
 */
const markTurnCommitted = (session, finalText, committedTranscripts) => {
  const previousGuard = session.lastCommittedTurn ?? {};
  const committedIds = getFinalTranscriptIds(committedTranscripts);
  session.lastCommittedTurn = {
    ...previousGuard,
    committedAt: Date.now(),
    signature: buildTurnSignature(session, finalText, committedIds),
    text: normalizeText2(finalText),
    transcriptIds: getFinalTranscriptIds(committedTranscripts)
  };
};
1317
/**
 * Applies a partial transcript to the current turn and forwards it to the client.
 *
 * The turn's partial time range falls back to the previously stored values when
 * the transcript carries none; the stored partial text is rebuilt from the
 * text chosen by `selectPreferredTranscriptText`.
 *
 * @param {object} transcript - Partial transcript from the adapter.
 * @returns {Promise<void>}
 */
const handlePartial = async (transcript) => {
  await writeSession((session) => {
    const turn = session.currentTurn;
    const startedAt = transcript.startedAtMs ?? turn.partialStartedAt;
    const endedAt = transcript.endedAtMs ?? turn.partialEndedAt;
    const preferredText = selectPreferredTranscriptText(turn.partialText, transcript.text);
    turn.lastTranscriptAt = Date.now();
    turn.partialStartedAt = startedAt;
    turn.partialEndedAt = endedAt;
    turn.partialText = buildTurnText(turn.transcripts, preferredText, {
      partialEndedAtMs: endedAt,
      partialStartedAtMs: startedAt
    });
    session.lastActivityAt = Date.now();
    session.status = "active";
  });
  await send({
    transcript,
    type: "partial"
  });
};
1337
/**
 * Applies a final transcript to the session and forwards it to the client.
 *
 * A transcript whose id is already part of the current turn is not stored
 * again, but the turn's final text and activity timestamps are refreshed
 * either way.
 *
 * @param {object} transcript - Final transcript from the adapter.
 * @returns {Promise<void>}
 */
const handleFinal = async (transcript) => {
  await writeSession((session) => {
    const turn = session.currentTurn;
    const isKnown = turn.transcripts.some((stored) => stored.id === transcript.id);
    if (!isKnown) {
      // New arrays are assigned instead of pushing into the existing ones.
      turn.transcripts = [...turn.transcripts, cloneTranscript(transcript)];
      session.transcripts = [...session.transcripts, cloneTranscript(transcript)];
    }
    turn.finalText = buildTurnText(turn.transcripts, turn.partialText, {
      partialEndedAtMs: turn.partialEndedAt,
      partialStartedAtMs: turn.partialStartedAt
    });
    turn.lastTranscriptAt = Date.now();
    session.lastActivityAt = Date.now();
    session.status = "active";
  });
  await send({
    transcript,
    type: "final"
  });
};
1363
/**
 * Re-arms the silence commit for a turn that still has uncommitted text.
 *
 * Without pending text the speech flag is cleared and nothing is scheduled.
 * Otherwise the commit is scheduled after whatever remains of the silence
 * window and of the transcript stability window, whichever is longer.
 *
 * @param {object} session - Session whose `currentTurn` is inspected.
 */
const resumePendingTurnCommit = (session) => {
  const turn = session.currentTurn;
  const pendingText = buildTurnText(turn.transcripts, turn.partialText, {
    partialEndedAtMs: turn.partialEndedAt,
    partialStartedAtMs: turn.partialStartedAt
  });
  if (!pendingText) {
    speechDetected = false;
    return;
  }
  speechDetected = true;
  // Time since silence began, else since the last speech, else zero.
  let audioAge = 0;
  if (turn.silenceStartedAt !== undefined) {
    audioAge = Date.now() - turn.silenceStartedAt;
  } else if (turn.lastSpeechAt !== undefined) {
    audioAge = Date.now() - turn.lastSpeechAt;
  }
  // With no transcript timestamp the stability window counts as elapsed.
  let transcriptAge = turnDetection.transcriptStabilityMs;
  if (turn.lastTranscriptAt !== undefined) {
    transcriptAge = Date.now() - turn.lastTranscriptAt;
  }
  const remainingSilenceMs = turnDetection.silenceMs - audioAge;
  const remainingStabilityMs = turnDetection.transcriptStabilityMs - transcriptAge;
  scheduleSilenceCommit(Math.max(0, remainingSilenceMs, remainingStabilityMs));
};
1378
/**
 * Returns the open speech-to-text session, opening and wiring a new one when
 * none is active.
 *
 * Each opened session gets its own generation number. Adapter events are
 * queued through `runSerial` and dropped when, by the time they run, a
 * different generation has become active.
 *
 * @returns {Promise<object>} The active adapter session.
 */
const ensureAdapter = async () => {
  if (sttSession) {
    return sttSession;
  }
  const openedSession = await options.stt.open({
    format: DEFAULT_FORMAT,
    phraseHints,
    sessionId: options.id
  });
  adapterGenerationCounter += 1;
  const generation = adapterGenerationCounter;
  sttSession = openedSession;
  activeAdapterGeneration = generation;
  // Queues `handler` under `phase`; stale generations are ignored.
  const runAdapterEvent = (phase, handler) => {
    runSerial(phase, async () => {
      if (activeAdapterGeneration === generation) {
        await handler();
      }
    });
  };
  openedSession.on("partial", ({ transcript }) => {
    runAdapterEvent("adapter.partial", () => handlePartial(transcript));
  });
  openedSession.on("final", ({ transcript }) => {
    runAdapterEvent("adapter.final", () => handleFinal(transcript));
  });
  openedSession.on("endOfTurn", ({ reason }) => {
    runAdapterEvent("adapter.endOfTurn", async () => {
      clearSilenceTimer();
      await requestTurnCommit(reason);
    });
  });
  openedSession.on("error", (event) => {
    runAdapterEvent("adapter.error", () => handleError(event));
  });
  openedSession.on("close", (event) => {
    runAdapterEvent("adapter.close", () => handleClose(event));
  });
  return openedSession;
};
1418
/**
 * Hands a committed turn to `route.onTurn` and applies whatever it returns.
 *
 * - `assistantText` is stored on the turn and sent as an "assistant" frame.
 * - `result` (anything but `undefined`) is stored on the turn.
 * - `complete` finishes the session with `result`.
 *
 * @param {object} session - Session snapshot passed to the hook.
 * @param {object} turn - The committed turn.
 * @returns {Promise<void>}
 */
const completeTurn = async (session, turn) => {
  const output = await options.route.onTurn({
    api,
    context: options.context,
    session,
    turn
  });
  if (!output) {
    return;
  }
  if (output.assistantText) {
    await writeSession((record) => {
      setTurnResult(record, turn.id, {
        assistantText: output.assistantText
      });
    });
    await send({
      text: output.assistantText,
      turnId: turn.id,
      type: "assistant"
    });
  }
  if (output.result !== undefined) {
    await writeSession((record) => {
      setTurnResult(record, turn.id, {
        result: output.result
      });
    });
  }
  if (output.complete) {
    await completeInternal(output.result);
  }
};
1448
/**
 * Commits the current turn: builds its text, optionally improves it through the
 * fallback transcription and the correction hook, then stores the turn, notifies
 * the client and hands the turn to `completeTurn`.
 *
 * Nothing is committed when the session is already completed/failed, when the
 * final text is empty, or when `isDuplicateTurnCommit` reports a duplicate. A
 * non-manual commit whose latest transcript is younger than
 * `turnDetection.transcriptStabilityMs` is rescheduled instead of committed.
 *
 * @param {string} [reason="manual"] - Why the commit was requested.
 * @returns {Promise<void>}
 */
const commitTurnInternal = async (reason = "manual") => {
  clearSilenceTimer();
  const session = await readSession();
  if (session.status === "completed" || session.status === "failed") {
    return;
  }
  const pendingTurn = session.currentTurn;
  const primaryText = buildTurnText(pendingTurn.transcripts, pendingTurn.partialText, {
    partialEndedAtMs: pendingTurn.partialEndedAt,
    partialStartedAtMs: pendingTurn.partialStartedAt
  });
  let transcripts = pendingTurn.transcripts.length ? pendingTurn.transcripts.map(cloneTranscript) : [];
  let finalText = primaryText;
  const transcriptStabilityAge =
    pendingTurn.lastTranscriptAt !== undefined ? Date.now() - pendingTurn.lastTranscriptAt : undefined;
  // NOTE(review): fallback transcription and correction run before the
  // stability-window check below, so a commit that ends up deferred may already
  // have used a fallback attempt for this turn - confirm this order is intended.
  const fallbackSelection = await runFallbackTranscription(primaryText, pendingTurn.transcripts);
  const source = fallbackSelection?.source ?? "primary";
  const fallbackUsed = fallbackSelection?.fallbackUsed ?? false;
  const fallbackDiagnostics = fallbackSelection?.diagnostics;
  if (fallbackSelection) {
    finalText = fallbackSelection.text;
    if (fallbackSelection.transcripts.length) {
      transcripts = fallbackSelection.transcripts.map(cloneTranscript);
    } else if (!transcripts.length) {
      // Neither side has transcripts: synthesize one from the chosen text.
      transcripts = [
        {
          id: createId(),
          isFinal: false,
          text: finalText
        }
      ];
    }
    if (fallbackSelection.fallbackUsed) {
      logger.info("voice fallback turn selected", {
        reason,
        sessionId: options.id,
        text: finalText
      });
    }
  }
  const correctionSelection = await runTurnCorrection({
    fallbackDiagnostics,
    fallbackUsed,
    session,
    source,
    text: finalText,
    transcripts
  });
  const correctionDiagnostics = correctionSelection?.diagnostics;
  if (correctionSelection) {
    finalText = correctionSelection.text;
  }
  if (!finalText) {
    return;
  }
  if (isDuplicateTurnCommit(session, finalText)) {
    logger.debug("voice turn commit deduped", {
      reason,
      sessionId: options.id
    });
    return;
  }
  const transcriptStillSettling =
    typeof transcriptStabilityAge === "number" && transcriptStabilityAge < turnDetection.transcriptStabilityMs;
  if (transcriptStillSettling && reason !== "manual") {
    scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason, false);
    return;
  }
  const turn = {
    committedAt: Date.now(),
    id: createId(),
    text: finalText,
    quality: createTurnQuality(transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics),
    transcripts:
      transcripts.length > 0
        ? transcripts
        : [
            {
              id: createId(),
              isFinal: false,
              text: finalText
            }
          ]
  };
  const updatedSession = await writeSession((record) => {
    record.committedTurnIds = [...record.committedTurnIds, turn.id];
    record.currentTurn = createEmptyCurrentTurn();
    record.lastActivityAt = Date.now();
    record.status = "active";
    record.turns = [...record.turns, turn];
    markTurnCommitted(record, finalText, transcripts);
  });
  speechDetected = false;
  rewindFallbackTurnAudio();
  logger.info("voice turn committed", {
    reason,
    sessionId: options.id,
    turnId: turn.id
  });
  await send({
    turn,
    type: "turn"
  });
  if (options.sttLifecycle === "turn-scoped") {
    await closeAdapter("turn-commit");
  }
  await completeTurn(updatedSession, turn);
};
1548
/**
 * Attaches a socket to the session, creating, resuming or resetting the stored
 * session record as required, and announces the session to the client.
 *
 * Decision order:
 *  1. No stored session: a new record is created and `route.onSession` fires.
 *  2. The stored session belongs to a different scenario than
 *     `options.scenarioId`: the record is reset and `route.onSession` fires.
 *  3. The stored session is "reconnecting": the reconnect policy decides between
 *     failing, restarting (reset) and resuming with an incremented attempt count.
 *
 * A completed session only gets the "session" and "complete" frames; otherwise
 * any pending turn commit is re-armed and the adapter is opened.
 *
 * @param {object} nextSocket - Socket that becomes the session's active socket.
 * @returns {Promise<void>}
 */
const connectInternal = async (nextSocket) => {
  socket = nextSocket;
  const existingSession = await options.store.get(options.id);
  // Evaluate this BEFORE scenarioId is overwritten below: `session` aliases
  // `existingSession`, so comparing afterwards can never detect a change.
  const scenarioChanged = Boolean(
    existingSession?.scenarioId && options.scenarioId && existingSession.scenarioId !== options.scenarioId
  );
  let session = existingSession ?? createVoiceSessionRecord(options.id, options.scenarioId);
  if (options.scenarioId && session.scenarioId !== options.scenarioId) {
    session.scenarioId = options.scenarioId;
  }
  ensureCommittedTurnGuard(session);
  let shouldFireOnSession = !existingSession;
  if (scenarioChanged) {
    session = resetVoiceSessionRecord(options.id, existingSession, options.scenarioId);
    shouldFireOnSession = true;
  }
  rewindFallbackTurnAudio();
  if (existingSession?.status === "reconnecting") {
    const nextAttempts = existingSession.reconnect.attempts + 1;
    const reconnectExpired =
      existingSession.reconnect.lastDisconnectAt !== undefined &&
      Date.now() - existingSession.reconnect.lastDisconnectAt > reconnect.timeout;
    const tooManyAttempts = nextAttempts > reconnect.maxAttempts;
    const policyExhausted = reconnectExpired || tooManyAttempts;
    if (reconnect.strategy === "fail" && policyExhausted) {
      await failInternal(new Error("Voice session reconnect policy exhausted"));
      return;
    }
    if (reconnect.strategy === "restart" && policyExhausted) {
      session = resetVoiceSessionRecord(options.id, existingSession, options.scenarioId);
      shouldFireOnSession = true;
    } else if (!scenarioChanged) {
      // Resume the previous record. Skipped after a scenario change so the
      // reset performed above is not overwritten by the old session's state.
      session = {
        ...existingSession,
        reconnect: {
          ...existingSession.reconnect,
          attempts: nextAttempts
        },
        status: "active"
      };
    }
  }
  await options.store.set(options.id, session);
  await send({
    sessionId: options.id,
    status: session.status,
    scenarioId: session.scenarioId,
    type: "session"
  });
  if (shouldFireOnSession) {
    await options.route.onSession?.({
      api,
      context: options.context,
      session
    });
  }
  if (session.status === "completed") {
    await send({
      sessionId: options.id,
      type: "complete"
    });
    return;
  }
  resumePendingTurnCommit(session);
  await ensureAdapter();
};
1608
/**
 * Handles the socket going away.
 *
 * The adapter is closed and the fallback audio dropped in every case. With the
 * "fail" reconnect strategy the session fails immediately; otherwise a session
 * that is neither completed nor failed is marked "reconnecting" and the
 * disconnect time is recorded.
 *
 * @param {{ reason?: string }} [event] - Close event, if any.
 * @returns {Promise<void>}
 */
const disconnectInternal = async (event) => {
  const reason = event?.reason;
  clearSilenceTimer();
  await closeAdapter(reason);
  rewindFallbackTurnAudio();
  if (reconnect.strategy === "fail") {
    await failInternal(new Error(reason ?? "Voice socket disconnected"));
    return;
  }
  await writeSession((session) => {
    const isTerminal = session.status === "completed" || session.status === "failed";
    if (isTerminal) {
      return;
    }
    session.lastActivityAt = Date.now();
    session.reconnect.lastDisconnectAt = Date.now();
    session.status = "reconnecting";
  });
  speechDetected = false;
};
1626
/**
 * Processes one incoming audio chunk.
 *
 * The chunk is conditioned, its level is compared with
 * `turnDetection.speechThreshold` to update the turn's speech/silence
 * timestamps, it is buffered for fallback replay while speech is ongoing, a
 * silence commit is scheduled when speech has stopped and the turn has text,
 * and finally the conditioned chunk is sent to the adapter.
 *
 * Audio for a completed or failed session is ignored.
 *
 * @param {unknown} audio - Raw audio chunk as received from the client.
 * @returns {Promise<void>}
 */
const receiveAudioInternal = async (audio) => {
  const snapshot = await readSession();
  if (snapshot.status === "completed" || snapshot.status === "failed") {
    return;
  }
  const adapter = await ensureAdapter();
  const conditionedAudio = conditionAudioChunk(audio, options.audioConditioning);
  const audioLevel = measureAudioLevel(conditionedAudio);
  const isSpeech = audioLevel >= turnDetection.speechThreshold;
  // Decided before speechDetected is updated below.
  const shouldStoreAudio = speechDetected || isSpeech;
  await writeSession((record) => {
    const turn = record.currentTurn;
    turn.lastAudioAt = Date.now();
    record.lastActivityAt = Date.now();
    record.status = "active";
    if (isSpeech) {
      turn.lastSpeechAt = Date.now();
      turn.silenceStartedAt = undefined;
    } else if (speechDetected && turn.silenceStartedAt === undefined) {
      // First quiet chunk after speech: this is where the silence starts.
      turn.silenceStartedAt = Date.now();
    }
  });
  if (shouldStoreAudio) {
    pushTurnAudio(conditionedAudio);
  }
  if (isSpeech) {
    speechDetected = true;
    clearSilenceTimer();
  } else if (speechDetected) {
    const latest = await readSession();
    const turnText = buildTurnText(latest.currentTurn.transcripts, latest.currentTurn.partialText, {
      partialEndedAtMs: latest.currentTurn.partialEndedAt,
      partialStartedAtMs: latest.currentTurn.partialStartedAt
    });
    if (Boolean(turnText)) {
      scheduleSilenceCommit(turnDetection.silenceMs, false);
    }
  }
  await adapter.send(conditionedAudio);
};
1664
/**
 * Public handle of the voice session. Every operation is queued through
 * `runSerial` under its own phase label and delegates to the matching internal
 * function.
 */
const api = {
  id: options.id,
  /** Clears the silence timer, closes the adapter, then closes the socket with code 1000. */
  async close(reason) {
    await runSerial("api.close", async () => {
      clearSilenceTimer();
      await closeAdapter(reason);
      await Promise.resolve(socket.close(1000, reason));
    });
  },
  /** Commits the current turn; `reason` defaults to "manual". */
  async commitTurn(reason = "manual") {
    return runSerial("api.commitTurn", async () => {
      await commitTurnInternal(reason);
    });
  },
  /** Completes the session, optionally attaching `result` to the last turn. */
  async complete(result) {
    return runSerial("api.complete", async () => {
      await completeInternal(result);
    });
  },
  /** Attaches a (new) socket to the session. */
  async connect(nextSocket) {
    return runSerial("api.connect", async () => {
      await connectInternal(nextSocket);
    });
  },
  /** Handles a socket disconnect. */
  async disconnect(event) {
    return runSerial("api.disconnect", async () => {
      await disconnectInternal(event);
    });
  },
  /** Fails the session with `error`. */
  async fail(error) {
    return runSerial("api.fail", async () => {
      await failInternal(error);
    });
  },
  /** Feeds one audio chunk into the session. */
  async receiveAudio(audio) {
    return runSerial("api.receiveAudio", async () => {
      await receiveAudioInternal(audio);
    });
  },
  /** Reads the current session record. */
  async snapshot() {
    return runSerial("api.snapshot", async () => readSession());
  }
};
1693
+ return api;
1694
+ };
1695
+
1696
+ // src/turnProfiles.ts
1697
// Base turn-detection presets, keyed by turn profile name.
// silenceMs and transcriptStabilityMs are millisecond durations; speechThreshold
// is the audio level compared against incoming chunks.
var TURN_PROFILE_DEFAULTS = {
  "balanced": {
    qualityProfile: "general",
    silenceMs: 1400,
    speechThreshold: 0.012,
    transcriptStabilityMs: 1000
  },
  "fast": {
    qualityProfile: "general",
    silenceMs: 700,
    speechThreshold: 0.015,
    transcriptStabilityMs: 450
  },
  "long-form": {
    qualityProfile: "general",
    silenceMs: 2200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1500
  }
};

// Per-quality-profile overrides. "general" overrides nothing, so the turn
// profile preset applies unchanged.
var QUALITY_PROFILE_DEFAULTS = {
  "general": {},
  "accent-heavy": {
    silenceMs: 1200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1200
  },
  "noisy-room": {
    silenceMs: 2000,
    speechThreshold: 0.02,
    transcriptStabilityMs: 1600
  },
  "short-command": {
    silenceMs: 500,
    speechThreshold: 0.016,
    transcriptStabilityMs: 420
  }
};

// Profiles used when the caller does not name one.
var DEFAULT_TURN_PROFILE = "fast";
var DEFAULT_QUALITY_PROFILE = "general";
1737
/**
 * Resolves a complete turn-detection configuration.
 *
 * Each timing value is taken from the first source that defines it: the
 * explicit `config` value, then the quality profile override, then the turn
 * profile preset. Missing profile names default to DEFAULT_TURN_PROFILE and
 * DEFAULT_QUALITY_PROFILE.
 *
 * @param {{ profile?: string, qualityProfile?: string, silenceMs?: number,
 *   speechThreshold?: number, transcriptStabilityMs?: number }} [config]
 * @returns {{ profile: string, qualityProfile: string, silenceMs: number,
 *   speechThreshold: number, transcriptStabilityMs: number }}
 */
var resolveTurnDetectionConfig = (config) => {
  const profile = config?.profile ?? DEFAULT_TURN_PROFILE;
  const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
  const preset = TURN_PROFILE_DEFAULTS[profile];
  const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  // Explicit value first, then quality override, then preset.
  const pick = (key) => config?.[key] ?? quality[key] ?? preset[key];
  return {
    profile,
    qualityProfile,
    silenceMs: pick("silenceMs"),
    speechThreshold: pick("speechThreshold"),
    transcriptStabilityMs: pick("transcriptStabilityMs")
  };
};
1750
+
1751
+ // src/testing/resilience.ts
1752
/**
 * Rounds `value` to `digits` decimal places (4 by default).
 *
 * @param {number} value
 * @param {number} [digits=4]
 * @returns {number}
 */
var roundMetric2 = (value, digits = 4) => {
  const scale = 10 ** digits;
  const scaled = Math.round(value * scale);
  return scaled / scale;
};
1756
/**
 * Creates a socket stand-in whose `close` and `send` do nothing.
 *
 * @returns {{ close: () => Promise<void>, send: () => Promise<void> }}
 */
var createMockSocket = () => {
  return {
    async close() {},
    async send() {}
  };
};
1760
/**
 * Builds a 160-sample Int16Array in which every sample equals `sample`.
 *
 * @param {number} sample - Value written to every sample.
 * @returns {Int16Array}
 */
var createSpeechChunk = (sample) => Int16Array.from({ length: 160 }, () => sample);
1761
/**
 * Creates an in-memory speech-to-text adapter for tests.
 *
 * `adapter.open()` always returns the same `session`. The session ignores
 * `send`/`close`; `emit(event, payload)` awaits the registered listeners of
 * that event one after another, and `on` returns a function that removes the
 * listener again.
 *
 * @returns {{ adapter: { kind: string, open: () => object }, session: object }}
 */
var createFakeAdapter = () => {
  const listeners = {
    close: [],
    endOfTurn: [],
    error: [],
    final: [],
    partial: []
  };
  const removeListener = (event, handler) => {
    const position = listeners[event].indexOf(handler);
    if (position >= 0) {
      listeners[event].splice(position, 1);
    }
  };
  const session = {
    close: async () => {},
    emit: async (event, payload) => {
      for (const listener of listeners[event]) {
        await listener(payload);
      }
    },
    on: (event, handler) => {
      listeners[event].push(handler);
      return () => {
        removeListener(event, handler);
      };
    },
    send: async (_audio) => {}
  };
  const adapter = {
    kind: "stt",
    open: () => session
  };
  return { adapter, session };
};
1795
/**
 * Runs one resilience scenario against a fresh voice session backed by the
 * fake adapter and an in-memory store, then reports whether any turn text was
 * committed more than once (compared case-insensitively).
 *
 * @param {string} id - Scenario id; also the session id and the prefix of
 *   generated transcript ids.
 * @param {string} title - Human-readable scenario title, echoed in the result.
 * @param {(harness: object) => Promise<void>} run - Scenario body; receives
 *   helpers to emit transcripts, end-of-turn events and (dis)connects.
 * @returns {Promise<{ actualTurns: string[], id: string, passes: boolean,
 *   replayedTurns: number, title: string }>}
 */
var runScenario = async (id, title, run) => {
  const store = createVoiceMemoryStore();
  const adapter = createFakeAdapter();
  const turns = [];
  const voice = createVoiceSession({
    context: {},
    id,
    logger: {},
    reconnect: {
      maxAttempts: 2,
      strategy: "resume-last-turn",
      timeout: 5000
    },
    route: {
      onComplete: async () => {},
      onTurn: async ({ turn }) => {
        turns.push(turn.text);
      }
    },
    socket: createMockSocket(),
    store,
    stt: adapter.adapter,
    sttLifecycle: "continuous",
    turnDetection: resolveTurnDetectionConfig({
      silenceMs: 20,
      speechThreshold: 0.01,
      transcriptStabilityMs: 5
    })
  });

  // Emits a final transcript; the default id is derived from the turn count.
  const emitFinal = async (text, transcriptId = `${id}-${turns.length}`) => {
    await adapter.session.emit("final", {
      receivedAt: Date.now(),
      transcript: {
        id: transcriptId,
        isFinal: true,
        text
      },
      type: "final"
    });
  };
  // Final transcript, one loud and one silent chunk, then a pause so the
  // silence-triggered commit can run.
  const commit = async (text, transcriptId = `${id}-${turns.length}`) => {
    await emitFinal(text, transcriptId);
    await voice.receiveAudio(createSpeechChunk(16000));
    await voice.receiveAudio(createSpeechChunk(0));
    await Bun.sleep(60);
  };
  const connectNewSocket = async () => {
    await voice.connect(createMockSocket());
  };
  const disconnect = async () => {
    await voice.disconnect({
      recoverable: true,
      type: "close"
    });
  };
  const emitEndOfTurn = async () => {
    await adapter.session.emit("endOfTurn", {
      reason: "vendor",
      receivedAt: Date.now(),
      type: "endOfTurn"
    });
  };

  await voice.connect(createMockSocket());
  try {
    await run({
      adapter,
      commit,
      connectNewSocket,
      disconnect,
      emitEndOfTurn,
      emitFinal,
      turns
    });
  } finally {
    await voice.close("resilience-complete");
  }
  const distinctTurnCount = new Set(turns.map((turn) => turn.toLowerCase())).size;
  const replayedTurns = turns.length - distinctTurnCount;
  return {
    actualTurns: turns,
    id,
    passes: replayedTurns === 0,
    replayedTurns,
    title
  };
};
1884
/**
 * Runs the built-in reconnect / de-duplication scenarios through `runScenario`
 * and summarizes the outcome.
 *
 * Every scenario is started up front and the results are awaited together
 * with `Promise.all`.
 *
 * @returns {Promise<object>} `{ generatedAt, scenarios, summary }` where
 *   `summary` carries the pass count / rate and the share of scenarios that
 *   reported at least one replayed turn.
 */
var runVoiceResilienceBenchmark = async () => {
  // Emits a final transcript event directly on the adapter session.
  const emitFinalOnAdapter = (adapter, id, text) => adapter.session.emit("final", {
    receivedAt: Date.now(),
    transcript: {
      id,
      isFinal: true,
      text
    },
    type: "final"
  });
  // Waits for each delay in turn and fires an end-of-turn signal after it.
  const emitEndOfTurnAfterDelays = async (emitEndOfTurn, delaysMs) => {
    for (const delayMs of delaysMs) {
      await Bun.sleep(delayMs);
      await emitEndOfTurn();
    }
  };
  // Fails the scenario unless exactly one turn has been committed so far.
  const assertSingleTurn = (turns, failureMessage) => {
    if (turns.length !== 1) {
      throw new Error(failureMessage);
    }
  };
  // [id, title, scenario body] triples, listed in the order they are reported.
  const scenarioDefinitions = [
    [
      "resume-no-replay",
      "Reconnect after first turn does not replay committed text",
      async ({ commit, connectNewSocket, disconnect }) => {
        await commit("Reconnect should not duplicate prior turns");
        await disconnect();
        await connectNewSocket();
        await commit("A second turn should still commit after resume");
      }
    ],
    [
      "duplicate-final-id",
      "Duplicate transcript ids do not create replayed turns",
      async ({ adapter, connectNewSocket, disconnect, turns, commit }) => {
        await commit("Duplicate final ids should still produce one turn", "same-id");
        await disconnect();
        await connectNewSocket();
        await emitFinalOnAdapter(adapter, "same-id", "Duplicate final ids should still produce one turn");
        // Only commit a fresh turn when the duplicate above was not counted.
        if (turns.length === 1) {
          await commit("Fresh transcripts should still commit later");
        }
      }
    ],
    [
      "duplicate-end-of-turn",
      "Repeated end-of-turn events for the same turn stay deduped",
      async ({ emitFinal, emitEndOfTurn, turns }) => {
        await emitFinal("Repeated end-of-turn should only commit once", "dup-endofturn");
        await emitEndOfTurn();
        await emitEndOfTurn();
        await Bun.sleep(80);
        assertSingleTurn(turns, "Repeated end-of-turn events created duplicate turns");
      }
    ],
    [
      "duplicate-end-of-turn-jitter",
      "End-of-turn jitter does not trigger extra commits",
      async ({ emitFinal, emitEndOfTurn, turns }) => {
        await emitFinal("Noisy end-of-turn signals should still commit once", "dup-endofturn-jitter");
        await emitEndOfTurnAfterDelays(emitEndOfTurn, [40, 95, 180, 120]);
        await Bun.sleep(80);
        assertSingleTurn(turns, "Jittered end-of-turn signals created duplicate turns");
      }
    ],
    [
      "reconnect-duplicate-text-no-new-audio",
      "Reconnect duplicate text with different ids and no audio does not replay turn",
      async ({ adapter, connectNewSocket, disconnect, emitEndOfTurn, emitFinal, turns }) => {
        await emitFinal("Reconnect duplicate text should be suppressed", "dup-text-reconnect-1");
        await emitEndOfTurn();
        await Bun.sleep(60);
        await disconnect();
        await connectNewSocket();
        await emitFinalOnAdapter(adapter, "dup-text-reconnect-2", "Reconnect duplicate text should be suppressed");
        await emitEndOfTurnAfterDelays(emitEndOfTurn, [40, 70, 110]);
        await Bun.sleep(60);
        assertSingleTurn(turns, "Reconnect duplicate text was committed twice");
      }
    ],
    [
      "reconnect-end-of-turn-jitter",
      "End-of-turn jitter after reconnect does not replay committed turns",
      async ({ adapter, connectNewSocket, disconnect, emitEndOfTurn, emitFinal, turns }) => {
        await emitFinal("Reconnect duplicate end-of-turn should dedupe", "resume-jitter");
        await emitEndOfTurn();
        await Bun.sleep(60);
        await disconnect();
        await connectNewSocket();
        await emitFinalOnAdapter(adapter, "resume-jitter", "Reconnect duplicate end-of-turn should dedupe");
        await emitEndOfTurnAfterDelays(emitEndOfTurn, [50, 80, 120, 180]);
        await Bun.sleep(80);
        assertSingleTurn(turns, "Reconnected jittered end-of-turn signals replayed a committed turn");
      }
    ]
  ];
  const scenarios = await Promise.all(scenarioDefinitions.map(([id, title, run]) => runScenario(id, title, run)));
  const scenarioCount = scenarios.length;
  // Share of all scenarios, rounded; 0 when nothing ran.
  const shareOfScenarios = (count) => roundMetric2(scenarioCount > 0 ? count / scenarioCount : 0);
  const passCount = scenarios.filter((scenario) => scenario.passes).length;
  const replayFailures = scenarios.filter((scenario) => scenario.replayedTurns > 0).length;
  return {
    generatedAt: Date.now(),
    scenarios,
    summary: {
      duplicateTurnRate: shareOfScenarios(replayFailures),
      passCount,
      passRate: shareOfScenarios(passCount),
      replayFailureRate: shareOfScenarios(replayFailures),
      scenarioCount
    }
  };
};
2006
+ // src/testing/sessionBenchmark.ts
2007
/** Arithmetic mean of `values`; returns 0 for an empty list. */
var average2 = (values) => {
  if (!(values.length > 0)) {
    return 0;
  }
  const total = values.reduce((runningTotal, current) => runningTotal + current, 0);
  return total / values.length;
};
2008
/**
 * Produces a comparison key for a turn's text: NFC-normalized, lower-cased,
 * with everything except letters, combining marks, digits, whitespace and
 * apostrophes replaced by a space, then whitespace collapsed and trimmed.
 *
 * Combining marks (`\p{M}`) are kept and the text is normalized to NFC first
 * so that decomposed accents ("e" + U+0301) and scripts that rely on vowel
 * signs are not split into separate words, and so that composed and
 * decomposed spellings of the same text yield the same key.
 *
 * @param {string} value - Raw turn text.
 * @returns {string} Normalized key.
 */
var normalizeTurnText = (value) => value.normalize("NFC").toLowerCase().replace(/[^\p{L}\p{M}\p{N}\s']/gu, " ").replace(/\s+/g, " ").trim();
2009
/**
 * Rounds `value` to `digits` decimal places (4 by default) by scaling,
 * applying `Math.round`, and scaling back.
 */
var roundMetric3 = (value, digits = 4) => {
  const scale = Math.pow(10, digits);
  const scaledAndRounded = Math.round(value * scale);
  return scaledAndRounded / scale;
};
2013
/**
 * Fills in the benchmark defaults for an STT fallback configuration.
 *
 * @param config - Optional fallback settings; a falsy value means "no fallback".
 * @returns `undefined` when `config` is falsy, otherwise a new object holding
 *   `config.adapter` plus every tunable, where `null`/`undefined` tunables are
 *   replaced by their default.
 */
var resolveBenchmarkFallbackConfig = (config) => {
  if (config) {
    // Listed in the order the keys appear on the resolved object.
    const defaults = {
      completionTimeoutMs: 2500,
      confidenceThreshold: 0.6,
      maxAttemptsPerTurn: 1,
      minTextLength: 2,
      replayWindowMs: 8000,
      settleMs: 220,
      trigger: "empty-or-low-confidence"
    };
    const resolved = { adapter: config.adapter };
    for (const [key, fallback] of Object.entries(defaults)) {
      resolved[key] = config[key] ?? fallback;
    }
    return resolved;
  }
  return undefined;
};
2028
/**
 * Splits `audio` into consecutive slices of at most `bytesPerChunk` bytes.
 *
 * @param audio - Buffer-like value exposing `byteLength` and `slice(start, end)`.
 * @param {number} bytesPerChunk - Slice size; must be greater than zero.
 * @returns {Array} The slices in order; empty when `audio.byteLength` is 0.
 * @throws {RangeError} When `bytesPerChunk` is zero, negative or `NaN`.
 */
var chunkAudio2 = (audio, bytesPerChunk) => {
  // A step of zero or below would never advance the offset (endless loop),
  // and NaN would silently yield a single empty slice; reject both up front.
  if (!(bytesPerChunk > 0)) {
    throw new RangeError(`bytesPerChunk must be a positive number, received ${bytesPerChunk}`);
  }
  const chunks = [];
  for (let offset = 0; offset < audio.byteLength; offset += bytesPerChunk) {
    chunks.push(audio.slice(offset, offset + bytesPerChunk));
  }
  return chunks;
};
2035
/** Returns a new zero-filled `Uint8Array` of `byteLength` bytes. */
var createSilence2 = (byteLength) => {
  const silence = new Uint8Array(byteLength);
  return silence;
};
2036
/**
 * Counts committed turns that repeat more often than the fixture expects.
 *
 * Texts are compared through `normalizeTurnText`. Each distinct text may
 * occur as often as it appears in `expectedTurns`, and at least once even if
 * it is not expected at all; every occurrence beyond that adds one to the
 * result.
 *
 * @param {string[]} actualTurns - Texts of the committed turns.
 * @param {string[]} expectedTurns - Texts the fixture expects.
 * @returns {number} Number of surplus occurrences.
 */
var countUnexpectedDuplicateTurns = (actualTurns, expectedTurns) => {
  // Builds a normalized-text -> occurrence-count map.
  const tally = (texts) => {
    const counts = new Map();
    for (const text of texts) {
      const key = normalizeTurnText(text);
      counts.set(key, (counts.get(key) ?? 0) + 1);
    }
    return counts;
  };
  const expectedCounts = tally(expectedTurns);
  const actualCounts = tally(actualTurns);
  let surplus = 0;
  actualCounts.forEach((seen, key) => {
    const allowed = Math.max(expectedCounts.get(key) ?? 0, 1);
    surplus += Math.max(seen - allowed, 0);
  });
  return surplus;
};
2057
/**
 * Converts an outgoing socket payload into a value suitable for a trace entry.
 *
 * @param data - Payload handed to the socket's `send`.
 * @returns For non-string payloads, `{ byteLength, kind: "binary" }` using the
 *   payload's own `byteLength`; for strings, the parsed JSON value, or the
 *   original string when it is not valid JSON.
 */
var normalizeSocketMessage = (data) => {
  if (typeof data !== "string") {
    // `byteLength` is read the same way for every binary payload, so no
    // per-type branching is needed here.
    return {
      byteLength: data.byteLength,
      kind: "binary"
    };
  }
  try {
    return JSON.parse(data);
  } catch {
    // Not JSON: record the raw text instead.
    return data;
  }
};
2070
/**
 * Builds a stand-in socket whose `send` and `close` only report to `onEvent`.
 *
 * @param {Function} [onEvent] - Receives `{ data, phase }` with phase
 *   `"socket.send"` (data passed through `normalizeSocketMessage`) or
 *   `"socket.close"` (data is `{ code, reason }`). When omitted, both methods
 *   do nothing.
 * @returns {{ close: Function, send: Function }} Socket-like object with async methods.
 */
var createMockSocket2 = (onEvent) => {
  const hasListener = onEvent !== undefined && onEvent !== null;
  return {
    close: async (code, reason) => {
      if (!hasListener) {
        return;
      }
      onEvent({ data: { code, reason }, phase: "socket.close" });
    },
    send: async (data) => {
      // The payload is only normalized when somebody is listening.
      if (!hasListener) {
        return;
      }
      onEvent({ data: normalizeSocketMessage(data), phase: "socket.send" });
    }
  };
};
2087
/**
 * Polls `session.snapshot()` until the current turn has no final or partial
 * text and at least `settleMs` have passed since the last activity (falling
 * back to `createdAt` when `lastActivityAt` is missing).
 *
 * Gives up silently once `idleTimeoutMs` has elapsed; the caller is not told
 * whether the session actually became idle.
 *
 * @param session - Object exposing an async `snapshot()`.
 * @param {number} settleMs - Required quiet period in milliseconds.
 * @param {number} idleTimeoutMs - Maximum time to keep polling.
 * @returns {Promise<void>}
 */
var waitForSessionIdle = async (session, settleMs, idleTimeoutMs) => {
  const startedAt = Date.now();
  // Poll at most every 100 ms, faster when the settle window is shorter.
  const pollIntervalMs = Math.min(100, settleMs);
  const elapsedMs = () => Date.now() - startedAt;
  while (elapsedMs() < idleTimeoutMs) {
    const { currentTurn, lastActivityAt, createdAt } = await session.snapshot();
    const hasPendingText = Boolean(currentTurn.finalText || currentTurn.partialText);
    const quietForMs = Date.now() - (lastActivityAt ?? createdAt);
    if (!hasPendingText && quietForMs >= settleMs) {
      return;
    }
    await Bun.sleep(pollIntervalMs);
  }
};
2099
/**
 * Streams one fixture's audio through a freshly created voice session and
 * scores the turns the session commits against the fixture's expected texts.
 *
 * Steps: create an in-memory store and a session wired to a mock socket,
 * connect, feed the fixture audio chunk by chunk (sleeping one chunk duration
 * after each), optionally disconnect/reconnect once at
 * `fixture.reconnectAtChunkIndex`, append silent tail padding, wait for the
 * session to go idle, then always close the session.
 *
 * @param adapter - STT adapter handed to the session as `stt`.
 * @param fixture - Scenario fixture (`id`, `audio`, `format`,
 *   `expectedTurnTexts`, plus optional tuning and reconnect settings).
 * @param [options] - Optional `correctTurn`, `sttFallback` and `trace`;
 *   when `trace` is truthy a timestamped trace is collected and returned.
 * @returns {Promise<object>} Scenario result with the committed texts, the
 *   per-turn scores, duplicate / turn-count metrics and the overall `passes` flag.
 */
var runVoiceSessionBenchmarkScenario = async (adapter, fixture, options = {}) => {
  const store = createVoiceMemoryStore();
  const committedTurns = [];
  const traceStartedAt = Date.now();
  const trace = [];
  const sessionId = `session-bench-${fixture.id}`;
  // Appends a trace entry stamped relative to the start; no-op unless tracing.
  const pushTrace = (entry) => {
    if (options.trace) {
      trace.push({
        ...entry,
        atMs: Date.now() - traceStartedAt
      });
    }
  };
  // Records the stored session state under the given phase; no-op unless tracing.
  const captureSnapshot = async (phase) => {
    if (!options.trace) {
      return;
    }
    const { currentTurn, lastActivityAt, status, turns } = await store.getOrCreate(sessionId);
    pushTrace({
      data: {
        currentTurn: {
          finalText: currentTurn.finalText,
          lastAudioAt: currentTurn.lastAudioAt,
          lastSpeechAt: currentTurn.lastSpeechAt,
          lastTranscriptAt: currentTurn.lastTranscriptAt,
          partialText: currentTurn.partialText,
          silenceStartedAt: currentTurn.silenceStartedAt,
          transcriptCount: currentTurn.transcripts.length
        },
        lastActivityAt,
        status,
        turns: turns.map((turn) => turn.text)
      },
      phase
    });
  };
  // Every log level is routed into the trace as `logger.<level>:<message>`.
  const logger = Object.fromEntries(["debug", "error", "info", "warn"].map((level) => [
    level,
    (message, meta) => {
      pushTrace({
        data: meta,
        phase: `logger.${level}:${message}`
      });
    }
  ]));
  const silenceMs = fixture.silenceMs ?? DEFAULT_SILENCE_MS;
  const transcriptStabilityMs = fixture.transcriptStabilityMs ?? 900;
  const session = createVoiceSession({
    audioConditioning: resolveAudioConditioningConfig(fixture.audioConditioning),
    context: {},
    id: sessionId,
    logger,
    reconnect: {
      maxAttempts: 2,
      strategy: "resume-last-turn",
      timeout: 5000
    },
    route: {
      correctTurn: options.correctTurn,
      onComplete: async () => {},
      onTurn: async ({ turn }) => {
        const { id: turnId, quality, text, transcripts } = turn;
        committedTurns.push({ quality, text });
        pushTrace({
          data: {
            quality,
            text,
            transcriptCount: transcripts.length,
            turnId
          },
          phase: "route.onTurn"
        });
      }
    },
    phraseHints: fixture.phraseHints,
    socket: createMockSocket2(pushTrace),
    store,
    stt: adapter,
    sttFallback: resolveBenchmarkFallbackConfig(options.sttFallback),
    sttLifecycle: fixture.sttLifecycle ?? "continuous",
    turnDetection: resolveTurnDetectionConfig({
      profile: fixture.turnProfile ?? "balanced",
      silenceMs,
      speechThreshold: fixture.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD,
      transcriptStabilityMs
    })
  });
  const startedAt = Date.now();
  let reconnectTriggered = false;
  // Hands one chunk to the session, then waits as long as the chunk "lasts".
  const feedChunk = async (chunk, pauseMs) => {
    await session.receiveAudio(chunk);
    await Bun.sleep(pauseMs);
  };
  // Drops the connection as recoverable, pauses, and connects a new mock socket.
  const reconnectSession = async (chunkIndex) => {
    reconnectTriggered = true;
    pushTrace({
      data: { chunkIndex },
      phase: "reconnect.begin"
    });
    await captureSnapshot("reconnect.pre-disconnect");
    await session.disconnect({
      reason: "benchmark-reconnect",
      recoverable: true,
      type: "close"
    });
    await captureSnapshot("reconnect.post-disconnect");
    await Bun.sleep(fixture.reconnectPauseMs ?? 150);
    await session.connect(createMockSocket2(pushTrace));
    await captureSnapshot("reconnect.post-connect");
  };
  await session.connect(createMockSocket2(pushTrace));
  await captureSnapshot("session.connected");
  try {
    const chunkDurationMs = fixture.chunkDurationMs ?? 100;
    // The factor 2 is bytes per sample, i.e. this presumably assumes 16-bit PCM — TODO confirm.
    const bytesPerMillisecond = fixture.format.sampleRateHz * fixture.format.channels * 2 / 1000;
    const bytesPerChunk = Math.max(2, Math.floor(bytesPerMillisecond * chunkDurationMs));
    const chunks = chunkAudio2(fixture.audio, bytesPerChunk);
    for (let index = 0; index < chunks.length; index += 1) {
      await feedChunk(chunks[index], chunkDurationMs);
      const shouldReconnect = !reconnectTriggered && fixture.reconnectAtChunkIndex !== undefined && fixture.reconnectAtChunkIndex === index;
      if (shouldReconnect) {
        await reconnectSession(index);
      }
    }
    const tailPaddingMs = fixture.tailPaddingMs ?? 1200;
    if (tailPaddingMs > 0) {
      const tailBytes = Math.max(2, Math.floor(bytesPerMillisecond * tailPaddingMs));
      const silentChunks = chunkAudio2(createSilence2(tailBytes), bytesPerChunk);
      for (const silentChunk of silentChunks) {
        await feedChunk(silentChunk, chunkDurationMs);
      }
    }
    await waitForSessionIdle(session, Math.max(1200, silenceMs + transcriptStabilityMs), 8000);
    await captureSnapshot("session.idle");
  } finally {
    await captureSnapshot("session.pre-close");
    await session.close("session-benchmark-complete");
  }
  const expectedTurns = fixture.expectedTurnTexts;
  const duplicateTurnCount = countUnexpectedDuplicateTurns(committedTurns.map((turn) => turn.text), expectedTurns);
  // Scores the committed turn found at the same position as the expected text.
  const scoreExpectedTurn = (expectedText, index) => {
    const committed = committedTurns[index];
    const actualText = committed?.text;
    if (actualText) {
      const accuracy = scoreTranscriptAccuracy(actualText, expectedText, fixture.transcriptThreshold ?? 0.35);
      return {
        actualText,
        accuracy,
        expectedText,
        index,
        passes: accuracy.passesThreshold,
        quality: committed?.quality
      };
    }
    // Missing (or empty) turn at this position.
    return {
      actualText: "",
      expectedText,
      index,
      passes: false
    };
  };
  const expectedTurnResults = expectedTurns.map((expectedText, index) => scoreExpectedTurn(expectedText, index));
  // Turns committed beyond the expected count always fail.
  const surplusTurnResults = committedTurns.slice(expectedTurns.length).map((turn, offset) => ({
    actualText: turn?.text ?? "",
    expectedText: undefined,
    index: expectedTurns.length + offset,
    passes: false,
    quality: turn?.quality
  }));
  const turnResults = [...expectedTurnResults, ...surplusTurnResults];
  const turnCountDelta = committedTurns.length - expectedTurns.length;
  return {
    actualTurns: committedTurns.map((turn) => turn.text),
    duplicateTurnCount,
    elapsedMs: Date.now() - startedAt,
    expectedTurns: fixture.expectedTurnTexts,
    fixtureId: fixture.id,
    passes: duplicateTurnCount === 0 && turnCountDelta === 0 && turnResults.every((result) => result.passes),
    reconnectTriggered,
    tags: fixture.tags ?? [],
    title: fixture.title,
    turnCountDelta,
    turnResults,
    trace: options.trace ? trace : undefined
  };
};
2297
/**
 * Aggregates the scenario results of a single benchmark run.
 *
 * @param adapterId - Identifier copied into the summary.
 * @param {Array} scenarios - Results produced per scenario.
 * @returns {object} Averages (elapsed time, word error rate), pass /
 *   duplicate rates and counts. `reconnectSuccessRate` is 1 when no scenario
 *   triggered a reconnect; the other rates are 0 for an empty input.
 */
var summarizeVoiceSessionBenchmark = (adapterId, scenarios) => {
  const scenarioCount = scenarios.length;
  const countWhere = (predicate) => scenarios.filter((scenario) => predicate(scenario)).length;
  const shareOfScenarios = (count) => scenarioCount > 0 ? count / scenarioCount : 0;
  const passCount = countWhere((scenario) => scenario.passes);
  const reconnectScenarios = scenarios.filter((scenario) => scenario.reconnectTriggered);
  const reconnectSuccessCount = reconnectScenarios.filter((scenario) => scenario.passes).length;
  // Only turns that were actually scored contribute a word error rate.
  const wordErrorRates = [];
  for (const scenario of scenarios) {
    for (const turn of scenario.turnResults) {
      const wordErrorRate = turn.accuracy?.wordErrorRate;
      if (typeof wordErrorRate === "number") {
        wordErrorRates.push(wordErrorRate);
      }
    }
  }
  const scenariosWithDuplicateTurns = countWhere((scenario) => scenario.duplicateTurnCount > 0);
  const reconnectSuccessShare = reconnectScenarios.length > 0 ? reconnectSuccessCount / reconnectScenarios.length : 1;
  return {
    adapterId,
    averageElapsedMs: roundMetric3(average2(scenarios.map((scenario) => scenario.elapsedMs)), 2),
    averageWordErrorRate: roundMetric3(average2(wordErrorRates)),
    duplicateTurnRate: roundMetric3(shareOfScenarios(scenariosWithDuplicateTurns)),
    passCount,
    passRate: roundMetric3(shareOfScenarios(passCount)),
    reconnectSuccessRate: roundMetric3(reconnectSuccessShare),
    scenarioCount,
    scenariosWithDuplicateTurns,
    scenariosWithTurnCountMismatch: countWhere((scenario) => scenario.turnCountDelta !== 0)
  };
};
2315
/**
 * Combines several benchmark reports into per-fixture aggregates plus an
 * overall summary.
 *
 * Scenario results are grouped by `fixtureId` in first-seen order. For each
 * fixture the elapsed time, word error rate (average / best / worst), pass
 * rate and reconnect success rate are computed; `tags` and `title` come from
 * the first result seen for that fixture.
 *
 * @param input - `{ adapterId, reports }`, where every report carries
 *   `scenarios` and `summary.passCount`.
 * @returns {object} `{ adapterId, generatedAt, runCount, scenarios, summary }`.
 */
var summarizeVoiceSessionBenchmarkSeries = (input) => {
  const resultsByFixture = new Map();
  for (const report of input.reports) {
    for (const scenario of report.scenarios) {
      const bucket = resultsByFixture.get(scenario.fixtureId);
      if (bucket) {
        bucket.push(scenario);
      } else {
        resultsByFixture.set(scenario.fixtureId, [scenario]);
      }
    }
  }
  const scenarioAggregates = Array.from(resultsByFixture, ([fixtureId, results]) => {
    // Only scored turns contribute a word error rate.
    const wordErrorRates = [];
    for (const scenario of results) {
      for (const turn of scenario.turnResults) {
        const wordErrorRate = turn.accuracy?.wordErrorRate;
        if (typeof wordErrorRate === "number") {
          wordErrorRates.push(wordErrorRate);
        }
      }
    }
    const hasWordErrorRates = wordErrorRates.length > 0;
    const reconnectRuns = results.filter((scenario) => scenario.reconnectTriggered);
    const passCount = results.filter((scenario) => scenario.passes).length;
    const [sample] = results;
    // A fixture that never reconnected counts as fully successful (1).
    const reconnectSuccessShare = reconnectRuns.length > 0 ? reconnectRuns.filter((scenario) => scenario.passes).length / reconnectRuns.length : 1;
    return {
      averageElapsedMs: roundMetric3(average2(results.map((scenario) => scenario.elapsedMs)), 2),
      averageWordErrorRate: roundMetric3(average2(wordErrorRates)),
      bestWordErrorRate: roundMetric3(hasWordErrorRates ? Math.min(...wordErrorRates) : 0),
      fixtureId,
      passCount,
      passRate: roundMetric3(results.length > 0 ? passCount / results.length : 0),
      reconnectSuccessRate: roundMetric3(reconnectSuccessShare),
      runCount: results.length,
      tags: sample.tags,
      title: sample.title,
      worstWordErrorRate: roundMetric3(hasWordErrorRates ? Math.max(...wordErrorRates) : 0)
    };
  });
  let totalRunCount = 0;
  let totalPassCount = 0;
  for (const report of input.reports) {
    totalRunCount += report.scenarios.length;
    totalPassCount += report.summary.passCount;
  }
  const reconnectRates = scenarioAggregates.map((scenario) => scenario.reconnectSuccessRate).filter((value) => Number.isFinite(value));
  // Mean of one field across all fixture aggregates.
  const meanOf = (pick) => average2(scenarioAggregates.map((scenario) => pick(scenario)));
  return {
    adapterId: input.adapterId,
    generatedAt: Date.now(),
    runCount: input.reports.length,
    scenarios: scenarioAggregates,
    summary: {
      adapterId: input.adapterId,
      averageElapsedMs: roundMetric3(meanOf((scenario) => scenario.averageElapsedMs), 2),
      averagePassRate: roundMetric3(meanOf((scenario) => scenario.passRate)),
      averageWordErrorRate: roundMetric3(meanOf((scenario) => scenario.averageWordErrorRate)),
      flakyScenarioCount: scenarioAggregates.filter((scenario) => scenario.passRate > 0 && scenario.passRate < 1).length,
      generatedRunCount: input.reports.length,
      reconnectSuccessRate: roundMetric3(average2(reconnectRates)),
      scenarioCount: scenarioAggregates.length,
      stableScenarioCount: scenarioAggregates.filter((scenario) => scenario.passRate === 1).length,
      totalPassCount,
      totalRunCount
    }
  };
};
2366
/**
 * Runs every scenario in `input.scenarios` one after another against
 * `input.adapter` and returns the results together with their summary.
 *
 * @param input - `{ adapter, adapterId, scenarios, correctTurn?, sttFallback?, trace? }`.
 * @returns {Promise<object>} `{ adapterId, generatedAt, scenarios, summary }`.
 */
var runVoiceSessionBenchmark = async (input) => {
  const scenarios = [];
  // Scenarios are awaited one at a time rather than started in parallel.
  for (const fixture of input.scenarios) {
    const result = await runVoiceSessionBenchmarkScenario(input.adapter, fixture, {
      correctTurn: input.correctTurn,
      sttFallback: input.sttFallback,
      trace: input.trace
    });
    scenarios.push(result);
  }
  return {
    adapterId: input.adapterId,
    generatedAt: Date.now(),
    scenarios,
    summary: summarizeVoiceSessionBenchmark(input.adapterId, scenarios)
  };
};
2382
/**
 * Repeats the whole session benchmark `input.runs` times, sequentially, and
 * returns the aggregated series summary.
 *
 * @param input - `{ adapter, adapterId, scenarios, runs, correctTurn?, sttFallback?, trace? }`.
 *   `runs` is floored and clamped to at least 1; a missing, non-numeric or
 *   infinite value is treated as 1.
 * @returns {Promise<object>} Result of `summarizeVoiceSessionBenchmarkSeries`
 *   over the collected reports.
 */
var runVoiceSessionBenchmarkSeries = async (input) => {
  const reports = [];
  // `Math.max(1, NaN)` is NaN, which would skip the loop entirely, and an
  // infinite count would never finish; fall back to a single run for both.
  const requestedRuns = Number(input.runs);
  const runCount = Number.isFinite(requestedRuns) ? Math.max(1, Math.floor(requestedRuns)) : 1;
  for (let runIndex = 0; runIndex < runCount; runIndex += 1) {
    reports.push(await runVoiceSessionBenchmark({
      adapter: input.adapter,
      adapterId: input.adapterId,
      correctTurn: input.correctTurn,
      scenarios: input.scenarios,
      sttFallback: input.sttFallback,
      trace: input.trace
    }));
  }
  return summarizeVoiceSessionBenchmarkSeries({
    adapterId: input.adapterId,
    reports
  });
};
437
2400
  export {
2401
+ summarizeVoiceSessionBenchmarkSeries,
2402
+ summarizeVoiceSessionBenchmark,
438
2403
  summarizeSTTBenchmark,
439
2404
  scoreTranscriptAccuracy,
2405
+ runVoiceSessionBenchmarkSeries,
2406
+ runVoiceSessionBenchmarkScenario,
2407
+ runVoiceSessionBenchmark,
2408
+ runVoiceResilienceBenchmark,
440
2409
  runSTTAdapterFixture,
441
2410
  runSTTAdapterBenchmark,
2411
+ resolveFixtureEnvironment,
442
2412
  mergeFinalTranscriptText,
443
2413
  loadVoiceTestFixtures,
444
2414
  getVoiceFixtureDirectory,
2415
+ evaluateSTTBenchmarkAcceptance,
445
2416
  compareSTTBenchmarks
446
2417
  };