@absolutejs/voice 0.0.20 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +387 -4
  2. package/dist/angular/index.d.ts +1 -0
  3. package/dist/angular/index.js +669 -3
  4. package/dist/angular/voice-controller.service.d.ts +21 -0
  5. package/dist/audioConditioning.d.ts +3 -0
  6. package/dist/client/actions.d.ts +7 -0
  7. package/dist/client/connection.d.ts +5 -0
  8. package/dist/client/controller.d.ts +2 -0
  9. package/dist/client/htmxBootstrap.js +576 -167
  10. package/dist/client/index.d.ts +1 -0
  11. package/dist/client/index.js +486 -3
  12. package/dist/client/microphone.d.ts +4 -2
  13. package/dist/correction.d.ts +16 -0
  14. package/dist/index.d.ts +4 -0
  15. package/dist/index.js +1314 -283
  16. package/dist/presets.d.ts +13 -0
  17. package/dist/react/index.d.ts +1 -0
  18. package/dist/react/index.js +642 -3
  19. package/dist/react/useVoiceController.d.ts +20 -0
  20. package/dist/react/useVoiceStream.d.ts +1 -0
  21. package/dist/store.d.ts +2 -2
  22. package/dist/svelte/index.d.ts +1 -0
  23. package/dist/svelte/index.js +607 -3
  24. package/dist/testing/benchmark.d.ts +36 -0
  25. package/dist/testing/index.js +1453 -241
  26. package/dist/testing/sessionBenchmark.d.ts +67 -2
  27. package/dist/testing/stt.d.ts +1 -0
  28. package/dist/turnDetection.d.ts +5 -1
  29. package/dist/turnProfiles.d.ts +6 -0
  30. package/dist/types.d.ts +198 -8
  31. package/dist/vue/index.d.ts +1 -0
  32. package/dist/vue/index.js +660 -3
  33. package/dist/vue/useVoiceController.d.ts +19 -0
  34. package/fixtures/README.md +9 -0
  35. package/fixtures/manifest.json +59 -1
  36. package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
  37. package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
  38. package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
  39. package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
  40. package/package.json +21 -1
@@ -95,6 +95,61 @@ var measureAudioLevel = (audio) => {
95
95
  return Math.sqrt(sumSquares / samples.length);
96
96
  };
97
97
  var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
98
/**
 * Counts the whitespace-separated words in `value`.
 *
 * Splits on runs of any whitespace and discards empty pieces, so padded,
 * double-spaced or whitespace-only input is not over-counted. For text that
 * is already trimmed and single-spaced the result is the same as a plain
 * split on " ".
 *
 * @param {string} value - Text to measure.
 * @returns {number} Word count; 0 for empty or whitespace-only input.
 */
var countWords = (value) => value.split(/\s+/).filter(Boolean).length;
99
/**
 * Chooses which of two transcript strings to keep.
 *
 * Both inputs are normalized first. An empty side loses to the other side;
 * a string that contains the other one wins (the current text wins when the
 * two are equal); otherwise the next text wins only when it has strictly
 * more words than the current text.
 *
 * @param {string} currentText - Text held so far.
 * @param {string} nextText - Newly received text.
 * @returns {string} The normalized text that was selected.
 */
var selectPreferredTranscriptText = (currentText, nextText) => {
  const existing = normalizeText(currentText);
  const incoming = normalizeText(nextText);
  if (existing === "" || incoming === "") {
    return existing === "" ? incoming : existing;
  }
  // Containment covers the equality case as well.
  if (existing.includes(incoming)) {
    return existing;
  }
  if (incoming.includes(existing)) {
    return incoming;
  }
  return countWords(incoming) > countWords(existing) ? incoming : existing;
};
119
/**
 * Joins two transcript strings that follow one another, collapsing any run
 * of words shared between the end of the first and the start of the second.
 *
 * The longest overlap is tried first. When no words overlap the two
 * normalized strings are joined with a single space.
 *
 * @param {string} currentText - Leading text.
 * @param {string} nextText - Text that follows it.
 * @returns {string} The merged, normalized text.
 */
var mergeSequentialTranscriptText = (currentText, nextText) => {
  const head = normalizeText(currentText);
  const tail = normalizeText(nextText);
  if (!head || !tail) {
    return head || tail;
  }
  const headWords = head.split(" ");
  const tailWords = tail.split(" ");
  let size = Math.min(headWords.length, tailWords.length);
  while (size > 0) {
    const offset = headWords.length - size;
    const overlaps = tailWords
      .slice(0, size)
      .every((word, position) => word === headWords[offset + position]);
    if (overlaps) {
      return headWords.concat(tailWords.slice(size)).join(" ");
    }
    size -= 1;
  }
  return `${head} ${tail}`.trim();
};
140
/**
 * Counts how many leading words two texts have in common.
 *
 * Both texts are normalized and split into non-empty words; comparison stops
 * at the first position where the words differ.
 *
 * @param {string} currentText - First text.
 * @param {string} nextText - Second text.
 * @returns {number} Length of the shared word prefix.
 */
var countCommonPrefixWords = (currentText, nextText) => {
  const toWords = (text) => normalizeText(text).split(" ").filter((word) => word !== "");
  const left = toWords(currentText);
  const right = toWords(nextText);
  const limit = Math.min(left.length, right.length);
  const firstMismatch = left
    .slice(0, limit)
    .findIndex((word, position) => word !== right[position]);
  return firstMismatch === -1 ? limit : firstMismatch;
};
98
153
  var mergeTranscriptTexts = (transcripts) => {
99
154
  const merged = [];
100
155
  for (const transcript of transcripts) {
@@ -118,12 +173,14 @@ var mergeTranscriptTexts = (transcripts) => {
118
173
  }
119
174
  return merged.join(" ").trim();
120
175
  };
121
- var buildTurnText = (transcripts, partialText) => {
176
+ var buildTurnText = (transcripts, partialText, options = {}) => {
122
177
  const finalText = mergeTranscriptTexts(transcripts);
123
- if (finalText) {
124
- return finalText;
178
+ const nextPartial = normalizeText(partialText);
179
+ const lastFinalEndedAtMs = [...transcripts].reverse().find((transcript) => typeof transcript.endedAtMs === "number")?.endedAtMs;
180
+ if (finalText && nextPartial && typeof lastFinalEndedAtMs === "number" && typeof options.partialStartedAtMs === "number" && options.partialStartedAtMs - lastFinalEndedAtMs >= 250 && countCommonPrefixWords(finalText, nextPartial) === 0) {
181
+ return mergeSequentialTranscriptText(finalText, nextPartial);
125
182
  }
126
- return normalizeText(partialText);
183
+ return selectPreferredTranscriptText(finalText, nextPartial);
127
184
  };
128
185
 
129
186
  // src/testing/accuracy.ts
@@ -204,6 +261,7 @@ var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
204
261
  const settleMs = options.settleMs ?? 500;
205
262
  const waitForRealtimeMs = options.waitForRealtimeMs ?? 0;
206
263
  let lastActivityAt = Date.now();
264
+ let speechEndedAt = startedAt;
207
265
  const markActive = () => {
208
266
  lastActivityAt = Date.now();
209
267
  };
@@ -240,12 +298,15 @@ var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
240
298
  const realtimeDelayMs = waitForRealtimeMs > 0 ? waitForRealtimeMs : chunkDurationMs;
241
299
  for (const chunk of chunks) {
242
300
  await session.send(chunk);
301
+ markActive();
243
302
  await Bun.sleep(realtimeDelayMs);
244
303
  }
304
+ speechEndedAt = Date.now();
245
305
  if (tailPaddingMs > 0) {
246
306
  const tailBytes = Math.max(2, Math.floor(bytesPerMillisecond * tailPaddingMs));
247
307
  for (const chunk of chunkAudio(createSilence(tailBytes), bytesPerChunk)) {
248
308
  await session.send(chunk);
309
+ markActive();
249
310
  await Bun.sleep(realtimeDelayMs);
250
311
  }
251
312
  }
@@ -265,11 +326,30 @@ var runSTTAdapterFixture = async (adapter, fixture, options = {}) => {
265
326
  finalEvents,
266
327
  finalText,
267
328
  partialEvents,
329
+ speechEndedAt,
268
330
  startedAt
269
331
  };
270
332
  };
271
333
 
272
334
  // src/testing/benchmark.ts
335
/**
 * Maps a fixture's tags to a single environment label.
 *
 * Accent tags ("accent", "speech-accent-archive") and noise tags ("noisy",
 * "synthetic-noise", "stress") take priority over "clean"; a fixture with
 * none of these tags is labelled "other".
 *
 * @param {{ tags?: string[] }} fixture - Fixture whose tags are inspected.
 * @returns {"accent-noisy" | "accent" | "noisy" | "clean" | "other"}
 */
var resolveFixtureEnvironment = (fixture) => {
  const tagSet = new Set(fixture.tags ?? []);
  const hasAny = (...names) => names.some((name) => tagSet.has(name));
  const accented = hasAny("accent", "speech-accent-archive");
  const noisy = hasAny("noisy", "synthetic-noise", "stress");
  if (accented) {
    return noisy ? "accent-noisy" : "accent";
  }
  if (noisy) {
    return "noisy";
  }
  return tagSet.has("clean") ? "clean" : "other";
};
273
353
  var normalizeBenchmarkText = (value) => value.toLowerCase().replace(/[^\p{L}\p{N}\s']/gu, " ").replace(/\s+/g, " ").trim();
274
354
  var scoreExpectedTerms = (actualText, expectedTerms) => {
275
355
  const normalizedActual = normalizeBenchmarkText(actualText);
@@ -300,10 +380,46 @@ var roundMetric = (value, digits = 4) => {
300
380
  const factor = 10 ** digits;
301
381
  return Math.round(value * factor) / factor;
302
382
  };
383
/**
 * Aggregates fixture benchmark results per `group`.
 *
 * For every distinct group it reports counts (fixtures, passes, fixtures
 * with errors or fragments) and rounded averages (elapsed time, term recall,
 * word error rate), plus a word accuracy rate computed as one minus the
 * average word error rate. Summaries are returned sorted by group name.
 *
 * @param {Array<object>} fixtures - Fixture results carrying `group`,
 *   `passes`, `accuracy.wordErrorRate`, `expectedTerms.recall`, `elapsedMs`,
 *   `errorCount` and `fragmentationCount`.
 * @returns {Array<object>} One summary per group.
 */
var calculateGroupSummary = (fixtures) => {
  const buckets = new Map();
  fixtures.forEach((fixture) => {
    const bucket = buckets.get(fixture.group);
    if (bucket === undefined) {
      buckets.set(fixture.group, [fixture]);
    } else {
      bucket.push(fixture);
    }
  });
  const countWhere = (items, predicate) => items.filter(predicate).length;
  const summaries = [];
  for (const [group, results] of buckets) {
    const fixtureCount = results.length;
    const passCount = countWhere(results, (entry) => entry.passes);
    const averageWordErrorRate = average(results.map((entry) => entry.accuracy.wordErrorRate)) ?? 0;
    const averageTermRecall = average(results.map((entry) => entry.expectedTerms.recall)) ?? 0;
    const averageElapsedMs = average(results.map((entry) => entry.elapsedMs));
    const accuracy = 1 - averageWordErrorRate;
    summaries.push({
      averageElapsedMs: roundMetric(averageElapsedMs, 2) ?? 0,
      averageTermRecall: roundMetric(averageTermRecall) ?? 0,
      averageWordErrorRate: roundMetric(averageWordErrorRate) ?? 0,
      fixturesWithErrors: countWhere(results, (entry) => entry.errorCount > 0),
      fixturesWithFragments: countWhere(results, (entry) => entry.fragmentationCount > 0),
      fixtureCount,
      group,
      passCount,
      passRate: fixtureCount > 0 ? roundMetric(passCount / fixtureCount) ?? 0 : 0,
      wordAccuracyRate: roundMetric(accuracy) ?? 0
    });
  }
  summaries.sort((left, right) => left.group.localeCompare(right.group));
  return summaries;
};
303
411
  var toFixtureBenchmarkResult = (fixture, result, elapsedMs) => {
412
+ const toPostSpeechLatency = (timestamp) => {
413
+ if (typeof timestamp !== "number") {
414
+ return;
415
+ }
416
+ return Math.max(0, timestamp - result.speechEndedAt);
417
+ };
304
418
  const timeToFirstPartialMs = result.partialEvents[0] ? result.partialEvents[0].receivedAt - result.startedAt : undefined;
305
419
  const timeToFirstFinalMs = result.finalEvents[0] ? result.finalEvents[0].receivedAt - result.startedAt : undefined;
306
420
  const timeToEndOfTurnMs = result.endOfTurnEvents[0] ? result.endOfTurnEvents[0].receivedAt - result.startedAt : undefined;
421
+ const postSpeechTimeToFirstFinalMs = toPostSpeechLatency(result.finalEvents[0]?.receivedAt);
422
+ const postSpeechTimeToEndOfTurnMs = toPostSpeechLatency(result.endOfTurnEvents[0]?.receivedAt);
307
423
  const expectedTerms = scoreExpectedTerms(result.finalText, fixture.expectedTerms);
308
424
  return {
309
425
  accuracy: result.accuracy,
@@ -317,8 +433,11 @@ var toFixtureBenchmarkResult = (fixture, result, elapsedMs) => {
317
433
  finalText: result.finalText,
318
434
  fixtureId: fixture.id,
319
435
  fragmentationCount: Math.max(0, result.finalEvents.length - 1),
436
+ group: resolveFixtureEnvironment(fixture),
320
437
  passes: result.errorEvents.length === 0 && result.finalText.trim().length > 0 && result.accuracy.passesThreshold,
321
438
  partialCount: result.partialEvents.length,
439
+ postSpeechTimeToEndOfTurnMs,
440
+ postSpeechTimeToFirstFinalMs,
322
441
  tags: fixture.tags ?? [],
323
442
  timeToEndOfTurnMs,
324
443
  timeToFirstFinalMs,
@@ -336,6 +455,8 @@ var summarizeSTTBenchmark = (adapterId, fixtures) => {
336
455
  averageEndOfTurnCount: roundMetric(average(fixtures.map((fixture) => fixture.endOfTurnCount)), 2) ?? 0,
337
456
  averageFinalCount: roundMetric(average(fixtures.map((fixture) => fixture.finalCount)), 2) ?? 0,
338
457
  averageTermRecall: roundMetric(average(fixtures.map((fixture) => fixture.expectedTerms.recall))) ?? 0,
458
+ averagePostSpeechTimeToEndOfTurnMs: roundMetric(average(fixtures.map((fixture) => fixture.postSpeechTimeToEndOfTurnMs)), 2),
459
+ averagePostSpeechTimeToFirstFinalMs: roundMetric(average(fixtures.map((fixture) => fixture.postSpeechTimeToFirstFinalMs)), 2),
339
460
  averageTimeToEndOfTurnMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToEndOfTurnMs)), 2),
340
461
  averageTimeToFirstFinalMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToFirstFinalMs)), 2),
341
462
  averageTimeToFirstPartialMs: roundMetric(average(fixtures.map((fixture) => fixture.timeToFirstPartialMs)), 2),
@@ -343,12 +464,51 @@ var summarizeSTTBenchmark = (adapterId, fixtures) => {
343
464
  fixtureCount,
344
465
  fixturesWithErrors: fixtures.filter((fixture) => fixture.errorCount > 0).length,
345
466
  fixturesWithFragmentation: fixtures.filter((fixture) => fixture.fragmentationCount > 0).length,
467
+ groupSummaries: calculateGroupSummary(fixtures),
346
468
  passCount,
347
469
  passRate: fixtureCount > 0 ? roundMetric(passCount / fixtureCount) ?? 0 : 0,
348
470
  totalErrorCount: fixtures.reduce((sum, fixture) => sum + fixture.errorCount, 0),
349
471
  wordAccuracyRate: fixtureCount > 0 ? roundMetric(1 - (average(fixtures.map((fixture) => fixture.accuracy.wordErrorRate)) ?? 0)) ?? 0 : 0
350
472
  };
351
473
  };
474
/**
 * Checks a benchmark report against optional acceptance thresholds.
 *
 * Every threshold is optional; only the ones provided are enforced. Group
 * thresholds are applied to the groups present in the report's summary. A
 * report whose summary has no `groupSummaries` is treated as having no
 * groups instead of throwing.
 *
 * @param {object} report - Benchmark report with `adapterId` and `summary`
 *   (`passRate`, `wordAccuracyRate`, `averageTermRecall`, optional
 *   `groupSummaries`).
 * @param {object} [thresholds] - Optional minimums: `overallPassRate`,
 *   `termRecall`, `wordAccuracyRate` and `groupPassRate` (keyed by group,
 *   each with optional `passRate` / `wordAccuracyRate`).
 * @returns {{ adapterId: *, failures: string[], passed: boolean, score: number }}
 *   `passed` is true when no threshold failed; `score` is a weighted blend
 *   of pass rate (0.45), word accuracy (0.35) and term recall (0.2).
 */
var evaluateSTTBenchmarkAcceptance = (report, thresholds = {}) => {
  const { summary } = report;
  const failures = [];
  const toPercent = (rate) => `${(rate * 100).toFixed(2)}%`;
  const {
    overallPassRate,
    termRecall: minTermRecall,
    wordAccuracyRate: minWordAccuracy,
    groupPassRate: groupThresholds
  } = thresholds;
  if (overallPassRate !== undefined && summary.passRate < overallPassRate) {
    failures.push(`overall passRate ${toPercent(summary.passRate)} below ${toPercent(overallPassRate)}`);
  }
  if (minTermRecall !== undefined && summary.averageTermRecall < minTermRecall) {
    failures.push(`overall term recall ${summary.averageTermRecall.toFixed(4)} below ${minTermRecall.toFixed(4)}`);
  }
  if (minWordAccuracy !== undefined && summary.wordAccuracyRate < minWordAccuracy) {
    failures.push(`overall word accuracy ${toPercent(summary.wordAccuracyRate)} below ${toPercent(minWordAccuracy)}`);
  }
  if (groupThresholds) {
    // Summaries without per-group data must not crash the evaluation.
    for (const groupSummary of summary.groupSummaries ?? []) {
      const threshold = groupThresholds[groupSummary.group];
      if (!threshold) {
        continue;
      }
      if (threshold.passRate !== undefined && groupSummary.passRate < threshold.passRate) {
        failures.push(`${groupSummary.group} passRate ${toPercent(groupSummary.passRate)} below ${toPercent(threshold.passRate)}`);
      }
      if (threshold.wordAccuracyRate !== undefined && groupSummary.wordAccuracyRate < threshold.wordAccuracyRate) {
        failures.push(`${groupSummary.group} wordAccuracy ${toPercent(groupSummary.wordAccuracyRate)} below ${toPercent(threshold.wordAccuracyRate)}`);
      }
    }
  }
  const score = roundMetric(summary.passRate * 0.45 + summary.wordAccuracyRate * 0.35 + summary.averageTermRecall * 0.2, 3) ?? 0;
  return {
    adapterId: report.adapterId,
    failures,
    passed: failures.length === 0,
    score
  };
};
352
512
  var compareSTTBenchmarks = (reports) => {
353
513
  const entries = reports.map((report) => ({
354
514
  adapterId: report.adapterId,
@@ -436,22 +596,34 @@ var loadVoiceTestFixtures = async (fixtureDirectory) => {
436
596
  };
437
597
  // src/store.ts
438
598
  var createId = () => crypto.randomUUID();
439
- var createVoiceSessionRecord = (id) => ({
599
+ var createVoiceSessionRecord = (id, scenarioId) => ({
440
600
  committedTurnIds: [],
441
601
  createdAt: Date.now(),
442
602
  currentTurn: {
443
603
  finalText: "",
604
+ lastSpeechAt: undefined,
605
+ lastTranscriptAt: undefined,
606
+ partialEndedAt: undefined,
607
+ partialStartedAt: undefined,
444
608
  partialText: "",
609
+ silenceStartedAt: undefined,
445
610
  transcripts: []
446
611
  },
447
612
  id,
613
+ scenarioId,
448
614
  reconnect: { attempts: 0 },
449
615
  status: "active",
450
616
  transcripts: [],
451
- turns: []
617
+ turns: [],
618
+ lastCommittedTurn: {
619
+ committedAt: 0,
620
+ signature: "",
621
+ text: "",
622
+ transcriptIds: []
623
+ }
452
624
  });
453
- var resetVoiceSessionRecord = (id, existing) => ({
454
- ...createVoiceSessionRecord(id),
625
+ var resetVoiceSessionRecord = (id, existing, scenarioId) => ({
626
+ ...createVoiceSessionRecord(id, scenarioId),
455
627
  metadata: existing?.metadata
456
628
  });
457
629
  var toVoiceSessionSummary = (session) => ({
@@ -484,6 +656,61 @@ var createVoiceMemoryStore = () => {
484
656
  return { get, getOrCreate, list, remove, set };
485
657
  };
486
658
 
659
+ // src/audioConditioning.ts
660
// Fallback audio-conditioning settings, used by resolveAudioConditioningConfig
// when the matching option is null or undefined:
//   DEFAULT_TARGET_LEVEL           - level the chunk's RMS is scaled towards.
//   DEFAULT_MAX_GAIN               - upper bound on the computed gain.
//   DEFAULT_NOISE_GATE_THRESHOLD   - RMS below which a chunk is gated.
//   DEFAULT_NOISE_GATE_ATTENUATION - multiplier applied to gated chunks.
+ var DEFAULT_TARGET_LEVEL = 0.08;
661
+ var DEFAULT_MAX_GAIN = 3;
662
+ var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
663
+ var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
664
/**
 * Interprets raw audio bytes as 16-bit signed samples.
 *
 * A trailing odd byte is ignored. When the input is an ArrayBuffer, or a
 * view whose byte offset is 2-byte aligned, the result shares memory with
 * the input. A view with an odd byte offset cannot back an Int16Array
 * directly (the constructor throws a RangeError), so its bytes are copied
 * into a fresh, aligned buffer first.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio - Raw sample bytes.
 * @returns {Int16Array} The samples, in platform byte order.
 */
var toInt16Array = (audio) => {
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, sampleCount);
  }
  if (audio.byteOffset % 2 !== 0) {
    // Unaligned view: copy the usable bytes so the samples start on an even offset.
    const alignedBytes = audio.buffer.slice(audio.byteOffset, audio.byteOffset + sampleCount * 2);
    return new Int16Array(alignedBytes);
  }
  return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
};
670
/**
 * Computes the root-mean-square level of 16-bit samples.
 *
 * Each sample is divided by 32768 before squaring.
 *
 * @param {ArrayLike<number>} samples - Signed 16-bit sample values.
 * @returns {number} RMS level, or 0 when there are no samples.
 */
var computeRms = (samples) => {
  const count = samples.length;
  let energy = 0;
  for (let position = 0; position < count; position += 1) {
    const scaled = samples[position] / 32768;
    energy += scaled * scaled;
  }
  return count === 0 ? 0 : Math.sqrt(energy / count);
};
681
/**
 * Expands a partial audio-conditioning config into a complete one.
 *
 * @param {object} [config] - Optional settings; null or undefined fields
 *   fall back to the module defaults.
 * @returns {object | undefined} The resolved config with `enabled: true`, or
 *   undefined when no config is given or `enabled` is explicitly false.
 */
var resolveAudioConditioningConfig = (config) => {
  const disabled = !config || config.enabled === false;
  if (disabled) {
    return undefined;
  }
  const { maxGain, noiseGateAttenuation, noiseGateThreshold, targetLevel } = config;
  return {
    enabled: true,
    maxGain: maxGain ?? DEFAULT_MAX_GAIN,
    noiseGateAttenuation: noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
    noiseGateThreshold: noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
    targetLevel: targetLevel ?? DEFAULT_TARGET_LEVEL
  };
};
693
/**
 * Applies gain and a simple noise gate to one chunk of 16-bit audio.
 *
 * The chunk's RMS decides whether it is gated (RMS below
 * `noiseGateThreshold`). Gain is `targetLevel` divided by the (gated) level,
 * capped at `maxGain` and floored at 0.25, then multiplied by the gate
 * factor. Scaled samples are rounded and clamped to the 16-bit range.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio - Raw sample bytes.
 * @param {object} [config] - Resolved conditioning config.
 * @returns {ArrayBuffer | ArrayBufferView | Uint8Array} The input itself when
 *   there is no config or no samples, otherwise a new Uint8Array holding the
 *   conditioned samples.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const samples = toInt16Array(audio);
  if (samples.length === 0) {
    return audio;
  }
  const level = computeRms(samples);
  const gated = level < config.noiseGateThreshold;
  const gateFactor = gated ? config.noiseGateAttenuation : 1;
  // Guard against division by zero for silent chunks.
  const baseLevel = Math.max(level * gateFactor, 0.000001);
  const cappedGain = Math.min(config.maxGain, config.targetLevel / baseLevel);
  const appliedGain = Math.max(0.25, cappedGain) * gateFactor;
  const conditioned = Int16Array.from(samples, (sample) => {
    const scaled = Math.round(sample * appliedGain);
    return Math.max(-32768, Math.min(32767, scaled));
  });
  return new Uint8Array(conditioned.buffer);
};
713
+
487
714
  // src/logger.ts
488
715
  var noop = () => {};
489
716
  var createNoopLogger = () => ({
@@ -500,13 +727,128 @@ var resolveLogger = (logger) => ({
500
727
  // src/session.ts
501
728
  var DEFAULT_RECONNECT_TIMEOUT = 30000;
502
729
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
730
// Session defaults.
//   DEFAULT_TRANSCRIPT_STABILITY_MS - fallback for turnDetection.transcriptStabilityMs.
//   DEFAULT_FALLBACK_*              - fallbacks for the matching sttFallback options
//                                     (replayWindowMs, settleMs, completionTimeoutMs,
//                                     confidenceThreshold, minTextLength, maxAttemptsPerTurn).
//   DEFAULT_DUPLICATE_TURN_WINDOW_MS - not referenced in the visible code;
//                                     presumably a duplicate-turn window — TODO confirm.
//   FALLBACK_CONFIDENCE_SELECTION_DELTA / FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO
//                                   - margins used by selectBetterTurnText when choosing
//                                     between primary and fallback text.
//   DEFAULT_FORMAT                  - audio format passed when opening the fallback adapter
//                                     and used to convert chunk byte lengths to durations.
+ var DEFAULT_TRANSCRIPT_STABILITY_MS = 450;
731
+ var DEFAULT_FALLBACK_REPLAY_MS = 8000;
732
+ var DEFAULT_FALLBACK_SETTLE_MS = 220;
733
+ var DEFAULT_FALLBACK_COMPLETION_TIMEOUT_MS = 2500;
734
+ var DEFAULT_FALLBACK_CONFIDENCE_THRESHOLD = 0.6;
735
+ var DEFAULT_FALLBACK_MIN_TEXT_LENGTH = 2;
736
+ var DEFAULT_FALLBACK_MAX_ATTEMPTS_PER_TURN = 1;
737
+ var DEFAULT_DUPLICATE_TURN_WINDOW_MS = 5000;
738
+ var FALLBACK_CONFIDENCE_SELECTION_DELTA = 0.05;
739
+ var FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO = 0.12;
740
+ var DEFAULT_FORMAT = {
741
+ channels: 1,
742
+ container: "raw",
743
+ encoding: "pcm_s16le",
744
+ sampleRateHz: 16000
745
+ };
503
746
  var toError = (value) => value instanceof Error ? value : new Error(String(value));
504
747
  var createEmptyCurrentTurn = () => ({
505
748
  finalText: "",
749
+ lastSpeechAt: undefined,
750
+ lastTranscriptAt: undefined,
751
+ partialEndedAt: undefined,
752
+ partialStartedAt: undefined,
506
753
  partialText: "",
754
+ silenceStartedAt: undefined,
507
755
  transcripts: []
508
756
  });
509
757
  var cloneTranscript = (transcript) => ({ ...transcript });
758
/**
 * Counts the whitespace-separated words in `text`.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Word count; 0 for empty or whitespace-only text.
 */
var countWords2 = (text) => {
  const words = text.match(/\S+/g);
  return words === null ? 0 : words.length;
};
759
/**
 * Trims `text` and collapses every run of whitespace to a single space.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} The normalized text.
 */
var normalizeText2 = (text) => {
  const pieces = text.split(/\s+/).filter((piece) => piece.length > 0);
  return pieces.join(" ");
};
760
/**
 * Converts a chunk's byte length to a duration in milliseconds, using the
 * sample rate and channel count of DEFAULT_FORMAT and two bytes per sample.
 *
 * @param {{ byteLength: number }} chunk - Audio chunk.
 * @returns {number} Duration in milliseconds.
 */
var getAudioChunkDurationMs = (chunk) => {
  const bytesPerSecond = DEFAULT_FORMAT.sampleRateHz * DEFAULT_FORMAT.channels * 2;
  return chunk.byteLength / bytesPerSecond * 1000;
};
761
/**
 * Sums the durations of a list of audio chunks.
 *
 * @param {Array<{ byteLength: number }>} chunks - Buffered audio chunks.
 * @returns {number} Total duration in milliseconds; 0 for an empty list.
 */
var getBufferedAudioDurationMs = (chunks) => {
  let totalMs = 0;
  chunks.forEach((chunk) => {
    totalMs += getAudioChunkDurationMs(chunk);
  });
  return totalMs;
};
762
/**
 * Averages the numeric `confidence` values found on a set of transcripts.
 *
 * Transcripts whose confidence is not a number are skipped.
 *
 * @param {Iterable<{ confidence?: number }>} transcripts - Transcripts to scan.
 * @returns {number} Mean confidence, or 0 when no transcript carries one.
 */
var calculateMeanConfidence = (transcripts) => {
  const scores = [];
  for (const { confidence } of transcripts) {
    if (typeof confidence === "number") {
      scores.push(confidence);
    }
  }
  if (scores.length === 0) {
    return 0;
  }
  return scores.reduce((sum, score) => sum + score, 0) / scores.length;
};
776
/**
 * Builds the quality record for a turn from its selected transcripts.
 *
 * @param {Array<{ confidence?: number, isFinal?: boolean }>} transcripts -
 *   Transcripts selected for the turn.
 * @param {*} source - Stored as `source`.
 * @param {*} fallbackUsed - Stored as `fallbackUsed`.
 * @param {*} fallbackDiagnostics - Stored as `fallback`.
 * @param {*} correctionDiagnostics - Stored as `correction`.
 * @returns {object} Counts of final, partial and confidence-bearing
 *   transcripts, the average confidence (undefined when no transcript has a
 *   numeric confidence) and the pass-through fields.
 */
var createTurnQuality = (transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics) => {
  let confidenceTotal = 0;
  let confidenceSampleCount = 0;
  let finalTranscriptCount = 0;
  let partialTranscriptCount = 0;
  transcripts.forEach((transcript) => {
    if (typeof transcript.confidence === "number") {
      confidenceTotal += transcript.confidence;
      confidenceSampleCount += 1;
    }
    if (transcript.isFinal) {
      finalTranscriptCount += 1;
    } else {
      partialTranscriptCount += 1;
    }
  });
  let averageConfidence;
  if (confidenceSampleCount > 0) {
    averageConfidence = confidenceTotal / confidenceSampleCount;
  }
  return {
    averageConfidence,
    confidenceSampleCount,
    correction: correctionDiagnostics,
    fallback: fallbackDiagnostics,
    fallbackUsed,
    finalTranscriptCount,
    partialTranscriptCount,
    selectedTranscriptCount: transcripts.length,
    source
  };
};
791
/**
 * Normalizes correction text by delegating to normalizeText2.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} Whatever normalizeText2 returns for `text`.
 */
var normalizeCorrectionText = (text) => {
  const normalized = normalizeText2(text);
  return normalized;
};
792
/**
 * Decides whether a fallback transcription should run for a turn candidate.
 *
 * Triggers:
 *  - "always": always true.
 *  - "empty-turn": true when the word count is below `minTextLength`.
 *  - "low-confidence": true when the mean confidence is above 0 and below
 *    `confidenceThreshold`.
 *  - anything else: true when either of the two conditions above holds.
 *
 * @param {{ text: string, transcripts: Iterable<object> }} candidate - Turn candidate.
 * @param {{ trigger: string, minTextLength: number, confidenceThreshold: number }} config -
 *   Fallback settings.
 * @returns {boolean} Whether the fallback should be attempted.
 */
var isFallbackNeeded = (candidate, config) => {
  const wordCount = countWords2(normalizeText2(candidate.text));
  const isTooShort = () => wordCount < config.minTextLength;
  const isLowConfidence = () => {
    const mean = calculateMeanConfidence(candidate.transcripts);
    return mean > 0 && mean < config.confidenceThreshold;
  };
  switch (config.trigger) {
    case "always":
      return true;
    case "empty-turn":
      return isTooShort();
    case "low-confidence":
      return isLowConfidence();
    default:
      return isLowConfidence() || isTooShort();
  }
};
807
/**
 * Picks between the primary turn candidate and a fallback transcription.
 *
 * Checks run in this order:
 *  1. an empty fallback keeps the primary; an empty primary takes the fallback;
 *  2. a word-count difference of at least
 *     FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO (relative to the larger
 *     count) picks the longer text;
 *  3. a confidence lead greater than FALLBACK_CONFIDENCE_SELECTION_DELTA
 *     picks the more confident side;
 *  4. otherwise the fallback wins only if it has more words.
 *
 * @param {{ text: string, wordCount: number, confidence: number }} candidate - Primary result.
 * @param {{ text: string, wordCount: number, confidence: number }} fallback - Fallback result.
 * @returns {{ reason: string, winner: object }} The chosen side and why.
 */
var selectBetterTurnText = (candidate, fallback) => {
  const decide = (reason, winner) => ({ reason, winner });
  if (!fallback.text) {
    return decide("fallback-empty", candidate);
  }
  if (!candidate.text) {
    return decide("primary-empty", fallback);
  }
  const wordCountDelta = fallback.wordCount - candidate.wordCount;
  const largestWordCount = Math.max(candidate.wordCount, fallback.wordCount, 1);
  const deltaRatio = Math.abs(wordCountDelta) / largestWordCount;
  if (wordCountDelta !== 0 && deltaRatio >= FALLBACK_WORD_COUNT_SELECTION_MARGIN_RATIO) {
    return decide("word-count-margin", wordCountDelta > 0 ? fallback : candidate);
  }
  if (fallback.confidence > candidate.confidence + FALLBACK_CONFIDENCE_SELECTION_DELTA) {
    return decide("confidence-margin", fallback);
  }
  const primaryLeads = candidate.confidence > fallback.confidence + FALLBACK_CONFIDENCE_SELECTION_DELTA;
  if (!primaryLeads && fallback.wordCount > candidate.wordCount) {
    return decide("word-count-tiebreak", fallback);
  }
  return decide("kept-primary", candidate);
};
510
852
  var setTurnResult = (session, turnId, input) => {
511
853
  session.turns = session.turns.map((turn) => turn.id === turnId ? {
512
854
  ...turn,
@@ -523,12 +865,55 @@ var createVoiceSession = (options) => {
523
865
  };
524
866
  const turnDetection = {
525
867
  silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS,
526
- speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD
868
+ speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD,
869
+ transcriptStabilityMs: options.turnDetection.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS
527
870
  };
871
+ const sttFallback = options.sttFallback ? {
872
+ adapter: options.sttFallback.adapter,
873
+ completionTimeoutMs: options.sttFallback.completionTimeoutMs ?? DEFAULT_FALLBACK_COMPLETION_TIMEOUT_MS,
874
+ confidenceThreshold: options.sttFallback.confidenceThreshold ?? DEFAULT_FALLBACK_CONFIDENCE_THRESHOLD,
875
+ maxAttemptsPerTurn: options.sttFallback.maxAttemptsPerTurn ?? DEFAULT_FALLBACK_MAX_ATTEMPTS_PER_TURN,
876
+ minTextLength: options.sttFallback.minTextLength ?? DEFAULT_FALLBACK_MIN_TEXT_LENGTH,
877
+ replayWindowMs: options.sttFallback.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS,
878
+ settleMs: options.sttFallback.settleMs ?? DEFAULT_FALLBACK_SETTLE_MS,
879
+ trigger: options.sttFallback.trigger ?? "empty-or-low-confidence"
880
+ } : undefined;
881
+ const phraseHints = options.phraseHints ?? [];
528
882
  let socket = options.socket;
529
883
  let sttSession = null;
530
884
  let silenceTimer = null;
531
885
  let speechDetected = false;
886
+ let operationQueue = Promise.resolve();
887
+ let adapterGenerationCounter = 0;
888
+ let activeAdapterGeneration = 0;
889
+ const currentTurnAudio = [];
890
+ let fallbackAttemptsForCurrentTurn = 0;
891
+ const pruneTurnAudio = () => {
892
+ const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
893
+ const cutoffAt = Date.now() - replayWindowMs;
894
+ let index = 0;
895
+ while (index < currentTurnAudio.length && currentTurnAudio[index].recordedAt < cutoffAt) {
896
+ index += 1;
897
+ }
898
+ if (index > 0) {
899
+ currentTurnAudio.splice(0, index);
900
+ }
901
+ };
902
+ const pushTurnAudio = (audio) => {
903
+ const chunk = audio instanceof ArrayBuffer ? new Uint8Array(audio.slice(0)) : new Uint8Array(audio.buffer.slice(audio.byteOffset, audio.byteOffset + audio.byteLength));
904
+ currentTurnAudio.push({
905
+ chunk,
906
+ recordedAt: Date.now()
907
+ });
908
+ pruneTurnAudio();
909
+ };
910
+ const getFallbackWindowAudio = () => {
911
+ if (!sttFallback?.adapter) {
912
+ return [];
913
+ }
914
+ pruneTurnAudio();
915
+ return currentTurnAudio.map((audio) => audio.chunk);
916
+ };
532
917
  const clearSilenceTimer = () => {
533
918
  if (!silenceTimer) {
534
919
  return;
@@ -554,12 +939,28 @@ var createVoiceSession = (options) => {
554
939
  await options.store.set(options.id, session);
555
940
  return session;
556
941
  };
942
+ const runSerial = (phase, operation) => {
943
+ const result = operationQueue.then(async () => {
944
+ logger.debug("voice session operation", {
945
+ phase,
946
+ sessionId: options.id
947
+ });
948
+ return await operation();
949
+ });
950
+ operationQueue = result.then(() => {
951
+ return;
952
+ }, () => {
953
+ return;
954
+ });
955
+ return result;
956
+ };
557
957
  const closeAdapter = async (reason) => {
558
958
  if (!sttSession) {
559
959
  return;
560
960
  }
561
961
  const activeSession = sttSession;
562
962
  sttSession = null;
963
+ activeAdapterGeneration = 0;
563
964
  try {
564
965
  await activeSession.close(reason);
565
966
  } catch (error) {
@@ -569,13 +970,87 @@ var createVoiceSession = (options) => {
569
970
  });
570
971
  }
571
972
  };
572
- const scheduleSilenceCommit = () => {
573
- if (silenceTimer) {
973
+ const scheduleTurnCommit = (delayMs, reason, reset = true) => {
974
+ if (!reset && silenceTimer) {
574
975
  return;
575
976
  }
977
+ if (reset) {
978
+ clearSilenceTimer();
979
+ }
576
980
  silenceTimer = setTimeout(() => {
577
- api.commitTurn("silence");
578
- }, turnDetection.silenceMs);
981
+ silenceTimer = null;
982
+ api.commitTurn(reason);
983
+ }, delayMs);
984
+ };
985
+ const scheduleSilenceCommit = (delayMs = turnDetection.silenceMs, reset = true) => scheduleTurnCommit(delayMs, "silence", reset);
986
+ const requestTurnCommit = async (reason) => {
987
+ const session = await readSession();
988
+ const text = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText, {
989
+ partialEndedAtMs: session.currentTurn.partialEndedAt,
990
+ partialStartedAtMs: session.currentTurn.partialStartedAt
991
+ });
992
+ if (!text) {
993
+ return;
994
+ }
995
+ const transcriptStabilityAge = session.currentTurn.lastTranscriptAt !== undefined ? Date.now() - session.currentTurn.lastTranscriptAt : undefined;
996
+ if (reason !== "manual" && typeof transcriptStabilityAge === "number" && transcriptStabilityAge < turnDetection.transcriptStabilityMs) {
997
+ scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason);
998
+ return;
999
+ }
1000
+ await commitTurnInternal(reason);
1001
+ };
1002
+ const failInternal = async (error) => {
1003
+ clearSilenceTimer();
1004
+ const session = await writeSession((currentSession) => {
1005
+ currentSession.lastActivityAt = Date.now();
1006
+ currentSession.status = "failed";
1007
+ });
1008
+ const resolvedError = toError(error);
1009
+ await send({
1010
+ message: resolvedError.message,
1011
+ recoverable: false,
1012
+ type: "error"
1013
+ });
1014
+ await closeAdapter("failed");
1015
+ speechDetected = false;
1016
+ rewindFallbackTurnAudio();
1017
+ await options.route.onError?.({
1018
+ api,
1019
+ context: options.context,
1020
+ error: resolvedError,
1021
+ session,
1022
+ sessionId: options.id
1023
+ });
1024
+ };
1025
+ const completeInternal = async (result) => {
1026
+ clearSilenceTimer();
1027
+ const session = await writeSession((currentSession) => {
1028
+ if (currentSession.status === "completed") {
1029
+ return;
1030
+ }
1031
+ currentSession.lastActivityAt = Date.now();
1032
+ currentSession.status = "completed";
1033
+ if (result !== undefined && currentSession.turns.length > 0) {
1034
+ const lastTurn = currentSession.turns.at(-1);
1035
+ if (lastTurn) {
1036
+ setTurnResult(currentSession, lastTurn.id, {
1037
+ result
1038
+ });
1039
+ }
1040
+ }
1041
+ });
1042
+ await send({
1043
+ sessionId: options.id,
1044
+ type: "complete"
1045
+ });
1046
+ await closeAdapter("complete");
1047
+ speechDetected = false;
1048
+ rewindFallbackTurnAudio();
1049
+ await options.route.onComplete({
1050
+ api,
1051
+ context: options.context,
1052
+ session
1053
+ });
579
1054
  };
580
1055
  const handleError = async (event) => {
581
1056
  await send({
@@ -584,18 +1059,273 @@ var createVoiceSession = (options) => {
584
1059
  type: "error"
585
1060
  });
586
1061
  if (!event.recoverable) {
587
- await api.fail(event.error);
1062
+ await failInternal(event.error);
588
1063
  }
589
1064
  };
590
1065
  const handleClose = async (event) => {
591
1066
  if (event.recoverable === false) {
592
- await api.fail(new Error(event.reason ?? "Speech-to-text session closed"));
1067
+ await failInternal(new Error(event.reason ?? "Speech-to-text session closed"));
1068
+ return;
1069
+ }
1070
+ if (!event.reason) {
1071
+ await closeAdapter("provider stream closed");
1072
+ return;
1073
+ }
1074
+ await closeAdapter(event.reason);
1075
+ };
1076
/**
 * Resets the per-turn fallback state: empties the buffered turn audio in
 * place and zeroes the fallback attempt counter.
 */
const rewindFallbackTurnAudio = () => {
  currentTurnAudio.length = 0;
  fallbackAttemptsForCurrentTurn = 0;
};
1080
/**
 * Replays the buffered turn audio through the configured fallback STT adapter
 * and decides whether the fallback transcript should replace the primary one.
 *
 * @param {string} primaryText - Turn text produced by the primary adapter.
 * @param {Array<object>} primaryTranscripts - Transcripts backing `primaryText`.
 * @returns {Promise<null | {
 *   diagnostics: object,
 *   fallbackUsed: boolean,
 *   source: "primary" | "fallback",
 *   text: string,
 *   transcripts: Array<object>
 * }>} `null` when no fallback ran or it produced no transcripts; otherwise
 *   the selected text with diagnostics describing both candidates.
 */
const runFallbackTranscription = async (primaryText, primaryTranscripts) => {
  if (!sttFallback?.adapter || fallbackAttemptsForCurrentTurn >= sttFallback.maxAttemptsPerTurn) {
    return null;
  }
  const candidate = {
    text: primaryText,
    transcripts: primaryTranscripts
  };
  if (!isFallbackNeeded(candidate, sttFallback)) {
    return null;
  }
  // The attempt is counted as soon as a fallback is deemed necessary, even if
  // there turns out to be no audio to replay.
  fallbackAttemptsForCurrentTurn += 1;
  const replayAudio = getFallbackWindowAudio();
  if (replayAudio.length === 0) {
    return null;
  }
  let fallbackSession = null;
  const fallbackTranscripts = [];
  let fallbackClosed = false;
  let fallbackEndOfTurnReceived = false;
  let fallbackFinalReceived = false;
  let lastFallbackTranscriptAt = 0;
  try {
    fallbackSession = await sttFallback.adapter.open({
      format: DEFAULT_FORMAT,
      phraseHints,
      sessionId: `${options.id}:fallback:${fallbackAttemptsForCurrentTurn}`
    });
  } catch (error) {
    logger.warn("voice stt fallback open failed", {
      error: toError(error).message,
      sessionId: options.id
    });
    return null;
  }
  const unsubscribers = [
    fallbackSession.on("final", ({ transcript }) => {
      fallbackFinalReceived = true;
      lastFallbackTranscriptAt = Date.now();
      fallbackTranscripts.push(cloneTranscript(transcript));
    }),
    fallbackSession.on("partial", ({ transcript }) => {
      lastFallbackTranscriptAt = Date.now();
      fallbackTranscripts.push(cloneTranscript(transcript));
    }),
    fallbackSession.on("endOfTurn", () => {
      fallbackEndOfTurnReceived = true;
    }),
    fallbackSession.on("error", (event) => {
      logger.warn("voice stt fallback error", {
        error: toError(event.error).message,
        sessionId: options.id
      });
    }),
    fallbackSession.on("close", () => {
      fallbackClosed = true;
    })
  ];
  // Best-effort close: a failing close is logged, never rethrown.
  const closeFallback = async (reason) => {
    if (!fallbackSession) {
      return;
    }
    try {
      await fallbackSession.close(reason);
    } catch (error) {
      logger.warn("voice stt fallback close failed", {
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      fallbackSession = null;
    }
  };
  try {
    for (const chunk of replayAudio) {
      await fallbackSession.send(chunk);
    }
    const replayDurationMs = getBufferedAudioDurationMs(replayAudio);
    // Wait at least the configured timeout; longer replays get extra time,
    // capped at 4000 ms.
    const completionTimeoutMs = Math.max(
      sttFallback.completionTimeoutMs,
      Math.min(4000, Math.max(sttFallback.settleMs * 4, Math.round(replayDurationMs * 0.18)))
    );
    const waitStartedAt = Date.now();
    while (Date.now() - waitStartedAt < completionTimeoutMs) {
      const idleMs = lastFallbackTranscriptAt > 0 ? Date.now() - lastFallbackTranscriptAt : Date.now() - waitStartedAt;
      const settled = idleMs >= sttFallback.settleMs;
      if ((fallbackEndOfTurnReceived || fallbackFinalReceived) && settled) {
        break;
      }
      if (fallbackClosed && (lastFallbackTranscriptAt === 0 || settled)) {
        break;
      }
      await Bun.sleep(Math.min(75, Math.max(25, sttFallback.settleMs / 2)));
    }
  } catch (error) {
    logger.warn("voice stt fallback failed", {
      error: toError(error).message,
      sessionId: options.id
    });
  } finally {
    await closeFallback("fallback-complete");
    for (const unsubscribe of unsubscribers) {
      unsubscribe();
    }
  }
  if (fallbackTranscripts.length === 0) {
    return null;
  }
  const fallbackText = buildTurnText(fallbackTranscripts, "", {});
  const fallbackCandidate = {
    confidence: calculateMeanConfidence(fallbackTranscripts),
    text: fallbackText,
    wordCount: countWords2(normalizeText2(fallbackText))
  };
  const primaryCandidate = {
    confidence: calculateMeanConfidence(primaryTranscripts),
    text: primaryText,
    wordCount: countWords2(normalizeText2(primaryText))
  };
  const selection = selectBetterTurnText(primaryCandidate, fallbackCandidate);
  // One predicate drives both the diagnostics and the returned source. The
  // previous code compared against the fallback text for `selected` and the
  // primary text for the return branch, so identical texts were reported as
  // "fallback selected" while the primary result was returned.
  const primaryWon = selection.winner.text === primaryCandidate.text;
  const diagnostics = {
    attempted: true,
    fallbackConfidence: fallbackCandidate.confidence,
    fallbackText: fallbackCandidate.text,
    fallbackWordCount: fallbackCandidate.wordCount,
    primaryConfidence: primaryCandidate.confidence,
    primaryText,
    primaryWordCount: primaryCandidate.wordCount,
    selected: !primaryWon,
    selectionReason: selection.reason,
    trigger: sttFallback.trigger
  };
  const asFinal = (transcript) => ({
    ...transcript,
    isFinal: true
  });
  if (primaryWon) {
    return {
      diagnostics,
      fallbackUsed: false,
      source: "primary",
      text: primaryText,
      transcripts: primaryTranscripts.map(asFinal)
    };
  }
  // `fallbackTranscripts` is non-empty here (checked above), so no
  // placeholder transcript is needed.
  return {
    diagnostics,
    fallbackUsed: true,
    source: "fallback",
    text: selection.winner.text,
    transcripts: fallbackTranscripts.map(asFinal)
  };
};
1236
/**
 * Collects the ids of the final transcripts; when none is final, returns the
 * ids of every transcript instead.
 *
 * @param {Array<{ id: string, isFinal?: boolean }>} transcripts
 * @returns {string[]} Ids in their original order.
 */
const getFinalTranscriptIds = (transcripts) => {
  const finalIds = [];
  const everyId = [];
  for (const transcript of transcripts) {
    everyId.push(transcript.id);
    if (transcript.isFinal) {
      finalIds.push(transcript.id);
    }
  }
  return finalIds.length > 0 ? finalIds : everyId;
};
1241
/**
 * Runs the optional `route.correctTurn` hook over the turn text.
 *
 * The hook may return a string, an object carrying `text` (plus optional
 * `metadata`, `provider`, `reason`), or nothing. Anything without a string
 * `text` leaves the original text in place.
 *
 * @param {{ text: string, transcripts: Array<object>, session: object, fallbackDiagnostics?: object }} input
 * @returns {Promise<undefined | { diagnostics: object, text: string }>}
 *   `undefined` when no hook is configured; otherwise the (possibly
 *   corrected) text and diagnostics about the correction.
 */
const runTurnCorrection = async (input) => {
  if (!options.route.correctTurn) {
    return;
  }
  const originalText = input.text;
  const result = await options.route.correctTurn({
    api,
    context: options.context,
    fallback: input.fallbackDiagnostics,
    phraseHints,
    session: input.session,
    text: originalText,
    transcripts: input.transcripts.map(cloneTranscript)
  });
  // `typeof null === "object"`, so null is excluded explicitly; previously a
  // hook returning null crashed on `result.metadata`.
  const details = result !== null && typeof result === "object" ? result : undefined;
  let nextText = originalText;
  if (typeof result === "string") {
    nextText = result;
  } else if (typeof details?.text === "string") {
    nextText = details.text;
  }
  const correctedText = normalizeCorrectionText(nextText);
  const normalizedOriginal = normalizeCorrectionText(originalText);
  const hasCorrection = correctedText.length > 0;
  return {
    diagnostics: {
      attempted: true,
      changed: hasCorrection && correctedText !== normalizedOriginal,
      correctedText: hasCorrection ? correctedText : normalizedOriginal,
      metadata: details?.metadata,
      originalText,
      provider: details?.provider,
      reason: details?.reason
    },
    // An empty correction never erases the turn: keep the original text.
    text: hasCorrection ? correctedText : originalText
  };
};
1271
/**
 * Makes sure `session.lastCommittedTurn` exists, installing an empty guard
 * record when it is missing. Mutates and returns the same session object.
 *
 * @param {object} session - Session record to patch in place.
 * @returns {object} The session that was passed in.
 */
const ensureCommittedTurnGuard = (session) => {
  session.lastCommittedTurn ||= {
    committedAt: 0,
    signature: "",
    text: "",
    transcriptIds: []
  };
  return session;
};
1282
/**
 * Builds the signature string for a turn: the normalised text, a `|`
 * separator, then the comma-joined transcript ids.
 *
 * @param {object} session - Session whose current turn supplies the ids when no override is given.
 * @param {string} finalText - Turn text to normalise into the signature.
 * @param {string[]} [transcriptIdsOverride] - Ids to use instead of the current turn's.
 * @returns {string}
 */
const buildTurnSignature = (session, finalText, transcriptIdsOverride) => {
  const ids = transcriptIdsOverride ?? getFinalTranscriptIds(session.currentTurn.transcripts);
  const normalizedText = normalizeText2(finalText);
  return `${normalizedText}|${ids.join(",")}`;
};
1286
/**
 * Decides whether committing `finalText` now would repeat the turn recorded
 * in `session.lastCommittedTurn`.
 *
 * A commit counts as a duplicate only inside the
 * `DEFAULT_DUPLICATE_TURN_WINDOW_MS` window after the previous commit, and
 * then when either
 *  - the normalised text matches and no audio arrived since that commit, or
 *  - the signature matches and every final transcript of the current turn
 *    was already part of the previous commit.
 *
 * @param {object} session - Session holding `currentTurn` and `lastCommittedTurn`.
 * @param {string} finalText - Text about to be committed.
 * @returns {boolean}
 */
const isDuplicateTurnCommit = (session, finalText) => {
  const signature = buildTurnSignature(session, finalText);
  const previous = session.lastCommittedTurn;
  const withinWindow = previous && previous.committedAt > 0 && Date.now() - previous.committedAt < DEFAULT_DUPLICATE_TURN_WINDOW_MS;
  if (!withinWindow) {
    return false;
  }
  const sameText = normalizeText2(finalText) === normalizeText2(previous.text ?? "");
  const noNewAudio = (session.currentTurn.lastAudioAt ?? 0) <= previous.committedAt;
  if (sameText && noNewAudio) {
    return true;
  }
  if (signature !== (previous.signature ?? "")) {
    return false;
  }
  const committedIds = new Set(previous.transcriptIds ?? []);
  return session.currentTurn.transcripts.every((transcript) => !transcript.isFinal || committedIds.has(transcript.id));
};
1308
/**
 * Records the just-committed turn on `session.lastCommittedTurn` (commit
 * time, signature, normalised text and transcript ids), keeping any other
 * fields the previous guard record carried.
 *
 * @param {object} session - Session record to update in place.
 * @param {string} finalText - Text of the committed turn.
 * @param {Array<object>} committedTranscripts - Transcripts of the committed turn.
 */
const markTurnCommitted = (session, finalText, committedTranscripts) => {
  const transcriptIds = getFinalTranscriptIds(committedTranscripts);
  const previousGuard = session.lastCommittedTurn ?? {};
  session.lastCommittedTurn = {
    ...previousGuard,
    committedAt: Date.now(),
    signature: buildTurnSignature(session, finalText, transcriptIds),
    text: normalizeText2(finalText),
    transcriptIds
  };
};
595
1317
  const handlePartial = async (transcript) => {
596
1318
  await writeSession((session) => {
597
- session.currentTurn.lastAudioAt = Date.now();
598
- session.currentTurn.partialText = buildTurnText(session.currentTurn.transcripts, transcript.text);
1319
+ const nextPartialStartedAt = transcript.startedAtMs ?? session.currentTurn.partialStartedAt;
1320
+ const nextPartialEndedAt = transcript.endedAtMs ?? session.currentTurn.partialEndedAt;
1321
+ const preferredPartial = selectPreferredTranscriptText(session.currentTurn.partialText, transcript.text);
1322
+ session.currentTurn.lastTranscriptAt = Date.now();
1323
+ session.currentTurn.partialStartedAt = nextPartialStartedAt;
1324
+ session.currentTurn.partialEndedAt = nextPartialEndedAt;
1325
+ session.currentTurn.partialText = buildTurnText(session.currentTurn.transcripts, preferredPartial, {
1326
+ partialEndedAtMs: nextPartialEndedAt,
1327
+ partialStartedAtMs: nextPartialStartedAt
1328
+ });
599
1329
  session.lastActivityAt = Date.now();
600
1330
  session.status = "active";
601
1331
  });
@@ -617,8 +1347,11 @@ var createVoiceSession = (options) => {
617
1347
  cloneTranscript(transcript)
618
1348
  ];
619
1349
  }
620
- session.currentTurn.finalText = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText);
621
- session.currentTurn.lastAudioAt = Date.now();
1350
+ session.currentTurn.finalText = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText, {
1351
+ partialEndedAtMs: session.currentTurn.partialEndedAt,
1352
+ partialStartedAtMs: session.currentTurn.partialStartedAt
1353
+ });
1354
+ session.currentTurn.lastTranscriptAt = Date.now();
622
1355
  session.lastActivityAt = Date.now();
623
1356
  session.status = "active";
624
1357
  });
@@ -627,36 +1360,60 @@ var createVoiceSession = (options) => {
627
1360
  type: "final"
628
1361
  });
629
1362
  };
1363
/**
 * Re-arms the silence commit for a turn that still has uncommitted text.
 *
 * With no pending text, speech detection is simply switched off. Otherwise
 * the commit is scheduled for whichever is later: the remainder of the
 * silence window or the remainder of the transcript-stability window.
 *
 * @param {object} session - Session whose `currentTurn` is inspected.
 */
const resumePendingTurnCommit = (session) => {
  const { currentTurn } = session;
  const pendingText = buildTurnText(currentTurn.transcripts, currentTurn.partialText, {
    partialEndedAtMs: currentTurn.partialEndedAt,
    partialStartedAtMs: currentTurn.partialStartedAt
  });
  speechDetected = Boolean(pendingText);
  if (!speechDetected) {
    return;
  }
  // Age of the silence; falls back to the last speech time, else 0.
  let audioAge = 0;
  if (currentTurn.silenceStartedAt !== undefined) {
    audioAge = Date.now() - currentTurn.silenceStartedAt;
  } else if (currentTurn.lastSpeechAt !== undefined) {
    audioAge = Date.now() - currentTurn.lastSpeechAt;
  }
  // Without a transcript timestamp the transcript is treated as already stable.
  let transcriptAge = turnDetection.transcriptStabilityMs;
  if (currentTurn.lastTranscriptAt !== undefined) {
    transcriptAge = Date.now() - currentTurn.lastTranscriptAt;
  }
  const remainingSilenceMs = turnDetection.silenceMs - audioAge;
  const remainingStabilityMs = turnDetection.transcriptStabilityMs - transcriptAge;
  scheduleSilenceCommit(Math.max(0, remainingSilenceMs, remainingStabilityMs));
};
630
1378
  const ensureAdapter = async () => {
631
1379
  if (sttSession) {
632
1380
  return sttSession;
633
1381
  }
634
- sttSession = await options.stt.open({
635
- format: {
636
- channels: 1,
637
- container: "raw",
638
- encoding: "pcm_s16le",
639
- sampleRateHz: 16000
640
- },
1382
+ const openedSession = await options.stt.open({
1383
+ format: DEFAULT_FORMAT,
1384
+ phraseHints,
641
1385
  sessionId: options.id
642
1386
  });
643
- sttSession.on("partial", ({ transcript }) => {
644
- handlePartial(transcript);
1387
+ const generation = ++adapterGenerationCounter;
1388
+ sttSession = openedSession;
1389
+ activeAdapterGeneration = generation;
1390
+ const runAdapterEvent = (phase, handler) => {
1391
+ runSerial(phase, async () => {
1392
+ if (activeAdapterGeneration !== generation) {
1393
+ return;
1394
+ }
1395
+ await handler();
1396
+ });
1397
+ };
1398
+ openedSession.on("partial", ({ transcript }) => {
1399
+ runAdapterEvent("adapter.partial", () => handlePartial(transcript));
645
1400
  });
646
- sttSession.on("final", ({ transcript }) => {
647
- handleFinal(transcript);
1401
+ openedSession.on("final", ({ transcript }) => {
1402
+ runAdapterEvent("adapter.final", () => handleFinal(transcript));
648
1403
  });
649
- sttSession.on("endOfTurn", ({ reason }) => {
650
- clearSilenceTimer();
651
- api.commitTurn(reason);
1404
+ openedSession.on("endOfTurn", ({ reason }) => {
1405
+ runAdapterEvent("adapter.endOfTurn", async () => {
1406
+ clearSilenceTimer();
1407
+ await requestTurnCommit(reason);
1408
+ });
652
1409
  });
653
- sttSession.on("error", (event) => {
654
- handleError(event);
1410
+ openedSession.on("error", (event) => {
1411
+ runAdapterEvent("adapter.error", () => handleError(event));
655
1412
  });
656
- sttSession.on("close", (event) => {
657
- handleClose(event);
1413
+ openedSession.on("close", (event) => {
1414
+ runAdapterEvent("adapter.close", () => handleClose(event));
658
1415
  });
659
- return sttSession;
1416
+ return openedSession;
660
1417
  };
661
1418
  const completeTurn = async (session, turn) => {
662
1419
  const output = await options.route.onTurn({
@@ -685,206 +1442,312 @@ var createVoiceSession = (options) => {
685
1442
  });
686
1443
  }
687
1444
  if (output?.complete) {
688
- await api.complete(output.result);
1445
+ await completeInternal(output.result);
689
1446
  }
690
1447
  };
691
- const api = {
692
- id: options.id,
693
- close: async (reason) => {
694
- clearSilenceTimer();
695
- await closeAdapter(reason);
696
- await Promise.resolve(socket.close(1000, reason));
697
- },
698
- commitTurn: async (reason = "manual") => {
699
- clearSilenceTimer();
700
- const session = await readSession();
701
- if (session.status === "completed" || session.status === "failed") {
702
- return;
703
- }
704
- const text = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText);
705
- if (!text) {
706
- return;
1448
/**
 * Commits the current turn: resolves its final text (primary transcript,
 * optional fallback transcription, optional correction hook), skips empty or
 * duplicate commits, persists the turn, notifies the client and hands the
 * turn to `completeTurn`.
 *
 * @param {string} [reason="manual"] - Why the commit was requested. Any
 *   reason other than "manual" may be deferred while the transcript is
 *   younger than `turnDetection.transcriptStabilityMs`.
 * @returns {Promise<void>}
 */
const commitTurnInternal = async (reason = "manual") => {
  clearSilenceTimer();
  const session = await readSession();
  if (session.status === "completed" || session.status === "failed") {
    return;
  }
  const { currentTurn } = session;
  const text = buildTurnText(currentTurn.transcripts, currentTurn.partialText, {
    partialEndedAtMs: currentTurn.partialEndedAt,
    partialStartedAtMs: currentTurn.partialStartedAt
  });
  let transcripts = currentTurn.transcripts.map(cloneTranscript);
  let finalText = text;
  // Stand-in transcript used when the turn has text but no transcript objects.
  const placeholderTranscripts = () => [
    {
      id: createId(),
      isFinal: false,
      text: finalText
    }
  ];
  const transcriptStabilityAge = currentTurn.lastTranscriptAt !== undefined ? Date.now() - currentTurn.lastTranscriptAt : undefined;
  const fallbackSelection = await runFallbackTranscription(text, currentTurn.transcripts);
  const source = fallbackSelection?.source ?? "primary";
  const fallbackUsed = fallbackSelection?.fallbackUsed ?? false;
  const fallbackDiagnostics = fallbackSelection?.diagnostics;
  if (fallbackSelection) {
    finalText = fallbackSelection.text;
    if (fallbackSelection.transcripts.length) {
      transcripts = fallbackSelection.transcripts.map(cloneTranscript);
    } else if (!transcripts.length) {
      transcripts = placeholderTranscripts();
    }
    if (fallbackSelection.fallbackUsed) {
      logger.info("voice fallback turn selected", {
        reason,
        sessionId: options.id,
        text: finalText
      });
    }
  }
  const correctionSelection = await runTurnCorrection({
    fallbackDiagnostics,
    fallbackUsed,
    session,
    source,
    text: finalText,
    transcripts
  });
  const correctionDiagnostics = correctionSelection?.diagnostics;
  if (correctionSelection) {
    finalText = correctionSelection.text;
  }
  if (!finalText) {
    return;
  }
  if (isDuplicateTurnCommit(session, finalText)) {
    logger.debug("voice turn commit deduped", {
      reason,
      sessionId: options.id
    });
    return;
  }
  // NOTE(review): this deferral is evaluated after the fallback transcription
  // and correction hook have already run, using an age measured before them;
  // a deferred commit presumably repeats that work later — confirm intended.
  const transcriptStillSettling = typeof transcriptStabilityAge === "number" && transcriptStabilityAge < turnDetection.transcriptStabilityMs;
  if (transcriptStillSettling && reason !== "manual") {
    scheduleTurnCommit(turnDetection.transcriptStabilityMs - transcriptStabilityAge, reason, false);
    return;
  }
  const turn = {
    committedAt: Date.now(),
    id: createId(),
    text: finalText,
    quality: createTurnQuality(transcripts, source, fallbackUsed, fallbackDiagnostics, correctionDiagnostics),
    transcripts: transcripts.length > 0 ? transcripts : placeholderTranscripts()
  };
  const updatedSession = await writeSession((record) => {
    record.committedTurnIds = [...record.committedTurnIds, turn.id];
    record.currentTurn = createEmptyCurrentTurn();
    record.lastActivityAt = Date.now();
    record.status = "active";
    record.turns = [...record.turns, turn];
    markTurnCommitted(record, finalText, transcripts);
  });
  speechDetected = false;
  rewindFallbackTurnAudio();
  logger.info("voice turn committed", {
    reason,
    sessionId: options.id,
    turnId: turn.id
  });
  await send({
    turn,
    type: "turn"
  });
  // Turn-scoped lifecycles close the adapter after every committed turn.
  if (options.sttLifecycle === "turn-scoped") {
    await closeAdapter("turn-commit");
  }
  await completeTurn(updatedSession, turn);
};
1548
/**
 * Attaches a socket to the voice session: loads or creates the session
 * record, applies scenario changes and the reconnect policy, persists the
 * record, announces it to the client and opens the STT adapter.
 *
 * @param {object} nextSocket - Socket that becomes the session's active socket.
 * @returns {Promise<void>}
 */
const connectInternal = async (nextSocket) => {
  socket = nextSocket;
  const existingSession = await options.store.get(options.id);
  // Must be evaluated BEFORE `session.scenarioId` is assigned below: `session`
  // aliases `existingSession`, so comparing afterwards is always false and the
  // reset for a changed scenario could never run.
  const scenarioChanged = Boolean(
    existingSession?.scenarioId && options.scenarioId && existingSession.scenarioId !== options.scenarioId
  );
  let session = existingSession ?? createVoiceSessionRecord(options.id, options.scenarioId);
  if (options.scenarioId && session.scenarioId !== options.scenarioId) {
    session.scenarioId = options.scenarioId;
  }
  ensureCommittedTurnGuard(session);
  let shouldFireOnSession = !existingSession;
  if (scenarioChanged) {
    session = ensureCommittedTurnGuard(resetVoiceSessionRecord(options.id, existingSession, options.scenarioId));
    shouldFireOnSession = true;
  }
  rewindFallbackTurnAudio();
  if (existingSession?.status === "reconnecting") {
    const nextAttempts = existingSession.reconnect.attempts + 1;
    const reconnectExpired =
      existingSession.reconnect.lastDisconnectAt !== undefined &&
      Date.now() - existingSession.reconnect.lastDisconnectAt > reconnect.timeout;
    const tooManyAttempts = nextAttempts > reconnect.maxAttempts;
    const policyExhausted = reconnectExpired || tooManyAttempts;
    if (reconnect.strategy === "fail" && policyExhausted) {
      await failInternal(new Error("Voice session reconnect policy exhausted"));
      return;
    }
    if (reconnect.strategy === "restart" && policyExhausted) {
      session = resetVoiceSessionRecord(options.id, existingSession, options.scenarioId);
      shouldFireOnSession = true;
    } else if (!scenarioChanged) {
      // Resume the stored session. Skipped when the scenario changed so the
      // reset performed above is not overwritten by the old record.
      session = {
        ...existingSession,
        reconnect: {
          ...existingSession.reconnect,
          attempts: nextAttempts
        },
        status: "active"
      };
    }
  }
  await options.store.set(options.id, session);
  await send({
    sessionId: options.id,
    status: session.status,
    scenarioId: session.scenarioId,
    type: "session"
  });
  if (shouldFireOnSession) {
    await options.route.onSession?.({
      api,
      context: options.context,
      session
    });
  }
  if (session.status === "completed") {
    // A completed session only re-announces completion; no adapter is opened.
    await send({
      sessionId: options.id,
      type: "complete"
    });
    return;
  }
  resumePendingTurnCommit(session);
  await ensureAdapter();
};
1608
/**
 * Handles the client socket going away.
 *
 * The adapter is closed and per-turn audio dropped in every case. Under the
 * "fail" reconnect strategy the session is failed outright; otherwise a
 * session that is neither completed nor failed is parked as "reconnecting"
 * with the disconnect time recorded.
 *
 * @param {{ reason?: string }} [event] - Disconnect details, if any.
 * @returns {Promise<void>}
 */
const disconnectInternal = async (event) => {
  clearSilenceTimer();
  await closeAdapter(event?.reason);
  rewindFallbackTurnAudio();
  if (reconnect.strategy === "fail") {
    await failInternal(new Error(event?.reason ?? "Voice socket disconnected"));
    return;
  }
  await writeSession((record) => {
    const isTerminal = record.status === "completed" || record.status === "failed";
    if (isTerminal) {
      return;
    }
    record.lastActivityAt = Date.now();
    record.reconnect.lastDisconnectAt = Date.now();
    record.status = "reconnecting";
  });
  speechDetected = false;
};
1626
/**
 * Processes one incoming audio chunk: conditions it, updates the turn's
 * speech/silence timestamps, buffers it for a possible fallback replay, arms
 * or cancels the silence commit, and forwards it to the STT adapter.
 *
 * Chunks are ignored once the session is completed or failed.
 *
 * @param {unknown} audio - Raw audio chunk as received from the client.
 * @returns {Promise<void>}
 */
const receiveAudioInternal = async (audio) => {
  const session = await readSession();
  if (session.status === "completed" || session.status === "failed") {
    return;
  }
  const adapter = await ensureAdapter();
  const conditionedAudio = conditionAudioChunk(audio, options.audioConditioning);
  const audioLevel = measureAudioLevel(conditionedAudio);
  const isSpeech = () => audioLevel >= turnDetection.speechThreshold;
  // Audio is buffered from the first speech chunk onwards, including the
  // quieter chunks that follow it.
  const shouldStoreAudio = speechDetected || isSpeech();
  await writeSession((record) => {
    const turnState = record.currentTurn;
    turnState.lastAudioAt = Date.now();
    record.lastActivityAt = Date.now();
    record.status = "active";
    if (isSpeech()) {
      turnState.lastSpeechAt = Date.now();
      turnState.silenceStartedAt = undefined;
    } else if (speechDetected && turnState.silenceStartedAt === undefined) {
      // First quiet chunk after speech marks the start of the silence.
      turnState.silenceStartedAt = Date.now();
    }
  });
  if (shouldStoreAudio) {
    pushTurnAudio(conditionedAudio);
  }
  if (isSpeech()) {
    speechDetected = true;
    clearSilenceTimer();
  } else if (speechDetected) {
    const latest = await readSession();
    const pendingText = buildTurnText(latest.currentTurn.transcripts, latest.currentTurn.partialText, {
      partialEndedAtMs: latest.currentTurn.partialEndedAt,
      partialStartedAtMs: latest.currentTurn.partialStartedAt
    });
    // Only schedule a silence commit when there is text to commit.
    if (Boolean(pendingText)) {
      scheduleSilenceCommit(turnDetection.silenceMs, false);
    }
  }
  await adapter.send(conditionedAudio);
};
1664
/**
 * Public session handle. Every operation is routed through `runSerial` under
 * a phase label and delegates to the matching `*Internal` implementation.
 */
const api = {
  id: options.id,
  /** Closes the STT adapter, then the socket with code 1000. */
  async close(reason) {
    await runSerial("api.close", async () => {
      clearSilenceTimer();
      await closeAdapter(reason);
      await Promise.resolve(socket.close(1000, reason));
    });
  },
  /** Commits the current turn; `reason` defaults to "manual". */
  async commitTurn(reason = "manual") {
    return runSerial("api.commitTurn", async () => {
      await commitTurnInternal(reason);
    });
  },
  /** Completes the session, optionally recording `result` on the last turn. */
  async complete(result) {
    return runSerial("api.complete", async () => {
      await completeInternal(result);
    });
  },
  /** Attaches `nextSocket` to the session. */
  async connect(nextSocket) {
    return runSerial("api.connect", async () => {
      await connectInternal(nextSocket);
    });
  },
  /** Handles the socket disconnecting. */
  async disconnect(event) {
    return runSerial("api.disconnect", async () => {
      await disconnectInternal(event);
    });
  },
  /** Fails the session with `error`. */
  async fail(error) {
    return runSerial("api.fail", async () => {
      await failInternal(error);
    });
  },
  /** Feeds one audio chunk into the session. */
  async receiveAudio(audio) {
    return runSerial("api.receiveAudio", async () => {
      await receiveAudioInternal(audio);
    });
  },
  /** Reads the current session record. */
  async snapshot() {
    return runSerial("api.snapshot", async () => readSession());
  }
};
885
1693
  return api;
886
1694
  };
887
1695
 
1696
// src/turnProfiles.ts
// Base timing presets, selected by `config.profile`.
var TURN_PROFILE_DEFAULTS = {
  balanced: {
    qualityProfile: "general",
    silenceMs: 1400,
    speechThreshold: 0.012,
    transcriptStabilityMs: 1000
  },
  fast: {
    qualityProfile: "general",
    silenceMs: 700,
    speechThreshold: 0.015,
    transcriptStabilityMs: 450
  },
  "long-form": {
    qualityProfile: "general",
    silenceMs: 2200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1500
  }
};
// Per-environment overrides layered on top of the turn profile; "general"
// overrides nothing.
var QUALITY_PROFILE_DEFAULTS = {
  general: {},
  "accent-heavy": {
    silenceMs: 1200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1200
  },
  "noisy-room": {
    silenceMs: 2000,
    speechThreshold: 0.02,
    transcriptStabilityMs: 1600
  },
  "short-command": {
    silenceMs: 500,
    speechThreshold: 0.016,
    transcriptStabilityMs: 420
  }
};
var DEFAULT_TURN_PROFILE = "fast";
var DEFAULT_QUALITY_PROFILE = "general";
/**
 * Resolves a complete turn-detection configuration.
 *
 * Each timing value is taken from the first source that defines it:
 * explicit `config` value, then the quality profile, then the turn profile.
 * A missing or unrecognised `profile` / `qualityProfile` falls back to the
 * default ("fast" / "general") instead of throwing, and the returned object
 * reports the profile names actually used.
 *
 * @param {{
 *   profile?: string,
 *   qualityProfile?: string,
 *   silenceMs?: number,
 *   speechThreshold?: number,
 *   transcriptStabilityMs?: number
 * }} [config]
 * @returns {{
 *   profile: string,
 *   qualityProfile: string,
 *   silenceMs: number,
 *   speechThreshold: number,
 *   transcriptStabilityMs: number
 * }}
 */
var resolveTurnDetectionConfig = (config) => {
  // Own-property lookup keeps inherited keys such as "constructor" from being
  // mistaken for a preset.
  const pickKnown = (table, requested, fallbackName) =>
    typeof requested === "string" && Object.hasOwn(table, requested) ? requested : fallbackName;
  const profile = pickKnown(TURN_PROFILE_DEFAULTS, config?.profile, DEFAULT_TURN_PROFILE);
  const qualityProfile = pickKnown(QUALITY_PROFILE_DEFAULTS, config?.qualityProfile, DEFAULT_QUALITY_PROFILE);
  const preset = TURN_PROFILE_DEFAULTS[profile];
  const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  return {
    profile,
    qualityProfile,
    silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
    speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
    transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
  };
};
1750
+
888
1751
  // src/testing/resilience.ts
889
1752
  var roundMetric2 = (value, digits = 4) => {
890
1753
  const factor = 10 ** digits;
@@ -951,10 +1814,12 @@ var runScenario = async (id, title, run) => {
951
1814
  socket: createMockSocket(),
952
1815
  store,
953
1816
  stt: adapter.adapter,
954
- turnDetection: {
1817
+ sttLifecycle: "continuous",
1818
+ turnDetection: resolveTurnDetectionConfig({
955
1819
  silenceMs: 20,
956
- speechThreshold: 0.01
957
- }
1820
+ speechThreshold: 0.01,
1821
+ transcriptStabilityMs: 5
1822
+ })
958
1823
  });
959
1824
  await voice.connect(createMockSocket());
960
1825
  try {
@@ -983,6 +1848,24 @@ var runScenario = async (id, title, run) => {
983
1848
  type: "close"
984
1849
  });
985
1850
  },
1851
+ emitEndOfTurn: async () => {
1852
+ await adapter.session.emit("endOfTurn", {
1853
+ reason: "vendor",
1854
+ receivedAt: Date.now(),
1855
+ type: "endOfTurn"
1856
+ });
1857
+ },
1858
+ emitFinal: async (text, transcriptId = `${id}-${turns.length}`) => {
1859
+ await adapter.session.emit("final", {
1860
+ receivedAt: Date.now(),
1861
+ transcript: {
1862
+ id: transcriptId,
1863
+ isFinal: true,
1864
+ text
1865
+ },
1866
+ type: "final"
1867
+ });
1868
+ },
986
1869
  turns
987
1870
  });
988
1871
  } finally {
@@ -1022,6 +1905,88 @@ var runVoiceResilienceBenchmark = async () => {
1022
1905
  if (turns.length === 1) {
1023
1906
  await commit("Fresh transcripts should still commit later");
1024
1907
  }
1908
+ }),
1909
+ runScenario("duplicate-end-of-turn", "Repeated end-of-turn events for the same turn stay deduped", async ({ emitFinal, emitEndOfTurn, turns }) => {
1910
+ await emitFinal("Repeated end-of-turn should only commit once", "dup-endofturn");
1911
+ await emitEndOfTurn();
1912
+ await emitEndOfTurn();
1913
+ await Bun.sleep(80);
1914
+ if (turns.length !== 1) {
1915
+ throw new Error("Repeated end-of-turn events created duplicate turns");
1916
+ }
1917
+ }),
1918
+ runScenario("duplicate-end-of-turn-jitter", "End-of-turn jitter does not trigger extra commits", async ({ emitFinal, emitEndOfTurn, turns }) => {
1919
+ await emitFinal("Noisy end-of-turn signals should still commit once", "dup-endofturn-jitter");
1920
+ for (const delayMs of [40, 95, 180, 120]) {
1921
+ await Bun.sleep(delayMs);
1922
+ await emitEndOfTurn();
1923
+ }
1924
+ await Bun.sleep(80);
1925
+ if (turns.length !== 1) {
1926
+ throw new Error("Jittered end-of-turn signals created duplicate turns");
1927
+ }
1928
+ }),
1929
+ runScenario("reconnect-duplicate-text-no-new-audio", "Reconnect duplicate text with different ids and no audio does not replay turn", async ({
1930
+ adapter,
1931
+ connectNewSocket,
1932
+ disconnect,
1933
+ emitEndOfTurn,
1934
+ emitFinal,
1935
+ turns
1936
+ }) => {
1937
+ await emitFinal("Reconnect duplicate text should be suppressed", "dup-text-reconnect-1");
1938
+ await emitEndOfTurn();
1939
+ await Bun.sleep(60);
1940
+ await disconnect();
1941
+ await connectNewSocket();
1942
+ await adapter.session.emit("final", {
1943
+ receivedAt: Date.now(),
1944
+ transcript: {
1945
+ id: "dup-text-reconnect-2",
1946
+ isFinal: true,
1947
+ text: "Reconnect duplicate text should be suppressed"
1948
+ },
1949
+ type: "final"
1950
+ });
1951
+ for (const delayMs of [40, 70, 110]) {
1952
+ await Bun.sleep(delayMs);
1953
+ await emitEndOfTurn();
1954
+ }
1955
+ await Bun.sleep(60);
1956
+ if (turns.length !== 1) {
1957
+ throw new Error("Reconnect duplicate text was committed twice");
1958
+ }
1959
+ }),
1960
+ runScenario("reconnect-end-of-turn-jitter", "End-of-turn jitter after reconnect does not replay committed turns", async ({
1961
+ adapter,
1962
+ connectNewSocket,
1963
+ disconnect,
1964
+ emitEndOfTurn,
1965
+ emitFinal,
1966
+ turns
1967
+ }) => {
1968
+ await emitFinal("Reconnect duplicate end-of-turn should dedupe", "resume-jitter");
1969
+ await emitEndOfTurn();
1970
+ await Bun.sleep(60);
1971
+ await disconnect();
1972
+ await connectNewSocket();
1973
+ await adapter.session.emit("final", {
1974
+ receivedAt: Date.now(),
1975
+ transcript: {
1976
+ id: "resume-jitter",
1977
+ isFinal: true,
1978
+ text: "Reconnect duplicate end-of-turn should dedupe"
1979
+ },
1980
+ type: "final"
1981
+ });
1982
+ for (const delayMs of [50, 80, 120, 180]) {
1983
+ await Bun.sleep(delayMs);
1984
+ await emitEndOfTurn();
1985
+ }
1986
+ await Bun.sleep(80);
1987
+ if (turns.length !== 1) {
1988
+ throw new Error("Reconnected jittered end-of-turn signals replayed a committed turn");
1989
+ }
1025
1990
  })
1026
1991
  ]);
1027
1992
  const passCount = scenarios.filter((scenario) => scenario.passes).length;
@@ -1040,10 +2005,26 @@ var runVoiceResilienceBenchmark = async () => {
1040
2005
  };
1041
2006
  // src/testing/sessionBenchmark.ts
1042
2007
  var average2 = (values) => values.length > 0 ? values.reduce((sum, value) => sum + value, 0) / values.length : 0;
2008
/**
 * Canonicalize turn text for comparison: lower-case it, replace every
 * character that is not a Unicode letter, a Unicode digit, whitespace or an
 * apostrophe with a space, collapse whitespace runs, and trim the ends.
 */
var normalizeTurnText = (value) => {
  const lowered = value.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s']/gu, " ");
  const singleSpaced = withoutPunctuation.replace(/\s+/g, " ");
  return singleSpaced.trim();
};
1043
2009
  var roundMetric3 = (value, digits = 4) => {
1044
2010
  const factor = 10 ** digits;
1045
2011
  return Math.round(value * factor) / factor;
1046
2012
  };
2013
/**
 * Fill in the benchmark defaults for an STT fallback config.
 *
 * Returns `undefined` when no (truthy) config is supplied. Otherwise returns a
 * new object in which every nullish option is replaced by its default and
 * `adapter` is passed through untouched.
 */
var resolveBenchmarkFallbackConfig = (config) => {
  // `??` (not `||`) so that explicit 0 / "" values supplied by the caller are kept.
  const orDefault = (value, fallback) => value ?? fallback;
  return config
    ? {
        adapter: config.adapter,
        completionTimeoutMs: orDefault(config.completionTimeoutMs, 2500),
        confidenceThreshold: orDefault(config.confidenceThreshold, 0.6),
        maxAttemptsPerTurn: orDefault(config.maxAttemptsPerTurn, 1),
        minTextLength: orDefault(config.minTextLength, 2),
        replayWindowMs: orDefault(config.replayWindowMs, 8000),
        settleMs: orDefault(config.settleMs, 220),
        trigger: orDefault(config.trigger, "empty-or-low-confidence")
      }
    : undefined;
};
1047
2028
  var chunkAudio2 = (audio, bytesPerChunk) => {
1048
2029
  const chunks = [];
1049
2030
  for (let offset = 0;offset < audio.byteLength; offset += bytesPerChunk) {
@@ -1052,39 +2033,178 @@ var chunkAudio2 = (audio, bytesPerChunk) => {
1052
2033
  return chunks;
1053
2034
  };
1054
2035
  var createSilence2 = (byteLength) => new Uint8Array(byteLength);
1055
- var createMockSocket2 = () => ({
1056
- close: async () => {},
1057
- send: async () => {}
2036
/**
 * Count committed turns that repeat more often than the fixture allows.
 *
 * Texts are compared after `normalizeTurnText`. Each distinct text may appear
 * as many times as it occurs in `expectedTurns`, and always at least once;
 * every occurrence beyond that allowance counts as one duplicate.
 *
 * @param {string[]} actualTurns Texts of the turns that were committed.
 * @param {string[]} expectedTurns Texts the fixture expects.
 * @returns {number} Total number of surplus occurrences.
 */
var countUnexpectedDuplicateTurns = (actualTurns, expectedTurns) => {
  const tallyByNormalizedText = (texts) => {
    const tally = new Map();
    for (const text of texts) {
      const key = normalizeTurnText(text);
      tally.set(key, (tally.get(key) ?? 0) + 1);
    }
    return tally;
  };
  const expectedTally = tallyByNormalizedText(expectedTurns);
  const actualTally = tallyByNormalizedText(actualTurns);
  let surplus = 0;
  actualTally.forEach((seen, key) => {
    // An unexpected text is still allowed once; only its repeats are duplicates.
    const allowed = Math.max(expectedTally.get(key) ?? 0, 1);
    surplus += Math.max(0, seen - allowed);
  });
  return surplus;
};
2057
/**
 * Turn an outgoing socket payload into something compact enough to trace.
 *
 * - Strings are parsed as JSON; text that is not valid JSON is returned as-is.
 * - Anything else is summarized as `{ byteLength, kind: "binary" }`, reading
 *   `byteLength` straight off the payload (ArrayBuffer and typed arrays both
 *   expose it, so no type switch is needed).
 * - A nullish payload is reported as a zero-length binary message instead of
 *   throwing while reading `.byteLength`.
 *
 * @param {unknown} data Payload handed to the socket's `send`.
 * @returns {unknown} Parsed JSON, the raw string, or a binary summary object.
 */
var normalizeSocketMessage = (data) => {
  if (typeof data === "string") {
    try {
      return JSON.parse(data);
    } catch {
      // Not JSON: keep the raw text so the trace still shows what was sent.
      return data;
    }
  }
  return {
    byteLength: data == null ? 0 : data.byteLength,
    kind: "binary"
  };
};
2070
/**
 * Build a stand-in socket whose `close` and `send` do no I/O.
 *
 * When `onEvent` is supplied, each call is reported to it as
 * `{ data, phase }` with phase "socket.close" or "socket.send". Without
 * `onEvent` both methods are no-ops, and the payload is not normalized either.
 *
 * @param {(event: {data: unknown, phase: string}) => void} [onEvent]
 * @returns {{close: Function, send: Function}} Both methods are async.
 */
var createMockSocket2 = (onEvent) => {
  const close = async (code, reason) => {
    if (onEvent == null) {
      return;
    }
    onEvent({ data: { code, reason }, phase: "socket.close" });
  };
  const send = async (data) => {
    // Guard first so the payload is only normalized when someone is listening.
    if (onEvent == null) {
      return;
    }
    onEvent({ data: normalizeSocketMessage(data), phase: "socket.send" });
  };
  return { close, send };
};
1059
- var runVoiceSessionBenchmarkScenario = async (adapter, fixture) => {
2087
/**
 * Poll `session.snapshot()` until the session looks idle or the time budget
 * runs out.
 *
 * "Idle" means the current turn has neither final nor partial text and at
 * least `settleMs` have passed since `lastActivityAt` (or `createdAt` when no
 * activity timestamp exists). Between polls it sleeps for `settleMs`, capped
 * at 100 ms. When `idleTimeoutMs` elapses first the function simply returns;
 * the caller gets no signal distinguishing the two outcomes.
 *
 * @param {{snapshot: () => Promise<object>}} session
 * @param {number} settleMs Required quiet period.
 * @param {number} idleTimeoutMs Upper bound on the total wait.
 * @returns {Promise<void>}
 */
var waitForSessionIdle = async (session, settleMs, idleTimeoutMs) => {
  const pollDelayMs = Math.min(100, settleMs);
  const pollingBeganAt = Date.now();
  while (Date.now() - pollingBeganAt < idleTimeoutMs) {
    const { createdAt, currentTurn, lastActivityAt } = await session.snapshot();
    const hasPendingText = Boolean(currentTurn.finalText || currentTurn.partialText);
    const quietSince = lastActivityAt ?? createdAt;
    if (!hasPendingText && Date.now() - quietSince >= settleMs) {
      return;
    }
    await Bun.sleep(pollDelayMs);
  }
};
2099
+ var runVoiceSessionBenchmarkScenario = async (adapter, fixture, options = {}) => {
1060
2100
  const store = createVoiceMemoryStore();
1061
- const turns = [];
2101
+ const committedTurns = [];
2102
+ const traceStartedAt = Date.now();
2103
+ const trace = [];
2104
+ const pushTrace = (entry) => {
2105
+ if (!options.trace) {
2106
+ return;
2107
+ }
2108
+ trace.push({
2109
+ ...entry,
2110
+ atMs: Date.now() - traceStartedAt
2111
+ });
2112
+ };
2113
+ const captureSnapshot = async (phase) => {
2114
+ if (!options.trace) {
2115
+ return;
2116
+ }
2117
+ const snapshot = await store.getOrCreate(`session-bench-${fixture.id}`);
2118
+ pushTrace({
2119
+ data: {
2120
+ currentTurn: {
2121
+ finalText: snapshot.currentTurn.finalText,
2122
+ lastAudioAt: snapshot.currentTurn.lastAudioAt,
2123
+ lastSpeechAt: snapshot.currentTurn.lastSpeechAt,
2124
+ lastTranscriptAt: snapshot.currentTurn.lastTranscriptAt,
2125
+ partialText: snapshot.currentTurn.partialText,
2126
+ silenceStartedAt: snapshot.currentTurn.silenceStartedAt,
2127
+ transcriptCount: snapshot.currentTurn.transcripts.length
2128
+ },
2129
+ lastActivityAt: snapshot.lastActivityAt,
2130
+ status: snapshot.status,
2131
+ turns: snapshot.turns.map((turn) => turn.text)
2132
+ },
2133
+ phase
2134
+ });
2135
+ };
2136
+ const logger = {
2137
+ debug: (message, meta) => {
2138
+ pushTrace({
2139
+ data: meta,
2140
+ phase: `logger.debug:${message}`
2141
+ });
2142
+ },
2143
+ error: (message, meta) => {
2144
+ pushTrace({
2145
+ data: meta,
2146
+ phase: `logger.error:${message}`
2147
+ });
2148
+ },
2149
+ info: (message, meta) => {
2150
+ pushTrace({
2151
+ data: meta,
2152
+ phase: `logger.info:${message}`
2153
+ });
2154
+ },
2155
+ warn: (message, meta) => {
2156
+ pushTrace({
2157
+ data: meta,
2158
+ phase: `logger.warn:${message}`
2159
+ });
2160
+ }
2161
+ };
1062
2162
  const session = createVoiceSession({
2163
+ audioConditioning: resolveAudioConditioningConfig(fixture.audioConditioning),
1063
2164
  context: {},
1064
2165
  id: `session-bench-${fixture.id}`,
1065
- logger: {},
2166
+ logger,
1066
2167
  reconnect: {
1067
2168
  maxAttempts: 2,
1068
2169
  strategy: "resume-last-turn",
1069
2170
  timeout: 5000
1070
2171
  },
1071
2172
  route: {
2173
+ correctTurn: options.correctTurn,
1072
2174
  onComplete: async () => {},
1073
2175
  onTurn: async ({ turn }) => {
1074
- turns.push(turn.text);
2176
+ committedTurns.push({
2177
+ quality: turn.quality,
2178
+ text: turn.text
2179
+ });
2180
+ pushTrace({
2181
+ data: {
2182
+ quality: turn.quality,
2183
+ text: turn.text,
2184
+ transcriptCount: turn.transcripts.length,
2185
+ turnId: turn.id
2186
+ },
2187
+ phase: "route.onTurn"
2188
+ });
1075
2189
  }
1076
2190
  },
1077
- socket: createMockSocket2(),
2191
+ phraseHints: fixture.phraseHints,
2192
+ socket: createMockSocket2(pushTrace),
1078
2193
  store,
1079
2194
  stt: adapter,
1080
- turnDetection: {
2195
+ sttFallback: resolveBenchmarkFallbackConfig(options.sttFallback),
2196
+ sttLifecycle: fixture.sttLifecycle ?? "continuous",
2197
+ turnDetection: resolveTurnDetectionConfig({
2198
+ profile: fixture.turnProfile ?? "balanced",
1081
2199
  silenceMs: fixture.silenceMs ?? DEFAULT_SILENCE_MS,
1082
- speechThreshold: fixture.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD
1083
- }
2200
+ speechThreshold: fixture.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD,
2201
+ transcriptStabilityMs: fixture.transcriptStabilityMs ?? 900
2202
+ })
1084
2203
  });
1085
2204
  const startedAt = Date.now();
1086
2205
  let reconnectTriggered = false;
1087
- await session.connect(createMockSocket2());
2206
+ await session.connect(createMockSocket2(pushTrace));
2207
+ await captureSnapshot("session.connected");
1088
2208
  try {
1089
2209
  const chunkDurationMs = fixture.chunkDurationMs ?? 100;
1090
2210
  const bytesPerMillisecond = fixture.format.sampleRateHz * fixture.format.channels * 2 / 1000;
@@ -1095,13 +2215,22 @@ var runVoiceSessionBenchmarkScenario = async (adapter, fixture) => {
1095
2215
  await Bun.sleep(chunkDurationMs);
1096
2216
  if (fixture.reconnectAtChunkIndex !== undefined && index === fixture.reconnectAtChunkIndex && !reconnectTriggered) {
1097
2217
  reconnectTriggered = true;
2218
+ pushTrace({
2219
+ data: {
2220
+ chunkIndex: index
2221
+ },
2222
+ phase: "reconnect.begin"
2223
+ });
2224
+ await captureSnapshot("reconnect.pre-disconnect");
1098
2225
  await session.disconnect({
1099
2226
  reason: "benchmark-reconnect",
1100
2227
  recoverable: true,
1101
2228
  type: "close"
1102
2229
  });
2230
+ await captureSnapshot("reconnect.post-disconnect");
1103
2231
  await Bun.sleep(fixture.reconnectPauseMs ?? 150);
1104
- await session.connect(createMockSocket2());
2232
+ await session.connect(createMockSocket2(pushTrace));
2233
+ await captureSnapshot("reconnect.post-connect");
1105
2234
  }
1106
2235
  }
1107
2236
  const tailPaddingMs = fixture.tailPaddingMs ?? 1200;
@@ -1112,13 +2241,16 @@ var runVoiceSessionBenchmarkScenario = async (adapter, fixture) => {
1112
2241
  await Bun.sleep(chunkDurationMs);
1113
2242
  }
1114
2243
  }
1115
- await Bun.sleep(Math.max(1200, fixture.silenceMs ?? DEFAULT_SILENCE_MS));
2244
+ await waitForSessionIdle(session, Math.max(1200, (fixture.silenceMs ?? DEFAULT_SILENCE_MS) + (fixture.transcriptStabilityMs ?? 900)), 8000);
2245
+ await captureSnapshot("session.idle");
1116
2246
  } finally {
2247
+ await captureSnapshot("session.pre-close");
1117
2248
  await session.close("session-benchmark-complete");
1118
2249
  }
1119
- const duplicateTurnCount = Math.max(0, turns.length - new Set(turns.map((turn) => turn.toLowerCase())).size);
2250
+ const duplicateTurnCount = countUnexpectedDuplicateTurns(committedTurns.map((turn) => turn.text), fixture.expectedTurnTexts);
1120
2251
  const turnResults = fixture.expectedTurnTexts.map((expectedText, index) => {
1121
- const actualText = turns[index];
2252
+ const actualTurn = committedTurns[index];
2253
+ const actualText = actualTurn?.text;
1122
2254
  if (!actualText) {
1123
2255
  return {
1124
2256
  actualText: "",
@@ -1133,20 +2265,22 @@ var runVoiceSessionBenchmarkScenario = async (adapter, fixture) => {
1133
2265
  accuracy,
1134
2266
  expectedText,
1135
2267
  index,
1136
- passes: accuracy.passesThreshold
2268
+ passes: accuracy.passesThreshold,
2269
+ quality: actualTurn?.quality
1137
2270
  };
1138
2271
  });
1139
- for (let index = fixture.expectedTurnTexts.length;index < turns.length; index += 1) {
2272
+ for (let index = fixture.expectedTurnTexts.length;index < committedTurns.length; index += 1) {
1140
2273
  turnResults.push({
1141
- actualText: turns[index] ?? "",
2274
+ actualText: committedTurns[index]?.text ?? "",
1142
2275
  expectedText: undefined,
1143
2276
  index,
1144
- passes: false
2277
+ passes: false,
2278
+ quality: committedTurns[index]?.quality
1145
2279
  });
1146
2280
  }
1147
- const turnCountDelta = turns.length - fixture.expectedTurnTexts.length;
2281
+ const turnCountDelta = committedTurns.length - fixture.expectedTurnTexts.length;
1148
2282
  return {
1149
- actualTurns: turns,
2283
+ actualTurns: committedTurns.map((turn) => turn.text),
1150
2284
  duplicateTurnCount,
1151
2285
  elapsedMs: Date.now() - startedAt,
1152
2286
  expectedTurns: fixture.expectedTurnTexts,
@@ -1156,7 +2290,8 @@ var runVoiceSessionBenchmarkScenario = async (adapter, fixture) => {
1156
2290
  tags: fixture.tags ?? [],
1157
2291
  title: fixture.title,
1158
2292
  turnCountDelta,
1159
- turnResults
2293
+ turnResults,
2294
+ trace: options.trace ? trace : undefined
1160
2295
  };
1161
2296
  };
1162
2297
  var summarizeVoiceSessionBenchmark = (adapterId, scenarios) => {
@@ -1177,10 +2312,65 @@ var summarizeVoiceSessionBenchmark = (adapterId, scenarios) => {
1177
2312
  scenariosWithTurnCountMismatch: scenarios.filter((scenario) => scenario.turnCountDelta !== 0).length
1178
2313
  };
1179
2314
  };
2315
/**
 * Aggregate several session-benchmark reports (repeated runs of the same
 * scenarios) into per-fixture statistics plus an overall summary.
 *
 * Scenario results are grouped by `fixtureId` in first-seen order. Word error
 * rates are gathered from every turn that has a numeric
 * `accuracy.wordErrorRate`. A fixture with no reconnect runs reports a
 * reconnect success rate of 1, and that value is included in the summary
 * average. `totalPassCount` is taken from each report's own
 * `summary.passCount`.
 *
 * @param {{adapterId: string, reports: Array<{scenarios: object[], summary: {passCount: number}}>}} input
 * @returns {object} `{ adapterId, generatedAt, runCount, scenarios, summary }`
 */
var summarizeVoiceSessionBenchmarkSeries = (input) => {
  const { adapterId, reports } = input;

  // Group every scenario result under its fixture id.
  const runsByFixture = new Map();
  for (const { scenarios } of reports) {
    for (const run of scenarios) {
      const bucket = runsByFixture.get(run.fixtureId);
      if (bucket) {
        bucket.push(run);
      } else {
        runsByFixture.set(run.fixtureId, [run]);
      }
    }
  }

  const scenarioAggregates = Array.from(runsByFixture, ([fixtureId, runs]) => {
    const wordErrorRates = [];
    for (const run of runs) {
      for (const turn of run.turnResults) {
        const rate = turn.accuracy?.wordErrorRate;
        if (typeof rate === "number") {
          wordErrorRates.push(rate);
        }
      }
    }
    const hasRates = wordErrorRates.length > 0;
    const reconnectRuns = runs.filter((run) => run.reconnectTriggered);
    const passedReconnectRuns = reconnectRuns.filter((run) => run.passes);
    const passCount = runs.filter((run) => run.passes).length;
    const [firstRun] = runs;
    return {
      averageElapsedMs: roundMetric3(average2(runs.map((run) => run.elapsedMs)), 2),
      averageWordErrorRate: roundMetric3(average2(wordErrorRates)),
      bestWordErrorRate: roundMetric3(hasRates ? Math.min(...wordErrorRates) : 0),
      fixtureId,
      passCount,
      passRate: roundMetric3(runs.length > 0 ? passCount / runs.length : 0),
      reconnectSuccessRate: roundMetric3(reconnectRuns.length > 0 ? passedReconnectRuns.length / reconnectRuns.length : 1),
      runCount: runs.length,
      tags: firstRun.tags,
      title: firstRun.title,
      worstWordErrorRate: roundMetric3(hasRates ? Math.max(...wordErrorRates) : 0)
    };
  });

  const pluck = (field) => scenarioAggregates.map((aggregate) => aggregate[field]);
  let totalRunCount = 0;
  let totalPassCount = 0;
  for (const report of reports) {
    totalRunCount += report.scenarios.length;
    totalPassCount += report.summary.passCount;
  }
  const reconnectRates = pluck("reconnectSuccessRate").filter((rate) => Number.isFinite(rate));
  const flakyScenarios = scenarioAggregates.filter(({ passRate }) => passRate > 0 && passRate < 1);
  const stableScenarios = scenarioAggregates.filter(({ passRate }) => passRate === 1);

  return {
    adapterId,
    generatedAt: Date.now(),
    runCount: reports.length,
    scenarios: scenarioAggregates,
    summary: {
      adapterId,
      averageElapsedMs: roundMetric3(average2(pluck("averageElapsedMs")), 2),
      averagePassRate: roundMetric3(average2(pluck("passRate"))),
      averageWordErrorRate: roundMetric3(average2(pluck("averageWordErrorRate"))),
      flakyScenarioCount: flakyScenarios.length,
      generatedRunCount: reports.length,
      reconnectSuccessRate: roundMetric3(average2(reconnectRates)),
      scenarioCount: scenarioAggregates.length,
      stableScenarioCount: stableScenarios.length,
      totalPassCount,
      totalRunCount
    }
  };
};
1180
2366
  var runVoiceSessionBenchmark = async (input) => {
1181
2367
  const scenarioResults = [];
1182
2368
  for (const scenario of input.scenarios) {
1183
- scenarioResults.push(await runVoiceSessionBenchmarkScenario(input.adapter, scenario));
2369
+ scenarioResults.push(await runVoiceSessionBenchmarkScenario(input.adapter, scenario, {
2370
+ correctTurn: input.correctTurn,
2371
+ sttFallback: input.sttFallback,
2372
+ trace: input.trace
2373
+ }));
1184
2374
  }
1185
2375
  return {
1186
2376
  adapterId: input.adapterId,
@@ -1189,17 +2379,39 @@ var runVoiceSessionBenchmark = async (input) => {
1189
2379
  summary: summarizeVoiceSessionBenchmark(input.adapterId, scenarioResults)
1190
2380
  };
1191
2381
  };
2382
/**
 * Run the session benchmark `input.runs` times, one run after another, and
 * summarize all reports as a series.
 *
 * The run count is floored and clamped to at least 1. A missing, non-numeric
 * or non-finite `runs` also resolves to 1. `Math.max(1, NaN)` is `NaN`, which
 * would otherwise skip every run, and `Infinity` would never terminate.
 *
 * @param {object} input `adapter`, `adapterId`, `scenarios`, optional
 *   `correctTurn`, `sttFallback`, `trace`, and `runs`.
 * @returns {Promise<object>} Result of `summarizeVoiceSessionBenchmarkSeries`.
 */
var runVoiceSessionBenchmarkSeries = async (input) => {
  const requestedRuns = Math.floor(input.runs);
  const runCount = Number.isFinite(requestedRuns) ? Math.max(1, requestedRuns) : 1;
  const reports = [];
  // Runs are awaited one at a time; they are never started concurrently.
  for (let runIndex = 0; runIndex < runCount; runIndex += 1) {
    reports.push(await runVoiceSessionBenchmark({
      adapter: input.adapter,
      adapterId: input.adapterId,
      correctTurn: input.correctTurn,
      scenarios: input.scenarios,
      sttFallback: input.sttFallback,
      trace: input.trace
    }));
  }
  return summarizeVoiceSessionBenchmarkSeries({
    adapterId: input.adapterId,
    reports
  });
};
1192
2400
  export {
2401
+ summarizeVoiceSessionBenchmarkSeries,
1193
2402
  summarizeVoiceSessionBenchmark,
1194
2403
  summarizeSTTBenchmark,
1195
2404
  scoreTranscriptAccuracy,
2405
+ runVoiceSessionBenchmarkSeries,
1196
2406
  runVoiceSessionBenchmarkScenario,
1197
2407
  runVoiceSessionBenchmark,
1198
2408
  runVoiceResilienceBenchmark,
1199
2409
  runSTTAdapterFixture,
1200
2410
  runSTTAdapterBenchmark,
2411
+ resolveFixtureEnvironment,
1201
2412
  mergeFinalTranscriptText,
1202
2413
  loadVoiceTestFixtures,
1203
2414
  getVoiceFixtureDirectory,
2415
+ evaluateSTTBenchmarkAcceptance,
1204
2416
  compareSTTBenchmarks
1205
2417
  };