@absolutejs/voice 0.0.22-beta.583 → 0.0.22-beta.585

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1380,22 +1380,146 @@ var resolveAudioConditioningConfig = (config) => {
1380
1380
  };
1381
1381
  };
1382
1382
 
1383
+ // src/core/turnDetection.ts
1384
// Default silence duration, in milliseconds per the name.
+ var DEFAULT_SILENCE_MS = 700;
1385
// Default speech threshold. NOTE(review): presumably compared against measureAudioLevel's RMS output — confirm at the call site.
+ var DEFAULT_SPEECH_THRESHOLD = 0.015;
1386
// Default semanticVetoRecheckMs; every entry of TURN_PROFILE_DEFAULTS below uses it.
+ var DEFAULT_SEMANTIC_VETO_RECHECK_MS = 1200;
1387
// Returns a Uint8Array over `audio` without copying the bytes.
// An ArrayBuffer is wrapped whole; any other value is treated as a view and
// re-wrapped using its own buffer, byteOffset and byteLength.
+ var toUint8Array = (audio) => {
1388
+ if (audio instanceof ArrayBuffer) {
1389
+ return new Uint8Array(audio);
1390
+ }
1391
+ return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
1392
+ };
1393
/**
 * Computes the root-mean-square level of a chunk of 16-bit signed audio samples.
 *
 * Samples are read as little-endian int16 through a DataView. Unlike an
 * Int16Array view, a DataView accepts any byte offset, so input views that
 * start on an odd offset (for example a slice of a pooled buffer) no longer
 * throw a RangeError, and the result does not depend on host endianness.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio Raw sample bytes; a trailing odd byte is ignored.
 * @returns {number} RMS of the samples after dividing each by 32768, or 0 when
 *   fewer than two bytes are supplied.
 */
var measureAudioLevel = (audio) => {
  const view = audio instanceof ArrayBuffer
    ? new DataView(audio)
    : new DataView(audio.buffer, audio.byteOffset, audio.byteLength);
  const sampleCount = Math.floor(view.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  let sumSquares = 0;
  for (let index = 0; index < sampleCount; index += 1) {
    // `true` selects little-endian byte order for every sample.
    const normalized = view.getInt16(index * 2, true) / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.sqrt(sumSquares / sampleCount);
};
1409
// Trims the string and collapses every run of whitespace into a single space.
+ var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
1410
// Counts space-separated tokens; an empty string yields 0.
// Splits on a single space only, so it is accurate for text already passed through normalizeText.
+ var countWords = (value) => value.length > 0 ? value.split(" ").length : 0;
1411
// Chooses ONE of two transcript candidates (it never concatenates them).
// Both inputs are normalized first; the rules are applied in this order:
//   1. if either side is empty, the other side is returned;
//   2. if one text contains the other, the containing text wins (current on equality);
//   3. next wins when it has more words, or the same word count but more characters;
//   4. otherwise current is kept.
// NOTE(review): containment uses String.includes, i.e. it is character-level —
// "no" counts as contained in "I know". Confirm that is intended.
+ var selectPreferredTranscriptText = (currentText, nextText) => {
1412
+ const current = normalizeText(currentText);
1413
+ const next = normalizeText(nextText);
1414
+ if (!current) {
1415
+ return next;
1416
+ }
1417
+ if (!next) {
1418
+ return current;
1419
+ }
1420
+ if (current === next || current.includes(next)) {
1421
+ return current;
1422
+ }
1423
+ if (next.includes(current)) {
1424
+ return next;
1425
+ }
1426
+ if (countWords(next) > countWords(current)) {
1427
+ return next;
1428
+ }
1429
+ if (countWords(next) === countWords(current) && next.length > current.length) {
1430
+ return next;
1431
+ }
1432
+ return current;
1433
+ };
1434
// Appends nextText after currentText, de-duplicating a word-level overlap:
// the longest run of words that is both a suffix of current and a prefix of
// next is emitted only once. Both inputs are normalized first; if either is
// empty the other is returned unchanged.
+ var mergeSequentialTranscriptText = (currentText, nextText) => {
1435
+ const current = normalizeText(currentText);
1436
+ const next = normalizeText(nextText);
1437
+ if (!current) {
1438
+ return next;
1439
+ }
1440
+ if (!next) {
1441
+ return current;
1442
+ }
1443
+ const currentWords = current.split(" ");
1444
+ const nextWords = next.split(" ");
1445
+ const maxOverlap = Math.min(currentWords.length, nextWords.length);
1446
// Try the largest possible overlap first so the longest shared run wins.
+ for (let overlap = maxOverlap;overlap > 0; overlap -= 1) {
1447
+ const currentSuffix = currentWords.slice(-overlap).join(" ");
1448
+ const nextPrefix = nextWords.slice(0, overlap).join(" ");
1449
+ if (currentSuffix === nextPrefix) {
1450
+ return [...currentWords, ...nextWords.slice(overlap)].join(" ");
1451
+ }
1452
+ }
1453
// No shared words at the seam: plain concatenation.
+ return `${current} ${next}`.trim();
1454
+ };
1455
// Returns how many leading words the two texts share (exact, case-sensitive
// string comparison per word) after normalization. Empty tokens are filtered
// out, so an empty input contributes zero words.
+ var countCommonPrefixWords = (currentText, nextText) => {
1456
+ const currentWords = normalizeText(currentText).split(" ").filter(Boolean);
1457
+ const nextWords = normalizeText(nextText).split(" ").filter(Boolean);
1458
+ const maxWords = Math.min(currentWords.length, nextWords.length);
1459
+ let count = 0;
1460
+ for (let index = 0;index < maxWords; index += 1) {
1461
+ if (currentWords[index] !== nextWords[index]) {
1462
+ break;
1463
+ }
1464
+ count += 1;
1465
+ }
1466
+ return count;
1467
+ };
1468
// Joins the `text` of each transcript into one space-separated string.
// Each normalized text is compared ONLY with the most recently kept entry:
//   - empty texts are skipped;
//   - a text equal to, or contained in, the previous entry is dropped;
//   - a text that contains the previous entry replaces it;
//   - anything else is appended.
// NOTE(review): containment is character-level (String.includes) — confirm intended.
+ var mergeTranscriptTexts = (transcripts) => {
1469
+ const merged = [];
1470
+ for (const transcript of transcripts) {
1471
+ const nextText = normalizeText(transcript.text);
1472
+ if (!nextText) {
1473
+ continue;
1474
+ }
1475
+ const previous = merged.at(-1);
1476
+ if (!previous) {
1477
+ merged.push(nextText);
1478
+ continue;
1479
+ }
1480
+ if (nextText === previous || previous.includes(nextText)) {
1481
+ continue;
1482
+ }
1483
+ if (nextText.includes(previous)) {
1484
+ merged[merged.length - 1] = nextText;
1485
+ continue;
1486
+ }
1487
+ merged.push(nextText);
1488
+ }
1489
+ return merged.join(" ").trim();
1490
+ };
1491
// Builds the text of a turn from its transcripts plus the current partial text.
// The transcripts are merged with mergeTranscriptTexts. The partial is then either
//   - appended (mergeSequentialTranscriptText) when ALL of these hold: both texts are
//     non-empty, the last transcript carrying a numeric endedAtMs ended at least 250
//     before options.partialStartedAtMs, and the two texts share no leading word; or
//   - weighed against the merged text via selectPreferredTranscriptText otherwise.
// Only options.partialStartedAtMs is read here; other option keys are ignored.
// NOTE(review): the 250 gap is presumably milliseconds, going by the *Ms names — confirm.
+ var buildTurnText = (transcripts, partialText, options = {}) => {
1492
+ const finalText = mergeTranscriptTexts(transcripts);
1493
+ const nextPartial = normalizeText(partialText);
1494
// Search from the end for the latest transcript that has a numeric endedAtMs.
+ const lastFinalEndedAtMs = [...transcripts].reverse().find((transcript) => typeof transcript.endedAtMs === "number")?.endedAtMs;
1495
+ if (finalText && nextPartial && typeof lastFinalEndedAtMs === "number" && typeof options.partialStartedAtMs === "number" && options.partialStartedAtMs - lastFinalEndedAtMs >= 250 && countCommonPrefixWords(finalText, nextPartial) === 0) {
1496
+ return mergeSequentialTranscriptText(finalText, nextPartial);
1497
+ }
1498
+ return selectPreferredTranscriptText(finalText, nextPartial);
1499
+ };
1500
+
1383
1501
  // src/core/turnProfiles.ts
1384
1502
  var TURN_PROFILE_DEFAULTS = {
1385
1503
  balanced: {
1386
1504
  qualityProfile: "general",
1505
+ semanticVetoMaxMs: 0,
1506
+ semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
1387
1507
  silenceMs: 1400,
1388
1508
  speechThreshold: 0.012,
1389
1509
  transcriptStabilityMs: 1000
1390
1510
  },
1391
1511
  fast: {
1392
1512
  qualityProfile: "general",
1513
+ semanticVetoMaxMs: 0,
1514
+ semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
1393
1515
  silenceMs: 700,
1394
1516
  speechThreshold: 0.015,
1395
1517
  transcriptStabilityMs: 450
1396
1518
  },
1397
1519
  "long-form": {
1398
1520
  qualityProfile: "general",
1521
+ semanticVetoMaxMs: 0,
1522
+ semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
1399
1523
  silenceMs: 2200,
1400
1524
  speechThreshold: 0.01,
1401
1525
  transcriptStabilityMs: 1500
@@ -1429,6 +1553,8 @@ var resolveTurnDetectionConfig = (config) => {
1429
1553
  return {
1430
1554
  profile,
1431
1555
  qualityProfile,
1556
+ semanticVetoMaxMs: config?.semanticVetoMaxMs ?? preset.semanticVetoMaxMs,
1557
+ semanticVetoRecheckMs: config?.semanticVetoRecheckMs ?? preset.semanticVetoRecheckMs,
1432
1558
  silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
1433
1559
  speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
1434
1560
  transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
@@ -86,6 +86,7 @@ var __require = import.meta.require;
86
86
  // src/core/turnDetection.ts
87
87
  var DEFAULT_SILENCE_MS = 700;
88
88
  var DEFAULT_SPEECH_THRESHOLD = 0.015;
89
+ var DEFAULT_SEMANTIC_VETO_RECHECK_MS = 1200;
89
90
  var toUint8Array = (audio) => {
90
91
  if (audio instanceof ArrayBuffer) {
91
92
  return new Uint8Array(audio);
@@ -3133,18 +3134,24 @@ var resolveAudioConditioningConfig = (config) => {
3133
3134
  var TURN_PROFILE_DEFAULTS = {
3134
3135
  balanced: {
3135
3136
  qualityProfile: "general",
3137
+ semanticVetoMaxMs: 0,
3138
+ semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
3136
3139
  silenceMs: 1400,
3137
3140
  speechThreshold: 0.012,
3138
3141
  transcriptStabilityMs: 1000
3139
3142
  },
3140
3143
  fast: {
3141
3144
  qualityProfile: "general",
3145
+ semanticVetoMaxMs: 0,
3146
+ semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
3142
3147
  silenceMs: 700,
3143
3148
  speechThreshold: 0.015,
3144
3149
  transcriptStabilityMs: 450
3145
3150
  },
3146
3151
  "long-form": {
3147
3152
  qualityProfile: "general",
3153
+ semanticVetoMaxMs: 0,
3154
+ semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
3148
3155
  silenceMs: 2200,
3149
3156
  speechThreshold: 0.01,
3150
3157
  transcriptStabilityMs: 1500
@@ -3178,6 +3185,8 @@ var resolveTurnDetectionConfig = (config) => {
3178
3185
  return {
3179
3186
  profile,
3180
3187
  qualityProfile,
3188
+ semanticVetoMaxMs: config?.semanticVetoMaxMs ?? preset.semanticVetoMaxMs,
3189
+ semanticVetoRecheckMs: config?.semanticVetoRecheckMs ?? preset.semanticVetoRecheckMs,
3181
3190
  silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
3182
3191
  speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
3183
3192
  transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
@@ -4210,6 +4219,45 @@ var startVoiceTimer = (sessionId) => {
4210
4219
  };
4211
4220
  var voiceTimingEnabled = () => timingEnabled();
4212
4221
 
4222
+ // src/core/hardenedFetch.ts
4223
+ var ATTEMPT_TIMEOUT_MS = 6000;
4224
+ var isBun = "Bun" in globalThis;
4225
+ var oneAttempt = async (baseFetch, input, init) => {
4226
+ const controller = new AbortController;
4227
+ const callerSignal = init?.signal ?? undefined;
4228
+ const onCallerAbort = () => controller.abort(callerSignal?.reason);
4229
+ if (callerSignal?.aborted)
4230
+ controller.abort(callerSignal.reason);
4231
+ else
4232
+ callerSignal?.addEventListener("abort", onCallerAbort, { once: true });
4233
+ const timer = setTimeout(() => {
4234
+ controller.abort(new Error(`fetch exceeded ${ATTEMPT_TIMEOUT_MS}ms before response headers (stale Bun keep-alive socket?)`));
4235
+ }, ATTEMPT_TIMEOUT_MS);
4236
+ const headers = new Headers(init?.headers);
4237
+ if (isBun)
4238
+ headers.set("Connection", "close");
4239
+ try {
4240
+ return await baseFetch(input, {
4241
+ ...init,
4242
+ headers,
4243
+ signal: controller.signal
4244
+ });
4245
+ } finally {
4246
+ clearTimeout(timer);
4247
+ callerSignal?.removeEventListener("abort", onCallerAbort);
4248
+ }
4249
+ };
4250
+ var hardenFetch = (baseFetch = globalThis.fetch) => Object.assign(async (input, init) => {
4251
+ try {
4252
+ return await oneAttempt(baseFetch, input, init);
4253
+ } catch (error) {
4254
+ if (init?.signal?.aborted)
4255
+ throw error;
4256
+ console.warn(`[voice] hardened fetch retrying on a fresh connection: ${error instanceof Error ? error.message : String(error)}`);
4257
+ return oneAttempt(baseFetch, input, init);
4258
+ }
4259
+ }, { preconnect: baseFetch.preconnect.bind(baseFetch) });
4260
+
4213
4261
  // src/core/modelAdapters.ts
4214
4262
  var isVoiceProviderRoutingPolicyPreset = (value) => value === "balanced" || value === "cost-cap" || value === "cost-first" || value === "latency-first" || value === "quality-first";
4215
4263
  var resolveVoiceProviderRoutingPolicyPreset = (preset, options = {}) => {
@@ -4914,7 +4962,7 @@ var consumeOpenAIResponsesStream = async (response, onTextDelta, abortOptions) =
4914
4962
  return { assistantText, toolCalls: finalizeToolCalls(calls), usage };
4915
4963
  };
4916
4964
  var createOpenAIVoiceAssistantModel = (options) => {
4917
- const fetchImpl = options.fetch ?? globalThis.fetch;
4965
+ const fetchImpl = hardenFetch(options.fetch);
4918
4966
  const baseUrl = options.baseUrl ?? "https://api.openai.com/v1";
4919
4967
  const model = options.model ?? "gpt-4.1-mini";
4920
4968
  const timeoutMs = options.timeoutMs ?? 60000;
@@ -5039,7 +5087,7 @@ var consumeAnthropicStream = async (response, onTextDelta) => {
5039
5087
  return { assistantText, toolCalls: finalizeToolCalls(calls), usage };
5040
5088
  };
5041
5089
  var createAnthropicVoiceAssistantModel = (options) => {
5042
- const fetchImpl = options.fetch ?? globalThis.fetch;
5090
+ const fetchImpl = hardenFetch(options.fetch);
5043
5091
  const baseUrl = options.baseUrl ?? "https://api.anthropic.com/v1";
5044
5092
  const model = options.model ?? "claude-sonnet-4-5";
5045
5093
  return {
@@ -5125,7 +5173,7 @@ var consumeGeminiStream = async (response, onTextDelta) => {
5125
5173
  return { assistantText, toolCalls, usage };
5126
5174
  };
5127
5175
  var createGeminiVoiceAssistantModel = (options) => {
5128
- const fetchImpl = options.fetch ?? globalThis.fetch;
5176
+ const fetchImpl = hardenFetch(options.fetch);
5129
5177
  const baseUrl = options.baseUrl ?? "https://generativelanguage.googleapis.com/v1beta";
5130
5178
  const model = options.model ?? "gemini-2.5-flash";
5131
5179
  const maxRetries = Math.max(0, options.maxRetries ?? 2);
@@ -6066,8 +6114,11 @@ var createVoiceSession = (options) => {
6066
6114
  const turnDetection = {
6067
6115
  silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS,
6068
6116
  speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD,
6069
- transcriptStabilityMs: options.turnDetection.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS
6117
+ transcriptStabilityMs: options.turnDetection.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS,
6118
+ semanticVetoMaxMs: options.turnDetection.semanticVetoMaxMs ?? 0,
6119
+ semanticVetoRecheckMs: options.turnDetection.semanticVetoRecheckMs ?? DEFAULT_SEMANTIC_VETO_RECHECK_MS
6070
6120
  };
6121
+ let semanticVetoElapsedMs = 0;
6071
6122
  const sttFallback = options.sttFallback ? {
6072
6123
  adapter: options.sttFallback.adapter,
6073
6124
  completionTimeoutMs: options.sttFallback.completionTimeoutMs ?? DEFAULT_FALLBACK_COMPLETION_TIMEOUT_MS,
@@ -6582,10 +6633,51 @@ var createVoiceSession = (options) => {
6582
6633
  silenceTimer = setTimeout(() => {
6583
6634
  silenceTimer = null;
6584
6635
  pendingCommitReason = null;
6585
- api.commitTurn(reason);
6636
+ runScheduledCommit(reason);
6586
6637
  }, delayMs);
6587
6638
  };
6588
6639
  const scheduleSilenceCommit = (delayMs = turnDetection.silenceMs, reset = true) => scheduleTurnCommit(delayMs, "silence", reset);
6640
// Decides whether a scheduled commit should be postponed because the semantic
// turn detector says the turn is not over yet.
// Resolves to true when the commit was re-scheduled here (caller must NOT commit),
// and to false when the caller should commit now.
// The commit is only ever deferred when all of these hold: reason is "silence",
// semanticVetoMaxMs is positive, options.semanticTurnDetector is set, the veto
// budget is not used up, the turn has text, and the detector returned a verdict
// whose endOfTurn is exactly false.
// NOTE(review): the budget is tracked by summing the scheduled extensions
// (semanticVetoElapsedMs += extendMs), not by wall-clock time; time spent
// awaiting readSession()/evaluate() is not counted.
+ const shouldDeferSilenceCommit = async (reason) => {
6641
+ if (reason !== "silence" || turnDetection.semanticVetoMaxMs <= 0 || !options.semanticTurnDetector || semanticVetoElapsedMs >= turnDetection.semanticVetoMaxMs) {
6642
+ return false;
6643
+ }
6644
+ const session = await readSession();
6645
+ const { partialText, transcripts } = session.currentTurn;
6646
+ const userText = buildTurnText(transcripts, partialText, {
6647
+ partialEndedAtMs: session.currentTurn.partialEndedAt,
6648
+ partialStartedAtMs: session.currentTurn.partialStartedAt
6649
+ });
6650
// Nothing was said in this turn, so there is nothing for the detector to veto.
+ if (!userText) {
6651
+ return false;
6652
+ }
6653
// Falls back to the configured silence window when the turn has no silenceStartedAt.
+ const silenceMs = session.currentTurn.silenceStartedAt !== undefined ? Date.now() - session.currentTurn.silenceStartedAt : turnDetection.silenceMs;
6654
+ let endOfTurn = true;
6655
+ try {
6656
// Promise.resolve lets evaluate() return either a plain value or a promise.
+ const verdict = await Promise.resolve(options.semanticTurnDetector.evaluate({
6657
+ lastFinalTranscript: transcripts.at(-1),
6658
+ partialText,
6659
+ silenceMs,
6660
+ transcripts
6661
+ }));
6662
+ endOfTurn = verdict.endOfTurn;
6663
+ } catch {
6664
// Any detector failure means "do not defer": the commit proceeds as scheduled.
+ return false;
6665
+ }
6666
// Only an explicit `false` vetoes; undefined or any other value commits.
+ if (endOfTurn !== false) {
6667
+ return false;
6668
+ }
6669
+ const remaining = turnDetection.semanticVetoMaxMs - semanticVetoElapsedMs;
6670
// Wait one recheck interval, capped by the remaining budget and never below 1.
+ const extendMs = Math.max(1, Math.min(turnDetection.semanticVetoRecheckMs, remaining));
6671
+ semanticVetoElapsedMs += extendMs;
6672
+ scheduleTurnCommit(extendMs, reason);
6673
+ return true;
6674
+ };
6675
// Invoked when the scheduled commit timer fires: commits the turn via
// api.commitTurn unless shouldDeferSilenceCommit re-scheduled it.
// NOTE(review): the timer callback in scheduleTurnCommit calls this without
// awaiting or catching the returned promise, so a rejection from
// readSession()/api.commitTurn() would surface as an unhandled rejection — confirm.
+ const runScheduledCommit = async (reason) => {
6676
+ if (await shouldDeferSilenceCommit(reason)) {
6677
+ return;
6678
+ }
6679
+ await api.commitTurn(reason);
6680
+ };
6589
6681
  const requestTurnCommit = async (reason) => {
6590
6682
  const session = await readSession();
6591
6683
  const text = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText, {
@@ -7297,6 +7389,7 @@ var createVoiceSession = (options) => {
7297
7389
  session2.lastActivityAt = Date.now();
7298
7390
  session2.status = "active";
7299
7391
  });
7392
+ semanticVetoElapsedMs = 0;
7300
7393
  if (silenceTimer && pendingCommitReason === "vendor") {
7301
7394
  scheduleTurnCommit(getVendorCommitDelayMs(), "vendor");
7302
7395
  }
@@ -8000,6 +8093,7 @@ var createVoiceSession = (options) => {
8000
8093
  };
8001
8094
  const commitTurnInternal = async (reason = "manual") => {
8002
8095
  clearSilenceTimer();
8096
+ semanticVetoElapsedMs = 0;
8003
8097
  backchannelDriver?.reset();
8004
8098
  amdLastTurnCommitAt = Date.now();
8005
8099
  const session = await readSession();
package/dist/vue/index.js CHANGED
@@ -11660,22 +11660,146 @@ var resolveAudioConditioningConfig = (config) => {
11660
11660
  };
11661
11661
  };
11662
11662
 
11663
+ // src/core/turnDetection.ts
11664
// Default silence duration, in milliseconds per the name.
+ var DEFAULT_SILENCE_MS = 700;
11665
// Default speech threshold. NOTE(review): presumably compared against measureAudioLevel's RMS output — confirm at the call site.
+ var DEFAULT_SPEECH_THRESHOLD = 0.015;
11666
// Default semanticVetoRecheckMs; every entry of TURN_PROFILE_DEFAULTS below uses it.
+ var DEFAULT_SEMANTIC_VETO_RECHECK_MS = 1200;
11667
// Returns a Uint8Array over `audio` without copying the bytes.
// An ArrayBuffer is wrapped whole; any other value is treated as a view and
// re-wrapped using its own buffer, byteOffset and byteLength.
+ var toUint8Array = (audio) => {
11668
+ if (audio instanceof ArrayBuffer) {
11669
+ return new Uint8Array(audio);
11670
+ }
11671
+ return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
11672
+ };
11673
/**
 * Computes the root-mean-square level of a chunk of 16-bit signed audio samples.
 *
 * Samples are read as little-endian int16 through a DataView. Unlike an
 * Int16Array view, a DataView accepts any byte offset, so input views that
 * start on an odd offset (for example a slice of a pooled buffer) no longer
 * throw a RangeError, and the result does not depend on host endianness.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio Raw sample bytes; a trailing odd byte is ignored.
 * @returns {number} RMS of the samples after dividing each by 32768, or 0 when
 *   fewer than two bytes are supplied.
 */
var measureAudioLevel = (audio) => {
  const view = audio instanceof ArrayBuffer
    ? new DataView(audio)
    : new DataView(audio.buffer, audio.byteOffset, audio.byteLength);
  const sampleCount = Math.floor(view.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  let sumSquares = 0;
  for (let index = 0; index < sampleCount; index += 1) {
    // `true` selects little-endian byte order for every sample.
    const normalized = view.getInt16(index * 2, true) / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.sqrt(sumSquares / sampleCount);
};
11689
// Trims the string and collapses every run of whitespace into a single space.
+ var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
11690
// Counts space-separated tokens; an empty string yields 0.
// Splits on a single space only, so it is accurate for text already passed through normalizeText.
+ var countWords = (value) => value.length > 0 ? value.split(" ").length : 0;
11691
// Chooses ONE of two transcript candidates (it never concatenates them).
// Both inputs are normalized first; the rules are applied in this order:
//   1. if either side is empty, the other side is returned;
//   2. if one text contains the other, the containing text wins (current on equality);
//   3. next wins when it has more words, or the same word count but more characters;
//   4. otherwise current is kept.
// NOTE(review): containment uses String.includes, i.e. it is character-level —
// "no" counts as contained in "I know". Confirm that is intended.
+ var selectPreferredTranscriptText = (currentText, nextText) => {
11692
+ const current = normalizeText(currentText);
11693
+ const next = normalizeText(nextText);
11694
+ if (!current) {
11695
+ return next;
11696
+ }
11697
+ if (!next) {
11698
+ return current;
11699
+ }
11700
+ if (current === next || current.includes(next)) {
11701
+ return current;
11702
+ }
11703
+ if (next.includes(current)) {
11704
+ return next;
11705
+ }
11706
+ if (countWords(next) > countWords(current)) {
11707
+ return next;
11708
+ }
11709
+ if (countWords(next) === countWords(current) && next.length > current.length) {
11710
+ return next;
11711
+ }
11712
+ return current;
11713
+ };
11714
// Appends nextText after currentText, de-duplicating a word-level overlap:
// the longest run of words that is both a suffix of current and a prefix of
// next is emitted only once. Both inputs are normalized first; if either is
// empty the other is returned unchanged.
+ var mergeSequentialTranscriptText = (currentText, nextText) => {
11715
+ const current = normalizeText(currentText);
11716
+ const next = normalizeText(nextText);
11717
+ if (!current) {
11718
+ return next;
11719
+ }
11720
+ if (!next) {
11721
+ return current;
11722
+ }
11723
+ const currentWords = current.split(" ");
11724
+ const nextWords = next.split(" ");
11725
+ const maxOverlap = Math.min(currentWords.length, nextWords.length);
11726
// Try the largest possible overlap first so the longest shared run wins.
+ for (let overlap = maxOverlap;overlap > 0; overlap -= 1) {
11727
+ const currentSuffix = currentWords.slice(-overlap).join(" ");
11728
+ const nextPrefix = nextWords.slice(0, overlap).join(" ");
11729
+ if (currentSuffix === nextPrefix) {
11730
+ return [...currentWords, ...nextWords.slice(overlap)].join(" ");
11731
+ }
11732
+ }
11733
// No shared words at the seam: plain concatenation.
+ return `${current} ${next}`.trim();
11734
+ };
11735
// Returns how many leading words the two texts share (exact, case-sensitive
// string comparison per word) after normalization. Empty tokens are filtered
// out, so an empty input contributes zero words.
+ var countCommonPrefixWords = (currentText, nextText) => {
11736
+ const currentWords = normalizeText(currentText).split(" ").filter(Boolean);
11737
+ const nextWords = normalizeText(nextText).split(" ").filter(Boolean);
11738
+ const maxWords = Math.min(currentWords.length, nextWords.length);
11739
+ let count = 0;
11740
+ for (let index = 0;index < maxWords; index += 1) {
11741
+ if (currentWords[index] !== nextWords[index]) {
11742
+ break;
11743
+ }
11744
+ count += 1;
11745
+ }
11746
+ return count;
11747
+ };
11748
// Joins the `text` of each transcript into one space-separated string.
// Each normalized text is compared ONLY with the most recently kept entry:
//   - empty texts are skipped;
//   - a text equal to, or contained in, the previous entry is dropped;
//   - a text that contains the previous entry replaces it;
//   - anything else is appended.
// NOTE(review): containment is character-level (String.includes) — confirm intended.
+ var mergeTranscriptTexts = (transcripts) => {
11749
+ const merged = [];
11750
+ for (const transcript of transcripts) {
11751
+ const nextText = normalizeText(transcript.text);
11752
+ if (!nextText) {
11753
+ continue;
11754
+ }
11755
+ const previous = merged.at(-1);
11756
+ if (!previous) {
11757
+ merged.push(nextText);
11758
+ continue;
11759
+ }
11760
+ if (nextText === previous || previous.includes(nextText)) {
11761
+ continue;
11762
+ }
11763
+ if (nextText.includes(previous)) {
11764
+ merged[merged.length - 1] = nextText;
11765
+ continue;
11766
+ }
11767
+ merged.push(nextText);
11768
+ }
11769
+ return merged.join(" ").trim();
11770
+ };
11771
// Builds the text of a turn from its transcripts plus the current partial text.
// The transcripts are merged with mergeTranscriptTexts. The partial is then either
//   - appended (mergeSequentialTranscriptText) when ALL of these hold: both texts are
//     non-empty, the last transcript carrying a numeric endedAtMs ended at least 250
//     before options.partialStartedAtMs, and the two texts share no leading word; or
//   - weighed against the merged text via selectPreferredTranscriptText otherwise.
// Only options.partialStartedAtMs is read here; other option keys are ignored.
// NOTE(review): the 250 gap is presumably milliseconds, going by the *Ms names — confirm.
+ var buildTurnText = (transcripts, partialText, options = {}) => {
11772
+ const finalText = mergeTranscriptTexts(transcripts);
11773
+ const nextPartial = normalizeText(partialText);
11774
// Search from the end for the latest transcript that has a numeric endedAtMs.
+ const lastFinalEndedAtMs = [...transcripts].reverse().find((transcript) => typeof transcript.endedAtMs === "number")?.endedAtMs;
11775
+ if (finalText && nextPartial && typeof lastFinalEndedAtMs === "number" && typeof options.partialStartedAtMs === "number" && options.partialStartedAtMs - lastFinalEndedAtMs >= 250 && countCommonPrefixWords(finalText, nextPartial) === 0) {
11776
+ return mergeSequentialTranscriptText(finalText, nextPartial);
11777
+ }
11778
+ return selectPreferredTranscriptText(finalText, nextPartial);
11779
+ };
11780
+
11663
11781
  // src/core/turnProfiles.ts
11664
11782
  var TURN_PROFILE_DEFAULTS = {
11665
11783
  balanced: {
11666
11784
  qualityProfile: "general",
11785
+ semanticVetoMaxMs: 0,
11786
+ semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
11667
11787
  silenceMs: 1400,
11668
11788
  speechThreshold: 0.012,
11669
11789
  transcriptStabilityMs: 1000
11670
11790
  },
11671
11791
  fast: {
11672
11792
  qualityProfile: "general",
11793
+ semanticVetoMaxMs: 0,
11794
+ semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
11673
11795
  silenceMs: 700,
11674
11796
  speechThreshold: 0.015,
11675
11797
  transcriptStabilityMs: 450
11676
11798
  },
11677
11799
  "long-form": {
11678
11800
  qualityProfile: "general",
11801
+ semanticVetoMaxMs: 0,
11802
+ semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
11679
11803
  silenceMs: 2200,
11680
11804
  speechThreshold: 0.01,
11681
11805
  transcriptStabilityMs: 1500
@@ -11709,6 +11833,8 @@ var resolveTurnDetectionConfig = (config) => {
11709
11833
  return {
11710
11834
  profile,
11711
11835
  qualityProfile,
11836
+ semanticVetoMaxMs: config?.semanticVetoMaxMs ?? preset.semanticVetoMaxMs,
11837
+ semanticVetoRecheckMs: config?.semanticVetoRecheckMs ?? preset.semanticVetoRecheckMs,
11712
11838
  silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
11713
11839
  speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
11714
11840
  transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.583",
3
+ "version": "0.0.22-beta.585",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",