@absolutejs/voice 0.0.22-beta.598 → 0.0.22-beta.599
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/angular/index.js +6 -127
- package/dist/client/htmxBootstrap.js +6 -12
- package/dist/client/index.js +6 -127
- package/dist/core/turnDetection.d.ts +1 -1
- package/dist/core/types.d.ts +2 -4
- package/dist/embed/index.js +6 -12
- package/dist/embed/voice-widget.js +8 -8
- package/dist/index.js +158 -184
- package/dist/react/index.js +6 -127
- package/dist/svelte/index.js +6 -127
- package/dist/testing/index.js +29 -57
- package/dist/vue/index.js +6 -127
- package/package.json +1 -1
package/dist/svelte/index.js
CHANGED
|
@@ -1409,146 +1409,25 @@ var resolveAudioConditioningConfig = (config) => {
|
|
|
1409
1409
|
};
|
|
1410
1410
|
};
|
|
1411
1411
|
|
|
1412
|
-
// src/core/turnDetection.ts
|
|
1413
|
-
var DEFAULT_SILENCE_MS = 700;
|
|
1414
|
-
var DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
1415
|
-
var DEFAULT_SEMANTIC_VETO_RECHECK_MS = 1200;
|
|
1416
|
-
var toUint8Array = (audio) => {
|
|
1417
|
-
if (audio instanceof ArrayBuffer) {
|
|
1418
|
-
return new Uint8Array(audio);
|
|
1419
|
-
}
|
|
1420
|
-
return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
|
|
1421
|
-
};
|
|
1422
|
-
var measureAudioLevel = (audio) => {
|
|
1423
|
-
const bytes = toUint8Array(audio);
|
|
1424
|
-
if (bytes.byteLength < 2) {
|
|
1425
|
-
return 0;
|
|
1426
|
-
}
|
|
1427
|
-
const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
|
|
1428
|
-
if (samples.length === 0) {
|
|
1429
|
-
return 0;
|
|
1430
|
-
}
|
|
1431
|
-
let sumSquares = 0;
|
|
1432
|
-
for (const sample of samples) {
|
|
1433
|
-
const normalized = sample / 32768;
|
|
1434
|
-
sumSquares += normalized * normalized;
|
|
1435
|
-
}
|
|
1436
|
-
return Math.sqrt(sumSquares / samples.length);
|
|
1437
|
-
};
|
|
1438
|
-
var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
|
|
1439
|
-
var countWords = (value) => value.length > 0 ? value.split(" ").length : 0;
|
|
1440
|
-
var selectPreferredTranscriptText = (currentText, nextText) => {
|
|
1441
|
-
const current = normalizeText(currentText);
|
|
1442
|
-
const next = normalizeText(nextText);
|
|
1443
|
-
if (!current) {
|
|
1444
|
-
return next;
|
|
1445
|
-
}
|
|
1446
|
-
if (!next) {
|
|
1447
|
-
return current;
|
|
1448
|
-
}
|
|
1449
|
-
if (current === next || current.includes(next)) {
|
|
1450
|
-
return current;
|
|
1451
|
-
}
|
|
1452
|
-
if (next.includes(current)) {
|
|
1453
|
-
return next;
|
|
1454
|
-
}
|
|
1455
|
-
if (countWords(next) > countWords(current)) {
|
|
1456
|
-
return next;
|
|
1457
|
-
}
|
|
1458
|
-
if (countWords(next) === countWords(current) && next.length > current.length) {
|
|
1459
|
-
return next;
|
|
1460
|
-
}
|
|
1461
|
-
return current;
|
|
1462
|
-
};
|
|
1463
|
-
var mergeSequentialTranscriptText = (currentText, nextText) => {
|
|
1464
|
-
const current = normalizeText(currentText);
|
|
1465
|
-
const next = normalizeText(nextText);
|
|
1466
|
-
if (!current) {
|
|
1467
|
-
return next;
|
|
1468
|
-
}
|
|
1469
|
-
if (!next) {
|
|
1470
|
-
return current;
|
|
1471
|
-
}
|
|
1472
|
-
const currentWords = current.split(" ");
|
|
1473
|
-
const nextWords = next.split(" ");
|
|
1474
|
-
const maxOverlap = Math.min(currentWords.length, nextWords.length);
|
|
1475
|
-
for (let overlap = maxOverlap;overlap > 0; overlap -= 1) {
|
|
1476
|
-
const currentSuffix = currentWords.slice(-overlap).join(" ");
|
|
1477
|
-
const nextPrefix = nextWords.slice(0, overlap).join(" ");
|
|
1478
|
-
if (currentSuffix === nextPrefix) {
|
|
1479
|
-
return [...currentWords, ...nextWords.slice(overlap)].join(" ");
|
|
1480
|
-
}
|
|
1481
|
-
}
|
|
1482
|
-
return `${current} ${next}`.trim();
|
|
1483
|
-
};
|
|
1484
|
-
var countCommonPrefixWords = (currentText, nextText) => {
|
|
1485
|
-
const currentWords = normalizeText(currentText).split(" ").filter(Boolean);
|
|
1486
|
-
const nextWords = normalizeText(nextText).split(" ").filter(Boolean);
|
|
1487
|
-
const maxWords = Math.min(currentWords.length, nextWords.length);
|
|
1488
|
-
let count = 0;
|
|
1489
|
-
for (let index = 0;index < maxWords; index += 1) {
|
|
1490
|
-
if (currentWords[index] !== nextWords[index]) {
|
|
1491
|
-
break;
|
|
1492
|
-
}
|
|
1493
|
-
count += 1;
|
|
1494
|
-
}
|
|
1495
|
-
return count;
|
|
1496
|
-
};
|
|
1497
|
-
var mergeTranscriptTexts = (transcripts) => {
|
|
1498
|
-
const merged = [];
|
|
1499
|
-
for (const transcript of transcripts) {
|
|
1500
|
-
const nextText = normalizeText(transcript.text);
|
|
1501
|
-
if (!nextText) {
|
|
1502
|
-
continue;
|
|
1503
|
-
}
|
|
1504
|
-
const previous = merged.at(-1);
|
|
1505
|
-
if (!previous) {
|
|
1506
|
-
merged.push(nextText);
|
|
1507
|
-
continue;
|
|
1508
|
-
}
|
|
1509
|
-
if (nextText === previous || previous.includes(nextText)) {
|
|
1510
|
-
continue;
|
|
1511
|
-
}
|
|
1512
|
-
if (nextText.includes(previous)) {
|
|
1513
|
-
merged[merged.length - 1] = nextText;
|
|
1514
|
-
continue;
|
|
1515
|
-
}
|
|
1516
|
-
merged.push(nextText);
|
|
1517
|
-
}
|
|
1518
|
-
return merged.join(" ").trim();
|
|
1519
|
-
};
|
|
1520
|
-
var buildTurnText = (transcripts, partialText, options = {}) => {
|
|
1521
|
-
const finalText = mergeTranscriptTexts(transcripts);
|
|
1522
|
-
const nextPartial = normalizeText(partialText);
|
|
1523
|
-
const lastFinalEndedAtMs = [...transcripts].reverse().find((transcript) => typeof transcript.endedAtMs === "number")?.endedAtMs;
|
|
1524
|
-
if (finalText && nextPartial && typeof lastFinalEndedAtMs === "number" && typeof options.partialStartedAtMs === "number" && options.partialStartedAtMs - lastFinalEndedAtMs >= 250 && countCommonPrefixWords(finalText, nextPartial) === 0) {
|
|
1525
|
-
return mergeSequentialTranscriptText(finalText, nextPartial);
|
|
1526
|
-
}
|
|
1527
|
-
return selectPreferredTranscriptText(finalText, nextPartial);
|
|
1528
|
-
};
|
|
1529
|
-
|
|
1530
1412
|
// src/core/turnProfiles.ts
|
|
1531
1413
|
var TURN_PROFILE_DEFAULTS = {
|
|
1532
1414
|
balanced: {
|
|
1533
1415
|
qualityProfile: "general",
|
|
1534
|
-
|
|
1535
|
-
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
1416
|
+
minSilenceMs: 400,
|
|
1536
1417
|
silenceMs: 1400,
|
|
1537
1418
|
speechThreshold: 0.012,
|
|
1538
1419
|
transcriptStabilityMs: 1000
|
|
1539
1420
|
},
|
|
1540
1421
|
fast: {
|
|
1541
1422
|
qualityProfile: "general",
|
|
1542
|
-
|
|
1543
|
-
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
1423
|
+
minSilenceMs: 300,
|
|
1544
1424
|
silenceMs: 700,
|
|
1545
1425
|
speechThreshold: 0.015,
|
|
1546
1426
|
transcriptStabilityMs: 450
|
|
1547
1427
|
},
|
|
1548
1428
|
"long-form": {
|
|
1549
1429
|
qualityProfile: "general",
|
|
1550
|
-
|
|
1551
|
-
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
1430
|
+
minSilenceMs: 600,
|
|
1552
1431
|
silenceMs: 2200,
|
|
1553
1432
|
speechThreshold: 0.01,
|
|
1554
1433
|
transcriptStabilityMs: 1500
|
|
@@ -1579,12 +1458,12 @@ var resolveTurnDetectionConfig = (config) => {
|
|
|
1579
1458
|
const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
|
|
1580
1459
|
const preset = TURN_PROFILE_DEFAULTS[profile];
|
|
1581
1460
|
const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
|
|
1461
|
+
const silenceMs = config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs;
|
|
1582
1462
|
return {
|
|
1583
1463
|
profile,
|
|
1584
1464
|
qualityProfile,
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
|
|
1465
|
+
minSilenceMs: Math.min(silenceMs, config?.minSilenceMs ?? quality.minSilenceMs ?? preset.minSilenceMs),
|
|
1466
|
+
silenceMs,
|
|
1588
1467
|
speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
|
|
1589
1468
|
transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
|
|
1590
1469
|
};
|
package/dist/testing/index.js
CHANGED
|
@@ -86,7 +86,7 @@ var __require = import.meta.require;
|
|
|
86
86
|
// src/core/turnDetection.ts
|
|
87
87
|
var DEFAULT_SILENCE_MS = 700;
|
|
88
88
|
var DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
89
|
-
var
|
|
89
|
+
var DEFAULT_MIN_SILENCE_MS = 400;
|
|
90
90
|
var toUint8Array = (audio) => {
|
|
91
91
|
if (audio instanceof ArrayBuffer) {
|
|
92
92
|
return new Uint8Array(audio);
|
|
@@ -3163,24 +3163,21 @@ var resolveAudioConditioningConfig = (config) => {
|
|
|
3163
3163
|
var TURN_PROFILE_DEFAULTS = {
|
|
3164
3164
|
balanced: {
|
|
3165
3165
|
qualityProfile: "general",
|
|
3166
|
-
|
|
3167
|
-
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
3166
|
+
minSilenceMs: 400,
|
|
3168
3167
|
silenceMs: 1400,
|
|
3169
3168
|
speechThreshold: 0.012,
|
|
3170
3169
|
transcriptStabilityMs: 1000
|
|
3171
3170
|
},
|
|
3172
3171
|
fast: {
|
|
3173
3172
|
qualityProfile: "general",
|
|
3174
|
-
|
|
3175
|
-
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
3173
|
+
minSilenceMs: 300,
|
|
3176
3174
|
silenceMs: 700,
|
|
3177
3175
|
speechThreshold: 0.015,
|
|
3178
3176
|
transcriptStabilityMs: 450
|
|
3179
3177
|
},
|
|
3180
3178
|
"long-form": {
|
|
3181
3179
|
qualityProfile: "general",
|
|
3182
|
-
|
|
3183
|
-
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
3180
|
+
minSilenceMs: 600,
|
|
3184
3181
|
silenceMs: 2200,
|
|
3185
3182
|
speechThreshold: 0.01,
|
|
3186
3183
|
transcriptStabilityMs: 1500
|
|
@@ -3211,12 +3208,12 @@ var resolveTurnDetectionConfig = (config) => {
|
|
|
3211
3208
|
const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
|
|
3212
3209
|
const preset = TURN_PROFILE_DEFAULTS[profile];
|
|
3213
3210
|
const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
|
|
3211
|
+
const silenceMs = config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs;
|
|
3214
3212
|
return {
|
|
3215
3213
|
profile,
|
|
3216
3214
|
qualityProfile,
|
|
3217
|
-
|
|
3218
|
-
|
|
3219
|
-
silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
|
|
3215
|
+
minSilenceMs: Math.min(silenceMs, config?.minSilenceMs ?? quality.minSilenceMs ?? preset.minSilenceMs),
|
|
3216
|
+
silenceMs,
|
|
3220
3217
|
speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
|
|
3221
3218
|
transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
|
|
3222
3219
|
};
|
|
@@ -6153,14 +6150,22 @@ var createVoiceSession = (options) => {
|
|
|
6153
6150
|
strategy: options.reconnect.strategy ?? "resume-last-turn",
|
|
6154
6151
|
timeout: options.reconnect.timeout ?? DEFAULT_RECONNECT_TIMEOUT
|
|
6155
6152
|
};
|
|
6153
|
+
const resolvedSilenceMs = options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS;
|
|
6156
6154
|
const turnDetection = {
|
|
6157
|
-
silenceMs:
|
|
6155
|
+
silenceMs: resolvedSilenceMs,
|
|
6156
|
+
minSilenceMs: Math.min(resolvedSilenceMs, options.turnDetection.minSilenceMs ?? DEFAULT_MIN_SILENCE_MS),
|
|
6158
6157
|
speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD,
|
|
6159
|
-
transcriptStabilityMs: options.turnDetection.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS
|
|
6160
|
-
|
|
6161
|
-
|
|
6158
|
+
transcriptStabilityMs: options.turnDetection.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS
|
|
6159
|
+
};
|
|
6160
|
+
let lastTurnCompleteConfidence = null;
|
|
6161
|
+
const adaptiveSilenceMs = () => {
|
|
6162
|
+
const { minSilenceMs, silenceMs } = turnDetection;
|
|
6163
|
+
if (lastTurnCompleteConfidence === null || silenceMs <= minSilenceMs) {
|
|
6164
|
+
return silenceMs;
|
|
6165
|
+
}
|
|
6166
|
+
const complete = Math.max(0, Math.min(1, lastTurnCompleteConfidence));
|
|
6167
|
+
return Math.round(minSilenceMs + (silenceMs - minSilenceMs) * (1 - complete));
|
|
6162
6168
|
};
|
|
6163
|
-
let semanticVetoElapsedMs = 0;
|
|
6164
6169
|
const sttFallback = options.sttFallback ? {
|
|
6165
6170
|
adapter: options.sttFallback.adapter,
|
|
6166
6171
|
completionTimeoutMs: options.sttFallback.completionTimeoutMs ?? DEFAULT_FALLBACK_COMPLETION_TIMEOUT_MS,
|
|
@@ -6693,47 +6698,8 @@ var createVoiceSession = (options) => {
|
|
|
6693
6698
|
runScheduledCommit(reason);
|
|
6694
6699
|
}, delayMs);
|
|
6695
6700
|
};
|
|
6696
|
-
const scheduleSilenceCommit = (delayMs =
|
|
6697
|
-
const shouldDeferSilenceCommit = async (reason) => {
|
|
6698
|
-
if (reason !== "silence" || turnDetection.semanticVetoMaxMs <= 0 || !options.semanticTurnDetector || semanticVetoElapsedMs >= turnDetection.semanticVetoMaxMs) {
|
|
6699
|
-
return false;
|
|
6700
|
-
}
|
|
6701
|
-
const session = await readSession();
|
|
6702
|
-
const { partialText, transcripts } = session.currentTurn;
|
|
6703
|
-
const userText = buildTurnText(transcripts, partialText, {
|
|
6704
|
-
partialEndedAtMs: session.currentTurn.partialEndedAt,
|
|
6705
|
-
partialStartedAtMs: session.currentTurn.partialStartedAt
|
|
6706
|
-
});
|
|
6707
|
-
if (!userText) {
|
|
6708
|
-
return false;
|
|
6709
|
-
}
|
|
6710
|
-
const silenceMs = session.currentTurn.silenceStartedAt !== undefined ? Date.now() - session.currentTurn.silenceStartedAt : turnDetection.silenceMs;
|
|
6711
|
-
let endOfTurn = true;
|
|
6712
|
-
try {
|
|
6713
|
-
const verdict = await Promise.resolve(options.semanticTurnDetector.evaluate({
|
|
6714
|
-
lastFinalTranscript: transcripts.at(-1),
|
|
6715
|
-
partialText,
|
|
6716
|
-
silenceMs,
|
|
6717
|
-
transcripts,
|
|
6718
|
-
...getTurnAudioForDetector()
|
|
6719
|
-
}));
|
|
6720
|
-
endOfTurn = verdict.endOfTurn;
|
|
6721
|
-
} catch {
|
|
6722
|
-
return false;
|
|
6723
|
-
}
|
|
6724
|
-
if (endOfTurn !== false) {
|
|
6725
|
-
return false;
|
|
6726
|
-
}
|
|
6727
|
-
const remaining = turnDetection.semanticVetoMaxMs - semanticVetoElapsedMs;
|
|
6728
|
-
const extendMs = Math.max(1, Math.min(turnDetection.semanticVetoRecheckMs, remaining));
|
|
6729
|
-
semanticVetoElapsedMs += extendMs;
|
|
6730
|
-
scheduleTurnCommit(extendMs, reason);
|
|
6731
|
-
return true;
|
|
6732
|
-
};
|
|
6701
|
+
const scheduleSilenceCommit = (delayMs = adaptiveSilenceMs(), reset = true) => scheduleTurnCommit(delayMs, "silence", reset);
|
|
6733
6702
|
const runScheduledCommit = async (reason) => {
|
|
6734
|
-
if (await shouldDeferSilenceCommit(reason)) {
|
|
6735
|
-
return;
|
|
6736
|
-
}
|
|
6737
6703
|
await api.commitTurn(reason);
|
|
6738
6704
|
};
|
|
6739
6705
|
const requestTurnCommit = async (reason) => {
|
|
@@ -7473,7 +7439,7 @@ var createVoiceSession = (options) => {
|
|
|
7473
7439
|
session2.lastActivityAt = Date.now();
|
|
7474
7440
|
session2.status = "active";
|
|
7475
7441
|
});
|
|
7476
|
-
|
|
7442
|
+
lastTurnCompleteConfidence = null;
|
|
7477
7443
|
if (silenceTimer && pendingCommitReason === "vendor") {
|
|
7478
7444
|
scheduleTurnCommit(getVendorCommitDelayMs(), "vendor");
|
|
7479
7445
|
}
|
|
@@ -7503,6 +7469,12 @@ var createVoiceSession = (options) => {
|
|
|
7503
7469
|
transcripts: session.currentTurn.transcripts,
|
|
7504
7470
|
...getTurnAudioForDetector()
|
|
7505
7471
|
}));
|
|
7472
|
+
if (typeof verdict.confidence === "number") {
|
|
7473
|
+
lastTurnCompleteConfidence = verdict.confidence;
|
|
7474
|
+
if (silenceTimer && pendingCommitReason === "silence") {
|
|
7475
|
+
scheduleSilenceCommit();
|
|
7476
|
+
}
|
|
7477
|
+
}
|
|
7506
7478
|
if (verdict.endOfTurn) {
|
|
7507
7479
|
clearSilenceTimer();
|
|
7508
7480
|
await requestTurnCommit("vendor");
|
|
@@ -8198,7 +8170,7 @@ var createVoiceSession = (options) => {
|
|
|
8198
8170
|
};
|
|
8199
8171
|
const commitTurnInternal = async (reason = "manual") => {
|
|
8200
8172
|
clearSilenceTimer();
|
|
8201
|
-
|
|
8173
|
+
lastTurnCompleteConfidence = null;
|
|
8202
8174
|
backchannelDriver?.reset();
|
|
8203
8175
|
amdLastTurnCommitAt = Date.now();
|
|
8204
8176
|
const session = await readSession();
|
package/dist/vue/index.js
CHANGED
|
@@ -11689,146 +11689,25 @@ var resolveAudioConditioningConfig = (config) => {
|
|
|
11689
11689
|
};
|
|
11690
11690
|
};
|
|
11691
11691
|
|
|
11692
|
-
// src/core/turnDetection.ts
|
|
11693
|
-
var DEFAULT_SILENCE_MS = 700;
|
|
11694
|
-
var DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
11695
|
-
var DEFAULT_SEMANTIC_VETO_RECHECK_MS = 1200;
|
|
11696
|
-
var toUint8Array = (audio) => {
|
|
11697
|
-
if (audio instanceof ArrayBuffer) {
|
|
11698
|
-
return new Uint8Array(audio);
|
|
11699
|
-
}
|
|
11700
|
-
return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
|
|
11701
|
-
};
|
|
11702
|
-
var measureAudioLevel = (audio) => {
|
|
11703
|
-
const bytes = toUint8Array(audio);
|
|
11704
|
-
if (bytes.byteLength < 2) {
|
|
11705
|
-
return 0;
|
|
11706
|
-
}
|
|
11707
|
-
const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
|
|
11708
|
-
if (samples.length === 0) {
|
|
11709
|
-
return 0;
|
|
11710
|
-
}
|
|
11711
|
-
let sumSquares = 0;
|
|
11712
|
-
for (const sample of samples) {
|
|
11713
|
-
const normalized = sample / 32768;
|
|
11714
|
-
sumSquares += normalized * normalized;
|
|
11715
|
-
}
|
|
11716
|
-
return Math.sqrt(sumSquares / samples.length);
|
|
11717
|
-
};
|
|
11718
|
-
var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
|
|
11719
|
-
var countWords = (value) => value.length > 0 ? value.split(" ").length : 0;
|
|
11720
|
-
var selectPreferredTranscriptText = (currentText, nextText) => {
|
|
11721
|
-
const current = normalizeText(currentText);
|
|
11722
|
-
const next = normalizeText(nextText);
|
|
11723
|
-
if (!current) {
|
|
11724
|
-
return next;
|
|
11725
|
-
}
|
|
11726
|
-
if (!next) {
|
|
11727
|
-
return current;
|
|
11728
|
-
}
|
|
11729
|
-
if (current === next || current.includes(next)) {
|
|
11730
|
-
return current;
|
|
11731
|
-
}
|
|
11732
|
-
if (next.includes(current)) {
|
|
11733
|
-
return next;
|
|
11734
|
-
}
|
|
11735
|
-
if (countWords(next) > countWords(current)) {
|
|
11736
|
-
return next;
|
|
11737
|
-
}
|
|
11738
|
-
if (countWords(next) === countWords(current) && next.length > current.length) {
|
|
11739
|
-
return next;
|
|
11740
|
-
}
|
|
11741
|
-
return current;
|
|
11742
|
-
};
|
|
11743
|
-
var mergeSequentialTranscriptText = (currentText, nextText) => {
|
|
11744
|
-
const current = normalizeText(currentText);
|
|
11745
|
-
const next = normalizeText(nextText);
|
|
11746
|
-
if (!current) {
|
|
11747
|
-
return next;
|
|
11748
|
-
}
|
|
11749
|
-
if (!next) {
|
|
11750
|
-
return current;
|
|
11751
|
-
}
|
|
11752
|
-
const currentWords = current.split(" ");
|
|
11753
|
-
const nextWords = next.split(" ");
|
|
11754
|
-
const maxOverlap = Math.min(currentWords.length, nextWords.length);
|
|
11755
|
-
for (let overlap = maxOverlap;overlap > 0; overlap -= 1) {
|
|
11756
|
-
const currentSuffix = currentWords.slice(-overlap).join(" ");
|
|
11757
|
-
const nextPrefix = nextWords.slice(0, overlap).join(" ");
|
|
11758
|
-
if (currentSuffix === nextPrefix) {
|
|
11759
|
-
return [...currentWords, ...nextWords.slice(overlap)].join(" ");
|
|
11760
|
-
}
|
|
11761
|
-
}
|
|
11762
|
-
return `${current} ${next}`.trim();
|
|
11763
|
-
};
|
|
11764
|
-
var countCommonPrefixWords = (currentText, nextText) => {
|
|
11765
|
-
const currentWords = normalizeText(currentText).split(" ").filter(Boolean);
|
|
11766
|
-
const nextWords = normalizeText(nextText).split(" ").filter(Boolean);
|
|
11767
|
-
const maxWords = Math.min(currentWords.length, nextWords.length);
|
|
11768
|
-
let count = 0;
|
|
11769
|
-
for (let index = 0;index < maxWords; index += 1) {
|
|
11770
|
-
if (currentWords[index] !== nextWords[index]) {
|
|
11771
|
-
break;
|
|
11772
|
-
}
|
|
11773
|
-
count += 1;
|
|
11774
|
-
}
|
|
11775
|
-
return count;
|
|
11776
|
-
};
|
|
11777
|
-
var mergeTranscriptTexts = (transcripts) => {
|
|
11778
|
-
const merged = [];
|
|
11779
|
-
for (const transcript of transcripts) {
|
|
11780
|
-
const nextText = normalizeText(transcript.text);
|
|
11781
|
-
if (!nextText) {
|
|
11782
|
-
continue;
|
|
11783
|
-
}
|
|
11784
|
-
const previous = merged.at(-1);
|
|
11785
|
-
if (!previous) {
|
|
11786
|
-
merged.push(nextText);
|
|
11787
|
-
continue;
|
|
11788
|
-
}
|
|
11789
|
-
if (nextText === previous || previous.includes(nextText)) {
|
|
11790
|
-
continue;
|
|
11791
|
-
}
|
|
11792
|
-
if (nextText.includes(previous)) {
|
|
11793
|
-
merged[merged.length - 1] = nextText;
|
|
11794
|
-
continue;
|
|
11795
|
-
}
|
|
11796
|
-
merged.push(nextText);
|
|
11797
|
-
}
|
|
11798
|
-
return merged.join(" ").trim();
|
|
11799
|
-
};
|
|
11800
|
-
var buildTurnText = (transcripts, partialText, options = {}) => {
|
|
11801
|
-
const finalText = mergeTranscriptTexts(transcripts);
|
|
11802
|
-
const nextPartial = normalizeText(partialText);
|
|
11803
|
-
const lastFinalEndedAtMs = [...transcripts].reverse().find((transcript) => typeof transcript.endedAtMs === "number")?.endedAtMs;
|
|
11804
|
-
if (finalText && nextPartial && typeof lastFinalEndedAtMs === "number" && typeof options.partialStartedAtMs === "number" && options.partialStartedAtMs - lastFinalEndedAtMs >= 250 && countCommonPrefixWords(finalText, nextPartial) === 0) {
|
|
11805
|
-
return mergeSequentialTranscriptText(finalText, nextPartial);
|
|
11806
|
-
}
|
|
11807
|
-
return selectPreferredTranscriptText(finalText, nextPartial);
|
|
11808
|
-
};
|
|
11809
|
-
|
|
11810
11692
|
// src/core/turnProfiles.ts
|
|
11811
11693
|
var TURN_PROFILE_DEFAULTS = {
|
|
11812
11694
|
balanced: {
|
|
11813
11695
|
qualityProfile: "general",
|
|
11814
|
-
|
|
11815
|
-
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
11696
|
+
minSilenceMs: 400,
|
|
11816
11697
|
silenceMs: 1400,
|
|
11817
11698
|
speechThreshold: 0.012,
|
|
11818
11699
|
transcriptStabilityMs: 1000
|
|
11819
11700
|
},
|
|
11820
11701
|
fast: {
|
|
11821
11702
|
qualityProfile: "general",
|
|
11822
|
-
|
|
11823
|
-
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
11703
|
+
minSilenceMs: 300,
|
|
11824
11704
|
silenceMs: 700,
|
|
11825
11705
|
speechThreshold: 0.015,
|
|
11826
11706
|
transcriptStabilityMs: 450
|
|
11827
11707
|
},
|
|
11828
11708
|
"long-form": {
|
|
11829
11709
|
qualityProfile: "general",
|
|
11830
|
-
|
|
11831
|
-
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
11710
|
+
minSilenceMs: 600,
|
|
11832
11711
|
silenceMs: 2200,
|
|
11833
11712
|
speechThreshold: 0.01,
|
|
11834
11713
|
transcriptStabilityMs: 1500
|
|
@@ -11859,12 +11738,12 @@ var resolveTurnDetectionConfig = (config) => {
|
|
|
11859
11738
|
const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
|
|
11860
11739
|
const preset = TURN_PROFILE_DEFAULTS[profile];
|
|
11861
11740
|
const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
|
|
11741
|
+
const silenceMs = config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs;
|
|
11862
11742
|
return {
|
|
11863
11743
|
profile,
|
|
11864
11744
|
qualityProfile,
|
|
11865
|
-
|
|
11866
|
-
|
|
11867
|
-
silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
|
|
11745
|
+
minSilenceMs: Math.min(silenceMs, config?.minSilenceMs ?? quality.minSilenceMs ?? preset.minSilenceMs),
|
|
11746
|
+
silenceMs,
|
|
11868
11747
|
speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
|
|
11869
11748
|
transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
|
|
11870
11749
|
};
|