@absolutejs/voice 0.0.22-beta.583 → 0.0.22-beta.585
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/angular/index.js +126 -0
- package/dist/client/htmxBootstrap.js +11 -0
- package/dist/client/index.js +126 -0
- package/dist/core/hardenedFetch.d.ts +3 -0
- package/dist/core/turnDetection.d.ts +1 -0
- package/dist/core/types.d.ts +4 -0
- package/dist/embed/index.js +11 -0
- package/dist/embed/voice-widget.js +8 -8
- package/dist/index.d.ts +1 -0
- package/dist/index.js +219 -122
- package/dist/react/index.js +126 -0
- package/dist/svelte/index.js +126 -0
- package/dist/testing/index.js +99 -5
- package/dist/vue/index.js +126 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -391,22 +391,146 @@ var resolveLogger = (logger) => ({
|
|
|
391
391
|
...logger
|
|
392
392
|
});
|
|
393
393
|
|
|
394
|
+
// src/core/turnDetection.ts
|
|
395
|
+
var DEFAULT_SILENCE_MS = 700;
|
|
396
|
+
var DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
397
|
+
var DEFAULT_SEMANTIC_VETO_RECHECK_MS = 1200;
|
|
398
|
+
var toUint8Array = (audio) => {
|
|
399
|
+
if (audio instanceof ArrayBuffer) {
|
|
400
|
+
return new Uint8Array(audio);
|
|
401
|
+
}
|
|
402
|
+
return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
|
|
403
|
+
};
|
|
404
|
+
var measureAudioLevel = (audio) => {
|
|
405
|
+
const bytes = toUint8Array(audio);
|
|
406
|
+
if (bytes.byteLength < 2) {
|
|
407
|
+
return 0;
|
|
408
|
+
}
|
|
409
|
+
const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
|
|
410
|
+
if (samples.length === 0) {
|
|
411
|
+
return 0;
|
|
412
|
+
}
|
|
413
|
+
let sumSquares = 0;
|
|
414
|
+
for (const sample of samples) {
|
|
415
|
+
const normalized = sample / 32768;
|
|
416
|
+
sumSquares += normalized * normalized;
|
|
417
|
+
}
|
|
418
|
+
return Math.sqrt(sumSquares / samples.length);
|
|
419
|
+
};
|
|
420
|
+
var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
|
|
421
|
+
var countWords = (value) => value.length > 0 ? value.split(" ").length : 0;
|
|
422
|
+
var selectPreferredTranscriptText = (currentText, nextText) => {
|
|
423
|
+
const current = normalizeText(currentText);
|
|
424
|
+
const next = normalizeText(nextText);
|
|
425
|
+
if (!current) {
|
|
426
|
+
return next;
|
|
427
|
+
}
|
|
428
|
+
if (!next) {
|
|
429
|
+
return current;
|
|
430
|
+
}
|
|
431
|
+
if (current === next || current.includes(next)) {
|
|
432
|
+
return current;
|
|
433
|
+
}
|
|
434
|
+
if (next.includes(current)) {
|
|
435
|
+
return next;
|
|
436
|
+
}
|
|
437
|
+
if (countWords(next) > countWords(current)) {
|
|
438
|
+
return next;
|
|
439
|
+
}
|
|
440
|
+
if (countWords(next) === countWords(current) && next.length > current.length) {
|
|
441
|
+
return next;
|
|
442
|
+
}
|
|
443
|
+
return current;
|
|
444
|
+
};
|
|
445
|
+
var mergeSequentialTranscriptText = (currentText, nextText) => {
|
|
446
|
+
const current = normalizeText(currentText);
|
|
447
|
+
const next = normalizeText(nextText);
|
|
448
|
+
if (!current) {
|
|
449
|
+
return next;
|
|
450
|
+
}
|
|
451
|
+
if (!next) {
|
|
452
|
+
return current;
|
|
453
|
+
}
|
|
454
|
+
const currentWords = current.split(" ");
|
|
455
|
+
const nextWords = next.split(" ");
|
|
456
|
+
const maxOverlap = Math.min(currentWords.length, nextWords.length);
|
|
457
|
+
for (let overlap = maxOverlap;overlap > 0; overlap -= 1) {
|
|
458
|
+
const currentSuffix = currentWords.slice(-overlap).join(" ");
|
|
459
|
+
const nextPrefix = nextWords.slice(0, overlap).join(" ");
|
|
460
|
+
if (currentSuffix === nextPrefix) {
|
|
461
|
+
return [...currentWords, ...nextWords.slice(overlap)].join(" ");
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
return `${current} ${next}`.trim();
|
|
465
|
+
};
|
|
466
|
+
var countCommonPrefixWords = (currentText, nextText) => {
|
|
467
|
+
const currentWords = normalizeText(currentText).split(" ").filter(Boolean);
|
|
468
|
+
const nextWords = normalizeText(nextText).split(" ").filter(Boolean);
|
|
469
|
+
const maxWords = Math.min(currentWords.length, nextWords.length);
|
|
470
|
+
let count = 0;
|
|
471
|
+
for (let index = 0;index < maxWords; index += 1) {
|
|
472
|
+
if (currentWords[index] !== nextWords[index]) {
|
|
473
|
+
break;
|
|
474
|
+
}
|
|
475
|
+
count += 1;
|
|
476
|
+
}
|
|
477
|
+
return count;
|
|
478
|
+
};
|
|
479
|
+
var mergeTranscriptTexts = (transcripts) => {
|
|
480
|
+
const merged = [];
|
|
481
|
+
for (const transcript of transcripts) {
|
|
482
|
+
const nextText = normalizeText(transcript.text);
|
|
483
|
+
if (!nextText) {
|
|
484
|
+
continue;
|
|
485
|
+
}
|
|
486
|
+
const previous = merged.at(-1);
|
|
487
|
+
if (!previous) {
|
|
488
|
+
merged.push(nextText);
|
|
489
|
+
continue;
|
|
490
|
+
}
|
|
491
|
+
if (nextText === previous || previous.includes(nextText)) {
|
|
492
|
+
continue;
|
|
493
|
+
}
|
|
494
|
+
if (nextText.includes(previous)) {
|
|
495
|
+
merged[merged.length - 1] = nextText;
|
|
496
|
+
continue;
|
|
497
|
+
}
|
|
498
|
+
merged.push(nextText);
|
|
499
|
+
}
|
|
500
|
+
return merged.join(" ").trim();
|
|
501
|
+
};
|
|
502
|
+
var buildTurnText = (transcripts, partialText, options = {}) => {
|
|
503
|
+
const finalText = mergeTranscriptTexts(transcripts);
|
|
504
|
+
const nextPartial = normalizeText(partialText);
|
|
505
|
+
const lastFinalEndedAtMs = [...transcripts].reverse().find((transcript) => typeof transcript.endedAtMs === "number")?.endedAtMs;
|
|
506
|
+
if (finalText && nextPartial && typeof lastFinalEndedAtMs === "number" && typeof options.partialStartedAtMs === "number" && options.partialStartedAtMs - lastFinalEndedAtMs >= 250 && countCommonPrefixWords(finalText, nextPartial) === 0) {
|
|
507
|
+
return mergeSequentialTranscriptText(finalText, nextPartial);
|
|
508
|
+
}
|
|
509
|
+
return selectPreferredTranscriptText(finalText, nextPartial);
|
|
510
|
+
};
|
|
511
|
+
|
|
394
512
|
// src/core/turnProfiles.ts
|
|
395
513
|
var TURN_PROFILE_DEFAULTS = {
|
|
396
514
|
balanced: {
|
|
397
515
|
qualityProfile: "general",
|
|
516
|
+
semanticVetoMaxMs: 0,
|
|
517
|
+
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
398
518
|
silenceMs: 1400,
|
|
399
519
|
speechThreshold: 0.012,
|
|
400
520
|
transcriptStabilityMs: 1000
|
|
401
521
|
},
|
|
402
522
|
fast: {
|
|
403
523
|
qualityProfile: "general",
|
|
524
|
+
semanticVetoMaxMs: 0,
|
|
525
|
+
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
404
526
|
silenceMs: 700,
|
|
405
527
|
speechThreshold: 0.015,
|
|
406
528
|
transcriptStabilityMs: 450
|
|
407
529
|
},
|
|
408
530
|
"long-form": {
|
|
409
531
|
qualityProfile: "general",
|
|
532
|
+
semanticVetoMaxMs: 0,
|
|
533
|
+
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
410
534
|
silenceMs: 2200,
|
|
411
535
|
speechThreshold: 0.01,
|
|
412
536
|
transcriptStabilityMs: 1500
|
|
@@ -440,6 +564,8 @@ var resolveTurnDetectionConfig = (config) => {
|
|
|
440
564
|
return {
|
|
441
565
|
profile,
|
|
442
566
|
qualityProfile,
|
|
567
|
+
semanticVetoMaxMs: config?.semanticVetoMaxMs ?? preset.semanticVetoMaxMs,
|
|
568
|
+
semanticVetoRecheckMs: config?.semanticVetoRecheckMs ?? preset.semanticVetoRecheckMs,
|
|
443
569
|
silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
|
|
444
570
|
speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
|
|
445
571
|
transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
|
|
@@ -3454,123 +3580,6 @@ var createVoiceTwilioRedirectHandoffAdapter = (options) => ({
|
|
|
3454
3580
|
}
|
|
3455
3581
|
});
|
|
3456
3582
|
|
|
3457
|
-
// src/core/turnDetection.ts
|
|
3458
|
-
var DEFAULT_SILENCE_MS = 700;
|
|
3459
|
-
var DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
3460
|
-
var toUint8Array = (audio) => {
|
|
3461
|
-
if (audio instanceof ArrayBuffer) {
|
|
3462
|
-
return new Uint8Array(audio);
|
|
3463
|
-
}
|
|
3464
|
-
return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
|
|
3465
|
-
};
|
|
3466
|
-
var measureAudioLevel = (audio) => {
|
|
3467
|
-
const bytes = toUint8Array(audio);
|
|
3468
|
-
if (bytes.byteLength < 2) {
|
|
3469
|
-
return 0;
|
|
3470
|
-
}
|
|
3471
|
-
const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
|
|
3472
|
-
if (samples.length === 0) {
|
|
3473
|
-
return 0;
|
|
3474
|
-
}
|
|
3475
|
-
let sumSquares = 0;
|
|
3476
|
-
for (const sample of samples) {
|
|
3477
|
-
const normalized = sample / 32768;
|
|
3478
|
-
sumSquares += normalized * normalized;
|
|
3479
|
-
}
|
|
3480
|
-
return Math.sqrt(sumSquares / samples.length);
|
|
3481
|
-
};
|
|
3482
|
-
var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
|
|
3483
|
-
var countWords = (value) => value.length > 0 ? value.split(" ").length : 0;
|
|
3484
|
-
var selectPreferredTranscriptText = (currentText, nextText) => {
|
|
3485
|
-
const current = normalizeText(currentText);
|
|
3486
|
-
const next = normalizeText(nextText);
|
|
3487
|
-
if (!current) {
|
|
3488
|
-
return next;
|
|
3489
|
-
}
|
|
3490
|
-
if (!next) {
|
|
3491
|
-
return current;
|
|
3492
|
-
}
|
|
3493
|
-
if (current === next || current.includes(next)) {
|
|
3494
|
-
return current;
|
|
3495
|
-
}
|
|
3496
|
-
if (next.includes(current)) {
|
|
3497
|
-
return next;
|
|
3498
|
-
}
|
|
3499
|
-
if (countWords(next) > countWords(current)) {
|
|
3500
|
-
return next;
|
|
3501
|
-
}
|
|
3502
|
-
if (countWords(next) === countWords(current) && next.length > current.length) {
|
|
3503
|
-
return next;
|
|
3504
|
-
}
|
|
3505
|
-
return current;
|
|
3506
|
-
};
|
|
3507
|
-
var mergeSequentialTranscriptText = (currentText, nextText) => {
|
|
3508
|
-
const current = normalizeText(currentText);
|
|
3509
|
-
const next = normalizeText(nextText);
|
|
3510
|
-
if (!current) {
|
|
3511
|
-
return next;
|
|
3512
|
-
}
|
|
3513
|
-
if (!next) {
|
|
3514
|
-
return current;
|
|
3515
|
-
}
|
|
3516
|
-
const currentWords = current.split(" ");
|
|
3517
|
-
const nextWords = next.split(" ");
|
|
3518
|
-
const maxOverlap = Math.min(currentWords.length, nextWords.length);
|
|
3519
|
-
for (let overlap = maxOverlap;overlap > 0; overlap -= 1) {
|
|
3520
|
-
const currentSuffix = currentWords.slice(-overlap).join(" ");
|
|
3521
|
-
const nextPrefix = nextWords.slice(0, overlap).join(" ");
|
|
3522
|
-
if (currentSuffix === nextPrefix) {
|
|
3523
|
-
return [...currentWords, ...nextWords.slice(overlap)].join(" ");
|
|
3524
|
-
}
|
|
3525
|
-
}
|
|
3526
|
-
return `${current} ${next}`.trim();
|
|
3527
|
-
};
|
|
3528
|
-
var countCommonPrefixWords = (currentText, nextText) => {
|
|
3529
|
-
const currentWords = normalizeText(currentText).split(" ").filter(Boolean);
|
|
3530
|
-
const nextWords = normalizeText(nextText).split(" ").filter(Boolean);
|
|
3531
|
-
const maxWords = Math.min(currentWords.length, nextWords.length);
|
|
3532
|
-
let count = 0;
|
|
3533
|
-
for (let index = 0;index < maxWords; index += 1) {
|
|
3534
|
-
if (currentWords[index] !== nextWords[index]) {
|
|
3535
|
-
break;
|
|
3536
|
-
}
|
|
3537
|
-
count += 1;
|
|
3538
|
-
}
|
|
3539
|
-
return count;
|
|
3540
|
-
};
|
|
3541
|
-
var mergeTranscriptTexts = (transcripts) => {
|
|
3542
|
-
const merged = [];
|
|
3543
|
-
for (const transcript of transcripts) {
|
|
3544
|
-
const nextText = normalizeText(transcript.text);
|
|
3545
|
-
if (!nextText) {
|
|
3546
|
-
continue;
|
|
3547
|
-
}
|
|
3548
|
-
const previous = merged.at(-1);
|
|
3549
|
-
if (!previous) {
|
|
3550
|
-
merged.push(nextText);
|
|
3551
|
-
continue;
|
|
3552
|
-
}
|
|
3553
|
-
if (nextText === previous || previous.includes(nextText)) {
|
|
3554
|
-
continue;
|
|
3555
|
-
}
|
|
3556
|
-
if (nextText.includes(previous)) {
|
|
3557
|
-
merged[merged.length - 1] = nextText;
|
|
3558
|
-
continue;
|
|
3559
|
-
}
|
|
3560
|
-
merged.push(nextText);
|
|
3561
|
-
}
|
|
3562
|
-
return merged.join(" ").trim();
|
|
3563
|
-
};
|
|
3564
|
-
var buildTurnText = (transcripts, partialText, options = {}) => {
|
|
3565
|
-
const finalText = mergeTranscriptTexts(transcripts);
|
|
3566
|
-
const nextPartial = normalizeText(partialText);
|
|
3567
|
-
const lastFinalEndedAtMs = [...transcripts].reverse().find((transcript) => typeof transcript.endedAtMs === "number")?.endedAtMs;
|
|
3568
|
-
if (finalText && nextPartial && typeof lastFinalEndedAtMs === "number" && typeof options.partialStartedAtMs === "number" && options.partialStartedAtMs - lastFinalEndedAtMs >= 250 && countCommonPrefixWords(finalText, nextPartial) === 0) {
|
|
3569
|
-
return mergeSequentialTranscriptText(finalText, nextPartial);
|
|
3570
|
-
}
|
|
3571
|
-
return selectPreferredTranscriptText(finalText, nextPartial);
|
|
3572
|
-
};
|
|
3573
|
-
|
|
3574
3583
|
// src/core/types.ts
|
|
3575
3584
|
var ttsAdapterSessionCanCancel = (session) => typeof session.cancel === "function";
|
|
3576
3585
|
|
|
@@ -3907,8 +3916,11 @@ var createVoiceSession = (options) => {
|
|
|
3907
3916
|
const turnDetection = {
|
|
3908
3917
|
silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS,
|
|
3909
3918
|
speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD,
|
|
3910
|
-
transcriptStabilityMs: options.turnDetection.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS
|
|
3919
|
+
transcriptStabilityMs: options.turnDetection.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS,
|
|
3920
|
+
semanticVetoMaxMs: options.turnDetection.semanticVetoMaxMs ?? 0,
|
|
3921
|
+
semanticVetoRecheckMs: options.turnDetection.semanticVetoRecheckMs ?? DEFAULT_SEMANTIC_VETO_RECHECK_MS
|
|
3911
3922
|
};
|
|
3923
|
+
let semanticVetoElapsedMs = 0;
|
|
3912
3924
|
const sttFallback = options.sttFallback ? {
|
|
3913
3925
|
adapter: options.sttFallback.adapter,
|
|
3914
3926
|
completionTimeoutMs: options.sttFallback.completionTimeoutMs ?? DEFAULT_FALLBACK_COMPLETION_TIMEOUT_MS,
|
|
@@ -4423,10 +4435,51 @@ var createVoiceSession = (options) => {
|
|
|
4423
4435
|
silenceTimer = setTimeout(() => {
|
|
4424
4436
|
silenceTimer = null;
|
|
4425
4437
|
pendingCommitReason = null;
|
|
4426
|
-
|
|
4438
|
+
runScheduledCommit(reason);
|
|
4427
4439
|
}, delayMs);
|
|
4428
4440
|
};
|
|
4429
4441
|
const scheduleSilenceCommit = (delayMs = turnDetection.silenceMs, reset = true) => scheduleTurnCommit(delayMs, "silence", reset);
|
|
4442
|
+
const shouldDeferSilenceCommit = async (reason) => {
|
|
4443
|
+
if (reason !== "silence" || turnDetection.semanticVetoMaxMs <= 0 || !options.semanticTurnDetector || semanticVetoElapsedMs >= turnDetection.semanticVetoMaxMs) {
|
|
4444
|
+
return false;
|
|
4445
|
+
}
|
|
4446
|
+
const session = await readSession();
|
|
4447
|
+
const { partialText, transcripts } = session.currentTurn;
|
|
4448
|
+
const userText = buildTurnText(transcripts, partialText, {
|
|
4449
|
+
partialEndedAtMs: session.currentTurn.partialEndedAt,
|
|
4450
|
+
partialStartedAtMs: session.currentTurn.partialStartedAt
|
|
4451
|
+
});
|
|
4452
|
+
if (!userText) {
|
|
4453
|
+
return false;
|
|
4454
|
+
}
|
|
4455
|
+
const silenceMs = session.currentTurn.silenceStartedAt !== undefined ? Date.now() - session.currentTurn.silenceStartedAt : turnDetection.silenceMs;
|
|
4456
|
+
let endOfTurn = true;
|
|
4457
|
+
try {
|
|
4458
|
+
const verdict = await Promise.resolve(options.semanticTurnDetector.evaluate({
|
|
4459
|
+
lastFinalTranscript: transcripts.at(-1),
|
|
4460
|
+
partialText,
|
|
4461
|
+
silenceMs,
|
|
4462
|
+
transcripts
|
|
4463
|
+
}));
|
|
4464
|
+
endOfTurn = verdict.endOfTurn;
|
|
4465
|
+
} catch {
|
|
4466
|
+
return false;
|
|
4467
|
+
}
|
|
4468
|
+
if (endOfTurn !== false) {
|
|
4469
|
+
return false;
|
|
4470
|
+
}
|
|
4471
|
+
const remaining = turnDetection.semanticVetoMaxMs - semanticVetoElapsedMs;
|
|
4472
|
+
const extendMs = Math.max(1, Math.min(turnDetection.semanticVetoRecheckMs, remaining));
|
|
4473
|
+
semanticVetoElapsedMs += extendMs;
|
|
4474
|
+
scheduleTurnCommit(extendMs, reason);
|
|
4475
|
+
return true;
|
|
4476
|
+
};
|
|
4477
|
+
const runScheduledCommit = async (reason) => {
|
|
4478
|
+
if (await shouldDeferSilenceCommit(reason)) {
|
|
4479
|
+
return;
|
|
4480
|
+
}
|
|
4481
|
+
await api.commitTurn(reason);
|
|
4482
|
+
};
|
|
4430
4483
|
const requestTurnCommit = async (reason) => {
|
|
4431
4484
|
const session = await readSession();
|
|
4432
4485
|
const text = buildTurnText(session.currentTurn.transcripts, session.currentTurn.partialText, {
|
|
@@ -5138,6 +5191,7 @@ var createVoiceSession = (options) => {
|
|
|
5138
5191
|
session2.lastActivityAt = Date.now();
|
|
5139
5192
|
session2.status = "active";
|
|
5140
5193
|
});
|
|
5194
|
+
semanticVetoElapsedMs = 0;
|
|
5141
5195
|
if (silenceTimer && pendingCommitReason === "vendor") {
|
|
5142
5196
|
scheduleTurnCommit(getVendorCommitDelayMs(), "vendor");
|
|
5143
5197
|
}
|
|
@@ -5841,6 +5895,7 @@ var createVoiceSession = (options) => {
|
|
|
5841
5895
|
};
|
|
5842
5896
|
const commitTurnInternal = async (reason = "manual") => {
|
|
5843
5897
|
clearSilenceTimer();
|
|
5898
|
+
semanticVetoElapsedMs = 0;
|
|
5844
5899
|
backchannelDriver?.reset();
|
|
5845
5900
|
amdLastTurnCommitAt = Date.now();
|
|
5846
5901
|
const session = await readSession();
|
|
@@ -40842,6 +40897,44 @@ Respond with only your spoken line. When your goal is met or you want to hang up
|
|
|
40842
40897
|
persona: options.persona
|
|
40843
40898
|
};
|
|
40844
40899
|
};
|
|
40900
|
+
// src/core/hardenedFetch.ts
|
|
40901
|
+
var ATTEMPT_TIMEOUT_MS = 6000;
|
|
40902
|
+
var isBun = "Bun" in globalThis;
|
|
40903
|
+
var oneAttempt = async (baseFetch, input, init) => {
|
|
40904
|
+
const controller = new AbortController;
|
|
40905
|
+
const callerSignal = init?.signal ?? undefined;
|
|
40906
|
+
const onCallerAbort = () => controller.abort(callerSignal?.reason);
|
|
40907
|
+
if (callerSignal?.aborted)
|
|
40908
|
+
controller.abort(callerSignal.reason);
|
|
40909
|
+
else
|
|
40910
|
+
callerSignal?.addEventListener("abort", onCallerAbort, { once: true });
|
|
40911
|
+
const timer = setTimeout(() => {
|
|
40912
|
+
controller.abort(new Error(`fetch exceeded ${ATTEMPT_TIMEOUT_MS}ms before response headers (stale Bun keep-alive socket?)`));
|
|
40913
|
+
}, ATTEMPT_TIMEOUT_MS);
|
|
40914
|
+
const headers = new Headers(init?.headers);
|
|
40915
|
+
if (isBun)
|
|
40916
|
+
headers.set("Connection", "close");
|
|
40917
|
+
try {
|
|
40918
|
+
return await baseFetch(input, {
|
|
40919
|
+
...init,
|
|
40920
|
+
headers,
|
|
40921
|
+
signal: controller.signal
|
|
40922
|
+
});
|
|
40923
|
+
} finally {
|
|
40924
|
+
clearTimeout(timer);
|
|
40925
|
+
callerSignal?.removeEventListener("abort", onCallerAbort);
|
|
40926
|
+
}
|
|
40927
|
+
};
|
|
40928
|
+
var hardenFetch = (baseFetch = globalThis.fetch) => Object.assign(async (input, init) => {
|
|
40929
|
+
try {
|
|
40930
|
+
return await oneAttempt(baseFetch, input, init);
|
|
40931
|
+
} catch (error) {
|
|
40932
|
+
if (init?.signal?.aborted)
|
|
40933
|
+
throw error;
|
|
40934
|
+
console.warn(`[voice] hardened fetch retrying on a fresh connection: ${error instanceof Error ? error.message : String(error)}`);
|
|
40935
|
+
return oneAttempt(baseFetch, input, init);
|
|
40936
|
+
}
|
|
40937
|
+
}, { preconnect: baseFetch.preconnect.bind(baseFetch) });
|
|
40845
40938
|
// src/core/mcpToolset.ts
|
|
40846
40939
|
var flattenContent = (result) => {
|
|
40847
40940
|
const blocks = result.content ?? [];
|
|
@@ -42350,9 +42443,12 @@ var createVoiceConfiguration = (configuration) => configuration;
|
|
|
42350
42443
|
var DEFAULT_SPEECH_THRESHOLD2 = 0.015;
|
|
42351
42444
|
var DEFAULT_SILENCE_MS2 = 700;
|
|
42352
42445
|
var DEFAULT_TRANSCRIPT_STABILITY_MS2 = 200;
|
|
42446
|
+
var DEFAULT_SEMANTIC_VETO_RECHECK_MS2 = 1200;
|
|
42353
42447
|
var resolveTurnDetection = (input) => ({
|
|
42354
42448
|
profile: input?.profile ?? "balanced",
|
|
42355
42449
|
qualityProfile: input?.qualityProfile ?? "general",
|
|
42450
|
+
semanticVetoMaxMs: input?.semanticVetoMaxMs ?? 0,
|
|
42451
|
+
semanticVetoRecheckMs: input?.semanticVetoRecheckMs ?? DEFAULT_SEMANTIC_VETO_RECHECK_MS2,
|
|
42356
42452
|
silenceMs: input?.silenceMs ?? DEFAULT_SILENCE_MS2,
|
|
42357
42453
|
speechThreshold: input?.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD2,
|
|
42358
42454
|
transcriptStabilityMs: input?.transcriptStabilityMs ?? DEFAULT_TRANSCRIPT_STABILITY_MS2
|
|
@@ -45390,7 +45486,7 @@ var consumeOpenAIResponsesStream = async (response, onTextDelta, abortOptions) =
|
|
|
45390
45486
|
return { assistantText, toolCalls: finalizeToolCalls(calls), usage };
|
|
45391
45487
|
};
|
|
45392
45488
|
var createOpenAIVoiceAssistantModel = (options) => {
|
|
45393
|
-
const fetchImpl = options.fetch
|
|
45489
|
+
const fetchImpl = hardenFetch(options.fetch);
|
|
45394
45490
|
const baseUrl = options.baseUrl ?? "https://api.openai.com/v1";
|
|
45395
45491
|
const model = options.model ?? "gpt-4.1-mini";
|
|
45396
45492
|
const timeoutMs = options.timeoutMs ?? 60000;
|
|
@@ -45515,7 +45611,7 @@ var consumeAnthropicStream = async (response, onTextDelta) => {
|
|
|
45515
45611
|
return { assistantText, toolCalls: finalizeToolCalls(calls), usage };
|
|
45516
45612
|
};
|
|
45517
45613
|
var createAnthropicVoiceAssistantModel = (options) => {
|
|
45518
|
-
const fetchImpl = options.fetch
|
|
45614
|
+
const fetchImpl = hardenFetch(options.fetch);
|
|
45519
45615
|
const baseUrl = options.baseUrl ?? "https://api.anthropic.com/v1";
|
|
45520
45616
|
const model = options.model ?? "claude-sonnet-4-5";
|
|
45521
45617
|
return {
|
|
@@ -45601,7 +45697,7 @@ var consumeGeminiStream = async (response, onTextDelta) => {
|
|
|
45601
45697
|
return { assistantText, toolCalls, usage };
|
|
45602
45698
|
};
|
|
45603
45699
|
var createGeminiVoiceAssistantModel = (options) => {
|
|
45604
|
-
const fetchImpl = options.fetch
|
|
45700
|
+
const fetchImpl = hardenFetch(options.fetch);
|
|
45605
45701
|
const baseUrl = options.baseUrl ?? "https://generativelanguage.googleapis.com/v1beta";
|
|
45606
45702
|
const model = options.model ?? "gemini-2.5-flash";
|
|
45607
45703
|
const maxRetries = Math.max(0, options.maxRetries ?? 2);
|
|
@@ -52711,6 +52807,7 @@ export {
|
|
|
52711
52807
|
importVoiceCampaignRecipients,
|
|
52712
52808
|
heartbeatVoiceOpsTask,
|
|
52713
52809
|
hasVoiceOpsTaskSLABreach,
|
|
52810
|
+
hardenFetch,
|
|
52714
52811
|
getVoiceProofTargetLogicalFailure,
|
|
52715
52812
|
getVoiceLiveOpsControlStatus,
|
|
52716
52813
|
getVoiceCampaignDialerProofStatus,
|
package/dist/react/index.js
CHANGED
|
@@ -12243,22 +12243,146 @@ var resolveAudioConditioningConfig = (config) => {
|
|
|
12243
12243
|
};
|
|
12244
12244
|
};
|
|
12245
12245
|
|
|
12246
|
+
// src/core/turnDetection.ts
|
|
12247
|
+
var DEFAULT_SILENCE_MS = 700;
|
|
12248
|
+
var DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
12249
|
+
var DEFAULT_SEMANTIC_VETO_RECHECK_MS = 1200;
|
|
12250
|
+
var toUint8Array = (audio) => {
|
|
12251
|
+
if (audio instanceof ArrayBuffer) {
|
|
12252
|
+
return new Uint8Array(audio);
|
|
12253
|
+
}
|
|
12254
|
+
return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
|
|
12255
|
+
};
|
|
12256
|
+
var measureAudioLevel = (audio) => {
|
|
12257
|
+
const bytes = toUint8Array(audio);
|
|
12258
|
+
if (bytes.byteLength < 2) {
|
|
12259
|
+
return 0;
|
|
12260
|
+
}
|
|
12261
|
+
const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
|
|
12262
|
+
if (samples.length === 0) {
|
|
12263
|
+
return 0;
|
|
12264
|
+
}
|
|
12265
|
+
let sumSquares = 0;
|
|
12266
|
+
for (const sample of samples) {
|
|
12267
|
+
const normalized = sample / 32768;
|
|
12268
|
+
sumSquares += normalized * normalized;
|
|
12269
|
+
}
|
|
12270
|
+
return Math.sqrt(sumSquares / samples.length);
|
|
12271
|
+
};
|
|
12272
|
+
var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
|
|
12273
|
+
var countWords = (value) => value.length > 0 ? value.split(" ").length : 0;
|
|
12274
|
+
var selectPreferredTranscriptText = (currentText, nextText) => {
|
|
12275
|
+
const current = normalizeText(currentText);
|
|
12276
|
+
const next = normalizeText(nextText);
|
|
12277
|
+
if (!current) {
|
|
12278
|
+
return next;
|
|
12279
|
+
}
|
|
12280
|
+
if (!next) {
|
|
12281
|
+
return current;
|
|
12282
|
+
}
|
|
12283
|
+
if (current === next || current.includes(next)) {
|
|
12284
|
+
return current;
|
|
12285
|
+
}
|
|
12286
|
+
if (next.includes(current)) {
|
|
12287
|
+
return next;
|
|
12288
|
+
}
|
|
12289
|
+
if (countWords(next) > countWords(current)) {
|
|
12290
|
+
return next;
|
|
12291
|
+
}
|
|
12292
|
+
if (countWords(next) === countWords(current) && next.length > current.length) {
|
|
12293
|
+
return next;
|
|
12294
|
+
}
|
|
12295
|
+
return current;
|
|
12296
|
+
};
|
|
12297
|
+
var mergeSequentialTranscriptText = (currentText, nextText) => {
|
|
12298
|
+
const current = normalizeText(currentText);
|
|
12299
|
+
const next = normalizeText(nextText);
|
|
12300
|
+
if (!current) {
|
|
12301
|
+
return next;
|
|
12302
|
+
}
|
|
12303
|
+
if (!next) {
|
|
12304
|
+
return current;
|
|
12305
|
+
}
|
|
12306
|
+
const currentWords = current.split(" ");
|
|
12307
|
+
const nextWords = next.split(" ");
|
|
12308
|
+
const maxOverlap = Math.min(currentWords.length, nextWords.length);
|
|
12309
|
+
for (let overlap = maxOverlap;overlap > 0; overlap -= 1) {
|
|
12310
|
+
const currentSuffix = currentWords.slice(-overlap).join(" ");
|
|
12311
|
+
const nextPrefix = nextWords.slice(0, overlap).join(" ");
|
|
12312
|
+
if (currentSuffix === nextPrefix) {
|
|
12313
|
+
return [...currentWords, ...nextWords.slice(overlap)].join(" ");
|
|
12314
|
+
}
|
|
12315
|
+
}
|
|
12316
|
+
return `${current} ${next}`.trim();
|
|
12317
|
+
};
|
|
12318
|
+
var countCommonPrefixWords = (currentText, nextText) => {
|
|
12319
|
+
const currentWords = normalizeText(currentText).split(" ").filter(Boolean);
|
|
12320
|
+
const nextWords = normalizeText(nextText).split(" ").filter(Boolean);
|
|
12321
|
+
const maxWords = Math.min(currentWords.length, nextWords.length);
|
|
12322
|
+
let count = 0;
|
|
12323
|
+
for (let index = 0;index < maxWords; index += 1) {
|
|
12324
|
+
if (currentWords[index] !== nextWords[index]) {
|
|
12325
|
+
break;
|
|
12326
|
+
}
|
|
12327
|
+
count += 1;
|
|
12328
|
+
}
|
|
12329
|
+
return count;
|
|
12330
|
+
};
|
|
12331
|
+
var mergeTranscriptTexts = (transcripts) => {
|
|
12332
|
+
const merged = [];
|
|
12333
|
+
for (const transcript of transcripts) {
|
|
12334
|
+
const nextText = normalizeText(transcript.text);
|
|
12335
|
+
if (!nextText) {
|
|
12336
|
+
continue;
|
|
12337
|
+
}
|
|
12338
|
+
const previous = merged.at(-1);
|
|
12339
|
+
if (!previous) {
|
|
12340
|
+
merged.push(nextText);
|
|
12341
|
+
continue;
|
|
12342
|
+
}
|
|
12343
|
+
if (nextText === previous || previous.includes(nextText)) {
|
|
12344
|
+
continue;
|
|
12345
|
+
}
|
|
12346
|
+
if (nextText.includes(previous)) {
|
|
12347
|
+
merged[merged.length - 1] = nextText;
|
|
12348
|
+
continue;
|
|
12349
|
+
}
|
|
12350
|
+
merged.push(nextText);
|
|
12351
|
+
}
|
|
12352
|
+
return merged.join(" ").trim();
|
|
12353
|
+
};
|
|
12354
|
+
var buildTurnText = (transcripts, partialText, options = {}) => {
|
|
12355
|
+
const finalText = mergeTranscriptTexts(transcripts);
|
|
12356
|
+
const nextPartial = normalizeText(partialText);
|
|
12357
|
+
const lastFinalEndedAtMs = [...transcripts].reverse().find((transcript) => typeof transcript.endedAtMs === "number")?.endedAtMs;
|
|
12358
|
+
if (finalText && nextPartial && typeof lastFinalEndedAtMs === "number" && typeof options.partialStartedAtMs === "number" && options.partialStartedAtMs - lastFinalEndedAtMs >= 250 && countCommonPrefixWords(finalText, nextPartial) === 0) {
|
|
12359
|
+
return mergeSequentialTranscriptText(finalText, nextPartial);
|
|
12360
|
+
}
|
|
12361
|
+
return selectPreferredTranscriptText(finalText, nextPartial);
|
|
12362
|
+
};
|
|
12363
|
+
|
|
12246
12364
|
// src/core/turnProfiles.ts
|
|
12247
12365
|
var TURN_PROFILE_DEFAULTS = {
|
|
12248
12366
|
balanced: {
|
|
12249
12367
|
qualityProfile: "general",
|
|
12368
|
+
semanticVetoMaxMs: 0,
|
|
12369
|
+
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
12250
12370
|
silenceMs: 1400,
|
|
12251
12371
|
speechThreshold: 0.012,
|
|
12252
12372
|
transcriptStabilityMs: 1000
|
|
12253
12373
|
},
|
|
12254
12374
|
fast: {
|
|
12255
12375
|
qualityProfile: "general",
|
|
12376
|
+
semanticVetoMaxMs: 0,
|
|
12377
|
+
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
12256
12378
|
silenceMs: 700,
|
|
12257
12379
|
speechThreshold: 0.015,
|
|
12258
12380
|
transcriptStabilityMs: 450
|
|
12259
12381
|
},
|
|
12260
12382
|
"long-form": {
|
|
12261
12383
|
qualityProfile: "general",
|
|
12384
|
+
semanticVetoMaxMs: 0,
|
|
12385
|
+
semanticVetoRecheckMs: DEFAULT_SEMANTIC_VETO_RECHECK_MS,
|
|
12262
12386
|
silenceMs: 2200,
|
|
12263
12387
|
speechThreshold: 0.01,
|
|
12264
12388
|
transcriptStabilityMs: 1500
|
|
@@ -12292,6 +12416,8 @@ var resolveTurnDetectionConfig = (config) => {
|
|
|
12292
12416
|
return {
|
|
12293
12417
|
profile,
|
|
12294
12418
|
qualityProfile,
|
|
12419
|
+
semanticVetoMaxMs: config?.semanticVetoMaxMs ?? preset.semanticVetoMaxMs,
|
|
12420
|
+
semanticVetoRecheckMs: config?.semanticVetoRecheckMs ?? preset.semanticVetoRecheckMs,
|
|
12295
12421
|
silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
|
|
12296
12422
|
speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
|
|
12297
12423
|
transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
|