@absolutejs/voice 0.0.22-beta.620 → 0.0.22-beta.622
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/types.d.ts +12 -0
- package/dist/index.js +47 -24
- package/dist/testing/index.js +46 -24
- package/package.json +1 -1
package/dist/core/types.d.ts
CHANGED
|
@@ -800,6 +800,9 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
|
|
|
800
800
|
resumeGreeting?: string | ((input: {
|
|
801
801
|
session: TSession;
|
|
802
802
|
}) => string | Promise<string>);
|
|
803
|
+
sttRecoveryLine?: string | ((input: {
|
|
804
|
+
session: TSession;
|
|
805
|
+
}) => string | Promise<string>);
|
|
803
806
|
languageStrategy?: VoiceLanguageStrategy;
|
|
804
807
|
lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
|
|
805
808
|
phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
|
|
@@ -946,6 +949,15 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
|
|
|
946
949
|
resumeGreeting?: string | ((input: {
|
|
947
950
|
session: TSession;
|
|
948
951
|
}) => string | Promise<string>);
|
|
952
|
+
/** Spoken when the STT-health watchdog detects the stream has gone DEAF
|
|
953
|
+
* mid-call (continuous speech energy, no transcripts landing — see
|
|
954
|
+
* STT_HEALTH_STALE_MS). A short re-prompt ("Sorry, I think I missed that — go
|
|
955
|
+
* ahead?") so the caller repeats into the freshly reconnected stream instead
|
|
956
|
+
* of talking into a silently dead call. Cooldown-guarded to fire at most once
|
|
957
|
+
* per stale episode. Receives the session. Unset = silent reconnect only. */
|
|
958
|
+
sttRecoveryLine?: string | ((input: {
|
|
959
|
+
session: TSession;
|
|
960
|
+
}) => string | Promise<string>);
|
|
949
961
|
stt?: STTAdapter;
|
|
950
962
|
realtime?: RealtimeAdapter;
|
|
951
963
|
realtimeInputFormat?: AudioFormat;
|
package/dist/index.js
CHANGED
|
@@ -3816,6 +3816,7 @@ var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
|
|
|
3816
3816
|
var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
|
|
3817
3817
|
var STT_HEALTH_STALE_MS = 6000;
|
|
3818
3818
|
var STT_HEALTH_SPEECH_GAP_MS = 2000;
|
|
3819
|
+
var STT_RECOVERY_COOLDOWN_MS = 15000;
|
|
3819
3820
|
var DEFAULT_FORMAT = {
|
|
3820
3821
|
channels: 1,
|
|
3821
3822
|
container: "raw",
|
|
@@ -4082,6 +4083,7 @@ var createVoiceSession = (options) => {
|
|
|
4082
4083
|
let lastSttReconnectAt = 0;
|
|
4083
4084
|
let lastSpeechEnergyAt = 0;
|
|
4084
4085
|
let sttHealthPhaseStart = 0;
|
|
4086
|
+
let lastSttRecoverySpokenAt = 0;
|
|
4085
4087
|
let activeTTSTurnId;
|
|
4086
4088
|
let assistantSpeechEndsAt = 0;
|
|
4087
4089
|
let lastAssistantAudioAt = 0;
|
|
@@ -6311,6 +6313,40 @@ var createVoiceSession = (options) => {
|
|
|
6311
6313
|
}
|
|
6312
6314
|
await completeTurn(updatedSession, turn);
|
|
6313
6315
|
};
|
|
6316
|
+
const speakAssistantLine = async (text) => {
|
|
6317
|
+
if (!text.trim()) {
|
|
6318
|
+
return;
|
|
6319
|
+
}
|
|
6320
|
+
const lineTurnId = createId();
|
|
6321
|
+
await send({ text, turnId: lineTurnId, type: "assistant" });
|
|
6322
|
+
try {
|
|
6323
|
+
const lineTTSSession = await ensureTTSSession();
|
|
6324
|
+
if (lineTTSSession) {
|
|
6325
|
+
activeTTSTurnId = lineTurnId;
|
|
6326
|
+
await lineTTSSession.send(text);
|
|
6327
|
+
lastTtsSendAt = Date.now();
|
|
6328
|
+
} else if (options.realtime) {
|
|
6329
|
+
const lineRealtimeSession = await ensureAdapter();
|
|
6330
|
+
activeTTSTurnId = lineTurnId;
|
|
6331
|
+
await lineRealtimeSession.send(text);
|
|
6332
|
+
lastTtsSendAt = Date.now();
|
|
6333
|
+
}
|
|
6334
|
+
} catch {}
|
|
6335
|
+
};
|
|
6336
|
+
const resolveSessionLine = async (line, sessionForLine) => typeof line === "function" ? line({ session: sessionForLine }) : line;
|
|
6337
|
+
const speakResolvedLine = async (line, sessionForLine) => {
|
|
6338
|
+
try {
|
|
6339
|
+
await speakAssistantLine(await resolveSessionLine(line, sessionForLine));
|
|
6340
|
+
} catch {}
|
|
6341
|
+
};
|
|
6342
|
+
const maybeSpeakSttRecovery = (nowMs, sessionForLine) => {
|
|
6343
|
+
if (!options.sttRecoveryLine)
|
|
6344
|
+
return;
|
|
6345
|
+
if (nowMs - lastSttRecoverySpokenAt < STT_RECOVERY_COOLDOWN_MS)
|
|
6346
|
+
return;
|
|
6347
|
+
lastSttRecoverySpokenAt = nowMs;
|
|
6348
|
+
speakResolvedLine(options.sttRecoveryLine, sessionForLine);
|
|
6349
|
+
};
|
|
6314
6350
|
const connectInternal = async (nextSocket) => {
|
|
6315
6351
|
socket = nextSocket;
|
|
6316
6352
|
const existingSession = await options.store.get(options.id);
|
|
@@ -6408,31 +6444,10 @@ var createVoiceSession = (options) => {
|
|
|
6408
6444
|
warmTTSSession();
|
|
6409
6445
|
kickCallSilenceWatchdog();
|
|
6410
6446
|
startAmdEvaluationTimer();
|
|
6411
|
-
const speakAssistantLine = async (text) => {
|
|
6412
|
-
if (!text.trim()) {
|
|
6413
|
-
return;
|
|
6414
|
-
}
|
|
6415
|
-
const lineTurnId = createId();
|
|
6416
|
-
await send({ text, turnId: lineTurnId, type: "assistant" });
|
|
6417
|
-
try {
|
|
6418
|
-
const lineTTSSession = await ensureTTSSession();
|
|
6419
|
-
if (lineTTSSession) {
|
|
6420
|
-
activeTTSTurnId = lineTurnId;
|
|
6421
|
-
await lineTTSSession.send(text);
|
|
6422
|
-
lastTtsSendAt = Date.now();
|
|
6423
|
-
} else if (options.realtime) {
|
|
6424
|
-
const lineRealtimeSession = await ensureAdapter();
|
|
6425
|
-
activeTTSTurnId = lineTurnId;
|
|
6426
|
-
await lineRealtimeSession.send(text);
|
|
6427
|
-
lastTtsSendAt = Date.now();
|
|
6428
|
-
}
|
|
6429
|
-
} catch {}
|
|
6430
|
-
};
|
|
6431
|
-
const resolveLine = async (line) => typeof line === "function" ? line({ session }) : line;
|
|
6432
6447
|
if (options.greeting && session.turns.length === 0) {
|
|
6433
|
-
await speakAssistantLine(await resolveLine(options.greeting));
|
|
6448
|
+
await speakResolvedLine(options.greeting, session);
|
|
6434
6449
|
} else if (isResume && options.resumeGreeting && session.turns.length > 0) {
|
|
6435
|
-
await speakAssistantLine(await resolveLine(options.resumeGreeting));
|
|
6450
|
+
await speakResolvedLine(options.resumeGreeting, session);
|
|
6436
6451
|
}
|
|
6437
6452
|
};
|
|
6438
6453
|
const disconnectInternal = async (event) => {
|
|
@@ -6531,6 +6546,7 @@ var createVoiceSession = (options) => {
|
|
|
6531
6546
|
sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
|
|
6532
6547
|
lastSttReconnectAt = nowMs;
|
|
6533
6548
|
sttHealthPhaseStart = nowMs;
|
|
6549
|
+
maybeSpeakSttRecovery(nowMs, latest);
|
|
6534
6550
|
if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
|
|
6535
6551
|
await appendTrace({
|
|
6536
6552
|
payload: {
|
|
@@ -39688,6 +39704,7 @@ var voice = (config) => {
|
|
|
39688
39704
|
id: sessionId,
|
|
39689
39705
|
greeting: config.greeting,
|
|
39690
39706
|
resumeGreeting: config.resumeGreeting,
|
|
39707
|
+
sttRecoveryLine: config.sttRecoveryLine,
|
|
39691
39708
|
handoff: config.handoff,
|
|
39692
39709
|
languageStrategy: config.languageStrategy,
|
|
39693
39710
|
lexicon,
|
|
@@ -46252,9 +46269,15 @@ var createAnthropicVoiceAssistantModel = (options) => {
|
|
|
46252
46269
|
messages: input.messages.map(messageToAnthropicMessage).filter(Boolean),
|
|
46253
46270
|
model,
|
|
46254
46271
|
stream: true,
|
|
46255
|
-
system: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
46272
|
+
system: [
|
|
46273
|
+
{
|
|
46274
|
+
cache_control: { type: "ephemeral" },
|
|
46275
|
+
text: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
46256
46276
|
|
|
46257
46277
|
`),
|
|
46278
|
+
type: "text"
|
|
46279
|
+
}
|
|
46280
|
+
],
|
|
46258
46281
|
temperature: options.temperature,
|
|
46259
46282
|
tool_choice: input.tools.length ? { type: "auto" } : { type: "none" },
|
|
46260
46283
|
tools: input.tools.map((tool) => ({
|
package/dist/testing/index.js
CHANGED
|
@@ -5209,9 +5209,15 @@ var createAnthropicVoiceAssistantModel = (options) => {
|
|
|
5209
5209
|
messages: input.messages.map(messageToAnthropicMessage).filter(Boolean),
|
|
5210
5210
|
model,
|
|
5211
5211
|
stream: true,
|
|
5212
|
-
system: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
5212
|
+
system: [
|
|
5213
|
+
{
|
|
5214
|
+
cache_control: { type: "ephemeral" },
|
|
5215
|
+
text: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
|
|
5213
5216
|
|
|
5214
5217
|
`),
|
|
5218
|
+
type: "text"
|
|
5219
|
+
}
|
|
5220
|
+
],
|
|
5215
5221
|
temperature: options.temperature,
|
|
5216
5222
|
tool_choice: input.tools.length ? { type: "auto" } : { type: "none" },
|
|
5217
5223
|
tools: input.tools.map((tool) => ({
|
|
@@ -6130,6 +6136,7 @@ var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
|
|
|
6130
6136
|
var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
|
|
6131
6137
|
var STT_HEALTH_STALE_MS = 6000;
|
|
6132
6138
|
var STT_HEALTH_SPEECH_GAP_MS = 2000;
|
|
6139
|
+
var STT_RECOVERY_COOLDOWN_MS = 15000;
|
|
6133
6140
|
var DEFAULT_FORMAT = {
|
|
6134
6141
|
channels: 1,
|
|
6135
6142
|
container: "raw",
|
|
@@ -6396,6 +6403,7 @@ var createVoiceSession = (options) => {
|
|
|
6396
6403
|
let lastSttReconnectAt = 0;
|
|
6397
6404
|
let lastSpeechEnergyAt = 0;
|
|
6398
6405
|
let sttHealthPhaseStart = 0;
|
|
6406
|
+
let lastSttRecoverySpokenAt = 0;
|
|
6399
6407
|
let activeTTSTurnId;
|
|
6400
6408
|
let assistantSpeechEndsAt = 0;
|
|
6401
6409
|
let lastAssistantAudioAt = 0;
|
|
@@ -8625,6 +8633,40 @@ var createVoiceSession = (options) => {
|
|
|
8625
8633
|
}
|
|
8626
8634
|
await completeTurn(updatedSession, turn);
|
|
8627
8635
|
};
|
|
8636
|
+
const speakAssistantLine = async (text) => {
|
|
8637
|
+
if (!text.trim()) {
|
|
8638
|
+
return;
|
|
8639
|
+
}
|
|
8640
|
+
const lineTurnId = createId();
|
|
8641
|
+
await send({ text, turnId: lineTurnId, type: "assistant" });
|
|
8642
|
+
try {
|
|
8643
|
+
const lineTTSSession = await ensureTTSSession();
|
|
8644
|
+
if (lineTTSSession) {
|
|
8645
|
+
activeTTSTurnId = lineTurnId;
|
|
8646
|
+
await lineTTSSession.send(text);
|
|
8647
|
+
lastTtsSendAt = Date.now();
|
|
8648
|
+
} else if (options.realtime) {
|
|
8649
|
+
const lineRealtimeSession = await ensureAdapter();
|
|
8650
|
+
activeTTSTurnId = lineTurnId;
|
|
8651
|
+
await lineRealtimeSession.send(text);
|
|
8652
|
+
lastTtsSendAt = Date.now();
|
|
8653
|
+
}
|
|
8654
|
+
} catch {}
|
|
8655
|
+
};
|
|
8656
|
+
const resolveSessionLine = async (line, sessionForLine) => typeof line === "function" ? line({ session: sessionForLine }) : line;
|
|
8657
|
+
const speakResolvedLine = async (line, sessionForLine) => {
|
|
8658
|
+
try {
|
|
8659
|
+
await speakAssistantLine(await resolveSessionLine(line, sessionForLine));
|
|
8660
|
+
} catch {}
|
|
8661
|
+
};
|
|
8662
|
+
const maybeSpeakSttRecovery = (nowMs, sessionForLine) => {
|
|
8663
|
+
if (!options.sttRecoveryLine)
|
|
8664
|
+
return;
|
|
8665
|
+
if (nowMs - lastSttRecoverySpokenAt < STT_RECOVERY_COOLDOWN_MS)
|
|
8666
|
+
return;
|
|
8667
|
+
lastSttRecoverySpokenAt = nowMs;
|
|
8668
|
+
speakResolvedLine(options.sttRecoveryLine, sessionForLine);
|
|
8669
|
+
};
|
|
8628
8670
|
const connectInternal = async (nextSocket) => {
|
|
8629
8671
|
socket = nextSocket;
|
|
8630
8672
|
const existingSession = await options.store.get(options.id);
|
|
@@ -8722,31 +8764,10 @@ var createVoiceSession = (options) => {
|
|
|
8722
8764
|
warmTTSSession();
|
|
8723
8765
|
kickCallSilenceWatchdog();
|
|
8724
8766
|
startAmdEvaluationTimer();
|
|
8725
|
-
const speakAssistantLine = async (text) => {
|
|
8726
|
-
if (!text.trim()) {
|
|
8727
|
-
return;
|
|
8728
|
-
}
|
|
8729
|
-
const lineTurnId = createId();
|
|
8730
|
-
await send({ text, turnId: lineTurnId, type: "assistant" });
|
|
8731
|
-
try {
|
|
8732
|
-
const lineTTSSession = await ensureTTSSession();
|
|
8733
|
-
if (lineTTSSession) {
|
|
8734
|
-
activeTTSTurnId = lineTurnId;
|
|
8735
|
-
await lineTTSSession.send(text);
|
|
8736
|
-
lastTtsSendAt = Date.now();
|
|
8737
|
-
} else if (options.realtime) {
|
|
8738
|
-
const lineRealtimeSession = await ensureAdapter();
|
|
8739
|
-
activeTTSTurnId = lineTurnId;
|
|
8740
|
-
await lineRealtimeSession.send(text);
|
|
8741
|
-
lastTtsSendAt = Date.now();
|
|
8742
|
-
}
|
|
8743
|
-
} catch {}
|
|
8744
|
-
};
|
|
8745
|
-
const resolveLine = async (line) => typeof line === "function" ? line({ session }) : line;
|
|
8746
8767
|
if (options.greeting && session.turns.length === 0) {
|
|
8747
|
-
await speakAssistantLine(await resolveLine(options.greeting));
|
|
8768
|
+
await speakResolvedLine(options.greeting, session);
|
|
8748
8769
|
} else if (isResume && options.resumeGreeting && session.turns.length > 0) {
|
|
8749
|
-
await speakAssistantLine(await resolveLine(options.resumeGreeting));
|
|
8770
|
+
await speakResolvedLine(options.resumeGreeting, session);
|
|
8750
8771
|
}
|
|
8751
8772
|
};
|
|
8752
8773
|
const disconnectInternal = async (event) => {
|
|
@@ -8845,6 +8866,7 @@ var createVoiceSession = (options) => {
|
|
|
8845
8866
|
sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
|
|
8846
8867
|
lastSttReconnectAt = nowMs;
|
|
8847
8868
|
sttHealthPhaseStart = nowMs;
|
|
8869
|
+
maybeSpeakSttRecovery(nowMs, latest);
|
|
8848
8870
|
if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
|
|
8849
8871
|
await appendTrace({
|
|
8850
8872
|
payload: {
|