@absolutejs/voice 0.0.22-beta.620 → 0.0.22-beta.622

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -800,6 +800,9 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
800
800
  resumeGreeting?: string | ((input: {
801
801
  session: TSession;
802
802
  }) => string | Promise<string>);
803
+ sttRecoveryLine?: string | ((input: {
804
+ session: TSession;
805
+ }) => string | Promise<string>);
803
806
  languageStrategy?: VoiceLanguageStrategy;
804
807
  lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
805
808
  phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
@@ -946,6 +949,15 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
946
949
  resumeGreeting?: string | ((input: {
947
950
  session: TSession;
948
951
  }) => string | Promise<string>);
952
+ /** Spoken when the STT-health watchdog detects the stream has gone DEAF
953
+ * mid-call (continuous speech energy, no transcripts landing — see
954
+ * STT_HEALTH_STALE_MS). A short re-prompt ("Sorry, I think I missed that — go
955
+ * ahead?") so the caller repeats into the freshly reconnected stream instead
956
+ * of talking into a silently dead call. Cooldown-guarded to fire at most once
957
+ * per stale episode. Receives the session. Unset = silent reconnect only. */
958
+ sttRecoveryLine?: string | ((input: {
959
+ session: TSession;
960
+ }) => string | Promise<string>);
949
961
  stt?: STTAdapter;
950
962
  realtime?: RealtimeAdapter;
951
963
  realtimeInputFormat?: AudioFormat;
package/dist/index.js CHANGED
@@ -3816,6 +3816,7 @@ var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
3816
3816
  var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
3817
3817
  var STT_HEALTH_STALE_MS = 6000;
3818
3818
  var STT_HEALTH_SPEECH_GAP_MS = 2000;
3819
+ var STT_RECOVERY_COOLDOWN_MS = 15000;
3819
3820
  var DEFAULT_FORMAT = {
3820
3821
  channels: 1,
3821
3822
  container: "raw",
@@ -4082,6 +4083,7 @@ var createVoiceSession = (options) => {
4082
4083
  let lastSttReconnectAt = 0;
4083
4084
  let lastSpeechEnergyAt = 0;
4084
4085
  let sttHealthPhaseStart = 0;
4086
+ let lastSttRecoverySpokenAt = 0;
4085
4087
  let activeTTSTurnId;
4086
4088
  let assistantSpeechEndsAt = 0;
4087
4089
  let lastAssistantAudioAt = 0;
@@ -6311,6 +6313,40 @@ var createVoiceSession = (options) => {
6311
6313
  }
6312
6314
  await completeTurn(updatedSession, turn);
6313
6315
  };
6316
+ const speakAssistantLine = async (text) => {
6317
+ if (!text.trim()) {
6318
+ return;
6319
+ }
6320
+ const lineTurnId = createId();
6321
+ await send({ text, turnId: lineTurnId, type: "assistant" });
6322
+ try {
6323
+ const lineTTSSession = await ensureTTSSession();
6324
+ if (lineTTSSession) {
6325
+ activeTTSTurnId = lineTurnId;
6326
+ await lineTTSSession.send(text);
6327
+ lastTtsSendAt = Date.now();
6328
+ } else if (options.realtime) {
6329
+ const lineRealtimeSession = await ensureAdapter();
6330
+ activeTTSTurnId = lineTurnId;
6331
+ await lineRealtimeSession.send(text);
6332
+ lastTtsSendAt = Date.now();
6333
+ }
6334
+ } catch {}
6335
+ };
6336
+ const resolveSessionLine = async (line, sessionForLine) => typeof line === "function" ? line({ session: sessionForLine }) : line;
6337
+ const speakResolvedLine = async (line, sessionForLine) => {
6338
+ try {
6339
+ await speakAssistantLine(await resolveSessionLine(line, sessionForLine));
6340
+ } catch {}
6341
+ };
6342
+ const maybeSpeakSttRecovery = (nowMs, sessionForLine) => {
6343
+ if (!options.sttRecoveryLine)
6344
+ return;
6345
+ if (nowMs - lastSttRecoverySpokenAt < STT_RECOVERY_COOLDOWN_MS)
6346
+ return;
6347
+ lastSttRecoverySpokenAt = nowMs;
6348
+ speakResolvedLine(options.sttRecoveryLine, sessionForLine);
6349
+ };
6314
6350
  const connectInternal = async (nextSocket) => {
6315
6351
  socket = nextSocket;
6316
6352
  const existingSession = await options.store.get(options.id);
@@ -6408,31 +6444,10 @@ var createVoiceSession = (options) => {
6408
6444
  warmTTSSession();
6409
6445
  kickCallSilenceWatchdog();
6410
6446
  startAmdEvaluationTimer();
6411
- const speakAssistantLine = async (text) => {
6412
- if (!text.trim()) {
6413
- return;
6414
- }
6415
- const lineTurnId = createId();
6416
- await send({ text, turnId: lineTurnId, type: "assistant" });
6417
- try {
6418
- const lineTTSSession = await ensureTTSSession();
6419
- if (lineTTSSession) {
6420
- activeTTSTurnId = lineTurnId;
6421
- await lineTTSSession.send(text);
6422
- lastTtsSendAt = Date.now();
6423
- } else if (options.realtime) {
6424
- const lineRealtimeSession = await ensureAdapter();
6425
- activeTTSTurnId = lineTurnId;
6426
- await lineRealtimeSession.send(text);
6427
- lastTtsSendAt = Date.now();
6428
- }
6429
- } catch {}
6430
- };
6431
- const resolveLine = async (line) => typeof line === "function" ? line({ session }) : line;
6432
6447
  if (options.greeting && session.turns.length === 0) {
6433
- await speakAssistantLine(await resolveLine(options.greeting));
6448
+ await speakResolvedLine(options.greeting, session);
6434
6449
  } else if (isResume && options.resumeGreeting && session.turns.length > 0) {
6435
- await speakAssistantLine(await resolveLine(options.resumeGreeting));
6450
+ await speakResolvedLine(options.resumeGreeting, session);
6436
6451
  }
6437
6452
  };
6438
6453
  const disconnectInternal = async (event) => {
@@ -6531,6 +6546,7 @@ var createVoiceSession = (options) => {
6531
6546
  sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
6532
6547
  lastSttReconnectAt = nowMs;
6533
6548
  sttHealthPhaseStart = nowMs;
6549
+ maybeSpeakSttRecovery(nowMs, latest);
6534
6550
  if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
6535
6551
  await appendTrace({
6536
6552
  payload: {
@@ -39688,6 +39704,7 @@ var voice = (config) => {
39688
39704
  id: sessionId,
39689
39705
  greeting: config.greeting,
39690
39706
  resumeGreeting: config.resumeGreeting,
39707
+ sttRecoveryLine: config.sttRecoveryLine,
39691
39708
  handoff: config.handoff,
39692
39709
  languageStrategy: config.languageStrategy,
39693
39710
  lexicon,
@@ -46252,9 +46269,15 @@ var createAnthropicVoiceAssistantModel = (options) => {
46252
46269
  messages: input.messages.map(messageToAnthropicMessage).filter(Boolean),
46253
46270
  model,
46254
46271
  stream: true,
46255
- system: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
46272
+ system: [
46273
+ {
46274
+ cache_control: { type: "ephemeral" },
46275
+ text: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
46256
46276
 
46257
46277
  `),
46278
+ type: "text"
46279
+ }
46280
+ ],
46258
46281
  temperature: options.temperature,
46259
46282
  tool_choice: input.tools.length ? { type: "auto" } : { type: "none" },
46260
46283
  tools: input.tools.map((tool) => ({
@@ -5209,9 +5209,15 @@ var createAnthropicVoiceAssistantModel = (options) => {
5209
5209
  messages: input.messages.map(messageToAnthropicMessage).filter(Boolean),
5210
5210
  model,
5211
5211
  stream: true,
5212
- system: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
5212
+ system: [
5213
+ {
5214
+ cache_control: { type: "ephemeral" },
5215
+ text: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
5213
5216
 
5214
5217
  `),
5218
+ type: "text"
5219
+ }
5220
+ ],
5215
5221
  temperature: options.temperature,
5216
5222
  tool_choice: input.tools.length ? { type: "auto" } : { type: "none" },
5217
5223
  tools: input.tools.map((tool) => ({
@@ -6130,6 +6136,7 @@ var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
6130
6136
  var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
6131
6137
  var STT_HEALTH_STALE_MS = 6000;
6132
6138
  var STT_HEALTH_SPEECH_GAP_MS = 2000;
6139
+ var STT_RECOVERY_COOLDOWN_MS = 15000;
6133
6140
  var DEFAULT_FORMAT = {
6134
6141
  channels: 1,
6135
6142
  container: "raw",
@@ -6396,6 +6403,7 @@ var createVoiceSession = (options) => {
6396
6403
  let lastSttReconnectAt = 0;
6397
6404
  let lastSpeechEnergyAt = 0;
6398
6405
  let sttHealthPhaseStart = 0;
6406
+ let lastSttRecoverySpokenAt = 0;
6399
6407
  let activeTTSTurnId;
6400
6408
  let assistantSpeechEndsAt = 0;
6401
6409
  let lastAssistantAudioAt = 0;
@@ -8625,6 +8633,40 @@ var createVoiceSession = (options) => {
8625
8633
  }
8626
8634
  await completeTurn(updatedSession, turn);
8627
8635
  };
8636
+ const speakAssistantLine = async (text) => {
8637
+ if (!text.trim()) {
8638
+ return;
8639
+ }
8640
+ const lineTurnId = createId();
8641
+ await send({ text, turnId: lineTurnId, type: "assistant" });
8642
+ try {
8643
+ const lineTTSSession = await ensureTTSSession();
8644
+ if (lineTTSSession) {
8645
+ activeTTSTurnId = lineTurnId;
8646
+ await lineTTSSession.send(text);
8647
+ lastTtsSendAt = Date.now();
8648
+ } else if (options.realtime) {
8649
+ const lineRealtimeSession = await ensureAdapter();
8650
+ activeTTSTurnId = lineTurnId;
8651
+ await lineRealtimeSession.send(text);
8652
+ lastTtsSendAt = Date.now();
8653
+ }
8654
+ } catch {}
8655
+ };
8656
+ const resolveSessionLine = async (line, sessionForLine) => typeof line === "function" ? line({ session: sessionForLine }) : line;
8657
+ const speakResolvedLine = async (line, sessionForLine) => {
8658
+ try {
8659
+ await speakAssistantLine(await resolveSessionLine(line, sessionForLine));
8660
+ } catch {}
8661
+ };
8662
+ const maybeSpeakSttRecovery = (nowMs, sessionForLine) => {
8663
+ if (!options.sttRecoveryLine)
8664
+ return;
8665
+ if (nowMs - lastSttRecoverySpokenAt < STT_RECOVERY_COOLDOWN_MS)
8666
+ return;
8667
+ lastSttRecoverySpokenAt = nowMs;
8668
+ speakResolvedLine(options.sttRecoveryLine, sessionForLine);
8669
+ };
8628
8670
  const connectInternal = async (nextSocket) => {
8629
8671
  socket = nextSocket;
8630
8672
  const existingSession = await options.store.get(options.id);
@@ -8722,31 +8764,10 @@ var createVoiceSession = (options) => {
8722
8764
  warmTTSSession();
8723
8765
  kickCallSilenceWatchdog();
8724
8766
  startAmdEvaluationTimer();
8725
- const speakAssistantLine = async (text) => {
8726
- if (!text.trim()) {
8727
- return;
8728
- }
8729
- const lineTurnId = createId();
8730
- await send({ text, turnId: lineTurnId, type: "assistant" });
8731
- try {
8732
- const lineTTSSession = await ensureTTSSession();
8733
- if (lineTTSSession) {
8734
- activeTTSTurnId = lineTurnId;
8735
- await lineTTSSession.send(text);
8736
- lastTtsSendAt = Date.now();
8737
- } else if (options.realtime) {
8738
- const lineRealtimeSession = await ensureAdapter();
8739
- activeTTSTurnId = lineTurnId;
8740
- await lineRealtimeSession.send(text);
8741
- lastTtsSendAt = Date.now();
8742
- }
8743
- } catch {}
8744
- };
8745
- const resolveLine = async (line) => typeof line === "function" ? line({ session }) : line;
8746
8767
  if (options.greeting && session.turns.length === 0) {
8747
- await speakAssistantLine(await resolveLine(options.greeting));
8768
+ await speakResolvedLine(options.greeting, session);
8748
8769
  } else if (isResume && options.resumeGreeting && session.turns.length > 0) {
8749
- await speakAssistantLine(await resolveLine(options.resumeGreeting));
8770
+ await speakResolvedLine(options.resumeGreeting, session);
8750
8771
  }
8751
8772
  };
8752
8773
  const disconnectInternal = async (event) => {
@@ -8845,6 +8866,7 @@ var createVoiceSession = (options) => {
8845
8866
  sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
8846
8867
  lastSttReconnectAt = nowMs;
8847
8868
  sttHealthPhaseStart = nowMs;
8869
+ maybeSpeakSttRecovery(nowMs, latest);
8848
8870
  if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
8849
8871
  await appendTrace({
8850
8872
  payload: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.620",
3
+ "version": "0.0.22-beta.622",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",