@absolutejs/voice 0.0.22-beta.621 → 0.0.22-beta.623

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -800,6 +800,16 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
800
800
  resumeGreeting?: string | ((input: {
801
801
  session: TSession;
802
802
  }) => string | Promise<string>);
803
+ sttRecoveryLine?: string | ((input: {
804
+ session: TSession;
805
+ }) => string | Promise<string>);
806
+ stuckCallClose?: {
807
+ afterMs: number;
808
+ line?: string | ((input: {
809
+ session: TSession;
810
+ }) => string | Promise<string>);
811
+ reason?: string;
812
+ };
803
813
  languageStrategy?: VoiceLanguageStrategy;
804
814
  lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
805
815
  phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
@@ -946,6 +956,29 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
946
956
  resumeGreeting?: string | ((input: {
947
957
  session: TSession;
948
958
  }) => string | Promise<string>);
959
+ /** Spoken when the STT-health watchdog detects the stream has gone DEAF
960
+ * mid-call (continuous speech energy, no transcripts landing — see
961
+ * STT_HEALTH_STALE_MS). A short re-prompt ("Sorry, I think I missed that — go
962
+ * ahead?") so the caller repeats into the freshly reconnected stream instead
963
+ * of talking into a silently dead call. Cooldown-guarded to fire at most once
964
+ * per STT_RECOVERY_COOLDOWN_MS window. Receives the session. Unset = silent reconnect only. */
965
+ sttRecoveryLine?: string | ((input: {
966
+ session: TSession;
967
+ }) => string | Promise<string>);
968
+ /** Last-resort GRACEFUL terminal close for a wedged call. If no caller-side
969
+ * progress (committed turn / user partial) lands for `afterMs` on a live call
970
+ * — STT permanently deaf, or the caller left — the assistant speaks `line` and
971
+ * the session COMPLETES (disposition "completed") so onComplete still saves and
972
+ * the call ends with a real goodbye instead of dead air + "abandoned". Reset by
973
+ * real progress (committed turn / user partial / (re)connect), NOT by the
974
+ * assistant's own speech, so STT recovery re-prompts can't defer it forever. */
975
+ stuckCallClose?: {
976
+ afterMs: number;
977
+ line?: string | ((input: {
978
+ session: TSession;
979
+ }) => string | Promise<string>);
980
+ reason?: string;
981
+ };
949
982
  stt?: STTAdapter;
950
983
  realtime?: RealtimeAdapter;
951
984
  realtimeInputFormat?: AudioFormat;
package/dist/index.js CHANGED
@@ -3816,6 +3816,7 @@ var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
3816
3816
  var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
3817
3817
  var STT_HEALTH_STALE_MS = 6000;
3818
3818
  var STT_HEALTH_SPEECH_GAP_MS = 2000;
3819
+ var STT_RECOVERY_COOLDOWN_MS = 15000;
3819
3820
  var DEFAULT_FORMAT = {
3820
3821
  channels: 1,
3821
3822
  container: "raw",
@@ -4082,6 +4083,7 @@ var createVoiceSession = (options) => {
4082
4083
  let lastSttReconnectAt = 0;
4083
4084
  let lastSpeechEnergyAt = 0;
4084
4085
  let sttHealthPhaseStart = 0;
4086
+ let lastSttRecoverySpokenAt = 0;
4085
4087
  let activeTTSTurnId;
4086
4088
  let assistantSpeechEndsAt = 0;
4087
4089
  let lastAssistantAudioAt = 0;
@@ -4184,6 +4186,51 @@ var createVoiceSession = (options) => {
4184
4186
  clearCallSilenceWatchdog();
4185
4187
  callSilenceWatchdog = setTimeout(fireCallSilenceTimeout, callSilenceTimeoutMs);
4186
4188
  };
4189
+ const stuckCloseConfig = options.stuckCallClose;
4190
+ const stuckCloseAfterMs = stuckCloseConfig && stuckCloseConfig.afterMs > 0 ? stuckCloseConfig.afterMs : undefined;
4191
+ let stuckCloseWatchdog = null;
4192
+ let stuckCloseFired = false;
4193
+ const clearStuckCloseWatchdog = () => {
4194
+ if (stuckCloseWatchdog) {
4195
+ clearTimeout(stuckCloseWatchdog);
4196
+ stuckCloseWatchdog = null;
4197
+ }
4198
+ };
4199
+ const fireStuckClose = () => {
4200
+ stuckCloseWatchdog = null;
4201
+ if (stuckCloseFired) {
4202
+ return;
4203
+ }
4204
+ stuckCloseFired = true;
4205
+ runSerial("stuck-call-close", async () => {
4206
+ const snapshot = await readSession();
4207
+ if (snapshot.status === "completed" || snapshot.status === "failed" || snapshot.call?.endedAt) {
4208
+ return;
4209
+ }
4210
+ await appendTrace({
4211
+ payload: {
4212
+ action: "stuck-call-close",
4213
+ reason: `no caller progress for ${stuckCloseAfterMs}ms`
4214
+ },
4215
+ session: snapshot,
4216
+ type: "session.error"
4217
+ });
4218
+ if (stuckCloseConfig?.line) {
4219
+ await speakResolvedLine(stuckCloseConfig.line, snapshot);
4220
+ }
4221
+ await completeInternal(undefined, {
4222
+ disposition: "completed",
4223
+ reason: stuckCloseConfig?.reason ?? "stuck-call-close"
4224
+ });
4225
+ });
4226
+ };
4227
+ const kickStuckCloseWatchdog = () => {
4228
+ if (stuckCloseAfterMs === undefined || stuckCloseFired) {
4229
+ return;
4230
+ }
4231
+ clearStuckCloseWatchdog();
4232
+ stuckCloseWatchdog = setTimeout(fireStuckClose, stuckCloseAfterMs);
4233
+ };
4187
4234
  const recordingConfig = options.recording;
4188
4235
  const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
4189
4236
  const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
@@ -4680,6 +4727,7 @@ var createVoiceSession = (options) => {
4680
4727
  type: "error"
4681
4728
  });
4682
4729
  clearCallSilenceWatchdog();
4730
+ clearStuckCloseWatchdog();
4683
4731
  clearAmdEvaluationTimer();
4684
4732
  await closeTTSSession("failed");
4685
4733
  await closeAdapter("failed");
@@ -4789,6 +4837,7 @@ var createVoiceSession = (options) => {
4789
4837
  type: "complete"
4790
4838
  });
4791
4839
  clearCallSilenceWatchdog();
4840
+ clearStuckCloseWatchdog();
4792
4841
  clearAmdEvaluationTimer();
4793
4842
  await closeTTSSession("complete");
4794
4843
  await closeAdapter("complete");
@@ -5266,6 +5315,9 @@ var createVoiceSession = (options) => {
5266
5315
  };
5267
5316
  };
5268
5317
  const handlePartial = async (transcript) => {
5318
+ if (transcript.text.trim()) {
5319
+ kickStuckCloseWatchdog();
5320
+ }
5269
5321
  if (activeTTSTurnId !== undefined) {
5270
5322
  const triggeringText = transcript.text.trim();
5271
5323
  if (triggeringText) {
@@ -5740,6 +5792,7 @@ var createVoiceSession = (options) => {
5740
5792
  };
5741
5793
  const completeTurn = async (session, turn) => {
5742
5794
  console.error(`[voice] completeTurn ENTER session=${options.id} turn=${turn.id} textLen=${turn.text?.length ?? 0}`);
5795
+ kickStuckCloseWatchdog();
5743
5796
  const liveOpsControl = await options.liveOps?.getControl(options.id);
5744
5797
  if (liveOpsControl?.assistantPaused || liveOpsControl?.operatorTakeover) {
5745
5798
  await appendTrace({
@@ -6311,6 +6364,40 @@ var createVoiceSession = (options) => {
6311
6364
  }
6312
6365
  await completeTurn(updatedSession, turn);
6313
6366
  };
6367
+ const speakAssistantLine = async (text) => {
6368
+ if (!text.trim()) {
6369
+ return;
6370
+ }
6371
+ const lineTurnId = createId();
6372
+ await send({ text, turnId: lineTurnId, type: "assistant" });
6373
+ try {
6374
+ const lineTTSSession = await ensureTTSSession();
6375
+ if (lineTTSSession) {
6376
+ activeTTSTurnId = lineTurnId;
6377
+ await lineTTSSession.send(text);
6378
+ lastTtsSendAt = Date.now();
6379
+ } else if (options.realtime) {
6380
+ const lineRealtimeSession = await ensureAdapter();
6381
+ activeTTSTurnId = lineTurnId;
6382
+ await lineRealtimeSession.send(text);
6383
+ lastTtsSendAt = Date.now();
6384
+ }
6385
+ } catch {}
6386
+ };
6387
+ const resolveSessionLine = async (line, sessionForLine) => typeof line === "function" ? line({ session: sessionForLine }) : line;
6388
+ const speakResolvedLine = async (line, sessionForLine) => {
6389
+ try {
6390
+ await speakAssistantLine(await resolveSessionLine(line, sessionForLine));
6391
+ } catch {}
6392
+ };
6393
+ const maybeSpeakSttRecovery = (nowMs, sessionForLine) => {
6394
+ if (!options.sttRecoveryLine)
6395
+ return;
6396
+ if (nowMs - lastSttRecoverySpokenAt < STT_RECOVERY_COOLDOWN_MS)
6397
+ return;
6398
+ lastSttRecoverySpokenAt = nowMs;
6399
+ speakResolvedLine(options.sttRecoveryLine, sessionForLine);
6400
+ };
6314
6401
  const connectInternal = async (nextSocket) => {
6315
6402
  socket = nextSocket;
6316
6403
  const existingSession = await options.store.get(options.id);
@@ -6407,32 +6494,12 @@ var createVoiceSession = (options) => {
6407
6494
  await ensureAdapter();
6408
6495
  warmTTSSession();
6409
6496
  kickCallSilenceWatchdog();
6497
+ kickStuckCloseWatchdog();
6410
6498
  startAmdEvaluationTimer();
6411
- const speakAssistantLine = async (text) => {
6412
- if (!text.trim()) {
6413
- return;
6414
- }
6415
- const lineTurnId = createId();
6416
- await send({ text, turnId: lineTurnId, type: "assistant" });
6417
- try {
6418
- const lineTTSSession = await ensureTTSSession();
6419
- if (lineTTSSession) {
6420
- activeTTSTurnId = lineTurnId;
6421
- await lineTTSSession.send(text);
6422
- lastTtsSendAt = Date.now();
6423
- } else if (options.realtime) {
6424
- const lineRealtimeSession = await ensureAdapter();
6425
- activeTTSTurnId = lineTurnId;
6426
- await lineRealtimeSession.send(text);
6427
- lastTtsSendAt = Date.now();
6428
- }
6429
- } catch {}
6430
- };
6431
- const resolveLine = async (line) => typeof line === "function" ? line({ session }) : line;
6432
6499
  if (options.greeting && session.turns.length === 0) {
6433
- await speakAssistantLine(await resolveLine(options.greeting));
6500
+ await speakResolvedLine(options.greeting, session);
6434
6501
  } else if (isResume && options.resumeGreeting && session.turns.length > 0) {
6435
- await speakAssistantLine(await resolveLine(options.resumeGreeting));
6502
+ await speakResolvedLine(options.resumeGreeting, session);
6436
6503
  }
6437
6504
  };
6438
6505
  const disconnectInternal = async (event) => {
@@ -6531,6 +6598,7 @@ var createVoiceSession = (options) => {
6531
6598
  sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
6532
6599
  lastSttReconnectAt = nowMs;
6533
6600
  sttHealthPhaseStart = nowMs;
6601
+ maybeSpeakSttRecovery(nowMs, latest);
6534
6602
  if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
6535
6603
  await appendTrace({
6536
6604
  payload: {
@@ -6572,6 +6640,7 @@ var createVoiceSession = (options) => {
6572
6640
  });
6573
6641
  clearSilenceTimer();
6574
6642
  clearCallSilenceWatchdog();
6643
+ clearStuckCloseWatchdog();
6575
6644
  clearAmdEvaluationTimer();
6576
6645
  if (options.noiseSuppressor?.close) {
6577
6646
  try {
@@ -39688,6 +39757,8 @@ var voice = (config) => {
39688
39757
  id: sessionId,
39689
39758
  greeting: config.greeting,
39690
39759
  resumeGreeting: config.resumeGreeting,
39760
+ sttRecoveryLine: config.sttRecoveryLine,
39761
+ stuckCallClose: config.stuckCallClose,
39691
39762
  handoff: config.handoff,
39692
39763
  languageStrategy: config.languageStrategy,
39693
39764
  lexicon,
@@ -6136,6 +6136,7 @@ var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
6136
6136
  var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
6137
6137
  var STT_HEALTH_STALE_MS = 6000;
6138
6138
  var STT_HEALTH_SPEECH_GAP_MS = 2000;
6139
+ var STT_RECOVERY_COOLDOWN_MS = 15000;
6139
6140
  var DEFAULT_FORMAT = {
6140
6141
  channels: 1,
6141
6142
  container: "raw",
@@ -6402,6 +6403,7 @@ var createVoiceSession = (options) => {
6402
6403
  let lastSttReconnectAt = 0;
6403
6404
  let lastSpeechEnergyAt = 0;
6404
6405
  let sttHealthPhaseStart = 0;
6406
+ let lastSttRecoverySpokenAt = 0;
6405
6407
  let activeTTSTurnId;
6406
6408
  let assistantSpeechEndsAt = 0;
6407
6409
  let lastAssistantAudioAt = 0;
@@ -6504,6 +6506,51 @@ var createVoiceSession = (options) => {
6504
6506
  clearCallSilenceWatchdog();
6505
6507
  callSilenceWatchdog = setTimeout(fireCallSilenceTimeout, callSilenceTimeoutMs);
6506
6508
  };
6509
+ const stuckCloseConfig = options.stuckCallClose;
6510
+ const stuckCloseAfterMs = stuckCloseConfig && stuckCloseConfig.afterMs > 0 ? stuckCloseConfig.afterMs : undefined;
6511
+ let stuckCloseWatchdog = null;
6512
+ let stuckCloseFired = false;
6513
+ const clearStuckCloseWatchdog = () => {
6514
+ if (stuckCloseWatchdog) {
6515
+ clearTimeout(stuckCloseWatchdog);
6516
+ stuckCloseWatchdog = null;
6517
+ }
6518
+ };
6519
+ const fireStuckClose = () => {
6520
+ stuckCloseWatchdog = null;
6521
+ if (stuckCloseFired) {
6522
+ return;
6523
+ }
6524
+ stuckCloseFired = true;
6525
+ runSerial("stuck-call-close", async () => {
6526
+ const snapshot = await readSession();
6527
+ if (snapshot.status === "completed" || snapshot.status === "failed" || snapshot.call?.endedAt) {
6528
+ return;
6529
+ }
6530
+ await appendTrace({
6531
+ payload: {
6532
+ action: "stuck-call-close",
6533
+ reason: `no caller progress for ${stuckCloseAfterMs}ms`
6534
+ },
6535
+ session: snapshot,
6536
+ type: "session.error"
6537
+ });
6538
+ if (stuckCloseConfig?.line) {
6539
+ await speakResolvedLine(stuckCloseConfig.line, snapshot);
6540
+ }
6541
+ await completeInternal(undefined, {
6542
+ disposition: "completed",
6543
+ reason: stuckCloseConfig?.reason ?? "stuck-call-close"
6544
+ });
6545
+ });
6546
+ };
6547
+ const kickStuckCloseWatchdog = () => {
6548
+ if (stuckCloseAfterMs === undefined || stuckCloseFired) {
6549
+ return;
6550
+ }
6551
+ clearStuckCloseWatchdog();
6552
+ stuckCloseWatchdog = setTimeout(fireStuckClose, stuckCloseAfterMs);
6553
+ };
6507
6554
  const recordingConfig = options.recording;
6508
6555
  const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
6509
6556
  const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
@@ -7000,6 +7047,7 @@ var createVoiceSession = (options) => {
7000
7047
  type: "error"
7001
7048
  });
7002
7049
  clearCallSilenceWatchdog();
7050
+ clearStuckCloseWatchdog();
7003
7051
  clearAmdEvaluationTimer();
7004
7052
  await closeTTSSession("failed");
7005
7053
  await closeAdapter("failed");
@@ -7109,6 +7157,7 @@ var createVoiceSession = (options) => {
7109
7157
  type: "complete"
7110
7158
  });
7111
7159
  clearCallSilenceWatchdog();
7160
+ clearStuckCloseWatchdog();
7112
7161
  clearAmdEvaluationTimer();
7113
7162
  await closeTTSSession("complete");
7114
7163
  await closeAdapter("complete");
@@ -7586,6 +7635,9 @@ var createVoiceSession = (options) => {
7586
7635
  };
7587
7636
  };
7588
7637
  const handlePartial = async (transcript) => {
7638
+ if (transcript.text.trim()) {
7639
+ kickStuckCloseWatchdog();
7640
+ }
7589
7641
  if (activeTTSTurnId !== undefined) {
7590
7642
  const triggeringText = transcript.text.trim();
7591
7643
  if (triggeringText) {
@@ -8060,6 +8112,7 @@ var createVoiceSession = (options) => {
8060
8112
  };
8061
8113
  const completeTurn = async (session, turn) => {
8062
8114
  console.error(`[voice] completeTurn ENTER session=${options.id} turn=${turn.id} textLen=${turn.text?.length ?? 0}`);
8115
+ kickStuckCloseWatchdog();
8063
8116
  const liveOpsControl = await options.liveOps?.getControl(options.id);
8064
8117
  if (liveOpsControl?.assistantPaused || liveOpsControl?.operatorTakeover) {
8065
8118
  await appendTrace({
@@ -8631,6 +8684,40 @@ var createVoiceSession = (options) => {
8631
8684
  }
8632
8685
  await completeTurn(updatedSession, turn);
8633
8686
  };
8687
+ const speakAssistantLine = async (text) => {
8688
+ if (!text.trim()) {
8689
+ return;
8690
+ }
8691
+ const lineTurnId = createId();
8692
+ await send({ text, turnId: lineTurnId, type: "assistant" });
8693
+ try {
8694
+ const lineTTSSession = await ensureTTSSession();
8695
+ if (lineTTSSession) {
8696
+ activeTTSTurnId = lineTurnId;
8697
+ await lineTTSSession.send(text);
8698
+ lastTtsSendAt = Date.now();
8699
+ } else if (options.realtime) {
8700
+ const lineRealtimeSession = await ensureAdapter();
8701
+ activeTTSTurnId = lineTurnId;
8702
+ await lineRealtimeSession.send(text);
8703
+ lastTtsSendAt = Date.now();
8704
+ }
8705
+ } catch {}
8706
+ };
8707
+ const resolveSessionLine = async (line, sessionForLine) => typeof line === "function" ? line({ session: sessionForLine }) : line;
8708
+ const speakResolvedLine = async (line, sessionForLine) => {
8709
+ try {
8710
+ await speakAssistantLine(await resolveSessionLine(line, sessionForLine));
8711
+ } catch {}
8712
+ };
8713
+ const maybeSpeakSttRecovery = (nowMs, sessionForLine) => {
8714
+ if (!options.sttRecoveryLine)
8715
+ return;
8716
+ if (nowMs - lastSttRecoverySpokenAt < STT_RECOVERY_COOLDOWN_MS)
8717
+ return;
8718
+ lastSttRecoverySpokenAt = nowMs;
8719
+ speakResolvedLine(options.sttRecoveryLine, sessionForLine);
8720
+ };
8634
8721
  const connectInternal = async (nextSocket) => {
8635
8722
  socket = nextSocket;
8636
8723
  const existingSession = await options.store.get(options.id);
@@ -8727,32 +8814,12 @@ var createVoiceSession = (options) => {
8727
8814
  await ensureAdapter();
8728
8815
  warmTTSSession();
8729
8816
  kickCallSilenceWatchdog();
8817
+ kickStuckCloseWatchdog();
8730
8818
  startAmdEvaluationTimer();
8731
- const speakAssistantLine = async (text) => {
8732
- if (!text.trim()) {
8733
- return;
8734
- }
8735
- const lineTurnId = createId();
8736
- await send({ text, turnId: lineTurnId, type: "assistant" });
8737
- try {
8738
- const lineTTSSession = await ensureTTSSession();
8739
- if (lineTTSSession) {
8740
- activeTTSTurnId = lineTurnId;
8741
- await lineTTSSession.send(text);
8742
- lastTtsSendAt = Date.now();
8743
- } else if (options.realtime) {
8744
- const lineRealtimeSession = await ensureAdapter();
8745
- activeTTSTurnId = lineTurnId;
8746
- await lineRealtimeSession.send(text);
8747
- lastTtsSendAt = Date.now();
8748
- }
8749
- } catch {}
8750
- };
8751
- const resolveLine = async (line) => typeof line === "function" ? line({ session }) : line;
8752
8819
  if (options.greeting && session.turns.length === 0) {
8753
- await speakAssistantLine(await resolveLine(options.greeting));
8820
+ await speakResolvedLine(options.greeting, session);
8754
8821
  } else if (isResume && options.resumeGreeting && session.turns.length > 0) {
8755
- await speakAssistantLine(await resolveLine(options.resumeGreeting));
8822
+ await speakResolvedLine(options.resumeGreeting, session);
8756
8823
  }
8757
8824
  };
8758
8825
  const disconnectInternal = async (event) => {
@@ -8851,6 +8918,7 @@ var createVoiceSession = (options) => {
8851
8918
  sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
8852
8919
  lastSttReconnectAt = nowMs;
8853
8920
  sttHealthPhaseStart = nowMs;
8921
+ maybeSpeakSttRecovery(nowMs, latest);
8854
8922
  if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
8855
8923
  await appendTrace({
8856
8924
  payload: {
@@ -8892,6 +8960,7 @@ var createVoiceSession = (options) => {
8892
8960
  });
8893
8961
  clearSilenceTimer();
8894
8962
  clearCallSilenceWatchdog();
8963
+ clearStuckCloseWatchdog();
8895
8964
  clearAmdEvaluationTimer();
8896
8965
  if (options.noiseSuppressor?.close) {
8897
8966
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.621",
3
+ "version": "0.0.22-beta.623",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",