@absolutejs/voice 0.0.22-beta.595 → 0.0.22-beta.597

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3723,6 +3723,8 @@ var EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS = 200;
3723
3723
  var MAX_VENDOR_COMMIT_GRACE_MS = 1200;
3724
3724
  var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
3725
3725
  var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
3726
+ var STT_HEALTH_STALE_MS = 6000;
3727
+ var STT_HEALTH_SPEECH_GAP_MS = 2000;
3726
3728
  var DEFAULT_FORMAT = {
3727
3729
  channels: 1,
3728
3730
  container: "raw",
@@ -3755,6 +3757,8 @@ var getBufferedAudioDurationMs = (chunks) => chunks.reduce((total, chunk) => tot
3755
3757
  var STREAM_SENTENCE_BOUNDARY = /[.!?\u2026]['")\]]*\s/;
3756
3758
  var STREAM_CLAUSE_BOUNDARY = /[,;:]\s/g;
3757
3759
  var MAX_TTS_CHUNK_CHARS = 320;
3760
+ var STREAM_SENTENCE_END = /[.!?\u2026]['")\]]*$/;
3761
+ var STREAM_IDLE_FLUSH_MS = 350;
3758
3762
  var nextSpeakableBoundary = (buffer) => {
3759
3763
  const match = STREAM_SENTENCE_BOUNDARY.exec(buffer);
3760
3764
  return match ? match.index + match[0].length : -1;
@@ -3972,6 +3976,8 @@ var createVoiceSession = (options) => {
3972
3976
  let activeAdapterGeneration = 0;
3973
3977
  let sttReconnectCount = 0;
3974
3978
  let lastSttReconnectAt = 0;
3979
+ let lastSpeechEnergyAt = 0;
3980
+ let sttHealthPhaseStart = 0;
3975
3981
  let activeTTSTurnId;
3976
3982
  let assistantSpeechEndsAt = 0;
3977
3983
  let lastAssistantAudioAt = 0;
@@ -5444,6 +5450,7 @@ var createVoiceSession = (options) => {
5444
5450
  let charsSent = 0;
5445
5451
  let started = false;
5446
5452
  let streamed = false;
5453
+ let idleFlushTimer = null;
5447
5454
  let sendChain = Promise.resolve();
5448
5455
  let ttsSessionRequest = null;
5449
5456
  const ttsStartedAt = Date.now();
@@ -5503,8 +5510,23 @@ var createVoiceSession = (options) => {
5503
5510
  }
5504
5511
  })();
5505
5512
  };
5513
+ const clearIdleFlush = () => {
5514
+ if (idleFlushTimer) {
5515
+ clearTimeout(idleFlushTimer);
5516
+ idleFlushTimer = null;
5517
+ }
5518
+ };
5519
+ const flushOnIdle = () => {
5520
+ idleFlushTimer = null;
5521
+ const pending = buffer.trim();
5522
+ if (pending && STREAM_SENTENCE_END.test(pending)) {
5523
+ flush(buffer);
5524
+ buffer = "";
5525
+ }
5526
+ };
5506
5527
  return {
5507
5528
  finish: async () => {
5529
+ clearIdleFlush();
5508
5530
  if (buffer.trim()) {
5509
5531
  flush(buffer);
5510
5532
  }
@@ -5550,6 +5572,10 @@ var createVoiceSession = (options) => {
5550
5572
  flush(buffer.slice(0, cut));
5551
5573
  buffer = buffer.slice(cut);
5552
5574
  }
5575
+ clearIdleFlush();
5576
+ if (buffer.trim()) {
5577
+ idleFlushTimer = setTimeout(flushOnIdle, STREAM_IDLE_FLUSH_MS);
5578
+ }
5553
5579
  }
5554
5580
  };
5555
5581
  };
@@ -6303,6 +6329,30 @@ var createVoiceSession = (options) => {
6303
6329
  } else {
6304
6330
  clearSilenceTimer();
6305
6331
  }
6332
+ const nowMs = Date.now();
6333
+ if (nowMs - lastSpeechEnergyAt > STT_HEALTH_SPEECH_GAP_MS) {
6334
+ sttHealthPhaseStart = nowMs;
6335
+ }
6336
+ lastSpeechEnergyAt = nowMs;
6337
+ const lastTranscriptAt = latest.currentTurn.lastTranscriptAt ?? 0;
6338
+ if (!options.realtime && sttSession && lastTranscriptAt < sttHealthPhaseStart && nowMs - sttHealthPhaseStart >= STT_HEALTH_STALE_MS) {
6339
+ sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
6340
+ lastSttReconnectAt = nowMs;
6341
+ sttHealthPhaseStart = nowMs;
6342
+ if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
6343
+ await appendTrace({
6344
+ payload: {
6345
+ action: "stt-health-reconnect",
6346
+ attempt: sttReconnectCount,
6347
+ reason: `no transcript for ${STT_HEALTH_STALE_MS}ms of continuous speech`
6348
+ },
6349
+ session: latest,
6350
+ type: "session.error"
6351
+ });
6352
+ await closeAdapter("stt stale; health-reconnect");
6353
+ return;
6354
+ }
6355
+ }
6306
6356
  } else if (speechDetected) {
6307
6357
  backchannelDriver?.noteSilence();
6308
6358
  const currentSession = await readSession();
@@ -5950,6 +5950,8 @@ var EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS = 200;
5950
5950
  var MAX_VENDOR_COMMIT_GRACE_MS = 1200;
5951
5951
  var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
5952
5952
  var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
5953
+ var STT_HEALTH_STALE_MS = 6000;
5954
+ var STT_HEALTH_SPEECH_GAP_MS = 2000;
5953
5955
  var DEFAULT_FORMAT = {
5954
5956
  channels: 1,
5955
5957
  container: "raw",
@@ -5982,6 +5984,8 @@ var getBufferedAudioDurationMs = (chunks) => chunks.reduce((total, chunk) => tot
5982
5984
  var STREAM_SENTENCE_BOUNDARY = /[.!?\u2026]['")\]]*\s/;
5983
5985
  var STREAM_CLAUSE_BOUNDARY = /[,;:]\s/g;
5984
5986
  var MAX_TTS_CHUNK_CHARS = 320;
5987
+ var STREAM_SENTENCE_END = /[.!?\u2026]['")\]]*$/;
5988
+ var STREAM_IDLE_FLUSH_MS = 350;
5985
5989
  var nextSpeakableBoundary = (buffer) => {
5986
5990
  const match = STREAM_SENTENCE_BOUNDARY.exec(buffer);
5987
5991
  return match ? match.index + match[0].length : -1;
@@ -6199,6 +6203,8 @@ var createVoiceSession = (options) => {
6199
6203
  let activeAdapterGeneration = 0;
6200
6204
  let sttReconnectCount = 0;
6201
6205
  let lastSttReconnectAt = 0;
6206
+ let lastSpeechEnergyAt = 0;
6207
+ let sttHealthPhaseStart = 0;
6202
6208
  let activeTTSTurnId;
6203
6209
  let assistantSpeechEndsAt = 0;
6204
6210
  let lastAssistantAudioAt = 0;
@@ -7671,6 +7677,7 @@ var createVoiceSession = (options) => {
7671
7677
  let charsSent = 0;
7672
7678
  let started = false;
7673
7679
  let streamed = false;
7680
+ let idleFlushTimer = null;
7674
7681
  let sendChain = Promise.resolve();
7675
7682
  let ttsSessionRequest = null;
7676
7683
  const ttsStartedAt = Date.now();
@@ -7730,8 +7737,23 @@ var createVoiceSession = (options) => {
7730
7737
  }
7731
7738
  })();
7732
7739
  };
7740
+ const clearIdleFlush = () => {
7741
+ if (idleFlushTimer) {
7742
+ clearTimeout(idleFlushTimer);
7743
+ idleFlushTimer = null;
7744
+ }
7745
+ };
7746
+ const flushOnIdle = () => {
7747
+ idleFlushTimer = null;
7748
+ const pending = buffer.trim();
7749
+ if (pending && STREAM_SENTENCE_END.test(pending)) {
7750
+ flush(buffer);
7751
+ buffer = "";
7752
+ }
7753
+ };
7733
7754
  return {
7734
7755
  finish: async () => {
7756
+ clearIdleFlush();
7735
7757
  if (buffer.trim()) {
7736
7758
  flush(buffer);
7737
7759
  }
@@ -7777,6 +7799,10 @@ var createVoiceSession = (options) => {
7777
7799
  flush(buffer.slice(0, cut));
7778
7800
  buffer = buffer.slice(cut);
7779
7801
  }
7802
+ clearIdleFlush();
7803
+ if (buffer.trim()) {
7804
+ idleFlushTimer = setTimeout(flushOnIdle, STREAM_IDLE_FLUSH_MS);
7805
+ }
7780
7806
  }
7781
7807
  };
7782
7808
  };
@@ -8530,6 +8556,30 @@ var createVoiceSession = (options) => {
8530
8556
  } else {
8531
8557
  clearSilenceTimer();
8532
8558
  }
8559
+ const nowMs = Date.now();
8560
+ if (nowMs - lastSpeechEnergyAt > STT_HEALTH_SPEECH_GAP_MS) {
8561
+ sttHealthPhaseStart = nowMs;
8562
+ }
8563
+ lastSpeechEnergyAt = nowMs;
8564
+ const lastTranscriptAt = latest.currentTurn.lastTranscriptAt ?? 0;
8565
+ if (!options.realtime && sttSession && lastTranscriptAt < sttHealthPhaseStart && nowMs - sttHealthPhaseStart >= STT_HEALTH_STALE_MS) {
8566
+ sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
8567
+ lastSttReconnectAt = nowMs;
8568
+ sttHealthPhaseStart = nowMs;
8569
+ if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
8570
+ await appendTrace({
8571
+ payload: {
8572
+ action: "stt-health-reconnect",
8573
+ attempt: sttReconnectCount,
8574
+ reason: `no transcript for ${STT_HEALTH_STALE_MS}ms of continuous speech`
8575
+ },
8576
+ session: latest,
8577
+ type: "session.error"
8578
+ });
8579
+ await closeAdapter("stt stale; health-reconnect");
8580
+ return;
8581
+ }
8582
+ }
8533
8583
  } else if (speechDetected) {
8534
8584
  backchannelDriver?.noteSilence();
8535
8585
  const currentSession = await readSession();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.595",
3
+ "version": "0.0.22-beta.597",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",