@absolutejs/voice 0.0.22-beta.595 → 0.0.22-beta.597
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +50 -0
- package/dist/testing/index.js +50 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -3723,6 +3723,8 @@ var EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS = 200;
|
|
|
3723
3723
|
var MAX_VENDOR_COMMIT_GRACE_MS = 1200;
|
|
3724
3724
|
var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
|
|
3725
3725
|
var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
|
|
3726
|
+
var STT_HEALTH_STALE_MS = 6000;
|
|
3727
|
+
var STT_HEALTH_SPEECH_GAP_MS = 2000;
|
|
3726
3728
|
var DEFAULT_FORMAT = {
|
|
3727
3729
|
channels: 1,
|
|
3728
3730
|
container: "raw",
|
|
@@ -3755,6 +3757,8 @@ var getBufferedAudioDurationMs = (chunks) => chunks.reduce((total, chunk) => tot
|
|
|
3755
3757
|
var STREAM_SENTENCE_BOUNDARY = /[.!?\u2026]['")\]]*\s/;
|
|
3756
3758
|
var STREAM_CLAUSE_BOUNDARY = /[,;:]\s/g;
|
|
3757
3759
|
var MAX_TTS_CHUNK_CHARS = 320;
|
|
3760
|
+
var STREAM_SENTENCE_END = /[.!?\u2026]['")\]]*$/;
|
|
3761
|
+
var STREAM_IDLE_FLUSH_MS = 350;
|
|
3758
3762
|
var nextSpeakableBoundary = (buffer) => {
|
|
3759
3763
|
const match = STREAM_SENTENCE_BOUNDARY.exec(buffer);
|
|
3760
3764
|
return match ? match.index + match[0].length : -1;
|
|
@@ -3972,6 +3976,8 @@ var createVoiceSession = (options) => {
|
|
|
3972
3976
|
let activeAdapterGeneration = 0;
|
|
3973
3977
|
let sttReconnectCount = 0;
|
|
3974
3978
|
let lastSttReconnectAt = 0;
|
|
3979
|
+
let lastSpeechEnergyAt = 0;
|
|
3980
|
+
let sttHealthPhaseStart = 0;
|
|
3975
3981
|
let activeTTSTurnId;
|
|
3976
3982
|
let assistantSpeechEndsAt = 0;
|
|
3977
3983
|
let lastAssistantAudioAt = 0;
|
|
@@ -5444,6 +5450,7 @@ var createVoiceSession = (options) => {
|
|
|
5444
5450
|
let charsSent = 0;
|
|
5445
5451
|
let started = false;
|
|
5446
5452
|
let streamed = false;
|
|
5453
|
+
let idleFlushTimer = null;
|
|
5447
5454
|
let sendChain = Promise.resolve();
|
|
5448
5455
|
let ttsSessionRequest = null;
|
|
5449
5456
|
const ttsStartedAt = Date.now();
|
|
@@ -5503,8 +5510,23 @@ var createVoiceSession = (options) => {
|
|
|
5503
5510
|
}
|
|
5504
5511
|
})();
|
|
5505
5512
|
};
|
|
5513
|
+
const clearIdleFlush = () => {
|
|
5514
|
+
if (idleFlushTimer) {
|
|
5515
|
+
clearTimeout(idleFlushTimer);
|
|
5516
|
+
idleFlushTimer = null;
|
|
5517
|
+
}
|
|
5518
|
+
};
|
|
5519
|
+
const flushOnIdle = () => {
|
|
5520
|
+
idleFlushTimer = null;
|
|
5521
|
+
const pending = buffer.trim();
|
|
5522
|
+
if (pending && STREAM_SENTENCE_END.test(pending)) {
|
|
5523
|
+
flush(buffer);
|
|
5524
|
+
buffer = "";
|
|
5525
|
+
}
|
|
5526
|
+
};
|
|
5506
5527
|
return {
|
|
5507
5528
|
finish: async () => {
|
|
5529
|
+
clearIdleFlush();
|
|
5508
5530
|
if (buffer.trim()) {
|
|
5509
5531
|
flush(buffer);
|
|
5510
5532
|
}
|
|
@@ -5550,6 +5572,10 @@ var createVoiceSession = (options) => {
|
|
|
5550
5572
|
flush(buffer.slice(0, cut));
|
|
5551
5573
|
buffer = buffer.slice(cut);
|
|
5552
5574
|
}
|
|
5575
|
+
clearIdleFlush();
|
|
5576
|
+
if (buffer.trim()) {
|
|
5577
|
+
idleFlushTimer = setTimeout(flushOnIdle, STREAM_IDLE_FLUSH_MS);
|
|
5578
|
+
}
|
|
5553
5579
|
}
|
|
5554
5580
|
};
|
|
5555
5581
|
};
|
|
@@ -6303,6 +6329,30 @@ var createVoiceSession = (options) => {
|
|
|
6303
6329
|
} else {
|
|
6304
6330
|
clearSilenceTimer();
|
|
6305
6331
|
}
|
|
6332
|
+
const nowMs = Date.now();
|
|
6333
|
+
if (nowMs - lastSpeechEnergyAt > STT_HEALTH_SPEECH_GAP_MS) {
|
|
6334
|
+
sttHealthPhaseStart = nowMs;
|
|
6335
|
+
}
|
|
6336
|
+
lastSpeechEnergyAt = nowMs;
|
|
6337
|
+
const lastTranscriptAt = latest.currentTurn.lastTranscriptAt ?? 0;
|
|
6338
|
+
if (!options.realtime && sttSession && lastTranscriptAt < sttHealthPhaseStart && nowMs - sttHealthPhaseStart >= STT_HEALTH_STALE_MS) {
|
|
6339
|
+
sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
|
|
6340
|
+
lastSttReconnectAt = nowMs;
|
|
6341
|
+
sttHealthPhaseStart = nowMs;
|
|
6342
|
+
if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
|
|
6343
|
+
await appendTrace({
|
|
6344
|
+
payload: {
|
|
6345
|
+
action: "stt-health-reconnect",
|
|
6346
|
+
attempt: sttReconnectCount,
|
|
6347
|
+
reason: `no transcript for ${STT_HEALTH_STALE_MS}ms of continuous speech`
|
|
6348
|
+
},
|
|
6349
|
+
session: latest,
|
|
6350
|
+
type: "session.error"
|
|
6351
|
+
});
|
|
6352
|
+
await closeAdapter("stt stale; health-reconnect");
|
|
6353
|
+
return;
|
|
6354
|
+
}
|
|
6355
|
+
}
|
|
6306
6356
|
} else if (speechDetected) {
|
|
6307
6357
|
backchannelDriver?.noteSilence();
|
|
6308
6358
|
const currentSession = await readSession();
|
package/dist/testing/index.js
CHANGED
|
@@ -5950,6 +5950,8 @@ var EXTENDED_VENDOR_COMMIT_SILENCE_THRESHOLD_MS = 200;
|
|
|
5950
5950
|
var MAX_VENDOR_COMMIT_GRACE_MS = 1200;
|
|
5951
5951
|
var STT_RECONNECT_FLAP_WINDOW_MS = 4000;
|
|
5952
5952
|
var MAX_STT_RECONNECTS_IN_FLAP_WINDOW = 3;
|
|
5953
|
+
var STT_HEALTH_STALE_MS = 6000;
|
|
5954
|
+
var STT_HEALTH_SPEECH_GAP_MS = 2000;
|
|
5953
5955
|
var DEFAULT_FORMAT = {
|
|
5954
5956
|
channels: 1,
|
|
5955
5957
|
container: "raw",
|
|
@@ -5982,6 +5984,8 @@ var getBufferedAudioDurationMs = (chunks) => chunks.reduce((total, chunk) => tot
|
|
|
5982
5984
|
var STREAM_SENTENCE_BOUNDARY = /[.!?\u2026]['")\]]*\s/;
|
|
5983
5985
|
var STREAM_CLAUSE_BOUNDARY = /[,;:]\s/g;
|
|
5984
5986
|
var MAX_TTS_CHUNK_CHARS = 320;
|
|
5987
|
+
var STREAM_SENTENCE_END = /[.!?\u2026]['")\]]*$/;
|
|
5988
|
+
var STREAM_IDLE_FLUSH_MS = 350;
|
|
5985
5989
|
var nextSpeakableBoundary = (buffer) => {
|
|
5986
5990
|
const match = STREAM_SENTENCE_BOUNDARY.exec(buffer);
|
|
5987
5991
|
return match ? match.index + match[0].length : -1;
|
|
@@ -6199,6 +6203,8 @@ var createVoiceSession = (options) => {
|
|
|
6199
6203
|
let activeAdapterGeneration = 0;
|
|
6200
6204
|
let sttReconnectCount = 0;
|
|
6201
6205
|
let lastSttReconnectAt = 0;
|
|
6206
|
+
let lastSpeechEnergyAt = 0;
|
|
6207
|
+
let sttHealthPhaseStart = 0;
|
|
6202
6208
|
let activeTTSTurnId;
|
|
6203
6209
|
let assistantSpeechEndsAt = 0;
|
|
6204
6210
|
let lastAssistantAudioAt = 0;
|
|
@@ -7671,6 +7677,7 @@ var createVoiceSession = (options) => {
|
|
|
7671
7677
|
let charsSent = 0;
|
|
7672
7678
|
let started = false;
|
|
7673
7679
|
let streamed = false;
|
|
7680
|
+
let idleFlushTimer = null;
|
|
7674
7681
|
let sendChain = Promise.resolve();
|
|
7675
7682
|
let ttsSessionRequest = null;
|
|
7676
7683
|
const ttsStartedAt = Date.now();
|
|
@@ -7730,8 +7737,23 @@ var createVoiceSession = (options) => {
|
|
|
7730
7737
|
}
|
|
7731
7738
|
})();
|
|
7732
7739
|
};
|
|
7740
|
+
const clearIdleFlush = () => {
|
|
7741
|
+
if (idleFlushTimer) {
|
|
7742
|
+
clearTimeout(idleFlushTimer);
|
|
7743
|
+
idleFlushTimer = null;
|
|
7744
|
+
}
|
|
7745
|
+
};
|
|
7746
|
+
const flushOnIdle = () => {
|
|
7747
|
+
idleFlushTimer = null;
|
|
7748
|
+
const pending = buffer.trim();
|
|
7749
|
+
if (pending && STREAM_SENTENCE_END.test(pending)) {
|
|
7750
|
+
flush(buffer);
|
|
7751
|
+
buffer = "";
|
|
7752
|
+
}
|
|
7753
|
+
};
|
|
7733
7754
|
return {
|
|
7734
7755
|
finish: async () => {
|
|
7756
|
+
clearIdleFlush();
|
|
7735
7757
|
if (buffer.trim()) {
|
|
7736
7758
|
flush(buffer);
|
|
7737
7759
|
}
|
|
@@ -7777,6 +7799,10 @@ var createVoiceSession = (options) => {
|
|
|
7777
7799
|
flush(buffer.slice(0, cut));
|
|
7778
7800
|
buffer = buffer.slice(cut);
|
|
7779
7801
|
}
|
|
7802
|
+
clearIdleFlush();
|
|
7803
|
+
if (buffer.trim()) {
|
|
7804
|
+
idleFlushTimer = setTimeout(flushOnIdle, STREAM_IDLE_FLUSH_MS);
|
|
7805
|
+
}
|
|
7780
7806
|
}
|
|
7781
7807
|
};
|
|
7782
7808
|
};
|
|
@@ -8530,6 +8556,30 @@ var createVoiceSession = (options) => {
|
|
|
8530
8556
|
} else {
|
|
8531
8557
|
clearSilenceTimer();
|
|
8532
8558
|
}
|
|
8559
|
+
const nowMs = Date.now();
|
|
8560
|
+
if (nowMs - lastSpeechEnergyAt > STT_HEALTH_SPEECH_GAP_MS) {
|
|
8561
|
+
sttHealthPhaseStart = nowMs;
|
|
8562
|
+
}
|
|
8563
|
+
lastSpeechEnergyAt = nowMs;
|
|
8564
|
+
const lastTranscriptAt = latest.currentTurn.lastTranscriptAt ?? 0;
|
|
8565
|
+
if (!options.realtime && sttSession && lastTranscriptAt < sttHealthPhaseStart && nowMs - sttHealthPhaseStart >= STT_HEALTH_STALE_MS) {
|
|
8566
|
+
sttReconnectCount = nowMs - lastSttReconnectAt < STT_RECONNECT_FLAP_WINDOW_MS ? sttReconnectCount + 1 : 1;
|
|
8567
|
+
lastSttReconnectAt = nowMs;
|
|
8568
|
+
sttHealthPhaseStart = nowMs;
|
|
8569
|
+
if (sttReconnectCount <= MAX_STT_RECONNECTS_IN_FLAP_WINDOW) {
|
|
8570
|
+
await appendTrace({
|
|
8571
|
+
payload: {
|
|
8572
|
+
action: "stt-health-reconnect",
|
|
8573
|
+
attempt: sttReconnectCount,
|
|
8574
|
+
reason: `no transcript for ${STT_HEALTH_STALE_MS}ms of continuous speech`
|
|
8575
|
+
},
|
|
8576
|
+
session: latest,
|
|
8577
|
+
type: "session.error"
|
|
8578
|
+
});
|
|
8579
|
+
await closeAdapter("stt stale; health-reconnect");
|
|
8580
|
+
return;
|
|
8581
|
+
}
|
|
8582
|
+
}
|
|
8533
8583
|
} else if (speechDetected) {
|
|
8534
8584
|
backchannelDriver?.noteSilence();
|
|
8535
8585
|
const currentSession = await readSession();
|