@absolutejs/voice 0.0.22-beta.580 → 0.0.22-beta.581
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +41 -11
- package/dist/testing/index.js +41 -11
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -3936,6 +3936,8 @@ var createVoiceSession = (options) => {
|
|
|
3936
3936
|
let activeAdapterGeneration = 0;
|
|
3937
3937
|
let activeTTSTurnId;
|
|
3938
3938
|
let assistantSpeechEndsAt = 0;
|
|
3939
|
+
let lastAssistantAudioAt = 0;
|
|
3940
|
+
let lastTtsSendAt = 0;
|
|
3939
3941
|
let fillerTimer = null;
|
|
3940
3942
|
let fillerActive = false;
|
|
3941
3943
|
let fillerToken = 0;
|
|
@@ -4209,6 +4211,15 @@ var createVoiceSession = (options) => {
|
|
|
4209
4211
|
});
|
|
4210
4212
|
return result;
|
|
4211
4213
|
};
|
|
4214
|
+
let assistantAudioQueue = Promise.resolve();
|
|
4215
|
+
const runAudioSerial = (operation) => {
|
|
4216
|
+
const next = assistantAudioQueue.then(operation);
|
|
4217
|
+
assistantAudioQueue = next.then(() => {
|
|
4218
|
+
return;
|
|
4219
|
+
}, () => {
|
|
4220
|
+
return;
|
|
4221
|
+
});
|
|
4222
|
+
};
|
|
4212
4223
|
const closeAdapter = async (reason) => {
|
|
4213
4224
|
if (!sttSession) {
|
|
4214
4225
|
return;
|
|
@@ -4377,6 +4388,7 @@ var createVoiceSession = (options) => {
|
|
|
4377
4388
|
const chunkMs = normalizedChunk.byteLength / bytesPerSecond * 1000;
|
|
4378
4389
|
assistantSpeechEndsAt = Math.max(assistantSpeechEndsAt, Date.now()) + chunkMs;
|
|
4379
4390
|
}
|
|
4391
|
+
lastAssistantAudioAt = Date.now();
|
|
4380
4392
|
if (activeTTSTurnId) {
|
|
4381
4393
|
await appendTurnLatencyStage({
|
|
4382
4394
|
at: input.receivedAt,
|
|
@@ -4486,18 +4498,28 @@ var createVoiceSession = (options) => {
|
|
|
4486
4498
|
session
|
|
4487
4499
|
});
|
|
4488
4500
|
};
|
|
4489
|
-
const DRAIN_POLL_MS =
|
|
4501
|
+
const DRAIN_POLL_MS = 100;
|
|
4490
4502
|
const DRAIN_TAIL_BUFFER_MS = 300;
|
|
4491
|
-
const
|
|
4492
|
-
const
|
|
4503
|
+
const DRAIN_QUIET_MS = 600;
|
|
4504
|
+
const DRAIN_RENDER_START_MS = 4000;
|
|
4505
|
+
const DRAIN_MAX_MS = 20000;
|
|
4506
|
+
const drainAssistantSpeech = async (renderPendingSince) => {
|
|
4493
4507
|
const startedAt = Date.now();
|
|
4508
|
+
const sleep3 = (delayMs) => new Promise((resolve) => {
|
|
4509
|
+
setTimeout(resolve, delayMs);
|
|
4510
|
+
});
|
|
4494
4511
|
while (Date.now() - startedAt < DRAIN_MAX_MS) {
|
|
4495
|
-
const
|
|
4496
|
-
|
|
4512
|
+
const now = Date.now();
|
|
4513
|
+
const renderStarted = lastAssistantAudioAt >= renderPendingSince || now - renderPendingSince >= DRAIN_RENDER_START_MS;
|
|
4514
|
+
if (!renderStarted) {
|
|
4515
|
+
await sleep3(DRAIN_POLL_MS);
|
|
4516
|
+
continue;
|
|
4517
|
+
}
|
|
4518
|
+
const streamQuiet = now - lastAssistantAudioAt >= DRAIN_QUIET_MS;
|
|
4519
|
+
const playbackDrained = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS <= now;
|
|
4520
|
+
if (streamQuiet && playbackDrained)
|
|
4497
4521
|
return;
|
|
4498
|
-
await
|
|
4499
|
-
setTimeout(resolve, Math.min(remaining, DRAIN_POLL_MS));
|
|
4500
|
-
});
|
|
4522
|
+
await sleep3(DRAIN_POLL_MS);
|
|
4501
4523
|
}
|
|
4502
4524
|
};
|
|
4503
4525
|
const completeInternal = async (result, input = {}) => {
|
|
@@ -4534,7 +4556,8 @@ var createVoiceSession = (options) => {
|
|
|
4534
4556
|
return;
|
|
4535
4557
|
}
|
|
4536
4558
|
if (disposition === "completed") {
|
|
4537
|
-
await drainAssistantSpeech();
|
|
4559
|
+
await drainAssistantSpeech(lastTtsSendAt);
|
|
4560
|
+
await assistantAudioQueue;
|
|
4538
4561
|
}
|
|
4539
4562
|
await appendTrace({
|
|
4540
4563
|
payload: {
|
|
@@ -5204,7 +5227,10 @@ var createVoiceSession = (options) => {
|
|
|
5204
5227
|
});
|
|
5205
5228
|
if (options.realtime) {
|
|
5206
5229
|
openedSession.on("audio", ({ chunk, format, receivedAt }) => {
|
|
5207
|
-
|
|
5230
|
+
runAudioSerial(async () => {
|
|
5231
|
+
if (activeAdapterGeneration !== generation) {
|
|
5232
|
+
return;
|
|
5233
|
+
}
|
|
5208
5234
|
await sendAssistantAudio(chunk, {
|
|
5209
5235
|
format,
|
|
5210
5236
|
receivedAt
|
|
@@ -5233,7 +5259,7 @@ var createVoiceSession = (options) => {
|
|
|
5233
5259
|
});
|
|
5234
5260
|
ttsSession = openedSession;
|
|
5235
5261
|
openedSession.on("audio", ({ chunk, format, receivedAt }) => {
|
|
5236
|
-
|
|
5262
|
+
runAudioSerial(async () => {
|
|
5237
5263
|
if (ttsSession !== openedSession) {
|
|
5238
5264
|
return;
|
|
5239
5265
|
}
|
|
@@ -5361,6 +5387,7 @@ var createVoiceSession = (options) => {
|
|
|
5361
5387
|
try {
|
|
5362
5388
|
await ttsSession2.send(text);
|
|
5363
5389
|
charsSent += text.length;
|
|
5390
|
+
lastTtsSendAt = Date.now();
|
|
5364
5391
|
} catch (error) {
|
|
5365
5392
|
logger.warn("voice assistant audio send failed", {
|
|
5366
5393
|
error: toError(error).message,
|
|
@@ -5643,6 +5670,7 @@ var createVoiceSession = (options) => {
|
|
|
5643
5670
|
turnId: turn.id
|
|
5644
5671
|
});
|
|
5645
5672
|
await activeTTSSession.send(output.assistantText);
|
|
5673
|
+
lastTtsSendAt = Date.now();
|
|
5646
5674
|
if (options.costAccountant) {
|
|
5647
5675
|
options.costAccountant.recordTTS({
|
|
5648
5676
|
characters: output.assistantText.length
|
|
@@ -6067,10 +6095,12 @@ var createVoiceSession = (options) => {
|
|
|
6067
6095
|
if (greetingTTSSession) {
|
|
6068
6096
|
activeTTSTurnId = greetingTurnId;
|
|
6069
6097
|
await greetingTTSSession.send(greetingText);
|
|
6098
|
+
lastTtsSendAt = Date.now();
|
|
6070
6099
|
} else if (options.realtime) {
|
|
6071
6100
|
const greetingRealtimeSession = await ensureAdapter();
|
|
6072
6101
|
activeTTSTurnId = greetingTurnId;
|
|
6073
6102
|
await greetingRealtimeSession.send(greetingText);
|
|
6103
|
+
lastTtsSendAt = Date.now();
|
|
6074
6104
|
}
|
|
6075
6105
|
} catch {}
|
|
6076
6106
|
}
|
package/dist/testing/index.js
CHANGED
|
@@ -6053,6 +6053,8 @@ var createVoiceSession = (options) => {
|
|
|
6053
6053
|
let activeAdapterGeneration = 0;
|
|
6054
6054
|
let activeTTSTurnId;
|
|
6055
6055
|
let assistantSpeechEndsAt = 0;
|
|
6056
|
+
let lastAssistantAudioAt = 0;
|
|
6057
|
+
let lastTtsSendAt = 0;
|
|
6056
6058
|
let fillerTimer = null;
|
|
6057
6059
|
let fillerActive = false;
|
|
6058
6060
|
let fillerToken = 0;
|
|
@@ -6326,6 +6328,15 @@ var createVoiceSession = (options) => {
|
|
|
6326
6328
|
});
|
|
6327
6329
|
return result;
|
|
6328
6330
|
};
|
|
6331
|
+
let assistantAudioQueue = Promise.resolve();
|
|
6332
|
+
const runAudioSerial = (operation) => {
|
|
6333
|
+
const next = assistantAudioQueue.then(operation);
|
|
6334
|
+
assistantAudioQueue = next.then(() => {
|
|
6335
|
+
return;
|
|
6336
|
+
}, () => {
|
|
6337
|
+
return;
|
|
6338
|
+
});
|
|
6339
|
+
};
|
|
6329
6340
|
const closeAdapter = async (reason) => {
|
|
6330
6341
|
if (!sttSession) {
|
|
6331
6342
|
return;
|
|
@@ -6494,6 +6505,7 @@ var createVoiceSession = (options) => {
|
|
|
6494
6505
|
const chunkMs = normalizedChunk.byteLength / bytesPerSecond * 1000;
|
|
6495
6506
|
assistantSpeechEndsAt = Math.max(assistantSpeechEndsAt, Date.now()) + chunkMs;
|
|
6496
6507
|
}
|
|
6508
|
+
lastAssistantAudioAt = Date.now();
|
|
6497
6509
|
if (activeTTSTurnId) {
|
|
6498
6510
|
await appendTurnLatencyStage({
|
|
6499
6511
|
at: input.receivedAt,
|
|
@@ -6603,18 +6615,28 @@ var createVoiceSession = (options) => {
|
|
|
6603
6615
|
session
|
|
6604
6616
|
});
|
|
6605
6617
|
};
|
|
6606
|
-
const DRAIN_POLL_MS =
|
|
6618
|
+
const DRAIN_POLL_MS = 100;
|
|
6607
6619
|
const DRAIN_TAIL_BUFFER_MS = 300;
|
|
6608
|
-
const
|
|
6609
|
-
const
|
|
6620
|
+
const DRAIN_QUIET_MS = 600;
|
|
6621
|
+
const DRAIN_RENDER_START_MS = 4000;
|
|
6622
|
+
const DRAIN_MAX_MS = 20000;
|
|
6623
|
+
const drainAssistantSpeech = async (renderPendingSince) => {
|
|
6610
6624
|
const startedAt = Date.now();
|
|
6625
|
+
const sleep2 = (delayMs) => new Promise((resolve2) => {
|
|
6626
|
+
setTimeout(resolve2, delayMs);
|
|
6627
|
+
});
|
|
6611
6628
|
while (Date.now() - startedAt < DRAIN_MAX_MS) {
|
|
6612
|
-
const
|
|
6613
|
-
|
|
6629
|
+
const now = Date.now();
|
|
6630
|
+
const renderStarted = lastAssistantAudioAt >= renderPendingSince || now - renderPendingSince >= DRAIN_RENDER_START_MS;
|
|
6631
|
+
if (!renderStarted) {
|
|
6632
|
+
await sleep2(DRAIN_POLL_MS);
|
|
6633
|
+
continue;
|
|
6634
|
+
}
|
|
6635
|
+
const streamQuiet = now - lastAssistantAudioAt >= DRAIN_QUIET_MS;
|
|
6636
|
+
const playbackDrained = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS <= now;
|
|
6637
|
+
if (streamQuiet && playbackDrained)
|
|
6614
6638
|
return;
|
|
6615
|
-
await
|
|
6616
|
-
setTimeout(resolve2, Math.min(remaining, DRAIN_POLL_MS));
|
|
6617
|
-
});
|
|
6639
|
+
await sleep2(DRAIN_POLL_MS);
|
|
6618
6640
|
}
|
|
6619
6641
|
};
|
|
6620
6642
|
const completeInternal = async (result, input = {}) => {
|
|
@@ -6651,7 +6673,8 @@ var createVoiceSession = (options) => {
|
|
|
6651
6673
|
return;
|
|
6652
6674
|
}
|
|
6653
6675
|
if (disposition === "completed") {
|
|
6654
|
-
await drainAssistantSpeech();
|
|
6676
|
+
await drainAssistantSpeech(lastTtsSendAt);
|
|
6677
|
+
await assistantAudioQueue;
|
|
6655
6678
|
}
|
|
6656
6679
|
await appendTrace({
|
|
6657
6680
|
payload: {
|
|
@@ -7321,7 +7344,10 @@ var createVoiceSession = (options) => {
|
|
|
7321
7344
|
});
|
|
7322
7345
|
if (options.realtime) {
|
|
7323
7346
|
openedSession.on("audio", ({ chunk, format, receivedAt }) => {
|
|
7324
|
-
|
|
7347
|
+
runAudioSerial(async () => {
|
|
7348
|
+
if (activeAdapterGeneration !== generation) {
|
|
7349
|
+
return;
|
|
7350
|
+
}
|
|
7325
7351
|
await sendAssistantAudio(chunk, {
|
|
7326
7352
|
format,
|
|
7327
7353
|
receivedAt
|
|
@@ -7350,7 +7376,7 @@ var createVoiceSession = (options) => {
|
|
|
7350
7376
|
});
|
|
7351
7377
|
ttsSession = openedSession;
|
|
7352
7378
|
openedSession.on("audio", ({ chunk, format, receivedAt }) => {
|
|
7353
|
-
|
|
7379
|
+
runAudioSerial(async () => {
|
|
7354
7380
|
if (ttsSession !== openedSession) {
|
|
7355
7381
|
return;
|
|
7356
7382
|
}
|
|
@@ -7478,6 +7504,7 @@ var createVoiceSession = (options) => {
|
|
|
7478
7504
|
try {
|
|
7479
7505
|
await ttsSession2.send(text);
|
|
7480
7506
|
charsSent += text.length;
|
|
7507
|
+
lastTtsSendAt = Date.now();
|
|
7481
7508
|
} catch (error) {
|
|
7482
7509
|
logger.warn("voice assistant audio send failed", {
|
|
7483
7510
|
error: toError(error).message,
|
|
@@ -7760,6 +7787,7 @@ var createVoiceSession = (options) => {
|
|
|
7760
7787
|
turnId: turn.id
|
|
7761
7788
|
});
|
|
7762
7789
|
await activeTTSSession.send(output.assistantText);
|
|
7790
|
+
lastTtsSendAt = Date.now();
|
|
7763
7791
|
if (options.costAccountant) {
|
|
7764
7792
|
options.costAccountant.recordTTS({
|
|
7765
7793
|
characters: output.assistantText.length
|
|
@@ -8184,10 +8212,12 @@ var createVoiceSession = (options) => {
|
|
|
8184
8212
|
if (greetingTTSSession) {
|
|
8185
8213
|
activeTTSTurnId = greetingTurnId;
|
|
8186
8214
|
await greetingTTSSession.send(greetingText);
|
|
8215
|
+
lastTtsSendAt = Date.now();
|
|
8187
8216
|
} else if (options.realtime) {
|
|
8188
8217
|
const greetingRealtimeSession = await ensureAdapter();
|
|
8189
8218
|
activeTTSTurnId = greetingTurnId;
|
|
8190
8219
|
await greetingRealtimeSession.send(greetingText);
|
|
8220
|
+
lastTtsSendAt = Date.now();
|
|
8191
8221
|
}
|
|
8192
8222
|
} catch {}
|
|
8193
8223
|
}
|