@absolutejs/voice 0.0.22-beta.580 → 0.0.22-beta.581

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3936,6 +3936,8 @@ var createVoiceSession = (options) => {
3936
3936
  let activeAdapterGeneration = 0;
3937
3937
  let activeTTSTurnId;
3938
3938
  let assistantSpeechEndsAt = 0;
3939
+ let lastAssistantAudioAt = 0;
3940
+ let lastTtsSendAt = 0;
3939
3941
  let fillerTimer = null;
3940
3942
  let fillerActive = false;
3941
3943
  let fillerToken = 0;
@@ -4209,6 +4211,15 @@ var createVoiceSession = (options) => {
4209
4211
  });
4210
4212
  return result;
4211
4213
  };
4214
+ let assistantAudioQueue = Promise.resolve();
4215
+ const runAudioSerial = (operation) => {
4216
+ const next = assistantAudioQueue.then(operation);
4217
+ assistantAudioQueue = next.then(() => {
4218
+ return;
4219
+ }, () => {
4220
+ return;
4221
+ });
4222
+ };
4212
4223
  const closeAdapter = async (reason) => {
4213
4224
  if (!sttSession) {
4214
4225
  return;
@@ -4377,6 +4388,7 @@ var createVoiceSession = (options) => {
4377
4388
  const chunkMs = normalizedChunk.byteLength / bytesPerSecond * 1000;
4378
4389
  assistantSpeechEndsAt = Math.max(assistantSpeechEndsAt, Date.now()) + chunkMs;
4379
4390
  }
4391
+ lastAssistantAudioAt = Date.now();
4380
4392
  if (activeTTSTurnId) {
4381
4393
  await appendTurnLatencyStage({
4382
4394
  at: input.receivedAt,
@@ -4486,18 +4498,28 @@ var createVoiceSession = (options) => {
4486
4498
  session
4487
4499
  });
4488
4500
  };
4489
- const DRAIN_POLL_MS = 200;
4501
+ const DRAIN_POLL_MS = 100;
4490
4502
  const DRAIN_TAIL_BUFFER_MS = 300;
4491
- const DRAIN_MAX_MS = 12000;
4492
- const drainAssistantSpeech = async () => {
4503
+ const DRAIN_QUIET_MS = 600;
4504
+ const DRAIN_RENDER_START_MS = 4000;
4505
+ const DRAIN_MAX_MS = 20000;
4506
+ const drainAssistantSpeech = async (renderPendingSince) => {
4493
4507
  const startedAt = Date.now();
4508
+ const sleep3 = (delayMs) => new Promise((resolve) => {
4509
+ setTimeout(resolve, delayMs);
4510
+ });
4494
4511
  while (Date.now() - startedAt < DRAIN_MAX_MS) {
4495
- const remaining = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS - Date.now();
4496
- if (remaining <= 0)
4512
+ const now = Date.now();
4513
+ const renderStarted = lastAssistantAudioAt >= renderPendingSince || now - renderPendingSince >= DRAIN_RENDER_START_MS;
4514
+ if (!renderStarted) {
4515
+ await sleep3(DRAIN_POLL_MS);
4516
+ continue;
4517
+ }
4518
+ const streamQuiet = now - lastAssistantAudioAt >= DRAIN_QUIET_MS;
4519
+ const playbackDrained = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS <= now;
4520
+ if (streamQuiet && playbackDrained)
4497
4521
  return;
4498
- await new Promise((resolve) => {
4499
- setTimeout(resolve, Math.min(remaining, DRAIN_POLL_MS));
4500
- });
4522
+ await sleep3(DRAIN_POLL_MS);
4501
4523
  }
4502
4524
  };
4503
4525
  const completeInternal = async (result, input = {}) => {
@@ -4534,7 +4556,8 @@ var createVoiceSession = (options) => {
4534
4556
  return;
4535
4557
  }
4536
4558
  if (disposition === "completed") {
4537
- await drainAssistantSpeech();
4559
+ await drainAssistantSpeech(lastTtsSendAt);
4560
+ await assistantAudioQueue;
4538
4561
  }
4539
4562
  await appendTrace({
4540
4563
  payload: {
@@ -5204,7 +5227,10 @@ var createVoiceSession = (options) => {
5204
5227
  });
5205
5228
  if (options.realtime) {
5206
5229
  openedSession.on("audio", ({ chunk, format, receivedAt }) => {
5207
- runAdapterEvent("adapter.audio", async () => {
5230
+ runAudioSerial(async () => {
5231
+ if (activeAdapterGeneration !== generation) {
5232
+ return;
5233
+ }
5208
5234
  await sendAssistantAudio(chunk, {
5209
5235
  format,
5210
5236
  receivedAt
@@ -5233,7 +5259,7 @@ var createVoiceSession = (options) => {
5233
5259
  });
5234
5260
  ttsSession = openedSession;
5235
5261
  openedSession.on("audio", ({ chunk, format, receivedAt }) => {
5236
- runSerial("tts.audio", async () => {
5262
+ runAudioSerial(async () => {
5237
5263
  if (ttsSession !== openedSession) {
5238
5264
  return;
5239
5265
  }
@@ -5361,6 +5387,7 @@ var createVoiceSession = (options) => {
5361
5387
  try {
5362
5388
  await ttsSession2.send(text);
5363
5389
  charsSent += text.length;
5390
+ lastTtsSendAt = Date.now();
5364
5391
  } catch (error) {
5365
5392
  logger.warn("voice assistant audio send failed", {
5366
5393
  error: toError(error).message,
@@ -5643,6 +5670,7 @@ var createVoiceSession = (options) => {
5643
5670
  turnId: turn.id
5644
5671
  });
5645
5672
  await activeTTSSession.send(output.assistantText);
5673
+ lastTtsSendAt = Date.now();
5646
5674
  if (options.costAccountant) {
5647
5675
  options.costAccountant.recordTTS({
5648
5676
  characters: output.assistantText.length
@@ -6067,10 +6095,12 @@ var createVoiceSession = (options) => {
6067
6095
  if (greetingTTSSession) {
6068
6096
  activeTTSTurnId = greetingTurnId;
6069
6097
  await greetingTTSSession.send(greetingText);
6098
+ lastTtsSendAt = Date.now();
6070
6099
  } else if (options.realtime) {
6071
6100
  const greetingRealtimeSession = await ensureAdapter();
6072
6101
  activeTTSTurnId = greetingTurnId;
6073
6102
  await greetingRealtimeSession.send(greetingText);
6103
+ lastTtsSendAt = Date.now();
6074
6104
  }
6075
6105
  } catch {}
6076
6106
  }
@@ -6053,6 +6053,8 @@ var createVoiceSession = (options) => {
6053
6053
  let activeAdapterGeneration = 0;
6054
6054
  let activeTTSTurnId;
6055
6055
  let assistantSpeechEndsAt = 0;
6056
+ let lastAssistantAudioAt = 0;
6057
+ let lastTtsSendAt = 0;
6056
6058
  let fillerTimer = null;
6057
6059
  let fillerActive = false;
6058
6060
  let fillerToken = 0;
@@ -6326,6 +6328,15 @@ var createVoiceSession = (options) => {
6326
6328
  });
6327
6329
  return result;
6328
6330
  };
6331
+ let assistantAudioQueue = Promise.resolve();
6332
+ const runAudioSerial = (operation) => {
6333
+ const next = assistantAudioQueue.then(operation);
6334
+ assistantAudioQueue = next.then(() => {
6335
+ return;
6336
+ }, () => {
6337
+ return;
6338
+ });
6339
+ };
6329
6340
  const closeAdapter = async (reason) => {
6330
6341
  if (!sttSession) {
6331
6342
  return;
@@ -6494,6 +6505,7 @@ var createVoiceSession = (options) => {
6494
6505
  const chunkMs = normalizedChunk.byteLength / bytesPerSecond * 1000;
6495
6506
  assistantSpeechEndsAt = Math.max(assistantSpeechEndsAt, Date.now()) + chunkMs;
6496
6507
  }
6508
+ lastAssistantAudioAt = Date.now();
6497
6509
  if (activeTTSTurnId) {
6498
6510
  await appendTurnLatencyStage({
6499
6511
  at: input.receivedAt,
@@ -6603,18 +6615,28 @@ var createVoiceSession = (options) => {
6603
6615
  session
6604
6616
  });
6605
6617
  };
6606
- const DRAIN_POLL_MS = 200;
6618
+ const DRAIN_POLL_MS = 100;
6607
6619
  const DRAIN_TAIL_BUFFER_MS = 300;
6608
- const DRAIN_MAX_MS = 12000;
6609
- const drainAssistantSpeech = async () => {
6620
+ const DRAIN_QUIET_MS = 600;
6621
+ const DRAIN_RENDER_START_MS = 4000;
6622
+ const DRAIN_MAX_MS = 20000;
6623
+ const drainAssistantSpeech = async (renderPendingSince) => {
6610
6624
  const startedAt = Date.now();
6625
+ const sleep2 = (delayMs) => new Promise((resolve2) => {
6626
+ setTimeout(resolve2, delayMs);
6627
+ });
6611
6628
  while (Date.now() - startedAt < DRAIN_MAX_MS) {
6612
- const remaining = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS - Date.now();
6613
- if (remaining <= 0)
6629
+ const now = Date.now();
6630
+ const renderStarted = lastAssistantAudioAt >= renderPendingSince || now - renderPendingSince >= DRAIN_RENDER_START_MS;
6631
+ if (!renderStarted) {
6632
+ await sleep2(DRAIN_POLL_MS);
6633
+ continue;
6634
+ }
6635
+ const streamQuiet = now - lastAssistantAudioAt >= DRAIN_QUIET_MS;
6636
+ const playbackDrained = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS <= now;
6637
+ if (streamQuiet && playbackDrained)
6614
6638
  return;
6615
- await new Promise((resolve2) => {
6616
- setTimeout(resolve2, Math.min(remaining, DRAIN_POLL_MS));
6617
- });
6639
+ await sleep2(DRAIN_POLL_MS);
6618
6640
  }
6619
6641
  };
6620
6642
  const completeInternal = async (result, input = {}) => {
@@ -6651,7 +6673,8 @@ var createVoiceSession = (options) => {
6651
6673
  return;
6652
6674
  }
6653
6675
  if (disposition === "completed") {
6654
- await drainAssistantSpeech();
6676
+ await drainAssistantSpeech(lastTtsSendAt);
6677
+ await assistantAudioQueue;
6655
6678
  }
6656
6679
  await appendTrace({
6657
6680
  payload: {
@@ -7321,7 +7344,10 @@ var createVoiceSession = (options) => {
7321
7344
  });
7322
7345
  if (options.realtime) {
7323
7346
  openedSession.on("audio", ({ chunk, format, receivedAt }) => {
7324
- runAdapterEvent("adapter.audio", async () => {
7347
+ runAudioSerial(async () => {
7348
+ if (activeAdapterGeneration !== generation) {
7349
+ return;
7350
+ }
7325
7351
  await sendAssistantAudio(chunk, {
7326
7352
  format,
7327
7353
  receivedAt
@@ -7350,7 +7376,7 @@ var createVoiceSession = (options) => {
7350
7376
  });
7351
7377
  ttsSession = openedSession;
7352
7378
  openedSession.on("audio", ({ chunk, format, receivedAt }) => {
7353
- runSerial("tts.audio", async () => {
7379
+ runAudioSerial(async () => {
7354
7380
  if (ttsSession !== openedSession) {
7355
7381
  return;
7356
7382
  }
@@ -7478,6 +7504,7 @@ var createVoiceSession = (options) => {
7478
7504
  try {
7479
7505
  await ttsSession2.send(text);
7480
7506
  charsSent += text.length;
7507
+ lastTtsSendAt = Date.now();
7481
7508
  } catch (error) {
7482
7509
  logger.warn("voice assistant audio send failed", {
7483
7510
  error: toError(error).message,
@@ -7760,6 +7787,7 @@ var createVoiceSession = (options) => {
7760
7787
  turnId: turn.id
7761
7788
  });
7762
7789
  await activeTTSSession.send(output.assistantText);
7790
+ lastTtsSendAt = Date.now();
7763
7791
  if (options.costAccountant) {
7764
7792
  options.costAccountant.recordTTS({
7765
7793
  characters: output.assistantText.length
@@ -8184,10 +8212,12 @@ var createVoiceSession = (options) => {
8184
8212
  if (greetingTTSSession) {
8185
8213
  activeTTSTurnId = greetingTurnId;
8186
8214
  await greetingTTSSession.send(greetingText);
8215
+ lastTtsSendAt = Date.now();
8187
8216
  } else if (options.realtime) {
8188
8217
  const greetingRealtimeSession = await ensureAdapter();
8189
8218
  activeTTSTurnId = greetingTurnId;
8190
8219
  await greetingRealtimeSession.send(greetingText);
8220
+ lastTtsSendAt = Date.now();
8191
8221
  }
8192
8222
  } catch {}
8193
8223
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.580",
3
+ "version": "0.0.22-beta.581",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",