@absolutejs/voice 0.0.22-beta.573 → 0.0.22-beta.575

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,9 @@ type MinimalAudioBufferSourceNode = {
8
8
  connect: (destination: unknown) => void;
9
9
  disconnect?: () => void;
10
10
  onended: (() => void) | null;
11
+ playbackRate?: {
12
+ value: number;
13
+ };
11
14
  start: (when?: number) => void;
12
15
  stop?: () => void;
13
16
  };
@@ -1536,6 +1536,9 @@ var createVoiceController = (path, options = {}) => {
1536
1536
  // src/client/audioPlayer.ts
1537
1537
  var DEFAULT_LOOKAHEAD_MS = 15;
1538
1538
  var DEFAULT_VOLUME = 1;
1539
+ var DEFAULT_PLAYBACK_RATE = 1;
1540
+ var MIN_PLAYBACK_RATE = 0.5;
1541
+ var MAX_PLAYBACK_RATE = 2;
1539
1542
  var createInitialState3 = () => ({
1540
1543
  activeSourceCount: 0,
1541
1544
  error: null,
@@ -1558,6 +1561,12 @@ var clampVolume = (volume) => {
1558
1561
  }
1559
1562
  return Math.min(1, Math.max(0, volume));
1560
1563
  };
1564
+ var clampPlaybackRate = (rate) => {
1565
+ if (typeof rate !== "number" || !Number.isFinite(rate)) {
1566
+ return DEFAULT_PLAYBACK_RATE;
1567
+ }
1568
+ return Math.min(MAX_PLAYBACK_RATE, Math.max(MIN_PLAYBACK_RATE, rate));
1569
+ };
1561
1570
  var decodePCM16LEChunk = (audioContext, chunk) => {
1562
1571
  const { format } = chunk;
1563
1572
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
@@ -1591,6 +1600,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1591
1600
  let audioContext = null;
1592
1601
  let outputNode = null;
1593
1602
  let volume = clampVolume(options.volume);
1603
+ let playbackRate = clampPlaybackRate(options.playbackRate);
1594
1604
  let queueEndTime = 0;
1595
1605
  let syncPromise = Promise.resolve();
1596
1606
  let interruptStartedAt = null;
@@ -1692,6 +1702,9 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1692
1702
  const buffer = decodePCM16LEChunk(context, chunk);
1693
1703
  const node = context.createBufferSource();
1694
1704
  node.buffer = buffer;
1705
+ if (node.playbackRate) {
1706
+ node.playbackRate.value = playbackRate;
1707
+ }
1695
1708
  node.connect(outputNode ?? context.destination);
1696
1709
  node.onended = () => {
1697
1710
  sourceNodes.delete(node);
@@ -1703,7 +1716,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1703
1716
  maybeResolveInterrupt();
1704
1717
  };
1705
1718
  const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
1706
- queueEndTime = startAt + buffer.duration;
1719
+ queueEndTime = startAt + buffer.duration / playbackRate;
1707
1720
  sourceNodes.add(node);
1708
1721
  setState({
1709
1722
  activeSourceCount: sourceNodes.size,
@@ -1848,12 +1861,18 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1848
1861
  isPlaying: false
1849
1862
  });
1850
1863
  },
1864
+ get playbackRate() {
1865
+ return playbackRate;
1866
+ },
1851
1867
  get processedChunkCount() {
1852
1868
  return state.processedChunkCount;
1853
1869
  },
1854
1870
  get queuedChunkCount() {
1855
1871
  return state.queuedChunkCount;
1856
1872
  },
1873
+ setPlaybackRate: (nextRate) => {
1874
+ playbackRate = clampPlaybackRate(nextRate);
1875
+ },
1857
1876
  setVolume: (nextVolume) => {
1858
1877
  volume = clampVolume(nextVolume);
1859
1878
  applyOutputGain(audioContext);
@@ -373,6 +373,9 @@ var createVoiceConnection = (path, options = {}) => {
373
373
  // src/client/audioPlayer.ts
374
374
  var DEFAULT_LOOKAHEAD_MS = 15;
375
375
  var DEFAULT_VOLUME = 1;
376
+ var DEFAULT_PLAYBACK_RATE = 1;
377
+ var MIN_PLAYBACK_RATE = 0.5;
378
+ var MAX_PLAYBACK_RATE = 2;
376
379
  var createInitialState = () => ({
377
380
  activeSourceCount: 0,
378
381
  error: null,
@@ -395,6 +398,12 @@ var clampVolume = (volume) => {
395
398
  }
396
399
  return Math.min(1, Math.max(0, volume));
397
400
  };
401
+ var clampPlaybackRate = (rate) => {
402
+ if (typeof rate !== "number" || !Number.isFinite(rate)) {
403
+ return DEFAULT_PLAYBACK_RATE;
404
+ }
405
+ return Math.min(MAX_PLAYBACK_RATE, Math.max(MIN_PLAYBACK_RATE, rate));
406
+ };
398
407
  var decodePCM16LEChunk = (audioContext, chunk) => {
399
408
  const { format } = chunk;
400
409
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
@@ -428,6 +437,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
428
437
  let audioContext = null;
429
438
  let outputNode = null;
430
439
  let volume = clampVolume(options.volume);
440
+ let playbackRate = clampPlaybackRate(options.playbackRate);
431
441
  let queueEndTime = 0;
432
442
  let syncPromise = Promise.resolve();
433
443
  let interruptStartedAt = null;
@@ -529,6 +539,9 @@ var createVoiceAudioPlayer = (source, options = {}) => {
529
539
  const buffer = decodePCM16LEChunk(context, chunk);
530
540
  const node = context.createBufferSource();
531
541
  node.buffer = buffer;
542
+ if (node.playbackRate) {
543
+ node.playbackRate.value = playbackRate;
544
+ }
532
545
  node.connect(outputNode ?? context.destination);
533
546
  node.onended = () => {
534
547
  sourceNodes.delete(node);
@@ -540,7 +553,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
540
553
  maybeResolveInterrupt();
541
554
  };
542
555
  const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
543
- queueEndTime = startAt + buffer.duration;
556
+ queueEndTime = startAt + buffer.duration / playbackRate;
544
557
  sourceNodes.add(node);
545
558
  setState({
546
559
  activeSourceCount: sourceNodes.size,
@@ -685,12 +698,18 @@ var createVoiceAudioPlayer = (source, options = {}) => {
685
698
  isPlaying: false
686
699
  });
687
700
  },
701
+ get playbackRate() {
702
+ return playbackRate;
703
+ },
688
704
  get processedChunkCount() {
689
705
  return state.processedChunkCount;
690
706
  },
691
707
  get queuedChunkCount() {
692
708
  return state.queuedChunkCount;
693
709
  },
710
+ setPlaybackRate: (nextRate) => {
711
+ playbackRate = clampPlaybackRate(nextRate);
712
+ },
694
713
  setVolume: (nextVolume) => {
695
714
  volume = clampVolume(nextVolume);
696
715
  applyOutputGain(audioContext);
@@ -1183,6 +1183,14 @@ export type VoiceAudioPlayerOptions = {
1183
1183
  autoStart?: boolean;
1184
1184
  createAudioContext?: () => AudioContext;
1185
1185
  lookaheadMs?: number;
1186
+ /**
1187
+ * Playback speed multiplier for the assistant's speech. 1 = normal. Clamped
1188
+ * to [0.5, 2]. Pitch shifts with the rate (Web Audio playbackRate), so keep
1189
+ * UI ranges modest (≈0.85–1.25) to stay natural. Can be changed live via
1190
+ * setPlaybackRate — already-scheduled chunks keep their rate; new chunks
1191
+ * adopt the new one.
1192
+ */
1193
+ playbackRate?: number;
1186
1194
  volume?: number;
1187
1195
  };
1188
1196
  export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
@@ -1313,8 +1321,10 @@ export type VoiceAudioPlayer = {
1313
1321
  lastInterruptLatencyMs?: number;
1314
1322
  lastPlaybackStopLatencyMs?: number;
1315
1323
  pause: () => Promise<void>;
1324
+ playbackRate: number;
1316
1325
  processedChunkCount: number;
1317
1326
  queuedChunkCount: number;
1327
+ setPlaybackRate: (rate: number) => void;
1318
1328
  setVolume: (volume: number) => void;
1319
1329
  start: () => Promise<void>;
1320
1330
  subscribe: (subscriber: () => void) => () => void;
package/dist/index.js CHANGED
@@ -3870,6 +3870,7 @@ var createVoiceSession = (options) => {
3870
3870
  let adapterGenerationCounter = 0;
3871
3871
  let activeAdapterGeneration = 0;
3872
3872
  let activeTTSTurnId;
3873
+ let assistantSpeechEndsAt = 0;
3873
3874
  let fillerTimer = null;
3874
3875
  let fillerActive = false;
3875
3876
  let fillerToken = 0;
@@ -4262,6 +4263,7 @@ var createVoiceSession = (options) => {
4262
4263
  return;
4263
4264
  }
4264
4265
  activeTTSTurnId = undefined;
4266
+ assistantSpeechEndsAt = Date.now();
4265
4267
  appendTurnLatencyStage({
4266
4268
  metadata: { reason },
4267
4269
  stage: "tts_canceled",
@@ -4304,6 +4306,12 @@ var createVoiceSession = (options) => {
4304
4306
  turnId: activeTTSTurnId,
4305
4307
  type: "audio"
4306
4308
  });
4309
+ const bytesPerSample = input.format.encoding === "pcm_s16le" ? 2 : 1;
4310
+ const bytesPerSecond = input.format.sampleRateHz * input.format.channels * bytesPerSample;
4311
+ if (bytesPerSecond > 0) {
4312
+ const chunkMs = normalizedChunk.byteLength / bytesPerSecond * 1000;
4313
+ assistantSpeechEndsAt = Math.max(assistantSpeechEndsAt, Date.now()) + chunkMs;
4314
+ }
4307
4315
  if (activeTTSTurnId) {
4308
4316
  await appendTurnLatencyStage({
4309
4317
  at: input.receivedAt,
@@ -4413,6 +4421,20 @@ var createVoiceSession = (options) => {
4413
4421
  session
4414
4422
  });
4415
4423
  };
4424
+ const DRAIN_POLL_MS = 200;
4425
+ const DRAIN_TAIL_BUFFER_MS = 300;
4426
+ const DRAIN_MAX_MS = 12000;
4427
+ const drainAssistantSpeech = async () => {
4428
+ const startedAt = Date.now();
4429
+ while (Date.now() - startedAt < DRAIN_MAX_MS) {
4430
+ const remaining = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS - Date.now();
4431
+ if (remaining <= 0)
4432
+ return;
4433
+ await new Promise((resolve) => {
4434
+ setTimeout(resolve, Math.min(remaining, DRAIN_POLL_MS));
4435
+ });
4436
+ }
4437
+ };
4416
4438
  const completeInternal = async (result, input = {}) => {
4417
4439
  clearSilenceTimer();
4418
4440
  const disposition = input.disposition ?? "completed";
@@ -4446,6 +4468,9 @@ var createVoiceSession = (options) => {
4446
4468
  if (!didComplete) {
4447
4469
  return;
4448
4470
  }
4471
+ if (disposition === "completed") {
4472
+ await drainAssistantSpeech();
4473
+ }
4449
4474
  await appendTrace({
4450
4475
  payload: {
4451
4476
  disposition,
@@ -1580,6 +1580,9 @@ var buildSessionCorrectionAudit = (raw, generic, experimental, benchmarkSeeded,
1580
1580
  // src/client/audioPlayer.ts
1581
1581
  var DEFAULT_LOOKAHEAD_MS = 15;
1582
1582
  var DEFAULT_VOLUME = 1;
1583
+ var DEFAULT_PLAYBACK_RATE = 1;
1584
+ var MIN_PLAYBACK_RATE = 0.5;
1585
+ var MAX_PLAYBACK_RATE = 2;
1583
1586
  var createInitialState = () => ({
1584
1587
  activeSourceCount: 0,
1585
1588
  error: null,
@@ -1602,6 +1605,12 @@ var clampVolume = (volume) => {
1602
1605
  }
1603
1606
  return Math.min(1, Math.max(0, volume));
1604
1607
  };
1608
+ var clampPlaybackRate = (rate) => {
1609
+ if (typeof rate !== "number" || !Number.isFinite(rate)) {
1610
+ return DEFAULT_PLAYBACK_RATE;
1611
+ }
1612
+ return Math.min(MAX_PLAYBACK_RATE, Math.max(MIN_PLAYBACK_RATE, rate));
1613
+ };
1605
1614
  var decodePCM16LEChunk = (audioContext, chunk) => {
1606
1615
  const { format } = chunk;
1607
1616
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
@@ -1635,6 +1644,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1635
1644
  let audioContext = null;
1636
1645
  let outputNode = null;
1637
1646
  let volume = clampVolume(options.volume);
1647
+ let playbackRate = clampPlaybackRate(options.playbackRate);
1638
1648
  let queueEndTime = 0;
1639
1649
  let syncPromise = Promise.resolve();
1640
1650
  let interruptStartedAt = null;
@@ -1736,6 +1746,9 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1736
1746
  const buffer = decodePCM16LEChunk(context, chunk);
1737
1747
  const node = context.createBufferSource();
1738
1748
  node.buffer = buffer;
1749
+ if (node.playbackRate) {
1750
+ node.playbackRate.value = playbackRate;
1751
+ }
1739
1752
  node.connect(outputNode ?? context.destination);
1740
1753
  node.onended = () => {
1741
1754
  sourceNodes.delete(node);
@@ -1747,7 +1760,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1747
1760
  maybeResolveInterrupt();
1748
1761
  };
1749
1762
  const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
1750
- queueEndTime = startAt + buffer.duration;
1763
+ queueEndTime = startAt + buffer.duration / playbackRate;
1751
1764
  sourceNodes.add(node);
1752
1765
  setState({
1753
1766
  activeSourceCount: sourceNodes.size,
@@ -1892,12 +1905,18 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1892
1905
  isPlaying: false
1893
1906
  });
1894
1907
  },
1908
+ get playbackRate() {
1909
+ return playbackRate;
1910
+ },
1895
1911
  get processedChunkCount() {
1896
1912
  return state.processedChunkCount;
1897
1913
  },
1898
1914
  get queuedChunkCount() {
1899
1915
  return state.queuedChunkCount;
1900
1916
  },
1917
+ setPlaybackRate: (nextRate) => {
1918
+ playbackRate = clampPlaybackRate(nextRate);
1919
+ },
1901
1920
  setVolume: (nextVolume) => {
1902
1921
  volume = clampVolume(nextVolume);
1903
1922
  applyOutputGain(audioContext);
@@ -5786,6 +5805,7 @@ var createVoiceSession = (options) => {
5786
5805
  let adapterGenerationCounter = 0;
5787
5806
  let activeAdapterGeneration = 0;
5788
5807
  let activeTTSTurnId;
5808
+ let assistantSpeechEndsAt = 0;
5789
5809
  let fillerTimer = null;
5790
5810
  let fillerActive = false;
5791
5811
  let fillerToken = 0;
@@ -6178,6 +6198,7 @@ var createVoiceSession = (options) => {
6178
6198
  return;
6179
6199
  }
6180
6200
  activeTTSTurnId = undefined;
6201
+ assistantSpeechEndsAt = Date.now();
6181
6202
  appendTurnLatencyStage({
6182
6203
  metadata: { reason },
6183
6204
  stage: "tts_canceled",
@@ -6220,6 +6241,12 @@ var createVoiceSession = (options) => {
6220
6241
  turnId: activeTTSTurnId,
6221
6242
  type: "audio"
6222
6243
  });
6244
+ const bytesPerSample = input.format.encoding === "pcm_s16le" ? 2 : 1;
6245
+ const bytesPerSecond = input.format.sampleRateHz * input.format.channels * bytesPerSample;
6246
+ if (bytesPerSecond > 0) {
6247
+ const chunkMs = normalizedChunk.byteLength / bytesPerSecond * 1000;
6248
+ assistantSpeechEndsAt = Math.max(assistantSpeechEndsAt, Date.now()) + chunkMs;
6249
+ }
6223
6250
  if (activeTTSTurnId) {
6224
6251
  await appendTurnLatencyStage({
6225
6252
  at: input.receivedAt,
@@ -6329,6 +6356,20 @@ var createVoiceSession = (options) => {
6329
6356
  session
6330
6357
  });
6331
6358
  };
6359
+ const DRAIN_POLL_MS = 200;
6360
+ const DRAIN_TAIL_BUFFER_MS = 300;
6361
+ const DRAIN_MAX_MS = 12000;
6362
+ const drainAssistantSpeech = async () => {
6363
+ const startedAt = Date.now();
6364
+ while (Date.now() - startedAt < DRAIN_MAX_MS) {
6365
+ const remaining = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS - Date.now();
6366
+ if (remaining <= 0)
6367
+ return;
6368
+ await new Promise((resolve2) => {
6369
+ setTimeout(resolve2, Math.min(remaining, DRAIN_POLL_MS));
6370
+ });
6371
+ }
6372
+ };
6332
6373
  const completeInternal = async (result, input = {}) => {
6333
6374
  clearSilenceTimer();
6334
6375
  const disposition = input.disposition ?? "completed";
@@ -6362,6 +6403,9 @@ var createVoiceSession = (options) => {
6362
6403
  if (!didComplete) {
6363
6404
  return;
6364
6405
  }
6406
+ if (disposition === "completed") {
6407
+ await drainAssistantSpeech();
6408
+ }
6365
6409
  await appendTrace({
6366
6410
  payload: {
6367
6411
  disposition,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.573",
3
+ "version": "0.0.22-beta.575",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",