@absolutejs/voice 0.0.22-beta.574 → 0.0.22-beta.576

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,9 @@ type MinimalAudioBufferSourceNode = {
8
8
  connect: (destination: unknown) => void;
9
9
  disconnect?: () => void;
10
10
  onended: (() => void) | null;
11
+ playbackRate?: {
12
+ value: number;
13
+ };
11
14
  start: (when?: number) => void;
12
15
  stop?: () => void;
13
16
  };
@@ -1536,6 +1536,9 @@ var createVoiceController = (path, options = {}) => {
1536
1536
  // src/client/audioPlayer.ts
1537
1537
  var DEFAULT_LOOKAHEAD_MS = 15;
1538
1538
  var DEFAULT_VOLUME = 1;
1539
+ var DEFAULT_PLAYBACK_RATE = 1;
1540
+ var MIN_PLAYBACK_RATE = 0.5;
1541
+ var MAX_PLAYBACK_RATE = 2;
1539
1542
  var createInitialState3 = () => ({
1540
1543
  activeSourceCount: 0,
1541
1544
  error: null,
@@ -1558,6 +1561,12 @@ var clampVolume = (volume) => {
1558
1561
  }
1559
1562
  return Math.min(1, Math.max(0, volume));
1560
1563
  };
1564
+ var clampPlaybackRate = (rate) => {
1565
+ if (typeof rate !== "number" || !Number.isFinite(rate)) {
1566
+ return DEFAULT_PLAYBACK_RATE;
1567
+ }
1568
+ return Math.min(MAX_PLAYBACK_RATE, Math.max(MIN_PLAYBACK_RATE, rate));
1569
+ };
1561
1570
  var decodePCM16LEChunk = (audioContext, chunk) => {
1562
1571
  const { format } = chunk;
1563
1572
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
@@ -1591,6 +1600,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1591
1600
  let audioContext = null;
1592
1601
  let outputNode = null;
1593
1602
  let volume = clampVolume(options.volume);
1603
+ let playbackRate = clampPlaybackRate(options.playbackRate);
1594
1604
  let queueEndTime = 0;
1595
1605
  let syncPromise = Promise.resolve();
1596
1606
  let interruptStartedAt = null;
@@ -1692,6 +1702,9 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1692
1702
  const buffer = decodePCM16LEChunk(context, chunk);
1693
1703
  const node = context.createBufferSource();
1694
1704
  node.buffer = buffer;
1705
+ if (node.playbackRate) {
1706
+ node.playbackRate.value = playbackRate;
1707
+ }
1695
1708
  node.connect(outputNode ?? context.destination);
1696
1709
  node.onended = () => {
1697
1710
  sourceNodes.delete(node);
@@ -1703,7 +1716,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1703
1716
  maybeResolveInterrupt();
1704
1717
  };
1705
1718
  const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
1706
- queueEndTime = startAt + buffer.duration;
1719
+ queueEndTime = startAt + buffer.duration / playbackRate;
1707
1720
  sourceNodes.add(node);
1708
1721
  setState({
1709
1722
  activeSourceCount: sourceNodes.size,
@@ -1848,12 +1861,18 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1848
1861
  isPlaying: false
1849
1862
  });
1850
1863
  },
1864
+ get playbackRate() {
1865
+ return playbackRate;
1866
+ },
1851
1867
  get processedChunkCount() {
1852
1868
  return state.processedChunkCount;
1853
1869
  },
1854
1870
  get queuedChunkCount() {
1855
1871
  return state.queuedChunkCount;
1856
1872
  },
1873
+ setPlaybackRate: (nextRate) => {
1874
+ playbackRate = clampPlaybackRate(nextRate);
1875
+ },
1857
1876
  setVolume: (nextVolume) => {
1858
1877
  volume = clampVolume(nextVolume);
1859
1878
  applyOutputGain(audioContext);
@@ -373,6 +373,9 @@ var createVoiceConnection = (path, options = {}) => {
373
373
  // src/client/audioPlayer.ts
374
374
  var DEFAULT_LOOKAHEAD_MS = 15;
375
375
  var DEFAULT_VOLUME = 1;
376
+ var DEFAULT_PLAYBACK_RATE = 1;
377
+ var MIN_PLAYBACK_RATE = 0.5;
378
+ var MAX_PLAYBACK_RATE = 2;
376
379
  var createInitialState = () => ({
377
380
  activeSourceCount: 0,
378
381
  error: null,
@@ -395,6 +398,12 @@ var clampVolume = (volume) => {
395
398
  }
396
399
  return Math.min(1, Math.max(0, volume));
397
400
  };
401
+ var clampPlaybackRate = (rate) => {
402
+ if (typeof rate !== "number" || !Number.isFinite(rate)) {
403
+ return DEFAULT_PLAYBACK_RATE;
404
+ }
405
+ return Math.min(MAX_PLAYBACK_RATE, Math.max(MIN_PLAYBACK_RATE, rate));
406
+ };
398
407
  var decodePCM16LEChunk = (audioContext, chunk) => {
399
408
  const { format } = chunk;
400
409
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
@@ -428,6 +437,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
428
437
  let audioContext = null;
429
438
  let outputNode = null;
430
439
  let volume = clampVolume(options.volume);
440
+ let playbackRate = clampPlaybackRate(options.playbackRate);
431
441
  let queueEndTime = 0;
432
442
  let syncPromise = Promise.resolve();
433
443
  let interruptStartedAt = null;
@@ -529,6 +539,9 @@ var createVoiceAudioPlayer = (source, options = {}) => {
529
539
  const buffer = decodePCM16LEChunk(context, chunk);
530
540
  const node = context.createBufferSource();
531
541
  node.buffer = buffer;
542
+ if (node.playbackRate) {
543
+ node.playbackRate.value = playbackRate;
544
+ }
532
545
  node.connect(outputNode ?? context.destination);
533
546
  node.onended = () => {
534
547
  sourceNodes.delete(node);
@@ -540,7 +553,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
540
553
  maybeResolveInterrupt();
541
554
  };
542
555
  const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
543
- queueEndTime = startAt + buffer.duration;
556
+ queueEndTime = startAt + buffer.duration / playbackRate;
544
557
  sourceNodes.add(node);
545
558
  setState({
546
559
  activeSourceCount: sourceNodes.size,
@@ -685,12 +698,18 @@ var createVoiceAudioPlayer = (source, options = {}) => {
685
698
  isPlaying: false
686
699
  });
687
700
  },
701
+ get playbackRate() {
702
+ return playbackRate;
703
+ },
688
704
  get processedChunkCount() {
689
705
  return state.processedChunkCount;
690
706
  },
691
707
  get queuedChunkCount() {
692
708
  return state.queuedChunkCount;
693
709
  },
710
+ setPlaybackRate: (nextRate) => {
711
+ playbackRate = clampPlaybackRate(nextRate);
712
+ },
694
713
  setVolume: (nextVolume) => {
695
714
  volume = clampVolume(nextVolume);
696
715
  applyOutputGain(audioContext);
@@ -1,4 +1,21 @@
1
- import type { TTSAdapter, TTSAdapterOpenOptions } from "./types";
1
+ import type { TTSAdapter, TTSAdapterOpenOptions, TTSAudioEvent } from "./types";
2
+ /**
3
+ * Optional persistent backing store for the cache — an L2 behind the in-memory
4
+ * LRU. Lets rendered audio survive process restarts/deploys so a fixed prompt
5
+ * (e.g. a greeting) is synthesized once *ever* per content key, not once per
6
+ * process. The store is content-addressed by the same `keyFor` key, so a
7
+ * changed prompt/voice/model naturally lands on a new key and re-renders.
8
+ *
9
+ * The store is told `TTSAudioEvent[]` and must return the same on read; how it
10
+ * serializes the binary `chunk`s (base64 in JSON, bytea, a file, etc.) is up to
11
+ * the implementation. `get` returns `null`/`undefined` on a miss. Both may be
12
+ * sync or async; errors should be swallowed by the implementation (a store
13
+ * failure must never break playback — the wrapper falls back to live render).
14
+ */
15
+ export type CachedTTSStore = {
16
+ get: (key: string) => Promise<TTSAudioEvent[] | null | undefined> | TTSAudioEvent[] | null | undefined;
17
+ set: (key: string, events: TTSAudioEvent[]) => Promise<void> | void;
18
+ };
2
19
  export type CachedTTSOptions = {
3
20
  /**
4
21
  * Return a stable cache key for an utterance whose synthesized audio should
@@ -13,8 +30,15 @@ export type CachedTTSOptions = {
13
30
  * (and re-caches) while the old entry is simply orphaned.
14
31
  */
15
32
  keyFor: (text: string, openOptions: TTSAdapterOpenOptions) => string | null | undefined;
16
- /** Max distinct utterances to retain (LRU by insertion). Default 32. */
33
+ /** Max distinct utterances to retain in memory (LRU by insertion). Default 32. */
17
34
  maxEntries?: number;
35
+ /**
36
+ * Optional persistent L2 store (see {@link CachedTTSStore}). When set, an
37
+ * in-memory miss consults the store before rendering; a store hit is replayed
38
+ * and promoted into memory, and a fresh render is written through to it. Omit
39
+ * for memory-only behaviour (unchanged).
40
+ */
41
+ store?: CachedTTSStore;
18
42
  };
19
43
  /**
20
44
  * Wrap a TTS adapter so selected utterances are synthesized once and replayed
@@ -1183,6 +1183,14 @@ export type VoiceAudioPlayerOptions = {
1183
1183
  autoStart?: boolean;
1184
1184
  createAudioContext?: () => AudioContext;
1185
1185
  lookaheadMs?: number;
1186
+ /**
1187
+ * Playback speed multiplier for the assistant's speech. 1 = normal. Clamped
1188
+ * to [0.5, 2]. Pitch shifts with the rate (Web Audio playbackRate), so keep
1189
+ * UI ranges modest (≈0.85–1.25) to stay natural. Can be changed live via
1190
+ * setPlaybackRate — already-scheduled chunks keep their rate; new chunks
1191
+ * adopt the new one.
1192
+ */
1193
+ playbackRate?: number;
1186
1194
  volume?: number;
1187
1195
  };
1188
1196
  export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
@@ -1313,8 +1321,10 @@ export type VoiceAudioPlayer = {
1313
1321
  lastInterruptLatencyMs?: number;
1314
1322
  lastPlaybackStopLatencyMs?: number;
1315
1323
  pause: () => Promise<void>;
1324
+ playbackRate: number;
1316
1325
  processedChunkCount: number;
1317
1326
  queuedChunkCount: number;
1327
+ setPlaybackRate: (rate: number) => void;
1318
1328
  setVolume: (volume: number) => void;
1319
1329
  start: () => Promise<void>;
1320
1330
  subscribe: (subscriber: () => void) => () => void;
package/dist/index.d.ts CHANGED
@@ -223,7 +223,7 @@ export type { VoiceSimulationSuiteAssertionInput, VoiceSimulationSuiteAssertionR
223
223
  export type { VoiceWorkflowContract, VoiceWorkflowContractDefinition, VoiceWorkflowContractField, VoiceWorkflowContractFieldMatch, VoiceWorkflowContractPresetName, VoiceWorkflowContractPresetOptions, VoiceWorkflowContractTracePayload, VoiceWorkflowContractValidation, VoiceWorkflowContractValidationIssue, VoiceWorkflowOutcome, } from "./core/workflowContract";
224
224
  export type { VoiceSessionListHTMLHandlerOptions, VoiceSessionListItem, VoiceSessionListOptions, VoiceSessionListRoutesOptions, VoiceSessionListStatus, VoiceProviderFallbackRecoverySummary, VoiceSessionReplay, VoiceSessionReplayHTMLHandlerOptions, VoiceSessionReplayOptions, VoiceSessionReplayRoutesOptions, VoiceSessionReplayTurn, } from "./core/sessionReplay";
225
225
  export type { AnthropicVoiceAssistantModelOptions, GeminiVoiceAssistantModelOptions, OpenAIVoiceAssistantModelOptions, VoiceProviderRouterEvent, VoiceProviderRouterFallbackMode, VoiceProviderRouterHealthOptions, VoiceProviderRouterOptions, VoiceProviderOrchestrationProfile, VoiceProviderOrchestrationProfileOptions, VoiceProviderOrchestrationResolvedSurface, VoiceProviderOrchestrationSurface, VoiceProviderRouterPolicy, VoiceProviderRouterPolicyPreset, VoiceProviderRouterPolicyWeights, VoiceProviderRouterProviderHealth, VoiceProviderRouterProviderProfile, VoiceProviderRouterStrategy, VoiceJSONAssistantModelHandler, VoiceJSONAssistantModelOptions, } from "./core/modelAdapters";
226
- export type { CachedTTSOptions } from "./core/cachedTTS";
226
+ export type { CachedTTSOptions, CachedTTSStore } from "./core/cachedTTS";
227
227
  export type { OpenAIVoiceTTSOptions, OpenAIVoiceTTSVoice, } from "./core/openaiTTS";
228
228
  export type { VoiceProviderHealthStatus, VoiceProviderHealthSummary, VoiceProviderHealthSummaryOptions, } from "./core/providerHealth";
229
229
  export type { VoiceProviderCapabilityDefinition, VoiceProviderCapabilityHandlerOptions, VoiceProviderCapabilityHTMLHandlerOptions, VoiceProviderCapabilityKind, VoiceProviderCapabilityOptions, VoiceProviderCapabilityReport, VoiceProviderCapabilityRoutesOptions, VoiceProviderCapabilitySummary, } from "./core/providerCapabilities";
package/dist/index.js CHANGED
@@ -45566,6 +45566,7 @@ var createGeminiVoiceAssistantModel = (options) => {
45566
45566
  var DEFAULT_MAX_ENTRIES = 32;
45567
45567
  var createCachedTTS = (inner, options) => {
45568
45568
  const maxEntries = options.maxEntries ?? DEFAULT_MAX_ENTRIES;
45569
+ const { store } = options;
45569
45570
  const cache = new Map;
45570
45571
  const remember = (key, events) => {
45571
45572
  cache.delete(key);
@@ -45578,6 +45579,18 @@ var createCachedTTS = (inner, options) => {
45578
45579
  cache.delete(oldest);
45579
45580
  }
45580
45581
  };
45582
+ const loadFromStore = async (key) => {
45583
+ if (!store)
45584
+ return null;
45585
+ try {
45586
+ const events = await store.get(key);
45587
+ if (events && events.length > 0) {
45588
+ remember(key, events);
45589
+ return events;
45590
+ }
45591
+ } catch {}
45592
+ return null;
45593
+ };
45581
45594
  return {
45582
45595
  kind: "tts",
45583
45596
  open: async (openOptions) => {
@@ -45608,9 +45621,8 @@ var createCachedTTS = (inner, options) => {
45608
45621
  await session.send(text);
45609
45622
  return;
45610
45623
  }
45611
- const cached = cache.get(key);
45612
- if (cached) {
45613
- for (const event of cached) {
45624
+ const replayEvents = async (events) => {
45625
+ for (const event of events) {
45614
45626
  const replay = {
45615
45627
  ...event,
45616
45628
  receivedAt: Date.now()
@@ -45619,12 +45631,22 @@ var createCachedTTS = (inner, options) => {
45619
45631
  await Promise.resolve(handler(replay));
45620
45632
  }
45621
45633
  }
45634
+ };
45635
+ const cached = cache.get(key) ?? await loadFromStore(key);
45636
+ if (cached) {
45637
+ await replayEvents(cached);
45622
45638
  return;
45623
45639
  }
45624
45640
  capture = [];
45625
45641
  await session.send(text);
45626
- remember(key, capture);
45642
+ const rendered = capture;
45643
+ remember(key, rendered);
45627
45644
  capture = null;
45645
+ if (store) {
45646
+ try {
45647
+ await store.set(key, rendered);
45648
+ } catch {}
45649
+ }
45628
45650
  }
45629
45651
  };
45630
45652
  }
@@ -1580,6 +1580,9 @@ var buildSessionCorrectionAudit = (raw, generic, experimental, benchmarkSeeded,
1580
1580
  // src/client/audioPlayer.ts
1581
1581
  var DEFAULT_LOOKAHEAD_MS = 15;
1582
1582
  var DEFAULT_VOLUME = 1;
1583
+ var DEFAULT_PLAYBACK_RATE = 1;
1584
+ var MIN_PLAYBACK_RATE = 0.5;
1585
+ var MAX_PLAYBACK_RATE = 2;
1583
1586
  var createInitialState = () => ({
1584
1587
  activeSourceCount: 0,
1585
1588
  error: null,
@@ -1602,6 +1605,12 @@ var clampVolume = (volume) => {
1602
1605
  }
1603
1606
  return Math.min(1, Math.max(0, volume));
1604
1607
  };
1608
+ var clampPlaybackRate = (rate) => {
1609
+ if (typeof rate !== "number" || !Number.isFinite(rate)) {
1610
+ return DEFAULT_PLAYBACK_RATE;
1611
+ }
1612
+ return Math.min(MAX_PLAYBACK_RATE, Math.max(MIN_PLAYBACK_RATE, rate));
1613
+ };
1605
1614
  var decodePCM16LEChunk = (audioContext, chunk) => {
1606
1615
  const { format } = chunk;
1607
1616
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
@@ -1635,6 +1644,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1635
1644
  let audioContext = null;
1636
1645
  let outputNode = null;
1637
1646
  let volume = clampVolume(options.volume);
1647
+ let playbackRate = clampPlaybackRate(options.playbackRate);
1638
1648
  let queueEndTime = 0;
1639
1649
  let syncPromise = Promise.resolve();
1640
1650
  let interruptStartedAt = null;
@@ -1736,6 +1746,9 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1736
1746
  const buffer = decodePCM16LEChunk(context, chunk);
1737
1747
  const node = context.createBufferSource();
1738
1748
  node.buffer = buffer;
1749
+ if (node.playbackRate) {
1750
+ node.playbackRate.value = playbackRate;
1751
+ }
1739
1752
  node.connect(outputNode ?? context.destination);
1740
1753
  node.onended = () => {
1741
1754
  sourceNodes.delete(node);
@@ -1747,7 +1760,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1747
1760
  maybeResolveInterrupt();
1748
1761
  };
1749
1762
  const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
1750
- queueEndTime = startAt + buffer.duration;
1763
+ queueEndTime = startAt + buffer.duration / playbackRate;
1751
1764
  sourceNodes.add(node);
1752
1765
  setState({
1753
1766
  activeSourceCount: sourceNodes.size,
@@ -1892,12 +1905,18 @@ var createVoiceAudioPlayer = (source, options = {}) => {
1892
1905
  isPlaying: false
1893
1906
  });
1894
1907
  },
1908
+ get playbackRate() {
1909
+ return playbackRate;
1910
+ },
1895
1911
  get processedChunkCount() {
1896
1912
  return state.processedChunkCount;
1897
1913
  },
1898
1914
  get queuedChunkCount() {
1899
1915
  return state.queuedChunkCount;
1900
1916
  },
1917
+ setPlaybackRate: (nextRate) => {
1918
+ playbackRate = clampPlaybackRate(nextRate);
1919
+ },
1901
1920
  setVolume: (nextVolume) => {
1902
1921
  volume = clampVolume(nextVolume);
1903
1922
  applyOutputGain(audioContext);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.574",
3
+ "version": "0.0.22-beta.576",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",