@absolutejs/voice 0.0.22-beta.573 → 0.0.22-beta.575
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client/audioPlayer.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +20 -1
- package/dist/client/index.js +20 -1
- package/dist/core/types.d.ts +10 -0
- package/dist/index.js +25 -0
- package/dist/testing/index.js +45 -1
- package/package.json +1 -1
package/dist/client/htmxBootstrap.js
CHANGED
|
@@ -1536,6 +1536,9 @@ var createVoiceController = (path, options = {}) => {
|
|
|
1536
1536
|
// src/client/audioPlayer.ts
|
|
1537
1537
|
var DEFAULT_LOOKAHEAD_MS = 15;
|
|
1538
1538
|
var DEFAULT_VOLUME = 1;
|
|
1539
|
+
var DEFAULT_PLAYBACK_RATE = 1;
|
|
1540
|
+
var MIN_PLAYBACK_RATE = 0.5;
|
|
1541
|
+
var MAX_PLAYBACK_RATE = 2;
|
|
1539
1542
|
var createInitialState3 = () => ({
|
|
1540
1543
|
activeSourceCount: 0,
|
|
1541
1544
|
error: null,
|
|
@@ -1558,6 +1561,12 @@ var clampVolume = (volume) => {
|
|
|
1558
1561
|
}
|
|
1559
1562
|
return Math.min(1, Math.max(0, volume));
|
|
1560
1563
|
};
|
|
1564
|
+
var clampPlaybackRate = (rate) => {
|
|
1565
|
+
if (typeof rate !== "number" || !Number.isFinite(rate)) {
|
|
1566
|
+
return DEFAULT_PLAYBACK_RATE;
|
|
1567
|
+
}
|
|
1568
|
+
return Math.min(MAX_PLAYBACK_RATE, Math.max(MIN_PLAYBACK_RATE, rate));
|
|
1569
|
+
};
|
|
1561
1570
|
var decodePCM16LEChunk = (audioContext, chunk) => {
|
|
1562
1571
|
const { format } = chunk;
|
|
1563
1572
|
if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
|
|
@@ -1591,6 +1600,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
1591
1600
|
let audioContext = null;
|
|
1592
1601
|
let outputNode = null;
|
|
1593
1602
|
let volume = clampVolume(options.volume);
|
|
1603
|
+
let playbackRate = clampPlaybackRate(options.playbackRate);
|
|
1594
1604
|
let queueEndTime = 0;
|
|
1595
1605
|
let syncPromise = Promise.resolve();
|
|
1596
1606
|
let interruptStartedAt = null;
|
|
@@ -1692,6 +1702,9 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
1692
1702
|
const buffer = decodePCM16LEChunk(context, chunk);
|
|
1693
1703
|
const node = context.createBufferSource();
|
|
1694
1704
|
node.buffer = buffer;
|
|
1705
|
+
if (node.playbackRate) {
|
|
1706
|
+
node.playbackRate.value = playbackRate;
|
|
1707
|
+
}
|
|
1695
1708
|
node.connect(outputNode ?? context.destination);
|
|
1696
1709
|
node.onended = () => {
|
|
1697
1710
|
sourceNodes.delete(node);
|
|
@@ -1703,7 +1716,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
1703
1716
|
maybeResolveInterrupt();
|
|
1704
1717
|
};
|
|
1705
1718
|
const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
|
|
1706
|
-
queueEndTime = startAt + buffer.duration;
|
|
1719
|
+
queueEndTime = startAt + buffer.duration / playbackRate;
|
|
1707
1720
|
sourceNodes.add(node);
|
|
1708
1721
|
setState({
|
|
1709
1722
|
activeSourceCount: sourceNodes.size,
|
|
@@ -1848,12 +1861,18 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
1848
1861
|
isPlaying: false
|
|
1849
1862
|
});
|
|
1850
1863
|
},
|
|
1864
|
+
get playbackRate() {
|
|
1865
|
+
return playbackRate;
|
|
1866
|
+
},
|
|
1851
1867
|
get processedChunkCount() {
|
|
1852
1868
|
return state.processedChunkCount;
|
|
1853
1869
|
},
|
|
1854
1870
|
get queuedChunkCount() {
|
|
1855
1871
|
return state.queuedChunkCount;
|
|
1856
1872
|
},
|
|
1873
|
+
setPlaybackRate: (nextRate) => {
|
|
1874
|
+
playbackRate = clampPlaybackRate(nextRate);
|
|
1875
|
+
},
|
|
1857
1876
|
setVolume: (nextVolume) => {
|
|
1858
1877
|
volume = clampVolume(nextVolume);
|
|
1859
1878
|
applyOutputGain(audioContext);
|
package/dist/client/index.js
CHANGED
|
@@ -373,6 +373,9 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
373
373
|
// src/client/audioPlayer.ts
|
|
374
374
|
var DEFAULT_LOOKAHEAD_MS = 15;
|
|
375
375
|
var DEFAULT_VOLUME = 1;
|
|
376
|
+
var DEFAULT_PLAYBACK_RATE = 1;
|
|
377
|
+
var MIN_PLAYBACK_RATE = 0.5;
|
|
378
|
+
var MAX_PLAYBACK_RATE = 2;
|
|
376
379
|
var createInitialState = () => ({
|
|
377
380
|
activeSourceCount: 0,
|
|
378
381
|
error: null,
|
|
@@ -395,6 +398,12 @@ var clampVolume = (volume) => {
|
|
|
395
398
|
}
|
|
396
399
|
return Math.min(1, Math.max(0, volume));
|
|
397
400
|
};
|
|
401
|
+
var clampPlaybackRate = (rate) => {
|
|
402
|
+
if (typeof rate !== "number" || !Number.isFinite(rate)) {
|
|
403
|
+
return DEFAULT_PLAYBACK_RATE;
|
|
404
|
+
}
|
|
405
|
+
return Math.min(MAX_PLAYBACK_RATE, Math.max(MIN_PLAYBACK_RATE, rate));
|
|
406
|
+
};
|
|
398
407
|
var decodePCM16LEChunk = (audioContext, chunk) => {
|
|
399
408
|
const { format } = chunk;
|
|
400
409
|
if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
|
|
@@ -428,6 +437,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
428
437
|
let audioContext = null;
|
|
429
438
|
let outputNode = null;
|
|
430
439
|
let volume = clampVolume(options.volume);
|
|
440
|
+
let playbackRate = clampPlaybackRate(options.playbackRate);
|
|
431
441
|
let queueEndTime = 0;
|
|
432
442
|
let syncPromise = Promise.resolve();
|
|
433
443
|
let interruptStartedAt = null;
|
|
@@ -529,6 +539,9 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
529
539
|
const buffer = decodePCM16LEChunk(context, chunk);
|
|
530
540
|
const node = context.createBufferSource();
|
|
531
541
|
node.buffer = buffer;
|
|
542
|
+
if (node.playbackRate) {
|
|
543
|
+
node.playbackRate.value = playbackRate;
|
|
544
|
+
}
|
|
532
545
|
node.connect(outputNode ?? context.destination);
|
|
533
546
|
node.onended = () => {
|
|
534
547
|
sourceNodes.delete(node);
|
|
@@ -540,7 +553,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
540
553
|
maybeResolveInterrupt();
|
|
541
554
|
};
|
|
542
555
|
const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
|
|
543
|
-
queueEndTime = startAt + buffer.duration;
|
|
556
|
+
queueEndTime = startAt + buffer.duration / playbackRate;
|
|
544
557
|
sourceNodes.add(node);
|
|
545
558
|
setState({
|
|
546
559
|
activeSourceCount: sourceNodes.size,
|
|
@@ -685,12 +698,18 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
685
698
|
isPlaying: false
|
|
686
699
|
});
|
|
687
700
|
},
|
|
701
|
+
get playbackRate() {
|
|
702
|
+
return playbackRate;
|
|
703
|
+
},
|
|
688
704
|
get processedChunkCount() {
|
|
689
705
|
return state.processedChunkCount;
|
|
690
706
|
},
|
|
691
707
|
get queuedChunkCount() {
|
|
692
708
|
return state.queuedChunkCount;
|
|
693
709
|
},
|
|
710
|
+
setPlaybackRate: (nextRate) => {
|
|
711
|
+
playbackRate = clampPlaybackRate(nextRate);
|
|
712
|
+
},
|
|
694
713
|
setVolume: (nextVolume) => {
|
|
695
714
|
volume = clampVolume(nextVolume);
|
|
696
715
|
applyOutputGain(audioContext);
|
package/dist/core/types.d.ts
CHANGED
|
@@ -1183,6 +1183,14 @@ export type VoiceAudioPlayerOptions = {
|
|
|
1183
1183
|
autoStart?: boolean;
|
|
1184
1184
|
createAudioContext?: () => AudioContext;
|
|
1185
1185
|
lookaheadMs?: number;
|
|
1186
|
+
/**
|
|
1187
|
+
* Playback speed multiplier for the assistant's speech. 1 = normal. Clamped
|
|
1188
|
+
* to [0.5, 2]. Pitch shifts with the rate (Web Audio playbackRate), so keep
|
|
1189
|
+
* UI ranges modest (≈0.85–1.25) to stay natural. Can be changed live via
|
|
1190
|
+
* setPlaybackRate — already-scheduled chunks keep their rate; new chunks
|
|
1191
|
+
* adopt the new one.
|
|
1192
|
+
*/
|
|
1193
|
+
playbackRate?: number;
|
|
1186
1194
|
volume?: number;
|
|
1187
1195
|
};
|
|
1188
1196
|
export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
|
|
@@ -1313,8 +1321,10 @@ export type VoiceAudioPlayer = {
|
|
|
1313
1321
|
lastInterruptLatencyMs?: number;
|
|
1314
1322
|
lastPlaybackStopLatencyMs?: number;
|
|
1315
1323
|
pause: () => Promise<void>;
|
|
1324
|
+
playbackRate: number;
|
|
1316
1325
|
processedChunkCount: number;
|
|
1317
1326
|
queuedChunkCount: number;
|
|
1327
|
+
setPlaybackRate: (rate: number) => void;
|
|
1318
1328
|
setVolume: (volume: number) => void;
|
|
1319
1329
|
start: () => Promise<void>;
|
|
1320
1330
|
subscribe: (subscriber: () => void) => () => void;
|
package/dist/index.js
CHANGED
|
@@ -3870,6 +3870,7 @@ var createVoiceSession = (options) => {
|
|
|
3870
3870
|
let adapterGenerationCounter = 0;
|
|
3871
3871
|
let activeAdapterGeneration = 0;
|
|
3872
3872
|
let activeTTSTurnId;
|
|
3873
|
+
let assistantSpeechEndsAt = 0;
|
|
3873
3874
|
let fillerTimer = null;
|
|
3874
3875
|
let fillerActive = false;
|
|
3875
3876
|
let fillerToken = 0;
|
|
@@ -4262,6 +4263,7 @@ var createVoiceSession = (options) => {
|
|
|
4262
4263
|
return;
|
|
4263
4264
|
}
|
|
4264
4265
|
activeTTSTurnId = undefined;
|
|
4266
|
+
assistantSpeechEndsAt = Date.now();
|
|
4265
4267
|
appendTurnLatencyStage({
|
|
4266
4268
|
metadata: { reason },
|
|
4267
4269
|
stage: "tts_canceled",
|
|
@@ -4304,6 +4306,12 @@ var createVoiceSession = (options) => {
|
|
|
4304
4306
|
turnId: activeTTSTurnId,
|
|
4305
4307
|
type: "audio"
|
|
4306
4308
|
});
|
|
4309
|
+
const bytesPerSample = input.format.encoding === "pcm_s16le" ? 2 : 1;
|
|
4310
|
+
const bytesPerSecond = input.format.sampleRateHz * input.format.channels * bytesPerSample;
|
|
4311
|
+
if (bytesPerSecond > 0) {
|
|
4312
|
+
const chunkMs = normalizedChunk.byteLength / bytesPerSecond * 1000;
|
|
4313
|
+
assistantSpeechEndsAt = Math.max(assistantSpeechEndsAt, Date.now()) + chunkMs;
|
|
4314
|
+
}
|
|
4307
4315
|
if (activeTTSTurnId) {
|
|
4308
4316
|
await appendTurnLatencyStage({
|
|
4309
4317
|
at: input.receivedAt,
|
|
@@ -4413,6 +4421,20 @@ var createVoiceSession = (options) => {
|
|
|
4413
4421
|
session
|
|
4414
4422
|
});
|
|
4415
4423
|
};
|
|
4424
|
+
const DRAIN_POLL_MS = 200;
|
|
4425
|
+
const DRAIN_TAIL_BUFFER_MS = 300;
|
|
4426
|
+
const DRAIN_MAX_MS = 12000;
|
|
4427
|
+
const drainAssistantSpeech = async () => {
|
|
4428
|
+
const startedAt = Date.now();
|
|
4429
|
+
while (Date.now() - startedAt < DRAIN_MAX_MS) {
|
|
4430
|
+
const remaining = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS - Date.now();
|
|
4431
|
+
if (remaining <= 0)
|
|
4432
|
+
return;
|
|
4433
|
+
await new Promise((resolve) => {
|
|
4434
|
+
setTimeout(resolve, Math.min(remaining, DRAIN_POLL_MS));
|
|
4435
|
+
});
|
|
4436
|
+
}
|
|
4437
|
+
};
|
|
4416
4438
|
const completeInternal = async (result, input = {}) => {
|
|
4417
4439
|
clearSilenceTimer();
|
|
4418
4440
|
const disposition = input.disposition ?? "completed";
|
|
@@ -4446,6 +4468,9 @@ var createVoiceSession = (options) => {
|
|
|
4446
4468
|
if (!didComplete) {
|
|
4447
4469
|
return;
|
|
4448
4470
|
}
|
|
4471
|
+
if (disposition === "completed") {
|
|
4472
|
+
await drainAssistantSpeech();
|
|
4473
|
+
}
|
|
4449
4474
|
await appendTrace({
|
|
4450
4475
|
payload: {
|
|
4451
4476
|
disposition,
|
package/dist/testing/index.js
CHANGED
|
@@ -1580,6 +1580,9 @@ var buildSessionCorrectionAudit = (raw, generic, experimental, benchmarkSeeded,
|
|
|
1580
1580
|
// src/client/audioPlayer.ts
|
|
1581
1581
|
var DEFAULT_LOOKAHEAD_MS = 15;
|
|
1582
1582
|
var DEFAULT_VOLUME = 1;
|
|
1583
|
+
var DEFAULT_PLAYBACK_RATE = 1;
|
|
1584
|
+
var MIN_PLAYBACK_RATE = 0.5;
|
|
1585
|
+
var MAX_PLAYBACK_RATE = 2;
|
|
1583
1586
|
var createInitialState = () => ({
|
|
1584
1587
|
activeSourceCount: 0,
|
|
1585
1588
|
error: null,
|
|
@@ -1602,6 +1605,12 @@ var clampVolume = (volume) => {
|
|
|
1602
1605
|
}
|
|
1603
1606
|
return Math.min(1, Math.max(0, volume));
|
|
1604
1607
|
};
|
|
1608
|
+
var clampPlaybackRate = (rate) => {
|
|
1609
|
+
if (typeof rate !== "number" || !Number.isFinite(rate)) {
|
|
1610
|
+
return DEFAULT_PLAYBACK_RATE;
|
|
1611
|
+
}
|
|
1612
|
+
return Math.min(MAX_PLAYBACK_RATE, Math.max(MIN_PLAYBACK_RATE, rate));
|
|
1613
|
+
};
|
|
1605
1614
|
var decodePCM16LEChunk = (audioContext, chunk) => {
|
|
1606
1615
|
const { format } = chunk;
|
|
1607
1616
|
if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
|
|
@@ -1635,6 +1644,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
1635
1644
|
let audioContext = null;
|
|
1636
1645
|
let outputNode = null;
|
|
1637
1646
|
let volume = clampVolume(options.volume);
|
|
1647
|
+
let playbackRate = clampPlaybackRate(options.playbackRate);
|
|
1638
1648
|
let queueEndTime = 0;
|
|
1639
1649
|
let syncPromise = Promise.resolve();
|
|
1640
1650
|
let interruptStartedAt = null;
|
|
@@ -1736,6 +1746,9 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
1736
1746
|
const buffer = decodePCM16LEChunk(context, chunk);
|
|
1737
1747
|
const node = context.createBufferSource();
|
|
1738
1748
|
node.buffer = buffer;
|
|
1749
|
+
if (node.playbackRate) {
|
|
1750
|
+
node.playbackRate.value = playbackRate;
|
|
1751
|
+
}
|
|
1739
1752
|
node.connect(outputNode ?? context.destination);
|
|
1740
1753
|
node.onended = () => {
|
|
1741
1754
|
sourceNodes.delete(node);
|
|
@@ -1747,7 +1760,7 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
1747
1760
|
maybeResolveInterrupt();
|
|
1748
1761
|
};
|
|
1749
1762
|
const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
|
|
1750
|
-
queueEndTime = startAt + buffer.duration;
|
|
1763
|
+
queueEndTime = startAt + buffer.duration / playbackRate;
|
|
1751
1764
|
sourceNodes.add(node);
|
|
1752
1765
|
setState({
|
|
1753
1766
|
activeSourceCount: sourceNodes.size,
|
|
@@ -1892,12 +1905,18 @@ var createVoiceAudioPlayer = (source, options = {}) => {
|
|
|
1892
1905
|
isPlaying: false
|
|
1893
1906
|
});
|
|
1894
1907
|
},
|
|
1908
|
+
get playbackRate() {
|
|
1909
|
+
return playbackRate;
|
|
1910
|
+
},
|
|
1895
1911
|
get processedChunkCount() {
|
|
1896
1912
|
return state.processedChunkCount;
|
|
1897
1913
|
},
|
|
1898
1914
|
get queuedChunkCount() {
|
|
1899
1915
|
return state.queuedChunkCount;
|
|
1900
1916
|
},
|
|
1917
|
+
setPlaybackRate: (nextRate) => {
|
|
1918
|
+
playbackRate = clampPlaybackRate(nextRate);
|
|
1919
|
+
},
|
|
1901
1920
|
setVolume: (nextVolume) => {
|
|
1902
1921
|
volume = clampVolume(nextVolume);
|
|
1903
1922
|
applyOutputGain(audioContext);
|
|
@@ -5786,6 +5805,7 @@ var createVoiceSession = (options) => {
|
|
|
5786
5805
|
let adapterGenerationCounter = 0;
|
|
5787
5806
|
let activeAdapterGeneration = 0;
|
|
5788
5807
|
let activeTTSTurnId;
|
|
5808
|
+
let assistantSpeechEndsAt = 0;
|
|
5789
5809
|
let fillerTimer = null;
|
|
5790
5810
|
let fillerActive = false;
|
|
5791
5811
|
let fillerToken = 0;
|
|
@@ -6178,6 +6198,7 @@ var createVoiceSession = (options) => {
|
|
|
6178
6198
|
return;
|
|
6179
6199
|
}
|
|
6180
6200
|
activeTTSTurnId = undefined;
|
|
6201
|
+
assistantSpeechEndsAt = Date.now();
|
|
6181
6202
|
appendTurnLatencyStage({
|
|
6182
6203
|
metadata: { reason },
|
|
6183
6204
|
stage: "tts_canceled",
|
|
@@ -6220,6 +6241,12 @@ var createVoiceSession = (options) => {
|
|
|
6220
6241
|
turnId: activeTTSTurnId,
|
|
6221
6242
|
type: "audio"
|
|
6222
6243
|
});
|
|
6244
|
+
const bytesPerSample = input.format.encoding === "pcm_s16le" ? 2 : 1;
|
|
6245
|
+
const bytesPerSecond = input.format.sampleRateHz * input.format.channels * bytesPerSample;
|
|
6246
|
+
if (bytesPerSecond > 0) {
|
|
6247
|
+
const chunkMs = normalizedChunk.byteLength / bytesPerSecond * 1000;
|
|
6248
|
+
assistantSpeechEndsAt = Math.max(assistantSpeechEndsAt, Date.now()) + chunkMs;
|
|
6249
|
+
}
|
|
6223
6250
|
if (activeTTSTurnId) {
|
|
6224
6251
|
await appendTurnLatencyStage({
|
|
6225
6252
|
at: input.receivedAt,
|
|
@@ -6329,6 +6356,20 @@ var createVoiceSession = (options) => {
|
|
|
6329
6356
|
session
|
|
6330
6357
|
});
|
|
6331
6358
|
};
|
|
6359
|
+
const DRAIN_POLL_MS = 200;
|
|
6360
|
+
const DRAIN_TAIL_BUFFER_MS = 300;
|
|
6361
|
+
const DRAIN_MAX_MS = 12000;
|
|
6362
|
+
const drainAssistantSpeech = async () => {
|
|
6363
|
+
const startedAt = Date.now();
|
|
6364
|
+
while (Date.now() - startedAt < DRAIN_MAX_MS) {
|
|
6365
|
+
const remaining = assistantSpeechEndsAt + DRAIN_TAIL_BUFFER_MS - Date.now();
|
|
6366
|
+
if (remaining <= 0)
|
|
6367
|
+
return;
|
|
6368
|
+
await new Promise((resolve2) => {
|
|
6369
|
+
setTimeout(resolve2, Math.min(remaining, DRAIN_POLL_MS));
|
|
6370
|
+
});
|
|
6371
|
+
}
|
|
6372
|
+
};
|
|
6332
6373
|
const completeInternal = async (result, input = {}) => {
|
|
6333
6374
|
clearSilenceTimer();
|
|
6334
6375
|
const disposition = input.disposition ?? "completed";
|
|
@@ -6362,6 +6403,9 @@ var createVoiceSession = (options) => {
|
|
|
6362
6403
|
if (!didComplete) {
|
|
6363
6404
|
return;
|
|
6364
6405
|
}
|
|
6406
|
+
if (disposition === "completed") {
|
|
6407
|
+
await drainAssistantSpeech();
|
|
6408
|
+
}
|
|
6365
6409
|
await appendTrace({
|
|
6366
6410
|
payload: {
|
|
6367
6411
|
disposition,
|