@absolutejs/voice 0.0.22-beta.93 → 0.0.22-beta.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1110,6 +1110,352 @@ var createVoiceController = (path, options = {}) => {
1110
1110
  };
1111
1111
  };
1112
1112
 
1113
+ // src/client/audioPlayer.ts
1114
+ var DEFAULT_LOOKAHEAD_MS = 15;
1115
+ var createInitialState3 = () => ({
1116
+ activeSourceCount: 0,
1117
+ error: null,
1118
+ isActive: false,
1119
+ isPlaying: false,
1120
+ lastInterruptLatencyMs: undefined,
1121
+ lastPlaybackStopLatencyMs: undefined,
1122
+ processedChunkCount: 0,
1123
+ queuedChunkCount: 0
1124
+ });
1125
+ var getAudioContextCtor = () => {
1126
+ if (typeof window === "undefined") {
1127
+ return typeof AudioContext === "undefined" ? undefined : AudioContext;
1128
+ }
1129
+ return window.AudioContext ?? window.webkitAudioContext;
1130
+ };
1131
+ var decodePCM16LEChunk = (audioContext, chunk) => {
1132
+ const format = chunk.format;
1133
+ if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
1134
+ throw new Error(`Unsupported assistant audio format: ${format.container}/${format.encoding}`);
1135
+ }
1136
+ const bytes = chunk.chunk;
1137
+ const channels = Math.max(1, format.channels);
1138
+ const sampleCount = Math.floor(bytes.byteLength / 2);
1139
+ const frameCount = Math.max(1, Math.floor(sampleCount / channels));
1140
+ const audioBuffer = audioContext.createBuffer(channels, frameCount, format.sampleRateHz);
1141
+ const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
1142
+ for (let channelIndex = 0;channelIndex < channels; channelIndex += 1) {
1143
+ const channelData = audioBuffer.getChannelData(channelIndex);
1144
+ for (let frameIndex = 0;frameIndex < frameCount; frameIndex += 1) {
1145
+ const sampleIndex = frameIndex * channels + channelIndex;
1146
+ const sampleOffset = sampleIndex * 2;
1147
+ if (sampleOffset + 1 >= bytes.byteLength) {
1148
+ channelData[frameIndex] = 0;
1149
+ continue;
1150
+ }
1151
+ channelData[frameIndex] = view.getInt16(sampleOffset, true) / 32768;
1152
+ }
1153
+ }
1154
+ return audioBuffer;
1155
+ };
1156
+ var createVoiceAudioPlayer = (source, options = {}) => {
1157
+ const subscribers = new Set;
1158
+ const sourceNodes = new Set;
1159
+ const lookaheadSeconds = (options.lookaheadMs ?? DEFAULT_LOOKAHEAD_MS) / 1000;
1160
+ let state = createInitialState3();
1161
+ let audioContext = null;
1162
+ let outputNode = null;
1163
+ let queueEndTime = 0;
1164
+ let syncPromise = Promise.resolve();
1165
+ let interruptStartedAt = null;
1166
+ let interruptPromise = null;
1167
+ let resolveInterruptPromise = null;
1168
+ let interruptFallbackTimer = null;
1169
+ const notify = () => {
1170
+ for (const subscriber of subscribers) {
1171
+ subscriber();
1172
+ }
1173
+ };
1174
+ const setState = (next) => {
1175
+ state = {
1176
+ ...state,
1177
+ ...next
1178
+ };
1179
+ notify();
1180
+ };
1181
+ const clearError = () => {
1182
+ if (state.error !== null) {
1183
+ setState({ error: null });
1184
+ }
1185
+ };
1186
+ const clearInterruptTimer = () => {
1187
+ if (interruptFallbackTimer !== null) {
1188
+ clearTimeout(interruptFallbackTimer);
1189
+ interruptFallbackTimer = null;
1190
+ }
1191
+ };
1192
+ const resolveInterrupt = (latencyMs) => {
1193
+ clearInterruptTimer();
1194
+ interruptStartedAt = null;
1195
+ setState({
1196
+ activeSourceCount: sourceNodes.size,
1197
+ isPlaying: false,
1198
+ lastInterruptLatencyMs: latencyMs,
1199
+ lastPlaybackStopLatencyMs: state.lastPlaybackStopLatencyMs ?? latencyMs
1200
+ });
1201
+ resolveInterruptPromise?.();
1202
+ resolveInterruptPromise = null;
1203
+ interruptPromise = null;
1204
+ };
1205
+ const estimateOutputStopLatencyMs = (context) => {
1206
+ if (!context) {
1207
+ return 0;
1208
+ }
1209
+ return Math.max(0, ((context.baseLatency ?? 0) + (context.outputLatency ?? 0)) * 1000);
1210
+ };
1211
+ const restoreOutputGain = (context) => {
1212
+ if (!outputNode) {
1213
+ return;
1214
+ }
1215
+ const gainValue = 1;
1216
+ if (outputNode.gain.setValueAtTime) {
1217
+ outputNode.gain.setValueAtTime(gainValue, context?.currentTime ?? 0);
1218
+ return;
1219
+ }
1220
+ outputNode.gain.value = gainValue;
1221
+ };
1222
+ const muteOutputGain = (context) => {
1223
+ if (!outputNode) {
1224
+ return;
1225
+ }
1226
+ const gainValue = 0;
1227
+ if (outputNode.gain.setValueAtTime) {
1228
+ outputNode.gain.setValueAtTime(gainValue, context?.currentTime ?? 0);
1229
+ return;
1230
+ }
1231
+ outputNode.gain.value = gainValue;
1232
+ };
1233
+ const maybeResolveInterrupt = () => {
1234
+ if (interruptStartedAt === null || sourceNodes.size > 0) {
1235
+ return;
1236
+ }
1237
+ resolveInterrupt(Date.now() - interruptStartedAt);
1238
+ };
1239
+ const ensureAudioContext = async () => {
1240
+ if (audioContext) {
1241
+ return audioContext;
1242
+ }
1243
+ if (options.createAudioContext) {
1244
+ audioContext = options.createAudioContext();
1245
+ } else {
1246
+ const AudioContextCtor = getAudioContextCtor();
1247
+ if (!AudioContextCtor) {
1248
+ throw new Error("Assistant audio playback requires AudioContext support.");
1249
+ }
1250
+ audioContext = new AudioContextCtor;
1251
+ }
1252
+ if (audioContext.createGain) {
1253
+ outputNode = audioContext.createGain();
1254
+ outputNode.connect?.(audioContext.destination);
1255
+ }
1256
+ queueEndTime = audioContext.currentTime;
1257
+ return audioContext;
1258
+ };
1259
+ const scheduleChunk = async (chunk) => {
1260
+ const context = await ensureAudioContext();
1261
+ const buffer = decodePCM16LEChunk(context, chunk);
1262
+ const node = context.createBufferSource();
1263
+ node.buffer = buffer;
1264
+ node.connect(outputNode ?? context.destination);
1265
+ node.onended = () => {
1266
+ sourceNodes.delete(node);
1267
+ node.disconnect?.();
1268
+ setState({
1269
+ activeSourceCount: sourceNodes.size,
1270
+ isPlaying: sourceNodes.size > 0 && state.isActive
1271
+ });
1272
+ maybeResolveInterrupt();
1273
+ };
1274
+ const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
1275
+ queueEndTime = startAt + buffer.duration;
1276
+ sourceNodes.add(node);
1277
+ setState({
1278
+ activeSourceCount: sourceNodes.size,
1279
+ isPlaying: true
1280
+ });
1281
+ node.start(startAt);
1282
+ };
1283
+ const stopQueuedPlayback = (options2) => {
1284
+ for (const node of [...sourceNodes]) {
1285
+ node.stop?.();
1286
+ }
1287
+ queueEndTime = audioContext ? audioContext.currentTime : 0;
1288
+ if (options2?.forceClear) {
1289
+ for (const node of sourceNodes) {
1290
+ node.disconnect?.();
1291
+ }
1292
+ sourceNodes.clear();
1293
+ maybeResolveInterrupt();
1294
+ }
1295
+ };
1296
+ const sync = async () => {
1297
+ if (!state.isActive) {
1298
+ return;
1299
+ }
1300
+ const nextChunks = source.assistantAudio.slice(state.processedChunkCount);
1301
+ if (nextChunks.length === 0) {
1302
+ return;
1303
+ }
1304
+ try {
1305
+ clearError();
1306
+ for (const chunk of nextChunks) {
1307
+ await scheduleChunk(chunk);
1308
+ }
1309
+ setState({
1310
+ processedChunkCount: source.assistantAudio.length,
1311
+ queuedChunkCount: state.queuedChunkCount + nextChunks.length
1312
+ });
1313
+ } catch (error) {
1314
+ setState({
1315
+ error: error instanceof Error ? error.message : String(error)
1316
+ });
1317
+ }
1318
+ };
1319
+ const queueSync = () => {
1320
+ syncPromise = syncPromise.then(() => sync(), () => sync());
1321
+ return syncPromise;
1322
+ };
1323
+ const unsubscribeSource = source.subscribe(() => {
1324
+ if (options.autoStart && !state.isActive && source.assistantAudio.length > 0) {
1325
+ player.start();
1326
+ return;
1327
+ }
1328
+ if (state.isActive) {
1329
+ queueSync();
1330
+ }
1331
+ });
1332
+ const player = {
1333
+ close: async () => {
1334
+ unsubscribeSource();
1335
+ stopQueuedPlayback({ forceClear: true });
1336
+ clearInterruptTimer();
1337
+ resolveInterruptPromise?.();
1338
+ resolveInterruptPromise = null;
1339
+ interruptPromise = null;
1340
+ interruptStartedAt = null;
1341
+ if (audioContext && audioContext.state !== "closed") {
1342
+ await audioContext.close();
1343
+ }
1344
+ audioContext = null;
1345
+ outputNode?.disconnect?.();
1346
+ outputNode = null;
1347
+ queueEndTime = 0;
1348
+ setState({
1349
+ activeSourceCount: 0,
1350
+ isActive: false,
1351
+ isPlaying: false
1352
+ });
1353
+ },
1354
+ get activeSourceCount() {
1355
+ return state.activeSourceCount;
1356
+ },
1357
+ get error() {
1358
+ return state.error;
1359
+ },
1360
+ getSnapshot: () => state,
1361
+ get isActive() {
1362
+ return state.isActive;
1363
+ },
1364
+ get isPlaying() {
1365
+ return state.isPlaying;
1366
+ },
1367
+ interrupt: async () => {
1368
+ const startedAt = Date.now();
1369
+ const context = await ensureAudioContext();
1370
+ interruptStartedAt = startedAt;
1371
+ muteOutputGain(context);
1372
+ const playbackStopLatencyMs = Date.now() - startedAt + estimateOutputStopLatencyMs(context);
1373
+ setState({
1374
+ isActive: false,
1375
+ isPlaying: sourceNodes.size > 0,
1376
+ lastPlaybackStopLatencyMs: playbackStopLatencyMs
1377
+ });
1378
+ if (sourceNodes.size === 0) {
1379
+ resolveInterrupt(playbackStopLatencyMs);
1380
+ return;
1381
+ }
1382
+ if (!interruptPromise) {
1383
+ interruptPromise = new Promise((resolve) => {
1384
+ resolveInterruptPromise = resolve;
1385
+ });
1386
+ }
1387
+ clearInterruptTimer();
1388
+ interruptFallbackTimer = setTimeout(() => {
1389
+ for (const node of sourceNodes) {
1390
+ node.disconnect?.();
1391
+ }
1392
+ sourceNodes.clear();
1393
+ resolveInterrupt(Date.now() - startedAt);
1394
+ }, 250);
1395
+ stopQueuedPlayback();
1396
+ await interruptPromise;
1397
+ },
1398
+ get lastInterruptLatencyMs() {
1399
+ return state.lastInterruptLatencyMs;
1400
+ },
1401
+ get lastPlaybackStopLatencyMs() {
1402
+ return state.lastPlaybackStopLatencyMs;
1403
+ },
1404
+ pause: async () => {
1405
+ if (!audioContext) {
1406
+ setState({
1407
+ activeSourceCount: 0,
1408
+ isActive: false,
1409
+ isPlaying: false
1410
+ });
1411
+ return;
1412
+ }
1413
+ await audioContext.suspend();
1414
+ setState({
1415
+ activeSourceCount: sourceNodes.size,
1416
+ isActive: false,
1417
+ isPlaying: false
1418
+ });
1419
+ },
1420
+ get processedChunkCount() {
1421
+ return state.processedChunkCount;
1422
+ },
1423
+ get queuedChunkCount() {
1424
+ return state.queuedChunkCount;
1425
+ },
1426
+ start: async () => {
1427
+ try {
1428
+ clearError();
1429
+ const context = await ensureAudioContext();
1430
+ restoreOutputGain(context);
1431
+ if (context.state === "suspended") {
1432
+ await context.resume();
1433
+ }
1434
+ setState({
1435
+ activeSourceCount: sourceNodes.size,
1436
+ isActive: true,
1437
+ isPlaying: context.state === "running"
1438
+ });
1439
+ await queueSync();
1440
+ } catch (error) {
1441
+ setState({
1442
+ error: error instanceof Error ? error.message : String(error),
1443
+ isActive: false,
1444
+ isPlaying: false
1445
+ });
1446
+ throw error;
1447
+ }
1448
+ },
1449
+ subscribe: (subscriber) => {
1450
+ subscribers.add(subscriber);
1451
+ return () => {
1452
+ subscribers.delete(subscriber);
1453
+ };
1454
+ }
1455
+ };
1456
+ return player;
1457
+ };
1458
+
1113
1459
  // src/client/bargeInMonitor.ts
1114
1460
  var DEFAULT_THRESHOLD_MS = 250;
1115
1461
  var createEventId = () => `barge-in:${Date.now()}:${crypto.randomUUID?.() ?? Math.random().toString(36).slice(2)}`;
@@ -1181,6 +1527,58 @@ var createVoiceBargeInMonitor = (options = {}) => {
1181
1527
  };
1182
1528
  };
1183
1529
 
1530
// src/client/duplex.ts

/** Input level at or above which `handleLevel` requests an interrupt by default. */
var DEFAULT_INTERRUPT_THRESHOLD = 0.08;

/**
 * Decides whether an input level is high enough to interrupt playback.
 *
 * @param level Current input level.
 * @param options Optional `enabled` (default true) and `interruptThreshold`
 *   (default `DEFAULT_INTERRUPT_THRESHOLD`).
 * @returns `true` when barge-in is enabled and `level` reaches the threshold.
 */
var shouldInterruptForLevel = (level, options = {}) =>
  (options.enabled ?? true) && level >= (options.interruptThreshold ?? DEFAULT_INTERRUPT_THRESHOLD);

/**
 * Wires a voice controller to an audio player so user input interrupts playback.
 *
 * @param controller Object exposing `partial`, `sessionId`, `subscribe(listener)`
 *   and `sendAudio(audio)`.
 * @param player Object exposing `isPlaying`, `interrupt()`,
 *   `lastInterruptLatencyMs` and `lastPlaybackStopLatencyMs`.
 * @param options Optional settings: `enabled`, `interruptThreshold`,
 *   `interruptOnPartial`, `monitor` (with `recordRequested`, `recordSkipped`,
 *   `recordStopped`) and `onError` (called when an interrupt fails).
 * @returns `{ close, handleLevel, sendAudio }`.
 */
var bindVoiceBargeIn = (controller, player, options = {}) => {
  let lastPartial = controller.partial;
  // True from the moment an interrupt is requested until it settles.
  let interruptInFlight = false;

  const describeEvent = (reason) => ({
    reason,
    sessionId: controller.sessionId
  });

  const interruptIfPlaying = (reason) => {
    if (interruptInFlight) {
      // Playback is already being stopped. `sendAudio` runs for every captured
      // frame, so without this guard each frame would request another interrupt
      // and produce duplicate requested/stopped records.
      return;
    }
    if (!player.isPlaying || options.enabled === false) {
      options.monitor?.recordSkipped(describeEvent(reason));
      return;
    }
    interruptInFlight = true;
    options.monitor?.recordRequested(describeEvent(reason));
    player
      .interrupt()
      .then(() => {
        options.monitor?.recordStopped({
          latencyMs: player.lastInterruptLatencyMs,
          playbackStopLatencyMs: player.lastPlaybackStopLatencyMs,
          ...describeEvent(reason)
        });
      })
      .catch((error) => {
        // Report instead of leaving an unhandled promise rejection behind.
        options.onError?.(error);
      })
      .finally(() => {
        interruptInFlight = false;
      });
  };

  const unsubscribe = controller.subscribe(() => {
    if (options.interruptOnPartial === false) {
      lastPartial = controller.partial;
      return;
    }
    // Interrupt only on the transition from no partial transcript to some.
    if (!lastPartial && controller.partial) {
      interruptIfPlaying("partial-transcript");
    }
    lastPartial = controller.partial;
  });

  return {
    close: () => {
      unsubscribe();
    },
    handleLevel: (level) => {
      if (shouldInterruptForLevel(level, options)) {
        interruptIfPlaying("input-level");
      }
    },
    sendAudio: (audio) => {
      interruptIfPlaying("manual-audio");
      controller.sendAudio(audio);
    }
  };
};
1581
+
1184
1582
  // src/client/htmxBootstrap.ts
1185
1583
  var VOICE_WAVE_POINTS = 48;
1186
1584
  var VOICE_WAVE_WIDTH = 320;
@@ -1378,44 +1776,19 @@ var initVoiceHTMXRoot = (root) => {
1378
1776
  let isCapturing = false;
1379
1777
  let micError = null;
1380
1778
  let waveLevels = createInitialVoiceWaveLevels();
1381
- let lastInputLevel = 0;
1382
- let lastAssistantAt = 0;
1383
- let lastAssistantAudioCount = 0;
1384
- let lastAssistantTextCount = 0;
1385
- const syncBargeInOutput = () => {
1386
- if (!bargeInMonitor) {
1387
- return;
1388
- }
1389
- const voice = currentVoice();
1390
- const audioCount = voice.assistantAudio.length;
1391
- const textCount = voice.assistantTexts.length;
1392
- if (audioCount > lastAssistantAudioCount || textCount > lastAssistantTextCount) {
1393
- lastAssistantAt = Date.now();
1394
- }
1395
- lastAssistantAudioCount = audioCount;
1396
- lastAssistantTextCount = textCount;
1397
- };
1398
- const sendAudioWithBargeInEvidence = (audio, sendAudio) => {
1399
- syncBargeInOutput();
1400
- if (bargeInMonitor && Date.now() - lastAssistantAt <= bargeInRecentWindowMs && lastInputLevel >= bargeInSpeechThreshold) {
1401
- bargeInMonitor.recordRequested({
1402
- reason: "manual-audio",
1403
- sessionId: currentVoice().sessionId
1404
- });
1405
- bargeInMonitor.recordStopped({
1406
- latencyMs: 0,
1407
- playbackStopLatencyMs: 0,
1408
- reason: "manual-audio",
1409
- sessionId: currentVoice().sessionId
1410
- });
1411
- }
1412
- sendAudio(audio);
1413
- };
1779
+ let guidedBargeInBinding = null;
1780
+ let generalBargeInBinding = null;
1414
1781
  const guidedVoice = createVoiceController(guidedPath, {
1415
1782
  capture: {
1416
- onAudio: sendAudioWithBargeInEvidence,
1783
+ onAudio: (audio, sendAudio) => {
1784
+ if (guidedBargeInBinding) {
1785
+ guidedBargeInBinding.sendAudio(audio);
1786
+ return;
1787
+ }
1788
+ sendAudio(audio);
1789
+ },
1417
1790
  onLevel: (level) => {
1418
- lastInputLevel = level;
1791
+ guidedBargeInBinding?.handleLevel(level);
1419
1792
  waveLevels = pushVoiceWaveLevel(waveLevels, level);
1420
1793
  renderWave();
1421
1794
  }
@@ -1424,9 +1797,15 @@ var initVoiceHTMXRoot = (root) => {
1424
1797
  });
1425
1798
  const generalVoice = createVoiceController(generalPath, {
1426
1799
  capture: {
1427
- onAudio: sendAudioWithBargeInEvidence,
1800
+ onAudio: (audio, sendAudio) => {
1801
+ if (generalBargeInBinding) {
1802
+ generalBargeInBinding.sendAudio(audio);
1803
+ return;
1804
+ }
1805
+ sendAudio(audio);
1806
+ },
1428
1807
  onLevel: (level) => {
1429
- lastInputLevel = level;
1808
+ generalBargeInBinding?.handleLevel(level);
1430
1809
  waveLevels = pushVoiceWaveLevel(waveLevels, level);
1431
1810
  renderWave();
1432
1811
  }
@@ -1435,7 +1814,18 @@ var initVoiceHTMXRoot = (root) => {
1435
1814
  });
1436
1815
  const stopGuidedBinding = guidedVoice.bindHTMX({ element: syncElement });
1437
1816
  const stopGeneralBinding = generalVoice.bindHTMX({ element: syncElement });
1817
+ const guidedAudioPlayer = createVoiceAudioPlayer(guidedVoice);
1818
+ const generalAudioPlayer = createVoiceAudioPlayer(generalVoice);
1819
+ guidedBargeInBinding = bindVoiceBargeIn(guidedVoice, guidedAudioPlayer, {
1820
+ interruptThreshold: bargeInSpeechThreshold,
1821
+ monitor: bargeInMonitor ?? undefined
1822
+ });
1823
+ generalBargeInBinding = bindVoiceBargeIn(generalVoice, generalAudioPlayer, {
1824
+ interruptThreshold: bargeInSpeechThreshold,
1825
+ monitor: bargeInMonitor ?? undefined
1826
+ });
1438
1827
  const currentVoice = () => activeMode === "general" ? generalVoice : guidedVoice;
1828
+ const currentAudioPlayer = () => activeMode === "general" ? generalAudioPlayer : guidedAudioPlayer;
1439
1829
  const renderWave = () => {
1440
1830
  const path = createVoiceWavePath(waveLevels);
1441
1831
  voiceWaveGlow.setAttribute("d", path);
@@ -1514,11 +1904,15 @@ var initVoiceHTMXRoot = (root) => {
1514
1904
  }
1515
1905
  };
1516
1906
  guidedVoice.subscribe(() => {
1517
- syncBargeInOutput();
1907
+ if (guidedVoice.assistantAudio.length > 0) {
1908
+ guidedAudioPlayer.start().catch(() => {});
1909
+ }
1518
1910
  render();
1519
1911
  });
1520
1912
  generalVoice.subscribe(() => {
1521
- syncBargeInOutput();
1913
+ if (generalVoice.assistantAudio.length > 0) {
1914
+ generalAudioPlayer.start().catch(() => {});
1915
+ }
1522
1916
  render();
1523
1917
  });
1524
1918
  startGuidedButton.addEventListener("click", () => {
@@ -1533,6 +1927,10 @@ var initVoiceHTMXRoot = (root) => {
1533
1927
  window.addEventListener("beforeunload", () => {
1534
1928
  guidedVoice.stopRecording();
1535
1929
  generalVoice.stopRecording();
1930
+ guidedBargeInBinding?.close();
1931
+ generalBargeInBinding?.close();
1932
+ guidedAudioPlayer.close();
1933
+ generalAudioPlayer.close();
1536
1934
  stopGuidedBinding();
1537
1935
  stopGeneralBinding();
1538
1936
  guidedVoice.close();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.93",
3
+ "version": "0.0.22-beta.94",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",