@absolutejs/voice 0.0.22-beta.92 → 0.0.22-beta.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1287,7 +1287,13 @@ var createVoiceController = (path, options = {}) => {
1287
1287
  capture = createMicrophoneCapture({
1288
1288
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
1289
1289
  onLevel: options.capture?.onLevel,
1290
- onAudio: (audio) => stream.sendAudio(audio),
1290
+ onAudio: (audio) => {
1291
+ if (options.capture?.onAudio) {
1292
+ options.capture.onAudio(audio, stream.sendAudio);
1293
+ return;
1294
+ }
1295
+ stream.sendAudio(audio);
1296
+ },
1291
1297
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
1292
1298
  });
1293
1299
  return capture;
@@ -994,7 +994,13 @@ var createVoiceController = (path, options = {}) => {
994
994
  capture = createMicrophoneCapture({
995
995
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
996
996
  onLevel: options.capture?.onLevel,
997
- onAudio: (audio) => stream.sendAudio(audio),
997
+ onAudio: (audio) => {
998
+ if (options.capture?.onAudio) {
999
+ options.capture.onAudio(audio, stream.sendAudio);
1000
+ return;
1001
+ }
1002
+ stream.sendAudio(audio);
1003
+ },
998
1004
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
999
1005
  });
1000
1006
  return capture;
@@ -1104,6 +1110,475 @@ var createVoiceController = (path, options = {}) => {
1104
1110
  };
1105
1111
  };
1106
1112
 
1113
+ // src/client/audioPlayer.ts
1114
+ var DEFAULT_LOOKAHEAD_MS = 15;
1115
+ var createInitialState3 = () => ({
1116
+ activeSourceCount: 0,
1117
+ error: null,
1118
+ isActive: false,
1119
+ isPlaying: false,
1120
+ lastInterruptLatencyMs: undefined,
1121
+ lastPlaybackStopLatencyMs: undefined,
1122
+ processedChunkCount: 0,
1123
+ queuedChunkCount: 0
1124
+ });
1125
+ var getAudioContextCtor = () => {
1126
+ if (typeof window === "undefined") {
1127
+ return typeof AudioContext === "undefined" ? undefined : AudioContext;
1128
+ }
1129
+ return window.AudioContext ?? window.webkitAudioContext;
1130
+ };
1131
+ var decodePCM16LEChunk = (audioContext, chunk) => {
1132
+ const format = chunk.format;
1133
+ if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
1134
+ throw new Error(`Unsupported assistant audio format: ${format.container}/${format.encoding}`);
1135
+ }
1136
+ const bytes = chunk.chunk;
1137
+ const channels = Math.max(1, format.channels);
1138
+ const sampleCount = Math.floor(bytes.byteLength / 2);
1139
+ const frameCount = Math.max(1, Math.floor(sampleCount / channels));
1140
+ const audioBuffer = audioContext.createBuffer(channels, frameCount, format.sampleRateHz);
1141
+ const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
1142
+ for (let channelIndex = 0;channelIndex < channels; channelIndex += 1) {
1143
+ const channelData = audioBuffer.getChannelData(channelIndex);
1144
+ for (let frameIndex = 0;frameIndex < frameCount; frameIndex += 1) {
1145
+ const sampleIndex = frameIndex * channels + channelIndex;
1146
+ const sampleOffset = sampleIndex * 2;
1147
+ if (sampleOffset + 1 >= bytes.byteLength) {
1148
+ channelData[frameIndex] = 0;
1149
+ continue;
1150
+ }
1151
+ channelData[frameIndex] = view.getInt16(sampleOffset, true) / 32768;
1152
+ }
1153
+ }
1154
+ return audioBuffer;
1155
+ };
1156
+ var createVoiceAudioPlayer = (source, options = {}) => {
1157
+ const subscribers = new Set;
1158
+ const sourceNodes = new Set;
1159
+ const lookaheadSeconds = (options.lookaheadMs ?? DEFAULT_LOOKAHEAD_MS) / 1000;
1160
+ let state = createInitialState3();
1161
+ let audioContext = null;
1162
+ let outputNode = null;
1163
+ let queueEndTime = 0;
1164
+ let syncPromise = Promise.resolve();
1165
+ let interruptStartedAt = null;
1166
+ let interruptPromise = null;
1167
+ let resolveInterruptPromise = null;
1168
+ let interruptFallbackTimer = null;
1169
+ const notify = () => {
1170
+ for (const subscriber of subscribers) {
1171
+ subscriber();
1172
+ }
1173
+ };
1174
+ const setState = (next) => {
1175
+ state = {
1176
+ ...state,
1177
+ ...next
1178
+ };
1179
+ notify();
1180
+ };
1181
+ const clearError = () => {
1182
+ if (state.error !== null) {
1183
+ setState({ error: null });
1184
+ }
1185
+ };
1186
+ const clearInterruptTimer = () => {
1187
+ if (interruptFallbackTimer !== null) {
1188
+ clearTimeout(interruptFallbackTimer);
1189
+ interruptFallbackTimer = null;
1190
+ }
1191
+ };
1192
+ const resolveInterrupt = (latencyMs) => {
1193
+ clearInterruptTimer();
1194
+ interruptStartedAt = null;
1195
+ setState({
1196
+ activeSourceCount: sourceNodes.size,
1197
+ isPlaying: false,
1198
+ lastInterruptLatencyMs: latencyMs,
1199
+ lastPlaybackStopLatencyMs: state.lastPlaybackStopLatencyMs ?? latencyMs
1200
+ });
1201
+ resolveInterruptPromise?.();
1202
+ resolveInterruptPromise = null;
1203
+ interruptPromise = null;
1204
+ };
1205
+ const estimateOutputStopLatencyMs = (context) => {
1206
+ if (!context) {
1207
+ return 0;
1208
+ }
1209
+ return Math.max(0, ((context.baseLatency ?? 0) + (context.outputLatency ?? 0)) * 1000);
1210
+ };
1211
+ const restoreOutputGain = (context) => {
1212
+ if (!outputNode) {
1213
+ return;
1214
+ }
1215
+ const gainValue = 1;
1216
+ if (outputNode.gain.setValueAtTime) {
1217
+ outputNode.gain.setValueAtTime(gainValue, context?.currentTime ?? 0);
1218
+ return;
1219
+ }
1220
+ outputNode.gain.value = gainValue;
1221
+ };
1222
+ const muteOutputGain = (context) => {
1223
+ if (!outputNode) {
1224
+ return;
1225
+ }
1226
+ const gainValue = 0;
1227
+ if (outputNode.gain.setValueAtTime) {
1228
+ outputNode.gain.setValueAtTime(gainValue, context?.currentTime ?? 0);
1229
+ return;
1230
+ }
1231
+ outputNode.gain.value = gainValue;
1232
+ };
1233
+ const maybeResolveInterrupt = () => {
1234
+ if (interruptStartedAt === null || sourceNodes.size > 0) {
1235
+ return;
1236
+ }
1237
+ resolveInterrupt(Date.now() - interruptStartedAt);
1238
+ };
1239
+ const ensureAudioContext = async () => {
1240
+ if (audioContext) {
1241
+ return audioContext;
1242
+ }
1243
+ if (options.createAudioContext) {
1244
+ audioContext = options.createAudioContext();
1245
+ } else {
1246
+ const AudioContextCtor = getAudioContextCtor();
1247
+ if (!AudioContextCtor) {
1248
+ throw new Error("Assistant audio playback requires AudioContext support.");
1249
+ }
1250
+ audioContext = new AudioContextCtor;
1251
+ }
1252
+ if (audioContext.createGain) {
1253
+ outputNode = audioContext.createGain();
1254
+ outputNode.connect?.(audioContext.destination);
1255
+ }
1256
+ queueEndTime = audioContext.currentTime;
1257
+ return audioContext;
1258
+ };
1259
+ const scheduleChunk = async (chunk) => {
1260
+ const context = await ensureAudioContext();
1261
+ const buffer = decodePCM16LEChunk(context, chunk);
1262
+ const node = context.createBufferSource();
1263
+ node.buffer = buffer;
1264
+ node.connect(outputNode ?? context.destination);
1265
+ node.onended = () => {
1266
+ sourceNodes.delete(node);
1267
+ node.disconnect?.();
1268
+ setState({
1269
+ activeSourceCount: sourceNodes.size,
1270
+ isPlaying: sourceNodes.size > 0 && state.isActive
1271
+ });
1272
+ maybeResolveInterrupt();
1273
+ };
1274
+ const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
1275
+ queueEndTime = startAt + buffer.duration;
1276
+ sourceNodes.add(node);
1277
+ setState({
1278
+ activeSourceCount: sourceNodes.size,
1279
+ isPlaying: true
1280
+ });
1281
+ node.start(startAt);
1282
+ };
1283
+ const stopQueuedPlayback = (options2) => {
1284
+ for (const node of [...sourceNodes]) {
1285
+ node.stop?.();
1286
+ }
1287
+ queueEndTime = audioContext ? audioContext.currentTime : 0;
1288
+ if (options2?.forceClear) {
1289
+ for (const node of sourceNodes) {
1290
+ node.disconnect?.();
1291
+ }
1292
+ sourceNodes.clear();
1293
+ maybeResolveInterrupt();
1294
+ }
1295
+ };
1296
+ const sync = async () => {
1297
+ if (!state.isActive) {
1298
+ return;
1299
+ }
1300
+ const nextChunks = source.assistantAudio.slice(state.processedChunkCount);
1301
+ if (nextChunks.length === 0) {
1302
+ return;
1303
+ }
1304
+ try {
1305
+ clearError();
1306
+ for (const chunk of nextChunks) {
1307
+ await scheduleChunk(chunk);
1308
+ }
1309
+ setState({
1310
+ processedChunkCount: source.assistantAudio.length,
1311
+ queuedChunkCount: state.queuedChunkCount + nextChunks.length
1312
+ });
1313
+ } catch (error) {
1314
+ setState({
1315
+ error: error instanceof Error ? error.message : String(error)
1316
+ });
1317
+ }
1318
+ };
1319
+ const queueSync = () => {
1320
+ syncPromise = syncPromise.then(() => sync(), () => sync());
1321
+ return syncPromise;
1322
+ };
1323
+ const unsubscribeSource = source.subscribe(() => {
1324
+ if (options.autoStart && !state.isActive && source.assistantAudio.length > 0) {
1325
+ player.start();
1326
+ return;
1327
+ }
1328
+ if (state.isActive) {
1329
+ queueSync();
1330
+ }
1331
+ });
1332
+ const player = {
1333
+ close: async () => {
1334
+ unsubscribeSource();
1335
+ stopQueuedPlayback({ forceClear: true });
1336
+ clearInterruptTimer();
1337
+ resolveInterruptPromise?.();
1338
+ resolveInterruptPromise = null;
1339
+ interruptPromise = null;
1340
+ interruptStartedAt = null;
1341
+ if (audioContext && audioContext.state !== "closed") {
1342
+ await audioContext.close();
1343
+ }
1344
+ audioContext = null;
1345
+ outputNode?.disconnect?.();
1346
+ outputNode = null;
1347
+ queueEndTime = 0;
1348
+ setState({
1349
+ activeSourceCount: 0,
1350
+ isActive: false,
1351
+ isPlaying: false
1352
+ });
1353
+ },
1354
+ get activeSourceCount() {
1355
+ return state.activeSourceCount;
1356
+ },
1357
+ get error() {
1358
+ return state.error;
1359
+ },
1360
+ getSnapshot: () => state,
1361
+ get isActive() {
1362
+ return state.isActive;
1363
+ },
1364
+ get isPlaying() {
1365
+ return state.isPlaying;
1366
+ },
1367
+ interrupt: async () => {
1368
+ const startedAt = Date.now();
1369
+ const context = await ensureAudioContext();
1370
+ interruptStartedAt = startedAt;
1371
+ muteOutputGain(context);
1372
+ const playbackStopLatencyMs = Date.now() - startedAt + estimateOutputStopLatencyMs(context);
1373
+ setState({
1374
+ isActive: false,
1375
+ isPlaying: sourceNodes.size > 0,
1376
+ lastPlaybackStopLatencyMs: playbackStopLatencyMs
1377
+ });
1378
+ if (sourceNodes.size === 0) {
1379
+ resolveInterrupt(playbackStopLatencyMs);
1380
+ return;
1381
+ }
1382
+ if (!interruptPromise) {
1383
+ interruptPromise = new Promise((resolve) => {
1384
+ resolveInterruptPromise = resolve;
1385
+ });
1386
+ }
1387
+ clearInterruptTimer();
1388
+ interruptFallbackTimer = setTimeout(() => {
1389
+ for (const node of sourceNodes) {
1390
+ node.disconnect?.();
1391
+ }
1392
+ sourceNodes.clear();
1393
+ resolveInterrupt(Date.now() - startedAt);
1394
+ }, 250);
1395
+ stopQueuedPlayback();
1396
+ await interruptPromise;
1397
+ },
1398
+ get lastInterruptLatencyMs() {
1399
+ return state.lastInterruptLatencyMs;
1400
+ },
1401
+ get lastPlaybackStopLatencyMs() {
1402
+ return state.lastPlaybackStopLatencyMs;
1403
+ },
1404
+ pause: async () => {
1405
+ if (!audioContext) {
1406
+ setState({
1407
+ activeSourceCount: 0,
1408
+ isActive: false,
1409
+ isPlaying: false
1410
+ });
1411
+ return;
1412
+ }
1413
+ await audioContext.suspend();
1414
+ setState({
1415
+ activeSourceCount: sourceNodes.size,
1416
+ isActive: false,
1417
+ isPlaying: false
1418
+ });
1419
+ },
1420
+ get processedChunkCount() {
1421
+ return state.processedChunkCount;
1422
+ },
1423
+ get queuedChunkCount() {
1424
+ return state.queuedChunkCount;
1425
+ },
1426
+ start: async () => {
1427
+ try {
1428
+ clearError();
1429
+ const context = await ensureAudioContext();
1430
+ restoreOutputGain(context);
1431
+ if (context.state === "suspended") {
1432
+ await context.resume();
1433
+ }
1434
+ setState({
1435
+ activeSourceCount: sourceNodes.size,
1436
+ isActive: true,
1437
+ isPlaying: context.state === "running"
1438
+ });
1439
+ await queueSync();
1440
+ } catch (error) {
1441
+ setState({
1442
+ error: error instanceof Error ? error.message : String(error),
1443
+ isActive: false,
1444
+ isPlaying: false
1445
+ });
1446
+ throw error;
1447
+ }
1448
+ },
1449
+ subscribe: (subscriber) => {
1450
+ subscribers.add(subscriber);
1451
+ return () => {
1452
+ subscribers.delete(subscriber);
1453
+ };
1454
+ }
1455
+ };
1456
+ return player;
1457
+ };
1458
+
1459
+ // src/client/bargeInMonitor.ts
1460
+ var DEFAULT_THRESHOLD_MS = 250;
1461
+ var createEventId = () => `barge-in:${Date.now()}:${crypto.randomUUID?.() ?? Math.random().toString(36).slice(2)}`;
1462
+ var summarize = (events, thresholdMs) => {
1463
+ const stopped = events.filter((event) => event.status === "stopped");
1464
+ const latencies = stopped.map((event) => event.latencyMs).filter((value) => typeof value === "number");
1465
+ const failed = stopped.filter((event) => typeof event.latencyMs === "number" && event.latencyMs > thresholdMs).length;
1466
+ const passed = stopped.length - failed;
1467
+ return {
1468
+ averageLatencyMs: latencies.length > 0 ? Math.round(latencies.reduce((total, value) => total + value, 0) / latencies.length) : undefined,
1469
+ events: [...events],
1470
+ failed,
1471
+ lastEvent: events.at(-1),
1472
+ passed,
1473
+ status: events.length === 0 ? "empty" : failed > 0 ? "fail" : stopped.length === 0 ? "warn" : "pass",
1474
+ thresholdMs,
1475
+ total: stopped.length
1476
+ };
1477
+ };
1478
+ var createVoiceBargeInMonitor = (options = {}) => {
1479
+ const listeners = new Set;
1480
+ const thresholdMs = options.thresholdMs ?? DEFAULT_THRESHOLD_MS;
1481
+ const fetchImpl = options.fetch ?? globalThis.fetch;
1482
+ const events = [];
1483
+ const emit = () => {
1484
+ for (const listener of listeners) {
1485
+ listener();
1486
+ }
1487
+ };
1488
+ const postEvent = (event) => {
1489
+ if (!options.path || typeof fetchImpl !== "function") {
1490
+ return;
1491
+ }
1492
+ fetchImpl(options.path, {
1493
+ body: JSON.stringify(event),
1494
+ headers: {
1495
+ "Content-Type": "application/json"
1496
+ },
1497
+ method: "POST"
1498
+ }).catch(() => {});
1499
+ };
1500
+ const record = (status, input) => {
1501
+ const event = {
1502
+ at: Date.now(),
1503
+ id: createEventId(),
1504
+ latencyMs: input.latencyMs,
1505
+ playbackStopLatencyMs: input.playbackStopLatencyMs,
1506
+ reason: input.reason,
1507
+ sessionId: input.sessionId,
1508
+ status,
1509
+ thresholdMs
1510
+ };
1511
+ events.push(event);
1512
+ postEvent(event);
1513
+ emit();
1514
+ return event;
1515
+ };
1516
+ return {
1517
+ getSnapshot: () => summarize(events, thresholdMs),
1518
+ recordRequested: (input) => record("requested", input),
1519
+ recordSkipped: (input) => record("skipped", input),
1520
+ recordStopped: (input) => record("stopped", input),
1521
+ subscribe: (subscriber) => {
1522
+ listeners.add(subscriber);
1523
+ return () => {
1524
+ listeners.delete(subscriber);
1525
+ };
1526
+ }
1527
+ };
1528
+ };
1529
+
1530
+ // src/client/duplex.ts
1531
+ var DEFAULT_INTERRUPT_THRESHOLD = 0.08;
1532
+ var shouldInterruptForLevel = (level, options = {}) => (options.enabled ?? true) && level >= (options.interruptThreshold ?? DEFAULT_INTERRUPT_THRESHOLD);
1533
+ var bindVoiceBargeIn = (controller, player, options = {}) => {
1534
+ let lastPartial = controller.partial;
1535
+ const interruptIfPlaying = (reason) => {
1536
+ if (!player.isPlaying || options.enabled === false) {
1537
+ options.monitor?.recordSkipped({
1538
+ reason,
1539
+ sessionId: controller.sessionId
1540
+ });
1541
+ return;
1542
+ }
1543
+ options.monitor?.recordRequested({
1544
+ reason,
1545
+ sessionId: controller.sessionId
1546
+ });
1547
+ player.interrupt().then(() => {
1548
+ options.monitor?.recordStopped({
1549
+ latencyMs: player.lastInterruptLatencyMs,
1550
+ playbackStopLatencyMs: player.lastPlaybackStopLatencyMs,
1551
+ reason,
1552
+ sessionId: controller.sessionId
1553
+ });
1554
+ });
1555
+ };
1556
+ const unsubscribe = controller.subscribe(() => {
1557
+ if (options.interruptOnPartial === false) {
1558
+ lastPartial = controller.partial;
1559
+ return;
1560
+ }
1561
+ if (!lastPartial && controller.partial) {
1562
+ interruptIfPlaying("partial-transcript");
1563
+ }
1564
+ lastPartial = controller.partial;
1565
+ });
1566
+ return {
1567
+ close: () => {
1568
+ unsubscribe();
1569
+ },
1570
+ handleLevel: (level) => {
1571
+ if (shouldInterruptForLevel(level, options)) {
1572
+ interruptIfPlaying("input-level");
1573
+ }
1574
+ },
1575
+ sendAudio: (audio) => {
1576
+ interruptIfPlaying("manual-audio");
1577
+ controller.sendAudio(audio);
1578
+ }
1579
+ };
1580
+ };
1581
+
1107
1582
  // src/client/htmxBootstrap.ts
1108
1583
  var VOICE_WAVE_POINTS = 48;
1109
1584
  var VOICE_WAVE_WIDTH = 320;
@@ -1216,6 +1691,13 @@ var parsePromptList = (value) => {
1216
1691
  } catch {}
1217
1692
  return DEFAULT_GUIDED_PROMPTS;
1218
1693
  };
1694
+ var parseOptionalNumber = (value) => {
1695
+ if (!value) {
1696
+ return;
1697
+ }
1698
+ const parsed = Number(value);
1699
+ return Number.isFinite(parsed) ? parsed : undefined;
1700
+ };
1219
1701
  var requireElement = (root, selector, ctor, name) => {
1220
1702
  const value = selector ? document.querySelector(selector) : null;
1221
1703
  if (value instanceof ctor) {
@@ -1266,6 +1748,13 @@ var initVoiceHTMXRoot = (root) => {
1266
1748
  const guidedPrompts = parsePromptList(root.dataset.voiceGuidedPrompts);
1267
1749
  const guidedLabel = root.dataset.voiceGuidedLabel ?? DEFAULT_GUIDED_LABEL;
1268
1750
  const generalLabel = root.dataset.voiceGeneralLabel ?? DEFAULT_GENERAL_LABEL;
1751
+ const bargeInPath = root.dataset.voiceBargeInPath;
1752
+ const bargeInMonitor = bargeInPath ? createVoiceBargeInMonitor({
1753
+ path: bargeInPath,
1754
+ thresholdMs: parseOptionalNumber(root.dataset.voiceBargeInThresholdMs)
1755
+ }) : null;
1756
+ const bargeInRecentWindowMs = parseOptionalNumber(root.dataset.voiceBargeInRecentWindowMs) ?? 4000;
1757
+ const bargeInSpeechThreshold = parseOptionalNumber(root.dataset.voiceBargeInSpeechThreshold) ?? 0.04;
1269
1758
  const syncElement = requireElement(document, root.dataset.voiceSync, HTMLElement, "voice-htmx-sync");
1270
1759
  const connectionMetric = requireElement(root, root.dataset.voiceConnection, HTMLElement, "metric-connection");
1271
1760
  const errorStatus = requireElement(root, root.dataset.voiceError, HTMLElement, "status-error");
@@ -1279,9 +1768,27 @@ var initVoiceHTMXRoot = (root) => {
1279
1768
  const voiceMonitorCopy = requireElement(root, root.dataset.voiceMonitorCopy, HTMLElement, "voice-monitor-copy");
1280
1769
  const voiceWaveGlow = requireElement(root, root.dataset.voiceWaveGlow, SVGPathElement, "voice-wave-glow");
1281
1770
  const voiceWavePath = requireElement(root, root.dataset.voiceWavePath, SVGPathElement, "voice-wave-path");
1771
+ let activeMode = null;
1772
+ let hasStartedModes = {
1773
+ general: false,
1774
+ guided: false
1775
+ };
1776
+ let isCapturing = false;
1777
+ let micError = null;
1778
+ let waveLevels = createInitialVoiceWaveLevels();
1779
+ let guidedBargeInBinding = null;
1780
+ let generalBargeInBinding = null;
1282
1781
  const guidedVoice = createVoiceController(guidedPath, {
1283
1782
  capture: {
1783
+ onAudio: (audio, sendAudio) => {
1784
+ if (guidedBargeInBinding) {
1785
+ guidedBargeInBinding.sendAudio(audio);
1786
+ return;
1787
+ }
1788
+ sendAudio(audio);
1789
+ },
1284
1790
  onLevel: (level) => {
1791
+ guidedBargeInBinding?.handleLevel(level);
1285
1792
  waveLevels = pushVoiceWaveLevel(waveLevels, level);
1286
1793
  renderWave();
1287
1794
  }
@@ -1290,7 +1797,15 @@ var initVoiceHTMXRoot = (root) => {
1290
1797
  });
1291
1798
  const generalVoice = createVoiceController(generalPath, {
1292
1799
  capture: {
1800
+ onAudio: (audio, sendAudio) => {
1801
+ if (generalBargeInBinding) {
1802
+ generalBargeInBinding.sendAudio(audio);
1803
+ return;
1804
+ }
1805
+ sendAudio(audio);
1806
+ },
1293
1807
  onLevel: (level) => {
1808
+ generalBargeInBinding?.handleLevel(level);
1294
1809
  waveLevels = pushVoiceWaveLevel(waveLevels, level);
1295
1810
  renderWave();
1296
1811
  }
@@ -1299,15 +1814,18 @@ var initVoiceHTMXRoot = (root) => {
1299
1814
  });
1300
1815
  const stopGuidedBinding = guidedVoice.bindHTMX({ element: syncElement });
1301
1816
  const stopGeneralBinding = generalVoice.bindHTMX({ element: syncElement });
1302
- let activeMode = null;
1303
- let hasStartedModes = {
1304
- general: false,
1305
- guided: false
1306
- };
1307
- let isCapturing = false;
1308
- let micError = null;
1309
- let waveLevels = createInitialVoiceWaveLevels();
1817
+ const guidedAudioPlayer = createVoiceAudioPlayer(guidedVoice);
1818
+ const generalAudioPlayer = createVoiceAudioPlayer(generalVoice);
1819
+ guidedBargeInBinding = bindVoiceBargeIn(guidedVoice, guidedAudioPlayer, {
1820
+ interruptThreshold: bargeInSpeechThreshold,
1821
+ monitor: bargeInMonitor ?? undefined
1822
+ });
1823
+ generalBargeInBinding = bindVoiceBargeIn(generalVoice, generalAudioPlayer, {
1824
+ interruptThreshold: bargeInSpeechThreshold,
1825
+ monitor: bargeInMonitor ?? undefined
1826
+ });
1310
1827
  const currentVoice = () => activeMode === "general" ? generalVoice : guidedVoice;
1828
+ const currentAudioPlayer = () => activeMode === "general" ? generalAudioPlayer : guidedAudioPlayer;
1311
1829
  const renderWave = () => {
1312
1830
  const path = createVoiceWavePath(waveLevels);
1313
1831
  voiceWaveGlow.setAttribute("d", path);
@@ -1385,8 +1903,18 @@ var initVoiceHTMXRoot = (root) => {
1385
1903
  render();
1386
1904
  }
1387
1905
  };
1388
- guidedVoice.subscribe(render);
1389
- generalVoice.subscribe(render);
1906
+ guidedVoice.subscribe(() => {
1907
+ if (guidedVoice.assistantAudio.length > 0) {
1908
+ guidedAudioPlayer.start().catch(() => {});
1909
+ }
1910
+ render();
1911
+ });
1912
+ generalVoice.subscribe(() => {
1913
+ if (generalVoice.assistantAudio.length > 0) {
1914
+ generalAudioPlayer.start().catch(() => {});
1915
+ }
1916
+ render();
1917
+ });
1390
1918
  startGuidedButton.addEventListener("click", () => {
1391
1919
  startMode("guided");
1392
1920
  });
@@ -1399,6 +1927,10 @@ var initVoiceHTMXRoot = (root) => {
1399
1927
  window.addEventListener("beforeunload", () => {
1400
1928
  guidedVoice.stopRecording();
1401
1929
  generalVoice.stopRecording();
1930
+ guidedBargeInBinding?.close();
1931
+ generalBargeInBinding?.close();
1932
+ guidedAudioPlayer.close();
1933
+ generalAudioPlayer.close();
1402
1934
  stopGuidedBinding();
1403
1935
  stopGeneralBinding();
1404
1936
  guidedVoice.close();
@@ -1446,7 +1446,13 @@ var createVoiceController = (path, options = {}) => {
1446
1446
  capture = createMicrophoneCapture({
1447
1447
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
1448
1448
  onLevel: options.capture?.onLevel,
1449
- onAudio: (audio) => stream.sendAudio(audio),
1449
+ onAudio: (audio) => {
1450
+ if (options.capture?.onAudio) {
1451
+ options.capture.onAudio(audio, stream.sendAudio);
1452
+ return;
1453
+ }
1454
+ stream.sendAudio(audio);
1455
+ },
1450
1456
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
1451
1457
  });
1452
1458
  return capture;
@@ -3116,7 +3116,13 @@ var createVoiceController = (path, options = {}) => {
3116
3116
  capture = createMicrophoneCapture({
3117
3117
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
3118
3118
  onLevel: options.capture?.onLevel,
3119
- onAudio: (audio) => stream.sendAudio(audio),
3119
+ onAudio: (audio) => {
3120
+ if (options.capture?.onAudio) {
3121
+ options.capture.onAudio(audio, stream.sendAudio);
3122
+ return;
3123
+ }
3124
+ stream.sendAudio(audio);
3125
+ },
3120
3126
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
3121
3127
  });
3122
3128
  return capture;
@@ -2558,7 +2558,13 @@ var createVoiceController = (path, options = {}) => {
2558
2558
  capture = createMicrophoneCapture({
2559
2559
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
2560
2560
  onLevel: options.capture?.onLevel,
2561
- onAudio: (audio) => stream.sendAudio(audio),
2561
+ onAudio: (audio) => {
2562
+ if (options.capture?.onAudio) {
2563
+ options.capture.onAudio(audio, stream.sendAudio);
2564
+ return;
2565
+ }
2566
+ stream.sendAudio(audio);
2567
+ },
2562
2568
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
2563
2569
  });
2564
2570
  return capture;
@@ -2948,7 +2948,13 @@ var createVoiceController = (path, options = {}) => {
2948
2948
  capture = createMicrophoneCapture({
2949
2949
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
2950
2950
  onLevel: options.capture?.onLevel,
2951
- onAudio: (audio) => stream.sendAudio(audio),
2951
+ onAudio: (audio) => {
2952
+ if (options.capture?.onAudio) {
2953
+ options.capture.onAudio(audio, stream.sendAudio);
2954
+ return;
2955
+ }
2956
+ stream.sendAudio(audio);
2957
+ },
2952
2958
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
2953
2959
  });
2954
2960
  return capture;
package/dist/types.d.ts CHANGED
@@ -734,6 +734,7 @@ export type VoiceConnectionOptions = {
734
734
  };
735
735
  export type VoiceCaptureOptions = {
736
736
  channelCount?: 1 | 2;
737
+ onAudio?: (audio: Uint8Array | ArrayBuffer, sendAudio: (audio: Uint8Array | ArrayBuffer) => void) => void;
737
738
  onLevel?: (level: number) => void;
738
739
  sampleRateHz?: number;
739
740
  };
package/dist/vue/index.js CHANGED
@@ -2891,7 +2891,13 @@ var createVoiceController = (path, options = {}) => {
2891
2891
  capture = createMicrophoneCapture({
2892
2892
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
2893
2893
  onLevel: options.capture?.onLevel,
2894
- onAudio: (audio) => stream.sendAudio(audio),
2894
+ onAudio: (audio) => {
2895
+ if (options.capture?.onAudio) {
2896
+ options.capture.onAudio(audio, stream.sendAudio);
2897
+ return;
2898
+ }
2899
+ stream.sendAudio(audio);
2900
+ },
2895
2901
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
2896
2902
  });
2897
2903
  return capture;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.92",
3
+ "version": "0.0.22-beta.94",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",