@absolutejs/voice 0.0.22-beta.92 → 0.0.22-beta.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1287,7 +1287,13 @@ var createVoiceController = (path, options = {}) => {
1287
1287
  capture = createMicrophoneCapture({
1288
1288
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
1289
1289
  onLevel: options.capture?.onLevel,
1290
- onAudio: (audio) => stream.sendAudio(audio),
1290
+ onAudio: (audio) => {
1291
+ if (options.capture?.onAudio) {
1292
+ options.capture.onAudio(audio, stream.sendAudio);
1293
+ return;
1294
+ }
1295
+ stream.sendAudio(audio);
1296
+ },
1291
1297
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
1292
1298
  });
1293
1299
  return capture;
@@ -994,7 +994,13 @@ var createVoiceController = (path, options = {}) => {
994
994
  capture = createMicrophoneCapture({
995
995
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
996
996
  onLevel: options.capture?.onLevel,
997
- onAudio: (audio) => stream.sendAudio(audio),
997
+ onAudio: (audio) => {
998
+ if (options.capture?.onAudio) {
999
+ options.capture.onAudio(audio, stream.sendAudio);
1000
+ return;
1001
+ }
1002
+ stream.sendAudio(audio);
1003
+ },
998
1004
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
999
1005
  });
1000
1006
  return capture;
@@ -1104,6 +1110,77 @@ var createVoiceController = (path, options = {}) => {
1104
1110
  };
1105
1111
  };
1106
1112
 
1113
+ // src/client/bargeInMonitor.ts
1114
+ var DEFAULT_THRESHOLD_MS = 250;
1115
+ var createEventId = () => `barge-in:${Date.now()}:${crypto.randomUUID?.() ?? Math.random().toString(36).slice(2)}`;
1116
+ var summarize = (events, thresholdMs) => {
1117
+ const stopped = events.filter((event) => event.status === "stopped");
1118
+ const latencies = stopped.map((event) => event.latencyMs).filter((value) => typeof value === "number");
1119
+ const failed = stopped.filter((event) => typeof event.latencyMs === "number" && event.latencyMs > thresholdMs).length;
1120
+ const passed = stopped.length - failed;
1121
+ return {
1122
+ averageLatencyMs: latencies.length > 0 ? Math.round(latencies.reduce((total, value) => total + value, 0) / latencies.length) : undefined,
1123
+ events: [...events],
1124
+ failed,
1125
+ lastEvent: events.at(-1),
1126
+ passed,
1127
+ status: events.length === 0 ? "empty" : failed > 0 ? "fail" : stopped.length === 0 ? "warn" : "pass",
1128
+ thresholdMs,
1129
+ total: stopped.length
1130
+ };
1131
+ };
1132
+ var createVoiceBargeInMonitor = (options = {}) => {
1133
+ const listeners = new Set;
1134
+ const thresholdMs = options.thresholdMs ?? DEFAULT_THRESHOLD_MS;
1135
+ const fetchImpl = options.fetch ?? globalThis.fetch;
1136
+ const events = [];
1137
+ const emit = () => {
1138
+ for (const listener of listeners) {
1139
+ listener();
1140
+ }
1141
+ };
1142
+ const postEvent = (event) => {
1143
+ if (!options.path || typeof fetchImpl !== "function") {
1144
+ return;
1145
+ }
1146
+ fetchImpl(options.path, {
1147
+ body: JSON.stringify(event),
1148
+ headers: {
1149
+ "Content-Type": "application/json"
1150
+ },
1151
+ method: "POST"
1152
+ }).catch(() => {});
1153
+ };
1154
+ const record = (status, input) => {
1155
+ const event = {
1156
+ at: Date.now(),
1157
+ id: createEventId(),
1158
+ latencyMs: input.latencyMs,
1159
+ playbackStopLatencyMs: input.playbackStopLatencyMs,
1160
+ reason: input.reason,
1161
+ sessionId: input.sessionId,
1162
+ status,
1163
+ thresholdMs
1164
+ };
1165
+ events.push(event);
1166
+ postEvent(event);
1167
+ emit();
1168
+ return event;
1169
+ };
1170
+ return {
1171
+ getSnapshot: () => summarize(events, thresholdMs),
1172
+ recordRequested: (input) => record("requested", input),
1173
+ recordSkipped: (input) => record("skipped", input),
1174
+ recordStopped: (input) => record("stopped", input),
1175
+ subscribe: (subscriber) => {
1176
+ listeners.add(subscriber);
1177
+ return () => {
1178
+ listeners.delete(subscriber);
1179
+ };
1180
+ }
1181
+ };
1182
+ };
1183
+
1107
1184
  // src/client/htmxBootstrap.ts
1108
1185
  var VOICE_WAVE_POINTS = 48;
1109
1186
  var VOICE_WAVE_WIDTH = 320;
@@ -1216,6 +1293,13 @@ var parsePromptList = (value) => {
1216
1293
  } catch {}
1217
1294
  return DEFAULT_GUIDED_PROMPTS;
1218
1295
  };
1296
+ var parseOptionalNumber = (value) => {
1297
+ if (!value) {
1298
+ return;
1299
+ }
1300
+ const parsed = Number(value);
1301
+ return Number.isFinite(parsed) ? parsed : undefined;
1302
+ };
1219
1303
  var requireElement = (root, selector, ctor, name) => {
1220
1304
  const value = selector ? document.querySelector(selector) : null;
1221
1305
  if (value instanceof ctor) {
@@ -1266,6 +1350,13 @@ var initVoiceHTMXRoot = (root) => {
1266
1350
  const guidedPrompts = parsePromptList(root.dataset.voiceGuidedPrompts);
1267
1351
  const guidedLabel = root.dataset.voiceGuidedLabel ?? DEFAULT_GUIDED_LABEL;
1268
1352
  const generalLabel = root.dataset.voiceGeneralLabel ?? DEFAULT_GENERAL_LABEL;
1353
+ const bargeInPath = root.dataset.voiceBargeInPath;
1354
+ const bargeInMonitor = bargeInPath ? createVoiceBargeInMonitor({
1355
+ path: bargeInPath,
1356
+ thresholdMs: parseOptionalNumber(root.dataset.voiceBargeInThresholdMs)
1357
+ }) : null;
1358
+ const bargeInRecentWindowMs = parseOptionalNumber(root.dataset.voiceBargeInRecentWindowMs) ?? 4000;
1359
+ const bargeInSpeechThreshold = parseOptionalNumber(root.dataset.voiceBargeInSpeechThreshold) ?? 0.04;
1269
1360
  const syncElement = requireElement(document, root.dataset.voiceSync, HTMLElement, "voice-htmx-sync");
1270
1361
  const connectionMetric = requireElement(root, root.dataset.voiceConnection, HTMLElement, "metric-connection");
1271
1362
  const errorStatus = requireElement(root, root.dataset.voiceError, HTMLElement, "status-error");
@@ -1279,9 +1370,52 @@ var initVoiceHTMXRoot = (root) => {
1279
1370
  const voiceMonitorCopy = requireElement(root, root.dataset.voiceMonitorCopy, HTMLElement, "voice-monitor-copy");
1280
1371
  const voiceWaveGlow = requireElement(root, root.dataset.voiceWaveGlow, SVGPathElement, "voice-wave-glow");
1281
1372
  const voiceWavePath = requireElement(root, root.dataset.voiceWavePath, SVGPathElement, "voice-wave-path");
1373
+ let activeMode = null;
1374
+ let hasStartedModes = {
1375
+ general: false,
1376
+ guided: false
1377
+ };
1378
+ let isCapturing = false;
1379
+ let micError = null;
1380
+ let waveLevels = createInitialVoiceWaveLevels();
1381
+ let lastInputLevel = 0;
1382
+ let lastAssistantAt = 0;
1383
+ let lastAssistantAudioCount = 0;
1384
+ let lastAssistantTextCount = 0;
1385
+ const syncBargeInOutput = () => {
1386
+ if (!bargeInMonitor) {
1387
+ return;
1388
+ }
1389
+ const voice = currentVoice();
1390
+ const audioCount = voice.assistantAudio.length;
1391
+ const textCount = voice.assistantTexts.length;
1392
+ if (audioCount > lastAssistantAudioCount || textCount > lastAssistantTextCount) {
1393
+ lastAssistantAt = Date.now();
1394
+ }
1395
+ lastAssistantAudioCount = audioCount;
1396
+ lastAssistantTextCount = textCount;
1397
+ };
1398
+ const sendAudioWithBargeInEvidence = (audio, sendAudio) => {
1399
+ syncBargeInOutput();
1400
+ if (bargeInMonitor && Date.now() - lastAssistantAt <= bargeInRecentWindowMs && lastInputLevel >= bargeInSpeechThreshold) {
1401
+ bargeInMonitor.recordRequested({
1402
+ reason: "manual-audio",
1403
+ sessionId: currentVoice().sessionId
1404
+ });
1405
+ bargeInMonitor.recordStopped({
1406
+ latencyMs: 0,
1407
+ playbackStopLatencyMs: 0,
1408
+ reason: "manual-audio",
1409
+ sessionId: currentVoice().sessionId
1410
+ });
1411
+ }
1412
+ sendAudio(audio);
1413
+ };
1282
1414
  const guidedVoice = createVoiceController(guidedPath, {
1283
1415
  capture: {
1416
+ onAudio: sendAudioWithBargeInEvidence,
1284
1417
  onLevel: (level) => {
1418
+ lastInputLevel = level;
1285
1419
  waveLevels = pushVoiceWaveLevel(waveLevels, level);
1286
1420
  renderWave();
1287
1421
  }
@@ -1290,7 +1424,9 @@ var initVoiceHTMXRoot = (root) => {
1290
1424
  });
1291
1425
  const generalVoice = createVoiceController(generalPath, {
1292
1426
  capture: {
1427
+ onAudio: sendAudioWithBargeInEvidence,
1293
1428
  onLevel: (level) => {
1429
+ lastInputLevel = level;
1294
1430
  waveLevels = pushVoiceWaveLevel(waveLevels, level);
1295
1431
  renderWave();
1296
1432
  }
@@ -1299,14 +1435,6 @@ var initVoiceHTMXRoot = (root) => {
1299
1435
  });
1300
1436
  const stopGuidedBinding = guidedVoice.bindHTMX({ element: syncElement });
1301
1437
  const stopGeneralBinding = generalVoice.bindHTMX({ element: syncElement });
1302
- let activeMode = null;
1303
- let hasStartedModes = {
1304
- general: false,
1305
- guided: false
1306
- };
1307
- let isCapturing = false;
1308
- let micError = null;
1309
- let waveLevels = createInitialVoiceWaveLevels();
1310
1438
  const currentVoice = () => activeMode === "general" ? generalVoice : guidedVoice;
1311
1439
  const renderWave = () => {
1312
1440
  const path = createVoiceWavePath(waveLevels);
@@ -1385,8 +1513,14 @@ var initVoiceHTMXRoot = (root) => {
1385
1513
  render();
1386
1514
  }
1387
1515
  };
1388
- guidedVoice.subscribe(render);
1389
- generalVoice.subscribe(render);
1516
+ guidedVoice.subscribe(() => {
1517
+ syncBargeInOutput();
1518
+ render();
1519
+ });
1520
+ generalVoice.subscribe(() => {
1521
+ syncBargeInOutput();
1522
+ render();
1523
+ });
1390
1524
  startGuidedButton.addEventListener("click", () => {
1391
1525
  startMode("guided");
1392
1526
  });
@@ -1446,7 +1446,13 @@ var createVoiceController = (path, options = {}) => {
1446
1446
  capture = createMicrophoneCapture({
1447
1447
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
1448
1448
  onLevel: options.capture?.onLevel,
1449
- onAudio: (audio) => stream.sendAudio(audio),
1449
+ onAudio: (audio) => {
1450
+ if (options.capture?.onAudio) {
1451
+ options.capture.onAudio(audio, stream.sendAudio);
1452
+ return;
1453
+ }
1454
+ stream.sendAudio(audio);
1455
+ },
1450
1456
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
1451
1457
  });
1452
1458
  return capture;
@@ -3116,7 +3116,13 @@ var createVoiceController = (path, options = {}) => {
3116
3116
  capture = createMicrophoneCapture({
3117
3117
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
3118
3118
  onLevel: options.capture?.onLevel,
3119
- onAudio: (audio) => stream.sendAudio(audio),
3119
+ onAudio: (audio) => {
3120
+ if (options.capture?.onAudio) {
3121
+ options.capture.onAudio(audio, stream.sendAudio);
3122
+ return;
3123
+ }
3124
+ stream.sendAudio(audio);
3125
+ },
3120
3126
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
3121
3127
  });
3122
3128
  return capture;
@@ -2558,7 +2558,13 @@ var createVoiceController = (path, options = {}) => {
2558
2558
  capture = createMicrophoneCapture({
2559
2559
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
2560
2560
  onLevel: options.capture?.onLevel,
2561
- onAudio: (audio) => stream.sendAudio(audio),
2561
+ onAudio: (audio) => {
2562
+ if (options.capture?.onAudio) {
2563
+ options.capture.onAudio(audio, stream.sendAudio);
2564
+ return;
2565
+ }
2566
+ stream.sendAudio(audio);
2567
+ },
2562
2568
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
2563
2569
  });
2564
2570
  return capture;
@@ -2948,7 +2948,13 @@ var createVoiceController = (path, options = {}) => {
2948
2948
  capture = createMicrophoneCapture({
2949
2949
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
2950
2950
  onLevel: options.capture?.onLevel,
2951
- onAudio: (audio) => stream.sendAudio(audio),
2951
+ onAudio: (audio) => {
2952
+ if (options.capture?.onAudio) {
2953
+ options.capture.onAudio(audio, stream.sendAudio);
2954
+ return;
2955
+ }
2956
+ stream.sendAudio(audio);
2957
+ },
2952
2958
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
2953
2959
  });
2954
2960
  return capture;
package/dist/types.d.ts CHANGED
@@ -734,6 +734,7 @@ export type VoiceConnectionOptions = {
734
734
  };
735
735
  export type VoiceCaptureOptions = {
736
736
  channelCount?: 1 | 2;
737
+ onAudio?: (audio: Uint8Array | ArrayBuffer, sendAudio: (audio: Uint8Array | ArrayBuffer) => void) => void;
737
738
  onLevel?: (level: number) => void;
738
739
  sampleRateHz?: number;
739
740
  };
package/dist/vue/index.js CHANGED
@@ -2891,7 +2891,13 @@ var createVoiceController = (path, options = {}) => {
2891
2891
  capture = createMicrophoneCapture({
2892
2892
  channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
2893
2893
  onLevel: options.capture?.onLevel,
2894
- onAudio: (audio) => stream.sendAudio(audio),
2894
+ onAudio: (audio) => {
2895
+ if (options.capture?.onAudio) {
2896
+ options.capture.onAudio(audio, stream.sendAudio);
2897
+ return;
2898
+ }
2899
+ stream.sendAudio(audio);
2900
+ },
2895
2901
  sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
2896
2902
  });
2897
2903
  return capture;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.92",
3
+ "version": "0.0.22-beta.93",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",