@absolutejs/voice 0.0.20 → 0.0.22-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/README.md +884 -4
  2. package/dist/angular/index.d.ts +1 -0
  3. package/dist/angular/index.js +759 -3
  4. package/dist/angular/voice-controller.service.d.ts +27 -0
  5. package/dist/angular/voice-stream.service.d.ts +6 -0
  6. package/dist/audioConditioning.d.ts +3 -0
  7. package/dist/client/actions.d.ts +48 -0
  8. package/dist/client/audioPlayer.d.ts +40 -0
  9. package/dist/client/connection.d.ts +5 -0
  10. package/dist/client/controller.d.ts +2 -0
  11. package/dist/client/duplex.d.ts +3 -0
  12. package/dist/client/htmxBootstrap.js +660 -167
  13. package/dist/client/index.d.ts +3 -0
  14. package/dist/client/index.js +991 -6
  15. package/dist/client/microphone.d.ts +4 -2
  16. package/dist/correction.d.ts +33 -0
  17. package/dist/fileStore.d.ts +27 -0
  18. package/dist/index.d.ts +15 -0
  19. package/dist/index.js +3721 -298
  20. package/dist/ops.d.ts +100 -0
  21. package/dist/presets.d.ts +13 -0
  22. package/dist/react/index.d.ts +1 -0
  23. package/dist/react/index.js +728 -3
  24. package/dist/react/useVoiceController.d.ts +26 -0
  25. package/dist/react/useVoiceStream.d.ts +7 -0
  26. package/dist/routing.d.ts +3 -0
  27. package/dist/runtimeOps.d.ts +23 -0
  28. package/dist/store.d.ts +2 -2
  29. package/dist/svelte/index.d.ts +1 -0
  30. package/dist/svelte/index.js +691 -3
  31. package/dist/telephony/response.d.ts +7 -0
  32. package/dist/telephony/twilio.d.ts +116 -0
  33. package/dist/testing/benchmark.d.ts +93 -2
  34. package/dist/testing/corrected.d.ts +41 -0
  35. package/dist/testing/duplex.d.ts +59 -0
  36. package/dist/testing/fixtures.d.ts +18 -2
  37. package/dist/testing/index.d.ts +5 -0
  38. package/dist/testing/index.js +6247 -402
  39. package/dist/testing/review.d.ts +143 -0
  40. package/dist/testing/sessionBenchmark.d.ts +92 -2
  41. package/dist/testing/stt.d.ts +3 -1
  42. package/dist/testing/telephony.d.ts +70 -0
  43. package/dist/testing/tts.d.ts +73 -0
  44. package/dist/turnDetection.d.ts +5 -1
  45. package/dist/turnProfiles.d.ts +6 -0
  46. package/dist/types.d.ts +487 -10
  47. package/dist/vue/index.d.ts +1 -0
  48. package/dist/vue/index.js +750 -3
  49. package/dist/vue/useVoiceController.d.ts +30 -0
  50. package/dist/vue/useVoiceStream.d.ts +11 -0
  51. package/fixtures/README.md +9 -0
  52. package/fixtures/manifest.json +59 -1
  53. package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
  54. package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
  55. package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
  56. package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
  57. package/package.json +135 -1
@@ -1,3 +1,145 @@
1
+ // src/client/htmx.ts
2
+ var DEFAULT_EVENT_NAME = "voice-refresh";
3
+ var DEFAULT_QUERY_PARAM = "sessionId";
4
+ var resolveElement = (input) => {
5
+ if (typeof input !== "string") {
6
+ return input;
7
+ }
8
+ return document.querySelector(input);
9
+ };
10
+ var buildRoute = (element, route, queryParam, sessionId) => {
11
+ const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
12
+ if (!baseRoute) {
13
+ return "";
14
+ }
15
+ const url = new URL(baseRoute, window.location.origin);
16
+ if (sessionId) {
17
+ url.searchParams.set(queryParam, sessionId);
18
+ } else {
19
+ url.searchParams.delete(queryParam);
20
+ }
21
+ return `${url.pathname}${url.search}${url.hash}`;
22
+ };
23
+ var bindVoiceHTMX = (stream, options) => {
24
+ if (typeof window === "undefined" || typeof document === "undefined") {
25
+ return () => {};
26
+ }
27
+ const element = resolveElement(options.element);
28
+ if (!element) {
29
+ return () => {};
30
+ }
31
+ const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
32
+ const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
33
+ const sync = () => {
34
+ const htmxWindow = window;
35
+ const nextRoute = buildRoute(element, options.route, queryParam, stream.sessionId);
36
+ if (nextRoute) {
37
+ element.setAttribute("hx-get", nextRoute);
38
+ }
39
+ htmxWindow.htmx?.process?.(element);
40
+ htmxWindow.htmx?.trigger?.(element, eventName);
41
+ };
42
+ const unsubscribe = stream.subscribe(sync);
43
+ sync();
44
+ return () => {
45
+ unsubscribe();
46
+ };
47
+ };
48
+
49
+ // src/client/microphone.ts
50
+ var clampSample = (value) => Math.max(-1, Math.min(1, value));
51
+ var floatTo16BitPCM = (input) => {
52
+ const output = new Int16Array(input.length);
53
+ for (let index = 0;index < input.length; index += 1) {
54
+ const sample = clampSample(input[index] ?? 0);
55
+ output[index] = sample < 0 ? sample * 32768 : sample * 32767;
56
+ }
57
+ return new Uint8Array(output.buffer);
58
+ };
59
+ var getPcmLevel = (audio) => {
60
+ const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
61
+ if (bytes.byteLength < 2) {
62
+ return 0;
63
+ }
64
+ const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
65
+ if (samples.length === 0) {
66
+ return 0;
67
+ }
68
+ let sumSquares = 0;
69
+ for (const sample of samples) {
70
+ const normalized = sample / 32768;
71
+ sumSquares += normalized * normalized;
72
+ }
73
+ return Math.min(1, Math.max(0, Math.sqrt(sumSquares / samples.length) * 5.5));
74
+ };
75
+ var downsampleBuffer = (input, sourceRate, targetRate) => {
76
+ if (sourceRate === targetRate) {
77
+ return input;
78
+ }
79
+ const ratio = sourceRate / targetRate;
80
+ const length = Math.round(input.length / ratio);
81
+ const output = new Float32Array(length);
82
+ let offsetResult = 0;
83
+ let offsetBuffer = 0;
84
+ while (offsetResult < output.length) {
85
+ const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
86
+ let accum = 0;
87
+ let count = 0;
88
+ for (let index = offsetBuffer;index < nextOffsetBuffer && index < input.length; index += 1) {
89
+ accum += input[index] ?? 0;
90
+ count += 1;
91
+ }
92
+ output[offsetResult] = count > 0 ? accum / count : 0;
93
+ offsetResult += 1;
94
+ offsetBuffer = nextOffsetBuffer;
95
+ }
96
+ return output;
97
+ };
98
+ var createMicrophoneCapture = (options) => {
99
+ let audioContext = null;
100
+ let sourceNode = null;
101
+ let processorNode = null;
102
+ let mediaStream = null;
103
+ const start = async () => {
104
+ if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
105
+ throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
106
+ }
107
+ const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? AudioContext;
108
+ if (!AudioContextCtor) {
109
+ throw new Error("Browser microphone capture requires AudioContext support.");
110
+ }
111
+ mediaStream = await navigator.mediaDevices.getUserMedia({
112
+ audio: {
113
+ channelCount: options.channelCount ?? 1
114
+ }
115
+ });
116
+ audioContext = new AudioContextCtor;
117
+ sourceNode = audioContext.createMediaStreamSource(mediaStream);
118
+ processorNode = audioContext.createScriptProcessor(4096, 1, 1);
119
+ processorNode.onaudioprocess = (event) => {
120
+ const channel = event.inputBuffer.getChannelData(0);
121
+ const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
122
+ const pcm = floatTo16BitPCM(downsampled);
123
+ options.onLevel?.(getPcmLevel(pcm));
124
+ options.onAudio(pcm);
125
+ };
126
+ sourceNode.connect(processorNode);
127
+ processorNode.connect(audioContext.destination);
128
+ };
129
+ const stop = () => {
130
+ processorNode?.disconnect();
131
+ sourceNode?.disconnect();
132
+ mediaStream?.getTracks().forEach((track) => track.stop());
133
+ audioContext?.close();
134
+ options.onLevel?.(0);
135
+ audioContext = null;
136
+ mediaStream = null;
137
+ processorNode = null;
138
+ sourceNode = null;
139
+ };
140
+ return { start, stop };
141
+ };
142
+
1
143
  // src/client/actions.ts
2
144
  var normalizeErrorMessage = (value) => {
3
145
  if (typeof value === "string" && value.trim()) {
@@ -28,6 +170,14 @@ var normalizeErrorMessage = (value) => {
28
170
  };
29
171
  var serverMessageToAction = (message) => {
30
172
  switch (message.type) {
173
+ case "audio":
174
+ return {
175
+ chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
176
+ format: message.format,
177
+ receivedAt: message.receivedAt,
178
+ turnId: message.turnId,
179
+ type: "audio"
180
+ };
31
181
  case "assistant":
32
182
  return {
33
183
  text: message.text,
@@ -56,6 +206,7 @@ var serverMessageToAction = (message) => {
56
206
  case "session":
57
207
  return {
58
208
  sessionId: message.sessionId,
209
+ scenarioId: message.scenarioId,
59
210
  status: message.status,
60
211
  type: "session"
61
212
  };
@@ -76,24 +227,30 @@ var WS_NORMAL_CLOSURE = 1000;
76
227
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
77
228
  var DEFAULT_PING_INTERVAL = 30000;
78
229
  var RECONNECT_DELAY_MS = 500;
230
+ var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
79
231
  var noop = () => {};
80
232
  var noopUnsubscribe = () => noop;
81
233
  var NOOP_CONNECTION = {
234
+ start: () => {},
82
235
  close: noop,
83
236
  endTurn: noop,
84
237
  getReadyState: () => WS_CLOSED,
238
+ getScenarioId: () => "",
85
239
  getSessionId: () => "",
86
240
  send: noop,
87
241
  sendAudio: noop,
88
242
  subscribe: noopUnsubscribe
89
243
  };
90
244
  var createSessionId = () => crypto.randomUUID();
91
- var buildWsUrl = (path, sessionId) => {
245
+ var buildWsUrl = (path, sessionId, scenarioId) => {
92
246
  const { hostname, port, protocol } = window.location;
93
247
  const wsProtocol = protocol === "https:" ? "wss:" : "ws:";
94
248
  const portSuffix = port ? `:${port}` : "";
95
249
  const url = new URL(`${wsProtocol}//${hostname}${portSuffix}${path}`);
96
250
  url.searchParams.set("sessionId", sessionId);
251
+ if (scenarioId) {
252
+ url.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
253
+ }
97
254
  return url.toString();
98
255
  };
99
256
  var isVoiceServerMessage = (value) => {
@@ -101,6 +258,7 @@ var isVoiceServerMessage = (value) => {
101
258
  return false;
102
259
  }
103
260
  switch (value.type) {
261
+ case "audio":
104
262
  case "assistant":
105
263
  case "complete":
106
264
  case "error":
@@ -136,6 +294,7 @@ var createVoiceConnection = (path, options = {}) => {
136
294
  const state = {
137
295
  isConnected: false,
138
296
  pendingMessages: [],
297
+ scenarioId: options.scenarioId ?? null,
139
298
  pingInterval: null,
140
299
  reconnectAttempts: 0,
141
300
  reconnectTimeout: null,
@@ -173,13 +332,14 @@ var createVoiceConnection = (path, options = {}) => {
173
332
  }, RECONNECT_DELAY_MS);
174
333
  };
175
334
  const connect = () => {
176
- const ws = new WebSocket(buildWsUrl(path, state.sessionId));
335
+ const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
177
336
  ws.binaryType = "arraybuffer";
178
337
  ws.onopen = () => {
179
338
  state.isConnected = true;
180
339
  state.reconnectAttempts = 0;
181
340
  flushPendingMessages();
182
341
  listeners.forEach((listener) => listener({
342
+ scenarioId: state.scenarioId ?? undefined,
183
343
  sessionId: state.sessionId,
184
344
  status: "active",
185
345
  type: "session"
@@ -197,6 +357,7 @@ var createVoiceConnection = (path, options = {}) => {
197
357
  }
198
358
  if (parsed.type === "session") {
199
359
  state.sessionId = parsed.sessionId;
360
+ state.scenarioId = parsed.scenarioId ?? state.scenarioId;
200
361
  }
201
362
  listeners.forEach((listener) => listener(parsed));
202
363
  };
@@ -220,6 +381,19 @@ var createVoiceConnection = (path, options = {}) => {
220
381
  const send = (message) => {
221
382
  sendSerialized(JSON.stringify(message));
222
383
  };
384
+ const start = (input = {}) => {
385
+ if (input.sessionId) {
386
+ state.sessionId = input.sessionId;
387
+ }
388
+ if (input.scenarioId) {
389
+ state.scenarioId = input.scenarioId;
390
+ }
391
+ send({
392
+ type: "start",
393
+ sessionId: state.sessionId,
394
+ scenarioId: state.scenarioId ?? undefined
395
+ });
396
+ };
223
397
  const sendAudio = (audio) => {
224
398
  sendSerialized(audio);
225
399
  };
@@ -243,9 +417,11 @@ var createVoiceConnection = (path, options = {}) => {
243
417
  };
244
418
  connect();
245
419
  return {
420
+ start,
246
421
  close,
247
422
  endTurn,
248
423
  getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
424
+ getScenarioId: () => state.scenarioId ?? "",
249
425
  getSessionId: () => state.sessionId,
250
426
  send,
251
427
  sendAudio,
@@ -255,9 +431,11 @@ var createVoiceConnection = (path, options = {}) => {
255
431
 
256
432
  // src/client/store.ts
257
433
  var createInitialState = () => ({
434
+ assistantAudio: [],
258
435
  assistantTexts: [],
259
436
  error: null,
260
437
  isConnected: false,
438
+ scenarioId: null,
261
439
  partial: "",
262
440
  sessionId: null,
263
441
  status: "idle",
@@ -271,6 +449,20 @@ var createVoiceStreamStore = () => {
271
449
  };
272
450
  const dispatch = (action) => {
273
451
  switch (action.type) {
452
+ case "audio":
453
+ state = {
454
+ ...state,
455
+ assistantAudio: [
456
+ ...state.assistantAudio,
457
+ {
458
+ chunk: action.chunk,
459
+ format: action.format,
460
+ receivedAt: action.receivedAt,
461
+ turnId: action.turnId
462
+ }
463
+ ]
464
+ };
465
+ break;
274
466
  case "assistant":
275
467
  state = {
276
468
  ...state,
@@ -319,6 +511,7 @@ var createVoiceStreamStore = () => {
319
511
  state = {
320
512
  ...state,
321
513
  error: null,
514
+ scenarioId: action.scenarioId ?? state.scenarioId,
322
515
  isConnected: action.status === "active",
323
516
  sessionId: action.sessionId,
324
517
  status: action.status
@@ -352,6 +545,12 @@ var createVoiceStream = (path, options = {}) => {
352
545
  const connection = createVoiceConnection(path, options);
353
546
  const store = createVoiceStreamStore();
354
547
  const subscribers = new Set;
548
+ const start = (input) => Promise.resolve().then(() => {
549
+ if (!input?.sessionId && !input?.scenarioId) {
550
+ return;
551
+ }
552
+ connection.start(input);
553
+ });
355
554
  const notify = () => {
356
555
  subscribers.forEach((subscriber) => subscriber());
357
556
  };
@@ -384,6 +583,10 @@ var createVoiceStream = (path, options = {}) => {
384
583
  get isConnected() {
385
584
  return store.getSnapshot().isConnected;
386
585
  },
586
+ get scenarioId() {
587
+ return store.getSnapshot().scenarioId;
588
+ },
589
+ start,
387
590
  get partial() {
388
591
  return store.getSnapshot().partial;
389
592
  },
@@ -399,6 +602,9 @@ var createVoiceStream = (path, options = {}) => {
399
602
  get assistantTexts() {
400
603
  return store.getSnapshot().assistantTexts;
401
604
  },
605
+ get assistantAudio() {
606
+ return store.getSnapshot().assistantAudio;
607
+ },
402
608
  sendAudio(audio) {
403
609
  connection.sendAudio(audio);
404
610
  },
@@ -411,127 +617,449 @@ var createVoiceStream = (path, options = {}) => {
411
617
  };
412
618
  };
413
619
 
414
- // src/client/htmx.ts
415
- var DEFAULT_EVENT_NAME = "voice-refresh";
416
- var DEFAULT_QUERY_PARAM = "sessionId";
417
- var resolveElement = (input) => {
418
- if (typeof input !== "string") {
419
- return input;
620
+ // src/audioConditioning.ts
621
+ var DEFAULT_TARGET_LEVEL = 0.08;
622
+ var DEFAULT_MAX_GAIN = 3;
623
+ var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
624
+ var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
625
+ var resolveAudioConditioningConfig = (config) => {
626
+ if (!config || config.enabled === false) {
627
+ return;
420
628
  }
421
- return document.querySelector(input);
629
+ return {
630
+ enabled: true,
631
+ maxGain: config.maxGain ?? DEFAULT_MAX_GAIN,
632
+ noiseGateAttenuation: config.noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
633
+ noiseGateThreshold: config.noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
634
+ targetLevel: config.targetLevel ?? DEFAULT_TARGET_LEVEL
635
+ };
422
636
  };
423
- var buildRoute = (element, route, queryParam, sessionId) => {
424
- const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
425
- if (!baseRoute) {
426
- return "";
427
- }
428
- const url = new URL(baseRoute, window.location.origin);
429
- if (sessionId) {
430
- url.searchParams.set(queryParam, sessionId);
431
- } else {
432
- url.searchParams.delete(queryParam);
637
+
638
+ // src/turnProfiles.ts
639
+ var TURN_PROFILE_DEFAULTS = {
640
+ balanced: {
641
+ qualityProfile: "general",
642
+ silenceMs: 1400,
643
+ speechThreshold: 0.012,
644
+ transcriptStabilityMs: 1000
645
+ },
646
+ fast: {
647
+ qualityProfile: "general",
648
+ silenceMs: 700,
649
+ speechThreshold: 0.015,
650
+ transcriptStabilityMs: 450
651
+ },
652
+ "long-form": {
653
+ qualityProfile: "general",
654
+ silenceMs: 2200,
655
+ speechThreshold: 0.01,
656
+ transcriptStabilityMs: 1500
433
657
  }
434
- return `${url.pathname}${url.search}${url.hash}`;
435
658
  };
436
- var bindVoiceHTMX = (stream, options) => {
437
- if (typeof window === "undefined" || typeof document === "undefined") {
438
- return () => {};
439
- }
440
- const element = resolveElement(options.element);
441
- if (!element) {
442
- return () => {};
659
+ var QUALITY_PROFILE_DEFAULTS = {
660
+ general: {},
661
+ "accent-heavy": {
662
+ silenceMs: 1200,
663
+ speechThreshold: 0.01,
664
+ transcriptStabilityMs: 1200
665
+ },
666
+ "noisy-room": {
667
+ silenceMs: 2000,
668
+ speechThreshold: 0.02,
669
+ transcriptStabilityMs: 1600
670
+ },
671
+ "short-command": {
672
+ silenceMs: 500,
673
+ speechThreshold: 0.016,
674
+ transcriptStabilityMs: 420
443
675
  }
444
- const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
445
- const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
446
- const sync = () => {
447
- const htmxWindow = window;
448
- const nextRoute = buildRoute(element, options.route, queryParam, stream.sessionId);
449
- if (nextRoute) {
450
- element.setAttribute("hx-get", nextRoute);
451
- }
452
- htmxWindow.htmx?.process?.(element);
453
- htmxWindow.htmx?.trigger?.(element, eventName);
454
- };
455
- const unsubscribe = stream.subscribe(sync);
456
- sync();
457
- return () => {
458
- unsubscribe();
676
+ };
677
+ var DEFAULT_TURN_PROFILE = "fast";
678
+ var DEFAULT_QUALITY_PROFILE = "general";
679
+ var resolveTurnDetectionConfig = (config) => {
680
+ const profile = config?.profile ?? DEFAULT_TURN_PROFILE;
681
+ const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
682
+ const preset = TURN_PROFILE_DEFAULTS[profile];
683
+ const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
684
+ return {
685
+ profile,
686
+ qualityProfile,
687
+ silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
688
+ speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
689
+ transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
459
690
  };
460
691
  };
461
692
 
462
- // src/client/microphone.ts
463
- var clampSample = (value) => Math.max(-1, Math.min(1, value));
464
- var floatTo16BitPCM = (input) => {
465
- const output = new Int16Array(input.length);
466
- for (let index = 0;index < input.length; index += 1) {
467
- const sample = clampSample(input[index] ?? 0);
468
- output[index] = sample < 0 ? sample * 32768 : sample * 32767;
469
- }
470
- return new Uint8Array(output.buffer);
471
- };
472
- var downsampleBuffer = (input, sourceRate, targetRate) => {
473
- if (sourceRate === targetRate) {
474
- return input;
475
- }
476
- const ratio = sourceRate / targetRate;
477
- const length = Math.round(input.length / ratio);
478
- const output = new Float32Array(length);
479
- let offsetResult = 0;
480
- let offsetBuffer = 0;
481
- while (offsetResult < output.length) {
482
- const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
483
- let accum = 0;
484
- let count = 0;
485
- for (let index = offsetBuffer;index < nextOffsetBuffer && index < input.length; index += 1) {
486
- accum += input[index] ?? 0;
487
- count += 1;
693
+ // src/presets.ts
694
+ var PRESET_INPUTS = {
695
+ chat: {
696
+ audioConditioning: {
697
+ enabled: true,
698
+ maxGain: 2.5,
699
+ noiseGateAttenuation: 0,
700
+ noiseGateThreshold: 0.004,
701
+ targetLevel: 0.08
702
+ },
703
+ capture: {
704
+ channelCount: 1,
705
+ sampleRateHz: 16000
706
+ },
707
+ connection: {
708
+ maxReconnectAttempts: 10,
709
+ pingInterval: 30000,
710
+ reconnect: true
711
+ },
712
+ sttLifecycle: "continuous",
713
+ turnDetection: {
714
+ qualityProfile: "short-command",
715
+ profile: "balanced"
716
+ }
717
+ },
718
+ default: {
719
+ capture: {
720
+ channelCount: 1,
721
+ sampleRateHz: 16000
722
+ },
723
+ connection: {
724
+ maxReconnectAttempts: 10,
725
+ pingInterval: 30000,
726
+ reconnect: true
727
+ },
728
+ sttLifecycle: "continuous",
729
+ turnDetection: {
730
+ qualityProfile: "general",
731
+ profile: "fast"
732
+ }
733
+ },
734
+ dictation: {
735
+ audioConditioning: {
736
+ enabled: true,
737
+ maxGain: 2.25,
738
+ noiseGateAttenuation: 0.05,
739
+ noiseGateThreshold: 0.003,
740
+ targetLevel: 0.08
741
+ },
742
+ capture: {
743
+ channelCount: 1,
744
+ sampleRateHz: 16000
745
+ },
746
+ connection: {
747
+ maxReconnectAttempts: 12,
748
+ pingInterval: 30000,
749
+ reconnect: true
750
+ },
751
+ sttLifecycle: "continuous",
752
+ turnDetection: {
753
+ qualityProfile: "accent-heavy",
754
+ profile: "long-form"
755
+ }
756
+ },
757
+ "guided-intake": {
758
+ audioConditioning: {
759
+ enabled: true,
760
+ maxGain: 2.5,
761
+ noiseGateAttenuation: 0,
762
+ noiseGateThreshold: 0.004,
763
+ targetLevel: 0.08
764
+ },
765
+ capture: {
766
+ channelCount: 1,
767
+ sampleRateHz: 16000
768
+ },
769
+ connection: {
770
+ maxReconnectAttempts: 12,
771
+ pingInterval: 30000,
772
+ reconnect: true
773
+ },
774
+ sttLifecycle: "turn-scoped",
775
+ turnDetection: {
776
+ qualityProfile: "accent-heavy",
777
+ profile: "long-form"
778
+ }
779
+ },
780
+ "noisy-room": {
781
+ audioConditioning: {
782
+ enabled: true,
783
+ maxGain: 3,
784
+ noiseGateAttenuation: 0.12,
785
+ noiseGateThreshold: 0.006,
786
+ targetLevel: 0.085
787
+ },
788
+ capture: {
789
+ channelCount: 1,
790
+ sampleRateHz: 16000
791
+ },
792
+ connection: {
793
+ maxReconnectAttempts: 14,
794
+ pingInterval: 45000,
795
+ reconnect: true
796
+ },
797
+ sttLifecycle: "continuous",
798
+ turnDetection: {
799
+ qualityProfile: "noisy-room",
800
+ profile: "long-form",
801
+ silenceMs: 2100,
802
+ speechThreshold: 0.02,
803
+ transcriptStabilityMs: 1650
804
+ }
805
+ },
806
+ "pstn-balanced": {
807
+ audioConditioning: {
808
+ enabled: true,
809
+ maxGain: 2.8,
810
+ noiseGateAttenuation: 0.07,
811
+ noiseGateThreshold: 0.005,
812
+ targetLevel: 0.08
813
+ },
814
+ capture: {
815
+ channelCount: 1,
816
+ sampleRateHz: 16000
817
+ },
818
+ connection: {
819
+ maxReconnectAttempts: 14,
820
+ pingInterval: 45000,
821
+ reconnect: true
822
+ },
823
+ sttLifecycle: "continuous",
824
+ turnDetection: {
825
+ qualityProfile: "noisy-room",
826
+ profile: "long-form",
827
+ silenceMs: 660,
828
+ speechThreshold: 0.012,
829
+ transcriptStabilityMs: 300
830
+ }
831
+ },
832
+ "pstn-fast": {
833
+ audioConditioning: {
834
+ enabled: true,
835
+ maxGain: 2.75,
836
+ noiseGateAttenuation: 0.06,
837
+ noiseGateThreshold: 0.005,
838
+ targetLevel: 0.08
839
+ },
840
+ capture: {
841
+ channelCount: 1,
842
+ sampleRateHz: 16000
843
+ },
844
+ connection: {
845
+ maxReconnectAttempts: 14,
846
+ pingInterval: 45000,
847
+ reconnect: true
848
+ },
849
+ sttLifecycle: "continuous",
850
+ turnDetection: {
851
+ qualityProfile: "noisy-room",
852
+ profile: "long-form",
853
+ silenceMs: 620,
854
+ speechThreshold: 0.012,
855
+ transcriptStabilityMs: 280
856
+ }
857
+ },
858
+ reliability: {
859
+ audioConditioning: {
860
+ enabled: true,
861
+ maxGain: 2.9,
862
+ noiseGateAttenuation: 0.08,
863
+ noiseGateThreshold: 0.005,
864
+ targetLevel: 0.08
865
+ },
866
+ capture: {
867
+ channelCount: 1,
868
+ sampleRateHz: 16000
869
+ },
870
+ connection: {
871
+ maxReconnectAttempts: 14,
872
+ pingInterval: 45000,
873
+ reconnect: true
874
+ },
875
+ sttLifecycle: "continuous",
876
+ turnDetection: {
877
+ qualityProfile: "noisy-room",
878
+ profile: "long-form"
488
879
  }
489
- output[offsetResult] = count > 0 ? accum / count : 0;
490
- offsetResult += 1;
491
- offsetBuffer = nextOffsetBuffer;
492
880
  }
493
- return output;
494
881
  };
495
- var createMicrophoneCapture = (options) => {
496
- let audioContext = null;
497
- let sourceNode = null;
498
- let processorNode = null;
499
- let mediaStream = null;
500
- const start = async () => {
501
- if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
502
- throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
882
+ var resolveVoiceRuntimePreset = (name = "default") => {
883
+ const preset = PRESET_INPUTS[name];
884
+ return {
885
+ audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
886
+ capture: {
887
+ channelCount: preset.capture?.channelCount ?? 1,
888
+ sampleRateHz: preset.capture?.sampleRateHz ?? 16000
889
+ },
890
+ connection: {
891
+ ...preset.connection
892
+ },
893
+ name,
894
+ sttLifecycle: preset.sttLifecycle ?? "continuous",
895
+ turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
896
+ };
897
+ };
898
+
899
+ // src/client/controller.ts
900
+ var createInitialState2 = (stream) => ({
901
+ assistantAudio: [...stream.assistantAudio],
902
+ assistantTexts: [...stream.assistantTexts],
903
+ error: stream.error,
904
+ isConnected: stream.isConnected,
905
+ isRecording: false,
906
+ partial: stream.partial,
907
+ recordingError: null,
908
+ sessionId: stream.sessionId,
909
+ scenarioId: stream.scenarioId,
910
+ status: stream.status,
911
+ turns: [...stream.turns]
912
+ });
913
+ var createVoiceController = (path, options = {}) => {
914
+ const preset = resolveVoiceRuntimePreset(options.preset);
915
+ const stream = createVoiceStream(path, {
916
+ ...preset.connection,
917
+ ...options.connection
918
+ });
919
+ let capture = null;
920
+ let state = createInitialState2(stream);
921
+ const subscribers = new Set;
922
+ const notify = () => {
923
+ for (const subscriber of subscribers) {
924
+ subscriber();
503
925
  }
504
- const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? AudioContext;
505
- if (!AudioContextCtor) {
506
- throw new Error("Browser microphone capture requires AudioContext support.");
926
+ };
927
+ const sync = () => {
928
+ state = {
929
+ ...state,
930
+ assistantAudio: [...stream.assistantAudio],
931
+ assistantTexts: [...stream.assistantTexts],
932
+ error: stream.error,
933
+ isConnected: stream.isConnected,
934
+ partial: stream.partial,
935
+ sessionId: stream.sessionId,
936
+ scenarioId: stream.scenarioId,
937
+ status: stream.status,
938
+ turns: [...stream.turns]
939
+ };
940
+ if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
941
+ capture?.stop();
942
+ capture = null;
943
+ state = {
944
+ ...state,
945
+ isRecording: false
946
+ };
507
947
  }
508
- mediaStream = await navigator.mediaDevices.getUserMedia({
509
- audio: {
510
- channelCount: options.channelCount ?? 1
511
- }
948
+ notify();
949
+ };
950
+ const unsubscribeStream = stream.subscribe(sync);
951
+ sync();
952
+ const ensureCapture = () => {
953
+ if (capture) {
954
+ return capture;
955
+ }
956
+ capture = createMicrophoneCapture({
957
+ channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
958
+ onLevel: options.capture?.onLevel,
959
+ onAudio: (audio) => stream.sendAudio(audio),
960
+ sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
512
961
  });
513
- audioContext = new AudioContextCtor;
514
- sourceNode = audioContext.createMediaStreamSource(mediaStream);
515
- processorNode = audioContext.createScriptProcessor(4096, 1, 1);
516
- processorNode.onaudioprocess = (event) => {
517
- const channel = event.inputBuffer.getChannelData(0);
518
- const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
519
- options.onAudio(floatTo16BitPCM(downsampled));
962
+ return capture;
963
+ };
964
+ const stopRecording = () => {
965
+ capture?.stop();
966
+ capture = null;
967
+ state = {
968
+ ...state,
969
+ isRecording: false
520
970
  };
521
- sourceNode.connect(processorNode);
522
- processorNode.connect(audioContext.destination);
971
+ notify();
523
972
  };
524
- const stop = () => {
525
- processorNode?.disconnect();
526
- sourceNode?.disconnect();
527
- mediaStream?.getTracks().forEach((track) => track.stop());
528
- audioContext?.close();
529
- audioContext = null;
530
- mediaStream = null;
531
- processorNode = null;
532
- sourceNode = null;
973
+ const startRecording = async () => {
974
+ if (state.isRecording) {
975
+ return;
976
+ }
977
+ try {
978
+ state = {
979
+ ...state,
980
+ recordingError: null
981
+ };
982
+ notify();
983
+ await ensureCapture().start();
984
+ state = {
985
+ ...state,
986
+ isRecording: true
987
+ };
988
+ notify();
989
+ } catch (error) {
990
+ capture = null;
991
+ state = {
992
+ ...state,
993
+ isRecording: false,
994
+ recordingError: error instanceof Error ? error.message : String(error)
995
+ };
996
+ notify();
997
+ throw error;
998
+ }
999
+ };
1000
+ const close = () => {
1001
+ unsubscribeStream();
1002
+ stopRecording();
1003
+ stream.close();
1004
+ };
1005
+ return {
1006
+ bindHTMX(bindingOptions) {
1007
+ return bindVoiceHTMX(stream, bindingOptions);
1008
+ },
1009
+ close,
1010
+ endTurn: () => stream.endTurn(),
1011
+ get error() {
1012
+ return state.error;
1013
+ },
1014
+ getServerSnapshot: () => state,
1015
+ getSnapshot: () => state,
1016
+ get isConnected() {
1017
+ return state.isConnected;
1018
+ },
1019
+ get isRecording() {
1020
+ return state.isRecording;
1021
+ },
1022
+ get partial() {
1023
+ return state.partial;
1024
+ },
1025
+ get recordingError() {
1026
+ return state.recordingError;
1027
+ },
1028
+ sendAudio: (audio) => stream.sendAudio(audio),
1029
+ get sessionId() {
1030
+ return state.sessionId;
1031
+ },
1032
+ get scenarioId() {
1033
+ return state.scenarioId;
1034
+ },
1035
+ startRecording,
1036
+ get status() {
1037
+ return state.status;
1038
+ },
1039
+ stopRecording,
1040
+ subscribe: (subscriber) => {
1041
+ subscribers.add(subscriber);
1042
+ return () => {
1043
+ subscribers.delete(subscriber);
1044
+ };
1045
+ },
1046
+ toggleRecording: async () => {
1047
+ if (state.isRecording) {
1048
+ stopRecording();
1049
+ return;
1050
+ }
1051
+ await startRecording();
1052
+ },
1053
+ get turns() {
1054
+ return state.turns;
1055
+ },
1056
+ get assistantTexts() {
1057
+ return state.assistantTexts;
1058
+ },
1059
+ get assistantAudio() {
1060
+ return state.assistantAudio;
1061
+ }
533
1062
  };
534
- return { start, stop };
535
1063
  };
536
1064
 
537
1065
  // src/client/htmxBootstrap.ts
@@ -540,10 +1068,10 @@ var VOICE_WAVE_WIDTH = 320;
540
1068
  var VOICE_WAVE_HEIGHT = 88;
541
1069
  var DEFAULT_GUIDED_LABEL = "Guided test";
542
1070
  var DEFAULT_GENERAL_LABEL = "General recording";
543
- var DEFAULT_IDLE_LEAD = "Pick a mode to begin the demo.";
1071
+ var DEFAULT_IDLE_LEAD = "Pick a scenario to begin the demo.";
544
1072
  var DEFAULT_GUIDED_LEAD = "I can walk you through a short guided voice test.";
545
1073
  var DEFAULT_GENERAL_LEAD = "I can capture one freeform recording and confirm that it landed.";
546
- var DEFAULT_IDLE_PROMPT = "Choose a mode to begin. Guided test asks follow-up prompts. General recording just captures what you say.";
1074
+ var DEFAULT_IDLE_PROMPT = "Choose a scenario to begin. Guided test asks follow-up prompts. General recording just captures what you say.";
547
1075
  var DEFAULT_GENERAL_IDLE_PROMPT = "Click Start general recording to capture one freeform answer.";
548
1076
  var DEFAULT_GENERAL_LIVE_PROMPT = "Speak freely. When you pause, the recording will be captured.";
549
1077
  var DEFAULT_GENERAL_COMPLETE_PROMPT = "Recording saved. Start again if you want another capture.";
@@ -631,23 +1159,6 @@ var createVoiceWavePath = (levels, width = VOICE_WAVE_WIDTH, height = VOICE_WAVE
631
1159
  }
632
1160
  return path;
633
1161
  };
634
- var getPcmLevel = (audio) => {
635
- const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
636
- if (bytes.byteLength < 2) {
637
- return 0;
638
- }
639
- const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
640
- if (samples.length === 0) {
641
- return 0;
642
- }
643
- let sumSquares = 0;
644
- for (const sample of samples) {
645
- const normalized = sample / 32768;
646
- sumSquares += normalized * normalized;
647
- }
648
- const rms = Math.sqrt(sumSquares / samples.length);
649
- return clamp(rms * 5.5, 0, 1);
650
- };
651
1162
  var parsePromptList = (value) => {
652
1163
  if (!value) {
653
1164
  return DEFAULT_GUIDED_PROMPTS;
@@ -704,35 +1215,6 @@ var resolvePromptMessage = (input) => {
704
1215
  }
705
1216
  return input.guidedPrompts[input.turnCount] ?? DEFAULT_GUIDED_OVERFLOW_PROMPT;
706
1217
  };
707
- var createDemoMicrophone = (onAudio, onLevel) => {
708
- let capture = null;
709
- return {
710
- start: async () => {
711
- if (capture) {
712
- return;
713
- }
714
- const nextCapture = createMicrophoneCapture({
715
- onAudio: (audio) => {
716
- onLevel(getPcmLevel(audio));
717
- onAudio(audio);
718
- },
719
- sampleRateHz: 16000
720
- });
721
- capture = nextCapture;
722
- try {
723
- await capture.start();
724
- } catch (error) {
725
- capture = null;
726
- throw error;
727
- }
728
- },
729
- stop: () => {
730
- capture?.stop();
731
- capture = null;
732
- onLevel(0);
733
- }
734
- };
735
- };
736
1218
  var initVoiceHTMXRoot = (root) => {
737
1219
  const guidedPath = root.dataset.voiceGuidedPath;
738
1220
  const generalPath = root.dataset.voiceGeneralPath;
@@ -755,12 +1237,26 @@ var initVoiceHTMXRoot = (root) => {
755
1237
  const voiceMonitorCopy = requireElement(root, root.dataset.voiceMonitorCopy, HTMLElement, "voice-monitor-copy");
756
1238
  const voiceWaveGlow = requireElement(root, root.dataset.voiceWaveGlow, SVGPathElement, "voice-wave-glow");
757
1239
  const voiceWavePath = requireElement(root, root.dataset.voiceWavePath, SVGPathElement, "voice-wave-path");
758
- const guidedVoice = createVoiceStream(guidedPath);
759
- const generalVoice = createVoiceStream(generalPath);
760
- const stopGuidedBinding = bindVoiceHTMX(guidedVoice, { element: syncElement });
761
- const stopGeneralBinding = bindVoiceHTMX(generalVoice, {
762
- element: syncElement
1240
+ const guidedVoice = createVoiceController(guidedPath, {
1241
+ capture: {
1242
+ onLevel: (level) => {
1243
+ waveLevels = pushVoiceWaveLevel(waveLevels, level);
1244
+ renderWave();
1245
+ }
1246
+ },
1247
+ preset: "guided-intake"
1248
+ });
1249
+ const generalVoice = createVoiceController(generalPath, {
1250
+ capture: {
1251
+ onLevel: (level) => {
1252
+ waveLevels = pushVoiceWaveLevel(waveLevels, level);
1253
+ renderWave();
1254
+ }
1255
+ },
1256
+ preset: "dictation"
763
1257
  });
1258
+ const stopGuidedBinding = guidedVoice.bindHTMX({ element: syncElement });
1259
+ const stopGeneralBinding = generalVoice.bindHTMX({ element: syncElement });
764
1260
  let activeMode = null;
765
1261
  let hasStartedModes = {
766
1262
  general: false,
@@ -821,12 +1317,8 @@ var initVoiceHTMXRoot = (root) => {
821
1317
  </article>` : ""}`;
822
1318
  renderWave();
823
1319
  };
824
- const microphone = createDemoMicrophone((audio) => currentVoice().sendAudio(audio), (level) => {
825
- waveLevels = pushVoiceWaveLevel(waveLevels, level);
826
- renderWave();
827
- });
828
1320
  const stopMic = () => {
829
- microphone.stop();
1321
+ currentVoice().stopRecording();
830
1322
  isCapturing = false;
831
1323
  micError = null;
832
1324
  waveLevels = createInitialVoiceWaveLevels();
@@ -839,12 +1331,12 @@ var initVoiceHTMXRoot = (root) => {
839
1331
  [mode]: true
840
1332
  };
841
1333
  try {
842
- await microphone.start();
1334
+ await currentVoice().startRecording();
843
1335
  micError = null;
844
1336
  isCapturing = true;
845
1337
  render();
846
1338
  } catch (error) {
847
- microphone.stop();
1339
+ currentVoice().stopRecording();
848
1340
  isCapturing = false;
849
1341
  waveLevels = createInitialVoiceWaveLevels();
850
1342
  micError = formatErrorMessage(error);
@@ -863,7 +1355,8 @@ var initVoiceHTMXRoot = (root) => {
863
1355
  stopMic();
864
1356
  });
865
1357
  window.addEventListener("beforeunload", () => {
866
- microphone.stop();
1358
+ guidedVoice.stopRecording();
1359
+ generalVoice.stopRecording();
867
1360
  stopGuidedBinding();
868
1361
  stopGeneralBinding();
869
1362
  guidedVoice.close();