@absolutejs/voice 0.0.20 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +387 -4
  2. package/dist/angular/index.d.ts +1 -0
  3. package/dist/angular/index.js +669 -3
  4. package/dist/angular/voice-controller.service.d.ts +21 -0
  5. package/dist/audioConditioning.d.ts +3 -0
  6. package/dist/client/actions.d.ts +7 -0
  7. package/dist/client/connection.d.ts +5 -0
  8. package/dist/client/controller.d.ts +2 -0
  9. package/dist/client/htmxBootstrap.js +576 -167
  10. package/dist/client/index.d.ts +1 -0
  11. package/dist/client/index.js +486 -3
  12. package/dist/client/microphone.d.ts +4 -2
  13. package/dist/correction.d.ts +16 -0
  14. package/dist/index.d.ts +4 -0
  15. package/dist/index.js +1314 -283
  16. package/dist/presets.d.ts +13 -0
  17. package/dist/react/index.d.ts +1 -0
  18. package/dist/react/index.js +642 -3
  19. package/dist/react/useVoiceController.d.ts +20 -0
  20. package/dist/react/useVoiceStream.d.ts +1 -0
  21. package/dist/store.d.ts +2 -2
  22. package/dist/svelte/index.d.ts +1 -0
  23. package/dist/svelte/index.js +607 -3
  24. package/dist/testing/benchmark.d.ts +36 -0
  25. package/dist/testing/index.js +1453 -241
  26. package/dist/testing/sessionBenchmark.d.ts +67 -2
  27. package/dist/testing/stt.d.ts +1 -0
  28. package/dist/turnDetection.d.ts +5 -1
  29. package/dist/turnProfiles.d.ts +6 -0
  30. package/dist/types.d.ts +198 -8
  31. package/dist/vue/index.d.ts +1 -0
  32. package/dist/vue/index.js +660 -3
  33. package/dist/vue/useVoiceController.d.ts +19 -0
  34. package/fixtures/README.md +9 -0
  35. package/fixtures/manifest.json +59 -1
  36. package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
  37. package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
  38. package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
  39. package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
  40. package/package.json +21 -1
@@ -1,3 +1,145 @@
1
+ // src/client/htmx.ts
2
+ var DEFAULT_EVENT_NAME = "voice-refresh";
3
+ var DEFAULT_QUERY_PARAM = "sessionId";
4
+ var resolveElement = (input) => {
5
+ if (typeof input !== "string") {
6
+ return input;
7
+ }
8
+ return document.querySelector(input);
9
+ };
10
+ var buildRoute = (element, route, queryParam, sessionId) => {
11
+ const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
12
+ if (!baseRoute) {
13
+ return "";
14
+ }
15
+ const url = new URL(baseRoute, window.location.origin);
16
+ if (sessionId) {
17
+ url.searchParams.set(queryParam, sessionId);
18
+ } else {
19
+ url.searchParams.delete(queryParam);
20
+ }
21
+ return `${url.pathname}${url.search}${url.hash}`;
22
+ };
23
+ var bindVoiceHTMX = (stream, options) => {
24
+ if (typeof window === "undefined" || typeof document === "undefined") {
25
+ return () => {};
26
+ }
27
+ const element = resolveElement(options.element);
28
+ if (!element) {
29
+ return () => {};
30
+ }
31
+ const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
32
+ const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
33
+ const sync = () => {
34
+ const htmxWindow = window;
35
+ const nextRoute = buildRoute(element, options.route, queryParam, stream.sessionId);
36
+ if (nextRoute) {
37
+ element.setAttribute("hx-get", nextRoute);
38
+ }
39
+ htmxWindow.htmx?.process?.(element);
40
+ htmxWindow.htmx?.trigger?.(element, eventName);
41
+ };
42
+ const unsubscribe = stream.subscribe(sync);
43
+ sync();
44
+ return () => {
45
+ unsubscribe();
46
+ };
47
+ };
48
+
49
+ // src/client/microphone.ts
50
+ var clampSample = (value) => Math.max(-1, Math.min(1, value));
51
+ var floatTo16BitPCM = (input) => {
52
+ const output = new Int16Array(input.length);
53
+ for (let index = 0;index < input.length; index += 1) {
54
+ const sample = clampSample(input[index] ?? 0);
55
+ output[index] = sample < 0 ? sample * 32768 : sample * 32767;
56
+ }
57
+ return new Uint8Array(output.buffer);
58
+ };
59
+ var getPcmLevel = (audio) => {
60
+ const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
61
+ if (bytes.byteLength < 2) {
62
+ return 0;
63
+ }
64
+ const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
65
+ if (samples.length === 0) {
66
+ return 0;
67
+ }
68
+ let sumSquares = 0;
69
+ for (const sample of samples) {
70
+ const normalized = sample / 32768;
71
+ sumSquares += normalized * normalized;
72
+ }
73
+ return Math.min(1, Math.max(0, Math.sqrt(sumSquares / samples.length) * 5.5));
74
+ };
75
+ var downsampleBuffer = (input, sourceRate, targetRate) => {
76
+ if (sourceRate === targetRate) {
77
+ return input;
78
+ }
79
+ const ratio = sourceRate / targetRate;
80
+ const length = Math.round(input.length / ratio);
81
+ const output = new Float32Array(length);
82
+ let offsetResult = 0;
83
+ let offsetBuffer = 0;
84
+ while (offsetResult < output.length) {
85
+ const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
86
+ let accum = 0;
87
+ let count = 0;
88
+ for (let index = offsetBuffer;index < nextOffsetBuffer && index < input.length; index += 1) {
89
+ accum += input[index] ?? 0;
90
+ count += 1;
91
+ }
92
+ output[offsetResult] = count > 0 ? accum / count : 0;
93
+ offsetResult += 1;
94
+ offsetBuffer = nextOffsetBuffer;
95
+ }
96
+ return output;
97
+ };
98
+ var createMicrophoneCapture = (options) => {
99
+ let audioContext = null;
100
+ let sourceNode = null;
101
+ let processorNode = null;
102
+ let mediaStream = null;
103
+ const start = async () => {
104
+ if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
105
+ throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
106
+ }
107
+ const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? AudioContext;
108
+ if (!AudioContextCtor) {
109
+ throw new Error("Browser microphone capture requires AudioContext support.");
110
+ }
111
+ mediaStream = await navigator.mediaDevices.getUserMedia({
112
+ audio: {
113
+ channelCount: options.channelCount ?? 1
114
+ }
115
+ });
116
+ audioContext = new AudioContextCtor;
117
+ sourceNode = audioContext.createMediaStreamSource(mediaStream);
118
+ processorNode = audioContext.createScriptProcessor(4096, 1, 1);
119
+ processorNode.onaudioprocess = (event) => {
120
+ const channel = event.inputBuffer.getChannelData(0);
121
+ const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
122
+ const pcm = floatTo16BitPCM(downsampled);
123
+ options.onLevel?.(getPcmLevel(pcm));
124
+ options.onAudio(pcm);
125
+ };
126
+ sourceNode.connect(processorNode);
127
+ processorNode.connect(audioContext.destination);
128
+ };
129
+ const stop = () => {
130
+ processorNode?.disconnect();
131
+ sourceNode?.disconnect();
132
+ mediaStream?.getTracks().forEach((track) => track.stop());
133
+ audioContext?.close();
134
+ options.onLevel?.(0);
135
+ audioContext = null;
136
+ mediaStream = null;
137
+ processorNode = null;
138
+ sourceNode = null;
139
+ };
140
+ return { start, stop };
141
+ };
142
+
1
143
  // src/client/actions.ts
2
144
  var normalizeErrorMessage = (value) => {
3
145
  if (typeof value === "string" && value.trim()) {
@@ -56,6 +198,7 @@ var serverMessageToAction = (message) => {
56
198
  case "session":
57
199
  return {
58
200
  sessionId: message.sessionId,
201
+ scenarioId: message.scenarioId,
59
202
  status: message.status,
60
203
  type: "session"
61
204
  };
@@ -76,24 +219,30 @@ var WS_NORMAL_CLOSURE = 1000;
76
219
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
77
220
  var DEFAULT_PING_INTERVAL = 30000;
78
221
  var RECONNECT_DELAY_MS = 500;
222
+ var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
79
223
  var noop = () => {};
80
224
  var noopUnsubscribe = () => noop;
81
225
  var NOOP_CONNECTION = {
226
+ start: () => {},
82
227
  close: noop,
83
228
  endTurn: noop,
84
229
  getReadyState: () => WS_CLOSED,
230
+ getScenarioId: () => "",
85
231
  getSessionId: () => "",
86
232
  send: noop,
87
233
  sendAudio: noop,
88
234
  subscribe: noopUnsubscribe
89
235
  };
90
236
  var createSessionId = () => crypto.randomUUID();
91
- var buildWsUrl = (path, sessionId) => {
237
+ var buildWsUrl = (path, sessionId, scenarioId) => {
92
238
  const { hostname, port, protocol } = window.location;
93
239
  const wsProtocol = protocol === "https:" ? "wss:" : "ws:";
94
240
  const portSuffix = port ? `:${port}` : "";
95
241
  const url = new URL(`${wsProtocol}//${hostname}${portSuffix}${path}`);
96
242
  url.searchParams.set("sessionId", sessionId);
243
+ if (scenarioId) {
244
+ url.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
245
+ }
97
246
  return url.toString();
98
247
  };
99
248
  var isVoiceServerMessage = (value) => {
@@ -136,6 +285,7 @@ var createVoiceConnection = (path, options = {}) => {
136
285
  const state = {
137
286
  isConnected: false,
138
287
  pendingMessages: [],
288
+ scenarioId: options.scenarioId ?? null,
139
289
  pingInterval: null,
140
290
  reconnectAttempts: 0,
141
291
  reconnectTimeout: null,
@@ -173,13 +323,14 @@ var createVoiceConnection = (path, options = {}) => {
173
323
  }, RECONNECT_DELAY_MS);
174
324
  };
175
325
  const connect = () => {
176
- const ws = new WebSocket(buildWsUrl(path, state.sessionId));
326
+ const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
177
327
  ws.binaryType = "arraybuffer";
178
328
  ws.onopen = () => {
179
329
  state.isConnected = true;
180
330
  state.reconnectAttempts = 0;
181
331
  flushPendingMessages();
182
332
  listeners.forEach((listener) => listener({
333
+ scenarioId: state.scenarioId ?? undefined,
183
334
  sessionId: state.sessionId,
184
335
  status: "active",
185
336
  type: "session"
@@ -197,6 +348,7 @@ var createVoiceConnection = (path, options = {}) => {
197
348
  }
198
349
  if (parsed.type === "session") {
199
350
  state.sessionId = parsed.sessionId;
351
+ state.scenarioId = parsed.scenarioId ?? state.scenarioId;
200
352
  }
201
353
  listeners.forEach((listener) => listener(parsed));
202
354
  };
@@ -220,6 +372,19 @@ var createVoiceConnection = (path, options = {}) => {
220
372
  const send = (message) => {
221
373
  sendSerialized(JSON.stringify(message));
222
374
  };
375
+ const start = (input = {}) => {
376
+ if (input.sessionId) {
377
+ state.sessionId = input.sessionId;
378
+ }
379
+ if (input.scenarioId) {
380
+ state.scenarioId = input.scenarioId;
381
+ }
382
+ send({
383
+ type: "start",
384
+ sessionId: state.sessionId,
385
+ scenarioId: state.scenarioId ?? undefined
386
+ });
387
+ };
223
388
  const sendAudio = (audio) => {
224
389
  sendSerialized(audio);
225
390
  };
@@ -243,9 +408,11 @@ var createVoiceConnection = (path, options = {}) => {
243
408
  };
244
409
  connect();
245
410
  return {
411
+ start,
246
412
  close,
247
413
  endTurn,
248
414
  getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
415
+ getScenarioId: () => state.scenarioId ?? "",
249
416
  getSessionId: () => state.sessionId,
250
417
  send,
251
418
  sendAudio,
@@ -258,6 +425,7 @@ var createInitialState = () => ({
258
425
  assistantTexts: [],
259
426
  error: null,
260
427
  isConnected: false,
428
+ scenarioId: null,
261
429
  partial: "",
262
430
  sessionId: null,
263
431
  status: "idle",
@@ -319,6 +487,7 @@ var createVoiceStreamStore = () => {
319
487
  state = {
320
488
  ...state,
321
489
  error: null,
490
+ scenarioId: action.scenarioId ?? state.scenarioId,
322
491
  isConnected: action.status === "active",
323
492
  sessionId: action.sessionId,
324
493
  status: action.status
@@ -352,6 +521,12 @@ var createVoiceStream = (path, options = {}) => {
352
521
  const connection = createVoiceConnection(path, options);
353
522
  const store = createVoiceStreamStore();
354
523
  const subscribers = new Set;
524
+ const start = (input) => Promise.resolve().then(() => {
525
+ if (!input?.sessionId && !input?.scenarioId) {
526
+ return;
527
+ }
528
+ connection.start(input);
529
+ });
355
530
  const notify = () => {
356
531
  subscribers.forEach((subscriber) => subscriber());
357
532
  };
@@ -384,6 +559,10 @@ var createVoiceStream = (path, options = {}) => {
384
559
  get isConnected() {
385
560
  return store.getSnapshot().isConnected;
386
561
  },
562
+ get scenarioId() {
563
+ return store.getSnapshot().scenarioId;
564
+ },
565
+ start,
387
566
  get partial() {
388
567
  return store.getSnapshot().partial;
389
568
  },
@@ -411,127 +590,392 @@ var createVoiceStream = (path, options = {}) => {
411
590
  };
412
591
  };
413
592
 
414
- // src/client/htmx.ts
415
- var DEFAULT_EVENT_NAME = "voice-refresh";
416
- var DEFAULT_QUERY_PARAM = "sessionId";
417
- var resolveElement = (input) => {
418
- if (typeof input !== "string") {
419
- return input;
593
+ // src/audioConditioning.ts
594
+ var DEFAULT_TARGET_LEVEL = 0.08;
595
+ var DEFAULT_MAX_GAIN = 3;
596
+ var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
597
+ var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
598
+ var resolveAudioConditioningConfig = (config) => {
599
+ if (!config || config.enabled === false) {
600
+ return;
420
601
  }
421
- return document.querySelector(input);
602
+ return {
603
+ enabled: true,
604
+ maxGain: config.maxGain ?? DEFAULT_MAX_GAIN,
605
+ noiseGateAttenuation: config.noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
606
+ noiseGateThreshold: config.noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
607
+ targetLevel: config.targetLevel ?? DEFAULT_TARGET_LEVEL
608
+ };
422
609
  };
423
- var buildRoute = (element, route, queryParam, sessionId) => {
424
- const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
425
- if (!baseRoute) {
426
- return "";
427
- }
428
- const url = new URL(baseRoute, window.location.origin);
429
- if (sessionId) {
430
- url.searchParams.set(queryParam, sessionId);
431
- } else {
432
- url.searchParams.delete(queryParam);
610
+
611
+ // src/turnProfiles.ts
612
+ var TURN_PROFILE_DEFAULTS = {
613
+ balanced: {
614
+ qualityProfile: "general",
615
+ silenceMs: 1400,
616
+ speechThreshold: 0.012,
617
+ transcriptStabilityMs: 1000
618
+ },
619
+ fast: {
620
+ qualityProfile: "general",
621
+ silenceMs: 700,
622
+ speechThreshold: 0.015,
623
+ transcriptStabilityMs: 450
624
+ },
625
+ "long-form": {
626
+ qualityProfile: "general",
627
+ silenceMs: 2200,
628
+ speechThreshold: 0.01,
629
+ transcriptStabilityMs: 1500
433
630
  }
434
- return `${url.pathname}${url.search}${url.hash}`;
435
631
  };
436
- var bindVoiceHTMX = (stream, options) => {
437
- if (typeof window === "undefined" || typeof document === "undefined") {
438
- return () => {};
439
- }
440
- const element = resolveElement(options.element);
441
- if (!element) {
442
- return () => {};
632
+ var QUALITY_PROFILE_DEFAULTS = {
633
+ general: {},
634
+ "accent-heavy": {
635
+ silenceMs: 1200,
636
+ speechThreshold: 0.01,
637
+ transcriptStabilityMs: 1200
638
+ },
639
+ "noisy-room": {
640
+ silenceMs: 2000,
641
+ speechThreshold: 0.02,
642
+ transcriptStabilityMs: 1600
643
+ },
644
+ "short-command": {
645
+ silenceMs: 500,
646
+ speechThreshold: 0.016,
647
+ transcriptStabilityMs: 420
443
648
  }
444
- const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
445
- const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
446
- const sync = () => {
447
- const htmxWindow = window;
448
- const nextRoute = buildRoute(element, options.route, queryParam, stream.sessionId);
449
- if (nextRoute) {
450
- element.setAttribute("hx-get", nextRoute);
451
- }
452
- htmxWindow.htmx?.process?.(element);
453
- htmxWindow.htmx?.trigger?.(element, eventName);
454
- };
455
- const unsubscribe = stream.subscribe(sync);
456
- sync();
457
- return () => {
458
- unsubscribe();
649
+ };
650
+ var DEFAULT_TURN_PROFILE = "fast";
651
+ var DEFAULT_QUALITY_PROFILE = "general";
652
+ var resolveTurnDetectionConfig = (config) => {
653
+ const profile = config?.profile ?? DEFAULT_TURN_PROFILE;
654
+ const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
655
+ const preset = TURN_PROFILE_DEFAULTS[profile];
656
+ const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
657
+ return {
658
+ profile,
659
+ qualityProfile,
660
+ silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
661
+ speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
662
+ transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
459
663
  };
460
664
  };
461
665
 
462
- // src/client/microphone.ts
463
- var clampSample = (value) => Math.max(-1, Math.min(1, value));
464
- var floatTo16BitPCM = (input) => {
465
- const output = new Int16Array(input.length);
466
- for (let index = 0;index < input.length; index += 1) {
467
- const sample = clampSample(input[index] ?? 0);
468
- output[index] = sample < 0 ? sample * 32768 : sample * 32767;
469
- }
470
- return new Uint8Array(output.buffer);
471
- };
472
- var downsampleBuffer = (input, sourceRate, targetRate) => {
473
- if (sourceRate === targetRate) {
474
- return input;
475
- }
476
- const ratio = sourceRate / targetRate;
477
- const length = Math.round(input.length / ratio);
478
- const output = new Float32Array(length);
479
- let offsetResult = 0;
480
- let offsetBuffer = 0;
481
- while (offsetResult < output.length) {
482
- const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
483
- let accum = 0;
484
- let count = 0;
485
- for (let index = offsetBuffer;index < nextOffsetBuffer && index < input.length; index += 1) {
486
- accum += input[index] ?? 0;
487
- count += 1;
666
+ // src/presets.ts
667
+ var PRESET_INPUTS = {
668
+ chat: {
669
+ audioConditioning: {
670
+ enabled: true,
671
+ maxGain: 2.5,
672
+ noiseGateAttenuation: 0,
673
+ noiseGateThreshold: 0.004,
674
+ targetLevel: 0.08
675
+ },
676
+ capture: {
677
+ channelCount: 1,
678
+ sampleRateHz: 16000
679
+ },
680
+ connection: {
681
+ maxReconnectAttempts: 10,
682
+ pingInterval: 30000,
683
+ reconnect: true
684
+ },
685
+ sttLifecycle: "continuous",
686
+ turnDetection: {
687
+ qualityProfile: "short-command",
688
+ profile: "balanced"
689
+ }
690
+ },
691
+ default: {
692
+ capture: {
693
+ channelCount: 1,
694
+ sampleRateHz: 16000
695
+ },
696
+ connection: {
697
+ maxReconnectAttempts: 10,
698
+ pingInterval: 30000,
699
+ reconnect: true
700
+ },
701
+ sttLifecycle: "continuous",
702
+ turnDetection: {
703
+ qualityProfile: "general",
704
+ profile: "fast"
705
+ }
706
+ },
707
+ dictation: {
708
+ audioConditioning: {
709
+ enabled: true,
710
+ maxGain: 2.25,
711
+ noiseGateAttenuation: 0.05,
712
+ noiseGateThreshold: 0.003,
713
+ targetLevel: 0.08
714
+ },
715
+ capture: {
716
+ channelCount: 1,
717
+ sampleRateHz: 16000
718
+ },
719
+ connection: {
720
+ maxReconnectAttempts: 12,
721
+ pingInterval: 30000,
722
+ reconnect: true
723
+ },
724
+ sttLifecycle: "continuous",
725
+ turnDetection: {
726
+ qualityProfile: "accent-heavy",
727
+ profile: "long-form"
728
+ }
729
+ },
730
+ "guided-intake": {
731
+ audioConditioning: {
732
+ enabled: true,
733
+ maxGain: 2.5,
734
+ noiseGateAttenuation: 0,
735
+ noiseGateThreshold: 0.004,
736
+ targetLevel: 0.08
737
+ },
738
+ capture: {
739
+ channelCount: 1,
740
+ sampleRateHz: 16000
741
+ },
742
+ connection: {
743
+ maxReconnectAttempts: 12,
744
+ pingInterval: 30000,
745
+ reconnect: true
746
+ },
747
+ sttLifecycle: "turn-scoped",
748
+ turnDetection: {
749
+ qualityProfile: "accent-heavy",
750
+ profile: "long-form"
751
+ }
752
+ },
753
+ "noisy-room": {
754
+ audioConditioning: {
755
+ enabled: true,
756
+ maxGain: 3,
757
+ noiseGateAttenuation: 0.12,
758
+ noiseGateThreshold: 0.006,
759
+ targetLevel: 0.085
760
+ },
761
+ capture: {
762
+ channelCount: 1,
763
+ sampleRateHz: 16000
764
+ },
765
+ connection: {
766
+ maxReconnectAttempts: 14,
767
+ pingInterval: 45000,
768
+ reconnect: true
769
+ },
770
+ sttLifecycle: "continuous",
771
+ turnDetection: {
772
+ qualityProfile: "noisy-room",
773
+ profile: "long-form",
774
+ silenceMs: 2100,
775
+ speechThreshold: 0.02,
776
+ transcriptStabilityMs: 1650
777
+ }
778
+ },
779
+ reliability: {
780
+ audioConditioning: {
781
+ enabled: true,
782
+ maxGain: 2.9,
783
+ noiseGateAttenuation: 0.08,
784
+ noiseGateThreshold: 0.005,
785
+ targetLevel: 0.08
786
+ },
787
+ capture: {
788
+ channelCount: 1,
789
+ sampleRateHz: 16000
790
+ },
791
+ connection: {
792
+ maxReconnectAttempts: 14,
793
+ pingInterval: 45000,
794
+ reconnect: true
795
+ },
796
+ sttLifecycle: "continuous",
797
+ turnDetection: {
798
+ qualityProfile: "noisy-room",
799
+ profile: "long-form"
488
800
  }
489
- output[offsetResult] = count > 0 ? accum / count : 0;
490
- offsetResult += 1;
491
- offsetBuffer = nextOffsetBuffer;
492
801
  }
493
- return output;
494
802
  };
495
- var createMicrophoneCapture = (options) => {
496
- let audioContext = null;
497
- let sourceNode = null;
498
- let processorNode = null;
499
- let mediaStream = null;
500
- const start = async () => {
501
- if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
502
- throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
803
+ var resolveVoiceRuntimePreset = (name = "default") => {
804
+ const preset = PRESET_INPUTS[name];
805
+ return {
806
+ audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
807
+ capture: {
808
+ channelCount: preset.capture?.channelCount ?? 1,
809
+ sampleRateHz: preset.capture?.sampleRateHz ?? 16000
810
+ },
811
+ connection: {
812
+ ...preset.connection
813
+ },
814
+ name,
815
+ sttLifecycle: preset.sttLifecycle ?? "continuous",
816
+ turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
817
+ };
818
+ };
819
+
820
+ // src/client/controller.ts
821
+ var createInitialState2 = (stream) => ({
822
+ assistantTexts: [...stream.assistantTexts],
823
+ error: stream.error,
824
+ isConnected: stream.isConnected,
825
+ isRecording: false,
826
+ partial: stream.partial,
827
+ recordingError: null,
828
+ sessionId: stream.sessionId,
829
+ scenarioId: stream.scenarioId,
830
+ status: stream.status,
831
+ turns: [...stream.turns]
832
+ });
833
+ var createVoiceController = (path, options = {}) => {
834
+ const preset = resolveVoiceRuntimePreset(options.preset);
835
+ const stream = createVoiceStream(path, {
836
+ ...preset.connection,
837
+ ...options.connection
838
+ });
839
+ let capture = null;
840
+ let state = createInitialState2(stream);
841
+ const subscribers = new Set;
842
+ const notify = () => {
843
+ for (const subscriber of subscribers) {
844
+ subscriber();
503
845
  }
504
- const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? AudioContext;
505
- if (!AudioContextCtor) {
506
- throw new Error("Browser microphone capture requires AudioContext support.");
846
+ };
847
+ const sync = () => {
848
+ state = {
849
+ ...state,
850
+ assistantTexts: [...stream.assistantTexts],
851
+ error: stream.error,
852
+ isConnected: stream.isConnected,
853
+ partial: stream.partial,
854
+ sessionId: stream.sessionId,
855
+ scenarioId: stream.scenarioId,
856
+ status: stream.status,
857
+ turns: [...stream.turns]
858
+ };
859
+ if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
860
+ capture?.stop();
861
+ capture = null;
862
+ state = {
863
+ ...state,
864
+ isRecording: false
865
+ };
507
866
  }
508
- mediaStream = await navigator.mediaDevices.getUserMedia({
509
- audio: {
510
- channelCount: options.channelCount ?? 1
511
- }
867
+ notify();
868
+ };
869
+ const unsubscribeStream = stream.subscribe(sync);
870
+ sync();
871
+ const ensureCapture = () => {
872
+ if (capture) {
873
+ return capture;
874
+ }
875
+ capture = createMicrophoneCapture({
876
+ channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
877
+ onLevel: options.capture?.onLevel,
878
+ onAudio: (audio) => stream.sendAudio(audio),
879
+ sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
512
880
  });
513
- audioContext = new AudioContextCtor;
514
- sourceNode = audioContext.createMediaStreamSource(mediaStream);
515
- processorNode = audioContext.createScriptProcessor(4096, 1, 1);
516
- processorNode.onaudioprocess = (event) => {
517
- const channel = event.inputBuffer.getChannelData(0);
518
- const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
519
- options.onAudio(floatTo16BitPCM(downsampled));
881
+ return capture;
882
+ };
883
+ const stopRecording = () => {
884
+ capture?.stop();
885
+ capture = null;
886
+ state = {
887
+ ...state,
888
+ isRecording: false
520
889
  };
521
- sourceNode.connect(processorNode);
522
- processorNode.connect(audioContext.destination);
890
+ notify();
523
891
  };
524
- const stop = () => {
525
- processorNode?.disconnect();
526
- sourceNode?.disconnect();
527
- mediaStream?.getTracks().forEach((track) => track.stop());
528
- audioContext?.close();
529
- audioContext = null;
530
- mediaStream = null;
531
- processorNode = null;
532
- sourceNode = null;
892
+ const startRecording = async () => {
893
+ if (state.isRecording) {
894
+ return;
895
+ }
896
+ try {
897
+ state = {
898
+ ...state,
899
+ recordingError: null
900
+ };
901
+ notify();
902
+ await ensureCapture().start();
903
+ state = {
904
+ ...state,
905
+ isRecording: true
906
+ };
907
+ notify();
908
+ } catch (error) {
909
+ capture = null;
910
+ state = {
911
+ ...state,
912
+ isRecording: false,
913
+ recordingError: error instanceof Error ? error.message : String(error)
914
+ };
915
+ notify();
916
+ throw error;
917
+ }
918
+ };
919
+ const close = () => {
920
+ unsubscribeStream();
921
+ stopRecording();
922
+ stream.close();
923
+ };
924
+ return {
925
+ bindHTMX(bindingOptions) {
926
+ return bindVoiceHTMX(stream, bindingOptions);
927
+ },
928
+ close,
929
+ endTurn: () => stream.endTurn(),
930
+ get error() {
931
+ return state.error;
932
+ },
933
+ getServerSnapshot: () => state,
934
+ getSnapshot: () => state,
935
+ get isConnected() {
936
+ return state.isConnected;
937
+ },
938
+ get isRecording() {
939
+ return state.isRecording;
940
+ },
941
+ get partial() {
942
+ return state.partial;
943
+ },
944
+ get recordingError() {
945
+ return state.recordingError;
946
+ },
947
+ sendAudio: (audio) => stream.sendAudio(audio),
948
+ get sessionId() {
949
+ return state.sessionId;
950
+ },
951
+ get scenarioId() {
952
+ return state.scenarioId;
953
+ },
954
+ startRecording,
955
+ get status() {
956
+ return state.status;
957
+ },
958
+ stopRecording,
959
+ subscribe: (subscriber) => {
960
+ subscribers.add(subscriber);
961
+ return () => {
962
+ subscribers.delete(subscriber);
963
+ };
964
+ },
965
+ toggleRecording: async () => {
966
+ if (state.isRecording) {
967
+ stopRecording();
968
+ return;
969
+ }
970
+ await startRecording();
971
+ },
972
+ get turns() {
973
+ return state.turns;
974
+ },
975
+ get assistantTexts() {
976
+ return state.assistantTexts;
977
+ }
533
978
  };
534
- return { start, stop };
535
979
  };
536
980
 
537
981
  // src/client/htmxBootstrap.ts
@@ -540,10 +984,10 @@ var VOICE_WAVE_WIDTH = 320;
540
984
  var VOICE_WAVE_HEIGHT = 88;
541
985
  var DEFAULT_GUIDED_LABEL = "Guided test";
542
986
  var DEFAULT_GENERAL_LABEL = "General recording";
543
- var DEFAULT_IDLE_LEAD = "Pick a mode to begin the demo.";
987
+ var DEFAULT_IDLE_LEAD = "Pick a scenario to begin the demo.";
544
988
  var DEFAULT_GUIDED_LEAD = "I can walk you through a short guided voice test.";
545
989
  var DEFAULT_GENERAL_LEAD = "I can capture one freeform recording and confirm that it landed.";
546
- var DEFAULT_IDLE_PROMPT = "Choose a mode to begin. Guided test asks follow-up prompts. General recording just captures what you say.";
990
+ var DEFAULT_IDLE_PROMPT = "Choose a scenario to begin. Guided test asks follow-up prompts. General recording just captures what you say.";
547
991
  var DEFAULT_GENERAL_IDLE_PROMPT = "Click Start general recording to capture one freeform answer.";
548
992
  var DEFAULT_GENERAL_LIVE_PROMPT = "Speak freely. When you pause, the recording will be captured.";
549
993
  var DEFAULT_GENERAL_COMPLETE_PROMPT = "Recording saved. Start again if you want another capture.";
@@ -631,23 +1075,6 @@ var createVoiceWavePath = (levels, width = VOICE_WAVE_WIDTH, height = VOICE_WAVE
631
1075
  }
632
1076
  return path;
633
1077
  };
634
- var getPcmLevel = (audio) => {
635
- const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
636
- if (bytes.byteLength < 2) {
637
- return 0;
638
- }
639
- const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
640
- if (samples.length === 0) {
641
- return 0;
642
- }
643
- let sumSquares = 0;
644
- for (const sample of samples) {
645
- const normalized = sample / 32768;
646
- sumSquares += normalized * normalized;
647
- }
648
- const rms = Math.sqrt(sumSquares / samples.length);
649
- return clamp(rms * 5.5, 0, 1);
650
- };
651
1078
  var parsePromptList = (value) => {
652
1079
  if (!value) {
653
1080
  return DEFAULT_GUIDED_PROMPTS;
@@ -704,35 +1131,6 @@ var resolvePromptMessage = (input) => {
704
1131
  }
705
1132
  return input.guidedPrompts[input.turnCount] ?? DEFAULT_GUIDED_OVERFLOW_PROMPT;
706
1133
  };
707
- var createDemoMicrophone = (onAudio, onLevel) => {
708
- let capture = null;
709
- return {
710
- start: async () => {
711
- if (capture) {
712
- return;
713
- }
714
- const nextCapture = createMicrophoneCapture({
715
- onAudio: (audio) => {
716
- onLevel(getPcmLevel(audio));
717
- onAudio(audio);
718
- },
719
- sampleRateHz: 16000
720
- });
721
- capture = nextCapture;
722
- try {
723
- await capture.start();
724
- } catch (error) {
725
- capture = null;
726
- throw error;
727
- }
728
- },
729
- stop: () => {
730
- capture?.stop();
731
- capture = null;
732
- onLevel(0);
733
- }
734
- };
735
- };
736
1134
  var initVoiceHTMXRoot = (root) => {
737
1135
  const guidedPath = root.dataset.voiceGuidedPath;
738
1136
  const generalPath = root.dataset.voiceGeneralPath;
@@ -755,12 +1153,26 @@ var initVoiceHTMXRoot = (root) => {
755
1153
  const voiceMonitorCopy = requireElement(root, root.dataset.voiceMonitorCopy, HTMLElement, "voice-monitor-copy");
756
1154
  const voiceWaveGlow = requireElement(root, root.dataset.voiceWaveGlow, SVGPathElement, "voice-wave-glow");
757
1155
  const voiceWavePath = requireElement(root, root.dataset.voiceWavePath, SVGPathElement, "voice-wave-path");
758
- const guidedVoice = createVoiceStream(guidedPath);
759
- const generalVoice = createVoiceStream(generalPath);
760
- const stopGuidedBinding = bindVoiceHTMX(guidedVoice, { element: syncElement });
761
- const stopGeneralBinding = bindVoiceHTMX(generalVoice, {
762
- element: syncElement
1156
+ const guidedVoice = createVoiceController(guidedPath, {
1157
+ capture: {
1158
+ onLevel: (level) => {
1159
+ waveLevels = pushVoiceWaveLevel(waveLevels, level);
1160
+ renderWave();
1161
+ }
1162
+ },
1163
+ preset: "guided-intake"
763
1164
  });
1165
+ const generalVoice = createVoiceController(generalPath, {
1166
+ capture: {
1167
+ onLevel: (level) => {
1168
+ waveLevels = pushVoiceWaveLevel(waveLevels, level);
1169
+ renderWave();
1170
+ }
1171
+ },
1172
+ preset: "dictation"
1173
+ });
1174
+ const stopGuidedBinding = guidedVoice.bindHTMX({ element: syncElement });
1175
+ const stopGeneralBinding = generalVoice.bindHTMX({ element: syncElement });
764
1176
  let activeMode = null;
765
1177
  let hasStartedModes = {
766
1178
  general: false,
@@ -821,12 +1233,8 @@ var initVoiceHTMXRoot = (root) => {
821
1233
  </article>` : ""}`;
822
1234
  renderWave();
823
1235
  };
824
- const microphone = createDemoMicrophone((audio) => currentVoice().sendAudio(audio), (level) => {
825
- waveLevels = pushVoiceWaveLevel(waveLevels, level);
826
- renderWave();
827
- });
828
1236
  const stopMic = () => {
829
- microphone.stop();
1237
+ currentVoice().stopRecording();
830
1238
  isCapturing = false;
831
1239
  micError = null;
832
1240
  waveLevels = createInitialVoiceWaveLevels();
@@ -839,12 +1247,12 @@ var initVoiceHTMXRoot = (root) => {
839
1247
  [mode]: true
840
1248
  };
841
1249
  try {
842
- await microphone.start();
1250
+ await currentVoice().startRecording();
843
1251
  micError = null;
844
1252
  isCapturing = true;
845
1253
  render();
846
1254
  } catch (error) {
847
- microphone.stop();
1255
+ currentVoice().stopRecording();
848
1256
  isCapturing = false;
849
1257
  waveLevels = createInitialVoiceWaveLevels();
850
1258
  micError = formatErrorMessage(error);
@@ -863,7 +1271,8 @@ var initVoiceHTMXRoot = (root) => {
863
1271
  stopMic();
864
1272
  });
865
1273
  window.addEventListener("beforeunload", () => {
866
- microphone.stop();
1274
+ guidedVoice.stopRecording();
1275
+ generalVoice.stopRecording();
867
1276
  stopGuidedBinding();
868
1277
  stopGeneralBinding();
869
1278
  guidedVoice.close();