@absolutejs/voice 0.0.20 → 0.0.22-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +884 -4
  2. package/dist/angular/index.d.ts +1 -0
  3. package/dist/angular/index.js +759 -3
  4. package/dist/angular/voice-controller.service.d.ts +27 -0
  5. package/dist/angular/voice-stream.service.d.ts +6 -0
  6. package/dist/audioConditioning.d.ts +3 -0
  7. package/dist/client/actions.d.ts +48 -0
  8. package/dist/client/audioPlayer.d.ts +40 -0
  9. package/dist/client/connection.d.ts +5 -0
  10. package/dist/client/controller.d.ts +2 -0
  11. package/dist/client/duplex.d.ts +3 -0
  12. package/dist/client/htmxBootstrap.js +660 -167
  13. package/dist/client/index.d.ts +3 -0
  14. package/dist/client/index.js +991 -6
  15. package/dist/client/microphone.d.ts +4 -2
  16. package/dist/correction.d.ts +33 -0
  17. package/dist/fileStore.d.ts +27 -0
  18. package/dist/index.d.ts +15 -0
  19. package/dist/index.js +3721 -298
  20. package/dist/ops.d.ts +100 -0
  21. package/dist/presets.d.ts +13 -0
  22. package/dist/react/index.d.ts +1 -0
  23. package/dist/react/index.js +728 -3
  24. package/dist/react/useVoiceController.d.ts +26 -0
  25. package/dist/react/useVoiceStream.d.ts +7 -0
  26. package/dist/routing.d.ts +3 -0
  27. package/dist/runtimeOps.d.ts +23 -0
  28. package/dist/store.d.ts +2 -2
  29. package/dist/svelte/index.d.ts +1 -0
  30. package/dist/svelte/index.js +691 -3
  31. package/dist/telephony/response.d.ts +7 -0
  32. package/dist/telephony/twilio.d.ts +116 -0
  33. package/dist/testing/benchmark.d.ts +93 -2
  34. package/dist/testing/corrected.d.ts +41 -0
  35. package/dist/testing/duplex.d.ts +59 -0
  36. package/dist/testing/fixtures.d.ts +18 -2
  37. package/dist/testing/index.d.ts +5 -0
  38. package/dist/testing/index.js +6247 -402
  39. package/dist/testing/review.d.ts +143 -0
  40. package/dist/testing/sessionBenchmark.d.ts +92 -2
  41. package/dist/testing/stt.d.ts +3 -1
  42. package/dist/testing/telephony.d.ts +70 -0
  43. package/dist/testing/tts.d.ts +73 -0
  44. package/dist/turnDetection.d.ts +5 -1
  45. package/dist/turnProfiles.d.ts +6 -0
  46. package/dist/types.d.ts +487 -10
  47. package/dist/vue/index.d.ts +1 -0
  48. package/dist/vue/index.js +750 -3
  49. package/dist/vue/useVoiceController.d.ts +30 -0
  50. package/dist/vue/useVoiceStream.d.ts +11 -0
  51. package/fixtures/README.md +9 -0
  52. package/fixtures/manifest.json +59 -1
  53. package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
  54. package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
  55. package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
  56. package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
  57. package/package.json +135 -1
@@ -99,6 +99,14 @@ var normalizeErrorMessage = (value) => {
99
99
  };
100
100
  var serverMessageToAction = (message) => {
101
101
  switch (message.type) {
102
+ case "audio":
103
+ return {
104
+ chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
105
+ format: message.format,
106
+ receivedAt: message.receivedAt,
107
+ turnId: message.turnId,
108
+ type: "audio"
109
+ };
102
110
  case "assistant":
103
111
  return {
104
112
  text: message.text,
@@ -127,6 +135,7 @@ var serverMessageToAction = (message) => {
127
135
  case "session":
128
136
  return {
129
137
  sessionId: message.sessionId,
138
+ scenarioId: message.scenarioId,
130
139
  status: message.status,
131
140
  type: "session"
132
141
  };
@@ -147,24 +156,30 @@ var WS_NORMAL_CLOSURE = 1000;
147
156
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
148
157
  var DEFAULT_PING_INTERVAL = 30000;
149
158
  var RECONNECT_DELAY_MS = 500;
159
+ var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
150
160
  var noop = () => {};
151
161
  var noopUnsubscribe = () => noop;
152
162
  var NOOP_CONNECTION = {
163
+ start: () => {},
153
164
  close: noop,
154
165
  endTurn: noop,
155
166
  getReadyState: () => WS_CLOSED,
167
+ getScenarioId: () => "",
156
168
  getSessionId: () => "",
157
169
  send: noop,
158
170
  sendAudio: noop,
159
171
  subscribe: noopUnsubscribe
160
172
  };
161
173
  var createSessionId = () => crypto.randomUUID();
162
- var buildWsUrl = (path, sessionId) => {
174
+ var buildWsUrl = (path, sessionId, scenarioId) => {
163
175
  const { hostname, port, protocol } = window.location;
164
176
  const wsProtocol = protocol === "https:" ? "wss:" : "ws:";
165
177
  const portSuffix = port ? `:${port}` : "";
166
178
  const url = new URL(`${wsProtocol}//${hostname}${portSuffix}${path}`);
167
179
  url.searchParams.set("sessionId", sessionId);
180
+ if (scenarioId) {
181
+ url.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
182
+ }
168
183
  return url.toString();
169
184
  };
170
185
  var isVoiceServerMessage = (value) => {
@@ -172,6 +187,7 @@ var isVoiceServerMessage = (value) => {
172
187
  return false;
173
188
  }
174
189
  switch (value.type) {
190
+ case "audio":
175
191
  case "assistant":
176
192
  case "complete":
177
193
  case "error":
@@ -207,6 +223,7 @@ var createVoiceConnection = (path, options = {}) => {
207
223
  const state = {
208
224
  isConnected: false,
209
225
  pendingMessages: [],
226
+ scenarioId: options.scenarioId ?? null,
210
227
  pingInterval: null,
211
228
  reconnectAttempts: 0,
212
229
  reconnectTimeout: null,
@@ -244,13 +261,14 @@ var createVoiceConnection = (path, options = {}) => {
244
261
  }, RECONNECT_DELAY_MS);
245
262
  };
246
263
  const connect = () => {
247
- const ws = new WebSocket(buildWsUrl(path, state.sessionId));
264
+ const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
248
265
  ws.binaryType = "arraybuffer";
249
266
  ws.onopen = () => {
250
267
  state.isConnected = true;
251
268
  state.reconnectAttempts = 0;
252
269
  flushPendingMessages();
253
270
  listeners.forEach((listener) => listener({
271
+ scenarioId: state.scenarioId ?? undefined,
254
272
  sessionId: state.sessionId,
255
273
  status: "active",
256
274
  type: "session"
@@ -268,6 +286,7 @@ var createVoiceConnection = (path, options = {}) => {
268
286
  }
269
287
  if (parsed.type === "session") {
270
288
  state.sessionId = parsed.sessionId;
289
+ state.scenarioId = parsed.scenarioId ?? state.scenarioId;
271
290
  }
272
291
  listeners.forEach((listener) => listener(parsed));
273
292
  };
@@ -291,6 +310,19 @@ var createVoiceConnection = (path, options = {}) => {
291
310
  const send = (message) => {
292
311
  sendSerialized(JSON.stringify(message));
293
312
  };
313
+ const start = (input = {}) => {
314
+ if (input.sessionId) {
315
+ state.sessionId = input.sessionId;
316
+ }
317
+ if (input.scenarioId) {
318
+ state.scenarioId = input.scenarioId;
319
+ }
320
+ send({
321
+ type: "start",
322
+ sessionId: state.sessionId,
323
+ scenarioId: state.scenarioId ?? undefined
324
+ });
325
+ };
294
326
  const sendAudio = (audio) => {
295
327
  sendSerialized(audio);
296
328
  };
@@ -314,9 +346,11 @@ var createVoiceConnection = (path, options = {}) => {
314
346
  };
315
347
  connect();
316
348
  return {
349
+ start,
317
350
  close,
318
351
  endTurn,
319
352
  getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
353
+ getScenarioId: () => state.scenarioId ?? "",
320
354
  getSessionId: () => state.sessionId,
321
355
  send,
322
356
  sendAudio,
@@ -326,9 +360,11 @@ var createVoiceConnection = (path, options = {}) => {
326
360
 
327
361
  // src/client/store.ts
328
362
  var createInitialState = () => ({
363
+ assistantAudio: [],
329
364
  assistantTexts: [],
330
365
  error: null,
331
366
  isConnected: false,
367
+ scenarioId: null,
332
368
  partial: "",
333
369
  sessionId: null,
334
370
  status: "idle",
@@ -342,6 +378,20 @@ var createVoiceStreamStore = () => {
342
378
  };
343
379
  const dispatch = (action) => {
344
380
  switch (action.type) {
381
+ case "audio":
382
+ state = {
383
+ ...state,
384
+ assistantAudio: [
385
+ ...state.assistantAudio,
386
+ {
387
+ chunk: action.chunk,
388
+ format: action.format,
389
+ receivedAt: action.receivedAt,
390
+ turnId: action.turnId
391
+ }
392
+ ]
393
+ };
394
+ break;
345
395
  case "assistant":
346
396
  state = {
347
397
  ...state,
@@ -390,6 +440,7 @@ var createVoiceStreamStore = () => {
390
440
  state = {
391
441
  ...state,
392
442
  error: null,
443
+ scenarioId: action.scenarioId ?? state.scenarioId,
393
444
  isConnected: action.status === "active",
394
445
  sessionId: action.sessionId,
395
446
  status: action.status
@@ -423,6 +474,12 @@ var createVoiceStream = (path, options = {}) => {
423
474
  const connection = createVoiceConnection(path, options);
424
475
  const store = createVoiceStreamStore();
425
476
  const subscribers = new Set;
477
+ const start = (input) => Promise.resolve().then(() => {
478
+ if (!input?.sessionId && !input?.scenarioId) {
479
+ return;
480
+ }
481
+ connection.start(input);
482
+ });
426
483
  const notify = () => {
427
484
  subscribers.forEach((subscriber) => subscriber());
428
485
  };
@@ -455,6 +512,10 @@ var createVoiceStream = (path, options = {}) => {
455
512
  get isConnected() {
456
513
  return store.getSnapshot().isConnected;
457
514
  },
515
+ get scenarioId() {
516
+ return store.getSnapshot().scenarioId;
517
+ },
518
+ start,
458
519
  get partial() {
459
520
  return store.getSnapshot().partial;
460
521
  },
@@ -470,6 +531,9 @@ var createVoiceStream = (path, options = {}) => {
470
531
  get assistantTexts() {
471
532
  return store.getSnapshot().assistantTexts;
472
533
  },
534
+ get assistantAudio() {
535
+ return store.getSnapshot().assistantAudio;
536
+ },
473
537
  sendAudio(audio) {
474
538
  connection.sendAudio(audio);
475
539
  },
@@ -484,6 +548,630 @@ var createVoiceStream = (path, options = {}) => {
484
548
 
485
549
  // src/svelte/createVoiceStream.ts
486
550
  var createVoiceStream2 = (path, options = {}) => createVoiceStream(path, options);
551
+ // src/client/htmx.ts
552
var DEFAULT_EVENT_NAME = "voice-refresh";
var DEFAULT_QUERY_PARAM = "sessionId";

/**
 * Turns a binding target into an element.
 *
 * @param input - A selector string, or any other value (returned unchanged).
 * @returns The first `document.querySelector` match for a string input
 *   (null when nothing matches); otherwise `input` itself.
 */
var resolveElement = (input) => {
  if (typeof input !== "string") {
    return input;
  }
  return document.querySelector(input);
};

/**
 * Builds the `hx-get` value for the element with the session id applied.
 *
 * @param element - Element whose current `hx-get` is used when `route` is nullish.
 * @param route - Optional explicit route; takes precedence over the attribute.
 * @param queryParam - Name of the query parameter carrying the session id.
 * @param sessionId - Session id to set; when falsy the parameter is removed.
 * @returns "" when there is no route at all; a path + query + hash string for
 *   same-origin routes; the full absolute URL for cross-origin routes.
 */
var buildRoute = (element, route, queryParam, sessionId) => {
  const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
  if (!baseRoute) {
    return "";
  }
  // Resolve against the document URL (not just the origin) so a relative
  // route such as "panel" keeps pointing below the current path.
  const pageUrl = window.location.href ?? window.location.origin;
  const url = new URL(baseRoute, pageUrl);
  if (sessionId) {
    url.searchParams.set(queryParam, sessionId);
  } else {
    url.searchParams.delete(queryParam);
  }
  // Dropping the origin of a cross-origin route would silently retarget the
  // request at the current host, so keep such URLs absolute.
  if (url.origin !== window.location.origin) {
    return url.toString();
  }
  return `${url.pathname}${url.search}${url.hash}`;
};

/**
 * Keeps an element's `hx-get` in sync with the stream's session id and fires
 * an htmx event on the element on every stream notification.
 *
 * @param stream - Object exposing `sessionId` and `subscribe(listener)`.
 * @param options - `element` (selector or element), optional `route`,
 *   `eventName` (default "voice-refresh") and `sessionQueryParam`
 *   (default "sessionId").
 * @returns A function that unsubscribes from the stream. A no-op function is
 *   returned when there is no `window`/`document` or the element is missing.
 */
var bindVoiceHTMX = (stream, options) => {
  if (typeof window === "undefined" || typeof document === "undefined") {
    return () => {};
  }
  const element = resolveElement(options.element);
  if (!element) {
    return () => {};
  }
  const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
  const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
  const sync = () => {
    const htmxWindow = window;
    const nextRoute = buildRoute(element, options.route, queryParam, stream.sessionId);
    if (nextRoute) {
      element.setAttribute("hx-get", nextRoute);
    }
    // htmx is optional on the page; both calls are skipped when it is absent.
    htmxWindow.htmx?.process?.(element);
    htmxWindow.htmx?.trigger?.(element, eventName);
  };
  const unsubscribe = stream.subscribe(sync);
  // Apply the current session id immediately instead of waiting for the first update.
  sync();
  return () => {
    unsubscribe();
  };
};
598
+
599
+ // src/client/microphone.ts
600
/**
 * Restricts a sample to the closed range [-1, 1].
 *
 * @param value - Sample value.
 * @returns The value limited to [-1, 1].
 */
var clampSample = (value) => {
  const cappedAbove = Math.min(1, value);
  return Math.max(-1, cappedAbove);
};

/**
 * Converts float samples into 16-bit signed integer samples and returns the
 * bytes backing them.
 *
 * @param input - Indexable collection of float samples; missing entries count as 0.
 * @returns A Uint8Array over the Int16Array buffer (two bytes per input sample).
 */
var floatTo16BitPCM = (input) => {
  const pcm = new Int16Array(input.length);
  let cursor = 0;
  while (cursor < input.length) {
    const bounded = clampSample(input[cursor] ?? 0);
    // Negative samples scale by 32768 and non-negative ones by 32767 so both
    // ends of [-1, 1] land exactly on the int16 limits.
    const scale = bounded < 0 ? 32768 : 32767;
    pcm[cursor] = bounded * scale;
    cursor += 1;
  }
  return new Uint8Array(pcm.buffer);
};
609
/**
 * Computes a 0..1 loudness level for a chunk of 16-bit PCM audio.
 *
 * The level is the RMS of the samples (normalised by 32768) multiplied by 5.5
 * and clamped to [0, 1].
 *
 * @param audio - A Uint8Array, or a value accepted by `new Uint8Array(...)`.
 * @returns 0 when fewer than two bytes are available, otherwise the clamped level.
 */
var getPcmLevel = (audio) => {
  const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  // An Int16Array view must start on an even byte offset; a subarray that
  // starts on an odd offset would throw RangeError, so copy it first.
  const aligned = bytes.byteOffset % 2 === 0 ? bytes : bytes.slice(0, sampleCount * 2);
  const samples = new Int16Array(aligned.buffer, aligned.byteOffset, sampleCount);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.min(1, Math.max(0, Math.sqrt(sumSquares / sampleCount) * 5.5));
};
625
/**
 * Resamples float audio from `sourceRate` to `targetRate`.
 *
 * Downsampling averages each bucket of source samples. When the target rate is
 * higher than the source rate the output is linearly interpolated instead,
 * because the bucket average has empty buckets in that case and would emit
 * zero samples between the real ones.
 *
 * @param input - Indexable collection of float samples.
 * @param sourceRate - Sample rate of `input`.
 * @param targetRate - Desired sample rate.
 * @returns `input` itself when the rates are equal, otherwise a new Float32Array
 *   of `Math.round(input.length / (sourceRate / targetRate))` samples.
 */
var downsampleBuffer = (input, sourceRate, targetRate) => {
  if (sourceRate === targetRate) {
    return input;
  }
  const ratio = sourceRate / targetRate;
  const length = Math.round(input.length / ratio);
  const output = new Float32Array(length);
  if (ratio < 1) {
    const lastIndex = input.length - 1;
    for (let index = 0; index < output.length; index += 1) {
      // Clamp so positions past the final source sample hold its value.
      const position = Math.min(index * ratio, lastIndex);
      const left = Math.floor(position);
      const right = Math.min(left + 1, lastIndex);
      const weight = position - left;
      output[index] = (input[left] ?? 0) * (1 - weight) + (input[right] ?? 0) * weight;
    }
    return output;
  }
  let offsetResult = 0;
  let offsetBuffer = 0;
  while (offsetResult < output.length) {
    const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
    let accum = 0;
    let count = 0;
    for (let index = offsetBuffer; index < nextOffsetBuffer && index < input.length; index += 1) {
      accum += input[index] ?? 0;
      count += 1;
    }
    output[offsetResult] = count > 0 ? accum / count : 0;
    offsetResult += 1;
    offsetBuffer = nextOffsetBuffer;
  }
  return output;
};
648
/**
 * Creates a microphone capture handle backed by getUserMedia and a
 * ScriptProcessor node.
 *
 * @param options - `onAudio(pcm)` receives each converted chunk; optional
 *   `onLevel(level)`, `channelCount` (default 1) and `sampleRateHz`
 *   (default 16000).
 * @returns `{ start, stop }`. `start` is async and rejects when
 *   getUserMedia or AudioContext is unavailable, or when the request or graph
 *   setup fails. `stop` releases everything and reports a level of 0.
 */
var createMicrophoneCapture = (options) => {
  let audioContext = null;
  let sourceNode = null;
  let processorNode = null;
  let mediaStream = null;

  // Tears down whatever is currently held; safe to call when nothing is active.
  const releaseResources = () => {
    if (processorNode) {
      processorNode.onaudioprocess = null;
      processorNode.disconnect();
    }
    sourceNode?.disconnect();
    mediaStream?.getTracks().forEach((track) => track.stop());
    // close() returns a promise; teardown is best-effort, so a rejection here
    // is deliberately not surfaced as an unhandled rejection.
    audioContext?.close()?.catch?.(() => {});
    audioContext = null;
    mediaStream = null;
    processorNode = null;
    sourceNode = null;
  };

  const start = async () => {
    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
    }
    // Guard the bare global with typeof: referencing an undeclared
    // `AudioContext` would throw ReferenceError before the check below runs.
    const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? (typeof AudioContext !== "undefined" ? AudioContext : undefined);
    if (!AudioContextCtor) {
      throw new Error("Browser microphone capture requires AudioContext support.");
    }
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: options.channelCount ?? 1
      }
    });
    // A session that was started earlier and never stopped would otherwise
    // keep its tracks and context alive once the references are overwritten.
    releaseResources();
    try {
      mediaStream = stream;
      const context = new AudioContextCtor;
      audioContext = context;
      sourceNode = context.createMediaStreamSource(stream);
      const processor = context.createScriptProcessor(4096, 1, 1);
      processorNode = processor;
      processor.onaudioprocess = (event) => {
        const channel = event.inputBuffer.getChannelData(0);
        const downsampled = downsampleBuffer(channel, context.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
        const pcm = floatTo16BitPCM(downsampled);
        options.onLevel?.(getPcmLevel(pcm));
        options.onAudio(pcm);
      };
      sourceNode.connect(processor);
      processor.connect(context.destination);
    } catch (error) {
      // Do not leave the microphone open when building the graph fails.
      releaseResources();
      throw error;
    }
  };

  const stop = () => {
    releaseResources();
    options.onLevel?.(0);
  };

  return { start, stop };
};
692
+
693
+ // src/audioConditioning.ts
694
var DEFAULT_TARGET_LEVEL = 0.08;
var DEFAULT_MAX_GAIN = 3;
var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;

/**
 * Views a chunk of bytes as 16-bit samples (a trailing odd byte is ignored).
 *
 * @param audio - An ArrayBuffer, or a view exposing `buffer`, `byteOffset`
 *   and `byteLength`.
 * @returns An Int16Array over the same memory, or over a copy when the view
 *   starts on an odd byte offset.
 */
var toInt16Array = (audio) => {
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, Math.floor(audio.byteLength / 2));
  }
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio.byteOffset % 2 !== 0) {
    // Int16Array views must start on an even offset; copying re-aligns the
    // data instead of throwing RangeError.
    const copy = new Uint8Array(audio.buffer, audio.byteOffset, sampleCount * 2).slice();
    return new Int16Array(copy.buffer, 0, sampleCount);
  }
  return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
};

/**
 * Root-mean-square of 16-bit samples normalised by 32768.
 *
 * @param samples - Iterable with a `length` holding int16 sample values.
 * @returns 0 for an empty input, otherwise the RMS.
 */
var computeRms = (samples) => {
  if (samples.length === 0) {
    return 0;
  }
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.sqrt(sumSquares / samples.length);
};

/**
 * Fills in defaults for an audio-conditioning configuration.
 *
 * @param config - Partial configuration, or nothing.
 * @returns `undefined` when `config` is missing or has `enabled === false`;
 *   otherwise a complete configuration with `enabled: true`.
 */
var resolveAudioConditioningConfig = (config) => {
  if (!config || config.enabled === false) {
    return;
  }
  return {
    enabled: true,
    maxGain: config.maxGain ?? DEFAULT_MAX_GAIN,
    noiseGateAttenuation: config.noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
    noiseGateThreshold: config.noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
    targetLevel: config.targetLevel ?? DEFAULT_TARGET_LEVEL
  };
};

/**
 * Applies a noise gate and level normalisation to one chunk of 16-bit PCM.
 *
 * @param audio - ArrayBuffer or byte view holding int16 samples.
 * @param config - Configuration providing `maxGain`, `noiseGateAttenuation`,
 *   `noiseGateThreshold` and `targetLevel`; when falsy the chunk is returned
 *   untouched.
 * @returns `audio` itself when there is no config or no full sample,
 *   otherwise a new Uint8Array with the processed samples.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const source = toInt16Array(audio);
  if (source.length === 0) {
    return audio;
  }
  const rms = computeRms(source);
  const output = new Int16Array(source.length);
  // Chunks quieter than the threshold are attenuated rather than boosted.
  const gateFactor = rms < config.noiseGateThreshold ? config.noiseGateAttenuation : 1;
  // Floor the level so the division below cannot blow up on silence.
  const baseLevel = Math.max(rms * gateFactor, 0.000001);
  const gain = Math.min(config.maxGain, config.targetLevel / baseLevel);
  const appliedGain = Math.max(0.25, gain) * gateFactor;
  for (let index = 0; index < source.length; index += 1) {
    const next = Math.round(source[index] * appliedGain);
    output[index] = Math.max(-32768, Math.min(32767, next));
  }
  return new Uint8Array(output.buffer);
};
747
+
748
+ // src/turnProfiles.ts
749
// Base timings per turn profile.
var TURN_PROFILE_DEFAULTS = {
  balanced: {
    qualityProfile: "general",
    silenceMs: 1400,
    speechThreshold: 0.012,
    transcriptStabilityMs: 1000
  },
  fast: {
    qualityProfile: "general",
    silenceMs: 700,
    speechThreshold: 0.015,
    transcriptStabilityMs: 450
  },
  "long-form": {
    qualityProfile: "general",
    silenceMs: 2200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1500
  }
};
// Per-quality overrides; an empty entry leaves the turn profile's values in place.
var QUALITY_PROFILE_DEFAULTS = {
  general: {},
  "accent-heavy": {
    silenceMs: 1200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1200
  },
  "noisy-room": {
    silenceMs: 2000,
    speechThreshold: 0.02,
    transcriptStabilityMs: 1600
  },
  "short-command": {
    silenceMs: 500,
    speechThreshold: 0.016,
    transcriptStabilityMs: 420
  }
};
var DEFAULT_TURN_PROFILE = "fast";
var DEFAULT_QUALITY_PROFILE = "general";

/**
 * Resolves a partial turn-detection configuration into concrete values.
 *
 * Each timing is taken from, in order: the explicit value in `config`, the
 * quality-profile override, the turn-profile default.
 *
 * Profile names that are not own keys of the tables (typos, or inherited
 * names such as "constructor") fall back to the defaults instead of
 * dereferencing `undefined` or returning `undefined` timings.
 *
 * @param config - Optional `{ profile, qualityProfile, silenceMs,
 *   speechThreshold, transcriptStabilityMs }`.
 * @returns `{ profile, qualityProfile, silenceMs, speechThreshold,
 *   transcriptStabilityMs }` where `profile`/`qualityProfile` are the names
 *   actually used.
 */
var resolveTurnDetectionConfig = (config) => {
  const isKnown = (table, key) => typeof key === "string" && Object.prototype.hasOwnProperty.call(table, key);
  const profile = isKnown(TURN_PROFILE_DEFAULTS, config?.profile) ? config.profile : DEFAULT_TURN_PROFILE;
  const qualityProfile = isKnown(QUALITY_PROFILE_DEFAULTS, config?.qualityProfile) ? config.qualityProfile : DEFAULT_QUALITY_PROFILE;
  const preset = TURN_PROFILE_DEFAULTS[profile];
  const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  return {
    profile,
    qualityProfile,
    silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
    speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
    transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
  };
};
802
+
803
+ // src/presets.ts
804
// Raw inputs for every named runtime preset. The table is assembled through
// small factories so each preset gets its own nested objects.
var PRESET_INPUTS = (() => {
  // Capture settings used by every preset.
  const monoCapture = () => ({
    channelCount: 1,
    sampleRateHz: 16000
  });
  // Connection settings; every preset enables reconnect.
  const reconnecting = (maxReconnectAttempts, pingInterval) => ({
    maxReconnectAttempts,
    pingInterval,
    reconnect: true
  });
  // Enabled audio conditioning; only "noisy-room" overrides the target level.
  const conditioning = (maxGain, noiseGateAttenuation, noiseGateThreshold, targetLevel = 0.08) => ({
    enabled: true,
    maxGain,
    noiseGateAttenuation,
    noiseGateThreshold,
    targetLevel
  });
  // Turn-detection input with optional explicit timing overrides.
  const turn = (qualityProfile, profile, timings = {}) => ({
    qualityProfile,
    profile,
    ...timings
  });
  return {
    chat: {
      audioConditioning: conditioning(2.5, 0, 0.004),
      capture: monoCapture(),
      connection: reconnecting(10, 30000),
      sttLifecycle: "continuous",
      turnDetection: turn("short-command", "balanced")
    },
    default: {
      capture: monoCapture(),
      connection: reconnecting(10, 30000),
      sttLifecycle: "continuous",
      turnDetection: turn("general", "fast")
    },
    dictation: {
      audioConditioning: conditioning(2.25, 0.05, 0.003),
      capture: monoCapture(),
      connection: reconnecting(12, 30000),
      sttLifecycle: "continuous",
      turnDetection: turn("accent-heavy", "long-form")
    },
    "guided-intake": {
      audioConditioning: conditioning(2.5, 0, 0.004),
      capture: monoCapture(),
      connection: reconnecting(12, 30000),
      sttLifecycle: "turn-scoped",
      turnDetection: turn("accent-heavy", "long-form")
    },
    "noisy-room": {
      audioConditioning: conditioning(3, 0.12, 0.006, 0.085),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: turn("noisy-room", "long-form", {
        silenceMs: 2100,
        speechThreshold: 0.02,
        transcriptStabilityMs: 1650
      })
    },
    "pstn-balanced": {
      audioConditioning: conditioning(2.8, 0.07, 0.005),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: turn("noisy-room", "long-form", {
        silenceMs: 660,
        speechThreshold: 0.012,
        transcriptStabilityMs: 300
      })
    },
    "pstn-fast": {
      audioConditioning: conditioning(2.75, 0.06, 0.005),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: turn("noisy-room", "long-form", {
        silenceMs: 620,
        speechThreshold: 0.012,
        transcriptStabilityMs: 280
      })
    },
    reliability: {
      audioConditioning: conditioning(2.9, 0.08, 0.005),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: turn("noisy-room", "long-form")
    }
  };
})();
992
/**
 * Expands a named preset from PRESET_INPUTS into a resolved runtime preset.
 *
 * A name that is not an own key of PRESET_INPUTS (including `null`, which
 * bypasses the parameter default) falls back to "default" instead of
 * dereferencing `undefined`.
 *
 * @param name - Preset name; defaults to "default".
 * @returns `{ audioConditioning, capture, connection, name, sttLifecycle,
 *   turnDetection }` where `name` is the preset actually used and
 *   `connection` is a shallow copy of the preset's connection settings.
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  const isKnown = typeof name === "string" && Object.prototype.hasOwnProperty.call(PRESET_INPUTS, name);
  const resolvedName = isKnown ? name : "default";
  const preset = PRESET_INPUTS[resolvedName];
  return {
    audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
    capture: {
      channelCount: preset.capture?.channelCount ?? 1,
      sampleRateHz: preset.capture?.sampleRateHz ?? 16000
    },
    connection: {
      ...preset.connection
    },
    name: resolvedName,
    sttLifecycle: preset.sttLifecycle ?? "continuous",
    turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
  };
};
1008
+
1009
+ // src/client/controller.ts
1010
/**
 * Builds the controller's first state snapshot from a voice stream.
 *
 * @param stream - Object exposing `assistantAudio`, `assistantTexts`, `error`,
 *   `isConnected`, `partial`, `sessionId`, `scenarioId`, `status` and `turns`.
 * @returns A new state object with the list fields shallow-copied,
 *   `isRecording` false and `recordingError` null.
 */
var createInitialState2 = (stream) => {
  const {
    assistantAudio,
    assistantTexts,
    error,
    isConnected,
    partial,
    sessionId,
    scenarioId,
    status,
    turns
  } = stream;
  return {
    assistantAudio: Array.from(assistantAudio),
    assistantTexts: Array.from(assistantTexts),
    error,
    isConnected,
    isRecording: false,
    partial,
    recordingError: null,
    sessionId,
    scenarioId,
    status,
    turns: Array.from(turns)
  };
};
1023
/**
 * Creates a controller that couples a voice stream with microphone capture
 * and exposes one subscribable state snapshot.
 *
 * @param path - Passed through to `createVoiceStream`.
 * @param options - Optional `preset` name, `connection` overrides (merged over
 *   the preset's connection settings), `capture` (`channelCount`,
 *   `sampleRateHz`, `onLevel`) and `autoStopOnComplete` (recording stops when
 *   the stream status becomes "completed" unless this is `false`).
 * @returns The controller: state getters, `getSnapshot`/`getServerSnapshot`,
 *   `subscribe`, `startRecording`/`stopRecording`/`toggleRecording`,
 *   `sendAudio`, `endTurn`, `bindHTMX` and `close`.
 */
var createVoiceController = (path, options = {}) => {
  const preset = resolveVoiceRuntimePreset(options.preset);
  const stream = createVoiceStream(path, {
    ...preset.connection,
    ...options.connection
  });
  let capture = null;
  // Capture instance whose start() is still pending; null when none is.
  let startingCapture = null;
  let state = createInitialState2(stream);
  const subscribers = new Set;
  const notify = () => {
    for (const subscriber of subscribers) {
      subscriber();
    }
  };
  // Mirrors the stream's fields into the controller state, keeping the
  // recording fields that only the controller owns.
  const sync = () => {
    state = {
      ...state,
      assistantAudio: [...stream.assistantAudio],
      assistantTexts: [...stream.assistantTexts],
      error: stream.error,
      isConnected: stream.isConnected,
      partial: stream.partial,
      sessionId: stream.sessionId,
      scenarioId: stream.scenarioId,
      status: stream.status,
      turns: [...stream.turns]
    };
    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
      capture?.stop();
      capture = null;
      state = {
        ...state,
        isRecording: false
      };
    }
    notify();
  };
  const unsubscribeStream = stream.subscribe(sync);
  sync();
  // Lazily creates the capture; explicit options win over preset values.
  const ensureCapture = () => {
    if (capture) {
      return capture;
    }
    capture = createMicrophoneCapture({
      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
      onLevel: options.capture?.onLevel,
      onAudio: (audio) => stream.sendAudio(audio),
      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
    });
    return capture;
  };
  const stopRecording = () => {
    capture?.stop();
    capture = null;
    // Forget any pending start so a later startRecording() is not ignored;
    // the pending attempt notices it was superseded and releases itself.
    startingCapture = null;
    state = {
      ...state,
      isRecording: false
    };
    notify();
  };
  const startRecording = async () => {
    // A start that is already in flight must not be issued a second time,
    // otherwise the same capture would open the microphone twice.
    if (state.isRecording || startingCapture) {
      return;
    }
    let activeCapture = null;
    try {
      activeCapture = ensureCapture();
      startingCapture = activeCapture;
      state = {
        ...state,
        recordingError: null
      };
      notify();
      await activeCapture.start();
      if (capture !== activeCapture) {
        // stopRecording()/close() ran while the start was pending. Release the
        // microphone that was just opened instead of reporting a recording
        // nobody holds a handle to.
        activeCapture.stop();
        return;
      }
      state = {
        ...state,
        isRecording: true
      };
      notify();
    } catch (error) {
      if (activeCapture !== null && capture !== activeCapture) {
        // Superseded attempt: leave the state of any newer attempt alone.
        throw error;
      }
      capture = null;
      state = {
        ...state,
        isRecording: false,
        recordingError: error instanceof Error ? error.message : String(error)
      };
      notify();
      throw error;
    } finally {
      if (startingCapture === activeCapture) {
        startingCapture = null;
      }
    }
  };
  const close = () => {
    unsubscribeStream();
    stopRecording();
    stream.close();
  };
  return {
    bindHTMX(bindingOptions) {
      return bindVoiceHTMX(stream, bindingOptions);
    },
    close,
    endTurn: () => stream.endTurn(),
    get error() {
      return state.error;
    },
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    get isConnected() {
      return state.isConnected;
    },
    get isRecording() {
      return state.isRecording;
    },
    get partial() {
      return state.partial;
    },
    get recordingError() {
      return state.recordingError;
    },
    sendAudio: (audio) => stream.sendAudio(audio),
    get sessionId() {
      return state.sessionId;
    },
    get scenarioId() {
      return state.scenarioId;
    },
    startRecording,
    get status() {
      return state.status;
    },
    stopRecording,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    },
    toggleRecording: async () => {
      if (state.isRecording) {
        stopRecording();
        return;
      }
      await startRecording();
    },
    get turns() {
      return state.turns;
    },
    get assistantTexts() {
      return state.assistantTexts;
    },
    get assistantAudio() {
      return state.assistantAudio;
    }
  };
};
487
1174
  export {
488
- createVoiceStream2 as createVoiceStream
1175
+ createVoiceStream2 as createVoiceStream,
1176
+ createVoiceController
489
1177
  };