@absolutejs/voice 0.0.19 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +387 -4
  2. package/dist/angular/index.d.ts +1 -0
  3. package/dist/angular/index.js +669 -3
  4. package/dist/angular/voice-controller.service.d.ts +21 -0
  5. package/dist/audioConditioning.d.ts +3 -0
  6. package/dist/client/actions.d.ts +7 -0
  7. package/dist/client/connection.d.ts +5 -0
  8. package/dist/client/controller.d.ts +2 -0
  9. package/dist/client/htmxBootstrap.js +576 -167
  10. package/dist/client/index.d.ts +1 -0
  11. package/dist/client/index.js +486 -3
  12. package/dist/client/microphone.d.ts +4 -2
  13. package/dist/correction.d.ts +16 -0
  14. package/dist/index.d.ts +4 -0
  15. package/dist/index.js +1314 -283
  16. package/dist/presets.d.ts +13 -0
  17. package/dist/react/index.d.ts +1 -0
  18. package/dist/react/index.js +642 -3
  19. package/dist/react/useVoiceController.d.ts +20 -0
  20. package/dist/react/useVoiceStream.d.ts +1 -0
  21. package/dist/store.d.ts +2 -2
  22. package/dist/svelte/index.d.ts +1 -0
  23. package/dist/svelte/index.js +607 -3
  24. package/dist/testing/benchmark.d.ts +36 -0
  25. package/dist/testing/fixtures.d.ts +1 -0
  26. package/dist/testing/index.d.ts +2 -0
  27. package/dist/testing/index.js +1975 -4
  28. package/dist/testing/resilience.d.ts +20 -0
  29. package/dist/testing/sessionBenchmark.d.ts +126 -0
  30. package/dist/testing/stt.d.ts +1 -0
  31. package/dist/turnDetection.d.ts +5 -1
  32. package/dist/turnProfiles.d.ts +6 -0
  33. package/dist/types.d.ts +198 -8
  34. package/dist/vue/index.d.ts +1 -0
  35. package/dist/vue/index.js +660 -3
  36. package/dist/vue/useVoiceController.d.ts +19 -0
  37. package/fixtures/README.md +24 -0
  38. package/fixtures/manifest.json +127 -0
  39. package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
  40. package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
  41. package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
  42. package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
  43. package/fixtures/pcm/multiturn-three-mixed.pcm +0 -0
  44. package/fixtures/pcm/multiturn-two-clean.pcm +0 -0
  45. package/fixtures/pcm/stella-bulgaria-bulgarian20.pcm +0 -0
  46. package/fixtures/pcm/stella-jamaica-jamaican-creole-english1.pcm +0 -0
  47. package/fixtures/pcm/stella-liberia-liberian-pidgin-english2.pcm +0 -0
  48. package/fixtures/pcm/stella-sierra-leone-krio5.pcm +0 -0
  49. package/package.json +25 -1
@@ -127,6 +127,7 @@ var serverMessageToAction = (message) => {
127
127
  case "session":
128
128
  return {
129
129
  sessionId: message.sessionId,
130
+ scenarioId: message.scenarioId,
130
131
  status: message.status,
131
132
  type: "session"
132
133
  };
@@ -147,24 +148,30 @@ var WS_NORMAL_CLOSURE = 1000;
147
148
// Defaults for the voice WebSocket connection.
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
// NOTE(review): presumably milliseconds, like RECONNECT_DELAY_MS — confirm at the usage site.
var DEFAULT_PING_INTERVAL = 30000;
// Delay, in milliseconds, passed to the reconnect timer.
var RECONNECT_DELAY_MS = 500;
// Query-string key buildWsUrl uses to carry the scenario id.
var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
// Shared do-nothing callback and a subscribe() stand-in that returns it.
var noop = () => {};
var noopUnsubscribe = () => noop;
/**
 * Inert connection object: every command is a no-op, the socket always
 * reports closed, and both ids are empty strings.
 */
var NOOP_CONNECTION = {
  // Uses the shared noop like every other command instead of a one-off closure.
  start: noop,
  close: noop,
  endTurn: noop,
  getReadyState: () => WS_CLOSED,
  getScenarioId: () => "",
  getSessionId: () => "",
  send: noop,
  sendAudio: noop,
  subscribe: noopUnsubscribe
};
/** Generates a fresh session id with crypto.randomUUID(). */
var createSessionId = () => crypto.randomUUID();
162
/**
 * Builds the WebSocket URL for a voice session from the current page location.
 *
 * @param {string} path - Path appended after the page's host and port.
 * @param {string} sessionId - Always written to the `sessionId` query parameter.
 * @param {string} [scenarioId] - Written to the scenario query parameter only when truthy.
 * @returns {string} `wss:` URL when the page is served over https, `ws:` otherwise.
 */
var buildWsUrl = (path, sessionId, scenarioId) => {
  const pageLocation = window.location;
  const scheme = pageLocation.protocol === "https:" ? "wss:" : "ws:";
  const authority = pageLocation.port
    ? `${pageLocation.hostname}:${pageLocation.port}`
    : `${pageLocation.hostname}`;
  const target = new URL(`${scheme}//${authority}${path}`);
  target.searchParams.set("sessionId", sessionId);
  if (scenarioId) {
    target.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
  }
  return target.toString();
};
170
177
  var isVoiceServerMessage = (value) => {
@@ -207,6 +214,7 @@ var createVoiceConnection = (path, options = {}) => {
207
214
  const state = {
208
215
  isConnected: false,
209
216
  pendingMessages: [],
217
+ scenarioId: options.scenarioId ?? null,
210
218
  pingInterval: null,
211
219
  reconnectAttempts: 0,
212
220
  reconnectTimeout: null,
@@ -244,13 +252,14 @@ var createVoiceConnection = (path, options = {}) => {
244
252
  }, RECONNECT_DELAY_MS);
245
253
  };
246
254
  const connect = () => {
247
- const ws = new WebSocket(buildWsUrl(path, state.sessionId));
255
+ const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
248
256
  ws.binaryType = "arraybuffer";
249
257
  ws.onopen = () => {
250
258
  state.isConnected = true;
251
259
  state.reconnectAttempts = 0;
252
260
  flushPendingMessages();
253
261
  listeners.forEach((listener) => listener({
262
+ scenarioId: state.scenarioId ?? undefined,
254
263
  sessionId: state.sessionId,
255
264
  status: "active",
256
265
  type: "session"
@@ -268,6 +277,7 @@ var createVoiceConnection = (path, options = {}) => {
268
277
  }
269
278
  if (parsed.type === "session") {
270
279
  state.sessionId = parsed.sessionId;
280
+ state.scenarioId = parsed.scenarioId ?? state.scenarioId;
271
281
  }
272
282
  listeners.forEach((listener) => listener(parsed));
273
283
  };
@@ -291,6 +301,19 @@ var createVoiceConnection = (path, options = {}) => {
291
301
  const send = (message) => {
292
302
  sendSerialized(JSON.stringify(message));
293
303
  };
304
+ const start = (input = {}) => {
305
+ if (input.sessionId) {
306
+ state.sessionId = input.sessionId;
307
+ }
308
+ if (input.scenarioId) {
309
+ state.scenarioId = input.scenarioId;
310
+ }
311
+ send({
312
+ type: "start",
313
+ sessionId: state.sessionId,
314
+ scenarioId: state.scenarioId ?? undefined
315
+ });
316
+ };
294
317
  const sendAudio = (audio) => {
295
318
  sendSerialized(audio);
296
319
  };
@@ -314,9 +337,11 @@ var createVoiceConnection = (path, options = {}) => {
314
337
  };
315
338
  connect();
316
339
  return {
340
+ start,
317
341
  close,
318
342
  endTurn,
319
343
  getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
344
+ getScenarioId: () => state.scenarioId ?? "",
320
345
  getSessionId: () => state.sessionId,
321
346
  send,
322
347
  sendAudio,
@@ -329,6 +354,7 @@ var createInitialState = () => ({
329
354
  assistantTexts: [],
330
355
  error: null,
331
356
  isConnected: false,
357
+ scenarioId: null,
332
358
  partial: "",
333
359
  sessionId: null,
334
360
  status: "idle",
@@ -390,6 +416,7 @@ var createVoiceStreamStore = () => {
390
416
  state = {
391
417
  ...state,
392
418
  error: null,
419
+ scenarioId: action.scenarioId ?? state.scenarioId,
393
420
  isConnected: action.status === "active",
394
421
  sessionId: action.sessionId,
395
422
  status: action.status
@@ -423,6 +450,12 @@ var createVoiceStream = (path, options = {}) => {
423
450
  const connection = createVoiceConnection(path, options);
424
451
  const store = createVoiceStreamStore();
425
452
  const subscribers = new Set;
453
+ const start = (input) => Promise.resolve().then(() => {
454
+ if (!input?.sessionId && !input?.scenarioId) {
455
+ return;
456
+ }
457
+ connection.start(input);
458
+ });
426
459
  const notify = () => {
427
460
  subscribers.forEach((subscriber) => subscriber());
428
461
  };
@@ -455,6 +488,10 @@ var createVoiceStream = (path, options = {}) => {
455
488
  get isConnected() {
456
489
  return store.getSnapshot().isConnected;
457
490
  },
491
+ get scenarioId() {
492
+ return store.getSnapshot().scenarioId;
493
+ },
494
+ start,
458
495
  get partial() {
459
496
  return store.getSnapshot().partial;
460
497
  },
@@ -484,6 +521,573 @@ var createVoiceStream = (path, options = {}) => {
484
521
 
485
522
  // src/svelte/createVoiceStream.ts
486
523
/**
 * Svelte entry point: forwards to the shared createVoiceStream, defaulting
 * `options` to an empty object.
 */
var createVoiceStream2 = (path, options = {}) => {
  return createVoiceStream(path, options);
};
524
+ // src/client/htmx.ts
525
// Event triggered on the bound element when the caller does not supply `eventName`.
var DEFAULT_EVENT_NAME = "voice-refresh";
// Query parameter that carries the session id when `sessionQueryParam` is not supplied.
var DEFAULT_QUERY_PARAM = "sessionId";
527
/**
 * Resolves a binding target: strings are treated as CSS selectors and looked
 * up with document.querySelector; anything else is returned unchanged.
 */
var resolveElement = (input) =>
  typeof input === "string" ? document.querySelector(input) : input;
533
/**
 * Computes the `hx-get` route for an element with the session id applied.
 *
 * @param {{ getAttribute(name: string): string | null }} element - Source of the
 *   fallback route (its current `hx-get` attribute).
 * @param {string | null | undefined} route - Explicit route; wins over the attribute.
 * @param {string} queryParam - Query parameter that carries the session id.
 * @param {string | null | undefined} sessionId - Set on the URL when truthy,
 *   removed from it otherwise.
 * @returns {string} "" when no route is available; a path-relative URL for
 *   same-origin routes; the full absolute URL for routes on another origin.
 */
var buildRoute = (element, route, queryParam, sessionId) => {
  const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
  if (!baseRoute) {
    return "";
  }
  const pageOrigin = window.location.origin;
  const url = new URL(baseRoute, pageOrigin);
  if (sessionId) {
    url.searchParams.set(queryParam, sessionId);
  } else {
    url.searchParams.delete(queryParam);
  }
  if (url.origin !== pageOrigin) {
    // Dropping the origin here would silently retarget the request at the
    // page's own host, so cross-origin routes keep their absolute form.
    return url.href;
  }
  return `${url.pathname}${url.search}${url.hash}`;
};
546
/**
 * Keeps an HTMX element's `hx-get` route in step with a voice stream's session
 * id and fires a refresh event on every stream notification.
 *
 * Outside a browser (no `window`/`document`), or when the target element cannot
 * be resolved, nothing is bound and a do-nothing disposer is returned.
 *
 * @param stream - Object exposing `sessionId` and `subscribe(listener)`.
 * @param options - `element` (node or selector) plus optional `route`,
 *   `eventName` and `sessionQueryParam`.
 * @returns {() => void} Disposer that unsubscribes from the stream.
 */
var bindVoiceHTMX = (stream, options) => {
  const hasDom = typeof window !== "undefined" && typeof document !== "undefined";
  const target = hasDom ? resolveElement(options.element) : null;
  if (!target) {
    return () => {};
  }
  const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
  const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
  const refresh = () => {
    const route = buildRoute(target, options.route, queryParam, stream.sessionId);
    if (route) {
      target.setAttribute("hx-get", route);
    }
    // htmx is optional on the page; both calls are skipped when it is absent.
    window.htmx?.process?.(target);
    window.htmx?.trigger?.(target, eventName);
  };
  const unsubscribe = stream.subscribe(refresh);
  // Apply the current session id immediately instead of waiting for the first update.
  refresh();
  return () => {
    unsubscribe();
  };
};
571
+
572
+ // src/client/microphone.ts
573
/** Limits a sample to the closed range [-1, 1]; NaN passes through as NaN. */
var clampSample = (value) => Math.min(1, Math.max(-1, value));
574
/**
 * Converts floating-point samples to signed 16-bit PCM.
 *
 * Each sample is limited to [-1, 1]; negative values are scaled by 32768 and
 * non-negative values by 32767. Missing entries are treated as 0.
 *
 * @param input - Array-like of float samples.
 * @returns {Uint8Array} Byte view over the resulting Int16Array buffer.
 */
var floatTo16BitPCM = (input) => {
  const pcm = Int16Array.from(input, (value) => {
    const sample = Math.max(-1, Math.min(1, value ?? 0));
    return sample < 0 ? sample * 32768 : sample * 32767;
  });
  return new Uint8Array(pcm.buffer);
};
582
/**
 * Computes a 0..1 level for a chunk of 16-bit PCM audio: the RMS of the
 * samples (normalised by 32768), multiplied by 5.5 and clamped to [0, 1].
 *
 * @param {ArrayBuffer | ArrayBufferView} audio - Raw PCM bytes. A trailing odd
 *   byte is ignored.
 * @returns {number} 0 when fewer than two bytes are supplied.
 */
var getPcmLevel = (audio) => {
  let bytes;
  if (audio instanceof Uint8Array) {
    bytes = audio;
  } else if (ArrayBuffer.isView(audio)) {
    // Reinterpret the view's bytes; new Uint8Array(view) would convert
    // element values instead of exposing the underlying PCM bytes.
    bytes = new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
  } else {
    bytes = new Uint8Array(audio);
  }
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  // Int16Array views must start on an even byte offset; copy unaligned input
  // into a fresh buffer rather than throwing a RangeError.
  const aligned = bytes.byteOffset % 2 === 0 ? bytes : bytes.slice(0, sampleCount * 2);
  const samples = new Int16Array(aligned.buffer, aligned.byteOffset, sampleCount);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.min(1, Math.max(0, Math.sqrt(sumSquares / samples.length) * 5.5));
};
598
/**
 * Resamples by averaging: each output sample is the mean of the input samples
 * that fall in its window.
 *
 * @param input - Array-like of float samples.
 * @param {number} sourceRate - Sample rate of `input`.
 * @param {number} targetRate - Desired sample rate.
 * @returns `input` itself when the rates are equal, otherwise a new
 *   Float32Array of `round(input.length / (sourceRate / targetRate))` samples.
 */
var downsampleBuffer = (input, sourceRate, targetRate) => {
  if (sourceRate === targetRate) {
    return input;
  }
  const ratio = sourceRate / targetRate;
  const output = new Float32Array(Math.round(input.length / ratio));
  let windowStart = 0;
  for (let outIndex = 0; outIndex < output.length; outIndex += 1) {
    const windowEnd = Math.round((outIndex + 1) * ratio);
    const limit = Math.min(windowEnd, input.length);
    let accum = 0;
    let count = 0;
    for (let index = windowStart; index < limit; index += 1) {
      accum += input[index] ?? 0;
      count += 1;
    }
    if (count > 0) {
      output[outIndex] = accum / count;
    } else {
      // Empty window (target rate above source rate, or rounding at the tail):
      // hold the nearest input sample instead of inserting a hard zero, which
      // would punch silent gaps into the signal.
      const nearest = Math.min(windowStart, input.length - 1);
      output[outIndex] = input[nearest] ?? 0;
    }
    windowStart = windowEnd;
  }
  return output;
};
621
/**
 * Creates a browser microphone capture that delivers 16-bit PCM chunks.
 *
 * @param options - `onAudio(pcm)` receives each converted chunk; optional
 *   `onLevel(level)` receives its 0..1 level (and 0 on stop); optional
 *   `channelCount` (default 1) is requested from getUserMedia; optional
 *   `sampleRateHz` (default 16000) is the output rate.
 * @returns {{ start: () => Promise<void>, stop: () => void }}
 *   `start` rejects when getUserMedia or AudioContext is unavailable, when the
 *   user denies access, or when the audio graph cannot be built. In every
 *   failure case any acquired microphone track is released first.
 */
var createMicrophoneCapture = (options) => {
  let audioContext = null;
  let sourceNode = null;
  let processorNode = null;
  let mediaStream = null;
  // Bumped by every start() and stop() so a start that is still waiting on the
  // permission prompt can tell it has been superseded.
  let generation = 0;
  // Releases every acquired resource; safe to call when nothing is active.
  const teardown = () => {
    if (processorNode) {
      processorNode.onaudioprocess = null;
    }
    processorNode?.disconnect();
    sourceNode?.disconnect();
    mediaStream?.getTracks().forEach((track) => track.stop());
    // close() returns a promise; a rejection only means the context was
    // already closed, so there is nothing left to recover.
    audioContext?.close()?.catch?.(() => {});
    audioContext = null;
    mediaStream = null;
    processorNode = null;
    sourceNode = null;
  };
  const start = async () => {
    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
    }
    // Guard the bare global with typeof: referencing an undeclared AudioContext
    // would throw a ReferenceError before the descriptive error below.
    const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? (typeof AudioContext !== "undefined" ? AudioContext : undefined);
    if (!AudioContextCtor) {
      throw new Error("Browser microphone capture requires AudioContext support.");
    }
    // A repeated start() must not orphan the previous stream and context.
    teardown();
    generation += 1;
    const attempt = generation;
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: options.channelCount ?? 1
      }
    });
    if (attempt !== generation) {
      // stop() or another start() ran while the prompt was open: release the
      // microphone right away instead of capturing with no owner.
      stream.getTracks().forEach((track) => track.stop());
      return;
    }
    mediaStream = stream;
    try {
      const context = new AudioContextCtor();
      audioContext = context;
      sourceNode = context.createMediaStreamSource(stream);
      processorNode = context.createScriptProcessor(4096, 1, 1);
      processorNode.onaudioprocess = (event) => {
        const channel = event.inputBuffer.getChannelData(0);
        const downsampled = downsampleBuffer(channel, context.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
        const pcm = floatTo16BitPCM(downsampled);
        options.onLevel?.(getPcmLevel(pcm));
        options.onAudio(pcm);
      };
      sourceNode.connect(processorNode);
      processorNode.connect(context.destination);
    } catch (error) {
      // Do not leave the microphone live when graph construction fails.
      teardown();
      throw error;
    }
  };
  const stop = () => {
    generation += 1;
    teardown();
    options.onLevel?.(0);
  };
  return { start, stop };
};
665
+
666
+ // src/audioConditioning.ts
667
// Fallbacks applied by resolveAudioConditioningConfig when a field is omitted.
// Levels are on the normalised RMS scale produced by computeRms (sample / 32768).
var DEFAULT_TARGET_LEVEL = 0.08;
// Upper bound on the gain conditionAudioChunk may apply.
var DEFAULT_MAX_GAIN = 3;
// Chunks whose RMS is below this threshold are treated as gated.
var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
// Multiplier applied to gated chunks.
var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
671
/**
 * Views raw PCM bytes as signed 16-bit samples. A trailing odd byte is ignored.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio - PCM bytes.
 * @returns {Int16Array} A view over the caller's memory when the data starts
 *   on an even byte offset; otherwise a copy, because Int16Array views cannot
 *   start on an odd offset (the constructor throws RangeError).
 */
var toInt16Array = (audio) => {
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, sampleCount);
  }
  if (audio.byteOffset % 2 !== 0) {
    const start = audio.byteOffset;
    return new Int16Array(audio.buffer.slice(start, start + sampleCount * 2));
  }
  return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
};
677
/**
 * Root-mean-square of 16-bit samples after normalising each by 32768.
 *
 * @param samples - Indexable sequence of sample values with a `length`.
 * @returns {number} 0 for an empty sequence.
 */
var computeRms = (samples) => {
  const total = samples.length;
  if (total === 0) {
    return 0;
  }
  let energy = 0;
  for (let position = 0; position < total; position += 1) {
    const unit = samples[position] / 32768;
    energy += unit * unit;
  }
  return Math.sqrt(energy / total);
};
688
/**
 * Fills in audio-conditioning defaults.
 *
 * @param config - Partial settings, or nothing.
 * @returns {object | undefined} `undefined` when `config` is falsy or has
 *   `enabled: false`; otherwise a complete settings object with `enabled: true`
 *   and every null/undefined field replaced by its module default.
 */
var resolveAudioConditioningConfig = (config) => {
  const isDisabled = !config || config.enabled === false;
  if (isDisabled) {
    return undefined;
  }
  const { maxGain, noiseGateAttenuation, noiseGateThreshold, targetLevel } = config;
  return {
    enabled: true,
    maxGain: maxGain ?? DEFAULT_MAX_GAIN,
    noiseGateAttenuation: noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
    noiseGateThreshold: noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
    targetLevel: targetLevel ?? DEFAULT_TARGET_LEVEL
  };
};
700
/**
 * Applies gain normalisation and a noise gate to one chunk of 16-bit PCM.
 *
 * The chunk's RMS decides both factors: chunks below `noiseGateThreshold` are
 * multiplied by `noiseGateAttenuation`, and the gain aims for `targetLevel`
 * while staying within [0.25, `maxGain`]. Results are rounded and clamped to
 * the signed 16-bit range.
 *
 * @param audio - PCM bytes accepted by toInt16Array.
 * @param config - Resolved conditioning settings; falsy disables conditioning.
 * @returns `audio` itself when conditioning is disabled or the chunk holds no
 *   complete sample; otherwise a new Uint8Array with the conditioned samples.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const samples = toInt16Array(audio);
  if (samples.length === 0) {
    return audio;
  }
  const level = computeRms(samples);
  const isGated = level < config.noiseGateThreshold;
  const gateFactor = isGated ? config.noiseGateAttenuation : 1;
  // The floor keeps the division below finite for silent chunks.
  const referenceLevel = Math.max(level * gateFactor, 0.000001);
  const wantedGain = Math.min(config.maxGain, config.targetLevel / referenceLevel);
  const scale = Math.max(0.25, wantedGain) * gateFactor;
  const conditioned = Int16Array.from(samples, (sample) => {
    const scaled = Math.round(sample * scale);
    return Math.max(-32768, Math.min(32767, scaled));
  });
  return new Uint8Array(conditioned.buffer);
};
720
+
721
+ // src/turnProfiles.ts
722
// Base turn-detection timings per turn profile. resolveTurnDetectionConfig
// uses these whenever neither the caller nor the quality profile sets a field.
var TURN_PROFILE_DEFAULTS = {
  balanced: { qualityProfile: "general", silenceMs: 1400, speechThreshold: 0.012, transcriptStabilityMs: 1000 },
  fast: { qualityProfile: "general", silenceMs: 700, speechThreshold: 0.015, transcriptStabilityMs: 450 },
  "long-form": { qualityProfile: "general", silenceMs: 2200, speechThreshold: 0.01, transcriptStabilityMs: 1500 }
};
// Per-quality-profile overrides; these take precedence over the turn profile
// values above. "general" overrides nothing.
var QUALITY_PROFILE_DEFAULTS = {
  general: {},
  "accent-heavy": { silenceMs: 1200, speechThreshold: 0.01, transcriptStabilityMs: 1200 },
  "noisy-room": { silenceMs: 2000, speechThreshold: 0.02, transcriptStabilityMs: 1600 },
  "short-command": { silenceMs: 500, speechThreshold: 0.016, transcriptStabilityMs: 420 }
};
// Profile names used when a config names neither.
var DEFAULT_TURN_PROFILE = "fast";
var DEFAULT_QUALITY_PROFILE = "general";
762
/**
 * Resolves turn-detection settings. For each timing field the first defined
 * value wins: explicit `config` field, then the quality profile, then the
 * turn profile.
 *
 * Profile names that are not own keys of the lookup tables (typos, prototype
 * keys such as "constructor") fall back to the default profiles instead of
 * crashing on an undefined table entry, and the returned names are the ones
 * whose values were actually used.
 *
 * @param config - Optional `{ profile, qualityProfile, silenceMs,
 *   speechThreshold, transcriptStabilityMs }`.
 * @returns {{ profile: string, qualityProfile: string, silenceMs: number,
 *   speechThreshold: number, transcriptStabilityMs: number }}
 */
var resolveTurnDetectionConfig = (config) => {
  const isKnown = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
  const requestedProfile = config?.profile ?? DEFAULT_TURN_PROFILE;
  const requestedQuality = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
  const profile = isKnown(TURN_PROFILE_DEFAULTS, requestedProfile) ? requestedProfile : DEFAULT_TURN_PROFILE;
  const qualityProfile = isKnown(QUALITY_PROFILE_DEFAULTS, requestedQuality) ? requestedQuality : DEFAULT_QUALITY_PROFILE;
  const preset = TURN_PROFILE_DEFAULTS[profile];
  const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  return {
    profile,
    qualityProfile,
    silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
    speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
    transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
  };
};
775
+
776
+ // src/presets.ts
777
// Raw inputs for each named runtime preset; resolveVoiceRuntimePreset turns
// one of these into a fully resolved configuration. The builders create a
// fresh object per preset so no nested object is shared between presets.
var PRESET_INPUTS = (() => {
  const monoCapture = () => ({ channelCount: 1, sampleRateHz: 16000 });
  const reconnecting = (maxReconnectAttempts, pingInterval) => ({
    maxReconnectAttempts,
    pingInterval,
    reconnect: true
  });
  const conditioning = (maxGain, noiseGateAttenuation, noiseGateThreshold, targetLevel) => ({
    enabled: true,
    maxGain,
    noiseGateAttenuation,
    noiseGateThreshold,
    targetLevel
  });
  return {
    chat: {
      audioConditioning: conditioning(2.5, 0, 0.004, 0.08),
      capture: monoCapture(),
      connection: reconnecting(10, 30000),
      sttLifecycle: "continuous",
      turnDetection: { qualityProfile: "short-command", profile: "balanced" }
    },
    // The only preset without audio conditioning.
    default: {
      capture: monoCapture(),
      connection: reconnecting(10, 30000),
      sttLifecycle: "continuous",
      turnDetection: { qualityProfile: "general", profile: "fast" }
    },
    dictation: {
      audioConditioning: conditioning(2.25, 0.05, 0.003, 0.08),
      capture: monoCapture(),
      connection: reconnecting(12, 30000),
      sttLifecycle: "continuous",
      turnDetection: { qualityProfile: "accent-heavy", profile: "long-form" }
    },
    // The only preset with a turn-scoped STT lifecycle.
    "guided-intake": {
      audioConditioning: conditioning(2.5, 0, 0.004, 0.08),
      capture: monoCapture(),
      connection: reconnecting(12, 30000),
      sttLifecycle: "turn-scoped",
      turnDetection: { qualityProfile: "accent-heavy", profile: "long-form" }
    },
    // The only preset that pins explicit turn-detection timings.
    "noisy-room": {
      audioConditioning: conditioning(3, 0.12, 0.006, 0.085),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: {
        qualityProfile: "noisy-room",
        profile: "long-form",
        silenceMs: 2100,
        speechThreshold: 0.02,
        transcriptStabilityMs: 1650
      }
    },
    reliability: {
      audioConditioning: conditioning(2.9, 0.08, 0.005, 0.08),
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: { qualityProfile: "noisy-room", profile: "long-form" }
    }
  };
})();
913
/**
 * Expands a named preset into a fully resolved runtime configuration.
 *
 * @param {string | null} [name="default"] - Preset name; `null` and
 *   `undefined` both select "default".
 * @returns {{ audioConditioning: object | undefined, capture: object,
 *   connection: object, name: string, sttLifecycle: string,
 *   turnDetection: object }}
 * @throws {TypeError} When `name` is not a defined preset. This names the
 *   problem and the valid choices instead of failing on an undefined lookup.
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  const presetName = name ?? "default";
  // Own-key check so prototype members such as "toString" are not mistaken for presets.
  if (!Object.prototype.hasOwnProperty.call(PRESET_INPUTS, presetName)) {
    const known = Object.keys(PRESET_INPUTS).join(", ");
    throw new TypeError(`Unknown voice runtime preset "${String(presetName)}". Expected one of: ${known}.`);
  }
  const preset = PRESET_INPUTS[presetName];
  return {
    audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
    capture: {
      channelCount: preset.capture?.channelCount ?? 1,
      sampleRateHz: preset.capture?.sampleRateHz ?? 16000
    },
    // Copied so callers cannot mutate the shared preset table.
    connection: {
      ...preset.connection
    },
    name: presetName,
    sttLifecycle: preset.sttLifecycle ?? "continuous",
    turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
  };
};
929
+
930
+ // src/client/controller.ts
931
/**
 * Builds the controller's first snapshot from a voice stream: stream fields
 * are copied (arrays shallow-cloned) and recording starts out off with no
 * recording error.
 */
var createInitialState2 = (stream) => {
  const { assistantTexts, error, isConnected, partial, sessionId, scenarioId, status, turns } = stream;
  return {
    assistantTexts: Array.from(assistantTexts),
    error,
    isConnected,
    isRecording: false,
    partial,
    recordingError: null,
    sessionId,
    scenarioId,
    status,
    turns: Array.from(turns)
  };
};
943
/**
 * Creates a voice controller: a voice stream plus microphone recording state,
 * exposed as an immutable-snapshot store.
 *
 * @param {string} path - Passed to createVoiceStream.
 * @param options - Optional `preset` name, `connection` overrides (merged over
 *   the preset's connection settings), `capture` overrides (`channelCount`,
 *   `sampleRateHz`, `onLevel`) and `autoStopOnComplete` (recording stops when
 *   the stream status becomes "completed" unless this is `false`).
 * @returns Controller with snapshot getters, `subscribe`, `startRecording`,
 *   `stopRecording`, `toggleRecording`, `sendAudio`, `endTurn`, `bindHTMX`
 *   and `close`.
 */
var createVoiceController = (path, options = {}) => {
  const preset = resolveVoiceRuntimePreset(options.preset);
  const stream = createVoiceStream(path, {
    ...preset.connection,
    ...options.connection
  });
  let capture = null;
  // Promise of the start attempt currently waiting on the microphone, if any.
  let pendingStart = null;
  // Bumped whenever recording is stopped so an in-flight start can detect
  // that it was cancelled while waiting for microphone access.
  let startGeneration = 0;
  let state = createInitialState2(stream);
  const subscribers = new Set();
  const notify = () => {
    for (const subscriber of subscribers) {
      subscriber();
    }
  };
  // Stops the active capture (if any) and invalidates any pending start.
  const releaseCapture = () => {
    startGeneration += 1;
    pendingStart = null;
    capture?.stop();
    capture = null;
  };
  const sync = () => {
    state = {
      ...state,
      assistantTexts: [...stream.assistantTexts],
      error: stream.error,
      isConnected: stream.isConnected,
      partial: stream.partial,
      sessionId: stream.sessionId,
      scenarioId: stream.scenarioId,
      status: stream.status,
      turns: [...stream.turns]
    };
    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
      releaseCapture();
      state = {
        ...state,
        isRecording: false
      };
    }
    notify();
  };
  const unsubscribeStream = stream.subscribe(sync);
  sync();
  const ensureCapture = () => {
    if (capture) {
      return capture;
    }
    capture = createMicrophoneCapture({
      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
      onLevel: options.capture?.onLevel,
      onAudio: (audio) => stream.sendAudio(audio),
      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
    });
    return capture;
  };
  const stopRecording = () => {
    releaseCapture();
    state = {
      ...state,
      isRecording: false
    };
    notify();
  };
  // One start attempt. Resolves without recording when it was cancelled by
  // stopRecording()/close() while microphone access was still pending.
  const beginCapture = async () => {
    const generation = startGeneration;
    let active = null;
    try {
      state = {
        ...state,
        recordingError: null
      };
      notify();
      active = ensureCapture();
      await active.start();
    } catch (error) {
      // Release anything the failed capture managed to acquire.
      active?.stop();
      if (capture === active) {
        capture = null;
      }
      state = {
        ...state,
        isRecording: false,
        recordingError: error instanceof Error ? error.message : String(error)
      };
      notify();
      throw error;
    }
    if (generation !== startGeneration) {
      // Stopped while the permission prompt was open: the capture finished
      // starting after its handle was dropped, so shut it down explicitly.
      active.stop();
      if (capture === active) {
        capture = null;
      }
      return;
    }
    state = {
      ...state,
      isRecording: true
    };
    notify();
  };
  const startRecording = () => {
    if (state.isRecording) {
      return Promise.resolve();
    }
    // Concurrent calls (e.g. a double click) share one attempt; starting the
    // same capture twice would open a second stream and leak the first.
    if (pendingStart) {
      return pendingStart;
    }
    const attempt = beginCapture().finally(() => {
      if (pendingStart === attempt) {
        pendingStart = null;
      }
    });
    pendingStart = attempt;
    return attempt;
  };
  const close = () => {
    unsubscribeStream();
    stopRecording();
    stream.close();
  };
  return {
    bindHTMX(bindingOptions) {
      return bindVoiceHTMX(stream, bindingOptions);
    },
    close,
    endTurn: () => stream.endTurn(),
    get error() {
      return state.error;
    },
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    get isConnected() {
      return state.isConnected;
    },
    get isRecording() {
      return state.isRecording;
    },
    get partial() {
      return state.partial;
    },
    get recordingError() {
      return state.recordingError;
    },
    sendAudio: (audio) => stream.sendAudio(audio),
    get sessionId() {
      return state.sessionId;
    },
    get scenarioId() {
      return state.scenarioId;
    },
    startRecording,
    get status() {
      return state.status;
    },
    stopRecording,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    },
    toggleRecording: async () => {
      if (state.isRecording) {
        stopRecording();
        return;
      }
      await startRecording();
    },
    get turns() {
      return state.turns;
    },
    get assistantTexts() {
      return state.assistantTexts;
    }
  };
};
487
1090
  export {
488
- createVoiceStream2 as createVoiceStream
1091
+ createVoiceStream2 as createVoiceStream,
1092
+ createVoiceController
489
1093
  };