@absolutejs/voice 0.0.20 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +387 -4
  2. package/dist/angular/index.d.ts +1 -0
  3. package/dist/angular/index.js +669 -3
  4. package/dist/angular/voice-controller.service.d.ts +21 -0
  5. package/dist/audioConditioning.d.ts +3 -0
  6. package/dist/client/actions.d.ts +7 -0
  7. package/dist/client/connection.d.ts +5 -0
  8. package/dist/client/controller.d.ts +2 -0
  9. package/dist/client/htmxBootstrap.js +576 -167
  10. package/dist/client/index.d.ts +1 -0
  11. package/dist/client/index.js +486 -3
  12. package/dist/client/microphone.d.ts +4 -2
  13. package/dist/correction.d.ts +16 -0
  14. package/dist/index.d.ts +4 -0
  15. package/dist/index.js +1314 -283
  16. package/dist/presets.d.ts +13 -0
  17. package/dist/react/index.d.ts +1 -0
  18. package/dist/react/index.js +642 -3
  19. package/dist/react/useVoiceController.d.ts +20 -0
  20. package/dist/react/useVoiceStream.d.ts +1 -0
  21. package/dist/store.d.ts +2 -2
  22. package/dist/svelte/index.d.ts +1 -0
  23. package/dist/svelte/index.js +607 -3
  24. package/dist/testing/benchmark.d.ts +36 -0
  25. package/dist/testing/index.js +1453 -241
  26. package/dist/testing/sessionBenchmark.d.ts +67 -2
  27. package/dist/testing/stt.d.ts +1 -0
  28. package/dist/turnDetection.d.ts +5 -1
  29. package/dist/turnProfiles.d.ts +6 -0
  30. package/dist/types.d.ts +198 -8
  31. package/dist/vue/index.d.ts +1 -0
  32. package/dist/vue/index.js +660 -3
  33. package/dist/vue/useVoiceController.d.ts +19 -0
  34. package/fixtures/README.md +9 -0
  35. package/fixtures/manifest.json +59 -1
  36. package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
  37. package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
  38. package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
  39. package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
  40. package/package.json +21 -1
@@ -1,4 +1,5 @@
1
1
  export { createVoiceConnection } from './connection';
2
2
  export { createVoiceStream } from './createVoiceStream';
3
+ export { createVoiceController } from './controller';
3
4
  export { bindVoiceHTMX } from './htmx';
4
5
  export { createMicrophoneCapture } from './microphone';
@@ -76,24 +76,30 @@ var WS_NORMAL_CLOSURE = 1000;
76
76
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
77
77
  var DEFAULT_PING_INTERVAL = 30000;
78
78
  var RECONNECT_DELAY_MS = 500;
79
+ var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
79
80
  var noop = () => {};
80
81
  var noopUnsubscribe = () => noop;
81
82
  var NOOP_CONNECTION = {
83
+ start: () => {},
82
84
  close: noop,
83
85
  endTurn: noop,
84
86
  getReadyState: () => WS_CLOSED,
87
+ getScenarioId: () => "",
85
88
  getSessionId: () => "",
86
89
  send: noop,
87
90
  sendAudio: noop,
88
91
  subscribe: noopUnsubscribe
89
92
  };
90
93
  var createSessionId = () => crypto.randomUUID();
91
- var buildWsUrl = (path, sessionId) => {
94
+ var buildWsUrl = (path, sessionId, scenarioId) => {
92
95
  const { hostname, port, protocol } = window.location;
93
96
  const wsProtocol = protocol === "https:" ? "wss:" : "ws:";
94
97
  const portSuffix = port ? `:${port}` : "";
95
98
  const url = new URL(`${wsProtocol}//${hostname}${portSuffix}${path}`);
96
99
  url.searchParams.set("sessionId", sessionId);
100
+ if (scenarioId) {
101
+ url.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
102
+ }
97
103
  return url.toString();
98
104
  };
99
105
  var isVoiceServerMessage = (value) => {
@@ -136,6 +142,7 @@ var createVoiceConnection = (path, options = {}) => {
136
142
  const state = {
137
143
  isConnected: false,
138
144
  pendingMessages: [],
145
+ scenarioId: options.scenarioId ?? null,
139
146
  pingInterval: null,
140
147
  reconnectAttempts: 0,
141
148
  reconnectTimeout: null,
@@ -173,13 +180,14 @@ var createVoiceConnection = (path, options = {}) => {
173
180
  }, RECONNECT_DELAY_MS);
174
181
  };
175
182
  const connect = () => {
176
- const ws = new WebSocket(buildWsUrl(path, state.sessionId));
183
+ const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
177
184
  ws.binaryType = "arraybuffer";
178
185
  ws.onopen = () => {
179
186
  state.isConnected = true;
180
187
  state.reconnectAttempts = 0;
181
188
  flushPendingMessages();
182
189
  listeners.forEach((listener) => listener({
190
+ scenarioId: state.scenarioId ?? undefined,
183
191
  sessionId: state.sessionId,
184
192
  status: "active",
185
193
  type: "session"
@@ -197,6 +205,7 @@ var createVoiceConnection = (path, options = {}) => {
197
205
  }
198
206
  if (parsed.type === "session") {
199
207
  state.sessionId = parsed.sessionId;
208
+ state.scenarioId = parsed.scenarioId ?? state.scenarioId;
200
209
  }
201
210
  listeners.forEach((listener) => listener(parsed));
202
211
  };
@@ -220,6 +229,19 @@ var createVoiceConnection = (path, options = {}) => {
220
229
  const send = (message) => {
221
230
  sendSerialized(JSON.stringify(message));
222
231
  };
232
+ const start = (input = {}) => {
233
+ if (input.sessionId) {
234
+ state.sessionId = input.sessionId;
235
+ }
236
+ if (input.scenarioId) {
237
+ state.scenarioId = input.scenarioId;
238
+ }
239
+ send({
240
+ type: "start",
241
+ sessionId: state.sessionId,
242
+ scenarioId: state.scenarioId ?? undefined
243
+ });
244
+ };
223
245
  const sendAudio = (audio) => {
224
246
  sendSerialized(audio);
225
247
  };
@@ -243,9 +265,11 @@ var createVoiceConnection = (path, options = {}) => {
243
265
  };
244
266
  connect();
245
267
  return {
268
+ start,
246
269
  close,
247
270
  endTurn,
248
271
  getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
272
+ getScenarioId: () => state.scenarioId ?? "",
249
273
  getSessionId: () => state.sessionId,
250
274
  send,
251
275
  sendAudio,
@@ -310,6 +334,7 @@ var serverMessageToAction = (message) => {
310
334
  case "session":
311
335
  return {
312
336
  sessionId: message.sessionId,
337
+ scenarioId: message.scenarioId,
313
338
  status: message.status,
314
339
  type: "session"
315
340
  };
@@ -328,6 +353,7 @@ var createInitialState = () => ({
328
353
  assistantTexts: [],
329
354
  error: null,
330
355
  isConnected: false,
356
+ scenarioId: null,
331
357
  partial: "",
332
358
  sessionId: null,
333
359
  status: "idle",
@@ -389,6 +415,7 @@ var createVoiceStreamStore = () => {
389
415
  state = {
390
416
  ...state,
391
417
  error: null,
418
+ scenarioId: action.scenarioId ?? state.scenarioId,
392
419
  isConnected: action.status === "active",
393
420
  sessionId: action.sessionId,
394
421
  status: action.status
@@ -422,6 +449,12 @@ var createVoiceStream = (path, options = {}) => {
422
449
  const connection = createVoiceConnection(path, options);
423
450
  const store = createVoiceStreamStore();
424
451
  const subscribers = new Set;
452
+ const start = (input) => Promise.resolve().then(() => {
453
+ if (!input?.sessionId && !input?.scenarioId) {
454
+ return;
455
+ }
456
+ connection.start(input);
457
+ });
425
458
  const notify = () => {
426
459
  subscribers.forEach((subscriber) => subscriber());
427
460
  };
@@ -454,6 +487,10 @@ var createVoiceStream = (path, options = {}) => {
454
487
  get isConnected() {
455
488
  return store.getSnapshot().isConnected;
456
489
  },
490
+ get scenarioId() {
491
+ return store.getSnapshot().scenarioId;
492
+ },
493
+ start,
457
494
  get partial() {
458
495
  return store.getSnapshot().partial;
459
496
  },
@@ -527,6 +564,7 @@ var bindVoiceHTMX = (stream, options) => {
527
564
  unsubscribe();
528
565
  };
529
566
  };
567
+
530
568
  // src/client/microphone.ts
531
569
  var clampSample = (value) => Math.max(-1, Math.min(1, value));
532
570
  var floatTo16BitPCM = (input) => {
@@ -537,6 +575,22 @@ var floatTo16BitPCM = (input) => {
537
575
  }
538
576
  return new Uint8Array(output.buffer);
539
577
  };
578
/**
 * Estimates the loudness of a chunk of 16-bit PCM audio as a meter value in
 * the range [0, 1].
 *
 * The value is the RMS of the samples (each normalized by 32768), multiplied
 * by 5.5 and clamped to [0, 1]. A trailing odd byte is ignored.
 *
 * @param audio A `Uint8Array`, or anything accepted by the `Uint8Array`
 *   constructor (e.g. an `ArrayBuffer`), holding 16-bit samples in platform
 *   byte order.
 * @returns The clamped level; 0 when the chunk holds fewer than two bytes.
 */
var getPcmLevel = (audio) => {
  const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
  const sampleCount = Math.floor(bytes.byteLength / Int16Array.BYTES_PER_ELEMENT);
  if (sampleCount === 0) {
    return 0;
  }
  // An Int16Array view must start on a 2-byte boundary; a Uint8Array created
  // with subarray() can begin at an odd offset, which would throw RangeError.
  // Copy into a fresh (offset 0) buffer only in that case.
  const aligned = bytes.byteOffset % Int16Array.BYTES_PER_ELEMENT === 0 ? bytes : bytes.slice();
  const samples = new Int16Array(aligned.buffer, aligned.byteOffset, sampleCount);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.min(1, Math.max(0, Math.sqrt(sumSquares / sampleCount) * 5.5));
};
540
594
  var downsampleBuffer = (input, sourceRate, targetRate) => {
541
595
  if (sourceRate === targetRate) {
542
596
  return input;
@@ -584,7 +638,9 @@ var createMicrophoneCapture = (options) => {
584
638
  processorNode.onaudioprocess = (event) => {
585
639
  const channel = event.inputBuffer.getChannelData(0);
586
640
  const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
587
- options.onAudio(floatTo16BitPCM(downsampled));
641
+ const pcm = floatTo16BitPCM(downsampled);
642
+ options.onLevel?.(getPcmLevel(pcm));
643
+ options.onAudio(pcm);
588
644
  };
589
645
  sourceNode.connect(processorNode);
590
646
  processorNode.connect(audioContext.destination);
@@ -594,6 +650,7 @@ var createMicrophoneCapture = (options) => {
594
650
  sourceNode?.disconnect();
595
651
  mediaStream?.getTracks().forEach((track) => track.stop());
596
652
  audioContext?.close();
653
+ options.onLevel?.(0);
597
654
  audioContext = null;
598
655
  mediaStream = null;
599
656
  processorNode = null;
@@ -601,8 +658,434 @@ var createMicrophoneCapture = (options) => {
601
658
  };
602
659
  return { start, stop };
603
660
  };
661
+
662
+ // src/audioConditioning.ts
663
// Defaults applied by resolveAudioConditioningConfig when a field is omitted.
var DEFAULT_TARGET_LEVEL = 0.08;
var DEFAULT_MAX_GAIN = 3;
var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;

/**
 * Views (or, when necessary, copies) raw audio bytes as 16-bit samples in
 * platform byte order. A trailing odd byte is ignored.
 *
 * @param audio An `ArrayBuffer` or a typed-array view over one.
 * @returns An `Int16Array` over the same samples. It shares memory with the
 *   input unless the view starts at an odd byte offset, in which case the
 *   bytes are copied, because an `Int16Array` must be 2-byte aligned and
 *   constructing one at an odd offset throws `RangeError`.
 */
var toInt16Array = (audio) => {
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, sampleCount);
  }
  if (audio.byteOffset % 2 !== 0) {
    const copy = new Uint8Array(audio.buffer, audio.byteOffset, sampleCount * 2).slice();
    return new Int16Array(copy.buffer, 0, sampleCount);
  }
  return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
};

/**
 * Root-mean-square of 16-bit samples after normalizing each by 32768.
 *
 * @returns The RMS, or 0 for an empty sample array.
 */
var computeRms = (samples) => {
  if (samples.length === 0) {
    return 0;
  }
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.sqrt(sumSquares / samples.length);
};

/**
 * Fills in defaults for an audio-conditioning config.
 *
 * @param config Partial config; may be omitted.
 * @returns A fully populated config with `enabled: true`, or `undefined`
 *   when no config is given or `config.enabled === false`.
 */
var resolveAudioConditioningConfig = (config) => {
  if (!config || config.enabled === false) {
    return undefined;
  }
  return {
    enabled: true,
    maxGain: config.maxGain ?? DEFAULT_MAX_GAIN,
    noiseGateAttenuation: config.noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
    noiseGateThreshold: config.noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
    targetLevel: config.targetLevel ?? DEFAULT_TARGET_LEVEL
  };
};

/**
 * Applies a noise gate and level normalization to one chunk of 16-bit PCM.
 *
 * @param audio An `ArrayBuffer` or typed-array view of 16-bit samples.
 * @param config A resolved conditioning config; when falsy the input is
 *   returned untouched.
 * @returns The original `audio` when there is no config or no complete
 *   sample; otherwise a new `Uint8Array` over freshly allocated samples.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const source = toInt16Array(audio);
  if (source.length === 0) {
    return audio;
  }
  const rms = computeRms(source);
  const output = new Int16Array(source.length);
  // Chunks quieter than the gate threshold are attenuated instead of boosted.
  const gateFactor = rms < config.noiseGateThreshold ? config.noiseGateAttenuation : 1;
  // Floor the level so a silent chunk cannot cause a division by zero.
  const baseLevel = Math.max(rms * gateFactor, 0.000001);
  const gain = Math.min(config.maxGain, config.targetLevel / baseLevel);
  // Never attenuate below 0.25x through normalization alone; the gate factor
  // is then applied on top.
  const appliedGain = Math.max(0.25, gain) * gateFactor;
  for (let index = 0; index < source.length; index += 1) {
    const next = Math.round(source[index] * appliedGain);
    // Clamp to the signed 16-bit range to avoid wrap-around on overflow.
    output[index] = Math.max(-32768, Math.min(32767, next));
  }
  return new Uint8Array(output.buffer);
};
716
+
717
+ // src/turnProfiles.ts
718
// Baseline turn-detection timings for each named turn profile.
var TURN_PROFILE_DEFAULTS = {
  balanced: {
    qualityProfile: "general",
    silenceMs: 1400,
    speechThreshold: 0.012,
    transcriptStabilityMs: 1000
  },
  fast: {
    qualityProfile: "general",
    silenceMs: 700,
    speechThreshold: 0.015,
    transcriptStabilityMs: 450
  },
  "long-form": {
    qualityProfile: "general",
    silenceMs: 2200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1500
  }
};

// Per-quality-profile overrides; a field set here wins over the turn profile.
var QUALITY_PROFILE_DEFAULTS = {
  general: {},
  "accent-heavy": {
    silenceMs: 1200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1200
  },
  "noisy-room": {
    silenceMs: 2000,
    speechThreshold: 0.02,
    transcriptStabilityMs: 1600
  },
  "short-command": {
    silenceMs: 500,
    speechThreshold: 0.016,
    transcriptStabilityMs: 420
  }
};
var DEFAULT_TURN_PROFILE = "fast";
var DEFAULT_QUALITY_PROFILE = "general";

// True only for string keys defined directly on the table, so inherited keys
// such as "constructor" or "toString" are never treated as profile names.
var isKnownProfileKey = (table, key) => typeof key === "string" && Object.prototype.hasOwnProperty.call(table, key);

/**
 * Resolves a turn-detection config into concrete values.
 *
 * Each of `silenceMs`, `speechThreshold` and `transcriptStabilityMs` is taken
 * from the first source that defines it: the explicit value on `config`, the
 * quality profile, then the turn profile.
 *
 * A `profile` or `qualityProfile` that is missing or not a known name falls
 * back to the default ("fast" / "general") instead of failing with a
 * TypeError on an undefined table entry. The returned `profile` and
 * `qualityProfile` are the names actually used.
 *
 * @param config Optional partial config.
 * @returns `{ profile, qualityProfile, silenceMs, speechThreshold, transcriptStabilityMs }`.
 */
var resolveTurnDetectionConfig = (config) => {
  const profile = isKnownProfileKey(TURN_PROFILE_DEFAULTS, config?.profile) ? config.profile : DEFAULT_TURN_PROFILE;
  const qualityProfile = isKnownProfileKey(QUALITY_PROFILE_DEFAULTS, config?.qualityProfile) ? config.qualityProfile : DEFAULT_QUALITY_PROFILE;
  const preset = TURN_PROFILE_DEFAULTS[profile];
  const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  return {
    profile,
    qualityProfile,
    silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
    speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
    transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
  };
};
771
+
772
+ // src/presets.ts
773
// Raw inputs for each named runtime preset. resolveVoiceRuntimePreset passes
// the audioConditioning and turnDetection parts through their resolvers.
var PRESET_INPUTS = {
  chat: {
    audioConditioning: {
      enabled: true,
      maxGain: 2.5,
      noiseGateAttenuation: 0,
      noiseGateThreshold: 0.004,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 10,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "short-command",
      profile: "balanced"
    }
  },
  default: {
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 10,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "general",
      profile: "fast"
    }
  },
  dictation: {
    audioConditioning: {
      enabled: true,
      maxGain: 2.25,
      noiseGateAttenuation: 0.05,
      noiseGateThreshold: 0.003,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 12,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "accent-heavy",
      profile: "long-form"
    }
  },
  "guided-intake": {
    audioConditioning: {
      enabled: true,
      maxGain: 2.5,
      noiseGateAttenuation: 0,
      noiseGateThreshold: 0.004,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 12,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "turn-scoped",
    turnDetection: {
      qualityProfile: "accent-heavy",
      profile: "long-form"
    }
  },
  "noisy-room": {
    audioConditioning: {
      enabled: true,
      maxGain: 3,
      noiseGateAttenuation: 0.12,
      noiseGateThreshold: 0.006,
      targetLevel: 0.085
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 14,
      pingInterval: 45000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "noisy-room",
      profile: "long-form",
      silenceMs: 2100,
      speechThreshold: 0.02,
      transcriptStabilityMs: 1650
    }
  },
  reliability: {
    audioConditioning: {
      enabled: true,
      maxGain: 2.9,
      noiseGateAttenuation: 0.08,
      noiseGateThreshold: 0.005,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 14,
      pingInterval: 45000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "noisy-room",
      profile: "long-form"
    }
  }
};

/**
 * Expands a named preset into a fully resolved runtime configuration.
 *
 * @param name Preset name; defaults to "default". A value that is not the
 *   name of an entry in PRESET_INPUTS (including `null` and inherited keys
 *   such as "constructor") falls back to "default" rather than failing with
 *   a TypeError on an undefined preset.
 * @returns `{ audioConditioning, capture, connection, name, sttLifecycle,
 *   turnDetection }`, where `name` is the preset actually used and
 *   `connection` is a shallow copy, so callers may mutate it without
 *   affecting the preset table.
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  const isKnown = typeof name === "string" && Object.prototype.hasOwnProperty.call(PRESET_INPUTS, name);
  const resolvedName = isKnown ? name : "default";
  const preset = PRESET_INPUTS[resolvedName];
  return {
    audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
    capture: {
      channelCount: preset.capture?.channelCount ?? 1,
      sampleRateHz: preset.capture?.sampleRateHz ?? 16000
    },
    connection: {
      ...preset.connection
    },
    name: resolvedName,
    sttLifecycle: preset.sttLifecycle ?? "continuous",
    turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
  };
};
925
+
926
+ // src/client/controller.ts
927
/**
 * Builds the controller's first snapshot from the stream's current values.
 * Arrays are copied so the snapshot does not alias the stream's own arrays.
 */
var createInitialState2 = (stream) => ({
  assistantTexts: [...stream.assistantTexts],
  error: stream.error,
  isConnected: stream.isConnected,
  isRecording: false,
  partial: stream.partial,
  recordingError: null,
  sessionId: stream.sessionId,
  scenarioId: stream.scenarioId,
  status: stream.status,
  turns: [...stream.turns]
});

/**
 * Creates a controller that couples a voice stream with microphone capture
 * and exposes both as one subscribable snapshot.
 *
 * @param path Passed through to createVoiceStream.
 * @param options Optional settings read here: `preset`, `connection`
 *   (merged over the preset's connection settings), `capture`
 *   (`channelCount`, `sampleRateHz`, `onLevel`) and `autoStopOnComplete`
 *   (recording stops when the stream status becomes "completed" unless this
 *   is `false`).
 * @returns The controller: state getters, `getSnapshot`/`getServerSnapshot`,
 *   `subscribe`, `startRecording`, `stopRecording`, `toggleRecording`,
 *   `sendAudio`, `endTurn`, `bindHTMX` and `close`.
 */
var createVoiceController = (path, options = {}) => {
  const preset = resolveVoiceRuntimePreset(options.preset);
  const stream = createVoiceStream(path, {
    ...preset.connection,
    ...options.connection
  });
  let capture = null;
  // In-flight startRecording() promise, shared by overlapping callers.
  let pendingStart = null;
  let state = createInitialState2(stream);
  const subscribers = new Set();
  const notify = () => {
    for (const subscriber of subscribers) {
      subscriber();
    }
  };
  // Mirrors the stream into the controller snapshot and applies auto-stop.
  const sync = () => {
    state = {
      ...state,
      assistantTexts: [...stream.assistantTexts],
      error: stream.error,
      isConnected: stream.isConnected,
      partial: stream.partial,
      sessionId: stream.sessionId,
      scenarioId: stream.scenarioId,
      status: stream.status,
      turns: [...stream.turns]
    };
    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
      capture?.stop();
      capture = null;
      state = {
        ...state,
        isRecording: false
      };
    }
    notify();
  };
  const unsubscribeStream = stream.subscribe(sync);
  sync();
  // Lazily creates the microphone capture; explicit options win over the preset.
  const ensureCapture = () => {
    if (capture) {
      return capture;
    }
    capture = createMicrophoneCapture({
      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
      onLevel: options.capture?.onLevel,
      onAudio: (audio) => stream.sendAudio(audio),
      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
    });
    return capture;
  };
  const stopRecording = () => {
    capture?.stop();
    capture = null;
    state = {
      ...state,
      isRecording: false
    };
    notify();
  };
  // Performs one start attempt; startRecording() ensures only one runs at a time.
  const beginRecording = async () => {
    state = {
      ...state,
      recordingError: null
    };
    notify();
    const activeCapture = ensureCapture();
    try {
      await activeCapture.start();
    } catch (error) {
      // start() may have acquired some resources before failing; release
      // them. Cleanup is best-effort so the original error is what surfaces.
      try {
        activeCapture.stop();
      } catch {
        // Intentionally ignored: the start failure below is the useful error.
      }
      if (capture === activeCapture) {
        capture = null;
      }
      state = {
        ...state,
        isRecording: false,
        recordingError: error instanceof Error ? error.message : String(error)
      };
      notify();
      throw error;
    }
    if (capture !== activeCapture) {
      // stopRecording()/close() ran while start() was pending. Its stop()
      // call happened before anything was acquired, so release the
      // microphone now and stay in the not-recording state.
      activeCapture.stop();
      return;
    }
    state = {
      ...state,
      isRecording: true
    };
    notify();
  };
  /**
   * Starts microphone capture. Resolves immediately when already recording.
   * Overlapping calls share the same in-flight attempt, so the capture is
   * started only once. Rejects with the capture error, which is also
   * exposed as `recordingError`.
   */
  const startRecording = () => {
    if (state.isRecording) {
      return Promise.resolve();
    }
    if (!pendingStart) {
      pendingStart = beginRecording().finally(() => {
        pendingStart = null;
      });
    }
    return pendingStart;
  };
  const close = () => {
    unsubscribeStream();
    stopRecording();
    stream.close();
  };
  return {
    bindHTMX(bindingOptions) {
      return bindVoiceHTMX(stream, bindingOptions);
    },
    close,
    endTurn: () => stream.endTurn(),
    get error() {
      return state.error;
    },
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    get isConnected() {
      return state.isConnected;
    },
    get isRecording() {
      return state.isRecording;
    },
    get partial() {
      return state.partial;
    },
    get recordingError() {
      return state.recordingError;
    },
    sendAudio: (audio) => stream.sendAudio(audio),
    get sessionId() {
      return state.sessionId;
    },
    get scenarioId() {
      return state.scenarioId;
    },
    startRecording,
    get status() {
      return state.status;
    },
    stopRecording,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    },
    toggleRecording: async () => {
      if (state.isRecording) {
        stopRecording();
        return;
      }
      await startRecording();
    },
    get turns() {
      return state.turns;
    },
    get assistantTexts() {
      return state.assistantTexts;
    }
  };
};
604
1086
  export {
605
1087
  createVoiceStream,
1088
+ createVoiceController,
606
1089
  createVoiceConnection,
607
1090
  createMicrophoneCapture,
608
1091
  bindVoiceHTMX
@@ -1,7 +1,9 @@
1
+ import type { VoiceCaptureOptions } from '../types';
1
2
  type MicrophoneCaptureOptions = {
2
- channelCount?: 1 | 2;
3
+ channelCount?: VoiceCaptureOptions['channelCount'];
4
+ onLevel?: VoiceCaptureOptions['onLevel'];
3
5
  onAudio: (audio: Uint8Array) => void;
4
- sampleRateHz?: number;
6
+ sampleRateHz?: VoiceCaptureOptions['sampleRateHz'];
5
7
  };
6
8
  type MicrophoneCapture = {
7
9
  start: () => Promise<void>;
@@ -0,0 +1,16 @@
1
+ import type { VoicePhraseHint, VoiceTurnCorrectionHandler } from './types';
2
// One reported correction: an `alias` string paired with the phrase hint it
// belongs to. NOTE(review): presumably `alias` is the text that was matched
// in the transcript; confirm against the implementation in correction.ts.
+ export type VoicePhraseHintCorrectionMatch = {
3
+ alias: string;
4
+ hint: VoicePhraseHint;
5
+ };
6
// Return shape of applyPhraseHintCorrections: a `changed` flag, the list of
// matches, and a `text` string. NOTE(review): presumably `text` is the
// corrected transcript and `changed` says whether it differs from the input;
// confirm against the implementation.
+ export type VoicePhraseHintCorrectionResult = {
7
+ changed: boolean;
8
+ matches: VoicePhraseHintCorrectionMatch[];
9
+ text: string;
10
+ };
11
// Optional settings for createPhraseHintCorrectionHandler. Both fields are
// free-form strings; how they are used is not visible in this declaration.
+ export type VoicePhraseHintCorrectionOptions = {
12
+ provider?: string;
13
+ reason?: string;
14
+ };
15
// Takes a text and a list of phrase hints and returns a
// VoicePhraseHintCorrectionResult.
+ export declare const applyPhraseHintCorrections: (text: string, phraseHints: VoicePhraseHint[]) => VoicePhraseHintCorrectionResult;
16
// Factory that returns a VoiceTurnCorrectionHandler; `options` may be omitted.
+ export declare const createPhraseHintCorrectionHandler: (options?: VoicePhraseHintCorrectionOptions) => VoiceTurnCorrectionHandler;
package/dist/index.d.ts CHANGED
@@ -2,4 +2,8 @@ export { voice } from './plugin';
2
2
  export { createVoiceMemoryStore } from './memoryStore';
3
3
  export { createVoiceSession } from './session';
4
4
  export { createId, createVoiceSessionRecord } from './store';
5
+ export { applyPhraseHintCorrections, createPhraseHintCorrectionHandler } from './correction';
6
+ export { conditionAudioChunk, resolveAudioConditioningConfig } from './audioConditioning';
7
+ export { resolveVoiceRuntimePreset } from './presets';
8
+ export { resolveTurnDetectionConfig, TURN_PROFILE_DEFAULTS } from './turnProfiles';
5
9
  export * from './types';