@absolutejs/voice 0.0.20 → 0.0.22-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/README.md +884 -4
  2. package/dist/angular/index.d.ts +1 -0
  3. package/dist/angular/index.js +759 -3
  4. package/dist/angular/voice-controller.service.d.ts +27 -0
  5. package/dist/angular/voice-stream.service.d.ts +6 -0
  6. package/dist/audioConditioning.d.ts +3 -0
  7. package/dist/client/actions.d.ts +48 -0
  8. package/dist/client/audioPlayer.d.ts +40 -0
  9. package/dist/client/connection.d.ts +5 -0
  10. package/dist/client/controller.d.ts +2 -0
  11. package/dist/client/duplex.d.ts +3 -0
  12. package/dist/client/htmxBootstrap.js +660 -167
  13. package/dist/client/index.d.ts +3 -0
  14. package/dist/client/index.js +991 -6
  15. package/dist/client/microphone.d.ts +4 -2
  16. package/dist/correction.d.ts +33 -0
  17. package/dist/fileStore.d.ts +27 -0
  18. package/dist/index.d.ts +15 -0
  19. package/dist/index.js +3721 -298
  20. package/dist/ops.d.ts +100 -0
  21. package/dist/presets.d.ts +13 -0
  22. package/dist/react/index.d.ts +1 -0
  23. package/dist/react/index.js +728 -3
  24. package/dist/react/useVoiceController.d.ts +26 -0
  25. package/dist/react/useVoiceStream.d.ts +7 -0
  26. package/dist/routing.d.ts +3 -0
  27. package/dist/runtimeOps.d.ts +23 -0
  28. package/dist/store.d.ts +2 -2
  29. package/dist/svelte/index.d.ts +1 -0
  30. package/dist/svelte/index.js +691 -3
  31. package/dist/telephony/response.d.ts +7 -0
  32. package/dist/telephony/twilio.d.ts +116 -0
  33. package/dist/testing/benchmark.d.ts +93 -2
  34. package/dist/testing/corrected.d.ts +41 -0
  35. package/dist/testing/duplex.d.ts +59 -0
  36. package/dist/testing/fixtures.d.ts +18 -2
  37. package/dist/testing/index.d.ts +5 -0
  38. package/dist/testing/index.js +6247 -402
  39. package/dist/testing/review.d.ts +143 -0
  40. package/dist/testing/sessionBenchmark.d.ts +92 -2
  41. package/dist/testing/stt.d.ts +3 -1
  42. package/dist/testing/telephony.d.ts +70 -0
  43. package/dist/testing/tts.d.ts +73 -0
  44. package/dist/turnDetection.d.ts +5 -1
  45. package/dist/turnProfiles.d.ts +6 -0
  46. package/dist/types.d.ts +487 -10
  47. package/dist/vue/index.d.ts +1 -0
  48. package/dist/vue/index.js +750 -3
  49. package/dist/vue/useVoiceController.d.ts +30 -0
  50. package/dist/vue/useVoiceStream.d.ts +11 -0
  51. package/fixtures/README.md +9 -0
  52. package/fixtures/manifest.json +59 -1
  53. package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
  54. package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
  55. package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
  56. package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
  57. package/package.json +135 -1
@@ -102,6 +102,14 @@ var normalizeErrorMessage = (value) => {
102
102
  };
103
103
  var serverMessageToAction = (message) => {
104
104
  switch (message.type) {
105
+ case "audio":
106
+ return {
107
+ chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
108
+ format: message.format,
109
+ receivedAt: message.receivedAt,
110
+ turnId: message.turnId,
111
+ type: "audio"
112
+ };
105
113
  case "assistant":
106
114
  return {
107
115
  text: message.text,
@@ -130,6 +138,7 @@ var serverMessageToAction = (message) => {
130
138
  case "session":
131
139
  return {
132
140
  sessionId: message.sessionId,
141
+ scenarioId: message.scenarioId,
133
142
  status: message.status,
134
143
  type: "session"
135
144
  };
@@ -150,24 +159,30 @@ var WS_NORMAL_CLOSURE = 1000;
150
159
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
151
160
  var DEFAULT_PING_INTERVAL = 30000;
152
161
  var RECONNECT_DELAY_MS = 500;
162
+ var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
153
163
  var noop = () => {};
154
164
  var noopUnsubscribe = () => noop;
155
165
  var NOOP_CONNECTION = {
166
+ start: () => {},
156
167
  close: noop,
157
168
  endTurn: noop,
158
169
  getReadyState: () => WS_CLOSED,
170
+ getScenarioId: () => "",
159
171
  getSessionId: () => "",
160
172
  send: noop,
161
173
  sendAudio: noop,
162
174
  subscribe: noopUnsubscribe
163
175
  };
164
176
  var createSessionId = () => crypto.randomUUID();
165
- var buildWsUrl = (path, sessionId) => {
177
+ var buildWsUrl = (path, sessionId, scenarioId) => {
166
178
  const { hostname, port, protocol } = window.location;
167
179
  const wsProtocol = protocol === "https:" ? "wss:" : "ws:";
168
180
  const portSuffix = port ? `:${port}` : "";
169
181
  const url = new URL(`${wsProtocol}//${hostname}${portSuffix}${path}`);
170
182
  url.searchParams.set("sessionId", sessionId);
183
+ if (scenarioId) {
184
+ url.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
185
+ }
171
186
  return url.toString();
172
187
  };
173
188
  var isVoiceServerMessage = (value) => {
@@ -175,6 +190,7 @@ var isVoiceServerMessage = (value) => {
175
190
  return false;
176
191
  }
177
192
  switch (value.type) {
193
+ case "audio":
178
194
  case "assistant":
179
195
  case "complete":
180
196
  case "error":
@@ -210,6 +226,7 @@ var createVoiceConnection = (path, options = {}) => {
210
226
  const state = {
211
227
  isConnected: false,
212
228
  pendingMessages: [],
229
+ scenarioId: options.scenarioId ?? null,
213
230
  pingInterval: null,
214
231
  reconnectAttempts: 0,
215
232
  reconnectTimeout: null,
@@ -247,13 +264,14 @@ var createVoiceConnection = (path, options = {}) => {
247
264
  }, RECONNECT_DELAY_MS);
248
265
  };
249
266
  const connect = () => {
250
- const ws = new WebSocket(buildWsUrl(path, state.sessionId));
267
+ const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
251
268
  ws.binaryType = "arraybuffer";
252
269
  ws.onopen = () => {
253
270
  state.isConnected = true;
254
271
  state.reconnectAttempts = 0;
255
272
  flushPendingMessages();
256
273
  listeners.forEach((listener) => listener({
274
+ scenarioId: state.scenarioId ?? undefined,
257
275
  sessionId: state.sessionId,
258
276
  status: "active",
259
277
  type: "session"
@@ -271,6 +289,7 @@ var createVoiceConnection = (path, options = {}) => {
271
289
  }
272
290
  if (parsed.type === "session") {
273
291
  state.sessionId = parsed.sessionId;
292
+ state.scenarioId = parsed.scenarioId ?? state.scenarioId;
274
293
  }
275
294
  listeners.forEach((listener) => listener(parsed));
276
295
  };
@@ -294,6 +313,19 @@ var createVoiceConnection = (path, options = {}) => {
294
313
  const send = (message) => {
295
314
  sendSerialized(JSON.stringify(message));
296
315
  };
316
+ const start = (input = {}) => {
317
+ if (input.sessionId) {
318
+ state.sessionId = input.sessionId;
319
+ }
320
+ if (input.scenarioId) {
321
+ state.scenarioId = input.scenarioId;
322
+ }
323
+ send({
324
+ type: "start",
325
+ sessionId: state.sessionId,
326
+ scenarioId: state.scenarioId ?? undefined
327
+ });
328
+ };
297
329
  const sendAudio = (audio) => {
298
330
  sendSerialized(audio);
299
331
  };
@@ -317,9 +349,11 @@ var createVoiceConnection = (path, options = {}) => {
317
349
  };
318
350
  connect();
319
351
  return {
352
+ start,
320
353
  close,
321
354
  endTurn,
322
355
  getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
356
+ getScenarioId: () => state.scenarioId ?? "",
323
357
  getSessionId: () => state.sessionId,
324
358
  send,
325
359
  sendAudio,
@@ -329,9 +363,11 @@ var createVoiceConnection = (path, options = {}) => {
329
363
 
330
364
  // src/client/store.ts
331
365
  var createInitialState = () => ({
366
+ assistantAudio: [],
332
367
  assistantTexts: [],
333
368
  error: null,
334
369
  isConnected: false,
370
+ scenarioId: null,
335
371
  partial: "",
336
372
  sessionId: null,
337
373
  status: "idle",
@@ -345,6 +381,20 @@ var createVoiceStreamStore = () => {
345
381
  };
346
382
  const dispatch = (action) => {
347
383
  switch (action.type) {
384
+ case "audio":
385
+ state = {
386
+ ...state,
387
+ assistantAudio: [
388
+ ...state.assistantAudio,
389
+ {
390
+ chunk: action.chunk,
391
+ format: action.format,
392
+ receivedAt: action.receivedAt,
393
+ turnId: action.turnId
394
+ }
395
+ ]
396
+ };
397
+ break;
348
398
  case "assistant":
349
399
  state = {
350
400
  ...state,
@@ -393,6 +443,7 @@ var createVoiceStreamStore = () => {
393
443
  state = {
394
444
  ...state,
395
445
  error: null,
446
+ scenarioId: action.scenarioId ?? state.scenarioId,
396
447
  isConnected: action.status === "active",
397
448
  sessionId: action.sessionId,
398
449
  status: action.status
@@ -426,6 +477,12 @@ var createVoiceStream = (path, options = {}) => {
426
477
  const connection = createVoiceConnection(path, options);
427
478
  const store = createVoiceStreamStore();
428
479
  const subscribers = new Set;
480
+ const start = (input) => Promise.resolve().then(() => {
481
+ if (!input?.sessionId && !input?.scenarioId) {
482
+ return;
483
+ }
484
+ connection.start(input);
485
+ });
429
486
  const notify = () => {
430
487
  subscribers.forEach((subscriber) => subscriber());
431
488
  };
@@ -458,6 +515,10 @@ var createVoiceStream = (path, options = {}) => {
458
515
  get isConnected() {
459
516
  return store.getSnapshot().isConnected;
460
517
  },
518
+ get scenarioId() {
519
+ return store.getSnapshot().scenarioId;
520
+ },
521
+ start,
461
522
  get partial() {
462
523
  return store.getSnapshot().partial;
463
524
  },
@@ -473,6 +534,9 @@ var createVoiceStream = (path, options = {}) => {
473
534
  get assistantTexts() {
474
535
  return store.getSnapshot().assistantTexts;
475
536
  },
537
+ get assistantAudio() {
538
+ return store.getSnapshot().assistantAudio;
539
+ },
476
540
  sendAudio(audio) {
477
541
  connection.sendAudio(audio);
478
542
  },
@@ -494,6 +558,7 @@ var _init = __decoratorStart(undefined);
494
558
  class VoiceStreamService {
495
559
  connect(path, options = {}) {
496
560
  const stream = createVoiceStream(path, options);
561
+ const assistantAudioSignal = signal([]);
497
562
  const assistantTextsSignal = signal([]);
498
563
  const errorSignal = signal(null);
499
564
  const isConnectedSignal = signal(false);
@@ -502,6 +567,7 @@ class VoiceStreamService {
502
567
  const statusSignal = signal(stream.status);
503
568
  const turnsSignal = signal([]);
504
569
  const sync = () => {
570
+ assistantAudioSignal.set([...stream.assistantAudio]);
505
571
  assistantTextsSignal.set([...stream.assistantTexts]);
506
572
  errorSignal.set(stream.error);
507
573
  isConnectedSignal.set(stream.isConnected);
@@ -513,6 +579,7 @@ class VoiceStreamService {
513
579
  const unsubscribe = stream.subscribe(sync);
514
580
  sync();
515
581
  return {
582
+ assistantAudio: computed(() => assistantAudioSignal()),
516
583
  assistantTexts: computed(() => assistantTextsSignal()),
517
584
  close: () => {
518
585
  unsubscribe();
@@ -533,6 +600,695 @@ VoiceStreamService = __decorateElement(_init, 0, "VoiceStreamService", _dec, Voi
533
600
  __runInitializers(_init, 1, VoiceStreamService);
534
601
  __decoratorMetadata(_init, VoiceStreamService);
535
602
  let _VoiceStreamService = VoiceStreamService;
603
+ // src/angular/voice-controller.service.ts
604
+ import { computed as computed2, Injectable as Injectable2, signal as signal2 } from "@angular/core";
605
+
606
+ // src/client/htmx.ts
607
+ var DEFAULT_EVENT_NAME = "voice-refresh";
608
+ var DEFAULT_QUERY_PARAM = "sessionId";
609
/**
 * Resolves an element reference.
 *
 * @param {*} input - A CSS selector string, or an already-resolved value.
 * @returns {*} The first match from `document.querySelector` when `input`
 *   is a string; otherwise `input` itself, returned untouched.
 */
var resolveElement = (input) =>
  typeof input === "string" ? document.querySelector(input) : input;
615
/**
 * Builds the relative route (path + query + hash) used for an element's
 * refresh request, with the session query parameter set or removed.
 *
 * @param {{ getAttribute(name: string): (string|null) }} element - Element
 *   whose `hx-get` attribute is the fallback route.
 * @param {string|null|undefined} route - Explicit route; wins over `hx-get`.
 * @param {string} queryParam - Name of the query parameter carrying the session id.
 * @param {string|null|undefined} sessionId - Session id; when falsy the
 *   parameter is removed from the route instead.
 * @returns {string} The route without its origin, or `""` when no route is available.
 */
var buildRoute = (element, route, queryParam, sessionId) => {
  const template = route ?? element.getAttribute("hx-get") ?? "";
  if (!template) {
    return "";
  }
  // Resolve against the page origin so relative routes parse; only the
  // path, query and hash are handed back.
  const target = new URL(template, window.location.origin);
  const query = target.searchParams;
  if (!sessionId) {
    query.delete(queryParam);
  } else {
    query.set(queryParam, sessionId);
  }
  return [target.pathname, target.search, target.hash].join("");
};
628
/**
 * Keeps an HTMX-driven element in sync with a voice stream.
 *
 * On every stream notification (and once immediately) the element's
 * `hx-get` attribute is rewritten to carry the current session id, then
 * `htmx.process` and `htmx.trigger` are invoked on it when available.
 *
 * @param {{ sessionId: *, subscribe(listener: Function): Function }} stream
 * @param {{ element: *, route?: string, eventName?: string, sessionQueryParam?: string }} options
 * @returns {Function} Unbind function; a no-op when there is no DOM or the
 *   element cannot be resolved.
 */
var bindVoiceHTMX = (stream, options) => {
  const hasDom = typeof window !== "undefined" && typeof document !== "undefined";
  const target = hasDom ? resolveElement(options.element) : null;
  if (!target) {
    return () => {};
  }
  const triggerEvent = options.eventName ?? DEFAULT_EVENT_NAME;
  const sessionParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
  const refresh = () => {
    const route = buildRoute(target, options.route, sessionParam, stream.sessionId);
    if (route) {
      target.setAttribute("hx-get", route);
    }
    // htmx is optional: both calls are skipped when it is not on the page.
    window.htmx?.process?.(target);
    window.htmx?.trigger?.(target, triggerEvent);
  };
  const detach = stream.subscribe(refresh);
  refresh();
  return () => {
    detach();
  };
};
653
+
654
+ // src/client/microphone.ts
655
/** Limits a sample to the closed range [-1, 1]. */
var clampSample = (value) => Math.min(1, Math.max(-1, value));

/**
 * Converts floating-point samples to signed 16-bit PCM.
 *
 * Negative samples are scaled by 32768 and non-negative ones by 32767, so
 * both ends of [-1, 1] land on the int16 limits. Missing entries count as 0.
 *
 * @param {ArrayLike<number>} input - Samples, nominally within [-1, 1].
 * @returns {Uint8Array} Byte view over the freshly allocated int16 samples.
 */
var floatTo16BitPCM = (input) => {
  const pcm = Int16Array.from(input, (raw) => {
    const level = clampSample(raw ?? 0);
    return level * (level < 0 ? 32768 : 32767);
  });
  return new Uint8Array(pcm.buffer);
};
664
/**
 * Computes a 0..1 loudness level for a chunk of 16-bit PCM audio.
 *
 * The level is the RMS of the samples (normalised by 32768) multiplied by
 * 5.5 and clamped to [0, 1]. A trailing odd byte is ignored.
 *
 * @param {ArrayBuffer|ArrayBufferView} audio - Raw PCM bytes. Any typed-array
 *   or DataView is read through its underlying bytes.
 * @returns {number} Level in [0, 1]; 0 when fewer than two bytes are supplied.
 */
var getPcmLevel = (audio) => {
  // Reinterpret views byte-wise. `new Uint8Array(view)` would copy element
  // values (truncated to 8 bits) rather than the underlying PCM bytes.
  const bytes = audio instanceof Uint8Array
    ? audio
    : ArrayBuffer.isView(audio)
      ? new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength)
      : new Uint8Array(audio);
  if (bytes.byteLength < 2) {
    return 0;
  }
  // Int16Array requires a 2-byte aligned offset; copy odd-offset views into
  // a fresh (aligned) buffer instead of throwing a RangeError.
  const aligned = bytes.byteOffset % 2 === 0 ? bytes : bytes.slice();
  const samples = new Int16Array(aligned.buffer, aligned.byteOffset, Math.floor(aligned.byteLength / 2));
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.min(1, Math.max(0, Math.sqrt(sumSquares / samples.length) * 5.5));
};
680
/**
 * Resamples by averaging consecutive windows of input samples.
 *
 * Output slot `k` is the mean of the input samples between
 * `round(k * ratio)` and `round((k + 1) * ratio)`, where
 * `ratio = sourceRate / targetRate`. A slot whose window contains no input
 * sample is set to 0.
 *
 * @param {Float32Array} input - Samples at `sourceRate`.
 * @param {number} sourceRate - Sample rate of `input`.
 * @param {number} targetRate - Desired sample rate.
 * @returns {Float32Array} `input` itself when the rates match, otherwise a
 *   new array of `round(input.length / ratio)` samples.
 */
var downsampleBuffer = (input, sourceRate, targetRate) => {
  if (sourceRate === targetRate) {
    return input;
  }
  const step = sourceRate / targetRate;
  const result = new Float32Array(Math.round(input.length / step));
  let windowStart = 0;
  for (let slot = 0; slot < result.length; slot += 1) {
    const windowEnd = Math.round((slot + 1) * step);
    const limit = Math.min(windowEnd, input.length);
    let total = 0;
    for (let cursor = windowStart; cursor < limit; cursor += 1) {
      total += input[cursor] ?? 0;
    }
    const taken = Math.max(0, limit - windowStart);
    result[slot] = taken > 0 ? total / taken : 0;
    windowStart = windowEnd;
  }
  return result;
};
703
/**
 * Creates a browser microphone capture handle.
 *
 * `start()` requests the microphone through `getUserMedia`, feeds it into a
 * 4096-frame script processor, and for every processed buffer downsamples
 * channel 0 to `options.sampleRateHz` (default 16000), converts it to
 * 16-bit PCM, reports its level via `options.onLevel` and passes the bytes
 * to `options.onAudio`.
 *
 * @param {{ channelCount?: number, sampleRateHz?: number, onAudio(pcm: Uint8Array): void, onLevel?(level: number): void }} options
 * @returns {{ start(): Promise<void>, stop(): void }}
 *   `start` rejects with an Error when `getUserMedia` or `AudioContext` is
 *   unavailable, or with whatever the browser APIs throw. `stop` releases
 *   everything acquired so far and reports a level of 0.
 */
var createMicrophoneCapture = (options) => {
  let audioContext = null;
  let sourceNode = null;
  let processorNode = null;
  let mediaStream = null;
  // Tears down whatever has been acquired so far; safe to call at any time.
  const release = () => {
    processorNode?.disconnect();
    sourceNode?.disconnect();
    mediaStream?.getTracks().forEach((track) => track.stop());
    audioContext?.close();
    audioContext = null;
    mediaStream = null;
    processorNode = null;
    sourceNode = null;
  };
  const start = async () => {
    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
    }
    // Guard the bare global with `typeof`: referencing an undeclared
    // `AudioContext` would throw a ReferenceError before the check below.
    const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? (typeof AudioContext !== "undefined" ? AudioContext : undefined);
    if (!AudioContextCtor) {
      throw new Error("Browser microphone capture requires AudioContext support.");
    }
    mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: options.channelCount ?? 1
      }
    });
    try {
      audioContext = new AudioContextCtor();
      sourceNode = audioContext.createMediaStreamSource(mediaStream);
      processorNode = audioContext.createScriptProcessor(4096, 1, 1);
      processorNode.onaudioprocess = (event) => {
        const channel = event.inputBuffer.getChannelData(0);
        const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
        const pcm = floatTo16BitPCM(downsampled);
        options.onLevel?.(getPcmLevel(pcm));
        options.onAudio(pcm);
      };
      sourceNode.connect(processorNode);
      // NOTE(review): presumably the processor must reach the destination
      // for `onaudioprocess` to fire — confirm against target browsers.
      processorNode.connect(audioContext.destination);
    } catch (error) {
      // The microphone is already live at this point; do not leave it open
      // when building the audio graph fails.
      release();
      throw error;
    }
  };
  const stop = () => {
    release();
    options.onLevel?.(0);
  };
  return { start, stop };
};
747
+
748
+ // src/audioConditioning.ts
749
+ var DEFAULT_TARGET_LEVEL = 0.08;
750
+ var DEFAULT_MAX_GAIN = 3;
751
+ var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
752
+ var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
753
/**
 * Views raw PCM bytes as signed 16-bit samples.
 *
 * @param {ArrayBuffer|ArrayBufferView} audio - Raw bytes; a trailing odd
 *   byte is ignored.
 * @returns {Int16Array} A view over the same memory when the input is
 *   2-byte aligned, otherwise a copy of the samples (a direct view would
 *   throw a RangeError for an odd `byteOffset`).
 */
var toInt16Array = (audio) => {
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, sampleCount);
  }
  if (audio.byteOffset % 2 !== 0) {
    // `slice()` copies into a fresh buffer that starts at offset 0.
    const copy = new Uint8Array(audio.buffer, audio.byteOffset, sampleCount * 2).slice();
    return new Int16Array(copy.buffer, 0, sampleCount);
  }
  return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
};
759
/**
 * Root-mean-square of 16-bit samples, each normalised by 32768.
 *
 * @param {ArrayLike<number>} samples - Signed 16-bit sample values.
 * @returns {number} RMS of the normalised samples; 0 for an empty input.
 */
var computeRms = (samples) => {
  const total = samples.length;
  if (total === 0) {
    return 0;
  }
  let energy = 0;
  for (let position = 0; position < total; position += 1) {
    const unit = samples[position] / 32768;
    energy += unit * unit;
  }
  return Math.sqrt(energy / total);
};
770
/**
 * Fills in the defaults of an audio-conditioning configuration.
 *
 * @param {object|null|undefined} config - Partial configuration.
 * @returns {object|undefined} `undefined` when `config` is missing or has
 *   `enabled === false`; otherwise an object with `enabled: true` and every
 *   tuning value taken from `config` or, when nullish, from the module defaults.
 */
var resolveAudioConditioningConfig = (config) => {
  if (config && config.enabled !== false) {
    const resolved = { enabled: true };
    resolved.maxGain = config.maxGain ?? DEFAULT_MAX_GAIN;
    resolved.noiseGateAttenuation = config.noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION;
    resolved.noiseGateThreshold = config.noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD;
    resolved.targetLevel = config.targetLevel ?? DEFAULT_TARGET_LEVEL;
    return resolved;
  }
  return undefined;
};
782
/**
 * Applies noise gating and gain normalisation to a chunk of 16-bit PCM.
 *
 * The chunk's RMS decides everything: below `noiseGateThreshold` the gate
 * factor is `noiseGateAttenuation` (otherwise 1); the gain aims the gated
 * level at `targetLevel`, capped at `maxGain` and floored at 0.25, and is
 * then multiplied by the gate factor. Samples are rounded and clamped to
 * the int16 range.
 *
 * @param {ArrayBuffer|ArrayBufferView} audio - Raw PCM bytes.
 * @param {object|null|undefined} config - Resolved conditioning settings.
 * @returns {*} `audio` unchanged when there is no config or no complete
 *   sample; otherwise a new `Uint8Array` holding the conditioned samples.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const samples = toInt16Array(audio);
  if (samples.length === 0) {
    return audio;
  }
  const level = computeRms(samples);
  const gate = level < config.noiseGateThreshold ? config.noiseGateAttenuation : 1;
  // The tiny floor keeps the division finite for silent or fully gated chunks.
  const reference = Math.max(level * gate, 0.000001);
  const boost = Math.min(config.maxGain, config.targetLevel / reference);
  const scale = Math.max(0.25, boost) * gate;
  const conditioned = Int16Array.from(
    samples,
    (sample) => Math.max(-32768, Math.min(32767, Math.round(sample * scale)))
  );
  return new Uint8Array(conditioned.buffer);
};
802
+
803
+ // src/turnProfiles.ts
804
// Base timing values for each turn profile.
var TURN_PROFILE_DEFAULTS = {
  balanced: {
    qualityProfile: "general",
    silenceMs: 1400,
    speechThreshold: 0.012,
    transcriptStabilityMs: 1000
  },
  fast: {
    qualityProfile: "general",
    silenceMs: 700,
    speechThreshold: 0.015,
    transcriptStabilityMs: 450
  },
  "long-form": {
    qualityProfile: "general",
    silenceMs: 2200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1500
  }
};
// Per-quality-profile overrides; they take precedence over the turn profile.
var QUALITY_PROFILE_DEFAULTS = {
  general: {},
  "accent-heavy": {
    silenceMs: 1200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1200
  },
  "noisy-room": {
    silenceMs: 2000,
    speechThreshold: 0.02,
    transcriptStabilityMs: 1600
  },
  "short-command": {
    silenceMs: 500,
    speechThreshold: 0.016,
    transcriptStabilityMs: 420
  }
};
var DEFAULT_TURN_PROFILE = "fast";
var DEFAULT_QUALITY_PROFILE = "general";

/**
 * Resolves a turn-detection configuration to concrete values.
 *
 * Each timing value is taken from the first source that defines it:
 * the explicit `config`, then the quality profile, then the turn profile.
 *
 * @param {object|null|undefined} config - Optional `profile`,
 *   `qualityProfile`, `silenceMs`, `speechThreshold`, `transcriptStabilityMs`.
 * @returns {{ profile: string, qualityProfile: string, silenceMs: number, speechThreshold: number, transcriptStabilityMs: number }}
 */
var resolveTurnDetectionConfig = (config) => {
  const profile = config?.profile ?? DEFAULT_TURN_PROFILE;
  const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
  const base = TURN_PROFILE_DEFAULTS[profile];
  const tuned = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  const pick = (key) => config?.[key] ?? tuned[key] ?? base[key];
  return {
    profile,
    qualityProfile,
    silenceMs: pick("silenceMs"),
    speechThreshold: pick("speechThreshold"),
    transcriptStabilityMs: pick("transcriptStabilityMs")
  };
};
857
+
858
+ // src/presets.ts
859
// Raw (unresolved) settings for every named runtime preset.
var PRESET_INPUTS = {
  chat: {
    audioConditioning: {
      enabled: true,
      maxGain: 2.5,
      noiseGateAttenuation: 0,
      noiseGateThreshold: 0.004,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 10,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "short-command",
      profile: "balanced"
    }
  },
  default: {
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 10,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "general",
      profile: "fast"
    }
  },
  dictation: {
    audioConditioning: {
      enabled: true,
      maxGain: 2.25,
      noiseGateAttenuation: 0.05,
      noiseGateThreshold: 0.003,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 12,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "accent-heavy",
      profile: "long-form"
    }
  },
  "guided-intake": {
    audioConditioning: {
      enabled: true,
      maxGain: 2.5,
      noiseGateAttenuation: 0,
      noiseGateThreshold: 0.004,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 12,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "turn-scoped",
    turnDetection: {
      qualityProfile: "accent-heavy",
      profile: "long-form"
    }
  },
  "noisy-room": {
    audioConditioning: {
      enabled: true,
      maxGain: 3,
      noiseGateAttenuation: 0.12,
      noiseGateThreshold: 0.006,
      targetLevel: 0.085
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 14,
      pingInterval: 45000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "noisy-room",
      profile: "long-form",
      silenceMs: 2100,
      speechThreshold: 0.02,
      transcriptStabilityMs: 1650
    }
  },
  "pstn-balanced": {
    audioConditioning: {
      enabled: true,
      maxGain: 2.8,
      noiseGateAttenuation: 0.07,
      noiseGateThreshold: 0.005,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 14,
      pingInterval: 45000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "noisy-room",
      profile: "long-form",
      silenceMs: 660,
      speechThreshold: 0.012,
      transcriptStabilityMs: 300
    }
  },
  "pstn-fast": {
    audioConditioning: {
      enabled: true,
      maxGain: 2.75,
      noiseGateAttenuation: 0.06,
      noiseGateThreshold: 0.005,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 14,
      pingInterval: 45000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "noisy-room",
      profile: "long-form",
      silenceMs: 620,
      speechThreshold: 0.012,
      transcriptStabilityMs: 280
    }
  },
  reliability: {
    audioConditioning: {
      enabled: true,
      maxGain: 2.9,
      noiseGateAttenuation: 0.08,
      noiseGateThreshold: 0.005,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 14,
      pingInterval: 45000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "noisy-room",
      profile: "long-form"
    }
  }
};

/**
 * Resolves a named runtime preset into fully populated settings.
 *
 * @param {string} [name="default"] - One of the keys of `PRESET_INPUTS`.
 * @returns {{ audioConditioning: (object|undefined), capture: { channelCount: number, sampleRateHz: number }, connection: object, name: string, sttLifecycle: string, turnDetection: object }}
 *   The `connection` object is a shallow copy of the preset's.
 * @throws {TypeError} When `name` is not a preset defined in `PRESET_INPUTS`.
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  // Own-key check: a plain lookup would accept inherited keys such as
  // "toString" and fail with an opaque error for any other unknown name.
  if (!Object.prototype.hasOwnProperty.call(PRESET_INPUTS, name)) {
    throw new TypeError(`Unknown voice runtime preset "${String(name)}". Expected one of: ${Object.keys(PRESET_INPUTS).join(", ")}.`);
  }
  const preset = PRESET_INPUTS[name];
  return {
    audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
    capture: {
      channelCount: preset.capture?.channelCount ?? 1,
      sampleRateHz: preset.capture?.sampleRateHz ?? 16000
    },
    connection: {
      ...preset.connection
    },
    name,
    sttLifecycle: preset.sttLifecycle ?? "continuous",
    turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
  };
};
1063
+
1064
+ // src/client/controller.ts
1065
/**
 * Builds the controller's first snapshot from the current stream values.
 *
 * Array-valued fields are shallow-copied; recording starts out off with no
 * recording error.
 *
 * @param {object} stream - Voice stream exposing the fields read below.
 * @returns {object} Fresh controller state.
 */
var createInitialState2 = (stream) => {
  const {
    assistantAudio,
    assistantTexts,
    error,
    isConnected,
    partial,
    sessionId,
    scenarioId,
    status,
    turns
  } = stream;
  return {
    assistantAudio: [...assistantAudio],
    assistantTexts: [...assistantTexts],
    error,
    isConnected,
    isRecording: false,
    partial,
    recordingError: null,
    sessionId,
    scenarioId,
    status,
    turns: [...turns]
  };
};
1078
/**
 * Creates a voice controller: a voice stream plus microphone recording
 * state, exposed as a subscribable snapshot store.
 *
 * Connection options are the preset's connection settings overridden by
 * `options.connection`; capture settings come from `options.capture` with
 * the preset as fallback. Unless `options.autoStopOnComplete === false`,
 * recording stops automatically when the stream status becomes "completed".
 *
 * @param {string} path - Passed through to `createVoiceStream`.
 * @param {{ preset?: string, connection?: object, capture?: object, autoStopOnComplete?: boolean }} [options]
 * @returns {object} Controller with state getters, `subscribe`,
 *   `startRecording` / `stopRecording` / `toggleRecording`, `sendAudio`,
 *   `endTurn`, `bindHTMX` and `close`.
 */
var createVoiceController = (path, options = {}) => {
  const preset = resolveVoiceRuntimePreset(options.preset);
  const stream = createVoiceStream(path, {
    ...preset.connection,
    ...options.connection
  });
  let capture = null;
  // Promise of the start currently in flight, shared by concurrent callers.
  let pendingStart = null;
  let state = createInitialState2(stream);
  const subscribers = new Set();
  const notify = () => {
    for (const subscriber of subscribers) {
      subscriber();
    }
  };
  const sync = () => {
    state = {
      ...state,
      assistantAudio: [...stream.assistantAudio],
      assistantTexts: [...stream.assistantTexts],
      error: stream.error,
      isConnected: stream.isConnected,
      partial: stream.partial,
      sessionId: stream.sessionId,
      scenarioId: stream.scenarioId,
      status: stream.status,
      turns: [...stream.turns]
    };
    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
      capture?.stop();
      capture = null;
      state = {
        ...state,
        isRecording: false
      };
    }
    notify();
  };
  const unsubscribeStream = stream.subscribe(sync);
  sync();
  const ensureCapture = () => {
    if (capture) {
      return capture;
    }
    capture = createMicrophoneCapture({
      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
      onLevel: options.capture?.onLevel,
      onAudio: (audio) => stream.sendAudio(audio),
      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
    });
    return capture;
  };
  const stopRecording = () => {
    capture?.stop();
    capture = null;
    state = {
      ...state,
      isRecording: false
    };
    notify();
  };
  // Performs one start attempt; callers go through `startRecording`.
  const beginRecording = async () => {
    let startedCapture = null;
    try {
      state = {
        ...state,
        recordingError: null
      };
      notify();
      startedCapture = ensureCapture();
      await startedCapture.start();
      if (capture !== startedCapture) {
        // stopRecording()/close() ran while the start was pending. The
        // capture acquired the microphone afterwards, so release it here
        // instead of reporting a recording nobody can stop.
        startedCapture.stop();
        return;
      }
      state = {
        ...state,
        isRecording: true
      };
      notify();
    } catch (error) {
      // Release anything the failed attempt may still hold before
      // dropping the handle.
      startedCapture?.stop();
      capture = null;
      state = {
        ...state,
        isRecording: false,
        recordingError: error instanceof Error ? error.message : String(error)
      };
      notify();
      throw error;
    }
  };
  const startRecording = () => {
    if (state.isRecording) {
      return Promise.resolve();
    }
    if (!pendingStart) {
      // Concurrent callers share one attempt rather than each opening the
      // microphone.
      pendingStart = beginRecording().finally(() => {
        pendingStart = null;
      });
    }
    return pendingStart;
  };
  const close = () => {
    unsubscribeStream();
    stopRecording();
    stream.close();
  };
  return {
    bindHTMX(bindingOptions) {
      return bindVoiceHTMX(stream, bindingOptions);
    },
    close,
    endTurn: () => stream.endTurn(),
    get error() {
      return state.error;
    },
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    get isConnected() {
      return state.isConnected;
    },
    get isRecording() {
      return state.isRecording;
    },
    get partial() {
      return state.partial;
    },
    get recordingError() {
      return state.recordingError;
    },
    sendAudio: (audio) => stream.sendAudio(audio),
    get sessionId() {
      return state.sessionId;
    },
    get scenarioId() {
      return state.scenarioId;
    },
    startRecording,
    get status() {
      return state.status;
    },
    stopRecording,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    },
    toggleRecording: async () => {
      if (state.isRecording) {
        stopRecording();
        return;
      }
      await startRecording();
    },
    get turns() {
      return state.turns;
    },
    get assistantTexts() {
      return state.assistantTexts;
    },
    get assistantAudio() {
      return state.assistantAudio;
    }
  };
};
1229
+
1230
+ // src/angular/voice-controller.service.ts
1231
+ var _dec = [
1232
+ Injectable2({ providedIn: "root" })
1233
+ ];
1234
+ var _init = __decoratorStart(undefined);
1235
+
1236
/**
 * Angular-facing wrapper that mirrors a voice controller into signals.
 */
class VoiceControllerService {
  /**
   * Creates a voice controller and exposes its state as read-only computed
   * signals next to the controller's actions.
   *
   * @param {string} path - Passed through to `createVoiceController`.
   * @param {object} [options] - Passed through to `createVoiceController`.
   * @returns {object} Computed signals for every state field plus
   *   `bindHTMX`, `close`, `endTurn`, `sendAudio`, `startRecording`,
   *   `stopRecording` and `toggleRecording`.
   */
  connect(path, options = {}) {
    const controller = createVoiceController(path, options);
    const audio = signal2([]);
    const texts = signal2([]);
    const failure = signal2(null);
    const connected = signal2(false);
    const recording = signal2(false);
    const partialText = signal2("");
    const recordingFailure = signal2(null);
    const session = signal2(controller.sessionId);
    const phase = signal2(controller.status);
    const history = signal2([]);
    // Copies the controller's current values into the writable signals.
    const pull = () => {
      audio.set([...controller.assistantAudio]);
      texts.set([...controller.assistantTexts]);
      failure.set(controller.error);
      connected.set(controller.isConnected);
      recording.set(controller.isRecording);
      partialText.set(controller.partial);
      recordingFailure.set(controller.recordingError);
      session.set(controller.sessionId);
      phase.set(controller.status);
      history.set([...controller.turns]);
    };
    const detach = controller.subscribe(pull);
    pull();
    // Consumers only get computed wrappers, never the writable signals.
    const expose = (source) => computed2(() => source());
    return {
      assistantAudio: expose(audio),
      assistantTexts: expose(texts),
      bindHTMX: controller.bindHTMX,
      close: () => {
        detach();
        controller.close();
      },
      endTurn: () => controller.endTurn(),
      error: expose(failure),
      isConnected: expose(connected),
      isRecording: expose(recording),
      partial: expose(partialText),
      recordingError: expose(recordingFailure),
      sendAudio: (audio2) => controller.sendAudio(audio2),
      sessionId: expose(session),
      startRecording: () => controller.startRecording(),
      status: expose(phase),
      stopRecording: () => controller.stopRecording(),
      toggleRecording: () => controller.toggleRecording(),
      turns: expose(history)
    };
  }
}
1287
+ VoiceControllerService = __decorateElement(_init, 0, "VoiceControllerService", _dec, VoiceControllerService);
1288
+ __runInitializers(_init, 1, VoiceControllerService);
1289
+ __decoratorMetadata(_init, VoiceControllerService);
1290
+ let _VoiceControllerService = VoiceControllerService;
536
1291
  export {
537
- VoiceStreamService
1292
+ VoiceStreamService,
1293
+ VoiceControllerService
538
1294
  };