npm - @absolutejs/voice - Versions diffs - 0.0.20 → 0.0.21 - Mend

@absolutejs/voice 0.0.20 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

package/README.md +387 -4
package/dist/angular/index.d.ts +1 -0
package/dist/angular/index.js +669 -3
package/dist/angular/voice-controller.service.d.ts +21 -0
package/dist/audioConditioning.d.ts +3 -0
package/dist/client/actions.d.ts +7 -0
package/dist/client/connection.d.ts +5 -0
package/dist/client/controller.d.ts +2 -0
package/dist/client/htmxBootstrap.js +576 -167
package/dist/client/index.d.ts +1 -0
package/dist/client/index.js +486 -3
package/dist/client/microphone.d.ts +4 -2
package/dist/correction.d.ts +16 -0
package/dist/index.d.ts +4 -0
package/dist/index.js +1314 -283
package/dist/presets.d.ts +13 -0
package/dist/react/index.d.ts +1 -0
package/dist/react/index.js +642 -3
package/dist/react/useVoiceController.d.ts +20 -0
package/dist/react/useVoiceStream.d.ts +1 -0
package/dist/store.d.ts +2 -2
package/dist/svelte/index.d.ts +1 -0
package/dist/svelte/index.js +607 -3
package/dist/testing/benchmark.d.ts +36 -0
package/dist/testing/index.js +1453 -241
package/dist/testing/sessionBenchmark.d.ts +67 -2
package/dist/testing/stt.d.ts +1 -0
package/dist/turnDetection.d.ts +5 -1
package/dist/turnProfiles.d.ts +6 -0
package/dist/types.d.ts +198 -8
package/dist/vue/index.d.ts +1 -0
package/dist/vue/index.js +660 -3
package/dist/vue/useVoiceController.d.ts +19 -0
package/fixtures/README.md +9 -0
package/fixtures/manifest.json +59 -1
package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
package/package.json +21 -1

package/dist/client/htmxBootstrap.js CHANGED

@@ -1,3 +1,145 @@
+// src/client/htmx.ts
+var DEFAULT_EVENT_NAME = "voice-refresh";
+var DEFAULT_QUERY_PARAM = "sessionId";
+var resolveElement = (input) => {
+  if (typeof input !== "string") {
+    return input;
+  }
+  return document.querySelector(input);
+};
+var buildRoute = (element, route, queryParam, sessionId) => {
+  const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
+  if (!baseRoute) {
+    return "";
+  }
+  const url = new URL(baseRoute, window.location.origin);
+  if (sessionId) {
+    url.searchParams.set(queryParam, sessionId);
+  } else {
+    url.searchParams.delete(queryParam);
+  }
+  return `${url.pathname}${url.search}${url.hash}`;
+};
+var bindVoiceHTMX = (stream, options) => {
+  if (typeof window === "undefined" || typeof document === "undefined") {
+    return () => {};
+  }
+  const element = resolveElement(options.element);
+  if (!element) {
+    return () => {};
+  }
+  const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
+  const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
+  const sync = () => {
+    const htmxWindow = window;
+    const nextRoute = buildRoute(element, options.route, queryParam, stream.sessionId);
+    if (nextRoute) {
+      element.setAttribute("hx-get", nextRoute);
+    }
+    htmxWindow.htmx?.process?.(element);
+    htmxWindow.htmx?.trigger?.(element, eventName);
+  };
+  const unsubscribe = stream.subscribe(sync);
+  sync();
+  return () => {
+    unsubscribe();
+  };
+};
+// src/client/microphone.ts
+var clampSample = (value) => Math.max(-1, Math.min(1, value));
+var floatTo16BitPCM = (input) => {
+  const output = new Int16Array(input.length);
+  for (let index = 0;index < input.length; index += 1) {
+    const sample = clampSample(input[index] ?? 0);
+    output[index] = sample < 0 ? sample * 32768 : sample * 32767;
+  }
+  return new Uint8Array(output.buffer);
+};
+var getPcmLevel = (audio) => {
+  const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
+  if (bytes.byteLength < 2) {
+    return 0;
+  }
+  const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
+  if (samples.length === 0) {
+    return 0;
+  }
+  let sumSquares = 0;
+  for (const sample of samples) {
+    const normalized = sample / 32768;
+    sumSquares += normalized * normalized;
+  }
+  return Math.min(1, Math.max(0, Math.sqrt(sumSquares / samples.length) * 5.5));
+};
+var downsampleBuffer = (input, sourceRate, targetRate) => {
+  if (sourceRate === targetRate) {
+    return input;
+  }
+  const ratio = sourceRate / targetRate;
+  const length = Math.round(input.length / ratio);
+  const output = new Float32Array(length);
+  let offsetResult = 0;
+  let offsetBuffer = 0;
+  while (offsetResult < output.length) {
+    const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
+    let accum = 0;
+    let count = 0;
+    for (let index = offsetBuffer;index < nextOffsetBuffer && index < input.length; index += 1) {
+      accum += input[index] ?? 0;
+      count += 1;
+    }
+    output[offsetResult] = count > 0 ? accum / count : 0;
+    offsetResult += 1;
+    offsetBuffer = nextOffsetBuffer;
+  }
+  return output;
+};
+var createMicrophoneCapture = (options) => {
+  let audioContext = null;
+  let sourceNode = null;
+  let processorNode = null;
+  let mediaStream = null;
+  const start = async () => {
+    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
+      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
+    }
+    const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? AudioContext;
+    if (!AudioContextCtor) {
+      throw new Error("Browser microphone capture requires AudioContext support.");
+    }
+    mediaStream = await navigator.mediaDevices.getUserMedia({
+      audio: {
+        channelCount: options.channelCount ?? 1
+      }
+    });
+    audioContext = new AudioContextCtor;
+    sourceNode = audioContext.createMediaStreamSource(mediaStream);
+    processorNode = audioContext.createScriptProcessor(4096, 1, 1);
+    processorNode.onaudioprocess = (event) => {
+      const channel = event.inputBuffer.getChannelData(0);
+      const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
+      const pcm = floatTo16BitPCM(downsampled);
+      options.onLevel?.(getPcmLevel(pcm));
+      options.onAudio(pcm);
+    };
+    sourceNode.connect(processorNode);
+    processorNode.connect(audioContext.destination);
+  };
+  const stop = () => {
+    processorNode?.disconnect();
+    sourceNode?.disconnect();
+    mediaStream?.getTracks().forEach((track) => track.stop());
+    audioContext?.close();
+    options.onLevel?.(0);
+    audioContext = null;
+    mediaStream = null;
+    processorNode = null;
+    sourceNode = null;
+  };
+  return { start, stop };
+};
 // src/client/actions.ts
 var normalizeErrorMessage = (value) => {
   if (typeof value === "string" && value.trim()) {
@@ -56,6 +198,7 @@ var serverMessageToAction = (message) => {
     case "session":
       return {
         sessionId: message.sessionId,
+        scenarioId: message.scenarioId,
         status: message.status,
         type: "session"
       };
@@ -76,24 +219,30 @@ var WS_NORMAL_CLOSURE = 1000;
 var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
 var DEFAULT_PING_INTERVAL = 30000;
 var RECONNECT_DELAY_MS = 500;
+var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
 var noop = () => {};
 var noopUnsubscribe = () => noop;
 var NOOP_CONNECTION = {
+  start: () => {},
   close: noop,
   endTurn: noop,
   getReadyState: () => WS_CLOSED,
+  getScenarioId: () => "",
   getSessionId: () => "",
   send: noop,
   sendAudio: noop,
   subscribe: noopUnsubscribe
 };
 var createSessionId = () => crypto.randomUUID();
-var buildWsUrl = (path, sessionId) => {
+var buildWsUrl = (path, sessionId, scenarioId) => {
   const { hostname, port, protocol } = window.location;
   const wsProtocol = protocol === "https:" ? "wss:" : "ws:";
   const portSuffix = port ? `:${port}` : "";
   const url = new URL(`${wsProtocol}//${hostname}${portSuffix}${path}`);
   url.searchParams.set("sessionId", sessionId);
+  if (scenarioId) {
+    url.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
+  }
   return url.toString();
 };
 var isVoiceServerMessage = (value) => {
@@ -136,6 +285,7 @@ var createVoiceConnection = (path, options = {}) => {
   const state = {
     isConnected: false,
     pendingMessages: [],
+    scenarioId: options.scenarioId ?? null,
     pingInterval: null,
     reconnectAttempts: 0,
     reconnectTimeout: null,
@@ -173,13 +323,14 @@ var createVoiceConnection = (path, options = {}) => {
     }, RECONNECT_DELAY_MS);
   };
   const connect = () => {
-    const ws = new WebSocket(buildWsUrl(path, state.sessionId));
+    const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
     ws.binaryType = "arraybuffer";
     ws.onopen = () => {
       state.isConnected = true;
       state.reconnectAttempts = 0;
       flushPendingMessages();
       listeners.forEach((listener) => listener({
+        scenarioId: state.scenarioId ?? undefined,
         sessionId: state.sessionId,
         status: "active",
         type: "session"
@@ -197,6 +348,7 @@ var createVoiceConnection = (path, options = {}) => {
       }
       if (parsed.type === "session") {
         state.sessionId = parsed.sessionId;
+        state.scenarioId = parsed.scenarioId ?? state.scenarioId;
       }
       listeners.forEach((listener) => listener(parsed));
     };
@@ -220,6 +372,19 @@ var createVoiceConnection = (path, options = {}) => {
   const send = (message) => {
     sendSerialized(JSON.stringify(message));
   };
+  const start = (input = {}) => {
+    if (input.sessionId) {
+      state.sessionId = input.sessionId;
+    }
+    if (input.scenarioId) {
+      state.scenarioId = input.scenarioId;
+    }
+    send({
+      type: "start",
+      sessionId: state.sessionId,
+      scenarioId: state.scenarioId ?? undefined
+    });
+  };
   const sendAudio = (audio) => {
     sendSerialized(audio);
   };
@@ -243,9 +408,11 @@ var createVoiceConnection = (path, options = {}) => {
   };
   connect();
   return {
+    start,
     close,
     endTurn,
     getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
+    getScenarioId: () => state.scenarioId ?? "",
     getSessionId: () => state.sessionId,
     send,
     sendAudio,
@@ -258,6 +425,7 @@ var createInitialState = () => ({
   assistantTexts: [],
   error: null,
   isConnected: false,
+  scenarioId: null,
   partial: "",
   sessionId: null,
   status: "idle",
@@ -319,6 +487,7 @@ var createVoiceStreamStore = () => {
         state = {
           ...state,
           error: null,
+          scenarioId: action.scenarioId ?? state.scenarioId,
           isConnected: action.status === "active",
           sessionId: action.sessionId,
           status: action.status
@@ -352,6 +521,12 @@ var createVoiceStream = (path, options = {}) => {
   const connection = createVoiceConnection(path, options);
   const store = createVoiceStreamStore();
   const subscribers = new Set;
+  const start = (input) => Promise.resolve().then(() => {
+    if (!input?.sessionId && !input?.scenarioId) {
+      return;
+    }
+    connection.start(input);
+  });
   const notify = () => {
     subscribers.forEach((subscriber) => subscriber());
   };
@@ -384,6 +559,10 @@ var createVoiceStream = (path, options = {}) => {
     get isConnected() {
       return store.getSnapshot().isConnected;
     },
+    get scenarioId() {
+      return store.getSnapshot().scenarioId;
+    },
+    start,
     get partial() {
       return store.getSnapshot().partial;
     },
@@ -411,127 +590,392 @@ var createVoiceStream = (path, options = {}) => {
   };
 };
-// src/client/htmx.ts
-var DEFAULT_EVENT_NAME = "voice-refresh";
-var DEFAULT_QUERY_PARAM = "sessionId";
-var resolveElement = (input) => {
-  if (typeof input !== "string") {
-    return input;
+// src/audioConditioning.ts
+var DEFAULT_TARGET_LEVEL = 0.08;
+var DEFAULT_MAX_GAIN = 3;
+var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
+var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
+var resolveAudioConditioningConfig = (config) => {
+  if (!config || config.enabled === false) {
+    return;
   }
-  return document.querySelector(input);
+  return {
+    enabled: true,
+    maxGain: config.maxGain ?? DEFAULT_MAX_GAIN,
+    noiseGateAttenuation: config.noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
+    noiseGateThreshold: config.noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
+    targetLevel: config.targetLevel ?? DEFAULT_TARGET_LEVEL
+  };
 };
-var buildRoute = (element, route, queryParam, sessionId) => {
-  const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
-  if (!baseRoute) {
-    return "";
-  }
-  const url = new URL(baseRoute, window.location.origin);
-  if (sessionId) {
-    url.searchParams.set(queryParam, sessionId);
-  } else {
-    url.searchParams.delete(queryParam);
+// src/turnProfiles.ts
+var TURN_PROFILE_DEFAULTS = {
+  balanced: {
+    qualityProfile: "general",
+    silenceMs: 1400,
+    speechThreshold: 0.012,
+    transcriptStabilityMs: 1000
+  },
+  fast: {
+    qualityProfile: "general",
+    silenceMs: 700,
+    speechThreshold: 0.015,
+    transcriptStabilityMs: 450
+  },
+  "long-form": {
+    qualityProfile: "general",
+    silenceMs: 2200,
+    speechThreshold: 0.01,
+    transcriptStabilityMs: 1500
   }
-  return `${url.pathname}${url.search}${url.hash}`;
 };
-var bindVoiceHTMX = (stream, options) => {
-  if (typeof window === "undefined" || typeof document === "undefined") {
-    return () => {};
-  }
-  const element = resolveElement(options.element);
-  if (!element) {
-    return () => {};
+var QUALITY_PROFILE_DEFAULTS = {
+  general: {},
+  "accent-heavy": {
+    silenceMs: 1200,
+    speechThreshold: 0.01,
+    transcriptStabilityMs: 1200
+  },
+  "noisy-room": {
+    silenceMs: 2000,
+    speechThreshold: 0.02,
+    transcriptStabilityMs: 1600
+  },
+  "short-command": {
+    silenceMs: 500,
+    speechThreshold: 0.016,
+    transcriptStabilityMs: 420
   }
-  const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
-  const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
-  const sync = () => {
-    const htmxWindow = window;
-    const nextRoute = buildRoute(element, options.route, queryParam, stream.sessionId);
-    if (nextRoute) {
-      element.setAttribute("hx-get", nextRoute);
-    }
-    htmxWindow.htmx?.process?.(element);
-    htmxWindow.htmx?.trigger?.(element, eventName);
-  };
-  const unsubscribe = stream.subscribe(sync);
-  sync();
-  return () => {
-    unsubscribe();
+};
+var DEFAULT_TURN_PROFILE = "fast";
+var DEFAULT_QUALITY_PROFILE = "general";
+var resolveTurnDetectionConfig = (config) => {
+  const profile = config?.profile ?? DEFAULT_TURN_PROFILE;
+  const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
+  const preset = TURN_PROFILE_DEFAULTS[profile];
+  const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
+  return {
+    profile,
+    qualityProfile,
+    silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
+    speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
+    transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
   };
 };
-// src/client/microphone.ts
-var clampSample = (value) => Math.max(-1, Math.min(1, value));
-var floatTo16BitPCM = (input) => {
-  const output = new Int16Array(input.length);
-  for (let index = 0;index < input.length; index += 1) {
-    const sample = clampSample(input[index] ?? 0);
-    output[index] = sample < 0 ? sample * 32768 : sample * 32767;
-  }
-  return new Uint8Array(output.buffer);
-};
-var downsampleBuffer = (input, sourceRate, targetRate) => {
-  if (sourceRate === targetRate) {
-    return input;
-  }
-  const ratio = sourceRate / targetRate;
-  const length = Math.round(input.length / ratio);
-  const output = new Float32Array(length);
-  let offsetResult = 0;
-  let offsetBuffer = 0;
-  while (offsetResult < output.length) {
-    const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
-    let accum = 0;
-    let count = 0;
-    for (let index = offsetBuffer;index < nextOffsetBuffer && index < input.length; index += 1) {
-      accum += input[index] ?? 0;
-      count += 1;
+// src/presets.ts
+var PRESET_INPUTS = {
+  chat: {
+    audioConditioning: {
+      enabled: true,
+      maxGain: 2.5,
+      noiseGateAttenuation: 0,
+      noiseGateThreshold: 0.004,
+      targetLevel: 0.08
+    },
+    capture: {
+      channelCount: 1,
+      sampleRateHz: 16000
+    },
+    connection: {
+      maxReconnectAttempts: 10,
+      pingInterval: 30000,
+      reconnect: true
+    },
+    sttLifecycle: "continuous",
+    turnDetection: {
+      qualityProfile: "short-command",
+      profile: "balanced"
+    }
+  },
+  default: {
+    capture: {
+      channelCount: 1,
+      sampleRateHz: 16000
+    },
+    connection: {
+      maxReconnectAttempts: 10,
+      pingInterval: 30000,
+      reconnect: true
+    },
+    sttLifecycle: "continuous",
+    turnDetection: {
+      qualityProfile: "general",
+      profile: "fast"
+    }
+  },
+  dictation: {
+    audioConditioning: {
+      enabled: true,
+      maxGain: 2.25,
+      noiseGateAttenuation: 0.05,
+      noiseGateThreshold: 0.003,
+      targetLevel: 0.08
+    },
+    capture: {
+      channelCount: 1,
+      sampleRateHz: 16000
+    },
+    connection: {
+      maxReconnectAttempts: 12,
+      pingInterval: 30000,
+      reconnect: true
+    },
+    sttLifecycle: "continuous",
+    turnDetection: {
+      qualityProfile: "accent-heavy",
+      profile: "long-form"
+    }
+  },
+  "guided-intake": {
+    audioConditioning: {
+      enabled: true,
+      maxGain: 2.5,
+      noiseGateAttenuation: 0,
+      noiseGateThreshold: 0.004,
+      targetLevel: 0.08
+    },
+    capture: {
+      channelCount: 1,
+      sampleRateHz: 16000
+    },
+    connection: {
+      maxReconnectAttempts: 12,
+      pingInterval: 30000,
+      reconnect: true
+    },
+    sttLifecycle: "turn-scoped",
+    turnDetection: {
+      qualityProfile: "accent-heavy",
+      profile: "long-form"
+    }
+  },
+  "noisy-room": {
+    audioConditioning: {
+      enabled: true,
+      maxGain: 3,
+      noiseGateAttenuation: 0.12,
+      noiseGateThreshold: 0.006,
+      targetLevel: 0.085
+    },
+    capture: {
+      channelCount: 1,
+      sampleRateHz: 16000
+    },
+    connection: {
+      maxReconnectAttempts: 14,
+      pingInterval: 45000,
+      reconnect: true
+    },
+    sttLifecycle: "continuous",
+    turnDetection: {
+      qualityProfile: "noisy-room",
+      profile: "long-form",
+      silenceMs: 2100,
+      speechThreshold: 0.02,
+      transcriptStabilityMs: 1650
+    }
+  },
+  reliability: {
+    audioConditioning: {
+      enabled: true,
+      maxGain: 2.9,
+      noiseGateAttenuation: 0.08,
+      noiseGateThreshold: 0.005,
+      targetLevel: 0.08
+    },
+    capture: {
+      channelCount: 1,
+      sampleRateHz: 16000
+    },
+    connection: {
+      maxReconnectAttempts: 14,
+      pingInterval: 45000,
+      reconnect: true
+    },
+    sttLifecycle: "continuous",
+    turnDetection: {
+      qualityProfile: "noisy-room",
+      profile: "long-form"
     }
-    output[offsetResult] = count > 0 ? accum / count : 0;
-    offsetResult += 1;
-    offsetBuffer = nextOffsetBuffer;
   }
-  return output;
 };
-var createMicrophoneCapture = (options) => {
-  let audioContext = null;
-  let sourceNode = null;
-  let processorNode = null;
-  let mediaStream = null;
-  const start = async () => {
-    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
-      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
+var resolveVoiceRuntimePreset = (name = "default") => {
+  const preset = PRESET_INPUTS[name];
+  return {
+    audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
+    capture: {
+      channelCount: preset.capture?.channelCount ?? 1,
+      sampleRateHz: preset.capture?.sampleRateHz ?? 16000
+    },
+    connection: {
+      ...preset.connection
+    },
+    name,
+    sttLifecycle: preset.sttLifecycle ?? "continuous",
+    turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
+  };
+};
+// src/client/controller.ts
+var createInitialState2 = (stream) => ({
+  assistantTexts: [...stream.assistantTexts],
+  error: stream.error,
+  isConnected: stream.isConnected,
+  isRecording: false,
+  partial: stream.partial,
+  recordingError: null,
+  sessionId: stream.sessionId,
+  scenarioId: stream.scenarioId,
+  status: stream.status,
+  turns: [...stream.turns]
+});
+var createVoiceController = (path, options = {}) => {
+  const preset = resolveVoiceRuntimePreset(options.preset);
+  const stream = createVoiceStream(path, {
+    ...preset.connection,
+    ...options.connection
+  });
+  let capture = null;
+  let state = createInitialState2(stream);
+  const subscribers = new Set;
+  const notify = () => {
+    for (const subscriber of subscribers) {
+      subscriber();
     }
-    const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? AudioContext;
-    if (!AudioContextCtor) {
-      throw new Error("Browser microphone capture requires AudioContext support.");
+  };
+  const sync = () => {
+    state = {
+      ...state,
+      assistantTexts: [...stream.assistantTexts],
+      error: stream.error,
+      isConnected: stream.isConnected,
+      partial: stream.partial,
+      sessionId: stream.sessionId,
+      scenarioId: stream.scenarioId,
+      status: stream.status,
+      turns: [...stream.turns]
+    };
+    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
+      capture?.stop();
+      capture = null;
+      state = {
+        ...state,
+        isRecording: false
+      };
     }
-    mediaStream = await navigator.mediaDevices.getUserMedia({
-      audio: {
-        channelCount: options.channelCount ?? 1
-      }
+    notify();
+  };
+  const unsubscribeStream = stream.subscribe(sync);
+  sync();
+  const ensureCapture = () => {
+    if (capture) {
+      return capture;
+    }
+    capture = createMicrophoneCapture({
+      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
+      onLevel: options.capture?.onLevel,
+      onAudio: (audio) => stream.sendAudio(audio),
+      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
     });
-    audioContext = new AudioContextCtor;
-    sourceNode = audioContext.createMediaStreamSource(mediaStream);
-    processorNode = audioContext.createScriptProcessor(4096, 1, 1);
-    processorNode.onaudioprocess = (event) => {
-      const channel = event.inputBuffer.getChannelData(0);
-      const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
-      options.onAudio(floatTo16BitPCM(downsampled));
+    return capture;
+  };
+  const stopRecording = () => {
+    capture?.stop();
+    capture = null;
+    state = {
+      ...state,
+      isRecording: false
     };
-    sourceNode.connect(processorNode);
-    processorNode.connect(audioContext.destination);
+    notify();
   };
-  const stop = () => {
-    processorNode?.disconnect();
-    sourceNode?.disconnect();
-    mediaStream?.getTracks().forEach((track) => track.stop());
-    audioContext?.close();
-    audioContext = null;
-    mediaStream = null;
-    processorNode = null;
-    sourceNode = null;
+  const startRecording = async () => {
+    if (state.isRecording) {
+      return;
+    }
+    try {
+      state = {
+        ...state,
+        recordingError: null
+      };
+      notify();
+      await ensureCapture().start();
+      state = {
+        ...state,
+        isRecording: true
+      };
+      notify();
+    } catch (error) {
+      capture = null;
+      state = {
+        ...state,
+        isRecording: false,
+        recordingError: error instanceof Error ? error.message : String(error)
+      };
+      notify();
+      throw error;
+    }
+  };
+  const close = () => {
+    unsubscribeStream();
+    stopRecording();
+    stream.close();
+  };
+  return {
+    bindHTMX(bindingOptions) {
+      return bindVoiceHTMX(stream, bindingOptions);
+    },
+    close,
+    endTurn: () => stream.endTurn(),
+    get error() {
+      return state.error;
+    },
+    getServerSnapshot: () => state,
+    getSnapshot: () => state,
+    get isConnected() {
+      return state.isConnected;
+    },
+    get isRecording() {
+      return state.isRecording;
+    },
+    get partial() {
+      return state.partial;
+    },
+    get recordingError() {
+      return state.recordingError;
+    },
+    sendAudio: (audio) => stream.sendAudio(audio),
+    get sessionId() {
+      return state.sessionId;
+    },
+    get scenarioId() {
+      return state.scenarioId;
+    },
+    startRecording,
+    get status() {
+      return state.status;
+    },
+    stopRecording,
+    subscribe: (subscriber) => {
+      subscribers.add(subscriber);
+      return () => {
+        subscribers.delete(subscriber);
+      };
+    },
+    toggleRecording: async () => {
+      if (state.isRecording) {
+        stopRecording();
+        return;
+      }
+      await startRecording();
+    },
+    get turns() {
+      return state.turns;
+    },
+    get assistantTexts() {
+      return state.assistantTexts;
+    }
   };
-  return { start, stop };
 };
 // src/client/htmxBootstrap.ts
@@ -540,10 +984,10 @@ var VOICE_WAVE_WIDTH = 320;
 var VOICE_WAVE_HEIGHT = 88;
 var DEFAULT_GUIDED_LABEL = "Guided test";
 var DEFAULT_GENERAL_LABEL = "General recording";
-var DEFAULT_IDLE_LEAD = "Pick a mode to begin the demo.";
+var DEFAULT_IDLE_LEAD = "Pick a scenario to begin the demo.";
 var DEFAULT_GUIDED_LEAD = "I can walk you through a short guided voice test.";
 var DEFAULT_GENERAL_LEAD = "I can capture one freeform recording and confirm that it landed.";
-var DEFAULT_IDLE_PROMPT = "Choose a mode to begin. Guided test asks follow-up prompts. General recording just captures what you say.";
+var DEFAULT_IDLE_PROMPT = "Choose a scenario to begin. Guided test asks follow-up prompts. General recording just captures what you say.";
 var DEFAULT_GENERAL_IDLE_PROMPT = "Click Start general recording to capture one freeform answer.";
 var DEFAULT_GENERAL_LIVE_PROMPT = "Speak freely. When you pause, the recording will be captured.";
 var DEFAULT_GENERAL_COMPLETE_PROMPT = "Recording saved. Start again if you want another capture.";
@@ -631,23 +1075,6 @@ var createVoiceWavePath = (levels, width = VOICE_WAVE_WIDTH, height = VOICE_WAVE
   }
   return path;
 };
-var getPcmLevel = (audio) => {
-  const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
-  if (bytes.byteLength < 2) {
-    return 0;
-  }
-  const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
-  if (samples.length === 0) {
-    return 0;
-  }
-  let sumSquares = 0;
-  for (const sample of samples) {
-    const normalized = sample / 32768;
-    sumSquares += normalized * normalized;
-  }
-  const rms = Math.sqrt(sumSquares / samples.length);
-  return clamp(rms * 5.5, 0, 1);
-};
 var parsePromptList = (value) => {
   if (!value) {
     return DEFAULT_GUIDED_PROMPTS;
@@ -704,35 +1131,6 @@ var resolvePromptMessage = (input) => {
   }
   return input.guidedPrompts[input.turnCount] ?? DEFAULT_GUIDED_OVERFLOW_PROMPT;
 };
-var createDemoMicrophone = (onAudio, onLevel) => {
-  let capture = null;
-  return {
-    start: async () => {
-      if (capture) {
-        return;
-      }
-      const nextCapture = createMicrophoneCapture({
-        onAudio: (audio) => {
-          onLevel(getPcmLevel(audio));
-          onAudio(audio);
-        },
-        sampleRateHz: 16000
-      });
-      capture = nextCapture;
-      try {
-        await capture.start();
-      } catch (error) {
-        capture = null;
-        throw error;
-      }
-    },
-    stop: () => {
-      capture?.stop();
-      capture = null;
-      onLevel(0);
-    }
-  };
-};
 var initVoiceHTMXRoot = (root) => {
   const guidedPath = root.dataset.voiceGuidedPath;
   const generalPath = root.dataset.voiceGeneralPath;
@@ -755,12 +1153,26 @@ var initVoiceHTMXRoot = (root) => {
   const voiceMonitorCopy = requireElement(root, root.dataset.voiceMonitorCopy, HTMLElement, "voice-monitor-copy");
   const voiceWaveGlow = requireElement(root, root.dataset.voiceWaveGlow, SVGPathElement, "voice-wave-glow");
   const voiceWavePath = requireElement(root, root.dataset.voiceWavePath, SVGPathElement, "voice-wave-path");
-  const guidedVoice = createVoiceStream(guidedPath);
-  const generalVoice = createVoiceStream(generalPath);
-  const stopGuidedBinding = bindVoiceHTMX(guidedVoice, { element: syncElement });
-  const stopGeneralBinding = bindVoiceHTMX(generalVoice, {
-    element: syncElement
+  const guidedVoice = createVoiceController(guidedPath, {
+    capture: {
+      onLevel: (level) => {
+        waveLevels = pushVoiceWaveLevel(waveLevels, level);
+        renderWave();
+      }
+    },
+    preset: "guided-intake"
   });
+  const generalVoice = createVoiceController(generalPath, {
+    capture: {
+      onLevel: (level) => {
+        waveLevels = pushVoiceWaveLevel(waveLevels, level);
+        renderWave();
+      }
+    },
+    preset: "dictation"
+  });
+  const stopGuidedBinding = guidedVoice.bindHTMX({ element: syncElement });
+  const stopGeneralBinding = generalVoice.bindHTMX({ element: syncElement });
   let activeMode = null;
   let hasStartedModes = {
     general: false,
@@ -821,12 +1233,8 @@ var initVoiceHTMXRoot = (root) => {
 </article>` : ""}`;
     renderWave();
   };
-  const microphone = createDemoMicrophone((audio) => currentVoice().sendAudio(audio), (level) => {
-    waveLevels = pushVoiceWaveLevel(waveLevels, level);
-    renderWave();
-  });
   const stopMic = () => {
-    microphone.stop();
+    currentVoice().stopRecording();
     isCapturing = false;
     micError = null;
     waveLevels = createInitialVoiceWaveLevels();
@@ -839,12 +1247,12 @@ var initVoiceHTMXRoot = (root) => {
       [mode]: true
     };
     try {
-      await microphone.start();
+      await currentVoice().startRecording();
       micError = null;
       isCapturing = true;
       render();
     } catch (error) {
-      microphone.stop();
+      currentVoice().stopRecording();
       isCapturing = false;
       waveLevels = createInitialVoiceWaveLevels();
       micError = formatErrorMessage(error);
@@ -863,7 +1271,8 @@ var initVoiceHTMXRoot = (root) => {
     stopMic();
   });
   window.addEventListener("beforeunload", () => {
-    microphone.stop();
+    guidedVoice.stopRecording();
+    generalVoice.stopRecording();
     stopGuidedBinding();
     stopGeneralBinding();
     guidedVoice.close();