npm - @absolutejs/voice - Versions diffs - 0.0.20 → 0.0.22-beta.0 - Mend

@absolutejs/voice 0.0.20 → 0.0.22-beta.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (57)

package/README.md +884 -4
package/dist/angular/index.d.ts +1 -0
package/dist/angular/index.js +759 -3
package/dist/angular/voice-controller.service.d.ts +27 -0
package/dist/angular/voice-stream.service.d.ts +6 -0
package/dist/audioConditioning.d.ts +3 -0
package/dist/client/actions.d.ts +48 -0
package/dist/client/audioPlayer.d.ts +40 -0
package/dist/client/connection.d.ts +5 -0
package/dist/client/controller.d.ts +2 -0
package/dist/client/duplex.d.ts +3 -0
package/dist/client/htmxBootstrap.js +660 -167
package/dist/client/index.d.ts +3 -0
package/dist/client/index.js +991 -6
package/dist/client/microphone.d.ts +4 -2
package/dist/correction.d.ts +33 -0
package/dist/fileStore.d.ts +27 -0
package/dist/index.d.ts +15 -0
package/dist/index.js +3721 -298
package/dist/ops.d.ts +100 -0
package/dist/presets.d.ts +13 -0
package/dist/react/index.d.ts +1 -0
package/dist/react/index.js +728 -3
package/dist/react/useVoiceController.d.ts +26 -0
package/dist/react/useVoiceStream.d.ts +7 -0
package/dist/routing.d.ts +3 -0
package/dist/runtimeOps.d.ts +23 -0
package/dist/store.d.ts +2 -2
package/dist/svelte/index.d.ts +1 -0
package/dist/svelte/index.js +691 -3
package/dist/telephony/response.d.ts +7 -0
package/dist/telephony/twilio.d.ts +116 -0
package/dist/testing/benchmark.d.ts +93 -2
package/dist/testing/corrected.d.ts +41 -0
package/dist/testing/duplex.d.ts +59 -0
package/dist/testing/fixtures.d.ts +18 -2
package/dist/testing/index.d.ts +5 -0
package/dist/testing/index.js +6247 -402
package/dist/testing/review.d.ts +143 -0
package/dist/testing/sessionBenchmark.d.ts +92 -2
package/dist/testing/stt.d.ts +3 -1
package/dist/testing/telephony.d.ts +70 -0
package/dist/testing/tts.d.ts +73 -0
package/dist/turnDetection.d.ts +5 -1
package/dist/turnProfiles.d.ts +6 -0
package/dist/types.d.ts +487 -10
package/dist/vue/index.d.ts +1 -0
package/dist/vue/index.js +750 -3
package/dist/vue/useVoiceController.d.ts +30 -0
package/dist/vue/useVoiceStream.d.ts +11 -0
package/fixtures/README.md +9 -0
package/fixtures/manifest.json +59 -1
package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
package/package.json +135 -1

package/dist/types.d.ts CHANGED

@@ -1,21 +1,99 @@
 import type { SessionStore } from '@absolutejs/absolute';
+import type { StoredVoiceIntegrationEvent, StoredVoiceOpsTask, VoiceIntegrationEventStore, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
+import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
 export type AudioFormat = {
     container: 'raw';
-    encoding: 'pcm_s16le';
+    encoding: 'alaw' | 'mulaw' | 'pcm_s16le';
     sampleRateHz: number;
     channels: 1 | 2;
 };
 export type AudioChunk = ArrayBuffer | ArrayBufferView;
+export type VoiceLanguageStrategy = {
+    mode: 'auto-detect';
+    allowedLanguages?: string[];
+} | {
+    mode: 'fixed';
+    primaryLanguage: string;
+    secondaryLanguages?: string[];
+} | {
+    mode: 'allow-switching';
+    primaryLanguage?: string;
+    secondaryLanguages: string[];
+};
+export type VoicePhraseHint = {
+    text: string;
+    aliases?: string[];
+    boost?: number;
+    metadata?: Record<string, unknown>;
+};
+export type VoiceCorrectionRiskTier = 'safe' | 'balanced' | 'risky';
+export type VoiceDomainTerm = {
+    text: string;
+    aliases?: string[];
+    boost?: number;
+    language?: string;
+    metadata?: Record<string, unknown>;
+    pronunciation?: string;
+};
+export type VoiceLexiconEntry = {
+    text: string;
+    aliases?: string[];
+    language?: string;
+    metadata?: Record<string, unknown>;
+    pronunciation?: string;
+};
 export type Transcript = {
     id: string;
     text: string;
     isFinal: boolean;
     confidence?: number;
     language?: string;
+    speaker?: string | number;
     startedAtMs?: number;
     endedAtMs?: number;
     vendor?: string;
 };
+export type VoiceTranscriptQuality = {
+    averageConfidence?: number;
+    confidenceSampleCount: number;
+    correction?: VoiceTurnCorrectionDiagnostics;
+    cost?: VoiceTurnCostEstimate;
+    fallbackUsed: boolean;
+    finalTranscriptCount: number;
+    fallback?: VoiceFallbackDiagnostics;
+    partialTranscriptCount: number;
+    selectedTranscriptCount: number;
+    source: 'fallback' | 'primary';
+};
+export type VoiceTurnCorrectionDiagnostics = {
+    attempted: boolean;
+    changed: boolean;
+    correctedText: string;
+    metadata?: Record<string, unknown>;
+    originalText: string;
+    provider?: string;
+    reason?: string;
+};
+export type VoiceTurnCostEstimate = {
+    estimatedRelativeCostUnits: number;
+    fallbackAttemptCount: number;
+    fallbackReplayAudioMs: number;
+    primaryAudioMs: number;
+    totalBillableAudioMs: number;
+};
+export type VoiceFallbackSelectionReason = 'fallback-empty' | 'primary-empty' | 'word-count-margin' | 'confidence-margin' | 'word-count-tiebreak' | 'kept-primary';
+export type VoiceFallbackDiagnostics = {
+    attempted: boolean;
+    fallbackConfidence?: number;
+    fallbackText?: string;
+    fallbackWordCount?: number;
+    primaryConfidence: number;
+    primaryText: string;
+    primaryWordCount: number;
+    selected: boolean;
+    selectionReason: VoiceFallbackSelectionReason;
+    trigger: 'empty-turn' | 'low-confidence' | 'empty-or-low-confidence' | 'always';
+};
 export type VoicePartialEvent = {
     type: 'partial';
     transcript: Transcript;
@@ -58,6 +136,9 @@ export type STTAdapterSession = {
 export type STTAdapterOpenOptions = {
     sessionId: string;
     format: AudioFormat;
+    languageStrategy?: VoiceLanguageStrategy;
+    lexicon?: VoiceLexiconEntry[];
+    phraseHints?: VoicePhraseHint[];
     signal?: AbortSignal;
 };
 export type STTAdapter<TOptions extends STTAdapterOpenOptions = STTAdapterOpenOptions> = {
@@ -82,6 +163,7 @@ export type TTSAdapterSession = {
 };
 export type TTSAdapterOpenOptions = {
     sessionId: string;
+    lexicon?: VoiceLexiconEntry[];
     signal?: AbortSignal;
 };
 export type TTSAdapter<TOptions extends TTSAdapterOpenOptions = TTSAdapterOpenOptions> = {
@@ -99,6 +181,9 @@ export type RealtimeAdapterSession = {
 export type RealtimeAdapterOpenOptions = {
     sessionId: string;
     format: AudioFormat;
+    languageStrategy?: VoiceLanguageStrategy;
+    lexicon?: VoiceLexiconEntry[];
+    phraseHints?: VoicePhraseHint[];
     signal?: AbortSignal;
 };
 export type RealtimeAdapter<TOptions extends RealtimeAdapterOpenOptions = RealtimeAdapterOpenOptions> = {
@@ -109,11 +194,23 @@ export type VoiceSessionStatus = 'active' | 'reconnecting' | 'completed' | 'fail
 export type VoiceTurnRecord<TResult = unknown> = {
     id: string;
     text: string;
+    quality?: VoiceTranscriptQuality;
     transcripts: Transcript[];
     assistantText?: string;
     committedAt: number;
     result?: TResult;
 };
+export type VoiceCostTelemetryConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    fallbackPassCostUnit?: number;
+    onTurnCost?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        estimate: VoiceTurnCostEstimate;
+        session: TSession;
+        turn: VoiceTurnRecord<TResult>;
+    }) => Promise<void> | void;
+    primaryPassCostUnit?: number;
+};
 export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown> = {
     id: string;
     createdAt: number;
@@ -123,8 +220,13 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
     currentTurn: {
         transcripts: Transcript[];
         partialText: string;
+        partialStartedAt?: number;
+        partialEndedAt?: number;
         finalText: string;
         lastAudioAt?: number;
+        lastSpeechAt?: number;
+        lastTranscriptAt?: number;
+        silenceStartedAt?: number;
     };
     turns: VoiceTurnRecord<TResult>[];
     committedTurnIds: string[];
@@ -132,7 +234,15 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
         attempts: number;
         lastDisconnectAt?: number;
     };
+    lastCommittedTurn?: {
+        signature: string;
+        text: string;
+        transcriptIds: string[];
+        committedAt: number;
+    };
+    call?: VoiceCallLifecycleState;
     metadata?: TMeta;
+    scenarioId?: string;
 };
 export type VoiceSessionSummary = {
     id: string;
@@ -141,6 +251,22 @@ export type VoiceSessionSummary = {
     status: VoiceSessionStatus;
     turnCount: number;
 };
+export type VoiceCallDisposition = 'completed' | 'transferred' | 'escalated' | 'voicemail' | 'no-answer' | 'failed' | 'closed';
+export type VoiceCallLifecycleEvent = {
+    at: number;
+    type: 'start' | 'end' | 'transfer' | 'escalation' | 'voicemail' | 'no-answer';
+    disposition?: VoiceCallDisposition;
+    metadata?: Record<string, unknown>;
+    reason?: string;
+    target?: string;
+};
+export type VoiceCallLifecycleState = {
+    disposition?: VoiceCallDisposition;
+    endedAt?: number;
+    events: VoiceCallLifecycleEvent[];
+    lastEventAt: number;
+    startedAt: number;
+};
 export type VoiceSessionStore<TSession extends VoiceSessionRecord = VoiceSessionRecord> = SessionStore<TSession, VoiceSessionSummary>;
 export type VoiceLogger = {
     debug?: (message: string, meta?: Record<string, unknown>) => void;
@@ -153,6 +279,59 @@ export type VoiceReconnectConfig = {
     timeout?: number;
     maxAttempts?: number;
 };
+export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'pstn-balanced' | 'pstn-fast' | 'reliability';
+export type VoiceSTTLifecycle = 'continuous' | 'turn-scoped';
+export type VoiceTurnProfile = 'fast' | 'balanced' | 'long-form';
+export type VoiceTurnQualityProfile = 'general' | 'accent-heavy' | 'noisy-room' | 'short-command';
+export type VoiceTurnFallbackTrigger = 'empty-turn' | 'low-confidence' | 'empty-or-low-confidence' | 'always';
+export type VoiceSTTFallbackConfig = {
+    adapter: STTAdapter;
+    trigger?: VoiceTurnFallbackTrigger;
+    confidenceThreshold?: number;
+    minTextLength?: number;
+    replayWindowMs?: number;
+    settleMs?: number;
+    completionTimeoutMs?: number;
+    maxAttemptsPerTurn?: number;
+};
+export type VoiceResolvedSTTFallbackConfig = {
+    adapter: STTAdapter;
+    trigger: VoiceTurnFallbackTrigger;
+    confidenceThreshold: number;
+    minTextLength: number;
+    replayWindowMs: number;
+    settleMs: number;
+    completionTimeoutMs: number;
+    maxAttemptsPerTurn: number;
+};
+export type VoiceTurnDetectionConfig = {
+    profile?: VoiceTurnProfile;
+    qualityProfile?: VoiceTurnQualityProfile;
+    silenceMs?: number;
+    speechThreshold?: number;
+    transcriptStabilityMs?: number;
+};
+export type VoiceResolvedTurnDetectionConfig = {
+    qualityProfile: VoiceTurnQualityProfile;
+    profile: VoiceTurnProfile;
+    silenceMs: number;
+    speechThreshold: number;
+    transcriptStabilityMs: number;
+};
+export type VoiceAudioConditioningConfig = {
+    enabled?: boolean;
+    targetLevel?: number;
+    maxGain?: number;
+    noiseGateThreshold?: number;
+    noiseGateAttenuation?: number;
+};
+export type VoiceResolvedAudioConditioningConfig = {
+    enabled: true;
+    targetLevel: number;
+    maxGain: number;
+    noiseGateThreshold: number;
+    noiseGateAttenuation: number;
+};
 export type VoiceSocket = {
     send: (data: string | Uint8Array | ArrayBuffer) => void | Promise<void>;
     close: (code?: number, reason?: string) => void | Promise<void>;
@@ -164,7 +343,26 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
     commitTurn: (reason?: VoiceEndOfTurnEvent['reason']) => Promise<void>;
     disconnect: (event?: VoiceCloseEvent) => Promise<void>;
     complete: (result?: TResult) => Promise<void>;
+    escalate: (input: {
+        metadata?: Record<string, unknown>;
+        reason: string;
+        result?: TResult;
+    }) => Promise<void>;
     fail: (error: unknown) => Promise<void>;
+    markNoAnswer: (input?: {
+        metadata?: Record<string, unknown>;
+        result?: TResult;
+    }) => Promise<void>;
+    markVoicemail: (input?: {
+        metadata?: Record<string, unknown>;
+        result?: TResult;
+    }) => Promise<void>;
+    transfer: (input: {
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        result?: TResult;
+        target: string;
+    }) => Promise<void>;
     close: (reason?: string) => Promise<void>;
     snapshot: () => Promise<TSession>;
 };
@@ -172,7 +370,48 @@ export type VoiceRouteResult<TResult = unknown> = {
     complete?: boolean;
     result?: TResult;
     assistantText?: string;
+    transfer?: {
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        target: string;
+    };
+    escalate?: {
+        metadata?: Record<string, unknown>;
+        reason: string;
+    };
+    voicemail?: {
+        metadata?: Record<string, unknown>;
+    };
+    noAnswer?: {
+        metadata?: Record<string, unknown>;
+    };
 };
+export type VoiceTurnCorrectionResult = string | {
+    text: string;
+    reason?: string;
+    provider?: string;
+    metadata?: Record<string, unknown>;
+};
+export type VoiceTurnCorrectionHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
+    api: VoiceSessionHandle<TContext, TSession, TResult>;
+    context: TContext;
+    fallback?: VoiceFallbackDiagnostics;
+    lexicon: VoiceLexiconEntry[];
+    phraseHints: VoicePhraseHint[];
+    session: TSession;
+    text: string;
+    transcripts: Transcript[];
+}) => Promise<VoiceTurnCorrectionResult | void> | VoiceTurnCorrectionResult | void;
+export type VoicePhraseHintResolver<TContext = unknown> = (input: {
+    context: TContext;
+    scenarioId?: string;
+    sessionId: string;
+}) => Promise<VoicePhraseHint[] | void> | VoicePhraseHint[] | void;
+export type VoiceLexiconResolver<TContext = unknown> = (input: {
+    context: TContext;
+    scenarioId?: string;
+    sessionId: string;
+}) => Promise<VoiceLexiconEntry[] | void> | VoiceLexiconEntry[] | void;
 export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
     context: TContext;
     session: TSession;
@@ -181,11 +420,26 @@ export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceS
 }) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void;
 export type VoiceOnTurnHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceOnTurnObjectHandler<TContext, TSession, TResult> | ((session: TSession, turn: VoiceTurnRecord, api: VoiceSessionHandle<TContext, TSession, TResult>, context: TContext) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void);
 export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    onCallStart?: (input: {
+        context: TContext;
+        session: TSession;
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+    }) => Promise<void> | void;
+    onCallEnd?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        disposition: VoiceCallDisposition;
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        session: TSession;
+        target?: string;
+    }) => Promise<void> | void;
     onSession?: (input: {
         context: TContext;
         session: TSession;
         api: VoiceSessionHandle<TContext, TSession, TResult>;
     }) => Promise<void> | void;
+    correctTurn?: VoiceTurnCorrectionHandler<TContext, TSession, TResult>;
     onTurn: VoiceOnTurnHandler<TContext, TSession, TResult>;
     onComplete: (input: {
         context: TContext;
@@ -199,34 +453,111 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
         error: unknown;
         api?: VoiceSessionHandle<TContext, TSession, TResult>;
     }) => Promise<void> | void;
+    onEscalation?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        metadata?: Record<string, unknown>;
+        reason: string;
+        session: TSession;
+    }) => Promise<void> | void;
+    onNoAnswer?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        metadata?: Record<string, unknown>;
+        session: TSession;
+    }) => Promise<void> | void;
+    onTransfer?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        session: TSession;
+        target: string;
+    }) => Promise<void> | void;
+    onVoicemail?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        metadata?: Record<string, unknown>;
+        session: TSession;
+    }) => Promise<void> | void;
+};
+export type VoiceRuntimeOpsConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    buildReview?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        disposition: VoiceCallDisposition;
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        result?: TResult;
+        session: TSession;
+        target?: string;
+    }) => Promise<VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void> | VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void;
+    createTaskFromReview?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        disposition: VoiceCallDisposition;
+        review: StoredVoiceCallReviewArtifact;
+        session: TSession;
+    }) => Promise<Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void> | Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void;
+    events?: VoiceIntegrationEventStore;
+    onEvent?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        event: StoredVoiceIntegrationEvent;
+        session: TSession;
+    }) => Promise<void> | void;
+    reviews?: VoiceCallReviewStore;
+    tasks?: VoiceOpsTaskStore;
 };
 export type VoiceNormalizedRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoiceRouteConfig<TContext, TSession, TResult>, 'onTurn'> & {
     onTurn: VoiceOnTurnObjectHandler<TContext, TSession, TResult>;
 };
+export type VoiceScenario = {
+    id: string;
+    name?: string;
+    description?: string;
+    metadata?: Record<string, unknown>;
+};
+export type VoiceExpectedSpeakerTurn = {
+    speaker: string;
+    text: string;
+};
 export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
     path: string;
+    languageStrategy?: VoiceLanguageStrategy;
+    lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
+    phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
+    preset?: VoiceRuntimePreset;
     stt: STTAdapter;
+    sttFallback?: VoiceSTTFallbackConfig;
+    sttLifecycle?: VoiceSTTLifecycle;
     tts?: TTSAdapter;
     session: VoiceSessionStore<NoInfer<TSession>>;
     reconnect?: VoiceReconnectConfig;
-    turnDetection?: {
-        silenceMs?: number;
-        speechThreshold?: number;
-    };
+    turnDetection?: VoiceTurnDetectionConfig;
+    audioConditioning?: VoiceAudioConditioningConfig;
     logger?: VoiceLogger;
     htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
+    ops?: VoiceRuntimeOpsConfig<TContext, TSession, TResult>;
 } & VoiceRouteConfig<TContext, TSession, TResult>;
 export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
     id: string;
     context: TContext;
     socket: VoiceSocket;
     stt: STTAdapter;
+    tts?: TTSAdapter;
+    languageStrategy?: VoiceLanguageStrategy;
+    lexicon?: VoiceLexiconEntry[];
+    sttFallback?: VoiceResolvedSTTFallbackConfig;
     store: VoiceSessionStore<TSession>;
     reconnect: Required<VoiceReconnectConfig>;
-    turnDetection: {
-        silenceMs: number;
-        speechThreshold: number;
-    };
+    phraseHints?: VoicePhraseHint[];
+    scenarioId?: string;
+    sttLifecycle: VoiceSTTLifecycle;
+    turnDetection: VoiceResolvedTurnDetectionConfig;
+    audioConditioning?: VoiceResolvedAudioConditioningConfig;
     route: VoiceNormalizedRouteConfig<TContext, TSession, TResult>;
     logger?: VoiceLogger;
 };
@@ -234,6 +565,7 @@ export type CreateVoiceSession = <TContext = unknown, TSession extends VoiceSess
 export type VoiceClientStartMessage = {
     type: 'start';
     sessionId?: string;
+    scenarioId?: string;
 };
 export type VoiceClientEndTurnMessage = {
     type: 'end_turn';
@@ -250,6 +582,7 @@ export type VoiceServerSessionMessage = {
     type: 'session';
     sessionId: string;
     status: VoiceSessionStatus;
+    scenarioId?: string;
 };
 export type VoiceServerPartialMessage = {
     type: 'partial';
@@ -268,6 +601,13 @@ export type VoiceServerAssistantMessage = {
     text: string;
     turnId?: string;
 };
+export type VoiceServerAudioMessage = {
+    type: 'audio';
+    chunkBase64: string;
+    format: AudioFormat;
+    receivedAt: number;
+    turnId?: string;
+};
 export type VoiceServerCompleteMessage = {
     type: 'complete';
     sessionId: string;
@@ -280,17 +620,54 @@ export type VoiceServerErrorMessage = {
 export type VoiceServerPongMessage = {
     type: 'pong';
 };
-export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
+export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerAudioMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
 export type VoiceConnectionOptions = {
     protocols?: string[];
+    scenarioId?: string;
     reconnect?: boolean;
     maxReconnectAttempts?: number;
     pingInterval?: number;
     sessionId?: string;
 };
+export type VoiceCaptureOptions = {
+    channelCount?: 1 | 2;
+    onLevel?: (level: number) => void;
+    sampleRateHz?: number;
+};
+export type VoiceControllerOptions = {
+    preset?: VoiceRuntimePreset;
+    connection?: VoiceConnectionOptions;
+    capture?: VoiceCaptureOptions;
+    autoStopOnComplete?: boolean;
+};
+export type VoiceBargeInOptions = {
+    enabled?: boolean;
+    interruptOnPartial?: boolean;
+    interruptThreshold?: number;
+};
+export type VoiceAudioPlayerOptions = {
+    autoStart?: boolean;
+    createAudioContext?: () => AudioContext;
+    lookaheadMs?: number;
+};
+export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
+    audioPlayer?: VoiceAudioPlayerOptions;
+    bargeIn?: VoiceBargeInOptions;
+};
+export type VoiceSTTRoutingGoal = 'best' | 'low-cost';
+export type VoiceSTTRoutingCorrectionMode = 'generic' | 'none' | 'risky-turn';
+export type VoiceSTTRoutingStrategy = {
+    benchmarkSessionTarget: 'deepgram-corrected' | 'deepgram-flux';
+    correctionMode: VoiceSTTRoutingCorrectionMode;
+    goal: VoiceSTTRoutingGoal;
+    notes: string[];
+    preset: VoiceRuntimePreset;
+    sttLifecycle: VoiceSTTLifecycle;
+};
 export type VoiceHTMXRenderInput<TResult = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord> = {
     assistantTexts: string[];
     partial: string;
+    scenarioId?: string;
     result?: TResult;
     session?: TSession;
     sessionId?: string;
@@ -322,15 +699,26 @@ export type VoiceHTMXOptions<TSession extends VoiceSessionRecord = VoiceSessionR
 export type VoiceHTMXConfig<TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceHTMXRenderer<TSession, TResult> | VoiceHTMXOptions<TSession, TResult>;
 export type VoiceStreamState<TResult = unknown> = {
     sessionId: string | null;
+    scenarioId: string | null;
     status: VoiceSessionStatus | 'idle';
     partial: string;
     turns: VoiceTurnRecord<TResult>[];
     assistantTexts: string[];
+    assistantAudio: Array<{
+        chunk: Uint8Array;
+        format: AudioFormat;
+        receivedAt: number;
+        turnId?: string;
+    }>;
     error: string | null;
     isConnected: boolean;
 };
 export type VoiceStream<TResult = unknown> = {
     close: () => void;
+    start: (input?: {
+        scenarioId?: string;
+        sessionId?: string;
+    }) => Promise<void>;
     endTurn: () => void;
     error: string | null;
     getServerSnapshot: () => VoiceStreamState<TResult>;
@@ -339,10 +727,92 @@ export type VoiceStream<TResult = unknown> = {
     partial: string;
     sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
     sessionId: string | null;
+    scenarioId: string | null;
+    status: VoiceSessionStatus | 'idle';
+    subscribe: (subscriber: () => void) => () => void;
+    turns: VoiceTurnRecord<TResult>[];
+    assistantTexts: string[];
+    assistantAudio: Array<{
+        chunk: Uint8Array;
+        format: AudioFormat;
+        receivedAt: number;
+        turnId?: string;
+    }>;
+};
+export type VoiceControllerState<TResult = unknown> = VoiceStreamState<TResult> & {
+    isRecording: boolean;
+    recordingError: string | null;
+};
+export type VoiceAudioPlayerState = {
+    activeSourceCount: number;
+    error: string | null;
+    isActive: boolean;
+    isPlaying: boolean;
+    lastInterruptLatencyMs?: number;
+    lastPlaybackStopLatencyMs?: number;
+    processedChunkCount: number;
+    queuedChunkCount: number;
+};
+export type VoiceAudioPlayerSource = {
+    assistantAudio: VoiceStreamState['assistantAudio'];
+    subscribe: (subscriber: () => void) => () => void;
+};
+export type VoiceAudioPlayer = {
+    close: () => Promise<void>;
+    error: string | null;
+    getSnapshot: () => VoiceAudioPlayerState;
+    activeSourceCount: number;
+    isActive: boolean;
+    isPlaying: boolean;
+    interrupt: () => Promise<void>;
+    lastInterruptLatencyMs?: number;
+    lastPlaybackStopLatencyMs?: number;
+    pause: () => Promise<void>;
+    processedChunkCount: number;
+    queuedChunkCount: number;
+    start: () => Promise<void>;
+    subscribe: (subscriber: () => void) => () => void;
+};
+export type VoiceBargeInBinding = {
+    close: () => void;
+    handleLevel: (level: number) => void;
+    sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
+};
+export type VoiceController<TResult = unknown> = {
+    bindHTMX: (options: VoiceHTMXBindingOptions) => () => void;
+    close: () => void;
+    endTurn: () => void;
+    start: (input?: {
+        scenarioId?: string;
+        sessionId?: string;
+    }) => Promise<void>;
+    error: string | null;
+    getServerSnapshot: () => VoiceControllerState<TResult>;
+    getSnapshot: () => VoiceControllerState<TResult>;
+    isConnected: boolean;
+    isRecording: boolean;
+    partial: string;
+    recordingError: string | null;
+    sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
+    sessionId: string | null;
+    scenarioId: string | null;
+    startRecording: () => Promise<void>;
     status: VoiceSessionStatus | 'idle';
+    stopRecording: () => void;
     subscribe: (subscriber: () => void) => () => void;
+    toggleRecording: () => Promise<void>;
     turns: VoiceTurnRecord<TResult>[];
     assistantTexts: string[];
+    assistantAudio: Array<{
+        chunk: Uint8Array;
+        format: AudioFormat;
+        receivedAt: number;
+        turnId?: string;
+    }>;
+};
+export type VoiceDuplexController<TResult = unknown> = VoiceController<TResult> & {
+    audioPlayer: VoiceAudioPlayer;
+    interruptAssistant: () => Promise<void>;
 };
 export type VoiceHTMXBindingOptions = {
     element: Element | string;
@@ -353,6 +823,7 @@ export type VoiceHTMXBindingOptions = {
 export type VoiceStoreAction<TResult = unknown> = {
     type: 'session';
     sessionId: string;
+    scenarioId?: string;
     status: VoiceSessionStatus;
 } | {
     type: 'partial';
@@ -366,6 +837,12 @@ export type VoiceStoreAction<TResult = unknown> = {
 } | {
     type: 'assistant';
     text: string;
+} | {
+    type: 'audio';
+    chunk: Uint8Array;
+    format: AudioFormat;
+    receivedAt: number;
+    turnId?: string;
 } | {
     type: 'complete';
     sessionId: string;