npm - @absolutejs/voice - Versions diffs - 0.0.21 → 0.0.22-beta.1 - Mend

@absolutejs/voice 0.0.21 → 0.0.22-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

package/README.md +1046 -2
package/dist/agent.d.ts +113 -0
package/dist/angular/index.js +90 -0
package/dist/angular/voice-controller.service.d.ts +6 -0
package/dist/angular/voice-stream.service.d.ts +6 -0
package/dist/client/actions.d.ts +41 -0
package/dist/client/audioPlayer.d.ts +40 -0
package/dist/client/duplex.d.ts +3 -0
package/dist/client/htmxBootstrap.js +84 -0
package/dist/client/index.d.ts +2 -0
package/dist/client/index.js +507 -5
package/dist/correction.d.ts +18 -1
package/dist/fileStore.d.ts +37 -0
package/dist/index.d.ts +32 -1
package/dist/index.js +8379 -1245
package/dist/ops.d.ts +327 -0
package/dist/opsPresets.d.ts +19 -0
package/dist/opsRuntime.d.ts +66 -0
package/dist/opsSinks.d.ts +149 -0
package/dist/outcomeRecipes.d.ts +18 -0
package/dist/postgresStore.d.ts +31 -0
package/dist/queue.d.ts +276 -0
package/dist/react/index.js +86 -0
package/dist/react/useVoiceController.d.ts +6 -0
package/dist/react/useVoiceStream.d.ts +6 -0
package/dist/routing.d.ts +3 -0
package/dist/runtimeOps.d.ts +23 -0
package/dist/s3Store.d.ts +14 -0
package/dist/sqliteStore.d.ts +26 -0
package/dist/svelte/index.js +84 -0
package/dist/telephony/response.d.ts +7 -0
package/dist/telephony/twilio.d.ts +116 -0
package/dist/testing/benchmark.d.ts +59 -4
package/dist/testing/corrected.d.ts +41 -0
package/dist/testing/duplex.d.ts +59 -0
package/dist/testing/fixtures.d.ts +18 -2
package/dist/testing/index.d.ts +5 -0
package/dist/testing/index.js +5094 -284
package/dist/testing/review.d.ts +143 -0
package/dist/testing/sessionBenchmark.d.ts +25 -0
package/dist/testing/stt.d.ts +2 -1
package/dist/testing/telephony.d.ts +70 -0
package/dist/testing/tts.d.ts +73 -0
package/dist/trace.d.ts +236 -0
package/dist/types.d.ts +320 -3
package/dist/vue/index.js +90 -0
package/dist/vue/useVoiceController.d.ts +11 -0
package/dist/vue/useVoiceStream.d.ts +11 -0
package/package.json +115 -1

package/dist/types.d.ts (changed)

@@ -1,23 +1,56 @@
 import type { SessionStore } from '@absolutejs/absolute';
+import type { VoiceOpsDispositionTaskPolicies, VoiceOpsTaskAssignmentRule, VoiceOpsTaskAssignmentRules, VoiceIntegrationWebhookConfig, StoredVoiceIntegrationEvent, StoredVoiceOpsTask, VoiceIntegrationEventStore, VoiceOpsTaskPolicy, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
+import type { VoiceIntegrationSink } from './opsSinks';
+import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
+import type { VoiceTraceEventStore } from './trace';
 export type AudioFormat = {
     container: 'raw';
-    encoding: 'pcm_s16le';
+    encoding: 'alaw' | 'mulaw' | 'pcm_s16le';
     sampleRateHz: number;
     channels: 1 | 2;
 };
 export type AudioChunk = ArrayBuffer | ArrayBufferView;
+export type VoiceLanguageStrategy = {
+    mode: 'auto-detect';
+    allowedLanguages?: string[];
+} | {
+    mode: 'fixed';
+    primaryLanguage: string;
+    secondaryLanguages?: string[];
+} | {
+    mode: 'allow-switching';
+    primaryLanguage?: string;
+    secondaryLanguages: string[];
+};
 export type VoicePhraseHint = {
     text: string;
     aliases?: string[];
     boost?: number;
     metadata?: Record<string, unknown>;
 };
+export type VoiceCorrectionRiskTier = 'safe' | 'balanced' | 'risky';
+export type VoiceDomainTerm = {
+    text: string;
+    aliases?: string[];
+    boost?: number;
+    language?: string;
+    metadata?: Record<string, unknown>;
+    pronunciation?: string;
+};
+export type VoiceLexiconEntry = {
+    text: string;
+    aliases?: string[];
+    language?: string;
+    metadata?: Record<string, unknown>;
+    pronunciation?: string;
+};
 export type Transcript = {
     id: string;
     text: string;
     isFinal: boolean;
     confidence?: number;
     language?: string;
+    speaker?: string | number;
     startedAtMs?: number;
     endedAtMs?: number;
     vendor?: string;
@@ -26,6 +59,7 @@ export type VoiceTranscriptQuality = {
     averageConfidence?: number;
     confidenceSampleCount: number;
     correction?: VoiceTurnCorrectionDiagnostics;
+    cost?: VoiceTurnCostEstimate;
     fallbackUsed: boolean;
     finalTranscriptCount: number;
     fallback?: VoiceFallbackDiagnostics;
@@ -42,6 +76,13 @@ export type VoiceTurnCorrectionDiagnostics = {
     provider?: string;
     reason?: string;
 };
+export type VoiceTurnCostEstimate = {
+    estimatedRelativeCostUnits: number;
+    fallbackAttemptCount: number;
+    fallbackReplayAudioMs: number;
+    primaryAudioMs: number;
+    totalBillableAudioMs: number;
+};
 export type VoiceFallbackSelectionReason = 'fallback-empty' | 'primary-empty' | 'word-count-margin' | 'confidence-margin' | 'word-count-tiebreak' | 'kept-primary';
 export type VoiceFallbackDiagnostics = {
     attempted: boolean;
@@ -97,6 +138,8 @@ export type STTAdapterSession = {
 export type STTAdapterOpenOptions = {
     sessionId: string;
     format: AudioFormat;
+    languageStrategy?: VoiceLanguageStrategy;
+    lexicon?: VoiceLexiconEntry[];
     phraseHints?: VoicePhraseHint[];
     signal?: AbortSignal;
 };
@@ -122,6 +165,7 @@ export type TTSAdapterSession = {
 };
 export type TTSAdapterOpenOptions = {
     sessionId: string;
+    lexicon?: VoiceLexiconEntry[];
     signal?: AbortSignal;
 };
 export type TTSAdapter<TOptions extends TTSAdapterOpenOptions = TTSAdapterOpenOptions> = {
@@ -139,6 +183,8 @@ export type RealtimeAdapterSession = {
 export type RealtimeAdapterOpenOptions = {
     sessionId: string;
     format: AudioFormat;
+    languageStrategy?: VoiceLanguageStrategy;
+    lexicon?: VoiceLexiconEntry[];
     phraseHints?: VoicePhraseHint[];
     signal?: AbortSignal;
 };
@@ -156,6 +202,17 @@ export type VoiceTurnRecord<TResult = unknown> = {
     committedAt: number;
     result?: TResult;
 };
+export type VoiceCostTelemetryConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    fallbackPassCostUnit?: number;
+    onTurnCost?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        estimate: VoiceTurnCostEstimate;
+        session: TSession;
+        turn: VoiceTurnRecord<TResult>;
+    }) => Promise<void> | void;
+    primaryPassCostUnit?: number;
+};
 export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown> = {
     id: string;
     createdAt: number;
@@ -185,6 +242,7 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
         transcriptIds: string[];
         committedAt: number;
     };
+    call?: VoiceCallLifecycleState;
     metadata?: TMeta;
     scenarioId?: string;
 };
@@ -195,6 +253,22 @@ export type VoiceSessionSummary = {
     status: VoiceSessionStatus;
     turnCount: number;
 };
+export type VoiceCallDisposition = 'completed' | 'transferred' | 'escalated' | 'voicemail' | 'no-answer' | 'failed' | 'closed';
+export type VoiceCallLifecycleEvent = {
+    at: number;
+    type: 'start' | 'end' | 'transfer' | 'escalation' | 'voicemail' | 'no-answer';
+    disposition?: VoiceCallDisposition;
+    metadata?: Record<string, unknown>;
+    reason?: string;
+    target?: string;
+};
+export type VoiceCallLifecycleState = {
+    disposition?: VoiceCallDisposition;
+    endedAt?: number;
+    events: VoiceCallLifecycleEvent[];
+    lastEventAt: number;
+    startedAt: number;
+};
 export type VoiceSessionStore<TSession extends VoiceSessionRecord = VoiceSessionRecord> = SessionStore<TSession, VoiceSessionSummary>;
 export type VoiceLogger = {
     debug?: (message: string, meta?: Record<string, unknown>) => void;
@@ -207,7 +281,7 @@ export type VoiceReconnectConfig = {
     timeout?: number;
     maxAttempts?: number;
 };
-export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'reliability';
+export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'pstn-balanced' | 'pstn-fast' | 'reliability';
 export type VoiceSTTLifecycle = 'continuous' | 'turn-scoped';
 export type VoiceTurnProfile = 'fast' | 'balanced' | 'long-form';
 export type VoiceTurnQualityProfile = 'general' | 'accent-heavy' | 'noisy-room' | 'short-command';
@@ -271,7 +345,26 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
     commitTurn: (reason?: VoiceEndOfTurnEvent['reason']) => Promise<void>;
     disconnect: (event?: VoiceCloseEvent) => Promise<void>;
     complete: (result?: TResult) => Promise<void>;
+    escalate: (input: {
+        metadata?: Record<string, unknown>;
+        reason: string;
+        result?: TResult;
+    }) => Promise<void>;
     fail: (error: unknown) => Promise<void>;
+    markNoAnswer: (input?: {
+        metadata?: Record<string, unknown>;
+        result?: TResult;
+    }) => Promise<void>;
+    markVoicemail: (input?: {
+        metadata?: Record<string, unknown>;
+        result?: TResult;
+    }) => Promise<void>;
+    transfer: (input: {
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        result?: TResult;
+        target: string;
+    }) => Promise<void>;
     close: (reason?: string) => Promise<void>;
     snapshot: () => Promise<TSession>;
 };
@@ -279,6 +372,21 @@ export type VoiceRouteResult<TResult = unknown> = {
     complete?: boolean;
     result?: TResult;
     assistantText?: string;
+    transfer?: {
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        target: string;
+    };
+    escalate?: {
+        metadata?: Record<string, unknown>;
+        reason: string;
+    };
+    voicemail?: {
+        metadata?: Record<string, unknown>;
+    };
+    noAnswer?: {
+        metadata?: Record<string, unknown>;
+    };
 };
 export type VoiceTurnCorrectionResult = string | {
     text: string;
@@ -290,6 +398,7 @@ export type VoiceTurnCorrectionHandler<TContext = unknown, TSession extends Voic
     api: VoiceSessionHandle<TContext, TSession, TResult>;
     context: TContext;
     fallback?: VoiceFallbackDiagnostics;
+    lexicon: VoiceLexiconEntry[];
     phraseHints: VoicePhraseHint[];
     session: TSession;
     text: string;
@@ -300,6 +409,11 @@ export type VoicePhraseHintResolver<TContext = unknown> = (input: {
     scenarioId?: string;
     sessionId: string;
 }) => Promise<VoicePhraseHint[] | void> | VoicePhraseHint[] | void;
+export type VoiceLexiconResolver<TContext = unknown> = (input: {
+    context: TContext;
+    scenarioId?: string;
+    sessionId: string;
+}) => Promise<VoiceLexiconEntry[] | void> | VoiceLexiconEntry[] | void;
 export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
     context: TContext;
     session: TSession;
@@ -308,6 +422,20 @@ export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceS
 }) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void;
 export type VoiceOnTurnHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceOnTurnObjectHandler<TContext, TSession, TResult> | ((session: TSession, turn: VoiceTurnRecord, api: VoiceSessionHandle<TContext, TSession, TResult>, context: TContext) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void);
 export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    onCallStart?: (input: {
+        context: TContext;
+        session: TSession;
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+    }) => Promise<void> | void;
+    onCallEnd?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        disposition: VoiceCallDisposition;
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        session: TSession;
+        target?: string;
+    }) => Promise<void> | void;
     onSession?: (input: {
         context: TContext;
         session: TSession;
@@ -327,6 +455,87 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
         error: unknown;
         api?: VoiceSessionHandle<TContext, TSession, TResult>;
     }) => Promise<void> | void;
+    onEscalation?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        metadata?: Record<string, unknown>;
+        reason: string;
+        session: TSession;
+    }) => Promise<void> | void;
+    onNoAnswer?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        metadata?: Record<string, unknown>;
+        session: TSession;
+    }) => Promise<void> | void;
+    onTransfer?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        session: TSession;
+        target: string;
+    }) => Promise<void> | void;
+    onVoicemail?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        metadata?: Record<string, unknown>;
+        session: TSession;
+    }) => Promise<void> | void;
+};
+export type VoiceRuntimeOpsConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    buildReview?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        disposition: VoiceCallDisposition;
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        result?: TResult;
+        session: TSession;
+        target?: string;
+    }) => Promise<VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void> | VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void;
+    createTaskFromReview?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        disposition: VoiceCallDisposition;
+        review: StoredVoiceCallReviewArtifact;
+        session: TSession;
+    }) => Promise<Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void> | Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void;
+    resolveTaskPolicy?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        disposition: VoiceCallDisposition;
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        review?: StoredVoiceCallReviewArtifact;
+        session: TSession;
+        target?: string;
+        task: StoredVoiceOpsTask;
+    }) => Promise<VoiceOpsTaskPolicy | void> | VoiceOpsTaskPolicy | void;
+    resolveTaskAssignment?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        disposition: VoiceCallDisposition;
+        metadata?: Record<string, unknown>;
+        reason?: string;
+        review?: StoredVoiceCallReviewArtifact;
+        session: TSession;
+        target?: string;
+        task: StoredVoiceOpsTask;
+    }) => Promise<VoiceOpsTaskAssignmentRule | void> | VoiceOpsTaskAssignmentRule | void;
+    taskAssignmentRules?: VoiceOpsTaskAssignmentRules;
+    taskPolicies?: VoiceOpsDispositionTaskPolicies;
+    events?: VoiceIntegrationEventStore;
+    onEvent?: (input: {
+        api: VoiceSessionHandle<TContext, TSession, TResult>;
+        context: TContext;
+        event: StoredVoiceIntegrationEvent;
+        session: TSession;
+    }) => Promise<void> | void;
+    reviews?: VoiceCallReviewStore;
+    sinks?: VoiceIntegrationSink[];
+    tasks?: VoiceOpsTaskStore;
+    webhook?: VoiceIntegrationWebhookConfig;
 };
 export type VoiceNormalizedRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoiceRouteConfig<TContext, TSession, TResult>, 'onTurn'> & {
     onTurn: VoiceOnTurnObjectHandler<TContext, TSession, TResult>;
@@ -337,8 +546,15 @@ export type VoiceScenario = {
     description?: string;
     metadata?: Record<string, unknown>;
 };
+export type VoiceExpectedSpeakerTurn = {
+    speaker: string;
+    text: string;
+};
 export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
     path: string;
+    languageStrategy?: VoiceLanguageStrategy;
+    lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
     phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
     preset?: VoiceRuntimePreset;
     stt: STTAdapter;
@@ -351,14 +567,21 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
     audioConditioning?: VoiceAudioConditioningConfig;
     logger?: VoiceLogger;
     htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
+    ops?: VoiceRuntimeOpsConfig<TContext, TSession, TResult>;
+    trace?: VoiceTraceEventStore;
 } & VoiceRouteConfig<TContext, TSession, TResult>;
 export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
+    costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
     id: string;
     context: TContext;
     socket: VoiceSocket;
     stt: STTAdapter;
+    tts?: TTSAdapter;
+    languageStrategy?: VoiceLanguageStrategy;
+    lexicon?: VoiceLexiconEntry[];
     sttFallback?: VoiceResolvedSTTFallbackConfig;
     store: VoiceSessionStore<TSession>;
+    trace?: VoiceTraceEventStore;
     reconnect: Required<VoiceReconnectConfig>;
     phraseHints?: VoicePhraseHint[];
     scenarioId?: string;
@@ -408,6 +631,13 @@ export type VoiceServerAssistantMessage = {
     text: string;
     turnId?: string;
 };
+export type VoiceServerAudioMessage = {
+    type: 'audio';
+    chunkBase64: string;
+    format: AudioFormat;
+    receivedAt: number;
+    turnId?: string;
+};
 export type VoiceServerCompleteMessage = {
     type: 'complete';
     sessionId: string;
@@ -420,7 +650,7 @@ export type VoiceServerErrorMessage = {
 export type VoiceServerPongMessage = {
     type: 'pong';
 };
-export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
+export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerAudioMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
 export type VoiceConnectionOptions = {
     protocols?: string[];
     scenarioId?: string;
@@ -440,6 +670,30 @@ export type VoiceControllerOptions = {
     capture?: VoiceCaptureOptions;
     autoStopOnComplete?: boolean;
 };
+export type VoiceBargeInOptions = {
+    enabled?: boolean;
+    interruptOnPartial?: boolean;
+    interruptThreshold?: number;
+};
+export type VoiceAudioPlayerOptions = {
+    autoStart?: boolean;
+    createAudioContext?: () => AudioContext;
+    lookaheadMs?: number;
+};
+export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
+    audioPlayer?: VoiceAudioPlayerOptions;
+    bargeIn?: VoiceBargeInOptions;
+};
+export type VoiceSTTRoutingGoal = 'best' | 'low-cost';
+export type VoiceSTTRoutingCorrectionMode = 'generic' | 'none' | 'risky-turn';
+export type VoiceSTTRoutingStrategy = {
+    benchmarkSessionTarget: 'deepgram-corrected' | 'deepgram-flux';
+    correctionMode: VoiceSTTRoutingCorrectionMode;
+    goal: VoiceSTTRoutingGoal;
+    notes: string[];
+    preset: VoiceRuntimePreset;
+    sttLifecycle: VoiceSTTLifecycle;
+};
 export type VoiceHTMXRenderInput<TResult = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord> = {
     assistantTexts: string[];
     partial: string;
@@ -480,6 +734,12 @@ export type VoiceStreamState<TResult = unknown> = {
     partial: string;
     turns: VoiceTurnRecord<TResult>[];
     assistantTexts: string[];
+    assistantAudio: Array<{
+        chunk: Uint8Array;
+        format: AudioFormat;
+        receivedAt: number;
+        turnId?: string;
+    }>;
     error: string | null;
     isConnected: boolean;
 };
@@ -502,11 +762,52 @@ export type VoiceStream<TResult = unknown> = {
     subscribe: (subscriber: () => void) => () => void;
     turns: VoiceTurnRecord<TResult>[];
     assistantTexts: string[];
+    assistantAudio: Array<{
+        chunk: Uint8Array;
+        format: AudioFormat;
+        receivedAt: number;
+        turnId?: string;
+    }>;
 };
 export type VoiceControllerState<TResult = unknown> = VoiceStreamState<TResult> & {
     isRecording: boolean;
     recordingError: string | null;
 };
+export type VoiceAudioPlayerState = {
+    activeSourceCount: number;
+    error: string | null;
+    isActive: boolean;
+    isPlaying: boolean;
+    lastInterruptLatencyMs?: number;
+    lastPlaybackStopLatencyMs?: number;
+    processedChunkCount: number;
+    queuedChunkCount: number;
+};
+export type VoiceAudioPlayerSource = {
+    assistantAudio: VoiceStreamState['assistantAudio'];
+    subscribe: (subscriber: () => void) => () => void;
+};
+export type VoiceAudioPlayer = {
+    close: () => Promise<void>;
+    error: string | null;
+    getSnapshot: () => VoiceAudioPlayerState;
+    activeSourceCount: number;
+    isActive: boolean;
+    isPlaying: boolean;
+    interrupt: () => Promise<void>;
+    lastInterruptLatencyMs?: number;
+    lastPlaybackStopLatencyMs?: number;
+    pause: () => Promise<void>;
+    processedChunkCount: number;
+    queuedChunkCount: number;
+    start: () => Promise<void>;
+    subscribe: (subscriber: () => void) => () => void;
+};
+export type VoiceBargeInBinding = {
+    close: () => void;
+    handleLevel: (level: number) => void;
+    sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
+};
 export type VoiceController<TResult = unknown> = {
     bindHTMX: (options: VoiceHTMXBindingOptions) => () => void;
     close: () => void;
@@ -532,6 +833,16 @@ export type VoiceController<TResult = unknown> = {
     toggleRecording: () => Promise<void>;
     turns: VoiceTurnRecord<TResult>[];
     assistantTexts: string[];
+    assistantAudio: Array<{
+        chunk: Uint8Array;
+        format: AudioFormat;
+        receivedAt: number;
+        turnId?: string;
+    }>;
+};
+export type VoiceDuplexController<TResult = unknown> = VoiceController<TResult> & {
+    audioPlayer: VoiceAudioPlayer;
+    interruptAssistant: () => Promise<void>;
 };
 export type VoiceHTMXBindingOptions = {
     element: Element | string;
@@ -556,6 +867,12 @@ export type VoiceStoreAction<TResult = unknown> = {
 } | {
     type: 'assistant';
     text: string;
+} | {
+    type: 'audio';
+    chunk: Uint8Array;
+    format: AudioFormat;
+    receivedAt: number;
+    turnId?: string;
 } | {
     type: 'complete';
     sessionId: string;