npm - @craftedxp/voice-js - Versions diffs - 0.2.0 - Mend

@craftedxp/voice-js 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/dist/node.d.ts ADDED Viewed

@@ -0,0 +1,270 @@
+/** Every state a call can report; used by `ProtocolState.state`, `Call.state` and the state-change callbacks. */ type CallState = 'idle' | 'connecting' | 'listening' | 'user_speaking' | 'agent_speaking' | 'ended' | 'error';
+/** One transcript line, discriminated by `role` ('user' | 'agent' | 'tool' | 'system'). */ type TranscriptEntry = {
+    id: string;
+    role: 'user';
+    text: string;
+    committed: boolean; // required, 'user' entries only; presumably false while the utterance is still partial — TODO confirm
+} | {
+    id: string;
+    role: 'agent';
+    text: string;
+    interrupted?: boolean; // optional, 'agent' entries only; presumably set when the agent turn was cut short — TODO confirm
+} | {
+    id: string;
+    role: 'tool'; // carries only id/text; meaning of `text` for tool entries is not visible here
+    text: string;
+} | {
+    id: string;
+    role: 'system'; // carries only id/text
+    text: string;
+};
+/** String codes carried by `CallError.code` and, optionally, `CallEndEvent.errorCode`. */ type CallErrorCode = 'missing_credentials' | 'forbidden' | 'mic_denied' | 'mic_start_failed' | 'audio_session_failed' | 'token_expired' | 'token_invalid' | 'unauthorized' | 'network_unreachable' | 'socket_error' | 'payment_required' | 'not_found' | 'silence_timeout' | 'server_error';
+/** Error payload passed to the `onError` callbacks (`ProtocolCallbacks.onError`, `StartCallOptions.onError`). */ interface CallError {
+    code: CallErrorCode; // one of the fixed codes in `CallErrorCode`
+    message: string; // free-form text; presumably human-readable detail — TODO confirm
+}
+/** Why a call ended; used by `CallEndEvent.reason`, `ProtocolState.endReason` and `ProtocolCallbacks.onCallEnd`. */ type CallEndReason = 'agent_ended' | 'user_hangup' | 'timeout' | 'error';
+/** Payload passed to `StartCallOptions.onEnd`. */ interface CallEndEvent {
+    reason: CallEndReason;
+    errorCode?: CallErrorCode; // optional; presumably only populated when `reason` is 'error' — TODO confirm
+    durationMs: number; // duration in milliseconds (per the name); which instants bound it is not visible here
+}
+/** Payload passed to `StartCallOptions.onVolume`. */ interface VolumeEvent {
+    input: number; // presumably the mic level; scale/range not visible here — TODO confirm
+    output: number; // presumably the playback level; scale/range not visible here — TODO confirm
+}
+/** Loosely typed server message: arbitrary string keys with unknown values, plus an optional string `type`. */ type ServerMessage = Record<string, unknown> & {
+    type?: string; // optional; presumably the message discriminator — TODO confirm
+};
+/** State record produced by `createProtocolState` and passed to `handleServerMessage`. */ interface ProtocolState {
+    state: CallState;
+    transcript: TranscriptEntry[];
+    agentBubbleId: string | null; // presumably the id of the in-progress agent transcript entry, null when none — TODO confirm
+    idCounter: number; // presumably the source for generated entry ids — TODO confirm
+    endReason: CallEndReason | null; // null is allowed; presumably null until the call has ended — TODO confirm
+}
+/** Zero-argument factory returning a `ProtocolState`; the initial field values are not visible in this declaration. */ declare const createProtocolState: () => ProtocolState;
+/** Callback set handed to `handleServerMessage`; every member is required. */ interface ProtocolCallbacks {
+    onState: (next: CallState) => void; // receives the next call state
+    onTranscript: (entries: TranscriptEntry[]) => void; // receives a transcript array; presumably the full list, not a delta — TODO confirm
+    onError: (err: CallError) => void;
+    onInterrupt: () => void; // no payload; presumably fired on barge-in — TODO confirm
+    onAgentTurnStart: () => void; // no payload
+    onCallEnd: (reason: CallEndReason) => void; // receives only the reason, not a full `CallEndEvent`
+}
+/** Takes one raw server message string, the protocol `state` and the callbacks `cb`; returns nothing. Presumably parses `raw`, mutates `state` and fires `cb` — implementation not visible here. */ declare function handleServerMessage(raw: string, state: ProtocolState, cb: ProtocolCallbacks): void;
+/** Arguments accepted by `buildWsUrl`. */ interface BuildWsUrlArgs {
+    apiBase: string; // presumably the same HTTP(S) base URL as `VoiceClientConfig.apiBase` — TODO confirm
+    agentId: string;
+    token: string;
+    bargeIn?: boolean; // optional; how it is encoded in the URL is not visible here
+}
+/** Returns a URL string derived from `args`; the exact path and query format is not visible in this declaration. */ declare function buildWsUrl(args: BuildWsUrlArgs): string;
+/** Argument object passed to the `FetchToken` callback. */ interface FetchTokenArgs {
+    /** The agent the SDK is about to call. */
+    agentId: string;
+    /**
+     * Optional consumer-side user identifier. Round-tripped to the server
+     * as `contactId` for Phase 11 contact memory. The SDK does not
+     * inspect this; your backend uses it to scope the token mint.
+     */
+    userId?: string;
+    /**
+     * Per-call structured context lowered into the agent's effective
+     * system prompt server-side at session open. Opaque to the SDK.
+     */
+    context?: Record<string, unknown>;
+    /**
+     * String key/value pairs round-tripped on the `call.ended` webhook.
+     * Capped at 1 KB total server-side. NOT lowered into the system prompt.
+     */
+    metadata?: Record<string, string>;
+}
+/** Async token callback: receives `FetchTokenArgs` and resolves to the token string. Supplied as `VoiceClientConfig.fetchToken`. */ type FetchToken = (args: FetchTokenArgs) => Promise<string>;
+/** Configuration object accepted by `configureVoiceClient`. */ interface VoiceClientConfig {
+    /**
+     * Full HTTPS URL of the Voxline server. The WebSocket scheme is
+     * derived: `https` → `wss`, `http` → `ws`. No trailing slash needed.
+     */
+    apiBase: string;
+    /**
+     * Called by the SDK whenever it needs a fresh `ct_` token (initial
+     * connect; mid-call refresh on `token_expired`). Your implementation
+     * should hit YOUR backend, which holds the `sk_` API key and mints
+     * via `POST /v1/call-tokens` (or `client.callTokens.mint` from
+     * @craftedxp/sdk-node). Never embed `sk_` in JS code that ships to a
+     * client.
+     */
+    fetchToken: FetchToken;
+    /**
+     * Optional metadata applied to EVERY startCall. Per-call `metadata`
+     * in `startCall` is merged on top (per-call wins on key conflicts).
+     * Useful for dashboard-wide tags like `{ surface: 'web', appVersion }`.
+     */
+    defaultMetadata?: Record<string, string>;
+    /**
+     * Optional context applied to EVERY startCall. Per-call `context` in
+     * `startCall` is merged on top. Useful for cross-call invariants like
+     * the signed-in user's locale.
+     */
+    defaultContext?: Record<string, unknown>;
+}
+/** Per-call options accepted by `VoiceClientFactory.startCall`; only `agentId` is required. */ interface StartCallOptions {
+    /** The agent to call. */
+    agentId: string;
+    /** Per-call user identifier. Round-tripped to fetchToken as `userId`. */
+    userId?: string;
+    /**
+     * Per-call structured context. Merged on top of `defaultContext`
+     * configured at factory time.
+     */
+    context?: Record<string, unknown>;
+    /**
+     * Per-call metadata. Merged on top of `defaultMetadata` configured
+     * at factory time.
+     */
+    metadata?: Record<string, string>;
+    /**
+     * When false, the SDK + server stay full-duplex but barge-in is
+     * suppressed. Useful for alarm-style flows where the user shouldn't
+     * accidentally interrupt the script. Default true.
+     */
+    bargeIn?: boolean;
+    /**
+     * Test-only escape hatch — pass a pre-minted `ct_` directly and skip
+     * the `fetchToken` call. Don't use this in production code: tokens
+     * expire and the SDK can't re-mint without the callback.
+     */
+    token?: string;
+    onStateChange?: (state: CallState) => void; // optional; live updates for the value snapshotted by `Call.state`
+    onTranscript?: (entries: TranscriptEntry[]) => void; // optional; live updates for the value snapshotted by `Call.transcript`
+    onError?: (err: CallError) => void; // optional; mid-call failures arrive here rather than rejecting `startCall`
+    onEnd?: (end: CallEndEvent) => void; // optional; fired when the call ends, including via `Call.end()`
+    /** Volume-meter event for VU UIs. ~10 Hz cadence (browser bundle only). */
+    onVolume?: (vol: VolumeEvent) => void;
+}
+/** Handle for one started call: read-only snapshots plus end/mute/unmute controls. Resolved by `VoiceClientFactory.startCall`. */ interface Call {
+    /** Current state. Snapshot — subscribe via onStateChange for live updates. */
+    readonly state: CallState;
+    /** Full transcript so far. Snapshot — subscribe via onTranscript for live updates. */
+    readonly transcript: TranscriptEntry[];
+    /** True after `mute()` and before `unmute()`. */
+    readonly isMuted: boolean;
+    /** End the call locally. Closes the WS, stops the mic, fires onEnd. Idempotent. */
+    end: () => void;
+    /** Mute mic frames. Wire stays active so server endpointing doesn't false-positive. Idempotent. */
+    mute: () => void;
+    /** Unmute mic frames. Idempotent. */
+    unmute: () => void;
+}
+/** Factory shape: the resolved config plus a `startCall` method that opens calls. */ interface VoiceClientFactory {
+    /** Read back the resolved config (post trailing-slash normalisation). */
+    readonly config: VoiceClientConfig;
+    /**
+     * Open a fresh call. Returns when the WS is open; rejects on
+     * pre-flight failure (missing config, fetchToken throw, etc). Mid-
+     * call failures arrive via the per-call `onError` callback — they
+     * don't reject this promise.
+     */
+    startCall: (options: StartCallOptions) => Promise<Call>;
+}
+/** Event union delivered to the `onEvent` callback of `createReconnectingWebSocket`, discriminated by `type`. */ type RWSEvent = {
+    type: 'open';
+} | {
+    type: 'reconnected'; // distinct from 'open'; presumably emitted after a successful retry — TODO confirm
+} | {
+    type: 'message';
+    data: string | ArrayBuffer; // text or binary payload
+} | {
+    type: 'close';
+    code: number;
+    reason: string;
+    permanent: boolean; // presumably true when no further reconnect will be attempted — TODO confirm
+} | {
+    type: 'error';
+    error: Error;
+};
+/** Minimal socket surface a `WebSocketFactory` must return; member names match the standard WebSocket API. */ interface WebSocketLike {
+    binaryType: string;
+    readyState: number; // a property here, whereas the wrapper returned by `createReconnectingWebSocket` exposes `readyState()` as a function
+    onopen: ((ev: unknown) => void) | null; // handler slots are nullable
+    onmessage: ((ev: {
+        data: string | ArrayBuffer;
+    }) => void) | null;
+    onerror: ((ev: unknown) => void) | null;
+    onclose: ((ev: {
+        code: number;
+        reason: string;
+    }) => void) | null;
+    send: (data: string | ArrayBuffer | ArrayBufferView) => void;
+    close: (code?: number, reason?: string) => void;
+}
+/** Creates a `WebSocketLike` for the given URL; supplied as `RWSOptions.wsFactory`. */ type WebSocketFactory = (url: string) => WebSocketLike;
+/** Options accepted by `createReconnectingWebSocket`. */ interface RWSOptions {
+    url: string;
+    wsFactory: WebSocketFactory; // required; the socket implementation is injected, not assumed global
+    maxRetries?: number; // optional; default not visible here
+    initialBackoffMs?: number; // optional, milliseconds per the name; default not visible here
+    maxBackoffMs?: number; // optional, milliseconds per the name; default not visible here
+}
+/** Creates a socket wrapper from `options` and reports `RWSEvent`s through `onEvent`. Presumably retries per the retry/backoff options — implementation not visible here. */ declare const createReconnectingWebSocket: (options: RWSOptions, onEvent: (ev: RWSEvent) => void) => {
+    send: (data: string | ArrayBuffer | ArrayBufferView) => void;
+    close: (code?: number, reason?: string) => void;
+    readyState: () => number; // a function on the wrapper, unlike the `WebSocketLike.readyState` property
+};
+/** Handle type returned by `createReconnectingWebSocket`, derived with `ReturnType` so the two cannot drift apart. */ type ReconnectingWebSocket = ReturnType<typeof createReconnectingWebSocket>;
+/** `StartCallOptions` plus the optional `onAudioChunk` raw-PCM output callback. */ interface NodeStartCallOptions extends StartCallOptions {
+    /**
+     * Fires for each binary PCM frame the server pushes (Int16 LE mono
+     * @ 16 kHz — same as the browser playback path). Wire to your
+     * preferred output: write to a `sox -t raw -r 16000 -e signed -b 16
+     * -c 1 - default` subprocess, queue into PortAudio, relay over RTP,
+     * etc. If you don't supply this callback, agent audio is dropped on
+     * the floor.
+     */
+    onAudioChunk?: (pcm: ArrayBuffer) => void;
+}
+/** `Call` plus the `sendAudioChunk` raw-PCM input method. */ interface NodeCall extends Call {
+    /**
+     * Push one mic frame to the server. Expected: Int16 LE mono PCM @
+     * 16 kHz. Capture cadence ~100 ms / ~3.2 KB per frame is fine.
+     * Returns `false` if the WS isn't open yet (caller may want to
+     * back-pressure or drop).
+     */
+    sendAudioChunk: (pcm: ArrayBuffer | ArrayBufferView) => boolean;
+}
+/**
+ * One-time SDK setup for Node.js / Electron-main consumers. Returns a
+ * factory you call `startCall` on for every voice call. Same shape as
+ * the browser entry, but `startCall` takes `NodeStartCallOptions` (adds
+ * `onAudioChunk` for raw-PCM output) and resolves to a `NodeCall`
+ * (adds `sendAudioChunk` for raw-PCM input).
+ *
+ * Example (vterm-style CLI, sox sub-process for I/O):
+ *
+ *   import { configureVoiceClient } from '@craftedxp/voice-js/node'
+ *   import { spawn } from 'child_process'
+ *
+ *   const voice = configureVoiceClient({
+ *     apiBase: 'https://api.your-server.com',
+ *     fetchToken: async () => mintFromMyBackend(),
+ *   })
+ *
+ *   const mic = spawn('sox', [...recArgs, '-r', '16000', '-c', '1', '-b', '16', '-e', 'signed', '-t', 'raw', '-'])
+ *   const spk = spawn('sox', ['-t', 'raw', '-r', '16000', '-c', '1', '-b', '16', '-e', 'signed', '-', ...playArgs])
+ *
+ *   const call = await voice.startCall({
+ *     agentId: 'agt_xxx',
+ *     onAudioChunk: (pcm) => spk.stdin.write(Buffer.from(pcm)),
+ *     onEnd: () => { mic.kill(); spk.stdin.end() },
+ *   })
+ *
+ *   mic.stdout.on('data', (chunk) => call.sendAudioChunk(chunk))
+ */
+declare function configureVoiceClient(config: VoiceClientConfig): Omit<VoiceClientFactory, 'startCall'> & { startCall: (options: NodeStartCallOptions) => Promise<NodeCall>; };
+export { type Call, type CallEndEvent, type CallEndReason, type CallError, type CallErrorCode, type CallState, type FetchToken, type FetchTokenArgs, type NodeCall, type NodeStartCallOptions, type ProtocolCallbacks, type ProtocolState, type RWSEvent, type RWSOptions, type ReconnectingWebSocket, type ServerMessage, type StartCallOptions, type TranscriptEntry, type VoiceClientConfig, type VoiceClientFactory, type VolumeEvent, type WebSocketFactory, type WebSocketLike, buildWsUrl, configureVoiceClient, createProtocolState, createReconnectingWebSocket, handleServerMessage };