@shvm/vani-client 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,196 @@
1
+ # `@shvm/vani-client`
2
+
3
+ Minimal, opinionated **headless voice agent client** for the web:
4
+
5
+ - A React hook (`useVoiceSession`) that manages:
6
+ - mic voice activity detection (VAD)
7
+ - websocket lifecycle
8
+ - audio streaming (client → server)
9
+ - transcript + state machine state
10
+ - server → client events (final transcript, partial/final assistant text, feedback, errors)
11
+ - A small **shared contract** module (types + websocket message schema) to keep client/server in lockstep.
12
+
13
+ This package also ships an optional UI layer at `@shvm/vani-client/ui` (used by the site). For now it intentionally relies on the host app’s Tailwind/CSS setup (no isolated CSS shipped yet).
14
+
15
+ ---
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ npm i @shvm/vani-client
21
+ ```
22
+
23
+ Peer dependency:
24
+ - `react` (this package targets modern React; currently developed against React 19)
25
+
26
+ ---
27
+
28
+ ## Quick start (headless)
29
+
30
+ ```tsx
31
+ import { useVoiceSession } from "@shvm/vani-client/headless";
32
+
33
+ export function VoiceWidget() {
34
+ const voice = useVoiceSession({
35
+ serverUrl: "https://your-app.com",
36
+ onMessage: (m) => console.log(m.role, m.content),
37
+ onError: (e) => console.error(e),
38
+ });
39
+
40
+ return (
41
+ <div>
42
+ <div>Status: {voice.status}</div>
43
+ <button onClick={voice.connect} disabled={voice.status !== "disconnected" && voice.status !== "error"}>
44
+ Connect
45
+ </button>
46
+ <button onClick={voice.cancel} disabled={voice.status !== "processing" && voice.status !== "speaking"}>
47
+ Cancel
48
+ </button>
49
+ <ul>
50
+ {voice.transcript.map((m) => (
51
+ <li key={m.id}>
52
+ <b>{m.role}:</b> {m.content}
53
+ </li>
54
+ ))}
55
+ </ul>
56
+ </div>
57
+ );
58
+ }
59
+ ```
60
+
61
+ If you omit `serverUrl`, the hook defaults to the current origin and connects to `ws(s)://<host>/ws/<sessionId>`.
62
+
63
+ ---
64
+
65
+ ## Server URL configuration
66
+
67
+ `useVoiceSession()` can build the websocket URL in 3 ways (in priority order):
68
+
69
+ ### 1) `getWebSocketUrl(sessionId)` (full override)
70
+
71
+ ```ts
72
+ useVoiceSession({
73
+ getWebSocketUrl: (sessionId) => `wss://voice.example.com/ws/${sessionId}`,
74
+ });
75
+ ```
76
+
77
+ ### 2) `serverUrl` + `wsPath(sessionId)`
78
+
79
+ ```ts
80
+ useVoiceSession({
81
+ serverUrl: "https://example.com",
82
+ wsPath: (sessionId) => `/ws/${sessionId}`,
83
+ });
84
+ ```
85
+
86
+ - `serverUrl` may be `https://…`, `http://…`, `wss://…`, or `ws://…`
87
+ - `https` → `wss`, `http` → `ws`
88
+ - Default `wsPath` is `/ws/${sessionId}`
89
+
90
+ ### 3) Default (current window location)
91
+
92
+ If neither override is provided:
93
+
94
+ - `wss://<host>/ws/<sessionId>` when on `https:`
95
+ - `ws://<host>/ws/<sessionId>` when on `http:`
96
+
97
+ ---
98
+
99
+ ## Client/server contract (websocket)
100
+
101
+ This package exports the contract types from `@shvm/vani-client/shared`.
102
+
103
+ ### Client → server JSON
104
+
105
+ ```ts
106
+ import type { ClientToServerJson } from "@shvm/vani-client/shared";
107
+ ```
108
+
109
+ Events:
110
+ - `{ type: "start"; config?: VoiceConfig }`
111
+ - `{ type: "stop" }`
112
+ - `{ type: "reset" }`
113
+ - `{ type: "text.message"; content: string }`
114
+
115
+ Audio is sent as **binary websocket messages** (the client currently sends WAV bytes when VAD detects end-of-speech).
116
+
117
+ ### Server → client JSON
118
+
119
+ ```ts
120
+ import type { ServerToClientJson } from "@shvm/vani-client/shared";
121
+ ```
122
+
123
+ Events:
124
+ - `{ type: "state"; value: SessionStatus }`
125
+ - `{ type: "transcript.final"; text: string }`
126
+ - `{ type: "assistant.message"; message: { role: "assistant"; content: string } }`
127
+ - `{ type: "assistant.partial"; text: string }`
128
+ - `{ type: "feedback"; message: string }`
129
+ - `{ type: "error"; reason: string }`
130
+
131
+ ---
132
+
133
+ ## Voice model configuration
134
+
135
+ The client can send an optional `VoiceConfig` in the `config` field of the `{ type: "start" }` message.
136
+
137
+ ```ts
138
+ import type { VoiceConfig } from "@shvm/vani-client/shared";
139
+ ```
140
+
141
+ The server is responsible for implementing STT/LLM/TTS using the config, and streaming back:
142
+ - transcript text
143
+ - assistant text (partial or final)
144
+ - assistant audio (binary websocket frames)
145
+
146
+ ---
147
+
148
+ ## How to run a server
149
+
150
+ This package is intentionally server-agnostic.
151
+
152
+ You need a websocket endpoint that:
153
+ 1. Accepts JSON control messages (`start`, `stop`, `reset`, `text.message`)
154
+ 2. Accepts binary audio frames
155
+ 3. Emits state + transcript + assistant messages
156
+ 4. Emits assistant audio as binary frames
157
+
158
+ ### Cloudflare Durable Object (reference)
159
+
160
+ This repo includes a working reference server implementation under:
161
+ - `src/vani/server/runtime/machine.ts`
162
+ - `src/vani/server/handlers/VoiceSessionDO.ts`
163
+
164
+ It exposes:
165
+ - `GET /ws/:sessionId` websocket upgrade → DO stub fetch
166
+
167
+ ---
168
+
169
+ ## What this package is (and is not)
170
+
171
+ **Is**
172
+ - A pragmatic, minimal headless voice client for a “voice chat” style agent
173
+ - Opinionated around websocket streaming and a small state machine
174
+ - Designed to keep a clean seam between UI and logic
175
+
176
+ **Is not**
177
+ - A full UI kit (yet)
178
+ - A general telephony/IVR SDK
179
+ - A full speech pipeline framework (you bring your server models)
180
+
181
+ ---
182
+
183
+ ## Roadmap
184
+
185
+ - Isolate UI styling (scoped + packaged CSS) and/or split UI into a separate package
186
+ - Improve config surface for:
187
+ - custom session ID strategy
188
+ - custom audio encoding/container
189
+ - optional token-level partials
190
+ - Add a non-React adapter (pure JS client) if needed
191
+
192
+ ---
193
+
194
+ ## License
195
+
196
+ MIT (see repository license).
@@ -0,0 +1,205 @@
1
+ import * as xstate from 'xstate';
2
+ import { j as VoiceStatus, e as SessionStatus, a as ClientMessage, V as VoiceConfig } from '../voice-BwU4C7fN.js';
3
+
4
+ declare function createBlobUrl(blob: Blob): string | undefined;
5
+
6
+ interface DebugEvent {
7
+ id: string;
8
+ type: "state_change" | "socket_event" | "audio_input" | "audio_output" | "transcript" | "llm_token" | "error";
9
+ timestamp: number;
10
+ details: unknown;
11
+ blobUrl?: string;
12
+ }
13
+ interface ClientContext {
14
+ status: VoiceStatus;
15
+ serverStatus: SessionStatus;
16
+ transcript: ClientMessage[];
17
+ history: DebugEvent[];
18
+ error: string | null;
19
+ isPlaying: boolean;
20
+ }
21
+ type ClientEvent = {
22
+ type: "CONNECT";
23
+ } | {
24
+ type: "DISCONNECT";
25
+ } | {
26
+ type: "CONNECTED";
27
+ } | {
28
+ type: "SET_ERROR";
29
+ error: string;
30
+ } | {
31
+ type: "SERVER_STATE_CHANGE";
32
+ status: SessionStatus;
33
+ } | {
34
+ type: "START_LISTENING";
35
+ } | {
36
+ type: "STOP_LISTENING";
37
+ } | {
38
+ type: "ADD_MESSAGE";
39
+ role: ClientMessage["role"];
40
+ content: string;
41
+ } | {
42
+ type: "AUDIO_PLAYBACK_START";
43
+ } | {
44
+ type: "AUDIO_PLAYBACK_END";
45
+ } | {
46
+ type: "LOG_EVENT";
47
+ eventType: DebugEvent["type"];
48
+ details: unknown;
49
+ blob?: Blob;
50
+ } | {
51
+ type: "TIMEOUT";
52
+ } | {
53
+ type: "CANCEL";
54
+ } | {
55
+ type: "TOOL_CALL_START";
56
+ toolName: string;
57
+ } | {
58
+ type: "TOOL_CALL_END";
59
+ toolName: string;
60
+ };
61
+ declare const clientMachine: xstate.StateMachine<ClientContext, {
62
+ type: "CONNECT";
63
+ } | {
64
+ type: "DISCONNECT";
65
+ } | {
66
+ type: "CONNECTED";
67
+ } | {
68
+ type: "SET_ERROR";
69
+ error: string;
70
+ } | {
71
+ type: "SERVER_STATE_CHANGE";
72
+ status: SessionStatus;
73
+ } | {
74
+ type: "START_LISTENING";
75
+ } | {
76
+ type: "STOP_LISTENING";
77
+ } | {
78
+ type: "ADD_MESSAGE";
79
+ role: ClientMessage["role"];
80
+ content: string;
81
+ } | {
82
+ type: "AUDIO_PLAYBACK_START";
83
+ } | {
84
+ type: "AUDIO_PLAYBACK_END";
85
+ } | {
86
+ type: "LOG_EVENT";
87
+ eventType: DebugEvent["type"];
88
+ details: unknown;
89
+ blob?: Blob;
90
+ } | {
91
+ type: "TIMEOUT";
92
+ } | {
93
+ type: "CANCEL";
94
+ } | {
95
+ type: "TOOL_CALL_START";
96
+ toolName: string;
97
+ } | {
98
+ type: "TOOL_CALL_END";
99
+ toolName: string;
100
+ }, {}, never, {
101
+ type: "setStatusConfig";
102
+ params: xstate.NonReducibleUnknown;
103
+ } | {
104
+ type: "setConnected";
105
+ params: xstate.NonReducibleUnknown;
106
+ } | {
107
+ type: "setDisconnected";
108
+ params: xstate.NonReducibleUnknown;
109
+ } | {
110
+ type: "setError";
111
+ params: xstate.NonReducibleUnknown;
112
+ } | {
113
+ type: "updateServerStatus";
114
+ params: xstate.NonReducibleUnknown;
115
+ } | {
116
+ type: "setPlaying";
117
+ params: xstate.NonReducibleUnknown;
118
+ } | {
119
+ type: "addMessage";
120
+ params: xstate.NonReducibleUnknown;
121
+ } | {
122
+ type: "logEvent";
123
+ params: xstate.NonReducibleUnknown;
124
+ } | {
125
+ type: "addToolCallStart";
126
+ params: xstate.NonReducibleUnknown;
127
+ } | {
128
+ type: "addToolCallEnd";
129
+ params: xstate.NonReducibleUnknown;
130
+ } | {
131
+ type: "clearError";
132
+ params: xstate.NonReducibleUnknown;
133
+ }, {
134
+ type: "isServerThinkingOrSpeaking";
135
+ params: unknown;
136
+ }, never, "disconnected" | "connecting" | "listening" | "speaking" | "error" | {
137
+ connected: "idle" | "processing";
138
+ }, string, xstate.NonReducibleUnknown, xstate.NonReducibleUnknown, xstate.EventObject, xstate.MetaObject, {
139
+ id: "client";
140
+ states: {
141
+ readonly disconnected: {};
142
+ readonly connecting: {};
143
+ readonly connected: {
144
+ states: {
145
+ readonly idle: {};
146
+ readonly processing: {};
147
+ };
148
+ };
149
+ readonly listening: {};
150
+ readonly speaking: {};
151
+ readonly error: {};
152
+ };
153
+ }>;
154
+
155
+ interface UseVoiceSessionProps {
156
+ onError?: (error: string) => void;
157
+ onMessage?: (msg: {
158
+ role: 'user' | 'assistant';
159
+ content: string;
160
+ }) => void;
161
+ onFeedback?: (message: string) => void;
162
+ initialTranscript?: ClientMessage[];
163
+ config?: VoiceConfig;
164
+ /**
165
+ * Base server URL used to construct the websocket URL.
166
+ * Examples:
167
+ * - "https://example.com"
168
+ * - "wss://example.com"
169
+ *
170
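+ * Default: "https://shvm.in" — NOTE(review): the package README states that omitting `serverUrl` falls back to the current window origin; confirm which default the implementation actually uses.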
171
+ */
172
+ serverUrl?: string;
173
+ /**
174
+ * Full override for websocket URL construction. Takes precedence over `serverUrl`.
175
+ */
176
+ getWebSocketUrl?: (sessionId: string) => string;
177
+ /**
178
+ * Override session id for the websocket route (e.g. `/ws/:sessionId`).
179
+ * If omitted, a random one is generated once per hook instance.
180
+ */
181
+ sessionId?: string;
182
+ /**
183
+ * Customizes the websocket path appended to the server base URL.
184
+ * Default: `/ws/${sessionId}`
185
+ */
186
+ wsPath?: (sessionId: string) => string;
187
+ }
188
+ declare function useVoiceSession(props?: UseVoiceSessionProps): {
189
+ vadListening: boolean;
190
+ vadLoading: boolean;
191
+ vadErrored: string | false;
192
+ userSpeaking: boolean;
193
+ connect: () => void;
194
+ disconnect: () => void;
195
+ sendMessage: (text: string) => void;
196
+ cancel: () => void;
197
+ status: VoiceStatus;
198
+ serverStatus: SessionStatus;
199
+ transcript: ClientMessage[];
200
+ history: DebugEvent[];
201
+ error: string | null;
202
+ isPlaying: boolean;
203
+ };
204
+
205
+ export { type ClientContext, type ClientEvent, type DebugEvent, ClientMessage as Message, SessionStatus, type UseVoiceSessionProps, VoiceConfig, VoiceStatus, clientMachine, createBlobUrl, useVoiceSession };