@shvm/vani-client 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +196 -0
- package/dist/headless/index.d.ts +205 -0
- package/dist/headless/index.js +624 -0
- package/dist/headless/index.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +651 -0
- package/dist/index.js.map +1 -0
- package/dist/shared/index.d.ts +49 -0
- package/dist/shared/index.js +30 -0
- package/dist/shared/index.js.map +1 -0
- package/dist/ui/index.d.ts +39 -0
- package/dist/ui/index.js +559 -0
- package/dist/ui/index.js.map +1 -0
- package/dist/voice-BwU4C7fN.d.ts +51 -0
- package/package.json +68 -0
package/README.md
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
# `@shvm/vani-client`
|
|
2
|
+
|
|
3
|
+
Minimal, opinionated **headless voice agent client** for the web:
|
|
4
|
+
|
|
5
|
+
- A React hook (`useVoiceSession`) that manages:
|
|
6
|
+
- mic voice activity detection (VAD)
|
|
7
|
+
- websocket lifecycle
|
|
8
|
+
- audio streaming (client → server)
|
|
9
|
+
- transcript + state machine state
|
|
10
|
+
- server → client events (partial/final transcript, feedback, errors)
|
|
11
|
+
- A small **shared contract** module (types + websocket message schema) to keep client/server in lockstep.
|
|
12
|
+
|
|
13
|
+
This package also ships an optional UI layer at `@shvm/vani-client/ui` (used by the site). For now it intentionally relies on the host app’s Tailwind/CSS setup (no isolated CSS shipped yet).
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
npm i @shvm/vani-client
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Peer dependency:
|
|
24
|
+
- `react` (this package targets modern React; currently developed against React 19)
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quick start (headless)
|
|
29
|
+
|
|
30
|
+
```tsx
|
|
31
|
+
import { useVoiceSession } from "@shvm/vani-client/headless";
|
|
32
|
+
|
|
33
|
+
export function VoiceWidget() {
|
|
34
|
+
const voice = useVoiceSession({
|
|
35
|
+
serverUrl: "https://your-app.com",
|
|
36
|
+
onMessage: (m) => console.log(m.role, m.content),
|
|
37
|
+
onError: (e) => console.error(e),
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
return (
|
|
41
|
+
<div>
|
|
42
|
+
<div>Status: {voice.status}</div>
|
|
43
|
+
<button onClick={voice.connect} disabled={voice.status !== "disconnected" && voice.status !== "error"}>
|
|
44
|
+
Connect
|
|
45
|
+
</button>
|
|
46
|
+
<button onClick={voice.cancel} disabled={voice.status !== "processing" && voice.status !== "speaking"}>
|
|
47
|
+
Cancel
|
|
48
|
+
</button>
|
|
49
|
+
<ul>
|
|
50
|
+
{voice.transcript.map((m) => (
|
|
51
|
+
<li key={m.id}>
|
|
52
|
+
<b>{m.role}:</b> {m.content}
|
|
53
|
+
</li>
|
|
54
|
+
))}
|
|
55
|
+
</ul>
|
|
56
|
+
</div>
|
|
57
|
+
);
|
|
58
|
+
}
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
If you omit `serverUrl`, the hook defaults to the current origin and connects to `ws(s)://<host>/ws/<sessionId>`.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Server URL configuration
|
|
66
|
+
|
|
67
|
+
`useVoiceSession()` can build the websocket URL in 3 ways (in priority order):
|
|
68
|
+
|
|
69
|
+
### 1) `getWebSocketUrl(sessionId)` (full override)
|
|
70
|
+
|
|
71
|
+
```ts
|
|
72
|
+
useVoiceSession({
|
|
73
|
+
getWebSocketUrl: (sessionId) => `wss://voice.example.com/ws/${sessionId}`,
|
|
74
|
+
});
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2) `serverUrl` + `wsPath(sessionId)`
|
|
78
|
+
|
|
79
|
+
```ts
|
|
80
|
+
useVoiceSession({
|
|
81
|
+
serverUrl: "https://example.com",
|
|
82
|
+
wsPath: (sessionId) => `/ws/${sessionId}`,
|
|
83
|
+
});
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
- `serverUrl` may be `https://…`, `http://…`, `wss://…`, or `ws://…`
|
|
87
|
+
- `https` → `wss`, `http` → `ws`
|
|
88
|
+
- Default `wsPath` is `/ws/${sessionId}`
|
|
89
|
+
|
|
90
|
+
### 3) Default (current window location)
|
|
91
|
+
|
|
92
|
+
If neither override is provided:
|
|
93
|
+
|
|
94
|
+
- `wss://<host>/ws/<sessionId>` when on `https:`
|
|
95
|
+
- `ws://<host>/ws/<sessionId>` when on `http:`
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Client/server contract (websocket)
|
|
100
|
+
|
|
101
|
+
This package exports the contract types from `@shvm/vani-client/shared`.
|
|
102
|
+
|
|
103
|
+
### Client → server JSON
|
|
104
|
+
|
|
105
|
+
```ts
|
|
106
|
+
import type { ClientToServerJson } from "@shvm/vani-client/shared";
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Events:
|
|
110
|
+
- `{ type: "start"; config?: VoiceConfig }`
|
|
111
|
+
- `{ type: "stop" }`
|
|
112
|
+
- `{ type: "reset" }`
|
|
113
|
+
- `{ type: "text.message"; content: string }`
|
|
114
|
+
|
|
115
|
+
Audio is sent as **binary websocket messages** (the client currently sends WAV bytes when VAD signals end-of-speech).
|
|
116
|
+
|
|
117
|
+
### Server → client JSON
|
|
118
|
+
|
|
119
|
+
```ts
|
|
120
|
+
import type { ServerToClientJson } from "@shvm/vani-client/shared";
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Events:
|
|
124
|
+
- `{ type: "state"; value: SessionStatus }`
|
|
125
|
+
- `{ type: "transcript.final"; text: string }`
|
|
126
|
+
- `{ type: "assistant.message"; message: { role: "assistant"; content: string } }`
|
|
127
|
+
- `{ type: "assistant.partial"; text: string }`
|
|
128
|
+
- `{ type: "feedback"; message: string }`
|
|
129
|
+
- `{ type: "error"; reason: string }`
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Voice model configuration
|
|
134
|
+
|
|
135
|
+
The client sends a `VoiceConfig` as part of `{ type: "start" }`.
|
|
136
|
+
|
|
137
|
+
```ts
|
|
138
|
+
import type { VoiceConfig } from "@shvm/vani-client/shared";
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
The server is responsible for implementing speech-to-text (STT), the LLM, and text-to-speech (TTS) using the config, and for streaming back:
|
|
142
|
+
- transcript text
|
|
143
|
+
- assistant text (partial or final)
|
|
144
|
+
- assistant audio (binary websocket frames)
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## How to run a server
|
|
149
|
+
|
|
150
|
+
This package is intentionally server-agnostic.
|
|
151
|
+
|
|
152
|
+
You need a websocket endpoint that:
|
|
153
|
+
1. Accepts JSON control messages (`start`, `stop`, `reset`, `text.message`)
|
|
154
|
+
2. Accepts binary audio frames
|
|
155
|
+
3. Emits state + transcript + assistant messages
|
|
156
|
+
4. Emits assistant audio as binary frames
|
|
157
|
+
|
|
158
|
+
### Cloudflare Durable Object (reference)
|
|
159
|
+
|
|
160
|
+
This repo includes a working reference server implementation under:
|
|
161
|
+
- `src/vani/server/runtime/machine.ts`
|
|
162
|
+
- `src/vani/server/handlers/VoiceSessionDO.ts`
|
|
163
|
+
|
|
164
|
+
It exposes:
|
|
165
|
+
- `GET /ws/:sessionId` — websocket upgrade, forwarded to the Durable Object (DO) stub's `fetch`
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## What this package is (and is not)
|
|
170
|
+
|
|
171
|
+
**Is**
|
|
172
|
+
- A pragmatic, minimal headless voice client for a “voice chat” style agent
|
|
173
|
+
- Opinionated around websocket streaming and a small state machine
|
|
174
|
+
- Designed to keep a clean seam between UI and logic
|
|
175
|
+
|
|
176
|
+
**Is not**
|
|
177
|
+
- A full UI kit (yet)
|
|
178
|
+
- A general telephony/IVR SDK
|
|
179
|
+
- A full speech pipeline framework (you bring your server models)
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Roadmap
|
|
184
|
+
|
|
185
|
+
- Isolate UI styling (scoped + packaged CSS) and/or split UI into a separate package
|
|
186
|
+
- Improve config surface for:
|
|
187
|
+
- custom session ID strategy
|
|
188
|
+
- custom audio encoding/container
|
|
189
|
+
- optional token-level partials
|
|
190
|
+
- Add a non-React adapter (pure JS client) if needed
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## License
|
|
195
|
+
|
|
196
|
+
MIT (see repository license).
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import * as xstate from 'xstate';
|
|
2
|
+
import { j as VoiceStatus, e as SessionStatus, a as ClientMessage, V as VoiceConfig } from '../voice-BwU4C7fN.js';
|
|
3
|
+
|
|
4
|
+
declare function createBlobUrl(blob: Blob): string | undefined;
|
|
5
|
+
|
|
6
|
+
interface DebugEvent {
|
|
7
|
+
id: string;
|
|
8
|
+
type: "state_change" | "socket_event" | "audio_input" | "audio_output" | "transcript" | "llm_token" | "error";
|
|
9
|
+
timestamp: number;
|
|
10
|
+
details: unknown;
|
|
11
|
+
blobUrl?: string;
|
|
12
|
+
}
|
|
13
|
+
interface ClientContext {
|
|
14
|
+
status: VoiceStatus;
|
|
15
|
+
serverStatus: SessionStatus;
|
|
16
|
+
transcript: ClientMessage[];
|
|
17
|
+
history: DebugEvent[];
|
|
18
|
+
error: string | null;
|
|
19
|
+
isPlaying: boolean;
|
|
20
|
+
}
|
|
21
|
+
type ClientEvent = {
|
|
22
|
+
type: "CONNECT";
|
|
23
|
+
} | {
|
|
24
|
+
type: "DISCONNECT";
|
|
25
|
+
} | {
|
|
26
|
+
type: "CONNECTED";
|
|
27
|
+
} | {
|
|
28
|
+
type: "SET_ERROR";
|
|
29
|
+
error: string;
|
|
30
|
+
} | {
|
|
31
|
+
type: "SERVER_STATE_CHANGE";
|
|
32
|
+
status: SessionStatus;
|
|
33
|
+
} | {
|
|
34
|
+
type: "START_LISTENING";
|
|
35
|
+
} | {
|
|
36
|
+
type: "STOP_LISTENING";
|
|
37
|
+
} | {
|
|
38
|
+
type: "ADD_MESSAGE";
|
|
39
|
+
role: ClientMessage["role"];
|
|
40
|
+
content: string;
|
|
41
|
+
} | {
|
|
42
|
+
type: "AUDIO_PLAYBACK_START";
|
|
43
|
+
} | {
|
|
44
|
+
type: "AUDIO_PLAYBACK_END";
|
|
45
|
+
} | {
|
|
46
|
+
type: "LOG_EVENT";
|
|
47
|
+
eventType: DebugEvent["type"];
|
|
48
|
+
details: unknown;
|
|
49
|
+
blob?: Blob;
|
|
50
|
+
} | {
|
|
51
|
+
type: "TIMEOUT";
|
|
52
|
+
} | {
|
|
53
|
+
type: "CANCEL";
|
|
54
|
+
} | {
|
|
55
|
+
type: "TOOL_CALL_START";
|
|
56
|
+
toolName: string;
|
|
57
|
+
} | {
|
|
58
|
+
type: "TOOL_CALL_END";
|
|
59
|
+
toolName: string;
|
|
60
|
+
};
|
|
61
|
+
declare const clientMachine: xstate.StateMachine<ClientContext, {
|
|
62
|
+
type: "CONNECT";
|
|
63
|
+
} | {
|
|
64
|
+
type: "DISCONNECT";
|
|
65
|
+
} | {
|
|
66
|
+
type: "CONNECTED";
|
|
67
|
+
} | {
|
|
68
|
+
type: "SET_ERROR";
|
|
69
|
+
error: string;
|
|
70
|
+
} | {
|
|
71
|
+
type: "SERVER_STATE_CHANGE";
|
|
72
|
+
status: SessionStatus;
|
|
73
|
+
} | {
|
|
74
|
+
type: "START_LISTENING";
|
|
75
|
+
} | {
|
|
76
|
+
type: "STOP_LISTENING";
|
|
77
|
+
} | {
|
|
78
|
+
type: "ADD_MESSAGE";
|
|
79
|
+
role: ClientMessage["role"];
|
|
80
|
+
content: string;
|
|
81
|
+
} | {
|
|
82
|
+
type: "AUDIO_PLAYBACK_START";
|
|
83
|
+
} | {
|
|
84
|
+
type: "AUDIO_PLAYBACK_END";
|
|
85
|
+
} | {
|
|
86
|
+
type: "LOG_EVENT";
|
|
87
|
+
eventType: DebugEvent["type"];
|
|
88
|
+
details: unknown;
|
|
89
|
+
blob?: Blob;
|
|
90
|
+
} | {
|
|
91
|
+
type: "TIMEOUT";
|
|
92
|
+
} | {
|
|
93
|
+
type: "CANCEL";
|
|
94
|
+
} | {
|
|
95
|
+
type: "TOOL_CALL_START";
|
|
96
|
+
toolName: string;
|
|
97
|
+
} | {
|
|
98
|
+
type: "TOOL_CALL_END";
|
|
99
|
+
toolName: string;
|
|
100
|
+
}, {}, never, {
|
|
101
|
+
type: "setStatusConfig";
|
|
102
|
+
params: xstate.NonReducibleUnknown;
|
|
103
|
+
} | {
|
|
104
|
+
type: "setConnected";
|
|
105
|
+
params: xstate.NonReducibleUnknown;
|
|
106
|
+
} | {
|
|
107
|
+
type: "setDisconnected";
|
|
108
|
+
params: xstate.NonReducibleUnknown;
|
|
109
|
+
} | {
|
|
110
|
+
type: "setError";
|
|
111
|
+
params: xstate.NonReducibleUnknown;
|
|
112
|
+
} | {
|
|
113
|
+
type: "updateServerStatus";
|
|
114
|
+
params: xstate.NonReducibleUnknown;
|
|
115
|
+
} | {
|
|
116
|
+
type: "setPlaying";
|
|
117
|
+
params: xstate.NonReducibleUnknown;
|
|
118
|
+
} | {
|
|
119
|
+
type: "addMessage";
|
|
120
|
+
params: xstate.NonReducibleUnknown;
|
|
121
|
+
} | {
|
|
122
|
+
type: "logEvent";
|
|
123
|
+
params: xstate.NonReducibleUnknown;
|
|
124
|
+
} | {
|
|
125
|
+
type: "addToolCallStart";
|
|
126
|
+
params: xstate.NonReducibleUnknown;
|
|
127
|
+
} | {
|
|
128
|
+
type: "addToolCallEnd";
|
|
129
|
+
params: xstate.NonReducibleUnknown;
|
|
130
|
+
} | {
|
|
131
|
+
type: "clearError";
|
|
132
|
+
params: xstate.NonReducibleUnknown;
|
|
133
|
+
}, {
|
|
134
|
+
type: "isServerThinkingOrSpeaking";
|
|
135
|
+
params: unknown;
|
|
136
|
+
}, never, "disconnected" | "connecting" | "listening" | "speaking" | "error" | {
|
|
137
|
+
connected: "idle" | "processing";
|
|
138
|
+
}, string, xstate.NonReducibleUnknown, xstate.NonReducibleUnknown, xstate.EventObject, xstate.MetaObject, {
|
|
139
|
+
id: "client";
|
|
140
|
+
states: {
|
|
141
|
+
readonly disconnected: {};
|
|
142
|
+
readonly connecting: {};
|
|
143
|
+
readonly connected: {
|
|
144
|
+
states: {
|
|
145
|
+
readonly idle: {};
|
|
146
|
+
readonly processing: {};
|
|
147
|
+
};
|
|
148
|
+
};
|
|
149
|
+
readonly listening: {};
|
|
150
|
+
readonly speaking: {};
|
|
151
|
+
readonly error: {};
|
|
152
|
+
};
|
|
153
|
+
}>;
|
|
154
|
+
|
|
155
|
+
interface UseVoiceSessionProps {
|
|
156
|
+
onError?: (error: string) => void;
|
|
157
|
+
onMessage?: (msg: {
|
|
158
|
+
role: 'user' | 'assistant';
|
|
159
|
+
content: string;
|
|
160
|
+
}) => void;
|
|
161
|
+
onFeedback?: (message: string) => void;
|
|
162
|
+
initialTranscript?: ClientMessage[];
|
|
163
|
+
config?: VoiceConfig;
|
|
164
|
+
/**
|
|
165
|
+
* Base server URL used to construct the websocket URL.
|
|
166
|
+
* Examples:
|
|
167
|
+
* - "https://example.com"
|
|
168
|
+
* - "wss://example.com"
|
|
169
|
+
*
|
|
170
|
+
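* Default: "https://shvm.in" — NOTE(review): the README states that omitting `serverUrl` falls back to the current window origin; confirm which default is accurate.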
|
|
171
|
+
*/
|
|
172
|
+
serverUrl?: string;
|
|
173
|
+
/**
|
|
174
|
+
* Full override for websocket URL construction. Takes precedence over `serverUrl`.
|
|
175
|
+
*/
|
|
176
|
+
getWebSocketUrl?: (sessionId: string) => string;
|
|
177
|
+
/**
|
|
178
|
+
* Override session id for the websocket route (e.g. `/ws/:sessionId`).
|
|
179
|
+
* If omitted, a random one is generated once per hook instance.
|
|
180
|
+
*/
|
|
181
|
+
sessionId?: string;
|
|
182
|
+
/**
|
|
183
|
+
* Customizes the websocket path appended to the server base URL.
|
|
184
|
+
* Default: `/ws/${sessionId}`
|
|
185
|
+
*/
|
|
186
|
+
wsPath?: (sessionId: string) => string;
|
|
187
|
+
}
|
|
188
|
+
declare function useVoiceSession(props?: UseVoiceSessionProps): {
|
|
189
|
+
vadListening: boolean;
|
|
190
|
+
vadLoading: boolean;
|
|
191
|
+
vadErrored: string | false;
|
|
192
|
+
userSpeaking: boolean;
|
|
193
|
+
connect: () => void;
|
|
194
|
+
disconnect: () => void;
|
|
195
|
+
sendMessage: (text: string) => void;
|
|
196
|
+
cancel: () => void;
|
|
197
|
+
status: VoiceStatus;
|
|
198
|
+
serverStatus: SessionStatus;
|
|
199
|
+
transcript: ClientMessage[];
|
|
200
|
+
history: DebugEvent[];
|
|
201
|
+
error: string | null;
|
|
202
|
+
isPlaying: boolean;
|
|
203
|
+
};
|
|
204
|
+
|
|
205
|
+
export { type ClientContext, type ClientEvent, type DebugEvent, ClientMessage as Message, SessionStatus, type UseVoiceSessionProps, VoiceConfig, VoiceStatus, clientMachine, createBlobUrl, useVoiceSession };
|