@shvm/vani-client 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +196 -0
- package/dist/headless/index.d.ts +205 -0
- package/dist/headless/index.js +624 -0
- package/dist/headless/index.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +651 -0
- package/dist/index.js.map +1 -0
- package/dist/shared/index.d.ts +49 -0
- package/dist/shared/index.js +30 -0
- package/dist/shared/index.js.map +1 -0
- package/dist/ui/index.d.ts +39 -0
- package/dist/ui/index.js +559 -0
- package/dist/ui/index.js.map +1 -0
- package/dist/voice-BwU4C7fN.d.ts +51 -0
- package/package.json +68 -0
|
@@ -0,0 +1,624 @@
|
|
|
1
|
+
import { useRef, useCallback, useEffect } from 'react';
|
|
2
|
+
import { useMicVAD } from '@ricky0123/vad-react';
|
|
3
|
+
import * as ort from 'onnxruntime-web';
|
|
4
|
+
import { useActor } from '@xstate/react';
|
|
5
|
+
import { fromCallback, setup, assign } from 'xstate';
|
|
6
|
+
|
|
7
|
+
// src/headless/adapters/blobUrl.ts
/**
 * Best-effort wrapper around URL.createObjectURL.
 *
 * Returns an object URL for the given blob, or undefined when the URL
 * API is unavailable (e.g. non-browser environments) or object-URL
 * creation throws. Never raises.
 */
function createBlobUrl(blob) {
  const apiAvailable =
    typeof URL !== "undefined" && typeof URL.createObjectURL === "function";
  if (!apiAvailable) {
    return undefined;
  }
  try {
    return URL.createObjectURL(blob);
  } catch {
    // Swallow intentionally: a missing blob URL only degrades debug output.
    return undefined;
  }
}
|
|
17
|
+
// NOTE(review): both fromCallback(...) results are discarded and the
// cleanup functions returned are empty — this looks like dead residue
// left by the bundler/tree-shaker rather than intentional side effects.
// Kept byte-for-byte to preserve module evaluation order; confirm
// against the original src before removing.
fromCallback(() => {
  return () => {
  };
});
fromCallback(() => {
  return () => {
  };
});
|
|
25
|
+
// XState machine modelling the client side of a voice session.
//
// Context shape (see initial context below):
//   status       - client-facing UI status string
//   serverStatus - last status reported by the remote agent
//   transcript   - chat messages ({ id, role, content, timestamp, toolCalls? })
//   history      - append-only debug event log ({ id, type, timestamp, details, blobUrl? })
//   error        - last error message or null
//   isPlaying    - whether audio output is currently playing
var clientMachine = setup({
  types: {
    // Typing is erased in this compiled output; shapes documented above.
    context: {},
    events: {}
  },
  actions: {
    // A connection attempt started: only the status flag changes.
    setStatusConfig: assign({
      status: () => "connecting"
    }),
    // Socket opened: go idle and append a socket_event history entry.
    setConnected: assign({
      status: () => "idle",
      history: ({ context }) => [
        ...context.history,
        {
          // Math.random IDs are for log display only, not security.
          id: Math.random().toString(36).slice(2),
          type: "socket_event",
          timestamp: Date.now(),
          details: { status: "connected" }
        }
      ]
    }),
    // Socket closed: mark disconnected and log it.
    setDisconnected: assign({
      status: () => "disconnected",
      history: ({ context }) => [
        ...context.history,
        {
          id: Math.random().toString(36).slice(2),
          type: "socket_event",
          timestamp: Date.now(),
          details: { status: "disconnected" }
        }
      ]
    }),
    // Record an error in context.error and in the history log.
    // Each assigner re-checks event.type because actions can in
    // principle run for other events.
    setError: assign({
      status: () => "error",
      error: ({ event }) => event.type === "SET_ERROR" ? event.error : null,
      history: ({ context, event }) => [
        ...context.history,
        {
          id: Math.random().toString(36).slice(2),
          type: "error",
          timestamp: Date.now(),
          details: { message: event.type === "SET_ERROR" ? event.error : "Unknown error" }
        }
      ]
    }),
    // Mirror a server-side state change into context and log the
    // from/to transition.
    updateServerStatus: assign({
      serverStatus: ({ context, event }) => event.type === "SERVER_STATE_CHANGE" ? event.status : context.serverStatus,
      history: ({ context, event }) => {
        if (event.type !== "SERVER_STATE_CHANGE") return context.history;
        return [
          ...context.history,
          {
            id: Math.random().toString(36).slice(2),
            type: "state_change",
            timestamp: Date.now(),
            // context.serverStatus still holds the previous value here.
            details: { from: context.serverStatus, to: event.status, source: "server" }
          }
        ];
      }
    }),
    // isPlaying becomes true exactly when the event is AUDIO_PLAYBACK_START.
    setPlaying: assign({
      isPlaying: ({ event }) => event.type === "AUDIO_PLAYBACK_START"
    }),
    // Append a chat message to the transcript and mirror it into history.
    addMessage: assign({
      transcript: ({ context, event }) => {
        if (event.type !== "ADD_MESSAGE") return context.transcript;
        return [
          ...context.transcript,
          {
            id: Math.random().toString(36).slice(2),
            role: event.role,
            content: event.content,
            timestamp: Date.now()
          }
        ];
      },
      history: ({ context, event }) => {
        if (event.type !== "ADD_MESSAGE") return context.history;
        return [
          ...context.history,
          {
            id: Math.random().toString(36).slice(2),
            type: "transcript",
            timestamp: Date.now(),
            details: { role: event.role, text: event.content }
          }
        ];
      }
    }),
    // Append an arbitrary debug event; optionally attaches an object URL
    // for an audio blob so UIs can offer playback of logged audio.
    logEvent: assign({
      history: ({ context, event }) => {
        if (event.type !== "LOG_EVENT") return context.history;
        return [
          ...context.history,
          {
            id: Math.random().toString(36).slice(2),
            type: event.eventType,
            timestamp: Date.now(),
            details: event.details,
            blobUrl: event.blob ? createBlobUrl(event.blob) : void 0
          }
        ];
      }
    }),
    // A tool call started: attach it to the trailing assistant message,
    // creating an empty assistant message first if the last message
    // isn't from the assistant.
    addToolCallStart: assign({
      transcript: ({ context, event }) => {
        if (event.type !== "TOOL_CALL_START") return context.transcript;
        const newTranscript = [...context.transcript];
        if (newTranscript.length === 0 || newTranscript[newTranscript.length - 1].role !== "assistant") {
          newTranscript.push({
            id: Math.random().toString(36).slice(2),
            role: "assistant",
            content: "",
            timestamp: Date.now(),
            toolCalls: []
          });
        }
        // NOTE: mutates the message object in place (shallow array copy only).
        const lastMsg = newTranscript[newTranscript.length - 1];
        lastMsg.toolCalls = lastMsg.toolCalls || [];
        lastMsg.toolCalls.push({ name: event.toolName, status: "calling" });
        return newTranscript;
      }
    }),
    // A tool call ended: flip the most recent matching "calling" entry
    // on the trailing assistant message to "finished".
    addToolCallEnd: assign({
      transcript: ({ context, event }) => {
        if (event.type !== "TOOL_CALL_END") return context.transcript;
        const newTranscript = [...context.transcript];
        if (newTranscript.length > 0) {
          const lastMsg = newTranscript[newTranscript.length - 1];
          if (lastMsg.role === "assistant" && lastMsg.toolCalls) {
            // reverse() on a slice so the search is last-to-first without
            // mutating the stored order; the found object is shared, so
            // the status mutation is visible in the transcript.
            const activeTool = lastMsg.toolCalls.slice().reverse().find((t) => t.name === event.toolName && t.status === "calling");
            if (activeTool) activeTool.status = "finished";
          }
        }
        return newTranscript;
      }
    }),
    clearError: assign({
      error: null
    })
  },
  guards: {
    // True when the (incoming or current) server status indicates the
    // agent is busy producing a response.
    isServerThinkingOrSpeaking: ({ context, event }) => {
      const status = event.type === "SERVER_STATE_CHANGE" ? event.status : context.serverStatus;
      return status === "thinking" || status === "speaking";
    }
  }
}).createMachine({
  id: "client",
  initial: "disconnected",
  context: {
    status: "disconnected",
    serverStatus: "idle",
    transcript: [],
    history: [],
    error: null,
    isPlaying: false
  },
  // Root-level handlers: these fire regardless of the current state.
  on: {
    LOG_EVENT: { actions: "logEvent" },
    ADD_MESSAGE: { actions: ["addMessage", "clearError"] },
    TOOL_CALL_START: { actions: "addToolCallStart" },
    TOOL_CALL_END: { actions: "addToolCallEnd" },
    SET_ERROR: { target: ".error", actions: "setError" },
    DISCONNECT: { target: ".disconnected", actions: "setDisconnected" }
  },
  states: {
    disconnected: {
      on: {
        CONNECT: { target: "connecting", actions: "setStatusConfig" }
      }
    },
    connecting: {
      on: {
        CONNECTED: { target: "connected", actions: "setConnected" }
      }
    },
    // "connected" is a compound state with an idle/processing sub-machine.
    connected: {
      initial: "idle",
      states: {
        idle: {
          entry: assign({ status: "idle" }),
          on: {
            // "#client.listening" / "#client.speaking" jump to the
            // top-level sibling states by machine ID.
            START_LISTENING: { target: "#client.listening", actions: "clearError" },
            AUDIO_PLAYBACK_START: { target: "#client.speaking", actions: "setPlaying" },
            SERVER_STATE_CHANGE: [
              {
                // Server got busy: move to processing.
                guard: "isServerThinkingOrSpeaking",
                target: "processing",
                actions: "updateServerStatus"
              },
              {
                // Otherwise just record the new server status.
                actions: "updateServerStatus"
              }
            ]
          }
        },
        processing: {
          entry: assign({ status: "processing" }),
          after: {
            // 2e4 ms = 20 s watchdog: give up waiting on the server.
            2e4: { target: "idle", actions: assign({ error: "Server timed out. Interactions will reset." }) }
          },
          on: {
            CANCEL: { target: "idle", actions: "clearError" },
            START_LISTENING: { target: "#client.listening", actions: "clearError" },
            AUDIO_PLAYBACK_START: { target: "#client.speaking", actions: "setPlaying" },
            SERVER_STATE_CHANGE: [
              {
                // Server returned to a quiescent state: leave processing.
                guard: ({ event }) => event.status === "listening" || event.status === "idle",
                target: "idle",
                actions: "updateServerStatus"
              },
              {
                actions: "updateServerStatus"
              }
            ]
          }
        }
      }
    },
    listening: {
      entry: assign({ status: "listening" }),
      on: {
        STOP_LISTENING: { target: "connected.processing" },
        SERVER_STATE_CHANGE: { actions: "updateServerStatus" }
      }
    },
    speaking: {
      entry: assign({ status: "speaking" }),
      on: {
        AUDIO_PLAYBACK_END: {
          target: "connected.idle",
          actions: assign({ isPlaying: false })
        },
        SERVER_STATE_CHANGE: { actions: "updateServerStatus" }
      }
    },
    // Terminal-ish error state; recoverable via reconnect or a new turn.
    error: {
      on: {
        CONNECT: { target: "connecting", actions: "setStatusConfig" },
        START_LISTENING: { target: "listening", actions: "clearError" }
      }
    }
  }
});
|
|
271
|
+
|
|
272
|
+
// src/headless/utils/webSocketUrl.ts
var DEFAULT_VOICE_SERVER_URL = "https://shvm.in";

/**
 * Resolves the WebSocket URL for a voice session.
 *
 * Resolution order:
 *   1. getWebSocketUrlOverride(sessionId), when provided — returned verbatim;
 *   2. otherwise the path (wsPath(sessionId) or `/ws/${sessionId}`) is
 *      resolved against serverUrl (default DEFAULT_VOICE_SERVER_URL) with
 *      http/https mapped to ws/wss; other schemes pass through untouched.
 *
 * @param {{sessionId: string, serverUrl?: string, wsPath?: (id: string) => string,
 *          getWebSocketUrlOverride?: (id: string) => string}} options
 * @returns {string} absolute WebSocket URL
 */
function buildVoiceWebSocketUrl({
  sessionId,
  serverUrl,
  wsPath,
  getWebSocketUrlOverride
}) {
  // A caller-supplied resolver wins outright.
  if (getWebSocketUrlOverride) {
    return getWebSocketUrlOverride(sessionId);
  }
  const path = wsPath ? wsPath(sessionId) : `/ws/${sessionId}`;
  const base = new URL(serverUrl ?? DEFAULT_VOICE_SERVER_URL);
  if (base.protocol === "https:") {
    base.protocol = "wss:";
  } else if (base.protocol === "http:") {
    base.protocol = "ws:";
  }
  // Note: an absolute path replaces any path present on the base URL.
  return new URL(path, base).toString();
}
|
|
287
|
+
|
|
288
|
+
// src/headless/hooks/useVoiceSession.ts
// CDN locations for the onnxruntime-web wasm binaries and the Silero
// VAD worklet/model shipped by @ricky0123/vad-web.
var ONNX_WASM_BASE_PATH = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.22.0/dist/";
var VAD_BASE_ASSET_PATH = "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.29/dist/";
var VAD_MODEL_URL = "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.29/dist/silero_vad_v5.onnx";

// Module-level latch so the onnxruntime globals are configured at most once.
var ortConfigured = false;

/**
 * Points onnxruntime-web at the CDN-hosted wasm binaries and disables its
 * worker proxy. Browser-only (guarded by a `window` check); safe to call
 * repeatedly — only the first invocation has any effect.
 */
function ensureOrtConfig() {
  if (!ortConfigured) {
    if (typeof window !== "undefined") {
      ort.env.wasm.wasmPaths = ONNX_WASM_BASE_PATH;
      ort.env.wasm.proxy = false;
    }
    ortConfigured = true;
  }
}
|
|
301
|
+
// The Silero VAD delivers 16 kHz mono float PCM.
var VAD_SAMPLE_RATE = 16e3;

/**
 * Writes an ASCII string into a DataView one byte per character,
 * starting at the given byte offset.
 */
function writeString(view, offset, text) {
  let i = 0;
  while (i < text.length) {
    view.setUint8(offset + i, text.charCodeAt(i));
    i += 1;
  }
}

/**
 * Encodes mono float samples as a complete 16-bit PCM WAV file.
 *
 * @param {Float32Array|number[]} audio samples, nominally in [-1, 1]
 *   (values outside the range are clamped)
 * @param {number} sampleRate frames per second written into the header
 * @returns {ArrayBuffer} 44-byte RIFF/WAVE header followed by the samples
 */
function encodeWav(audio, sampleRate) {
  const dataBytes = audio.length * 2;
  const buffer = new ArrayBuffer(44 + dataBytes);
  const view = new DataView(buffer);

  // RIFF container header.
  writeString(view, 0, "RIFF");
  view.setUint32(4, 36 + dataBytes, true);
  writeString(view, 8, "WAVE");

  // "fmt " chunk: linear PCM, one channel, 16 bits per sample.
  writeString(view, 12, "fmt ");
  view.setUint32(16, 16, true); // fmt chunk size
  view.setUint16(20, 1, true); // format tag: PCM
  view.setUint16(22, 1, true); // channel count: mono
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * 2, true); // byte rate (mono, 2 bytes/sample)
  view.setUint16(32, 2, true); // block align
  view.setUint16(34, 16, true); // bits per sample

  // "data" chunk followed by little-endian int16 samples.
  writeString(view, 36, "data");
  view.setUint32(40, dataBytes, true);
  for (let i = 0; i < audio.length; i += 1) {
    const clamped = Math.min(1, Math.max(-1, audio[i]));
    // Asymmetric scaling maps -1 -> -32768 and +1 -> 32767 exactly.
    const scaled = clamped < 0 ? clamped * 32768 : clamped * 32767;
    view.setInt16(44 + i * 2, scaled, true);
  }
  return buffer;
}
|
|
331
|
+
/**
 * React hook wiring together VAD-based speech capture, a WebSocket to the
 * voice server, WAV upload, and queued audio playback, all driven through
 * `clientMachine`.
 *
 * @param {object} [props]
 * @param {(err: any) => void}  [props.onError]    - stored in a ref; NOTE(review): never invoked in this bundle — errors surface via state only.
 * @param {(msg: {role: string, content: string}) => void} [props.onMessage] - called for final user/assistant messages from the server.
 * @param {(msg: any) => void}  [props.onFeedback] - called for server "feedback" frames.
 * @param {Array}  [props.initialTranscript] - messages seeded into the machine once, on mount.
 * @param {object} [props.config]     - opaque config forwarded to the server in the "start" frame.
 * @param {string} [props.serverUrl]  - base HTTP(S) URL of the voice server.
 * @param {(id: string) => string} [props.getWebSocketUrl] - full override for WS URL construction.
 * @param {string} [props.sessionId]  - fixed session id; otherwise a random one is generated once.
 * @param {(id: string) => string} [props.wsPath] - custom WS path builder.
 * @returns machine context spread together with VAD flags and the
 *   connect/disconnect/sendMessage/cancel controls.
 */
function useVoiceSession(props = {}) {
  const {
    onError,
    onMessage,
    onFeedback,
    initialTranscript,
    config,
    serverUrl,
    getWebSocketUrl: getWebSocketUrlOverride,
    sessionId,
    wsPath
  } = props;
  // Configure onnxruntime-web globals before the VAD hook below runs.
  ensureOrtConfig();
  const [snapshot, send, actorRef] = useActor(clientMachine);
  const state = snapshot.context;
  // Mutable plumbing that must survive re-renders without retriggering effects.
  const wsRef = useRef(null);
  const audioContextRef = useRef(null);
  const audioQueueRef = useRef([]);
  const isPlaybackLoopRunning = useRef(false);
  // Latest-callback refs so WS/VAD handlers never capture stale props.
  const onErrorCallbackRef = useRef(onError);
  const onMessageCallbackRef = useRef(onMessage);
  const onFeedbackCallbackRef = useRef(onFeedback);
  const configRef = useRef(config);
  // True between speech start and speech end/misfire of the current turn.
  const turnActiveRef = useRef(false);
  const lastVADErrorRef = useRef(null);
  const hasSeededTranscriptRef = useRef(false);
  const sessionIdRef = useRef(null);
  // Generate the session id exactly once, during the first render.
  if (sessionIdRef.current === null) {
    sessionIdRef.current = sessionId ?? (typeof crypto !== "undefined" && "randomUUID" in crypto ? (
      // @ts-ignore
      crypto.randomUUID()
    ) : "session-" + Math.floor(Math.random() * 1e4));
  }
  const buildWebSocketUrl = useCallback((activeSessionId) => {
    return buildVoiceWebSocketUrl({
      sessionId: activeSessionId,
      serverUrl,
      wsPath,
      getWebSocketUrlOverride
    });
  }, [getWebSocketUrlOverride, serverUrl, wsPath]);
  // Keep the latest-callback refs in sync with current props.
  useEffect(() => {
    onErrorCallbackRef.current = onError;
    onMessageCallbackRef.current = onMessage;
    onFeedbackCallbackRef.current = onFeedback;
    configRef.current = config;
  }, [onError, onMessage, onFeedback, config]);
  // One-shot seeding of the machine's transcript from initialTranscript;
  // skipped entirely if the machine already holds messages.
  useEffect(() => {
    if (hasSeededTranscriptRef.current) return;
    if (!initialTranscript || initialTranscript.length === 0) return;
    if (actorRef.getSnapshot().context.transcript.length > 0) {
      hasSeededTranscriptRef.current = true;
      return;
    }
    initialTranscript.forEach((msg) => {
      send({ type: "ADD_MESSAGE", role: msg.role, content: msg.content });
    });
    hasSeededTranscriptRef.current = true;
  }, [actorRef, initialTranscript, send]);
  // Lazily creates (or resumes) a 24 kHz output AudioContext. Throws after
  // reporting SET_ERROR so callers can decide whether to continue.
  const initAudio = async () => {
    if (audioContextRef.current && audioContextRef.current.state !== "closed") {
      if (audioContextRef.current.state === "suspended") {
        await audioContextRef.current.resume();
      }
      return;
    }
    try {
      // webkitAudioContext fallback for older Safari.
      audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 24e3 });
      if (audioContextRef.current.state === "suspended") {
        await audioContextRef.current.resume();
      }
    } catch (e) {
      console.error("[Voice] Audio init error:", e);
      const msg = "Audio initialization failed: " + e.message;
      console.error("[Voice] Sending SET_ERROR (Audio Init)", msg);
      send({ type: "SET_ERROR", error: msg });
      throw e;
    }
  };
  // VAD detected speech onset: open a turn if the socket is up and the
  // client isn't mid-response (strict turn-by-turn, no barge-in).
  const handleSpeechStart = useCallback(() => {
    const ws = wsRef.current;
    if (!ws || ws.readyState !== WebSocket.OPEN) return;
    if (turnActiveRef.current) return;
    const currentContext = actorRef.getSnapshot().context;
    const status = currentContext.status;
    // NOTE(review): no-op expression statement — likely residue of a
    // removed variable in the original TypeScript source; has no effect.
    currentContext.error;
    if (status === "speaking" || status === "processing") {
      console.log("[Voice] Busy, rejecting speech input (strict turn-by-turn).");
      return;
    }
    if (status !== "idle" && status !== "listening" && status !== "error") return;
    turnActiveRef.current = true;
    ws.send(JSON.stringify({
      type: "start",
      config: configRef.current
    }));
    send({ type: "START_LISTENING" });
  }, [actorRef, send]);
  // Sends a typed text message; the optimistic SERVER_STATE_CHANGE to
  // "speaking" moves the UI into processing before the server replies.
  const sendMessage = useCallback((text) => {
    const ws = wsRef.current;
    if (ws && ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type: "text.message", content: text }));
      send({ type: "ADD_MESSAGE", role: "user", content: text });
      send({ type: "SERVER_STATE_CHANGE", status: "speaking" });
    }
  }, [send]);
  // VAD detected speech end: upload the captured samples as WAV binary,
  // log them (with a blob URL for debugging), then close the turn.
  const handleSpeechEnd = useCallback(async (audio) => {
    const ws = wsRef.current;
    if (!turnActiveRef.current) return;
    turnActiveRef.current = false;
    if (!ws || ws.readyState !== WebSocket.OPEN) {
      // Socket died mid-turn: reset the UI instead of uploading.
      send({ type: "SERVER_STATE_CHANGE", status: "idle" });
      return;
    }
    const wavBuffer = encodeWav(audio, VAD_SAMPLE_RATE);
    const wavBlob = new Blob([wavBuffer], { type: "audio/wav" });
    ws.send(wavBuffer);
    send({ type: "LOG_EVENT", eventType: "audio_input", details: { size: wavBuffer.byteLength }, blob: wavBlob });
    ws.send(JSON.stringify({ type: "stop" }));
    send({ type: "STOP_LISTENING" });
  }, [send]);
  // VAD retracted a detection (too short): silently abandon the turn.
  const handleVADMisfire = useCallback(() => {
    if (!turnActiveRef.current) return;
    turnActiveRef.current = false;
    send({ type: "SERVER_STATE_CHANGE", status: "idle" });
  }, [send]);
  const vad = useMicVAD({
    startOnLoad: false,
    onSpeechStart: handleSpeechStart,
    onSpeechEnd: handleSpeechEnd,
    onVADMisfire: handleVADMisfire,
    // @ts-expect-error
    workletURL: VAD_BASE_ASSET_PATH + "vad.worklet.bundle.min.js",
    modelURL: VAD_MODEL_URL,
    onnxWASMBasePath: ONNX_WASM_BASE_PATH,
    baseAssetPath: VAD_BASE_ASSET_PATH
  });
  // Surface VAD load errors into the machine, de-duplicated by message so
  // a persistent error object doesn't spam SET_ERROR on every render.
  useEffect(() => {
    if (!vad.errored) return;
    const message = typeof vad.errored === "string" ? vad.errored : vad.errored.message || "VAD failed to load";
    if (lastVADErrorRef.current === message) return;
    lastVADErrorRef.current = message;
    console.error("[Voice] Sending SET_ERROR (VAD)", message);
    send({ type: "SET_ERROR", error: message });
  }, [vad.errored, send]);
  // Keep the microphone VAD running only while the client can accept
  // speech (idle/listening); pause it while processing/speaking/etc.
  useEffect(() => {
    const shouldListen = state.status === "idle" || state.status === "listening";
    if (shouldListen && !vad.listening && !vad.loading && !vad.errored) {
      vad.start();
    } else if (!shouldListen && vad.listening) {
      vad.pause();
    }
  }, [state.status, vad.listening, vad.loading, vad.errored, vad.start, vad.pause]);
  // Opens (or reopens) the WebSocket and installs its handlers. Audio init
  // is attempted eagerly here while the user gesture is fresh, and again
  // on socket open as a best-effort retry.
  const connect = useCallback(() => {
    if (wsRef.current) wsRef.current.close();
    initAudio().catch((err) => console.warn("[Voice] Early audio init failed", err));
    console.log("[Voice] Connect called");
    send({ type: "CONNECT" });
    const sessionId2 = sessionIdRef.current || "session-" + Math.floor(Math.random() * 1e4);
    const url = buildWebSocketUrl(sessionId2);
    const ws = new WebSocket(url);
    wsRef.current = ws;
    ws.onopen = () => {
      send({ type: "CONNECTED" });
      initAudio().catch(() => {
      });
    };
    // Binary frames are audio for playback; text frames are JSON control
    // messages dispatched through handleMessage.
    ws.onmessage = async (event) => {
      if (event.data instanceof Blob) {
        const buf = await event.data.arrayBuffer();
        send({ type: "LOG_EVENT", eventType: "audio_output", details: { size: buf.byteLength }, blob: event.data });
        queueAudio(buf);
        return;
      }
      try {
        const data = JSON.parse(event.data);
        handleMessage(data);
      } catch (e) {
        console.error("[Voice] Parse error", e);
      }
    };
    ws.onclose = (e) => {
      console.log("[Voice] Closed", e.code);
      send({ type: "DISCONNECT" });
    };
    ws.onerror = (e) => {
      console.error("[Voice] WS Error", e);
      send({ type: "SET_ERROR", error: "Connection failed: " + (e instanceof ErrorEvent ? e.message : "Unknown") });
    };
  }, [buildWebSocketUrl, send]);
  // Dispatches one parsed server frame. Unknown frame types are ignored.
  // (Declared after `connect`, but only invoked asynchronously from socket
  // callbacks, so the const is initialized by the time it runs.)
  const handleMessage = (data) => {
    switch (data.type) {
      case "state":
        send({ type: "SERVER_STATE_CHANGE", status: data.value });
        break;
      case "transcript.final":
        send({ type: "ADD_MESSAGE", role: "user", content: data.text });
        onMessageCallbackRef.current?.({ role: "user", content: data.text });
        break;
      case "assistant.message":
        send({ type: "ADD_MESSAGE", role: "assistant", content: data.message.content });
        onMessageCallbackRef.current?.({ role: "assistant", content: data.message.content });
        break;
      case "assistant.partial":
        // Streaming partials are intentionally not rendered.
        break;
      case "error":
        console.error("[Voice] Sending SET_ERROR (Server)", data.reason);
        send({ type: "SET_ERROR", error: data.reason });
        break;
      case "feedback":
        onFeedbackCallbackRef.current?.(data.message);
        break;
      case "tool.call.start":
        send({ type: "TOOL_CALL_START", toolName: data.toolName });
        break;
      case "tool.call.end":
        send({ type: "TOOL_CALL_END", toolName: data.toolName });
        break;
    }
  };
  // Enqueue a chunk and kick the playback loop if it isn't already draining.
  const queueAudio = (buffer) => {
    audioQueueRef.current.push(buffer);
    if (!isPlaybackLoopRunning.current) {
      playQueue();
    }
  };
  // Drains the queue sequentially. Primary path decodes via WebAudio;
  // on decode failure it falls back to an <audio> element with a blob URL.
  const playQueue = async () => {
    isPlaybackLoopRunning.current = true;
    while (audioQueueRef.current.length > 0) {
      send({ type: "AUDIO_PLAYBACK_START" });
      const buffer = audioQueueRef.current.shift();
      if (!buffer) continue;
      try {
        const ctx = audioContextRef.current;
        // slice(0) copies because decodeAudioData detaches the buffer.
        const decoded = await ctx.decodeAudioData(buffer.slice(0));
        await new Promise((resolve) => {
          const source = ctx.createBufferSource();
          source.buffer = decoded;
          source.connect(ctx.destination);
          source.onended = () => resolve();
          source.start(0);
        });
      } catch (e) {
        console.warn("[Voice] Decode failed, trying fallback", e);
        try {
          const blob = new Blob([buffer], { type: "audio/wav" });
          const url = URL.createObjectURL(blob);
          const audio = new Audio(url);
          await new Promise((resolve, reject) => {
            audio.onended = () => {
              URL.revokeObjectURL(url);
              resolve();
            };
            audio.onerror = reject;
            audio.play().catch(reject);
          });
        } catch (err) {
          // Best-effort: a failed chunk is dropped, the loop continues.
          console.error("[Voice] Playback failed completely", err);
        }
      }
    }
    send({ type: "AUDIO_PLAYBACK_END" });
    isPlaybackLoopRunning.current = false;
  };
  const disconnect = useCallback(() => {
    if (wsRef.current) {
      wsRef.current.close();
      wsRef.current = null;
    }
    send({ type: "DISCONNECT" });
  }, [send]);
  // Asks the server to reset the current interaction and returns the
  // local machine to idle.
  const cancel = useCallback(() => {
    const ws = wsRef.current;
    if (ws && ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type: "reset" }));
    }
    send({ type: "CANCEL" });
  }, [send]);
  return {
    ...state,
    vadListening: vad.listening,
    vadLoading: vad.loading,
    vadErrored: vad.errored,
    userSpeaking: vad.userSpeaking,
    connect,
    disconnect,
    sendMessage,
    cancel
  };
}
|
|
621
|
+
|
|
622
|
+
// Public API of the headless bundle.
export { clientMachine, createBlobUrl, useVoiceSession };
//# sourceMappingURL=index.js.map
|