@jchaffin/voicekit 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +369 -0
- package/dist/adapters/deepgram.d.mts +43 -0
- package/dist/adapters/deepgram.d.ts +43 -0
- package/dist/adapters/deepgram.js +216 -0
- package/dist/adapters/deepgram.mjs +162 -0
- package/dist/adapters/elevenlabs.d.mts +41 -0
- package/dist/adapters/elevenlabs.d.ts +41 -0
- package/dist/adapters/elevenlabs.js +304 -0
- package/dist/adapters/elevenlabs.mjs +250 -0
- package/dist/adapters/livekit.d.mts +44 -0
- package/dist/adapters/livekit.d.ts +44 -0
- package/dist/adapters/livekit.js +225 -0
- package/dist/adapters/livekit.mjs +161 -0
- package/dist/adapters/openai.d.mts +41 -0
- package/dist/adapters/openai.d.ts +41 -0
- package/dist/adapters/openai.js +350 -0
- package/dist/adapters/openai.mjs +294 -0
- package/dist/chunk-22WLZIXO.mjs +33 -0
- package/dist/chunk-T3II3DRG.mjs +178 -0
- package/dist/chunk-UZ2VGPZD.mjs +33 -0
- package/dist/chunk-Y6FXYEAI.mjs +10 -0
- package/dist/index.d.mts +693 -0
- package/dist/index.d.ts +693 -0
- package/dist/index.js +1838 -0
- package/dist/index.mjs +1593 -0
- package/dist/server.d.mts +80 -0
- package/dist/server.d.ts +80 -0
- package/dist/server.js +147 -0
- package/dist/server.mjs +119 -0
- package/dist/types-DY31oVB1.d.mts +150 -0
- package/dist/types-DY31oVB1.d.ts +150 -0
- package/dist/types-mThnXW9S.d.mts +150 -0
- package/dist/types-mThnXW9S.d.ts +150 -0
- package/dist/types-uLnzb8NE.d.mts +150 -0
- package/dist/types-uLnzb8NE.d.ts +150 -0
- package/package.json +100 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,1593 @@
|
|
|
1
|
+
import {
|
|
2
|
+
TOOL_RESULT_EVENT,
|
|
3
|
+
createAPITool,
|
|
4
|
+
createEventTool,
|
|
5
|
+
createNavigationTool,
|
|
6
|
+
createRAGTool,
|
|
7
|
+
createSearchTool,
|
|
8
|
+
defineTool
|
|
9
|
+
} from "./chunk-T3II3DRG.mjs";
|
|
10
|
+
import {
|
|
11
|
+
EventEmitter
|
|
12
|
+
} from "./chunk-22WLZIXO.mjs";
|
|
13
|
+
|
|
14
|
+
// src/VoiceProvider.tsx
|
|
15
|
+
import {
|
|
16
|
+
createContext,
|
|
17
|
+
useContext,
|
|
18
|
+
useState,
|
|
19
|
+
useRef,
|
|
20
|
+
useCallback,
|
|
21
|
+
useEffect
|
|
22
|
+
} from "react";
|
|
23
|
+
import { jsx } from "react/jsx-runtime";
|
|
24
|
+
// React context carrying the live voice-session API (status, transcript,
// connect/disconnect, etc.). Null until a VoiceProvider mounts.
var VoiceContext = createContext(null);
// Top-level provider that owns the voice session lifecycle:
// - lazily creates a hidden <audio> element for remote playback,
// - fetches an ephemeral auth token from `sessionEndpoint` (POST),
// - creates a session via the injected `adapter` and wires its events
//   into a transcript state array,
// - exposes the session controls through VoiceContext.
// Props:
//   children           React subtree.
//   adapter            object with createSession(agent, opts) -> session.
//   agent              agent definition passed through to the adapter.
//   sessionEndpoint    URL POSTed to obtain { ephemeralKey | token }.
//   model, language    forwarded to adapter.createSession.
//   onStatusChange / onTranscriptUpdate / onToolCall / onError  optional callbacks.
function VoiceProvider({
  children,
  adapter,
  agent,
  sessionEndpoint = "/api/session",
  model,
  language = "en",
  onStatusChange,
  onTranscriptUpdate,
  onToolCall,
  onError
}) {
  const [status, setStatus] = useState("DISCONNECTED");
  const [transcript, setTranscript] = useState([]);
  const [isMuted, setIsMuted] = useState(false);
  const sessionRef = useRef(null);
  const audioRef = useRef(null);
  // Mirror of `status` readable from stable callbacks without re-creating them.
  const statusRef = useRef("DISCONNECTED");
  // Id of the assistant message currently being streamed (delta appends).
  const currentMsgIdRef = useRef(null);
  useEffect(() => {
    statusRef.current = status;
  }, [status]);
  // Create a hidden autoplay <audio> element once, on the client only,
  // and tear it down on unmount.
  useEffect(() => {
    if (typeof window === "undefined") return;
    const audio = document.createElement("audio");
    audio.autoplay = true;
    audio.style.display = "none";
    document.body.appendChild(audio);
    audioRef.current = audio;
    return () => {
      try {
        audio.pause();
        audio.srcObject = null;
        audio.remove();
      } catch {
      }
    };
  }, []);
  const updateStatus = useCallback((newStatus) => {
    setStatus(newStatus);
    onStatusChange?.(newStatus);
  }, [onStatusChange]);
  // Append a message to the transcript; returns the (possibly generated) id.
  // NOTE(review): onTranscriptUpdate is invoked inside the setState updater,
  // so under React StrictMode it may fire twice per update — confirm intended.
  const addMessage = useCallback((role, text, id) => {
    const message = {
      id: id || crypto.randomUUID(),
      role,
      text,
      timestamp: /* @__PURE__ */ new Date(),
      status: "pending"
    };
    setTranscript((prev) => {
      const updated = [...prev, message];
      onTranscriptUpdate?.(updated);
      return updated;
    });
    return message.id;
  }, [onTranscriptUpdate]);
  // Replace (or append to, when `append`) the text of an existing message.
  const updateMessage = useCallback((id, text, append = false) => {
    setTranscript((prev) => {
      const updated = prev.map(
        (m) => m.id === id ? { ...m, text: append ? m.text + text : text } : m
      );
      onTranscriptUpdate?.(updated);
      return updated;
    });
  }, [onTranscriptUpdate]);
  // Mark a message as finished streaming.
  const completeMessage = useCallback((id) => {
    setTranscript((prev) => {
      const updated = prev.map(
        (m) => m.id === id ? { ...m, status: "complete" } : m
      );
      onTranscriptUpdate?.(updated);
      return updated;
    });
  }, [onTranscriptUpdate]);
  // Subscribe to the adapter session's event stream and translate it into
  // transcript updates and user callbacks.
  const wireSessionEvents = useCallback((session) => {
    // User speech is only added once final (no interim partials kept).
    session.on("user_transcript", (data) => {
      if (data.isFinal) {
        addMessage("user", data.text || data.delta || "");
      }
    });
    // Assistant speech streams in deltas; the first delta opens a message,
    // later deltas append, and the final event closes it.
    session.on("assistant_transcript", (data) => {
      if (data.isFinal) {
        if (currentMsgIdRef.current) {
          completeMessage(currentMsgIdRef.current);
          currentMsgIdRef.current = null;
        }
      } else if (data.delta) {
        if (!currentMsgIdRef.current) {
          currentMsgIdRef.current = addMessage("assistant", data.delta);
        } else {
          updateMessage(currentMsgIdRef.current, data.delta, true);
        }
      }
    });
    session.on("tool_call_end", (name, input, output) => {
      onToolCall?.(name, input, output);
    });
    session.on("error", (error) => {
      console.error("VoiceKit session error:", error);
      onError?.(error);
    });
  }, [addMessage, updateMessage, completeMessage, onToolCall, onError]);
  // POST to the session endpoint; returns an auth token string or null on
  // any failure (non-OK response, network error, missing field).
  const fetchToken = useCallback(async () => {
    try {
      const res = await fetch(sessionEndpoint, { method: "POST" });
      if (!res.ok) return null;
      const data = await res.json();
      return data.ephemeralKey || data.token || null;
    } catch {
      return null;
    }
  }, [sessionEndpoint]);
  // Establish a session. No-op unless currently DISCONNECTED and the audio
  // element exists. On any failure the status falls back to DISCONNECTED.
  const connect = useCallback(async () => {
    if (statusRef.current !== "DISCONNECTED") return;
    if (!audioRef.current) return;
    updateStatus("CONNECTING");
    try {
      const token = await fetchToken();
      if (!token) {
        onError?.(new Error("Failed to get session key"));
        updateStatus("DISCONNECTED");
        return;
      }
      const session = adapter.createSession(agent, { model, language });
      sessionRef.current = session;
      wireSessionEvents(session);
      await session.connect({
        authToken: token,
        audioElement: audioRef.current
      });
      updateStatus("CONNECTED");
      // Nudge the model to produce an initial response shortly after connect.
      // NOTE(review): fixed 500 ms delay — presumably waits for the data
      // channel to settle; confirm against the adapter's contract.
      setTimeout(() => {
        session.sendRawEvent?.({ type: "response.create" });
      }, 500);
    } catch (error) {
      console.error("VoiceKit connection failed:", error);
      onError?.(error instanceof Error ? error : new Error(String(error)));
      updateStatus("DISCONNECTED");
    }
  }, [adapter, agent, model, language, fetchToken, wireSessionEvents, updateStatus, onError]);
  // Tear down the current session (errors ignored) and reset stream state.
  const disconnect = useCallback(async () => {
    if (sessionRef.current) {
      try {
        await sessionRef.current.disconnect();
      } catch {
      }
      sessionRef.current = null;
    }
    currentMsgIdRef.current = null;
    updateStatus("DISCONNECTED");
  }, [updateStatus]);
  // Send a typed message; interrupts any in-flight assistant speech first.
  const sendMessage = useCallback((text) => {
    if (!sessionRef.current || statusRef.current !== "CONNECTED") return;
    sessionRef.current.interrupt();
    sessionRef.current.sendMessage(text);
  }, []);
  const interrupt = useCallback(() => {
    sessionRef.current?.interrupt();
  }, []);
  // Mute both the outgoing session audio and local playback.
  const mute = useCallback((muted) => {
    setIsMuted(muted);
    sessionRef.current?.mute(muted);
    if (audioRef.current) {
      audioRef.current.muted = muted;
    }
  }, []);
  const clearTranscript = useCallback(() => {
    setTranscript([]);
    onTranscriptUpdate?.([]);
  }, [onTranscriptUpdate]);
  // Best-effort disconnect on unmount.
  useEffect(() => {
    return () => {
      try {
        sessionRef.current?.disconnect();
      } catch {
      }
    };
  }, []);
  const value = {
    status,
    connect,
    disconnect,
    transcript,
    clearTranscript,
    sendMessage,
    interrupt,
    mute,
    isMuted,
    agent
  };
  return /* @__PURE__ */ jsx(VoiceContext.Provider, { value, children });
}
|
|
218
|
+
// Access the voice-session API provided by the nearest <VoiceProvider>.
// Throws when called outside of one, so consumers never see a null context.
function useVoice() {
  const ctx = useContext(VoiceContext);
  if (ctx) return ctx;
  throw new Error("useVoice must be used within a VoiceProvider");
}
|
|
225
|
+
|
|
226
|
+
// src/components/VoiceChat.tsx
|
|
227
|
+
import { useRef as useRef2, useEffect as useEffect2 } from "react";
|
|
228
|
+
import { Fragment, jsx as jsx2, jsxs } from "react/jsx-runtime";
|
|
229
|
+
// A single chat bubble: right-aligned blue for the user, left-aligned grey
// for the assistant. Custom bubble classes may override either side.
function Message({ message, userClassName, assistantClassName }) {
  const fromUser = message.role === "user";
  const alignment = fromUser ? "justify-end" : "justify-start";
  const bubbleStyle = fromUser
    ? userClassName || "bg-blue-500 text-white rounded-br-md"
    : assistantClassName || "bg-gray-100 dark:bg-gray-800 text-gray-900 dark:text-gray-100 rounded-bl-md";
  const body = /* @__PURE__ */ jsx2("p", { className: "text-sm whitespace-pre-wrap", children: message.text });
  const bubble = /* @__PURE__ */ jsx2("div", {
    className: `max-w-[80%] rounded-2xl px-4 py-2 ${bubbleStyle}`,
    children: body
  });
  return /* @__PURE__ */ jsx2("div", { className: `flex ${alignment}`, children: bubble });
}
|
|
239
|
+
// Scrollable transcript list. Auto-scrolls to the newest message unless the
// user has manually scrolled up, in which case their position is respected.
// Props:
//   messages            array of { id, role, text, ... } transcript items.
//   userClassName / assistantClassName  forwarded to <Message>.
//   emptyMessage        node shown when there are no messages yet.
function Transcript({
  messages,
  userClassName,
  assistantClassName,
  emptyMessage = "Start a conversation..."
}) {
  const containerRef = useRef2(null);
  // True while the user has scrolled away from the bottom (>= 50px gap);
  // suppresses auto-scroll so new messages don't yank the view down.
  const userScrolledUp = useRef2(false);
  useEffect2(() => {
    const container = containerRef.current;
    if (!container) return;
    const handleScroll = () => {
      const isAtBottom = container.scrollHeight - container.scrollTop - container.clientHeight < 50;
      userScrolledUp.current = !isAtBottom;
    };
    container.addEventListener("scroll", handleScroll);
    return () => container.removeEventListener("scroll", handleScroll);
  }, []);
  // Smooth-scroll to the bottom whenever messages change, unless pinned up.
  useEffect2(() => {
    if (containerRef.current && messages.length > 0 && !userScrolledUp.current) {
      containerRef.current.scrollTo({
        top: containerRef.current.scrollHeight,
        behavior: "smooth"
      });
    }
  }, [messages]);
  if (messages.length === 0) {
    return /* @__PURE__ */ jsx2("div", { className: "flex items-center justify-center h-full text-gray-500", children: emptyMessage });
  }
  return /* @__PURE__ */ jsx2("div", { ref: containerRef, className: "flex flex-col gap-3 overflow-y-auto h-full p-4", children: messages.map((msg) => /* @__PURE__ */ jsx2(
    Message,
    {
      message: msg,
      userClassName,
      assistantClassName
    },
    msg.id
  )) });
}
|
|
278
|
+
// Colored status dot plus label for the current session state.
// Green/yellow dots pulse while active or connecting; grey is static.
function StatusIndicator({
  className = "",
  connectedText = "Connected",
  connectingText = "Connecting...",
  disconnectedText = "Disconnected"
}) {
  const { status } = useVoice();
  const byStatus = {
    CONNECTED: { color: "bg-green-500", text: connectedText, pulse: true },
    CONNECTING: { color: "bg-yellow-500", text: connectingText, pulse: true },
    DISCONNECTED: { color: "bg-gray-400", text: disconnectedText, pulse: false }
  };
  const { color, text, pulse } = byStatus[status];
  const dot = /* @__PURE__ */ jsx2("div", {
    className: `w-2 h-2 rounded-full ${color} ${pulse ? "animate-pulse" : ""}`
  });
  const label = /* @__PURE__ */ jsx2("span", { className: "text-sm", children: text });
  return /* @__PURE__ */ jsxs("div", { className: `flex items-center gap-2 ${className}`, children: [dot, label] });
}
|
|
296
|
+
// Session toggle button: starts the session when disconnected, ends it when
// connected, and is disabled (no-op) while connecting. A provided
// `className` fully replaces the default styling; `children` replaces the
// status-derived label.
function ConnectButton({
  className = "",
  connectText = "Start",
  disconnectText = "End",
  connectingText = "Connecting...",
  children
}) {
  const { status, connect, disconnect } = useVoice();
  let label = connectText;
  let palette = "bg-blue-500 hover:bg-blue-600 text-white";
  if (status === "CONNECTED") {
    label = disconnectText;
    palette = "bg-red-500 hover:bg-red-600 text-white";
  } else if (status === "CONNECTING") {
    label = connectingText;
    palette = "bg-gray-300 text-gray-500 cursor-not-allowed";
  }
  const handleClick = () => {
    if (status === "CONNECTED") {
      disconnect();
    } else if (status === "DISCONNECTED") {
      connect();
    }
  };
  return /* @__PURE__ */ jsx2("button", {
    onClick: handleClick,
    disabled: status === "CONNECTING",
    className: className || `px-4 py-2 rounded-lg font-medium transition-colors ${palette}`,
    children: children || label
  });
}
|
|
322
|
+
// Text input + submit button for typing messages into the session.
// Disabled unless the session is CONNECTED. Uses an uncontrolled input
// (ref-based) so keystrokes don't re-render the parent.
// Props:
//   placeholder / buttonText  UI text.
//   className                 extra classes on the wrapping <form>.
//   onSend                    optional override; when absent, text goes to
//                             the session via sendMessage().
function ChatInput({
  placeholder = "Type a message...",
  className = "",
  buttonText = "Send",
  onSend
}) {
  const { sendMessage, status } = useVoice();
  const inputRef = useRef2(null);
  const handleSubmit = (e) => {
    e.preventDefault();
    const text = inputRef.current?.value.trim();
    if (!text) return;
    if (onSend) {
      onSend(text);
    } else {
      sendMessage(text);
    }
    // Clear the field after a successful submit.
    if (inputRef.current) {
      inputRef.current.value = "";
    }
  };
  const disabled = status !== "CONNECTED";
  return /* @__PURE__ */ jsxs("form", { onSubmit: handleSubmit, className: `flex gap-2 ${className}`, children: [
    /* @__PURE__ */ jsx2(
      "input",
      {
        ref: inputRef,
        type: "text",
        placeholder,
        disabled,
        className: "flex-1 px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 \n bg-white dark:bg-gray-800 text-gray-900 dark:text-gray-100\n focus:outline-none focus:ring-2 focus:ring-blue-500\n disabled:opacity-50 disabled:cursor-not-allowed"
      }
    ),
    /* @__PURE__ */ jsx2(
      "button",
      {
        type: "submit",
        disabled,
        className: "px-4 py-2 bg-blue-500 text-white rounded-lg font-medium\n hover:bg-blue-600 disabled:opacity-50 disabled:cursor-not-allowed",
        children: buttonText
      }
    )
  ] });
}
|
|
366
|
+
// Batteries-included chat panel: status header with Clear/Connect controls,
// a fixed-height <Transcript>, and (while connected) a <ChatInput> footer.
// Props:
//   height                CSS height of the transcript area.
//   showHeader/showInput  toggle the default chrome.
//   emptyState            node shown when the transcript is empty.
//   header / footer       custom nodes replacing the default header/footer.
function VoiceChat({
  className = "",
  height = "400px",
  showHeader = true,
  showInput = true,
  emptyState,
  header,
  footer
}) {
  const { status, transcript, connect, disconnect, clearTranscript } = useVoice();
  const defaultEmptyState = /* @__PURE__ */ jsxs("div", { className: "flex flex-col items-center justify-center gap-4", children: [
    /* @__PURE__ */ jsx2(ConnectButton, {}),
    /* @__PURE__ */ jsx2("p", { className: "text-sm text-gray-500", children: status === "CONNECTING" ? "Connecting..." : "Click to start a conversation" })
  ] });
  return /* @__PURE__ */ jsxs("div", { className: `flex flex-col rounded-xl border border-gray-200 dark:border-gray-700
    bg-white dark:bg-gray-900 overflow-hidden ${className}`, children: [
    showHeader && /* @__PURE__ */ jsx2("div", { className: "flex items-center justify-between px-4 py-3 border-b border-gray-200 dark:border-gray-700", children: header || /* @__PURE__ */ jsxs(Fragment, { children: [
      /* @__PURE__ */ jsx2(StatusIndicator, {}),
      /* @__PURE__ */ jsxs("div", { className: "flex gap-2", children: [
        transcript.length > 0 && /* @__PURE__ */ jsx2(
          "button",
          {
            onClick: clearTranscript,
            className: "text-sm text-gray-500 hover:text-gray-700",
            children: "Clear"
          }
        ),
        /* @__PURE__ */ jsx2(
          "button",
          {
            onClick: status === "CONNECTED" ? disconnect : connect,
            className: `text-sm font-medium ${status === "CONNECTED" ? "text-red-500 hover:text-red-600" : "text-green-500 hover:text-green-600"}`,
            children: status === "CONNECTED" ? "End" : "Connect"
          }
        )
      ] })
    ] }) }),
    /* @__PURE__ */ jsx2("div", { style: { height }, className: "overflow-hidden", children: /* @__PURE__ */ jsx2(
      Transcript,
      {
        messages: transcript,
        emptyMessage: emptyState || defaultEmptyState
      }
    ) }),
    // NOTE(review): this parses as footer || (showInput && status === "CONNECTED" && <div>…)
    // — a falsy `footer` falls through to the default input row; presumably
    // intended, but confirm that a custom footer should win unconditionally.
    footer || showInput && status === "CONNECTED" && /* @__PURE__ */ jsx2("div", { className: "p-4 border-t border-gray-200 dark:border-gray-700", children: /* @__PURE__ */ jsx2(ChatInput, {}) })
  ] });
}
|
|
413
|
+
|
|
414
|
+
// src/createAgent.ts
|
|
415
|
+
// Build an agent definition, appending the library's standard response
// guidelines to the caller-supplied instructions. The returned object keeps
// the given name, tools (default []) and voice unchanged.
function createAgent(config) {
  const { name, instructions, tools = [], voice } = config;
  const guidelines = [
    "# Response Guidelines",
    "- Keep responses concise (2-3 sentences max)",
    "- Answer questions directly before asking follow-ups",
    "- Use tools silently without announcing them",
    "- Speak naturally and conversationally"
  ].join("\n");
  return {
    name,
    instructions: `${instructions}\n\n${guidelines}`.trim(),
    tools,
    voice
  };
}
|
|
433
|
+
// Build an agent from a structured template (role/personality/capabilities/
// constraints/context) by rendering a markdown-style instruction document
// and delegating to createAgent (which appends the response guidelines).
// Empty sections render as "" and leave blank lines in the document.
// NOTE(review): a `voice` field on the template config is not forwarded to
// createAgent — confirm whether the template type is meant to support voice.
function createAgentFromTemplate(config) {
  const {
    name,
    role,
    personality = "Professional and helpful",
    capabilities = [],
    constraints = [],
    tools = [],
    context = {}
  } = config;
  // Each optional section is rendered only when it has content.
  const capabilitiesSection = capabilities.length > 0 ? `## What You Can Do
${capabilities.map((c) => `- ${c}`).join("\n")}` : "";
  const constraintsSection = constraints.length > 0 ? `## Constraints
${constraints.map((c) => `- ${c}`).join("\n")}` : "";
  // Context is embedded as a pretty-printed JSON code block.
  const contextSection = Object.keys(context).length > 0 ? `## Context
\`\`\`json
${JSON.stringify(context, null, 2)}
\`\`\`` : "";
  const instructions = `
You are ${name}, ${role}.

## Personality
${personality}

${capabilitiesSection}

${constraintsSection}

${contextSection}
`.trim();
  return createAgent({
    name,
    instructions,
    tools
  });
}
|
|
469
|
+
|
|
470
|
+
// src/hooks/toolHooks.ts
|
|
471
|
+
import { useEffect as useEffect3, useCallback as useCallback2, useState as useState2, useRef as useRef3 } from "react";
|
|
472
|
+
// Collect every tool-result CustomEvent dispatched on `window` into state.
// Returns the full list, the most recent entry (or null), and a reset.
function useToolResults() {
  const [results, setResults] = useState2([]);
  useEffect3(() => {
    const onToolResult = (event) => {
      setResults((prev) => [...prev, event.detail]);
    };
    window.addEventListener(TOOL_RESULT_EVENT, onToolResult);
    return () => window.removeEventListener(TOOL_RESULT_EVENT, onToolResult);
  }, []);
  const clear = useCallback2(() => setResults([]), []);
  const lastResult = results.at(-1) || null;
  return { results, lastResult, clear };
}
|
|
488
|
+
// Invoke `handler(input, result)` whenever a tool-result event whose
// detail.name matches `toolName` fires on `window`. The latest handler is
// stashed in a ref so the subscription never goes stale and only needs to
// re-attach when the tool name changes.
function useToolListener(toolName, handler) {
  const latestHandler = useRef3(handler);
  latestHandler.current = handler;
  useEffect3(() => {
    const onToolResult = (event) => {
      const { name, input, result } = event.detail;
      if (name !== toolName) return;
      latestHandler.current(input, result);
    };
    window.addEventListener(TOOL_RESULT_EVENT, onToolResult);
    return () => window.removeEventListener(TOOL_RESULT_EVENT, onToolResult);
  }, [toolName]);
}
|
|
501
|
+
// Track the most recent input/result pair for a single named tool.
// `hasResult` distinguishes "no event yet" from a null result value.
function useToolResult(toolName) {
  const [latest, setLatest] = useState2(null);
  useEffect3(() => {
    const onToolResult = (event) => {
      if (event.detail.name !== toolName) return;
      setLatest({ input: event.detail.input, result: event.detail.result });
    };
    window.addEventListener(TOOL_RESULT_EVENT, onToolResult);
    return () => window.removeEventListener(TOOL_RESULT_EVENT, onToolResult);
  }, [toolName]);
  const clear = useCallback2(() => setLatest(null), []);
  return {
    input: latest?.input ?? null,
    result: latest?.result ?? null,
    hasResult: latest !== null,
    clear
  };
}
|
|
520
|
+
|
|
521
|
+
// src/hooks/useAudioRecorder.ts
|
|
522
|
+
import { useRef as useRef4, useCallback as useCallback3 } from "react";
|
|
523
|
+
|
|
524
|
+
// src/utils/audio.ts
|
|
525
|
+
// Write `str` into `view` starting at `offset`, one byte per UTF-16 code
// unit (used for the ASCII tags of a WAV header).
function writeString(view, offset, str) {
  let i = 0;
  while (i < str.length) {
    view.setUint8(offset + i, str.charCodeAt(i));
    i += 1;
  }
}
|
|
530
|
+
// Convert float samples in [-1, 1] to little-endian signed 16-bit PCM,
// writing into `output` starting at `offset`. Values are clamped first;
// negatives scale by 32768 and positives by 32767 so both rails fit.
function floatTo16BitPCM(output, offset, input) {
  let pos = offset;
  for (const sample of input) {
    const clamped = Math.min(1, Math.max(-1, sample));
    const scaled = clamped < 0 ? clamped * 32768 : clamped * 32767;
    output.setInt16(pos, scaled, true);
    pos += 2;
  }
}
|
|
536
|
+
// Build a complete WAV (RIFF) file image for mono 16-bit PCM audio:
// the canonical 44-byte header followed by the samples encoded as
// little-endian signed 16-bit integers.
function encodeWAV(samples, sampleRate) {
  const buffer = new ArrayBuffer(44 + samples.length * 2);
  const view = new DataView(buffer);
  writeString(view, 0, "RIFF");
  view.setUint32(4, 36 + samples.length * 2, true); // RIFF chunk size = file size - 8
  writeString(view, 8, "WAVE");
  writeString(view, 12, "fmt ");
  view.setUint32(16, 16, true); // fmt sub-chunk size (16 for PCM)
  view.setUint16(20, 1, true); // audio format: 1 = uncompressed PCM
  view.setUint16(22, 1, true); // channel count: mono
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * 2, true); // byte rate = sampleRate * blockAlign
  view.setUint16(32, 2, true); // block align = channels * bytesPerSample
  view.setUint16(34, 16, true); // bits per sample
  writeString(view, 36, "data");
  view.setUint32(40, samples.length * 2, true); // data chunk byte length
  floatTo16BitPCM(view, 44, samples);
  return buffer;
}
|
|
555
|
+
// Decode a WebM audio Blob and re-encode it as a mono 16-bit WAV Blob.
// Multi-channel input is mixed down by averaging the channels.
// The temporary AudioContext is always closed: browsers cap the number of
// concurrently open contexts, so leaking one per conversion eventually
// makes decoding fail. (Previously the context was never closed.)
async function convertWebMToWav(blob) {
  const arrayBuffer = await blob.arrayBuffer();
  const audioContext = new AudioContext();
  try {
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
    const numChannels = audioBuffer.numberOfChannels;
    const length = audioBuffer.length;
    // Sum all channels into one buffer, then divide to average.
    const combined = new Float32Array(length);
    for (let channel = 0; channel < numChannels; channel++) {
      const channelData = audioBuffer.getChannelData(channel);
      for (let i = 0; i < length; i++) {
        combined[i] += channelData[i];
      }
    }
    for (let i = 0; i < length; i++) {
      combined[i] /= numChannels;
    }
    const wavBuffer = encodeWAV(combined, audioBuffer.sampleRate);
    return new Blob([wavBuffer], { type: "audio/wav" });
  } finally {
    // Best-effort release of the audio decoding resources.
    await audioContext.close().catch(() => {});
  }
}
|
|
574
|
+
// Map a codec label (case-insensitive) to the wire audio format string.
// "g711" selects mu-law; everything else — including unknown codecs —
// falls back to 16-bit PCM.
function audioFormatForCodec(codec) {
  const formats = new Map([
    ["opus", "pcm16"],
    ["pcm", "pcm16"],
    ["g711", "g711_ulaw"]
  ]);
  return formats.get(codec.toLowerCase()) ?? "pcm16";
}
|
|
585
|
+
// Pin G.711 (PCMU/PCMA @ 8kHz) on every audio-sending transceiver of `pc`
// when that codec is requested; any other codec leaves defaults untouched.
// Returns the same peer connection for chaining.
function applyCodecPreferences(pc, codec) {
  if (codec !== "g711") return pc;
  for (const transceiver of pc.getTransceivers()) {
    if (transceiver.sender.track?.kind === "audio") {
      transceiver.setCodecPreferences([
        { mimeType: "audio/PCMU", clockRate: 8e3 },
        { mimeType: "audio/PCMA", clockRate: 8e3 }
      ]);
    }
  }
  return pc;
}
|
|
598
|
+
|
|
599
|
+
// src/hooks/useAudioRecorder.ts
|
|
600
|
+
// Hook exposing imperative controls for recording a MediaStream and saving
// it as WAV. Chunks live in a ref, so recording never triggers re-renders.
function useAudioRecorder() {
  const mediaRecorderRef = useRef4(null);
  const recordedChunksRef = useRef4([]);
  // Begin capturing `stream` as audio/webm; no-op if already recording.
  // Throws (after logging) if MediaRecorder cannot be created/started.
  const startRecording = useCallback3(async (stream) => {
    if (mediaRecorderRef.current?.state === "recording") {
      return;
    }
    try {
      const mediaRecorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
      mediaRecorder.ondataavailable = (event) => {
        if (event.data?.size > 0) {
          recordedChunksRef.current.push(event.data);
        }
      };
      mediaRecorder.start();
      mediaRecorderRef.current = mediaRecorder;
    } catch (error) {
      console.error("Failed to start recording:", error);
      throw error;
    }
  }, []);
  // Stop the active recorder, flushing pending data first (both best-effort;
  // recorded chunks are kept for later download/retrieval).
  const stopRecording = useCallback3(() => {
    if (mediaRecorderRef.current) {
      try {
        mediaRecorderRef.current.requestData();
      } catch {
      }
      try {
        mediaRecorderRef.current.stop();
      } catch {
      }
      mediaRecorderRef.current = null;
    }
  }, []);
  // Convert the buffered chunks to WAV and trigger a browser download via a
  // temporary <a download>. Returns the WAV Blob, or null if nothing was
  // recorded. Throws (after logging) if decoding/conversion fails.
  const downloadRecording = useCallback3(async (filename) => {
    if (mediaRecorderRef.current?.state === "recording") {
      mediaRecorderRef.current.requestData();
      // Give ondataavailable a moment to deliver the flushed chunk.
      // NOTE(review): fixed 100 ms wait — presumably enough in practice,
      // but there is no guarantee the chunk has landed; confirm.
      await new Promise((resolve) => setTimeout(resolve, 100));
    }
    if (recordedChunksRef.current.length === 0) {
      return null;
    }
    const webmBlob = new Blob(recordedChunksRef.current, { type: "audio/webm" });
    try {
      const wavBlob = await convertWebMToWav(webmBlob);
      const url = URL.createObjectURL(wavBlob);
      const now = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
      const name = filename || `voice_recording_${now}.wav`;
      const a = document.createElement("a");
      a.style.display = "none";
      a.href = url;
      a.download = name;
      document.body.appendChild(a);
      a.click();
      document.body.removeChild(a);
      // Revoke the object URL only after the click has been processed.
      setTimeout(() => URL.revokeObjectURL(url), 100);
      return wavBlob;
    } catch (error) {
      console.error("Failed to convert recording:", error);
      throw error;
    }
  }, []);
  // Return the recording as a WAV Blob without downloading (null if empty).
  const getRecordingBlob = useCallback3(async () => {
    if (recordedChunksRef.current.length === 0) {
      return null;
    }
    const webmBlob = new Blob(recordedChunksRef.current, { type: "audio/webm" });
    return convertWebMToWav(webmBlob);
  }, []);
  // Drop all buffered chunks.
  const clearRecording = useCallback3(() => {
    recordedChunksRef.current = [];
  }, []);
  return {
    startRecording,
    stopRecording,
    downloadRecording,
    getRecordingBlob,
    clearRecording,
    isRecording: () => mediaRecorderRef.current?.state === "recording"
  };
}
|
|
681
|
+
|
|
682
|
+
// src/hooks/useRealtimeSession.ts
|
|
683
|
+
import { useCallback as useCallback6, useRef as useRef6, useState as useState5, useEffect as useEffect5 } from "react";
|
|
684
|
+
|
|
685
|
+
// src/contexts/EventContext.tsx
|
|
686
|
+
import { createContext as createContext2, useContext as useContext2, useState as useState3, useCallback as useCallback4 } from "react";
|
|
687
|
+
import { jsx as jsx3 } from "react/jsx-runtime";
|
|
688
|
+
// Context for the debug event log (client/server realtime events).
var EventContext = createContext2(void 0);
// Provider that accumulates a timestamped log of client- and server-side
// events for inspection UIs. Each entry: { id, direction, eventName,
// eventData, timestamp, expanded }.
var EventProvider = ({ children }) => {
  const [loggedEvents, setLoggedEvents] = useState3([]);
  // Append one entry. The id is the event's numeric event_id when present,
  // otherwise Date.now().
  // NOTE(review): Date.now() ids can collide for events logged within the
  // same millisecond, which would make toggleExpand affect several rows —
  // confirm whether that matters for the consuming UI.
  const addLoggedEvent = useCallback4(
    (direction, eventName, eventData) => {
      const id = typeof eventData.event_id === "number" ? eventData.event_id : Date.now();
      setLoggedEvents((prev) => [
        ...prev,
        {
          id,
          direction,
          eventName,
          eventData,
          timestamp: (/* @__PURE__ */ new Date()).toLocaleTimeString(),
          expanded: false
        }
      ]);
    },
    []
  );
  // Log an event sent by this client; name = "<type> <suffix>" trimmed.
  const logClientEvent = useCallback4(
    (eventObj, eventNameSuffix = "") => {
      const name = `${eventObj.type || ""} ${eventNameSuffix || ""}`.trim();
      addLoggedEvent("client", name, eventObj);
    },
    [addLoggedEvent]
  );
  // Log an event received from the server; same naming scheme.
  const logServerEvent = useCallback4(
    (eventObj, eventNameSuffix = "") => {
      const name = `${eventObj.type || ""} ${eventNameSuffix || ""}`.trim();
      addLoggedEvent("server", name, eventObj);
    },
    [addLoggedEvent]
  );
  // Log a conversation-history item with a descriptive derived name:
  // messages become "<role>.<status>", function calls "function.<name>.<status>".
  const logHistoryItem = useCallback4(
    (item) => {
      let eventName = item.type;
      if (item.type === "message") {
        eventName = `${item.role}.${item.status || "unknown"}`;
      }
      if (item.type === "function_call") {
        eventName = `function.${item.name || "unknown"}.${item.status || "unknown"}`;
      }
      addLoggedEvent("server", eventName, item);
    },
    [addLoggedEvent]
  );
  // Flip the expanded flag of the entry with the given id.
  const toggleExpand = useCallback4((id) => {
    setLoggedEvents(
      (prev) => prev.map((log) => log.id === id ? { ...log, expanded: !log.expanded } : log)
    );
  }, []);
  const clearEvents = useCallback4(() => {
    setLoggedEvents([]);
  }, []);
  return /* @__PURE__ */ jsx3(
    EventContext.Provider,
    {
      value: { loggedEvents, logClientEvent, logServerEvent, logHistoryItem, toggleExpand, clearEvents },
      children
    }
  );
};
|
|
751
|
+
// Access the event-log context; throws when rendered outside an EventProvider.
function useEvent() {
  const ctx = useContext2(EventContext);
  if (!ctx) {
    throw new Error("useEvent must be used within an EventProvider");
  }
  return ctx;
}
|
|
758
|
+
|
|
759
|
+
// src/hooks/useSessionHistory.ts
|
|
760
|
+
import { useRef as useRef5 } from "react";
|
|
761
|
+
|
|
762
|
+
// src/contexts/TranscriptContext.tsx
|
|
763
|
+
import {
|
|
764
|
+
createContext as createContext3,
|
|
765
|
+
useContext as useContext3,
|
|
766
|
+
useState as useState4,
|
|
767
|
+
useCallback as useCallback5
|
|
768
|
+
} from "react";
|
|
769
|
+
import { jsx as jsx4 } from "react/jsx-runtime";
|
|
770
|
+
// Transcript store context; undefined until a TranscriptProvider mounts.
var TranscriptContext = createContext3(void 0);
|
|
771
|
+
// Current wall-clock time as a compact 24-hour "HH:MM:SS" display string.
function newTimestampPretty() {
  const options = {
    hour12: false,
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit"
  };
  return new Date().toLocaleTimeString([], options);
}
|
|
779
|
+
// Short random base-36 id (up to 13 chars); not cryptographically secure.
function generateId() {
  return Math.random().toString(36).slice(2, 15);
}
|
|
782
|
+
// Provides transcript state (MESSAGE + BREADCRUMB items) and mutators.
var TranscriptProvider = ({ children }) => {
  const [transcriptItems, setTranscriptItems] = useState4([]);
  // Append a MESSAGE item unless one with the same id already exists.
  const addTranscriptMessage = useCallback5(
    (itemId, role, text = "", isHidden = false) => {
      setTranscriptItems((prev) => {
        const exists = prev.some((entry) => entry.itemId === itemId);
        if (exists) return prev;
        const message = {
          itemId,
          type: "MESSAGE",
          role,
          title: text,
          expanded: false,
          timestamp: newTimestampPretty(),
          createdAtMs: Date.now(),
          status: "IN_PROGRESS",
          isHidden
        };
        return [...prev, message];
      });
    },
    []
  );
  // Replace (or append to) the text of an existing MESSAGE item.
  const updateTranscriptMessage = useCallback5(
    (itemId, newText, append = false) => {
      setTranscriptItems((prev) =>
        prev.map((entry) => {
          if (entry.itemId !== itemId || entry.type !== "MESSAGE") return entry;
          const title = append ? (entry.title ?? "") + newText : newText;
          return { ...entry, title };
        })
      );
    },
    []
  );
  // Append a BREADCRUMB item (tool calls, guardrail notices, etc.).
  const addTranscriptBreadcrumb = useCallback5(
    (title, data) => {
      setTranscriptItems((prev) => [
        ...prev,
        {
          itemId: `breadcrumb-${generateId()}`,
          type: "BREADCRUMB",
          title,
          data,
          expanded: false,
          timestamp: newTimestampPretty(),
          createdAtMs: Date.now(),
          status: "DONE",
          isHidden: false
        }
      ]);
    },
    []
  );
  // Flip the expanded flag on one item.
  const toggleTranscriptItemExpand = useCallback5((itemId) => {
    setTranscriptItems((prev) =>
      prev.map((entry) =>
        entry.itemId === itemId ? { ...entry, expanded: !entry.expanded } : entry
      )
    );
  }, []);
  // Shallow-merge arbitrary properties into one item.
  const updateTranscriptItem = useCallback5(
    (itemId, updatedProperties) => {
      setTranscriptItems((prev) =>
        prev.map((entry) =>
          entry.itemId === itemId ? { ...entry, ...updatedProperties } : entry
        )
      );
    },
    []
  );
  // Drop everything.
  const clearTranscript = useCallback5(() => {
    setTranscriptItems([]);
  }, []);
  return /* @__PURE__ */ jsx4(
    TranscriptContext.Provider,
    {
      value: {
        transcriptItems,
        addTranscriptMessage,
        updateTranscriptMessage,
        addTranscriptBreadcrumb,
        toggleTranscriptItemExpand,
        updateTranscriptItem,
        clearTranscript
      },
      children
    }
  );
};
|
|
877
|
+
// Access the transcript context; throws outside a TranscriptProvider.
function useTranscript() {
  const ctx = useContext3(TranscriptContext);
  if (!ctx) {
    throw new Error("useTranscript must be used within a TranscriptProvider");
  }
  return ctx;
}
|
|
884
|
+
|
|
885
|
+
// src/hooks/useSessionHistory.ts
|
|
886
|
+
// Builds the per-session event handlers that mirror realtime session activity
// into the transcript UI. Returns a ref holding a stable handler bundle.
function useSessionHistory() {
  const {
    transcriptItems,
    addTranscriptBreadcrumb,
    addTranscriptMessage,
    updateTranscriptMessage,
    updateTranscriptItem
  } = useTranscript();
  const { logServerEvent } = useEvent();
  // Per-item streaming state, keyed by item id:
  const accumulatedTextRef = useRef5(/* @__PURE__ */ new Map()); // full text built up from deltas
  const pendingDeltasRef = useRef5(/* @__PURE__ */ new Map()); // deltas awaiting flush
  const deltaTimerRef = useRef5(/* @__PURE__ */ new Map()); // flush timers per item
  const interruptedItemsRef = useRef5(/* @__PURE__ */ new Set()); // items truncated by interruption; later updates are ignored
  const totalAudioDurationRef = useRef5(/* @__PURE__ */ new Map()); // last known audio position (ms) per item
|
|
900
|
+
const extractMessageText = (content = []) => {
|
|
901
|
+
if (!Array.isArray(content)) return "";
|
|
902
|
+
return content.map((c) => {
|
|
903
|
+
if (!c || typeof c !== "object") return "";
|
|
904
|
+
const item = c;
|
|
905
|
+
if (item.type === "input_text") return item.text ?? "";
|
|
906
|
+
if (item.type === "audio") return item.transcript ?? "";
|
|
907
|
+
return "";
|
|
908
|
+
}).filter(Boolean).join("\n");
|
|
909
|
+
};
|
|
910
|
+
const extractFunctionCallByName = (name, content = []) => {
|
|
911
|
+
if (!Array.isArray(content)) return void 0;
|
|
912
|
+
return content.find(
|
|
913
|
+
(c) => c && typeof c === "object" && c.type === "function_call" && c.name === name
|
|
914
|
+
);
|
|
915
|
+
};
|
|
916
|
+
const maybeParseJson = (val) => {
|
|
917
|
+
if (typeof val === "string") {
|
|
918
|
+
try {
|
|
919
|
+
return JSON.parse(val);
|
|
920
|
+
} catch {
|
|
921
|
+
return val;
|
|
922
|
+
}
|
|
923
|
+
}
|
|
924
|
+
return val;
|
|
925
|
+
};
|
|
926
|
+
const extractLastAssistantMessage = (history = []) => {
|
|
927
|
+
if (!Array.isArray(history)) return void 0;
|
|
928
|
+
return [...history].reverse().find(
|
|
929
|
+
(c) => c && typeof c === "object" && c.type === "message" && c.role === "assistant"
|
|
930
|
+
);
|
|
931
|
+
};
|
|
932
|
+
const extractModeration = (obj) => {
|
|
933
|
+
if (!obj || typeof obj !== "object") return void 0;
|
|
934
|
+
const o = obj;
|
|
935
|
+
if ("moderationCategory" in o) return o;
|
|
936
|
+
if ("outputInfo" in o) return extractModeration(o.outputInfo);
|
|
937
|
+
if ("output" in o) return extractModeration(o.output);
|
|
938
|
+
if ("result" in o) return extractModeration(o.result);
|
|
939
|
+
return void 0;
|
|
940
|
+
};
|
|
941
|
+
const sketchilyDetectGuardrailMessage = (text) => {
|
|
942
|
+
return text.match(/Failure Details: (\{.*?\})/)?.[1];
|
|
943
|
+
};
|
|
944
|
+
function handleAgentToolStart(details, _agent, functionCall) {
|
|
945
|
+
const context = details?.context;
|
|
946
|
+
const history = context?.history;
|
|
947
|
+
const lastFunctionCall = extractFunctionCallByName(functionCall.name, history);
|
|
948
|
+
addTranscriptBreadcrumb(`function call: ${lastFunctionCall?.name}`, lastFunctionCall?.arguments);
|
|
949
|
+
}
|
|
950
|
+
function handleAgentToolEnd(details, _agent, functionCall, result) {
|
|
951
|
+
const context = details?.context;
|
|
952
|
+
const history = context?.history;
|
|
953
|
+
const lastFunctionCall = extractFunctionCallByName(functionCall.name, history);
|
|
954
|
+
addTranscriptBreadcrumb(`function call result: ${lastFunctionCall?.name}`, maybeParseJson(result));
|
|
955
|
+
}
|
|
956
|
+
function handleHistoryAdded(item) {
|
|
957
|
+
if (!item || item.type !== "message") return;
|
|
958
|
+
const { itemId, role, content = [] } = item;
|
|
959
|
+
if (itemId && role) {
|
|
960
|
+
let text = extractMessageText(content);
|
|
961
|
+
if (role === "assistant" && !text) {
|
|
962
|
+
text = "";
|
|
963
|
+
} else if (role === "user" && !text) {
|
|
964
|
+
return;
|
|
965
|
+
}
|
|
966
|
+
const guardrailMessage = sketchilyDetectGuardrailMessage(text);
|
|
967
|
+
if (guardrailMessage) {
|
|
968
|
+
const failureDetails = JSON.parse(guardrailMessage);
|
|
969
|
+
addTranscriptBreadcrumb("Output Guardrail Active", { details: failureDetails });
|
|
970
|
+
} else {
|
|
971
|
+
addTranscriptMessage(itemId, role, text);
|
|
972
|
+
}
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
function handleHistoryUpdated(items) {
|
|
976
|
+
items.forEach((item) => {
|
|
977
|
+
if (!item || item.type !== "message") return;
|
|
978
|
+
const { itemId, role, content = [] } = item;
|
|
979
|
+
if (interruptedItemsRef.current.has(itemId)) return;
|
|
980
|
+
if (role === "assistant") return;
|
|
981
|
+
const text = extractMessageText(content);
|
|
982
|
+
if (text) {
|
|
983
|
+
updateTranscriptMessage(itemId, text, false);
|
|
984
|
+
}
|
|
985
|
+
});
|
|
986
|
+
}
|
|
987
|
+
  // Additional per-item text caches; both are set to the full accumulated
  // text on every delta (see handleTranscriptionDelta) and cleared on
  // completion/truncation.
  const pendingTextRef = useRef5(/* @__PURE__ */ new Map());
  const displayedTextRef = useRef5(/* @__PURE__ */ new Map());
|
|
989
|
+
function handleTranscriptionDelta(item, audioPositionMs) {
|
|
990
|
+
const itemId = item.item_id;
|
|
991
|
+
const deltaText = item.delta || "";
|
|
992
|
+
if (!itemId || !deltaText) return;
|
|
993
|
+
if (interruptedItemsRef.current.has(itemId)) return;
|
|
994
|
+
const text = (accumulatedTextRef.current.get(itemId) || "") + deltaText;
|
|
995
|
+
accumulatedTextRef.current.set(itemId, text);
|
|
996
|
+
pendingTextRef.current.set(itemId, text);
|
|
997
|
+
displayedTextRef.current.set(itemId, text);
|
|
998
|
+
if (audioPositionMs !== void 0 && audioPositionMs > 0) {
|
|
999
|
+
totalAudioDurationRef.current.set(itemId, audioPositionMs);
|
|
1000
|
+
}
|
|
1001
|
+
if (text.replace(/[\s.…]+/g, "").length === 0) return;
|
|
1002
|
+
updateTranscriptMessage(itemId, text, false);
|
|
1003
|
+
}
|
|
1004
|
+
function handleTranscriptionCompleted(item) {
|
|
1005
|
+
const itemId = item.item_id;
|
|
1006
|
+
if (interruptedItemsRef.current.has(itemId)) return;
|
|
1007
|
+
if (itemId) {
|
|
1008
|
+
const timer = deltaTimerRef.current.get(itemId);
|
|
1009
|
+
if (timer) clearTimeout(timer);
|
|
1010
|
+
deltaTimerRef.current.delete(itemId);
|
|
1011
|
+
pendingDeltasRef.current.delete(itemId);
|
|
1012
|
+
pendingTextRef.current.delete(itemId);
|
|
1013
|
+
displayedTextRef.current.delete(itemId);
|
|
1014
|
+
accumulatedTextRef.current.delete(itemId);
|
|
1015
|
+
totalAudioDurationRef.current.delete(itemId);
|
|
1016
|
+
const displayedText = displayedTextRef.current.get(itemId);
|
|
1017
|
+
const finalText = displayedText || item.transcript || "";
|
|
1018
|
+
const stripped = finalText.replace(/[\s.…]+/g, "");
|
|
1019
|
+
if (stripped.length > 0) {
|
|
1020
|
+
updateTranscriptMessage(itemId, finalText, false);
|
|
1021
|
+
}
|
|
1022
|
+
updateTranscriptItem(itemId, { status: "DONE" });
|
|
1023
|
+
const transcriptItem = transcriptItems.find((i) => i.itemId === itemId);
|
|
1024
|
+
if (transcriptItem?.guardrailResult?.status === "IN_PROGRESS") {
|
|
1025
|
+
updateTranscriptItem(itemId, {
|
|
1026
|
+
guardrailResult: {
|
|
1027
|
+
status: "DONE",
|
|
1028
|
+
category: "NONE",
|
|
1029
|
+
rationale: ""
|
|
1030
|
+
}
|
|
1031
|
+
});
|
|
1032
|
+
}
|
|
1033
|
+
}
|
|
1034
|
+
}
|
|
1035
|
+
function handleGuardrailTripped(details, _agent, guardrail) {
|
|
1036
|
+
const result = guardrail.result;
|
|
1037
|
+
const output = result?.output;
|
|
1038
|
+
const outputInfo = output?.outputInfo;
|
|
1039
|
+
const moderation = extractModeration(outputInfo);
|
|
1040
|
+
logServerEvent({ type: "guardrail_tripped", payload: moderation });
|
|
1041
|
+
const context = details?.context;
|
|
1042
|
+
const history = context?.history;
|
|
1043
|
+
const lastAssistant = extractLastAssistantMessage(history);
|
|
1044
|
+
if (lastAssistant && moderation) {
|
|
1045
|
+
const category = moderation.moderationCategory ?? "NONE";
|
|
1046
|
+
const rationale = moderation.moderationRationale ?? "";
|
|
1047
|
+
const offendingText = moderation.testText;
|
|
1048
|
+
updateTranscriptItem(lastAssistant.itemId, {
|
|
1049
|
+
guardrailResult: {
|
|
1050
|
+
status: "DONE",
|
|
1051
|
+
category,
|
|
1052
|
+
rationale,
|
|
1053
|
+
testText: offendingText
|
|
1054
|
+
}
|
|
1055
|
+
});
|
|
1056
|
+
}
|
|
1057
|
+
}
|
|
1058
|
+
  // Keep a live snapshot of transcript items for handlers that close over it.
  const transcriptItemsRef = useRef5(transcriptItems);
  transcriptItemsRef.current = transcriptItems;
  // Stable bundle of handlers handed to the realtime session layer.
  const handlersRef = useRef5({
    handleAgentToolStart,
    handleAgentToolEnd,
    handleHistoryUpdated,
    handleHistoryAdded,
    handleTranscriptionDelta,
    handleTranscriptionCompleted,
    isInterrupted: (itemId) => interruptedItemsRef.current.has(itemId),
    // Trim an interrupted assistant message to roughly the portion that was
    // actually spoken, estimated from audio position vs total audio length.
    handleTruncation: (itemId, audioEndMs, totalAudioMs) => {
      if (interruptedItemsRef.current.has(itemId)) return;
      const timer = deltaTimerRef.current.get(itemId);
      if (timer) clearTimeout(timer);
      deltaTimerRef.current.delete(itemId);
      // Capture the best-known full text before clearing per-item caches.
      const fullText = pendingTextRef.current.get(itemId) || accumulatedTextRef.current.get(itemId) || "";
      pendingDeltasRef.current.delete(itemId);
      pendingTextRef.current.delete(itemId);
      displayedTextRef.current.delete(itemId);
      accumulatedTextRef.current.delete(itemId);
      totalAudioDurationRef.current.delete(itemId);
      // Mark interrupted so later deltas/completions for this item are ignored.
      interruptedItemsRef.current.add(itemId);
      if (!fullText || totalAudioMs <= 0) {
        // Nothing usable was spoken; hide the item entirely.
        updateTranscriptItem(itemId, { isHidden: true, status: "DONE" });
        return;
      }
      // Estimate the character position reached, proportional to audio played.
      const fractionSpoken = Math.min(Math.max(audioEndMs / totalAudioMs, 0), 1);
      const estimatedCharPos = Math.floor(fullText.length * fractionSpoken);
      // Snap backwards to the nearest word boundary...
      let truncatePos = estimatedCharPos;
      while (truncatePos > 0 && !/\s/.test(fullText[truncatePos - 1])) {
        truncatePos--;
      }
      // ...or forwards if we backed up all the way to the start of the text.
      if (truncatePos === 0 && estimatedCharPos > 0) {
        truncatePos = estimatedCharPos;
        while (truncatePos < fullText.length && !/\s/.test(fullText[truncatePos])) {
          truncatePos++;
        }
      }
      const truncatedText = fullText.slice(0, truncatePos).trim();
      if (truncatedText.length > 0) {
        updateTranscriptMessage(itemId, truncatedText + "...", false);
        updateTranscriptItem(itemId, { status: "DONE" });
      } else {
        updateTranscriptItem(itemId, { isHidden: true, status: "DONE" });
      }
    },
    handleGuardrailTripped
  });
  return handlersRef;
}
|
|
1108
|
+
|
|
1109
|
+
// src/hooks/useRealtimeSession.ts
|
|
1110
|
+
// Adapter-agnostic realtime voice session hook: manages connection lifecycle,
// wires normalized session events into the transcript/event log, and exposes
// imperative controls (mute, push-to-talk, interrupt, raw events).
function useRealtimeSession(callbacks = {}) {
  const sessionRef = useRef6(null);
  const [status, setStatus] = useState5("DISCONNECTED");
  const { logClientEvent, logServerEvent } = useEvent();
  // Audio codec preference; overridable via a ?codec= query parameter.
  const codecParamRef = useRef6("opus");
  // Update local state, notify the consumer, and log the transition.
  const updateStatus = useCallback6(
    (s) => {
      setStatus(s);
      callbacks.onConnectionChange?.(s);
      logClientEvent({}, s);
    },
    [callbacks, logClientEvent]
  );
  const historyHandlers = useSessionHistory().current;
  // Item ids truncated by barge-in; further assistant transcripts are ignored.
  const interruptedRef = useRef6(/* @__PURE__ */ new Set());
|
|
1125
|
+
useEffect5(() => {
|
|
1126
|
+
if (typeof window !== "undefined") {
|
|
1127
|
+
const params = new URLSearchParams(window.location.search);
|
|
1128
|
+
const codec = params.get("codec");
|
|
1129
|
+
if (codec) {
|
|
1130
|
+
codecParamRef.current = codec.toLowerCase();
|
|
1131
|
+
}
|
|
1132
|
+
}
|
|
1133
|
+
}, []);
|
|
1134
|
+
  // Subscribe to the adapter's normalized event stream and route each event
  // into the history/transcript handlers or the raw event log.
  const wireNormalizedEvents = useCallback6((session) => {
    // Intentional no-op today; kept so adapters can rely on a listener existing.
    session.on("user_speech_started", () => {
    });
    // User speech transcription: deltas stream in, finals replace the text.
    session.on("user_transcript", (data) => {
      if (data.isFinal) {
        const text = data.text || data.delta || "";
        // Drop finals that contain no real characters (whitespace/punctuation only).
        if (text.replace(/[\s.…,!?]+/g, "").length === 0) return;
        historyHandlers.handleTranscriptionCompleted({
          item_id: data.itemId,
          transcript: text
        });
      } else if (data.delta) {
        historyHandlers.handleTranscriptionDelta({
          item_id: data.itemId,
          delta: data.delta
        });
      }
    });
    // Assistant transcription; ignored entirely once the item was interrupted.
    session.on("assistant_transcript", (data) => {
      if (interruptedRef.current.has(data.itemId)) return;
      if (data.isFinal) {
        historyHandlers.handleTranscriptionCompleted({
          item_id: data.itemId,
          transcript: data.text || ""
        });
      } else if (data.delta) {
        historyHandlers.handleTranscriptionDelta(
          { item_id: data.itemId, delta: data.delta }
        );
      }
    });
    // Tool lifecycle -> transcript breadcrumbs (no details/agent context here).
    session.on("tool_call_start", (name, input) => {
      historyHandlers.handleAgentToolStart(
        {},
        void 0,
        { name, arguments: input }
      );
    });
    session.on("tool_call_end", (name, input, result) => {
      historyHandlers.handleAgentToolEnd(
        {},
        void 0,
        { name, arguments: input },
        result
      );
    });
    session.on("agent_handoff", (_from, to) => {
      callbacks.onAgentHandoff?.(to);
    });
    session.on("guardrail_tripped", (info) => {
      historyHandlers.handleGuardrailTripped(
        {},
        void 0,
        { result: info }
      );
    });
    // Raw provider events: intercept truncation/history events, log the rest.
    session.on("raw_event", (event) => {
      const ev = event;
      if (ev.type === "conversation.item.truncated") {
        // Remember interrupted items so later transcripts are ignored.
        const itemId = ev.item_id;
        if (itemId) interruptedRef.current.add(itemId);
        return;
      }
      if (ev.type === "history_updated") {
        historyHandlers.handleHistoryUpdated(ev.items);
        return;
      }
      if (ev.type === "history_added") {
        historyHandlers.handleHistoryAdded(ev.item);
        return;
      }
      logServerEvent(ev);
    });
    // Errors: swallow benign cancel/active-response races, surface the rest.
    session.on("error", (error) => {
      const e = error;
      const msg = e instanceof Error ? e.message : typeof e === "string" ? e : JSON.stringify(e);
      const errObj = typeof e === "object" && e?.error ? e.error : e;
      const code = typeof errObj === "object" && errObj?.code ? String(errObj.code) : "";
      const msgStr = typeof msg === "string" ? msg : "";
      const isBenign = code === "response_cancel_not_active" || code === "conversation_already_has_active_response" || msgStr.includes("response_cancel_not_active") || msgStr.includes("conversation_already_has_active_response");
      if (isBenign) return;
      console.error("Session error:", msg);
      logServerEvent({ type: "error", message: msg });
    });
  }, [callbacks, historyHandlers, logServerEvent]);
|
|
1219
|
+
const connect = useCallback6(
|
|
1220
|
+
async ({
|
|
1221
|
+
getEphemeralKey,
|
|
1222
|
+
initialAgents,
|
|
1223
|
+
audioElement,
|
|
1224
|
+
extraContext,
|
|
1225
|
+
outputGuardrails,
|
|
1226
|
+
adapter
|
|
1227
|
+
}) => {
|
|
1228
|
+
if (sessionRef.current) return;
|
|
1229
|
+
if (!adapter) {
|
|
1230
|
+
throw new Error(
|
|
1231
|
+
"useRealtimeSession: `adapter` is required in ConnectOptions. Pass an adapter like openai() from @jchaffin/voicekit/openai."
|
|
1232
|
+
);
|
|
1233
|
+
}
|
|
1234
|
+
updateStatus("CONNECTING");
|
|
1235
|
+
const ek = await getEphemeralKey();
|
|
1236
|
+
const rootAgent = initialAgents[0];
|
|
1237
|
+
const codecParam = codecParamRef.current;
|
|
1238
|
+
const session = adapter.createSession(rootAgent, {
|
|
1239
|
+
codec: codecParam,
|
|
1240
|
+
language: "en"
|
|
1241
|
+
});
|
|
1242
|
+
sessionRef.current = session;
|
|
1243
|
+
wireNormalizedEvents(session);
|
|
1244
|
+
try {
|
|
1245
|
+
await session.connect({
|
|
1246
|
+
authToken: ek,
|
|
1247
|
+
audioElement,
|
|
1248
|
+
context: extraContext,
|
|
1249
|
+
outputGuardrails
|
|
1250
|
+
});
|
|
1251
|
+
updateStatus("CONNECTED");
|
|
1252
|
+
} catch (connectError) {
|
|
1253
|
+
console.error("Connection error:", connectError);
|
|
1254
|
+
sessionRef.current = null;
|
|
1255
|
+
updateStatus("DISCONNECTED");
|
|
1256
|
+
throw connectError;
|
|
1257
|
+
}
|
|
1258
|
+
},
|
|
1259
|
+
[updateStatus, wireNormalizedEvents]
|
|
1260
|
+
);
|
|
1261
|
+
const disconnect = useCallback6(async () => {
|
|
1262
|
+
if (sessionRef.current) {
|
|
1263
|
+
try {
|
|
1264
|
+
await sessionRef.current.disconnect();
|
|
1265
|
+
} catch (error) {
|
|
1266
|
+
console.error("Error closing session:", error);
|
|
1267
|
+
} finally {
|
|
1268
|
+
sessionRef.current = null;
|
|
1269
|
+
updateStatus("DISCONNECTED");
|
|
1270
|
+
}
|
|
1271
|
+
} else {
|
|
1272
|
+
updateStatus("DISCONNECTED");
|
|
1273
|
+
}
|
|
1274
|
+
}, [updateStatus]);
|
|
1275
|
+
const interrupt = useCallback6(() => {
|
|
1276
|
+
sessionRef.current?.interrupt();
|
|
1277
|
+
}, []);
|
|
1278
|
+
const sendUserText = useCallback6((text) => {
|
|
1279
|
+
if (!sessionRef.current) throw new Error("Session not connected");
|
|
1280
|
+
sessionRef.current.sendMessage(text);
|
|
1281
|
+
}, []);
|
|
1282
|
+
const sendEvent = useCallback6((ev) => {
|
|
1283
|
+
sessionRef.current?.sendRawEvent?.(ev);
|
|
1284
|
+
}, []);
|
|
1285
|
+
const mute = useCallback6((m) => {
|
|
1286
|
+
sessionRef.current?.mute(m);
|
|
1287
|
+
}, []);
|
|
1288
|
+
const pushToTalkStart = useCallback6(() => {
|
|
1289
|
+
sessionRef.current?.sendRawEvent?.({ type: "input_audio_buffer.clear" });
|
|
1290
|
+
}, []);
|
|
1291
|
+
const pushToTalkStop = useCallback6(() => {
|
|
1292
|
+
sessionRef.current?.sendRawEvent?.({ type: "input_audio_buffer.commit" });
|
|
1293
|
+
sessionRef.current?.sendRawEvent?.({ type: "response.create" });
|
|
1294
|
+
}, []);
|
|
1295
|
+
  // Public surface of the hook.
  return {
    status,
    connect,
    disconnect,
    sendUserText,
    sendEvent,
    mute,
    pushToTalkStart,
    pushToTalkStop,
    interrupt
  };
}
|
|
1307
|
+
|
|
1308
|
+
// src/guardrails.ts
|
|
1309
|
+
import { z } from "zod";
|
|
1310
|
+
// Moderation categories understood by the default guardrail classifier.
var MODERATION_CATEGORIES = [
  "OFFENSIVE",
  "OFF_BRAND",
  "VIOLENCE",
  "NONE"
];
var ModerationCategoryZod = z.enum([...MODERATION_CATEGORIES]);
// Shape of the classifier's JSON verdict (strict: extra keys are rejected).
var GuardrailOutputZod = z.object({
  moderationRationale: z.string(),
  moderationCategory: ModerationCategoryZod,
  testText: z.string().optional()
}).strict();
|
|
1322
|
+
// Classify `message` against the moderation categories by POSTing a prompt
// to `apiEndpoint`. Returns the validated verdict object, or null when the
// request fails or the response does not match the expected schema.
async function runGuardrailClassifier(message, config = {}) {
  const {
    apiEndpoint = "/api/responses",
    model = "gpt-4o-mini",
    categories = MODERATION_CATEGORIES,
    companyName = "Company"
  } = config;
  // Render one "- NAME: description" line per category for the prompt.
  const categoryDescriptions = categories.map((cat) => {
    switch (cat) {
      case "OFFENSIVE":
        return "- OFFENSIVE: Content that includes hate speech, discriminatory language, insults, slurs, or harassment.";
      case "OFF_BRAND":
        return "- OFF_BRAND: Content that discusses competitors in a disparaging way.";
      case "VIOLENCE":
        return "- VIOLENCE: Content that includes explicit threats, incitement of harm, or graphic descriptions of physical injury or violence.";
      case "NONE":
        return "- NONE: If no other classes are appropriate and the message is fine.";
      default:
        return `- ${cat}: Custom category.`;
    }
  }).join("\n");
  const messages = [
    {
      role: "user",
      content: `You are an expert at classifying text according to moderation policies. Consider the provided message, analyze potential classes from output_classes, and output the best classification. Output json, following the provided schema. Keep your analysis and reasoning short and to the point, maximum 2 sentences.

<info>
- Company name: ${companyName}
</info>

<message>
${message}
</message>

<output_classes>
${categoryDescriptions}
</output_classes>
`
    }
  ];
  // NOTE(review): `schema: GuardrailOutputZod` serializes the Zod schema
  // OBJECT into the request body, which is not a JSON Schema document — the
  // endpoint likely needs a zod-to-JSON-Schema conversion here; confirm
  // against the /api/responses contract.
  const response = await fetch(apiEndpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model,
      input: messages,
      text: {
        format: {
          type: "json_schema",
          name: "output_format",
          schema: GuardrailOutputZod
        }
      }
    })
  });
  if (!response.ok) return null;
  try {
    // NOTE(review): assumes the endpoint returns the bare verdict object
    // (not a wrapper envelope) — verify against the server implementation.
    const data = await response.json();
    return GuardrailOutputZod.parse(data);
  } catch {
    return null;
  }
}
|
|
1385
|
+
// Build an output guardrail backed by runGuardrailClassifier.
// The guardrail fails OPEN: any classifier failure (null result or thrown
// error) yields tripwireTriggered: false with an error payload.
function createModerationGuardrail(config = {}) {
  return {
    name: "moderation_guardrail",
    async execute({ agentOutput }) {
      try {
        const res = await runGuardrailClassifier(agentOutput, config);
        // FIX: previously `res?.moderationCategory !== "NONE"` evaluated
        // true when res was null, so a FAILED classification tripped the
        // guardrail — inconsistent with the catch branch below, which fails
        // open. Only trip on an actual non-NONE verdict.
        const triggered = res !== null && res.moderationCategory !== "NONE";
        return {
          tripwireTriggered: triggered,
          outputInfo: res || { error: "guardrail_failed" }
        };
      } catch {
        return {
          tripwireTriggered: false,
          outputInfo: { error: "guardrail_failed" }
        };
      }
    }
  };
}
|
|
1405
|
+
// Wrap a user-supplied async classifier `(text) => { triggered, info }` as a
// named output guardrail. Classifier errors fail open (not triggered).
function createCustomGuardrail(name, classifier) {
  return {
    name,
    async execute({ agentOutput }) {
      try {
        const verdict = await classifier(agentOutput);
        return {
          tripwireTriggered: verdict.triggered,
          outputInfo: verdict.info
        };
      } catch {
        return {
          tripwireTriggered: false,
          outputInfo: { error: "guardrail_failed" }
        };
      }
    }
  };
}
|
|
1424
|
+
|
|
1425
|
+
// src/suggestions/SuggestionContext.tsx
|
|
1426
|
+
import { createContext as createContext4, useContext as useContext4, useState as useState6, useCallback as useCallback7, useEffect as useEffect6 } from "react";
|
|
1427
|
+
|
|
1428
|
+
// src/suggestions/types.ts
|
|
1429
|
+
// Window CustomEvent name used to broadcast suggestion groups to providers.
var SUGGESTION_EVENT = "voicekit:suggestions";
|
|
1430
|
+
|
|
1431
|
+
// src/suggestions/SuggestionContext.tsx
|
|
1432
|
+
import { jsx as jsx5 } from "react/jsx-runtime";
|
|
1433
|
+
// Suggestion context; null outside a SuggestionProvider (useSuggestions throws).
var SuggestionCtx = createContext4(null);
|
|
1434
|
+
// Hosts the currently visible suggestion group and listens for window-level
// suggestion events dispatched by emitSuggestions()/clearSuggestions().
function SuggestionProvider({
  children,
  onSelect,
  autoClear = true
}) {
  const [suggestions, setSuggestionsState] = useState6(null);
  const setSuggestions = useCallback7((group) => {
    setSuggestionsState(group);
  }, []);
  const clearSuggestions2 = useCallback7(() => {
    setSuggestionsState(null);
  }, []);
  // Notify the consumer, then optionally dismiss the group.
  const selectSuggestion = useCallback7(
    (item) => {
      onSelect?.(item);
      if (autoClear) setSuggestionsState(null);
    },
    [onSelect, autoClear]
  );
  // Pick up suggestion groups broadcast as window CustomEvents.
  useEffect6(() => {
    const onSuggestionEvent = (e) => {
      const detail = e.detail;
      if (detail?.group) {
        setSuggestionsState(detail.group);
      }
    };
    window.addEventListener(SUGGESTION_EVENT, onSuggestionEvent);
    return () => window.removeEventListener(SUGGESTION_EVENT, onSuggestionEvent);
  }, []);
  const value = {
    suggestions,
    setSuggestions,
    selectSuggestion,
    clearSuggestions: clearSuggestions2
  };
  return /* @__PURE__ */ jsx5(SuggestionCtx.Provider, { value, children });
}
|
|
1471
|
+
// Access the suggestion context; throws outside a SuggestionProvider.
function useSuggestions() {
  const context = useContext4(SuggestionCtx);
  if (!context) {
    throw new Error("useSuggestions must be used within a SuggestionProvider");
  }
  return context;
}
|
|
1478
|
+
|
|
1479
|
+
// src/suggestions/emitSuggestions.ts
|
|
1480
|
+
// Broadcast a suggestion group to any mounted SuggestionProvider.
// No-op outside a browser environment.
function emitSuggestions(group) {
  if (typeof window === "undefined") return;
  const event = new CustomEvent(SUGGESTION_EVENT, { detail: { group } });
  window.dispatchEvent(event);
}
|
|
1488
|
+
// Broadcast a null group, dismissing suggestions in any mounted provider.
// No-op outside a browser environment.
function clearSuggestions() {
  if (typeof window === "undefined") return;
  const event = new CustomEvent(SUGGESTION_EVENT, { detail: { group: null } });
  window.dispatchEvent(event);
}
|
|
1496
|
+
|
|
1497
|
+
// src/suggestions/SuggestionChips.tsx
|
|
1498
|
+
import React6 from "react";
|
|
1499
|
+
import { jsx as jsx6, jsxs as jsxs2 } from "react/jsx-runtime";
|
|
1500
|
+
// Render the active suggestion group as clickable chips. `group` can be
// passed explicitly to override the context value; `renderItem` allows
// fully custom chip rendering. Returns null when there is nothing to show.
function SuggestionChips({
  group: groupOverride,
  renderItem,
  className,
  chipClassName
}) {
  const { suggestions, selectSuggestion } = useSuggestions();
  const group = groupOverride ?? suggestions;
  if (!group || group.items.length === 0) return null;
  return /* @__PURE__ */ jsxs2("div", { className: className ?? "vk-suggestions", children: [
    // Optional prompt line above the chips.
    group.prompt && /* @__PURE__ */ jsx6("p", { className: "vk-suggestions-prompt", style: { fontSize: "0.875rem", opacity: 0.7, marginBottom: "0.5rem" }, children: group.prompt }),
    /* @__PURE__ */ jsx6(
      "div",
      {
        className: "vk-suggestions-list",
        style: { display: "flex", flexWrap: "wrap", gap: "0.5rem" },
        children: group.items.map((item) => {
          const handleClick = () => selectSuggestion(item);
          // Custom renderer wins; keyed Fragment preserves list identity.
          if (renderItem) {
            return /* @__PURE__ */ jsx6(React6.Fragment, { children: renderItem(item, handleClick) }, item.id);
          }
          // Default chip: inline styles apply only when no chipClassName is given.
          return /* @__PURE__ */ jsx6(
            "button",
            {
              onClick: handleClick,
              className: chipClassName ?? "vk-chip",
              style: chipClassName ? void 0 : {
                display: "inline-flex",
                alignItems: "center",
                gap: "0.375rem",
                padding: "0.5rem 0.75rem",
                borderRadius: "9999px",
                fontSize: "0.875rem",
                fontWeight: 500,
                border: "1px solid rgba(99,102,241,0.3)",
                background: "rgba(99,102,241,0.08)",
                color: "inherit",
                cursor: "pointer",
                transition: "all 0.15s"
              },
              children: item.label
            },
            item.id
          );
        })
      }
    )
  ] });
}
|
|
1549
|
+
export {
|
|
1550
|
+
ChatInput,
|
|
1551
|
+
ConnectButton,
|
|
1552
|
+
EventEmitter,
|
|
1553
|
+
EventProvider,
|
|
1554
|
+
GuardrailOutputZod,
|
|
1555
|
+
MODERATION_CATEGORIES,
|
|
1556
|
+
ModerationCategoryZod,
|
|
1557
|
+
SUGGESTION_EVENT,
|
|
1558
|
+
StatusIndicator,
|
|
1559
|
+
SuggestionChips,
|
|
1560
|
+
SuggestionProvider,
|
|
1561
|
+
TOOL_RESULT_EVENT,
|
|
1562
|
+
Transcript,
|
|
1563
|
+
TranscriptProvider,
|
|
1564
|
+
VoiceChat,
|
|
1565
|
+
VoiceProvider,
|
|
1566
|
+
applyCodecPreferences,
|
|
1567
|
+
audioFormatForCodec,
|
|
1568
|
+
clearSuggestions,
|
|
1569
|
+
convertWebMToWav,
|
|
1570
|
+
createAPITool,
|
|
1571
|
+
createAgent,
|
|
1572
|
+
createAgentFromTemplate,
|
|
1573
|
+
createCustomGuardrail,
|
|
1574
|
+
createEventTool,
|
|
1575
|
+
createModerationGuardrail,
|
|
1576
|
+
createNavigationTool,
|
|
1577
|
+
createRAGTool,
|
|
1578
|
+
createSearchTool,
|
|
1579
|
+
defineTool,
|
|
1580
|
+
emitSuggestions,
|
|
1581
|
+
encodeWAV,
|
|
1582
|
+
runGuardrailClassifier,
|
|
1583
|
+
useAudioRecorder,
|
|
1584
|
+
useEvent,
|
|
1585
|
+
useRealtimeSession,
|
|
1586
|
+
useSessionHistory,
|
|
1587
|
+
useSuggestions,
|
|
1588
|
+
useToolListener,
|
|
1589
|
+
useToolResult,
|
|
1590
|
+
useToolResults,
|
|
1591
|
+
useTranscript,
|
|
1592
|
+
useVoice
|
|
1593
|
+
};
|