@jchaffin/voicekit 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,1593 @@
1
+ import {
2
+ TOOL_RESULT_EVENT,
3
+ createAPITool,
4
+ createEventTool,
5
+ createNavigationTool,
6
+ createRAGTool,
7
+ createSearchTool,
8
+ defineTool
9
+ } from "./chunk-T3II3DRG.mjs";
10
+ import {
11
+ EventEmitter
12
+ } from "./chunk-22WLZIXO.mjs";
13
+
14
+ // src/VoiceProvider.tsx
15
+ import {
16
+ createContext,
17
+ useContext,
18
+ useState,
19
+ useRef,
20
+ useCallback,
21
+ useEffect
22
+ } from "react";
23
+ import { jsx } from "react/jsx-runtime";
24
// Shared context consumed by useVoice(); null until a provider mounts.
var VoiceContext = createContext(null);

/**
 * Owns the lifecycle of a realtime voice session: connection status, the
 * hidden <audio> playback sink, the running transcript, and mute state.
 * Everything is exposed to descendants through VoiceContext.
 */
function VoiceProvider({
  children,
  adapter,
  agent,
  sessionEndpoint = "/api/session",
  model,
  language = "en",
  onStatusChange,
  onTranscriptUpdate,
  onToolCall,
  onError
}) {
  const [status, setStatus] = useState("DISCONNECTED");
  const [transcript, setTranscript] = useState([]);
  const [isMuted, setIsMuted] = useState(false);
  const sessionRef = useRef(null);
  const audioRef = useRef(null);
  // Mirrors `status` so stable callbacks can read the latest value.
  const statusRef = useRef("DISCONNECTED");
  // Id of the assistant message currently being streamed, if any.
  const currentMsgIdRef = useRef(null);

  useEffect(() => {
    statusRef.current = status;
  }, [status]);

  // Create the hidden <audio> element once, client-side only.
  useEffect(() => {
    if (typeof window === "undefined") return;
    const sink = document.createElement("audio");
    sink.autoplay = true;
    sink.style.display = "none";
    document.body.appendChild(sink);
    audioRef.current = sink;
    return () => {
      try {
        sink.pause();
        sink.srcObject = null;
        sink.remove();
      } catch {
      }
    };
  }, []);

  const changeStatus = useCallback((next) => {
    setStatus(next);
    onStatusChange?.(next);
  }, [onStatusChange]);

  // Append a message and notify listeners; returns the new message id.
  const appendMessage = useCallback((role, text, id) => {
    const entry = {
      id: id || crypto.randomUUID(),
      role,
      text,
      timestamp: new Date(),
      status: "pending"
    };
    setTranscript((prev) => {
      const next = [...prev, entry];
      onTranscriptUpdate?.(next);
      return next;
    });
    return entry.id;
  }, [onTranscriptUpdate]);

  // Replace, or append to, the text of an existing message.
  const patchMessage = useCallback((id, text, append = false) => {
    setTranscript((prev) => {
      const next = prev.map(
        (m) => m.id === id ? { ...m, text: append ? m.text + text : text } : m
      );
      onTranscriptUpdate?.(next);
      return next;
    });
  }, [onTranscriptUpdate]);

  // Mark a streaming message as finished.
  const finishMessage = useCallback((id) => {
    setTranscript((prev) => {
      const next = prev.map(
        (m) => m.id === id ? { ...m, status: "complete" } : m
      );
      onTranscriptUpdate?.(next);
      return next;
    });
  }, [onTranscriptUpdate]);

  // Subscribe to adapter session events and fan them into local state.
  const attachSessionHandlers = useCallback((session) => {
    session.on("user_transcript", (data) => {
      if (!data.isFinal) return;
      appendMessage("user", data.text || data.delta || "");
    });
    session.on("assistant_transcript", (data) => {
      if (data.isFinal) {
        if (currentMsgIdRef.current) {
          finishMessage(currentMsgIdRef.current);
          currentMsgIdRef.current = null;
        }
        return;
      }
      if (!data.delta) return;
      if (currentMsgIdRef.current) {
        patchMessage(currentMsgIdRef.current, data.delta, true);
      } else {
        currentMsgIdRef.current = appendMessage("assistant", data.delta);
      }
    });
    session.on("tool_call_end", (name, input, output) => {
      onToolCall?.(name, input, output);
    });
    session.on("error", (error) => {
      console.error("VoiceKit session error:", error);
      onError?.(error);
    });
  }, [appendMessage, patchMessage, finishMessage, onToolCall, onError]);

  // POST the session endpoint for an ephemeral key; null on any failure.
  const fetchToken = useCallback(async () => {
    try {
      const res = await fetch(sessionEndpoint, { method: "POST" });
      if (!res.ok) return null;
      const data = await res.json();
      return data.ephemeralKey || data.token || null;
    } catch {
      return null;
    }
  }, [sessionEndpoint]);

  const connect = useCallback(async () => {
    if (statusRef.current !== "DISCONNECTED") return;
    if (!audioRef.current) return;
    changeStatus("CONNECTING");
    try {
      const token = await fetchToken();
      if (!token) {
        onError?.(new Error("Failed to get session key"));
        changeStatus("DISCONNECTED");
        return;
      }
      const session = adapter.createSession(agent, { model, language });
      sessionRef.current = session;
      attachSessionHandlers(session);
      await session.connect({
        authToken: token,
        audioElement: audioRef.current
      });
      changeStatus("CONNECTED");
      // Nudge the model to produce an opening response shortly after connect.
      setTimeout(() => {
        session.sendRawEvent?.({ type: "response.create" });
      }, 500);
    } catch (error) {
      console.error("VoiceKit connection failed:", error);
      onError?.(error instanceof Error ? error : new Error(String(error)));
      changeStatus("DISCONNECTED");
    }
  }, [adapter, agent, model, language, fetchToken, attachSessionHandlers, changeStatus, onError]);

  const disconnect = useCallback(async () => {
    if (sessionRef.current) {
      try {
        await sessionRef.current.disconnect();
      } catch {
      }
      sessionRef.current = null;
    }
    currentMsgIdRef.current = null;
    changeStatus("DISCONNECTED");
  }, [changeStatus]);

  // Send a typed message; interrupts any in-flight assistant speech first.
  const sendMessage = useCallback((text) => {
    if (!sessionRef.current || statusRef.current !== "CONNECTED") return;
    sessionRef.current.interrupt();
    sessionRef.current.sendMessage(text);
  }, []);

  const interrupt = useCallback(() => {
    sessionRef.current?.interrupt();
  }, []);

  // Mute both the outgoing mic (via the session) and local playback.
  const mute = useCallback((muted) => {
    setIsMuted(muted);
    sessionRef.current?.mute(muted);
    if (audioRef.current) {
      audioRef.current.muted = muted;
    }
  }, []);

  const clearTranscript = useCallback(() => {
    setTranscript([]);
    onTranscriptUpdate?.([]);
  }, [onTranscriptUpdate]);

  // Best-effort teardown of any live session on unmount.
  useEffect(() => {
    return () => {
      try {
        sessionRef.current?.disconnect();
      } catch {
      }
    };
  }, []);

  const value = {
    status,
    connect,
    disconnect,
    transcript,
    clearTranscript,
    sendMessage,
    interrupt,
    mute,
    isMuted,
    agent
  };
  return jsx(VoiceContext.Provider, { value, children });
}
218
/**
 * Access the voice session API provided by <VoiceProvider>.
 * @throws Error when called outside a VoiceProvider subtree.
 */
function useVoice() {
  const ctx = useContext(VoiceContext);
  if (ctx) return ctx;
  throw new Error("useVoice must be used within a VoiceProvider");
}
225
+
226
+ // src/components/VoiceChat.tsx
227
+ import { useRef as useRef2, useEffect as useEffect2 } from "react";
228
+ import { Fragment, jsx as jsx2, jsxs } from "react/jsx-runtime";
229
// One transcript bubble; alignment and colors depend on the message role.
function Message({ message, userClassName, assistantClassName }) {
  const fromUser = message.role === "user";
  const bubbleClass = fromUser
    ? userClassName || "bg-blue-500 text-white rounded-br-md"
    : assistantClassName || "bg-gray-100 dark:bg-gray-800 text-gray-900 dark:text-gray-100 rounded-bl-md";
  return jsx2("div", {
    className: `flex ${fromUser ? "justify-end" : "justify-start"}`,
    children: jsx2("div", {
      className: `max-w-[80%] rounded-2xl px-4 py-2 ${bubbleClass}`,
      children: jsx2("p", { className: "text-sm whitespace-pre-wrap", children: message.text })
    })
  });
}
239
/**
 * Scrollable message list. Auto-follows new messages unless the user has
 * scrolled more than 50px away from the bottom.
 */
function Transcript({
  messages,
  userClassName,
  assistantClassName,
  emptyMessage = "Start a conversation..."
}) {
  const scrollerRef = useRef2(null);
  // True while the user has scrolled away from the bottom edge.
  const scrolledAwayRef = useRef2(false);

  // Track proximity to the bottom on every scroll.
  useEffect2(() => {
    const el = scrollerRef.current;
    if (!el) return;
    const onScroll = () => {
      const nearBottom = el.scrollHeight - el.scrollTop - el.clientHeight < 50;
      scrolledAwayRef.current = !nearBottom;
    };
    el.addEventListener("scroll", onScroll);
    return () => el.removeEventListener("scroll", onScroll);
  }, []);

  // Smooth-scroll to the newest message when still pinned to the bottom.
  useEffect2(() => {
    const el = scrollerRef.current;
    if (el && messages.length > 0 && !scrolledAwayRef.current) {
      el.scrollTo({ top: el.scrollHeight, behavior: "smooth" });
    }
  }, [messages]);

  if (messages.length === 0) {
    return jsx2("div", {
      className: "flex items-center justify-center h-full text-gray-500",
      children: emptyMessage
    });
  }
  return jsx2("div", {
    ref: scrollerRef,
    className: "flex flex-col gap-3 overflow-y-auto h-full p-4",
    children: messages.map(
      (msg) => jsx2(Message, { message: msg, userClassName, assistantClassName }, msg.id)
    )
  });
}
278
/**
 * Colored dot plus label reflecting the current connection status.
 * Fix: previously `statusConfig[status]` was read without a fallback, so an
 * unrecognized status value threw a TypeError on `config.color`; unknown
 * statuses now render with the disconnected style instead.
 */
function StatusIndicator({
  className = "",
  connectedText = "Connected",
  connectingText = "Connecting...",
  disconnectedText = "Disconnected"
}) {
  const { status } = useVoice();
  const statusConfig = {
    CONNECTED: { color: "bg-green-500", text: connectedText, pulse: true },
    CONNECTING: { color: "bg-yellow-500", text: connectingText, pulse: true },
    DISCONNECTED: { color: "bg-gray-400", text: disconnectedText, pulse: false }
  };
  // Robustness: fall back to the disconnected style for unknown statuses.
  const config = statusConfig[status] ?? statusConfig.DISCONNECTED;
  return jsxs("div", { className: `flex items-center gap-2 ${className}`, children: [
    jsx2("div", { className: `w-2 h-2 rounded-full ${config.color} ${config.pulse ? "animate-pulse" : ""}` }),
    jsx2("span", { className: "text-sm", children: config.text })
  ] });
}
296
/**
 * Toggle button that starts or ends the voice session. Disabled while a
 * connection attempt is in flight; label tracks the connection status.
 */
function ConnectButton({
  className = "",
  connectText = "Start",
  disconnectText = "End",
  connectingText = "Connecting...",
  children
}) {
  const { status, connect, disconnect } = useVoice();
  const labelByStatus = {
    CONNECTED: disconnectText,
    CONNECTING: connectingText,
    DISCONNECTED: connectText
  };
  const handleClick = () => {
    if (status === "CONNECTED") {
      disconnect();
    } else if (status === "DISCONNECTED") {
      connect();
    }
  };
  const defaultClass = `px-4 py-2 rounded-lg font-medium transition-colors ${status === "CONNECTED" ? "bg-red-500 hover:bg-red-600 text-white" : status === "CONNECTING" ? "bg-gray-300 text-gray-500 cursor-not-allowed" : "bg-blue-500 hover:bg-blue-600 text-white"}`;
  return jsx2("button", {
    onClick: handleClick,
    disabled: status === "CONNECTING",
    className: className || defaultClass,
    children: children || (labelByStatus[status] ?? connectText)
  });
}
322
/**
 * Uncontrolled text input plus submit button for typing messages to the
 * agent. Disabled unless the session is connected. A custom onSend handler
 * takes precedence over the default session sendMessage.
 */
function ChatInput({
  placeholder = "Type a message...",
  className = "",
  buttonText = "Send",
  onSend
}) {
  const { sendMessage, status } = useVoice();
  const fieldRef = useRef2(null);
  const disabled = status !== "CONNECTED";

  const submit = (e) => {
    e.preventDefault();
    const text = fieldRef.current?.value.trim();
    if (!text) return;
    (onSend || sendMessage)(text);
    if (fieldRef.current) {
      fieldRef.current.value = "";
    }
  };

  return jsxs("form", { onSubmit: submit, className: `flex gap-2 ${className}`, children: [
    jsx2("input", {
      ref: fieldRef,
      type: "text",
      placeholder,
      disabled,
      className: "flex-1 px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 \n bg-white dark:bg-gray-800 text-gray-900 dark:text-gray-100\n focus:outline-none focus:ring-2 focus:ring-blue-500\n disabled:opacity-50 disabled:cursor-not-allowed"
    }),
    jsx2("button", {
      type: "submit",
      disabled,
      className: "px-4 py-2 bg-blue-500 text-white rounded-lg font-medium\n hover:bg-blue-600 disabled:opacity-50 disabled:cursor-not-allowed",
      children: buttonText
    })
  ] });
}
366
/**
 * Batteries-included chat panel: optional header (status + clear/connect
 * controls), transcript area of fixed height, and an input row rendered
 * while connected. Header, footer, and empty state are all overridable.
 */
function VoiceChat({
  className = "",
  height = "400px",
  showHeader = true,
  showInput = true,
  emptyState,
  header,
  footer
}) {
  const { status, transcript, connect, disconnect, clearTranscript } = useVoice();
  const fallbackEmpty = jsxs("div", { className: "flex flex-col items-center justify-center gap-4", children: [
    jsx2(ConnectButton, {}),
    jsx2("p", { className: "text-sm text-gray-500", children: status === "CONNECTING" ? "Connecting..." : "Click to start a conversation" })
  ] });
  const defaultHeader = jsxs(Fragment, { children: [
    jsx2(StatusIndicator, {}),
    jsxs("div", { className: "flex gap-2", children: [
      transcript.length > 0 && jsx2("button", {
        onClick: clearTranscript,
        className: "text-sm text-gray-500 hover:text-gray-700",
        children: "Clear"
      }),
      jsx2("button", {
        onClick: status === "CONNECTED" ? disconnect : connect,
        className: `text-sm font-medium ${status === "CONNECTED" ? "text-red-500 hover:text-red-600" : "text-green-500 hover:text-green-600"}`,
        children: status === "CONNECTED" ? "End" : "Connect"
      })
    ] })
  ] });
  return jsxs("div", {
    className: `flex flex-col rounded-xl border border-gray-200 dark:border-gray-700
    bg-white dark:bg-gray-900 overflow-hidden ${className}`,
    children: [
      showHeader && jsx2("div", {
        className: "flex items-center justify-between px-4 py-3 border-b border-gray-200 dark:border-gray-700",
        children: header || defaultHeader
      }),
      jsx2("div", {
        style: { height },
        className: "overflow-hidden",
        children: jsx2(Transcript, { messages: transcript, emptyMessage: emptyState || fallbackEmpty })
      }),
      // Footer overrides the default input row; input only shows when connected.
      footer || showInput && status === "CONNECTED" && jsx2("div", {
        className: "p-4 border-t border-gray-200 dark:border-gray-700",
        children: jsx2(ChatInput, {})
      })
    ]
  });
}
413
+
414
+ // src/createAgent.ts
415
/**
 * Build an agent definition, appending shared response guidelines to the
 * caller-supplied instructions. Tools default to an empty list; voice is
 * passed through untouched.
 */
function createAgent(config) {
  const { name, instructions, tools = [], voice } = config;
  const fullInstructions = `
${instructions}

# Response Guidelines
- Keep responses concise (2-3 sentences max)
- Answer questions directly before asking follow-ups
- Use tools silently without announcing them
- Speak naturally and conversationally
`.trim();
  return { name, instructions: fullInstructions, tools, voice };
}

/**
 * Higher-level agent factory: assembles a structured prompt from a role,
 * personality, capability/constraint lists, and an optional JSON context
 * object, then delegates to createAgent for the shared guidelines.
 */
function createAgentFromTemplate(config) {
  const {
    name,
    role,
    personality = "Professional and helpful",
    capabilities = [],
    constraints = [],
    tools = [],
    context = {}
  } = config;
  // Render a list of strings as markdown bullet lines.
  const bullets = (items) => items.map((c) => `- ${c}`).join("\n");
  const capabilitiesSection = capabilities.length > 0 ? `## What You Can Do
${bullets(capabilities)}` : "";
  const constraintsSection = constraints.length > 0 ? `## Constraints
${bullets(constraints)}` : "";
  const contextSection = Object.keys(context).length > 0 ? `## Context
\`\`\`json
${JSON.stringify(context, null, 2)}
\`\`\`` : "";
  const instructions = `
You are ${name}, ${role}.

## Personality
${personality}

${capabilitiesSection}

${constraintsSection}

${contextSection}
`.trim();
  return createAgent({ name, instructions, tools });
}
469
+
470
+ // src/hooks/toolHooks.ts
471
+ import { useEffect as useEffect3, useCallback as useCallback2, useState as useState2, useRef as useRef3 } from "react";
472
/**
 * Collects every tool-result event fired on window into a list.
 * Returns the accumulated results, the most recent one, and a reset.
 */
function useToolResults() {
  const [results, setResults] = useState2([]);
  useEffect3(() => {
    const onResult = (event) => {
      setResults((prev) => prev.concat(event.detail));
    };
    window.addEventListener(TOOL_RESULT_EVENT, onResult);
    return () => window.removeEventListener(TOOL_RESULT_EVENT, onResult);
  }, []);
  const clear = useCallback2(() => setResults([]), []);
  return {
    results,
    lastResult: results[results.length - 1] || null,
    clear
  };
}
488
/**
 * Invoke `handler(input, result)` whenever the named tool emits a result.
 * The handler lives in a ref so callers may pass a fresh closure each
 * render without re-subscribing the window listener.
 */
function useToolListener(toolName, handler) {
  const latestHandler = useRef3(handler);
  latestHandler.current = handler;
  useEffect3(() => {
    const onResult = (event) => {
      const detail = event.detail;
      if (detail.name !== toolName) return;
      latestHandler.current(detail.input, detail.result);
    };
    window.addEventListener(TOOL_RESULT_EVENT, onResult);
    return () => window.removeEventListener(TOOL_RESULT_EVENT, onResult);
  }, [toolName]);
}
501
/**
 * Track the latest input/result pair produced by the named tool.
 * `hasResult` distinguishes "no event yet" from a null result value.
 */
function useToolResult(toolName) {
  const [latest, setLatest] = useState2(null);
  useEffect3(() => {
    const onResult = (event) => {
      if (event.detail.name !== toolName) return;
      setLatest({ input: event.detail.input, result: event.detail.result });
    };
    window.addEventListener(TOOL_RESULT_EVENT, onResult);
    return () => window.removeEventListener(TOOL_RESULT_EVENT, onResult);
  }, [toolName]);
  const clear = useCallback2(() => setLatest(null), []);
  return {
    input: latest?.input ?? null,
    result: latest?.result ?? null,
    hasResult: latest !== null,
    clear
  };
}
520
+
521
+ // src/hooks/useAudioRecorder.ts
522
+ import { useRef as useRef4, useCallback as useCallback3 } from "react";
523
+
524
+ // src/utils/audio.ts
525
// Write an ASCII string into a DataView byte-by-byte starting at `offset`.
function writeString(view, offset, str) {
  for (let i = 0; i < str.length; i += 1) {
    view.setUint8(offset + i, str.charCodeAt(i));
  }
}

// Convert float samples in [-1, 1] to little-endian signed 16-bit PCM,
// written into `output` starting at `offset`. Scaling is asymmetric so the
// negative side reaches the full -32768.
function floatTo16BitPCM(output, offset, input) {
  let pos = offset;
  for (const sample of input) {
    const clamped = Math.max(-1, Math.min(1, sample));
    output.setInt16(pos, clamped < 0 ? clamped * 32768 : clamped * 32767, true);
    pos += 2;
  }
}

/**
 * Encode mono float samples as a 16-bit PCM WAV file: a 44-byte
 * RIFF/fmt/data header followed by the little-endian PCM payload.
 * Returns the backing ArrayBuffer.
 */
function encodeWAV(samples, sampleRate) {
  const dataBytes = samples.length * 2;
  const buffer = new ArrayBuffer(44 + dataBytes);
  const view = new DataView(buffer);
  writeString(view, 0, "RIFF");
  view.setUint32(4, 36 + dataBytes, true); // RIFF chunk size
  writeString(view, 8, "WAVE");
  writeString(view, 12, "fmt ");
  view.setUint32(16, 16, true);            // fmt sub-chunk size
  view.setUint16(20, 1, true);             // audio format: PCM
  view.setUint16(22, 1, true);             // channels: mono
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * 2, true); // byte rate = rate * block align
  view.setUint16(32, 2, true);             // block align (mono * 16-bit)
  view.setUint16(34, 16, true);            // bits per sample
  writeString(view, 36, "data");
  view.setUint32(40, dataBytes, true);
  floatTo16BitPCM(view, 44, samples);
  return buffer;
}
555
/**
 * Decode a WebM audio blob, mix all channels down to mono by averaging,
 * and re-encode as a 16-bit PCM WAV blob at the decoded sample rate.
 *
 * Fix: the temporary AudioContext was never closed, leaking an audio
 * hardware handle on every call (browsers cap the number of live
 * AudioContexts); it is now closed in a finally block.
 */
async function convertWebMToWav(blob) {
  const arrayBuffer = await blob.arrayBuffer();
  const audioContext = new AudioContext();
  try {
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
    const numChannels = audioBuffer.numberOfChannels;
    const length = audioBuffer.length;
    // Sum all channels, then divide once to get the per-sample average.
    const mixed = new Float32Array(length);
    for (let channel = 0; channel < numChannels; channel++) {
      const channelData = audioBuffer.getChannelData(channel);
      for (let i = 0; i < length; i++) {
        mixed[i] += channelData[i];
      }
    }
    for (let i = 0; i < length; i++) {
      mixed[i] /= numChannels;
    }
    const wavBuffer = encodeWAV(mixed, audioBuffer.sampleRate);
    return new Blob([wavBuffer], { type: "audio/wav" });
  } finally {
    // Release the audio hardware handle; ignore close errors (best-effort).
    await audioContext.close().catch(() => {});
  }
}
574
/**
 * Map a codec label (case-insensitive) to the realtime API audio format
 * identifier. "g711" maps to g711_ulaw; "opus", "pcm", and anything
 * unrecognized all use 16-bit PCM.
 */
function audioFormatForCodec(codec) {
  const key = codec.toLowerCase();
  if (key === "g711") return "g711_ulaw";
  return "pcm16";
}
585
/**
 * For the "g711" codec, restrict every audio-sending transceiver on the
 * peer connection to PCMU/PCMA; any other codec leaves the connection
 * untouched. Returns the same connection for chaining.
 */
function applyCodecPreferences(pc, codec) {
  if (codec !== "g711") return pc;
  for (const transceiver of pc.getTransceivers()) {
    if (transceiver.sender.track?.kind === "audio") {
      transceiver.setCodecPreferences([
        { mimeType: "audio/PCMU", clockRate: 8e3 },
        { mimeType: "audio/PCMA", clockRate: 8e3 }
      ]);
    }
  }
  return pc;
}
598
+
599
+ // src/hooks/useAudioRecorder.ts
600
/**
 * Hook wrapping MediaRecorder: captures mic audio as WebM chunks and
 * offers helpers to stop, download the capture as WAV, fetch it as a
 * blob, or discard it.
 */
function useAudioRecorder() {
  const recorderRef = useRef4(null);
  const chunksRef = useRef4([]);

  // Begin recording from `stream`; no-op if a recording is in progress.
  const startRecording = useCallback3(async (stream) => {
    if (recorderRef.current?.state === "recording") {
      return;
    }
    try {
      const recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
      recorder.ondataavailable = (event) => {
        if (event.data?.size > 0) {
          chunksRef.current.push(event.data);
        }
      };
      recorder.start();
      recorderRef.current = recorder;
    } catch (error) {
      console.error("Failed to start recording:", error);
      throw error;
    }
  }, []);

  // Flush buffered data and stop; safe to call when idle.
  const stopRecording = useCallback3(() => {
    const recorder = recorderRef.current;
    if (!recorder) return;
    try {
      recorder.requestData();
    } catch {
    }
    try {
      recorder.stop();
    } catch {
    }
    recorderRef.current = null;
  }, []);

  // Convert captured chunks to WAV and trigger a browser download.
  // Returns the WAV blob, or null when nothing was recorded.
  const downloadRecording = useCallback3(async (filename) => {
    if (recorderRef.current?.state === "recording") {
      recorderRef.current.requestData();
      // Give ondataavailable a moment to deliver the flushed chunk.
      await new Promise((resolve) => setTimeout(resolve, 100));
    }
    if (chunksRef.current.length === 0) {
      return null;
    }
    const webmBlob = new Blob(chunksRef.current, { type: "audio/webm" });
    try {
      const wavBlob = await convertWebMToWav(webmBlob);
      const url = URL.createObjectURL(wavBlob);
      const stamp = new Date().toISOString().replace(/[:.]/g, "-");
      const anchor = document.createElement("a");
      anchor.style.display = "none";
      anchor.href = url;
      anchor.download = filename || `voice_recording_${stamp}.wav`;
      document.body.appendChild(anchor);
      anchor.click();
      document.body.removeChild(anchor);
      // Revoke after the click has been processed.
      setTimeout(() => URL.revokeObjectURL(url), 100);
      return wavBlob;
    } catch (error) {
      console.error("Failed to convert recording:", error);
      throw error;
    }
  }, []);

  // Return the recording as a WAV blob without downloading it.
  const getRecordingBlob = useCallback3(async () => {
    if (chunksRef.current.length === 0) {
      return null;
    }
    return convertWebMToWav(new Blob(chunksRef.current, { type: "audio/webm" }));
  }, []);

  const clearRecording = useCallback3(() => {
    chunksRef.current = [];
  }, []);

  return {
    startRecording,
    stopRecording,
    downloadRecording,
    getRecordingBlob,
    clearRecording,
    isRecording: () => recorderRef.current?.state === "recording"
  };
}
681
+
682
+ // src/hooks/useRealtimeSession.ts
683
+ import { useCallback as useCallback6, useRef as useRef6, useState as useState5, useEffect as useEffect5 } from "react";
684
+
685
+ // src/contexts/EventContext.tsx
686
+ import { createContext as createContext2, useContext as useContext2, useState as useState3, useCallback as useCallback4 } from "react";
687
+ import { jsx as jsx3 } from "react/jsx-runtime";
688
// Context for the debug event log; undefined outside an EventProvider.
var EventContext = createContext2(void 0);

/**
 * Keeps an in-memory log of client/server realtime events for debugging,
 * with helpers to record entries, toggle their expansion, and clear them.
 */
var EventProvider = ({ children }) => {
  const [loggedEvents, setLoggedEvents] = useState3([]);

  // Append one entry; uses event_id when numeric, otherwise Date.now().
  const addLoggedEvent = useCallback4(
    (direction, eventName, eventData) => {
      const id = typeof eventData.event_id === "number" ? eventData.event_id : Date.now();
      const entry = {
        id,
        direction,
        eventName,
        eventData,
        timestamp: new Date().toLocaleTimeString(),
        expanded: false
      };
      setLoggedEvents((prev) => [...prev, entry]);
    },
    []
  );

  const logClientEvent = useCallback4(
    (eventObj, eventNameSuffix = "") => {
      addLoggedEvent("client", `${eventObj.type || ""} ${eventNameSuffix || ""}`.trim(), eventObj);
    },
    [addLoggedEvent]
  );

  const logServerEvent = useCallback4(
    (eventObj, eventNameSuffix = "") => {
      addLoggedEvent("server", `${eventObj.type || ""} ${eventNameSuffix || ""}`.trim(), eventObj);
    },
    [addLoggedEvent]
  );

  // Derive a readable name for a conversation-history item, then log it.
  const logHistoryItem = useCallback4(
    (item) => {
      let eventName = item.type;
      if (item.type === "message") {
        eventName = `${item.role}.${item.status || "unknown"}`;
      }
      if (item.type === "function_call") {
        eventName = `function.${item.name || "unknown"}.${item.status || "unknown"}`;
      }
      addLoggedEvent("server", eventName, item);
    },
    [addLoggedEvent]
  );

  const toggleExpand = useCallback4((id) => {
    setLoggedEvents(
      (prev) => prev.map((log) => log.id === id ? { ...log, expanded: !log.expanded } : log)
    );
  }, []);

  const clearEvents = useCallback4(() => {
    setLoggedEvents([]);
  }, []);

  return jsx3(EventContext.Provider, {
    value: { loggedEvents, logClientEvent, logServerEvent, logHistoryItem, toggleExpand, clearEvents },
    children
  });
};
751
/**
 * Access the event-log API; throws when called outside an EventProvider.
 */
function useEvent() {
  const ctx = useContext2(EventContext);
  if (!ctx) {
    throw new Error("useEvent must be used within an EventProvider");
  }
  return ctx;
}
758
+
759
+ // src/hooks/useSessionHistory.ts
760
+ import { useRef as useRef5 } from "react";
761
+
762
+ // src/contexts/TranscriptContext.tsx
763
+ import {
764
+ createContext as createContext3,
765
+ useContext as useContext3,
766
+ useState as useState4,
767
+ useCallback as useCallback5
768
+ } from "react";
769
+ import { jsx as jsx4 } from "react/jsx-runtime";
770
// Context backing the structured transcript; undefined outside a provider.
var TranscriptContext = createContext3(void 0);

// 24-hour HH:MM:SS timestamp string for display.
function newTimestampPretty() {
  return new Date().toLocaleTimeString([], {
    hour12: false,
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit"
  });
}

// Short random base-36 id used for breadcrumb items.
function generateId() {
  return Math.random().toString(36).substring(2, 15);
}

/**
 * Holds the structured transcript (MESSAGE and BREADCRUMB items) and the
 * mutation helpers used by the session-history plumbing.
 */
var TranscriptProvider = ({ children }) => {
  const [transcriptItems, setTranscriptItems] = useState4([]);

  // Add a message item unless one with the same itemId already exists.
  const addTranscriptMessage = useCallback5(
    (itemId, role, text = "", isHidden = false) => {
      setTranscriptItems((prev) => {
        if (prev.some((i) => i.itemId === itemId)) return prev;
        const item = {
          itemId,
          type: "MESSAGE",
          role,
          title: text,
          expanded: false,
          timestamp: newTimestampPretty(),
          createdAtMs: Date.now(),
          status: "IN_PROGRESS",
          isHidden
        };
        return [...prev, item];
      });
    },
    []
  );

  // Replace, or append to, the text of an existing message item.
  const updateTranscriptMessage = useCallback5(
    (itemId, newText, append = false) => {
      setTranscriptItems(
        (prev) => prev.map((item) => {
          if (item.itemId !== itemId || item.type !== "MESSAGE") return item;
          return { ...item, title: append ? (item.title ?? "") + newText : newText };
        })
      );
    },
    []
  );

  // Append a breadcrumb (non-message annotation) item.
  const addTranscriptBreadcrumb = useCallback5(
    (title, data) => {
      setTranscriptItems((prev) => [
        ...prev,
        {
          itemId: `breadcrumb-${generateId()}`,
          type: "BREADCRUMB",
          title,
          data,
          expanded: false,
          timestamp: newTimestampPretty(),
          createdAtMs: Date.now(),
          status: "DONE",
          isHidden: false
        }
      ]);
    },
    []
  );

  const toggleTranscriptItemExpand = useCallback5((itemId) => {
    setTranscriptItems(
      (prev) => prev.map(
        (log) => log.itemId === itemId ? { ...log, expanded: !log.expanded } : log
      )
    );
  }, []);

  // Shallow-merge arbitrary property updates onto one item.
  const updateTranscriptItem = useCallback5(
    (itemId, updatedProperties) => {
      setTranscriptItems(
        (prev) => prev.map(
          (item) => item.itemId === itemId ? { ...item, ...updatedProperties } : item
        )
      );
    },
    []
  );

  const clearTranscript = useCallback5(() => {
    setTranscriptItems([]);
  }, []);

  return jsx4(TranscriptContext.Provider, {
    value: {
      transcriptItems,
      addTranscriptMessage,
      updateTranscriptMessage,
      addTranscriptBreadcrumb,
      toggleTranscriptItemExpand,
      updateTranscriptItem,
      clearTranscript
    },
    children
  });
};
877
/**
 * Access the transcript store; throws outside a TranscriptProvider.
 */
function useTranscript() {
  const ctx = useContext3(TranscriptContext);
  if (!ctx) {
    throw new Error("useTranscript must be used within a TranscriptProvider");
  }
  return ctx;
}
884
+
885
+ // src/hooks/useSessionHistory.ts
886
+ function useSessionHistory() {
887
+ const {
888
+ transcriptItems,
889
+ addTranscriptBreadcrumb,
890
+ addTranscriptMessage,
891
+ updateTranscriptMessage,
892
+ updateTranscriptItem
893
+ } = useTranscript();
894
+ const { logServerEvent } = useEvent();
895
+ const accumulatedTextRef = useRef5(/* @__PURE__ */ new Map());
896
+ const pendingDeltasRef = useRef5(/* @__PURE__ */ new Map());
897
+ const deltaTimerRef = useRef5(/* @__PURE__ */ new Map());
898
+ const interruptedItemsRef = useRef5(/* @__PURE__ */ new Set());
899
+ const totalAudioDurationRef = useRef5(/* @__PURE__ */ new Map());
900
+ const extractMessageText = (content = []) => {
901
+ if (!Array.isArray(content)) return "";
902
+ return content.map((c) => {
903
+ if (!c || typeof c !== "object") return "";
904
+ const item = c;
905
+ if (item.type === "input_text") return item.text ?? "";
906
+ if (item.type === "audio") return item.transcript ?? "";
907
+ return "";
908
+ }).filter(Boolean).join("\n");
909
+ };
910
+ const extractFunctionCallByName = (name, content = []) => {
911
+ if (!Array.isArray(content)) return void 0;
912
+ return content.find(
913
+ (c) => c && typeof c === "object" && c.type === "function_call" && c.name === name
914
+ );
915
+ };
916
+ const maybeParseJson = (val) => {
917
+ if (typeof val === "string") {
918
+ try {
919
+ return JSON.parse(val);
920
+ } catch {
921
+ return val;
922
+ }
923
+ }
924
+ return val;
925
+ };
926
+ const extractLastAssistantMessage = (history = []) => {
927
+ if (!Array.isArray(history)) return void 0;
928
+ return [...history].reverse().find(
929
+ (c) => c && typeof c === "object" && c.type === "message" && c.role === "assistant"
930
+ );
931
+ };
932
+ const extractModeration = (obj) => {
933
+ if (!obj || typeof obj !== "object") return void 0;
934
+ const o = obj;
935
+ if ("moderationCategory" in o) return o;
936
+ if ("outputInfo" in o) return extractModeration(o.outputInfo);
937
+ if ("output" in o) return extractModeration(o.output);
938
+ if ("result" in o) return extractModeration(o.result);
939
+ return void 0;
940
+ };
941
+ const sketchilyDetectGuardrailMessage = (text) => {
942
+ return text.match(/Failure Details: (\{.*?\})/)?.[1];
943
+ };
944
// Breadcrumb the transcript when an agent tool invocation begins, pulling the
// matching function_call entry out of the session history for its arguments.
function handleAgentToolStart(details, _agent, functionCall) {
  const history = details?.context?.history;
  const matched = extractFunctionCallByName(functionCall.name, history);
  addTranscriptBreadcrumb(`function call: ${matched?.name}`, matched?.arguments);
}
950
// Breadcrumb the transcript when an agent tool invocation completes,
// attaching the (JSON-decoded, when possible) tool result.
function handleAgentToolEnd(details, _agent, functionCall, result) {
  const history = details?.context?.history;
  const matched = extractFunctionCallByName(functionCall.name, history);
  addTranscriptBreadcrumb(`function call result: ${matched?.name}`, maybeParseJson(result));
}
956
// Add a newly-created history item to the transcript.
// Assistant messages may arrive with empty text (audio still streaming), so
// they are added as empty placeholders; empty user messages are dropped.
// Guardrail failure payloads embedded in the text become breadcrumbs instead.
function handleHistoryAdded(item) {
  if (!item || item.type !== "message") return;
  const { itemId, role, content = [] } = item;
  if (!itemId || !role) return;
  let text = extractMessageText(content);
  if (role === "assistant" && !text) {
    text = "";
  } else if (role === "user" && !text) {
    return;
  }
  const guardrailMessage = sketchilyDetectGuardrailMessage(text);
  if (guardrailMessage) {
    // The extracted payload comes from a lazy regex and may be truncated;
    // guard the parse so a malformed payload cannot crash the handler.
    try {
      const failureDetails = JSON.parse(guardrailMessage);
      addTranscriptBreadcrumb("Output Guardrail Active", { details: failureDetails });
      return;
    } catch {
      // Fall through and surface the raw text as a normal message.
    }
  }
  addTranscriptMessage(itemId, role, text);
}
975
// Sync edited history items into the transcript. Assistant items are skipped
// (their text streams in via transcription deltas) and items the user already
// interrupted stay frozen.
function handleHistoryUpdated(items) {
  for (const item of items) {
    if (!item || item.type !== "message") continue;
    const { itemId, role, content = [] } = item;
    if (interruptedItemsRef.current.has(itemId) || role === "assistant") continue;
    const text = extractMessageText(content);
    if (text) updateTranscriptMessage(itemId, text, false);
  }
}
987
// Text accumulated but not yet finalized, per item id.
const pendingTextRef = useRef5(/* @__PURE__ */ new Map());
// Text most recently pushed to the transcript, per item id.
const displayedTextRef = useRef5(/* @__PURE__ */ new Map());
// Fold one streamed transcription delta into the accumulated text for its item
// and push the running text to the transcript (unless it would render blank).
function handleTranscriptionDelta(item, audioPositionMs) {
  const itemId = item.item_id;
  const deltaText = item.delta || "";
  if (!itemId || !deltaText || interruptedItemsRef.current.has(itemId)) return;
  const previous = accumulatedTextRef.current.get(itemId) || "";
  const text = previous + deltaText;
  accumulatedTextRef.current.set(itemId, text);
  pendingTextRef.current.set(itemId, text);
  displayedTextRef.current.set(itemId, text);
  if (audioPositionMs !== undefined && audioPositionMs > 0) {
    totalAudioDurationRef.current.set(itemId, audioPositionMs);
  }
  // Suppress updates that are only whitespace, dots, or ellipses.
  if (text.replace(/[\s.…]+/g, "").length === 0) return;
  updateTranscriptMessage(itemId, text, false);
}
1004
// Finalize a completed transcription: flush the best-known final text, mark
// the transcript item DONE, and resolve any still-pending guardrail state.
function handleTranscriptionCompleted(item) {
  const itemId = item.item_id;
  if (!itemId || interruptedItemsRef.current.has(itemId)) return;
  // BUG FIX: capture the last displayed text BEFORE clearing the per-item
  // caches. Previously it was read after `displayedTextRef.current.delete`,
  // so it was always undefined and `item.transcript` was used even when
  // fresher streamed text existed.
  const displayedText = displayedTextRef.current.get(itemId);
  const timer = deltaTimerRef.current.get(itemId);
  if (timer) clearTimeout(timer);
  deltaTimerRef.current.delete(itemId);
  pendingDeltasRef.current.delete(itemId);
  pendingTextRef.current.delete(itemId);
  displayedTextRef.current.delete(itemId);
  accumulatedTextRef.current.delete(itemId);
  totalAudioDurationRef.current.delete(itemId);
  const finalText = displayedText || item.transcript || "";
  const stripped = finalText.replace(/[\s.…]+/g, "");
  // Only push text that would actually render (not just whitespace/dots).
  if (stripped.length > 0) {
    updateTranscriptMessage(itemId, finalText, false);
  }
  updateTranscriptItem(itemId, { status: "DONE" });
  // A guardrail that never reported back is considered clean once the
  // transcription is complete.
  const transcriptItem = transcriptItems.find((i) => i.itemId === itemId);
  if (transcriptItem?.guardrailResult?.status === "IN_PROGRESS") {
    updateTranscriptItem(itemId, {
      guardrailResult: {
        status: "DONE",
        category: "NONE",
        rationale: ""
      }
    });
  }
}
1035
// Record a tripped output guardrail and attach its moderation verdict to the
// most recent assistant message in the transcript.
function handleGuardrailTripped(details, _agent, guardrail) {
  const moderation = extractModeration(guardrail.result?.output?.outputInfo);
  logServerEvent({ type: "guardrail_tripped", payload: moderation });
  const lastAssistant = extractLastAssistantMessage(details?.context?.history);
  if (!lastAssistant || !moderation) return;
  updateTranscriptItem(lastAssistant.itemId, {
    guardrailResult: {
      status: "DONE",
      category: moderation.moderationCategory ?? "NONE",
      rationale: moderation.moderationRationale ?? "",
      testText: moderation.testText
    }
  });
}
1058
// Keep the latest transcript items reachable from a stable ref.
// NOTE(review): transcriptItemsRef is refreshed every render but is never read
// by the handlers captured below (handleTranscriptionCompleted closes over
// `transcriptItems` directly) — confirm whether the handlers should read this
// ref instead to avoid a stale first-render closure.
const transcriptItemsRef = useRef5(transcriptItems);
transcriptItemsRef.current = transcriptItems;
// Stable bundle of history/transcription handlers. useRef keeps the object
// built on the first render, so consumers get a stable identity.
const handlersRef = useRef5({
  handleAgentToolStart,
  handleAgentToolEnd,
  handleHistoryUpdated,
  handleHistoryAdded,
  handleTranscriptionDelta,
  handleTranscriptionCompleted,
  isInterrupted: (itemId) => interruptedItemsRef.current.has(itemId),
  // Truncate an assistant message at the point playback was interrupted,
  // estimating the spoken portion from elapsed vs. total audio duration.
  handleTruncation: (itemId, audioEndMs, totalAudioMs) => {
    if (interruptedItemsRef.current.has(itemId)) return;
    const timer = deltaTimerRef.current.get(itemId);
    if (timer) clearTimeout(timer);
    deltaTimerRef.current.delete(itemId);
    // Capture the full text BEFORE the per-item caches are cleared below.
    const fullText = pendingTextRef.current.get(itemId) || accumulatedTextRef.current.get(itemId) || "";
    pendingDeltasRef.current.delete(itemId);
    pendingTextRef.current.delete(itemId);
    displayedTextRef.current.delete(itemId);
    accumulatedTextRef.current.delete(itemId);
    totalAudioDurationRef.current.delete(itemId);
    interruptedItemsRef.current.add(itemId);
    // Nothing spoken (or unknown duration): hide the item entirely.
    if (!fullText || totalAudioMs <= 0) {
      updateTranscriptItem(itemId, { isHidden: true, status: "DONE" });
      return;
    }
    // Estimate how many characters were actually spoken, clamped to [0, 1].
    const fractionSpoken = Math.min(Math.max(audioEndMs / totalAudioMs, 0), 1);
    const estimatedCharPos = Math.floor(fullText.length * fractionSpoken);
    let truncatePos = estimatedCharPos;
    // Back up to the previous whitespace so we don't cut mid-word.
    while (truncatePos > 0 && !/\s/.test(fullText[truncatePos - 1])) {
      truncatePos--;
    }
    // If backing up consumed everything, scan forward to the next word
    // boundary instead so at least one word survives.
    if (truncatePos === 0 && estimatedCharPos > 0) {
      truncatePos = estimatedCharPos;
      while (truncatePos < fullText.length && !/\s/.test(fullText[truncatePos])) {
        truncatePos++;
      }
    }
    const truncatedText = fullText.slice(0, truncatePos).trim();
    if (truncatedText.length > 0) {
      updateTranscriptMessage(itemId, truncatedText + "...", false);
      updateTranscriptItem(itemId, { status: "DONE" });
    } else {
      updateTranscriptItem(itemId, { isHidden: true, status: "DONE" });
    }
  },
  handleGuardrailTripped
});
return handlersRef;
1107
+ }
1108
+
1109
+ // src/hooks/useRealtimeSession.ts
1110
// React hook that owns a realtime voice session: connection lifecycle,
// normalized adapter-event wiring into the shared transcript handlers, and
// convenience controls (mute, push-to-talk, interrupt, text send).
function useRealtimeSession(callbacks = {}) {
  // Live session object from the adapter; null while disconnected.
  const sessionRef = useRef6(null);
  const [status, setStatus] = useState5("DISCONNECTED");
  const { logClientEvent, logServerEvent } = useEvent();
  // Audio codec requested at connect time; may be overridden via ?codec= below.
  const codecParamRef = useRef6("opus");
  // Set local status, notify the host app, and log the transition.
  const updateStatus = useCallback6(
    (s) => {
      setStatus(s);
      callbacks.onConnectionChange?.(s);
      logClientEvent({}, s);
    },
    [callbacks, logClientEvent]
  );
  // Stable bundle of transcript/history handlers (see useSessionHistory).
  const historyHandlers = useSessionHistory().current;
  // Item ids whose audio was truncated; later transcript events for them are dropped.
  const interruptedRef = useRef6(/* @__PURE__ */ new Set());
  useEffect5(() => {
    // Allow overriding the codec via a ?codec= query parameter (browser only).
    if (typeof window !== "undefined") {
      const params = new URLSearchParams(window.location.search);
      const codec = params.get("codec");
      if (codec) {
        codecParamRef.current = codec.toLowerCase();
      }
    }
  }, []);
  // Subscribe to the adapter's normalized event stream and route each event
  // into the history handlers / host callbacks.
  const wireNormalizedEvents = useCallback6((session) => {
    // Intentionally a no-op; subscribed so adapters needn't special-case it.
    session.on("user_speech_started", () => {
    });
    session.on("user_transcript", (data) => {
      if (data.isFinal) {
        const text = data.text || data.delta || "";
        // Skip transcripts that are only whitespace/punctuation.
        if (text.replace(/[\s.…,!?]+/g, "").length === 0) return;
        historyHandlers.handleTranscriptionCompleted({
          item_id: data.itemId,
          transcript: text
        });
      } else if (data.delta) {
        historyHandlers.handleTranscriptionDelta({
          item_id: data.itemId,
          delta: data.delta
        });
      }
    });
    session.on("assistant_transcript", (data) => {
      // Ignore transcript updates for items the user already interrupted.
      if (interruptedRef.current.has(data.itemId)) return;
      if (data.isFinal) {
        historyHandlers.handleTranscriptionCompleted({
          item_id: data.itemId,
          transcript: data.text || ""
        });
      } else if (data.delta) {
        historyHandlers.handleTranscriptionDelta(
          { item_id: data.itemId, delta: data.delta }
        );
      }
    });
    session.on("tool_call_start", (name, input) => {
      historyHandlers.handleAgentToolStart(
        {},
        void 0,
        { name, arguments: input }
      );
    });
    session.on("tool_call_end", (name, input, result) => {
      historyHandlers.handleAgentToolEnd(
        {},
        void 0,
        { name, arguments: input },
        result
      );
    });
    session.on("agent_handoff", (_from, to) => {
      callbacks.onAgentHandoff?.(to);
    });
    session.on("guardrail_tripped", (info) => {
      historyHandlers.handleGuardrailTripped(
        {},
        void 0,
        { result: info }
      );
    });
    session.on("raw_event", (event) => {
      const ev = event;
      if (ev.type === "conversation.item.truncated") {
        // Remember truncated items so later transcript events are dropped.
        const itemId = ev.item_id;
        if (itemId) interruptedRef.current.add(itemId);
        return;
      }
      if (ev.type === "history_updated") {
        historyHandlers.handleHistoryUpdated(ev.items);
        return;
      }
      if (ev.type === "history_added") {
        historyHandlers.handleHistoryAdded(ev.item);
        return;
      }
      // Anything unrecognized is logged for the event inspector.
      logServerEvent(ev);
    });
    session.on("error", (error) => {
      const e = error;
      const msg = e instanceof Error ? e.message : typeof e === "string" ? e : JSON.stringify(e);
      const errObj = typeof e === "object" && e?.error ? e.error : e;
      const code = typeof errObj === "object" && errObj?.code ? String(errObj.code) : "";
      const msgStr = typeof msg === "string" ? msg : "";
      // These two server errors are expected races (cancelling an inactive
      // response / double response.create) and are deliberately suppressed.
      const isBenign = code === "response_cancel_not_active" || code === "conversation_already_has_active_response" || msgStr.includes("response_cancel_not_active") || msgStr.includes("conversation_already_has_active_response");
      if (isBenign) return;
      console.error("Session error:", msg);
      logServerEvent({ type: "error", message: msg });
    });
  }, [callbacks, historyHandlers, logServerEvent]);
  // Open a session through the adapter. No-op if already connected.
  // Throws when `adapter` is missing or the underlying connect fails
  // (after resetting state back to DISCONNECTED).
  const connect = useCallback6(
    async ({
      getEphemeralKey,
      initialAgents,
      audioElement,
      extraContext,
      outputGuardrails,
      adapter
    }) => {
      if (sessionRef.current) return;
      if (!adapter) {
        throw new Error(
          "useRealtimeSession: `adapter` is required in ConnectOptions. Pass an adapter like openai() from @jchaffin/voicekit/openai."
        );
      }
      updateStatus("CONNECTING");
      const ek = await getEphemeralKey();
      // Only the first agent is used as the session root.
      const rootAgent = initialAgents[0];
      const codecParam = codecParamRef.current;
      const session = adapter.createSession(rootAgent, {
        codec: codecParam,
        language: "en"
      });
      sessionRef.current = session;
      // Wire events before connecting so nothing emitted during the
      // handshake is missed.
      wireNormalizedEvents(session);
      try {
        await session.connect({
          authToken: ek,
          audioElement,
          context: extraContext,
          outputGuardrails
        });
        updateStatus("CONNECTED");
      } catch (connectError) {
        console.error("Connection error:", connectError);
        sessionRef.current = null;
        updateStatus("DISCONNECTED");
        throw connectError;
      }
    },
    [updateStatus, wireNormalizedEvents]
  );
  // Tear down the current session (best effort); always ends DISCONNECTED.
  const disconnect = useCallback6(async () => {
    if (sessionRef.current) {
      try {
        await sessionRef.current.disconnect();
      } catch (error) {
        console.error("Error closing session:", error);
      } finally {
        sessionRef.current = null;
        updateStatus("DISCONNECTED");
      }
    } else {
      updateStatus("DISCONNECTED");
    }
  }, [updateStatus]);
  // Stop the assistant's in-flight audio response, if any.
  const interrupt = useCallback6(() => {
    sessionRef.current?.interrupt();
  }, []);
  // Send a typed user message; requires an active session.
  const sendUserText = useCallback6((text) => {
    if (!sessionRef.current) throw new Error("Session not connected");
    sessionRef.current.sendMessage(text);
  }, []);
  // Forward a raw protocol event (silently ignored if the adapter
  // doesn't support raw events).
  const sendEvent = useCallback6((ev) => {
    sessionRef.current?.sendRawEvent?.(ev);
  }, []);
  const mute = useCallback6((m) => {
    sessionRef.current?.mute(m);
  }, []);
  // Push-to-talk press: drop any audio buffered before the button went down...
  const pushToTalkStart = useCallback6(() => {
    sessionRef.current?.sendRawEvent?.({ type: "input_audio_buffer.clear" });
  }, []);
  // ...and on release, commit the captured audio and request a response.
  const pushToTalkStop = useCallback6(() => {
    sessionRef.current?.sendRawEvent?.({ type: "input_audio_buffer.commit" });
    sessionRef.current?.sendRawEvent?.({ type: "response.create" });
  }, []);
  return {
    status,
    connect,
    disconnect,
    sendUserText,
    sendEvent,
    mute,
    pushToTalkStart,
    pushToTalkStop,
    interrupt
  };
}
1307
+
1308
+ // src/guardrails.ts
1309
+ import { z } from "zod";
1310
// Moderation classes the guardrail classifier may assign; "NONE" means clean.
var MODERATION_CATEGORIES = [
  "OFFENSIVE",
  "OFF_BRAND",
  "VIOLENCE",
  "NONE"
];
// Zod schema for a single category value.
var ModerationCategoryZod = z.enum([...MODERATION_CATEGORIES]);
// Zod schema for the classifier's full verdict; `.strict()` rejects extra keys.
var GuardrailOutputZod = z.object({
  moderationRationale: z.string(),
  moderationCategory: ModerationCategoryZod,
  testText: z.string().optional()
}).strict();
1322
// Classify `message` against the moderation categories by POSTing to the host
// app's responses proxy. Returns the parsed verdict, or null when the request
// fails or the response doesn't match GuardrailOutputZod.
// NOTE(review): custom entries in `categories` can be described in the prompt
// (default branch below) but will fail GuardrailOutputZod parsing, since the
// enum is fixed to MODERATION_CATEGORIES — confirm intended behavior.
async function runGuardrailClassifier(message, config = {}) {
  const {
    apiEndpoint = "/api/responses",
    model = "gpt-4o-mini",
    categories = MODERATION_CATEGORIES,
    companyName = "Company"
  } = config;
  // Build the "- CATEGORY: description" lines injected into the prompt.
  const categoryDescriptions = categories.map((cat) => {
    switch (cat) {
      case "OFFENSIVE":
        return "- OFFENSIVE: Content that includes hate speech, discriminatory language, insults, slurs, or harassment.";
      case "OFF_BRAND":
        return "- OFF_BRAND: Content that discusses competitors in a disparaging way.";
      case "VIOLENCE":
        return "- VIOLENCE: Content that includes explicit threats, incitement of harm, or graphic descriptions of physical injury or violence.";
      case "NONE":
        return "- NONE: If no other classes are appropriate and the message is fine.";
      default:
        return `- ${cat}: Custom category.`;
    }
  }).join("\n");
  const messages = [
    {
      role: "user",
      content: `You are an expert at classifying text according to moderation policies. Consider the provided message, analyze potential classes from output_classes, and output the best classification. Output json, following the provided schema. Keep your analysis and reasoning short and to the point, maximum 2 sentences.

<info>
- Company name: ${companyName}
</info>

<message>
${message}
</message>

<output_classes>
${categoryDescriptions}
</output_classes>
`
    }
  ];
  // NOTE(review): `schema` here is the Zod schema object itself; JSON.stringify
  // serializes Zod internals rather than a JSON Schema document. This only
  // works if the /api/responses endpoint converts it server-side — confirm,
  // or convert to JSON Schema before sending.
  const response = await fetch(apiEndpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model,
      input: messages,
      text: {
        format: {
          type: "json_schema",
          name: "output_format",
          schema: GuardrailOutputZod
        }
      }
    })
  });
  if (!response.ok) return null;
  try {
    // Assumes the endpoint returns the bare verdict object (not the full
    // Responses API envelope) — TODO confirm against the server route.
    const data = await response.json();
    return GuardrailOutputZod.parse(data);
  } catch {
    return null;
  }
}
1385
// Build an output guardrail that moderates assistant text through
// runGuardrailClassifier. Fails open: if classification errors out or
// returns no verdict, the tripwire stays off.
function createModerationGuardrail(config = {}) {
  return {
    name: "moderation_guardrail",
    async execute({ agentOutput }) {
      try {
        const res = await runGuardrailClassifier(agentOutput, config);
        // BUG FIX: previously `res?.moderationCategory !== "NONE"` evaluated
        // to true when the classifier returned null (undefined !== "NONE"),
        // so a failed classification tripped the guardrail. Treat a missing
        // verdict as "not triggered", consistent with the catch branch below.
        const triggered = res != null && res.moderationCategory !== "NONE";
        return {
          tripwireTriggered: triggered,
          outputInfo: res || { error: "guardrail_failed" }
        };
      } catch {
        return {
          tripwireTriggered: false,
          outputInfo: { error: "guardrail_failed" }
        };
      }
    }
  };
}
1405
// Wrap a user-supplied classifier into the guardrail interface.
// Classifier failures never trip the guardrail; they surface via
// outputInfo.error instead.
function createCustomGuardrail(name, classifier) {
  const execute = async ({ agentOutput }) => {
    try {
      const { triggered, info } = await classifier(agentOutput);
      return { tripwireTriggered: triggered, outputInfo: info };
    } catch {
      return { tripwireTriggered: false, outputInfo: { error: "guardrail_failed" } };
    }
  };
  return { name, execute };
}
1424
+
1425
+ // src/suggestions/SuggestionContext.tsx
1426
+ import { createContext as createContext4, useContext as useContext4, useState as useState6, useCallback as useCallback7, useEffect as useEffect6 } from "react";
1427
+
1428
+ // src/suggestions/types.ts
1429
// DOM CustomEvent name used to broadcast suggestion groups to SuggestionProvider.
var SUGGESTION_EVENT = "voicekit:suggestions";
1430
+
1431
+ // src/suggestions/SuggestionContext.tsx
1432
+ import { jsx as jsx5 } from "react/jsx-runtime";
1433
// Context backing useSuggestions; null until a SuggestionProvider mounts.
var SuggestionCtx = createContext4(null);
1434
// Provides suggestion-chip state to descendants and listens for
// SUGGESTION_EVENT DOM events dispatched via emitSuggestions().
function SuggestionProvider({
  children,
  onSelect,
  autoClear = true
}) {
  const [suggestions, setSuggestionsState] = useState6(null);
  const setSuggestions = useCallback7((group) => {
    setSuggestionsState(group);
  }, []);
  const clearSuggestions2 = useCallback7(() => {
    setSuggestionsState(null);
  }, []);
  const selectSuggestion = useCallback7(
    (item) => {
      // Notify the host app first, then optionally dismiss the chips.
      onSelect?.(item);
      if (autoClear) setSuggestionsState(null);
    },
    [onSelect, autoClear]
  );
  useEffect6(() => {
    const onSuggestionEvent = (e) => {
      const incoming = e.detail?.group;
      if (incoming) setSuggestionsState(incoming);
    };
    window.addEventListener(SUGGESTION_EVENT, onSuggestionEvent);
    return () => window.removeEventListener(SUGGESTION_EVENT, onSuggestionEvent);
  }, []);
  return /* @__PURE__ */ jsx5(SuggestionCtx.Provider, {
    value: {
      suggestions,
      setSuggestions,
      selectSuggestion,
      clearSuggestions: clearSuggestions2
    },
    children
  });
}
1471
// Access the suggestion context; throws when called outside a SuggestionProvider.
function useSuggestions() {
  const ctx = useContext4(SuggestionCtx);
  if (ctx) return ctx;
  throw new Error("useSuggestions must be used within a SuggestionProvider");
}
1478
+
1479
+ // src/suggestions/emitSuggestions.ts
1480
// Broadcast a suggestion group to any mounted SuggestionProvider.
// No-op during SSR, where `window` does not exist.
function emitSuggestions(group) {
  if (typeof window === "undefined") return;
  const event = new CustomEvent(SUGGESTION_EVENT, { detail: { group } });
  window.dispatchEvent(event);
}
1488
// Dismiss any visible suggestions by broadcasting a null group.
// No-op during SSR, where `window` does not exist.
function clearSuggestions() {
  if (typeof window === "undefined") return;
  const event = new CustomEvent(SUGGESTION_EVENT, { detail: { group: null } });
  window.dispatchEvent(event);
}
1496
+
1497
+ // src/suggestions/SuggestionChips.tsx
1498
+ import React6 from "react";
1499
+ import { jsx as jsx6, jsxs as jsxs2 } from "react/jsx-runtime";
1500
// Render the active suggestion group as clickable chips.
// A `group` prop overrides context state; `renderItem` customizes each chip;
// `chipClassName` replaces the built-in inline chip styling entirely.
function SuggestionChips({
  group: groupOverride,
  renderItem,
  className,
  chipClassName
}) {
  const { suggestions, selectSuggestion } = useSuggestions();
  const group = groupOverride ?? suggestions;
  if (!group || group.items.length === 0) return null;
  const defaultChipStyle = {
    display: "inline-flex",
    alignItems: "center",
    gap: "0.375rem",
    padding: "0.5rem 0.75rem",
    borderRadius: "9999px",
    fontSize: "0.875rem",
    fontWeight: 500,
    border: "1px solid rgba(99,102,241,0.3)",
    background: "rgba(99,102,241,0.08)",
    color: "inherit",
    cursor: "pointer",
    transition: "all 0.15s"
  };
  const renderChip = (item) => {
    const handleClick = () => selectSuggestion(item);
    if (renderItem) {
      return /* @__PURE__ */ jsx6(React6.Fragment, { children: renderItem(item, handleClick) }, item.id);
    }
    return /* @__PURE__ */ jsx6(
      "button",
      {
        onClick: handleClick,
        className: chipClassName ?? "vk-chip",
        style: chipClassName ? void 0 : defaultChipStyle,
        children: item.label
      },
      item.id
    );
  };
  return /* @__PURE__ */ jsxs2("div", { className: className ?? "vk-suggestions", children: [
    group.prompt && /* @__PURE__ */ jsx6("p", { className: "vk-suggestions-prompt", style: { fontSize: "0.875rem", opacity: 0.7, marginBottom: "0.5rem" }, children: group.prompt }),
    /* @__PURE__ */ jsx6(
      "div",
      {
        className: "vk-suggestions-list",
        style: { display: "flex", flexWrap: "wrap", gap: "0.5rem" },
        children: group.items.map((item) => renderChip(item))
      }
    )
  ] });
}
1549
+ export {
1550
+ ChatInput,
1551
+ ConnectButton,
1552
+ EventEmitter,
1553
+ EventProvider,
1554
+ GuardrailOutputZod,
1555
+ MODERATION_CATEGORIES,
1556
+ ModerationCategoryZod,
1557
+ SUGGESTION_EVENT,
1558
+ StatusIndicator,
1559
+ SuggestionChips,
1560
+ SuggestionProvider,
1561
+ TOOL_RESULT_EVENT,
1562
+ Transcript,
1563
+ TranscriptProvider,
1564
+ VoiceChat,
1565
+ VoiceProvider,
1566
+ applyCodecPreferences,
1567
+ audioFormatForCodec,
1568
+ clearSuggestions,
1569
+ convertWebMToWav,
1570
+ createAPITool,
1571
+ createAgent,
1572
+ createAgentFromTemplate,
1573
+ createCustomGuardrail,
1574
+ createEventTool,
1575
+ createModerationGuardrail,
1576
+ createNavigationTool,
1577
+ createRAGTool,
1578
+ createSearchTool,
1579
+ defineTool,
1580
+ emitSuggestions,
1581
+ encodeWAV,
1582
+ runGuardrailClassifier,
1583
+ useAudioRecorder,
1584
+ useEvent,
1585
+ useRealtimeSession,
1586
+ useSessionHistory,
1587
+ useSuggestions,
1588
+ useToolListener,
1589
+ useToolResult,
1590
+ useToolResults,
1591
+ useTranscript,
1592
+ useVoice
1593
+ };