@jchaffin/voicekit 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,369 @@
1
+ # @jchaffin/voicekit
2
+
3
+ A React library for building voice-enabled AI agents using OpenAI's Realtime API.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @jchaffin/voicekit @openai/agents
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ### 1. Create a session endpoint
14
+
15
+ ```ts
16
+ // app/api/session/route.ts
17
+ import { createSessionHandler } from '@jchaffin/voicekit/server';
18
+
19
+ export const POST = createSessionHandler();
20
+
21
+ // Or with options:
22
+ export const POST = createSessionHandler({
23
+ model: 'gpt-realtime',
24
+ voice: 'alloy'
25
+ });
26
+ ```
27
+
28
+ ### 2. Define your agent
29
+
30
+ ```tsx
31
+ import { createAgent, defineTool } from '@jchaffin/voicekit';
32
+
33
+ // Define tools
34
+ const weatherTool = defineTool({
35
+ name: 'get_weather',
36
+ description: 'Get current weather for a location',
37
+ parameters: {
38
+ location: { type: 'string', description: 'City name' }
39
+ },
40
+ required: ['location'],
41
+ execute: async ({ location }) => {
42
+ const res = await fetch(`/api/weather?city=${location}`);
43
+ return res.json();
44
+ }
45
+ });
46
+
47
+ // Create agent
48
+ const agent = createAgent({
49
+ name: 'Assistant',
50
+ instructions: `
51
+ You are a helpful voice assistant.
52
+ Help users check the weather and answer questions.
53
+ `,
54
+ tools: [weatherTool]
55
+ });
56
+ ```
57
+
58
+ ### 3. Wrap your app with VoiceProvider
59
+
60
+ ```tsx
61
+ import { VoiceProvider } from '@jchaffin/voicekit';
62
+
63
+ function App() {
64
+ return (
65
+ <VoiceProvider agent={agent}>
66
+ <YourApp />
67
+ </VoiceProvider>
68
+ );
69
+ }
70
+ ```
71
+
72
+ ### 4. Use the useVoice hook
73
+
74
+ ```tsx
75
+ import { useVoice } from '@jchaffin/voicekit';
76
+
77
+ function VoiceChat() {
78
+ const {
79
+ status,
80
+ connect,
81
+ disconnect,
82
+ transcript,
83
+ sendMessage
84
+ } = useVoice();
85
+
86
+ return (
87
+ <div>
88
+ <button onClick={status === 'CONNECTED' ? disconnect : connect}>
89
+ {status === 'CONNECTED' ? 'End Call' : 'Start Call'}
90
+ </button>
91
+
92
+ <div>
93
+ {transcript.map(msg => (
94
+ <p key={msg.id}>
95
+ <strong>{msg.role}:</strong> {msg.text}
96
+ </p>
97
+ ))}
98
+ </div>
99
+
100
+ <input
101
+ type="text"
102
+ onKeyDown={(e) => {
103
+ if (e.key === 'Enter') {
104
+ sendMessage(e.currentTarget.value);
105
+ e.currentTarget.value = '';
106
+ }
107
+ }}
108
+ placeholder="Type a message..."
109
+ />
110
+ </div>
111
+ );
112
+ }
113
+ ```
114
+
115
+ ## API Reference
116
+
117
+ ### UI Components
118
+
119
+ #### `<VoiceChat>`
120
+
121
+ Complete voice chat interface - drop in and go.
122
+
123
+ ```tsx
124
+ import { VoiceProvider, VoiceChat, createAgent } from '@jchaffin/voicekit';
125
+
126
+ const agent = createAgent({ name: 'Bot', instructions: 'Be helpful' });
127
+
128
+ function App() {
129
+ return (
130
+ <VoiceProvider agent={agent}>
131
+ <VoiceChat height="400px" />
132
+ </VoiceProvider>
133
+ );
134
+ }
135
+ ```
136
+
137
+ Props:
138
+ - `height` - Chat area height (default: `'400px'`)
139
+ - `showHeader` - Show status header (default: `true`)
140
+ - `showInput` - Show text input (default: `true`)
141
+ - `emptyState` - Custom empty state content
142
+ - `header` - Custom header content
143
+ - `footer` - Custom footer content
144
+
145
+ #### Individual Components
146
+
147
+ ```tsx
148
+ import {
149
+ Transcript, // Message list
150
+ StatusIndicator, // Connection status dot
151
+ ConnectButton, // Start/end button
152
+ ChatInput // Text input
153
+ } from '@jchaffin/voicekit';
154
+
155
+ // Use within VoiceProvider
156
+ <StatusIndicator />
157
+ <Transcript messages={transcript} />
158
+ <ConnectButton connectText="Start Call" disconnectText="End Call" />
159
+ <ChatInput placeholder="Say something..." />
160
+ ```
161
+
162
+ ### Core Components
163
+
164
+ #### `<VoiceProvider>`
165
+
166
+ Wraps your app to provide voice functionality.
167
+
168
+ ```tsx
169
+ <VoiceProvider
170
+ agent={agent}
171
+ sessionEndpoint="/api/session" // Optional, defaults to /api/session
172
+ model="gpt-4o-realtime-preview" // Optional
173
+ language="en" // Optional
174
+ onStatusChange={(status) => {}} // Optional
175
+ onTranscriptUpdate={(msgs) => {}} // Optional
176
+ onToolCall={(name, input, result) => {}} // Optional
177
+ onError={(error) => {}} // Optional
178
+ >
179
+ {children}
180
+ </VoiceProvider>
181
+ ```
182
+
183
+ ### Hooks
184
+
185
+ #### `useVoice()`
186
+
187
+ Main hook for voice interaction.
188
+
189
+ ```ts
190
+ const {
191
+ status, // 'DISCONNECTED' | 'CONNECTING' | 'CONNECTED'
192
+ connect, // () => Promise<void>
193
+ disconnect, // () => Promise<void>
194
+ transcript, // TranscriptMessage[]
195
+ clearTranscript, // () => void
196
+ sendMessage, // (text: string) => void
197
+ interrupt, // () => void
198
+ mute, // (muted: boolean) => void
199
+ isMuted, // boolean
200
+ agent, // RealtimeAgent
201
+ } = useVoice();
202
+ ```
203
+
204
+ #### `useToolResult(toolName)`
205
+
206
+ Listen for results from a specific tool.
207
+
208
+ ```tsx
209
+ const { result, input, hasResult, clear } = useToolResult('get_weather');
210
+ ```
211
+
212
+ #### `useToolListener(toolName, handler)`
213
+
214
+ Register a callback for tool results.
215
+
216
+ ```tsx
217
+ useToolListener('get_weather', (input, result) => {
218
+ console.log('Weather:', result);
219
+ });
220
+ ```
221
+
222
+ #### `useToolResults()`
223
+
224
+ Get all tool results.
225
+
226
+ ```tsx
227
+ const { results, lastResult, clear } = useToolResults();
228
+ ```
229
+
230
+ ### Tool Builders
231
+
232
+ #### `defineTool(config)`
233
+
234
+ Create a tool with type inference.
235
+
236
+ ```ts
237
+ const tool = defineTool({
238
+ name: 'tool_name',
239
+ description: 'What the tool does',
240
+ parameters: {
241
+ param1: { type: 'string', description: 'Description' },
242
+ param2: { type: 'number', default: 10 }
243
+ },
244
+ required: ['param1'],
245
+ execute: async ({ param1, param2 }) => {
246
+ // Implementation
247
+ return { success: true };
248
+ }
249
+ });
250
+ ```
251
+
252
+ #### `createNavigationTool(sections)`
253
+
254
+ Create a tool for single-page app navigation.
255
+
256
+ ```ts
257
+ const navTool = createNavigationTool(['about', 'projects', 'contact']);
258
+ ```
259
+
260
+ #### `createAPITool(config)`
261
+
262
+ Create a tool that calls an API endpoint.
263
+
264
+ ```ts
265
+ const searchTool = createAPITool({
266
+ name: 'search',
267
+ description: 'Search the database',
268
+ parameters: { query: { type: 'string' } },
269
+ required: ['query'],
270
+ endpoint: '/api/search',
271
+ method: 'POST'
272
+ });
273
+ ```
274
+
275
+ #### `createEventTool(config)`
276
+
277
+ Create a tool that dispatches DOM events for UI updates.
278
+
279
+ ```ts
280
+ const modalTool = createEventTool({
281
+ name: 'show_modal',
282
+ description: 'Show a modal',
283
+ parameters: { title: { type: 'string' } },
284
+ eventType: 'voice:show-modal'
285
+ });
286
+ ```
287
+
288
+ ### Agent Builders
289
+
290
+ #### `createAgent(config)`
291
+
292
+ Create a voice agent.
293
+
294
+ ```ts
295
+ const agent = createAgent({
296
+ name: 'Assistant',
297
+ instructions: 'You are helpful.',
298
+ tools: [tool1, tool2]
299
+ });
300
+ ```
301
+
302
+ #### `createAgentFromTemplate(config)`
303
+
304
+ Create an agent using structured templates.
305
+
306
+ ```ts
307
+ const agent = createAgentFromTemplate({
308
+ name: 'Support Bot',
309
+ role: 'customer support agent',
310
+ personality: 'Friendly and helpful',
311
+ capabilities: ['Answer questions', 'Track orders'],
312
+ constraints: ['Never share private data'],
313
+ tools: [orderTool]
314
+ });
315
+ ```
316
+
317
+ ## Server API
318
+
319
+ Import from `@jchaffin/voicekit/server` for server-side utilities.
320
+
321
+ ### `createSessionHandler(config?)`
322
+
323
+ Creates a request handler for Next.js App Router or similar frameworks.
324
+
325
+ ```ts
326
+ import { createSessionHandler } from '@jchaffin/voicekit/server';
327
+
328
+ // Basic
329
+ export const POST = createSessionHandler();
330
+
331
+ // With config
332
+ export const POST = createSessionHandler({
333
+ apiKey: process.env.CUSTOM_KEY, // defaults to OPENAI_API_KEY
334
+ model: 'gpt-realtime',
335
+ voice: 'alloy'
336
+ });
337
+ ```
338
+
339
+ ### `getEphemeralKey(config?)`
340
+
341
+ Get an ephemeral key directly (for Express, Fastify, etc.).
342
+
343
+ ```ts
344
+ import { getEphemeralKey } from '@jchaffin/voicekit/server';
345
+
346
+ app.post('/api/session', async (req, res) => {
347
+ const result = await getEphemeralKey();
348
+ if (result.error) {
349
+ return res.status(500).json({ error: result.error });
350
+ }
351
+ res.json({ ephemeralKey: result.ephemeralKey });
352
+ });
353
+ ```
354
+
355
+ ### `handleOptions()` / `corsHeaders()`
356
+
357
+ CORS helpers for preflight requests.
358
+
359
+ ```ts
360
+ import { handleOptions, corsHeaders } from '@jchaffin/voicekit/server';
361
+
362
+ export function OPTIONS() {
363
+ return handleOptions();
364
+ }
365
+ ```
366
+
367
+ ## License
368
+
369
+ MIT
@@ -0,0 +1,43 @@
import { i as SessionOptions, g as ServerSessionConfig, e as VoiceAdapter, S as ServerAdapter } from '../types-DY31oVB1.mjs';

/**
 * Deepgram adapter for VoiceKit.
 *
 * Peer dependency: @deepgram/sdk (>= 3.0.0)
 *
 * Deepgram provides STT (listen) and TTS (speak) but does not offer a
 * single "conversational AI" socket like OpenAI or ElevenLabs. This adapter
 * wires Deepgram live transcription for the user's mic audio and expects
 * a server-side agent (e.g. your own LLM pipeline) to handle the assistant
 * logic and push assistant transcripts/audio back via a companion WebSocket.
 *
 * Usage:
 * ```ts
 * import { deepgram } from '@jchaffin/voicekit/deepgram';
 *
 * <VoiceProvider
 *   adapter={deepgram({ agentUrl: 'wss://my-backend/agent' })}
 *   agent={agent}
 * />
 * ```
 */

/** Options accepted by the client-side `deepgram()` adapter factory. */
interface DeepgramAdapterOptions extends SessionOptions {
    /** WebSocket URL of your agent backend that orchestrates Deepgram STT + LLM + TTS */
    agentUrl: string;
}
/**
 * Create a Deepgram adapter.
 *
 * ```ts
 * import { deepgram } from '@jchaffin/voicekit/deepgram';
 * <VoiceProvider adapter={deepgram({ agentUrl: 'wss://...' })} agent={agent} />
 * ```
 */
declare function deepgram(options: DeepgramAdapterOptions): VoiceAdapter;
/** Server-side configuration for `deepgramServer()`. */
interface DeepgramServerConfig extends ServerSessionConfig {
    /** Deepgram API key; when omitted, the DEEPGRAM_API_KEY environment variable is used. */
    apiKey?: string;
}
/** Create the server-side adapter that issues session tokens for the client. */
declare function deepgramServer(config?: DeepgramServerConfig): ServerAdapter;

export { type DeepgramAdapterOptions, type DeepgramServerConfig, deepgram, deepgramServer, deepgram as default };
@@ -0,0 +1,43 @@
import { i as SessionOptions, g as ServerSessionConfig, e as VoiceAdapter, S as ServerAdapter } from '../types-DY31oVB1.js';

/**
 * Deepgram adapter for VoiceKit.
 *
 * Peer dependency: @deepgram/sdk (>= 3.0.0)
 *
 * Deepgram provides STT (listen) and TTS (speak) but does not offer a
 * single "conversational AI" socket like OpenAI or ElevenLabs. This adapter
 * wires Deepgram live transcription for the user's mic audio and expects
 * a server-side agent (e.g. your own LLM pipeline) to handle the assistant
 * logic and push assistant transcripts/audio back via a companion WebSocket.
 *
 * Usage:
 * ```ts
 * import { deepgram } from '@jchaffin/voicekit/deepgram';
 *
 * <VoiceProvider
 *   adapter={deepgram({ agentUrl: 'wss://my-backend/agent' })}
 *   agent={agent}
 * />
 * ```
 */

/** Options accepted by the client-side `deepgram()` adapter factory. */
interface DeepgramAdapterOptions extends SessionOptions {
    /** WebSocket URL of your agent backend that orchestrates Deepgram STT + LLM + TTS */
    agentUrl: string;
}
/**
 * Create a Deepgram adapter.
 *
 * ```ts
 * import { deepgram } from '@jchaffin/voicekit/deepgram';
 * <VoiceProvider adapter={deepgram({ agentUrl: 'wss://...' })} agent={agent} />
 * ```
 */
declare function deepgram(options: DeepgramAdapterOptions): VoiceAdapter;
/** Server-side configuration for `deepgramServer()`. */
interface DeepgramServerConfig extends ServerSessionConfig {
    /** Deepgram API key; when omitted, the DEEPGRAM_API_KEY environment variable is used. */
    apiKey?: string;
}
/** Create the server-side adapter that issues session tokens for the client. */
declare function deepgramServer(config?: DeepgramServerConfig): ServerAdapter;

export { type DeepgramAdapterOptions, type DeepgramServerConfig, deepgram, deepgramServer, deepgram as default };
@@ -0,0 +1,216 @@
"use strict";
// ---- Bundler-generated CommonJS interop helpers (do not hand-edit) ----
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Install lazy getters on `target` for every key of `all`, so exports are
// resolved on first access rather than at definition time.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copy enumerable own properties of `from` onto `to` as getters, skipping
// `except` and anything already present; preserves per-key enumerability.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Tag the namespace object with `__esModule` for ESM<->CJS interop.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/adapters/deepgram.ts
// Module export surface: the `deepgram` factory, the `deepgramServer`
// helper, and `deepgram` again as the default export.
var deepgram_exports = {};
__export(deepgram_exports, {
  deepgram: () => deepgram,
  deepgramServer: () => deepgramServer,
  default: () => deepgram_default
});
module.exports = __toCommonJS(deepgram_exports);
// src/core/EventEmitter.ts
/**
 * Minimal synchronous event emitter used by adapter sessions.
 * Handlers are stored per event name in a Set, so duplicate registrations
 * of the same function are collapsed and removal is O(1).
 */
var EventEmitter = class {
  constructor() {
    // event name -> set of subscribed handler functions
    this.handlers = /* @__PURE__ */ new Map();
  }
  /** Subscribe `handler` to `event`. */
  on(event, handler) {
    if (!this.handlers.has(event)) {
      this.handlers.set(event, /* @__PURE__ */ new Set());
    }
    this.handlers.get(event).add(handler);
  }
  /** Unsubscribe a previously registered handler; no-op when absent. */
  off(event, handler) {
    const subscribers = this.handlers.get(event);
    if (subscribers) {
      subscribers.delete(handler);
    }
  }
  /**
   * Synchronously invoke every handler registered for `event` with `args`.
   * A throwing handler is logged and does not stop the remaining handlers.
   */
  emit(event, ...args) {
    const subscribers = this.handlers.get(event);
    if (!subscribers) return;
    for (const fn of subscribers) {
      try {
        fn(...args);
      } catch (e) {
        console.error(`EventEmitter error in "${event}":`, e);
      }
    }
  }
  /** Drop every handler for every event. */
  removeAllListeners() {
    this.handlers.clear();
  }
};
// src/adapters/deepgram.ts
/**
 * Browser-side voice session backed by a companion agent WebSocket.
 *
 * Responsibilities visible in this class:
 *  - opens a WebSocket to `agentUrl`, passing auth token / model / language
 *    as query parameters;
 *  - sends an initial "agent_config" message describing the agent and its
 *    tools (name/description/parameters only — `execute` stays client-side);
 *  - captures mic audio via getUserMedia + MediaRecorder and streams the
 *    recorded chunks over the socket;
 *  - translates incoming JSON messages into VoiceKit session events
 *    (see handleMessage).
 *
 * NOTE(review): the auth token is carried in the WebSocket URL query string;
 * confirm the backend keeps request URLs out of its logs.
 */
var DeepgramSession = class extends EventEmitter {
  constructor(agent, agentUrl, options) {
    super();
    this.ws = null;
    this.mediaStream = null;
    this.mediaRecorder = null;
    this.agent = agent;
    this.agentUrl = agentUrl;
    this.options = options;
  }
  /**
   * Open the socket, announce the agent config, then start streaming mic
   * audio. Resolves once both the socket is open and recording has begun;
   * rejects if the WebSocket handshake fails.
   */
  async connect(config) {
    const url = new URL(this.agentUrl);
    url.searchParams.set("token", config.authToken);
    if (this.options.model) url.searchParams.set("model", this.options.model);
    if (this.options.language) url.searchParams.set("language", this.options.language);
    this.ws = new WebSocket(url.toString());
    // Wait for the handshake; onopen/onerror are replaced below once open.
    await new Promise((resolve, reject) => {
      const ws = this.ws;
      ws.onopen = () => resolve();
      ws.onerror = (e) => reject(new Error("WebSocket connection failed"));
      ws.onclose = () => this.emit("status_change", "DISCONNECTED");
    });
    // Non-JSON frames are surfaced verbatim as raw events.
    this.ws.onmessage = (event) => {
      try {
        const msg = JSON.parse(event.data);
        this.handleMessage(msg, config.audioElement);
      } catch {
        this.emit("raw_event", event.data);
      }
    };
    this.ws.onerror = () => {
      this.emit("error", new Error("Deepgram WebSocket error"));
    };
    // First frame: describe the agent so the backend can orchestrate
    // STT + LLM + TTS. Tool schemas only; execution stays in the client.
    this.ws.send(JSON.stringify({
      type: "agent_config",
      agent: {
        name: this.agent.name,
        instructions: this.agent.instructions,
        tools: (this.agent.tools || []).map((t) => ({
          name: t.name,
          description: t.description,
          parameters: t.parameters
        }))
      }
    }));
    // Mic capture: prefer opus-in-webm when the browser supports it.
    this.mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    this.mediaRecorder = new MediaRecorder(this.mediaStream, {
      mimeType: MediaRecorder.isTypeSupported("audio/webm;codecs=opus") ? "audio/webm;codecs=opus" : "audio/webm"
    });
    // Forward each recorded chunk while the socket is still open.
    this.mediaRecorder.ondataavailable = (e) => {
      if (e.data.size > 0 && this.ws?.readyState === WebSocket.OPEN) {
        this.ws.send(e.data);
      }
    };
    // Emit a chunk every 250 ms for low-latency streaming.
    this.mediaRecorder.start(250);
    this.emit("status_change", "CONNECTED");
  }
  /** Stop recording, release the mic, close the socket, drop all listeners. */
  async disconnect() {
    this.mediaRecorder?.stop();
    this.mediaStream?.getTracks().forEach((t) => t.stop());
    this.mediaRecorder = null;
    this.mediaStream = null;
    if (this.ws) {
      this.ws.close();
      this.ws = null;
    }
    this.removeAllListeners();
  }
  /** Send a typed user message to the agent backend. */
  sendMessage(text) {
    this.ws?.send(JSON.stringify({ type: "user_message", text }));
  }
  /** Ask the backend to cut off the assistant's current response. */
  interrupt() {
    this.ws?.send(JSON.stringify({ type: "interrupt" }));
  }
  /** Enable/disable the mic tracks locally; recording keeps running. */
  mute(muted) {
    this.mediaStream?.getAudioTracks().forEach((t) => {
      t.enabled = !muted;
    });
  }
  /** Escape hatch: send an arbitrary JSON event to the backend. */
  sendRawEvent(event) {
    this.ws?.send(JSON.stringify(event));
  }
  /**
   * Map a backend JSON message onto VoiceKit session events. Accepts both
   * snake_case (`is_final`) and camelCase (`isFinal`) transcript flags;
   * when neither is present, a non-empty `text` is treated as final.
   * Unknown message types fall through as raw events.
   */
  handleMessage(msg, audioElement) {
    switch (msg.type) {
      case "user_transcript":
        this.emit("user_transcript", {
          itemId: msg.itemId || msg.id || "",
          delta: msg.delta,
          text: msg.text,
          isFinal: msg.is_final ?? msg.isFinal ?? !!msg.text
        });
        break;
      case "assistant_transcript":
        this.emit("assistant_transcript", {
          itemId: msg.itemId || msg.id || "",
          delta: msg.delta,
          text: msg.text,
          isFinal: msg.is_final ?? msg.isFinal ?? !!msg.text
        });
        break;
      case "audio":
        // Audio deltas are only surfaced when the caller supplied an
        // audioElement; the element itself is not used here — playback is
        // left to whoever listens for "audio_delta".
        if (msg.data && audioElement) {
          this.emit("audio_delta", msg.itemId || "", msg.data);
        }
        break;
      case "tool_call_start":
        this.emit("tool_call_start", msg.name, msg.input);
        break;
      case "tool_call_end":
        this.emit("tool_call_end", msg.name, msg.input, msg.output);
        break;
      case "speech_started":
        this.emit("user_speech_started");
        break;
      case "error":
        this.emit("error", new Error(msg.message || "Deepgram error"));
        break;
      default:
        this.emit("raw_event", msg);
        break;
    }
  }
};
/**
 * Client-side adapter factory for Deepgram-backed sessions.
 *
 * Returns a VoiceAdapter whose `createSession` builds a DeepgramSession
 * pointed at `options.agentUrl`; per-session options override the
 * factory-level ones.
 */
function deepgram(options) {
  const createSession = (agent, sessionOpts) => {
    const mergedOptions = { ...options, ...sessionOpts };
    return new DeepgramSession(agent, options.agentUrl, mergedOptions);
  };
  return { name: "deepgram", createSession };
}
/**
 * Server-side adapter for Deepgram sessions.
 *
 * `getSessionToken` resolves the API key from, in order: per-call overrides,
 * the factory config, then the DEEPGRAM_API_KEY environment variable, and
 * returns `{ token }` or `{ error }`. `createSessionHandler` wraps that in a
 * fetch-style handler (Next.js App Router compatible) that responds with
 * `{ ephemeralKey }` on success or a 500 JSON error.
 *
 * NOTE(review): the raw API key is returned as the session token — confirm
 * whether short-lived scoped keys should be minted here instead.
 */
function deepgramServer(config = {}) {
  async function getSessionToken(overrides = {}) {
    const settings = { ...config, ...overrides };
    const key = settings.apiKey || process.env.DEEPGRAM_API_KEY;
    return key ? { token: key } : { error: "Deepgram API key not configured" };
  }
  function createSessionHandler(overrides) {
    return async (_request) => {
      const result = await getSessionToken(overrides);
      return result.error
        ? Response.json({ error: result.error }, { status: 500 })
        : Response.json({ ephemeralKey: result.token });
    };
  }
  return { getSessionToken, createSessionHandler };
}
// Default export mirrors the named `deepgram` factory.
var deepgram_default = deepgram;
// Annotate the CommonJS export names for ESM import in node:
// (the dead `0 && ...` expression is presumably kept so Node's static
// CJS export scanner can detect the named exports — do not remove)
0 && (module.exports = {
  deepgram,
  deepgramServer
});