@jchaffin/voicekit 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,162 @@
1
+ import {
2
+ EventEmitter
3
+ } from "../chunk-22WLZIXO.mjs";
4
+
5
+ // src/adapters/deepgram.ts
6
/**
 * Voice session that connects to a Deepgram-backed agent endpoint over a
 * WebSocket, sends the agent configuration, and streams microphone audio to
 * the socket as MediaRecorder chunks.
 *
 * Events emitted: "status_change", "user_transcript", "assistant_transcript",
 * "audio_delta", "tool_call_start", "tool_call_end", "user_speech_started",
 * "error" and "raw_event".
 */
var DeepgramSession = class extends EventEmitter {
  /**
   * @param {object} agent - Agent definition; `name`, `instructions` and
   *   `tools` are sent to the server when the session connects.
   * @param {string} agentUrl - WebSocket URL of the agent endpoint.
   * @param {object} [options] - Adapter options; `model` and `language` are
   *   forwarded as query parameters when set.
   */
  constructor(agent, agentUrl, options) {
    super();
    this.ws = null;
    this.mediaStream = null;
    this.mediaRecorder = null;
    this.agent = agent;
    this.agentUrl = agentUrl;
    this.options = options;
  }

  /**
   * Open the WebSocket, send the agent configuration and start streaming the
   * microphone. Emits `status_change: "CONNECTED"` once everything is running.
   *
   * If any step fails, the socket and any acquired microphone tracks are
   * released before the error is rethrown, so a failed connect leaves nothing
   * behind.
   *
   * @param {object} config
   * @param {string} config.authToken - Sent as the `token` query parameter.
   * @param {*} [config.audioElement] - Passed through to `handleMessage`.
   * @returns {Promise<void>}
   * @throws {Error} When the socket cannot be opened or the microphone cannot
   *   be acquired.
   */
  async connect(config) {
    const url = new URL(this.agentUrl);
    url.searchParams.set("token", config.authToken);
    if (this.options?.model) url.searchParams.set("model", this.options.model);
    if (this.options?.language) url.searchParams.set("language", this.options.language);

    const ws = new WebSocket(url.toString());
    this.ws = ws;
    try {
      await new Promise((resolve, reject) => {
        ws.onopen = () => resolve();
        ws.onerror = () => reject(new Error("WebSocket connection failed"));
        ws.onclose = () => this.emit("status_change", "DISCONNECTED");
      });
      ws.onmessage = (event) => {
        try {
          const msg = JSON.parse(event.data);
          this.handleMessage(msg, config.audioElement);
        } catch {
          // Payloads that are not valid JSON are surfaced untouched.
          this.emit("raw_event", event.data);
        }
      };
      // Replaces the connect-time handler: later socket errors become events.
      ws.onerror = () => {
        this.emit("error", new Error("Deepgram WebSocket error"));
      };
      ws.send(JSON.stringify({
        type: "agent_config",
        agent: {
          name: this.agent.name,
          instructions: this.agent.instructions,
          tools: (this.agent.tools || []).map((t) => ({
            name: t.name,
            description: t.description,
            parameters: t.parameters
          }))
        }
      }));

      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      if (this.ws !== ws) {
        // disconnect() ran while the permission prompt was pending; do not
        // keep the microphone open for a session that no longer exists.
        stream.getTracks().forEach((t) => t.stop());
        return;
      }
      this.mediaStream = stream;
      this.mediaRecorder = new MediaRecorder(stream, {
        mimeType: MediaRecorder.isTypeSupported("audio/webm;codecs=opus") ? "audio/webm;codecs=opus" : "audio/webm"
      });
      this.mediaRecorder.ondataavailable = (e) => {
        if (e.data.size > 0 && this.ws?.readyState === WebSocket.OPEN) {
          this.ws.send(e.data);
        }
      };
      // Deliver a chunk every 250 ms.
      this.mediaRecorder.start(250);
    } catch (err) {
      this.releaseResources();
      throw err;
    }
    this.emit("status_change", "CONNECTED");
  }

  /**
   * Stop recording, release the microphone, close the socket and drop every
   * registered listener.
   * @returns {Promise<void>}
   */
  async disconnect() {
    this.releaseResources();
    this.removeAllListeners();
  }

  /**
   * Release the recorder, microphone tracks and socket without touching the
   * registered listeners. Safe to call when nothing has been acquired.
   */
  releaseResources() {
    const recorder = this.mediaRecorder;
    // Some implementations throw InvalidStateError when stop() is called on a
    // recorder that is already inactive.
    if (recorder && recorder.state !== "inactive") recorder.stop();
    this.mediaStream?.getTracks().forEach((t) => t.stop());
    this.mediaRecorder = null;
    this.mediaStream = null;
    if (this.ws) {
      this.ws.close();
      this.ws = null;
    }
  }

  /** Send a text message from the user. @param {string} text */
  sendMessage(text) {
    this.ws?.send(JSON.stringify({ type: "user_message", text }));
  }

  /** Send an `interrupt` message to the server. */
  interrupt() {
    this.ws?.send(JSON.stringify({ type: "interrupt" }));
  }

  /** Enable or disable the microphone tracks. @param {boolean} muted */
  mute(muted) {
    this.mediaStream?.getAudioTracks().forEach((t) => {
      t.enabled = !muted;
    });
  }

  /** Send an arbitrary JSON-serialisable event. @param {*} event */
  sendRawEvent(event) {
    this.ws?.send(JSON.stringify(event));
  }

  /**
   * Translate one parsed server message into session events. Unknown message
   * types are re-emitted as "raw_event".
   *
   * @param {object} msg - Parsed JSON message.
   * @param {*} audioElement - "audio" messages are only forwarded when this
   *   is truthy.
   */
  handleMessage(msg, audioElement) {
    switch (msg.type) {
      case "user_transcript":
        this.emit("user_transcript", {
          itemId: msg.itemId || msg.id || "",
          delta: msg.delta,
          text: msg.text,
          // Without an explicit flag, a message carrying full text is final.
          isFinal: msg.is_final ?? msg.isFinal ?? !!msg.text
        });
        break;
      case "assistant_transcript":
        this.emit("assistant_transcript", {
          itemId: msg.itemId || msg.id || "",
          delta: msg.delta,
          text: msg.text,
          isFinal: msg.is_final ?? msg.isFinal ?? !!msg.text
        });
        break;
      case "audio":
        if (msg.data && audioElement) {
          this.emit("audio_delta", msg.itemId || "", msg.data);
        }
        break;
      case "tool_call_start":
        this.emit("tool_call_start", msg.name, msg.input);
        break;
      case "tool_call_end":
        this.emit("tool_call_end", msg.name, msg.input, msg.output);
        break;
      case "speech_started":
        this.emit("user_speech_started");
        break;
      case "error":
        this.emit("error", new Error(msg.message || "Deepgram error"));
        break;
      default:
        this.emit("raw_event", msg);
        break;
    }
  }
};
129
/**
 * Create the Deepgram client adapter.
 *
 * @param {object} options - Adapter options. `agentUrl` is read when a
 *   session is created and passed to the session as its endpoint URL; the
 *   whole object is also merged into the session options.
 * @returns {{ name: string, createSession: Function }} Adapter named
 *   "deepgram" whose `createSession(agent, sessionOpts)` builds a
 *   `DeepgramSession`; keys in `sessionOpts` override keys in `options`.
 */
function deepgram(options) {
  const createSession = (agent, sessionOpts) => {
    const sessionOptions = { ...options, ...sessionOpts };
    return new DeepgramSession(agent, options.agentUrl, sessionOptions);
  };
  return { name: "deepgram", createSession };
}
137
/**
 * Create the server-side Deepgram adapter.
 *
 * NOTE(review): the "token" produced here is the configured API key itself,
 * not a short-lived credential, and the session handler returns it in the
 * response body under `ephemeralKey`. Confirm this is acceptable for every
 * client that can reach the handler.
 *
 * @param {object} [config] - Base configuration; `apiKey` is used when set.
 * @returns {{ getSessionToken: Function, createSessionHandler: Function }}
 */
function deepgramServer(config = {}) {
  /**
   * Resolve the token for a session.
   * @param {object} [overrides] - Per-call values that win over `config`.
   * @returns {Promise<{token: string} | {error: string}>}
   */
  async function getSessionToken(overrides = {}) {
    const { apiKey: configuredKey } = { ...config, ...overrides };
    // Fall back to the environment when no key was configured.
    const apiKey = configuredKey || process.env.DEEPGRAM_API_KEY;
    return apiKey
      ? { token: apiKey }
      : { error: "Deepgram API key not configured" };
  }

  /**
   * Build a request handler that answers with `{ ephemeralKey }` on success
   * or `{ error }` with status 500 on failure.
   */
  const createSessionHandler = (overrides) => async (_request) => {
    const outcome = await getSessionToken(overrides);
    if (outcome.error) {
      return Response.json({ error: outcome.error }, { status: 500 });
    }
    return Response.json({ ephemeralKey: outcome.token });
  };

  return { getSessionToken, createSessionHandler };
}
157
+ var deepgram_default = deepgram;
158
+ export {
159
+ deepgram,
160
+ deepgramServer,
161
+ deepgram_default as default
162
+ };
@@ -0,0 +1,41 @@
1
+ import { i as SessionOptions, g as ServerSessionConfig, e as VoiceAdapter, S as ServerAdapter } from '../types-DY31oVB1.mjs';
2
+
3
+ /**
4
+ * ElevenLabs Conversational AI adapter for VoiceKit.
5
+ *
6
+ * Uses ElevenLabs' WebSocket-based Conversational AI API.
7
+ *
8
+ * Usage:
9
+ * ```ts
10
+ * import { elevenlabs } from '@jchaffin/voicekit/elevenlabs';
11
+ *
12
+ * <VoiceProvider
13
+ * adapter={elevenlabs({ agentId: 'your-agent-id' })}
14
+ * agent={agent}
15
+ * />
16
+ * ```
17
+ *
18
+ * The session endpoint should return `{ ephemeralKey: "<signed_url>" }`.
19
+ * For public agents, pass the agent_id directly.
20
+ */
21
+
22
/** Options accepted by the `elevenlabs()` client adapter factory. */
interface ElevenLabsAdapterOptions extends SessionOptions {
    /** Identifier of the ElevenLabs agent the session should talk to. */
    agentId: string;
}
26
/**
 * Build the client-side VoiceKit adapter for ElevenLabs Conversational AI.
 *
 * @param options - Adapter options; `agentId` selects the agent.
 * @returns A `VoiceAdapter` to hand to `VoiceProvider`.
 *
 * @example
 * ```ts
 * import { elevenlabs } from '@jchaffin/voicekit/elevenlabs';
 * <VoiceProvider adapter={elevenlabs({ agentId: '...' })} agent={agent} />
 * ```
 */
declare function elevenlabs(options: ElevenLabsAdapterOptions): VoiceAdapter;
35
/** Configuration for the server-side ElevenLabs adapter. */
interface ElevenLabsServerConfig extends ServerSessionConfig {
    /** ElevenLabs API key; the `ELEVENLABS_API_KEY` environment variable is used when omitted. */
    apiKey?: string;
    /** ElevenLabs agent ID; the `ELEVENLABS_AGENT_ID` environment variable is used when omitted. */
    agentId?: string;
}
39
/**
 * Build the server-side ElevenLabs adapter, which obtains a signed
 * conversation URL for the configured agent.
 *
 * @param config - Optional API key and agent ID.
 * @returns A `ServerAdapter`.
 */
declare function elevenlabsServer(config?: ElevenLabsServerConfig): ServerAdapter;
40
+
41
+ export { type ElevenLabsAdapterOptions, type ElevenLabsServerConfig, elevenlabs as default, elevenlabs, elevenlabsServer };
@@ -0,0 +1,41 @@
1
+ import { i as SessionOptions, g as ServerSessionConfig, e as VoiceAdapter, S as ServerAdapter } from '../types-DY31oVB1.js';
2
+
3
+ /**
4
+ * ElevenLabs Conversational AI adapter for VoiceKit.
5
+ *
6
+ * Uses ElevenLabs' WebSocket-based Conversational AI API.
7
+ *
8
+ * Usage:
9
+ * ```ts
10
+ * import { elevenlabs } from '@jchaffin/voicekit/elevenlabs';
11
+ *
12
+ * <VoiceProvider
13
+ * adapter={elevenlabs({ agentId: 'your-agent-id' })}
14
+ * agent={agent}
15
+ * />
16
+ * ```
17
+ *
18
+ * The session endpoint should return `{ ephemeralKey: "<signed_url>" }`.
19
+ * For public agents, pass the agent_id directly.
20
+ */
21
+
22
/** Options accepted by the `elevenlabs()` client adapter factory. */
interface ElevenLabsAdapterOptions extends SessionOptions {
    /** Identifier of the ElevenLabs agent the session should talk to. */
    agentId: string;
}
26
/**
 * Build the client-side VoiceKit adapter for ElevenLabs Conversational AI.
 *
 * @param options - Adapter options; `agentId` selects the agent.
 * @returns A `VoiceAdapter` to hand to `VoiceProvider`.
 *
 * @example
 * ```ts
 * import { elevenlabs } from '@jchaffin/voicekit/elevenlabs';
 * <VoiceProvider adapter={elevenlabs({ agentId: '...' })} agent={agent} />
 * ```
 */
declare function elevenlabs(options: ElevenLabsAdapterOptions): VoiceAdapter;
35
/** Configuration for the server-side ElevenLabs adapter. */
interface ElevenLabsServerConfig extends ServerSessionConfig {
    /** ElevenLabs API key; the `ELEVENLABS_API_KEY` environment variable is used when omitted. */
    apiKey?: string;
    /** ElevenLabs agent ID; the `ELEVENLABS_AGENT_ID` environment variable is used when omitted. */
    agentId?: string;
}
39
/**
 * Build the server-side ElevenLabs adapter, which obtains a signed
 * conversation URL for the configured agent.
 *
 * @param config - Optional API key and agent ID.
 * @returns A `ServerAdapter`.
 */
declare function elevenlabsServer(config?: ElevenLabsServerConfig): ServerAdapter;
40
+
41
+ export { type ElevenLabsAdapterOptions, type ElevenLabsServerConfig, elevenlabs as default, elevenlabs, elevenlabsServer };
@@ -0,0 +1,304 @@
1
+ "use strict";
2
// Bundler interop helpers: expose a set of lazily evaluated bindings as a
// CommonJS exports object flagged with `__esModule`.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Define every key of `all` on `target` as an enumerable getter, so each
// binding is evaluated on access rather than at definition time.
var __export = (target, all) => {
  for (var name in all) {
    const getter = all[name];
    __defProp(target, name, { get: getter, enumerable: true });
  }
};

// Mirror the own properties of `from` onto `to` as getters, skipping keys
// `to` already owns and the single key named by `except`. `desc` is only a
// scratch slot for the source property descriptor.
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Keep the source property's enumerability; default to enumerable
      // when no descriptor is available.
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Wrap a module namespace in a fresh object carrying `__esModule: true`.
var __toCommonJS = (mod) => {
  const exportsObject = __defProp({}, "__esModule", { value: true });
  return __copyProps(exportsObject, mod);
};
19
+
20
+ // src/adapters/elevenlabs.ts
21
+ var elevenlabs_exports = {};
22
+ __export(elevenlabs_exports, {
23
+ default: () => elevenlabs_default,
24
+ elevenlabs: () => elevenlabs,
25
+ elevenlabsServer: () => elevenlabsServer
26
+ });
27
+ module.exports = __toCommonJS(elevenlabs_exports);
28
+
29
+ // src/core/EventEmitter.ts
30
/**
 * Minimal event emitter. Handlers are stored per event name in a Set, so
 * registering the same function twice for one event has no extra effect.
 */
var EventEmitter = class {
  constructor() {
    // event name -> Set of handler functions
    this.handlers = new Map();
  }

  /** Register `handler` for `event`. */
  on(event, handler) {
    if (!this.handlers.has(event)) {
      this.handlers.set(event, new Set());
    }
    this.handlers.get(event).add(handler);
  }

  /** Remove `handler` from `event`; unknown events and handlers are ignored. */
  off(event, handler) {
    const listeners = this.handlers.get(event);
    if (listeners) {
      listeners.delete(handler);
    }
  }

  /**
   * Call every handler registered for `event` with `args`. A handler that
   * throws is logged with `console.error` and does not stop the others.
   */
  emit(event, ...args) {
    const listeners = this.handlers.get(event);
    if (!listeners) {
      return;
    }
    for (const listener of listeners) {
      try {
        listener(...args);
      } catch (e) {
        console.error(`EventEmitter error in "${event}":`, e);
      }
    }
  }

  /** Drop every handler for every event. */
  removeAllListeners() {
    this.handlers.clear();
  }
};
58
+
59
+ // src/adapters/elevenlabs.ts
60
// Endpoint used when no signed URL is supplied as the auth token.
var ELEVENLABS_WS_BASE = "wss://api.elevenlabs.io/v1/convai/conversation";

/**
 * Voice session for ElevenLabs Conversational AI over a WebSocket.
 *
 * Captures the microphone at 16 kHz, sends it as base64-encoded 16-bit PCM
 * chunks, plays the agent's PCM audio, and executes client-side tool calls
 * declared on the agent.
 *
 * Events emitted: "status_change", "user_transcript", "assistant_transcript",
 * "audio_delta", "tool_call_start", "tool_call_end", "user_speech_started",
 * "error" and "raw_event".
 */
var ElevenLabsSession = class extends EventEmitter {
  /**
   * @param {object} agent - Agent definition; `instructions` is sent as a
   *   contextual update and `tools` are used to run client tool calls.
   * @param {string} agentId - ElevenLabs agent ID, used when no signed URL is
   *   provided at connect time.
   * @param {object} [options] - Adapter options (stored, not otherwise read).
   */
  constructor(agent, agentId, options) {
    super();
    this.ws = null;
    this.mediaStream = null;
    this.audioContext = null;
    this.scriptProcessor = null;
    this.playbackCtx = null;
    // Sample rate used to decode agent audio. 22050 unless the server
    // announces a different PCM rate in its initiation metadata.
    this.playbackSampleRate = 22050;
    // Playback-context time at which the next audio chunk should start.
    this.nextPlaybackTime = 0;
    this.agent = agent;
    this.agentId = agentId;
    this.options = options;
  }

  /**
   * Open the socket, send the agent instructions and start streaming the
   * microphone. Emits `status_change: "CONNECTED"` when everything is running.
   *
   * If any step fails, everything acquired so far is released before the
   * error is rethrown.
   *
   * @param {object} config
   * @param {string} [config.authToken] - Used verbatim as the socket URL when
   *   it starts with "wss://"; otherwise the agent ID endpoint is used.
   * @param {*} [config.audioElement] - Agent audio is only played when this
   *   is truthy.
   * @returns {Promise<void>}
   * @throws {Error} When the socket cannot be opened or the microphone cannot
   *   be acquired.
   */
  async connect(config) {
    const wsUrl = config.authToken?.startsWith("wss://")
      ? config.authToken
      // Encode the ID so reserved characters cannot alter the query string.
      : `${ELEVENLABS_WS_BASE}?agent_id=${encodeURIComponent(this.agentId)}`;
    const ws = new WebSocket(wsUrl);
    this.ws = ws;
    try {
      await new Promise((resolve, reject) => {
        ws.onopen = () => resolve();
        ws.onerror = () => reject(new Error("ElevenLabs WebSocket connection failed"));
      });
      ws.onmessage = (event) => {
        try {
          const msg = JSON.parse(event.data);
          this.handleMessage(msg, config.audioElement);
        } catch {
          // Payloads that are not valid JSON are surfaced untouched.
          this.emit("raw_event", event.data);
        }
      };
      ws.onclose = () => {
        this.emit("status_change", "DISCONNECTED");
      };
      ws.onerror = () => {
        this.emit("error", new Error("ElevenLabs WebSocket error"));
      };
      if (this.agent.instructions) {
        ws.send(JSON.stringify({
          type: "contextual_update",
          text: this.agent.instructions
        }));
      }

      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      if (this.ws !== ws) {
        // disconnect() ran while the permission prompt was pending; do not
        // keep the microphone open for a session that no longer exists.
        stream.getTracks().forEach((t) => t.stop());
        return;
      }
      this.mediaStream = stream;
      this.audioContext = new AudioContext({ sampleRate: 16e3 });
      const source = this.audioContext.createMediaStreamSource(stream);
      this.scriptProcessor = this.audioContext.createScriptProcessor(4096, 1, 1);
      this.scriptProcessor.onaudioprocess = (e) => {
        if (this.ws?.readyState !== WebSocket.OPEN) return;
        // Convert float samples in [-1, 1] to signed 16-bit PCM.
        const input = e.inputBuffer.getChannelData(0);
        const pcm16 = new Int16Array(input.length);
        for (let i = 0; i < input.length; i++) {
          const s = Math.max(-1, Math.min(1, input[i]));
          pcm16[i] = s < 0 ? s * 32768 : s * 32767;
        }
        // btoa needs a binary string, so map each byte to a character first.
        const bytes = new Uint8Array(pcm16.buffer);
        let binary = "";
        for (let j = 0; j < bytes.length; j++) binary += String.fromCharCode(bytes[j]);
        this.ws.send(JSON.stringify({ user_audio_chunk: btoa(binary) }));
      };
      source.connect(this.scriptProcessor);
      this.scriptProcessor.connect(this.audioContext.destination);
    } catch (err) {
      this.releaseResources();
      throw err;
    }
    this.emit("status_change", "CONNECTED");
  }

  /**
   * Stop capture and playback, close the socket and drop every listener.
   * @returns {Promise<void>}
   */
  async disconnect() {
    this.releaseResources();
    this.removeAllListeners();
  }

  /**
   * Release the audio graph, microphone tracks, playback context and socket
   * without touching the registered listeners. Safe to call repeatedly.
   */
  releaseResources() {
    if (this.scriptProcessor) {
      this.scriptProcessor.onaudioprocess = null;
      this.scriptProcessor.disconnect();
    }
    for (const ctx of [this.audioContext, this.playbackCtx]) {
      if (ctx && ctx.state !== "closed") {
        // Teardown is best-effort: a rejected close() must not surface as an
        // unhandled rejection.
        Promise.resolve(ctx.close()).catch(() => {});
      }
    }
    this.mediaStream?.getTracks().forEach((t) => t.stop());
    this.scriptProcessor = null;
    this.audioContext = null;
    this.mediaStream = null;
    this.playbackCtx = null;
    this.nextPlaybackTime = 0;
    if (this.ws) {
      this.ws.close();
      this.ws = null;
    }
  }

  /** Send a text message from the user. @param {string} text */
  sendMessage(text) {
    this.ws?.send(JSON.stringify({ type: "user_message", text }));
  }

  /** Send an `interrupt` message to the server. */
  interrupt() {
    this.ws?.send(JSON.stringify({ type: "interrupt" }));
  }

  /** Enable or disable the microphone tracks. @param {boolean} muted */
  mute(muted) {
    this.mediaStream?.getAudioTracks().forEach((t) => {
      t.enabled = !muted;
    });
  }

  /** Send an arbitrary JSON-serialisable event. @param {*} event */
  sendRawEvent(event) {
    this.ws?.send(JSON.stringify(event));
  }

  /**
   * Translate one parsed server message into session events. Message types
   * without a dedicated event are emitted as "raw_event".
   *
   * @param {object} msg - Parsed JSON message.
   * @param {*} audioElement - Agent audio is only played when this is truthy.
   */
  handleMessage(msg, audioElement) {
    switch (msg.type) {
      case "user_transcript":
        this.emit("user_transcript", {
          itemId: msg.id || "",
          text: msg.user_transcript_event?.user_transcript || msg.text || "",
          isFinal: msg.user_transcript_event?.is_final ?? true
        });
        break;
      case "agent_response":
        this.emit("assistant_transcript", {
          itemId: msg.id || "",
          delta: msg.agent_response_event?.agent_response || msg.delta || "",
          isFinal: false
        });
        break;
      case "agent_response_correction":
        this.emit("assistant_transcript", {
          itemId: msg.id || "",
          text: msg.agent_response_correction_event?.corrected_text || "",
          isFinal: true
        });
        break;
      case "audio": {
        const audioData = msg.audio_event?.audio_base_64 || msg.audio;
        if (audioData) {
          this.emit("audio_delta", msg.id || "", audioData);
          this.playAudioChunk(audioData, audioElement);
        }
        break;
      }
      case "client_tool_call":
        this.emit("tool_call_start", msg.client_tool_call?.tool_name || "", msg.client_tool_call?.parameters);
        // Fire-and-forget, but never leave a rejection unhandled.
        this.executeToolCall(msg).catch((err) => {
          this.emit("error", err instanceof Error ? err : new Error(String(err)));
        });
        break;
      case "vad":
        if (msg.vad_event?.type === "SPEECH_START") {
          this.emit("user_speech_started");
        }
        break;
      case "conversation_initiation_metadata": {
        // Adopt the announced output rate when the format is raw PCM.
        const format = msg.conversation_initiation_metadata_event?.agent_output_audio_format;
        const match = typeof format === "string" ? /^pcm_(\d+)$/.exec(format) : null;
        if (match) this.playbackSampleRate = Number(match[1]);
        this.emit("raw_event", msg);
        break;
      }
      case "ping": {
        // Answer the keep-alive with the matching event ID.
        const eventId = msg.ping_event?.event_id;
        if (eventId != null && this.ws?.readyState === WebSocket.OPEN) {
          this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
        }
        this.emit("raw_event", msg);
        break;
      }
      case "error":
        this.emit("error", new Error(msg.message || msg.error || "ElevenLabs error"));
        break;
      default:
        this.emit("raw_event", msg);
        break;
    }
  }

  /**
   * Run the client tool named in `msg.client_tool_call` and send its result
   * back. Unknown tools, throwing tools and results that cannot be
   * serialised are reported to the server as error results.
   *
   * @param {object} msg - A "client_tool_call" message.
   * @returns {Promise<void>}
   */
  async executeToolCall(msg) {
    const toolCall = msg.client_tool_call;
    if (!toolCall) return;
    const sendResult = (serialized, isError) => {
      this.ws?.send(JSON.stringify({
        type: "client_tool_result",
        tool_call_id: toolCall.tool_call_id,
        result: serialized,
        is_error: isError
      }));
    };
    const toolDef = this.agent.tools?.find((t) => t.name === toolCall.tool_name);
    if (!toolDef) {
      sendResult(JSON.stringify({ error: `Tool ${toolCall.tool_name} not found` }), true);
      return;
    }
    let result;
    let serialized;
    try {
      result = await toolDef.execute(toolCall.parameters || {});
      // Serialise before announcing success so an unserialisable result is
      // reported once, as a failure.
      serialized = JSON.stringify(result);
    } catch (err) {
      const errorResult = { error: String(err) };
      this.emit("tool_call_end", toolCall.tool_name, toolCall.parameters, errorResult);
      sendResult(JSON.stringify(errorResult), true);
      return;
    }
    this.emit("tool_call_end", toolCall.tool_name, toolCall.parameters, result);
    sendResult(serialized, false);
  }

  /**
   * Decode a base64 chunk of little-endian 16-bit PCM and queue it for
   * playback directly after the previously queued chunk.
   *
   * @param {string} base64 - Base64-encoded PCM audio.
   * @param {*} audioElement - Nothing is played when this is falsy.
   */
  playAudioChunk(base64, audioElement) {
    if (!audioElement) return;
    try {
      if (!this.playbackCtx) {
        this.playbackCtx = new AudioContext({ sampleRate: this.playbackSampleRate });
        this.nextPlaybackTime = 0;
      }
      const ctx = this.playbackCtx;
      const binary = atob(base64);
      const bytes = new Uint8Array(binary.length);
      for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
      // Two bytes per sample; a trailing odd byte is ignored.
      const sampleCount = Math.floor(bytes.length / 2);
      if (sampleCount === 0) return;
      const samples = new Float32Array(sampleCount);
      const view = new DataView(bytes.buffer);
      for (let i = 0; i < sampleCount; i++) {
        samples[i] = view.getInt16(i * 2, true) / 32768;
      }
      const buffer = ctx.createBuffer(1, sampleCount, this.playbackSampleRate);
      buffer.copyToChannel(samples, 0);
      const src = ctx.createBufferSource();
      src.buffer = buffer;
      src.connect(ctx.destination);
      // Chunks can arrive faster than they play. Starting each one
      // immediately would overlap them, so schedule them back to back.
      const startAt = Math.max(ctx.currentTime, this.nextPlaybackTime);
      src.start(startAt);
      this.nextPlaybackTime = startAt + buffer.duration;
    } catch (err) {
      // Playback is best-effort, but a failure should be visible.
      console.warn("ElevenLabs audio playback failed:", err);
    }
  }
};
254
/**
 * Create the ElevenLabs client adapter.
 *
 * @param {object} options - Adapter options. `agentId` is read when a
 *   session is created and passed to the session; the whole object is also
 *   merged into the session options.
 * @returns {{ name: string, createSession: Function }} Adapter named
 *   "elevenlabs" whose `createSession(agent, sessionOpts)` builds an
 *   `ElevenLabsSession`; keys in `sessionOpts` override keys in `options`.
 */
function elevenlabs(options) {
  const createSession = (agent, sessionOpts) => {
    const sessionOptions = { ...options, ...sessionOpts };
    return new ElevenLabsSession(agent, options.agentId, sessionOptions);
  };
  return { name: "elevenlabs", createSession };
}
262
/**
 * Create the server-side ElevenLabs adapter, which exchanges the API key for
 * a signed conversation URL so the key itself never reaches the client.
 *
 * @param {object} [config] - Base configuration.
 * @param {string} [config.apiKey] - Falls back to `ELEVENLABS_API_KEY`.
 * @param {string} [config.agentId] - Falls back to `ELEVENLABS_AGENT_ID`.
 * @returns {{ getSessionToken: Function, createSessionHandler: Function }}
 */
function elevenlabsServer(config = {}) {
  /**
   * Request a signed conversation URL for the configured agent.
   *
   * @param {object} [overrides] - Per-call values that win over `config`.
   * @returns {Promise<{token: string} | {error: string}>} The signed URL as
   *   `token`, or an `error` describing why none could be obtained. Never
   *   rejects.
   */
  const getSessionToken = async (overrides = {}) => {
    const merged = { ...config, ...overrides };
    const apiKey = merged.apiKey || process.env.ELEVENLABS_API_KEY;
    const agentId = merged.agentId || process.env.ELEVENLABS_AGENT_ID;
    if (!apiKey) return { error: "ElevenLabs API key not configured" };
    if (!agentId) return { error: "ElevenLabs agent ID not configured" };
    try {
      const res = await fetch(
        // Encode the ID so reserved characters cannot alter the query string.
        `https://api.elevenlabs.io/v1/convai/conversation/get_signed_url?agent_id=${encodeURIComponent(agentId)}`,
        {
          method: "GET",
          headers: { "xi-api-key": apiKey }
        }
      );
      if (!res.ok) {
        return { error: `ElevenLabs API error: ${res.status}` };
      }
      const data = await res.json();
      // A successful response without a usable URL is a failure. Returning an
      // empty token would make the client silently use the unsigned endpoint.
      if (typeof data?.signed_url !== "string" || data.signed_url === "") {
        return { error: "ElevenLabs API response did not include a signed URL" };
      }
      return { token: data.signed_url };
    } catch (err) {
      return { error: String(err) };
    }
  };
  return {
    getSessionToken,
    /**
     * Build a request handler that answers with `{ ephemeralKey }` on success
     * or `{ error }` with status 500 on failure.
     */
    createSessionHandler(overrides) {
      return async (_request) => {
        const result = await getSessionToken(overrides);
        if (result.error) {
          return Response.json({ error: result.error }, { status: 500 });
        }
        return Response.json({ ephemeralKey: result.token });
      };
    }
  };
}
299
+ var elevenlabs_default = elevenlabs;
300
+ // Annotate the CommonJS export names for ESM import in node:
301
+ 0 && (module.exports = {
302
+ elevenlabs,
303
+ elevenlabsServer
304
+ });