@jchaffin/voicekit 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +369 -0
- package/dist/adapters/deepgram.d.mts +43 -0
- package/dist/adapters/deepgram.d.ts +43 -0
- package/dist/adapters/deepgram.js +216 -0
- package/dist/adapters/deepgram.mjs +162 -0
- package/dist/adapters/elevenlabs.d.mts +41 -0
- package/dist/adapters/elevenlabs.d.ts +41 -0
- package/dist/adapters/elevenlabs.js +304 -0
- package/dist/adapters/elevenlabs.mjs +250 -0
- package/dist/adapters/livekit.d.mts +44 -0
- package/dist/adapters/livekit.d.ts +44 -0
- package/dist/adapters/livekit.js +225 -0
- package/dist/adapters/livekit.mjs +161 -0
- package/dist/adapters/openai.d.mts +41 -0
- package/dist/adapters/openai.d.ts +41 -0
- package/dist/adapters/openai.js +350 -0
- package/dist/adapters/openai.mjs +294 -0
- package/dist/chunk-22WLZIXO.mjs +33 -0
- package/dist/chunk-T3II3DRG.mjs +178 -0
- package/dist/chunk-UZ2VGPZD.mjs +33 -0
- package/dist/chunk-Y6FXYEAI.mjs +10 -0
- package/dist/index.d.mts +693 -0
- package/dist/index.d.ts +693 -0
- package/dist/index.js +1838 -0
- package/dist/index.mjs +1593 -0
- package/dist/server.d.mts +80 -0
- package/dist/server.d.ts +80 -0
- package/dist/server.js +147 -0
- package/dist/server.mjs +119 -0
- package/dist/types-DY31oVB1.d.mts +150 -0
- package/dist/types-DY31oVB1.d.ts +150 -0
- package/dist/types-mThnXW9S.d.mts +150 -0
- package/dist/types-mThnXW9S.d.ts +150 -0
- package/dist/types-uLnzb8NE.d.mts +150 -0
- package/dist/types-uLnzb8NE.d.ts +150 -0
- package/package.json +100 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import {
|
|
2
|
+
EventEmitter
|
|
3
|
+
} from "../chunk-22WLZIXO.mjs";
|
|
4
|
+
|
|
5
|
+
// src/adapters/livekit.ts
/**
 * Client-side LiveKit session. Bridges LiveKit room and data-channel events
 * to the normalized VoiceKit session events (user_transcript,
 * assistant_transcript, tool_call_*, agent_handoff, ...). Data messages are
 * JSON envelopes with a `type` discriminator field.
 */
var LiveKitSession = class extends EventEmitter {
  /**
   * @param agent     VoiceKit agent config (kept for parity with other adapters).
   * @param serverUrl LiveKit server URL to connect to.
   * @param options   Merged adapter + per-session options.
   */
  constructor(agent, serverUrl, options) {
    super();
    this.room = null;
    // Hidden <audio> elements this session created and appended to <body>.
    // Tracked so disconnect() can remove them — previously they leaked, one
    // per subscribed audio track, accumulating across sessions.
    this.ownedAudioEls = [];
    this.agent = agent;
    this.serverUrl = serverUrl;
    this.options = options;
  }
  /**
   * Connect to the room, wire room events, and enable the local microphone.
   * @param config - { authToken, audioElement? }
   */
  async connect(config) {
    const { Room, RoomEvent, Track } = await import("livekit-client");
    this.room = new Room();
    this.room.on(RoomEvent.TrackSubscribed, (track, _pub, participant) => {
      if (track.kind === Track.Kind.Audio) {
        const el = config.audioElement || document.createElement("audio");
        track.attach(el);
        if (!config.audioElement) {
          // We created this element; autoplay it invisibly and remember it
          // so disconnect() can clean it up.
          el.autoplay = true;
          el.style.display = "none";
          document.body.appendChild(el);
          this.ownedAudioEls.push(el);
        }
      }
    });
    this.room.on(RoomEvent.DataReceived, (payload, participant, kind) => {
      try {
        const msg = JSON.parse(new TextDecoder().decode(payload));
        this.handleDataMessage(msg, participant);
      } catch {
        // Not a JSON envelope — surface the raw payload instead of dropping it.
        this.emit("raw_event", { payload, participant, kind });
      }
    });
    this.room.on(RoomEvent.Disconnected, () => {
      this.emit("status_change", "DISCONNECTED");
    });
    this.room.on(RoomEvent.Reconnecting, () => {
      this.emit("status_change", "CONNECTING");
    });
    this.room.on(RoomEvent.Reconnected, () => {
      this.emit("status_change", "CONNECTED");
    });
    await this.room.connect(this.serverUrl, config.authToken);
    await this.room.localParticipant.setMicrophoneEnabled(true);
    this.emit("status_change", "CONNECTED");
  }
  /** Leave the room, remove any audio elements we created, drop all listeners. */
  async disconnect() {
    if (this.room) {
      await this.room.disconnect();
      this.room = null;
    }
    // Fix: remove the hidden audio elements created in connect(); previously
    // they were never removed from document.body.
    for (const el of this.ownedAudioEls) {
      el.remove();
    }
    this.ownedAudioEls = [];
    this.removeAllListeners();
  }
  /** Publish a user text message over the reliable data channel. */
  sendMessage(text) {
    if (!this.room) throw new Error("Not connected");
    const data = new TextEncoder().encode(JSON.stringify({ type: "user_message", text }));
    this.room.localParticipant.publishData(data, { reliable: true });
  }
  /** Ask the remote agent to stop the current response; no-op when not connected. */
  interrupt() {
    if (!this.room) return;
    const data = new TextEncoder().encode(JSON.stringify({ type: "interrupt" }));
    this.room.localParticipant.publishData(data, { reliable: true });
  }
  /** Enable/disable the local microphone. */
  mute(muted) {
    // setMicrophoneEnabled returns a promise; deliberately fire-and-forget.
    void this.room?.localParticipant?.setMicrophoneEnabled(!muted);
  }
  /** Publish an arbitrary JSON-serializable event over the reliable data channel. */
  sendRawEvent(event) {
    if (!this.room) return;
    const data = new TextEncoder().encode(JSON.stringify(event));
    this.room.localParticipant.publishData(data, { reliable: true });
  }
  /** Route a decoded data-channel message to the matching normalized event. */
  handleDataMessage(msg, _participant) {
    switch (msg.type) {
      case "user_transcript":
        this.emit("user_transcript", {
          itemId: msg.itemId || msg.id || "",
          delta: msg.delta,
          text: msg.text,
          // If the sender omits isFinal, treat the presence of full text as final.
          isFinal: msg.isFinal ?? !!msg.text
        });
        break;
      case "assistant_transcript":
        this.emit("assistant_transcript", {
          itemId: msg.itemId || msg.id || "",
          delta: msg.delta,
          text: msg.text,
          isFinal: msg.isFinal ?? !!msg.text
        });
        break;
      case "tool_call_start":
        this.emit("tool_call_start", msg.name, msg.input);
        break;
      case "tool_call_end":
        this.emit("tool_call_end", msg.name, msg.input, msg.output);
        break;
      case "agent_handoff":
        this.emit("agent_handoff", msg.from || "", msg.to || "");
        break;
      case "error":
        this.emit("error", new Error(msg.message || "LiveKit error"));
        break;
      case "speech_started":
        this.emit("user_speech_started");
        break;
      default:
        // Unknown envelope types are passed through untouched.
        this.emit("raw_event", msg);
        break;
    }
  }
};
|
|
113
|
+
/**
 * Create a LiveKit voice adapter. Sessions connect to `options.serverUrl`;
 * per-session options override adapter-level options.
 */
function livekit(options) {
  const { serverUrl } = options;
  return {
    name: "livekit",
    createSession(agent, sessionOpts) {
      const merged = { ...options, ...sessionOpts };
      return new LiveKitSession(agent, serverUrl, merged);
    }
  };
}
|
|
121
|
+
/**
 * Server-side LiveKit adapter: mints room-join access tokens.
 *
 * Credentials come from `config`/`overrides` or the LIVEKIT_API_KEY /
 * LIVEKIT_API_SECRET environment variables. `getSessionToken` resolves to
 * `{ token }` on success or `{ error }` on any failure (never throws).
 */
function livekitServer(config = {}) {
  const getSessionToken = async (overrides = {}) => {
    const merged = { ...config, ...overrides };
    const apiKey = merged.apiKey || process.env.LIVEKIT_API_KEY;
    const apiSecret = merged.apiSecret || process.env.LIVEKIT_API_SECRET;
    if (!apiKey || !apiSecret) {
      return { error: "LiveKit API key and secret are required" };
    }
    try {
      const { AccessToken } = await import("livekit-server-sdk");
      // Fix: a bare Date.now() collides for requests in the same millisecond,
      // and LiveKit disconnects duplicate identities — add a random suffix.
      const unique = () => `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
      const roomName = merged.roomName || `room-${unique()}`;
      const identity = merged.identity || `user-${unique()}`;
      const at = new AccessToken(apiKey, apiSecret, {
        identity,
        // `??` (not `||`) so an explicit ttl of 0 is not silently replaced
        // by the 600-second default.
        ttl: `${merged.ttl ?? 600}s`
      });
      at.addGrant({ roomJoin: true, room: roomName });
      return { token: await at.toJwt() };
    } catch (err) {
      return { error: String(err) };
    }
  };
  return {
    getSessionToken,
    /** Returns a fetch-style handler responding `{ ephemeralKey }` or a 500 error. */
    createSessionHandler(overrides) {
      return async (_request) => {
        const result = await getSessionToken(overrides);
        if (result.error) {
          return Response.json({ error: result.error }, { status: 500 });
        }
        return Response.json({ ephemeralKey: result.token });
      };
    }
  };
}
|
|
156
|
+
// `livekit` is exported both as a named export and as the module default;
// `livekitServer` is the server-side token-minting counterpart.
var livekit_default = livekit;
export {
  livekit_default as default,
  livekit,
  livekitServer
};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { i as SessionOptions, g as ServerSessionConfig, e as VoiceAdapter, S as ServerAdapter } from '../types-DY31oVB1.mjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* OpenAI Realtime API adapter for VoiceKit.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* ```ts
|
|
8
|
+
* import { openai } from '@jchaffin/voicekit/openai';
|
|
9
|
+
*
|
|
10
|
+
* <VoiceProvider adapter={openai({ model: 'gpt-realtime' })} agent={agent}>
|
|
11
|
+
* ```
|
|
12
|
+
*
|
|
13
|
+
* Peer dependency: @openai/agents (>= 0.0.15)
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
interface OpenAIAdapterOptions extends SessionOptions {
  /** Realtime model id; the client defaults to "gpt-realtime". */
  model?: string;
  /** Input transcription language code; defaults to "en". */
  language?: string;
  /** Audio codec; "g711" switches to telephony audio (g711_ulaw, PCMU/PCMA @ 8kHz), otherwise PCM16. */
  codec?: string;
  /** Output voice id. */
  voice?: string;
  /** Input transcription model; defaults to "gpt-4o-transcribe". */
  transcriptionModel?: string;
}
/**
 * Create an OpenAI Realtime adapter.
 *
 * ```ts
 * import { openai } from '@jchaffin/voicekit/openai';
 * <VoiceProvider adapter={openai()} agent={agent} />
 * ```
 */
declare function openai(options?: OpenAIAdapterOptions): VoiceAdapter;
interface OpenAIServerConfig extends ServerSessionConfig {
  /** API key; falls back to the OPENAI_API_KEY environment variable. */
  apiKey?: string;
  /** Realtime model id; defaults to "gpt-realtime". */
  model?: string;
  /** Output voice id forwarded to the session's audio output config. */
  voice?: string;
  /** System instructions forwarded to the session. */
  instructions?: string;
  /** Ephemeral key lifetime in seconds; defaults to 600. */
  expiresIn?: number;
}
/** Server-side helper that mints ephemeral Realtime client secrets. */
declare function openaiServer(config?: OpenAIServerConfig): ServerAdapter;

export { type OpenAIAdapterOptions, type OpenAIServerConfig, openai as default, openai, openaiServer };
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { i as SessionOptions, g as ServerSessionConfig, e as VoiceAdapter, S as ServerAdapter } from '../types-DY31oVB1.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* OpenAI Realtime API adapter for VoiceKit.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* ```ts
|
|
8
|
+
* import { openai } from '@jchaffin/voicekit/openai';
|
|
9
|
+
*
|
|
10
|
+
* <VoiceProvider adapter={openai({ model: 'gpt-realtime' })} agent={agent}>
|
|
11
|
+
* ```
|
|
12
|
+
*
|
|
13
|
+
* Peer dependency: @openai/agents (>= 0.0.15)
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
interface OpenAIAdapterOptions extends SessionOptions {
  /** Realtime model id; the client defaults to "gpt-realtime". */
  model?: string;
  /** Input transcription language code; defaults to "en". */
  language?: string;
  /** Audio codec; "g711" switches to telephony audio (g711_ulaw, PCMU/PCMA @ 8kHz), otherwise PCM16. */
  codec?: string;
  /** Output voice id. */
  voice?: string;
  /** Input transcription model; defaults to "gpt-4o-transcribe". */
  transcriptionModel?: string;
}
/**
 * Create an OpenAI Realtime adapter.
 *
 * ```ts
 * import { openai } from '@jchaffin/voicekit/openai';
 * <VoiceProvider adapter={openai()} agent={agent} />
 * ```
 */
declare function openai(options?: OpenAIAdapterOptions): VoiceAdapter;
interface OpenAIServerConfig extends ServerSessionConfig {
  /** API key; falls back to the OPENAI_API_KEY environment variable. */
  apiKey?: string;
  /** Realtime model id; defaults to "gpt-realtime". */
  model?: string;
  /** Output voice id forwarded to the session's audio output config. */
  voice?: string;
  /** System instructions forwarded to the session. */
  instructions?: string;
  /** Ephemeral key lifetime in seconds; defaults to 600. */
  expiresIn?: number;
}
/** Server-side helper that mints ephemeral Realtime client secrets. */
declare function openaiServer(config?: OpenAIServerConfig): ServerAdapter;

export { type OpenAIAdapterOptions, type OpenAIServerConfig, openai as default, openai, openaiServer };
|
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/adapters/openai.ts
// Public surface of this CommonJS bundle: `openai` (also the default export)
// and `openaiServer`, exposed via lazy getters.
var openai_exports = {};
__export(openai_exports, {
  default: () => openai_default,
  openai: () => openai,
  openaiServer: () => openaiServer
});
module.exports = __toCommonJS(openai_exports);
// Peer dependency: the @openai/agents realtime entry point.
var import_realtime = require("@openai/agents/realtime");
|
|
29
|
+
|
|
30
|
+
// src/core/EventEmitter.ts
/**
 * Minimal event emitter backed by a Map of handler Sets.
 * A throwing handler is logged and skipped, so one bad listener
 * cannot prevent the remaining listeners from running.
 */
var EventEmitter = class {
  constructor() {
    this.handlers = /* @__PURE__ */ new Map();
  }
  /** Subscribe `handler` to `event`; duplicate registrations collapse (Set semantics). */
  on(event, handler) {
    if (!this.handlers.has(event)) {
      this.handlers.set(event, /* @__PURE__ */ new Set());
    }
    this.handlers.get(event).add(handler);
  }
  /** Unsubscribe a previously registered handler; no-op when absent. */
  off(event, handler) {
    const registered = this.handlers.get(event);
    if (registered) {
      registered.delete(handler);
    }
  }
  /** Invoke every handler for `event` with `args`; exceptions are logged, never rethrown. */
  emit(event, ...args) {
    const registered = this.handlers.get(event);
    if (!registered) return;
    for (const fn of registered) {
      try {
        fn(...args);
      } catch (e) {
        console.error(`EventEmitter error in "${event}":`, e);
      }
    }
  }
  /** Drop every handler for every event. */
  removeAllListeners() {
    this.handlers.clear();
  }
};
|
|
59
|
+
|
|
60
|
+
// src/tools.ts
// DOM event name used to broadcast tool results to browser listeners.
var TOOL_RESULT_EVENT = "voicekit:tool-result";
/**
 * Broadcast a tool invocation result as a CustomEvent on `window`.
 * Silently does nothing outside a browser environment.
 */
function emitToolResult(name, input, result) {
  if (typeof window === "undefined") {
    return;
  }
  const detail = { name, input, result, timestamp: Date.now() };
  window.dispatchEvent(new CustomEvent(TOOL_RESULT_EVENT, { detail }));
}
|
|
69
|
+
|
|
70
|
+
// src/adapters/openai.ts
/**
 * Convert a VoiceKit tool definition into an @openai/agents realtime tool.
 * The wrapped `execute` broadcasts every outcome via emitToolResult and
 * converts thrown errors into a `{ success: false, error }` result object.
 */
function convertTool(def) {
  const { name, description } = def;
  const execute = async (input) => {
    let result;
    try {
      result = await def.execute(input);
    } catch (error) {
      result = { success: false, error: String(error) };
    }
    emitToolResult(name, input, result);
    return result;
  };
  return (0, import_realtime.tool)({
    name,
    description,
    parameters: {
      type: "object",
      properties: def.parameters.properties,
      required: def.parameters.required || [],
      additionalProperties: false
    },
    execute
  });
}
|
|
94
|
+
/**
 * VoiceKit session backed by the OpenAI Realtime API over WebRTC.
 * Normalizes SDK/transport events into VoiceKit session events
 * (user_transcript, assistant_transcript, audio_delta, tool_call_*, ...).
 */
var OpenAISession = class extends EventEmitter {
  /**
   * @param agent   A RealtimeAgent built by buildRealtimeAgent().
   * @param options Merged adapter + per-session options (model, codec, ...).
   */
  constructor(agent, options) {
    super();
    this.session = null;
    // True while the model is generating a response; used by sendMessage()
    // to interrupt and wait before queuing a new message.
    this.responseInFlight = false;
    this.agent = agent;
    this.options = options;
  }
  /**
   * Open the realtime session and wire events.
   * @param config - { authToken, audioElement?, outputGuardrails?, context? }
   */
  async connect(config) {
    const audioElement = config.audioElement;
    this.session = new import_realtime.RealtimeSession(this.agent, {
      transport: new import_realtime.OpenAIRealtimeWebRTC({
        audioElement,
        // Telephony mode: restrict the peer connection to PCMU/PCMA @ 8kHz.
        ...this.options.codec === "g711" && {
          changePeerConnection: async (pc) => {
            pc.getTransceivers().forEach((transceiver) => {
              if (transceiver.sender.track?.kind === "audio") {
                transceiver.setCodecPreferences([
                  { mimeType: "audio/PCMU", clockRate: 8e3 },
                  { mimeType: "audio/PCMA", clockRate: 8e3 }
                ]);
              }
            });
            return pc;
          }
        }
      }),
      model: this.options.model || "gpt-realtime",
      config: {
        inputAudioFormat: this.options.codec === "g711" ? "g711_ulaw" : "pcm16",
        outputAudioFormat: this.options.codec === "g711" ? "g711_ulaw" : "pcm16",
        inputAudioTranscription: {
          model: this.options.transcriptionModel || "gpt-4o-transcribe",
          language: this.options.language || "en"
        }
      },
      outputGuardrails: config.outputGuardrails ?? [],
      context: config.context ?? {}
    });
    this.wireEvents(this.session);
    await this.session.connect({ apiKey: config.authToken });
    this.emit("status_change", "CONNECTED");
  }
  /** Close the session (best-effort) and drop all listeners. */
  async disconnect() {
    if (this.session) {
      try {
        await this.session.close();
      } catch {
        // best-effort close; the session is discarded either way
      }
      this.session = null;
    }
    // Fix: emit BEFORE removeAllListeners(). The original cleared the
    // handlers first, so no listener could ever observe DISCONNECTED.
    this.emit("status_change", "DISCONNECTED");
    this.removeAllListeners();
  }
  /**
   * Send a text message. If a response is in flight, interrupt it first and
   * wait (bounded at 1.5s) for the cancel/complete acknowledgement so the
   * new message is not dropped by the server.
   * @throws Error when the session is not connected.
   */
  async sendMessage(text) {
    if (!this.session) throw new Error("Session not connected");
    if (this.responseInFlight) {
      this.session.interrupt();
      await new Promise((resolve) => {
        let timer;
        const onDone = (event) => {
          if (event.type === "response.done" || event.type === "response.cancelled") {
            settle();
          }
        };
        // Fix: the original leaked both the timeout and the raw_event
        // listener on whichever path lost the race; settle() cleans up both.
        const settle = () => {
          clearTimeout(timer);
          this.off("raw_event", onDone);
          resolve();
        };
        this.on("raw_event", onDone);
        timer = setTimeout(settle, 1500); // fallback so we never hang forever
      });
    }
    this.session.sendMessage(text);
  }
  /** Interrupt the current model response; no-op when not connected. */
  interrupt() {
    this.session?.interrupt();
  }
  /** Mute/unmute the local microphone; no-op when not connected. */
  mute(muted) {
    this.session?.mute(muted);
  }
  /** Send a raw event straight through the transport; no-op when not connected. */
  sendRawEvent(event) {
    this.session?.transport.sendEvent(event);
  }
  // Map OpenAI SDK events -> normalized SessionEvents
  wireEvents(session) {
    session.on("transport_event", (event) => {
      const type = event.type;
      switch (type) {
        case "input_audio_buffer.speech_started":
          this.emit("user_speech_started");
          break;
        case "conversation.item.input_audio_transcription.delta":
          this.emit("user_transcript", {
            itemId: event.item_id,
            delta: event.delta || "",
            isFinal: false
          });
          break;
        case "conversation.item.input_audio_transcription.completed":
          this.emit("user_transcript", {
            itemId: event.item_id,
            text: event.transcript || "",
            isFinal: true
          });
          break;
        // Both legacy and current event names are handled for each pair below.
        case "response.audio_transcript.delta":
        case "response.output_audio_transcript.delta":
          this.emit("assistant_transcript", {
            itemId: event.item_id,
            delta: event.delta || "",
            isFinal: false
          });
          break;
        case "response.audio_transcript.done":
        case "response.output_audio_transcript.done":
          this.emit("assistant_transcript", {
            itemId: event.item_id,
            text: event.transcript || "",
            isFinal: true
          });
          break;
        case "response.audio.delta":
        case "response.output_audio.delta":
          this.emit("audio_delta", event.item_id, event.delta);
          break;
        case "response.created":
          this.responseInFlight = true;
          this.emit("raw_event", event);
          break;
        case "response.done":
          this.responseInFlight = false;
          this.emit("raw_event", event);
          break;
        case "conversation.item.truncated":
          this.emit("raw_event", event);
          break;
        default:
          this.emit("raw_event", event);
          break;
      }
    });
    // NOTE(review): positional args follow the SDK's callback signatures;
    // index 2 is assumed to be the function-call descriptor — confirm against
    // the installed @openai/agents version.
    session.on("agent_tool_start", ((...args) => {
      const functionCall = args[2];
      if (functionCall) {
        this.emit("tool_call_start", functionCall.name, functionCall.arguments);
      }
    }));
    session.on("agent_tool_end", ((...args) => {
      const functionCall = args[2];
      const result = args[3];
      if (functionCall) {
        this.emit("tool_call_end", functionCall.name, functionCall.arguments, result);
      }
    }));
    session.on("agent_handoff", ((...args) => {
      const item = args[0];
      const context = item?.context;
      const history = context?.history;
      if (history?.length) {
        // The target agent name is encoded in the transfer_to_<name> tool call.
        const lastMessage = history[history.length - 1];
        const agentName = (lastMessage.name || "").split("transfer_to_").pop() || "";
        this.emit("agent_handoff", "", agentName);
      }
    }));
    session.on("guardrail_tripped", ((...args) => {
      this.emit("guardrail_tripped", args);
    }));
    session.on("history_updated", ((...args) => {
      this.emit("raw_event", { type: "history_updated", items: args[0] });
    }));
    session.on("history_added", ((...args) => {
      this.emit("raw_event", { type: "history_added", item: args[0] });
    }));
    // Normalize every error shape (Error, error-like object, primitive) into Error.
    session.on("error", (error) => {
      if (error instanceof Error) {
        this.emit("error", error);
      } else if (error && typeof error === "object") {
        const obj = error;
        const msg = obj.message || obj.error?.message || JSON.stringify(error);
        this.emit("error", new Error(msg));
      } else {
        this.emit("error", new Error(String(error)));
      }
    });
  }
};
|
|
277
|
+
/**
 * Create an OpenAI Realtime adapter. Adapter-level options are merged with
 * per-session options (session options win).
 */
function openai(options = {}) {
  return {
    name: "openai",
    createSession(agentConfig, sessionOpts) {
      const realtimeAgent = buildRealtimeAgent(agentConfig);
      return new OpenAISession(realtimeAgent, { ...options, ...sessionOpts });
    }
  };
}
|
|
287
|
+
/**
 * Build an @openai/agents RealtimeAgent from a VoiceKit agent config,
 * converting each declared tool into the SDK's tool format.
 */
function buildRealtimeAgent(config) {
  const tools = [];
  for (const def of config.tools || []) {
    tools.push(convertTool(def));
  }
  return new import_realtime.RealtimeAgent({
    name: config.name,
    instructions: config.instructions,
    tools
  });
}
|
|
295
|
+
/**
 * Server-side OpenAI adapter: mints short-lived Realtime client secrets.
 * The API key resolves from config/overrides, then OPENAI_API_KEY.
 * `getSessionToken` resolves to `{ token }` or `{ error }` (never throws).
 */
function openaiServer(config = {}) {
  const getSessionToken = async (overrides = {}) => {
    const merged = { ...config, ...overrides };
    const apiKey = merged.apiKey || process.env.OPENAI_API_KEY;
    if (!apiKey) return { error: "OpenAI API key not configured" };
    const session = {
      type: "realtime",
      model: merged.model || "gpt-realtime"
    };
    if (merged.voice) session.audio = { output: { voice: merged.voice } };
    if (merged.instructions) session.instructions = merged.instructions;
    try {
      const response = await fetch("https://api.openai.com/v1/realtime/client_secrets", {
        method: "POST",
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({
          expires_after: {
            anchor: "created_at",
            seconds: merged.expiresIn || 600
          },
          session
        })
      });
      if (!response.ok) {
        const detail = await response.text();
        console.error("OpenAI client_secrets error:", detail);
        return { error: `OpenAI API error: ${response.status}` };
      }
      const payload = await response.json();
      return payload.value ? { token: payload.value } : { error: "Invalid response from OpenAI" };
    } catch (err) {
      return { error: String(err) };
    }
  };
  return {
    getSessionToken,
    /** Returns a fetch-style handler responding `{ ephemeralKey }` or a 500 error. */
    createSessionHandler(overrides) {
      return async (_request) => {
        const outcome = await getSessionToken(overrides);
        return outcome.error
          ? Response.json({ error: outcome.error }, { status: 500 })
          : Response.json({ ephemeralKey: outcome.token });
      };
    }
  };
}
|
|
345
|
+
var openai_default = openai;
// Annotate the CommonJS export names for ESM import in node:
// Dead code by design — `0 &&` never executes; Node's cjs-module-lexer parses
// it statically so ESM importers see `openai` and `openaiServer` as named exports.
0 && (module.exports = {
  openai,
  openaiServer
});
|