@kernl-sdk/openai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -0
- package/CHANGELOG.md +15 -0
- package/dist/__tests__/realtime.integration.test.d.ts +2 -0
- package/dist/__tests__/realtime.integration.test.d.ts.map +1 -0
- package/dist/__tests__/realtime.integration.test.js +169 -0
- package/dist/__tests__/realtime.test.d.ts +2 -0
- package/dist/__tests__/realtime.test.d.ts.map +1 -0
- package/dist/__tests__/realtime.test.js +314 -0
- package/dist/convert/__tests__/event.test.d.ts +2 -0
- package/dist/convert/__tests__/event.test.d.ts.map +1 -0
- package/dist/convert/__tests__/event.test.js +514 -0
- package/dist/convert/event.d.ts +24 -0
- package/dist/convert/event.d.ts.map +1 -0
- package/dist/convert/event.js +398 -0
- package/dist/convert/types.d.ts +259 -0
- package/dist/convert/types.d.ts.map +1 -0
- package/dist/convert/types.js +1 -0
- package/dist/index.d.ts +36 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/realtime.d.ts +30 -0
- package/dist/realtime.d.ts.map +1 -0
- package/dist/realtime.js +214 -0
- package/package.json +54 -0
- package/src/__tests__/realtime.integration.test.ts +217 -0
- package/src/__tests__/realtime.test.ts +421 -0
- package/src/convert/__tests__/event.test.ts +592 -0
- package/src/convert/event.ts +481 -0
- package/src/convert/types.ts +344 -0
- package/src/index.ts +41 -0
- package/src/realtime.ts +276 -0
- package/tsconfig.json +13 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { OpenAIRealtimeModel, type OpenAIRealtimeOptions } from "./realtime.js";
/**
 * OpenAI provider interface.
 */
export interface OpenAIProvider {
    /**
     * Create a realtime model.
     *
     * @param modelId - OpenAI realtime model identifier (e.g. 'gpt-4o-realtime-preview').
     * @param options - Optional overrides: `apiKey` (defaults to the OPENAI_API_KEY
     *   env var) and `baseUrl` for the realtime endpoint.
     * @returns A realtime model bound to `modelId`.
     *
     * @example
     * ```ts
     * import { openai } from '@kernl-sdk/openai';
     *
     * const model = openai.realtime('gpt-4o-realtime-preview');
     * ```
     *
     * @example
     * ```ts
     * const model = openai.realtime('gpt-4o-realtime-preview', {
     *   apiKey: 'sk-...',
     * });
     * ```
     */
    realtime(modelId: string, options?: OpenAIRealtimeOptions): OpenAIRealtimeModel;
}
/**
 * OpenAI provider.
 *
 * @example
 * ```ts
 * import { openai } from '@kernl-sdk/openai';
 *
 * const model = openai.realtime('gpt-4o-realtime-preview');
 * ```
 */
export declare const openai: OpenAIProvider;
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,KAAK,qBAAqB,EAAE,MAAM,YAAY,CAAC;AAE7E;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B;;;;;;;;;;;;;;;;OAgBG;IACH,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,qBAAqB,GAAG,mBAAmB,CAAC;CACjF;AAED;;;;;;;;;GASG;AACH,eAAO,MAAM,MAAM,EAAE,cAIpB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { OpenAIRealtimeModel } from "./realtime.js";

/**
 * OpenAI provider.
 *
 * Thin factory object: each method constructs a model instance bound to the
 * given model id.
 *
 * @example
 * ```ts
 * import { openai } from '@kernl-sdk/openai';
 *
 * const model = openai.realtime('gpt-4o-realtime-preview');
 * ```
 */
export const openai = {
    realtime: (modelId, options) => new OpenAIRealtimeModel(modelId, options),
};
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { RealtimeModel, RealtimeConnection, RealtimeConnectOptions } from "@kernl-sdk/protocol";
/**
 * Options for creating an OpenAI realtime model.
 */
export interface OpenAIRealtimeOptions {
    /**
     * OpenAI API key. Defaults to OPENAI_API_KEY env var.
     */
    apiKey?: string;
    /**
     * Base URL for the realtime API.
     * Defaults to the official wss://api.openai.com/v1/realtime endpoint.
     */
    baseUrl?: string;
}
/**
 * OpenAI realtime model implementation.
 */
export declare class OpenAIRealtimeModel implements RealtimeModel {
    readonly spec: "1.0";
    readonly provider = "openai";
    readonly modelId: string;
    private apiKey;
    private baseUrl;
    /**
     * @param modelId - OpenAI realtime model identifier.
     * @param options - Optional apiKey/baseUrl overrides.
     * @throws Error if no API key is supplied and OPENAI_API_KEY is unset.
     */
    constructor(modelId: string, options?: OpenAIRealtimeOptions);
    /**
     * Establish a WebSocket connection to the OpenAI realtime API.
     *
     * Resolves once the socket is open; rejects on socket error or when
     * `options.abort` fires before the connection is established.
     */
    connect(options?: RealtimeConnectOptions): Promise<RealtimeConnection>;
}
//# sourceMappingURL=realtime.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"realtime.d.ts","sourceRoot":"","sources":["../src/realtime.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,aAAa,EACb,kBAAkB,EAClB,sBAAsB,EAGvB,MAAM,qBAAqB,CAAC;AAO7B;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,qBAAa,mBAAoB,YAAW,aAAa;IACvD,QAAQ,CAAC,IAAI,EAAG,KAAK,CAAU;IAC/B,QAAQ,CAAC,QAAQ,YAAY;IAC7B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IAEzB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,OAAO,CAAS;gBAEZ,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,qBAAqB;IAU5D;;OAEG;IACG,OAAO,CAAC,OAAO,CAAC,EAAE,sBAAsB,GAAG,OAAO,CAAC,kBAAkB,CAAC;CAmD7E"}
|
package/dist/realtime.js
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import { EventEmitter } from "node:events";
|
|
2
|
+
import WebSocket from "ws";
|
|
3
|
+
import { CLIENT_EVENT, SERVER_EVENT } from "./convert/event.js";
|
|
4
|
+
// Default endpoint; can be overridden per-model via options.baseUrl.
const OPENAI_REALTIME_URL = "wss://api.openai.com/v1/realtime";
/**
 * OpenAI realtime model implementation.
 *
 * Holds the credentials and model id; `connect()` opens a fresh WebSocket
 * session per call and returns it wrapped in an OpenAIRealtimeConnection.
 */
export class OpenAIRealtimeModel {
    spec = "1.0";
    provider = "openai";
    modelId;
    apiKey; // resolved from options.apiKey, falling back to OPENAI_API_KEY
    baseUrl; // resolved from options.baseUrl, falling back to the default URL
    /**
     * @param {string} modelId - OpenAI realtime model identifier.
     * @param options - Optional apiKey/baseUrl overrides.
     * @throws {Error} when no API key is supplied and OPENAI_API_KEY is unset.
     */
    constructor(modelId, options) {
        this.modelId = modelId;
        this.apiKey = options?.apiKey ?? process.env.OPENAI_API_KEY ?? "";
        this.baseUrl = options?.baseUrl ?? OPENAI_REALTIME_URL;
        if (!this.apiKey) {
            throw new Error("OpenAI API key is required");
        }
    }
    /**
     * Establish a WebSocket connection to the OpenAI realtime API.
     *
     * Resolves with the connection once the socket emits "open"; rejects on
     * socket "error" or when options.abort fires first. If a session config
     * is provided it is sent as a session.update immediately after opening.
     */
    async connect(options) {
        const url = `${this.baseUrl}?model=${this.modelId}`;
        const ws = new WebSocket(url, {
            headers: {
                Authorization: `Bearer ${this.apiKey}`,
                "OpenAI-Beta": "realtime=v1",
            },
        });
        // Wrap before awaiting "open" so the connection's own listeners
        // (message/open/close/error) are registered from the start.
        const connection = new OpenAIRealtimeConnection(ws);
        await new Promise((resolve, reject) => {
            // Fast-path: signal already aborted before we wired anything up.
            if (options?.abort?.aborted) {
                return reject(new Error("Connection aborted"));
            }
            // All three outcomes (open/error/abort) detach every listener so
            // the promise settles exactly once and nothing leaks.
            const cleanup = () => {
                ws.off("open", onOpen);
                ws.off("error", onError);
                options?.abort?.removeEventListener("abort", onAbort);
            };
            const onOpen = () => {
                cleanup();
                resolve();
            };
            const onError = (err) => {
                cleanup();
                reject(err);
            };
            const onAbort = () => {
                cleanup();
                ws.close(); // tear down the in-flight handshake
                reject(new Error("Connection aborted"));
            };
            ws.on("open", onOpen);
            ws.on("error", onError);
            options?.abort?.addEventListener("abort", onAbort);
        });
        // Apply the caller's initial session configuration, if any.
        if (options?.sessionConfig) {
            connection.send({
                kind: "session.update",
                config: options.sessionConfig,
            });
        }
        return connection;
    }
}
|
|
69
|
+
/**
 * OpenAI realtime connection implementation.
 *
 * Wraps a `ws` WebSocket and re-emits decoded server events through
 * EventEmitter channels: "event" (decoded server event), "status"
 * (connecting/connected/closed), and "error". It also tracks how much of the
 * current assistant audio response has been received so that `interrupt()`
 * can truncate the spoken item near the point the user barged in.
 */
class OpenAIRealtimeConnection extends EventEmitter {
    ws;
    _status = "connecting";
    _muted = false;
    _sessionId = null;
    // audio state tracking for interruption
    currid; // item_id of the assistant item currently streaming audio
    curridx; // content_index within that item
    faudtime; /* first audio timestamp (Date.now() when the first delta arrived) */
    audlenms = 0; // total audio received for the current item, in milliseconds
    responding = false; // true between response.created and response.done
    constructor(socket) {
        super();
        this.ws = socket;
        socket.on("message", (data) => {
            try {
                const raw = JSON.parse(data.toString());
                // track audio state for interruption handling
                if (raw.type === "response.output_audio.delta") {
                    this.currid = raw.item_id;
                    this.curridx = raw.content_index;
                    if (this.faudtime === undefined) {
                        this.faudtime = Date.now();
                        this.audlenms = 0;
                    }
                    // calculate audio length assuming 24kHz PCM16 (2 bytes/sample)
                    // TODO: support g711 (8kHz, 1 byte/sample) and configurable PCM rates
                    const bytes = base64ByteLength(raw.delta);
                    this.audlenms += (bytes / 2 / 24000) * 1000;
                }
                else if (raw.type === "response.created") {
                    this.responding = true;
                }
                else if (raw.type === "response.done") {
                    this.responding = false;
                    this.reset();
                }
                else if (raw.type === "input_audio_buffer.speech_started") {
                    // server-side VAD detected the user speaking: barge-in.
                    // Note: runs BEFORE the decoded event is emitted below.
                    this.interrupt();
                }
                // Decode into the protocol-level event shape; undecodable
                // payloads are silently dropped (decode returns falsy).
                const event = SERVER_EVENT.decode(raw);
                if (event) {
                    if (event.kind === "session.created") {
                        this._sessionId = event.session.id;
                    }
                    this.emit("event", event);
                }
            }
            catch (err) {
                // JSON parse / decode failures surface as "error", never throw.
                this.emit("error", err instanceof Error ? err : new Error(String(err)));
            }
        });
        socket.on("open", () => {
            this._status = "connected";
            this.emit("status", this._status);
        });
        socket.on("close", () => {
            this._status = "closed";
            this.reset();
            this.emit("status", this._status);
        });
        socket.on("error", (err) => {
            this.emit("error", err);
        });
    }
    // Current lifecycle status: "connecting" | "connected" | "closed".
    get status() {
        return this._status;
    }
    // Whether input is flagged as muted (see mute()/unmute()).
    get muted() {
        return this._muted;
    }
    // Session id from the server's session.created event, or null before it.
    get sessionId() {
        return this._sessionId;
    }
    /**
     * Send a client event to the OpenAI realtime API.
     * Silently drops the event if it fails to encode or the socket is not open.
     */
    send(event) {
        const encoded = CLIENT_EVENT.encode(event);
        if (encoded && this.ws.readyState === WebSocket.OPEN) {
            this.ws.send(JSON.stringify(encoded));
        }
    }
    /**
     * Close the WebSocket connection.
     */
    close() {
        this.reset();
        this.ws.close();
    }
    /**
     * Mute audio input.
     * NOTE(review): only flips the flag — no input gating is visible in this
     * file; presumably callers consult `muted` before forwarding audio. Verify.
     */
    mute() {
        this._muted = true;
    }
    /**
     * Unmute audio input.
     */
    unmute() {
        this._muted = false;
    }
    /**
     * Interrupt the current response: cancel any in-flight response, then
     * truncate the currently-spoken item to roughly the audio already played.
     */
    interrupt() {
        // cancel ongoing response
        if (this.responding) {
            this.send({ kind: "response.cancel" });
            this.responding = false;
        }
        // truncate if we have audio state
        if (this.currid && this.faudtime !== undefined) {
            // Estimate played audio as wall-clock time since the first delta,
            // clamped to the amount of audio actually received.
            const elapsed = Date.now() - this.faudtime;
            const endms = Math.max(0, Math.floor(Math.min(elapsed, this.audlenms)));
            if (this.ws.readyState === WebSocket.OPEN) {
                // Sent raw rather than via this.send() — presumably
                // conversation.item.truncate has no CLIENT_EVENT mapping; verify.
                this.ws.send(JSON.stringify({
                    type: "conversation.item.truncate",
                    item_id: this.currid,
                    content_index: this.curridx ?? 0,
                    audio_end_ms: endms,
                }));
            }
        }
        this.reset();
    }
    /**
     * Reset audio tracking state.
     */
    reset() {
        this.currid = undefined;
        this.curridx = undefined;
        this.faudtime = undefined;
        this.audlenms = 0;
    }
}
|
|
208
|
+
/**
 * Get byte length from base64 string without decoding.
 *
 * Handles both padded ("aGVsbG8=") and unpadded ("aGVsbG8") base64. The
 * previous `len * 3 / 4 - padding` form returned a fractional count for
 * unpadded input; stripping the padding first and flooring is exact for both,
 * and identical to the old result for well-formed padded input.
 *
 * @param b64 - base64-encoded payload
 * @returns number of bytes the string decodes to
 */
function base64ByteLength(b64) {
    const padding = b64.endsWith("==") ? 2 : b64.endsWith("=") ? 1 : 0;
    return Math.floor(((b64.length - padding) * 3) / 4);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kernl-sdk/openai",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "OpenAI provider for kernl",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"kernl",
|
|
7
|
+
"openai",
|
|
8
|
+
"realtime",
|
|
9
|
+
"voice",
|
|
10
|
+
"ai"
|
|
11
|
+
],
|
|
12
|
+
"author": "dremnik",
|
|
13
|
+
"license": "MIT",
|
|
14
|
+
"repository": {
|
|
15
|
+
"type": "git",
|
|
16
|
+
"url": "https://github.com/kernl-sdk/kernl.git",
|
|
17
|
+
"directory": "packages/providers/openai"
|
|
18
|
+
},
|
|
19
|
+
"homepage": "https://github.com/kernl-sdk/kernl#readme",
|
|
20
|
+
"bugs": {
|
|
21
|
+
"url": "https://github.com/kernl-sdk/kernl/issues"
|
|
22
|
+
},
|
|
23
|
+
"type": "module",
|
|
24
|
+
"publishConfig": {
|
|
25
|
+
"access": "public"
|
|
26
|
+
},
|
|
27
|
+
"exports": {
|
|
28
|
+
".": {
|
|
29
|
+
"types": "./dist/index.d.ts",
|
|
30
|
+
"import": "./dist/index.js"
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"scripts": {
|
|
34
|
+
"build": "tsc && tsc-alias --resolve-full-paths",
|
|
35
|
+
"dev": "tsc --watch",
|
|
36
|
+
"check-types": "tsc --noEmit",
|
|
37
|
+
"test": "vitest",
|
|
38
|
+
"test:watch": "vitest --watch",
|
|
39
|
+
"test:run": "vitest run"
|
|
40
|
+
},
|
|
41
|
+
"dependencies": {
|
|
42
|
+
"@kernl-sdk/protocol": "workspace:*",
|
|
43
|
+
"@kernl-sdk/shared": "workspace:*",
|
|
44
|
+
"ws": "^8.18.0"
|
|
45
|
+
},
|
|
46
|
+
"devDependencies": {
|
|
47
|
+
"@types/json-schema": "^7.0.15",
|
|
48
|
+
"@types/node": "^24.10.0",
|
|
49
|
+
"@types/ws": "^8.18.0",
|
|
50
|
+
"tsc-alias": "^1.8.10",
|
|
51
|
+
"typescript": "5.9.2",
|
|
52
|
+
"vitest": "^4.0.8"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
|
2
|
+
|
|
3
|
+
import type {
|
|
4
|
+
RealtimeServerEvent,
|
|
5
|
+
RealtimeConnection,
|
|
6
|
+
} from "@kernl-sdk/protocol";
|
|
7
|
+
import { OpenAIRealtimeModel } from "../realtime";
|
|
8
|
+
|
|
9
|
+
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
|
|
10
|
+
|
|
11
|
+
describe.skipIf(!OPENAI_API_KEY)("OpenAI Realtime Integration", () => {
|
|
12
|
+
let model: OpenAIRealtimeModel;
|
|
13
|
+
|
|
14
|
+
beforeAll(() => {
|
|
15
|
+
model = new OpenAIRealtimeModel("gpt-realtime", {
|
|
16
|
+
apiKey: OPENAI_API_KEY,
|
|
17
|
+
});
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
it("should connect and receive session.created", async () => {
|
|
21
|
+
const conn = await model.connect();
|
|
22
|
+
const events: RealtimeServerEvent[] = [];
|
|
23
|
+
|
|
24
|
+
const sessionCreated = new Promise<void>((resolve, reject) => {
|
|
25
|
+
const timeout = setTimeout(() => reject(new Error("timeout")), 10000);
|
|
26
|
+
conn.on("event", (e: RealtimeServerEvent) => {
|
|
27
|
+
events.push(e);
|
|
28
|
+
if (e.kind === "session.created") {
|
|
29
|
+
clearTimeout(timeout);
|
|
30
|
+
resolve();
|
|
31
|
+
}
|
|
32
|
+
});
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
await sessionCreated;
|
|
36
|
+
conn.close();
|
|
37
|
+
|
|
38
|
+
expect(events.some((e) => e.kind === "session.created")).toBe(true);
|
|
39
|
+
expect(conn.sessionId).toBeTruthy();
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it("should complete text round-trip", async () => {
|
|
43
|
+
const conn = await model.connect();
|
|
44
|
+
const events: RealtimeServerEvent[] = [];
|
|
45
|
+
|
|
46
|
+
conn.on("event", (e: RealtimeServerEvent) => {
|
|
47
|
+
events.push(e);
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
// wait for session
|
|
51
|
+
await waitFor(conn, "session.created");
|
|
52
|
+
|
|
53
|
+
// configure text-only mode
|
|
54
|
+
conn.send({
|
|
55
|
+
kind: "session.update",
|
|
56
|
+
config: {
|
|
57
|
+
modalities: ["text"],
|
|
58
|
+
instructions: "You are a helpful assistant. Be very brief.",
|
|
59
|
+
},
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
await waitFor(conn, "session.updated");
|
|
63
|
+
|
|
64
|
+
// add user message
|
|
65
|
+
conn.send({
|
|
66
|
+
kind: "item.create",
|
|
67
|
+
item: {
|
|
68
|
+
kind: "message",
|
|
69
|
+
id: "test-msg-1",
|
|
70
|
+
role: "user",
|
|
71
|
+
content: [{ kind: "text", text: "Say exactly: hello world" }],
|
|
72
|
+
},
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
// trigger response
|
|
76
|
+
conn.send({ kind: "response.create" });
|
|
77
|
+
|
|
78
|
+
// wait for response to complete
|
|
79
|
+
await waitFor(conn, "response.done", 15000);
|
|
80
|
+
|
|
81
|
+
conn.close();
|
|
82
|
+
|
|
83
|
+
// verify event flow
|
|
84
|
+
const kinds = events.map((e) => e.kind);
|
|
85
|
+
expect(kinds).toContain("session.created");
|
|
86
|
+
expect(kinds).toContain("session.updated");
|
|
87
|
+
expect(kinds).toContain("response.created");
|
|
88
|
+
expect(kinds).toContain("response.done");
|
|
89
|
+
|
|
90
|
+
// verify we got text output
|
|
91
|
+
const textOutput = events.find((e) => e.kind === "text.output");
|
|
92
|
+
expect(textOutput).toBeDefined();
|
|
93
|
+
if (textOutput?.kind === "text.output") {
|
|
94
|
+
expect(textOutput.text.toLowerCase()).toContain("hello");
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// verify response completed successfully
|
|
98
|
+
const done = events.find((e) => e.kind === "response.done");
|
|
99
|
+
if (done?.kind === "response.done") {
|
|
100
|
+
expect(done.status).toBe("completed");
|
|
101
|
+
}
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it("should handle tool calling", { timeout: 10000 }, async () => {
|
|
105
|
+
const conn = await model.connect();
|
|
106
|
+
const events: RealtimeServerEvent[] = [];
|
|
107
|
+
|
|
108
|
+
conn.on("event", (e: RealtimeServerEvent) => {
|
|
109
|
+
events.push(e);
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
await waitFor(conn, "session.created");
|
|
113
|
+
|
|
114
|
+
// configure with a tool
|
|
115
|
+
conn.send({
|
|
116
|
+
kind: "session.update",
|
|
117
|
+
config: {
|
|
118
|
+
modalities: ["text"],
|
|
119
|
+
instructions: "You have access to tools. Use them when appropriate.",
|
|
120
|
+
tools: [
|
|
121
|
+
{
|
|
122
|
+
kind: "function",
|
|
123
|
+
name: "get_weather",
|
|
124
|
+
description: "Get the current weather for a location",
|
|
125
|
+
parameters: {
|
|
126
|
+
type: "object",
|
|
127
|
+
properties: {
|
|
128
|
+
location: { type: "string", description: "City name" },
|
|
129
|
+
},
|
|
130
|
+
required: ["location"],
|
|
131
|
+
},
|
|
132
|
+
},
|
|
133
|
+
],
|
|
134
|
+
},
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
await waitFor(conn, "session.updated");
|
|
138
|
+
|
|
139
|
+
// ask about weather
|
|
140
|
+
conn.send({
|
|
141
|
+
kind: "item.create",
|
|
142
|
+
item: {
|
|
143
|
+
kind: "message",
|
|
144
|
+
id: "test-msg-2",
|
|
145
|
+
role: "user",
|
|
146
|
+
content: [{ kind: "text", text: "What is the weather in Tokyo?" }],
|
|
147
|
+
},
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
conn.send({ kind: "response.create" });
|
|
151
|
+
|
|
152
|
+
// wait for tool call
|
|
153
|
+
const toolCall = await waitFor(conn, "tool.call", 15000);
|
|
154
|
+
|
|
155
|
+
expect(toolCall.kind).toBe("tool.call");
|
|
156
|
+
if (toolCall.kind !== "tool.call") {
|
|
157
|
+
throw new Error("Expected tool.call");
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
expect(toolCall.toolId).toBe("get_weather");
|
|
161
|
+
const args = JSON.parse(toolCall.arguments);
|
|
162
|
+
expect(args.location.toLowerCase()).toContain("tokyo");
|
|
163
|
+
|
|
164
|
+
// wait for first response to complete before sending tool result
|
|
165
|
+
await waitFor(conn, "response.done", 15000);
|
|
166
|
+
|
|
167
|
+
// send tool result
|
|
168
|
+
conn.send({
|
|
169
|
+
kind: "tool.result",
|
|
170
|
+
callId: toolCall.callId,
|
|
171
|
+
result: JSON.stringify({ temperature: 22, condition: "sunny" }),
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
// trigger follow-up response
|
|
175
|
+
conn.send({ kind: "response.create" });
|
|
176
|
+
|
|
177
|
+
// wait for second response to complete
|
|
178
|
+
await waitFor(conn, "response.done", 15000);
|
|
179
|
+
|
|
180
|
+
conn.close();
|
|
181
|
+
|
|
182
|
+
// verify we got text mentioning the weather
|
|
183
|
+
const textEvents = events.filter((e) => e.kind === "text.output");
|
|
184
|
+
const allText = textEvents
|
|
185
|
+
.map((e) => (e.kind === "text.output" ? e.text : ""))
|
|
186
|
+
.join(" ")
|
|
187
|
+
.toLowerCase();
|
|
188
|
+
|
|
189
|
+
expect(allText).toMatch(/sunny|22|tokyo/i);
|
|
190
|
+
});
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Wait for a specific event kind.
|
|
195
|
+
*/
|
|
196
|
+
function waitFor(
|
|
197
|
+
conn: RealtimeConnection,
|
|
198
|
+
kind: RealtimeServerEvent["kind"],
|
|
199
|
+
timeout = 10000,
|
|
200
|
+
): Promise<RealtimeServerEvent> {
|
|
201
|
+
return new Promise((resolve, reject) => {
|
|
202
|
+
const timer = setTimeout(() => {
|
|
203
|
+
conn.off("event", handler);
|
|
204
|
+
reject(new Error(`timeout waiting for ${kind}`));
|
|
205
|
+
}, timeout);
|
|
206
|
+
|
|
207
|
+
const handler = (e: RealtimeServerEvent) => {
|
|
208
|
+
if (e.kind === kind) {
|
|
209
|
+
clearTimeout(timer);
|
|
210
|
+
conn.off("event", handler);
|
|
211
|
+
resolve(e);
|
|
212
|
+
}
|
|
213
|
+
};
|
|
214
|
+
|
|
215
|
+
conn.on("event", handler);
|
|
216
|
+
});
|
|
217
|
+
}
|