@kernl-sdk/openai 0.1.0
- package/.turbo/turbo-build.log +5 -0
- package/CHANGELOG.md +15 -0
- package/dist/__tests__/realtime.integration.test.d.ts +2 -0
- package/dist/__tests__/realtime.integration.test.d.ts.map +1 -0
- package/dist/__tests__/realtime.integration.test.js +169 -0
- package/dist/__tests__/realtime.test.d.ts +2 -0
- package/dist/__tests__/realtime.test.d.ts.map +1 -0
- package/dist/__tests__/realtime.test.js +314 -0
- package/dist/convert/__tests__/event.test.d.ts +2 -0
- package/dist/convert/__tests__/event.test.d.ts.map +1 -0
- package/dist/convert/__tests__/event.test.js +514 -0
- package/dist/convert/event.d.ts +24 -0
- package/dist/convert/event.d.ts.map +1 -0
- package/dist/convert/event.js +398 -0
- package/dist/convert/types.d.ts +259 -0
- package/dist/convert/types.d.ts.map +1 -0
- package/dist/convert/types.js +1 -0
- package/dist/index.d.ts +36 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/realtime.d.ts +30 -0
- package/dist/realtime.d.ts.map +1 -0
- package/dist/realtime.js +214 -0
- package/package.json +54 -0
- package/src/__tests__/realtime.integration.test.ts +217 -0
- package/src/__tests__/realtime.test.ts +421 -0
- package/src/convert/__tests__/event.test.ts +592 -0
- package/src/convert/event.ts +481 -0
- package/src/convert/types.ts +344 -0
- package/src/index.ts +41 -0
- package/src/realtime.ts +276 -0
- package/tsconfig.json +13 -0
package/src/convert/types.ts
ADDED
@@ -0,0 +1,344 @@
+import type { JSONSchema7 } from "json-schema";
+
+/**
+ * OpenAI Realtime API wire types (GA).
+ */
+
+// =============================================================================
+// Client Events
+// =============================================================================
+
+export type OpenAIClientEvent =
+  | OpenAISessionUpdate
+  | OpenAIInputAudioBufferAppend
+  | OpenAIInputAudioBufferCommit
+  | OpenAIInputAudioBufferClear
+  | OpenAIConversationItemCreate
+  | OpenAIConversationItemDelete
+  | OpenAIConversationItemTruncate
+  | OpenAIResponseCreate
+  | OpenAIResponseCancel;
+
+export interface OpenAISessionUpdate {
+  type: "session.update";
+  session: OpenAISessionConfig;
+}
+
+export interface OpenAIInputAudioBufferAppend {
+  type: "input_audio_buffer.append";
+  audio: string;
+}
+
+export interface OpenAIInputAudioBufferCommit {
+  type: "input_audio_buffer.commit";
+}
+
+export interface OpenAIInputAudioBufferClear {
+  type: "input_audio_buffer.clear";
+}
+
+export interface OpenAIConversationItemCreate {
+  type: "conversation.item.create";
+  item: OpenAIItem;
+  previous_item_id?: string;
+}
+
+export interface OpenAIConversationItemDelete {
+  type: "conversation.item.delete";
+  item_id: string;
+}
+
+export interface OpenAIConversationItemTruncate {
+  type: "conversation.item.truncate";
+  item_id: string;
+  content_index: number;
+  audio_end_ms: number;
+}
+
+export interface OpenAIResponseCreate {
+  type: "response.create";
+  response?: {
+    instructions?: string;
+    modalities?: ("text" | "audio")[];
+  };
+}
+
+export interface OpenAIResponseCancel {
+  type: "response.cancel";
+  response_id?: string;
+}
+
+// =============================================================================
+// Server Events
+// =============================================================================
+
+export type OpenAIServerEvent =
+  | OpenAISessionCreated
+  | OpenAISessionUpdated
+  | OpenAIError
+  | OpenAIInputAudioBufferCommitted
+  | OpenAIInputAudioBufferCleared
+  | OpenAIInputAudioBufferSpeechStarted
+  | OpenAIInputAudioBufferSpeechStopped
+  | OpenAIConversationItemCreated
+  | OpenAIConversationItemDone
+  | OpenAIConversationItemDeleted
+  | OpenAIConversationItemTruncated
+  | OpenAIResponseCreated
+  | OpenAIResponseDone
+  | OpenAIResponseOutputAudioDelta
+  | OpenAIResponseOutputAudioDone
+  | OpenAIResponseTextDelta
+  | OpenAIResponseTextDone
+  | OpenAIInputAudioTranscriptionDelta
+  | OpenAIInputAudioTranscriptionCompleted
+  | OpenAIResponseOutputAudioTranscriptDelta
+  | OpenAIResponseOutputAudioTranscriptDone
+  | OpenAIResponseOutputItemAdded
+  | OpenAIResponseOutputItemDone
+  | OpenAIResponseFunctionCallArgumentsDelta
+  | OpenAIResponseFunctionCallArgumentsDone;
+
+export interface OpenAISessionCreated {
+  type: "session.created";
+  session: OpenAISession;
+}
+
+export interface OpenAISessionUpdated {
+  type: "session.updated";
+  session: OpenAISession;
+}
+
+export interface OpenAIError {
+  type: "error";
+  error: {
+    code: string;
+    message: string;
+  };
+}
+
+export interface OpenAIInputAudioBufferCommitted {
+  type: "input_audio_buffer.committed";
+  item_id: string;
+}
+
+export interface OpenAIInputAudioBufferCleared {
+  type: "input_audio_buffer.cleared";
+}
+
+export interface OpenAIInputAudioBufferSpeechStarted {
+  type: "input_audio_buffer.speech_started";
+  audio_start_ms: number;
+  item_id: string;
+}
+
+export interface OpenAIInputAudioBufferSpeechStopped {
+  type: "input_audio_buffer.speech_stopped";
+  audio_end_ms: number;
+  item_id: string;
+}
+
+export interface OpenAIConversationItemCreated {
+  type: "conversation.item.created";
+  item: OpenAIItem;
+  previous_item_id?: string;
+}
+
+export interface OpenAIConversationItemDone {
+  type: "conversation.item.done";
+  item: OpenAIItem;
+}
+
+export interface OpenAIConversationItemDeleted {
+  type: "conversation.item.deleted";
+  item_id: string;
+}
+
+export interface OpenAIConversationItemTruncated {
+  type: "conversation.item.truncated";
+  item_id: string;
+  audio_end_ms: number;
+  content_index: number;
+}
+
+export interface OpenAIResponseCreated {
+  type: "response.created";
+  response: {
+    id: string;
+  };
+}
+
+export interface OpenAIResponseDone {
+  type: "response.done";
+  response: {
+    id: string;
+    status: "completed" | "cancelled" | "failed" | "incomplete" | "in_progress";
+    usage?: {
+      input_tokens: number;
+      output_tokens: number;
+      total_tokens?: number;
+    };
+  };
+}
+
+export interface OpenAIResponseOutputAudioDelta {
+  type: "response.output_audio.delta";
+  response_id: string;
+  item_id: string;
+  content_index: number;
+  delta: string;
+}
+
+export interface OpenAIResponseOutputAudioDone {
+  type: "response.output_audio.done";
+  response_id: string;
+  item_id: string;
+  content_index: number;
+}
+
+export interface OpenAIResponseTextDelta {
+  type: "response.text.delta";
+  response_id: string;
+  item_id: string;
+  content_index: number;
+  delta: string;
+}
+
+export interface OpenAIResponseTextDone {
+  type: "response.text.done";
+  response_id: string;
+  item_id: string;
+  content_index: number;
+  text: string;
+}
+
+export interface OpenAIInputAudioTranscriptionDelta {
+  type: "conversation.item.input_audio_transcription.delta";
+  item_id: string;
+  content_index?: number;
+  delta: string;
+}
+
+export interface OpenAIInputAudioTranscriptionCompleted {
+  type: "conversation.item.input_audio_transcription.completed";
+  item_id: string;
+  content_index: number;
+  transcript: string;
+}
+
+export interface OpenAIResponseOutputAudioTranscriptDelta {
+  type: "response.output_audio_transcript.delta";
+  response_id: string;
+  item_id: string;
+  content_index: number;
+  delta: string;
+}
+
+export interface OpenAIResponseOutputAudioTranscriptDone {
+  type: "response.output_audio_transcript.done";
+  response_id: string;
+  item_id: string;
+  content_index: number;
+  transcript: string;
+}
+
+export interface OpenAIResponseOutputItemAdded {
+  type: "response.output_item.added";
+  response_id: string;
+  output_index: number;
+  item: OpenAIFunctionCallItem | OpenAIItem;
+}
+
+export interface OpenAIResponseOutputItemDone {
+  type: "response.output_item.done";
+  response_id: string;
+  output_index: number;
+  item: OpenAIFunctionCallItem | OpenAIItem;
+}
+
+export interface OpenAIResponseFunctionCallArgumentsDelta {
+  type: "response.function_call_arguments.delta";
+  response_id: string;
+  item_id: string;
+  call_id: string;
+  delta: string;
+}
+
+export interface OpenAIResponseFunctionCallArgumentsDone {
+  type: "response.function_call_arguments.done";
+  response_id: string;
+  item_id: string;
+  call_id: string;
+  name: string;
+  arguments: string;
+}
+
+// =============================================================================
+// Shared Types
+// =============================================================================
+
+export interface OpenAISession {
+  id: string;
+  instructions?: string;
+  voice?: string;
+  modalities?: ("text" | "audio")[];
+  turn_detection?: OpenAITurnDetection;
+  input_audio_format?: string;
+  output_audio_format?: string;
+}
+
+export interface OpenAISessionConfig {
+  instructions?: string;
+  voice?: string;
+  modalities?: ("text" | "audio")[];
+  tools?: OpenAITool[];
+  turn_detection?: OpenAITurnDetection;
+  input_audio_format?: string;
+  output_audio_format?: string;
+}
+
+export interface OpenAITurnDetection {
+  type: "server_vad" | "none";
+  threshold?: number;
+  silence_duration_ms?: number;
+  prefix_padding_ms?: number;
+  create_response?: boolean;
+  interrupt_response?: boolean;
+}
+
+export interface OpenAITool {
+  type: "function";
+  name: string;
+  description?: string;
+  parameters?: JSONSchema7;
+}
+
+export type OpenAIItem =
+  | OpenAIMessageItem
+  | OpenAIFunctionCallItem
+  | OpenAIFunctionCallOutputItem;
+
+export interface OpenAIMessageItem {
+  type: "message";
+  role: "user" | "assistant" | "system";
+  content: OpenAIContentPart[];
+}
+
+export interface OpenAIFunctionCallItem {
+  type: "function_call";
+  call_id: string;
+  name: string;
+  arguments: string;
+}
+
+export interface OpenAIFunctionCallOutputItem {
+  type: "function_call_output";
+  call_id: string;
+  output: string;
+}
+
+export type OpenAIContentPart =
+  | { type: "input_text"; text: string }
+  | { type: "input_audio"; audio: string }
+  | { type: "output_text"; text: string }
+  | { type: "output_audio"; audio: string; transcript?: string };
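To show how these wire types compose, here is a short sketch that is not part of the package: it builds a session.update registering one function tool, then narrows a server event on its "type" discriminant to answer a function call. The get_weather tool and its result payload are made-up placeholders.

// With the types above in scope (OpenAIClientEvent, OpenAIServerEvent).
const update: OpenAIClientEvent = {
  type: "session.update",
  session: {
    instructions: "You are a helpful voice assistant.",
    modalities: ["text", "audio"],
    turn_detection: { type: "server_vad", silence_duration_ms: 500 },
    tools: [
      {
        type: "function",
        name: "get_weather", // hypothetical tool
        description: "Look up the current weather for a city",
        parameters: {
          type: "object",
          properties: { city: { type: "string" } },
          required: ["city"],
        },
      },
    ],
  },
};

// "type" is the discriminant, so a plain equality check narrows
// OpenAIServerEvent to the matching interface.
function onFunctionCall(event: OpenAIServerEvent): OpenAIClientEvent | null {
  if (event.type !== "response.function_call_arguments.done") return null;
  const args = JSON.parse(event.arguments) as { city?: string };
  return {
    type: "conversation.item.create",
    item: {
      type: "function_call_output",
      call_id: event.call_id, // echo the model's call id
      output: JSON.stringify({ city: args.city, tempC: 18 }), // placeholder result
    },
  };
}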
package/src/index.ts
ADDED
@@ -0,0 +1,41 @@
+import { OpenAIRealtimeModel, type OpenAIRealtimeOptions } from "./realtime";
+
+/**
+ * OpenAI provider interface.
+ */
+export interface OpenAIProvider {
+  /**
+   * Create a realtime model.
+   *
+   * @example
+   * ```ts
+   * import { openai } from '@kernl-sdk/openai';
+   *
+   * const model = openai.realtime('gpt-4o-realtime-preview');
+   * ```
+   *
+   * @example
+   * ```ts
+   * const model = openai.realtime('gpt-4o-realtime-preview', {
+   *   apiKey: 'sk-...',
+   * });
+   * ```
+   */
+  realtime(modelId: string, options?: OpenAIRealtimeOptions): OpenAIRealtimeModel;
+}
+
+/**
+ * OpenAI provider.
+ *
+ * @example
+ * ```ts
+ * import { openai } from '@kernl-sdk/openai';
+ *
+ * const model = openai.realtime('gpt-4o-realtime-preview');
+ * ```
+ */
+export const openai: OpenAIProvider = {
+  realtime(modelId: string, options?: OpenAIRealtimeOptions) {
+    return new OpenAIRealtimeModel(modelId, options);
+  },
+};
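Read together with realtime.ts below, the provider supports a straightforward usage pattern. The sketch that follows is illustrative, not shipped code: it assumes the returned RealtimeConnection exposes the EventEmitter on() that OpenAIRealtimeConnection implements, and that the protocol-side session config mirrors the OpenAISessionConfig fields above; neither shape is pinned down by this diff.

import { openai } from "@kernl-sdk/openai";

async function main() {
  // Requires OPENAI_API_KEY in the environment (or pass { apiKey }).
  const model = openai.realtime("gpt-4o-realtime-preview");

  // connect() resolves once the WebSocket opens; a sessionConfig option,
  // if provided, is forwarded as a session.update client event.
  const connection = await model.connect();

  connection.on("status", (status: string) => console.log("transport:", status));
  connection.on("error", (err: Error) => console.error("realtime error:", err));
  connection.on("event", (event: { kind: string }) => {
    if (event.kind === "session.created") {
      console.log("session id:", connection.sessionId);
    }
  });

  // Client events pass through CLIENT_EVENT.encode before being sent;
  // the config shape here is an assumption, as noted above.
  connection.send({ kind: "session.update", config: { instructions: "Be brief." } });

  connection.close();
}

main().catch(console.error);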
package/src/realtime.ts
ADDED
@@ -0,0 +1,276 @@
+import { EventEmitter } from "node:events";
+import WebSocket from "ws";
+
+import type {
+  RealtimeModel,
+  RealtimeConnection,
+  RealtimeConnectOptions,
+  RealtimeClientEvent,
+  TransportStatus,
+} from "@kernl-sdk/protocol";
+
+import { CLIENT_EVENT, SERVER_EVENT } from "./convert/event";
+import type { OpenAIServerEvent } from "./convert/types";
+
+const OPENAI_REALTIME_URL = "wss://api.openai.com/v1/realtime";
+
+/**
+ * Options for creating an OpenAI realtime model.
+ */
+export interface OpenAIRealtimeOptions {
+  /**
+   * OpenAI API key. Defaults to the OPENAI_API_KEY env var.
+   */
+  apiKey?: string;
+
+  /**
+   * Base URL for the realtime API.
+   */
+  baseUrl?: string;
+}
+
+/**
+ * OpenAI realtime model implementation.
+ */
+export class OpenAIRealtimeModel implements RealtimeModel {
+  readonly spec = "1.0" as const;
+  readonly provider = "openai";
+  readonly modelId: string;
+
+  private apiKey: string;
+  private baseUrl: string;
+
+  constructor(modelId: string, options?: OpenAIRealtimeOptions) {
+    this.modelId = modelId;
+    this.apiKey = options?.apiKey ?? process.env.OPENAI_API_KEY ?? "";
+    this.baseUrl = options?.baseUrl ?? OPENAI_REALTIME_URL;
+
+    if (!this.apiKey) {
+      throw new Error("OpenAI API key is required");
+    }
+  }
+
+  /**
+   * Establish a WebSocket connection to the OpenAI realtime API.
+   */
+  async connect(options?: RealtimeConnectOptions): Promise<RealtimeConnection> {
+    const url = `${this.baseUrl}?model=${this.modelId}`;
+
+    const ws = new WebSocket(url, {
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`,
+        "OpenAI-Beta": "realtime=v1",
+      },
+    });
+
+    const connection = new OpenAIRealtimeConnection(ws);
+
+    await new Promise<void>((resolve, reject) => {
+      if (options?.abort?.aborted) {
+        return reject(new Error("Connection aborted"));
+      }
+
+      const cleanup = () => {
+        ws.off("open", onOpen);
+        ws.off("error", onError);
+        options?.abort?.removeEventListener("abort", onAbort);
+      };
+
+      const onOpen = () => {
+        cleanup();
+        resolve();
+      };
+      const onError = (err: Error) => {
+        cleanup();
+        reject(err);
+      };
+      const onAbort = () => {
+        cleanup();
+        ws.close();
+        reject(new Error("Connection aborted"));
+      };
+
+      ws.on("open", onOpen);
+      ws.on("error", onError);
+      options?.abort?.addEventListener("abort", onAbort);
+    });
+
+    if (options?.sessionConfig) {
+      connection.send({
+        kind: "session.update",
+        config: options.sessionConfig,
+      });
+    }
+
+    return connection;
+  }
+}
+
+/**
+ * OpenAI realtime connection implementation.
+ */
+class OpenAIRealtimeConnection
+  extends EventEmitter
+  implements RealtimeConnection
+{
+  private ws: WebSocket;
+  private _status: TransportStatus = "connecting";
+  private _muted = false;
+  private _sessionId: string | null = null;
+
+  // audio state tracking for interruption
+  private currid: string | undefined;
+  private curridx: number | undefined;
+  private faudtime: number | undefined; /* first audio timestamp */
+  private audlenms: number = 0;
+  private responding: boolean = false;
+
+  constructor(socket: WebSocket) {
+    super();
+    this.ws = socket;
+
+    socket.on("message", (data) => {
+      try {
+        const raw = JSON.parse(data.toString()) as OpenAIServerEvent;
+
+        // track audio state for interruption handling
+        if (raw.type === "response.output_audio.delta") {
+          this.currid = raw.item_id;
+          this.curridx = raw.content_index;
+          if (this.faudtime === undefined) {
+            this.faudtime = Date.now();
+            this.audlenms = 0;
+          }
+          // calculate audio length assuming 24kHz PCM16
+          // TODO: support g711 (8kHz, 1 byte/sample) and configurable PCM rates
+          const bytes = base64ByteLength(raw.delta);
+          this.audlenms += (bytes / 2 / 24000) * 1000;
+        } else if (raw.type === "response.created") {
+          this.responding = true;
+        } else if (raw.type === "response.done") {
+          this.responding = false;
+          this.reset();
+        } else if (raw.type === "input_audio_buffer.speech_started") {
+          this.interrupt();
+        }
+
+        const event = SERVER_EVENT.decode(raw);
+        if (event) {
+          if (event.kind === "session.created") {
+            this._sessionId = event.session.id;
+          }
+          this.emit("event", event);
+        }
+      } catch (err) {
+        this.emit("error", err instanceof Error ? err : new Error(String(err)));
+      }
+    });
+
+    socket.on("open", () => {
+      this._status = "connected";
+      this.emit("status", this._status);
+    });
+
+    socket.on("close", () => {
+      this._status = "closed";
+      this.reset();
+      this.emit("status", this._status);
+    });
+
+    socket.on("error", (err) => {
+      this.emit("error", err);
+    });
+  }
+
+  get status(): TransportStatus {
+    return this._status;
+  }
+
+  get muted(): boolean {
+    return this._muted;
+  }
+
+  get sessionId(): string | null {
+    return this._sessionId;
+  }
+
+  /**
+   * Send a client event to the OpenAI realtime API.
+   */
+  send(event: RealtimeClientEvent): void {
+    const encoded = CLIENT_EVENT.encode(event);
+    if (encoded && this.ws.readyState === WebSocket.OPEN) {
+      this.ws.send(JSON.stringify(encoded));
+    }
+  }
+
+  /**
+   * Close the WebSocket connection.
+   */
+  close(): void {
+    this.reset();
+    this.ws.close();
+  }
+
+  /**
+   * Mute audio input.
+   */
+  mute(): void {
+    this._muted = true;
+  }
+
+  /**
+   * Unmute audio input.
+   */
+  unmute(): void {
+    this._muted = false;
+  }
+
+  /**
+   * Interrupt the current response.
+   */
+  interrupt(): void {
+    // cancel ongoing response
+    if (this.responding) {
+      this.send({ kind: "response.cancel" });
+      this.responding = false;
+    }
+
+    // truncate if we have audio state
+    if (this.currid && this.faudtime !== undefined) {
+      const elapsed = Date.now() - this.faudtime;
+      const endms = Math.max(0, Math.floor(Math.min(elapsed, this.audlenms)));
+
+      if (this.ws.readyState === WebSocket.OPEN) {
+        this.ws.send(
+          JSON.stringify({
+            type: "conversation.item.truncate",
+            item_id: this.currid,
+            content_index: this.curridx ?? 0,
+            audio_end_ms: endms,
+          }),
+        );
+      }
+    }
+
+    this.reset();
+  }
+
+  /**
+   * Reset audio tracking state.
+   */
+  private reset(): void {
+    this.currid = undefined;
+    this.curridx = undefined;
+    this.faudtime = undefined;
+    this.audlenms = 0;
+  }
+}
+
+/**
+ * Get byte length from base64 string without decoding.
+ */
+function base64ByteLength(b64: string): number {
+  const padding = b64.endsWith("==") ? 2 : b64.endsWith("=") ? 1 : 0;
+  return (b64.length * 3) / 4 - padding;
+}
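One detail worth a sanity check: interrupt() clamps audio_end_ms to the smaller of wall-clock elapsed time and the total audio received, and that total is derived purely from base64 payload sizes. The arithmetic is easy to verify standalone (these helpers mirror the code above; pcm16DurationMs is an illustrative name, not an export): four base64 characters carry three decoded bytes, and 24 kHz PCM16 spends two bytes per sample.

// Decoded size of a base64 payload: every 4 chars encode 3 bytes,
// minus 1 or 2 trailing "=" padding bytes.
function base64ByteLength(b64: string): number {
  const padding = b64.endsWith("==") ? 2 : b64.endsWith("=") ? 1 : 0;
  return (b64.length * 3) / 4 - padding;
}

// Milliseconds of audio in a PCM16 chunk at a given sample rate.
function pcm16DurationMs(bytes: number, sampleRate = 24_000): number {
  return (bytes / 2 / sampleRate) * 1000; // 2 bytes per sample
}

// 100 ms at 24 kHz PCM16 = 2400 samples = 4800 bytes,
// which base64-encodes to 6400 chars with no padding.
const chunk = Buffer.alloc(4800).toString("base64");
console.log(base64ByteLength(chunk)); // 4800
console.log(pcm16DurationMs(base64ByteLength(chunk))); // 100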
package/tsconfig.json
ADDED