@kernl-sdk/openai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,481 @@
1
+ import type { Codec } from "@kernl-sdk/shared/lib";
2
+ import { randomID } from "@kernl-sdk/shared/lib";
3
+ import type {
4
+ RealtimeClientEvent,
5
+ RealtimeServerEvent,
6
+ RealtimeSessionConfig,
7
+ TurnDetectionConfig,
8
+ LanguageModelItem,
9
+ } from "@kernl-sdk/protocol";
10
+
11
+ import type {
12
+ OpenAIClientEvent,
13
+ OpenAIServerEvent,
14
+ OpenAISessionConfig,
15
+ OpenAITurnDetection,
16
+ OpenAIItem,
17
+ OpenAIContentPart,
18
+ } from "./types";
19
+
20
+ /**
21
+ * Codec for turn detection config.
22
+ */
23
+ export const TURN_DETECTION: Codec<TurnDetectionConfig, OpenAITurnDetection> = {
24
+ /**
25
+ * Convert kernl turn detection to OpenAI format.
26
+ */
27
+ encode(config) {
28
+ return {
29
+ type: config.mode === "manual" ? "none" : config.mode,
30
+ threshold: config.threshold,
31
+ silence_duration_ms: config.silenceDurationMs,
32
+ prefix_padding_ms: config.prefixPaddingMs,
33
+ create_response: config.createResponse,
34
+ interrupt_response: config.interruptResponse,
35
+ };
36
+ },
37
+
38
+ /**
39
+ * Convert OpenAI turn detection to kernl format.
40
+ */
41
+ decode(config) {
42
+ return {
43
+ mode: config.type === "none" ? "manual" : config.type,
44
+ threshold: config.threshold,
45
+ silenceDurationMs: config.silence_duration_ms,
46
+ prefixPaddingMs: config.prefix_padding_ms,
47
+ createResponse: config.create_response,
48
+ interruptResponse: config.interrupt_response,
49
+ };
50
+ },
51
+ };
52
+
53
+ /**
54
+ * Codec for session config.
55
+ */
56
+ export const SESSION_CONFIG: Codec<RealtimeSessionConfig, OpenAISessionConfig> =
57
+ {
58
+ /**
59
+ * Convert kernl session config to OpenAI format.
60
+ */
61
+ encode(config) {
62
+ return {
63
+ instructions: config.instructions,
64
+ modalities: config.modalities,
65
+ voice: config.voice?.voiceId,
66
+ input_audio_format: config.audio?.inputFormat?.mimeType,
67
+ output_audio_format: config.audio?.outputFormat?.mimeType,
68
+ turn_detection: config.turnDetection
69
+ ? TURN_DETECTION.encode(config.turnDetection)
70
+ : undefined,
71
+ tools: config.tools
72
+ ?.filter((t) => t.kind === "function")
73
+ .map((t) => ({
74
+ type: "function" as const,
75
+ name: t.name,
76
+ description: t.description,
77
+ parameters: t.parameters,
78
+ })),
79
+ };
80
+ },
81
+
82
+ /**
83
+ * Convert OpenAI session config to kernl format.
84
+ */
85
+ decode(config) {
86
+ return {
87
+ instructions: config.instructions,
88
+ modalities: config.modalities,
89
+ voice: config.voice ? { voiceId: config.voice } : undefined,
90
+ turnDetection: config.turn_detection
91
+ ? TURN_DETECTION.decode(config.turn_detection)
92
+ : undefined,
93
+ };
94
+ },
95
+ };
96
+
97
+ /**
98
+ * Codec for conversation items.
99
+ */
100
+ export const ITEM: Codec<LanguageModelItem, OpenAIItem> = {
101
+ /**
102
+ * Convert kernl item to OpenAI format.
103
+ */
104
+ encode(item) {
105
+ switch (item.kind) {
106
+ case "message": {
107
+ const content: OpenAIContentPart[] = item.content.map((c) => {
108
+ switch (c.kind) {
109
+ case "text":
110
+ return item.role === "assistant"
111
+ ? { type: "output_text", text: c.text }
112
+ : { type: "input_text", text: c.text };
113
+ case "file":
114
+ // audio files get sent as input_audio
115
+ if (
116
+ c.mimeType.startsWith("audio/") &&
117
+ "data" in c &&
118
+ typeof c.data === "string"
119
+ ) {
120
+ return { type: "input_audio", audio: c.data };
121
+ }
122
+ return { type: "input_text", text: "" };
123
+ default:
124
+ return { type: "input_text", text: "" };
125
+ }
126
+ });
127
+ return { type: "message", role: item.role, content };
128
+ }
129
+
130
+ case "tool-call":
131
+ return {
132
+ type: "function_call",
133
+ call_id: item.callId,
134
+ name: item.toolId,
135
+ arguments: item.arguments,
136
+ };
137
+
138
+ case "tool-result":
139
+ return {
140
+ type: "function_call_output",
141
+ call_id: item.callId,
142
+ output: item.error ?? JSON.stringify(item.result) ?? "",
143
+ };
144
+
145
+ default:
146
+ throw new Error(
147
+ `Unsupported item kind: ${(item as LanguageModelItem).kind}`,
148
+ );
149
+ }
150
+ },
151
+
152
+ /**
153
+ * Convert OpenAI item to kernl format.
154
+ */
155
+ decode(item) {
156
+ switch (item.type) {
157
+ case "message":
158
+ return {
159
+ kind: "message",
160
+ id: randomID(),
161
+ role: item.role,
162
+ content: item.content.map((c) => ({
163
+ kind: "text" as const,
164
+ text: "text" in c ? c.text : "",
165
+ })),
166
+ };
167
+
168
+ case "function_call":
169
+ return {
170
+ kind: "tool-call",
171
+ callId: item.call_id,
172
+ toolId: item.name,
173
+ state: "completed" as const,
174
+ arguments: item.arguments,
175
+ };
176
+
177
+ case "function_call_output":
178
+ return {
179
+ kind: "tool-result",
180
+ callId: item.call_id,
181
+ toolId: "",
182
+ state: "completed" as const,
183
+ result: item.output,
184
+ error: null,
185
+ };
186
+
187
+ default:
188
+ throw new Error(
189
+ `Unsupported OpenAI item type: ${(item as OpenAIItem).type}`,
190
+ );
191
+ }
192
+ },
193
+ };
194
+
195
+ /**
196
+ * Codec for client events (kernl → OpenAI).
197
+ */
198
+ export const CLIENT_EVENT: Codec<
199
+ RealtimeClientEvent,
200
+ OpenAIClientEvent | null
201
+ > = {
202
+ /**
203
+ * Convert kernl client event to OpenAI wire format.
204
+ */
205
+ encode(event) {
206
+ switch (event.kind) {
207
+ case "session.update":
208
+ return {
209
+ type: "session.update",
210
+ session: SESSION_CONFIG.encode(event.config),
211
+ };
212
+
213
+ case "audio.input.append":
214
+ return { type: "input_audio_buffer.append", audio: event.audio };
215
+
216
+ case "audio.input.commit":
217
+ return { type: "input_audio_buffer.commit" };
218
+
219
+ case "audio.input.clear":
220
+ return { type: "input_audio_buffer.clear" };
221
+
222
+ case "item.create":
223
+ return {
224
+ type: "conversation.item.create",
225
+ item: ITEM.encode(event.item),
226
+ previous_item_id: event.previousItemId,
227
+ };
228
+
229
+ case "item.delete":
230
+ return { type: "conversation.item.delete", item_id: event.itemId };
231
+
232
+ case "item.truncate":
233
+ return {
234
+ type: "conversation.item.truncate",
235
+ item_id: event.itemId,
236
+ content_index: 0,
237
+ audio_end_ms: event.audioEndMs,
238
+ };
239
+
240
+ case "response.create":
241
+ return {
242
+ type: "response.create",
243
+ response: event.config
244
+ ? {
245
+ instructions: event.config.instructions,
246
+ modalities: event.config.modalities,
247
+ }
248
+ : undefined,
249
+ };
250
+
251
+ case "response.cancel":
252
+ return { type: "response.cancel", response_id: event.responseId };
253
+
254
+ case "tool.result":
255
+ return {
256
+ type: "conversation.item.create",
257
+ item: {
258
+ type: "function_call_output",
259
+ call_id: event.callId,
260
+ output: event.error ?? event.result ?? "",
261
+ },
262
+ };
263
+
264
+ case "activity.start":
265
+ case "activity.end":
266
+ return null;
267
+
268
+ default:
269
+ return null;
270
+ }
271
+ },
272
+
273
+ /**
274
+ * Not implemented - use SERVER_EVENT.decode instead.
275
+ */
276
+ decode() {
277
+ throw new Error("CLIENT_EVENT.decode: use SERVER_EVENT instead");
278
+ },
279
+ };
280
+
281
/**
 * Codec for server events (OpenAI → kernl).
 *
 * Only decode() is implemented; outbound events are encoded by
 * CLIENT_EVENT. decode() returns null for events with no kernl
 * counterpart — callers are expected to drop those.
 */
export const SERVER_EVENT: Codec<
  RealtimeServerEvent | null,
  OpenAIServerEvent
> = {
  /**
   * Not implemented - use CLIENT_EVENT.encode instead.
   */
  encode() {
    throw new Error("SERVER_EVENT.encode: use CLIENT_EVENT instead");
  },

  /**
   * Convert OpenAI server event to kernl format.
   *
   * @param event - Raw event as received from the OpenAI realtime socket.
   * @returns The corresponding kernl event, or null when the event is
   *   redundant or unrecognized.
   */
  decode(event) {
    switch (event.type) {
      // ---- session lifecycle ----
      case "session.created":
        return {
          kind: "session.created",
          session: {
            id: event.session.id,
            config: SESSION_CONFIG.decode(event.session),
          },
        };

      case "session.updated":
        return {
          kind: "session.updated",
          session: {
            id: event.session.id,
            config: SESSION_CONFIG.decode(event.session),
          },
        };

      case "error":
        return {
          kind: "session.error",
          error: { code: event.error.code, message: event.error.message },
        };

      // ---- input audio buffer ----
      case "input_audio_buffer.committed":
        return { kind: "audio.input.committed", itemId: event.item_id };

      case "input_audio_buffer.cleared":
        return { kind: "audio.input.cleared" };

      case "input_audio_buffer.speech_started":
        return {
          kind: "speech.started",
          audioStartMs: event.audio_start_ms,
          itemId: event.item_id,
        };

      case "input_audio_buffer.speech_stopped":
        return {
          kind: "speech.stopped",
          audioEndMs: event.audio_end_ms,
          itemId: event.item_id,
        };

      // ---- conversation items ----
      case "conversation.item.created":
        return {
          kind: "item.created",
          item: ITEM.decode(event.item),
          previousItemId: event.previous_item_id,
        };

      // No kernl counterpart — dropped.
      case "conversation.item.done":
        return null;

      case "conversation.item.deleted":
        return { kind: "item.deleted", itemId: event.item_id };

      case "conversation.item.truncated":
        return {
          kind: "item.truncated",
          itemId: event.item_id,
          audioEndMs: event.audio_end_ms,
        };

      // ---- responses ----
      case "response.created":
        return { kind: "response.created", responseId: event.response.id };

      case "response.done": {
        // kernl models no "incomplete"/"in_progress" terminal states;
        // both collapse to "failed".
        const status =
          event.response.status === "incomplete" ||
          event.response.status === "in_progress"
            ? "failed"
            : event.response.status;
        return {
          kind: "response.done",
          responseId: event.response.id,
          status,
          usage: event.response.usage
            ? {
                inputTokens: event.response.usage.input_tokens,
                outputTokens: event.response.usage.output_tokens,
                totalTokens: event.response.usage.total_tokens,
              }
            : undefined,
        };
      }

      // ---- streaming output: audio and text deltas ----
      case "response.output_audio.delta":
        return {
          kind: "audio.output.delta",
          responseId: event.response_id,
          itemId: event.item_id,
          audio: event.delta,
        };

      case "response.output_audio.done":
        return {
          kind: "audio.output.done",
          responseId: event.response_id,
          itemId: event.item_id,
        };

      case "response.text.delta":
        return {
          kind: "text.output.delta",
          responseId: event.response_id,
          itemId: event.item_id,
          delta: event.delta,
        };

      case "response.text.done":
        return {
          kind: "text.output",
          responseId: event.response_id,
          itemId: event.item_id,
          text: event.text,
        };

      // ---- transcription (input = user speech, output = model speech) ----
      case "conversation.item.input_audio_transcription.delta":
        return {
          kind: "transcript.input.delta",
          itemId: event.item_id,
          delta: event.delta,
        };

      case "conversation.item.input_audio_transcription.completed":
        return {
          kind: "transcript.input",
          itemId: event.item_id,
          text: event.transcript,
        };

      case "response.output_audio_transcript.delta":
        return {
          kind: "transcript.output.delta",
          responseId: event.response_id,
          itemId: event.item_id,
          delta: event.delta,
        };

      case "response.output_audio_transcript.done":
        return {
          kind: "transcript.output",
          responseId: event.response_id,
          itemId: event.item_id,
          text: event.transcript,
        };

      // ---- tool calls ----
      // A new function_call output item marks the start of a tool call;
      // other output item kinds carry no extra information here.
      case "response.output_item.added":
        if (event.item.type === "function_call") {
          return {
            kind: "tool.start",
            responseId: event.response_id,
            callId: event.item.call_id,
            toolId: event.item.name,
          };
        }
        return null;

      case "response.output_item.done":
        return null;

      case "response.function_call_arguments.delta":
        return {
          kind: "tool.delta",
          callId: event.call_id,
          delta: event.delta,
        };

      case "response.function_call_arguments.done":
        return {
          kind: "tool.call",
          callId: event.call_id,
          toolId: event.name,
          arguments: event.arguments,
        };

      // Unrecognized event types are dropped.
      default:
        return null;
    }
  },
};