@kernl-sdk/xai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/.turbo/turbo-build.log +5 -0
  2. package/dist/__tests__/realtime.integration.test.d.ts +2 -0
  3. package/dist/__tests__/realtime.integration.test.d.ts.map +1 -0
  4. package/dist/__tests__/realtime.integration.test.js +157 -0
  5. package/dist/__tests__/realtime.test.d.ts +2 -0
  6. package/dist/__tests__/realtime.test.d.ts.map +1 -0
  7. package/dist/__tests__/realtime.test.js +263 -0
  8. package/dist/connection.d.ts +47 -0
  9. package/dist/connection.d.ts.map +1 -0
  10. package/dist/connection.js +138 -0
  11. package/dist/convert/event.d.ts +28 -0
  12. package/dist/convert/event.d.ts.map +1 -0
  13. package/dist/convert/event.js +314 -0
  14. package/dist/convert/types.d.ts +212 -0
  15. package/dist/convert/types.d.ts.map +1 -0
  16. package/dist/convert/types.js +1 -0
  17. package/dist/index.d.ts +36 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +16 -0
  20. package/dist/model.d.ts +36 -0
  21. package/dist/model.d.ts.map +1 -0
  22. package/dist/model.js +112 -0
  23. package/dist/protocol.d.ts +212 -0
  24. package/dist/protocol.d.ts.map +1 -0
  25. package/dist/protocol.js +1 -0
  26. package/dist/realtime/connection.d.ts +47 -0
  27. package/dist/realtime/connection.d.ts.map +1 -0
  28. package/dist/realtime/connection.js +138 -0
  29. package/dist/realtime/convert/event.d.ts +28 -0
  30. package/dist/realtime/convert/event.d.ts.map +1 -0
  31. package/dist/realtime/convert/event.js +314 -0
  32. package/dist/realtime/model.d.ts +36 -0
  33. package/dist/realtime/model.d.ts.map +1 -0
  34. package/dist/realtime/model.js +111 -0
  35. package/dist/realtime/protocol.d.ts +212 -0
  36. package/dist/realtime/protocol.d.ts.map +1 -0
  37. package/dist/realtime/protocol.js +1 -0
  38. package/dist/realtime.d.ts +36 -0
  39. package/dist/realtime.d.ts.map +1 -0
  40. package/dist/realtime.js +250 -0
  41. package/package.json +55 -0
  42. package/src/__tests__/realtime.integration.test.ts +203 -0
  43. package/src/__tests__/realtime.test.ts +350 -0
  44. package/src/index.ts +41 -0
  45. package/src/realtime/connection.ts +167 -0
  46. package/src/realtime/convert/event.ts +388 -0
  47. package/src/realtime/model.ts +162 -0
  48. package/src/realtime/protocol.ts +286 -0
  49. package/tsconfig.json +13 -0
@@ -0,0 +1,388 @@
1
+ import type { Codec } from "@kernl-sdk/shared/lib";
2
+ import { randomID } from "@kernl-sdk/shared/lib";
3
+ import type {
4
+ RealtimeClientEvent,
5
+ RealtimeServerEvent,
6
+ RealtimeSessionConfig,
7
+ TurnDetectionConfig,
8
+ LanguageModelItem,
9
+ AudioConfig,
10
+ } from "@kernl-sdk/protocol";
11
+
12
+ import type {
13
+ GrokClientEvent,
14
+ GrokServerEvent,
15
+ GrokSessionConfig,
16
+ GrokTurnDetection,
17
+ GrokItem,
18
+ GrokContentPart,
19
+ GrokAudioConfig,
20
+ GrokTool,
21
+ GrokVoice,
22
+ } from "../protocol";
23
+
24
/**
 * Canonical Grok voice names accepted by `toGrokVoice`.
 */
const GROK_VOICES: readonly GrokVoice[] = ["Ara", "Rex", "Sal", "Eve", "Leo"];

/**
 * Map a kernl voice ID to a Grok voice name.
 *
 * Matching ignores letter case and surrounding whitespace, so "rex" or
 * " Rex " resolve to "Rex" instead of silently falling back to the default.
 *
 * @param voiceId - Voice identifier taken from the kernl session config.
 * @returns `undefined` when no voice ID is supplied (including the empty
 *   string); the canonical Grok voice when the ID matches one; otherwise the
 *   default voice "Ara".
 */
function toGrokVoice(voiceId: string | undefined): GrokVoice | undefined {
  if (!voiceId) return undefined;

  const wanted = voiceId.trim().toLowerCase();
  const match = GROK_VOICES.find((voice) => voice.toLowerCase() === wanted);

  // Unknown IDs keep the original behavior: fall back to the default voice.
  return match ?? "Ara";
}
35
+
36
/**
 * Codec for turn detection config.
 *
 * kernl "manual" mode is sent to Grok as `{ type: null }`; every other mode
 * is sent as `{ type: "server_vad" }`. On the way back, only a wire object
 * whose `type` is "server_vad" decodes to "server_vad"; anything else
 * (including a `null` config) decodes to "manual".
 */
export const TURN_DETECTION: Codec<
  TurnDetectionConfig,
  GrokTurnDetection | null
> = {
  encode({ mode }) {
    if (mode === "manual") {
      return { type: null };
    }
    return { type: "server_vad" };
  },

  decode(wire) {
    const serverVad = wire != null && wire.type === "server_vad";
    return { mode: serverVad ? "server_vad" : "manual" };
  },
};
55
+
56
/**
 * Codec for audio config.
 *
 * Encoding copies each configured direction (input / output) into Grok's
 * nested `{ format: { type, rate } }` shape; an empty mime type falls back to
 * "audio/pcm". Directions that are not configured are left off the result.
 * Decoding always returns both `inputFormat` and `outputFormat`, set to
 * `undefined` when the wire config carries no format for that direction.
 */
export const AUDIO_CONFIG: Codec<AudioConfig, GrokAudioConfig> = {
  encode({ inputFormat, outputFormat }) {
    const wire: GrokAudioConfig = {};

    if (inputFormat) {
      const type = (inputFormat.mimeType as "audio/pcm") || "audio/pcm";
      wire.input = { format: { type, rate: inputFormat.sampleRate } };
    }

    if (outputFormat) {
      const type = (outputFormat.mimeType as "audio/pcm") || "audio/pcm";
      wire.output = { format: { type, rate: outputFormat.sampleRate } };
    }

    return wire;
  },

  decode({ input, output }) {
    const inbound = input?.format;
    const outbound = output?.format;

    return {
      inputFormat: inbound
        ? { mimeType: inbound.type, sampleRate: inbound.rate }
        : undefined,
      outputFormat: outbound
        ? { mimeType: outbound.type, sampleRate: outbound.rate }
        : undefined,
    };
  },
};
101
+
102
/**
 * Codec for session config.
 *
 * Encoding keeps only tools whose `kind` is "function" and omits the `tools`
 * field entirely when none remain. Decoding restores instructions, voice,
 * turn detection and audio; tools are not decoded.
 */
export const SESSION_CONFIG: Codec<RealtimeSessionConfig, GrokSessionConfig> = {
  encode(config) {
    const tools: GrokTool[] | undefined = config.tools
      ?.filter((t) => t.kind === "function")
      .map((t) => ({
        type: "function" as const,
        name: t.name,
        description: t.description,
        parameters: t.parameters,
      }));

    return {
      instructions: config.instructions,
      voice: toGrokVoice(config.voice?.voiceId),
      turn_detection: config.turnDetection
        ? TURN_DETECTION.encode(config.turnDetection)
        : undefined,
      audio: config.audio ? AUDIO_CONFIG.encode(config.audio) : undefined,
      // An empty tool list is sent as "no tools field" rather than `[]`.
      tools: tools?.length ? tools : undefined,
    };
  },

  decode(config) {
    return {
      instructions: config.instructions,
      voice: config.voice ? { voiceId: config.voice } : undefined,
      // Only a missing field means "not specified". A `null` value is a valid
      // wire value for the turn detection codec (it decodes to manual mode),
      // so it must be passed through instead of being dropped by a
      // truthiness check.
      turnDetection:
        config.turn_detection !== undefined
          ? TURN_DETECTION.decode(config.turn_detection)
          : undefined,
      audio: config.audio ? AUDIO_CONFIG.decode(config.audio) : undefined,
    };
  },
};
138
+
139
/**
 * Codec for conversation items.
 *
 * Encoding supports "message" and "tool-result" items. Every message content
 * part becomes an `input_text` part; parts that are not text are sent with an
 * empty string. A tool result is sent as `function_call_output`, whose output
 * is the error when one is set, otherwise the JSON-serialized result.
 *
 * Decoding supports "message" and "function_call_output" items. Decoded
 * messages receive a freshly generated ID, and decoded tool results carry an
 * empty `toolId`.
 *
 * Both directions throw for any other item kind/type.
 */
export const ITEM: Codec<LanguageModelItem, GrokItem> = {
  encode(item) {
    if (item.kind === "message") {
      const parts: GrokContentPart[] = [];
      for (const part of item.content) {
        // Non-text parts are not representable here; keep the slot but empty.
        parts.push({
          type: "input_text",
          text: part.kind === "text" ? part.text : "",
        });
      }
      return {
        type: "message",
        role: item.role as "user" | "assistant",
        content: parts,
      };
    }

    if (item.kind === "tool-result") {
      // JSON.stringify yields undefined for an undefined result, hence the "".
      const output = item.error ?? JSON.stringify(item.result) ?? "";
      return {
        type: "function_call_output",
        call_id: item.callId,
        output,
      };
    }

    throw new Error(
      `Unsupported item kind: ${(item as LanguageModelItem).kind}`,
    );
  },

  decode(item) {
    if (item.type === "message") {
      const id = randomID();
      const content = item.content.map((part) => {
        const text = "text" in part ? part.text : "";
        return { kind: "text" as const, text };
      });
      return { kind: "message", id, role: item.role, content };
    }

    if (item.type === "function_call_output") {
      return {
        kind: "tool-result",
        callId: item.call_id,
        toolId: "",
        state: "completed" as const,
        result: item.output,
        error: null,
      };
    }

    throw new Error(`Unsupported Grok item type: ${(item as GrokItem).type}`);
  },
};
205
+
206
/**
 * Codec for client events (kernl → Grok).
 *
 * Encode-only. `encode` returns `null` for events that have no Grok
 * counterpart, so callers can skip sending them. `decode` always throws;
 * inbound traffic is handled by SERVER_EVENT.
 */
export const CLIENT_EVENT: Codec<RealtimeClientEvent, GrokClientEvent | null> =
  {
    encode(event) {
      switch (event.kind) {
        // Audio buffer control maps one-to-one.
        case "audio.input.append":
          return { type: "input_audio_buffer.append", audio: event.audio };
        case "audio.input.commit":
          return { type: "input_audio_buffer.commit" };
        case "audio.input.clear":
          return { type: "input_audio_buffer.clear" };

        case "response.create":
          return { type: "response.create" };

        case "session.update": {
          const session = SESSION_CONFIG.encode(event.config);
          return { type: "session.update", session };
        }

        case "item.create": {
          const item = ITEM.encode(event.item);
          return {
            type: "conversation.item.create",
            item,
            previous_item_id: event.previousItemId,
          };
        }

        case "tool.result": {
          // The error takes precedence over the result; "" when neither is set.
          const output = event.error ?? event.result ?? "";
          return {
            type: "conversation.item.create",
            item: {
              type: "function_call_output",
              call_id: event.callId,
              output,
            },
          };
        }

        // Unsupported by Grok: item.delete, item.truncate, response.cancel,
        // activity.start, activity.end. Unknown kinds are dropped the same way.
        default:
          return null;
      }
    },

    decode() {
      throw new Error("CLIENT_EVENT.decode: use SERVER_EVENT instead");
    },
  };
265
+
266
/**
 * Codec for server events (Grok → kernl).
 *
 * Decode-only. `decode` returns `null` for Grok events that have no kernl
 * counterpart, so callers can ignore them. `encode` always throws; outbound
 * traffic is handled by CLIENT_EVENT.
 */
export const SERVER_EVENT: Codec<RealtimeServerEvent | null, GrokServerEvent> =
  {
    encode() {
      throw new Error("SERVER_EVENT.encode: use CLIENT_EVENT instead");
    },

    decode(event) {
      switch (event.type) {
        // ---- session ----

        // Grok sends conversation.created instead of session.created
        case "conversation.created":
          return {
            kind: "session.created",
            session: { id: event.conversation.id, config: {} },
          };

        case "session.updated": {
          const config = SESSION_CONFIG.decode(event.session);
          return {
            kind: "session.updated",
            session: { id: event.event_id, config },
          };
        }

        // ---- input audio buffer ----

        case "input_audio_buffer.committed": {
          const { item_id: itemId } = event;
          return { kind: "audio.input.committed", itemId };
        }

        case "input_audio_buffer.cleared":
          return { kind: "audio.input.cleared" };

        case "input_audio_buffer.speech_started": {
          const { item_id: itemId } = event;
          // Grok doesn't provide a start offset, so 0 is reported.
          return { kind: "speech.started", audioStartMs: 0, itemId };
        }

        case "input_audio_buffer.speech_stopped": {
          const { item_id: itemId } = event;
          // Grok doesn't provide an end offset, so 0 is reported.
          return { kind: "speech.stopped", audioEndMs: 0, itemId };
        }

        // ---- conversation items ----

        case "conversation.item.added": {
          const item = ITEM.decode(event.item);
          return {
            kind: "item.created",
            item,
            previousItemId: event.previous_item_id,
          };
        }

        case "conversation.item.input_audio_transcription.completed": {
          const { item_id: itemId, transcript: text } = event;
          return { kind: "transcript.input", itemId, text };
        }

        // ---- responses ----

        case "response.created":
          return { kind: "response.created", responseId: event.response.id };

        case "response.done": {
          const { id: responseId, status: wireStatus } = event.response;
          // Every status other than "completed" is reported as "failed".
          return {
            kind: "response.done",
            responseId,
            status: wireStatus === "completed" ? "completed" : "failed",
            usage: undefined,
          };
        }

        case "response.output_audio.delta": {
          const { response_id: responseId, item_id: itemId, delta: audio } =
            event;
          return { kind: "audio.output.delta", responseId, itemId, audio };
        }

        case "response.output_audio.done": {
          const { response_id: responseId, item_id: itemId } = event;
          return { kind: "audio.output.done", responseId, itemId };
        }

        case "response.output_audio_transcript.delta": {
          const { response_id: responseId, item_id: itemId, delta } = event;
          return { kind: "transcript.output.delta", responseId, itemId, delta };
        }

        case "response.output_audio_transcript.done": {
          const { response_id: responseId, item_id: itemId } = event;
          // Grok doesn't include final text in done event
          return { kind: "transcript.output", responseId, itemId, text: "" };
        }

        case "response.function_call_arguments.done": {
          const { call_id: callId, name: toolId, arguments: args } = event;
          return { kind: "tool.call", callId, toolId, arguments: args };
        }

        // response.output_item.added is deliberately dropped (it could emit
        // tool.start for function calls); unknown event types are dropped too.
        default:
          return null;
      }
    },
  };
@@ -0,0 +1,162 @@
1
+ import type {
2
+ RealtimeModel,
3
+ RealtimeConnection,
4
+ RealtimeConnectOptions,
5
+ ClientCredential,
6
+ } from "@kernl-sdk/protocol";
7
+
8
+ import { GrokRealtimeConnection } from "./connection";
9
+
10
// Default WebSocket endpoint, used when no `baseUrl` option is supplied.
const XAI_REALTIME_URL = "wss://api.x.ai/v1/realtime";
// HTTPS endpoint that `authenticate()` POSTs to for an ephemeral client secret.
const XAI_CLIENT_SECRETS_URL = "https://api.x.ai/v1/realtime/client_secrets";
12
+
13
/**
 * Options for creating a Grok realtime model.
 */
export interface GrokRealtimeOptions {
  /**
   * xAI API key. Defaults to XAI_API_KEY env var.
   *
   * May be omitted when every connection is made with a credential passed
   * to `connect()`.
   */
  apiKey?: string;

  /**
   * Base URL for the realtime API.
   *
   * This is the WebSocket URL used by `connect()`; when omitted, the
   * module's default realtime endpoint is used.
   */
  baseUrl?: string;
}
27
+
28
/**
 * Grok (xAI) realtime model implementation.
 *
 * `authenticate()` exchanges the API key for a short-lived token, and
 * `connect()` opens the WebSocket using either such a token or the API key
 * itself.
 */
export class GrokRealtimeModel implements RealtimeModel {
  readonly spec = "1.0" as const;
  readonly provider = "xai";
  readonly modelId = "grok-realtime";

  private apiKey: string | null;
  private baseUrl: string;

  /**
   * @param options - Optional API key and WebSocket base URL. The key falls
   *   back to the XAI_API_KEY environment variable when `process` exists.
   */
  constructor(options?: GrokRealtimeOptions) {
    this.apiKey =
      options?.apiKey ??
      (typeof process !== "undefined" ? process.env?.XAI_API_KEY : null) ??
      null;
    this.baseUrl = options?.baseUrl ?? XAI_REALTIME_URL;
  }

  /**
   * Create ephemeral credential for client-side connections.
   *
   * Must be called server-side where API key is available.
   *
   * @returns A token credential whose `expiresAt` is the requested TTL
   *   measured from the moment the request was sent.
   * @throws Error when no API key is configured, when the endpoint answers
   *   with a non-2xx status, or when the response carries no token value.
   */
  async authenticate(): Promise<ClientCredential> {
    if (!this.apiKey) {
      throw new Error(
        "API key required for authenticate(). " +
          "Call this server-side where XAI_API_KEY is available.",
      );
    }

    const ttlSeconds = 300; // 5 min TTL

    // Stamp the clock before the round trip: measuring after the response
    // arrives would overstate the token's lifetime by the request latency.
    const requestedAt = Date.now();

    const res = await fetch(XAI_CLIENT_SECRETS_URL, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        expires_after: { seconds: ttlSeconds },
      }),
    });

    if (!res.ok) {
      const text = await res.text();
      throw new Error(`Failed to create credential: ${res.status} ${text}`);
    }

    const data = (await res.json()) as { value?: unknown } | null;
    const token = data?.value;
    if (typeof token !== "string" || token.length === 0) {
      // Fail here rather than hand back a credential that cannot authenticate.
      throw new Error(
        "Failed to create credential: response did not include a token value",
      );
    }

    return {
      kind: "token",
      token,
      expiresAt: new Date(requestedAt + ttlSeconds * 1000),
    };
  }

  /**
   * Establish a WebSocket connection to the Grok realtime API.
   *
   * The returned promise settles once the socket opens, fails, closes before
   * opening, or the abort signal fires.
   *
   * @param options - Optional credential, WebSocket constructor and abort
   *   signal. Without a credential the configured API key is used.
   * @returns The connection wrapping the opened socket.
   * @throws Error when the credential is not a token, when no token or API
   *   key is available, when no WebSocket implementation exists, when the
   *   socket fails or closes before opening, or when the connection is
   *   aborted.
   */
  async connect(options?: RealtimeConnectOptions): Promise<RealtimeConnection> {
    const credential = options?.credential;

    if (credential && credential.kind !== "token") {
      throw new Error(
        `Grok requires token credentials, got "${credential.kind}".`,
      );
    }

    const authToken = credential?.token ?? this.apiKey;

    if (!authToken) {
      throw new Error(
        "No API key or credential provided. " +
          "Either set XAI_API_KEY or pass a credential from authenticate().",
      );
    }

    // Use injectable WebSocket or globalThis.WebSocket
    const WS = options?.websocket ?? globalThis.WebSocket;
    if (!WS) {
      throw new Error(
        "No WebSocket available. In Node.js <22, use WebSocketTransport with the 'ws' package:\n" +
          "  import WebSocket from 'ws';\n" +
          "  import { WebSocketTransport } from 'kernl';\n" +
          "  new RealtimeSession(agent, { transport: new WebSocketTransport({ websocket: WebSocket }), ... })",
      );
    }

    const abort = options?.abort;

    // Check for cancellation before opening the socket; a socket created for
    // an already-aborted request would never be closed.
    if (abort?.aborted) {
      throw new Error("Connection aborted");
    }

    // xAI uses OpenAI-compatible subprotocols for browser WebSocket auth
    const protocols = [
      "realtime",
      `openai-insecure-api-key.${authToken}`,
      "openai-beta.realtime-v1",
    ];
    const ws = new WS(this.baseUrl, protocols);

    const connection = new GrokRealtimeConnection(ws);

    await new Promise<void>((resolve, reject) => {
      const onOpen = () => {
        cleanup();
        resolve();
      };
      const onError = (event: unknown) => {
        cleanup();
        const err =
          event instanceof Error
            ? event
            : new Error("WebSocket connection failed");
        reject(err);
      };
      // A socket that closes before opening without emitting "error" would
      // otherwise leave this promise pending forever.
      const onClose = () => {
        cleanup();
        reject(new Error("WebSocket connection failed"));
      };
      const onAbort = () => {
        cleanup();
        ws.close();
        reject(new Error("Connection aborted"));
      };

      // Detach every handshake listener once the outcome is known, so later
      // socket events are seen only by the connection object.
      const cleanup = () => {
        ws.removeEventListener("open", onOpen);
        ws.removeEventListener("error", onError);
        ws.removeEventListener("close", onClose);
        abort?.removeEventListener("abort", onAbort);
      };

      ws.addEventListener("open", onOpen);
      ws.addEventListener("error", onError);
      ws.addEventListener("close", onClose);
      abort?.addEventListener("abort", onAbort);
    });

    return connection;
  }
}