@kernl-sdk/xai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -0
- package/dist/__tests__/realtime.integration.test.d.ts +2 -0
- package/dist/__tests__/realtime.integration.test.d.ts.map +1 -0
- package/dist/__tests__/realtime.integration.test.js +157 -0
- package/dist/__tests__/realtime.test.d.ts +2 -0
- package/dist/__tests__/realtime.test.d.ts.map +1 -0
- package/dist/__tests__/realtime.test.js +263 -0
- package/dist/connection.d.ts +47 -0
- package/dist/connection.d.ts.map +1 -0
- package/dist/connection.js +138 -0
- package/dist/convert/event.d.ts +28 -0
- package/dist/convert/event.d.ts.map +1 -0
- package/dist/convert/event.js +314 -0
- package/dist/convert/types.d.ts +212 -0
- package/dist/convert/types.d.ts.map +1 -0
- package/dist/convert/types.js +1 -0
- package/dist/index.d.ts +36 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/model.d.ts +36 -0
- package/dist/model.d.ts.map +1 -0
- package/dist/model.js +112 -0
- package/dist/protocol.d.ts +212 -0
- package/dist/protocol.d.ts.map +1 -0
- package/dist/protocol.js +1 -0
- package/dist/realtime/connection.d.ts +47 -0
- package/dist/realtime/connection.d.ts.map +1 -0
- package/dist/realtime/connection.js +138 -0
- package/dist/realtime/convert/event.d.ts +28 -0
- package/dist/realtime/convert/event.d.ts.map +1 -0
- package/dist/realtime/convert/event.js +314 -0
- package/dist/realtime/model.d.ts +36 -0
- package/dist/realtime/model.d.ts.map +1 -0
- package/dist/realtime/model.js +111 -0
- package/dist/realtime/protocol.d.ts +212 -0
- package/dist/realtime/protocol.d.ts.map +1 -0
- package/dist/realtime/protocol.js +1 -0
- package/dist/realtime.d.ts +36 -0
- package/dist/realtime.d.ts.map +1 -0
- package/dist/realtime.js +250 -0
- package/package.json +55 -0
- package/src/__tests__/realtime.integration.test.ts +203 -0
- package/src/__tests__/realtime.test.ts +350 -0
- package/src/index.ts +41 -0
- package/src/realtime/connection.ts +167 -0
- package/src/realtime/convert/event.ts +388 -0
- package/src/realtime/model.ts +162 -0
- package/src/realtime/protocol.ts +286 -0
- package/tsconfig.json +13 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
+
import { EventEmitter } from "node:events";
|
|
3
|
+
|
|
4
|
+
import type {
|
|
5
|
+
RealtimeServerEvent,
|
|
6
|
+
TransportStatus,
|
|
7
|
+
WebSocketConstructor,
|
|
8
|
+
} from "@kernl-sdk/protocol";
|
|
9
|
+
import type { GrokServerEvent } from "../realtime/protocol";
|
|
10
|
+
import { GrokRealtimeModel } from "../realtime/model";
|
|
11
|
+
|
|
12
|
+
// Track mock WebSocket instances
|
|
13
|
+
const wsInstances: TestWebSocket[] = [];
|
|
14
|
+
|
|
15
|
+
/**
 * Shape of the fake socket used by these tests: a Node `EventEmitter`
 * carrying the handful of WebSocket members the mock factory assigns.
 */
interface TestWebSocket extends EventEmitter {
  /** Sink for outgoing frames. */
  send(data: string | ArrayBuffer): void;
  /** Closes the socket. */
  close(): void;
  /** Current ready state of the socket. */
  readyState: number;
  /** Numeric value representing the "open" ready state. */
  OPEN: number;
  /** DOM-style subscription. */
  addEventListener(type: string, listener: (event: unknown) => void): void;
  /** DOM-style unsubscription. */
  removeEventListener(type: string, listener: (event: unknown) => void): void;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Build a fake WebSocket on top of a fresh `EventEmitter`.
 *
 * `send` and `close` are vitest mocks, the socket reports itself as open
 * (`readyState === OPEN === 1`), and the DOM-style listener methods are
 * the emitter's own `on` / `off` bound to the instance.
 */
function createMockWebSocket(): TestWebSocket {
  const socket = new EventEmitter() as TestWebSocket;

  Object.assign(socket, {
    send: vi.fn(),
    close: vi.fn(),
    readyState: 1, // OPEN
    OPEN: 1,
    addEventListener: socket.on.bind(socket),
    removeEventListener: socket.off.bind(socket),
  });

  return socket;
}
|
|
34
|
+
|
|
35
|
+
// Stand-in WebSocket constructor: every call creates a fresh mock socket and
// records it in `wsInstances` so tests can reach the socket afterwards.
// Kept as a `function` expression (not an arrow) so it can be invoked with `new`.
const MockWebSocket = function () {
  const socket = createMockWebSocket();
  wsInstances.push(socket);
  return socket;
} as unknown as WebSocketConstructor;
|
|
41
|
+
|
|
42
|
+
describe("GrokRealtimeModel", () => {
  /**
   * Restore XAI_API_KEY to the value captured before a test touched it.
   *
   * Assigning `undefined` to a `process.env` entry stores the string
   * "undefined" rather than removing the variable, so an originally-unset
   * key has to be deleted instead of re-assigned.
   */
  const restoreApiKey = (previous: string | undefined): void => {
    if (previous === undefined) {
      delete process.env.XAI_API_KEY;
    } else {
      process.env.XAI_API_KEY = previous;
    }
  };

  beforeEach(() => {
    vi.clearAllMocks();
    wsInstances.length = 0;
  });

  it("should require API key", async () => {
    const originalEnv = process.env.XAI_API_KEY;
    delete process.env.XAI_API_KEY;

    // try/finally so the environment is restored even if the assertion fails.
    try {
      const model = new GrokRealtimeModel();

      // Connect should fail without API key
      await expect(model.connect({ websocket: MockWebSocket })).rejects.toThrow(
        "No API key or credential provided",
      );
    } finally {
      restoreApiKey(originalEnv);
    }
  });

  it("should accept API key via options", () => {
    const model = new GrokRealtimeModel({
      apiKey: "test-key",
    });

    expect(model.modelId).toBe("grok-realtime");
    expect(model.provider).toBe("xai");
    expect(model.spec).toBe("1.0");
  });

  it("should use XAI_API_KEY env var", () => {
    const originalEnv = process.env.XAI_API_KEY;
    process.env.XAI_API_KEY = "env-key";

    try {
      const model = new GrokRealtimeModel();
      expect(model.modelId).toBe("grok-realtime");
    } finally {
      restoreApiKey(originalEnv);
    }
  });
});
|
|
82
|
+
|
|
83
|
+
describe("base64ByteLength", () => {
  // The real helper is not exported, so these cases check the same
  // length formula with the padding count supplied explicitly.
  const byteLength = (b64: string, padding: number): number =>
    (b64.length * 3) / 4 - padding;

  it("should calculate correct byte length for base64 without padding", () => {
    // "AAAA" = 3 bytes (no padding needed for 3 bytes)
    expect(byteLength("AAAA", 0)).toBe(3);
  });

  it("should calculate correct byte length for base64 with single padding", () => {
    // "AAA=" represents 2 bytes
    expect(byteLength("AAA=", 1)).toBe(2);
  });

  it("should calculate correct byte length for base64 with double padding", () => {
    // "AA==" represents 1 byte
    expect(byteLength("AA==", 2)).toBe(1);
  });
});
|
|
114
|
+
|
|
115
|
+
describe("audio length calculation", () => {
  // 24kHz PCM16 = 24000 samples/sec at 2 bytes/sample = 48000 bytes/sec.
  const bytesToMs = (bytes: number): number => (bytes / 2 / 24000) * 1000;

  it("should calculate correct duration for 24kHz PCM16", () => {
    // 48000 bytes = 1000ms, so 4800 bytes = 100ms
    expect(bytesToMs(4800)).toBe(100);
  });

  it("should accumulate audio length from multiple chunks", () => {
    // Three simulated audio chunks of 50ms each
    const chunkSizes = [2400, 2400, 2400];

    let totalMs = 0;
    for (const size of chunkSizes) {
      totalMs += bytesToMs(size);
    }

    expect(totalMs).toBe(150);
  });
});
|
|
139
|
+
|
|
140
|
+
describe("GrokRealtimeConnection (mocked WebSocket)", () => {
  const apiKey = "test-key";

  beforeEach(() => {
    vi.clearAllMocks();
    wsInstances.length = 0;
  });

  /** Most recently constructed mock socket; throws when none exists yet. */
  const getLastSocket = (): TestWebSocket => {
    const count = wsInstances.length;
    if (count === 0) {
      throw new Error("No WebSocket instances were created");
    }
    return wsInstances[count - 1];
  };

  /** Deliver a server event to the connection as a JSON "message" frame. */
  const emitServerEvent = (
    socket: TestWebSocket,
    event: GrokServerEvent,
  ): void => {
    socket.emit("message", { data: JSON.stringify(event) });
  };

  /** Connect a model through the mock constructor and simulate the open event. */
  const createConnectedRealtime = async () => {
    const model = new GrokRealtimeModel({ apiKey });
    const pending = model.connect({ websocket: MockWebSocket });

    // The socket is created synchronously by connect(); open it so the promise resolves.
    const socket = getLastSocket();
    socket.emit("open");

    const connection = await pending;
    return { connection, socket };
  };

  /** View the connection through an EventEmitter-shaped type to subscribe. */
  const asEmitter = (connection: unknown): EventEmitter =>
    connection as EventEmitter;

  it("should process a basic conversation flow and emit events", async () => {
    const { connection, socket } = await createConnectedRealtime();

    const statusEvents: TransportStatus[] = [];
    const realtimeEvents: RealtimeServerEvent[] = [];

    asEmitter(connection).on("status", (status: TransportStatus) => {
      statusEvents.push(status);
    });
    asEmitter(connection).on("event", (event: RealtimeServerEvent) => {
      realtimeEvents.push(event);
    });

    // Verify initial status after open.
    const { status } = connection as unknown as { status: TransportStatus };
    expect(status).toBe("connected");

    const script: GrokServerEvent[] = [
      // conversation.created (Grok's equivalent of session.created)
      {
        type: "conversation.created",
        event_id: "evt-1",
        conversation: { id: "conv-1", object: "realtime.conversation" },
      },
      // response.created
      {
        type: "response.created",
        event_id: "evt-2",
        response: { id: "resp-1", object: "realtime.response", status: "in_progress", output: [] },
      },
      // small audio delta then done
      {
        type: "response.output_audio.delta",
        event_id: "evt-3",
        response_id: "resp-1",
        item_id: "item-1",
        output_index: 0,
        content_index: 0,
        delta: "AAAA",
      },
      {
        type: "response.output_audio.done",
        event_id: "evt-4",
        response_id: "resp-1",
        item_id: "item-1",
      },
      // transcript delta then done
      {
        type: "response.output_audio_transcript.delta",
        event_id: "evt-5",
        response_id: "resp-1",
        item_id: "item-1",
        delta: "Hello",
      },
      {
        type: "response.output_audio_transcript.done",
        event_id: "evt-6",
        response_id: "resp-1",
        item_id: "item-1",
      },
      // input transcription
      {
        type: "conversation.item.input_audio_transcription.completed",
        event_id: "evt-7",
        item_id: "item-1",
        transcript: "User said hello",
      },
      // response.done
      {
        type: "response.done",
        event_id: "evt-8",
        response: {
          id: "resp-1",
          object: "realtime.response",
          status: "completed",
        },
      },
    ];

    for (const serverEvent of script) {
      emitServerEvent(socket, serverEvent);
    }

    // Close socket to trigger status change and reset.
    socket.emit("close");

    // Status events should include closed (connected is emitted before we subscribe).
    expect(statusEvents).toContain("closed");

    // Every scripted server event should have surfaced as a realtime event kind.
    const kinds = realtimeEvents.map((e) => e?.kind);
    const expectedKinds = [
      "session.created",
      "response.created",
      "audio.output.delta",
      "audio.output.done",
      "transcript.output.delta",
      "transcript.output",
      "transcript.input",
      "response.done",
    ];
    for (const kind of expectedKinds) {
      expect(kinds).toContain(kind);
    }
  });

  it("should emit interrupted event on speech start", async () => {
    const { connection, socket } = await createConnectedRealtime();

    let interrupted = false;
    asEmitter(connection).on("interrupted", () => {
      interrupted = true;
    });

    const script: GrokServerEvent[] = [
      // Mark that a response is in progress with some audio.
      {
        type: "response.created",
        event_id: "evt-1",
        response: { id: "resp-1", object: "realtime.response", status: "in_progress", output: [] },
      },
      // Single audio delta chunk
      {
        type: "response.output_audio.delta",
        event_id: "evt-2",
        response_id: "resp-1",
        item_id: "item-1",
        output_index: 0,
        content_index: 0,
        delta: "AAAA",
      },
      // speech_started should trigger interrupt logic
      {
        type: "input_audio_buffer.speech_started",
        event_id: "evt-3",
        item_id: "item-2",
      },
    ];

    for (const serverEvent of script) {
      emitServerEvent(socket, serverEvent);
    }

    expect(interrupted).toBe(true);
  });

  it("should handle tool calls", async () => {
    const { connection, socket } = await createConnectedRealtime();

    const realtimeEvents: RealtimeServerEvent[] = [];
    asEmitter(connection).on("event", (event: RealtimeServerEvent) => {
      realtimeEvents.push(event);
    });

    // function call arguments done
    emitServerEvent(socket, {
      type: "response.function_call_arguments.done",
      event_id: "evt-1",
      call_id: "call-1",
      name: "get_weather",
      arguments: '{"location":"Tokyo"}',
    });

    const toolCall = realtimeEvents.find((e) => e.kind === "tool.call");
    expect(toolCall).toBeDefined();

    // The kind check narrows the union so the tool-call fields are typed.
    if (toolCall?.kind === "tool.call") {
      expect(toolCall.toolId).toBe("get_weather");
      expect(toolCall.callId).toBe("call-1");
      expect(JSON.parse(toolCall.arguments)).toEqual({ location: "Tokyo" });
    }
  });
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { GrokRealtimeModel, type GrokRealtimeOptions } from "./realtime/model";
|
|
2
|
+
|
|
3
|
+
/**
 * Contract implemented by the xAI provider object.
 */
export interface XAIProvider {
  /**
   * Construct a Grok realtime voice model.
   *
   * @param options - Optional model settings, e.g. an explicit `apiKey`.
   * @returns A new `GrokRealtimeModel`.
   *
   * @example
   * ```ts
   * import { xai } from '@kernl-sdk/xai';
   *
   * const model = xai.realtime();
   * ```
   *
   * @example
   * ```ts
   * const model = xai.realtime({
   *   apiKey: 'xai-...',
   * });
   * ```
   */
  realtime(options?: GrokRealtimeOptions): GrokRealtimeModel;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Default xAI provider instance.
 *
 * @example
 * ```ts
 * import { xai } from '@kernl-sdk/xai';
 *
 * const model = xai.realtime();
 * ```
 */
export const xai: XAIProvider = {
  // Each call builds a fresh model from the given options.
  realtime: (options?: GrokRealtimeOptions) => new GrokRealtimeModel(options),
};
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import { Emitter } from "@kernl-sdk/shared";
|
|
2
|
+
import {
|
|
3
|
+
WS_OPEN,
|
|
4
|
+
type RealtimeConnection,
|
|
5
|
+
type RealtimeConnectionEvents,
|
|
6
|
+
type RealtimeClientEvent,
|
|
7
|
+
type TransportStatus,
|
|
8
|
+
type WebSocketLike,
|
|
9
|
+
} from "@kernl-sdk/protocol";
|
|
10
|
+
|
|
11
|
+
import type { GrokServerEvent } from "./protocol";
|
|
12
|
+
import { CLIENT_EVENT, SERVER_EVENT } from "./convert/event";
|
|
13
|
+
|
|
14
|
+
/**
 * Realtime connection to Grok over a WebSocket-like transport.
 *
 * Incoming frames are JSON-parsed, used to update local response/audio
 * bookkeeping, decoded with `SERVER_EVENT.decode`, and re-emitted as
 * `"event"`. Transport lifecycle changes are emitted as `"status"` and
 * failures as `"error"`.
 */
export class GrokRealtimeConnection
  extends Emitter<RealtimeConnectionEvents>
  implements RealtimeConnection
{
  private ws: WebSocketLike;
  private _status: TransportStatus = "connecting";
  private _muted = false;
  private _sessionId: string | null = null;

  // audio state tracking for interruption
  private currid: string | undefined;
  private faudtime: number | undefined;
  private audlenms: number = 0;
  private responding: boolean = false;

  /**
   * @param socket - Transport to wrap; listeners for `message`, `open`,
   *   `close` and `error` are attached immediately.
   */
  constructor(socket: WebSocketLike) {
    super();
    this.ws = socket;

    socket.addEventListener("message", (event: unknown) => {
      this.handleMessage(event);
    });

    socket.addEventListener("open", () => {
      this._status = "connected";
      this.emit("status", this._status);
    });

    socket.addEventListener("close", () => {
      this._status = "closed";
      // Clear audio bookkeeping before listeners observe the closed status.
      this.reset();
      this.emit("status", this._status);
    });

    socket.addEventListener("error", (event: unknown) => {
      // Only genuine Error instances are forwarded as-is; anything else is
      // replaced by a generic error.
      const failure =
        event instanceof Error ? event : new Error("WebSocket error");
      this.emit("error", failure);
    });
  }

  /** Current transport status. */
  get status(): TransportStatus {
    return this._status;
  }

  /** Whether `mute()` has been called more recently than `unmute()`. */
  get muted(): boolean {
    return this._muted;
  }

  /** Session id taken from the last decoded `session.created` event, or `null`. */
  get sessionId(): string | null {
    return this._sessionId;
  }

  /**
   * Encode a client event and send it to the Grok realtime API.
   *
   * Nothing is sent when the encoder yields no payload or the socket is
   * not in the open state.
   */
  send(event: RealtimeClientEvent): void {
    const payload = CLIENT_EVENT.encode(event);
    if (!payload || this.ws.readyState !== WS_OPEN) {
      return;
    }
    this.ws.send(JSON.stringify(payload));
  }

  /**
   * Clear audio tracking state and close the underlying socket.
   */
  close(): void {
    this.reset();
    this.ws.close();
  }

  /**
   * Flag audio input as muted.
   */
  mute(): void {
    this._muted = true;
  }

  /**
   * Flag audio input as unmuted.
   */
  unmute(): void {
    this._muted = false;
  }

  /**
   * Interrupt the current response.
   *
   * Note: Grok doesn't support response.cancel or item.truncate,
   * so this only clears local state and emits the interrupted event.
   */
  interrupt(): void {
    this.responding = false;

    // Listeners run before the audio bookkeeping is cleared.
    this.emit("interrupted");
    this.reset();
  }

  /**
   * Process one raw `message` event from the socket.
   *
   * Any exception (bad JSON, decoder failure, a throwing listener) is
   * reported through the `"error"` event instead of propagating.
   */
  private handleMessage(event: unknown): void {
    try {
      let text: string;
      if (event && typeof event === "object" && "data" in event) {
        text = (event as { data: string }).data;
      } else {
        text = String(event);
      }

      const raw = JSON.parse(text) as GrokServerEvent;
      this.track(raw);

      const decoded = SERVER_EVENT.decode(raw);
      if (!decoded) {
        return;
      }
      if (decoded.kind === "session.created") {
        this._sessionId = decoded.session.id;
      }
      this.emit("event", decoded);
    } catch (err) {
      this.emit("error", err instanceof Error ? err : new Error(String(err)));
    }
  }

  /**
   * Update response/audio bookkeeping from a raw server event.
   */
  private track(raw: GrokServerEvent): void {
    switch (raw.type) {
      case "response.output_audio.delta": {
        this.currid = raw.item_id;
        if (this.faudtime === undefined) {
          // First audio chunk since the last reset: start the clock.
          this.faudtime = Date.now();
          this.audlenms = 0;
        }
        // calculate audio length assuming 24kHz PCM16
        const byteCount = base64ByteLength(raw.delta);
        this.audlenms += (byteCount / 2 / 24000) * 1000;
        break;
      }
      case "response.created":
        this.responding = true;
        break;
      case "response.done":
        this.responding = false;
        this.reset();
        break;
      case "input_audio_buffer.speech_started":
        this.interrupt();
        break;
      default:
        break;
    }
  }

  /**
   * Reset audio tracking state.
   */
  private reset(): void {
    this.audlenms = 0;
    this.faudtime = undefined;
    this.currid = undefined;
  }
}
|
|
160
|
+
|
|
161
|
+
/**
 * Get the decoded byte length of a base64 string without decoding it.
 *
 * Trailing `=` padding is ignored and the result is floored, so input
 * that omits padding (length not a multiple of 4) yields a whole byte
 * count instead of a fraction.
 *
 * @param b64 - Base64 text, with or without trailing `=` padding.
 * @returns Number of whole bytes the text encodes; 0 for an empty string.
 */
function base64ByteLength(b64: string): number {
  let significant = b64.length;
  // 61 is the char code of "="; skip padding without allocating a substring.
  while (significant > 0 && b64.charCodeAt(significant - 1) === 61) {
    significant--;
  }
  // Each base64 character carries 6 bits; only complete bytes count.
  return Math.floor((significant * 3) / 4);
}
|