@livekit/agents-plugin-openai 1.0.49 → 1.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/dist/index.cjs +5 -2
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +1 -0
  4. package/dist/index.d.ts +1 -0
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +4 -2
  7. package/dist/index.js.map +1 -1
  8. package/dist/llm.test.cjs +31 -16
  9. package/dist/llm.test.cjs.map +1 -1
  10. package/dist/llm.test.js +32 -17
  11. package/dist/llm.test.js.map +1 -1
  12. package/dist/responses/llm.cjs +71 -16
  13. package/dist/responses/llm.cjs.map +1 -1
  14. package/dist/responses/llm.d.cts +10 -25
  15. package/dist/responses/llm.d.ts +10 -25
  16. package/dist/responses/llm.d.ts.map +1 -1
  17. package/dist/responses/llm.js +71 -14
  18. package/dist/responses/llm.js.map +1 -1
  19. package/dist/responses/llm.test.cjs +32 -17
  20. package/dist/responses/llm.test.cjs.map +1 -1
  21. package/dist/responses/llm.test.js +33 -18
  22. package/dist/responses/llm.test.js.map +1 -1
  23. package/dist/stt.cjs +7 -3
  24. package/dist/stt.cjs.map +1 -1
  25. package/dist/stt.d.ts.map +1 -1
  26. package/dist/stt.js +8 -4
  27. package/dist/stt.js.map +1 -1
  28. package/dist/stt.test.cjs +11 -3
  29. package/dist/stt.test.cjs.map +1 -1
  30. package/dist/stt.test.js +12 -4
  31. package/dist/stt.test.js.map +1 -1
  32. package/dist/tts.test.cjs +11 -3
  33. package/dist/tts.test.cjs.map +1 -1
  34. package/dist/tts.test.js +12 -4
  35. package/dist/tts.test.js.map +1 -1
  36. package/dist/ws/index.cjs +29 -0
  37. package/dist/ws/index.cjs.map +1 -0
  38. package/dist/ws/index.d.cts +3 -0
  39. package/dist/ws/index.d.ts +3 -0
  40. package/dist/ws/index.d.ts.map +1 -0
  41. package/dist/ws/index.js +5 -0
  42. package/dist/ws/index.js.map +1 -0
  43. package/dist/ws/llm.cjs +502 -0
  44. package/dist/ws/llm.cjs.map +1 -0
  45. package/dist/ws/llm.d.cts +74 -0
  46. package/dist/ws/llm.d.ts +74 -0
  47. package/dist/ws/llm.d.ts.map +1 -0
  48. package/dist/ws/llm.js +485 -0
  49. package/dist/ws/llm.js.map +1 -0
  50. package/dist/ws/llm.test.cjs +26 -0
  51. package/dist/ws/llm.test.cjs.map +1 -0
  52. package/dist/ws/llm.test.d.cts +2 -0
  53. package/dist/ws/llm.test.d.ts +2 -0
  54. package/dist/ws/llm.test.d.ts.map +1 -0
  55. package/dist/ws/llm.test.js +25 -0
  56. package/dist/ws/llm.test.js.map +1 -0
  57. package/dist/ws/types.cjs +128 -0
  58. package/dist/ws/types.cjs.map +1 -0
  59. package/dist/ws/types.d.cts +167 -0
  60. package/dist/ws/types.d.ts +167 -0
  61. package/dist/ws/types.d.ts.map +1 -0
  62. package/dist/ws/types.js +95 -0
  63. package/dist/ws/types.js.map +1 -0
  64. package/package.json +6 -5
  65. package/src/index.ts +1 -0
  66. package/src/llm.test.ts +31 -17
  67. package/src/responses/llm.test.ts +32 -18
  68. package/src/responses/llm.ts +105 -19
  69. package/src/stt.test.ts +12 -4
  70. package/src/stt.ts +8 -4
  71. package/src/tts.test.ts +12 -4
  72. package/src/ws/index.ts +17 -0
  73. package/src/ws/llm.test.ts +30 -0
  74. package/src/ws/llm.ts +665 -0
  75. package/src/ws/types.ts +131 -0
package/src/ws/llm.ts ADDED
@@ -0,0 +1,665 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type { APIConnectOptions } from '@livekit/agents';
5
+ import {
6
+ APIConnectionError,
7
+ APIStatusError,
8
+ APITimeoutError,
9
+ ConnectionPool,
10
+ DEFAULT_API_CONNECT_OPTIONS,
11
+ llm,
12
+ stream,
13
+ toError,
14
+ } from '@livekit/agents';
15
+ import type OpenAI from 'openai';
16
+ import { WebSocket } from 'ws';
17
+ import type { ChatModels } from '../models.js';
18
+ import type {
19
+ WsOutputItemDoneEvent,
20
+ WsOutputTextDeltaEvent,
21
+ WsResponseCompletedEvent,
22
+ WsResponseCreateEvent,
23
+ WsResponseCreatedEvent,
24
+ WsResponseFailedEvent,
25
+ WsServerEvent,
26
+ } from './types.js';
27
+ import { wsServerEventSchema } from './types.js';
28
+
29
// WebSocket endpoint for the OpenAI Responses API.
const OPENAI_RESPONSES_WS_URL = 'wss://api.openai.com/v1/responses';

// OpenAI enforces a 60-minute maximum duration on Responses WebSocket connections.
// 3_600_000 ms = 60 min; passed to the ConnectionPool as maxSessionDuration so
// connections are recycled before the server forcibly closes them.
const WS_MAX_SESSION_DURATION = 3_600_000;
33
+
34
+ // ============================================================================
35
+ // Internal: ResponsesWebSocket
36
+ //
37
+ // Wraps a single raw WebSocket connection. Maintains a FIFO queue of
38
+ // StreamChannels — one per outstanding response.create request — and
39
+ // dispatches every incoming server-event to the front of the queue.
40
+ // A response is terminated (and its channel closed) when the service sends
41
+ // response.completed, response.failed, or error.
42
+ //
43
+ // ============================================================================
44
+
45
+ export class ResponsesWebSocket {
46
+ #ws: WebSocket;
47
+ // FIFO queue: the front entry receives validated WsServerEvents for the in-flight response.
48
+ #outputQueue: stream.StreamChannel<WsServerEvent>[] = [];
49
+
50
+ constructor(ws: WebSocket) {
51
+ this.#ws = ws;
52
+
53
+ ws.on('message', (data: Buffer) => {
54
+ const current = this.#outputQueue[0];
55
+ if (!current) return;
56
+
57
+ let raw: unknown;
58
+ try {
59
+ raw = JSON.parse(data.toString());
60
+ } catch {
61
+ return;
62
+ }
63
+
64
+ // Validate and type-narrow with Zod at write time so readers always
65
+ // receive a fully-typed WsServerEvent.
66
+ const parsed = wsServerEventSchema.safeParse(raw);
67
+ if (!parsed.success) return;
68
+
69
+ const event = parsed.data;
70
+ void current.write(event);
71
+
72
+ // Close and dequeue on any terminal event.
73
+ if (
74
+ event.type === 'response.completed' ||
75
+ event.type === 'response.failed' ||
76
+ event.type === 'error'
77
+ ) {
78
+ void current.close();
79
+ this.#outputQueue.shift();
80
+ }
81
+ });
82
+
83
+ ws.on('close', () => {
84
+ // If the WebSocket closes while requests are still in flight, synthesise
85
+ // a typed error event so all readers can handle it cleanly.
86
+ for (const current of this.#outputQueue) {
87
+ if (!current.closed) {
88
+ const closeError: WsServerEvent = {
89
+ type: 'error',
90
+ error: {
91
+ code: 'websocket_closed',
92
+ message: 'OpenAI Responses WebSocket closed unexpectedly',
93
+ },
94
+ };
95
+ void current.write(closeError).finally(() => current.close());
96
+ }
97
+ }
98
+ this.#outputQueue = [];
99
+ });
100
+ }
101
+
102
+ /**
103
+ * Send a response.create event. Returns a typed `StreamChannel<WsServerEvent>`
104
+ * that yields validated server events until the response terminates.
105
+ */
106
+ sendRequest(payload: WsResponseCreateEvent): stream.StreamChannel<WsServerEvent> {
107
+ if (this.#ws.readyState !== WebSocket.OPEN) {
108
+ throw new APIConnectionError({
109
+ message: `OpenAI Responses WebSocket is not open (state ${getWebSocketStateLabel(this.#ws.readyState)})`,
110
+ options: { retryable: true },
111
+ });
112
+ }
113
+
114
+ const channel = stream.createStreamChannel<WsServerEvent>();
115
+ this.#outputQueue.push(channel);
116
+ this.#ws.send(JSON.stringify(payload));
117
+ return channel;
118
+ }
119
+
120
+ close(): void {
121
+ // Drain pending channels before closing the socket.
122
+ for (const ch of this.#outputQueue) {
123
+ void ch.close();
124
+ }
125
+ this.#outputQueue = [];
126
+ this.#ws.close();
127
+ }
128
+ }
129
+
130
+ // ============================================================================
131
+ // LLMOptions
132
+ // ============================================================================
133
+
134
/** Configuration for {@link WSLLM}. */
export interface WSLLMOptions {
  /** Model name, sent verbatim in every response.create request. */
  model: string | ChatModels;
  /** OpenAI API key; defaults to `$OPENAI_API_KEY` when omitted. */
  apiKey?: string;
  /** Alternate API origin; `https` is rewritten to `wss` and `/responses` appended. */
  baseURL?: string;
  /** Sampling temperature, forwarded as-is when set. */
  temperature?: number;
  /** Forwarded as `parallel_tool_calls` when tools are provided to chat(). */
  parallelToolCalls?: boolean;
  /** Forwarded as `tool_choice` when set (chat()'s own argument takes precedence). */
  toolChoice?: llm.ToolChoice;
  /** Forwarded as `store`; when explicitly `false`, previous_response_id reuse is disabled. */
  store?: boolean;
  /** Request metadata, forwarded verbatim when present. */
  metadata?: Record<string, string>;
  /** When true (the default), tool JSON schemas are generated strict and marked `strict: true`. */
  strictToolSchema?: boolean;
}

// Defaults merged underneath user-supplied options in the WSLLM constructor.
const defaultLLMOptions: WSLLMOptions = {
  model: 'gpt-4.1',
  apiKey: process.env.OPENAI_API_KEY,
  strictToolSchema: true,
};
151
+
152
+ // ============================================================================
153
+ // LLM
154
+ // ============================================================================
155
+
156
export class WSLLM extends llm.LLM {
  #opts: WSLLMOptions;
  #pool: ConnectionPool<ResponsesWebSocket>;
  // ID of the most recently created server-side response; '' until the first turn.
  #prevResponseId = '';
  // Chat context captured at response.created time; diffed against the next turn's
  // context to decide whether an incremental request is safe.
  #prevChatCtx: llm.ChatContext | null = null;
  // call_ids of function calls emitted by the previous response that still await
  // a function_call_output in the conversation.
  #pendingToolCalls = new Set<string>();

  /**
   * Create a new instance of the OpenAI Responses API WebSocket LLM.
   *
   * @remarks
   * `apiKey` must be set to your OpenAI API key, either using the argument or
   * by setting the `OPENAI_API_KEY` environment variable.
   *
   * A persistent WebSocket connection to `/v1/responses` is maintained and
   * reused across turns, reducing per-turn continuation overhead for
   * tool-call-heavy workflows.
   */
  constructor(opts: Partial<WSLLMOptions> = defaultLLMOptions) {
    super();

    this.#opts = { ...defaultLLMOptions, ...opts };
    if (!this.#opts.apiKey) {
      throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');
    }

    this.#pool = new ConnectionPool<ResponsesWebSocket>({
      maxSessionDuration: WS_MAX_SESSION_DURATION,
      connectCb: async (timeoutMs: number) => {
        // Derive a wss:// URL from baseURL (trailing slashes stripped), or fall
        // back to the public OpenAI endpoint.
        const wsUrl = this.#opts.baseURL
          ? `${this.#opts.baseURL.replace(/^https?/, 'wss').replace(/\/+$/, '')}/responses`
          : OPENAI_RESPONSES_WS_URL;
        const ws = await connectWs(wsUrl, this.#opts.apiKey!, timeoutMs);
        return new ResponsesWebSocket(ws);
      },
      closeCb: async (conn: ResponsesWebSocket) => {
        conn.close();
      },
    });
  }

  /** Stable identifier used in logs/metrics for this LLM implementation. */
  label(): string {
    return 'openai.ws.LLM';
  }

  /** The configured model name. */
  get model(): string {
    return this.#opts.model;
  }

  /** Pre-establish a pooled WebSocket connection ahead of the first chat() call. */
  prewarm(): void {
    this.#pool.prewarm();
  }

  /** Close all pooled connections. */
  async close(): Promise<void> {
    await this.#pool.close();
  }

  override async aclose(): Promise<void> {
    await this.close();
  }

  /** Called by LLMStream once response.created fires to atomically persist both the
   * response ID and its corresponding chat context for the next turn's diff. */
  _onResponseCreated(responseId: string, chatCtx: llm.ChatContext): void {
    this.#prevResponseId = responseId;
    this.#prevChatCtx = chatCtx;
  }

  /** Called by LLMStream on response.completed with the call_ids the response emitted. */
  _setPendingToolCalls(callIds: Set<string>): void {
    this.#pendingToolCalls = callIds;
  }

  /**
   * Start a chat completion over the pooled Responses WebSocket.
   *
   * When the new context strictly appends to the previous turn's context (and all
   * pending tool calls have outputs), only the incremental items are sent together
   * with `previous_response_id`; otherwise the full context is sent.
   */
  chat({
    chatCtx,
    toolCtx,
    connOptions = DEFAULT_API_CONNECT_OPTIONS,
    parallelToolCalls,
    toolChoice,
    extraKwargs,
  }: {
    chatCtx: llm.ChatContext;
    toolCtx?: llm.ToolContext;
    connOptions?: APIConnectOptions;
    parallelToolCalls?: boolean;
    toolChoice?: llm.ToolChoice;
    extraKwargs?: Record<string, unknown>;
  }): WSLLMStream {
    const modelOptions: Record<string, unknown> = { ...(extraKwargs ?? {}) };

    // Per-call arguments take precedence over constructor-level options.
    parallelToolCalls =
      parallelToolCalls !== undefined ? parallelToolCalls : this.#opts.parallelToolCalls;
    if (toolCtx && Object.keys(toolCtx).length > 0 && parallelToolCalls !== undefined) {
      modelOptions.parallel_tool_calls = parallelToolCalls;
    }

    toolChoice =
      toolChoice !== undefined ? toolChoice : (this.#opts.toolChoice as llm.ToolChoice | undefined);
    if (toolChoice) {
      modelOptions.tool_choice = toolChoice;
    }

    if (this.#opts.temperature !== undefined) {
      modelOptions.temperature = this.#opts.temperature;
    }

    if (this.#opts.store !== undefined) {
      modelOptions.store = this.#opts.store;
    }

    if (this.#opts.metadata) {
      modelOptions.metadata = this.#opts.metadata;
    }

    let inputChatCtx = chatCtx;
    let prevResponseId: string | undefined;
    // previous_response_id only works when the server stores responses (store !== false).
    const canUseStoredResponse = modelOptions.store !== false;

    if (canUseStoredResponse && this.#prevChatCtx && this.#prevResponseId) {
      const diff = llm.computeChatCtxDiff(this.#prevChatCtx, chatCtx);
      const lastPrevItemId = this.#prevChatCtx.items.at(-1)?.id ?? null;

      if (
        diff.toRemove.length === 0 &&
        diff.toCreate.length > 0 &&
        diff.toCreate[0]![0] === lastPrevItemId
      ) {
        // All new items are appended after the tail of the previous context —
        // safe to send only the incremental input with previous_response_id,
        // but only if all pending tool calls from the previous response have
        // their corresponding function_call_output in the new items.
        const newItemIds = new Set(diff.toCreate.map(([, id]) => id));
        const newItems = chatCtx.items.filter((item: llm.ChatItem) => newItemIds.has(item.id));
        const pendingToolCallsCompleted = this.#pendingToolCallsCompleted(newItems);
        if (pendingToolCallsCompleted) {
          inputChatCtx = new llm.ChatContext(newItems);
          prevResponseId = this.#prevResponseId;
        }
      }
      // Otherwise: items were removed or inserted mid-history — fall back to
      // sending the full context with no previous_response_id.
    }

    return new WSLLMStream(this, {
      pool: this.#pool,
      model: this.#opts.model,
      chatCtx: inputChatCtx,
      fullChatCtx: chatCtx,
      toolCtx,
      connOptions,
      modelOptions,
      prevResponseId,
      strictToolSchema: this.#opts.strictToolSchema ?? true,
    });
  }

  // True when every pending tool call from the previous response has a matching
  // function_call_output among `items` (vacuously true when none are pending).
  #pendingToolCallsCompleted(items: llm.ChatItem[]): boolean {
    if (this.#pendingToolCalls.size === 0) return true;
    const completedCallIds = new Set(
      items
        .filter((item): item is llm.FunctionCallOutput => item.type === 'function_call_output')
        .map((item) => item.callId),
    );
    return [...this.#pendingToolCalls].every((callId) => completedCallIds.has(callId));
  }
}
321
+
322
+ // ============================================================================
323
+ // WsLLMStream
324
+ // ============================================================================
325
+
326
export class WSLLMStream extends llm.LLMStream {
  #llm: WSLLM;
  #pool: ConnectionPool<ResponsesWebSocket>;
  #model: string | ChatModels;
  #modelOptions: Record<string, unknown>;
  #strictToolSchema: boolean;
  // When set, sent as previous_response_id alongside an incremental context.
  #prevResponseId?: string;
  /** Full chat context — used as fallback when previous_response_id is stale. */
  #fullChatCtx: llm.ChatContext;
  // Server-assigned response ID; set on response.created and stamped on every chunk.
  #responseId = '';
  // call_ids of function_call items seen in this response's output.
  #pendingToolCalls = new Set<string>();

  // NOTE: the `llm` parameter shadows the imported `llm` namespace in value
  // positions within this constructor; type annotations still resolve to the module.
  constructor(
    llm: WSLLM,
    {
      pool,
      model,
      chatCtx,
      fullChatCtx,
      toolCtx,
      connOptions,
      modelOptions,
      prevResponseId,
      strictToolSchema,
    }: {
      pool: ConnectionPool<ResponsesWebSocket>;
      model: string | ChatModels;
      chatCtx: llm.ChatContext;
      fullChatCtx: llm.ChatContext;
      toolCtx?: llm.ToolContext;
      connOptions: APIConnectOptions;
      modelOptions: Record<string, unknown>;
      prevResponseId?: string;
      strictToolSchema: boolean;
    },
  ) {
    super(llm, { chatCtx, toolCtx, connOptions });
    this.#llm = llm;
    this.#pool = pool;
    this.#model = model;
    this.#modelOptions = modelOptions;
    this.#strictToolSchema = strictToolSchema;
    this.#prevResponseId = prevResponseId;
    this.#fullChatCtx = fullChatCtx;
  }

  // Entry point: run one request on a pooled connection, retrying once with the
  // full context if the server no longer knows prevResponseId. Unrecognised
  // failures are wrapped in APIConnectionError.
  protected async run(): Promise<void> {
    let retryable = true;

    try {
      await this.#pool.withConnection(async (conn: ResponsesWebSocket) => {
        const needsRetry = await this.#runWithConn(conn, this.chatCtx, this.#prevResponseId);

        if (needsRetry) {
          // previous_response_id was evicted from the server-side cache.
          // Retry once on the same connection with the full context and no ID.
          // NOTE(review): this assignment is a no-op — `retryable` is already
          // true; if the post-retry attempt was meant to be non-retryable it
          // should be `false`. Confirm intent.
          retryable = true;
          await this.#runWithConn(conn, this.#fullChatCtx, undefined);
        }
      });
    } catch (error) {
      if (
        error instanceof APIStatusError ||
        error instanceof APITimeoutError ||
        error instanceof APIConnectionError
      ) {
        // Already a typed API error — propagate unchanged.
        throw error;
      }
      throw new APIConnectionError({
        message: toError(error).message,
        options: { retryable },
      });
    }
  }

  /**
   * Execute a single response.create round-trip on the given connection.
   * Returns `true` when the caller should retry with the full chat context
   * (i.e. `previous_response_not_found`), `false` otherwise.
   */
  async #runWithConn(
    conn: ResponsesWebSocket,
    chatCtx: llm.ChatContext,
    prevResponseId: string | undefined,
  ): Promise<boolean> {
    const messages = (await chatCtx.toProviderFormat(
      'openai.responses',
    )) as OpenAI.Responses.ResponseInputItem[];

    // Translate the tool context into OpenAI function-tool definitions.
    const tools = this.toolCtx
      ? Object.entries(this.toolCtx).map(([name, func]) => {
          const oaiParams = {
            type: 'function' as const,
            name,
            description: func.description,
            parameters: llm.toJsonSchema(
              func.parameters,
              true,
              this.#strictToolSchema,
            ) as unknown as OpenAI.Responses.FunctionTool['parameters'],
          } as OpenAI.Responses.FunctionTool;

          if (this.#strictToolSchema) {
            oaiParams.strict = true;
          }

          return oaiParams;
        })
      : undefined;

    const requestOptions: Record<string, unknown> = { ...this.#modelOptions };
    if (!tools) {
      // tool_choice is invalid without tools.
      delete requestOptions.tool_choice;
    }

    const payload: WsResponseCreateEvent = {
      type: 'response.create',
      model: this.#model as string,
      input: messages as unknown[],
      tools: (tools ?? []) as unknown[],
      ...(prevResponseId ? { previous_response_id: prevResponseId } : {}),
      ...requestOptions,
    };

    let channel: stream.StreamChannel<WsServerEvent>;
    try {
      channel = conn.sendRequest(payload);
    } catch (error) {
      if (error instanceof APIConnectionError) {
        // Socket was not OPEN — evict this connection so the pool reconnects.
        conn.close();
        this.#pool.invalidate();
      }
      throw error;
    }
    const reader = channel.stream().getReader();

    // Events are already Zod-validated by ResponsesWebSocket before being
    // written to the channel, so no re-parsing is needed here.
    try {
      while (true) {
        const { done, value: event } = await reader.read();
        if (done) break;

        let chunk: llm.ChatChunk | undefined;

        switch (event.type) {
          case 'error': {
            const retry = this.#handleError(event, conn);
            if (retry) return true;
            break;
          }
          case 'response.created':
            this.#handleResponseCreated(event);
            break;
          case 'response.output_item.done':
            chunk = this.#handleOutputItemDone(event);
            break;
          case 'response.output_text.delta':
            chunk = this.#handleOutputTextDelta(event);
            break;
          case 'response.completed':
            chunk = this.#handleResponseCompleted(event);
            break;
          case 'response.failed':
            this.#handleResponseFailed(event);
            break;
          default:
            // Unknown-but-valid event types are ignored.
            break;
        }

        if (chunk) {
          this.queue.put(chunk);
        }
      }
    } finally {
      reader.releaseLock();
    }

    return false;
  }

  /**
   * Returns `true` when the caller should retry with full context
   * (`previous_response_not_found`), throws for all other errors.
   */
  #handleError(event: WsServerEvent & { type: 'error' }, conn: ResponsesWebSocket): boolean {
    const code = event.error?.code;

    if (code === 'previous_response_not_found') {
      // The server-side in-memory cache was evicted (e.g. after a failed turn
      // or reconnect). Signal the caller to retry with the full context.
      return true;
    }

    if (code === 'websocket_connection_limit_reached' || code === 'websocket_closed') {
      // Transient connection issue (timeout, network drop, or 60-min limit).
      // Evict this connection so the pool opens a fresh one on retry.
      conn.close();
      this.#pool.invalidate();
      throw new APIConnectionError({
        message: event.error?.message ?? `WebSocket closed (${code})`,
        options: { retryable: true },
      });
    }

    throw new APIStatusError({
      message: event.error?.message ?? event.message ?? 'Unknown error from OpenAI Responses WS',
      options: {
        statusCode: event.status ?? -1,
        retryable: false,
      },
    });
  }

  // Record the server-assigned response ID and persist it (with the full
  // context) on the parent LLM for next-turn incremental requests.
  #handleResponseCreated(event: WsResponseCreatedEvent): void {
    this.#responseId = event.response.id;
    this.#llm._onResponseCreated(event.response.id, this.#fullChatCtx);
  }

  // function_call output items become tool-call chunks; everything else yields nothing.
  #handleOutputItemDone(event: WsOutputItemDoneEvent): llm.ChatChunk | undefined {
    if (event.item.type === 'function_call') {
      this.#pendingToolCalls.add(event.item.call_id);
      return {
        id: this.#responseId,
        delta: {
          role: 'assistant',
          content: undefined,
          toolCalls: [
            llm.FunctionCall.create({
              callId: event.item.call_id,
              name: event.item.name,
              args: event.item.arguments,
            }),
          ],
        },
      };
    }
    return undefined;
  }

  // Incremental assistant text becomes a content-delta chunk.
  #handleOutputTextDelta(event: WsOutputTextDeltaEvent): llm.ChatChunk {
    return {
      id: this.#responseId,
      delta: {
        role: 'assistant',
        content: event.delta,
      },
    };
  }

  // On completion, publish this response's tool call_ids to the parent LLM and
  // emit a usage chunk when the server reported token counts.
  #handleResponseCompleted(event: WsResponseCompletedEvent): llm.ChatChunk | undefined {
    this.#llm._setPendingToolCalls(this.#pendingToolCalls);

    if (event.response.usage) {
      return {
        id: this.#responseId,
        usage: {
          completionTokens: event.response.usage.output_tokens,
          promptTokens: event.response.usage.input_tokens,
          promptCachedTokens: event.response.usage.input_tokens_details.cached_tokens,
          totalTokens: event.response.usage.total_tokens,
        },
      };
    }
    return undefined;
  }

  // Always throws; the error propagates out of the read loop in #runWithConn.
  #handleResponseFailed(event: WsResponseFailedEvent): void {
    throw new APIStatusError({
      message: event.response?.error?.message ?? 'Response failed',
      options: { statusCode: -1, retryable: false },
    });
  }
}
600
+
601
+ // ============================================================================
602
+ // Internal helpers
603
+ // ============================================================================
604
+
605
+ async function connectWs(url: string, apiKey: string, timeoutMs: number): Promise<WebSocket> {
606
+ return new Promise<WebSocket>((resolve, reject) => {
607
+ const ws = new WebSocket(url, {
608
+ headers: { Authorization: `Bearer ${apiKey}` },
609
+ });
610
+
611
+ let settled = false;
612
+
613
+ const timer = setTimeout(() => {
614
+ settled = true;
615
+ ws.close();
616
+ reject(
617
+ new APIConnectionError({ message: 'Timeout connecting to OpenAI Responses WebSocket' }),
618
+ );
619
+ }, timeoutMs);
620
+
621
+ ws.once('open', () => {
622
+ if (settled) return;
623
+ settled = true;
624
+ clearTimeout(timer);
625
+ resolve(ws);
626
+ });
627
+
628
+ ws.once('error', (err) => {
629
+ if (settled) return;
630
+ settled = true;
631
+ clearTimeout(timer);
632
+ reject(
633
+ new APIConnectionError({
634
+ message: `Error connecting to OpenAI Responses WebSocket: ${err.message}`,
635
+ }),
636
+ );
637
+ });
638
+
639
+ ws.once('close', (code) => {
640
+ if (settled) return;
641
+ settled = true;
642
+ clearTimeout(timer);
643
+ reject(
644
+ new APIConnectionError({
645
+ message: `OpenAI Responses WebSocket closed unexpectedly during connect (code ${code})`,
646
+ }),
647
+ );
648
+ });
649
+ });
650
+ }
651
+
652
+ function getWebSocketStateLabel(readyState: number): string {
653
+ switch (readyState) {
654
+ case WebSocket.CONNECTING:
655
+ return 'CONNECTING';
656
+ case WebSocket.OPEN:
657
+ return 'OPEN';
658
+ case WebSocket.CLOSING:
659
+ return 'CLOSING';
660
+ case WebSocket.CLOSED:
661
+ return 'CLOSED';
662
+ default:
663
+ return `UNKNOWN:${readyState}`;
664
+ }
665
+ }