@copilotkit/aimock 1.22.1 → 1.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.claude-plugin/marketplace.json +1 -1
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +28 -0
  4. package/README.md +10 -10
  5. package/dist/agui-types.d.ts.map +1 -1
  6. package/dist/cli.cjs +7 -1
  7. package/dist/cli.cjs.map +1 -1
  8. package/dist/cli.js +7 -1
  9. package/dist/cli.js.map +1 -1
  10. package/dist/fixture-loader.cjs +2 -1
  11. package/dist/fixture-loader.cjs.map +1 -1
  12. package/dist/fixture-loader.d.cts.map +1 -1
  13. package/dist/fixture-loader.d.ts.map +1 -1
  14. package/dist/fixture-loader.js +2 -1
  15. package/dist/fixture-loader.js.map +1 -1
  16. package/dist/gemini.cjs +1 -1
  17. package/dist/gemini.cjs.map +1 -1
  18. package/dist/gemini.js +1 -1
  19. package/dist/gemini.js.map +1 -1
  20. package/dist/helpers.cjs +1 -0
  21. package/dist/helpers.cjs.map +1 -1
  22. package/dist/helpers.d.cts.map +1 -1
  23. package/dist/helpers.d.ts.map +1 -1
  24. package/dist/helpers.js +1 -0
  25. package/dist/helpers.js.map +1 -1
  26. package/dist/model-utils.cjs +11 -0
  27. package/dist/model-utils.cjs.map +1 -0
  28. package/dist/model-utils.js +10 -0
  29. package/dist/model-utils.js.map +1 -0
  30. package/dist/recorder.cjs +19 -4
  31. package/dist/recorder.cjs.map +1 -1
  32. package/dist/recorder.d.cts.map +1 -1
  33. package/dist/recorder.d.ts.map +1 -1
  34. package/dist/recorder.js +19 -4
  35. package/dist/recorder.js.map +1 -1
  36. package/dist/router.cjs +7 -3
  37. package/dist/router.cjs.map +1 -1
  38. package/dist/router.js +7 -3
  39. package/dist/router.js.map +1 -1
  40. package/dist/server.cjs +1 -1
  41. package/dist/server.cjs.map +1 -1
  42. package/dist/server.js +1 -1
  43. package/dist/server.js.map +1 -1
  44. package/dist/types.d.cts +17 -2
  45. package/dist/types.d.cts.map +1 -1
  46. package/dist/types.d.ts +17 -2
  47. package/dist/types.d.ts.map +1 -1
  48. package/dist/vector-types.d.ts.map +1 -1
  49. package/dist/ws-realtime.cjs +577 -215
  50. package/dist/ws-realtime.cjs.map +1 -1
  51. package/dist/ws-realtime.d.cts.map +1 -1
  52. package/dist/ws-realtime.d.ts.map +1 -1
  53. package/dist/ws-realtime.js +577 -215
  54. package/dist/ws-realtime.js.map +1 -1
  55. package/package.json +1 -1
@@ -25,12 +25,34 @@ function realtimeItemsToMessages(items, instructions, logger) {
25
25
  content: instructions
26
26
  });
27
27
  for (const item of items) if (item.type === "message") {
28
- const text = item.content?.[0]?.text ?? "";
29
28
  const role = item.role === "assistant" ? "assistant" : item.role === "system" ? "system" : "user";
30
- messages.push({
31
- role,
32
- content: text
33
- });
29
+ if (item.content?.some((p) => p.type === "input_text" || p.type === "input_image" || p.type === "input_audio") && item.content) {
30
+ const mappedContent = item.content.map((part) => {
31
+ if (part.type === "input_text") return {
32
+ type: "text",
33
+ text: part.text ?? ""
34
+ };
35
+ if (part.type === "input_image") return {
36
+ type: "image_url",
37
+ image_url: { url: part.url ?? "" }
38
+ };
39
+ if (part.type === "input_audio") return {
40
+ type: "text",
41
+ text: "[audio input]"
42
+ };
43
+ return part;
44
+ });
45
+ messages.push({
46
+ role,
47
+ content: mappedContent
48
+ });
49
+ } else {
50
+ const text = item.content?.[0]?.text ?? "";
51
+ messages.push({
52
+ role,
53
+ content: text
54
+ });
55
+ }
34
56
  } else if (item.type === "function_call") {
35
57
  if (!item.name) logger?.warn("Realtime function_call item missing 'name'");
36
58
  messages.push({
@@ -55,23 +77,112 @@ function realtimeItemsToMessages(items, instructions, logger) {
55
77
  }
56
78
  return messages;
57
79
  }
58
- function evt(type, extra = {}) {
59
- return JSON.stringify({
60
- type,
61
- event_id: realtimeId("event"),
62
- ...extra
80
+ /** GA -> Beta event name mapping */
81
+ const GA_TO_BETA_EVENT = {
82
+ "response.output_text.delta": "response.text.delta",
83
+ "response.output_text.done": "response.text.done",
84
+ "response.output_audio.delta": "response.audio.delta",
85
+ "response.output_audio.done": "response.audio.done",
86
+ "response.output_audio_transcript.delta": "response.audio_transcript.delta",
87
+ "response.output_audio_transcript.done": "response.audio_transcript.done",
88
+ "conversation.item.added": "conversation.item.created"
89
+ };
90
+ /** GA -> Beta content type mapping */
91
+ const GA_TO_BETA_CONTENT_TYPE = {
92
+ output_text: "text",
93
+ output_audio: "audio"
94
+ };
95
+ /** Events suppressed in Beta mode (GA-only events) */
96
+ const BETA_SUPPRESSED_EVENTS = new Set(["conversation.item.done"]);
97
+ function translateGAToBeta(event) {
98
+ const type = event.type;
99
+ if (BETA_SUPPRESSED_EVENTS.has(type)) return null;
100
+ const translated = { ...event };
101
+ if (GA_TO_BETA_EVENT[type]) translated.type = GA_TO_BETA_EVENT[type];
102
+ if (translated.part && typeof translated.part === "object") {
103
+ const part = { ...translated.part };
104
+ if (typeof part.type === "string" && GA_TO_BETA_CONTENT_TYPE[part.type]) part.type = GA_TO_BETA_CONTENT_TYPE[part.type];
105
+ translated.part = part;
106
+ }
107
+ if (translated.content_part && typeof translated.content_part === "object") {
108
+ const cp = { ...translated.content_part };
109
+ if (typeof cp.type === "string" && GA_TO_BETA_CONTENT_TYPE[cp.type]) cp.type = GA_TO_BETA_CONTENT_TYPE[cp.type];
110
+ translated.content_part = cp;
111
+ }
112
+ if (Array.isArray(translated.content)) translated.content = translated.content.map((c) => {
113
+ if (typeof c.type === "string" && GA_TO_BETA_CONTENT_TYPE[c.type]) return {
114
+ ...c,
115
+ type: GA_TO_BETA_CONTENT_TYPE[c.type]
116
+ };
117
+ return c;
63
118
  });
119
+ if (translated.item && typeof translated.item === "object") {
120
+ const item = { ...translated.item };
121
+ delete item.phase;
122
+ if (Array.isArray(item.content)) item.content = item.content.map((c) => {
123
+ if (typeof c.type === "string" && GA_TO_BETA_CONTENT_TYPE[c.type]) return {
124
+ ...c,
125
+ type: GA_TO_BETA_CONTENT_TYPE[c.type]
126
+ };
127
+ return c;
128
+ });
129
+ translated.item = item;
130
+ }
131
+ if (translated.response && typeof translated.response === "object") {
132
+ const resp = { ...translated.response };
133
+ if (Array.isArray(resp.output)) resp.output = resp.output.map((outItem) => {
134
+ const o = { ...outItem };
135
+ if (Array.isArray(o.content)) o.content = o.content.map((c) => typeof c.type === "string" && GA_TO_BETA_CONTENT_TYPE[c.type] ? {
136
+ ...c,
137
+ type: GA_TO_BETA_CONTENT_TYPE[c.type]
138
+ } : c);
139
+ return o;
140
+ });
141
+ translated.response = resp;
142
+ }
143
+ if (type === "session.created" || type === "session.updated") {
144
+ if (translated.session && typeof translated.session === "object") {
145
+ const session = { ...translated.session };
146
+ if (session.audio && typeof session.audio === "object") {
147
+ const audio = session.audio;
148
+ session.voice = audio.voice;
149
+ session.input_audio_format = audio.input_audio_format;
150
+ session.output_audio_format = audio.output_audio_format;
151
+ session.input_audio_transcription = audio.input_audio_transcription;
152
+ delete session.audio;
153
+ }
154
+ delete session.type;
155
+ delete session.reasoning;
156
+ translated.session = session;
157
+ }
158
+ }
159
+ return translated;
160
+ }
161
+ function sendEvent(ws, event, isBeta) {
162
+ const out = {
163
+ ...event,
164
+ event_id: event.event_id ?? realtimeId("event")
165
+ };
166
+ if (isBeta) {
167
+ const translated = translateGAToBeta(out);
168
+ if (translated === null) return;
169
+ ws.send(JSON.stringify(translated));
170
+ } else ws.send(JSON.stringify(out));
64
171
  }
65
- function buildErrorRealtimeEvent(message, type = "invalid_request_error", code) {
66
- return evt("error", { error: {
67
- message,
68
- type,
69
- code
70
- } });
172
+ function buildErrorRealtimeEvent(ws, message, isBeta, type = "invalid_request_error", code) {
173
+ sendEvent(ws, {
174
+ type: "error",
175
+ error: {
176
+ message,
177
+ type,
178
+ code
179
+ }
180
+ }, isBeta);
71
181
  }
72
182
  function handleWebSocketRealtime(ws, fixtures, journal, defaults) {
73
183
  const { logger } = defaults;
74
184
  const sessionId = realtimeId("sess");
185
+ const isBeta = defaults.upgradeHeaders?.["openai-beta"] ? String(defaults.upgradeHeaders["openai-beta"]).includes("realtime=v1") : false;
75
186
  const session = {
76
187
  model: defaults.model,
77
188
  modalities: ["text"],
@@ -80,86 +191,227 @@ function handleWebSocketRealtime(ws, fixtures, journal, defaults) {
80
191
  voice: null,
81
192
  input_audio_format: null,
82
193
  output_audio_format: null,
194
+ input_audio_noise_reduction: null,
195
+ input_audio_transcription: null,
83
196
  turn_detection: null,
84
- temperature: .8
197
+ temperature: .8,
198
+ type: "conversation",
199
+ reasoning: null
85
200
  };
86
201
  const conversationItems = [];
87
- ws.send(evt("session.created", { session: {
88
- id: sessionId,
89
- object: "realtime.session",
90
- ...session,
91
- expires_at: Math.floor(Date.now() / 1e3) + 3600,
92
- max_response_output_tokens: "inf",
93
- input_audio_transcription: null,
94
- tool_choice: "auto"
95
- } }));
202
+ sendEvent(ws, {
203
+ type: "session.created",
204
+ session: {
205
+ id: sessionId,
206
+ object: "realtime.session",
207
+ model: session.model,
208
+ expires_at: Math.floor(Date.now() / 1e3) + 3600,
209
+ modalities: session.modalities,
210
+ instructions: session.instructions,
211
+ tools: session.tools,
212
+ tool_choice: "auto",
213
+ temperature: session.temperature,
214
+ max_response_output_tokens: "inf",
215
+ audio: {
216
+ voice: session.voice,
217
+ input_audio_format: session.input_audio_format,
218
+ output_audio_format: session.output_audio_format,
219
+ input_audio_noise_reduction: session.input_audio_noise_reduction,
220
+ input_audio_transcription: session.input_audio_transcription
221
+ },
222
+ turn_detection: session.turn_detection,
223
+ type: session.type,
224
+ reasoning: session.reasoning
225
+ }
226
+ }, isBeta);
96
227
  let pending = Promise.resolve();
97
228
  ws.on("message", (raw) => {
98
- pending = pending.then(() => processMessage(raw, ws, fixtures, journal, defaults, session, conversationItems).catch((err) => {
229
+ pending = pending.then(() => processMessage(raw, ws, fixtures, journal, defaults, session, conversationItems, isBeta).catch((err) => {
99
230
  const msg = err instanceof Error ? err.message : "Internal error";
100
231
  logger.error(`WebSocket realtime error: ${msg}`);
101
232
  try {
102
- ws.send(buildErrorRealtimeEvent(msg, "server_error"));
233
+ buildErrorRealtimeEvent(ws, msg, isBeta, "server_error");
103
234
  } catch (sendErr) {
104
235
  defaults.logger.debug(`Failed to send error to client: ${sendErr instanceof Error ? sendErr.message : "unknown"}`);
105
236
  }
106
237
  }));
107
238
  });
108
239
  }
109
- async function processMessage(raw, ws, fixtures, journal, defaults, session, conversationItems) {
240
+ async function processMessage(raw, ws, fixtures, journal, defaults, session, conversationItems, isBeta) {
110
241
  let parsed;
111
242
  try {
112
243
  parsed = JSON.parse(raw);
113
244
  } catch (parseErr) {
114
- const detail = parseErr instanceof Error ? parseErr.message : "unknown";
115
- ws.send(buildErrorRealtimeEvent(`Malformed JSON: ${detail}`, "invalid_request_error", "invalid_json"));
245
+ buildErrorRealtimeEvent(ws, `Malformed JSON: ${parseErr instanceof Error ? parseErr.message : "unknown"}`, isBeta, "invalid_request_error", "invalid_json");
116
246
  return;
117
247
  }
118
248
  const msgType = parsed.type;
119
249
  if (msgType === "session.update") {
120
250
  if (parsed.session) {
121
- if (parsed.session.instructions !== void 0) session.instructions = parsed.session.instructions;
122
- if (parsed.session.tools !== void 0) session.tools = parsed.session.tools;
123
- if (parsed.session.modalities !== void 0) session.modalities = parsed.session.modalities;
124
- if (parsed.session.model !== void 0) session.model = parsed.session.model;
125
- if (parsed.session.temperature !== void 0) session.temperature = parsed.session.temperature;
251
+ const s = parsed.session;
252
+ const validTypes = new Set([
253
+ "conversation",
254
+ "transcription",
255
+ "translation"
256
+ ]);
257
+ if (s.type !== void 0) {
258
+ if (!validTypes.has(s.type)) {
259
+ sendEvent(ws, {
260
+ type: "error",
261
+ error: {
262
+ message: `Invalid session type: ${s.type}`,
263
+ type: "invalid_request_error",
264
+ code: "invalid_session_config"
265
+ }
266
+ }, isBeta);
267
+ return;
268
+ }
269
+ }
270
+ const prevModel = session.model;
271
+ const prevType = session.type;
272
+ if (s.instructions !== void 0) session.instructions = s.instructions;
273
+ if (s.tools !== void 0) session.tools = s.tools;
274
+ if (s.modalities !== void 0) session.modalities = s.modalities;
275
+ if (s.model !== void 0) session.model = s.model;
276
+ if (s.temperature !== void 0) session.temperature = s.temperature;
277
+ if (s.type !== void 0) session.type = s.type;
278
+ if (s.audio) {
279
+ const audio = s.audio;
280
+ if (audio.voice !== void 0) session.voice = audio.voice;
281
+ if (audio.input_audio_format !== void 0) session.input_audio_format = audio.input_audio_format;
282
+ if (audio.output_audio_format !== void 0) session.output_audio_format = audio.output_audio_format;
283
+ if (audio.input_audio_noise_reduction !== void 0) session.input_audio_noise_reduction = audio.input_audio_noise_reduction;
284
+ if (audio.input_audio_transcription !== void 0) session.input_audio_transcription = audio.input_audio_transcription;
285
+ }
286
+ if (s.voice !== void 0) session.voice = s.voice;
287
+ if (s.input_audio_format !== void 0) session.input_audio_format = s.input_audio_format;
288
+ if (s.output_audio_format !== void 0) session.output_audio_format = s.output_audio_format;
289
+ if (s.reasoning !== void 0) session.reasoning = s.reasoning;
290
+ const transcriptionModels = new Set([
291
+ "gpt-4o-transcribe",
292
+ "gpt-4o-mini-transcribe",
293
+ "gpt-realtime-whisper",
294
+ "whisper-1"
295
+ ]);
296
+ const translationModels = new Set([
297
+ "gpt-4o-transcribe",
298
+ "gpt-4o-mini-transcribe",
299
+ "gpt-realtime-translate"
300
+ ]);
301
+ if (session.type === "transcription" && !transcriptionModels.has(session.model)) {
302
+ session.model = prevModel;
303
+ session.type = prevType;
304
+ sendEvent(ws, {
305
+ type: "error",
306
+ error: {
307
+ message: `Model ${s.model ?? prevModel} does not support session type transcription`,
308
+ type: "invalid_request_error",
309
+ code: "invalid_session_config"
310
+ }
311
+ }, isBeta);
312
+ return;
313
+ }
314
+ if (session.type === "translation" && !translationModels.has(session.model)) {
315
+ session.model = prevModel;
316
+ session.type = prevType;
317
+ sendEvent(ws, {
318
+ type: "error",
319
+ error: {
320
+ message: `Model ${s.model ?? prevModel} does not support session type translation`,
321
+ type: "invalid_request_error",
322
+ code: "invalid_session_config"
323
+ }
324
+ }, isBeta);
325
+ return;
326
+ }
126
327
  }
127
- ws.send(evt("session.updated", { session: {
128
- ...session,
129
- object: "realtime.session",
130
- expires_at: Math.floor(Date.now() / 1e3) + 3600,
131
- max_response_output_tokens: "inf",
132
- input_audio_transcription: null,
133
- tool_choice: "auto"
134
- } }));
328
+ sendEvent(ws, {
329
+ type: "session.updated",
330
+ session: {
331
+ object: "realtime.session",
332
+ model: session.model,
333
+ expires_at: Math.floor(Date.now() / 1e3) + 3600,
334
+ modalities: session.modalities,
335
+ instructions: session.instructions,
336
+ tools: session.tools,
337
+ tool_choice: "auto",
338
+ temperature: session.temperature,
339
+ max_response_output_tokens: "inf",
340
+ audio: {
341
+ voice: session.voice,
342
+ input_audio_format: session.input_audio_format,
343
+ output_audio_format: session.output_audio_format,
344
+ input_audio_noise_reduction: session.input_audio_noise_reduction,
345
+ input_audio_transcription: session.input_audio_transcription
346
+ },
347
+ turn_detection: session.turn_detection,
348
+ type: session.type,
349
+ reasoning: session.reasoning
350
+ }
351
+ }, isBeta);
135
352
  return;
136
353
  }
137
354
  if (msgType === "conversation.item.create") {
138
355
  if (!parsed.item) {
139
- ws.send(buildErrorRealtimeEvent("Missing 'item' in conversation.item.create", "invalid_request_error"));
356
+ buildErrorRealtimeEvent(ws, "Missing 'item' in conversation.item.create", isBeta, "invalid_request_error");
140
357
  return;
141
358
  }
142
359
  const item = parsed.item;
143
360
  if (!item.id) item.id = realtimeId("item");
144
361
  const previousId = conversationItems.length > 0 ? conversationItems[conversationItems.length - 1].id ?? null : null;
145
362
  conversationItems.push(item);
146
- ws.send(evt("conversation.item.created", {
363
+ sendEvent(ws, {
364
+ type: "conversation.item.added",
147
365
  previous_item_id: previousId,
148
366
  item
149
- }));
367
+ }, isBeta);
150
368
  return;
151
369
  }
152
370
  if (msgType === "response.create") {
153
- await handleResponseCreate(ws, fixtures, journal, defaults, session, conversationItems, parsed.response);
371
+ await handleResponseCreate(ws, fixtures, journal, defaults, session, conversationItems, isBeta, parsed.response);
372
+ return;
373
+ }
374
+ if (msgType === "input_audio_buffer.append") return;
375
+ if (msgType === "input_audio_buffer.commit") {
376
+ sendEvent(ws, { type: "input_audio_buffer.committed" }, isBeta);
377
+ if (session.type === "transcription" || session.type === "translation") {
378
+ const audioItem = {
379
+ type: "message",
380
+ id: realtimeId("item"),
381
+ role: "user",
382
+ content: [{
383
+ type: "input_audio",
384
+ transcript: null
385
+ }]
386
+ };
387
+ conversationItems.push(audioItem);
388
+ sendEvent(ws, {
389
+ type: "conversation.item.added",
390
+ item: audioItem
391
+ }, isBeta);
392
+ }
393
+ return;
394
+ }
395
+ if (msgType === "input_audio_buffer.clear") {
396
+ sendEvent(ws, { type: "input_audio_buffer.cleared" }, isBeta);
397
+ return;
398
+ }
399
+ if (msgType === "response.cancel") {
400
+ sendEvent(ws, { type: "response.cancelled" }, isBeta);
154
401
  return;
155
402
  }
156
403
  }
157
- async function handleResponseCreate(ws, fixtures, journal, defaults, session, conversationItems, responseOverrides) {
404
+ async function handleResponseCreate(ws, fixtures, journal, defaults, session, conversationItems, isBeta, responseOverrides) {
158
405
  const messages = realtimeItemsToMessages(conversationItems, (responseOverrides?.instructions ?? session.instructions) || void 0, defaults.logger);
406
+ const endpointType = {
407
+ conversation: "realtime",
408
+ transcription: "realtime-transcription",
409
+ translation: "realtime-translation"
410
+ }[session.type] ?? "realtime";
159
411
  const completionReq = {
160
412
  model: session.model,
161
413
  messages,
162
- _endpointType: "chat"
414
+ _endpointType: endpointType
163
415
  };
164
416
  const testId = defaults.testId ?? DEFAULT_TEST_ID;
165
417
  const fixture = matchFixture(fixtures, completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform);
@@ -193,33 +445,39 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
193
445
  ...strictOverrideField(defaults.strict, defaults.upgradeHeaders)
194
446
  }
195
447
  });
196
- ws.send(evt("response.created", { response: {
197
- id: responseId,
198
- object: "realtime.response",
199
- status: "failed",
200
- status_details: null,
201
- output: [],
202
- usage: null
203
- } }));
204
- ws.send(evt("response.done", { response: {
205
- id: responseId,
206
- object: "realtime.response",
207
- status: "failed",
208
- output: [],
209
- status_details: {
210
- type: "error",
211
- error: {
212
- message: "No fixture matched",
213
- type: "invalid_request_error",
214
- code: "no_fixture_match"
448
+ sendEvent(ws, {
449
+ type: "response.created",
450
+ response: {
451
+ id: responseId,
452
+ object: "realtime.response",
453
+ status: "failed",
454
+ status_details: null,
455
+ output: [],
456
+ usage: null
457
+ }
458
+ }, isBeta);
459
+ sendEvent(ws, {
460
+ type: "response.done",
461
+ response: {
462
+ id: responseId,
463
+ object: "realtime.response",
464
+ status: "failed",
465
+ output: [],
466
+ status_details: {
467
+ type: "error",
468
+ error: {
469
+ message: "No fixture matched",
470
+ type: "invalid_request_error",
471
+ code: "no_fixture_match"
472
+ }
473
+ },
474
+ usage: {
475
+ total_tokens: 0,
476
+ input_tokens: 0,
477
+ output_tokens: 0
215
478
  }
216
- },
217
- usage: {
218
- total_tokens: 0,
219
- input_tokens: 0,
220
- output_tokens: 0
221
479
  }
222
- } }));
480
+ }, isBeta);
223
481
  return;
224
482
  }
225
483
  const response = await resolveResponse(fixture, completionReq);
@@ -237,33 +495,39 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
237
495
  fixture
238
496
  }
239
497
  });
240
- ws.send(evt("response.created", { response: {
241
- id: responseId,
242
- object: "realtime.response",
243
- status: "failed",
244
- status_details: null,
245
- output: [],
246
- usage: null
247
- } }));
248
- ws.send(evt("response.done", { response: {
249
- id: responseId,
250
- object: "realtime.response",
251
- status: "failed",
252
- output: [],
253
- status_details: {
254
- type: "error",
255
- error: {
256
- message: response.error.message,
257
- type: response.error.type,
258
- code: response.error.code
498
+ sendEvent(ws, {
499
+ type: "response.created",
500
+ response: {
501
+ id: responseId,
502
+ object: "realtime.response",
503
+ status: "failed",
504
+ status_details: null,
505
+ output: [],
506
+ usage: null
507
+ }
508
+ }, isBeta);
509
+ sendEvent(ws, {
510
+ type: "response.done",
511
+ response: {
512
+ id: responseId,
513
+ object: "realtime.response",
514
+ status: "failed",
515
+ output: [],
516
+ status_details: {
517
+ type: "error",
518
+ error: {
519
+ message: response.error.message,
520
+ type: response.error.type,
521
+ code: response.error.code
522
+ }
523
+ },
524
+ usage: {
525
+ total_tokens: 0,
526
+ input_tokens: 0,
527
+ output_tokens: 0
259
528
  }
260
- },
261
- usage: {
262
- total_tokens: 0,
263
- input_tokens: 0,
264
- output_tokens: 0
265
529
  }
266
- } }));
530
+ }, isBeta);
267
531
  return;
268
532
  }
269
533
  if (isContentWithToolCallsResponse(response)) {
@@ -277,14 +541,17 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
277
541
  fixture
278
542
  }
279
543
  });
280
- ws.send(evt("response.created", { response: {
281
- id: responseId,
282
- object: "realtime.response",
283
- status: "in_progress",
284
- status_details: null,
285
- output: [],
286
- usage: null
287
- } }));
544
+ sendEvent(ws, {
545
+ type: "response.created",
546
+ response: {
547
+ id: responseId,
548
+ object: "realtime.response",
549
+ status: "in_progress",
550
+ status_details: null,
551
+ output: [],
552
+ usage: null
553
+ }
554
+ }, isBeta);
288
555
  const interruption = createInterruptionSignal(fixture);
289
556
  let interrupted = false;
290
557
  const allOutputItems = [];
@@ -297,11 +564,13 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
297
564
  role: "assistant",
298
565
  status: "completed",
299
566
  content: [{
300
- type: "text",
567
+ type: "output_text",
301
568
  text: response.content
302
569
  }]
303
570
  };
304
- ws.send(evt("response.output_item.added", {
571
+ const textPhase = response.toolCalls && response.toolCalls.length > 0 ? "commentary" : "final_answer";
572
+ sendEvent(ws, {
573
+ type: "response.output_item.added",
305
574
  response_id: responseId,
306
575
  output_index: textOutputIndex,
307
576
  item: {
@@ -309,19 +578,21 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
309
578
  type: "message",
310
579
  role: "assistant",
311
580
  status: "in_progress",
312
- content: []
581
+ content: [],
582
+ phase: textPhase
313
583
  }
314
- }));
315
- ws.send(evt("response.content_part.added", {
584
+ }, isBeta);
585
+ sendEvent(ws, {
586
+ type: "response.content_part.added",
316
587
  response_id: responseId,
317
588
  item_id: textItemId,
318
589
  output_index: textOutputIndex,
319
590
  content_index: contentIndex,
320
591
  part: {
321
- type: "text",
592
+ type: "output_text",
322
593
  text: ""
323
594
  }
324
- }));
595
+ }, isBeta);
325
596
  const content = response.content;
326
597
  for (let i = 0; i < content.length; i += chunkSize) {
327
598
  if (ws.isClosed) break;
@@ -331,14 +602,14 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
331
602
  break;
332
603
  }
333
604
  if (ws.isClosed) break;
334
- const chunk = content.slice(i, i + chunkSize);
335
- ws.send(evt("response.text.delta", {
605
+ sendEvent(ws, {
606
+ type: "response.output_text.delta",
336
607
  response_id: responseId,
337
608
  item_id: textItemId,
338
609
  output_index: textOutputIndex,
339
610
  content_index: contentIndex,
340
- delta: chunk
341
- }));
611
+ delta: content.slice(i, i + chunkSize)
612
+ }, isBeta);
342
613
  interruption?.tick();
343
614
  if (interruption?.signal.aborted) {
344
615
  interrupted = true;
@@ -356,36 +627,53 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
356
627
  interruption?.cleanup();
357
628
  return;
358
629
  }
359
- ws.send(evt("response.text.done", {
630
+ sendEvent(ws, {
631
+ type: "response.output_text.done",
360
632
  response_id: responseId,
361
633
  item_id: textItemId,
362
634
  output_index: textOutputIndex,
363
635
  content_index: contentIndex,
364
636
  text: content
365
- }));
637
+ }, isBeta);
366
638
  if (ws.isClosed) {
367
639
  interruption?.cleanup();
368
640
  return;
369
641
  }
370
- ws.send(evt("response.content_part.done", {
642
+ sendEvent(ws, {
643
+ type: "response.content_part.done",
371
644
  response_id: responseId,
372
645
  item_id: textItemId,
373
646
  output_index: textOutputIndex,
374
647
  content_index: contentIndex,
375
648
  part: {
376
- type: "text",
649
+ type: "output_text",
377
650
  text: content
378
651
  }
379
- }));
652
+ }, isBeta);
380
653
  if (ws.isClosed) {
381
654
  interruption?.cleanup();
382
655
  return;
383
656
  }
384
- ws.send(evt("response.output_item.done", {
657
+ sendEvent(ws, {
658
+ type: "response.output_item.done",
385
659
  response_id: responseId,
386
660
  output_index: textOutputIndex,
387
- item: textOutputItem
388
- }));
661
+ item: {
662
+ ...textOutputItem,
663
+ phase: textPhase
664
+ }
665
+ }, isBeta);
666
+ sendEvent(ws, {
667
+ type: "conversation.item.done",
668
+ item: {
669
+ id: textItemId,
670
+ object: "realtime.item",
671
+ type: "message",
672
+ role: "assistant",
673
+ status: "completed",
674
+ content: textOutputItem.content
675
+ }
676
+ }, isBeta);
389
677
  if (ws.isClosed) {
390
678
  interruption?.cleanup();
391
679
  return;
@@ -404,7 +692,8 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
404
692
  name: tc.name,
405
693
  arguments: tc.arguments
406
694
  };
407
- ws.send(evt("response.output_item.added", {
695
+ sendEvent(ws, {
696
+ type: "response.output_item.added",
408
697
  response_id: responseId,
409
698
  output_index: outputIndex,
410
699
  item: {
@@ -413,9 +702,10 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
413
702
  status: "in_progress",
414
703
  call_id: callId,
415
704
  name: tc.name,
416
- arguments: ""
705
+ arguments: "",
706
+ phase: "final_answer"
417
707
  }
418
- }));
708
+ }, isBeta);
419
709
  const args = tc.arguments;
420
710
  for (let i = 0; i < args.length; i += chunkSize) {
421
711
  if (ws.isClosed) break;
@@ -425,14 +715,14 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
425
715
  break;
426
716
  }
427
717
  if (ws.isClosed) break;
428
- const chunk = args.slice(i, i + chunkSize);
429
- ws.send(evt("response.function_call_arguments.delta", {
718
+ sendEvent(ws, {
719
+ type: "response.function_call_arguments.delta",
430
720
  response_id: responseId,
431
721
  item_id: itemId,
432
722
  output_index: outputIndex,
433
723
  call_id: callId,
434
- delta: chunk
435
- }));
724
+ delta: args.slice(i, i + chunkSize)
725
+ }, isBeta);
436
726
  interruption?.tick();
437
727
  if (interruption?.signal.aborted) {
438
728
  interrupted = true;
@@ -441,19 +731,36 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
441
731
  }
442
732
  if (interrupted) break;
443
733
  if (ws.isClosed) break;
444
- ws.send(evt("response.function_call_arguments.done", {
734
+ sendEvent(ws, {
735
+ type: "response.function_call_arguments.done",
445
736
  response_id: responseId,
446
737
  item_id: itemId,
447
738
  output_index: outputIndex,
448
739
  call_id: callId,
449
740
  arguments: args
450
- }));
741
+ }, isBeta);
451
742
  if (ws.isClosed) break;
452
- ws.send(evt("response.output_item.done", {
743
+ sendEvent(ws, {
744
+ type: "response.output_item.done",
453
745
  response_id: responseId,
454
746
  output_index: outputIndex,
455
- item: toolOutputItem
456
- }));
747
+ item: {
748
+ ...toolOutputItem,
749
+ phase: "final_answer"
750
+ }
751
+ }, isBeta);
752
+ sendEvent(ws, {
753
+ type: "conversation.item.done",
754
+ item: {
755
+ id: itemId,
756
+ object: "realtime.item",
757
+ type: "function_call",
758
+ status: "completed",
759
+ call_id: callId,
760
+ name: tc.name,
761
+ arguments: args
762
+ }
763
+ }, isBeta);
457
764
  if (ws.isClosed) break;
458
765
  allOutputItems.push(toolOutputItem);
459
766
  }
@@ -466,17 +773,20 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
466
773
  }
467
774
  interruption?.cleanup();
468
775
  if (ws.isClosed) return;
469
- ws.send(evt("response.done", { response: {
470
- id: responseId,
471
- object: "realtime.response",
472
- status: "completed",
473
- output: allOutputItems,
474
- usage: {
475
- total_tokens: 0,
476
- input_tokens: 0,
477
- output_tokens: 0
776
+ sendEvent(ws, {
777
+ type: "response.done",
778
+ response: {
779
+ id: responseId,
780
+ object: "realtime.response",
781
+ status: "completed",
782
+ output: allOutputItems,
783
+ usage: {
784
+ total_tokens: 0,
785
+ input_tokens: 0,
786
+ output_tokens: 0
787
+ }
478
788
  }
479
- } }));
789
+ }, isBeta);
480
790
  conversationItems.push({
481
791
  type: "message",
482
792
  id: textItemId,
@@ -509,19 +819,23 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
509
819
  role: "assistant",
510
820
  status: "completed",
511
821
  content: [{
512
- type: "text",
822
+ type: "output_text",
513
823
  text: response.content
514
824
  }]
515
825
  };
516
- ws.send(evt("response.created", { response: {
517
- id: responseId,
518
- object: "realtime.response",
519
- status: "in_progress",
520
- status_details: null,
521
- output: [],
522
- usage: null
523
- } }));
524
- ws.send(evt("response.output_item.added", {
826
+ sendEvent(ws, {
827
+ type: "response.created",
828
+ response: {
829
+ id: responseId,
830
+ object: "realtime.response",
831
+ status: "in_progress",
832
+ status_details: null,
833
+ output: [],
834
+ usage: null
835
+ }
836
+ }, isBeta);
837
+ sendEvent(ws, {
838
+ type: "response.output_item.added",
525
839
  response_id: responseId,
526
840
  output_index: outputIndex,
527
841
  item: {
@@ -529,19 +843,21 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
529
843
  type: "message",
530
844
  role: "assistant",
531
845
  status: "in_progress",
532
- content: []
846
+ content: [],
847
+ phase: "final_answer"
533
848
  }
534
- }));
535
- ws.send(evt("response.content_part.added", {
849
+ }, isBeta);
850
+ sendEvent(ws, {
851
+ type: "response.content_part.added",
536
852
  response_id: responseId,
537
853
  item_id: itemId,
538
854
  output_index: outputIndex,
539
855
  content_index: contentIndex,
540
856
  part: {
541
- type: "text",
857
+ type: "output_text",
542
858
  text: ""
543
859
  }
544
- }));
860
+ }, isBeta);
545
861
  const content = response.content;
546
862
  const interruption = createInterruptionSignal(fixture);
547
863
  let interrupted = false;
@@ -553,14 +869,14 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
553
869
  break;
554
870
  }
555
871
  if (ws.isClosed) break;
556
- const chunk = content.slice(i, i + chunkSize);
557
- ws.send(evt("response.text.delta", {
872
+ sendEvent(ws, {
873
+ type: "response.output_text.delta",
558
874
  response_id: responseId,
559
875
  item_id: itemId,
560
876
  output_index: outputIndex,
561
877
  content_index: contentIndex,
562
- delta: chunk
563
- }));
878
+ delta: content.slice(i, i + chunkSize)
879
+ }, isBeta);
564
880
  interruption?.tick();
565
881
  if (interruption?.signal.aborted) {
566
882
  interrupted = true;
@@ -576,39 +892,59 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
576
892
  }
577
893
  interruption?.cleanup();
578
894
  if (ws.isClosed) return;
579
- ws.send(evt("response.text.done", {
895
+ sendEvent(ws, {
896
+ type: "response.output_text.done",
580
897
  response_id: responseId,
581
898
  item_id: itemId,
582
899
  output_index: outputIndex,
583
900
  content_index: contentIndex,
584
901
  text: content
585
- }));
586
- ws.send(evt("response.content_part.done", {
902
+ }, isBeta);
903
+ sendEvent(ws, {
904
+ type: "response.content_part.done",
587
905
  response_id: responseId,
588
906
  item_id: itemId,
589
907
  output_index: outputIndex,
590
908
  content_index: contentIndex,
591
909
  part: {
592
- type: "text",
910
+ type: "output_text",
593
911
  text: content
594
912
  }
595
- }));
596
- ws.send(evt("response.output_item.done", {
913
+ }, isBeta);
914
+ sendEvent(ws, {
915
+ type: "response.output_item.done",
597
916
  response_id: responseId,
598
917
  output_index: outputIndex,
599
- item: outputItem
600
- }));
601
- ws.send(evt("response.done", { response: {
602
- id: responseId,
603
- object: "realtime.response",
604
- status: "completed",
605
- output: [outputItem],
606
- usage: {
607
- total_tokens: 0,
608
- input_tokens: 0,
609
- output_tokens: 0
918
+ item: {
919
+ ...outputItem,
920
+ phase: "final_answer"
921
+ }
922
+ }, isBeta);
923
+ sendEvent(ws, {
924
+ type: "conversation.item.done",
925
+ item: {
926
+ id: itemId,
927
+ object: "realtime.item",
928
+ type: "message",
929
+ role: "assistant",
930
+ status: "completed",
931
+ content: outputItem.content
932
+ }
933
+ }, isBeta);
934
+ sendEvent(ws, {
935
+ type: "response.done",
936
+ response: {
937
+ id: responseId,
938
+ object: "realtime.response",
939
+ status: "completed",
940
+ output: [outputItem],
941
+ usage: {
942
+ total_tokens: 0,
943
+ input_tokens: 0,
944
+ output_tokens: 0
945
+ }
610
946
  }
611
- } }));
947
+ }, isBeta);
612
948
  conversationItems.push({
613
949
  type: "message",
614
950
  id: itemId,
@@ -631,14 +967,17 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
631
967
  fixture
632
968
  }
633
969
  });
634
- ws.send(evt("response.created", { response: {
635
- id: responseId,
636
- object: "realtime.response",
637
- status: "in_progress",
638
- status_details: null,
639
- output: [],
640
- usage: null
641
- } }));
970
+ sendEvent(ws, {
971
+ type: "response.created",
972
+ response: {
973
+ id: responseId,
974
+ object: "realtime.response",
975
+ status: "in_progress",
976
+ status_details: null,
977
+ output: [],
978
+ usage: null
979
+ }
980
+ }, isBeta);
642
981
  const outputItems = [];
643
982
  const interruption = createInterruptionSignal(fixture);
644
983
  let interrupted = false;
@@ -654,7 +993,8 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
654
993
  name: tc.name,
655
994
  arguments: tc.arguments
656
995
  };
657
- ws.send(evt("response.output_item.added", {
996
+ sendEvent(ws, {
997
+ type: "response.output_item.added",
658
998
  response_id: responseId,
659
999
  output_index: tcIdx,
660
1000
  item: {
@@ -663,9 +1003,10 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
663
1003
  status: "in_progress",
664
1004
  call_id: callId,
665
1005
  name: tc.name,
666
- arguments: ""
1006
+ arguments: "",
1007
+ phase: "final_answer"
667
1008
  }
668
- }));
1009
+ }, isBeta);
669
1010
  const args = tc.arguments;
670
1011
  for (let i = 0; i < args.length; i += chunkSize) {
671
1012
  if (ws.isClosed) break;
@@ -676,13 +1017,14 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
676
1017
  }
677
1018
  if (ws.isClosed) break;
678
1019
  const chunk = args.slice(i, i + chunkSize);
679
- ws.send(evt("response.function_call_arguments.delta", {
1020
+ sendEvent(ws, {
1021
+ type: "response.function_call_arguments.delta",
680
1022
  response_id: responseId,
681
1023
  item_id: itemId,
682
1024
  output_index: tcIdx,
683
1025
  call_id: callId,
684
1026
  delta: chunk
685
- }));
1027
+ }, isBeta);
686
1028
  interruption?.tick();
687
1029
  if (interruption?.signal.aborted) {
688
1030
  interrupted = true;
@@ -691,18 +1033,35 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
691
1033
  }
692
1034
  if (interrupted) break;
693
1035
  if (ws.isClosed) break;
694
- ws.send(evt("response.function_call_arguments.done", {
1036
+ sendEvent(ws, {
1037
+ type: "response.function_call_arguments.done",
695
1038
  response_id: responseId,
696
1039
  item_id: itemId,
697
1040
  output_index: tcIdx,
698
1041
  call_id: callId,
699
1042
  arguments: args
700
- }));
701
- ws.send(evt("response.output_item.done", {
1043
+ }, isBeta);
1044
+ sendEvent(ws, {
1045
+ type: "response.output_item.done",
702
1046
  response_id: responseId,
703
1047
  output_index: tcIdx,
704
- item: outputItem
705
- }));
1048
+ item: {
1049
+ ...outputItem,
1050
+ phase: "final_answer"
1051
+ }
1052
+ }, isBeta);
1053
+ sendEvent(ws, {
1054
+ type: "conversation.item.done",
1055
+ item: {
1056
+ id: itemId,
1057
+ object: "realtime.item",
1058
+ type: "function_call",
1059
+ status: "completed",
1060
+ call_id: callId,
1061
+ name: tc.name,
1062
+ arguments: args
1063
+ }
1064
+ }, isBeta);
706
1065
  outputItems.push(outputItem);
707
1066
  }
708
1067
  if (interrupted) {
@@ -714,17 +1073,20 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
714
1073
  }
715
1074
  interruption?.cleanup();
716
1075
  if (ws.isClosed) return;
717
- ws.send(evt("response.done", { response: {
718
- id: responseId,
719
- object: "realtime.response",
720
- status: "completed",
721
- output: outputItems,
722
- usage: {
723
- total_tokens: 0,
724
- input_tokens: 0,
725
- output_tokens: 0
1076
+ sendEvent(ws, {
1077
+ type: "response.done",
1078
+ response: {
1079
+ id: responseId,
1080
+ object: "realtime.response",
1081
+ status: "completed",
1082
+ output: outputItems,
1083
+ usage: {
1084
+ total_tokens: 0,
1085
+ input_tokens: 0,
1086
+ output_tokens: 0
1087
+ }
726
1088
  }
727
- } }));
1089
+ }, isBeta);
728
1090
  for (const item of outputItems) conversationItems.push(item);
729
1091
  return;
730
1092
  }
@@ -738,7 +1100,7 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
738
1100
  fixture
739
1101
  }
740
1102
  });
741
- ws.send(buildErrorRealtimeEvent("Fixture response did not match any known type", "server_error"));
1103
+ buildErrorRealtimeEvent(ws, "Fixture response did not match any known type", isBeta, "server_error");
742
1104
  }
743
1105
 
744
1106
  //#endregion