@copilotkit/aimock 1.22.1 → 1.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/.claude-plugin/marketplace.json +1 -1
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +28 -0
  4. package/README.md +10 -10
  5. package/dist/agui-types.d.ts.map +1 -1
  6. package/dist/cli.cjs +7 -1
  7. package/dist/cli.cjs.map +1 -1
  8. package/dist/cli.js +7 -1
  9. package/dist/cli.js.map +1 -1
  10. package/dist/fixture-loader.cjs +2 -1
  11. package/dist/fixture-loader.cjs.map +1 -1
  12. package/dist/fixture-loader.d.cts.map +1 -1
  13. package/dist/fixture-loader.d.ts.map +1 -1
  14. package/dist/fixture-loader.js +2 -1
  15. package/dist/fixture-loader.js.map +1 -1
  16. package/dist/gemini.cjs +1 -1
  17. package/dist/gemini.cjs.map +1 -1
  18. package/dist/gemini.js +1 -1
  19. package/dist/gemini.js.map +1 -1
  20. package/dist/helpers.cjs +1 -0
  21. package/dist/helpers.cjs.map +1 -1
  22. package/dist/helpers.d.cts.map +1 -1
  23. package/dist/helpers.d.ts.map +1 -1
  24. package/dist/helpers.js +1 -0
  25. package/dist/helpers.js.map +1 -1
  26. package/dist/model-utils.cjs +11 -0
  27. package/dist/model-utils.cjs.map +1 -0
  28. package/dist/model-utils.js +10 -0
  29. package/dist/model-utils.js.map +1 -0
  30. package/dist/recorder.cjs +19 -4
  31. package/dist/recorder.cjs.map +1 -1
  32. package/dist/recorder.d.cts.map +1 -1
  33. package/dist/recorder.d.ts.map +1 -1
  34. package/dist/recorder.js +19 -4
  35. package/dist/recorder.js.map +1 -1
  36. package/dist/router.cjs +7 -3
  37. package/dist/router.cjs.map +1 -1
  38. package/dist/router.js +7 -3
  39. package/dist/router.js.map +1 -1
  40. package/dist/server.cjs +1 -1
  41. package/dist/server.cjs.map +1 -1
  42. package/dist/server.js +1 -1
  43. package/dist/server.js.map +1 -1
  44. package/dist/types.d.cts +17 -2
  45. package/dist/types.d.cts.map +1 -1
  46. package/dist/types.d.ts +17 -2
  47. package/dist/types.d.ts.map +1 -1
  48. package/dist/vector-types.d.ts.map +1 -1
  49. package/dist/ws-realtime.cjs +577 -215
  50. package/dist/ws-realtime.cjs.map +1 -1
  51. package/dist/ws-realtime.d.cts.map +1 -1
  52. package/dist/ws-realtime.d.ts.map +1 -1
  53. package/dist/ws-realtime.js +577 -215
  54. package/dist/ws-realtime.js.map +1 -1
  55. package/package.json +1 -1
@@ -26,12 +26,34 @@ function realtimeItemsToMessages(items, instructions, logger) {
26
26
  content: instructions
27
27
  });
28
28
  for (const item of items) if (item.type === "message") {
29
- const text = item.content?.[0]?.text ?? "";
30
29
  const role = item.role === "assistant" ? "assistant" : item.role === "system" ? "system" : "user";
31
- messages.push({
32
- role,
33
- content: text
34
- });
30
+ if (item.content?.some((p) => p.type === "input_text" || p.type === "input_image" || p.type === "input_audio") && item.content) {
31
+ const mappedContent = item.content.map((part) => {
32
+ if (part.type === "input_text") return {
33
+ type: "text",
34
+ text: part.text ?? ""
35
+ };
36
+ if (part.type === "input_image") return {
37
+ type: "image_url",
38
+ image_url: { url: part.url ?? "" }
39
+ };
40
+ if (part.type === "input_audio") return {
41
+ type: "text",
42
+ text: "[audio input]"
43
+ };
44
+ return part;
45
+ });
46
+ messages.push({
47
+ role,
48
+ content: mappedContent
49
+ });
50
+ } else {
51
+ const text = item.content?.[0]?.text ?? "";
52
+ messages.push({
53
+ role,
54
+ content: text
55
+ });
56
+ }
35
57
  } else if (item.type === "function_call") {
36
58
  if (!item.name) logger?.warn("Realtime function_call item missing 'name'");
37
59
  messages.push({
@@ -56,23 +78,112 @@ function realtimeItemsToMessages(items, instructions, logger) {
56
78
  }
57
79
  return messages;
58
80
  }
59
- function evt(type, extra = {}) {
60
- return JSON.stringify({
61
- type,
62
- event_id: realtimeId("event"),
63
- ...extra
81
+ /** GA -> Beta event name mapping */
82
+ const GA_TO_BETA_EVENT = {
83
+ "response.output_text.delta": "response.text.delta",
84
+ "response.output_text.done": "response.text.done",
85
+ "response.output_audio.delta": "response.audio.delta",
86
+ "response.output_audio.done": "response.audio.done",
87
+ "response.output_audio_transcript.delta": "response.audio_transcript.delta",
88
+ "response.output_audio_transcript.done": "response.audio_transcript.done",
89
+ "conversation.item.added": "conversation.item.created"
90
+ };
91
+ /** GA -> Beta content type mapping */
92
+ const GA_TO_BETA_CONTENT_TYPE = {
93
+ output_text: "text",
94
+ output_audio: "audio"
95
+ };
96
+ /** Events suppressed in Beta mode (GA-only events) */
97
+ const BETA_SUPPRESSED_EVENTS = new Set(["conversation.item.done"]);
98
+ function translateGAToBeta(event) {
99
+ const type = event.type;
100
+ if (BETA_SUPPRESSED_EVENTS.has(type)) return null;
101
+ const translated = { ...event };
102
+ if (GA_TO_BETA_EVENT[type]) translated.type = GA_TO_BETA_EVENT[type];
103
+ if (translated.part && typeof translated.part === "object") {
104
+ const part = { ...translated.part };
105
+ if (typeof part.type === "string" && GA_TO_BETA_CONTENT_TYPE[part.type]) part.type = GA_TO_BETA_CONTENT_TYPE[part.type];
106
+ translated.part = part;
107
+ }
108
+ if (translated.content_part && typeof translated.content_part === "object") {
109
+ const cp = { ...translated.content_part };
110
+ if (typeof cp.type === "string" && GA_TO_BETA_CONTENT_TYPE[cp.type]) cp.type = GA_TO_BETA_CONTENT_TYPE[cp.type];
111
+ translated.content_part = cp;
112
+ }
113
+ if (Array.isArray(translated.content)) translated.content = translated.content.map((c) => {
114
+ if (typeof c.type === "string" && GA_TO_BETA_CONTENT_TYPE[c.type]) return {
115
+ ...c,
116
+ type: GA_TO_BETA_CONTENT_TYPE[c.type]
117
+ };
118
+ return c;
64
119
  });
120
+ if (translated.item && typeof translated.item === "object") {
121
+ const item = { ...translated.item };
122
+ delete item.phase;
123
+ if (Array.isArray(item.content)) item.content = item.content.map((c) => {
124
+ if (typeof c.type === "string" && GA_TO_BETA_CONTENT_TYPE[c.type]) return {
125
+ ...c,
126
+ type: GA_TO_BETA_CONTENT_TYPE[c.type]
127
+ };
128
+ return c;
129
+ });
130
+ translated.item = item;
131
+ }
132
+ if (translated.response && typeof translated.response === "object") {
133
+ const resp = { ...translated.response };
134
+ if (Array.isArray(resp.output)) resp.output = resp.output.map((outItem) => {
135
+ const o = { ...outItem };
136
+ if (Array.isArray(o.content)) o.content = o.content.map((c) => typeof c.type === "string" && GA_TO_BETA_CONTENT_TYPE[c.type] ? {
137
+ ...c,
138
+ type: GA_TO_BETA_CONTENT_TYPE[c.type]
139
+ } : c);
140
+ return o;
141
+ });
142
+ translated.response = resp;
143
+ }
144
+ if (type === "session.created" || type === "session.updated") {
145
+ if (translated.session && typeof translated.session === "object") {
146
+ const session = { ...translated.session };
147
+ if (session.audio && typeof session.audio === "object") {
148
+ const audio = session.audio;
149
+ session.voice = audio.voice;
150
+ session.input_audio_format = audio.input_audio_format;
151
+ session.output_audio_format = audio.output_audio_format;
152
+ session.input_audio_transcription = audio.input_audio_transcription;
153
+ delete session.audio;
154
+ }
155
+ delete session.type;
156
+ delete session.reasoning;
157
+ translated.session = session;
158
+ }
159
+ }
160
+ return translated;
161
+ }
162
+ function sendEvent(ws, event, isBeta) {
163
+ const out = {
164
+ ...event,
165
+ event_id: event.event_id ?? realtimeId("event")
166
+ };
167
+ if (isBeta) {
168
+ const translated = translateGAToBeta(out);
169
+ if (translated === null) return;
170
+ ws.send(JSON.stringify(translated));
171
+ } else ws.send(JSON.stringify(out));
65
172
  }
66
- function buildErrorRealtimeEvent(message, type = "invalid_request_error", code) {
67
- return evt("error", { error: {
68
- message,
69
- type,
70
- code
71
- } });
173
+ function buildErrorRealtimeEvent(ws, message, isBeta, type = "invalid_request_error", code) {
174
+ sendEvent(ws, {
175
+ type: "error",
176
+ error: {
177
+ message,
178
+ type,
179
+ code
180
+ }
181
+ }, isBeta);
72
182
  }
73
183
  function handleWebSocketRealtime(ws, fixtures, journal, defaults) {
74
184
  const { logger } = defaults;
75
185
  const sessionId = realtimeId("sess");
186
+ const isBeta = defaults.upgradeHeaders?.["openai-beta"] ? String(defaults.upgradeHeaders["openai-beta"]).includes("realtime=v1") : false;
76
187
  const session = {
77
188
  model: defaults.model,
78
189
  modalities: ["text"],
@@ -81,86 +192,227 @@ function handleWebSocketRealtime(ws, fixtures, journal, defaults) {
81
192
  voice: null,
82
193
  input_audio_format: null,
83
194
  output_audio_format: null,
195
+ input_audio_noise_reduction: null,
196
+ input_audio_transcription: null,
84
197
  turn_detection: null,
85
- temperature: .8
198
+ temperature: .8,
199
+ type: "conversation",
200
+ reasoning: null
86
201
  };
87
202
  const conversationItems = [];
88
- ws.send(evt("session.created", { session: {
89
- id: sessionId,
90
- object: "realtime.session",
91
- ...session,
92
- expires_at: Math.floor(Date.now() / 1e3) + 3600,
93
- max_response_output_tokens: "inf",
94
- input_audio_transcription: null,
95
- tool_choice: "auto"
96
- } }));
203
+ sendEvent(ws, {
204
+ type: "session.created",
205
+ session: {
206
+ id: sessionId,
207
+ object: "realtime.session",
208
+ model: session.model,
209
+ expires_at: Math.floor(Date.now() / 1e3) + 3600,
210
+ modalities: session.modalities,
211
+ instructions: session.instructions,
212
+ tools: session.tools,
213
+ tool_choice: "auto",
214
+ temperature: session.temperature,
215
+ max_response_output_tokens: "inf",
216
+ audio: {
217
+ voice: session.voice,
218
+ input_audio_format: session.input_audio_format,
219
+ output_audio_format: session.output_audio_format,
220
+ input_audio_noise_reduction: session.input_audio_noise_reduction,
221
+ input_audio_transcription: session.input_audio_transcription
222
+ },
223
+ turn_detection: session.turn_detection,
224
+ type: session.type,
225
+ reasoning: session.reasoning
226
+ }
227
+ }, isBeta);
97
228
  let pending = Promise.resolve();
98
229
  ws.on("message", (raw) => {
99
- pending = pending.then(() => processMessage(raw, ws, fixtures, journal, defaults, session, conversationItems).catch((err) => {
230
+ pending = pending.then(() => processMessage(raw, ws, fixtures, journal, defaults, session, conversationItems, isBeta).catch((err) => {
100
231
  const msg = err instanceof Error ? err.message : "Internal error";
101
232
  logger.error(`WebSocket realtime error: ${msg}`);
102
233
  try {
103
- ws.send(buildErrorRealtimeEvent(msg, "server_error"));
234
+ buildErrorRealtimeEvent(ws, msg, isBeta, "server_error");
104
235
  } catch (sendErr) {
105
236
  defaults.logger.debug(`Failed to send error to client: ${sendErr instanceof Error ? sendErr.message : "unknown"}`);
106
237
  }
107
238
  }));
108
239
  });
109
240
  }
110
- async function processMessage(raw, ws, fixtures, journal, defaults, session, conversationItems) {
241
+ async function processMessage(raw, ws, fixtures, journal, defaults, session, conversationItems, isBeta) {
111
242
  let parsed;
112
243
  try {
113
244
  parsed = JSON.parse(raw);
114
245
  } catch (parseErr) {
115
- const detail = parseErr instanceof Error ? parseErr.message : "unknown";
116
- ws.send(buildErrorRealtimeEvent(`Malformed JSON: ${detail}`, "invalid_request_error", "invalid_json"));
246
+ buildErrorRealtimeEvent(ws, `Malformed JSON: ${parseErr instanceof Error ? parseErr.message : "unknown"}`, isBeta, "invalid_request_error", "invalid_json");
117
247
  return;
118
248
  }
119
249
  const msgType = parsed.type;
120
250
  if (msgType === "session.update") {
121
251
  if (parsed.session) {
122
- if (parsed.session.instructions !== void 0) session.instructions = parsed.session.instructions;
123
- if (parsed.session.tools !== void 0) session.tools = parsed.session.tools;
124
- if (parsed.session.modalities !== void 0) session.modalities = parsed.session.modalities;
125
- if (parsed.session.model !== void 0) session.model = parsed.session.model;
126
- if (parsed.session.temperature !== void 0) session.temperature = parsed.session.temperature;
252
+ const s = parsed.session;
253
+ const validTypes = new Set([
254
+ "conversation",
255
+ "transcription",
256
+ "translation"
257
+ ]);
258
+ if (s.type !== void 0) {
259
+ if (!validTypes.has(s.type)) {
260
+ sendEvent(ws, {
261
+ type: "error",
262
+ error: {
263
+ message: `Invalid session type: ${s.type}`,
264
+ type: "invalid_request_error",
265
+ code: "invalid_session_config"
266
+ }
267
+ }, isBeta);
268
+ return;
269
+ }
270
+ }
271
+ const prevModel = session.model;
272
+ const prevType = session.type;
273
+ if (s.instructions !== void 0) session.instructions = s.instructions;
274
+ if (s.tools !== void 0) session.tools = s.tools;
275
+ if (s.modalities !== void 0) session.modalities = s.modalities;
276
+ if (s.model !== void 0) session.model = s.model;
277
+ if (s.temperature !== void 0) session.temperature = s.temperature;
278
+ if (s.type !== void 0) session.type = s.type;
279
+ if (s.audio) {
280
+ const audio = s.audio;
281
+ if (audio.voice !== void 0) session.voice = audio.voice;
282
+ if (audio.input_audio_format !== void 0) session.input_audio_format = audio.input_audio_format;
283
+ if (audio.output_audio_format !== void 0) session.output_audio_format = audio.output_audio_format;
284
+ if (audio.input_audio_noise_reduction !== void 0) session.input_audio_noise_reduction = audio.input_audio_noise_reduction;
285
+ if (audio.input_audio_transcription !== void 0) session.input_audio_transcription = audio.input_audio_transcription;
286
+ }
287
+ if (s.voice !== void 0) session.voice = s.voice;
288
+ if (s.input_audio_format !== void 0) session.input_audio_format = s.input_audio_format;
289
+ if (s.output_audio_format !== void 0) session.output_audio_format = s.output_audio_format;
290
+ if (s.reasoning !== void 0) session.reasoning = s.reasoning;
291
+ const transcriptionModels = new Set([
292
+ "gpt-4o-transcribe",
293
+ "gpt-4o-mini-transcribe",
294
+ "gpt-realtime-whisper",
295
+ "whisper-1"
296
+ ]);
297
+ const translationModels = new Set([
298
+ "gpt-4o-transcribe",
299
+ "gpt-4o-mini-transcribe",
300
+ "gpt-realtime-translate"
301
+ ]);
302
+ if (session.type === "transcription" && !transcriptionModels.has(session.model)) {
303
+ session.model = prevModel;
304
+ session.type = prevType;
305
+ sendEvent(ws, {
306
+ type: "error",
307
+ error: {
308
+ message: `Model ${s.model ?? prevModel} does not support session type transcription`,
309
+ type: "invalid_request_error",
310
+ code: "invalid_session_config"
311
+ }
312
+ }, isBeta);
313
+ return;
314
+ }
315
+ if (session.type === "translation" && !translationModels.has(session.model)) {
316
+ session.model = prevModel;
317
+ session.type = prevType;
318
+ sendEvent(ws, {
319
+ type: "error",
320
+ error: {
321
+ message: `Model ${s.model ?? prevModel} does not support session type translation`,
322
+ type: "invalid_request_error",
323
+ code: "invalid_session_config"
324
+ }
325
+ }, isBeta);
326
+ return;
327
+ }
127
328
  }
128
- ws.send(evt("session.updated", { session: {
129
- ...session,
130
- object: "realtime.session",
131
- expires_at: Math.floor(Date.now() / 1e3) + 3600,
132
- max_response_output_tokens: "inf",
133
- input_audio_transcription: null,
134
- tool_choice: "auto"
135
- } }));
329
+ sendEvent(ws, {
330
+ type: "session.updated",
331
+ session: {
332
+ object: "realtime.session",
333
+ model: session.model,
334
+ expires_at: Math.floor(Date.now() / 1e3) + 3600,
335
+ modalities: session.modalities,
336
+ instructions: session.instructions,
337
+ tools: session.tools,
338
+ tool_choice: "auto",
339
+ temperature: session.temperature,
340
+ max_response_output_tokens: "inf",
341
+ audio: {
342
+ voice: session.voice,
343
+ input_audio_format: session.input_audio_format,
344
+ output_audio_format: session.output_audio_format,
345
+ input_audio_noise_reduction: session.input_audio_noise_reduction,
346
+ input_audio_transcription: session.input_audio_transcription
347
+ },
348
+ turn_detection: session.turn_detection,
349
+ type: session.type,
350
+ reasoning: session.reasoning
351
+ }
352
+ }, isBeta);
136
353
  return;
137
354
  }
138
355
  if (msgType === "conversation.item.create") {
139
356
  if (!parsed.item) {
140
- ws.send(buildErrorRealtimeEvent("Missing 'item' in conversation.item.create", "invalid_request_error"));
357
+ buildErrorRealtimeEvent(ws, "Missing 'item' in conversation.item.create", isBeta, "invalid_request_error");
141
358
  return;
142
359
  }
143
360
  const item = parsed.item;
144
361
  if (!item.id) item.id = realtimeId("item");
145
362
  const previousId = conversationItems.length > 0 ? conversationItems[conversationItems.length - 1].id ?? null : null;
146
363
  conversationItems.push(item);
147
- ws.send(evt("conversation.item.created", {
364
+ sendEvent(ws, {
365
+ type: "conversation.item.added",
148
366
  previous_item_id: previousId,
149
367
  item
150
- }));
368
+ }, isBeta);
151
369
  return;
152
370
  }
153
371
  if (msgType === "response.create") {
154
- await handleResponseCreate(ws, fixtures, journal, defaults, session, conversationItems, parsed.response);
372
+ await handleResponseCreate(ws, fixtures, journal, defaults, session, conversationItems, isBeta, parsed.response);
373
+ return;
374
+ }
375
+ if (msgType === "input_audio_buffer.append") return;
376
+ if (msgType === "input_audio_buffer.commit") {
377
+ sendEvent(ws, { type: "input_audio_buffer.committed" }, isBeta);
378
+ if (session.type === "transcription" || session.type === "translation") {
379
+ const audioItem = {
380
+ type: "message",
381
+ id: realtimeId("item"),
382
+ role: "user",
383
+ content: [{
384
+ type: "input_audio",
385
+ transcript: null
386
+ }]
387
+ };
388
+ conversationItems.push(audioItem);
389
+ sendEvent(ws, {
390
+ type: "conversation.item.added",
391
+ item: audioItem
392
+ }, isBeta);
393
+ }
394
+ return;
395
+ }
396
+ if (msgType === "input_audio_buffer.clear") {
397
+ sendEvent(ws, { type: "input_audio_buffer.cleared" }, isBeta);
398
+ return;
399
+ }
400
+ if (msgType === "response.cancel") {
401
+ sendEvent(ws, { type: "response.cancelled" }, isBeta);
155
402
  return;
156
403
  }
157
404
  }
158
- async function handleResponseCreate(ws, fixtures, journal, defaults, session, conversationItems, responseOverrides) {
405
+ async function handleResponseCreate(ws, fixtures, journal, defaults, session, conversationItems, isBeta, responseOverrides) {
159
406
  const messages = realtimeItemsToMessages(conversationItems, (responseOverrides?.instructions ?? session.instructions) || void 0, defaults.logger);
407
+ const endpointType = {
408
+ conversation: "realtime",
409
+ transcription: "realtime-transcription",
410
+ translation: "realtime-translation"
411
+ }[session.type] ?? "realtime";
160
412
  const completionReq = {
161
413
  model: session.model,
162
414
  messages,
163
- _endpointType: "chat"
415
+ _endpointType: endpointType
164
416
  };
165
417
  const testId = defaults.testId ?? require_constants.DEFAULT_TEST_ID;
166
418
  const fixture = require_router.matchFixture(fixtures, completionReq, journal.getFixtureMatchCountsForTest(testId), defaults.requestTransform);
@@ -194,33 +446,39 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
194
446
  ...require_helpers.strictOverrideField(defaults.strict, defaults.upgradeHeaders)
195
447
  }
196
448
  });
197
- ws.send(evt("response.created", { response: {
198
- id: responseId,
199
- object: "realtime.response",
200
- status: "failed",
201
- status_details: null,
202
- output: [],
203
- usage: null
204
- } }));
205
- ws.send(evt("response.done", { response: {
206
- id: responseId,
207
- object: "realtime.response",
208
- status: "failed",
209
- output: [],
210
- status_details: {
211
- type: "error",
212
- error: {
213
- message: "No fixture matched",
214
- type: "invalid_request_error",
215
- code: "no_fixture_match"
449
+ sendEvent(ws, {
450
+ type: "response.created",
451
+ response: {
452
+ id: responseId,
453
+ object: "realtime.response",
454
+ status: "failed",
455
+ status_details: null,
456
+ output: [],
457
+ usage: null
458
+ }
459
+ }, isBeta);
460
+ sendEvent(ws, {
461
+ type: "response.done",
462
+ response: {
463
+ id: responseId,
464
+ object: "realtime.response",
465
+ status: "failed",
466
+ output: [],
467
+ status_details: {
468
+ type: "error",
469
+ error: {
470
+ message: "No fixture matched",
471
+ type: "invalid_request_error",
472
+ code: "no_fixture_match"
473
+ }
474
+ },
475
+ usage: {
476
+ total_tokens: 0,
477
+ input_tokens: 0,
478
+ output_tokens: 0
216
479
  }
217
- },
218
- usage: {
219
- total_tokens: 0,
220
- input_tokens: 0,
221
- output_tokens: 0
222
480
  }
223
- } }));
481
+ }, isBeta);
224
482
  return;
225
483
  }
226
484
  const response = await require_helpers.resolveResponse(fixture, completionReq);
@@ -238,33 +496,39 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
238
496
  fixture
239
497
  }
240
498
  });
241
- ws.send(evt("response.created", { response: {
242
- id: responseId,
243
- object: "realtime.response",
244
- status: "failed",
245
- status_details: null,
246
- output: [],
247
- usage: null
248
- } }));
249
- ws.send(evt("response.done", { response: {
250
- id: responseId,
251
- object: "realtime.response",
252
- status: "failed",
253
- output: [],
254
- status_details: {
255
- type: "error",
256
- error: {
257
- message: response.error.message,
258
- type: response.error.type,
259
- code: response.error.code
499
+ sendEvent(ws, {
500
+ type: "response.created",
501
+ response: {
502
+ id: responseId,
503
+ object: "realtime.response",
504
+ status: "failed",
505
+ status_details: null,
506
+ output: [],
507
+ usage: null
508
+ }
509
+ }, isBeta);
510
+ sendEvent(ws, {
511
+ type: "response.done",
512
+ response: {
513
+ id: responseId,
514
+ object: "realtime.response",
515
+ status: "failed",
516
+ output: [],
517
+ status_details: {
518
+ type: "error",
519
+ error: {
520
+ message: response.error.message,
521
+ type: response.error.type,
522
+ code: response.error.code
523
+ }
524
+ },
525
+ usage: {
526
+ total_tokens: 0,
527
+ input_tokens: 0,
528
+ output_tokens: 0
260
529
  }
261
- },
262
- usage: {
263
- total_tokens: 0,
264
- input_tokens: 0,
265
- output_tokens: 0
266
530
  }
267
- } }));
531
+ }, isBeta);
268
532
  return;
269
533
  }
270
534
  if (require_helpers.isContentWithToolCallsResponse(response)) {
@@ -278,14 +542,17 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
278
542
  fixture
279
543
  }
280
544
  });
281
- ws.send(evt("response.created", { response: {
282
- id: responseId,
283
- object: "realtime.response",
284
- status: "in_progress",
285
- status_details: null,
286
- output: [],
287
- usage: null
288
- } }));
545
+ sendEvent(ws, {
546
+ type: "response.created",
547
+ response: {
548
+ id: responseId,
549
+ object: "realtime.response",
550
+ status: "in_progress",
551
+ status_details: null,
552
+ output: [],
553
+ usage: null
554
+ }
555
+ }, isBeta);
289
556
  const interruption = require_interruption.createInterruptionSignal(fixture);
290
557
  let interrupted = false;
291
558
  const allOutputItems = [];
@@ -298,11 +565,13 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
298
565
  role: "assistant",
299
566
  status: "completed",
300
567
  content: [{
301
- type: "text",
568
+ type: "output_text",
302
569
  text: response.content
303
570
  }]
304
571
  };
305
- ws.send(evt("response.output_item.added", {
572
+ const textPhase = response.toolCalls && response.toolCalls.length > 0 ? "commentary" : "final_answer";
573
+ sendEvent(ws, {
574
+ type: "response.output_item.added",
306
575
  response_id: responseId,
307
576
  output_index: textOutputIndex,
308
577
  item: {
@@ -310,19 +579,21 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
310
579
  type: "message",
311
580
  role: "assistant",
312
581
  status: "in_progress",
313
- content: []
582
+ content: [],
583
+ phase: textPhase
314
584
  }
315
- }));
316
- ws.send(evt("response.content_part.added", {
585
+ }, isBeta);
586
+ sendEvent(ws, {
587
+ type: "response.content_part.added",
317
588
  response_id: responseId,
318
589
  item_id: textItemId,
319
590
  output_index: textOutputIndex,
320
591
  content_index: contentIndex,
321
592
  part: {
322
- type: "text",
593
+ type: "output_text",
323
594
  text: ""
324
595
  }
325
- }));
596
+ }, isBeta);
326
597
  const content = response.content;
327
598
  for (let i = 0; i < content.length; i += chunkSize) {
328
599
  if (ws.isClosed) break;
@@ -332,14 +603,14 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
332
603
  break;
333
604
  }
334
605
  if (ws.isClosed) break;
335
- const chunk = content.slice(i, i + chunkSize);
336
- ws.send(evt("response.text.delta", {
606
+ sendEvent(ws, {
607
+ type: "response.output_text.delta",
337
608
  response_id: responseId,
338
609
  item_id: textItemId,
339
610
  output_index: textOutputIndex,
340
611
  content_index: contentIndex,
341
- delta: chunk
342
- }));
612
+ delta: content.slice(i, i + chunkSize)
613
+ }, isBeta);
343
614
  interruption?.tick();
344
615
  if (interruption?.signal.aborted) {
345
616
  interrupted = true;
@@ -357,36 +628,53 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
357
628
  interruption?.cleanup();
358
629
  return;
359
630
  }
360
- ws.send(evt("response.text.done", {
631
+ sendEvent(ws, {
632
+ type: "response.output_text.done",
361
633
  response_id: responseId,
362
634
  item_id: textItemId,
363
635
  output_index: textOutputIndex,
364
636
  content_index: contentIndex,
365
637
  text: content
366
- }));
638
+ }, isBeta);
367
639
  if (ws.isClosed) {
368
640
  interruption?.cleanup();
369
641
  return;
370
642
  }
371
- ws.send(evt("response.content_part.done", {
643
+ sendEvent(ws, {
644
+ type: "response.content_part.done",
372
645
  response_id: responseId,
373
646
  item_id: textItemId,
374
647
  output_index: textOutputIndex,
375
648
  content_index: contentIndex,
376
649
  part: {
377
- type: "text",
650
+ type: "output_text",
378
651
  text: content
379
652
  }
380
- }));
653
+ }, isBeta);
381
654
  if (ws.isClosed) {
382
655
  interruption?.cleanup();
383
656
  return;
384
657
  }
385
- ws.send(evt("response.output_item.done", {
658
+ sendEvent(ws, {
659
+ type: "response.output_item.done",
386
660
  response_id: responseId,
387
661
  output_index: textOutputIndex,
388
- item: textOutputItem
389
- }));
662
+ item: {
663
+ ...textOutputItem,
664
+ phase: textPhase
665
+ }
666
+ }, isBeta);
667
+ sendEvent(ws, {
668
+ type: "conversation.item.done",
669
+ item: {
670
+ id: textItemId,
671
+ object: "realtime.item",
672
+ type: "message",
673
+ role: "assistant",
674
+ status: "completed",
675
+ content: textOutputItem.content
676
+ }
677
+ }, isBeta);
390
678
  if (ws.isClosed) {
391
679
  interruption?.cleanup();
392
680
  return;
@@ -405,7 +693,8 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
405
693
  name: tc.name,
406
694
  arguments: tc.arguments
407
695
  };
408
- ws.send(evt("response.output_item.added", {
696
+ sendEvent(ws, {
697
+ type: "response.output_item.added",
409
698
  response_id: responseId,
410
699
  output_index: outputIndex,
411
700
  item: {
@@ -414,9 +703,10 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
414
703
  status: "in_progress",
415
704
  call_id: callId,
416
705
  name: tc.name,
417
- arguments: ""
706
+ arguments: "",
707
+ phase: "final_answer"
418
708
  }
419
- }));
709
+ }, isBeta);
420
710
  const args = tc.arguments;
421
711
  for (let i = 0; i < args.length; i += chunkSize) {
422
712
  if (ws.isClosed) break;
@@ -426,14 +716,14 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
426
716
  break;
427
717
  }
428
718
  if (ws.isClosed) break;
429
- const chunk = args.slice(i, i + chunkSize);
430
- ws.send(evt("response.function_call_arguments.delta", {
719
+ sendEvent(ws, {
720
+ type: "response.function_call_arguments.delta",
431
721
  response_id: responseId,
432
722
  item_id: itemId,
433
723
  output_index: outputIndex,
434
724
  call_id: callId,
435
- delta: chunk
436
- }));
725
+ delta: args.slice(i, i + chunkSize)
726
+ }, isBeta);
437
727
  interruption?.tick();
438
728
  if (interruption?.signal.aborted) {
439
729
  interrupted = true;
@@ -442,19 +732,36 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
442
732
  }
443
733
  if (interrupted) break;
444
734
  if (ws.isClosed) break;
445
- ws.send(evt("response.function_call_arguments.done", {
735
+ sendEvent(ws, {
736
+ type: "response.function_call_arguments.done",
446
737
  response_id: responseId,
447
738
  item_id: itemId,
448
739
  output_index: outputIndex,
449
740
  call_id: callId,
450
741
  arguments: args
451
- }));
742
+ }, isBeta);
452
743
  if (ws.isClosed) break;
453
- ws.send(evt("response.output_item.done", {
744
+ sendEvent(ws, {
745
+ type: "response.output_item.done",
454
746
  response_id: responseId,
455
747
  output_index: outputIndex,
456
- item: toolOutputItem
457
- }));
748
+ item: {
749
+ ...toolOutputItem,
750
+ phase: "final_answer"
751
+ }
752
+ }, isBeta);
753
+ sendEvent(ws, {
754
+ type: "conversation.item.done",
755
+ item: {
756
+ id: itemId,
757
+ object: "realtime.item",
758
+ type: "function_call",
759
+ status: "completed",
760
+ call_id: callId,
761
+ name: tc.name,
762
+ arguments: args
763
+ }
764
+ }, isBeta);
458
765
  if (ws.isClosed) break;
459
766
  allOutputItems.push(toolOutputItem);
460
767
  }
@@ -467,17 +774,20 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
467
774
  }
468
775
  interruption?.cleanup();
469
776
  if (ws.isClosed) return;
470
- ws.send(evt("response.done", { response: {
471
- id: responseId,
472
- object: "realtime.response",
473
- status: "completed",
474
- output: allOutputItems,
475
- usage: {
476
- total_tokens: 0,
477
- input_tokens: 0,
478
- output_tokens: 0
777
+ sendEvent(ws, {
778
+ type: "response.done",
779
+ response: {
780
+ id: responseId,
781
+ object: "realtime.response",
782
+ status: "completed",
783
+ output: allOutputItems,
784
+ usage: {
785
+ total_tokens: 0,
786
+ input_tokens: 0,
787
+ output_tokens: 0
788
+ }
479
789
  }
480
- } }));
790
+ }, isBeta);
481
791
  conversationItems.push({
482
792
  type: "message",
483
793
  id: textItemId,
@@ -510,19 +820,23 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
510
820
  role: "assistant",
511
821
  status: "completed",
512
822
  content: [{
513
- type: "text",
823
+ type: "output_text",
514
824
  text: response.content
515
825
  }]
516
826
  };
517
- ws.send(evt("response.created", { response: {
518
- id: responseId,
519
- object: "realtime.response",
520
- status: "in_progress",
521
- status_details: null,
522
- output: [],
523
- usage: null
524
- } }));
525
- ws.send(evt("response.output_item.added", {
827
+ sendEvent(ws, {
828
+ type: "response.created",
829
+ response: {
830
+ id: responseId,
831
+ object: "realtime.response",
832
+ status: "in_progress",
833
+ status_details: null,
834
+ output: [],
835
+ usage: null
836
+ }
837
+ }, isBeta);
838
+ sendEvent(ws, {
839
+ type: "response.output_item.added",
526
840
  response_id: responseId,
527
841
  output_index: outputIndex,
528
842
  item: {
@@ -530,19 +844,21 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
530
844
  type: "message",
531
845
  role: "assistant",
532
846
  status: "in_progress",
533
- content: []
847
+ content: [],
848
+ phase: "final_answer"
534
849
  }
535
- }));
536
- ws.send(evt("response.content_part.added", {
850
+ }, isBeta);
851
+ sendEvent(ws, {
852
+ type: "response.content_part.added",
537
853
  response_id: responseId,
538
854
  item_id: itemId,
539
855
  output_index: outputIndex,
540
856
  content_index: contentIndex,
541
857
  part: {
542
- type: "text",
858
+ type: "output_text",
543
859
  text: ""
544
860
  }
545
- }));
861
+ }, isBeta);
546
862
  const content = response.content;
547
863
  const interruption = require_interruption.createInterruptionSignal(fixture);
548
864
  let interrupted = false;
@@ -554,14 +870,14 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
554
870
  break;
555
871
  }
556
872
  if (ws.isClosed) break;
557
- const chunk = content.slice(i, i + chunkSize);
558
- ws.send(evt("response.text.delta", {
873
+ sendEvent(ws, {
874
+ type: "response.output_text.delta",
559
875
  response_id: responseId,
560
876
  item_id: itemId,
561
877
  output_index: outputIndex,
562
878
  content_index: contentIndex,
563
- delta: chunk
564
- }));
879
+ delta: content.slice(i, i + chunkSize)
880
+ }, isBeta);
565
881
  interruption?.tick();
566
882
  if (interruption?.signal.aborted) {
567
883
  interrupted = true;
@@ -577,39 +893,59 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
577
893
  }
578
894
  interruption?.cleanup();
579
895
  if (ws.isClosed) return;
580
- ws.send(evt("response.text.done", {
896
+ sendEvent(ws, {
897
+ type: "response.output_text.done",
581
898
  response_id: responseId,
582
899
  item_id: itemId,
583
900
  output_index: outputIndex,
584
901
  content_index: contentIndex,
585
902
  text: content
586
- }));
587
- ws.send(evt("response.content_part.done", {
903
+ }, isBeta);
904
+ sendEvent(ws, {
905
+ type: "response.content_part.done",
588
906
  response_id: responseId,
589
907
  item_id: itemId,
590
908
  output_index: outputIndex,
591
909
  content_index: contentIndex,
592
910
  part: {
593
- type: "text",
911
+ type: "output_text",
594
912
  text: content
595
913
  }
596
- }));
597
- ws.send(evt("response.output_item.done", {
914
+ }, isBeta);
915
+ sendEvent(ws, {
916
+ type: "response.output_item.done",
598
917
  response_id: responseId,
599
918
  output_index: outputIndex,
600
- item: outputItem
601
- }));
602
- ws.send(evt("response.done", { response: {
603
- id: responseId,
604
- object: "realtime.response",
605
- status: "completed",
606
- output: [outputItem],
607
- usage: {
608
- total_tokens: 0,
609
- input_tokens: 0,
610
- output_tokens: 0
919
+ item: {
920
+ ...outputItem,
921
+ phase: "final_answer"
922
+ }
923
+ }, isBeta);
924
+ sendEvent(ws, {
925
+ type: "conversation.item.done",
926
+ item: {
927
+ id: itemId,
928
+ object: "realtime.item",
929
+ type: "message",
930
+ role: "assistant",
931
+ status: "completed",
932
+ content: outputItem.content
933
+ }
934
+ }, isBeta);
935
+ sendEvent(ws, {
936
+ type: "response.done",
937
+ response: {
938
+ id: responseId,
939
+ object: "realtime.response",
940
+ status: "completed",
941
+ output: [outputItem],
942
+ usage: {
943
+ total_tokens: 0,
944
+ input_tokens: 0,
945
+ output_tokens: 0
946
+ }
611
947
  }
612
- } }));
948
+ }, isBeta);
613
949
  conversationItems.push({
614
950
  type: "message",
615
951
  id: itemId,
@@ -632,14 +968,17 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
632
968
  fixture
633
969
  }
634
970
  });
635
- ws.send(evt("response.created", { response: {
636
- id: responseId,
637
- object: "realtime.response",
638
- status: "in_progress",
639
- status_details: null,
640
- output: [],
641
- usage: null
642
- } }));
971
+ sendEvent(ws, {
972
+ type: "response.created",
973
+ response: {
974
+ id: responseId,
975
+ object: "realtime.response",
976
+ status: "in_progress",
977
+ status_details: null,
978
+ output: [],
979
+ usage: null
980
+ }
981
+ }, isBeta);
643
982
  const outputItems = [];
644
983
  const interruption = require_interruption.createInterruptionSignal(fixture);
645
984
  let interrupted = false;
@@ -655,7 +994,8 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
655
994
  name: tc.name,
656
995
  arguments: tc.arguments
657
996
  };
658
- ws.send(evt("response.output_item.added", {
997
+ sendEvent(ws, {
998
+ type: "response.output_item.added",
659
999
  response_id: responseId,
660
1000
  output_index: tcIdx,
661
1001
  item: {
@@ -664,9 +1004,10 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
664
1004
  status: "in_progress",
665
1005
  call_id: callId,
666
1006
  name: tc.name,
667
- arguments: ""
1007
+ arguments: "",
1008
+ phase: "final_answer"
668
1009
  }
669
- }));
1010
+ }, isBeta);
670
1011
  const args = tc.arguments;
671
1012
  for (let i = 0; i < args.length; i += chunkSize) {
672
1013
  if (ws.isClosed) break;
@@ -677,13 +1018,14 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
677
1018
  }
678
1019
  if (ws.isClosed) break;
679
1020
  const chunk = args.slice(i, i + chunkSize);
680
- ws.send(evt("response.function_call_arguments.delta", {
1021
+ sendEvent(ws, {
1022
+ type: "response.function_call_arguments.delta",
681
1023
  response_id: responseId,
682
1024
  item_id: itemId,
683
1025
  output_index: tcIdx,
684
1026
  call_id: callId,
685
1027
  delta: chunk
686
- }));
1028
+ }, isBeta);
687
1029
  interruption?.tick();
688
1030
  if (interruption?.signal.aborted) {
689
1031
  interrupted = true;
@@ -692,18 +1034,35 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
692
1034
  }
693
1035
  if (interrupted) break;
694
1036
  if (ws.isClosed) break;
695
- ws.send(evt("response.function_call_arguments.done", {
1037
+ sendEvent(ws, {
1038
+ type: "response.function_call_arguments.done",
696
1039
  response_id: responseId,
697
1040
  item_id: itemId,
698
1041
  output_index: tcIdx,
699
1042
  call_id: callId,
700
1043
  arguments: args
701
- }));
702
- ws.send(evt("response.output_item.done", {
1044
+ }, isBeta);
1045
+ sendEvent(ws, {
1046
+ type: "response.output_item.done",
703
1047
  response_id: responseId,
704
1048
  output_index: tcIdx,
705
- item: outputItem
706
- }));
1049
+ item: {
1050
+ ...outputItem,
1051
+ phase: "final_answer"
1052
+ }
1053
+ }, isBeta);
1054
+ sendEvent(ws, {
1055
+ type: "conversation.item.done",
1056
+ item: {
1057
+ id: itemId,
1058
+ object: "realtime.item",
1059
+ type: "function_call",
1060
+ status: "completed",
1061
+ call_id: callId,
1062
+ name: tc.name,
1063
+ arguments: args
1064
+ }
1065
+ }, isBeta);
707
1066
  outputItems.push(outputItem);
708
1067
  }
709
1068
  if (interrupted) {
@@ -715,17 +1074,20 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
715
1074
  }
716
1075
  interruption?.cleanup();
717
1076
  if (ws.isClosed) return;
718
- ws.send(evt("response.done", { response: {
719
- id: responseId,
720
- object: "realtime.response",
721
- status: "completed",
722
- output: outputItems,
723
- usage: {
724
- total_tokens: 0,
725
- input_tokens: 0,
726
- output_tokens: 0
1077
+ sendEvent(ws, {
1078
+ type: "response.done",
1079
+ response: {
1080
+ id: responseId,
1081
+ object: "realtime.response",
1082
+ status: "completed",
1083
+ output: outputItems,
1084
+ usage: {
1085
+ total_tokens: 0,
1086
+ input_tokens: 0,
1087
+ output_tokens: 0
1088
+ }
727
1089
  }
728
- } }));
1090
+ }, isBeta);
729
1091
  for (const item of outputItems) conversationItems.push(item);
730
1092
  return;
731
1093
  }
@@ -739,7 +1101,7 @@ async function handleResponseCreate(ws, fixtures, journal, defaults, session, co
739
1101
  fixture
740
1102
  }
741
1103
  });
742
- ws.send(buildErrorRealtimeEvent("Fixture response did not match any known type", "server_error"));
1104
+ buildErrorRealtimeEvent(ws, "Fixture response did not match any known type", isBeta, "server_error");
743
1105
  }
744
1106
 
745
1107
  //#endregion