@ai-sdk/xai 4.0.0-beta.7 → 4.0.0-beta.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +660 -9
- package/README.md +2 -0
- package/dist/index.d.ts +213 -68
- package/dist/index.js +2074 -781
- package/dist/index.js.map +1 -1
- package/docs/01-xai.mdx +445 -54
- package/package.json +15 -15
- package/src/convert-to-xai-chat-messages.ts +48 -27
- package/src/convert-xai-chat-usage.ts +3 -3
- package/src/files/xai-files-api.ts +16 -0
- package/src/files/xai-files-options.ts +19 -0
- package/src/files/xai-files.ts +94 -0
- package/src/index.ts +9 -4
- package/src/map-xai-finish-reason.ts +2 -2
- package/src/realtime/index.ts +2 -0
- package/src/realtime/xai-realtime-event-mapper.ts +399 -0
- package/src/realtime/xai-realtime-model-options.ts +3 -0
- package/src/realtime/xai-realtime-model.ts +101 -0
- package/src/remove-additional-properties.ts +24 -0
- package/src/responses/convert-to-xai-responses-input.ts +100 -23
- package/src/responses/convert-xai-responses-usage.ts +3 -3
- package/src/responses/map-xai-responses-finish-reason.ts +3 -2
- package/src/responses/xai-responses-api.ts +31 -1
- package/src/responses/{xai-responses-options.ts → xai-responses-language-model-options.ts} +12 -7
- package/src/responses/xai-responses-language-model.ts +157 -60
- package/src/responses/xai-responses-prepare-tools.ts +10 -8
- package/src/tool/code-execution.ts +2 -2
- package/src/tool/file-search.ts +2 -2
- package/src/tool/mcp-server.ts +2 -2
- package/src/tool/view-image.ts +2 -2
- package/src/tool/view-x-video.ts +2 -2
- package/src/tool/web-search.ts +4 -2
- package/src/tool/x-search.ts +2 -2
- package/src/{xai-chat-options.ts → xai-chat-language-model-options.ts} +28 -13
- package/src/xai-chat-language-model.ts +65 -29
- package/src/xai-chat-prompt.ts +2 -1
- package/src/xai-error.ts +13 -3
- package/src/xai-image-model.ts +28 -11
- package/src/xai-prepare-tools.ts +9 -8
- package/src/xai-provider.ts +115 -19
- package/src/xai-speech-model-options.ts +55 -0
- package/src/xai-speech-model.ts +167 -0
- package/src/xai-transcription-model-options.ts +70 -0
- package/src/xai-transcription-model.ts +166 -0
- package/src/xai-video-model-options.ts +145 -0
- package/src/xai-video-model.ts +129 -22
- package/dist/index.d.mts +0 -377
- package/dist/index.mjs +0 -3070
- package/dist/index.mjs.map +0 -1
- package/src/xai-video-options.ts +0 -23
- /package/src/{xai-image-options.ts → xai-image-model-options.ts} +0 -0
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
Experimental_RealtimeModelV4ClientEvent as RealtimeModelV4ClientEvent,
|
|
3
|
+
Experimental_RealtimeModelV4ServerEvent as RealtimeModelV4ServerEvent,
|
|
4
|
+
Experimental_RealtimeModelV4SessionConfig as RealtimeModelV4SessionConfig,
|
|
5
|
+
} from '@ai-sdk/provider';
|
|
6
|
+
|
|
7
|
+
/**
 * Loose, flattened view of a JSON event received over the xAI realtime
 * connection. A single shape is used for every event type; the parser picks
 * the fields it needs based on `type`.
 *
 * NOTE(review): several identifiers below are declared as required strings
 * even though they are presumably only present on the event types that carry
 * them - confirm against the xAI wire format before relying on them.
 */
type XaiRealtimeWireEvent = {
  /** Wire event name, e.g. `session.created`. */
  type: string;

  // Nested payload objects.
  session?: { id?: string };
  item?: { id?: string } & Record<string, unknown>;
  response?: { id?: string; status?: string };
  error?: { message?: string; code?: string };

  // Top-level identifiers.
  item_id: string;
  previous_item_id?: string;
  response_id: string;

  // Content payloads.
  transcript?: string;
  delta: string;
  text?: string;

  // Function-call fields.
  call_id: string;
  name: string;
  arguments: string;

  // Top-level error fields (read as a fallback when `error` is absent).
  message?: string;
  code?: string;
};
|
|
25
|
+
|
|
26
|
+
/**
 * Maps a decoded xAI realtime wire event onto the provider-agnostic
 * `RealtimeModelV4ServerEvent` shape.
 *
 * Every returned event carries the untouched input as `raw`. Event types
 * without a dedicated mapping (including the MCP progress events and
 * `conversation.created`) are surfaced as `custom` events with the wire
 * event name in `rawType`.
 *
 * @param raw - The already-decoded event object received from the server.
 * @returns The mapped server event. Input that is `null`, `undefined`, or not
 *   an object is reported as a `custom` event with `rawType: 'unknown'`
 *   instead of throwing.
 */
export function parseXaiRealtimeServerEvent(
  raw: unknown,
): RealtimeModelV4ServerEvent {
  // Reading `.type` from null/undefined would throw a TypeError; route
  // malformed payloads through the same `custom` fallback as unknown events.
  if (raw == null || typeof raw !== 'object') {
    return { type: 'custom', rawType: 'unknown', raw };
  }

  const event = raw as XaiRealtimeWireEvent;
  const type = event.type;

  switch (type) {
    // --- session lifecycle -------------------------------------------------
    case 'session.created':
      return {
        type: 'session-created',
        sessionId: event.session?.id,
        raw,
      };

    case 'session.updated':
      return { type: 'session-updated', raw };

    case 'conversation.created':
      return { type: 'custom', rawType: type, raw };

    // --- input audio buffer ------------------------------------------------
    case 'input_audio_buffer.speech_started':
      return {
        type: 'speech-started',
        itemId: event.item_id,
        raw,
      };

    case 'input_audio_buffer.speech_stopped':
      return {
        type: 'speech-stopped',
        itemId: event.item_id,
        raw,
      };

    case 'input_audio_buffer.committed':
      return {
        type: 'audio-committed',
        itemId: event.item_id,
        previousItemId: event.previous_item_id,
        raw,
      };

    // --- conversation items ------------------------------------------------
    case 'conversation.item.added':
      return {
        type: 'conversation-item-added',
        // Prefer the id on the nested item; fall back to the top-level id.
        itemId: event.item?.id ?? event.item_id,
        item: event.item,
        raw,
      };

    case 'conversation.item.input_audio_transcription.completed':
      return {
        type: 'input-transcription-completed',
        itemId: event.item_id,
        transcript: event.transcript ?? '',
        raw,
      };

    // --- response lifecycle ------------------------------------------------
    case 'response.created':
      return {
        type: 'response-created',
        responseId: event.response?.id ?? event.response_id,
        raw,
      };

    case 'response.done':
      return {
        type: 'response-done',
        responseId: event.response?.id ?? event.response_id,
        // A missing status is reported as `completed`.
        status: event.response?.status ?? 'completed',
        raw,
      };

    case 'response.output_item.added':
      return {
        type: 'output-item-added',
        responseId: event.response_id,
        itemId: event.item?.id ?? event.item_id,
        raw,
      };

    case 'response.output_item.done':
      return {
        type: 'output-item-done',
        responseId: event.response_id,
        itemId: event.item?.id ?? event.item_id,
        raw,
      };

    case 'response.content_part.added':
      return {
        type: 'content-part-added',
        responseId: event.response_id,
        itemId: event.item_id,
        raw,
      };

    case 'response.content_part.done':
      return {
        type: 'content-part-done',
        responseId: event.response_id,
        itemId: event.item_id,
        raw,
      };

    // --- audio output ------------------------------------------------------
    case 'response.output_audio.delta':
      return {
        type: 'audio-delta',
        responseId: event.response_id,
        itemId: event.item_id,
        delta: event.delta,
        raw,
      };

    case 'response.output_audio.done':
      return {
        type: 'audio-done',
        responseId: event.response_id,
        itemId: event.item_id,
        raw,
      };

    case 'response.output_audio_transcript.delta':
      return {
        type: 'audio-transcript-delta',
        responseId: event.response_id,
        itemId: event.item_id,
        delta: event.delta,
        raw,
      };

    case 'response.output_audio_transcript.done':
      return {
        type: 'audio-transcript-done',
        responseId: event.response_id,
        itemId: event.item_id,
        transcript: event.transcript,
        raw,
      };

    // --- text output -------------------------------------------------------
    case 'response.text.delta':
      return {
        type: 'text-delta',
        responseId: event.response_id,
        itemId: event.item_id,
        delta: event.delta,
        raw,
      };

    case 'response.text.done':
      return {
        type: 'text-done',
        responseId: event.response_id,
        itemId: event.item_id,
        text: event.text,
        raw,
      };

    // --- function calls ----------------------------------------------------
    case 'response.function_call_arguments.delta':
      return {
        type: 'function-call-arguments-delta',
        responseId: event.response_id,
        itemId: event.item_id,
        callId: event.call_id,
        delta: event.delta,
        raw,
      };

    case 'response.function_call_arguments.done':
      return {
        type: 'function-call-arguments-done',
        responseId: event.response_id,
        itemId: event.item_id,
        callId: event.call_id,
        name: event.name,
        arguments: event.arguments,
        raw,
      };

    // --- MCP events: passed through untouched as custom events -------------
    case 'mcp_list_tools.in_progress':
    case 'mcp_list_tools.completed':
    case 'mcp_list_tools.failed':
    case 'response.mcp_call_arguments.delta':
    case 'response.mcp_call_arguments.done':
    case 'response.mcp_call.in_progress':
    case 'response.mcp_call.completed':
    case 'response.mcp_call.failed':
      return { type: 'custom', rawType: type, raw };

    // --- errors ------------------------------------------------------------
    case 'error':
      return {
        type: 'error',
        // Details are read from the nested `error` object first, then from
        // the top-level `message` / `code` fields.
        message: event.error?.message ?? event.message ?? 'Unknown error',
        code: event.error?.code ?? event.code,
        raw,
      };

    default:
      return { type: 'custom', rawType: type, raw };
  }
}
|
|
227
|
+
|
|
228
|
+
/**
 * Converts a provider-agnostic realtime client event into the JSON payload
 * sent to xAI.
 *
 * @param event - The client event to serialize.
 * @returns The wire payload, or `undefined` when nothing should be sent
 *   (truncate events, and event or item types that have no mapping here).
 */
export function serializeXaiRealtimeClientEvent(
  event: RealtimeModelV4ClientEvent,
): unknown {
  if (event.type === 'session-update') {
    return {
      type: 'session.update',
      session: buildXaiSessionConfig(event.config),
    };
  }

  if (event.type === 'input-audio-append') {
    return { type: 'input_audio_buffer.append', audio: event.audio };
  }

  if (event.type === 'input-audio-commit') {
    return { type: 'input_audio_buffer.commit' };
  }

  if (event.type === 'input-audio-clear') {
    return { type: 'input_audio_buffer.clear' };
  }

  if (event.type === 'conversation-item-create') {
    const { item } = event;

    if (item.type === 'text-message') {
      return {
        type: 'conversation.item.create',
        item: {
          type: 'message',
          role: item.role,
          content: [{ type: 'input_text', text: item.text }],
        },
      };
    }

    if (item.type === 'audio-message') {
      return {
        type: 'conversation.item.create',
        item: {
          type: 'message',
          role: item.role,
          content: [{ type: 'input_audio', audio: item.audio }],
        },
      };
    }

    if (item.type === 'function-call-output') {
      return {
        type: 'conversation.item.create',
        item: {
          type: 'function_call_output',
          call_id: item.callId,
          output: item.output,
        },
      };
    }

    // Item types without a mapping produce no payload.
    return undefined;
  }

  if (event.type === 'conversation-item-truncate') {
    // xAI does not support `conversation.item.truncate` over WebSocket (it is
    // silently ignored by the server). Barge-in still works because the SDK
    // stops local playback when `speech_started` fires, so dropping the event
    // here just avoids sending a no-op.
    return undefined;
  }

  if (event.type === 'response-create') {
    const payload: Record<string, unknown> = { type: 'response.create' };
    const requestOptions = event.options;

    if (requestOptions != null) {
      // Only forward the overrides that were actually provided.
      const responseOverrides: Record<string, unknown> = {};
      if (requestOptions.modalities != null) {
        responseOverrides.modalities = requestOptions.modalities;
      }
      if (requestOptions.instructions != null) {
        responseOverrides.instructions = requestOptions.instructions;
      }
      payload.response = responseOverrides;
    }

    return payload;
  }

  if (event.type === 'response-cancel') {
    return { type: 'response.cancel' };
  }

  return undefined;
}
|
|
312
|
+
|
|
313
|
+
/**
 * Translates a provider-agnostic realtime session config into the `session`
 * object of an xAI `session.update` payload.
 *
 * Only settings that are present on `config` are emitted. Entries in
 * `config.providerOptions` are applied last: a `tools` array is appended to
 * the tools derived from `config.tools`, and every other key is copied onto
 * the session verbatim, overwriting any same-named key set earlier.
 *
 * @param config - The session configuration to translate.
 * @returns The session object in xAI wire format.
 */
export function buildXaiSessionConfig(
  config: RealtimeModelV4SessionConfig,
): Record<string, unknown> {
  const {
    instructions,
    voice,
    inputAudioFormat,
    outputAudioFormat,
    turnDetection,
    tools,
    providerOptions,
  } = config;

  const wireSession: Record<string, unknown> = {};

  if (instructions != null) {
    wireSession.instructions = instructions;
  }
  if (voice != null) {
    wireSession.voice = voice;
  }

  // Audio formats are nested as `audio.{input|output}.format`; `rate` is only
  // included when it was specified.
  const audioSection: Record<string, unknown> = {};
  const directions = [
    ['input', inputAudioFormat],
    ['output', outputAudioFormat],
  ] as const;
  for (const [direction, format] of directions) {
    if (format == null) {
      continue;
    }
    audioSection[direction] = {
      format:
        format.rate != null
          ? { type: format.type, rate: format.rate }
          : { type: format.type },
    };
  }
  if (Object.keys(audioSection).length > 0) {
    wireSession.audio = audioSection;
  }

  if (turnDetection != null) {
    if (turnDetection.type === 'disabled') {
      // Disabled turn detection is expressed as an explicit null.
      wireSession.turn_detection = null;
    } else {
      const { threshold, silenceDurationMs, prefixPaddingMs } = turnDetection;
      wireSession.turn_detection = {
        type: 'server_vad',
        ...(threshold != null ? { threshold } : {}),
        ...(silenceDurationMs != null
          ? { silence_duration_ms: silenceDurationMs }
          : {}),
        ...(prefixPaddingMs != null
          ? { prefix_padding_ms: prefixPaddingMs }
          : {}),
      };
    }
  }

  if (tools != null && tools.length > 0) {
    wireSession.tools = tools.map(
      ({ type, name, description, parameters }) => ({
        type,
        name,
        description,
        parameters,
      }),
    );
  }

  if (providerOptions != null) {
    const overrides = providerOptions as Record<string, unknown>;

    // Provider-supplied tools extend the tool list instead of replacing it.
    const extraTools = overrides.tools;
    if (Array.isArray(extraTools)) {
      const currentTools = (wireSession.tools as unknown[]) ?? [];
      wireSession.tools = [...currentTools, ...extraTools];
    }

    Object.keys(overrides)
      .filter(optionName => optionName !== 'tools')
      .forEach(optionName => {
        wireSession[optionName] = overrides[optionName];
      });
  }

  return wireSession;
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
Experimental_RealtimeModelV4 as RealtimeModelV4,
|
|
3
|
+
Experimental_RealtimeModelV4ClientEvent as RealtimeModelV4ClientEvent,
|
|
4
|
+
Experimental_RealtimeModelV4ClientSecretOptions as RealtimeModelV4ClientSecretOptions,
|
|
5
|
+
Experimental_RealtimeModelV4ClientSecretResult as RealtimeModelV4ClientSecretResult,
|
|
6
|
+
Experimental_RealtimeModelV4ServerEvent as RealtimeModelV4ServerEvent,
|
|
7
|
+
Experimental_RealtimeModelV4SessionConfig as RealtimeModelV4SessionConfig,
|
|
8
|
+
} from '@ai-sdk/provider';
|
|
9
|
+
import type { FetchFunction } from '@ai-sdk/provider-utils';
|
|
10
|
+
import {
|
|
11
|
+
buildXaiSessionConfig,
|
|
12
|
+
parseXaiRealtimeServerEvent,
|
|
13
|
+
serializeXaiRealtimeClientEvent,
|
|
14
|
+
} from './xai-realtime-event-mapper';
|
|
15
|
+
|
|
16
|
+
/**
 * Settings handed to `XaiRealtimeModel` at construction time.
 */
export type XaiRealtimeModelConfig = {
  /** Provider identifier exposed as the model's `provider` property. */
  provider: string;

  /** Base URL that the realtime endpoints are resolved against. */
  baseURL: string;

  /**
   * Produces the headers for HTTP requests. Called on each request, so the
   * values may change between calls; entries may be `undefined`.
   */
  headers: () => Record<string, string | undefined>;

  /** Optional fetch implementation; the global `fetch` is used when omitted. */
  fetch?: FetchFunction;
};
|
|
22
|
+
|
|
23
|
+
/**
 * xAI implementation of the experimental realtime model interface
 * (specification version `v4`).
 *
 * It mints short-lived client secrets over HTTP, describes how to open the
 * realtime WebSocket with such a secret, and delegates event translation to
 * the xAI realtime event mapper.
 */
export class XaiRealtimeModel implements RealtimeModelV4 {
  readonly specificationVersion = 'v4' as const;
  readonly provider: string;
  readonly modelId: string;

  private readonly config: XaiRealtimeModelConfig;

  /**
   * @param modelId - Model identifier; sent as the `model` query parameter of
   *   the WebSocket URL.
   * @param config - Provider name, base URL, header factory and optional
   *   fetch implementation.
   */
  constructor(modelId: string, config: XaiRealtimeModelConfig) {
    this.modelId = modelId;
    this.provider = config.provider;
    this.config = config;
  }

  /**
   * Requests a client secret from `{baseURL}/realtime/client_secrets`.
   *
   * @param options - Only `expiresAfterSeconds` is read here; when set it is
   *   sent as `expires_after.seconds`.
   * @returns The token, the WebSocket URL to connect to, and the expiry
   *   reported by the server (if any).
   * @throws Error when the HTTP response is not OK, or when the response body
   *   does not contain a non-empty string `value`.
   */
  async doCreateClientSecret(
    options: RealtimeModelV4ClientSecretOptions,
  ): Promise<RealtimeModelV4ClientSecretResult> {
    const fetchFn = this.config.fetch ?? fetch;
    const url = `${this.config.baseURL}/realtime/client_secrets`;

    const body: Record<string, unknown> = {};
    if (options.expiresAfterSeconds != null) {
      body.expires_after = { seconds: options.expiresAfterSeconds };
    }

    const response = await fetchFn(url, {
      method: 'POST',
      headers: {
        ...this.config.headers(),
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
    });

    if (!response.ok) {
      const text = await response.text();
      throw new Error(
        `xAI realtime client secret request failed: ${response.status} ${text}`,
      );
    }

    const data = (await response.json()) as {
      value?: unknown;
      expires_at?: number;
    } | null;

    // Without this check a malformed body would yield `token: undefined`,
    // which later turns into the subprotocol `xai-client-secret.undefined`.
    const token = data?.value;
    if (typeof token !== 'string' || token === '') {
      throw new Error(
        'xAI realtime client secret response did not include a token value',
      );
    }

    return {
      token,
      // xAI selects the voice model from the `model` query parameter on the
      // WebSocket URL. Without it the model choice is silently ignored and the
      // server falls back to its default voice model.
      url: `wss://${new URL(this.config.baseURL).host}/v1/realtime?model=${encodeURIComponent(this.modelId)}`,
      expiresAt: data?.expires_at,
    };
  }

  /**
   * Describes how to open the realtime WebSocket for a previously created
   * client secret.
   *
   * @param options - The token and URL returned by `doCreateClientSecret`.
   * @returns The URL unchanged, plus a single subprotocol of the form
   *   `xai-client-secret.<token>` that carries the secret.
   */
  getWebSocketConfig(options: { token: string; url: string }): {
    url: string;
    protocols?: string[];
  } {
    return {
      url: options.url,
      protocols: [`xai-client-secret.${options.token}`],
    };
  }

  /** Maps a raw xAI server event to the provider-agnostic event shape. */
  parseServerEvent(raw: unknown): RealtimeModelV4ServerEvent {
    return parseXaiRealtimeServerEvent(raw);
  }

  /** Converts a provider-agnostic client event to the xAI wire payload. */
  serializeClientEvent(event: RealtimeModelV4ClientEvent): unknown {
    return serializeXaiRealtimeClientEvent(event);
  }

  /** Converts a provider-agnostic session config to the xAI session object. */
  buildSessionConfig(
    config: RealtimeModelV4SessionConfig,
  ): Record<string, unknown> {
    return buildXaiSessionConfig(config);
  }
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Produces a deep copy of a JSON-schema-like value in which every
 * `additionalProperties: false` entry has been dropped. Other values of
 * `additionalProperties` are kept.
 *
 * Used to sanitize tool input schemas before sending them to the xAI API.
 * https://docs.x.ai/developers/model-capabilities/text/structured-outputs#supported-types
 *
 * @param value - Any value; arrays and objects are walked recursively,
 *   everything else is returned as-is.
 * @returns The sanitized copy. The input is not modified.
 */
export function removeAdditionalPropertiesFalse(value: unknown): unknown {
  // Primitives, null and undefined have nothing to strip.
  if (value == null || typeof value !== 'object') {
    return value;
  }

  if (Array.isArray(value)) {
    return value.map(element => removeAdditionalPropertiesFalse(element));
  }

  const source = value as Record<string, unknown>;
  const sanitized: Record<string, unknown> = {};
  for (const key of Object.keys(source)) {
    const child = source[key];
    const isDroppedFlag = key === 'additionalProperties' && child === false;
    if (!isDroppedFlag) {
      sanitized[key] = removeAdditionalPropertiesFalse(child);
    }
  }
  return sanitized;
}
|
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
import {
|
|
2
|
-
SharedV3Warning,
|
|
3
|
-
LanguageModelV3Message,
|
|
4
2
|
UnsupportedFunctionalityError,
|
|
3
|
+
type SharedV4Warning,
|
|
4
|
+
type LanguageModelV4Message,
|
|
5
5
|
} from '@ai-sdk/provider';
|
|
6
|
-
import { convertToBase64 } from '@ai-sdk/provider-utils';
|
|
7
6
|
import {
|
|
7
|
+
convertToBase64,
|
|
8
|
+
getTopLevelMediaType,
|
|
9
|
+
resolveFullMediaType,
|
|
10
|
+
resolveProviderReference,
|
|
11
|
+
} from '@ai-sdk/provider-utils';
|
|
12
|
+
import type {
|
|
8
13
|
XaiResponsesInput,
|
|
9
14
|
XaiResponsesUserMessageContentPart,
|
|
10
15
|
} from './xai-responses-api';
|
|
@@ -12,14 +17,14 @@ import {
|
|
|
12
17
|
export async function convertToXaiResponsesInput({
|
|
13
18
|
prompt,
|
|
14
19
|
}: {
|
|
15
|
-
prompt:
|
|
20
|
+
prompt: LanguageModelV4Message[];
|
|
16
21
|
store?: boolean;
|
|
17
22
|
}): Promise<{
|
|
18
23
|
input: XaiResponsesInput;
|
|
19
|
-
inputWarnings:
|
|
24
|
+
inputWarnings: SharedV4Warning[];
|
|
20
25
|
}> {
|
|
21
26
|
const input: XaiResponsesInput = [];
|
|
22
|
-
const inputWarnings:
|
|
27
|
+
const inputWarnings: SharedV4Warning[] = [];
|
|
23
28
|
|
|
24
29
|
for (const message of prompt) {
|
|
25
30
|
switch (message.role) {
|
|
@@ -42,22 +47,51 @@ export async function convertToXaiResponsesInput({
|
|
|
42
47
|
}
|
|
43
48
|
|
|
44
49
|
case 'file': {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
:
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
}
|
|
50
|
+
switch (block.data.type) {
|
|
51
|
+
case 'reference': {
|
|
52
|
+
contentParts.push({
|
|
53
|
+
type: 'input_file',
|
|
54
|
+
file_id: resolveProviderReference({
|
|
55
|
+
reference: block.data.reference,
|
|
56
|
+
provider: 'xai',
|
|
57
|
+
}),
|
|
58
|
+
});
|
|
59
|
+
break;
|
|
60
|
+
}
|
|
61
|
+
case 'text': {
|
|
62
|
+
throw new UnsupportedFunctionalityError({
|
|
63
|
+
functionality: 'text file parts',
|
|
64
|
+
});
|
|
65
|
+
}
|
|
66
|
+
case 'url':
|
|
67
|
+
case 'data': {
|
|
68
|
+
if (getTopLevelMediaType(block.mediaType) === 'image') {
|
|
69
|
+
const imageUrl =
|
|
70
|
+
block.data.type === 'url'
|
|
71
|
+
? block.data.url.toString()
|
|
72
|
+
: `data:${resolveFullMediaType({ part: block })};base64,${convertToBase64(block.data.data)}`;
|
|
73
|
+
|
|
74
|
+
contentParts.push({
|
|
75
|
+
type: 'input_image',
|
|
76
|
+
image_url: imageUrl,
|
|
77
|
+
});
|
|
78
|
+
} else if (block.data.type === 'url') {
|
|
79
|
+
// xAI's Responses API accepts non-image documents (PDF, text, CSV, etc.)
|
|
80
|
+
// via `{ type: 'input_file', file_url }`. See
|
|
81
|
+
// https://docs.x.ai/docs/guides/chat-with-files. Inline bytes for
|
|
82
|
+
// non-image files are not supported by xAI; callers must upload via
|
|
83
|
+
// the Files API and pass a provider reference (file_id) instead.
|
|
84
|
+
contentParts.push({
|
|
85
|
+
type: 'input_file',
|
|
86
|
+
file_url: block.data.url.toString(),
|
|
87
|
+
});
|
|
88
|
+
} else {
|
|
89
|
+
throw new UnsupportedFunctionalityError({
|
|
90
|
+
functionality: `file part media type ${block.mediaType} as inline data (xAI Responses requires a URL or a Files API reference for non-image files)`,
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
break;
|
|
94
|
+
}
|
|
61
95
|
}
|
|
62
96
|
break;
|
|
63
97
|
}
|
|
@@ -123,7 +157,50 @@ export async function convertToXaiResponsesInput({
|
|
|
123
157
|
break;
|
|
124
158
|
}
|
|
125
159
|
|
|
126
|
-
case 'reasoning':
|
|
160
|
+
case 'reasoning': {
|
|
161
|
+
const itemId =
|
|
162
|
+
typeof part.providerOptions?.xai?.itemId === 'string'
|
|
163
|
+
? part.providerOptions.xai.itemId
|
|
164
|
+
: undefined;
|
|
165
|
+
const encryptedContent =
|
|
166
|
+
typeof part.providerOptions?.xai?.reasoningEncryptedContent ===
|
|
167
|
+
'string'
|
|
168
|
+
? part.providerOptions.xai.reasoningEncryptedContent
|
|
169
|
+
: undefined;
|
|
170
|
+
|
|
171
|
+
if (itemId != null || encryptedContent != null) {
|
|
172
|
+
const summaryParts: Array<{
|
|
173
|
+
type: 'summary_text';
|
|
174
|
+
text: string;
|
|
175
|
+
}> = [];
|
|
176
|
+
if (part.text.length > 0) {
|
|
177
|
+
summaryParts.push({
|
|
178
|
+
type: 'summary_text',
|
|
179
|
+
text: part.text,
|
|
180
|
+
});
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
input.push({
|
|
184
|
+
type: 'reasoning',
|
|
185
|
+
id: itemId ?? '',
|
|
186
|
+
summary: summaryParts,
|
|
187
|
+
status: 'completed',
|
|
188
|
+
...(encryptedContent != null && {
|
|
189
|
+
encrypted_content: encryptedContent,
|
|
190
|
+
}),
|
|
191
|
+
});
|
|
192
|
+
} else {
|
|
193
|
+
inputWarnings.push({
|
|
194
|
+
type: 'other',
|
|
195
|
+
message:
|
|
196
|
+
'Reasoning parts without itemId or encrypted content cannot be sent back to xAI. Skipping.',
|
|
197
|
+
});
|
|
198
|
+
}
|
|
199
|
+
break;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
case 'reasoning-file':
|
|
203
|
+
case 'custom':
|
|
127
204
|
case 'file': {
|
|
128
205
|
inputWarnings.push({
|
|
129
206
|
type: 'other',
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { XaiResponsesUsage } from './xai-responses-api';
|
|
1
|
+
import type { LanguageModelV4Usage } from '@ai-sdk/provider';
|
|
2
|
+
import type { XaiResponsesUsage } from './xai-responses-api';
|
|
3
3
|
|
|
4
4
|
export function convertXaiResponsesUsage(
|
|
5
5
|
usage: XaiResponsesUsage,
|
|
6
|
-
):
|
|
6
|
+
): LanguageModelV4Usage {
|
|
7
7
|
const cacheReadTokens = usage.input_tokens_details?.cached_tokens ?? 0;
|
|
8
8
|
const reasoningTokens = usage.output_tokens_details?.reasoning_tokens ?? 0;
|
|
9
9
|
|