@ai-sdk/xai 4.0.0-canary.70 → 4.0.0-canary.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @ai-sdk/xai
2
2
 
3
+ ## 4.0.0-canary.71
4
+
5
+ ### Patch Changes
6
+
7
+ - ce769dd: feat(provider): add experimental Realtime API support for voice conversations
8
+
9
+ Adds first-class support for realtime (speech-to-speech) APIs:
10
+
11
+ - `Experimental_RealtimeModelV4` spec in `@ai-sdk/provider` with normalized event types and factory
12
+ - OpenAI, Google, and xAI realtime provider implementations
13
+ - `openai.experimental_realtime()` / `google.experimental_realtime()` / `xai.experimental_realtime()` work in both server and browser
14
+ - `.getToken()` static method on each provider for server-side ephemeral token creation
15
+ - `experimental_getRealtimeToolDefinitions` helper for provider session tool definitions
16
+ - `experimental_useRealtime` hook in `@ai-sdk/react` returning `UIMessage[]` (aligned with `useChat`), with `onToolCall` and `addToolOutput` for client-driven tool execution
17
+ - `inputAudioTranscription` session config for showing transcribed user audio messages when supported by the provider
18
+
19
+ - Updated dependencies [ce769dd]
20
+ - @ai-sdk/provider@4.0.0-canary.18
21
+ - @ai-sdk/openai-compatible@3.0.0-canary.54
22
+ - @ai-sdk/provider-utils@5.0.0-canary.46
23
+
3
24
  ## 4.0.0-canary.70
4
25
 
5
26
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { z } from 'zod/v4';
2
2
  import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
3
3
  import { InferSchema, FetchFunction } from '@ai-sdk/provider-utils';
4
- import { ProviderV4, LanguageModelV4, ImageModelV4, Experimental_VideoModelV4, FilesV4 } from '@ai-sdk/provider';
4
+ import { ProviderV4, LanguageModelV4, ImageModelV4, Experimental_VideoModelV4, Experimental_RealtimeFactoryV4, FilesV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
5
5
 
6
6
  type XaiChatModelId = 'grok-4.20-non-reasoning' | 'grok-4.20-reasoning' | 'grok-4.3' | 'grok-latest' | (string & {});
7
7
  declare const xaiLanguageModelChatOptions: z.ZodObject<{
@@ -419,6 +419,7 @@ interface XaiProvider extends ProviderV4 {
419
419
  * Creates an Xai video model for video generation.
420
420
  */
421
421
  videoModel(modelId: XaiVideoModelId): Experimental_VideoModelV4;
422
+ experimental_realtime: Experimental_RealtimeFactoryV4;
422
423
  /**
423
424
  * Returns the xAI files interface for uploading files.
424
425
  */
@@ -454,6 +455,31 @@ interface XaiProviderSettings {
454
455
  declare function createXai(options?: XaiProviderSettings): XaiProvider;
455
456
  declare const xai: XaiProvider;
456
457
 
458
/**
 * Settings handed to the `XaiRealtimeModel` constructor.
 */
type XaiRealtimeModelConfig = {
    /** Identifier copied onto the model's `provider` property. */
    provider: string;
    /**
     * Base URL of the HTTP API. The client-secret endpoint is resolved relative
     * to it, and its host is reused when building the WebSocket URL.
     */
    baseURL: string;
    /**
     * Produces the headers sent with the client-secret request. Invoked once
     * per request.
     */
    headers: () => Record<string, string | undefined>;
    /** Custom fetch implementation; the global `fetch` is used when omitted. */
    fetch?: FetchFunction;
};
464
/**
 * xAI implementation of the experimental realtime model specification.
 */
declare class XaiRealtimeModel implements Experimental_RealtimeModelV4 {
    /** Version of the realtime model specification implemented by this class. */
    readonly specificationVersion: "v4";
    /** Provider identifier taken from the constructor config. */
    readonly provider: string;
    /** Model identifier passed to the constructor. */
    readonly modelId: string;
    private readonly config;
    /**
     * @param modelId Identifier of the realtime model.
     * @param config Provider name, base URL, header factory and optional fetch.
     */
    constructor(modelId: string, config: XaiRealtimeModelConfig);
    /**
     * Requests a client secret from the provider and resolves with the token,
     * the WebSocket URL to connect to, and the expiry reported by the server.
     */
    doCreateClientSecret(options: Experimental_RealtimeModelV4ClientSecretOptions): Promise<Experimental_RealtimeModelV4ClientSecretResult>;
    /**
     * Builds the WebSocket connection parameters for a previously issued token.
     */
    getWebSocketConfig(options: {
        token: string;
        url: string;
    }): {
        url: string;
        protocols?: string[];
    };
    /** Converts a raw provider message into a normalized server event. */
    parseServerEvent(raw: unknown): Experimental_RealtimeModelV4ServerEvent;
    /** Converts a normalized client event into the provider wire message. */
    serializeClientEvent(event: Experimental_RealtimeModelV4ClientEvent): unknown;
    /** Converts a normalized session config into the provider session payload. */
    buildSessionConfig(config: Experimental_RealtimeModelV4SessionConfig): Record<string, unknown>;
}
482
+
457
483
  declare const VERSION: string;
458
484
 
459
- export { VERSION, type XaiErrorData, type XaiFilesOptions, type XaiImageModelOptions, type XaiImageModelOptions as XaiImageProviderOptions, type XaiLanguageModelChatOptions, type XaiLanguageModelResponsesOptions, type XaiProvider, type XaiLanguageModelChatOptions as XaiProviderOptions, type XaiProviderSettings, type XaiLanguageModelResponsesOptions as XaiResponsesProviderOptions, type XaiVideoModelId, type XaiVideoModelOptions, type XaiVideoModelOptions as XaiVideoProviderOptions, codeExecution, createXai, mcpServer, viewImage, viewXVideo, webSearch, xSearch, xai, xaiTools };
485
+ export { XaiRealtimeModel as Experimental_XaiRealtimeModel, type XaiRealtimeModelConfig as Experimental_XaiRealtimeModelConfig, VERSION, type XaiErrorData, type XaiFilesOptions, type XaiImageModelOptions, type XaiImageModelOptions as XaiImageProviderOptions, type XaiLanguageModelChatOptions, type XaiLanguageModelResponsesOptions, type XaiProvider, type XaiLanguageModelChatOptions as XaiProviderOptions, type XaiProviderSettings, type XaiLanguageModelResponsesOptions as XaiResponsesProviderOptions, type XaiVideoModelId, type XaiVideoModelOptions, type XaiVideoModelOptions as XaiVideoProviderOptions, codeExecution, createXai, mcpServer, viewImage, viewXVideo, webSearch, xSearch, xai, xaiTools };
package/dist/index.js CHANGED
@@ -2969,6 +2969,375 @@ var XaiResponsesLanguageModel = class _XaiResponsesLanguageModel {
2969
2969
  }
2970
2970
  };
2971
2971
 
2972
+ // src/realtime/xai-realtime-event-mapper.ts
2973
/**
 * Converts a raw xAI realtime server message into the normalized realtime
 * server event shape.
 *
 * Recognized message types are mapped to their normalized counterpart with the
 * relevant identifiers copied over. Every result carries the untouched payload
 * under `raw`. Anything that is not mapped (conversation creation, MCP
 * lifecycle messages, unknown types, payloads without a `type`) is returned as
 * a `custom` event whose `rawType` is the original type value.
 *
 * @param raw Decoded server message. `null` and `undefined` are tolerated and
 *   produce a `custom` event instead of throwing.
 * @returns The normalized event.
 */
function parseXaiRealtimeServerEvent(raw) {
  // Read from an empty object when there is no payload so a missing message
  // falls through to the "custom" branch rather than raising a TypeError on
  // `raw.type`. Primitives and objects without `type` already behaved that way.
  const event = raw != null ? raw : {};
  const type = event.type;

  // Nested objects (`session`, `item`, `response`, `error`) may be absent.
  const prop = (holder, key) => (holder == null ? void 0 : holder[key]);
  // Nullish fallback: use `fallback` only when `preferred` is null/undefined.
  const either = (preferred, fallback) =>
    preferred != null ? preferred : fallback;
  // Shape shared by every message that is scoped to a response and an item.
  const responseScoped = (normalizedType, extra = {}) => ({
    type: normalizedType,
    responseId: event.response_id,
    itemId: event.item_id,
    ...extra,
    raw
  });
  // The item id is reported either on the embedded item or at the top level.
  const embeddedItemId = () => either(prop(event.item, "id"), event.item_id);
  // Same for the response id.
  const embeddedResponseId = () =>
    either(prop(event.response, "id"), event.response_id);

  switch (type) {
    case "session.created":
      return {
        type: "session-created",
        sessionId: prop(event.session, "id"),
        raw
      };
    case "session.updated":
      return { type: "session-updated", raw };

    case "input_audio_buffer.speech_started":
      return { type: "speech-started", itemId: event.item_id, raw };
    case "input_audio_buffer.speech_stopped":
      return { type: "speech-stopped", itemId: event.item_id, raw };
    case "input_audio_buffer.committed":
      return {
        type: "audio-committed",
        itemId: event.item_id,
        previousItemId: event.previous_item_id,
        raw
      };

    case "conversation.item.added":
      return {
        type: "conversation-item-added",
        itemId: embeddedItemId(),
        item: event.item,
        raw
      };
    case "conversation.item.input_audio_transcription.completed":
      return {
        type: "input-transcription-completed",
        itemId: event.item_id,
        // A completed transcription without text is reported as empty.
        transcript: either(event.transcript, ""),
        raw
      };

    case "response.created":
      return {
        type: "response-created",
        responseId: embeddedResponseId(),
        raw
      };
    case "response.done":
      return {
        type: "response-done",
        responseId: embeddedResponseId(),
        // A finished response without an explicit status counts as completed.
        status: either(prop(event.response, "status"), "completed"),
        raw
      };

    case "response.output_item.added":
      return {
        type: "output-item-added",
        responseId: event.response_id,
        itemId: embeddedItemId(),
        raw
      };
    case "response.output_item.done":
      return {
        type: "output-item-done",
        responseId: event.response_id,
        itemId: embeddedItemId(),
        raw
      };

    case "response.content_part.added":
      return responseScoped("content-part-added");
    case "response.content_part.done":
      return responseScoped("content-part-done");

    case "response.output_audio.delta":
      return responseScoped("audio-delta", { delta: event.delta });
    case "response.output_audio.done":
      return responseScoped("audio-done");
    case "response.output_audio_transcript.delta":
      return responseScoped("audio-transcript-delta", { delta: event.delta });
    case "response.output_audio_transcript.done":
      return responseScoped("audio-transcript-done", {
        transcript: event.transcript
      });

    case "response.text.delta":
      return responseScoped("text-delta", { delta: event.delta });
    case "response.text.done":
      return responseScoped("text-done", { text: event.text });

    case "response.function_call_arguments.delta":
      return responseScoped("function-call-arguments-delta", {
        callId: event.call_id,
        delta: event.delta
      });
    case "response.function_call_arguments.done":
      return responseScoped("function-call-arguments-done", {
        callId: event.call_id,
        name: event.name,
        arguments: event.arguments
      });

    case "error":
      return {
        type: "error",
        // Error details are read from the nested `error` object first, then
        // from the top level of the message.
        message: either(
          either(prop(event.error, "message"), event.message),
          "Unknown error"
        ),
        code: either(prop(event.error, "code"), event.code),
        raw
      };

    // Known message types without a normalized counterpart are listed
    // explicitly; they are surfaced exactly like unknown types.
    case "conversation.created":
    case "mcp_list_tools.in_progress":
    case "mcp_list_tools.completed":
    case "mcp_list_tools.failed":
    case "response.mcp_call_arguments.delta":
    case "response.mcp_call_arguments.done":
    case "response.mcp_call.in_progress":
    case "response.mcp_call.completed":
    case "response.mcp_call.failed":
    default:
      return { type: "custom", rawType: type, raw };
  }
}
3148
/**
 * Converts a normalized realtime client event into the message sent to xAI.
 *
 * Returns `undefined` for events that have no wire representation here:
 * `conversation-item-truncate`, conversation items of an unrecognized type,
 * and unrecognized event types.
 *
 * @param event Normalized client event.
 * @returns The wire message, or `undefined` when nothing should be sent.
 */
function serializeXaiRealtimeClientEvent(event) {
  const kind = event.type;

  if (kind === "session-update") {
    return {
      type: "session.update",
      session: buildXaiSessionConfig(event.config)
    };
  }

  if (kind === "input-audio-append") {
    return { type: "input_audio_buffer.append", audio: event.audio };
  }
  if (kind === "input-audio-commit") {
    return { type: "input_audio_buffer.commit" };
  }
  if (kind === "input-audio-clear") {
    return { type: "input_audio_buffer.clear" };
  }

  if (kind === "conversation-item-create") {
    const source = event.item;
    const sourceKind = source.type;
    // Wraps an already-translated item in the creation envelope.
    const createItem = (wireItem) => ({
      type: "conversation.item.create",
      item: wireItem
    });
    // Text and audio messages differ only in their single content part.
    const message = (contentPart) =>
      createItem({
        type: "message",
        role: source.role,
        content: [contentPart]
      });

    if (sourceKind === "text-message") {
      return message({ type: "input_text", text: source.text });
    }
    if (sourceKind === "audio-message") {
      return message({ type: "input_audio", audio: source.audio });
    }
    if (sourceKind === "function-call-output") {
      return createItem({
        type: "function_call_output",
        call_id: source.callId,
        output: source.output
      });
    }
    // Unrecognized item types produce no message.
    return void 0;
  }

  if (kind === "conversation-item-truncate") {
    // No wire message is emitted for truncation.
    return void 0;
  }

  if (kind === "response-create") {
    const { options } = event;
    if (options == null) {
      return { type: "response.create" };
    }
    // Only forward the overrides that were actually provided.
    const overrides = {
      ...(options.modalities != null ? { modalities: options.modalities } : {}),
      ...(options.instructions != null
        ? { instructions: options.instructions }
        : {})
    };
    return { type: "response.create", response: overrides };
  }

  if (kind === "response-cancel") {
    return { type: "response.cancel" };
  }

  return void 0;
}
3213
/**
 * Translates a normalized realtime session config into the session payload
 * sent to xAI.
 *
 * Only settings that are present are emitted. Keys are produced in the order
 * `instructions`, `voice`, `audio`, `turn_detection`, `tools`, followed by any
 * extra keys from `providerOptions`.
 *
 * @param config Normalized session configuration.
 * @returns The provider session object.
 */
function buildXaiSessionConfig(config) {
  // Audio formats are nested under `format`; the sample rate is optional.
  const toWireFormat = (format) => ({
    format: {
      type: format.type,
      ...(format.rate != null ? { rate: format.rate } : {})
    }
  });

  // "disabled" switches detection off (null); anything else is server VAD
  // with whichever tuning values were supplied.
  const toWireTurnDetection = (detection) => {
    if (detection.type === "disabled") {
      return null;
    }
    return {
      type: "server_vad",
      ...(detection.threshold != null
        ? { threshold: detection.threshold }
        : {}),
      ...(detection.silenceDurationMs != null
        ? { silence_duration_ms: detection.silenceDurationMs }
        : {}),
      ...(detection.prefixPaddingMs != null
        ? { prefix_padding_ms: detection.prefixPaddingMs }
        : {})
    };
  };

  // Tool definitions are narrowed to the four fields that are forwarded.
  const toWireTool = (tool) => ({
    type: tool.type,
    name: tool.name,
    description: tool.description,
    parameters: tool.parameters
  });

  const audioSettings = {
    ...(config.inputAudioFormat != null
      ? { input: toWireFormat(config.inputAudioFormat) }
      : {}),
    ...(config.outputAudioFormat != null
      ? { output: toWireFormat(config.outputAudioFormat) }
      : {})
  };
  const hasAudioSettings = Object.keys(audioSettings).length > 0;
  const hasTools = config.tools != null && config.tools.length > 0;

  const payload = {
    ...(config.instructions != null
      ? { instructions: config.instructions }
      : {}),
    ...(config.voice != null ? { voice: config.voice } : {}),
    ...(hasAudioSettings ? { audio: audioSettings } : {}),
    ...(config.turnDetection != null
      ? { turn_detection: toWireTurnDetection(config.turnDetection) }
      : {}),
    ...(hasTools ? { tools: config.tools.map(toWireTool) } : {})
  };

  const extras = config.providerOptions;
  if (extras == null) {
    return payload;
  }

  // Provider-supplied tools are appended after the normalized ones.
  if (Array.isArray(extras.tools)) {
    const alreadyListed = payload.tools != null ? payload.tools : [];
    payload.tools = [...alreadyListed, ...extras.tools];
  }
  // Every other provider option is copied verbatim and overrides a
  // same-named normalized setting.
  for (const [name, setting] of Object.entries(extras)) {
    if (name === "tools") {
      continue;
    }
    payload[name] = setting;
  }
  return payload;
}
3283
+
3284
+ // src/realtime/xai-realtime-model.ts
3285
/**
 * xAI implementation of the experimental realtime model specification.
 *
 * Issues client secrets over HTTP, describes how to open the realtime
 * WebSocket with such a secret, and delegates event/session translation to
 * the xAI realtime mapper functions.
 */
var XaiRealtimeModel = class {
  /**
   * @param modelId Identifier of the realtime model.
   * @param config Provider name, base URL, header factory and optional fetch.
   */
  constructor(modelId, config) {
    this.specificationVersion = "v4";
    this.modelId = modelId;
    this.provider = config.provider;
    this.config = config;
  }

  /**
   * Requests a client secret from `<baseURL>/realtime/client_secrets`.
   *
   * @param options Secret options; `expiresAfterSeconds`, when set, is sent as
   *   `expires_after.seconds`.
   * @returns The token, the WebSocket URL to connect to, and the expiry value
   *   reported by the server.
   * @throws Error when the HTTP response is not OK, or when the response body
   *   does not contain a token.
   */
  async doCreateClientSecret(options) {
    const fetchFn = this.config.fetch != null ? this.config.fetch : fetch;
    const url = `${this.config.baseURL}/realtime/client_secrets`;
    const body = options.expiresAfterSeconds != null
      ? { expires_after: { seconds: options.expiresAfterSeconds } }
      : {};

    // The header factory may return entries without a value. Passing those to
    // fetch would send the literal string "undefined", so they are dropped.
    const headers = {};
    const candidateHeaders = {
      ...this.config.headers(),
      "Content-Type": "application/json"
    };
    for (const [name, value] of Object.entries(candidateHeaders)) {
      if (value != null) {
        headers[name] = value;
      }
    }

    const response = await fetchFn(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body)
    });
    if (!response.ok) {
      const text = await response.text();
      throw new Error(
        `xAI realtime client secret request failed: ${response.status} ${text}`
      );
    }

    const data = await response.json();
    // Fail here rather than handing back `token: undefined`, which would only
    // surface later as a rejected WebSocket handshake.
    if (data == null || typeof data.value !== "string" || data.value === "") {
      throw new Error(
        "xAI realtime client secret response did not include a token value"
      );
    }
    return {
      token: data.value,
      // xAI selects the voice model from the `model` query parameter on the
      // WebSocket URL. Without it the model choice is silently ignored and the
      // server falls back to its default voice model.
      url: `wss://${new URL(this.config.baseURL).host}/v1/realtime?model=${encodeURIComponent(this.modelId)}`,
      expiresAt: data.expires_at
    };
  }

  /**
   * Builds the WebSocket connection parameters for an issued token. The token
   * is passed as a `xai-client-secret.<token>` subprotocol.
   */
  getWebSocketConfig(options) {
    return {
      url: options.url,
      protocols: [`xai-client-secret.${options.token}`]
    };
  }

  /** Converts a raw server message into a normalized server event. */
  parseServerEvent(raw) {
    return parseXaiRealtimeServerEvent(raw);
  }

  /** Converts a normalized client event into the xAI wire message. */
  serializeClientEvent(event) {
    return serializeXaiRealtimeClientEvent(event);
  }

  /** Converts a normalized session config into the xAI session payload. */
  buildSessionConfig(config) {
    return buildXaiSessionConfig(config);
  }
};
3340
+
2972
3341
  // src/tool/code-execution.ts
2973
3342
  import { createProviderExecutedToolFactory as createProviderExecutedToolFactory5 } from "@ai-sdk/provider-utils";
2974
3343
  import { z as z12 } from "zod/v4";
@@ -3024,7 +3393,7 @@ var xaiTools = {
3024
3393
  };
3025
3394
 
3026
3395
  // src/version.ts
3027
- var VERSION = true ? "4.0.0-canary.70" : "0.0.0-test";
3396
+ var VERSION = true ? "4.0.0-canary.71" : "0.0.0-test";
3028
3397
 
3029
3398
  // src/files/xai-files.ts
3030
3399
  import {
@@ -3527,6 +3896,31 @@ function createXai(options = {}) {
3527
3896
  fetch: options.fetch
3528
3897
  });
3529
3898
  };
3899
+ const createRealtimeModel = (modelId) => {
3900
+ return new XaiRealtimeModel(modelId, {
3901
+ provider: "xai.realtime",
3902
+ baseURL: baseURL != null ? baseURL : "https://api.x.ai/v1",
3903
+ headers: getHeaders,
3904
+ fetch: options.fetch
3905
+ });
3906
+ };
3907
+ const experimentalRealtimeFactory = Object.assign(
3908
+ (modelId) => createRealtimeModel(modelId),
3909
+ {
3910
+ getToken: async (tokenOptions) => {
3911
+ const model = createRealtimeModel(tokenOptions.model);
3912
+ const secret = await model.doCreateClientSecret({
3913
+ sessionConfig: tokenOptions.sessionConfig,
3914
+ expiresAfterSeconds: tokenOptions.expiresAfterSeconds
3915
+ });
3916
+ return {
3917
+ token: secret.token,
3918
+ url: secret.url,
3919
+ expiresAt: secret.expiresAt
3920
+ };
3921
+ }
3922
+ }
3923
+ );
3530
3924
  const createFiles = () => new XaiFiles({
3531
3925
  provider: "xai.files",
3532
3926
  baseURL,
@@ -3546,12 +3940,14 @@ function createXai(options = {}) {
3546
3940
  provider.image = createImageModel;
3547
3941
  provider.videoModel = createVideoModel;
3548
3942
  provider.video = createVideoModel;
3943
+ provider.experimental_realtime = experimentalRealtimeFactory;
3549
3944
  provider.files = createFiles;
3550
3945
  provider.tools = xaiTools;
3551
3946
  return provider;
3552
3947
  }
3553
3948
  var xai = createXai();
3554
3949
  export {
3950
+ XaiRealtimeModel as Experimental_XaiRealtimeModel,
3555
3951
  VERSION,
3556
3952
  codeExecution,
3557
3953
  createXai,