@ai-sdk/xai 4.0.0-canary.70 → 4.0.0-canary.72

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,33 @@
1
1
  # @ai-sdk/xai
2
2
 
3
+ ## 4.0.0-canary.72
4
+
5
+ ### Patch Changes
6
+
7
+ - 7486744: Add xAI speech-to-text transcription support.
8
+ - 7486744: feat(provider/xai): add text-to-speech support
9
+
10
+ ## 4.0.0-canary.71
11
+
12
+ ### Patch Changes
13
+
14
+ - ce769dd: feat(provider): add experimental Realtime API support for voice conversations
15
+
16
+ Adds first-class support for realtime (speech-to-speech) APIs:
17
+
18
+ - `Experimental_RealtimeModelV4` spec in `@ai-sdk/provider` with normalized event types and factory
19
+ - OpenAI, Google, and xAI realtime provider implementations
20
+ - `openai.experimental_realtime()` / `google.experimental_realtime()` / `xai.experimental_realtime()` work in both server and browser
21
+ - `.getToken()` static method on each provider for server-side ephemeral token creation
22
+ - `experimental_getRealtimeToolDefinitions` helper for provider session tool definitions
23
+ - `experimental_useRealtime` hook in `@ai-sdk/react` returning `UIMessage[]` (aligned with `useChat`), with `onToolCall` and `addToolOutput` for client-driven tool execution
24
+ - `inputAudioTranscription` session config for showing transcribed user audio messages when supported by the provider
25
+
26
+ - Updated dependencies [ce769dd]
27
+ - @ai-sdk/provider@4.0.0-canary.18
28
+ - @ai-sdk/openai-compatible@3.0.0-canary.54
29
+ - @ai-sdk/provider-utils@5.0.0-canary.46
30
+
3
31
  ## 4.0.0-canary.70
4
32
 
5
33
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { z } from 'zod/v4';
2
2
  import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
3
3
  import { InferSchema, FetchFunction } from '@ai-sdk/provider-utils';
4
- import { ProviderV4, LanguageModelV4, ImageModelV4, Experimental_VideoModelV4, FilesV4 } from '@ai-sdk/provider';
4
+ import { ProviderV4, LanguageModelV4, ImageModelV4, Experimental_VideoModelV4, Experimental_RealtimeFactoryV4, SpeechModelV4, TranscriptionModelV4, FilesV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
5
5
 
6
6
  type XaiChatModelId = 'grok-4.20-non-reasoning' | 'grok-4.20-reasoning' | 'grok-4.3' | 'grok-latest' | (string & {});
7
7
  declare const xaiLanguageModelChatOptions: z.ZodObject<{
@@ -179,6 +179,27 @@ interface XaiLegacyReferenceToVideoOptions extends XaiVideoSharedOptions {
179
179
  */
180
180
  type XaiVideoModelOptions = XaiVideoGenerationOptions | XaiVideoEditModeOptions | XaiVideoExtendModeOptions | XaiVideoReferenceToVideoOptions | XaiLegacyEditVideoOptions | XaiLegacyReferenceToVideoOptions;
181
181
 
182
+ declare const xaiSpeechModelOptionsSchema: _ai_sdk_provider_utils.LazySchema<{
183
+ sampleRate?: 8000 | 16000 | 22050 | 24000 | 44100 | 48000 | null | undefined;
184
+ bitRate?: 32000 | 64000 | 96000 | 128000 | 192000 | null | undefined;
185
+ optimizeStreamingLatency?: 0 | 1 | 2 | null | undefined;
186
+ textNormalization?: boolean | null | undefined;
187
+ }>;
188
+ type XaiSpeechModelOptions = InferSchema<typeof xaiSpeechModelOptionsSchema>;
189
+
190
+ declare const xaiTranscriptionModelOptionsSchema: _ai_sdk_provider_utils.LazySchema<{
191
+ audioFormat?: "pcm" | "mulaw" | "alaw" | null | undefined;
192
+ sampleRate?: 8000 | 16000 | 22050 | 24000 | 44100 | 48000 | null | undefined;
193
+ language?: string | null | undefined;
194
+ format?: boolean | null | undefined;
195
+ multichannel?: boolean | null | undefined;
196
+ channels?: number | null | undefined;
197
+ diarize?: boolean | null | undefined;
198
+ keyterm?: string | string[] | null | undefined;
199
+ fillerWords?: boolean | null | undefined;
200
+ }>;
201
+ type XaiTranscriptionModelOptions = InferSchema<typeof xaiTranscriptionModelOptionsSchema>;
202
+
182
203
  declare const xaiFilesOptionsSchema: _ai_sdk_provider_utils.LazySchema<{
183
204
  [x: string]: unknown;
184
205
  teamId?: string | undefined;
@@ -419,6 +440,23 @@ interface XaiProvider extends ProviderV4 {
419
440
  * Creates an Xai video model for video generation.
420
441
  */
421
442
  videoModel(modelId: XaiVideoModelId): Experimental_VideoModelV4;
443
+ experimental_realtime: Experimental_RealtimeFactoryV4;
444
+ /**
445
+ * Creates an xAI model for speech generation (text-to-speech).
446
+ */
447
+ speech(): SpeechModelV4;
448
+ /**
449
+ * Creates an xAI model for speech generation (text-to-speech).
450
+ */
451
+ speechModel(): SpeechModelV4;
452
+ /**
453
+ * Creates an xAI model for speech-to-text transcription.
454
+ */
455
+ transcription(): TranscriptionModelV4;
456
+ /**
457
+ * Creates an xAI model for speech-to-text transcription.
458
+ */
459
+ transcriptionModel(): TranscriptionModelV4;
422
460
  /**
423
461
  * Returns the xAI files interface for uploading files.
424
462
  */
@@ -454,6 +492,31 @@ interface XaiProviderSettings {
454
492
  declare function createXai(options?: XaiProviderSettings): XaiProvider;
455
493
  declare const xai: XaiProvider;
456
494
 
495
+ type XaiRealtimeModelConfig = {
496
+ provider: string;
497
+ baseURL: string;
498
+ headers: () => Record<string, string | undefined>;
499
+ fetch?: FetchFunction;
500
+ };
501
+ declare class XaiRealtimeModel implements Experimental_RealtimeModelV4 {
502
+ readonly specificationVersion: "v4";
503
+ readonly provider: string;
504
+ readonly modelId: string;
505
+ private readonly config;
506
+ constructor(modelId: string, config: XaiRealtimeModelConfig);
507
+ doCreateClientSecret(options: Experimental_RealtimeModelV4ClientSecretOptions): Promise<Experimental_RealtimeModelV4ClientSecretResult>;
508
+ getWebSocketConfig(options: {
509
+ token: string;
510
+ url: string;
511
+ }): {
512
+ url: string;
513
+ protocols?: string[];
514
+ };
515
+ parseServerEvent(raw: unknown): Experimental_RealtimeModelV4ServerEvent;
516
+ serializeClientEvent(event: Experimental_RealtimeModelV4ClientEvent): unknown;
517
+ buildSessionConfig(config: Experimental_RealtimeModelV4SessionConfig): Record<string, unknown>;
518
+ }
519
+
457
520
  declare const VERSION: string;
458
521
 
459
- export { VERSION, type XaiErrorData, type XaiFilesOptions, type XaiImageModelOptions, type XaiImageModelOptions as XaiImageProviderOptions, type XaiLanguageModelChatOptions, type XaiLanguageModelResponsesOptions, type XaiProvider, type XaiLanguageModelChatOptions as XaiProviderOptions, type XaiProviderSettings, type XaiLanguageModelResponsesOptions as XaiResponsesProviderOptions, type XaiVideoModelId, type XaiVideoModelOptions, type XaiVideoModelOptions as XaiVideoProviderOptions, codeExecution, createXai, mcpServer, viewImage, viewXVideo, webSearch, xSearch, xai, xaiTools };
522
+ export { XaiRealtimeModel as Experimental_XaiRealtimeModel, type XaiRealtimeModelConfig as Experimental_XaiRealtimeModelConfig, VERSION, type XaiErrorData, type XaiFilesOptions, type XaiImageModelOptions, type XaiImageModelOptions as XaiImageProviderOptions, type XaiLanguageModelChatOptions, type XaiLanguageModelResponsesOptions, type XaiProvider, type XaiLanguageModelChatOptions as XaiProviderOptions, type XaiProviderSettings, type XaiLanguageModelResponsesOptions as XaiResponsesProviderOptions, type XaiSpeechModelOptions, type XaiTranscriptionModelOptions, type XaiVideoModelId, type XaiVideoModelOptions, type XaiVideoModelOptions as XaiVideoProviderOptions, codeExecution, createXai, mcpServer, viewImage, viewXVideo, webSearch, xSearch, xai, xaiTools };