@ai-sdk/xai 4.0.0-canary.71 → 4.0.0-canary.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/index.d.ts +39 -2
- package/dist/index.js +372 -1
- package/dist/index.js.map +1 -1
- package/docs/01-xai.mdx +192 -0
- package/package.json +3 -3
- package/src/index.ts +2 -0
- package/src/xai-provider.ts +46 -0
- package/src/xai-speech-model-options.ts +55 -0
- package/src/xai-speech-model.ts +167 -0
- package/src/xai-transcription-model-options.ts +70 -0
- package/src/xai-transcription-model.ts +166 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# @ai-sdk/xai
|
|
2
2
|
|
|
3
|
+
## 4.0.0-canary.73
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Updated dependencies [bae5e2b]
|
|
8
|
+
- @ai-sdk/provider-utils@5.0.0-canary.47
|
|
9
|
+
- @ai-sdk/openai-compatible@3.0.0-canary.55
|
|
10
|
+
|
|
11
|
+
## 4.0.0-canary.72
|
|
12
|
+
|
|
13
|
+
### Patch Changes
|
|
14
|
+
|
|
15
|
+
- 7486744: Add xAI speech-to-text transcription support.
|
|
16
|
+
- 7486744: feat(provider/xai): add text-to-speech support
|
|
17
|
+
|
|
3
18
|
## 4.0.0-canary.71
|
|
4
19
|
|
|
5
20
|
### Patch Changes
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { z } from 'zod/v4';
|
|
2
2
|
import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
|
|
3
3
|
import { InferSchema, FetchFunction } from '@ai-sdk/provider-utils';
|
|
4
|
-
import { ProviderV4, LanguageModelV4, ImageModelV4, Experimental_VideoModelV4, Experimental_RealtimeFactoryV4, FilesV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
|
|
4
|
+
import { ProviderV4, LanguageModelV4, ImageModelV4, Experimental_VideoModelV4, Experimental_RealtimeFactoryV4, SpeechModelV4, TranscriptionModelV4, FilesV4, Experimental_RealtimeModelV4, Experimental_RealtimeModelV4ClientSecretOptions, Experimental_RealtimeModelV4ClientSecretResult, Experimental_RealtimeModelV4ServerEvent, Experimental_RealtimeModelV4ClientEvent, Experimental_RealtimeModelV4SessionConfig } from '@ai-sdk/provider';
|
|
5
5
|
|
|
6
6
|
type XaiChatModelId = 'grok-4.20-non-reasoning' | 'grok-4.20-reasoning' | 'grok-4.3' | 'grok-latest' | (string & {});
|
|
7
7
|
declare const xaiLanguageModelChatOptions: z.ZodObject<{
|
|
@@ -179,6 +179,27 @@ interface XaiLegacyReferenceToVideoOptions extends XaiVideoSharedOptions {
|
|
|
179
179
|
*/
|
|
180
180
|
type XaiVideoModelOptions = XaiVideoGenerationOptions | XaiVideoEditModeOptions | XaiVideoExtendModeOptions | XaiVideoReferenceToVideoOptions | XaiLegacyEditVideoOptions | XaiLegacyReferenceToVideoOptions;
|
|
181
181
|
|
|
182
|
+
declare const xaiSpeechModelOptionsSchema: _ai_sdk_provider_utils.LazySchema<{
|
|
183
|
+
sampleRate?: 8000 | 16000 | 22050 | 24000 | 44100 | 48000 | null | undefined;
|
|
184
|
+
bitRate?: 32000 | 64000 | 96000 | 128000 | 192000 | null | undefined;
|
|
185
|
+
optimizeStreamingLatency?: 0 | 1 | 2 | null | undefined;
|
|
186
|
+
textNormalization?: boolean | null | undefined;
|
|
187
|
+
}>;
|
|
188
|
+
type XaiSpeechModelOptions = InferSchema<typeof xaiSpeechModelOptionsSchema>;
|
|
189
|
+
|
|
190
|
+
declare const xaiTranscriptionModelOptionsSchema: _ai_sdk_provider_utils.LazySchema<{
|
|
191
|
+
audioFormat?: "pcm" | "mulaw" | "alaw" | null | undefined;
|
|
192
|
+
sampleRate?: 8000 | 16000 | 22050 | 24000 | 44100 | 48000 | null | undefined;
|
|
193
|
+
language?: string | null | undefined;
|
|
194
|
+
format?: boolean | null | undefined;
|
|
195
|
+
multichannel?: boolean | null | undefined;
|
|
196
|
+
channels?: number | null | undefined;
|
|
197
|
+
diarize?: boolean | null | undefined;
|
|
198
|
+
keyterm?: string | string[] | null | undefined;
|
|
199
|
+
fillerWords?: boolean | null | undefined;
|
|
200
|
+
}>;
|
|
201
|
+
type XaiTranscriptionModelOptions = InferSchema<typeof xaiTranscriptionModelOptionsSchema>;
|
|
202
|
+
|
|
182
203
|
declare const xaiFilesOptionsSchema: _ai_sdk_provider_utils.LazySchema<{
|
|
183
204
|
[x: string]: unknown;
|
|
184
205
|
teamId?: string | undefined;
|
|
@@ -420,6 +441,22 @@ interface XaiProvider extends ProviderV4 {
|
|
|
420
441
|
*/
|
|
421
442
|
videoModel(modelId: XaiVideoModelId): Experimental_VideoModelV4;
|
|
422
443
|
experimental_realtime: Experimental_RealtimeFactoryV4;
|
|
444
|
+
/**
|
|
445
|
+
* Creates an xAI model for speech generation (text-to-speech).
|
|
446
|
+
*/
|
|
447
|
+
speech(): SpeechModelV4;
|
|
448
|
+
/**
|
|
449
|
+
* Creates an xAI model for speech generation (text-to-speech).
|
|
450
|
+
*/
|
|
451
|
+
speechModel(): SpeechModelV4;
|
|
452
|
+
/**
|
|
453
|
+
* Creates an xAI model for speech-to-text transcription.
|
|
454
|
+
*/
|
|
455
|
+
transcription(): TranscriptionModelV4;
|
|
456
|
+
/**
|
|
457
|
+
* Creates an xAI model for speech-to-text transcription.
|
|
458
|
+
*/
|
|
459
|
+
transcriptionModel(): TranscriptionModelV4;
|
|
423
460
|
/**
|
|
424
461
|
* Returns the xAI files interface for uploading files.
|
|
425
462
|
*/
|
|
@@ -482,4 +519,4 @@ declare class XaiRealtimeModel implements Experimental_RealtimeModelV4 {
|
|
|
482
519
|
|
|
483
520
|
declare const VERSION: string;
|
|
484
521
|
|
|
485
|
-
export { XaiRealtimeModel as Experimental_XaiRealtimeModel, type XaiRealtimeModelConfig as Experimental_XaiRealtimeModelConfig, VERSION, type XaiErrorData, type XaiFilesOptions, type XaiImageModelOptions, type XaiImageModelOptions as XaiImageProviderOptions, type XaiLanguageModelChatOptions, type XaiLanguageModelResponsesOptions, type XaiProvider, type XaiLanguageModelChatOptions as XaiProviderOptions, type XaiProviderSettings, type XaiLanguageModelResponsesOptions as XaiResponsesProviderOptions, type XaiVideoModelId, type XaiVideoModelOptions, type XaiVideoModelOptions as XaiVideoProviderOptions, codeExecution, createXai, mcpServer, viewImage, viewXVideo, webSearch, xSearch, xai, xaiTools };
|
|
522
|
+
export { XaiRealtimeModel as Experimental_XaiRealtimeModel, type XaiRealtimeModelConfig as Experimental_XaiRealtimeModelConfig, VERSION, type XaiErrorData, type XaiFilesOptions, type XaiImageModelOptions, type XaiImageModelOptions as XaiImageProviderOptions, type XaiLanguageModelChatOptions, type XaiLanguageModelResponsesOptions, type XaiProvider, type XaiLanguageModelChatOptions as XaiProviderOptions, type XaiProviderSettings, type XaiLanguageModelResponsesOptions as XaiResponsesProviderOptions, type XaiSpeechModelOptions, type XaiTranscriptionModelOptions, type XaiVideoModelId, type XaiVideoModelOptions, type XaiVideoModelOptions as XaiVideoProviderOptions, codeExecution, createXai, mcpServer, viewImage, viewXVideo, webSearch, xSearch, xai, xaiTools };
|
package/dist/index.js
CHANGED
|
@@ -3393,7 +3393,7 @@ var xaiTools = {
|
|
|
3393
3393
|
};
|
|
3394
3394
|
|
|
3395
3395
|
// src/version.ts
|
|
3396
|
-
var VERSION = true ? "4.0.0-canary.71" : "0.0.0-test";
|
|
3396
|
+
var VERSION = true ? "4.0.0-canary.73" : "0.0.0-test";
|
|
3397
3397
|
|
|
3398
3398
|
// src/files/xai-files.ts
|
|
3399
3399
|
import {
|
|
@@ -3845,6 +3845,357 @@ var xaiVideoStatusResponseSchema = z18.object({
|
|
|
3845
3845
|
}).nullish()
|
|
3846
3846
|
});
|
|
3847
3847
|
|
|
3848
|
+
// src/xai-speech-model.ts
|
|
3849
|
+
import {
|
|
3850
|
+
combineHeaders as combineHeaders6,
|
|
3851
|
+
createBinaryResponseHandler as createBinaryResponseHandler2,
|
|
3852
|
+
parseProviderOptions as parseProviderOptions6,
|
|
3853
|
+
postJsonToApi as postJsonToApi5,
|
|
3854
|
+
resolve,
|
|
3855
|
+
serializeModelOptions as serializeModelOptions4,
|
|
3856
|
+
WORKFLOW_DESERIALIZE as WORKFLOW_DESERIALIZE4,
|
|
3857
|
+
WORKFLOW_SERIALIZE as WORKFLOW_SERIALIZE4
|
|
3858
|
+
} from "@ai-sdk/provider-utils";
|
|
3859
|
+
|
|
3860
|
+
// src/xai-speech-model-options.ts
|
|
3861
|
+
import {
|
|
3862
|
+
lazySchema as lazySchema8,
|
|
3863
|
+
zodSchema as zodSchema8
|
|
3864
|
+
} from "@ai-sdk/provider-utils";
|
|
3865
|
+
import { z as z19 } from "zod/v4";
|
|
3866
|
+
var xaiSpeechModelOptionsSchema = lazySchema8(
|
|
3867
|
+
() => zodSchema8(
|
|
3868
|
+
z19.object({
|
|
3869
|
+
/**
|
|
3870
|
+
* Sample rate of the generated audio in Hz.
|
|
3871
|
+
*/
|
|
3872
|
+
sampleRate: z19.union([
|
|
3873
|
+
z19.literal(8e3),
|
|
3874
|
+
z19.literal(16e3),
|
|
3875
|
+
z19.literal(22050),
|
|
3876
|
+
z19.literal(24e3),
|
|
3877
|
+
z19.literal(44100),
|
|
3878
|
+
z19.literal(48e3)
|
|
3879
|
+
]).nullish(),
|
|
3880
|
+
/**
|
|
3881
|
+
* MP3 bit rate in bits per second. Only applies when outputFormat is mp3.
|
|
3882
|
+
*/
|
|
3883
|
+
bitRate: z19.union([
|
|
3884
|
+
z19.literal(32e3),
|
|
3885
|
+
z19.literal(64e3),
|
|
3886
|
+
z19.literal(96e3),
|
|
3887
|
+
z19.literal(128e3),
|
|
3888
|
+
z19.literal(192e3)
|
|
3889
|
+
]).nullish(),
|
|
3890
|
+
/**
|
|
3891
|
+
* Reduce time to first audio chunk, trading some quality for latency.
|
|
3892
|
+
*/
|
|
3893
|
+
optimizeStreamingLatency: z19.union([z19.literal(0), z19.literal(1), z19.literal(2)]).nullish(),
|
|
3894
|
+
/**
|
|
3895
|
+
* Normalize written-form text into spoken-form text before synthesis.
|
|
3896
|
+
*/
|
|
3897
|
+
textNormalization: z19.boolean().nullish()
|
|
3898
|
+
})
|
|
3899
|
+
)
|
|
3900
|
+
);
|
|
3901
|
+
|
|
3902
|
+
// src/xai-speech-model.ts
|
|
3903
|
+
var XaiSpeechModel = class _XaiSpeechModel {
|
|
3904
|
+
constructor(modelId, config) {
|
|
3905
|
+
this.modelId = modelId;
|
|
3906
|
+
this.config = config;
|
|
3907
|
+
this.specificationVersion = "v4";
|
|
3908
|
+
}
|
|
3909
|
+
static [WORKFLOW_SERIALIZE4](model) {
|
|
3910
|
+
return serializeModelOptions4({
|
|
3911
|
+
modelId: model.modelId,
|
|
3912
|
+
config: model.config
|
|
3913
|
+
});
|
|
3914
|
+
}
|
|
3915
|
+
static [WORKFLOW_DESERIALIZE4](options) {
|
|
3916
|
+
return new _XaiSpeechModel(options.modelId, options.config);
|
|
3917
|
+
}
|
|
3918
|
+
get provider() {
|
|
3919
|
+
return this.config.provider;
|
|
3920
|
+
}
|
|
3921
|
+
async getArgs({
|
|
3922
|
+
text,
|
|
3923
|
+
voice = "eve",
|
|
3924
|
+
outputFormat = "mp3",
|
|
3925
|
+
instructions,
|
|
3926
|
+
speed,
|
|
3927
|
+
language = "auto",
|
|
3928
|
+
providerOptions
|
|
3929
|
+
}) {
|
|
3930
|
+
const warnings = [];
|
|
3931
|
+
const xaiOptions = await parseProviderOptions6({
|
|
3932
|
+
provider: "xai",
|
|
3933
|
+
providerOptions,
|
|
3934
|
+
schema: xaiSpeechModelOptionsSchema
|
|
3935
|
+
});
|
|
3936
|
+
let codec = "mp3";
|
|
3937
|
+
if (["mp3", "wav", "pcm", "mulaw", "alaw"].includes(outputFormat)) {
|
|
3938
|
+
codec = outputFormat;
|
|
3939
|
+
} else {
|
|
3940
|
+
warnings.push({
|
|
3941
|
+
type: "unsupported",
|
|
3942
|
+
feature: "outputFormat",
|
|
3943
|
+
details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
|
|
3944
|
+
});
|
|
3945
|
+
}
|
|
3946
|
+
if (instructions != null) {
|
|
3947
|
+
warnings.push({
|
|
3948
|
+
type: "unsupported",
|
|
3949
|
+
feature: "instructions",
|
|
3950
|
+
details: "xAI speech models do not support the `instructions` option. Use xAI speech tags in `text` to control delivery."
|
|
3951
|
+
});
|
|
3952
|
+
}
|
|
3953
|
+
const output_format = {
|
|
3954
|
+
codec
|
|
3955
|
+
};
|
|
3956
|
+
if ((xaiOptions == null ? void 0 : xaiOptions.sampleRate) != null) {
|
|
3957
|
+
output_format.sample_rate = xaiOptions.sampleRate;
|
|
3958
|
+
}
|
|
3959
|
+
if ((xaiOptions == null ? void 0 : xaiOptions.bitRate) != null) {
|
|
3960
|
+
if (codec === "mp3") {
|
|
3961
|
+
output_format.bit_rate = xaiOptions.bitRate;
|
|
3962
|
+
} else {
|
|
3963
|
+
warnings.push({
|
|
3964
|
+
type: "unsupported",
|
|
3965
|
+
feature: "providerOptions",
|
|
3966
|
+
details: "xAI `bitRate` is supported only for mp3 output. It was ignored."
|
|
3967
|
+
});
|
|
3968
|
+
}
|
|
3969
|
+
}
|
|
3970
|
+
const requestBody = {
|
|
3971
|
+
text,
|
|
3972
|
+
voice_id: voice,
|
|
3973
|
+
language,
|
|
3974
|
+
output_format,
|
|
3975
|
+
speed,
|
|
3976
|
+
optimize_streaming_latency: xaiOptions == null ? void 0 : xaiOptions.optimizeStreamingLatency,
|
|
3977
|
+
text_normalization: xaiOptions == null ? void 0 : xaiOptions.textNormalization
|
|
3978
|
+
};
|
|
3979
|
+
return { requestBody, warnings };
|
|
3980
|
+
}
|
|
3981
|
+
async doGenerate(options) {
|
|
3982
|
+
var _a, _b, _c;
|
|
3983
|
+
const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
|
|
3984
|
+
const { requestBody, warnings } = await this.getArgs(options);
|
|
3985
|
+
const {
|
|
3986
|
+
value: audio,
|
|
3987
|
+
responseHeaders,
|
|
3988
|
+
rawValue: rawResponse
|
|
3989
|
+
} = await postJsonToApi5({
|
|
3990
|
+
url: `${this.config.baseURL}/tts`,
|
|
3991
|
+
headers: combineHeaders6(
|
|
3992
|
+
this.config.headers ? await resolve(this.config.headers) : void 0,
|
|
3993
|
+
options.headers
|
|
3994
|
+
),
|
|
3995
|
+
body: requestBody,
|
|
3996
|
+
failedResponseHandler: xaiFailedResponseHandler,
|
|
3997
|
+
successfulResponseHandler: createBinaryResponseHandler2(),
|
|
3998
|
+
abortSignal: options.abortSignal,
|
|
3999
|
+
fetch: this.config.fetch
|
|
4000
|
+
});
|
|
4001
|
+
return {
|
|
4002
|
+
audio,
|
|
4003
|
+
warnings,
|
|
4004
|
+
request: {
|
|
4005
|
+
body: JSON.stringify(requestBody)
|
|
4006
|
+
},
|
|
4007
|
+
response: {
|
|
4008
|
+
timestamp: currentDate,
|
|
4009
|
+
modelId: this.modelId,
|
|
4010
|
+
headers: responseHeaders,
|
|
4011
|
+
body: rawResponse
|
|
4012
|
+
}
|
|
4013
|
+
};
|
|
4014
|
+
}
|
|
4015
|
+
};
|
|
4016
|
+
|
|
4017
|
+
// src/xai-transcription-model.ts
|
|
4018
|
+
import {
|
|
4019
|
+
combineHeaders as combineHeaders7,
|
|
4020
|
+
convertBase64ToUint8Array,
|
|
4021
|
+
createJsonResponseHandler as createJsonResponseHandler6,
|
|
4022
|
+
mediaTypeToExtension,
|
|
4023
|
+
parseProviderOptions as parseProviderOptions7,
|
|
4024
|
+
postFormDataToApi as postFormDataToApi2,
|
|
4025
|
+
serializeModelOptions as serializeModelOptions5,
|
|
4026
|
+
WORKFLOW_DESERIALIZE as WORKFLOW_DESERIALIZE5,
|
|
4027
|
+
WORKFLOW_SERIALIZE as WORKFLOW_SERIALIZE5
|
|
4028
|
+
} from "@ai-sdk/provider-utils";
|
|
4029
|
+
import { z as z21 } from "zod/v4";
|
|
4030
|
+
|
|
4031
|
+
// src/xai-transcription-model-options.ts
|
|
4032
|
+
import {
|
|
4033
|
+
lazySchema as lazySchema9,
|
|
4034
|
+
zodSchema as zodSchema9
|
|
4035
|
+
} from "@ai-sdk/provider-utils";
|
|
4036
|
+
import { z as z20 } from "zod/v4";
|
|
4037
|
+
var xaiTranscriptionModelOptionsSchema = lazySchema9(
|
|
4038
|
+
() => zodSchema9(
|
|
4039
|
+
z20.object({
|
|
4040
|
+
/**
|
|
4041
|
+
* Audio encoding for raw, headerless input audio.
|
|
4042
|
+
*/
|
|
4043
|
+
audioFormat: z20.enum(["pcm", "mulaw", "alaw"]).nullish(),
|
|
4044
|
+
/**
|
|
4045
|
+
* Sample rate of the input audio in Hz.
|
|
4046
|
+
*/
|
|
4047
|
+
sampleRate: z20.union([
|
|
4048
|
+
z20.literal(8e3),
|
|
4049
|
+
z20.literal(16e3),
|
|
4050
|
+
z20.literal(22050),
|
|
4051
|
+
z20.literal(24e3),
|
|
4052
|
+
z20.literal(44100),
|
|
4053
|
+
z20.literal(48e3)
|
|
4054
|
+
]).nullish(),
|
|
4055
|
+
/**
|
|
4056
|
+
* Language code used for inverse text normalization.
|
|
4057
|
+
*/
|
|
4058
|
+
language: z20.string().nullish(),
|
|
4059
|
+
/**
|
|
4060
|
+
* Enable inverse text normalization. Requires `language`.
|
|
4061
|
+
*/
|
|
4062
|
+
format: z20.boolean().nullish(),
|
|
4063
|
+
/**
|
|
4064
|
+
* Enable per-channel transcription for multichannel audio.
|
|
4065
|
+
*/
|
|
4066
|
+
multichannel: z20.boolean().nullish(),
|
|
4067
|
+
/**
|
|
4068
|
+
* Number of interleaved audio channels.
|
|
4069
|
+
*/
|
|
4070
|
+
channels: z20.number().int().min(2).max(8).nullish(),
|
|
4071
|
+
/**
|
|
4072
|
+
* Enable speaker diarization.
|
|
4073
|
+
*/
|
|
4074
|
+
diarize: z20.boolean().nullish(),
|
|
4075
|
+
/**
|
|
4076
|
+
* Terms to bias transcription toward.
|
|
4077
|
+
*/
|
|
4078
|
+
keyterm: z20.union([z20.string(), z20.array(z20.string())]).nullish(),
|
|
4079
|
+
/**
|
|
4080
|
+
* Include filler words such as "uh" and "um" in the transcript.
|
|
4081
|
+
*/
|
|
4082
|
+
fillerWords: z20.boolean().nullish()
|
|
4083
|
+
})
|
|
4084
|
+
)
|
|
4085
|
+
);
|
|
4086
|
+
|
|
4087
|
+
// src/xai-transcription-model.ts
|
|
4088
|
+
var XaiTranscriptionModel = class _XaiTranscriptionModel {
|
|
4089
|
+
constructor(modelId, config) {
|
|
4090
|
+
this.modelId = modelId;
|
|
4091
|
+
this.config = config;
|
|
4092
|
+
this.specificationVersion = "v4";
|
|
4093
|
+
}
|
|
4094
|
+
static [WORKFLOW_SERIALIZE5](model) {
|
|
4095
|
+
return serializeModelOptions5({
|
|
4096
|
+
modelId: model.modelId,
|
|
4097
|
+
config: model.config
|
|
4098
|
+
});
|
|
4099
|
+
}
|
|
4100
|
+
static [WORKFLOW_DESERIALIZE5](options) {
|
|
4101
|
+
return new _XaiTranscriptionModel(options.modelId, options.config);
|
|
4102
|
+
}
|
|
4103
|
+
get provider() {
|
|
4104
|
+
return this.config.provider;
|
|
4105
|
+
}
|
|
4106
|
+
async getArgs({
|
|
4107
|
+
audio,
|
|
4108
|
+
mediaType,
|
|
4109
|
+
providerOptions
|
|
4110
|
+
}) {
|
|
4111
|
+
const warnings = [];
|
|
4112
|
+
const xaiOptions = await parseProviderOptions7({
|
|
4113
|
+
provider: "xai",
|
|
4114
|
+
providerOptions,
|
|
4115
|
+
schema: xaiTranscriptionModelOptionsSchema
|
|
4116
|
+
});
|
|
4117
|
+
const formData = new FormData();
|
|
4118
|
+
const transcriptionOptions = {
|
|
4119
|
+
audio_format: xaiOptions == null ? void 0 : xaiOptions.audioFormat,
|
|
4120
|
+
sample_rate: xaiOptions == null ? void 0 : xaiOptions.sampleRate,
|
|
4121
|
+
language: xaiOptions == null ? void 0 : xaiOptions.language,
|
|
4122
|
+
format: xaiOptions == null ? void 0 : xaiOptions.format,
|
|
4123
|
+
multichannel: xaiOptions == null ? void 0 : xaiOptions.multichannel,
|
|
4124
|
+
channels: xaiOptions == null ? void 0 : xaiOptions.channels,
|
|
4125
|
+
diarize: xaiOptions == null ? void 0 : xaiOptions.diarize,
|
|
4126
|
+
filler_words: xaiOptions == null ? void 0 : xaiOptions.fillerWords
|
|
4127
|
+
};
|
|
4128
|
+
for (const [key, value] of Object.entries(transcriptionOptions)) {
|
|
4129
|
+
if (value != null) {
|
|
4130
|
+
formData.append(key, String(value));
|
|
4131
|
+
}
|
|
4132
|
+
}
|
|
4133
|
+
if ((xaiOptions == null ? void 0 : xaiOptions.keyterm) != null) {
|
|
4134
|
+
const keyterms = Array.isArray(xaiOptions.keyterm) ? xaiOptions.keyterm : [xaiOptions.keyterm];
|
|
4135
|
+
for (const keyterm of keyterms) {
|
|
4136
|
+
formData.append("keyterm", keyterm);
|
|
4137
|
+
}
|
|
4138
|
+
}
|
|
4139
|
+
const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([convertBase64ToUint8Array(audio)]);
|
|
4140
|
+
const fileExtension = mediaTypeToExtension(mediaType);
|
|
4141
|
+
formData.append(
|
|
4142
|
+
"file",
|
|
4143
|
+
new File([blob], "audio", { type: mediaType }),
|
|
4144
|
+
`audio.${fileExtension}`
|
|
4145
|
+
);
|
|
4146
|
+
return { formData, warnings };
|
|
4147
|
+
}
|
|
4148
|
+
async doGenerate(options) {
|
|
4149
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
4150
|
+
const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
|
|
4151
|
+
const { formData, warnings } = await this.getArgs(options);
|
|
4152
|
+
const {
|
|
4153
|
+
value: response,
|
|
4154
|
+
responseHeaders,
|
|
4155
|
+
rawValue: rawResponse
|
|
4156
|
+
} = await postFormDataToApi2({
|
|
4157
|
+
url: `${(_d = this.config.baseURL) != null ? _d : "https://api.x.ai/v1"}/stt`,
|
|
4158
|
+
headers: combineHeaders7((_f = (_e = this.config).headers) == null ? void 0 : _f.call(_e), options.headers),
|
|
4159
|
+
formData,
|
|
4160
|
+
failedResponseHandler: xaiFailedResponseHandler,
|
|
4161
|
+
successfulResponseHandler: createJsonResponseHandler6(
|
|
4162
|
+
xaiTranscriptionResponseSchema
|
|
4163
|
+
),
|
|
4164
|
+
abortSignal: options.abortSignal,
|
|
4165
|
+
fetch: this.config.fetch
|
|
4166
|
+
});
|
|
4167
|
+
return {
|
|
4168
|
+
text: response.text,
|
|
4169
|
+
segments: (_h = (_g = response.words) == null ? void 0 : _g.map((word) => ({
|
|
4170
|
+
text: word.text,
|
|
4171
|
+
startSecond: word.start,
|
|
4172
|
+
endSecond: word.end
|
|
4173
|
+
}))) != null ? _h : [],
|
|
4174
|
+
language: response.language || void 0,
|
|
4175
|
+
durationInSeconds: (_i = response.duration) != null ? _i : void 0,
|
|
4176
|
+
warnings,
|
|
4177
|
+
response: {
|
|
4178
|
+
timestamp: currentDate,
|
|
4179
|
+
modelId: this.modelId,
|
|
4180
|
+
headers: responseHeaders,
|
|
4181
|
+
body: rawResponse
|
|
4182
|
+
}
|
|
4183
|
+
};
|
|
4184
|
+
}
|
|
4185
|
+
};
|
|
4186
|
+
var xaiTranscriptionResponseSchema = z21.object({
|
|
4187
|
+
text: z21.string(),
|
|
4188
|
+
language: z21.string().nullish(),
|
|
4189
|
+
duration: z21.number().nullish(),
|
|
4190
|
+
words: z21.array(
|
|
4191
|
+
z21.object({
|
|
4192
|
+
text: z21.string(),
|
|
4193
|
+
start: z21.number(),
|
|
4194
|
+
end: z21.number()
|
|
4195
|
+
})
|
|
4196
|
+
).nullish()
|
|
4197
|
+
});
|
|
4198
|
+
|
|
3848
4199
|
// src/xai-provider.ts
|
|
3849
4200
|
function createXai(options = {}) {
|
|
3850
4201
|
var _a;
|
|
@@ -3904,6 +4255,22 @@ function createXai(options = {}) {
|
|
|
3904
4255
|
fetch: options.fetch
|
|
3905
4256
|
});
|
|
3906
4257
|
};
|
|
4258
|
+
const createSpeechModel = () => {
|
|
4259
|
+
return new XaiSpeechModel("", {
|
|
4260
|
+
provider: "xai.speech",
|
|
4261
|
+
baseURL,
|
|
4262
|
+
headers: getHeaders,
|
|
4263
|
+
fetch: options.fetch
|
|
4264
|
+
});
|
|
4265
|
+
};
|
|
4266
|
+
const createTranscriptionModel = () => {
|
|
4267
|
+
return new XaiTranscriptionModel("", {
|
|
4268
|
+
provider: "xai.transcription",
|
|
4269
|
+
baseURL,
|
|
4270
|
+
headers: getHeaders,
|
|
4271
|
+
fetch: options.fetch
|
|
4272
|
+
});
|
|
4273
|
+
};
|
|
3907
4274
|
const experimentalRealtimeFactory = Object.assign(
|
|
3908
4275
|
(modelId) => createRealtimeModel(modelId),
|
|
3909
4276
|
{
|
|
@@ -3941,6 +4308,10 @@ function createXai(options = {}) {
|
|
|
3941
4308
|
provider.videoModel = createVideoModel;
|
|
3942
4309
|
provider.video = createVideoModel;
|
|
3943
4310
|
provider.experimental_realtime = experimentalRealtimeFactory;
|
|
4311
|
+
provider.speechModel = createSpeechModel;
|
|
4312
|
+
provider.speech = createSpeechModel;
|
|
4313
|
+
provider.transcriptionModel = createTranscriptionModel;
|
|
4314
|
+
provider.transcription = createTranscriptionModel;
|
|
3944
4315
|
provider.files = createFiles;
|
|
3945
4316
|
provider.tools = xaiTools;
|
|
3946
4317
|
return provider;
|