@ai-sdk/openai 0.0.0-1c33ba03-20260114162300 → 0.0.0-4115c213-20260122152721
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -3
- package/dist/index.d.mts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +29 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +29 -1
- package/dist/index.mjs.map +1 -1
- package/dist/internal/index.d.mts +1 -1
- package/dist/internal/index.d.ts +1 -1
- package/dist/internal/index.js +28 -0
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/index.mjs +28 -0
- package/dist/internal/index.mjs.map +1 -1
- package/docs/03-openai.mdx +2018 -0
- package/package.json +14 -5
- package/src/chat/convert-openai-chat-usage.ts +57 -0
- package/src/chat/convert-to-openai-chat-messages.ts +225 -0
- package/src/chat/get-response-metadata.ts +15 -0
- package/src/chat/map-openai-finish-reason.ts +19 -0
- package/src/chat/openai-chat-api.ts +198 -0
- package/src/chat/openai-chat-language-model.ts +700 -0
- package/src/chat/openai-chat-options.ts +186 -0
- package/src/chat/openai-chat-prepare-tools.ts +84 -0
- package/src/chat/openai-chat-prompt.ts +70 -0
- package/src/completion/convert-openai-completion-usage.ts +46 -0
- package/src/completion/convert-to-openai-completion-prompt.ts +93 -0
- package/src/completion/get-response-metadata.ts +15 -0
- package/src/completion/map-openai-finish-reason.ts +19 -0
- package/src/completion/openai-completion-api.ts +81 -0
- package/src/completion/openai-completion-language-model.ts +336 -0
- package/src/completion/openai-completion-options.ts +58 -0
- package/src/embedding/openai-embedding-api.ts +13 -0
- package/src/embedding/openai-embedding-model.ts +95 -0
- package/src/embedding/openai-embedding-options.ts +30 -0
- package/src/image/openai-image-api.ts +35 -0
- package/src/image/openai-image-model.ts +305 -0
- package/src/image/openai-image-options.ts +28 -0
- package/src/index.ts +9 -0
- package/src/internal/index.ts +19 -0
- package/src/openai-config.ts +18 -0
- package/src/openai-error.ts +22 -0
- package/src/openai-language-model-capabilities.ts +54 -0
- package/src/openai-provider.ts +270 -0
- package/src/openai-tools.ts +114 -0
- package/src/responses/convert-openai-responses-usage.ts +53 -0
- package/src/responses/convert-to-openai-responses-input.ts +597 -0
- package/src/responses/map-openai-responses-finish-reason.ts +22 -0
- package/src/responses/openai-responses-api.ts +1086 -0
- package/src/responses/openai-responses-language-model.ts +1932 -0
- package/src/responses/openai-responses-options.ts +312 -0
- package/src/responses/openai-responses-prepare-tools.ts +264 -0
- package/src/responses/openai-responses-provider-metadata.ts +39 -0
- package/src/speech/openai-speech-api.ts +38 -0
- package/src/speech/openai-speech-model.ts +137 -0
- package/src/speech/openai-speech-options.ts +22 -0
- package/src/tool/apply-patch.ts +141 -0
- package/src/tool/code-interpreter.ts +104 -0
- package/src/tool/file-search.ts +145 -0
- package/src/tool/image-generation.ts +126 -0
- package/src/tool/local-shell.ts +72 -0
- package/src/tool/mcp.ts +125 -0
- package/src/tool/shell.ts +85 -0
- package/src/tool/web-search-preview.ts +139 -0
- package/src/tool/web-search.ts +179 -0
- package/src/transcription/openai-transcription-api.ts +37 -0
- package/src/transcription/openai-transcription-model.ts +232 -0
- package/src/transcription/openai-transcription-options.ts +50 -0
- package/src/transcription/transcription-test.mp3 +0 -0
- package/src/version.ts +6 -0
package/src/transcription/openai-transcription-model.ts
@@ -0,0 +1,232 @@
+import {
+  TranscriptionModelV3,
+  TranscriptionModelV3CallOptions,
+  SharedV3Warning,
+} from '@ai-sdk/provider';
+import {
+  combineHeaders,
+  convertBase64ToUint8Array,
+  createJsonResponseHandler,
+  mediaTypeToExtension,
+  parseProviderOptions,
+  postFormDataToApi,
+} from '@ai-sdk/provider-utils';
+import { OpenAIConfig } from '../openai-config';
+import { openaiFailedResponseHandler } from '../openai-error';
+import { openaiTranscriptionResponseSchema } from './openai-transcription-api';
+import {
+  OpenAITranscriptionModelId,
+  openAITranscriptionProviderOptions,
+  OpenAITranscriptionProviderOptions,
+} from './openai-transcription-options';
+
+export type OpenAITranscriptionCallOptions = Omit<
+  TranscriptionModelV3CallOptions,
+  'providerOptions'
+> & {
+  providerOptions?: {
+    openai?: OpenAITranscriptionProviderOptions;
+  };
+};
+
+interface OpenAITranscriptionModelConfig extends OpenAIConfig {
+  _internal?: {
+    currentDate?: () => Date;
+  };
+}
+
+// https://platform.openai.com/docs/guides/speech-to-text#supported-languages
+const languageMap = {
+  afrikaans: 'af',
+  arabic: 'ar',
+  armenian: 'hy',
+  azerbaijani: 'az',
+  belarusian: 'be',
+  bosnian: 'bs',
+  bulgarian: 'bg',
+  catalan: 'ca',
+  chinese: 'zh',
+  croatian: 'hr',
+  czech: 'cs',
+  danish: 'da',
+  dutch: 'nl',
+  english: 'en',
+  estonian: 'et',
+  finnish: 'fi',
+  french: 'fr',
+  galician: 'gl',
+  german: 'de',
+  greek: 'el',
+  hebrew: 'he',
+  hindi: 'hi',
+  hungarian: 'hu',
+  icelandic: 'is',
+  indonesian: 'id',
+  italian: 'it',
+  japanese: 'ja',
+  kannada: 'kn',
+  kazakh: 'kk',
+  korean: 'ko',
+  latvian: 'lv',
+  lithuanian: 'lt',
+  macedonian: 'mk',
+  malay: 'ms',
+  marathi: 'mr',
+  maori: 'mi',
+  nepali: 'ne',
+  norwegian: 'no',
+  persian: 'fa',
+  polish: 'pl',
+  portuguese: 'pt',
+  romanian: 'ro',
+  russian: 'ru',
+  serbian: 'sr',
+  slovak: 'sk',
+  slovenian: 'sl',
+  spanish: 'es',
+  swahili: 'sw',
+  swedish: 'sv',
+  tagalog: 'tl',
+  tamil: 'ta',
+  thai: 'th',
+  turkish: 'tr',
+  ukrainian: 'uk',
+  urdu: 'ur',
+  vietnamese: 'vi',
+  welsh: 'cy',
+};
+
+export class OpenAITranscriptionModel implements TranscriptionModelV3 {
+  readonly specificationVersion = 'v3';
+
+  get provider(): string {
+    return this.config.provider;
+  }
+
+  constructor(
+    readonly modelId: OpenAITranscriptionModelId,
+    private readonly config: OpenAITranscriptionModelConfig,
+  ) {}
+
+  private async getArgs({
+    audio,
+    mediaType,
+    providerOptions,
+  }: OpenAITranscriptionCallOptions) {
+    const warnings: SharedV3Warning[] = [];
+
+    // Parse provider options
+    const openAIOptions = await parseProviderOptions({
+      provider: 'openai',
+      providerOptions,
+      schema: openAITranscriptionProviderOptions,
+    });
+
+    // Create form data with base fields
+    const formData = new FormData();
+    const blob =
+      audio instanceof Uint8Array
+        ? new Blob([audio])
+        : new Blob([convertBase64ToUint8Array(audio)]);
+
+    formData.append('model', this.modelId);
+    const fileExtension = mediaTypeToExtension(mediaType);
+    formData.append(
+      'file',
+      new File([blob], 'audio', { type: mediaType }),
+      `audio.${fileExtension}`,
+    );
+
+    // Add provider-specific options
+    if (openAIOptions) {
+      const transcriptionModelOptions = {
+        include: openAIOptions.include,
+        language: openAIOptions.language,
+        prompt: openAIOptions.prompt,
+        // https://platform.openai.com/docs/api-reference/audio/createTranscription#audio_createtranscription-response_format
+        // prefer verbose_json to get segments for models that support it
+        response_format: [
+          'gpt-4o-transcribe',
+          'gpt-4o-mini-transcribe',
+        ].includes(this.modelId)
+          ? 'json'
+          : 'verbose_json',
+        temperature: openAIOptions.temperature,
+        timestamp_granularities: openAIOptions.timestampGranularities,
+      };
+
+      for (const [key, value] of Object.entries(transcriptionModelOptions)) {
+        if (value != null) {
+          if (Array.isArray(value)) {
+            for (const item of value) {
+              formData.append(`${key}[]`, String(item));
+            }
+          } else {
+            formData.append(key, String(value));
+          }
+        }
+      }
+    }
+
+    return {
+      formData,
+      warnings,
+    };
+  }
+
+  async doGenerate(
+    options: OpenAITranscriptionCallOptions,
+  ): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>> {
+    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+    const { formData, warnings } = await this.getArgs(options);
+
+    const {
+      value: response,
+      responseHeaders,
+      rawValue: rawResponse,
+    } = await postFormDataToApi({
+      url: this.config.url({
+        path: '/audio/transcriptions',
+        modelId: this.modelId,
+      }),
+      headers: combineHeaders(this.config.headers(), options.headers),
+      formData,
+      failedResponseHandler: openaiFailedResponseHandler,
+      successfulResponseHandler: createJsonResponseHandler(
+        openaiTranscriptionResponseSchema,
+      ),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch,
+    });
+
+    const language =
+      response.language != null && response.language in languageMap
+        ? languageMap[response.language as keyof typeof languageMap]
+        : undefined;
+
+    return {
+      text: response.text,
+      segments:
+        response.segments?.map(segment => ({
+          text: segment.text,
+          startSecond: segment.start,
+          endSecond: segment.end,
+        })) ??
+        response.words?.map(word => ({
+          text: word.word,
+          startSecond: word.start,
+          endSecond: word.end,
+        })) ??
+        [],
+      language,
+      durationInSeconds: response.duration ?? undefined,
+      warnings,
+      response: {
+        timestamp: currentDate,
+        modelId: this.modelId,
+        headers: responseHeaders,
+        body: rawResponse,
+      },
+    };
+  }
+}
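For orientation, a minimal usage sketch of how application code typically reaches this model. It assumes the provider's `openai.transcription()` factory and the `ai` package's `experimental_transcribe` helper, neither of which appears in this diff:

// Hypothetical usage sketch; not part of the diffed package contents.
import { createOpenAI } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';
import { readFile } from 'node:fs/promises';

const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });

const result = await transcribe({
  // whisper-1 receives verbose_json responses, so segments are populated
  model: openai.transcription('whisper-1'),
  audio: await readFile('./meeting.mp3'),
  providerOptions: {
    // validated against openAITranscriptionProviderOptions (next file)
    openai: {
      language: 'en',
      timestampGranularities: ['word'],
    },
  },
});

console.log(result.text);
console.log(result.segments); // [{ text, startSecond, endSecond }, ...]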
package/src/transcription/openai-transcription-options.ts
@@ -0,0 +1,50 @@
+import { InferSchema, lazySchema, zodSchema } from '@ai-sdk/provider-utils';
+import { z } from 'zod/v4';
+
+export type OpenAITranscriptionModelId =
+  | 'whisper-1'
+  | 'gpt-4o-mini-transcribe'
+  | 'gpt-4o-transcribe'
+  | (string & {});
+
+// https://platform.openai.com/docs/api-reference/audio/createTranscription
+export const openAITranscriptionProviderOptions = lazySchema(() =>
+  zodSchema(
+    z.object({
+      /**
+       * Additional information to include in the transcription response.
+       */
+
+      include: z.array(z.string()).optional(),
+
+      /**
+       * The language of the input audio in ISO-639-1 format.
+       */
+      language: z.string().optional(),
+
+      /**
+       * An optional text to guide the model's style or continue a previous audio segment.
+       */
+      prompt: z.string().optional(),
+
+      /**
+       * The sampling temperature, between 0 and 1.
+       * @default 0
+       */
+      temperature: z.number().min(0).max(1).default(0).optional(),
+
+      /**
+       * The timestamp granularities to populate for this transcription.
+       * @default ['segment']
+       */
+      timestampGranularities: z
+        .array(z.enum(['word', 'segment']))
+        .default(['segment'])
+        .optional(),
+    }),
+  ),
+);
+
+export type OpenAITranscriptionProviderOptions = InferSchema<
+  typeof openAITranscriptionProviderOptions
+>;
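As a rough illustration of the option shape this schema admits, a sketch follows; the `@ai-sdk/openai/internal` import path and the `include` value are assumptions, and every field is optional:

// Hypothetical example; when fields are omitted, the defaults from the schema
// above apply (temperature 0, segment-level timestamps).
import type { OpenAITranscriptionProviderOptions } from '@ai-sdk/openai/internal';

const options: OpenAITranscriptionProviderOptions = {
  include: ['logprobs'],
  language: 'en',
  prompt: 'Expect TypeScript and API terminology.',
  temperature: 0,
  timestampGranularities: ['word', 'segment'],
};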
Binary file (no textual diff): package/src/transcription/transcription-test.mp3