@ai-sdk/alibaba 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,398 @@
1
+ import {
2
+ AISDKError,
3
+ type Experimental_VideoModelV3,
4
+ type SharedV3Warning,
5
+ } from '@ai-sdk/provider';
6
+ import {
7
+ combineHeaders,
8
+ convertUint8ArrayToBase64,
9
+ createJsonErrorResponseHandler,
10
+ createJsonResponseHandler,
11
+ delay,
12
+ type FetchFunction,
13
+ getFromApi,
14
+ lazySchema,
15
+ parseProviderOptions,
16
+ postJsonToApi,
17
+ type Resolvable,
18
+ resolve,
19
+ zodSchema,
20
+ } from '@ai-sdk/provider-utils';
21
+ import { z } from 'zod/v4';
22
+ import type { AlibabaVideoModelId } from './alibaba-video-settings';
23
+
24
+ export type AlibabaVideoProviderOptions = {
25
+ /** Negative prompt to specify what to avoid (max 500 chars). */
26
+ negativePrompt?: string | null;
27
+ /** URL to audio file for audio-video sync (WAV/MP3, 3-30s, max 15MB). */
28
+ audioUrl?: string | null;
29
+ /** Enable prompt extension/rewriting for better generation. Defaults to true. */
30
+ promptExtend?: boolean | null;
31
+ /** Shot type: 'single' for single-shot or 'multi' for multi-shot narrative. */
32
+ shotType?: 'single' | 'multi' | null;
33
+ /** Whether to add watermark to generated video. Defaults to false. */
34
+ watermark?: boolean | null;
35
+ /** Enable audio generation (for I2V/R2V models). */
36
+ audio?: boolean | null;
37
+ /**
38
+ * Reference URLs for reference-to-video mode.
39
+ * Array of URLs to images (0-5) and/or videos (0-3), max 5 total.
40
+ * Use character identifiers (character1, character2) in prompts to reference them.
41
+ */
42
+ referenceUrls?: string[] | null;
43
+ /** Polling interval in milliseconds. Defaults to 5000 (5 seconds). */
44
+ pollIntervalMs?: number | null;
45
+ /** Maximum wait time in milliseconds for video generation. Defaults to 600000 (10 minutes). */
46
+ pollTimeoutMs?: number | null;
47
+ [key: string]: unknown;
48
+ };
49
+
50
+ const alibabaVideoProviderOptionsSchema = lazySchema(() =>
51
+ zodSchema(
52
+ z
53
+ .object({
54
+ negativePrompt: z.string().nullish(),
55
+ audioUrl: z.string().nullish(),
56
+ promptExtend: z.boolean().nullish(),
57
+ shotType: z.enum(['single', 'multi']).nullish(),
58
+ watermark: z.boolean().nullish(),
59
+ audio: z.boolean().nullish(),
60
+ referenceUrls: z.array(z.string()).nullish(),
61
+ pollIntervalMs: z.number().positive().nullish(),
62
+ pollTimeoutMs: z.number().positive().nullish(),
63
+ })
64
+ .passthrough(),
65
+ ),
66
+ );
67
+
68
+ interface AlibabaVideoModelConfig {
69
+ provider: string;
70
+ baseURL: string;
71
+ headers: Resolvable<Record<string, string | undefined>>;
72
+ fetch?: FetchFunction;
73
+ _internal?: {
74
+ currentDate?: () => Date;
75
+ };
76
+ }
77
+
78
+ // DashScope native API error format (different from OpenAI-compatible endpoint)
79
+ const alibabaVideoErrorSchema = z.object({
80
+ code: z.string().nullish(),
81
+ message: z.string(),
82
+ request_id: z.string().nullish(),
83
+ });
84
+
85
+ const alibabaVideoFailedResponseHandler = createJsonErrorResponseHandler({
86
+ errorSchema: alibabaVideoErrorSchema,
87
+ errorToMessage: data => data.message,
88
+ });
89
+
90
+ const alibabaVideoCreateTaskSchema = z.object({
91
+ output: z
92
+ .object({
93
+ task_status: z.string(),
94
+ task_id: z.string(),
95
+ })
96
+ .nullish(),
97
+ request_id: z.string().nullish(),
98
+ });
99
+
100
+ const alibabaVideoTaskStatusSchema = z.object({
101
+ output: z
102
+ .object({
103
+ task_id: z.string(),
104
+ task_status: z.string(),
105
+ video_url: z.string().nullish(),
106
+ submit_time: z.string().nullish(),
107
+ scheduled_time: z.string().nullish(),
108
+ end_time: z.string().nullish(),
109
+ orig_prompt: z.string().nullish(),
110
+ actual_prompt: z.string().nullish(),
111
+ code: z.string().nullish(),
112
+ message: z.string().nullish(),
113
+ })
114
+ .nullish(),
115
+ usage: z
116
+ .object({
117
+ duration: z.number().nullish(),
118
+ output_video_duration: z.number().nullish(),
119
+ SR: z.number().nullish(),
120
+ size: z.string().nullish(),
121
+ })
122
+ .nullish(),
123
+ request_id: z.string().nullish(),
124
+ });
125
+
126
+ type AlibabaVideoTaskStatusResponse = z.infer<
127
+ typeof alibabaVideoTaskStatusSchema
128
+ >;
129
+
130
/**
 * Derives the generation mode from a model id.
 *
 * Ids containing `-i2v` are image-to-video, ids containing `-r2v` are
 * reference-to-video, and everything else is treated as text-to-video.
 * `-i2v` takes precedence when both markers are present.
 */
function detectMode(modelId: string): 't2v' | 'i2v' | 'r2v' {
  const isImageToVideo = modelId.includes('-i2v');
  if (isImageToVideo) {
    return 'i2v';
  }

  const isReferenceToVideo = modelId.includes('-r2v');
  return isReferenceToVideo ? 'r2v' : 't2v';
}
135
+
136
/**
 * Video model that talks to the DashScope video-synthesis endpoint.
 *
 * Generation is asynchronous: `doGenerate` submits a task with the
 * `X-DashScope-Async: enable` header and then polls the task endpoint until
 * the task reports `SUCCEEDED`, `FAILED` or `CANCELED`, or until the polling
 * timeout elapses.
 */
export class AlibabaVideoModel implements Experimental_VideoModelV3 {
  readonly specificationVersion = 'v3';
  readonly maxVideosPerCall = 1;

  /** Provider name taken from the model configuration. */
  get provider(): string {
    return this.config.provider;
  }

  constructor(
    readonly modelId: AlibabaVideoModelId,
    private readonly config: AlibabaVideoModelConfig,
  ) {}

  /**
   * Creates a video-synthesis task and waits for its result.
   *
   * @param options Standard video generation call options. Provider-specific
   * settings are read from `options.providerOptions.alibaba`.
   * @returns A single URL-typed video (`video/mp4`), the collected warnings,
   * response info (timestamp, model id, headers of the last poll response)
   * and provider metadata (task id, video URL, and, when present, the
   * rewritten prompt and usage figures).
   * @throws AISDKError `ALIBABA_VIDEO_GENERATION_ERROR` when no task id or no
   * video URL is returned, `ALIBABA_VIDEO_GENERATION_TIMEOUT` when polling
   * exceeds `pollTimeoutMs`, and `ALIBABA_VIDEO_GENERATION_FAILED` when the
   * task ends as `FAILED` or `CANCELED`.
   */
  async doGenerate(
    options: Parameters<Experimental_VideoModelV3['doGenerate']>[0],
  ): Promise<Awaited<ReturnType<Experimental_VideoModelV3['doGenerate']>>> {
    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const warnings: SharedV3Warning[] = [];
    const mode = detectMode(this.modelId);

    const alibabaOptions = (await parseProviderOptions({
      provider: 'alibaba',
      providerOptions: options.providerOptions,
      schema: alibabaVideoProviderOptionsSchema,
    })) as AlibabaVideoProviderOptions | undefined;

    // Build input object
    const input: Record<string, unknown> = {};

    if (options.prompt != null) {
      input.prompt = options.prompt;
    }

    if (alibabaOptions?.negativePrompt != null) {
      input.negative_prompt = alibabaOptions.negativePrompt;
    }

    if (alibabaOptions?.audioUrl != null) {
      input.audio_url = alibabaOptions.audioUrl;
    }

    // Handle image input for I2V mode
    if (mode === 'i2v' && options.image != null) {
      if (options.image.type === 'url') {
        input.img_url = options.image.url;
      } else {
        const base64Data =
          typeof options.image.data === 'string'
            ? options.image.data
            : convertUint8ArrayToBase64(options.image.data);
        // `img_url` must be a URL or a base64 data URI; a bare base64 payload
        // is neither. Wrap it with the media type unless the caller already
        // supplied a complete data URI.
        input.img_url = base64Data.startsWith('data:')
          ? base64Data
          : `data:${options.image.mediaType};base64,${base64Data}`;
      }
    }

    // Handle reference URLs for R2V mode
    if (mode === 'r2v' && alibabaOptions?.referenceUrls != null) {
      input.reference_urls = alibabaOptions.referenceUrls;
    }

    // Build parameters object
    const parameters: Record<string, unknown> = {};

    if (options.duration != null) {
      parameters.duration = options.duration;
    }

    if (options.seed != null) {
      parameters.seed = options.seed;
    }

    // Resolution / Size mapping
    if (options.resolution != null) {
      if (mode === 'i2v') {
        // I2V uses "720P" / "1080P" format
        const resolutionMap: Record<string, string> = {
          '1280x720': '720P',
          '720x1280': '720P',
          '960x960': '720P',
          '1088x832': '720P',
          '832x1088': '720P',
          '1920x1080': '1080P',
          '1080x1920': '1080P',
          '1440x1440': '1080P',
          '1632x1248': '1080P',
          '1248x1632': '1080P',
          '832x480': '480P',
          '480x832': '480P',
          '624x624': '480P',
        };
        // Unmapped values are forwarded unchanged.
        parameters.resolution =
          resolutionMap[options.resolution] ?? options.resolution;
      } else {
        // T2V and R2V use "WIDTH*HEIGHT" format for the size parameter
        // Convert "WIDTHxHEIGHT" (SDK standard) to "WIDTH*HEIGHT" (Alibaba API)
        parameters.size = options.resolution.replace('x', '*');
      }
    }

    // Provider-specific parameters
    if (alibabaOptions?.promptExtend != null) {
      parameters.prompt_extend = alibabaOptions.promptExtend;
    }
    if (alibabaOptions?.shotType != null) {
      parameters.shot_type = alibabaOptions.shotType;
    }
    if (alibabaOptions?.watermark != null) {
      parameters.watermark = alibabaOptions.watermark;
    }
    if (alibabaOptions?.audio != null) {
      parameters.audio = alibabaOptions.audio;
    }

    // Warn about unsupported standard options
    if (options.aspectRatio) {
      warnings.push({
        type: 'unsupported',
        feature: 'aspectRatio',
        details:
          'Alibaba video models use explicit size/resolution dimensions. Use the resolution option or providerOptions.alibaba for size control.',
      });
    }
    if (options.fps) {
      warnings.push({
        type: 'unsupported',
        feature: 'fps',
        details: 'Alibaba video models do not support custom FPS.',
      });
    }
    if (options.n != null && options.n > 1) {
      warnings.push({
        type: 'unsupported',
        feature: 'n',
        details:
          'Alibaba video models only support generating 1 video per call.',
      });
    }

    // Step 1: Create task
    const { value: createResponse } = await postJsonToApi({
      url: `${this.config.baseURL}/api/v1/services/aigc/video-generation/video-synthesis`,
      headers: combineHeaders(
        await resolve(this.config.headers),
        options.headers,
        {
          'X-DashScope-Async': 'enable',
        },
      ),
      body: {
        model: this.modelId,
        input,
        parameters,
      },
      successfulResponseHandler: createJsonResponseHandler(
        alibabaVideoCreateTaskSchema,
      ),
      failedResponseHandler: alibabaVideoFailedResponseHandler,
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });

    const taskId = createResponse.output?.task_id;
    if (!taskId) {
      throw new AISDKError({
        name: 'ALIBABA_VIDEO_GENERATION_ERROR',
        message: `No task_id returned from Alibaba API. Response: ${JSON.stringify(createResponse)}`,
      });
    }

    // Step 2: Poll for task completion
    const pollIntervalMs = alibabaOptions?.pollIntervalMs ?? 5000;
    const pollTimeoutMs = alibabaOptions?.pollTimeoutMs ?? 600000;
    const startTime = Date.now();
    let finalResponse: AlibabaVideoTaskStatusResponse | undefined;
    let responseHeaders: Record<string, string> | undefined;

    while (true) {
      // Wait first, then check the deadline, so the first status request is
      // issued one poll interval after task creation.
      await delay(pollIntervalMs, { abortSignal: options.abortSignal });

      if (Date.now() - startTime > pollTimeoutMs) {
        throw new AISDKError({
          name: 'ALIBABA_VIDEO_GENERATION_TIMEOUT',
          message: `Video generation timed out after ${pollTimeoutMs}ms`,
        });
      }

      const { value: statusResponse, responseHeaders: pollHeaders } =
        await getFromApi({
          url: `${this.config.baseURL}/api/v1/tasks/${taskId}`,
          headers: combineHeaders(
            await resolve(this.config.headers),
            options.headers,
          ),
          successfulResponseHandler: createJsonResponseHandler(
            alibabaVideoTaskStatusSchema,
          ),
          failedResponseHandler: alibabaVideoFailedResponseHandler,
          abortSignal: options.abortSignal,
          fetch: this.config.fetch,
        });

      // Keep the headers of the most recent poll; they are returned to the caller.
      responseHeaders = pollHeaders;
      const taskStatus = statusResponse.output?.task_status;

      if (taskStatus === 'SUCCEEDED') {
        finalResponse = statusResponse;
        break;
      }

      if (taskStatus === 'FAILED' || taskStatus === 'CANCELED') {
        throw new AISDKError({
          name: 'ALIBABA_VIDEO_GENERATION_FAILED',
          message: `Video generation ${taskStatus.toLowerCase()}. Task ID: ${taskId}. ${statusResponse.output?.message ?? ''}`,
        });
      }

      // Continue polling for PENDING, RUNNING, UNKNOWN statuses
    }

    const videoUrl = finalResponse?.output?.video_url;
    if (!videoUrl) {
      throw new AISDKError({
        name: 'ALIBABA_VIDEO_GENERATION_ERROR',
        message: `No video URL in response. Task ID: ${taskId}`,
      });
    }

    return {
      videos: [
        {
          type: 'url',
          url: videoUrl,
          mediaType: 'video/mp4',
        },
      ],
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
      },
      providerMetadata: {
        alibaba: {
          taskId,
          videoUrl,
          ...(finalResponse?.output?.actual_prompt
            ? { actualPrompt: finalResponse.output.actual_prompt }
            : {}),
          ...(finalResponse?.usage
            ? {
                usage: {
                  duration: finalResponse.usage.duration,
                  outputVideoDuration:
                    finalResponse.usage.output_video_duration,
                  resolution: finalResponse.usage.SR,
                  size: finalResponse.usage.size,
                },
              }
            : {}),
        },
      },
    };
  }
}
@@ -0,0 +1,12 @@
1
+ // https://www.alibabacloud.com/help/en/model-studio/use-video-generation
2
+ export type AlibabaVideoModelId =
3
+ // Text-to-Video
4
+ | 'wan2.6-t2v'
5
+ | 'wan2.5-t2v-preview'
6
+ // Image-to-Video (first frame)
7
+ | 'wan2.6-i2v'
8
+ | 'wan2.6-i2v-flash'
9
+ // Reference-to-Video
10
+ | 'wan2.6-r2v'
11
+ | 'wan2.6-r2v-flash'
12
+ | (string & {});
@@ -1,5 +1,5 @@
1
- import { LanguageModelV3Usage } from '@ai-sdk/provider';
2
1
  import { convertOpenAICompatibleChatUsage } from '@ai-sdk/openai-compatible/internal';
2
+ import type { LanguageModelV3Usage } from '@ai-sdk/provider';
3
3
 
4
4
  export type AlibabaUsage = {
5
5
  prompt_tokens?: number | null;
@@ -1,11 +1,11 @@
1
1
  import {
2
- LanguageModelV3DataContent,
3
- LanguageModelV3Prompt,
2
+ type LanguageModelV3DataContent,
3
+ type LanguageModelV3Prompt,
4
4
  UnsupportedFunctionalityError,
5
5
  } from '@ai-sdk/provider';
6
6
  import { convertToBase64 } from '@ai-sdk/provider-utils';
7
- import { AlibabaChatPrompt } from './alibaba-chat-prompt';
8
- import { CacheControlValidator } from './get-cache-control';
7
+ import type { AlibabaChatPrompt } from './alibaba-chat-prompt';
8
+ import type { CacheControlValidator } from './get-cache-control';
9
9
 
10
10
  function formatImageUrl({
11
11
  data,
@@ -1,5 +1,8 @@
1
- import { SharedV3Warning, SharedV3ProviderMetadata } from '@ai-sdk/provider';
2
- import { AlibabaCacheControl } from './alibaba-chat-prompt';
1
+ import type {
2
+ SharedV3ProviderMetadata,
3
+ SharedV3Warning,
4
+ } from '@ai-sdk/provider';
5
+ import type { AlibabaCacheControl } from './alibaba-chat-prompt';
3
6
 
4
7
  // Alibaba allows a maximum of 4 cache breakpoints per request
5
8
  const MAX_CACHE_BREAKPOINTS = 4;
package/src/index.ts CHANGED
@@ -1,12 +1,15 @@
1
- export { createAlibaba, alibaba } from './alibaba-provider';
2
- export type {
3
- AlibabaProvider,
4
- AlibabaProviderSettings,
5
- } from './alibaba-provider';
6
1
  export type {
7
2
  AlibabaChatModelId,
8
3
  AlibabaProviderOptions,
9
4
  } from './alibaba-chat-options';
10
5
  export type { AlibabaCacheControl } from './alibaba-chat-prompt';
6
+ export {
7
+ type AlibabaProvider,
8
+ type AlibabaProviderSettings,
9
+ alibaba,
10
+ createAlibaba,
11
+ } from './alibaba-provider';
12
+ export type { AlibabaVideoProviderOptions } from './alibaba-video-model';
13
+ export type { AlibabaVideoModelId } from './alibaba-video-settings';
11
14
  export type { AlibabaUsage } from './convert-alibaba-usage';
12
15
  export { VERSION } from './version';