@ai-sdk/alibaba 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,605 @@
1
+ import {
2
+ InvalidResponseDataError,
3
+ LanguageModelV3,
4
+ LanguageModelV3CallOptions,
5
+ LanguageModelV3Content,
6
+ LanguageModelV3FinishReason,
7
+ LanguageModelV3GenerateResult,
8
+ LanguageModelV3StreamPart,
9
+ LanguageModelV3StreamResult,
10
+ SharedV3Warning,
11
+ } from '@ai-sdk/provider';
12
+ import {
13
+ combineHeaders,
14
+ createEventSourceResponseHandler,
15
+ createJsonResponseHandler,
16
+ FetchFunction,
17
+ generateId,
18
+ isParsableJson,
19
+ parseProviderOptions,
20
+ ParseResult,
21
+ postJsonToApi,
22
+ } from '@ai-sdk/provider-utils';
23
+ import { z } from 'zod/v4';
24
+ import {
25
+ getResponseMetadata,
26
+ mapOpenAICompatibleFinishReason,
27
+ prepareTools,
28
+ } from '@ai-sdk/openai-compatible/internal';
29
+ import { AlibabaConfig } from './alibaba-config';
30
+ import {
31
+ AlibabaChatModelId,
32
+ alibabaProviderOptions,
33
+ } from './alibaba-chat-options';
34
+ import { alibabaFailedResponseHandler } from './alibaba-provider';
35
+ import { convertToAlibabaChatMessages } from './convert-to-alibaba-chat-messages';
36
+ import { convertAlibabaUsage } from './convert-alibaba-usage';
37
+ import { CacheControlValidator } from './get-cache-control';
38
+
39
/**
 * Alibaba language model implementation.
 *
 * Implements LanguageModelV3 interface for Alibaba Cloud's Qwen models.
 * Supports OpenAI-compatible chat completions API with Alibaba-specific features:
 * - Reasoning/thinking mode (enable_thinking, reasoning_content)
 * - Thinking budget control (thinking_budget)
 * - Prompt caching (cached_tokens tracking)
 */
export class AlibabaLanguageModel implements LanguageModelV3 {
  readonly specificationVersion = 'v3';
  readonly modelId: AlibabaChatModelId;

  // Shared provider configuration: base URL, header factory, fetch override.
  private readonly config: AlibabaConfig;

  constructor(modelId: AlibabaChatModelId, config: AlibabaConfig) {
    this.modelId = modelId;
    this.config = config;
  }

  /** Provider identifier, taken from the shared provider config. */
  get provider(): string {
    return this.config.provider;
  }

  // Any http(s) image URL is passed through to the API untouched
  // (the SDK does not need to download it first).
  readonly supportedUrls: Record<string, RegExp[]> = {
    'image/*': [/^https?:\/\/.*$/],
  };

  /**
   * Builds request arguments for Alibaba API call.
   * Converts AI SDK options to Alibaba API format.
   *
   * Returns the JSON request body (`args`) together with any warnings
   * collected while mapping options (unsupported settings, tool issues,
   * cache-control validation).
   */
  private async getArgs({
    prompt,
    maxOutputTokens,
    temperature,
    topP,
    topK,
    frequencyPenalty,
    presencePenalty,
    stopSequences,
    responseFormat,
    seed,
    providerOptions,
    tools,
    toolChoice,
  }: LanguageModelV3CallOptions) {
    const warnings: SharedV3Warning[] = [];

    // Collects cache-control related warnings during message conversion.
    const cacheControlValidator = new CacheControlValidator();

    // Validate and extract the `providerOptions.alibaba` bag.
    const alibabaOptions = await parseProviderOptions({
      provider: 'alibaba',
      providerOptions,
      schema: alibabaProviderOptions,
    });

    // Warn about unsupported features
    if (frequencyPenalty != null) {
      warnings.push({ type: 'unsupported', feature: 'frequencyPenalty' });
    }

    // Build base request arguments
    const baseArgs = {
      model: this.modelId,
      max_tokens: maxOutputTokens,
      temperature,
      top_p: topP,
      top_k: topK,
      presence_penalty: presencePenalty,
      stop: stopSequences,
      seed,
      // JSON mode: use structured `json_schema` when a schema is supplied,
      // otherwise plain `json_object` mode.
      response_format:
        responseFormat?.type === 'json'
          ? responseFormat.schema != null
            ? {
                type: 'json_schema',
                json_schema: {
                  schema: responseFormat.schema,
                  name: responseFormat.name ?? 'response',
                  description: responseFormat.description,
                },
              }
            : { type: 'json_object' }
          : undefined,

      // Alibaba-specific options
      ...(alibabaOptions?.enableThinking != null
        ? { enable_thinking: alibabaOptions.enableThinking }
        : {}),
      ...(alibabaOptions?.thinkingBudget != null
        ? { thinking_budget: alibabaOptions.thinkingBudget }
        : {}),

      // Convert messages with cache control support
      messages: convertToAlibabaChatMessages({
        prompt,
        cacheControlValidator,
      }),
    };

    // Prepare tools
    const {
      tools: alibabaTools,
      toolChoice: alibabaToolChoice,
      toolWarnings,
    } = prepareTools({ tools, toolChoice });

    warnings.push(...cacheControlValidator.getWarnings());

    return {
      args: {
        ...baseArgs,
        tools: alibabaTools,
        tool_choice: alibabaToolChoice,
        // Only forward parallel_tool_calls when tools are present AND the
        // caller explicitly set the option (undefined means "API default").
        ...(alibabaTools != null &&
        alibabaOptions?.parallelToolCalls !== undefined
          ? { parallel_tool_calls: alibabaOptions.parallelToolCalls }
          : {}),
      },
      warnings: [...warnings, ...toolWarnings],
    };
  }

  /**
   * Non-streaming generation: one POST to `/chat/completions`, then map the
   * first choice's text / reasoning_content / tool_calls into
   * LanguageModelV3Content parts.
   */
  async doGenerate(
    options: LanguageModelV3CallOptions,
  ): Promise<LanguageModelV3GenerateResult> {
    const { args, warnings } = await this.getArgs(options);

    const {
      responseHeaders,
      value: response,
      rawValue: rawResponse,
    } = await postJsonToApi({
      url: `${this.config.baseURL}/chat/completions`,
      headers: combineHeaders(this.config.headers(), options.headers),
      body: args,
      failedResponseHandler: alibabaFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler(
        alibabaChatResponseSchema,
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });

    // NOTE(review): only the first choice is surfaced — assumes n=1 requests.
    const choice = response.choices[0];
    const content: Array<LanguageModelV3Content> = [];

    // text content:
    const text = choice.message.content;
    if (text != null && text.length > 0) {
      content.push({ type: 'text', text });
    }

    // reasoning content (Alibaba uses 'reasoning_content' field):
    const reasoning = choice.message.reasoning_content;
    if (reasoning != null && reasoning.length > 0) {
      content.push({
        type: 'reasoning',
        text: reasoning,
      });
    }

    // tool calls:
    if (choice.message.tool_calls != null) {
      for (const toolCall of choice.message.tool_calls) {
        content.push({
          type: 'tool-call',
          toolCallId: toolCall.id,
          toolName: toolCall.function.name,
          input: toolCall.function.arguments!,
        });
      }
    }

    return {
      content,
      finishReason: {
        unified: mapOpenAICompatibleFinishReason(choice.finish_reason),
        raw: choice.finish_reason ?? undefined,
      },
      usage: convertAlibabaUsage(response.usage),
      request: { body: JSON.stringify(args) },
      response: {
        ...getResponseMetadata(response),
        headers: responseHeaders,
        body: rawResponse,
      },
      warnings,
    };
  }

  /**
   * Streaming generation via SSE.
   *
   * The returned stream is the raw event-source stream piped through a
   * TransformStream that converts Alibaba chunks into
   * LanguageModelV3StreamPart events. The transform maintains interleaving
   * state (reasoning vs. text vs. tool calls): it closes an open
   * reasoning/text section before opening the next kind, and accumulates
   * tool-call argument fragments until they parse as complete JSON.
   */
  async doStream(
    options: LanguageModelV3CallOptions,
  ): Promise<LanguageModelV3StreamResult> {
    const { args, warnings } = await this.getArgs(options);
    const body = {
      ...args,
      stream: true,
      // Ask the API to append a usage-only final chunk when configured.
      stream_options: this.config.includeUsage
        ? { include_usage: true }
        : undefined,
    };

    const { responseHeaders, value: response } = await postJsonToApi({
      url: `${this.config.baseURL}/chat/completions`,
      headers: combineHeaders(this.config.headers(), options.headers),
      body,
      failedResponseHandler: alibabaFailedResponseHandler,
      successfulResponseHandler: createEventSourceResponseHandler(
        alibabaChatChunkSchema,
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });

    // Track state across chunks
    let finishReason: LanguageModelV3FinishReason = {
      unified: 'other',
      raw: undefined,
    };
    let usage: z.infer<typeof alibabaUsageSchema> | undefined = undefined;

    let isFirstChunk = true;
    // Whether a text section is currently open (text streams use id '0').
    let activeText = false;
    // Non-null while a reasoning section is open; holds its generated id.
    let activeReasoningId: string | null = null;

    // Track tool calls for accumulation across chunks
    const toolCalls: Array<{
      id: string;
      type: 'function';
      function: { name: string; arguments: string };
      hasFinished: boolean;
    }> = [];

    return {
      stream: response.pipeThrough(
        new TransformStream<
          ParseResult<z.infer<typeof alibabaChatChunkSchema>>,
          LanguageModelV3StreamPart
        >({
          start(controller) {
            controller.enqueue({ type: 'stream-start', warnings });
          },

          transform(chunk, controller) {
            // Emit raw chunk if requested (before anything else)
            if (options.includeRawChunks) {
              controller.enqueue({ type: 'raw', rawValue: chunk.rawValue });
            }

            // Handle parse errors
            if (!chunk.success) {
              controller.enqueue({ type: 'error', error: chunk.error });
              return;
            }

            const value = chunk.value;

            // Emit response metadata on first chunk
            if (isFirstChunk) {
              isFirstChunk = false;
              controller.enqueue({
                type: 'response-metadata',
                ...getResponseMetadata(value),
              });
            }

            // Track usage (appears in final chunk)
            if (value.usage != null) {
              usage = value.usage;
            }

            // Skip processing if no choices (usage-only chunk)
            if (value.choices.length === 0) {
              return;
            }

            const choice = value.choices[0];
            const delta = choice.delta;

            // Handle reasoning content streaming (Alibaba thinking mode)
            if (
              delta.reasoning_content != null &&
              delta.reasoning_content.length > 0
            ) {
              if (activeReasoningId == null) {
                // End any active text before starting reasoning
                if (activeText) {
                  controller.enqueue({ type: 'text-end', id: '0' });
                  activeText = false;
                }

                activeReasoningId = generateId();
                controller.enqueue({
                  type: 'reasoning-start',
                  id: activeReasoningId,
                });
              }

              controller.enqueue({
                type: 'reasoning-delta',
                id: activeReasoningId,
                delta: delta.reasoning_content,
              });
            }

            // Handle text content streaming
            if (delta.content != null && delta.content.length > 0) {
              // End any active reasoning before starting text
              if (activeReasoningId != null) {
                controller.enqueue({
                  type: 'reasoning-end',
                  id: activeReasoningId,
                });
                activeReasoningId = null;
              }

              if (!activeText) {
                controller.enqueue({ type: 'text-start', id: '0' });
                activeText = true;
              }

              controller.enqueue({
                type: 'text-delta',
                id: '0',
                delta: delta.content,
              });
            }

            // Handle tool call streaming
            if (delta.tool_calls != null) {
              // End any active reasoning or text before tool calls
              if (activeReasoningId != null) {
                controller.enqueue({
                  type: 'reasoning-end',
                  id: activeReasoningId,
                });
                activeReasoningId = null;
              }
              if (activeText) {
                controller.enqueue({ type: 'text-end', id: '0' });
                activeText = false;
              }

              for (const toolCallDelta of delta.tool_calls) {
                // Fall back to appending when the API omits `index`.
                const index = toolCallDelta.index ?? toolCalls.length;

                // New tool call - first chunk with id and name
                if (toolCalls[index] == null) {
                  if (toolCallDelta.id == null) {
                    throw new InvalidResponseDataError({
                      data: toolCallDelta,
                      message: `Expected 'id' to be a string.`,
                    });
                  }

                  if (toolCallDelta.function?.name == null) {
                    throw new InvalidResponseDataError({
                      data: toolCallDelta,
                      message: `Expected 'function.name' to be a string.`,
                    });
                  }

                  controller.enqueue({
                    type: 'tool-input-start',
                    id: toolCallDelta.id,
                    toolName: toolCallDelta.function.name,
                  });

                  toolCalls[index] = {
                    id: toolCallDelta.id,
                    type: 'function',
                    function: {
                      name: toolCallDelta.function.name,
                      arguments: toolCallDelta.function.arguments ?? '',
                    },
                    hasFinished: false,
                  };

                  const toolCall = toolCalls[index];

                  // Send initial delta if arguments started
                  if (toolCall.function.arguments.length > 0) {
                    controller.enqueue({
                      type: 'tool-input-delta',
                      id: toolCall.id,
                      delta: toolCall.function.arguments,
                    });
                  }

                  // Check if already complete (some providers send full tool call at once)
                  if (isParsableJson(toolCall.function.arguments)) {
                    controller.enqueue({
                      type: 'tool-input-end',
                      id: toolCall.id,
                    });

                    controller.enqueue({
                      type: 'tool-call',
                      toolCallId: toolCall.id,
                      toolName: toolCall.function.name,
                      input: toolCall.function.arguments,
                    });

                    toolCall.hasFinished = true;
                  }

                  continue;
                }

                // Existing tool call - accumulate arguments
                const toolCall = toolCalls[index];

                // Ignore any trailing fragments after the call completed.
                if (toolCall.hasFinished) {
                  continue;
                }

                // Append arguments if not null (skip arguments: null chunks)
                if (toolCallDelta.function?.arguments != null) {
                  toolCall.function.arguments +=
                    toolCallDelta.function.arguments;

                  controller.enqueue({
                    type: 'tool-input-delta',
                    id: toolCall.id,
                    delta: toolCallDelta.function.arguments,
                  });
                }

                // Check if tool call is now complete
                if (isParsableJson(toolCall.function.arguments)) {
                  controller.enqueue({
                    type: 'tool-input-end',
                    id: toolCall.id,
                  });

                  controller.enqueue({
                    type: 'tool-call',
                    toolCallId: toolCall.id,
                    toolName: toolCall.function.name,
                    input: toolCall.function.arguments,
                  });

                  toolCall.hasFinished = true;
                }
              }
            }

            // Track finish reason
            if (choice.finish_reason != null) {
              finishReason = {
                unified: mapOpenAICompatibleFinishReason(choice.finish_reason),
                raw: choice.finish_reason,
              };
            }
          },

          // Close any section left open and emit the terminal finish part.
          flush(controller) {
            if (activeReasoningId != null) {
              controller.enqueue({
                type: 'reasoning-end',
                id: activeReasoningId,
              });
            }

            if (activeText) {
              controller.enqueue({ type: 'text-end', id: '0' });
            }

            controller.enqueue({
              type: 'finish',
              finishReason,
              usage: convertAlibabaUsage(usage),
            });
          },
        }),
      ),
      request: { body: JSON.stringify(body) },
      response: { headers: responseHeaders },
    };
  }
}
522
+
523
+ /**
524
+ * Reference for schemas below:
525
+ * https://www.alibabacloud.com/help/en/model-studio/qwen-api-via-openai-chat-completions
526
+ */
527
+ const alibabaUsageSchema = z.object({
528
+ prompt_tokens: z.number(),
529
+ completion_tokens: z.number(),
530
+ total_tokens: z.number(),
531
+ prompt_tokens_details: z
532
+ .object({
533
+ cached_tokens: z.number().nullish(),
534
+ cache_creation_input_tokens: z.number().nullish(),
535
+ })
536
+ .nullish(),
537
+ completion_tokens_details: z
538
+ .object({
539
+ reasoning_tokens: z.number().nullish(),
540
+ })
541
+ .nullish(),
542
+ });
543
+
544
+ const alibabaChatResponseSchema = z.object({
545
+ id: z.string().nullish(),
546
+ created: z.number().nullish(),
547
+ model: z.string().nullish(),
548
+ choices: z.array(
549
+ z.object({
550
+ message: z.object({
551
+ role: z.literal('assistant').nullish(),
552
+ content: z.string().nullish(),
553
+ reasoning_content: z.string().nullish(), // Alibaba thinking mode
554
+ tool_calls: z
555
+ .array(
556
+ z.object({
557
+ id: z.string(),
558
+ type: z.literal('function'),
559
+ function: z.object({
560
+ name: z.string(),
561
+ arguments: z.string(),
562
+ }),
563
+ }),
564
+ )
565
+ .nullish(),
566
+ }),
567
+ finish_reason: z.string().nullish(),
568
+ index: z.number(),
569
+ }),
570
+ ),
571
+ usage: alibabaUsageSchema.nullish(),
572
+ });
573
+
574
+ const alibabaChatChunkSchema = z.object({
575
+ id: z.string().nullish(),
576
+ created: z.number().nullish(),
577
+ model: z.string().nullish(),
578
+ choices: z.array(
579
+ z.object({
580
+ delta: z.object({
581
+ role: z.enum(['assistant']).nullish(),
582
+ content: z.string().nullish(),
583
+ reasoning_content: z.string().nullish(), // Alibaba thinking mode delta
584
+ tool_calls: z
585
+ .array(
586
+ z.object({
587
+ index: z.number().nullish(), // Index for accumulating tool calls
588
+ id: z.string().nullish(),
589
+ type: z.literal('function').nullish(),
590
+ function: z
591
+ .object({
592
+ name: z.string().nullish(),
593
+ arguments: z.string().nullish(),
594
+ })
595
+ .nullish(),
596
+ }),
597
+ )
598
+ .nullish(),
599
+ }),
600
+ finish_reason: z.string().nullish(),
601
+ index: z.number(),
602
+ }),
603
+ ),
604
+ usage: alibabaUsageSchema.nullish(), // Usage only appears in final chunk
605
+ });
@@ -0,0 +1,53 @@
1
+ import { z } from 'zod/v4';
2
+
3
// https://www.alibabacloud.com/help/en/model-studio/models
// Known Alibaba Model Studio chat model ids, grouped by thinking-mode
// behavior. The trailing `(string & {})` keeps the union open to arbitrary
// model ids while preserving editor autocompletion for the listed ones.
export type AlibabaChatModelId =
  // commercial edition - hybrid-thinking mode (disabled by default)
  | 'qwen3-max'
  | 'qwen3-max-preview'
  | 'qwen-plus'
  | 'qwen-plus-latest'
  | 'qwen-flash'
  | 'qwen-turbo'
  | 'qwen-turbo-latest'
  // open-source edition - hybrid-thinking mode (enabled by default)
  | 'qwen3-235b-a22b'
  | 'qwen3-32b'
  | 'qwen3-30b-a3b'
  | 'qwen3-14b'
  // thinking-only mode
  | 'qwen3-next-80b-a3b-thinking'
  | 'qwen3-235b-a22b-thinking-2507'
  | 'qwen3-30b-a3b-thinking-2507'
  | 'qwq-plus'
  | 'qwq-plus-latest'
  | 'qwq-32b'
  // code models
  | 'qwen-coder'
  | 'qwen3-coder-plus'
  | 'qwen3-coder-flash'
  | (string & {});
30
+
31
/**
 * Schema for the `providerOptions.alibaba` options bag.
 * Validated at request time via `parseProviderOptions`; all fields are
 * optional so callers can supply any subset.
 */
export const alibabaProviderOptions = z.object({
  /**
   * Enable thinking/reasoning mode for supported models.
   * When enabled, the model generates reasoning content before the response.
   *
   * @default false
   */
  enableThinking: z.boolean().optional(),

  /**
   * Maximum number of reasoning tokens to generate.
   * Sent to the API as `thinking_budget`.
   */
  thinkingBudget: z.number().positive().optional(),

  /**
   * Whether to enable parallel function calling during tool use.
   * Only forwarded to the API when tools are present in the request.
   *
   * @default true
   */
  parallelToolCalls: z.boolean().optional(),
});

/** Inferred TypeScript type of the validated Alibaba provider options. */
export type AlibabaProviderOptions = z.infer<typeof alibabaProviderOptions>;