@agentor/dashscope 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,6 +12,10 @@
  - **Responses API** - `/responses` endpoint with built-in tools support
  - **Embedding** - Text vectorization via OpenAI-compatible `/embeddings` endpoint
  - **Reranking** - Document reranking via `/reranks` endpoint
+ - **Image Generation** - Text-to-image via multimodal generation endpoint
+ - **Video Generation** - Text-to-video and image-to-video with async polling
+ - **Speech Synthesis** - Text-to-speech for CosyVoice and Qwen-TTS models
+ - **Transcription** - Speech-to-text for short and long audio
  - **Built-in Tools** - Web search, code interpreter, web extractor, file search, image search, MCP integration
  - **Thinking Mode** - Enable reasoning/thinking with configurable budget
  - **Multi-region** - Beijing, Singapore, US, Germany regions
@@ -318,6 +322,113 @@ const { ranking } = await rerank({
  });
  ```

+ ## Image Generation
+
+ ```typescript
+ import { generateImage } from "ai";
+
+ const { images } = await generateImage({
+   model: dashscope.imageModel("qwen-image-plus"),
+   prompt: "A cute cat sitting on a windowsill with sunlight streaming in",
+   providerOptions: {
+     dashscope: {
+       size: "1024*1024",
+     },
+   },
+ });
+
+ // images[0].uint8Array — raw image data
+ // images[0].base64 — base64 encoded image
+ ```
+
+ ## Video Generation
+
+ ```typescript
+ import { experimental_generateVideo as generateVideo } from "ai";
+
+ // Text-to-video
+ const { videos } = await generateVideo({
+   model: dashscope.videoModel("wan2.6-t2v"),
+   prompt: "A golden retriever running through a field of sunflowers",
+   providerOptions: {
+     dashscope: {
+       size: "1280*720",
+       duration: 5,
+     },
+   },
+ });
+ ```
+
+ ### Image-to-Video
+
+ Use a model ID containing `-i2v` for image-to-video mode:
+
+ ```typescript
+ const { videos } = await generateVideo({
+   model: dashscope.videoModel("wan2.6-i2v-turbo"),
+   prompt: "The cat stretches and walks away",
+   providerOptions: {
+     dashscope: {
+       resolution: "720P",
+     },
+   },
+   image: "data:image/png;base64,...", // or a URL string
+ });
+ ```
+
+ ## Speech Synthesis (TTS)
+
+ ```typescript
+ import { experimental_generateSpeech as generateSpeech } from "ai";
+ import { writeFileSync } from "fs";
+
+ const { audio } = await generateSpeech({
+   model: dashscope.speechModel("cosyvoice-v3-flash"),
+   text: "Hello, welcome to Agentor.",
+   providerOptions: {
+     dashscope: {
+       voice: "longanyang",
+       format: "wav",
+       sampleRate: 24000,
+     },
+   },
+ });
+
+ writeFileSync("output.wav", audio.uint8Array);
+ ```
+
+ ## Transcription (Speech-to-Text)
+
+ ### Short Audio (Sync)
+
+ ```typescript
+ import { experimental_transcribe as transcribe } from "ai";
+
+ const { text } = await transcribe({
+   model: dashscope.transcriptionModel("qwen3-asr-flash"),
+   audio: new URL("https://example.com/audio.mp3"),
+ });
+
+ console.log(text);
+ ```
+
+ ### Long Audio (Async)
+
+ For async models, provide the audio URL via `providerOptions`:
+
+ ```typescript
+ const { text, segments } = await transcribe({
+   model: dashscope.transcriptionModel("qwen3-asr-flash-filetrans"),
+   audio: new Uint8Array(0), // placeholder
+   providerOptions: {
+     dashscope: {
+       fileUrl: "https://example.com/long-audio.mp3",
+       enableWords: true,
+     },
+   },
+ });
+ ```
+
  ## Provider Configuration

  ```typescript
@@ -332,6 +443,85 @@ const dashscope = createDashScope({
  });
  ```

+ ## Available Models
+
+ > For the complete and up-to-date model list, see [Alibaba Cloud Model Studio](https://help.aliyun.com/zh/model-studio/models).
+
+ ### Language Models (Chat)
+
+ | Model                 | Description                                |
+ | --------------------- | ------------------------------------------ |
+ | `qwen3.6-max-preview` | Flagship model with strongest reasoning    |
+ | `qwen3.6-plus`        | Recommended, balanced capability and cost  |
+ | `qwen3.6-flash`       | Fastest, ultra-low cost                    |
+ | `qwen3.5-plus`        | Enhanced reasoning model                   |
+ | `qwen3.5-flash`       | Fast and efficient model                   |
+ | `qwen3-coder-plus`    | Code-optimized model                       |
+ | `qwen3-coder-flash`   | Fast code model                            |
+ | `qwq-plus`            | Dedicated reasoning model                  |
+ | `deepseek-v4-pro`     | DeepSeek V4 Pro                            |
+ | `deepseek-v4-flash`   | DeepSeek V4 Flash                          |
+ | `kimi-k2.6`           | Moonshot Kimi K2.6                         |
+ | `glm-5.1`             | Zhipu GLM 5.1                              |
+
+ ### Embedding Models
+
+ | Model                          | Dimensions              | Description                          |
+ | ------------------------------ | ----------------------- | ------------------------------------ |
+ | `text-embedding-v4`            | 64-2048 (default 1024)  | Text embedding for search/RAG        |
+ | `text-embedding-v3`            | 512-1024 (default 1024) | Legacy text embedding                |
+ | `qwen3-vl-embedding`           | 256-2560 (default 2560) | Multimodal (text + image) embedding  |
+ | `tongyi-embedding-vision-plus` | 64-1152 (default 1152)  | Cross-modal search embedding         |
+
+ ### Reranking Models
+
+ | Model             | Description                              |
+ | ----------------- | ---------------------------------------- |
+ | `qwen3-rerank`    | Text reranking, 100+ languages           |
+ | `qwen3-vl-rerank` | Multimodal reranking (text/image/video)  |
+ | `gte-rerank-v2`   | Semantic text reranking                  |
+
+ ### Image Models
+
+ | Model                | Description                                   |
+ | -------------------- | --------------------------------------------- |
+ | `wan2.7-image-pro`   | Latest Wan image generation, up to 4096x4096  |
+ | `wan2.7-image`       | Wan image generation, up to 2048x2048         |
+ | `qwen-image-2.0-pro` | Qwen image generation and editing             |
+ | `qwen-image-max`     | High quality image generation                 |
+ | `qwen-image-plus`    | Enhanced image generation                     |
+ | `z-image-turbo`      | Fast image generation                         |
+
+ ### Video Models
+
+ | Model              | Mode | Description                            |
+ | ------------------ | ---- | -------------------------------------- |
+ | `wan2.7-t2v`       | T2V  | Recommended text-to-video with audio   |
+ | `wan2.6-t2v`       | T2V  | Text-to-video with audio               |
+ | `wan2.2-t2v-plus`  | T2V  | Text-to-video (silent)                 |
+ | `wan2.7-i2v`       | I2V  | Recommended image-to-video with audio  |
+ | `wan2.6-i2v`       | I2V  | Image-to-video with audio              |
+ | `wan2.6-i2v-flash` | I2V  | Fast image-to-video                    |
+
+ ### Speech Models (TTS)
+
+ | Model                      | Description                         |
+ | -------------------------- | ----------------------------------- |
+ | `cosyvoice-v3.5-plus`      | Latest flagship, best quality       |
+ | `cosyvoice-v3.5-flash`     | Latest lightweight                  |
+ | `cosyvoice-v3-plus`        | V3 enhanced                         |
+ | `cosyvoice-v3-flash`       | V3 fast synthesis                   |
+ | `qwen3-tts-flash-realtime` | Qwen TTS with 17 human-like voices  |
+
+ ### Transcription Models (STT)
+
+ | Model                       | Mode  | Description                     |
+ | --------------------------- | ----- | ------------------------------- |
+ | `qwen3-asr-flash`           | Sync  | Short audio (up to 5 min)       |
+ | `qwen3-asr-flash-filetrans` | Async | Long audio (up to 12 hours)     |
+ | `fun-asr`                   | Async | Speaker diarization, hot words  |
+ | `paraformer-v2`             | Async | Legacy async transcription      |
+
  ## License

  MIT © [Demo Macro](https://www.demomacro.com/)
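The README additions compose into a single pipeline: `generateImage` documents a `base64` result field, and the Image-to-Video example accepts a `data:` URL in its `image` field. A minimal sketch of chaining the two, reusing only identifiers from the examples above (the `image/png` media type is an assumption; the README does not state the generated image format):

```typescript
import { generateImage, experimental_generateVideo as generateVideo } from "ai";
import { dashscope } from "@agentor/dashscope";

// Text-to-image, as in the Image Generation example above.
const { images } = await generateImage({
  model: dashscope.imageModel("qwen-image-plus"),
  prompt: "A cute cat sitting on a windowsill",
  providerOptions: { dashscope: { size: "1024*1024" } },
});

// Feed the base64 result into image-to-video via the `image:` field
// shown in the Image-to-Video example. PNG media type is assumed here.
const { videos } = await generateVideo({
  model: dashscope.videoModel("wan2.6-i2v-turbo"),
  prompt: "The cat stretches and walks away",
  image: `data:image/png;base64,${images[0].base64}`,
  providerOptions: { dashscope: { resolution: "720P" } },
});
```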
package/dist/index.d.mts CHANGED
@@ -1,7 +1,7 @@
  import { OpenAICompatibleEmbeddingModel } from "@ai-sdk/openai-compatible";
  import * as _$_ai_sdk_provider_utils0 from "@ai-sdk/provider-utils";
  import { FetchFunction } from "@ai-sdk/provider-utils";
- import { EmbeddingModelV3, LanguageModelV3, RerankingModelV3, RerankingModelV3CallOptions, SharedV3Warning } from "@ai-sdk/provider";
+ import { EmbeddingModelV3, Experimental_VideoModelV3, Experimental_VideoModelV3CallOptions, ImageModelV3, ImageModelV3CallOptions, LanguageModelV3, RerankingModelV3, RerankingModelV3CallOptions, SharedV3Warning, SpeechModelV3, SpeechModelV3CallOptions, TranscriptionModelV3, TranscriptionModelV3CallOptions } from "@ai-sdk/provider";

  //#region src/tools.d.ts
  declare const webSearchToolFactory: _$_ai_sdk_provider_utils0.ProviderToolFactoryWithOutputSchema<Record<string, never>, {
@@ -132,16 +132,12 @@ type DashScopeResponsesTools = typeof responsesTools;
  //#endregion
  //#region src/types.d.ts
  type DashScopeRegion = "beijing" | "singapore" | "us" | "germany";
- declare const DASHSCOPE_REGION_BASE_URLS: Record<DashScopeRegion, {
-   baseURL: string;
-   videoBaseURL: string;
- }>;
+ declare const DASHSCOPE_REGION_URLS: Record<DashScopeRegion, string>;
  interface DashScopeProviderSettings {
    apiKey?: string;
    region?: DashScopeRegion;
    workspaceId?: string;
    baseURL?: string;
-   videoBaseURL?: string;
    headers?: Record<string, string>;
    fetch?: FetchFunction;
    includeUsage?: boolean;
@@ -184,6 +180,10 @@ interface DashScopeProvider {
    languageModel(modelId: string): LanguageModelV3;
    embeddingModel(modelId: string): EmbeddingModelV3;
    rerankingModel(modelId: string): RerankingModelV3;
+   imageModel(modelId: string): ImageModelV3;
+   videoModel(modelId: string): Experimental_VideoModelV3;
+   speechModel(modelId: string): SpeechModelV3;
+   transcriptionModel(modelId: string): TranscriptionModelV3;
    chatOptions: (options: DashScopeChatOptions) => {
      providerOptions: {
        dashscope: DashScopeChatOptions;
@@ -215,6 +215,37 @@ declare class DashScopeEmbeddingModel extends OpenAICompatibleEmbeddingModel {
    constructor(modelId: string, config: DashScopeConfig);
  }
  //#endregion
+ //#region src/image.d.ts
+ interface DashScopeImageOptions {
+   /** Output image size, e.g. "2048*2048", "1024*1024", "1K", "2K". */
+   size?: string;
+   /** Negative prompt describing what to avoid. */
+   negativePrompt?: string;
+   /** Enable prompt extension/rewriting. Default depends on model. */
+   promptExtend?: boolean;
+   /** Add watermark. Default false. */
+   watermark?: boolean;
+   /** Number of images to generate. Default 1. */
+   n?: number;
+ }
+ declare class DashScopeImageModel implements ImageModelV3 {
+   readonly specificationVersion: "v3";
+   readonly modelId: string;
+   private readonly config;
+   constructor(modelId: string, config: DashScopeConfig);
+   get provider(): string;
+   get maxImagesPerCall(): number | undefined;
+   doGenerate(options: ImageModelV3CallOptions): Promise<{
+     images: string[];
+     warnings: SharedV3Warning[];
+     response: {
+       timestamp: Date;
+       modelId: string;
+       headers: Record<string, string> | undefined;
+     };
+   }>;
+ }
+ //#endregion
  //#region src/rerank.d.ts
  interface DashScopeRerankOptions {
    /** English instruction to guide the reranking strategy. */
@@ -240,10 +271,134 @@ declare class DashScopeRerankingModel implements RerankingModelV3 {
    }>;
  }
  //#endregion
+ //#region src/speech.d.ts
+ interface DashScopeSpeechOptions {
+   /** Voice name. Model-specific, e.g. "longanyang" for CosyVoice, "Cherry" for Qwen-TTS. */
+   voice?: string;
+   /** Output audio format: "wav", "mp3", "pcm". Default depends on model. */
+   format?: string;
+   /** Sample rate. Default depends on model. */
+   sampleRate?: number;
+   /** Language type for Qwen-TTS: "Chinese" | "English" | "Japanese" | etc. */
+   languageType?: string;
+   /** Speaking speed. 0.5-2.0, default 1.0. */
+   speed?: number;
+   /** Volume. 0.5-2.0, default 1.0. */
+   volume?: number;
+   /** Pitch. -12 to 12, default 0. */
+   pitch?: number;
+ }
+ declare class DashScopeSpeechModel implements SpeechModelV3 {
+   readonly specificationVersion: "v3";
+   readonly modelId: string;
+   private readonly config;
+   constructor(modelId: string, config: DashScopeConfig);
+   get provider(): string;
+   doGenerate(options: SpeechModelV3CallOptions): Promise<{
+     audio: Uint8Array<ArrayBuffer>;
+     warnings: SharedV3Warning[];
+     request: {
+       body: Record<string, unknown>;
+     };
+     response: {
+       timestamp: Date;
+       modelId: string;
+       headers: Record<string, string> | undefined;
+     };
+   }>;
+ }
+ //#endregion
+ //#region src/transcription.d.ts
+ interface DashScopeTranscriptionOptions {
+   /**
+    * Publicly accessible audio file URL for async transcription.
+    * Required for async models (filetrans, fun-asr, paraformer) when using long audio.
+    */
+   fileUrl?: string;
+   /** Language hint(s), e.g. ["zh", "en"]. */
+   languageHints?: string[];
+   /** Enable inverse text normalization (convert spoken numbers/dates to written form). */
+   enableItn?: boolean;
+   /** Enable word-level timestamps. */
+   enableWords?: boolean;
+   /** Channel IDs to transcribe. Default [0]. */
+   channelId?: number[];
+   /** Polling interval in ms. Default 5000. (async mode only) */
+   pollIntervalMs?: number;
+   /** Polling timeout in ms. Default 600000. (async mode only) */
+   pollTimeoutMs?: number;
+ }
+ declare class DashScopeTranscriptionModel implements TranscriptionModelV3 {
+   readonly specificationVersion: "v3";
+   readonly modelId: string;
+   private readonly config;
+   constructor(modelId: string, config: DashScopeConfig);
+   get provider(): string;
+   doGenerate(options: TranscriptionModelV3CallOptions): Promise<{
+     text: string;
+     segments: {
+       text: string;
+       startSecond: number;
+       endSecond: number;
+     }[];
+     language: undefined;
+     durationInSeconds: undefined;
+     warnings: SharedV3Warning[];
+     response: {
+       timestamp: Date;
+       modelId: string;
+       headers: Record<string, string> | undefined;
+     };
+   }>;
+   private doSync;
+   private doAsync;
+ }
+ //#endregion
+ //#region src/video.d.ts
+ interface DashScopeVideoOptions {
+   /** Negative prompt. */
+   negativePrompt?: string;
+   /** Enable prompt extension. */
+   promptExtend?: boolean;
+   /** Add watermark. Default false. */
+   watermark?: boolean;
+   /** Resolution for I2V: "720P" | "1080P". For T2V: use size "WIDTH*HEIGHT". */
+   resolution?: string;
+   /** Size in "WIDTH*HEIGHT" format (T2V/R2V). */
+   size?: string;
+   /** Video duration in seconds. */
+   duration?: number;
+   /** Polling interval in ms. Default 5000. */
+   pollIntervalMs?: number;
+   /** Polling timeout in ms. Default 600000. */
+   pollTimeoutMs?: number;
+ }
+ declare class DashScopeVideoModel implements Experimental_VideoModelV3 {
+   readonly specificationVersion: "v3";
+   readonly modelId: string;
+   private readonly config;
+   constructor(modelId: string, config: DashScopeConfig);
+   get provider(): string;
+   get maxVideosPerCall(): number | undefined;
+   doGenerate(options: Experimental_VideoModelV3CallOptions): Promise<{
+     videos: {
+       type: "url";
+       url: string;
+       mediaType: string;
+     }[];
+     warnings: SharedV3Warning[];
+     response: {
+       timestamp: Date;
+       modelId: string;
+       headers: Record<string, string> | undefined;
+     };
+   }>;
+ }
+ //#endregion
  //#region src/provider.d.ts
  declare function createDashScope(options?: DashScopeProviderSettings): DashScopeProvider;
  //#endregion
  //#region src/index.d.ts
  declare const dashscope: DashScopeProvider;
  //#endregion
- export { DASHSCOPE_REGION_BASE_URLS, DashScopeChatOptions, DashScopeEmbeddingModel, DashScopeEmbeddingOptions, DashScopeProvider, DashScopeProviderSettings, DashScopeRegion, DashScopeRerankOptions, DashScopeRerankingModel, DashScopeResponsesNamespace, DashScopeResponsesOptions, DashScopeResponsesTools, createDashScope, dashscope, responsesTools };
+ export { DASHSCOPE_REGION_URLS, DashScopeChatOptions, DashScopeEmbeddingModel, DashScopeEmbeddingOptions, DashScopeImageModel, DashScopeImageOptions, DashScopeProvider, DashScopeProviderSettings, DashScopeRegion, DashScopeRerankOptions, DashScopeRerankingModel, DashScopeResponsesNamespace, DashScopeResponsesOptions, DashScopeResponsesTools, DashScopeSpeechModel, DashScopeSpeechOptions, DashScopeTranscriptionModel, DashScopeTranscriptionOptions, DashScopeVideoModel, DashScopeVideoOptions, createDashScope, dashscope, responsesTools };
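Net of the typings diff: the region constant is renamed and flattened to `Record<DashScopeRegion, string>` of bare hosts, `videoBaseURL` is removed from `DashScopeProviderSettings`, and four new model factories join `DashScopeProvider`. A hedged migration sketch for 0.0.1 code that consumed the old constant (the per-endpoint path handling it relies on is shown in the index.mjs diff below):

```typescript
import { DASHSCOPE_REGION_URLS, createDashScope } from "@agentor/dashscope";

// 0.0.1: DASHSCOPE_REGION_BASE_URLS.beijing.baseURL ended in /compatible-mode/v1,
// with a separate videoBaseURL. Both are gone in 0.0.2.
// 0.0.2: the map holds bare hosts; endpoint paths such as
// /compatible-mode/v1/chat/completions are appended per call.
const dashscope = createDashScope({
  baseURL: DASHSCOPE_REGION_URLS.singapore, // "https://dashscope-intl.aliyuncs.com"
});
```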
package/dist/index.mjs CHANGED
@@ -1,13 +1,14 @@
  import { OpenAICompatibleEmbeddingModel } from "@ai-sdk/openai-compatible";
- import { combineHeaders, convertToBase64, createEventSourceResponseHandler, createJsonErrorResponseHandler, createJsonResponseHandler, createProviderToolFactoryWithOutputSchema, generateId, isParsableJson, lazySchema, parseProviderOptions, postJsonToApi, zodSchema } from "@ai-sdk/provider-utils";
+ import { combineHeaders, convertToBase64, createEventSourceResponseHandler, createJsonErrorResponseHandler, createJsonResponseHandler, createProviderToolFactoryWithOutputSchema, delay, generateId, getFromApi, isParsableJson, lazySchema, parseProviderOptions, postJsonToApi, zodSchema } from "@ai-sdk/provider-utils";
  import { z } from "zod/v4";
+ import { AISDKError } from "@ai-sdk/provider";
  import { convertOpenAICompatibleChatUsage, getResponseMetadata, mapOpenAICompatibleFinishReason, prepareTools } from "@ai-sdk/openai-compatible/internal";
  //#region src/embedding.ts
  var DashScopeEmbeddingModel = class extends OpenAICompatibleEmbeddingModel {
    constructor(modelId, config) {
      super(modelId, {
        provider: config.provider,
-       url: () => `${config.baseURL}/embeddings`,
+       url: () => `${config.baseURL}/compatible-mode/v1/embeddings`,
        headers: config.headers,
        fetch: config.fetch
      });
@@ -21,6 +22,14 @@ const failedResponseHandler = createJsonErrorResponseHandler({
    }) })),
    errorToMessage: (data) => data.error.message
  });
+ const nativeFailedHandler = createJsonErrorResponseHandler({
+   errorSchema: zodSchema(z.object({
+     code: z.string().nullish(),
+     message: z.string(),
+     request_id: z.string().nullish()
+   })),
+   errorToMessage: (data) => data.message
+ });
  function convertResponsesUsage(usage) {
    if (!usage) return {
      inputTokens: {
@@ -50,6 +59,90 @@ function convertResponsesUsage(usage) {
      raw: usage
    };
  }
+ function uint8ArrayToBase64(data) {
+   let binary = "";
+   for (let i = 0; i < data.length; i++) binary += String.fromCharCode(data[i]);
+   return btoa(binary);
+ }
+ //#endregion
+ //#region src/image.ts
+ const imageOptionsSchema = z.object({
+   size: z.string().optional(),
+   negativePrompt: z.string().optional(),
+   promptExtend: z.boolean().optional(),
+   watermark: z.boolean().optional(),
+   n: z.number().optional()
+ });
+ const imageResponseSchema = zodSchema(z.object({
+   output: z.object({ choices: z.array(z.object({ message: z.object({ content: z.array(z.object({ image: z.string().optional() })) }) })).optional() }).nullish(),
+   usage: z.object({
+     image_count: z.number().optional(),
+     width: z.number().optional(),
+     height: z.number().optional()
+   }).nullish(),
+   request_id: z.string().nullish()
+ }));
+ var DashScopeImageModel = class {
+   specificationVersion = "v3";
+   modelId;
+   config;
+   constructor(modelId, config) {
+     this.modelId = modelId;
+     this.config = config;
+   }
+   get provider() {
+     return this.config.provider;
+   }
+   get maxImagesPerCall() {
+     return 1;
+   }
+   async doGenerate(options) {
+     const warnings = [];
+     const dsOptions = await parseProviderOptions({
+       provider: "dashscope",
+       providerOptions: options.providerOptions,
+       schema: imageOptionsSchema
+     });
+     const body = {
+       model: this.modelId,
+       input: { messages: [{
+         role: "user",
+         content: [{ text: options.prompt }]
+       }] },
+       parameters: {
+         ...dsOptions?.size != null && { size: dsOptions.size },
+         ...dsOptions?.negativePrompt != null && { negative_prompt: dsOptions.negativePrompt },
+         ...dsOptions?.promptExtend != null && { prompt_extend: dsOptions.promptExtend },
+         ...dsOptions?.watermark != null && { watermark: dsOptions.watermark },
+         ...dsOptions?.n != null && { n: dsOptions.n }
+       }
+     };
+     const { responseHeaders, value: response } = await postJsonToApi({
+       url: `${this.config.baseURL}/api/v1/services/aigc/multimodal-generation/generation`,
+       headers: combineHeaders(this.config.headers(), options.headers),
+       body,
+       failedResponseHandler: nativeFailedHandler,
+       successfulResponseHandler: createJsonResponseHandler(imageResponseSchema),
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch
+     });
+     const imageUrls = response.output?.choices?.flatMap((c) => c.message.content.filter((p) => p.image != null).map((p) => p.image)) ?? [];
+     const images = [];
+     for (const url of imageUrls) {
+       const buffer = await (await (this.config.fetch ?? fetch)(url, { headers: this.config.headers() })).arrayBuffer();
+       images.push(uint8ArrayToBase64(new Uint8Array(buffer)));
+     }
+     return {
+       images,
+       warnings,
+       response: {
+         timestamp: /* @__PURE__ */ new Date(),
+         modelId: this.modelId,
+         headers: responseHeaders
+       }
+     };
+   }
+ };
  //#endregion
  //#region src/rerank.ts
  const rerankResponseSchema = zodSchema(z.object({
@@ -81,7 +174,7 @@ var DashScopeRerankingModel = class {
      ...options.topN != null && { top_n: options.topN }
    };
    const { responseHeaders, value: response } = await postJsonToApi({
-     url: `${this.config.baseURL.replace("/compatible-mode/", "/compatible-api/")}/reranks`,
+     url: `${this.config.baseURL}/compatible-api/v1/reranks`,
      headers: combineHeaders(this.config.headers(), options.headers),
      body,
      failedResponseHandler,
@@ -104,6 +197,94 @@ var DashScopeRerankingModel = class {
    }
  };
  //#endregion
+ //#region src/speech.ts
+ const speechOptionsSchema = z.object({
+   voice: z.string().optional(),
+   format: z.string().optional(),
+   sampleRate: z.number().optional(),
+   languageType: z.string().optional(),
+   speed: z.number().optional(),
+   volume: z.number().optional(),
+   pitch: z.number().optional()
+ });
+ const cosyvoiceResponseSchema = zodSchema(z.object({
+   output: z.object({ audio: z.object({ url: z.string().optional() }).nullish() }).nullish(),
+   request_id: z.string().nullish()
+ }));
+ var DashScopeSpeechModel = class {
+   specificationVersion = "v3";
+   modelId;
+   config;
+   constructor(modelId, config) {
+     this.modelId = modelId;
+     this.config = config;
+   }
+   get provider() {
+     return this.config.provider;
+   }
+   async doGenerate(options) {
+     const warnings = [];
+     const dsOptions = await parseProviderOptions({
+       provider: "dashscope",
+       providerOptions: options.providerOptions,
+       schema: speechOptionsSchema
+     });
+     const voice = dsOptions?.voice ?? "longanyang";
+     const format = dsOptions?.format ?? "wav";
+     const sampleRate = dsOptions?.sampleRate ?? 24e3;
+     const isCosyVoice = this.modelId.startsWith("cosyvoice");
+     let url;
+     let body;
+     if (isCosyVoice) {
+       url = `${this.config.baseURL}/api/v1/services/audio/tts/SpeechSynthesizer`;
+       body = {
+         model: this.modelId,
+         input: {
+           text: options.text,
+           voice,
+           format,
+           sample_rate: sampleRate,
+           ...dsOptions?.speed != null && { speech_rate: dsOptions.speed },
+           ...dsOptions?.volume != null && { volume: dsOptions.volume },
+           ...dsOptions?.pitch != null && { pitch_rate: dsOptions.pitch }
+         }
+       };
+     } else {
+       url = `${this.config.baseURL}/api/v1/services/aigc/multimodal-generation/generation`;
+       body = {
+         model: this.modelId,
+         input: {
+           text: options.text,
+           voice,
+           ...dsOptions?.languageType != null && { language_type: dsOptions.languageType }
+         }
+       };
+     }
+     const { responseHeaders, value: response } = await postJsonToApi({
+       url,
+       headers: combineHeaders(this.config.headers(), options.headers),
+       body,
+       failedResponseHandler: nativeFailedHandler,
+       successfulResponseHandler: createJsonResponseHandler(cosyvoiceResponseSchema),
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch
+     });
+     const audioUrl = response.output?.audio?.url;
+     if (!audioUrl) throw new Error("No audio URL returned from TTS API");
+     const audioBuffer = await (await (this.config.fetch ?? fetch)(audioUrl, { headers: this.config.headers() })).arrayBuffer();
+     return {
+       audio: new Uint8Array(audioBuffer),
+       warnings,
+       request: { body },
+       response: {
+         timestamp: /* @__PURE__ */ new Date(),
+         modelId: this.modelId,
+         headers: responseHeaders
+       }
+     };
+   }
+ };
+ //#endregion
  //#region src/tools.ts
  const webSearchToolFactory = createProviderToolFactoryWithOutputSchema({
    id: "dashscope.web_search",
@@ -178,23 +359,339 @@ const responsesTools = {
    mcp: (args) => mcpToolFactory(args)
  };
  //#endregion
+ //#region src/transcription.ts
+ const transcriptionOptionsSchema = z.object({
+   fileUrl: z.string().optional(),
+   languageHints: z.array(z.string()).optional(),
+   enableItn: z.boolean().optional(),
+   enableWords: z.boolean().optional(),
+   channelId: z.array(z.number()).optional(),
+   pollIntervalMs: z.number().positive().optional(),
+   pollTimeoutMs: z.number().positive().optional()
+ });
+ const syncResponseSchema = zodSchema(z.object({
+   output: z.object({ choices: z.array(z.object({ message: z.object({ content: z.array(z.object({ text: z.string().optional() })) }) })).optional() }).nullish(),
+   request_id: z.string().nullish()
+ }));
+ const createTaskSchema$1 = zodSchema(z.object({
+   output: z.object({
+     task_id: z.string(),
+     task_status: z.string()
+   }).nullish(),
+   request_id: z.string().nullish()
+ }));
+ const taskStatusSchema$1 = zodSchema(z.object({
+   output: z.object({
+     task_id: z.string(),
+     task_status: z.string(),
+     result: z.object({ transcription_url: z.string().nullish() }).nullish(),
+     results: z.array(z.object({
+       subtask_status: z.string().nullish(),
+       transcription_url: z.string().nullish()
+     })).nullish(),
+     code: z.string().nullish(),
+     message: z.string().nullish()
+   }).nullish(),
+   request_id: z.string().nullish()
+ }));
+ function isAsyncModel(modelId) {
+   return modelId.includes("filetrans") || modelId.startsWith("fun-asr") || modelId.startsWith("paraformer");
+ }
+ function buildAudioUrl(audio, mediaType) {
+   if (typeof audio === "string") {
+     if (audio.startsWith("http")) return audio;
+     return `data:${mediaType};base64,${audio}`;
+   }
+   return `data:${mediaType};base64,${uint8ArrayToBase64(audio)}`;
+ }
+ var DashScopeTranscriptionModel = class {
+   specificationVersion = "v3";
+   modelId;
+   config;
+   constructor(modelId, config) {
+     this.modelId = modelId;
+     this.config = config;
+   }
+   get provider() {
+     return this.config.provider;
+   }
+   async doGenerate(options) {
+     const warnings = [];
+     const dsOptions = await parseProviderOptions({
+       provider: "dashscope",
+       providerOptions: options.providerOptions,
+       schema: transcriptionOptionsSchema
+     }) ?? null;
+     if (isAsyncModel(this.modelId) && dsOptions?.fileUrl) return this.doAsync(options, dsOptions, warnings);
+     return this.doSync(options, dsOptions, warnings);
+   }
+   async doSync(options, dsOptions, warnings) {
+     const audioUrl = buildAudioUrl(options.audio, options.mediaType);
+     const body = {
+       model: this.modelId,
+       input: { messages: [{
+         role: "user",
+         content: [{ audio: audioUrl }]
+       }] },
+       parameters: {
+         result_format: "message",
+         ...dsOptions?.enableItn != null && { asr_options: { enable_itn: dsOptions.enableItn } }
+       }
+     };
+     const { responseHeaders, value: response } = await postJsonToApi({
+       url: `${this.config.baseURL}/api/v1/services/aigc/multimodal-generation/generation`,
+       headers: combineHeaders(this.config.headers(), options.headers),
+       body,
+       failedResponseHandler: nativeFailedHandler,
+       successfulResponseHandler: createJsonResponseHandler(syncResponseSchema),
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch
+     });
+     return {
+       text: response.output?.choices?.[0]?.message.content.filter((p) => p.text != null).map((p) => p.text).join("") ?? "",
+       segments: [],
+       language: void 0,
+       durationInSeconds: void 0,
+       warnings,
+       request: { body },
+       response: {
+         timestamp: /* @__PURE__ */ new Date(),
+         modelId: this.modelId,
+         headers: responseHeaders
+       }
+     };
+   }
+   async doAsync(options, dsOptions, warnings) {
+     const audioUrl = dsOptions?.fileUrl;
+     if (!audioUrl) throw new AISDKError({
+       name: "DASHSCOPE_TRANSCRIPTION_ERROR",
+       message: "Async transcription requires providerOptions.dashscope.fileUrl with a publicly accessible audio URL."
+     });
+     const parameters = {};
+     if (dsOptions?.channelId != null) parameters.channel_id = dsOptions.channelId;
+     if (dsOptions?.enableItn != null) parameters.enable_itn = dsOptions.enableItn;
+     if (dsOptions?.enableWords != null) parameters.enable_words = dsOptions.enableWords;
+     if (dsOptions?.languageHints?.length) parameters.language_hints = dsOptions.languageHints;
+     const { value: createResponse } = await postJsonToApi({
+       url: `${this.config.baseURL}/api/v1/services/audio/asr/transcription`,
+       headers: combineHeaders(this.config.headers(), options.headers, { "X-DashScope-Async": "enable" }),
+       body: {
+         model: this.modelId,
+         input: { file_url: audioUrl },
+         ...Object.keys(parameters).length > 0 && { parameters }
+       },
+       successfulResponseHandler: createJsonResponseHandler(createTaskSchema$1),
+       failedResponseHandler: nativeFailedHandler,
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch
+     });
+     const taskId = createResponse.output?.task_id;
+     if (!taskId) throw new AISDKError({
+       name: "DASHSCOPE_TRANSCRIPTION_ERROR",
+       message: `No task_id returned. Response: ${JSON.stringify(createResponse)}`
+     });
+     const pollInterval = dsOptions?.pollIntervalMs ?? 5e3;
+     const pollTimeout = dsOptions?.pollTimeoutMs ?? 6e5;
+     const startTime = Date.now();
+     while (true) {
+       await delay(pollInterval, { abortSignal: options.abortSignal });
+       if (Date.now() - startTime > pollTimeout) throw new AISDKError({
+         name: "DASHSCOPE_TRANSCRIPTION_TIMEOUT",
+         message: `Transcription timed out after ${pollTimeout}ms`
+       });
+       const { value: status, responseHeaders } = await getFromApi({
+         url: `${this.config.baseURL}/api/v1/tasks/${taskId}`,
+         headers: combineHeaders(this.config.headers(), options.headers, { "X-DashScope-Async": "enable" }),
+         successfulResponseHandler: createJsonResponseHandler(taskStatusSchema$1),
+         failedResponseHandler: nativeFailedHandler,
+         abortSignal: options.abortSignal,
+         fetch: this.config.fetch
+       });
+       const taskStatus = status.output?.task_status;
+       if (taskStatus === "SUCCEEDED") {
+         let transcriptionUrl = status.output?.result?.transcription_url;
+         if (!transcriptionUrl) transcriptionUrl = ((status.output?.results)?.find((r) => r.subtask_status === "SUCCEEDED"))?.transcription_url;
+         if (!transcriptionUrl) throw new AISDKError({
+           name: "DASHSCOPE_TRANSCRIPTION_ERROR",
+           message: `No transcription URL in response. Task ID: ${taskId}`
+         });
+         const resultData = await (await (this.config.fetch ?? fetch)(transcriptionUrl)).json();
+         let text = "";
+         const segments = [];
+         if (resultData.transcripts) for (const transcript of resultData.transcripts) {
+           text += transcript.text;
+           if (transcript.sentences) {
+             for (const sentence of transcript.sentences) if (sentence.begin_time != null && sentence.end_time != null) segments.push({
+               text: sentence.text,
+               startSecond: sentence.begin_time / 1e3,
+               endSecond: sentence.end_time / 1e3
+             });
+           }
+         }
+         return {
+           text,
+           segments,
+           language: void 0,
+           durationInSeconds: void 0,
+           warnings,
+           response: {
+             timestamp: /* @__PURE__ */ new Date(),
+             modelId: this.modelId,
+             headers: responseHeaders
+           }
+         };
+       }
+       if (taskStatus === "FAILED" || taskStatus === "CANCELED") throw new AISDKError({
+         name: "DASHSCOPE_TRANSCRIPTION_FAILED",
+         message: `Transcription ${taskStatus.toLowerCase()}. ${status.output?.message ?? ""}`
+       });
+     }
+   }
+ };
+ //#endregion
  //#region src/types.ts
- const DASHSCOPE_REGION_BASE_URLS = {
-   beijing: {
-     baseURL: "https://dashscope.aliyuncs.com/compatible-mode/v1",
-     videoBaseURL: "https://dashscope.aliyuncs.com"
-   },
-   singapore: {
-     baseURL: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
-     videoBaseURL: "https://dashscope-intl.aliyuncs.com"
-   },
-   us: {
-     baseURL: "https://dashscope-us.aliyuncs.com/compatible-mode/v1",
-     videoBaseURL: "https://dashscope-us.aliyuncs.com"
-   },
-   germany: {
-     baseURL: "https://{workspaceId}.eu-central-1.maas.aliyuncs.com/compatible-mode/v1",
-     videoBaseURL: "https://{workspaceId}.eu-central-1.maas.aliyuncs.com"
+ const DASHSCOPE_REGION_URLS = {
+   beijing: "https://dashscope.aliyuncs.com",
+   singapore: "https://dashscope-intl.aliyuncs.com",
+   us: "https://dashscope-us.aliyuncs.com",
+   germany: "https://{workspaceId}.eu-central-1.maas.aliyuncs.com"
+ };
+ //#endregion
+ //#region src/video.ts
+ const videoOptionsSchema = z.object({
+   negativePrompt: z.string().optional(),
+   promptExtend: z.boolean().optional(),
+   watermark: z.boolean().optional(),
+   resolution: z.string().optional(),
+   size: z.string().optional(),
+   duration: z.number().optional(),
+   pollIntervalMs: z.number().positive().optional(),
+   pollTimeoutMs: z.number().positive().optional()
+ });
+ const createTaskSchema = zodSchema(z.object({
+   output: z.object({
+     task_id: z.string(),
+     task_status: z.string()
+   }).nullish(),
+   request_id: z.string().nullish()
+ }));
+ const taskStatusSchema = zodSchema(z.object({
+   output: z.object({
+     task_id: z.string(),
+     task_status: z.string(),
+     video_url: z.string().nullish(),
+     submit_time: z.string().nullish(),
+     scheduled_time: z.string().nullish(),
+     end_time: z.string().nullish(),
+     code: z.string().nullish(),
+     message: z.string().nullish()
+   }).nullish(),
+   usage: z.object({
+     duration: z.number().nullish(),
+     output_video_duration: z.number().nullish(),
+     size: z.string().nullish()
+   }).nullish(),
+   request_id: z.string().nullish()
+ }));
+ function detectMode(modelId) {
+   return modelId.includes("-i2v") ? "i2v" : "t2v";
+ }
+ var DashScopeVideoModel = class {
+   specificationVersion = "v3";
+   modelId;
+   config;
+   constructor(modelId, config) {
+     this.modelId = modelId;
+     this.config = config;
+   }
+   get provider() {
+     return this.config.provider;
+   }
+   get maxVideosPerCall() {
+     return 1;
+   }
+   async doGenerate(options) {
+     const warnings = [];
+     const mode = detectMode(this.modelId);
+     const dsOptions = await parseProviderOptions({
+       provider: "dashscope",
+       providerOptions: options.providerOptions,
+       schema: videoOptionsSchema
+     });
+     const input = {};
+     if (options.prompt != null) input.prompt = options.prompt;
+     if (dsOptions?.negativePrompt != null) input.negative_prompt = dsOptions.negativePrompt;
+     if (mode === "i2v" && options.image != null) if (options.image.type === "url") input.img_url = options.image.url;
+     else input.img_url = typeof options.image.data === "string" ? options.image.data : uint8ArrayToBase64(options.image.data);
+     const parameters = {};
+     if (dsOptions?.duration != null) parameters.duration = dsOptions.duration;
+     if (options.seed != null) parameters.seed = options.seed;
+     if (dsOptions?.promptExtend != null) parameters.prompt_extend = dsOptions.promptExtend;
+     if (dsOptions?.watermark != null) parameters.watermark = dsOptions.watermark;
+     if (mode === "i2v" && dsOptions?.resolution != null) parameters.resolution = dsOptions.resolution;
+     else if (options.resolution != null) parameters.size = options.resolution.replace("x", "*");
+     else if (dsOptions?.size != null) parameters.size = dsOptions.size;
+     const { value: createResponse } = await postJsonToApi({
+       url: `${this.config.baseURL}/api/v1/services/aigc/video-generation/video-synthesis`,
+       headers: combineHeaders(this.config.headers(), options.headers, { "X-DashScope-Async": "enable" }),
+       body: {
+         model: this.modelId,
+         input,
+         parameters
+       },
+       successfulResponseHandler: createJsonResponseHandler(createTaskSchema),
+       failedResponseHandler: nativeFailedHandler,
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch
+     });
+     const taskId = createResponse.output?.task_id;
+     if (!taskId) throw new AISDKError({
+       name: "DASHSCOPE_VIDEO_ERROR",
+       message: `No task_id returned. Response: ${JSON.stringify(createResponse)}`
+     });
+     const pollInterval = dsOptions?.pollIntervalMs ?? 5e3;
+     const pollTimeout = dsOptions?.pollTimeoutMs ?? 6e5;
+     const startTime = Date.now();
+     while (true) {
+       await delay(pollInterval, { abortSignal: options.abortSignal });
+       if (Date.now() - startTime > pollTimeout) throw new AISDKError({
+         name: "DASHSCOPE_VIDEO_TIMEOUT",
+         message: `Video generation timed out after ${pollTimeout}ms`
+       });
+       const { value: status, responseHeaders } = await getFromApi({
+         url: `${this.config.baseURL}/api/v1/tasks/${taskId}`,
+         headers: combineHeaders(this.config.headers(), options.headers),
+         successfulResponseHandler: createJsonResponseHandler(taskStatusSchema),
+         failedResponseHandler: nativeFailedHandler,
+         abortSignal: options.abortSignal,
+         fetch: this.config.fetch
+       });
+       const taskStatus = status.output?.task_status;
+       if (taskStatus === "SUCCEEDED") {
+         const videoUrl = status.output?.video_url;
+         if (!videoUrl) throw new AISDKError({
+           name: "DASHSCOPE_VIDEO_ERROR",
+           message: `No video URL in response. Task ID: ${taskId}`
+         });
+         return {
+           videos: [{
+             type: "url",
+             url: videoUrl,
+             mediaType: "video/mp4"
+           }],
+           warnings,
+           response: {
+             timestamp: /* @__PURE__ */ new Date(),
+             modelId: this.modelId,
+             headers: responseHeaders
+           }
+         };
+       }
+       if (taskStatus === "FAILED" || taskStatus === "CANCELED") throw new AISDKError({
+         name: "DASHSCOPE_VIDEO_FAILED",
+         message: `Video generation ${taskStatus.toLowerCase()}. ${status.output?.message ?? ""}`
+       });
+     }
    }
  };
  //#endregion
@@ -397,14 +894,7 @@ var DashScopeChatLanguageModel = class {
      ...options.presencePenalty != null && { presence_penalty: options.presencePenalty },
      ...options.stopSequences?.length && { stop: options.stopSequences },
      ...options.seed != null && { seed: options.seed },
-     ...options.responseFormat?.type === "json" && { response_format: options.responseFormat.schema != null ? {
-       type: "json_schema",
-       json_schema: {
-         schema: options.responseFormat.schema,
-         name: options.responseFormat.name ?? "response",
-         description: options.responseFormat.description
-       }
-     } : { type: "json_object" } },
+     ...options.responseFormat?.type === "json" && { response_format: { type: "json_object" } },
      ...apiTools != null && {
        tools: apiTools,
        tool_choice: toolChoice
@@ -422,7 +912,7 @@ var DashScopeChatLanguageModel = class {
    async doGenerate(options) {
      const { args, warnings } = await this.getArgs(options);
      const { responseHeaders, value: response } = await postJsonToApi({
-       url: `${this.config.baseURL}/chat/completions`,
+       url: `${this.config.baseURL}/compatible-mode/v1/chat/completions`,
        headers: combineHeaders(this.config.headers(), options.headers),
        body: args,
        failedResponseHandler,
@@ -468,7 +958,7 @@ var DashScopeChatLanguageModel = class {
      stream: true
    };
    const { responseHeaders, value: response } = await postJsonToApi({
-     url: `${this.config.baseURL}/chat/completions`,
+     url: `${this.config.baseURL}/compatible-mode/v1/chat/completions`,
      headers: combineHeaders(this.config.headers(), options.headers),
      body,
      failedResponseHandler,
@@ -1067,7 +1557,7 @@ var DashScopeResponsesLanguageModel = class {
    async doGenerate(options) {
      const { args: body, warnings } = await this.getArgs(options);
      const { responseHeaders, value: response } = await postJsonToApi({
-       url: `${this.config.baseURL}/responses`,
+       url: `${this.config.baseURL}/compatible-mode/v1/responses`,
        headers: combineHeaders(this.config.headers(), options.headers),
        body,
        failedResponseHandler,
@@ -1103,7 +1593,7 @@ var DashScopeResponsesLanguageModel = class {
    async doStream(options) {
      const { args: body, warnings } = await this.getArgs(options);
      const { responseHeaders, value: response } = await postJsonToApi({
-       url: `${this.config.baseURL}/responses`,
+       url: `${this.config.baseURL}/compatible-mode/v1/responses`,
        headers: combineHeaders(this.config.headers(), options.headers),
        body: {
          ...body,
@@ -1212,10 +1702,9 @@ var DashScopeResponsesLanguageModel = class {
  //#endregion
  //#region src/provider.ts
  function createDashScope(options = {}) {
-   const { region = "beijing", workspaceId, baseURL: explicitBaseURL, videoBaseURL: _explicitVideoBaseURL, includeUsage, ...rest } = options;
-   const regionUrls = DASHSCOPE_REGION_BASE_URLS[region];
-   const baseURL = (explicitBaseURL ?? regionUrls.baseURL).replace("{workspaceId}", workspaceId ?? "");
+   const { region = "beijing", workspaceId, baseURL: explicitBaseURL, includeUsage, ...rest } = options;
    if (region === "germany" && !explicitBaseURL && !workspaceId) throw new Error("workspaceId is required when region is 'germany'. See https://help.aliyun.com/zh/model-studio/obtain-the-app-id-and-workspace-id");
+   const baseURL = (explicitBaseURL ?? DASHSCOPE_REGION_URLS[region]).replace("{workspaceId}", workspaceId ?? "");
    const apiKey = rest.apiKey ?? process.env.DASHSCOPE_API_KEY;
    const getHeaders = () => {
      const headers = {};
@@ -1223,32 +1712,53 @@ function createDashScope(options = {}) {
      if (rest.headers) Object.assign(headers, rest.headers);
      return headers;
    };
-   const chatConfig = {
+   const baseConfig = {
      provider: "dashscope",
      baseURL,
      headers: getHeaders,
-     fetch: rest.fetch,
-     includeUsage
+     fetch: rest.fetch
    };
-   const createChatModel = (modelId) => new DashScopeChatLanguageModel(modelId, chatConfig);
-   const createEmbeddingModel = (modelId) => new DashScopeEmbeddingModel(modelId, chatConfig);
+   const createChatModel = (modelId) => new DashScopeChatLanguageModel(modelId, {
+     ...baseConfig,
+     includeUsage
+   });
+   const createEmbeddingModel = (modelId) => new DashScopeEmbeddingModel(modelId, {
+     ...baseConfig,
+     includeUsage
+   });
    const createRerankingModel = (modelId) => new DashScopeRerankingModel(modelId, {
-     provider: "dashscope.rerank",
-     baseURL,
-     headers: getHeaders,
-     fetch: rest.fetch
+     ...baseConfig,
+     provider: "dashscope.rerank"
    });
    const createResponsesModel = (modelId) => new DashScopeResponsesLanguageModel(modelId, {
-     provider: "dashscope.responses",
-     baseURL,
-     headers: getHeaders,
-     fetch: rest.fetch
+     ...baseConfig,
+     provider: "dashscope.responses"
    });
    const responses = Object.assign(createResponsesModel, { tools: responsesTools });
+   const createImageModel = (modelId) => new DashScopeImageModel(modelId, {
+     ...baseConfig,
+     provider: "dashscope.image"
+   });
+   const createVideoModel = (modelId) => new DashScopeVideoModel(modelId, {
+     ...baseConfig,
+     provider: "dashscope.video"
+   });
+   const createSpeechModel = (modelId) => new DashScopeSpeechModel(modelId, {
+     ...baseConfig,
+     provider: "dashscope.speech"
+   });
+   const createTranscriptionModel = (modelId) => new DashScopeTranscriptionModel(modelId, {
+     ...baseConfig,
+     provider: "dashscope.transcription"
+   });
    return Object.assign(createChatModel, {
      languageModel: createChatModel,
      embeddingModel: createEmbeddingModel,
      rerankingModel: createRerankingModel,
+     imageModel: createImageModel,
+     videoModel: createVideoModel,
+     speechModel: createSpeechModel,
+     transcriptionModel: createTranscriptionModel,
      chatOptions: (chatOpts) => ({ providerOptions: { dashscope: chatOpts } }),
      responsesOptions: (responsesOpts) => ({ providerOptions: { dashscope: responsesOpts } }),
      responses
@@ -1258,4 +1768,4 @@ function createDashScope(options = {}) {
  //#region src/index.ts
  const dashscope = createDashScope();
  //#endregion
- export { DASHSCOPE_REGION_BASE_URLS, DashScopeEmbeddingModel, DashScopeRerankingModel, createDashScope, dashscope, responsesTools };
+ export { DASHSCOPE_REGION_URLS, DashScopeEmbeddingModel, DashScopeImageModel, DashScopeRerankingModel, DashScopeSpeechModel, DashScopeTranscriptionModel, DashScopeVideoModel, createDashScope, dashscope, responsesTools };
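Both async implementations above follow the same protocol: submit a task with the `X-DashScope-Async: enable` header, then poll `/api/v1/tasks/{taskId}` every `pollIntervalMs` (default 5000 ms) until the task reports `SUCCEEDED`, `FAILED`/`CANCELED`, or `pollTimeoutMs` (default 600000 ms) elapses; `delay()` receives the caller's abort signal, so cancellation interrupts the wait. A sketch of tuning those knobs for a long render (that the AI SDK forwards `abortSignal` into the model's `doGenerate` options is an assumption about the call site, not something this diff shows):

```typescript
import { experimental_generateVideo as generateVideo } from "ai";
import { dashscope } from "@agentor/dashscope";

const controller = new AbortController();

const { videos } = await generateVideo({
  model: dashscope.videoModel("wan2.6-t2v"),
  prompt: "A timelapse of clouds over mountains",
  abortSignal: controller.signal, // assumed to reach options.abortSignal in doGenerate
  providerOptions: {
    dashscope: {
      pollIntervalMs: 10_000, // poll every 10 s instead of the 5 s default
      pollTimeoutMs: 1_800_000, // allow 30 min before DASHSCOPE_VIDEO_TIMEOUT fires
    },
  },
});
```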
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@agentor/dashscope",
-   "version": "0.0.1",
+   "version": "0.0.2",
    "description": "AI SDK provider for Alibaba Cloud DashScope (Bailian) API",
    "keywords": [
      "ai",