@agentor/dashscope 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +190 -0
- package/dist/index.d.mts +162 -7
- package/dist/index.mjs +558 -48
- package/package.json +1 -1
package/README.md
CHANGED
@@ -12,6 +12,10 @@
 - **Responses API** - `/responses` endpoint with built-in tools support
 - **Embedding** - Text vectorization via OpenAI-compatible `/embeddings` endpoint
 - **Reranking** - Document reranking via `/reranks` endpoint
+- **Image Generation** - Text-to-image via multimodal generation endpoint
+- **Video Generation** - Text-to-video and image-to-video with async polling
+- **Speech Synthesis** - Text-to-speech for CosyVoice and Qwen-TTS models
+- **Transcription** - Speech-to-text for short and long audio
 - **Built-in Tools** - Web search, code interpreter, web extractor, file search, image search, MCP integration
 - **Thinking Mode** - Enable reasoning/thinking with configurable budget
 - **Multi-region** - Beijing, Singapore, US, Germany regions
@@ -318,6 +322,113 @@ const { ranking } = await rerank({
 });
 ```
 
+## Image Generation
+
+```typescript
+import { generateImage } from "ai";
+
+const { images } = await generateImage({
+  model: dashscope.imageModel("qwen-image-plus"),
+  prompt: "A cute cat sitting on a windowsill with sunlight streaming in",
+  providerOptions: {
+    dashscope: {
+      size: "1024*1024",
+    },
+  },
+});
+
+// images[0].uint8Array — raw image data
+// images[0].base64 — base64 encoded image
+```
+
+## Video Generation
+
+```typescript
+import { experimental_generateVideo as generateVideo } from "ai";
+
+// Text-to-video
+const { videos } = await generateVideo({
+  model: dashscope.videoModel("wan2.6-t2v"),
+  prompt: "A golden retriever running through a field of sunflowers",
+  providerOptions: {
+    dashscope: {
+      size: "1280*720",
+      duration: 5,
+    },
+  },
+});
+```
+
+### Image-to-Video
+
+Use a model ID containing `-i2v` for image-to-video mode:
+
+```typescript
+const { videos } = await generateVideo({
+  model: dashscope.videoModel("wan2.6-i2v-turbo"),
+  prompt: "The cat stretches and walks away",
+  providerOptions: {
+    dashscope: {
+      resolution: "720P",
+    },
+  },
+  image: "data:image/png;base64,...", // or a URL string
+});
+```
+
+## Speech Synthesis (TTS)
+
+```typescript
+import { experimental_generateSpeech as generateSpeech } from "ai";
+import { writeFileSync } from "fs";
+
+const { audio } = await generateSpeech({
+  model: dashscope.speechModel("cosyvoice-v3-flash"),
+  text: "Hello, welcome to Agentor.",
+  providerOptions: {
+    dashscope: {
+      voice: "longanyang",
+      format: "wav",
+      sampleRate: 24000,
+    },
+  },
+});
+
+writeFileSync("output.wav", audio.uint8Array);
+```
+
+## Transcription (Speech-to-Text)
+
+### Short Audio (Sync)
+
+```typescript
+import { experimental_transcribe as transcribe } from "ai";
+
+const { text } = await transcribe({
+  model: dashscope.transcriptionModel("qwen3-asr-flash"),
+  audio: new URL("https://example.com/audio.mp3"),
+});
+
+console.log(text);
+```
+
+### Long Audio (Async)
+
+For async models, provide the audio URL via `providerOptions`:
+
+```typescript
+const { text, segments } = await transcribe({
+  model: dashscope.transcriptionModel("qwen3-asr-flash-filetrans"),
+  audio: new Uint8Array(0), // placeholder
+  providerOptions: {
+    dashscope: {
+      fileUrl: "https://example.com/long-audio.mp3",
+      enableWords: true,
+    },
+  },
+});
+```
+
 ## Provider Configuration
 
 ```typescript
@@ -332,6 +443,85 @@ const dashscope = createDashScope({
 });
 ```
 
+## Available Models
+
+> For the complete and up-to-date model list, see [Alibaba Cloud Model Studio](https://help.aliyun.com/zh/model-studio/models).
+
+### Language Models (Chat)
+
+| Model                 | Description                               |
+| --------------------- | ----------------------------------------- |
+| `qwen3.6-max-preview` | Flagship model with strongest reasoning   |
+| `qwen3.6-plus`        | Recommended, balanced capability and cost |
+| `qwen3.6-flash`       | Fastest, ultra-low cost                   |
+| `qwen3.5-plus`        | Enhanced reasoning model                  |
+| `qwen3.5-flash`       | Fast and efficient model                  |
+| `qwen3-coder-plus`    | Code-optimized model                      |
+| `qwen3-coder-flash`   | Fast code model                           |
+| `qwq-plus`            | Dedicated reasoning model                 |
+| `deepseek-v4-pro`     | DeepSeek V4 Pro                           |
+| `deepseek-v4-flash`   | DeepSeek V4 Flash                         |
+| `kimi-k2.6`           | Moonshot Kimi K2.6                        |
+| `glm-5.1`             | Zhipu GLM 5.1                             |
+
+### Embedding Models
+
+| Model                          | Dimensions              | Description                          |
+| ------------------------------ | ----------------------- | ----------------------------------- |
+| `text-embedding-v4`            | 64-2048 (default 1024)  | Text embedding for search/RAG       |
+| `text-embedding-v3`            | 512-1024 (default 1024) | Legacy text embedding               |
+| `qwen3-vl-embedding`           | 256-2560 (default 2560) | Multimodal (text + image) embedding |
+| `tongyi-embedding-vision-plus` | 64-1152 (default 1152)  | Cross-modal search embedding        |
+
+### Reranking Models
+
+| Model             | Description                              |
+| ----------------- | ---------------------------------------- |
+| `qwen3-rerank`    | Text reranking, 100+ languages           |
+| `qwen3-vl-rerank` | Multimodal reranking (text/image/video)  |
+| `gte-rerank-v2`   | Semantic text reranking                  |
+
+### Image Models
+
+| Model                | Description                                  |
+| -------------------- | -------------------------------------------- |
+| `wan2.7-image-pro`   | Latest Wan image generation, up to 4096x4096 |
+| `wan2.7-image`       | Wan image generation, up to 2048x2048        |
+| `qwen-image-2.0-pro` | Qwen image generation and editing            |
+| `qwen-image-max`     | High quality image generation                |
+| `qwen-image-plus`    | Enhanced image generation                    |
+| `z-image-turbo`      | Fast image generation                        |
+
+### Video Models
+
+| Model              | Mode | Description                           |
+| ------------------ | ---- | ------------------------------------- |
+| `wan2.7-t2v`       | T2V  | Recommended text-to-video with audio  |
+| `wan2.6-t2v`       | T2V  | Text-to-video with audio              |
+| `wan2.2-t2v-plus`  | T2V  | Text-to-video (silent)                |
+| `wan2.7-i2v`       | I2V  | Recommended image-to-video with audio |
+| `wan2.6-i2v`       | I2V  | Image-to-video with audio             |
+| `wan2.6-i2v-flash` | I2V  | Fast image-to-video                   |
+
+### Speech Models (TTS)
+
+| Model                      | Description                        |
+| -------------------------- | ---------------------------------- |
+| `cosyvoice-v3.5-plus`      | Latest flagship, best quality      |
+| `cosyvoice-v3.5-flash`     | Latest lightweight                 |
+| `cosyvoice-v3-plus`        | V3 enhanced                        |
+| `cosyvoice-v3-flash`       | V3 fast synthesis                  |
+| `qwen3-tts-flash-realtime` | Qwen TTS with 17 human-like voices |
+
+### Transcription Models (STT)
+
+| Model                       | Mode  | Description                    |
+| --------------------------- | ----- | ------------------------------ |
+| `qwen3-asr-flash`           | Sync  | Short audio (up to 5 min)      |
+| `qwen3-asr-flash-filetrans` | Async | Long audio (up to 12 hours)    |
+| `fun-asr`                   | Async | Speaker diarization, hot words |
+| `paraformer-v2`             | Async | Legacy async transcription     |
+
 ## License
 
 MIT © [Demo Macro](https://www.demomacro.com/)
package/dist/index.d.mts
CHANGED
@@ -1,7 +1,7 @@
 import { OpenAICompatibleEmbeddingModel } from "@ai-sdk/openai-compatible";
 import * as _$_ai_sdk_provider_utils0 from "@ai-sdk/provider-utils";
 import { FetchFunction } from "@ai-sdk/provider-utils";
-import { EmbeddingModelV3, LanguageModelV3, RerankingModelV3, RerankingModelV3CallOptions, SharedV3Warning } from "@ai-sdk/provider";
+import { EmbeddingModelV3, Experimental_VideoModelV3, Experimental_VideoModelV3CallOptions, ImageModelV3, ImageModelV3CallOptions, LanguageModelV3, RerankingModelV3, RerankingModelV3CallOptions, SharedV3Warning, SpeechModelV3, SpeechModelV3CallOptions, TranscriptionModelV3, TranscriptionModelV3CallOptions } from "@ai-sdk/provider";
 
 //#region src/tools.d.ts
 declare const webSearchToolFactory: _$_ai_sdk_provider_utils0.ProviderToolFactoryWithOutputSchema<Record<string, never>, {
@@ -132,16 +132,12 @@ type DashScopeResponsesTools = typeof responsesTools;
 //#endregion
 //#region src/types.d.ts
 type DashScopeRegion = "beijing" | "singapore" | "us" | "germany";
-declare const
-  baseURL: string;
-  videoBaseURL: string;
-}>;
+declare const DASHSCOPE_REGION_URLS: Record<DashScopeRegion, string>;
 interface DashScopeProviderSettings {
   apiKey?: string;
   region?: DashScopeRegion;
   workspaceId?: string;
   baseURL?: string;
-  videoBaseURL?: string;
   headers?: Record<string, string>;
   fetch?: FetchFunction;
   includeUsage?: boolean;
@@ -184,6 +180,10 @@ interface DashScopeProvider {
   languageModel(modelId: string): LanguageModelV3;
   embeddingModel(modelId: string): EmbeddingModelV3;
   rerankingModel(modelId: string): RerankingModelV3;
+  imageModel(modelId: string): ImageModelV3;
+  videoModel(modelId: string): Experimental_VideoModelV3;
+  speechModel(modelId: string): SpeechModelV3;
+  transcriptionModel(modelId: string): TranscriptionModelV3;
   chatOptions: (options: DashScopeChatOptions) => {
     providerOptions: {
       dashscope: DashScopeChatOptions;
@@ -215,6 +215,37 @@ declare class DashScopeEmbeddingModel extends OpenAICompatibleEmbeddingModel {
   constructor(modelId: string, config: DashScopeConfig);
 }
 //#endregion
+//#region src/image.d.ts
+interface DashScopeImageOptions {
+  /** Output image size, e.g. "2048*2048", "1024*1024", "1K", "2K". */
+  size?: string;
+  /** Negative prompt describing what to avoid. */
+  negativePrompt?: string;
+  /** Enable prompt extension/rewriting. Default depends on model. */
+  promptExtend?: boolean;
+  /** Add watermark. Default false. */
+  watermark?: boolean;
+  /** Number of images to generate. Default 1. */
+  n?: number;
+}
+declare class DashScopeImageModel implements ImageModelV3 {
+  readonly specificationVersion: "v3";
+  readonly modelId: string;
+  private readonly config;
+  constructor(modelId: string, config: DashScopeConfig);
+  get provider(): string;
+  get maxImagesPerCall(): number | undefined;
+  doGenerate(options: ImageModelV3CallOptions): Promise<{
+    images: string[];
+    warnings: SharedV3Warning[];
+    response: {
+      timestamp: Date;
+      modelId: string;
+      headers: Record<string, string> | undefined;
+    };
+  }>;
+}
+//#endregion
 //#region src/rerank.d.ts
 interface DashScopeRerankOptions {
   /** English instruction to guide the reranking strategy. */
@@ -240,10 +271,134 @@ declare class DashScopeRerankingModel implements RerankingModelV3 {
   }>;
 }
 //#endregion
+//#region src/speech.d.ts
+interface DashScopeSpeechOptions {
+  /** Voice name. Model-specific, e.g. "longanyang" for CosyVoice, "Cherry" for Qwen-TTS. */
+  voice?: string;
+  /** Output audio format: "wav", "mp3", "pcm". Default depends on model. */
+  format?: string;
+  /** Sample rate. Default depends on model. */
+  sampleRate?: number;
+  /** Language type for Qwen-TTS: "Chinese" | "English" | "Japanese" | etc. */
+  languageType?: string;
+  /** Speaking speed. 0.5-2.0, default 1.0. */
+  speed?: number;
+  /** Volume. 0.5-2.0, default 1.0. */
+  volume?: number;
+  /** Pitch. -12 to 12, default 0. */
+  pitch?: number;
+}
+declare class DashScopeSpeechModel implements SpeechModelV3 {
+  readonly specificationVersion: "v3";
+  readonly modelId: string;
+  private readonly config;
+  constructor(modelId: string, config: DashScopeConfig);
+  get provider(): string;
+  doGenerate(options: SpeechModelV3CallOptions): Promise<{
+    audio: Uint8Array<ArrayBuffer>;
+    warnings: SharedV3Warning[];
+    request: {
+      body: Record<string, unknown>;
+    };
+    response: {
+      timestamp: Date;
+      modelId: string;
+      headers: Record<string, string> | undefined;
+    };
+  }>;
+}
+//#endregion
+//#region src/transcription.d.ts
+interface DashScopeTranscriptionOptions {
+  /**
+   * Publicly accessible audio file URL for async transcription.
+   * Required for async models (filetrans, fun-asr, paraformer) when using long audio.
+   */
+  fileUrl?: string;
+  /** Language hint(s), e.g. ["zh", "en"]. */
+  languageHints?: string[];
+  /** Enable inverse text normalization (convert spoken numbers/dates to written form). */
+  enableItn?: boolean;
+  /** Enable word-level timestamps. */
+  enableWords?: boolean;
+  /** Channel IDs to transcribe. Default [0]. */
+  channelId?: number[];
+  /** Polling interval in ms. Default 5000. (async mode only) */
+  pollIntervalMs?: number;
+  /** Polling timeout in ms. Default 600000. (async mode only) */
+  pollTimeoutMs?: number;
+}
+declare class DashScopeTranscriptionModel implements TranscriptionModelV3 {
+  readonly specificationVersion: "v3";
+  readonly modelId: string;
+  private readonly config;
+  constructor(modelId: string, config: DashScopeConfig);
+  get provider(): string;
+  doGenerate(options: TranscriptionModelV3CallOptions): Promise<{
+    text: string;
+    segments: {
+      text: string;
+      startSecond: number;
+      endSecond: number;
+    }[];
+    language: undefined;
+    durationInSeconds: undefined;
+    warnings: SharedV3Warning[];
+    response: {
+      timestamp: Date;
+      modelId: string;
+      headers: Record<string, string> | undefined;
+    };
+  }>;
+  private doSync;
+  private doAsync;
+}
+//#endregion
+//#region src/video.d.ts
+interface DashScopeVideoOptions {
+  /** Negative prompt. */
+  negativePrompt?: string;
+  /** Enable prompt extension. */
+  promptExtend?: boolean;
+  /** Add watermark. Default false. */
+  watermark?: boolean;
+  /** Resolution for I2V: "720P" | "1080P". For T2V: use size "WIDTH*HEIGHT". */
+  resolution?: string;
+  /** Size in "WIDTH*HEIGHT" format (T2V/R2V). */
+  size?: string;
+  /** Video duration in seconds. */
+  duration?: number;
+  /** Polling interval in ms. Default 5000. */
+  pollIntervalMs?: number;
+  /** Polling timeout in ms. Default 600000. */
+  pollTimeoutMs?: number;
+}
+declare class DashScopeVideoModel implements Experimental_VideoModelV3 {
+  readonly specificationVersion: "v3";
+  readonly modelId: string;
+  private readonly config;
+  constructor(modelId: string, config: DashScopeConfig);
+  get provider(): string;
+  get maxVideosPerCall(): number | undefined;
+  doGenerate(options: Experimental_VideoModelV3CallOptions): Promise<{
+    videos: {
+      type: "url";
+      url: string;
+      mediaType: string;
+    }[];
+    warnings: SharedV3Warning[];
+    response: {
+      timestamp: Date;
+      modelId: string;
+      headers: Record<string, string> | undefined;
+    };
+  }>;
+}
+//#endregion
 //#region src/provider.d.ts
 declare function createDashScope(options?: DashScopeProviderSettings): DashScopeProvider;
 //#endregion
 //#region src/index.d.ts
 declare const dashscope: DashScopeProvider;
 //#endregion
-export {
+export { DASHSCOPE_REGION_URLS, DashScopeChatOptions, DashScopeEmbeddingModel, DashScopeEmbeddingOptions, DashScopeImageModel, DashScopeImageOptions, DashScopeProvider, DashScopeProviderSettings, DashScopeRegion, DashScopeRerankOptions, DashScopeRerankingModel, DashScopeResponsesNamespace, DashScopeResponsesOptions, DashScopeResponsesTools, DashScopeSpeechModel, DashScopeSpeechOptions, DashScopeTranscriptionModel, DashScopeTranscriptionOptions, DashScopeVideoModel, DashScopeVideoOptions, createDashScope, dashscope, responsesTools };
package/dist/index.mjs
CHANGED
@@ -1,13 +1,14 @@
 import { OpenAICompatibleEmbeddingModel } from "@ai-sdk/openai-compatible";
-import { combineHeaders, convertToBase64, createEventSourceResponseHandler, createJsonErrorResponseHandler, createJsonResponseHandler, createProviderToolFactoryWithOutputSchema, generateId, isParsableJson, lazySchema, parseProviderOptions, postJsonToApi, zodSchema } from "@ai-sdk/provider-utils";
+import { combineHeaders, convertToBase64, createEventSourceResponseHandler, createJsonErrorResponseHandler, createJsonResponseHandler, createProviderToolFactoryWithOutputSchema, delay, generateId, getFromApi, isParsableJson, lazySchema, parseProviderOptions, postJsonToApi, zodSchema } from "@ai-sdk/provider-utils";
 import { z } from "zod/v4";
+import { AISDKError } from "@ai-sdk/provider";
 import { convertOpenAICompatibleChatUsage, getResponseMetadata, mapOpenAICompatibleFinishReason, prepareTools } from "@ai-sdk/openai-compatible/internal";
 //#region src/embedding.ts
 var DashScopeEmbeddingModel = class extends OpenAICompatibleEmbeddingModel {
   constructor(modelId, config) {
     super(modelId, {
       provider: config.provider,
-      url: () => `${config.baseURL}/embeddings`,
+      url: () => `${config.baseURL}/compatible-mode/v1/embeddings`,
       headers: config.headers,
       fetch: config.fetch
     });
@@ -21,6 +22,14 @@ const failedResponseHandler = createJsonErrorResponseHandler({
   }) })),
   errorToMessage: (data) => data.error.message
 });
+const nativeFailedHandler = createJsonErrorResponseHandler({
+  errorSchema: zodSchema(z.object({
+    code: z.string().nullish(),
+    message: z.string(),
+    request_id: z.string().nullish()
+  })),
+  errorToMessage: (data) => data.message
+});
 function convertResponsesUsage(usage) {
   if (!usage) return {
     inputTokens: {
@@ -50,6 +59,90 @@ function convertResponsesUsage(usage) {
     raw: usage
   };
 }
+function uint8ArrayToBase64(data) {
+  let binary = "";
+  for (let i = 0; i < data.length; i++) binary += String.fromCharCode(data[i]);
+  return btoa(binary);
+}
+//#endregion
+//#region src/image.ts
+const imageOptionsSchema = z.object({
+  size: z.string().optional(),
+  negativePrompt: z.string().optional(),
+  promptExtend: z.boolean().optional(),
+  watermark: z.boolean().optional(),
+  n: z.number().optional()
+});
+const imageResponseSchema = zodSchema(z.object({
+  output: z.object({ choices: z.array(z.object({ message: z.object({ content: z.array(z.object({ image: z.string().optional() })) }) })).optional() }).nullish(),
+  usage: z.object({
+    image_count: z.number().optional(),
+    width: z.number().optional(),
+    height: z.number().optional()
+  }).nullish(),
+  request_id: z.string().nullish()
+}));
+var DashScopeImageModel = class {
+  specificationVersion = "v3";
+  modelId;
+  config;
+  constructor(modelId, config) {
+    this.modelId = modelId;
+    this.config = config;
+  }
+  get provider() {
+    return this.config.provider;
+  }
+  get maxImagesPerCall() {
+    return 1;
+  }
+  async doGenerate(options) {
+    const warnings = [];
+    const dsOptions = await parseProviderOptions({
+      provider: "dashscope",
+      providerOptions: options.providerOptions,
+      schema: imageOptionsSchema
+    });
+    const body = {
+      model: this.modelId,
+      input: { messages: [{
+        role: "user",
+        content: [{ text: options.prompt }]
+      }] },
+      parameters: {
+        ...dsOptions?.size != null && { size: dsOptions.size },
+        ...dsOptions?.negativePrompt != null && { negative_prompt: dsOptions.negativePrompt },
+        ...dsOptions?.promptExtend != null && { prompt_extend: dsOptions.promptExtend },
+        ...dsOptions?.watermark != null && { watermark: dsOptions.watermark },
+        ...dsOptions?.n != null && { n: dsOptions.n }
+      }
+    };
+    const { responseHeaders, value: response } = await postJsonToApi({
+      url: `${this.config.baseURL}/api/v1/services/aigc/multimodal-generation/generation`,
+      headers: combineHeaders(this.config.headers(), options.headers),
+      body,
+      failedResponseHandler: nativeFailedHandler,
+      successfulResponseHandler: createJsonResponseHandler(imageResponseSchema),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch
+    });
+    const imageUrls = response.output?.choices?.flatMap((c) => c.message.content.filter((p) => p.image != null).map((p) => p.image)) ?? [];
+    const images = [];
+    for (const url of imageUrls) {
+      const buffer = await (await (this.config.fetch ?? fetch)(url, { headers: this.config.headers() })).arrayBuffer();
+      images.push(uint8ArrayToBase64(new Uint8Array(buffer)));
+    }
+    return {
+      images,
+      warnings,
+      response: {
+        timestamp: /* @__PURE__ */ new Date(),
+        modelId: this.modelId,
+        headers: responseHeaders
+      }
+    };
+  }
+};
 //#endregion
 //#region src/rerank.ts
 const rerankResponseSchema = zodSchema(z.object({
@@ -81,7 +174,7 @@ var DashScopeRerankingModel = class {
       ...options.topN != null && { top_n: options.topN }
     };
     const { responseHeaders, value: response } = await postJsonToApi({
-      url: `${this.config.baseURL
+      url: `${this.config.baseURL}/compatible-api/v1/reranks`,
       headers: combineHeaders(this.config.headers(), options.headers),
       body,
       failedResponseHandler,
@@ -104,6 +197,94 @@ var DashScopeRerankingModel = class {
   }
 };
 //#endregion
+//#region src/speech.ts
+const speechOptionsSchema = z.object({
+  voice: z.string().optional(),
+  format: z.string().optional(),
+  sampleRate: z.number().optional(),
+  languageType: z.string().optional(),
+  speed: z.number().optional(),
+  volume: z.number().optional(),
+  pitch: z.number().optional()
+});
+const cosyvoiceResponseSchema = zodSchema(z.object({
+  output: z.object({ audio: z.object({ url: z.string().optional() }).nullish() }).nullish(),
+  request_id: z.string().nullish()
+}));
+var DashScopeSpeechModel = class {
+  specificationVersion = "v3";
+  modelId;
+  config;
+  constructor(modelId, config) {
+    this.modelId = modelId;
+    this.config = config;
+  }
+  get provider() {
+    return this.config.provider;
+  }
+  async doGenerate(options) {
+    const warnings = [];
+    const dsOptions = await parseProviderOptions({
+      provider: "dashscope",
+      providerOptions: options.providerOptions,
+      schema: speechOptionsSchema
+    });
+    const voice = dsOptions?.voice ?? "longanyang";
+    const format = dsOptions?.format ?? "wav";
+    const sampleRate = dsOptions?.sampleRate ?? 24e3;
+    const isCosyVoice = this.modelId.startsWith("cosyvoice");
+    let url;
+    let body;
+    if (isCosyVoice) {
+      url = `${this.config.baseURL}/api/v1/services/audio/tts/SpeechSynthesizer`;
+      body = {
+        model: this.modelId,
+        input: {
+          text: options.text,
+          voice,
+          format,
+          sample_rate: sampleRate,
+          ...dsOptions?.speed != null && { speech_rate: dsOptions.speed },
+          ...dsOptions?.volume != null && { volume: dsOptions.volume },
+          ...dsOptions?.pitch != null && { pitch_rate: dsOptions.pitch }
+        }
+      };
+    } else {
+      url = `${this.config.baseURL}/api/v1/services/aigc/multimodal-generation/generation`;
+      body = {
+        model: this.modelId,
+        input: {
+          text: options.text,
+          voice,
+          ...dsOptions?.languageType != null && { language_type: dsOptions.languageType }
+        }
+      };
+    }
+    const { responseHeaders, value: response } = await postJsonToApi({
+      url,
+      headers: combineHeaders(this.config.headers(), options.headers),
+      body,
+      failedResponseHandler: nativeFailedHandler,
+      successfulResponseHandler: createJsonResponseHandler(cosyvoiceResponseSchema),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch
+    });
+    const audioUrl = response.output?.audio?.url;
+    if (!audioUrl) throw new Error("No audio URL returned from TTS API");
+    const audioBuffer = await (await (this.config.fetch ?? fetch)(audioUrl, { headers: this.config.headers() })).arrayBuffer();
+    return {
+      audio: new Uint8Array(audioBuffer),
+      warnings,
+      request: { body },
+      response: {
+        timestamp: /* @__PURE__ */ new Date(),
+        modelId: this.modelId,
+        headers: responseHeaders
+      }
+    };
+  }
+};
+//#endregion
 //#region src/tools.ts
 const webSearchToolFactory = createProviderToolFactoryWithOutputSchema({
   id: "dashscope.web_search",
@@ -178,23 +359,339 @@ const responsesTools = {
   mcp: (args) => mcpToolFactory(args)
 };
 //#endregion
+//#region src/transcription.ts
+const transcriptionOptionsSchema = z.object({
+  fileUrl: z.string().optional(),
+  languageHints: z.array(z.string()).optional(),
+  enableItn: z.boolean().optional(),
+  enableWords: z.boolean().optional(),
+  channelId: z.array(z.number()).optional(),
+  pollIntervalMs: z.number().positive().optional(),
+  pollTimeoutMs: z.number().positive().optional()
+});
+const syncResponseSchema = zodSchema(z.object({
+  output: z.object({ choices: z.array(z.object({ message: z.object({ content: z.array(z.object({ text: z.string().optional() })) }) })).optional() }).nullish(),
+  request_id: z.string().nullish()
+}));
+const createTaskSchema$1 = zodSchema(z.object({
+  output: z.object({
+    task_id: z.string(),
+    task_status: z.string()
+  }).nullish(),
+  request_id: z.string().nullish()
+}));
+const taskStatusSchema$1 = zodSchema(z.object({
+  output: z.object({
+    task_id: z.string(),
+    task_status: z.string(),
+    result: z.object({ transcription_url: z.string().nullish() }).nullish(),
+    results: z.array(z.object({
+      subtask_status: z.string().nullish(),
+      transcription_url: z.string().nullish()
+    })).nullish(),
+    code: z.string().nullish(),
+    message: z.string().nullish()
+  }).nullish(),
+  request_id: z.string().nullish()
+}));
+function isAsyncModel(modelId) {
+  return modelId.includes("filetrans") || modelId.startsWith("fun-asr") || modelId.startsWith("paraformer");
+}
+function buildAudioUrl(audio, mediaType) {
+  if (typeof audio === "string") {
+    if (audio.startsWith("http")) return audio;
+    return `data:${mediaType};base64,${audio}`;
+  }
+  return `data:${mediaType};base64,${uint8ArrayToBase64(audio)}`;
+}
+var DashScopeTranscriptionModel = class {
+  specificationVersion = "v3";
+  modelId;
+  config;
+  constructor(modelId, config) {
+    this.modelId = modelId;
+    this.config = config;
+  }
+  get provider() {
+    return this.config.provider;
+  }
+  async doGenerate(options) {
+    const warnings = [];
+    const dsOptions = await parseProviderOptions({
+      provider: "dashscope",
+      providerOptions: options.providerOptions,
+      schema: transcriptionOptionsSchema
+    }) ?? null;
+    if (isAsyncModel(this.modelId) && dsOptions?.fileUrl) return this.doAsync(options, dsOptions, warnings);
+    return this.doSync(options, dsOptions, warnings);
+  }
+  async doSync(options, dsOptions, warnings) {
+    const audioUrl = buildAudioUrl(options.audio, options.mediaType);
+    const body = {
+      model: this.modelId,
+      input: { messages: [{
+        role: "user",
+        content: [{ audio: audioUrl }]
+      }] },
+      parameters: {
+        result_format: "message",
+        ...dsOptions?.enableItn != null && { asr_options: { enable_itn: dsOptions.enableItn } }
+      }
+    };
+    const { responseHeaders, value: response } = await postJsonToApi({
+      url: `${this.config.baseURL}/api/v1/services/aigc/multimodal-generation/generation`,
+      headers: combineHeaders(this.config.headers(), options.headers),
+      body,
+      failedResponseHandler: nativeFailedHandler,
+      successfulResponseHandler: createJsonResponseHandler(syncResponseSchema),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch
+    });
+    return {
+      text: response.output?.choices?.[0]?.message.content.filter((p) => p.text != null).map((p) => p.text).join("") ?? "",
+      segments: [],
+      language: void 0,
+      durationInSeconds: void 0,
+      warnings,
+      request: { body },
+      response: {
+        timestamp: /* @__PURE__ */ new Date(),
+        modelId: this.modelId,
+        headers: responseHeaders
+      }
+    };
+  }
+  async doAsync(options, dsOptions, warnings) {
+    const audioUrl = dsOptions?.fileUrl;
+    if (!audioUrl) throw new AISDKError({
+      name: "DASHSCOPE_TRANSCRIPTION_ERROR",
+      message: "Async transcription requires providerOptions.dashscope.fileUrl with a publicly accessible audio URL."
+    });
+    const parameters = {};
+    if (dsOptions?.channelId != null) parameters.channel_id = dsOptions.channelId;
+    if (dsOptions?.enableItn != null) parameters.enable_itn = dsOptions.enableItn;
+    if (dsOptions?.enableWords != null) parameters.enable_words = dsOptions.enableWords;
+    if (dsOptions?.languageHints?.length) parameters.language_hints = dsOptions.languageHints;
+    const { value: createResponse } = await postJsonToApi({
+      url: `${this.config.baseURL}/api/v1/services/audio/asr/transcription`,
+      headers: combineHeaders(this.config.headers(), options.headers, { "X-DashScope-Async": "enable" }),
+      body: {
+        model: this.modelId,
+        input: { file_url: audioUrl },
+        ...Object.keys(parameters).length > 0 && { parameters }
+      },
+      successfulResponseHandler: createJsonResponseHandler(createTaskSchema$1),
+      failedResponseHandler: nativeFailedHandler,
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch
+    });
+    const taskId = createResponse.output?.task_id;
+    if (!taskId) throw new AISDKError({
+      name: "DASHSCOPE_TRANSCRIPTION_ERROR",
+      message: `No task_id returned. Response: ${JSON.stringify(createResponse)}`
+    });
+    const pollInterval = dsOptions?.pollIntervalMs ?? 5e3;
+    const pollTimeout = dsOptions?.pollTimeoutMs ?? 6e5;
+    const startTime = Date.now();
+    while (true) {
+      await delay(pollInterval, { abortSignal: options.abortSignal });
+      if (Date.now() - startTime > pollTimeout) throw new AISDKError({
+        name: "DASHSCOPE_TRANSCRIPTION_TIMEOUT",
+        message: `Transcription timed out after ${pollTimeout}ms`
+      });
+      const { value: status, responseHeaders } = await getFromApi({
+        url: `${this.config.baseURL}/api/v1/tasks/${taskId}`,
+        headers: combineHeaders(this.config.headers(), options.headers, { "X-DashScope-Async": "enable" }),
+        successfulResponseHandler: createJsonResponseHandler(taskStatusSchema$1),
+        failedResponseHandler: nativeFailedHandler,
+        abortSignal: options.abortSignal,
+        fetch: this.config.fetch
+      });
+      const taskStatus = status.output?.task_status;
+      if (taskStatus === "SUCCEEDED") {
+        let transcriptionUrl = status.output?.result?.transcription_url;
+        if (!transcriptionUrl) transcriptionUrl = ((status.output?.results)?.find((r) => r.subtask_status === "SUCCEEDED"))?.transcription_url;
+        if (!transcriptionUrl) throw new AISDKError({
+          name: "DASHSCOPE_TRANSCRIPTION_ERROR",
+          message: `No transcription URL in response. Task ID: ${taskId}`
+        });
+        const resultData = await (await (this.config.fetch ?? fetch)(transcriptionUrl)).json();
+        let text = "";
+        const segments = [];
+        if (resultData.transcripts) for (const transcript of resultData.transcripts) {
+          text += transcript.text;
+          if (transcript.sentences) {
+            for (const sentence of transcript.sentences) if (sentence.begin_time != null && sentence.end_time != null) segments.push({
+              text: sentence.text,
+              startSecond: sentence.begin_time / 1e3,
+              endSecond: sentence.end_time / 1e3
+            });
+          }
+        }
+        return {
+          text,
+          segments,
+          language: void 0,
+          durationInSeconds: void 0,
+          warnings,
+          response: {
+            timestamp: /* @__PURE__ */ new Date(),
+            modelId: this.modelId,
+            headers: responseHeaders
+          }
+        };
+      }
+      if (taskStatus === "FAILED" || taskStatus === "CANCELED") throw new AISDKError({
+        name: "DASHSCOPE_TRANSCRIPTION_FAILED",
+        message: `Transcription ${taskStatus.toLowerCase()}. ${status.output?.message ?? ""}`
+      });
+    }
+  }
+};
+//#endregion
 //#region src/types.ts
-const
-  beijing:
-
-
-}
-
-
-
-
-
-
-
-
-
-
-
+const DASHSCOPE_REGION_URLS = {
+  beijing: "https://dashscope.aliyuncs.com",
+  singapore: "https://dashscope-intl.aliyuncs.com",
+  us: "https://dashscope-us.aliyuncs.com",
+  germany: "https://{workspaceId}.eu-central-1.maas.aliyuncs.com"
+};
+//#endregion
+//#region src/video.ts
+const videoOptionsSchema = z.object({
+  negativePrompt: z.string().optional(),
+  promptExtend: z.boolean().optional(),
+  watermark: z.boolean().optional(),
+  resolution: z.string().optional(),
+  size: z.string().optional(),
+  duration: z.number().optional(),
+  pollIntervalMs: z.number().positive().optional(),
+  pollTimeoutMs: z.number().positive().optional()
+});
+const createTaskSchema = zodSchema(z.object({
+  output: z.object({
+    task_id: z.string(),
+    task_status: z.string()
+  }).nullish(),
+  request_id: z.string().nullish()
+}));
+const taskStatusSchema = zodSchema(z.object({
+  output: z.object({
+    task_id: z.string(),
+    task_status: z.string(),
+    video_url: z.string().nullish(),
+    submit_time: z.string().nullish(),
+    scheduled_time: z.string().nullish(),
+    end_time: z.string().nullish(),
+    code: z.string().nullish(),
+    message: z.string().nullish()
+  }).nullish(),
+  usage: z.object({
+    duration: z.number().nullish(),
+    output_video_duration: z.number().nullish(),
+    size: z.string().nullish()
+  }).nullish(),
+  request_id: z.string().nullish()
+}));
+function detectMode(modelId) {
+  return modelId.includes("-i2v") ? "i2v" : "t2v";
+}
+var DashScopeVideoModel = class {
+  specificationVersion = "v3";
+  modelId;
+  config;
+  constructor(modelId, config) {
+    this.modelId = modelId;
+    this.config = config;
+  }
+  get provider() {
+    return this.config.provider;
+  }
+  get maxVideosPerCall() {
+    return 1;
+  }
+  async doGenerate(options) {
+    const warnings = [];
+    const mode = detectMode(this.modelId);
+    const dsOptions = await parseProviderOptions({
+      provider: "dashscope",
+      providerOptions: options.providerOptions,
+      schema: videoOptionsSchema
+    });
+    const input = {};
+    if (options.prompt != null) input.prompt = options.prompt;
+    if (dsOptions?.negativePrompt != null) input.negative_prompt = dsOptions.negativePrompt;
+    if (mode === "i2v" && options.image != null) if (options.image.type === "url") input.img_url = options.image.url;
+    else input.img_url = typeof options.image.data === "string" ? options.image.data : uint8ArrayToBase64(options.image.data);
+    const parameters = {};
+    if (dsOptions?.duration != null) parameters.duration = dsOptions.duration;
+    if (options.seed != null) parameters.seed = options.seed;
+    if (dsOptions?.promptExtend != null) parameters.prompt_extend = dsOptions.promptExtend;
+    if (dsOptions?.watermark != null) parameters.watermark = dsOptions.watermark;
+    if (mode === "i2v" && dsOptions?.resolution != null) parameters.resolution = dsOptions.resolution;
+    else if (options.resolution != null) parameters.size = options.resolution.replace("x", "*");
+    else if (dsOptions?.size != null) parameters.size = dsOptions.size;
+    const { value: createResponse } = await postJsonToApi({
+      url: `${this.config.baseURL}/api/v1/services/aigc/video-generation/video-synthesis`,
+      headers: combineHeaders(this.config.headers(), options.headers, { "X-DashScope-Async": "enable" }),
+      body: {
+        model: this.modelId,
+        input,
+        parameters
+      },
+      successfulResponseHandler: createJsonResponseHandler(createTaskSchema),
+      failedResponseHandler: nativeFailedHandler,
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch
+    });
+    const taskId = createResponse.output?.task_id;
+    if (!taskId) throw new AISDKError({
+      name: "DASHSCOPE_VIDEO_ERROR",
+      message: `No task_id returned. Response: ${JSON.stringify(createResponse)}`
+    });
+    const pollInterval = dsOptions?.pollIntervalMs ?? 5e3;
+    const pollTimeout = dsOptions?.pollTimeoutMs ?? 6e5;
+    const startTime = Date.now();
+    while (true) {
+      await delay(pollInterval, { abortSignal: options.abortSignal });
+      if (Date.now() - startTime > pollTimeout) throw new AISDKError({
+        name: "DASHSCOPE_VIDEO_TIMEOUT",
+        message: `Video generation timed out after ${pollTimeout}ms`
+      });
+      const { value: status, responseHeaders } = await getFromApi({
+        url: `${this.config.baseURL}/api/v1/tasks/${taskId}`,
+        headers: combineHeaders(this.config.headers(), options.headers),
+        successfulResponseHandler: createJsonResponseHandler(taskStatusSchema),
+        failedResponseHandler: nativeFailedHandler,
+        abortSignal: options.abortSignal,
+        fetch: this.config.fetch
+      });
+      const taskStatus = status.output?.task_status;
+      if (taskStatus === "SUCCEEDED") {
+        const videoUrl = status.output?.video_url;
+        if (!videoUrl) throw new AISDKError({
+          name: "DASHSCOPE_VIDEO_ERROR",
+          message: `No video URL in response. Task ID: ${taskId}`
+        });
+        return {
+          videos: [{
+            type: "url",
+            url: videoUrl,
+            mediaType: "video/mp4"
+          }],
+          warnings,
+          response: {
+            timestamp: /* @__PURE__ */ new Date(),
+            modelId: this.modelId,
+            headers: responseHeaders
+          }
+        };
+      }
+      if (taskStatus === "FAILED" || taskStatus === "CANCELED") throw new AISDKError({
+        name: "DASHSCOPE_VIDEO_FAILED",
+        message: `Video generation ${taskStatus.toLowerCase()}. ${status.output?.message ?? ""}`
+      });
+    }
   }
 };
 //#endregion
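The `germany` entry above is the only region URL carrying a `{workspaceId}` placeholder, which is why `createDashScope` (rewritten in the provider.ts hunk below) rejects that region when neither `baseURL` nor `workspaceId` is supplied. A small sketch of both paths, with an illustrative workspace ID:

```typescript
import { createDashScope } from "@agentor/dashscope";

// Throws: "workspaceId is required when region is 'germany'. ..."
// createDashScope({ region: "germany" });

// workspaceId is substituted into the {workspaceId} placeholder, giving
// https://ws-example.eu-central-1.maas.aliyuncs.com ("ws-example" is made up).
const eu = createDashScope({ region: "germany", workspaceId: "ws-example" });
```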
@@ -397,14 +894,7 @@ var DashScopeChatLanguageModel = class {
       ...options.presencePenalty != null && { presence_penalty: options.presencePenalty },
       ...options.stopSequences?.length && { stop: options.stopSequences },
       ...options.seed != null && { seed: options.seed },
-      ...options.responseFormat?.type === "json" && { response_format:
-        type: "json_schema",
-        json_schema: {
-          schema: options.responseFormat.schema,
-          name: options.responseFormat.name ?? "response",
-          description: options.responseFormat.description
-        }
-      } : { type: "json_object" } },
+      ...options.responseFormat?.type === "json" && { response_format: { type: "json_object" } },
       ...apiTools != null && {
         tools: apiTools,
         tool_choice: toolChoice
@@ -422,7 +912,7 @@ var DashScopeChatLanguageModel = class {
   async doGenerate(options) {
     const { args, warnings } = await this.getArgs(options);
     const { responseHeaders, value: response } = await postJsonToApi({
-      url: `${this.config.baseURL}/chat/completions`,
+      url: `${this.config.baseURL}/compatible-mode/v1/chat/completions`,
       headers: combineHeaders(this.config.headers(), options.headers),
       body: args,
       failedResponseHandler,
@@ -468,7 +958,7 @@ var DashScopeChatLanguageModel = class {
       stream: true
     };
     const { responseHeaders, value: response } = await postJsonToApi({
-      url: `${this.config.baseURL}/chat/completions`,
+      url: `${this.config.baseURL}/compatible-mode/v1/chat/completions`,
      headers: combineHeaders(this.config.headers(), options.headers),
       body,
       failedResponseHandler,
@@ -1067,7 +1557,7 @@ var DashScopeResponsesLanguageModel = class {
   async doGenerate(options) {
     const { args: body, warnings } = await this.getArgs(options);
     const { responseHeaders, value: response } = await postJsonToApi({
-      url: `${this.config.baseURL}/responses`,
+      url: `${this.config.baseURL}/compatible-mode/v1/responses`,
       headers: combineHeaders(this.config.headers(), options.headers),
       body,
       failedResponseHandler,
@@ -1103,7 +1593,7 @@ var DashScopeResponsesLanguageModel = class {
   async doStream(options) {
     const { args: body, warnings } = await this.getArgs(options);
     const { responseHeaders, value: response } = await postJsonToApi({
-      url: `${this.config.baseURL}/responses`,
+      url: `${this.config.baseURL}/compatible-mode/v1/responses`,
       headers: combineHeaders(this.config.headers(), options.headers),
       body: {
         ...body,
@@ -1212,10 +1702,9 @@ var DashScopeResponsesLanguageModel = class {
 //#endregion
 //#region src/provider.ts
 function createDashScope(options = {}) {
-  const { region = "beijing", workspaceId, baseURL: explicitBaseURL,
-  const regionUrls = DASHSCOPE_REGION_BASE_URLS[region];
-  const baseURL = (explicitBaseURL ?? regionUrls.baseURL).replace("{workspaceId}", workspaceId ?? "");
+  const { region = "beijing", workspaceId, baseURL: explicitBaseURL, includeUsage, ...rest } = options;
   if (region === "germany" && !explicitBaseURL && !workspaceId) throw new Error("workspaceId is required when region is 'germany'. See https://help.aliyun.com/zh/model-studio/obtain-the-app-id-and-workspace-id");
+  const baseURL = (explicitBaseURL ?? DASHSCOPE_REGION_URLS[region]).replace("{workspaceId}", workspaceId ?? "");
   const apiKey = rest.apiKey ?? process.env.DASHSCOPE_API_KEY;
   const getHeaders = () => {
     const headers = {};
@@ -1223,32 +1712,53 @@ function createDashScope(options = {}) {
   if (rest.headers) Object.assign(headers, rest.headers);
   return headers;
   };
-  const
+  const baseConfig = {
     provider: "dashscope",
     baseURL,
     headers: getHeaders,
-    fetch: rest.fetch
-    includeUsage
+    fetch: rest.fetch
   };
-  const createChatModel = (modelId) => new DashScopeChatLanguageModel(modelId,
-
+  const createChatModel = (modelId) => new DashScopeChatLanguageModel(modelId, {
+    ...baseConfig,
+    includeUsage
+  });
+  const createEmbeddingModel = (modelId) => new DashScopeEmbeddingModel(modelId, {
+    ...baseConfig,
+    includeUsage
+  });
   const createRerankingModel = (modelId) => new DashScopeRerankingModel(modelId, {
-
-
-    headers: getHeaders,
-    fetch: rest.fetch
+    ...baseConfig,
+    provider: "dashscope.rerank"
   });
   const createResponsesModel = (modelId) => new DashScopeResponsesLanguageModel(modelId, {
-
-
-    headers: getHeaders,
-    fetch: rest.fetch
+    ...baseConfig,
+    provider: "dashscope.responses"
   });
   const responses = Object.assign(createResponsesModel, { tools: responsesTools });
+  const createImageModel = (modelId) => new DashScopeImageModel(modelId, {
+    ...baseConfig,
+    provider: "dashscope.image"
+  });
+  const createVideoModel = (modelId) => new DashScopeVideoModel(modelId, {
+    ...baseConfig,
+    provider: "dashscope.video"
+  });
+  const createSpeechModel = (modelId) => new DashScopeSpeechModel(modelId, {
+    ...baseConfig,
+    provider: "dashscope.speech"
+  });
+  const createTranscriptionModel = (modelId) => new DashScopeTranscriptionModel(modelId, {
+    ...baseConfig,
+    provider: "dashscope.transcription"
+  });
   return Object.assign(createChatModel, {
     languageModel: createChatModel,
     embeddingModel: createEmbeddingModel,
     rerankingModel: createRerankingModel,
+    imageModel: createImageModel,
+    videoModel: createVideoModel,
+    speechModel: createSpeechModel,
+    transcriptionModel: createTranscriptionModel,
     chatOptions: (chatOpts) => ({ providerOptions: { dashscope: chatOpts } }),
     responsesOptions: (responsesOpts) => ({ providerOptions: { dashscope: responsesOpts } }),
     responses
@@ -1258,4 +1768,4 @@ function createDashScope(options = {}) {
 //#region src/index.ts
 const dashscope = createDashScope();
 //#endregion
-export {
+export { DASHSCOPE_REGION_URLS, DashScopeEmbeddingModel, DashScopeImageModel, DashScopeRerankingModel, DashScopeSpeechModel, DashScopeTranscriptionModel, DashScopeVideoModel, createDashScope, dashscope, responsesTools };