@agentor/dashscope 0.0.0 → 0.0.2
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +267 -4
- package/dist/index.d.mts +208 -14
- package/dist/index.mjs +673 -134
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,6 +10,12 @@
|
|
|
10
10
|
|
|
11
11
|
- **Chat Completions API** - Standard `/chat/completions` with function calling, streaming, and reasoning
|
|
12
12
|
- **Responses API** - `/responses` endpoint with built-in tools support
|
|
13
|
+
- **Embedding** - Text vectorization via OpenAI-compatible `/embeddings` endpoint
|
|
14
|
+
- **Reranking** - Document reranking via `/reranks` endpoint
|
|
15
|
+
- **Image Generation** - Text-to-image via multimodal generation endpoint
|
|
16
|
+
- **Video Generation** - Text-to-video and image-to-video with async polling
|
|
17
|
+
- **Speech Synthesis** - Text-to-speech for CosyVoice and Qwen-TTS models
|
|
18
|
+
- **Transcription** - Speech-to-text for short and long audio
|
|
13
19
|
- **Built-in Tools** - Web search, code interpreter, web extractor, file search, image search, MCP integration
|
|
14
20
|
- **Thinking Mode** - Enable reasoning/thinking with configurable budget
|
|
15
21
|
- **Multi-region** - Beijing, Singapore, US, Germany regions
|
|
@@ -245,20 +251,277 @@ const first = await generateText({
|
|
|
245
251
|
});
|
|
246
252
|
```
|
|
247
253
|
|
|
254
|
+
## Embedding
|
|
255
|
+
|
|
256
|
+
```typescript
|
|
257
|
+
import { embed, embedMany } from "ai";
|
|
258
|
+
|
|
259
|
+
// Single text embedding
|
|
260
|
+
const { embedding, usage } = await embed({
|
|
261
|
+
model: dashscope.embeddingModel("text-embedding-v4"),
|
|
262
|
+
value: "The clothes quality is excellent",
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
console.log(embedding.length); // 1024 (default dimensions)
|
|
266
|
+
|
|
267
|
+
// Batch embedding
|
|
268
|
+
const { embeddings } = await embedMany({
|
|
269
|
+
model: dashscope.embeddingModel("text-embedding-v4"),
|
|
270
|
+
values: ["Hello world", "Machine learning is fascinating"],
|
|
271
|
+
});
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Custom Dimensions
|
|
275
|
+
|
|
276
|
+
```typescript
|
|
277
|
+
const { embedding } = await embed({
|
|
278
|
+
model: dashscope.embeddingModel("text-embedding-v4"),
|
|
279
|
+
value: "Custom dimension embedding",
|
|
280
|
+
providerOptions: {
|
|
281
|
+
openaiCompatible: {
|
|
282
|
+
dimensions: 256,
|
|
283
|
+
},
|
|
284
|
+
},
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
console.log(embedding.length); // 256
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Reranking
|
|
291
|
+
|
|
292
|
+
```typescript
|
|
293
|
+
import { rerank } from "ai";
|
|
294
|
+
|
|
295
|
+
const { ranking } = await rerank({
|
|
296
|
+
model: dashscope.rerankingModel("qwen3-rerank"),
|
|
297
|
+
query: "What is a reranking model?",
|
|
298
|
+
documents: [
|
|
299
|
+
"Reranking models sort candidate texts by relevance",
|
|
300
|
+
"Quantum computing is a frontier field",
|
|
301
|
+
"Pre-trained models brought advances to reranking",
|
|
302
|
+
],
|
|
303
|
+
});
|
|
304
|
+
|
|
305
|
+
for (const item of ranking) {
|
|
306
|
+
console.log(`Index: ${item.originalIndex}, Score: ${item.score}`);
|
|
307
|
+
}
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
### Top N Results
|
|
311
|
+
|
|
312
|
+
```typescript
|
|
313
|
+
const { ranking } = await rerank({
|
|
314
|
+
model: dashscope.rerankingModel("qwen3-rerank"),
|
|
315
|
+
query: "How to reset password?",
|
|
316
|
+
documents: [
|
|
317
|
+
"Go to Settings > Security > Change Password",
|
|
318
|
+
"Forgot your password?",
|
|
319
|
+
"Two-factor authentication is supported",
|
|
320
|
+
],
|
|
321
|
+
topN: 2,
|
|
322
|
+
});
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
## Image Generation
|
|
326
|
+
|
|
327
|
+
```typescript
|
|
328
|
+
import { generateImage } from "ai";
|
|
329
|
+
|
|
330
|
+
const { images } = await generateImage({
|
|
331
|
+
model: dashscope.imageModel("qwen-image-plus"),
|
|
332
|
+
prompt: "A cute cat sitting on a windowsill with sunlight streaming in",
|
|
333
|
+
providerOptions: {
|
|
334
|
+
dashscope: {
|
|
335
|
+
size: "1024*1024",
|
|
336
|
+
},
|
|
337
|
+
},
|
|
338
|
+
});
|
|
339
|
+
|
|
340
|
+
// images[0].uint8Array — raw image data
|
|
341
|
+
// images[0].base64 — base64 encoded image
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
## Video Generation
|
|
345
|
+
|
|
346
|
+
```typescript
|
|
347
|
+
import { experimental_generateVideo as generateVideo } from "ai";
|
|
348
|
+
|
|
349
|
+
// Text-to-video
|
|
350
|
+
const { videos } = await generateVideo({
|
|
351
|
+
model: dashscope.videoModel("wan2.6-t2v"),
|
|
352
|
+
prompt: "A golden retriever running through a field of sunflowers",
|
|
353
|
+
providerOptions: {
|
|
354
|
+
dashscope: {
|
|
355
|
+
size: "1280*720",
|
|
356
|
+
duration: 5,
|
|
357
|
+
},
|
|
358
|
+
},
|
|
359
|
+
});
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
### Image-to-Video
|
|
363
|
+
|
|
364
|
+
Use a model ID containing `-i2v` for image-to-video mode:
|
|
365
|
+
|
|
366
|
+
```typescript
|
|
367
|
+
const { videos } = await generateVideo({
|
|
368
|
+
model: dashscope.videoModel("wan2.6-i2v-turbo"),
|
|
369
|
+
prompt: "The cat stretches and walks away",
|
|
370
|
+
providerOptions: {
|
|
371
|
+
dashscope: {
|
|
372
|
+
resolution: "720P",
|
|
373
|
+
},
|
|
374
|
+
},
|
|
375
|
+
image: "data:image/png;base64,...", // or a URL string
|
|
376
|
+
});
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
## Speech Synthesis (TTS)
|
|
380
|
+
|
|
381
|
+
```typescript
|
|
382
|
+
import { experimental_generateSpeech as generateSpeech } from "ai";
|
|
383
|
+
import { writeFileSync } from "fs";
|
|
384
|
+
|
|
385
|
+
const { audio } = await generateSpeech({
|
|
386
|
+
model: dashscope.speechModel("cosyvoice-v3-flash"),
|
|
387
|
+
text: "Hello, welcome to Agentor.",
|
|
388
|
+
providerOptions: {
|
|
389
|
+
dashscope: {
|
|
390
|
+
voice: "longanyang",
|
|
391
|
+
format: "wav",
|
|
392
|
+
sampleRate: 24000,
|
|
393
|
+
},
|
|
394
|
+
},
|
|
395
|
+
});
|
|
396
|
+
|
|
397
|
+
writeFileSync("output.wav", audio.uint8Array);
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
## Transcription (Speech-to-Text)
|
|
401
|
+
|
|
402
|
+
### Short Audio (Sync)
|
|
403
|
+
|
|
404
|
+
```typescript
|
|
405
|
+
import { experimental_transcribe as transcribe } from "ai";
|
|
406
|
+
|
|
407
|
+
const { text } = await transcribe({
|
|
408
|
+
model: dashscope.transcriptionModel("qwen3-asr-flash"),
|
|
409
|
+
audio: new URL("https://example.com/audio.mp3"),
|
|
410
|
+
});
|
|
411
|
+
|
|
412
|
+
console.log(text);
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
### Long Audio (Async)
|
|
416
|
+
|
|
417
|
+
For async models, provide a publicly accessible audio URL via `providerOptions.dashscope.fileUrl`; the `audio` argument is only a placeholder in this mode:
|
|
418
|
+
|
|
419
|
+
```typescript
|
|
420
|
+
const { text, segments } = await transcribe({
|
|
421
|
+
model: dashscope.transcriptionModel("qwen3-asr-flash-filetrans"),
|
|
422
|
+
audio: new Uint8Array(0), // placeholder; the real audio is supplied via fileUrl below
|
|
423
|
+
providerOptions: {
|
|
424
|
+
dashscope: {
|
|
425
|
+
fileUrl: "https://example.com/long-audio.mp3",
|
|
426
|
+
enableWords: true,
|
|
427
|
+
},
|
|
428
|
+
},
|
|
429
|
+
});
|
|
430
|
+
```
|
|
431
|
+
|
|
248
432
|
## Provider Configuration
|
|
249
433
|
|
|
250
434
|
```typescript
|
|
251
435
|
import { createDashScope } from "@agentor/dashscope";
|
|
252
436
|
|
|
253
437
|
const dashscope = createDashScope({
|
|
254
|
-
apiKey: "sk-xxx",
|
|
255
|
-
region: "beijing",
|
|
256
|
-
workspaceId: "ws-xxx",
|
|
257
|
-
baseURL: "https://custom-endpoint.com",
|
|
438
|
+
apiKey: "sk-xxx", // or set DASHSCOPE_API_KEY env var
|
|
439
|
+
region: "beijing", // beijing | singapore | us | germany
|
|
440
|
+
workspaceId: "ws-xxx", // required for germany region
|
|
441
|
+
baseURL: "https://custom-endpoint.com", // override default base URL
|
|
258
442
|
headers: { "X-Custom-Header": "value" }, // custom headers
|
|
259
443
|
});
|
|
260
444
|
```
|
|
261
445
|
|
|
446
|
+
## Available Models
|
|
447
|
+
|
|
448
|
+
> For the complete and up-to-date model list, see [Alibaba Cloud Model Studio](https://help.aliyun.com/zh/model-studio/models).
|
|
449
|
+
|
|
450
|
+
### Language Models (Chat)
|
|
451
|
+
|
|
452
|
+
| Model | Description |
|
|
453
|
+
| --------------------- | ----------------------------------------- |
|
|
454
|
+
| `qwen3.6-max-preview` | Flagship model with strongest reasoning |
|
|
455
|
+
| `qwen3.6-plus` | Recommended, balanced capability and cost |
|
|
456
|
+
| `qwen3.6-flash` | Fastest, ultra-low cost |
|
|
457
|
+
| `qwen3.5-plus` | Enhanced reasoning model |
|
|
458
|
+
| `qwen3.5-flash` | Fast and efficient model |
|
|
459
|
+
| `qwen3-coder-plus` | Code-optimized model |
|
|
460
|
+
| `qwen3-coder-flash` | Fast code model |
|
|
461
|
+
| `qwq-plus` | Dedicated reasoning model |
|
|
462
|
+
| `deepseek-v4-pro` | DeepSeek V4 Pro |
|
|
463
|
+
| `deepseek-v4-flash` | DeepSeek V4 Flash |
|
|
464
|
+
| `kimi-k2.6` | Moonshot Kimi K2.6 |
|
|
465
|
+
| `glm-5.1` | Zhipu GLM 5.1 |
|
|
466
|
+
|
|
467
|
+
### Embedding Models
|
|
468
|
+
|
|
469
|
+
| Model | Dimensions | Description |
|
|
470
|
+
| ------------------------------ | ----------------------- | ----------------------------------- |
|
|
471
|
+
| `text-embedding-v4` | 64-2048 (default 1024) | Text embedding for search/RAG |
|
|
472
|
+
| `text-embedding-v3` | 512-1024 (default 1024) | Legacy text embedding |
|
|
473
|
+
| `qwen3-vl-embedding` | 256-2560 (default 2560) | Multimodal (text + image) embedding |
|
|
474
|
+
| `tongyi-embedding-vision-plus` | 64-1152 (default 1152) | Cross-modal search embedding |
|
|
475
|
+
|
|
476
|
+
### Reranking Models
|
|
477
|
+
|
|
478
|
+
| Model | Description |
|
|
479
|
+
| ----------------- | --------------------------------------- |
|
|
480
|
+
| `qwen3-rerank` | Text reranking, 100+ languages |
|
|
481
|
+
| `qwen3-vl-rerank` | Multimodal reranking (text/image/video) |
|
|
482
|
+
| `gte-rerank-v2` | Semantic text reranking |
|
|
483
|
+
|
|
484
|
+
### Image Models
|
|
485
|
+
|
|
486
|
+
| Model | Description |
|
|
487
|
+
| -------------------- | -------------------------------------------- |
|
|
488
|
+
| `wan2.7-image-pro` | Latest Wan image generation, up to 4096x4096 |
|
|
489
|
+
| `wan2.7-image` | Wan image generation, up to 2048x2048 |
|
|
490
|
+
| `qwen-image-2.0-pro` | Qwen image generation and editing |
|
|
491
|
+
| `qwen-image-max` | High quality image generation |
|
|
492
|
+
| `qwen-image-plus` | Enhanced image generation |
|
|
493
|
+
| `z-image-turbo` | Fast image generation |
|
|
494
|
+
|
|
495
|
+
### Video Models
|
|
496
|
+
|
|
497
|
+
| Model | Mode | Description |
|
|
498
|
+
| ------------------ | ---- | ------------------------------------- |
|
|
499
|
+
| `wan2.7-t2v` | T2V | Recommended text-to-video with audio |
|
|
500
|
+
| `wan2.6-t2v` | T2V | Text-to-video with audio |
|
|
501
|
+
| `wan2.2-t2v-plus` | T2V | Text-to-video (silent) |
|
|
502
|
+
| `wan2.7-i2v` | I2V | Recommended image-to-video with audio |
|
|
503
|
+
| `wan2.6-i2v` | I2V | Image-to-video with audio |
|
|
504
|
+
| `wan2.6-i2v-flash` | I2V | Fast image-to-video |
|
|
505
|
+
|
|
506
|
+
### Speech Models (TTS)
|
|
507
|
+
|
|
508
|
+
| Model | Description |
|
|
509
|
+
| -------------------------- | ---------------------------------- |
|
|
510
|
+
| `cosyvoice-v3.5-plus` | Latest flagship, best quality |
|
|
511
|
+
| `cosyvoice-v3.5-flash` | Latest lightweight |
|
|
512
|
+
| `cosyvoice-v3-plus` | V3 enhanced |
|
|
513
|
+
| `cosyvoice-v3-flash` | V3 fast synthesis |
|
|
514
|
+
| `qwen3-tts-flash-realtime` | Qwen TTS with 17 human-like voices |
|
|
515
|
+
|
|
516
|
+
### Transcription Models (STT)
|
|
517
|
+
|
|
518
|
+
| Model | Mode | Description |
|
|
519
|
+
| --------------------------- | ----- | ------------------------------ |
|
|
520
|
+
| `qwen3-asr-flash` | Sync | Short audio (up to 5 min) |
|
|
521
|
+
| `qwen3-asr-flash-filetrans` | Async | Long audio (up to 12 hours) |
|
|
522
|
+
| `fun-asr` | Async | Speaker diarization, hot words |
|
|
523
|
+
| `paraformer-v2` | Async | Legacy async transcription |
|
|
524
|
+
|
|
262
525
|
## License
|
|
263
526
|
|
|
264
527
|
MIT © [Demo Macro](https://www.demomacro.com/)
|
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
import { OpenAICompatibleEmbeddingModel } from "@ai-sdk/openai-compatible";
|
|
1
2
|
import * as _$_ai_sdk_provider_utils0 from "@ai-sdk/provider-utils";
|
|
2
3
|
import { FetchFunction } from "@ai-sdk/provider-utils";
|
|
3
|
-
import { LanguageModelV3 } from "@ai-sdk/provider";
|
|
4
|
+
import { EmbeddingModelV3, Experimental_VideoModelV3, Experimental_VideoModelV3CallOptions, ImageModelV3, ImageModelV3CallOptions, LanguageModelV3, RerankingModelV3, RerankingModelV3CallOptions, SharedV3Warning, SpeechModelV3, SpeechModelV3CallOptions, TranscriptionModelV3, TranscriptionModelV3CallOptions } from "@ai-sdk/provider";
|
|
4
5
|
|
|
5
6
|
//#region src/tools.d.ts
|
|
6
7
|
declare const webSearchToolFactory: _$_ai_sdk_provider_utils0.ProviderToolFactoryWithOutputSchema<Record<string, never>, {
|
|
@@ -131,16 +132,12 @@ type DashScopeResponsesTools = typeof responsesTools;
|
|
|
131
132
|
//#endregion
|
|
132
133
|
//#region src/types.d.ts
|
|
133
134
|
type DashScopeRegion = "beijing" | "singapore" | "us" | "germany";
|
|
134
|
-
declare const DASHSCOPE_REGION_URLS: Record<DashScopeRegion, {
|
|
135
|
-
baseURL: string;
|
|
136
|
-
videoBaseURL: string;
|
|
137
|
-
}>;
|
|
135
|
+
declare const DASHSCOPE_REGION_URLS: Record<DashScopeRegion, string>;
|
|
138
136
|
interface DashScopeProviderSettings {
|
|
139
137
|
apiKey?: string;
|
|
140
138
|
region?: DashScopeRegion;
|
|
141
139
|
workspaceId?: string;
|
|
142
140
|
baseURL?: string;
|
|
143
|
-
videoBaseURL?: string;
|
|
144
141
|
headers?: Record<string, string>;
|
|
145
142
|
fetch?: FetchFunction;
|
|
146
143
|
includeUsage?: boolean;
|
|
@@ -164,13 +161,6 @@ interface DashScopeChatOptions {
|
|
|
164
161
|
/** Enable code interpreter (requires enableThinking). */
|
|
165
162
|
enableCodeInterpreter?: boolean;
|
|
166
163
|
}
|
|
167
|
-
interface DashScopeChatConfig {
|
|
168
|
-
provider: string;
|
|
169
|
-
baseURL: string;
|
|
170
|
-
headers: () => Record<string, string>;
|
|
171
|
-
fetch?: FetchFunction;
|
|
172
|
-
includeUsage?: boolean;
|
|
173
|
-
}
|
|
174
164
|
interface DashScopeResponsesOptions {
|
|
175
165
|
enableThinking?: boolean;
|
|
176
166
|
reasoning?: {
|
|
@@ -188,6 +178,12 @@ interface DashScopeResponsesNamespace {
|
|
|
188
178
|
interface DashScopeProvider {
|
|
189
179
|
(modelId: string): LanguageModelV3;
|
|
190
180
|
languageModel(modelId: string): LanguageModelV3;
|
|
181
|
+
embeddingModel(modelId: string): EmbeddingModelV3;
|
|
182
|
+
rerankingModel(modelId: string): RerankingModelV3;
|
|
183
|
+
imageModel(modelId: string): ImageModelV3;
|
|
184
|
+
videoModel(modelId: string): Experimental_VideoModelV3;
|
|
185
|
+
speechModel(modelId: string): SpeechModelV3;
|
|
186
|
+
transcriptionModel(modelId: string): TranscriptionModelV3;
|
|
191
187
|
chatOptions: (options: DashScopeChatOptions) => {
|
|
192
188
|
providerOptions: {
|
|
193
189
|
dashscope: DashScopeChatOptions;
|
|
@@ -201,10 +197,208 @@ interface DashScopeProvider {
|
|
|
201
197
|
responses: DashScopeResponsesNamespace;
|
|
202
198
|
}
|
|
203
199
|
//#endregion
|
|
200
|
+
//#region src/utils.d.ts
|
|
201
|
+
interface DashScopeConfig {
|
|
202
|
+
provider: string;
|
|
203
|
+
baseURL: string;
|
|
204
|
+
headers: () => Record<string, string>;
|
|
205
|
+
fetch?: FetchFunction;
|
|
206
|
+
includeUsage?: boolean;
|
|
207
|
+
}
|
|
208
|
+
//#endregion
|
|
209
|
+
//#region src/embedding.d.ts
|
|
210
|
+
interface DashScopeEmbeddingOptions {
|
|
211
|
+
/** Output embedding dimensions. Supported by text-embedding-v4, text-embedding-v3, etc. */
|
|
212
|
+
dimensions?: number;
|
|
213
|
+
}
|
|
214
|
+
declare class DashScopeEmbeddingModel extends OpenAICompatibleEmbeddingModel {
|
|
215
|
+
constructor(modelId: string, config: DashScopeConfig);
|
|
216
|
+
}
|
|
217
|
+
//#endregion
|
|
218
|
+
//#region src/image.d.ts
|
|
219
|
+
interface DashScopeImageOptions {
|
|
220
|
+
/** Output image size, e.g. "2048*2048", "1024*1024", "1K", "2K". */
|
|
221
|
+
size?: string;
|
|
222
|
+
/** Negative prompt describing what to avoid. */
|
|
223
|
+
negativePrompt?: string;
|
|
224
|
+
/** Enable prompt extension/rewriting. Default depends on model. */
|
|
225
|
+
promptExtend?: boolean;
|
|
226
|
+
/** Add watermark. Default false. */
|
|
227
|
+
watermark?: boolean;
|
|
228
|
+
/** Number of images to generate. Default 1. */
|
|
229
|
+
n?: number;
|
|
230
|
+
}
|
|
231
|
+
declare class DashScopeImageModel implements ImageModelV3 {
|
|
232
|
+
readonly specificationVersion: "v3";
|
|
233
|
+
readonly modelId: string;
|
|
234
|
+
private readonly config;
|
|
235
|
+
constructor(modelId: string, config: DashScopeConfig);
|
|
236
|
+
get provider(): string;
|
|
237
|
+
get maxImagesPerCall(): number | undefined;
|
|
238
|
+
doGenerate(options: ImageModelV3CallOptions): Promise<{
|
|
239
|
+
images: string[];
|
|
240
|
+
warnings: SharedV3Warning[];
|
|
241
|
+
response: {
|
|
242
|
+
timestamp: Date;
|
|
243
|
+
modelId: string;
|
|
244
|
+
headers: Record<string, string> | undefined;
|
|
245
|
+
};
|
|
246
|
+
}>;
|
|
247
|
+
}
|
|
248
|
+
//#endregion
|
|
249
|
+
//#region src/rerank.d.ts
|
|
250
|
+
interface DashScopeRerankOptions {
|
|
251
|
+
/** English instruction to guide the reranking strategy. */
|
|
252
|
+
instruct?: string;
|
|
253
|
+
}
|
|
254
|
+
declare class DashScopeRerankingModel implements RerankingModelV3 {
|
|
255
|
+
readonly specificationVersion: "v3";
|
|
256
|
+
readonly modelId: string;
|
|
257
|
+
private readonly config;
|
|
258
|
+
constructor(modelId: string, config: DashScopeConfig);
|
|
259
|
+
get provider(): string;
|
|
260
|
+
doRerank(options: RerankingModelV3CallOptions): Promise<{
|
|
261
|
+
ranking: {
|
|
262
|
+
index: number;
|
|
263
|
+
relevanceScore: number;
|
|
264
|
+
}[];
|
|
265
|
+
warnings: SharedV3Warning[];
|
|
266
|
+
response: {
|
|
267
|
+
id: string | undefined;
|
|
268
|
+
modelId: string | undefined;
|
|
269
|
+
headers: Record<string, string> | undefined;
|
|
270
|
+
};
|
|
271
|
+
}>;
|
|
272
|
+
}
|
|
273
|
+
//#endregion
|
|
274
|
+
//#region src/speech.d.ts
|
|
275
|
+
interface DashScopeSpeechOptions {
|
|
276
|
+
/** Voice name. Model-specific, e.g. "longanyang" for CosyVoice, "Cherry" for Qwen-TTS. */
|
|
277
|
+
voice?: string;
|
|
278
|
+
/** Output audio format: "wav", "mp3", "pcm". Default depends on model. */
|
|
279
|
+
format?: string;
|
|
280
|
+
/** Sample rate. Default depends on model. */
|
|
281
|
+
sampleRate?: number;
|
|
282
|
+
/** Language type for Qwen-TTS: "Chinese" | "English" | "Japanese" | etc. */
|
|
283
|
+
languageType?: string;
|
|
284
|
+
/** Speaking speed. 0.5-2.0, default 1.0. */
|
|
285
|
+
speed?: number;
|
|
286
|
+
/** Volume. 0.5-2.0, default 1.0. */
|
|
287
|
+
volume?: number;
|
|
288
|
+
/** Pitch. -12 to 12, default 0. */
|
|
289
|
+
pitch?: number;
|
|
290
|
+
}
|
|
291
|
+
declare class DashScopeSpeechModel implements SpeechModelV3 {
|
|
292
|
+
readonly specificationVersion: "v3";
|
|
293
|
+
readonly modelId: string;
|
|
294
|
+
private readonly config;
|
|
295
|
+
constructor(modelId: string, config: DashScopeConfig);
|
|
296
|
+
get provider(): string;
|
|
297
|
+
doGenerate(options: SpeechModelV3CallOptions): Promise<{
|
|
298
|
+
audio: Uint8Array<ArrayBuffer>;
|
|
299
|
+
warnings: SharedV3Warning[];
|
|
300
|
+
request: {
|
|
301
|
+
body: Record<string, unknown>;
|
|
302
|
+
};
|
|
303
|
+
response: {
|
|
304
|
+
timestamp: Date;
|
|
305
|
+
modelId: string;
|
|
306
|
+
headers: Record<string, string> | undefined;
|
|
307
|
+
};
|
|
308
|
+
}>;
|
|
309
|
+
}
|
|
310
|
+
//#endregion
|
|
311
|
+
//#region src/transcription.d.ts
|
|
312
|
+
interface DashScopeTranscriptionOptions {
|
|
313
|
+
/**
|
|
314
|
+
* Publicly accessible audio file URL for async transcription.
|
|
315
|
+
* Required for async models (filetrans, fun-asr, paraformer) when using long audio.
|
|
316
|
+
*/
|
|
317
|
+
fileUrl?: string;
|
|
318
|
+
/** Language hint(s), e.g. ["zh", "en"]. */
|
|
319
|
+
languageHints?: string[];
|
|
320
|
+
/** Enable inverse text normalization (convert spoken numbers/dates to written form). */
|
|
321
|
+
enableItn?: boolean;
|
|
322
|
+
/** Enable word-level timestamps. */
|
|
323
|
+
enableWords?: boolean;
|
|
324
|
+
/** Channel IDs to transcribe. Default [0]. */
|
|
325
|
+
channelId?: number[];
|
|
326
|
+
/** Polling interval in ms. Default 5000. (async mode only) */
|
|
327
|
+
pollIntervalMs?: number;
|
|
328
|
+
/** Polling timeout in ms. Default 600000. (async mode only) */
|
|
329
|
+
pollTimeoutMs?: number;
|
|
330
|
+
}
|
|
331
|
+
declare class DashScopeTranscriptionModel implements TranscriptionModelV3 {
|
|
332
|
+
readonly specificationVersion: "v3";
|
|
333
|
+
readonly modelId: string;
|
|
334
|
+
private readonly config;
|
|
335
|
+
constructor(modelId: string, config: DashScopeConfig);
|
|
336
|
+
get provider(): string;
|
|
337
|
+
doGenerate(options: TranscriptionModelV3CallOptions): Promise<{
|
|
338
|
+
text: string;
|
|
339
|
+
segments: {
|
|
340
|
+
text: string;
|
|
341
|
+
startSecond: number;
|
|
342
|
+
endSecond: number;
|
|
343
|
+
}[];
|
|
344
|
+
language: undefined;
|
|
345
|
+
durationInSeconds: undefined;
|
|
346
|
+
warnings: SharedV3Warning[];
|
|
347
|
+
response: {
|
|
348
|
+
timestamp: Date;
|
|
349
|
+
modelId: string;
|
|
350
|
+
headers: Record<string, string> | undefined;
|
|
351
|
+
};
|
|
352
|
+
}>;
|
|
353
|
+
private doSync;
|
|
354
|
+
private doAsync;
|
|
355
|
+
}
|
|
356
|
+
//#endregion
|
|
357
|
+
//#region src/video.d.ts
|
|
358
|
+
interface DashScopeVideoOptions {
|
|
359
|
+
/** Negative prompt. */
|
|
360
|
+
negativePrompt?: string;
|
|
361
|
+
/** Enable prompt extension. */
|
|
362
|
+
promptExtend?: boolean;
|
|
363
|
+
/** Add watermark. Default false. */
|
|
364
|
+
watermark?: boolean;
|
|
365
|
+
/** Resolution for I2V: "720P" | "1080P". For T2V: use size "WIDTH*HEIGHT". */
|
|
366
|
+
resolution?: string;
|
|
367
|
+
/** Size in "WIDTH*HEIGHT" format (T2V/R2V). */
|
|
368
|
+
size?: string;
|
|
369
|
+
/** Video duration in seconds. */
|
|
370
|
+
duration?: number;
|
|
371
|
+
/** Polling interval in ms. Default 5000. */
|
|
372
|
+
pollIntervalMs?: number;
|
|
373
|
+
/** Polling timeout in ms. Default 600000. */
|
|
374
|
+
pollTimeoutMs?: number;
|
|
375
|
+
}
|
|
376
|
+
declare class DashScopeVideoModel implements Experimental_VideoModelV3 {
|
|
377
|
+
readonly specificationVersion: "v3";
|
|
378
|
+
readonly modelId: string;
|
|
379
|
+
private readonly config;
|
|
380
|
+
constructor(modelId: string, config: DashScopeConfig);
|
|
381
|
+
get provider(): string;
|
|
382
|
+
get maxVideosPerCall(): number | undefined;
|
|
383
|
+
doGenerate(options: Experimental_VideoModelV3CallOptions): Promise<{
|
|
384
|
+
videos: {
|
|
385
|
+
type: "url";
|
|
386
|
+
url: string;
|
|
387
|
+
mediaType: string;
|
|
388
|
+
}[];
|
|
389
|
+
warnings: SharedV3Warning[];
|
|
390
|
+
response: {
|
|
391
|
+
timestamp: Date;
|
|
392
|
+
modelId: string;
|
|
393
|
+
headers: Record<string, string> | undefined;
|
|
394
|
+
};
|
|
395
|
+
}>;
|
|
396
|
+
}
|
|
397
|
+
//#endregion
|
|
204
398
|
//#region src/provider.d.ts
|
|
205
399
|
declare function createDashScope(options?: DashScopeProviderSettings): DashScopeProvider;
|
|
206
400
|
//#endregion
|
|
207
401
|
//#region src/index.d.ts
|
|
208
402
|
declare const dashscope: DashScopeProvider;
|
|
209
403
|
//#endregion
|
|
210
|
-
export {
|
|
404
|
+
export { DASHSCOPE_REGION_URLS, DashScopeChatOptions, DashScopeEmbeddingModel, DashScopeEmbeddingOptions, DashScopeImageModel, DashScopeImageOptions, DashScopeProvider, DashScopeProviderSettings, DashScopeRegion, DashScopeRerankOptions, DashScopeRerankingModel, DashScopeResponsesNamespace, DashScopeResponsesOptions, DashScopeResponsesTools, DashScopeSpeechModel, DashScopeSpeechOptions, DashScopeTranscriptionModel, DashScopeTranscriptionOptions, DashScopeVideoModel, DashScopeVideoOptions, createDashScope, dashscope, responsesTools };
|