@vectorx/ai-sdk 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/ai.d.ts +1 -1
- package/lib/ai.js +14 -0
- package/lib/model-type.d.ts +26 -0
- package/lib/models/Chat.d.ts +2 -2
- package/lib/models/Default/index.d.ts +1 -0
- package/lib/models/Default/index.js +27 -1
- package/lib/models/Qwen25T2iPreview/index.d.ts +3 -1
- package/lib/models/Qwen25T2iPreview/index.js +49 -0
- package/lib/models/Qwen3VlPlus/index.d.ts +2 -0
- package/lib/models/Qwen3VlPlus/index.js +66 -2
- package/lib/models/QwenCosyVoiceTTS/index.d.ts +8 -0
- package/lib/models/QwenCosyVoiceTTS/index.js +178 -0
- package/lib/models/QwenDocTurbo/adapters/DashScope.d.ts +2 -0
- package/lib/models/QwenDocTurbo/adapters/DashScope.js +86 -11
- package/lib/models/QwenDocTurbo/adapters/OpenAICompat.d.ts +2 -0
- package/lib/models/QwenDocTurbo/adapters/OpenAICompat.js +102 -3
- package/lib/models/QwenImage/index.d.ts +3 -1
- package/lib/models/QwenImage/index.js +52 -1
- package/lib/models/QwenImage20/index.d.ts +91 -0
- package/lib/models/QwenImage20/index.js +244 -0
- package/lib/models/QwenImageEdit/index.d.ts +3 -1
- package/lib/models/QwenImageEdit/index.js +52 -1
- package/lib/models/QwenSketchToImage/index.d.ts +1 -1
- package/lib/models/QwenStyleRepaintV1/index.d.ts +1 -1
- package/lib/models/QwenStyleRepaintV1/index.js +3 -1
- package/lib/models/QwenVlMax/index.d.ts +2 -0
- package/lib/models/QwenVlMax/index.js +67 -2
- package/lib/models/TTSModel.d.ts +11 -0
- package/lib/models/TTSModel.js +75 -0
- package/lib/models/Wan26Image/index.d.ts +3 -1
- package/lib/models/Wan26Image/index.js +52 -1
- package/lib/models/index.d.ts +25 -3
- package/lib/models/index.js +49 -2
- package/lib/models/react.d.ts +6 -2
- package/lib/models/react.js +22 -0
- package/lib/stream.js +10 -7
- package/lib/tokenManager.js +11 -1
- package/package.json +9 -7
package/lib/ai.d.ts
CHANGED
|
@@ -10,7 +10,7 @@ export declare class AI {
|
|
|
10
10
|
protected request: IAbstractRequest;
|
|
11
11
|
tokenManager: TokenManager;
|
|
12
12
|
constructor(request: IAbstractRequest, baseUrl: string, env: AiSdkEnv);
|
|
13
|
-
createModel(model: models.ModelName | models.MultiModalModelName): models.ReActModel;
|
|
13
|
+
createModel(model: models.ModelName | models.MultiModalModelName | models.TTSModelName): models.ReActModel;
|
|
14
14
|
createAgent(options: types.IAgentOptions): Agent;
|
|
15
15
|
registerFunctionTool(functionTool: types.FunctionTool): void;
|
|
16
16
|
}
|
package/lib/ai.js
CHANGED
|
@@ -47,19 +47,33 @@ class AI {
|
|
|
47
47
|
this.tokenManager = new tokenManager_1.TokenManager(this.request, this.baseUrl);
|
|
48
48
|
}
|
|
49
49
|
createModel(model) {
|
|
50
|
+
var _a;
|
|
51
|
+
const _debug = process.env.VECTORX_DEBUG === "1";
|
|
52
|
+
if (_debug)
|
|
53
|
+
console.log("[AI:createModel] model:", model, "env:", this.env, "baseUrl:", (_a = this.baseUrl) === null || _a === void 0 ? void 0 : _a.slice(0, 50));
|
|
50
54
|
if (this.env === index_1.AiSdkEnv.MiniProgram) {
|
|
51
55
|
console.warn(`[${index_1.AiSdkEnv.MiniProgram}] is not supported for model creation`);
|
|
52
56
|
return null;
|
|
53
57
|
}
|
|
54
58
|
if (!models.isValidModel(model)) {
|
|
59
|
+
console.error("[AI:createModel] invalid model:", model);
|
|
55
60
|
throw new Error(`[${model}] is not supported, please check the model name`);
|
|
56
61
|
}
|
|
62
|
+
if (models.isTTSModel(model)) {
|
|
63
|
+
if (_debug)
|
|
64
|
+
console.log("[AI:createModel] creating TTS model:", model);
|
|
65
|
+
return new models.ReActModel(new models.AliyunCosyVoiceTTSModel(this.tokenManager, model));
|
|
66
|
+
}
|
|
57
67
|
switch (model) {
|
|
58
68
|
case models.MultiModalModelName.QwenImage:
|
|
59
69
|
return new models.ReActModel(new models.QwenImageModel(this.request, models.QwenImageModel.BASE_URL, this.tokenManager));
|
|
60
70
|
case models.MultiModalModelName.QwenImageEditPlus:
|
|
61
71
|
case models.MultiModalModelName.QwenImageEdit:
|
|
62
72
|
return new models.ReActModel(new models.QwenImageEditModel(this.request, models.QwenImageEditModel.BASE_URL, this.tokenManager, model));
|
|
73
|
+
case models.MultiModalModelName.QwenImage20Pro:
|
|
74
|
+
return new models.ReActModel(new models.QwenImage20ProModel(this.request, models.QwenImage20ProModel.BASE_URL, this.tokenManager));
|
|
75
|
+
case models.MultiModalModelName.QwenImage20:
|
|
76
|
+
return new models.ReActModel(new models.QwenImage20FastModel(this.request, models.QwenImage20FastModel.BASE_URL, this.tokenManager));
|
|
63
77
|
case models.MultiModalModelName.WanxSketchToImageLite:
|
|
64
78
|
return new models.ReActModel(new models.WanxSketchToImageLiteModel(this.request, models.WanxSketchToImageLiteModel.BASE_URL, this.tokenManager));
|
|
65
79
|
case models.MultiModalModelName.QwenStyleRepaintV1:
|
package/lib/model-type.d.ts
CHANGED
|
@@ -187,6 +187,32 @@ interface Usage {
|
|
|
187
187
|
reasoning_tokens: number;
|
|
188
188
|
total_tokens: number;
|
|
189
189
|
}
|
|
190
|
+
export interface TTSRequestOptions {
|
|
191
|
+
text: string;
|
|
192
|
+
voice: string;
|
|
193
|
+
format?: "mp3" | "wav" | "pcm" | "opus";
|
|
194
|
+
sampleRate?: 8000 | 16000 | 22050 | 24000 | 44100 | 48000;
|
|
195
|
+
volume?: number;
|
|
196
|
+
rate?: number;
|
|
197
|
+
pitch?: number;
|
|
198
|
+
instruction?: string;
|
|
199
|
+
languageHints?: string[];
|
|
200
|
+
}
|
|
201
|
+
export interface TTSStreamChunk {
|
|
202
|
+
audio: Buffer;
|
|
203
|
+
event: "sentence-begin" | "sentence-synthesis" | "sentence-end";
|
|
204
|
+
sentence?: {
|
|
205
|
+
index: number;
|
|
206
|
+
originalText?: string;
|
|
207
|
+
};
|
|
208
|
+
}
|
|
209
|
+
export type DoStreamSpeechOutput = AsyncIterableReadableStream<TTSStreamChunk>;
|
|
210
|
+
export interface DoGenerateSpeechOutput {
|
|
211
|
+
audio: Buffer;
|
|
212
|
+
usage: {
|
|
213
|
+
characters: number;
|
|
214
|
+
};
|
|
215
|
+
}
|
|
190
216
|
export interface IAgent {
|
|
191
217
|
sendMessage(input: SendMessageInput): Promise<any>;
|
|
192
218
|
getHistoryMessages?(input: GetHistoryMessagesParams): Promise<GetHistoryMessagesResponse>;
|
package/lib/models/Chat.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { IAbstractRequest } from "@vectorx/ai-types";
|
|
2
|
-
import { DoGenerateOutput, DoStreamOutput, ModelRequestOptions, ReqOptions } from "../model-type";
|
|
3
|
-
import { TokenManager } from "../tokenManager";
|
|
2
|
+
import type { DoGenerateOutput, DoStreamOutput, ModelRequestOptions, ReqOptions } from "../model-type";
|
|
3
|
+
import type { TokenManager } from "../tokenManager";
|
|
4
4
|
export declare abstract class SimpleChatModel {
|
|
5
5
|
protected req: IAbstractRequest;
|
|
6
6
|
protected baseUrl: string;
|
package/lib/models/Default/index.d.ts
CHANGED
|
@@ -5,6 +5,7 @@ import type { ModelName } from "../index";
|
|
|
5
5
|
export declare class DefaultSimpleModel extends SimpleChatModel {
|
|
6
6
|
modelName: ModelName;
|
|
7
7
|
constructor(req: IAbstractRequest, baseUrl: string, modelName: ModelName);
|
|
8
|
+
private adaptMessages;
|
|
8
9
|
protected modelRequest(data: ModelRequestOptions, options?: ReqOptions): Promise<ReadableStream<Uint8Array> | Promise<unknown>>;
|
|
9
10
|
doGenerate(data: ModelRequestOptions, options?: ReqOptions): Promise<DoGenerateOutput>;
|
|
10
11
|
doStream(data: ModelRequestOptions, options?: ReqOptions): Promise<DoStreamOutput>;
|
package/lib/models/Default/index.js
CHANGED
|
@@ -30,24 +30,45 @@ class DefaultSimpleModel extends Chat_1.SimpleChatModel {
|
|
|
30
30
|
super(req, baseUrl, "conversation/chat");
|
|
31
31
|
this.modelName = modelName;
|
|
32
32
|
}
|
|
33
|
+
adaptMessages(data) {
|
|
34
|
+
if (!data.messages)
|
|
35
|
+
return data;
|
|
36
|
+
return Object.assign(Object.assign({}, data), { messages: data.messages.map((msg) => {
|
|
37
|
+
if (typeof msg.content === "string" || !Array.isArray(msg.content))
|
|
38
|
+
return msg;
|
|
39
|
+
return Object.assign(Object.assign({}, msg), { content: msg.content.map((item) => {
|
|
40
|
+
if (item.type !== "text")
|
|
41
|
+
return item;
|
|
42
|
+
if ("content" in item)
|
|
43
|
+
return item;
|
|
44
|
+
if ("text" in item)
|
|
45
|
+
return { type: "text", content: item.text };
|
|
46
|
+
return item;
|
|
47
|
+
}) });
|
|
48
|
+
}) });
|
|
49
|
+
}
|
|
33
50
|
modelRequest(data_1) {
|
|
34
51
|
return __awaiter(this, arguments, void 0, function* (data, options = {
|
|
35
52
|
timeout: 30 * 1000,
|
|
36
53
|
}) {
|
|
37
54
|
const fetchHeaders = Object.assign({ "Content-Type": "application/json" }, ((options === null || options === void 0 ? void 0 : options.headers) || {}));
|
|
38
55
|
data.stream && Object.assign(fetchHeaders, { Accept: "text/event-stream" });
|
|
56
|
+
const reqUrl = `${this.baseUrl}/${this.subUrl}`;
|
|
57
|
+
console.log("[DefaultSimpleModel:modelRequest] fetching:", reqUrl, JSON.stringify(data, null, 2));
|
|
39
58
|
const { data: responseData, header } = (yield this.req.fetch({
|
|
40
|
-
url:
|
|
59
|
+
url: reqUrl,
|
|
41
60
|
headers: Object.assign({}, fetchHeaders),
|
|
42
61
|
body: JSON.stringify(data),
|
|
43
62
|
method: "post",
|
|
44
63
|
stream: Boolean(data.stream),
|
|
45
64
|
}));
|
|
65
|
+
console.log("[DefaultSimpleModel:modelRequest] response received, type:", typeof responseData, "hasHeader:", !!header);
|
|
46
66
|
return (0, utils_1.handleResponseData)(responseData, header);
|
|
47
67
|
});
|
|
48
68
|
}
|
|
49
69
|
doGenerate(data, options) {
|
|
50
70
|
return __awaiter(this, void 0, void 0, function* () {
|
|
71
|
+
data = this.adaptMessages(data);
|
|
51
72
|
data.model = this.modelName;
|
|
52
73
|
const payload = Object.assign(Object.assign(Object.assign({}, defaultOptions), data), { stream: false });
|
|
53
74
|
const res = yield this.modelRequest(payload, options);
|
|
@@ -56,9 +77,14 @@ class DefaultSimpleModel extends Chat_1.SimpleChatModel {
|
|
|
56
77
|
}
|
|
57
78
|
doStream(data, options) {
|
|
58
79
|
return __awaiter(this, void 0, void 0, function* () {
|
|
80
|
+
var _a;
|
|
81
|
+
data = this.adaptMessages(data);
|
|
59
82
|
data.model = this.modelName;
|
|
60
83
|
const payload = Object.assign(Object.assign(Object.assign({}, defaultOptions), data), { stream: true });
|
|
84
|
+
console.log("[DefaultSimpleModel:doStream] url:", `${this.baseUrl}/${this.subUrl}`, "model:", this.modelName);
|
|
85
|
+
console.log("[DefaultSimpleModel:doStream] messages count:", (_a = payload.messages) === null || _a === void 0 ? void 0 : _a.length);
|
|
61
86
|
const _stream = (yield this.modelRequest(payload, options));
|
|
87
|
+
console.log("[DefaultSimpleModel:doStream] got stream:", !!_stream, "type:", typeof _stream);
|
|
62
88
|
const stream = (0, stream_1.toPolyfillReadable)(_stream);
|
|
63
89
|
const standardStream = (0, stream_1.intoStandardStream)(stream);
|
|
64
90
|
return (0, stream_1.createAsyncIterable)(standardStream);
|
package/lib/models/Qwen25T2iPreview/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { IAbstractRequest } from "@vectorx/ai-types";
|
|
2
2
|
import type { DoGenerateOutput, DoStreamOutput, ModelRequestOptions, ReqOptions } from "../../model-type";
|
|
3
|
-
import { TokenManager } from "../../tokenManager";
|
|
3
|
+
import type { TokenManager } from "../../tokenManager";
|
|
4
4
|
import { SimpleChatModel } from "../Chat";
|
|
5
5
|
export interface Qwen25T2iPreviewParameters {
|
|
6
6
|
size?: string;
|
|
@@ -71,6 +71,8 @@ export declare class Qwen25T2iPreviewModel extends SimpleChatModel {
|
|
|
71
71
|
parameters?: Qwen25T2iPreviewParameters;
|
|
72
72
|
}): Qwen25T2iPreviewRequestOptions;
|
|
73
73
|
protected modelRequest(data: Qwen25T2iPreviewRequestOptions, options?: ReqOptions): Promise<ReadableStream<Uint8Array> | Promise<unknown>>;
|
|
74
|
+
private isErrorResponse;
|
|
75
|
+
private normalizeErrorResponse;
|
|
74
76
|
doGenerate(data: ModelRequestOptions, options?: ReqOptions): Promise<DoGenerateOutput>;
|
|
75
77
|
doStream(data: ModelRequestOptions, options?: ReqOptions): Promise<DoStreamOutput>;
|
|
76
78
|
}
|
package/lib/models/Qwen25T2iPreview/index.js
CHANGED
|
@@ -149,11 +149,60 @@ class Qwen25T2iPreviewModel extends Chat_1.SimpleChatModel {
|
|
|
149
149
|
return (0, utils_1.handleResponseData)(responseData, header);
|
|
150
150
|
});
|
|
151
151
|
}
|
|
152
|
+
isErrorResponse(obj) {
|
|
153
|
+
if ((obj === null || obj === void 0 ? void 0 : obj.error) && typeof obj.error === "object" && obj.error.code)
|
|
154
|
+
return true;
|
|
155
|
+
if (typeof (obj === null || obj === void 0 ? void 0 : obj.code) === "string" && !(obj === null || obj === void 0 ? void 0 : obj.output))
|
|
156
|
+
return true;
|
|
157
|
+
if (typeof (obj === null || obj === void 0 ? void 0 : obj.code) === "number" && obj.code !== 0 && !(obj === null || obj === void 0 ? void 0 : obj.output))
|
|
158
|
+
return true;
|
|
159
|
+
return false;
|
|
160
|
+
}
|
|
161
|
+
normalizeErrorResponse(res) {
|
|
162
|
+
var _a, _b;
|
|
163
|
+
const err = typeof (res === null || res === void 0 ? void 0 : res.error) === "object" ? res.error : {};
|
|
164
|
+
const errorCode = (err === null || err === void 0 ? void 0 : err.code) || (res === null || res === void 0 ? void 0 : res.code) || "unknown_error";
|
|
165
|
+
const errorMessage = (err === null || err === void 0 ? void 0 : err.message) || (res === null || res === void 0 ? void 0 : res.message) || "Unknown error";
|
|
166
|
+
const id = (res === null || res === void 0 ? void 0 : res.id) || (res === null || res === void 0 ? void 0 : res.request_id) || "";
|
|
167
|
+
const created = (_a = res === null || res === void 0 ? void 0 : res.created) !== null && _a !== void 0 ? _a : Math.floor(Date.now() / 1000);
|
|
168
|
+
return {
|
|
169
|
+
id,
|
|
170
|
+
object: "chat.completion",
|
|
171
|
+
created,
|
|
172
|
+
model: (_b = res === null || res === void 0 ? void 0 : res.model) !== null && _b !== void 0 ? _b : this.modelName,
|
|
173
|
+
log_id: id,
|
|
174
|
+
error: `[${errorCode}] ${errorMessage}`,
|
|
175
|
+
code: typeof (res === null || res === void 0 ? void 0 : res.code) === "number" ? res.code : -1,
|
|
176
|
+
choices: [
|
|
177
|
+
{
|
|
178
|
+
index: 0,
|
|
179
|
+
message: {
|
|
180
|
+
id,
|
|
181
|
+
role: "assistant",
|
|
182
|
+
type: "error",
|
|
183
|
+
content: `[${errorCode}] ${errorMessage}`,
|
|
184
|
+
reasoning_content: "",
|
|
185
|
+
},
|
|
186
|
+
finish_reason: "error",
|
|
187
|
+
},
|
|
188
|
+
],
|
|
189
|
+
usage: {
|
|
190
|
+
prompt_tokens: 0,
|
|
191
|
+
completion_tokens: 0,
|
|
192
|
+
knowledge_tokens: 0,
|
|
193
|
+
reasoning_tokens: 0,
|
|
194
|
+
total_tokens: 0,
|
|
195
|
+
},
|
|
196
|
+
};
|
|
197
|
+
}
|
|
152
198
|
doGenerate(data, options) {
|
|
153
199
|
return __awaiter(this, void 0, void 0, function* () {
|
|
154
200
|
data.model = this.modelName;
|
|
155
201
|
const payload = this.coverModelRequestToQwenInput(data);
|
|
156
202
|
const res = (yield this.modelRequest(payload, options));
|
|
203
|
+
if (this.isErrorResponse(res)) {
|
|
204
|
+
return this.normalizeErrorResponse(res);
|
|
205
|
+
}
|
|
157
206
|
return this.normalizeStandardImageCompletion(res, this.modelName);
|
|
158
207
|
});
|
|
159
208
|
}
|
package/lib/models/Qwen3VlPlus/index.d.ts
CHANGED
|
@@ -72,6 +72,8 @@ export declare class Qwen3VlPlus extends SimpleChatModel {
|
|
|
72
72
|
constructor(req: IAbstractRequest, baseUrl: string, modelName: MultiModalModelName, tokenManager: TokenManager);
|
|
73
73
|
protected modelRequest(data: Qwen3VlPlusAPIInput, options?: ReqOptions): Promise<ReadableStream<Uint8Array> | Promise<unknown>>;
|
|
74
74
|
protected normalizeResponse(response: Qwen3VlPlusResponse): DoGenerateOutput;
|
|
75
|
+
private normalizeErrorResponse;
|
|
76
|
+
private isErrorResponse;
|
|
75
77
|
doGenerate(data: ModelRequestOptions, options?: ReqOptions): Promise<DoGenerateOutput>;
|
|
76
78
|
doStream(data: ModelRequestOptions, options?: ReqOptions): Promise<DoStreamOutput>;
|
|
77
79
|
private convertToQwen3VlPlusRequestOptions;
|
package/lib/models/Qwen3VlPlus/index.js
CHANGED
|
@@ -77,11 +77,61 @@ class Qwen3VlPlus extends Chat_1.SimpleChatModel {
|
|
|
77
77
|
},
|
|
78
78
|
};
|
|
79
79
|
}
|
|
80
|
+
normalizeErrorResponse(res) {
|
|
81
|
+
var _a, _b;
|
|
82
|
+
const err = (res === null || res === void 0 ? void 0 : res.error) || {};
|
|
83
|
+
const errorCode = (err === null || err === void 0 ? void 0 : err.code) || (res === null || res === void 0 ? void 0 : res.code) || "unknown_error";
|
|
84
|
+
const errorMessage = (err === null || err === void 0 ? void 0 : err.message) || (res === null || res === void 0 ? void 0 : res.message) || "Unknown error";
|
|
85
|
+
const id = (res === null || res === void 0 ? void 0 : res.id) || (res === null || res === void 0 ? void 0 : res.request_id) || "";
|
|
86
|
+
const created = (_a = res === null || res === void 0 ? void 0 : res.created) !== null && _a !== void 0 ? _a : Math.floor(Date.now() / 1000);
|
|
87
|
+
return {
|
|
88
|
+
id,
|
|
89
|
+
object: "chat.completion",
|
|
90
|
+
created,
|
|
91
|
+
model: (_b = res === null || res === void 0 ? void 0 : res.model) !== null && _b !== void 0 ? _b : this.modelName,
|
|
92
|
+
log_id: id,
|
|
93
|
+
error: `[${errorCode}] ${errorMessage}`,
|
|
94
|
+
code: typeof (res === null || res === void 0 ? void 0 : res.code) === "number" ? res.code : -1,
|
|
95
|
+
choices: [
|
|
96
|
+
{
|
|
97
|
+
index: 0,
|
|
98
|
+
message: {
|
|
99
|
+
id,
|
|
100
|
+
role: "assistant",
|
|
101
|
+
type: "error",
|
|
102
|
+
content: `[${errorCode}] ${errorMessage}`,
|
|
103
|
+
reasoning_content: "",
|
|
104
|
+
},
|
|
105
|
+
finish_reason: "error",
|
|
106
|
+
},
|
|
107
|
+
],
|
|
108
|
+
usage: {
|
|
109
|
+
prompt_tokens: 0,
|
|
110
|
+
completion_tokens: 0,
|
|
111
|
+
knowledge_tokens: 0,
|
|
112
|
+
reasoning_tokens: 0,
|
|
113
|
+
total_tokens: 0,
|
|
114
|
+
},
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
isErrorResponse(obj) {
|
|
118
|
+
if ((obj === null || obj === void 0 ? void 0 : obj.error) && typeof obj.error === "object" && obj.error.code)
|
|
119
|
+
return true;
|
|
120
|
+
if ((obj === null || obj === void 0 ? void 0 : obj.error) && typeof obj.error === "string" && obj.error.length > 0 && !obj.choices)
|
|
121
|
+
return true;
|
|
122
|
+
if (typeof (obj === null || obj === void 0 ? void 0 : obj.code) === "number" && obj.code !== 0 && !obj.choices)
|
|
123
|
+
return true;
|
|
124
|
+
return false;
|
|
125
|
+
}
|
|
80
126
|
doGenerate(data, options) {
|
|
81
127
|
return __awaiter(this, void 0, void 0, function* () {
|
|
82
128
|
const qwen3VlPlusData = this.convertToQwen3VlPlusRequestOptions(data);
|
|
83
129
|
const requestData = Object.assign(Object.assign({}, qwen3VlPlusData), { stream: false });
|
|
84
130
|
const res = yield this.modelRequest(requestData, options);
|
|
131
|
+
const resObj = res;
|
|
132
|
+
if (this.isErrorResponse(resObj)) {
|
|
133
|
+
return this.normalizeErrorResponse(resObj);
|
|
134
|
+
}
|
|
85
135
|
return this.normalizeResponse(res);
|
|
86
136
|
});
|
|
87
137
|
}
|
|
@@ -92,7 +142,19 @@ class Qwen3VlPlus extends Chat_1.SimpleChatModel {
|
|
|
92
142
|
const _stream = (yield this.modelRequest(requestData, options));
|
|
93
143
|
const stream = (0, stream_1.toPolyfillReadable)(_stream);
|
|
94
144
|
const standardStream = (0, stream_1.intoStandardStream)(stream);
|
|
95
|
-
|
|
145
|
+
const self = this;
|
|
146
|
+
const errorNormalizingStream = standardStream.pipeThrough(new stream_1.TransformStream({
|
|
147
|
+
transform(chunk, controller) {
|
|
148
|
+
const raw = chunk;
|
|
149
|
+
if (self.isErrorResponse(raw)) {
|
|
150
|
+
controller.enqueue(self.normalizeErrorResponse(raw));
|
|
151
|
+
}
|
|
152
|
+
else {
|
|
153
|
+
controller.enqueue(chunk);
|
|
154
|
+
}
|
|
155
|
+
},
|
|
156
|
+
}));
|
|
157
|
+
return (0, stream_1.createAsyncIterable)(errorNormalizingStream);
|
|
96
158
|
});
|
|
97
159
|
}
|
|
98
160
|
convertToQwen3VlPlusRequestOptions(data) {
|
|
@@ -100,7 +162,9 @@ class Qwen3VlPlus extends Chat_1.SimpleChatModel {
|
|
|
100
162
|
const clamp = (value, min, max, defaultValue) => value !== undefined ? Math.max(min, Math.min(max, value)) : defaultValue;
|
|
101
163
|
const messages = (data.messages || []).map((msg) => ({
|
|
102
164
|
role: msg.role,
|
|
103
|
-
content: Array.isArray(msg.content)
|
|
165
|
+
content: Array.isArray(msg.content)
|
|
166
|
+
? (0, model_type_1.filterContentByTypes)(msg.content, ["text", "image_url"])
|
|
167
|
+
: msg.content,
|
|
104
168
|
}));
|
|
105
169
|
return {
|
|
106
170
|
model: this.modelName,
|
package/lib/models/QwenCosyVoiceTTS/index.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { DoStreamSpeechOutput, TTSRequestOptions } from "../../model-type";
|
|
2
|
+
import type { TokenManager } from "../../tokenManager";
|
|
3
|
+
import { SimpleTTSModel } from "../TTSModel";
|
|
4
|
+
import type { TTSModelName } from "../index";
|
|
5
|
+
export declare class AliyunCosyVoiceTTSModel extends SimpleTTSModel {
|
|
6
|
+
constructor(tokenManager: TokenManager, modelName: TTSModelName);
|
|
7
|
+
doStreamSpeech(options: TTSRequestOptions): Promise<DoStreamSpeechOutput>;
|
|
8
|
+
}
|
package/lib/models/QwenCosyVoiceTTS/index.js
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.AliyunCosyVoiceTTSModel = void 0;
|
|
16
|
+
const node_crypto_1 = require("node:crypto");
|
|
17
|
+
const ws_1 = __importDefault(require("ws"));
|
|
18
|
+
const TTSModel_1 = require("../TTSModel");
|
|
19
|
+
const DASHSCOPE_WSS_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/inference/";
|
|
20
|
+
class AliyunCosyVoiceTTSModel extends TTSModel_1.SimpleTTSModel {
|
|
21
|
+
constructor(tokenManager, modelName) {
|
|
22
|
+
super(tokenManager, modelName);
|
|
23
|
+
}
|
|
24
|
+
doStreamSpeech(options) {
|
|
25
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
26
|
+
console.log("[AliyunCosyVoiceTTS:doStreamSpeech] start, model:", this.modelName, "voice:", options.voice);
|
|
27
|
+
const token = yield this.getValidToken();
|
|
28
|
+
console.log("[AliyunCosyVoiceTTS:doStreamSpeech] token obtained:", token ? `${token.slice(0, 8)}...` : "(empty)");
|
|
29
|
+
const taskId = (0, node_crypto_1.randomUUID)().replace(/-/g, "");
|
|
30
|
+
console.log("[AliyunCosyVoiceTTS:doStreamSpeech] taskId:", taskId);
|
|
31
|
+
const modelName = this.modelName;
|
|
32
|
+
return new Promise((resolve, reject) => {
|
|
33
|
+
console.log("[AliyunCosyVoiceTTS:doStreamSpeech] connecting to WSS:", DASHSCOPE_WSS_URL);
|
|
34
|
+
const ws = new ws_1.default(DASHSCOPE_WSS_URL, {
|
|
35
|
+
headers: { Authorization: `Bearer ${token}` },
|
|
36
|
+
});
|
|
37
|
+
let taskStarted = false;
|
|
38
|
+
let totalCharacters = 0;
|
|
39
|
+
let currentSentenceIndex = 0;
|
|
40
|
+
let currentOriginalText;
|
|
41
|
+
let pendingEvent = null;
|
|
42
|
+
const controller = new ReadableStream({
|
|
43
|
+
start(streamController) {
|
|
44
|
+
ws.on("open", () => {
|
|
45
|
+
console.log("[AliyunCosyVoiceTTS] WebSocket opened, sending run-task...");
|
|
46
|
+
ws.send(JSON.stringify({
|
|
47
|
+
header: {
|
|
48
|
+
action: "run-task",
|
|
49
|
+
task_id: taskId,
|
|
50
|
+
streaming: "duplex",
|
|
51
|
+
},
|
|
52
|
+
payload: {
|
|
53
|
+
task_group: "audio",
|
|
54
|
+
task: "tts",
|
|
55
|
+
function: "SpeechSynthesizer",
|
|
56
|
+
model: modelName,
|
|
57
|
+
parameters: Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign(Object.assign({ text_type: "PlainText", voice: options.voice }, (options.format && { format: options.format })), (options.sampleRate && { sample_rate: options.sampleRate })), (options.volume != null && { volume: options.volume })), (options.rate != null && { rate: options.rate })), (options.pitch != null && { pitch: options.pitch })), (options.instruction && { instruction: options.instruction })), (options.languageHints && { language_hints: options.languageHints })),
|
|
58
|
+
input: {},
|
|
59
|
+
},
|
|
60
|
+
}));
|
|
61
|
+
});
|
|
62
|
+
ws.on("message", (data, isBinary) => {
|
|
63
|
+
var _a, _b, _c, _d, _e, _f;
|
|
64
|
+
console.log("[AliyunCosyVoiceTTS] ws.message received, isBinary:", isBinary, "size:", typeof data === "string" ? data.length : data.length);
|
|
65
|
+
if (isBinary) {
|
|
66
|
+
const audioBuffer = Buffer.isBuffer(data) ? data : Buffer.from(data);
|
|
67
|
+
streamController.enqueue({
|
|
68
|
+
audio: audioBuffer,
|
|
69
|
+
event: pendingEvent !== null && pendingEvent !== void 0 ? pendingEvent : "sentence-synthesis",
|
|
70
|
+
sentence: Object.assign({ index: currentSentenceIndex }, (currentOriginalText && { originalText: currentOriginalText })),
|
|
71
|
+
});
|
|
72
|
+
pendingEvent = null;
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
const msgStr = typeof data === "string" ? data : data.toString();
|
|
76
|
+
let event;
|
|
77
|
+
try {
|
|
78
|
+
event = JSON.parse(msgStr);
|
|
79
|
+
}
|
|
80
|
+
catch (_g) {
|
|
81
|
+
return;
|
|
82
|
+
}
|
|
83
|
+
const { header, payload } = event;
|
|
84
|
+
console.log("[AliyunCosyVoiceTTS] server event:", header.event, header.error_code ? `error: ${header.error_code}` : "");
|
|
85
|
+
switch (header.event) {
|
|
86
|
+
case "task-started":
|
|
87
|
+
console.log("[AliyunCosyVoiceTTS] task-started, will send text and finish...");
|
|
88
|
+
taskStarted = true;
|
|
89
|
+
sendTextAndFinish();
|
|
90
|
+
break;
|
|
91
|
+
case "result-generated": {
|
|
92
|
+
const outputType = (_a = payload === null || payload === void 0 ? void 0 : payload.output) === null || _a === void 0 ? void 0 : _a.type;
|
|
93
|
+
if (!outputType)
|
|
94
|
+
break;
|
|
95
|
+
if ((_b = payload === null || payload === void 0 ? void 0 : payload.output) === null || _b === void 0 ? void 0 : _b.sentence) {
|
|
96
|
+
currentSentenceIndex = payload.output.sentence.index;
|
|
97
|
+
}
|
|
98
|
+
currentOriginalText = (_c = payload === null || payload === void 0 ? void 0 : payload.output) === null || _c === void 0 ? void 0 : _c.original_text;
|
|
99
|
+
if (outputType === "sentence-synthesis") {
|
|
100
|
+
pendingEvent = "sentence-synthesis";
|
|
101
|
+
}
|
|
102
|
+
else {
|
|
103
|
+
streamController.enqueue({
|
|
104
|
+
audio: Buffer.alloc(0),
|
|
105
|
+
event: outputType,
|
|
106
|
+
sentence: Object.assign({ index: currentSentenceIndex }, (currentOriginalText && { originalText: currentOriginalText })),
|
|
107
|
+
});
|
|
108
|
+
}
|
|
109
|
+
if ((_d = payload === null || payload === void 0 ? void 0 : payload.usage) === null || _d === void 0 ? void 0 : _d.characters) {
|
|
110
|
+
totalCharacters = payload.usage.characters;
|
|
111
|
+
}
|
|
112
|
+
break;
|
|
113
|
+
}
|
|
114
|
+
case "task-finished":
|
|
115
|
+
console.log("[AliyunCosyVoiceTTS] task-finished, characters:", (_e = payload === null || payload === void 0 ? void 0 : payload.usage) === null || _e === void 0 ? void 0 : _e.characters);
|
|
116
|
+
if ((_f = payload === null || payload === void 0 ? void 0 : payload.usage) === null || _f === void 0 ? void 0 : _f.characters) {
|
|
117
|
+
totalCharacters = payload.usage.characters;
|
|
118
|
+
}
|
|
119
|
+
streamController.close();
|
|
120
|
+
ws.close();
|
|
121
|
+
break;
|
|
122
|
+
case "task-failed":
|
|
123
|
+
console.error("[AliyunCosyVoiceTTS] task-failed:", header.error_code, header.error_message);
|
|
124
|
+
streamController.error(new Error(`TTS task failed: [${header.error_code}] ${header.error_message}`));
|
|
125
|
+
ws.close();
|
|
126
|
+
break;
|
|
127
|
+
}
|
|
128
|
+
});
|
|
129
|
+
ws.on("error", (err) => {
|
|
130
|
+
console.error("[AliyunCosyVoiceTTS] ws.error:", err.message);
|
|
131
|
+
try {
|
|
132
|
+
streamController.error(err);
|
|
133
|
+
}
|
|
134
|
+
catch (_a) {
|
|
135
|
+
}
|
|
136
|
+
});
|
|
137
|
+
ws.on("close", (code, reason) => {
|
|
138
|
+
console.log("[AliyunCosyVoiceTTS] ws.close, code:", code, "reason:", reason === null || reason === void 0 ? void 0 : reason.toString());
|
|
139
|
+
try {
|
|
140
|
+
streamController.close();
|
|
141
|
+
}
|
|
142
|
+
catch (_a) {
|
|
143
|
+
}
|
|
144
|
+
});
|
|
145
|
+
function sendTextAndFinish() {
|
|
146
|
+
if (!taskStarted)
|
|
147
|
+
return;
|
|
148
|
+
console.log("[AliyunCosyVoiceTTS] sendTextAndFinish, text length:", options.text.length);
|
|
149
|
+
ws.send(JSON.stringify({
|
|
150
|
+
header: {
|
|
151
|
+
action: "continue-task",
|
|
152
|
+
task_id: taskId,
|
|
153
|
+
streaming: "duplex",
|
|
154
|
+
},
|
|
155
|
+
payload: {
|
|
156
|
+
input: { text: options.text },
|
|
157
|
+
},
|
|
158
|
+
}));
|
|
159
|
+
ws.send(JSON.stringify({
|
|
160
|
+
header: {
|
|
161
|
+
action: "finish-task",
|
|
162
|
+
task_id: taskId,
|
|
163
|
+
streaming: "duplex",
|
|
164
|
+
},
|
|
165
|
+
payload: {
|
|
166
|
+
input: {},
|
|
167
|
+
},
|
|
168
|
+
}));
|
|
169
|
+
}
|
|
170
|
+
},
|
|
171
|
+
});
|
|
172
|
+
const asyncIterableStream = this.createAsyncIterable(controller);
|
|
173
|
+
resolve(asyncIterableStream);
|
|
174
|
+
});
|
|
175
|
+
});
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
exports.AliyunCosyVoiceTTSModel = AliyunCosyVoiceTTSModel;
|
package/lib/models/QwenDocTurbo/adapters/DashScope.d.ts
CHANGED
|
@@ -21,5 +21,7 @@ export declare class DashScopeApi implements QwenDocTurboApi {
|
|
|
21
21
|
buildPayload(data: ModelRequestOptions, stream: boolean): QwenDocTurboAPIInput;
|
|
22
22
|
request(payload: QwenDocTurboAPIInput, options?: ReqOptions): Promise<ReadableStream<Uint8Array> | unknown>;
|
|
23
23
|
normalizeResponse(response: QwenDocTurboResponse): DoGenerateOutput;
|
|
24
|
+
private isErrorResponse;
|
|
25
|
+
private normalizeErrorResponse;
|
|
24
26
|
normalizeStream(_stream: ReadableStream<Uint8Array>): ReadableStream<BaseDoStreamOutputChunk>;
|
|
25
27
|
}
|