cerevox 3.0.0-beta.3 → 3.0.0-beta.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/ai.d.ts +25 -17
- package/dist/core/ai.d.ts.map +1 -1
- package/dist/core/ai.js +372 -151
- package/dist/core/ai.js.map +1 -1
- package/dist/mcp/servers/helper/doubao_voices_full.js +1 -1
- package/dist/mcp/servers/prompts/image-prompt-optimizer bak.md +68 -0
- package/dist/mcp/servers/prompts/image-prompt-optimizer.md +14 -34
- package/dist/mcp/servers/prompts/rules/creative-ad.md +2 -2
- package/dist/mcp/servers/prompts/rules/expert.md +1 -1
- package/dist/mcp/servers/prompts/rules/freeform.md +2 -2
- package/dist/mcp/servers/prompts/rules/general-video.md +10 -5
- package/dist/mcp/servers/prompts/rules/story-telling.md +3 -3
- package/dist/mcp/servers/prompts/skills/workflows/general-video.md +2 -2
- package/dist/mcp/servers/prompts/skills/workflows/music-video.md +1 -1
- package/dist/mcp/servers/prompts/zerocut-core.md +0 -5
- package/dist/mcp/servers/zerocut.d.ts.map +1 -1
- package/dist/mcp/servers/zerocut.js +490 -587
- package/dist/mcp/servers/zerocut.js.map +1 -1
- package/dist/utils/coze.d.ts.map +1 -1
- package/dist/utils/coze.js +4 -1
- package/dist/utils/coze.js.map +1 -1
- package/package.json +1 -1
|
@@ -50,7 +50,6 @@ const constants_1 = require("../../utils/constants");
|
|
|
50
50
|
const videokit_1 = require("../../utils/videokit");
|
|
51
51
|
const promises_1 = require("node:fs/promises");
|
|
52
52
|
const node_path_1 = __importStar(require("node:path"));
|
|
53
|
-
const doubao_voices_full_1 = require("./helper/doubao_voices_full");
|
|
54
53
|
const node_fs_1 = require("node:fs");
|
|
55
54
|
const coze_1 = require("../../utils/coze");
|
|
56
55
|
const mp3_duration_1 = __importDefault(require("mp3-duration"));
|
|
@@ -94,10 +93,13 @@ function validateFileName(fileName) {
|
|
|
94
93
|
if (!fileName || fileName.trim() === '') {
|
|
95
94
|
throw new Error('File name cannot be empty');
|
|
96
95
|
}
|
|
96
|
+
if (fileName.startsWith('materials/')) {
|
|
97
|
+
fileName = fileName.replace('materials/', '');
|
|
98
|
+
}
|
|
97
99
|
if (fileName.includes('..') ||
|
|
98
100
|
fileName.includes('/') ||
|
|
99
101
|
fileName.includes('\\')) {
|
|
100
|
-
throw new Error('Invalid file name: contains illegal characters');
|
|
102
|
+
throw new Error('Invalid file name: contains illegal characters, cannot contain ".." or "/" or "\\"');
|
|
101
103
|
}
|
|
102
104
|
return fileName.trim();
|
|
103
105
|
}
|
|
@@ -130,6 +132,7 @@ async function initProject(session) {
|
|
|
130
132
|
return workDir;
|
|
131
133
|
}
|
|
132
134
|
async function saveMaterial(session, url, saveToFileName) {
|
|
135
|
+
saveToFileName = validateFileName(saveToFileName);
|
|
133
136
|
const terminal = session.terminal;
|
|
134
137
|
const saveToPath = `/home/user/cerevox-zerocut/projects/${terminal.id}/materials/${saveToFileName}`;
|
|
135
138
|
const saveLocalPath = (0, node_path_1.resolve)(projectLocalDir, 'materials', saveToFileName);
|
|
@@ -514,7 +517,7 @@ server.registerTool('project-close', {
|
|
|
514
517
|
.min(0)
|
|
515
518
|
.max(20)
|
|
516
519
|
.default(5)
|
|
517
|
-
.describe('Close the session after the specified number of minutes. Default is 5 minutes.
|
|
520
|
+
.describe('Close the session after the specified number of minutes. Default is 5 minutes. 除非用户要求立即关闭会话,将该参数设置为0,否则应默认设为5'),
|
|
518
521
|
},
|
|
519
522
|
}, async ({ inMinutes }) => {
|
|
520
523
|
try {
|
|
@@ -663,75 +666,12 @@ server.registerTool('retrieve-rules-context', {
|
|
|
663
666
|
return createErrorResponse(`Failed to load rules context prompt for ${prompt}: ${error}`, 'retrieve-rules-context');
|
|
664
667
|
}
|
|
665
668
|
});
|
|
666
|
-
// 列出项目下的所有文件
|
|
667
|
-
server.registerTool('list-project-files', {
|
|
668
|
-
title: 'List Project Files',
|
|
669
|
-
description: 'List all files in the materials directory.',
|
|
670
|
-
inputSchema: {},
|
|
671
|
-
}, async () => {
|
|
672
|
-
try {
|
|
673
|
-
// 验证session状态
|
|
674
|
-
const currentSession = await validateSession('list-project-files');
|
|
675
|
-
console.log('Listing project files...');
|
|
676
|
-
const terminal = currentSession.terminal;
|
|
677
|
-
if (!terminal) {
|
|
678
|
-
throw new Error('Terminal not available in current session');
|
|
679
|
-
}
|
|
680
|
-
let cwd;
|
|
681
|
-
try {
|
|
682
|
-
cwd = await terminal.getCwd();
|
|
683
|
-
}
|
|
684
|
-
catch (cwdError) {
|
|
685
|
-
console.error('Failed to get current working directory:', cwdError);
|
|
686
|
-
throw new Error('Failed to get current working directory');
|
|
687
|
-
}
|
|
688
|
-
console.log(`Current working directory: ${cwd}`);
|
|
689
|
-
// 安全地列出各目录文件,失败时返回空数组
|
|
690
|
-
const listFilesWithFallback = async (path, dirName) => {
|
|
691
|
-
try {
|
|
692
|
-
const files = await currentSession.files.listFiles(path);
|
|
693
|
-
console.log(`Found ${files?.length || 0} files in ${dirName}`);
|
|
694
|
-
return files || [];
|
|
695
|
-
}
|
|
696
|
-
catch (error) {
|
|
697
|
-
console.warn(`Failed to list files in ${dirName} (${path}):`, error);
|
|
698
|
-
return [];
|
|
699
|
-
}
|
|
700
|
-
};
|
|
701
|
-
const [rootFiles, materialsFiles, outputFiles] = await Promise.all([
|
|
702
|
-
listFilesWithFallback(cwd, 'root'),
|
|
703
|
-
listFilesWithFallback(`${cwd}/materials`, 'materials'),
|
|
704
|
-
listFilesWithFallback(`${cwd}/output`, 'output'),
|
|
705
|
-
]);
|
|
706
|
-
const result = {
|
|
707
|
-
success: true,
|
|
708
|
-
cwd,
|
|
709
|
-
root: rootFiles,
|
|
710
|
-
materials: materialsFiles,
|
|
711
|
-
output: outputFiles,
|
|
712
|
-
totalFiles: rootFiles.length + materialsFiles.length + outputFiles.length,
|
|
713
|
-
timestamp: new Date().toISOString(),
|
|
714
|
-
};
|
|
715
|
-
console.log(`Total files found: ${result.totalFiles}`);
|
|
716
|
-
return {
|
|
717
|
-
content: [
|
|
718
|
-
{
|
|
719
|
-
type: 'text',
|
|
720
|
-
text: JSON.stringify(result),
|
|
721
|
-
},
|
|
722
|
-
],
|
|
723
|
-
};
|
|
724
|
-
}
|
|
725
|
-
catch (error) {
|
|
726
|
-
return createErrorResponse(error, 'list-project-files');
|
|
727
|
-
}
|
|
728
|
-
});
|
|
729
669
|
server.registerTool('generate-character-image', {
|
|
730
670
|
title: 'Generate Character Image',
|
|
731
671
|
description: 'Generate a turnaround image or portrait for any character.',
|
|
732
672
|
inputSchema: {
|
|
733
673
|
type: zod_1.z
|
|
734
|
-
.enum(['banana', 'banana-pro', 'seedream'])
|
|
674
|
+
.enum(['banana', 'banana-pro', 'seedream', 'seedream-pro'])
|
|
735
675
|
.optional()
|
|
736
676
|
.default('banana'),
|
|
737
677
|
name: zod_1.z.string().describe('The name of the character.'),
|
|
@@ -761,7 +701,9 @@ server.registerTool('generate-character-image', {
|
|
|
761
701
|
.boolean()
|
|
762
702
|
.default(true)
|
|
763
703
|
.describe('是否生成三视图。true: 生成4096x3072的三视图,false: 生成2304x4096的竖版人物正视图'),
|
|
764
|
-
saveToFileName: zod_1.z
|
|
704
|
+
saveToFileName: zod_1.z
|
|
705
|
+
.string()
|
|
706
|
+
.describe('The filename to save. 应该是png文件'),
|
|
765
707
|
},
|
|
766
708
|
}, async ({ type, name, gender, age, appearance, clothing, personality, detail_features, style, saveToFileName, referenceImage, referenceImagePrompt, isTurnaround, }) => {
|
|
767
709
|
try {
|
|
@@ -930,49 +872,6 @@ ${roleDescriptionPrompt}
|
|
|
930
872
|
return createErrorResponse(error, 'generate-character-image');
|
|
931
873
|
}
|
|
932
874
|
});
|
|
933
|
-
server.registerTool('generate-line-sketch', {
|
|
934
|
-
title: 'Generate Line Sketch',
|
|
935
|
-
description: 'Generate line sketch material based on user prompt.',
|
|
936
|
-
inputSchema: {
|
|
937
|
-
prompt: zod_1.z.string().describe('The prompt to generate line sketch.'),
|
|
938
|
-
saveToFileName: zod_1.z
|
|
939
|
-
.string()
|
|
940
|
-
.describe('The filename to save the generated line sketch.'),
|
|
941
|
-
},
|
|
942
|
-
}, async ({ prompt, saveToFileName }) => {
|
|
943
|
-
try {
|
|
944
|
-
// 验证session状态
|
|
945
|
-
await validateSession('generate-line-sketch');
|
|
946
|
-
// 验证文件名
|
|
947
|
-
validateFileName(saveToFileName);
|
|
948
|
-
// 调用AI生成线稿
|
|
949
|
-
const res = await session.ai.generateLineSketch({ prompt });
|
|
950
|
-
if (res && res.url) {
|
|
951
|
-
// 保存到本地
|
|
952
|
-
await saveMaterial(session, res.url, saveToFileName);
|
|
953
|
-
const result = {
|
|
954
|
-
success: true,
|
|
955
|
-
url: res.url,
|
|
956
|
-
localPath: getMaterialUri(session, saveToFileName),
|
|
957
|
-
timestamp: new Date().toISOString(),
|
|
958
|
-
};
|
|
959
|
-
return {
|
|
960
|
-
content: [
|
|
961
|
-
{
|
|
962
|
-
type: 'text',
|
|
963
|
-
text: JSON.stringify(result),
|
|
964
|
-
},
|
|
965
|
-
],
|
|
966
|
-
};
|
|
967
|
-
}
|
|
968
|
-
else {
|
|
969
|
-
throw new Error('No URL returned from AI service');
|
|
970
|
-
}
|
|
971
|
-
}
|
|
972
|
-
catch (error) {
|
|
973
|
-
return createErrorResponse(error, 'generate-line-sketch');
|
|
974
|
-
}
|
|
975
|
-
});
|
|
976
875
|
server.registerTool('upload-custom-material', {
|
|
977
876
|
title: 'Upload Custom Material',
|
|
978
877
|
description: 'Upload material files (images: jpeg/png, videos: mp4, audio: mp3) from the local filesystem to the materials directory. For video and audio files, duration information will be included in the response.',
|
|
@@ -1061,15 +960,21 @@ server.registerTool('upload-custom-material', {
|
|
|
1061
960
|
});
|
|
1062
961
|
server.registerTool('generate-image', {
|
|
1063
962
|
title: 'Generate Image',
|
|
1064
|
-
description:
|
|
963
|
+
description: `生成图片,支持批量生成1-9张图`,
|
|
1065
964
|
inputSchema: {
|
|
1066
965
|
type: zod_1.z
|
|
1067
|
-
.enum([
|
|
966
|
+
.enum([
|
|
967
|
+
'banana',
|
|
968
|
+
'banana-pro',
|
|
969
|
+
'seedream',
|
|
970
|
+
'seedream-pro',
|
|
971
|
+
'line-sketch',
|
|
972
|
+
])
|
|
1068
973
|
.optional()
|
|
1069
974
|
.default('seedream'),
|
|
1070
975
|
prompt: zod_1.z
|
|
1071
976
|
.string()
|
|
1072
|
-
.describe('The prompt to generate. 一般要严格对应 storyboard 中当前场景的 start_frame 或 end_frame
|
|
977
|
+
.describe('The prompt to generate. 一般要严格对应 storyboard 中当前场景的 start_frame 或 end_frame 中的字段描述,如果是生成线稿,则 type 使用 line-sketch'),
|
|
1073
978
|
sceneIndex: zod_1.z
|
|
1074
979
|
.number()
|
|
1075
980
|
.min(1)
|
|
@@ -1124,17 +1029,21 @@ server.registerTool('generate-image', {
|
|
|
1124
1029
|
])
|
|
1125
1030
|
.default('720x1280')
|
|
1126
1031
|
.describe('The size of the image.'),
|
|
1127
|
-
|
|
1032
|
+
imageCount: zod_1.z
|
|
1033
|
+
.number()
|
|
1034
|
+
.min(1)
|
|
1035
|
+
.max(9)
|
|
1036
|
+
.optional()
|
|
1037
|
+
.default(1)
|
|
1038
|
+
.describe('The number of images to generate. 最多支持9张图,多了容易超时'),
|
|
1039
|
+
saveToFileNames: zod_1.z
|
|
1040
|
+
.array(zod_1.z.string())
|
|
1041
|
+
.describe('The filenames to save. 数量要和imageCount对应,应该是png文件'),
|
|
1128
1042
|
watermark: zod_1.z
|
|
1129
1043
|
.boolean()
|
|
1130
1044
|
.optional()
|
|
1131
1045
|
.default(false)
|
|
1132
1046
|
.describe('Whether to add watermark to the image.'),
|
|
1133
|
-
optimizePrompt: zod_1.z
|
|
1134
|
-
.boolean()
|
|
1135
|
-
.optional()
|
|
1136
|
-
.default(false)
|
|
1137
|
-
.describe('Whether to optimize the prompt.'),
|
|
1138
1047
|
referenceImages: zod_1.z
|
|
1139
1048
|
.array(zod_1.z.object({
|
|
1140
1049
|
image: zod_1.z.string().describe('Local image file path'),
|
|
@@ -1163,7 +1072,7 @@ server.registerTool('generate-image', {
|
|
|
1163
1072
|
\`\`\`
|
|
1164
1073
|
`),
|
|
1165
1074
|
},
|
|
1166
|
-
}, async ({ type = 'seedream', prompt, sceneIndex, storyBoardFile = 'storyboard.json', skipConsistencyCheck = false, size = '720x1280',
|
|
1075
|
+
}, async ({ type = 'seedream', prompt, sceneIndex, storyBoardFile = 'storyboard.json', skipConsistencyCheck = false, size = '720x1280', imageCount = 1, saveToFileNames, watermark, referenceImages, }, context) => {
|
|
1167
1076
|
try {
|
|
1168
1077
|
// 验证session状态
|
|
1169
1078
|
const currentSession = await validateSession('generate-image');
|
|
@@ -1173,7 +1082,6 @@ server.registerTool('generate-image', {
|
|
|
1173
1082
|
checkStoryboardFlag = true;
|
|
1174
1083
|
return createErrorResponse('必须先审查生成的 storyboard.json 内容,确保每个场景中的stage_atmosphere内容按照规则被正确融合到start_frame和video_prompt中,不得遗漏,检查完成后先汇报,如果有问题,应当先修改 storyboard.json 内容,然后再调用 generate-image 生成图片。注意修改 storyboard 内容时,仅修改相应字段的字符串值,不要破坏JSON格式!', 'generate-image');
|
|
1175
1084
|
}
|
|
1176
|
-
const validatedFileName = validateFileName(saveToFileName);
|
|
1177
1085
|
// 校验 prompt 与 storyboard.json 中场景设定的一致性
|
|
1178
1086
|
if (sceneIndex && !skipConsistencyCheck) {
|
|
1179
1087
|
try {
|
|
@@ -1257,53 +1165,74 @@ server.registerTool('generate-image', {
|
|
|
1257
1165
|
// 检查并替换英文单引号包裹的中文内容为中文双引号
|
|
1258
1166
|
// 这样才能让 seedream 生成更好的中文文字
|
|
1259
1167
|
let processedPrompt = prompt.replace(/'([^']*[\u4e00-\u9fff][^']*)'/g, '“$1”');
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1168
|
+
try {
|
|
1169
|
+
const ai = currentSession.ai;
|
|
1170
|
+
const promptOptimizer = await (0, promises_1.readFile)((0, node_path_1.resolve)(__dirname, './prompts/image-prompt-optimizer.md'), 'utf8');
|
|
1171
|
+
const schema = {
|
|
1172
|
+
name: 'optimize_image_prompt',
|
|
1173
|
+
schema: {
|
|
1174
|
+
type: 'object',
|
|
1175
|
+
properties: {
|
|
1176
|
+
prompt_optimized: {
|
|
1177
|
+
type: 'string',
|
|
1178
|
+
description: '优化后的提示词',
|
|
1270
1179
|
},
|
|
1271
|
-
{
|
|
1272
|
-
|
|
1273
|
-
|
|
1180
|
+
metaphor_modifiers: {
|
|
1181
|
+
type: 'array',
|
|
1182
|
+
description: '从 prompt_optimized 中抽取的所有比喻修饰词(字符串数组)',
|
|
1183
|
+
items: {
|
|
1184
|
+
type: 'string',
|
|
1185
|
+
description: '比喻性修饰词,例如 “如羽毛般轻盈”、“像晨雾一样柔和”',
|
|
1186
|
+
},
|
|
1274
1187
|
},
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
}
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1188
|
+
},
|
|
1189
|
+
required: ['prompt_optimized', 'metaphor_modifiers'],
|
|
1190
|
+
},
|
|
1191
|
+
};
|
|
1192
|
+
const completion = await ai.getCompletions({
|
|
1193
|
+
model: 'Doubao-Seed-1.6',
|
|
1194
|
+
messages: [
|
|
1195
|
+
{
|
|
1196
|
+
role: 'system',
|
|
1197
|
+
content: promptOptimizer,
|
|
1198
|
+
},
|
|
1199
|
+
{
|
|
1200
|
+
role: 'user',
|
|
1201
|
+
content: `## 用户指令
|
|
1202
|
+
|
|
1203
|
+
${processedPrompt.trim()}
|
|
1204
|
+
|
|
1205
|
+
## 参考图
|
|
1206
|
+
|
|
1207
|
+
${referenceImages?.map((ref, index) => `图${index + 1}:${ref.image}`).join('\n') || '无'}`,
|
|
1208
|
+
},
|
|
1209
|
+
],
|
|
1210
|
+
response_format: {
|
|
1211
|
+
type: 'json_schema',
|
|
1212
|
+
json_schema: schema,
|
|
1213
|
+
},
|
|
1214
|
+
});
|
|
1215
|
+
const optimizedPrompt = completion.choices[0]?.message?.content.trim();
|
|
1216
|
+
if (optimizedPrompt) {
|
|
1217
|
+
try {
|
|
1218
|
+
const { prompt_optimized, metaphor_modifiers } = JSON.parse(optimizedPrompt);
|
|
1219
|
+
processedPrompt = `${prompt_optimized}`;
|
|
1220
|
+
if (metaphor_modifiers?.length) {
|
|
1221
|
+
processedPrompt += `\n\n注意:下面这些是形象比喻,并不是输出内容。\n${metaphor_modifiers}`;
|
|
1300
1222
|
}
|
|
1301
1223
|
}
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1224
|
+
catch (ex) {
|
|
1225
|
+
console.error('Failed to parse optimized prompt:', ex);
|
|
1226
|
+
processedPrompt = optimizedPrompt;
|
|
1227
|
+
}
|
|
1305
1228
|
}
|
|
1306
1229
|
}
|
|
1230
|
+
catch (error) {
|
|
1231
|
+
console.error('Failed to optimize prompt:', error);
|
|
1232
|
+
}
|
|
1233
|
+
if (imageCount > 1) {
|
|
1234
|
+
processedPrompt = `$请生成${imageCount}张相关图片 ${processedPrompt}`;
|
|
1235
|
+
}
|
|
1307
1236
|
console.log(`Generating image with prompt: ${processedPrompt.substring(0, 100)}...`);
|
|
1308
1237
|
// 处理参考图片
|
|
1309
1238
|
let imageBase64Array;
|
|
@@ -1375,23 +1304,48 @@ ${processedPrompt}`.trim();
|
|
|
1375
1304
|
}
|
|
1376
1305
|
}
|
|
1377
1306
|
const ai = currentSession.ai;
|
|
1378
|
-
const
|
|
1307
|
+
const { taskUrl } = await ai.generateImage({
|
|
1379
1308
|
type,
|
|
1380
1309
|
prompt: processedPrompt,
|
|
1381
1310
|
size,
|
|
1382
1311
|
watermark,
|
|
1383
1312
|
image: imageBase64Array,
|
|
1313
|
+
async: true,
|
|
1314
|
+
});
|
|
1315
|
+
let progress = 0;
|
|
1316
|
+
const res = await ai.waitForTaskComplete({
|
|
1317
|
+
taskUrl,
|
|
1318
|
+
onProgress: async (metaData) => {
|
|
1319
|
+
try {
|
|
1320
|
+
await sendProgress(context, ++progress, undefined, JSON.stringify(metaData));
|
|
1321
|
+
}
|
|
1322
|
+
catch (progressError) {
|
|
1323
|
+
console.warn('Failed to send progress update:', progressError);
|
|
1324
|
+
}
|
|
1325
|
+
},
|
|
1384
1326
|
});
|
|
1385
1327
|
if (!res) {
|
|
1386
1328
|
throw new Error('Failed to generate image: no response from AI service');
|
|
1387
1329
|
}
|
|
1388
|
-
if (res.
|
|
1330
|
+
if (res.urls && res.urls.length > 0) {
|
|
1389
1331
|
console.log('Image generated successfully, saving to materials...');
|
|
1390
|
-
|
|
1332
|
+
let uris = [];
|
|
1333
|
+
if (res.urls.length === 1) {
|
|
1334
|
+
uris = [
|
|
1335
|
+
await saveMaterial(currentSession, res.urls[0], validateFileName(saveToFileNames[0])),
|
|
1336
|
+
];
|
|
1337
|
+
}
|
|
1338
|
+
else {
|
|
1339
|
+
// 多图场景
|
|
1340
|
+
uris = await Promise.all(res.urls.map((url, i) => {
|
|
1341
|
+
const fileName = validateFileName(saveToFileNames[i]);
|
|
1342
|
+
return saveMaterial(currentSession, url, fileName);
|
|
1343
|
+
}));
|
|
1344
|
+
}
|
|
1391
1345
|
const result = {
|
|
1392
1346
|
success: true,
|
|
1393
1347
|
// source: res.url,
|
|
1394
|
-
|
|
1348
|
+
uris,
|
|
1395
1349
|
prompt: processedPrompt,
|
|
1396
1350
|
size,
|
|
1397
1351
|
timestamp: new Date().toISOString(),
|
|
@@ -1432,12 +1386,14 @@ server.registerTool('edit-image', {
|
|
|
1432
1386
|
inputSchema: {
|
|
1433
1387
|
prompt: zod_1.z.string().describe('要编辑图片的中文提示词'),
|
|
1434
1388
|
type: zod_1.z
|
|
1435
|
-
.enum(['banana-pro', 'banana', 'seedream'])
|
|
1389
|
+
.enum(['banana-pro', 'banana', 'seedream', 'seedream-pro'])
|
|
1436
1390
|
.optional()
|
|
1437
1391
|
.default('seedream')
|
|
1438
1392
|
.describe('The type of image model to use.'),
|
|
1439
1393
|
sourceImageFileName: zod_1.z.string().describe('The source image file name.'),
|
|
1440
|
-
saveToFileName: zod_1.z
|
|
1394
|
+
saveToFileName: zod_1.z
|
|
1395
|
+
.string()
|
|
1396
|
+
.describe('The filename to save. 应该是png文件'),
|
|
1441
1397
|
size: zod_1.z
|
|
1442
1398
|
.enum([
|
|
1443
1399
|
'1024x1024',
|
|
@@ -1560,7 +1516,7 @@ server.registerTool('edit-image', {
|
|
|
1560
1516
|
let lastEffect = '';
|
|
1561
1517
|
server.registerTool('generate-video', {
|
|
1562
1518
|
title: 'Generate Video',
|
|
1563
|
-
description:
|
|
1519
|
+
description: `图生视频和首尾帧生视频工具`,
|
|
1564
1520
|
inputSchema: {
|
|
1565
1521
|
prompt: zod_1.z
|
|
1566
1522
|
.string()
|
|
@@ -1592,6 +1548,8 @@ server.registerTool('generate-video', {
|
|
|
1592
1548
|
'hailuo-fast',
|
|
1593
1549
|
'vidu',
|
|
1594
1550
|
'vidu-pro',
|
|
1551
|
+
'vidu-uc',
|
|
1552
|
+
'vidu-uc-pro',
|
|
1595
1553
|
'kling',
|
|
1596
1554
|
'kling-pro',
|
|
1597
1555
|
'pixv',
|
|
@@ -1601,7 +1559,9 @@ server.registerTool('generate-video', {
|
|
|
1601
1559
|
])
|
|
1602
1560
|
.default('lite')
|
|
1603
1561
|
.describe('除非用户明确提出使用其他模型,否则一律用lite模型;zero 系列模型适合创作8-23秒带故事情节的短片'),
|
|
1604
|
-
saveToFileName: zod_1.z
|
|
1562
|
+
saveToFileName: zod_1.z
|
|
1563
|
+
.string()
|
|
1564
|
+
.describe('The filename to save. 应该是mp4文件'),
|
|
1605
1565
|
start_frame: zod_1.z
|
|
1606
1566
|
.string()
|
|
1607
1567
|
.optional()
|
|
@@ -2008,7 +1968,7 @@ server.registerTool('generate-video', {
|
|
|
2008
1968
|
console.warn('Failed to send progress update:', progressError);
|
|
2009
1969
|
}
|
|
2010
1970
|
},
|
|
2011
|
-
waitForFinish:
|
|
1971
|
+
waitForFinish: type !== 'zero',
|
|
2012
1972
|
});
|
|
2013
1973
|
if (!res) {
|
|
2014
1974
|
throw new Error('Failed to generate video: no response from AI service');
|
|
@@ -2056,7 +2016,7 @@ server.registerTool('generate-video', {
|
|
|
2056
2016
|
type: 'text',
|
|
2057
2017
|
text: JSON.stringify({
|
|
2058
2018
|
success: true,
|
|
2059
|
-
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish
|
|
2019
|
+
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish 来等待任务结束,如 wait-for-task-finish 工具调用超时,你应立即再次重新调用直到任务结束。',
|
|
2060
2020
|
taskUrl: res.taskUrl,
|
|
2061
2021
|
}),
|
|
2062
2022
|
},
|
|
@@ -2086,7 +2046,7 @@ server.registerTool('generate-video', {
|
|
|
2086
2046
|
}
|
|
2087
2047
|
});
|
|
2088
2048
|
server.registerTool('wait-for-task-finish', {
|
|
2089
|
-
title: 'Wait Workflow or VideoTask Done',
|
|
2049
|
+
title: 'Wait Workflow or VideoTask Done;只有正在运行Coze工作流或者有异步生成视频任务时才需要执行这个工具',
|
|
2090
2050
|
description: 'Wait for a workflow to complete.',
|
|
2091
2051
|
inputSchema: {
|
|
2092
2052
|
taskUrl: zod_1.z
|
|
@@ -2094,14 +2054,14 @@ server.registerTool('wait-for-task-finish', {
|
|
|
2094
2054
|
.describe('The taskUrl of the video task to wait for.'),
|
|
2095
2055
|
saveToFileName: zod_1.z
|
|
2096
2056
|
.string()
|
|
2097
|
-
.describe('The file name to save the video to.'),
|
|
2057
|
+
.describe('The file name to save the video to. 应该是mp4文件'),
|
|
2098
2058
|
},
|
|
2099
2059
|
}, async ({ taskUrl, saveToFileName }, context) => {
|
|
2100
2060
|
try {
|
|
2101
2061
|
const currentSession = await validateSession('wait-for-task-finish');
|
|
2102
2062
|
const ai = currentSession.ai;
|
|
2103
2063
|
let progress = 0;
|
|
2104
|
-
const res = await ai.
|
|
2064
|
+
const res = await ai.waitForTaskComplete({
|
|
2105
2065
|
taskUrl,
|
|
2106
2066
|
onProgress: async (metaData) => {
|
|
2107
2067
|
try {
|
|
@@ -2169,7 +2129,7 @@ server.registerTool('generate-sound-effect', {
|
|
|
2169
2129
|
.describe('The duration of the sound which will be generated in seconds. Must be at least 0.5 and at most 30. If set to None we will guess the optimal duration using the prompt. Defaults to None.'),
|
|
2170
2130
|
saveToFileName: zod_1.z
|
|
2171
2131
|
.string()
|
|
2172
|
-
.describe('The filename to save.
|
|
2132
|
+
.describe('The filename to save. 应该是mp3文件'),
|
|
2173
2133
|
},
|
|
2174
2134
|
}, async ({ prompt_in_english, loop, saveToFileName, duration_seconds }) => {
|
|
2175
2135
|
try {
|
|
@@ -2216,14 +2176,32 @@ server.registerTool('generate-sound-effect', {
|
|
|
2216
2176
|
return createErrorResponse(error, 'generate-sound-effect');
|
|
2217
2177
|
}
|
|
2218
2178
|
});
|
|
2219
|
-
server.registerTool('generate-music', {
|
|
2220
|
-
title: '
|
|
2221
|
-
description: '
|
|
2179
|
+
server.registerTool('generate-music-or-mv', {
|
|
2180
|
+
title: '创作音乐(Music)或音乐视频(Music Video)',
|
|
2181
|
+
description: '生成音乐,包括MV(music video)、BGM 或 歌曲',
|
|
2222
2182
|
inputSchema: {
|
|
2223
2183
|
prompt: zod_1.z.string().describe('The prompt to generate.'),
|
|
2184
|
+
singerPhoto: zod_1.z
|
|
2185
|
+
.string()
|
|
2186
|
+
.optional()
|
|
2187
|
+
.describe('The singer photo to use. 只有type为music_video的时候才生效,也可以不传,模型会自动生成'),
|
|
2188
|
+
mvOrientation: zod_1.z
|
|
2189
|
+
.enum(['portrait', 'landscape'])
|
|
2190
|
+
.optional()
|
|
2191
|
+
.describe('The orientation of the music video. Defaults to portrait.')
|
|
2192
|
+
.default('portrait'),
|
|
2193
|
+
mvOriginalSong: zod_1.z
|
|
2194
|
+
.string()
|
|
2195
|
+
.optional()
|
|
2196
|
+
.describe('用于生成mv的音乐. 只有type为music_video的时候才生效,也可以不传,模型会自动创作'),
|
|
2197
|
+
mvGenSubtitles: zod_1.z
|
|
2198
|
+
.boolean()
|
|
2199
|
+
.optional()
|
|
2200
|
+
.default(false)
|
|
2201
|
+
.describe('是否生成mv的字幕. 默认为false,只有type为music_video的时候才生效'),
|
|
2224
2202
|
type: zod_1.z
|
|
2225
|
-
.enum(['bgm', 'song'])
|
|
2226
|
-
.describe('The type of music. Defaults to
|
|
2203
|
+
.enum(['bgm', 'song', 'music_video'])
|
|
2204
|
+
.describe('The type of music. Defaults to BGM. ⚠️ 如果 type 是 music_video,会直接生成音频和视频,**不需要**额外专门生成歌曲')
|
|
2227
2205
|
.default('bgm'),
|
|
2228
2206
|
model: zod_1.z
|
|
2229
2207
|
.enum(['doubao', 'minimax'])
|
|
@@ -2239,9 +2217,11 @@ server.registerTool('generate-music', {
|
|
|
2239
2217
|
.boolean()
|
|
2240
2218
|
.default(false)
|
|
2241
2219
|
.describe('Whether to skip copyright check.'),
|
|
2242
|
-
saveToFileName: zod_1.z
|
|
2220
|
+
saveToFileName: zod_1.z
|
|
2221
|
+
.string()
|
|
2222
|
+
.describe('The filename to save. 如果type是music video,应该是mp4文件,否则应该是mp3文件'),
|
|
2243
2223
|
},
|
|
2244
|
-
}, async ({ prompt, type, model, duration, skipCopyCheck, saveToFileName }, context) => {
|
|
2224
|
+
}, async ({ prompt, singerPhoto, mvOrientation, mvOriginalSong, mvGenSubtitles, type, model, duration, skipCopyCheck, saveToFileName, }, context) => {
|
|
2245
2225
|
try {
|
|
2246
2226
|
// 验证session状态
|
|
2247
2227
|
const currentSession = await validateSession('generate-music');
|
|
@@ -2252,24 +2232,54 @@ server.registerTool('generate-music', {
|
|
|
2252
2232
|
if (type === 'bgm' && duration > 120) {
|
|
2253
2233
|
throw new Error('BGM duration must be at most 120 seconds.');
|
|
2254
2234
|
}
|
|
2255
|
-
|
|
2256
|
-
|
|
2257
|
-
|
|
2258
|
-
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
|
|
2262
|
-
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2235
|
+
let res;
|
|
2236
|
+
if (type === 'music_video') {
|
|
2237
|
+
const singer_photo = singerPhoto
|
|
2238
|
+
? await getMaterialUri(currentSession, singerPhoto)
|
|
2239
|
+
: undefined;
|
|
2240
|
+
const original_song = mvOriginalSong
|
|
2241
|
+
? await getMaterialUri(currentSession, mvOriginalSong)
|
|
2242
|
+
: undefined;
|
|
2243
|
+
res = await ai.generateZeroCutMusicVideo({
|
|
2244
|
+
// prompt: `${prompt.trim()} 音乐时长${duration}秒`,
|
|
2245
|
+
prompt,
|
|
2246
|
+
singerPhoto: singer_photo,
|
|
2247
|
+
orientation: mvOrientation,
|
|
2248
|
+
genSubtitles: mvGenSubtitles,
|
|
2249
|
+
originalSong: original_song,
|
|
2250
|
+
duration,
|
|
2251
|
+
resolution: '720p',
|
|
2252
|
+
onProgress: async (metaData) => {
|
|
2253
|
+
try {
|
|
2254
|
+
await sendProgress(context, metaData.Result?.Progress ?? ++progress, metaData.Result?.Progress ? 100 : undefined, JSON.stringify(metaData));
|
|
2255
|
+
}
|
|
2256
|
+
catch (progressError) {
|
|
2257
|
+
console.warn('Failed to send progress update:', progressError);
|
|
2258
|
+
}
|
|
2259
|
+
},
|
|
2260
|
+
waitForFinish: false,
|
|
2261
|
+
});
|
|
2262
|
+
}
|
|
2263
|
+
else {
|
|
2264
|
+
const finalPrompt = `${prompt.trim()} ${type === 'bgm' ? `纯音乐无歌词,时长${duration}秒` : `时长${duration}秒,使用${model}模型`}`;
|
|
2265
|
+
res = await ai.generateMusic({
|
|
2266
|
+
prompt: finalPrompt,
|
|
2267
|
+
skipCopyCheck,
|
|
2268
|
+
onProgress: async (metaData) => {
|
|
2269
|
+
try {
|
|
2270
|
+
await sendProgress(context, metaData.Result?.Progress ?? ++progress, metaData.Result?.Progress ? 100 : undefined, JSON.stringify(metaData));
|
|
2271
|
+
}
|
|
2272
|
+
catch (progressError) {
|
|
2273
|
+
console.warn('Failed to send progress update:', progressError);
|
|
2274
|
+
}
|
|
2275
|
+
},
|
|
2276
|
+
});
|
|
2277
|
+
}
|
|
2268
2278
|
if (!res) {
|
|
2269
|
-
throw new Error('Failed to generate
|
|
2279
|
+
throw new Error('Failed to generate Music: no response from AI service');
|
|
2270
2280
|
}
|
|
2271
2281
|
if (res.url) {
|
|
2272
|
-
console.log('
|
|
2282
|
+
console.log('Music generated successfully, saving to materials...');
|
|
2273
2283
|
const uri = await saveMaterial(currentSession, res.url, validatedFileName);
|
|
2274
2284
|
const { url, duration: bgmDuration, captions, ...opts } = res;
|
|
2275
2285
|
// 保存captions到本地
|
|
@@ -2307,15 +2317,29 @@ server.registerTool('generate-music', {
|
|
|
2307
2317
|
],
|
|
2308
2318
|
};
|
|
2309
2319
|
}
|
|
2320
|
+
else if (res.taskUrl) {
|
|
2321
|
+
return {
|
|
2322
|
+
content: [
|
|
2323
|
+
{
|
|
2324
|
+
type: 'text',
|
|
2325
|
+
text: JSON.stringify({
|
|
2326
|
+
success: true,
|
|
2327
|
+
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish 来等待任务结束,如如 wait-for-task-finish 工具调用超时,你应立即再次重新调用直到任务结束。',
|
|
2328
|
+
taskUrl: res.taskUrl,
|
|
2329
|
+
}),
|
|
2330
|
+
},
|
|
2331
|
+
],
|
|
2332
|
+
};
|
|
2333
|
+
}
|
|
2310
2334
|
else {
|
|
2311
|
-
console.warn('
|
|
2335
|
+
console.warn('Music generation completed but no URL returned');
|
|
2312
2336
|
return {
|
|
2313
2337
|
content: [
|
|
2314
2338
|
{
|
|
2315
2339
|
type: 'text',
|
|
2316
2340
|
text: JSON.stringify({
|
|
2317
2341
|
success: false,
|
|
2318
|
-
error: 'No
|
|
2342
|
+
error: 'No Music URL returned from AI service',
|
|
2319
2343
|
response: res,
|
|
2320
2344
|
timestamp: new Date().toISOString(),
|
|
2321
2345
|
}),
|
|
@@ -2325,7 +2349,7 @@ server.registerTool('generate-music', {
|
|
|
2325
2349
|
}
|
|
2326
2350
|
}
|
|
2327
2351
|
catch (error) {
|
|
2328
|
-
return createErrorResponse(error, 'generate-
|
|
2352
|
+
return createErrorResponse(error, 'generate-music');
|
|
2329
2353
|
}
|
|
2330
2354
|
});
|
|
2331
2355
|
server.registerTool('generate-scene-tts', {
|
|
@@ -2352,7 +2376,9 @@ server.registerTool('generate-scene-tts', {
|
|
|
2352
2376
|
.string()
|
|
2353
2377
|
.optional()
|
|
2354
2378
|
.describe('跳过校验的理由,如果skipConsistencyCheck设为true,必须要传这个参数'),
|
|
2355
|
-
saveToFileName: zod_1.z
|
|
2379
|
+
saveToFileName: zod_1.z
|
|
2380
|
+
.string()
|
|
2381
|
+
.describe('The filename to save. 应该是mp3文件'),
|
|
2356
2382
|
speed: zod_1.z
|
|
2357
2383
|
.number()
|
|
2358
2384
|
.min(0.5)
|
|
@@ -2374,44 +2400,44 @@ server.registerTool('generate-scene-tts', {
|
|
|
2374
2400
|
.optional()
|
|
2375
2401
|
.default(1.0)
|
|
2376
2402
|
.describe('The volume of the tts.'),
|
|
2377
|
-
emotion: zod_1.z
|
|
2378
|
-
.enum([
|
|
2379
|
-
'storytelling',
|
|
2380
|
-
'neutral',
|
|
2381
|
-
'excited',
|
|
2382
|
-
'coldness',
|
|
2383
|
-
'angry',
|
|
2384
|
-
'sad',
|
|
2385
|
-
'happy',
|
|
2386
|
-
'surprised',
|
|
2387
|
-
'fear',
|
|
2388
|
-
'depressed',
|
|
2389
|
-
'lovey-dovey',
|
|
2390
|
-
'shy',
|
|
2391
|
-
'comfort',
|
|
2392
|
-
'tension',
|
|
2393
|
-
'tender',
|
|
2394
|
-
'magnetic',
|
|
2395
|
-
'vocal - fry',
|
|
2396
|
-
'ASMR',
|
|
2397
|
-
])
|
|
2398
|
-
.optional(),
|
|
2399
2403
|
voiceID: zod_1.z
|
|
2400
2404
|
.string()
|
|
2401
|
-
.describe(`适合作为视频配音的音色ID
|
|
2405
|
+
.describe(`适合作为视频配音的音色ID,除非用户指定,否则你必须确保已通过 pick-voice 工具挑选出真实存在的音色。`),
|
|
2406
|
+
context_texts: zod_1.z
|
|
2407
|
+
.array(zod_1.z.string())
|
|
2408
|
+
.default([])
|
|
2409
|
+
.describe(`语音合成的辅助信息,用于模型对话式合成,能更好的体现语音情感
|
|
2410
|
+
|
|
2411
|
+
可以探索,比如常见示例有以下几种:
|
|
2412
|
+
|
|
2413
|
+
1. 语速调整
|
|
2414
|
+
- context_texts: ["你可以说慢一点吗?"]
|
|
2415
|
+
2. 情绪/语气调整
|
|
2416
|
+
- context_texts=["你可以用特别特别痛心的语气说话吗?"]
|
|
2417
|
+
- context_texts=["嗯,你的语气再欢乐一点"]
|
|
2418
|
+
3. 音量调整
|
|
2419
|
+
- context_texts=["你嗓门再小点。"]
|
|
2420
|
+
4. 音感调整
|
|
2421
|
+
- context_texts=["你能用骄傲的语气来说话吗?"]
|
|
2422
|
+
`),
|
|
2402
2423
|
explicit_language: zod_1.z.enum(['zh', 'en', 'ja']).optional().default('zh'),
|
|
2403
2424
|
},
|
|
2404
|
-
}, async ({ text, sceneIndex, storyBoardFile, skipConsistencyCheck, voiceID, saveToFileName, speed, pitch, volume,
|
|
2425
|
+
}, async ({ text, sceneIndex, storyBoardFile, skipConsistencyCheck, voiceID, saveToFileName, speed, pitch, volume, context_texts, explicit_language, }) => {
|
|
2405
2426
|
try {
|
|
2406
2427
|
// 验证session状态
|
|
2407
2428
|
const currentSession = await validateSession('generate-scene-tts');
|
|
2408
2429
|
const validatedFileName = validateFileName(saveToFileName);
|
|
2409
2430
|
const finalSpeed = speed ?? 1;
|
|
2410
2431
|
volume = volume ?? 1;
|
|
2432
|
+
const ai = currentSession.ai;
|
|
2411
2433
|
let scene = null;
|
|
2412
2434
|
// 校验 text 与 storyboard.json 中场景设定的一致性
|
|
2413
2435
|
if (sceneIndex && !skipConsistencyCheck) {
|
|
2414
2436
|
try {
|
|
2437
|
+
const voice = (await ai.listVoices()).find(v => v.id === voiceID);
|
|
2438
|
+
if (!voice) {
|
|
2439
|
+
return createErrorResponse(`Voice ${voiceID} not found in voice-list. Use pick-voice tool to pick an available voice. 若用户坚持要使用该音色,需跳过一致性检查。`, 'generate-scene-tts');
|
|
2440
|
+
}
|
|
2415
2441
|
const storyBoardPath = (0, node_path_1.resolve)(process.env.ZEROCUT_PROJECT_CWD || process.cwd(), projectLocalDir, storyBoardFile);
|
|
2416
2442
|
if ((0, node_fs_1.existsSync)(storyBoardPath)) {
|
|
2417
2443
|
const storyBoardContent = await (0, promises_1.readFile)(storyBoardPath, 'utf8');
|
|
@@ -2462,9 +2488,8 @@ server.registerTool('generate-scene-tts', {
|
|
|
2462
2488
|
}
|
|
2463
2489
|
}
|
|
2464
2490
|
console.log(`Generating TTS with voice: ${voiceID}, speed: ${finalSpeed}, text: ${text.substring(0, 100)}...`);
|
|
2465
|
-
const ai = currentSession.ai;
|
|
2466
2491
|
if (voiceID.startsWith('BV0')) {
|
|
2467
|
-
throw new Error(`BV0*
|
|
2492
|
+
throw new Error(`BV0* 系列音色已弃用,你必须通过 pick-voice 工具挑选一个真实存在的音色。`);
|
|
2468
2493
|
}
|
|
2469
2494
|
const type = voiceID.startsWith('zh_') ||
|
|
2470
2495
|
voiceID.startsWith('en_') ||
|
|
@@ -2474,30 +2499,80 @@ server.registerTool('generate-scene-tts', {
|
|
|
2474
2499
|
? 'volcano'
|
|
2475
2500
|
: 'minimax';
|
|
2476
2501
|
let res;
|
|
2502
|
+
let emotion = 'auto';
|
|
2477
2503
|
if (type === 'volcano') {
|
|
2478
|
-
const voice = doubao_voices_full_1.doubaoVoicesFull.find(v => v.voiceID === voiceID);
|
|
2479
|
-
if (!voice) {
|
|
2480
|
-
return createErrorResponse(`Voice ${voiceID} not found in Doubao voices. Use search-voices tool to find available voices.`, 'generate-scene-tts');
|
|
2481
|
-
}
|
|
2482
|
-
const emotions = voice.emotions || [];
|
|
2483
|
-
if (emotion && !emotions.includes(emotion)) {
|
|
2484
|
-
emotion = 'neutral';
|
|
2485
|
-
}
|
|
2486
|
-
// 修复可能的 emotion 错误情况
|
|
2487
|
-
emotion = emotion || 'neutral';
|
|
2488
2504
|
volume = Math.max(Math.min(volume, 2.0), 0.5);
|
|
2489
2505
|
res = await ai.textToSpeechVolc({
|
|
2490
2506
|
text: text.trim(),
|
|
2491
2507
|
speaker: voiceID,
|
|
2492
2508
|
speed: Math.floor(100 * (finalSpeed - 1)),
|
|
2493
2509
|
volume: Math.floor(100 * (volume - 1)),
|
|
2494
|
-
|
|
2510
|
+
context_texts,
|
|
2495
2511
|
explicit_language,
|
|
2496
2512
|
voice_to_caption: explicit_language === 'zh' || explicit_language === 'en',
|
|
2497
2513
|
});
|
|
2498
2514
|
}
|
|
2499
2515
|
else {
|
|
2500
|
-
emotion =
|
|
2516
|
+
emotion = 'neutral';
|
|
2517
|
+
if (context_texts.length > 0) {
|
|
2518
|
+
const prompt = `根据用户输入语音内容和上下文内容,从文字判断语音合理的情感,然后选择以下情感**之一**返回结果:
|
|
2519
|
+
|
|
2520
|
+
"happy", "sad", "angry", "fearful", "disgusted", "surprised", "calm", "fluent", "whisper", "neutral"
|
|
2521
|
+
|
|
2522
|
+
## 要求
|
|
2523
|
+
输出 JSON 格式,包含一个 emotion 字段,值为以上情感之一。
|
|
2524
|
+
`;
|
|
2525
|
+
const schema = {
|
|
2526
|
+
name: 'emotion_schema',
|
|
2527
|
+
schema: {
|
|
2528
|
+
type: 'object',
|
|
2529
|
+
properties: {
|
|
2530
|
+
emotion: {
|
|
2531
|
+
type: 'string',
|
|
2532
|
+
enum: [
|
|
2533
|
+
'neutral',
|
|
2534
|
+
'happy',
|
|
2535
|
+
'sad',
|
|
2536
|
+
'angry',
|
|
2537
|
+
'fearful',
|
|
2538
|
+
'disgusted',
|
|
2539
|
+
'surprised',
|
|
2540
|
+
'calm',
|
|
2541
|
+
'fluent',
|
|
2542
|
+
'whisper',
|
|
2543
|
+
],
|
|
2544
|
+
description: '用户输入语音的情感',
|
|
2545
|
+
},
|
|
2546
|
+
},
|
|
2547
|
+
required: ['emotion'],
|
|
2548
|
+
},
|
|
2549
|
+
};
|
|
2550
|
+
const payload = {
|
|
2551
|
+
model: 'Doubao-Seed-1.6',
|
|
2552
|
+
messages: [
|
|
2553
|
+
{
|
|
2554
|
+
role: 'system',
|
|
2555
|
+
content: prompt,
|
|
2556
|
+
},
|
|
2557
|
+
{
|
|
2558
|
+
role: 'user',
|
|
2559
|
+
content: `## 语音内容:
|
|
2560
|
+
${text.trim()}
|
|
2561
|
+
|
|
2562
|
+
## 语音上下文
|
|
2563
|
+
${context_texts.join('\n')}
|
|
2564
|
+
`,
|
|
2565
|
+
},
|
|
2566
|
+
],
|
|
2567
|
+
response_format: {
|
|
2568
|
+
type: 'json_schema',
|
|
2569
|
+
json_schema: schema,
|
|
2570
|
+
},
|
|
2571
|
+
};
|
|
2572
|
+
const completion = await ai.getCompletions(payload);
|
|
2573
|
+
const emotionObj = JSON.parse(completion.choices[0]?.message?.content ?? '{}');
|
|
2574
|
+
emotion = emotionObj.emotion ?? 'neutral';
|
|
2575
|
+
}
|
|
2501
2576
|
res = await ai.textToSpeech({
|
|
2502
2577
|
text: text.trim(),
|
|
2503
2578
|
voiceName: voiceID,
|
|
@@ -2538,6 +2613,8 @@ server.registerTool('generate-scene-tts', {
|
|
|
2538
2613
|
uri,
|
|
2539
2614
|
durationMs: Math.floor((duration || 0) * 1000),
|
|
2540
2615
|
text,
|
|
2616
|
+
emotion,
|
|
2617
|
+
context_texts,
|
|
2541
2618
|
voiceName: voiceID,
|
|
2542
2619
|
speed: finalSpeed,
|
|
2543
2620
|
timestamp: new Date().toISOString(),
|
|
@@ -2567,7 +2644,7 @@ server.registerTool('generate-scene-tts', {
|
|
|
2567
2644
|
type: 'text',
|
|
2568
2645
|
text: JSON.stringify({
|
|
2569
2646
|
success: false,
|
|
2570
|
-
error: 'No TTS URL returned from AI service. You should use
|
|
2647
|
+
error: 'No TTS URL returned from AI service. You should use pick-voice tool to pick an available voice.',
|
|
2571
2648
|
response: res,
|
|
2572
2649
|
timestamp: new Date().toISOString(),
|
|
2573
2650
|
}),
|
|
@@ -2800,111 +2877,71 @@ server.registerTool('get-schema', {
|
|
|
2800
2877
|
return createErrorResponse(error, 'get-schema');
|
|
2801
2878
|
}
|
|
2802
2879
|
});
|
|
2803
|
-
server.registerTool('
|
|
2804
|
-
title: '
|
|
2805
|
-
description: '
|
|
2880
|
+
server.registerTool('pick-voice', {
|
|
2881
|
+
title: 'Pick Voice',
|
|
2882
|
+
description: '根据用户需求,选择尽可能符合要求的语音,在合适的情况下,优先采用 volcano_tts_2 类型的语音',
|
|
2806
2883
|
inputSchema: {
|
|
2807
|
-
|
|
2808
|
-
.
|
|
2809
|
-
'
|
|
2810
|
-
|
|
2811
|
-
|
|
2812
|
-
'dialect_fun',
|
|
2813
|
-
'dialogue',
|
|
2814
|
-
'kids_content',
|
|
2815
|
-
'news_explainer',
|
|
2816
|
-
'podcast_voiceover',
|
|
2817
|
-
'product_ad',
|
|
2818
|
-
'promo_trailer',
|
|
2819
|
-
'roleplay_drama',
|
|
2820
|
-
'story_narration',
|
|
2821
|
-
'storytelling',
|
|
2822
|
-
'tutorial',
|
|
2823
|
-
]))
|
|
2824
|
-
.optional()
|
|
2825
|
-
.describe('Filter by scenes (e.g., ["product_ad", "tutorial"]). If not provided, no scene filtering is applied.'),
|
|
2826
|
-
emotions: zod_1.z
|
|
2827
|
-
.array(zod_1.z.enum([
|
|
2828
|
-
'ASMR',
|
|
2829
|
-
'affectionate',
|
|
2830
|
-
'angry',
|
|
2831
|
-
'authoritative',
|
|
2832
|
-
'chat',
|
|
2833
|
-
'coldness',
|
|
2834
|
-
'depressed',
|
|
2835
|
-
'excited',
|
|
2836
|
-
'fear',
|
|
2837
|
-
'happy',
|
|
2838
|
-
'hate',
|
|
2839
|
-
'neutral',
|
|
2840
|
-
'sad',
|
|
2841
|
-
'surprised',
|
|
2842
|
-
'warm',
|
|
2843
|
-
]))
|
|
2884
|
+
prompt: zod_1.z
|
|
2885
|
+
.string()
|
|
2886
|
+
.describe('用户需求描述,例如:一个有亲和力的,适合给孩子讲故事的语音'),
|
|
2887
|
+
custom_design: zod_1.z
|
|
2888
|
+
.boolean()
|
|
2844
2889
|
.optional()
|
|
2845
|
-
.describe('
|
|
2846
|
-
|
|
2847
|
-
.
|
|
2890
|
+
.describe('是否自定义语音,由于要消耗较多积分,因此**只有用户明确要求自己设计语音**,才将该参数设为true'),
|
|
2891
|
+
custom_design_preview: zod_1.z
|
|
2892
|
+
.string()
|
|
2848
2893
|
.optional()
|
|
2849
|
-
.describe('
|
|
2850
|
-
|
|
2851
|
-
.
|
|
2894
|
+
.describe('用户自定义语音的预览文本,用于展示自定义语音的效果,只有 custom_design 为 true 时才需要'),
|
|
2895
|
+
custom_design_save_to: zod_1.z
|
|
2896
|
+
.string()
|
|
2852
2897
|
.optional()
|
|
2853
|
-
.describe('
|
|
2898
|
+
.describe('自定义语音的保存路径,例如:custom_voice.mp3 custom_voice_{id}.mp3'),
|
|
2854
2899
|
},
|
|
2855
|
-
}, async ({
|
|
2900
|
+
}, async ({ prompt, custom_design, custom_design_preview, custom_design_save_to, }) => {
|
|
2856
2901
|
try {
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
2860
|
-
|
|
2861
|
-
|
|
2862
|
-
|
|
2863
|
-
|
|
2864
|
-
|
|
2865
|
-
|
|
2866
|
-
|
|
2867
|
-
if (emotions.includes('neutral') && !voice.emotions) {
|
|
2868
|
-
return true;
|
|
2869
|
-
}
|
|
2870
|
-
return (voice.emotions &&
|
|
2871
|
-
voice.emotions.some(emotion => emotions.includes(emotion)));
|
|
2902
|
+
// 验证session状态
|
|
2903
|
+
const currentSession = await validateSession('pick-voice');
|
|
2904
|
+
const ai = currentSession.ai;
|
|
2905
|
+
if (custom_design) {
|
|
2906
|
+
if (!custom_design_preview) {
|
|
2907
|
+
throw new Error('custom_design_preview is required when custom_design is true');
|
|
2908
|
+
}
|
|
2909
|
+
const data = await currentSession.ai.voiceDesign({
|
|
2910
|
+
prompt,
|
|
2911
|
+
previewText: custom_design_preview,
|
|
2872
2912
|
});
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
}
|
|
2879
|
-
// Filter by gender
|
|
2880
|
-
if (gender) {
|
|
2881
|
-
filteredVoices = filteredVoices.filter(voice => {
|
|
2882
|
-
const voiceId = voice.voiceID.toLowerCase();
|
|
2883
|
-
if (gender === 'male') {
|
|
2884
|
-
return voiceId.includes('_male_');
|
|
2913
|
+
if (data.voice_id) {
|
|
2914
|
+
const trial_audio = data.trial_audio;
|
|
2915
|
+
let uri = '';
|
|
2916
|
+
if (trial_audio) {
|
|
2917
|
+
uri = await saveMaterial(currentSession, trial_audio, custom_design_save_to || `custom_voice_${data.voice_id}.mp3`);
|
|
2885
2918
|
}
|
|
2886
|
-
|
|
2887
|
-
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
|
|
2919
|
+
return {
|
|
2920
|
+
content: [
|
|
2921
|
+
{
|
|
2922
|
+
type: 'text',
|
|
2923
|
+
text: JSON.stringify({
|
|
2924
|
+
success: true,
|
|
2925
|
+
...data,
|
|
2926
|
+
uri,
|
|
2927
|
+
timestamp: new Date().toISOString(),
|
|
2928
|
+
}),
|
|
2929
|
+
},
|
|
2930
|
+
],
|
|
2931
|
+
};
|
|
2932
|
+
}
|
|
2933
|
+
else {
|
|
2934
|
+
throw new Error(`Voice design failed, ${JSON.stringify(data)}`);
|
|
2935
|
+
}
|
|
2891
2936
|
}
|
|
2937
|
+
const data = await ai.pickVoice({ prompt });
|
|
2892
2938
|
return {
|
|
2893
2939
|
content: [
|
|
2894
2940
|
{
|
|
2895
2941
|
type: 'text',
|
|
2896
2942
|
text: JSON.stringify({
|
|
2897
2943
|
success: true,
|
|
2898
|
-
data
|
|
2899
|
-
totalCount: filteredVoices.length,
|
|
2900
|
-
voices: filteredVoices,
|
|
2901
|
-
filters: {
|
|
2902
|
-
scenes: scenes || null,
|
|
2903
|
-
emotions: emotions || null,
|
|
2904
|
-
languages: languages || null,
|
|
2905
|
-
gender: gender || null,
|
|
2906
|
-
},
|
|
2907
|
-
},
|
|
2944
|
+
...data,
|
|
2908
2945
|
timestamp: new Date().toISOString(),
|
|
2909
2946
|
}),
|
|
2910
2947
|
},
|
|
@@ -2912,52 +2949,7 @@ server.registerTool('search-voices', {
|
|
|
2912
2949
|
};
|
|
2913
2950
|
}
|
|
2914
2951
|
catch (error) {
|
|
2915
|
-
return createErrorResponse(error, '
|
|
2916
|
-
}
|
|
2917
|
-
});
|
|
2918
|
-
server.registerTool('voice-design', {
|
|
2919
|
-
title: 'Voice Design',
|
|
2920
|
-
description: 'Design a voice based on a prompt. The voice will be designed based on the prompt and preview text.',
|
|
2921
|
-
inputSchema: {
|
|
2922
|
-
prompt: zod_1.z.string().describe('The prompt to design the voice.'),
|
|
2923
|
-
previewText: zod_1.z.string().describe('The preview text to design the voice.'),
|
|
2924
|
-
saveToFileName: zod_1.z
|
|
2925
|
-
.string()
|
|
2926
|
-
.describe('The file name to save the designed voice.'),
|
|
2927
|
-
},
|
|
2928
|
-
}, async ({ prompt, previewText, saveToFileName }) => {
|
|
2929
|
-
try {
|
|
2930
|
-
const currentSession = await validateSession('voice-design');
|
|
2931
|
-
const data = await currentSession.ai.voiceDesign({
|
|
2932
|
-
prompt,
|
|
2933
|
-
previewText,
|
|
2934
|
-
});
|
|
2935
|
-
if (data.voice_id) {
|
|
2936
|
-
const trial_audio = data.trial_audio;
|
|
2937
|
-
let uri = '';
|
|
2938
|
-
if (trial_audio) {
|
|
2939
|
-
uri = await saveMaterial(currentSession, trial_audio, saveToFileName);
|
|
2940
|
-
}
|
|
2941
|
-
return {
|
|
2942
|
-
content: [
|
|
2943
|
-
{
|
|
2944
|
-
type: 'text',
|
|
2945
|
-
text: JSON.stringify({
|
|
2946
|
-
success: true,
|
|
2947
|
-
...data,
|
|
2948
|
-
uri,
|
|
2949
|
-
timestamp: new Date().toISOString(),
|
|
2950
|
-
}),
|
|
2951
|
-
},
|
|
2952
|
-
],
|
|
2953
|
-
};
|
|
2954
|
-
}
|
|
2955
|
-
else {
|
|
2956
|
-
throw new Error(`Voice design failed, ${JSON.stringify(data)}`);
|
|
2957
|
-
}
|
|
2958
|
-
}
|
|
2959
|
-
catch (error) {
|
|
2960
|
-
return createErrorResponse(error, 'voice-design');
|
|
2952
|
+
return createErrorResponse(error, 'pick-voice');
|
|
2961
2953
|
}
|
|
2962
2954
|
});
|
|
2963
2955
|
server.registerTool('media-analyzer', {
|
|
@@ -3173,206 +3165,6 @@ server.registerTool('media-analyzer', {
|
|
|
3173
3165
|
return createErrorResponse(error, 'media-analyzer');
|
|
3174
3166
|
}
|
|
3175
3167
|
});
|
|
3176
|
-
// server.registerTool(
|
|
3177
|
-
// 'image-aligner',
|
|
3178
|
-
// {
|
|
3179
|
-
// title: 'Image Aligner',
|
|
3180
|
-
// description:
|
|
3181
|
-
// 'Analyze image quality and alignment with prompt using AI Image Quality Inspector.',
|
|
3182
|
-
// inputSchema: {
|
|
3183
|
-
// imageFileName: z
|
|
3184
|
-
// .string()
|
|
3185
|
-
// .describe('The image file name in materials directory to analyze.'),
|
|
3186
|
-
// sceneIndex: z.number().min(1).describe('场景索引,从1开始的下标'),
|
|
3187
|
-
// storyBoardFile: z
|
|
3188
|
-
// .string()
|
|
3189
|
-
// .optional()
|
|
3190
|
-
// .default('storyboard.json')
|
|
3191
|
-
// .describe('故事板文件路径'),
|
|
3192
|
-
// imagePrompt: z
|
|
3193
|
-
// .string()
|
|
3194
|
-
// .optional()
|
|
3195
|
-
// .describe('可选的图片提示词,如果提供则覆盖storyboard中的提示词'),
|
|
3196
|
-
// customPrompt: z
|
|
3197
|
-
// .string()
|
|
3198
|
-
// .optional()
|
|
3199
|
-
// .describe('可选的额外用户要求,用于补充图片质量评估的特定需求'),
|
|
3200
|
-
// },
|
|
3201
|
-
// },
|
|
3202
|
-
// async ({
|
|
3203
|
-
// imageFileName,
|
|
3204
|
-
// sceneIndex,
|
|
3205
|
-
// storyBoardFile = 'storyboard.json',
|
|
3206
|
-
// imagePrompt,
|
|
3207
|
-
// customPrompt,
|
|
3208
|
-
// }) => {
|
|
3209
|
-
// try {
|
|
3210
|
-
// const currentSession = await validateSession('image-aligner');
|
|
3211
|
-
// // 验证图片文件
|
|
3212
|
-
// validateImageFile(imageFileName);
|
|
3213
|
-
// // 获取图片 URL
|
|
3214
|
-
// const imageUrl = getMaterialUri(currentSession, imageFileName);
|
|
3215
|
-
// // 确定要使用的提示词
|
|
3216
|
-
// let finalPrompt = imagePrompt;
|
|
3217
|
-
// // 如果没有提供imagePrompt,则从storyboard中获取
|
|
3218
|
-
// if (!imagePrompt) {
|
|
3219
|
-
// try {
|
|
3220
|
-
// const storyBoardPath = resolve(
|
|
3221
|
-
// process.env.ZEROCUT_PROJECT_CWD || process.cwd(),
|
|
3222
|
-
// projectLocalDir,
|
|
3223
|
-
// storyBoardFile
|
|
3224
|
-
// );
|
|
3225
|
-
// if (existsSync(storyBoardPath)) {
|
|
3226
|
-
// const storyBoardContent = await readFile(storyBoardPath, 'utf8');
|
|
3227
|
-
// const storyBoard = JSON.parse(storyBoardContent);
|
|
3228
|
-
// if (storyBoard.scenes && Array.isArray(storyBoard.scenes)) {
|
|
3229
|
-
// const scene = storyBoard.scenes[sceneIndex - 1]; // sceneIndex 从1开始,数组从0开始
|
|
3230
|
-
// if (scene) {
|
|
3231
|
-
// // 根据文件名判断优先级:若end_frame存在且imageFileName包含"_end"则优先取end_frame,否则取start_frame
|
|
3232
|
-
// if (scene.end_frame && imageFileName.includes('_end')) {
|
|
3233
|
-
// finalPrompt = scene.end_frame;
|
|
3234
|
-
// } else {
|
|
3235
|
-
// finalPrompt = scene.start_frame || scene.end_frame;
|
|
3236
|
-
// }
|
|
3237
|
-
// if (!finalPrompt) {
|
|
3238
|
-
// return createErrorResponse(
|
|
3239
|
-
// `场景 ${sceneIndex} 中未找到 start_frame 或 end_frame 提示词`,
|
|
3240
|
-
// 'image-aligner'
|
|
3241
|
-
// );
|
|
3242
|
-
// }
|
|
3243
|
-
// } else {
|
|
3244
|
-
// return createErrorResponse(
|
|
3245
|
-
// `在 ${storyBoardFile} 中未找到场景索引 ${sceneIndex}`,
|
|
3246
|
-
// 'image-aligner'
|
|
3247
|
-
// );
|
|
3248
|
-
// }
|
|
3249
|
-
// } else {
|
|
3250
|
-
// return createErrorResponse(
|
|
3251
|
-
// `${storyBoardFile} 文件格式不正确,缺少 scenes 数组`,
|
|
3252
|
-
// 'image-aligner'
|
|
3253
|
-
// );
|
|
3254
|
-
// }
|
|
3255
|
-
// } else {
|
|
3256
|
-
// return createErrorResponse(
|
|
3257
|
-
// `故事板文件不存在: ${storyBoardPath}`,
|
|
3258
|
-
// 'image-aligner'
|
|
3259
|
-
// );
|
|
3260
|
-
// }
|
|
3261
|
-
// } catch (error) {
|
|
3262
|
-
// return createErrorResponse(
|
|
3263
|
-
// `读取或解析故事板文件失败: ${error}`,
|
|
3264
|
-
// 'image-aligner'
|
|
3265
|
-
// );
|
|
3266
|
-
// }
|
|
3267
|
-
// }
|
|
3268
|
-
// // 如果仍然没有提示词,返回错误
|
|
3269
|
-
// if (!finalPrompt) {
|
|
3270
|
-
// return createErrorResponse(
|
|
3271
|
-
// '未提供 imagePrompt 且无法从故事板中获取提示词',
|
|
3272
|
-
// 'image-aligner'
|
|
3273
|
-
// );
|
|
3274
|
-
// }
|
|
3275
|
-
// // 读取图片质量检查指南
|
|
3276
|
-
// const alignerGuidelinePath = resolve(
|
|
3277
|
-
// __dirname,
|
|
3278
|
-
// './prompts/reasonings/image_aligner.md'
|
|
3279
|
-
// );
|
|
3280
|
-
// let alignerGuideline = '';
|
|
3281
|
-
// try {
|
|
3282
|
-
// alignerGuideline = await readFile(alignerGuidelinePath, 'utf8');
|
|
3283
|
-
// } catch (error) {
|
|
3284
|
-
// console.warn('无法读取图片质量检查指南:', error);
|
|
3285
|
-
// alignerGuideline =
|
|
3286
|
-
// '请对图片质量进行评估,包括构图、色彩、清晰度等方面。';
|
|
3287
|
-
// }
|
|
3288
|
-
// // 构建系统提示
|
|
3289
|
-
// const systemPrompt = `你是一个专业的AI图片质量检查员。请根据以下指南对图片进行评估:
|
|
3290
|
-
// ${alignerGuideline}
|
|
3291
|
-
// 请严格按照指南中的JSON格式返回评估结果。`;
|
|
3292
|
-
// // 构建用户提示
|
|
3293
|
-
// const userPrompt = `请对这张图片进行质量评估。
|
|
3294
|
-
// 原始提示词:${finalPrompt}${
|
|
3295
|
-
// customPrompt
|
|
3296
|
-
// ? `
|
|
3297
|
-
// 额外要求:${customPrompt}`
|
|
3298
|
-
// : ''
|
|
3299
|
-
// }
|
|
3300
|
-
// 请按照指南要求,返回包含评分、问题列表和优化建议的JSON格式结果。`;
|
|
3301
|
-
// // 调用AI模型进行图片质量评估
|
|
3302
|
-
// const ai = currentSession.ai;
|
|
3303
|
-
// const completion = await ai.getCompletions({
|
|
3304
|
-
// model: 'Doubao-Seed-1.6',
|
|
3305
|
-
// messages: [
|
|
3306
|
-
// {
|
|
3307
|
-
// role: 'system',
|
|
3308
|
-
// content: systemPrompt,
|
|
3309
|
-
// },
|
|
3310
|
-
// {
|
|
3311
|
-
// role: 'user',
|
|
3312
|
-
// content: [
|
|
3313
|
-
// {
|
|
3314
|
-
// type: 'image_url',
|
|
3315
|
-
// image_url: {
|
|
3316
|
-
// url: imageUrl,
|
|
3317
|
-
// },
|
|
3318
|
-
// },
|
|
3319
|
-
// {
|
|
3320
|
-
// type: 'text',
|
|
3321
|
-
// text: userPrompt,
|
|
3322
|
-
// },
|
|
3323
|
-
// ],
|
|
3324
|
-
// },
|
|
3325
|
-
// ],
|
|
3326
|
-
// });
|
|
3327
|
-
// const result = completion.choices[0]?.message?.content;
|
|
3328
|
-
// if (!result) {
|
|
3329
|
-
// throw new Error('No response from AI model');
|
|
3330
|
-
// }
|
|
3331
|
-
// // 解析AI响应
|
|
3332
|
-
// let alignmentResult;
|
|
3333
|
-
// try {
|
|
3334
|
-
// // 尝试从响应中提取JSON
|
|
3335
|
-
// const jsonMatch =
|
|
3336
|
-
// result.match(/```json\s*([\s\S]*?)\s*```/) ||
|
|
3337
|
-
// result.match(/\{[\s\S]*\}/);
|
|
3338
|
-
// if (jsonMatch) {
|
|
3339
|
-
// alignmentResult = JSON.parse(jsonMatch[1] || jsonMatch[0]);
|
|
3340
|
-
// } else {
|
|
3341
|
-
// // 如果没有找到JSON格式,尝试直接解析整个响应
|
|
3342
|
-
// alignmentResult = JSON.parse(result);
|
|
3343
|
-
// }
|
|
3344
|
-
// } catch (error) {
|
|
3345
|
-
// // 如果解析失败,返回原始响应
|
|
3346
|
-
// alignmentResult = {
|
|
3347
|
-
// error: 'JSON解析失败',
|
|
3348
|
-
// raw_response: result,
|
|
3349
|
-
// };
|
|
3350
|
-
// }
|
|
3351
|
-
// return {
|
|
3352
|
-
// content: [
|
|
3353
|
-
// {
|
|
3354
|
-
// type: 'text',
|
|
3355
|
-
// text: JSON.stringify({
|
|
3356
|
-
// success: true,
|
|
3357
|
-
// imageFileName,
|
|
3358
|
-
// sceneIndex,
|
|
3359
|
-
// storyBoardFile,
|
|
3360
|
-
// imagePrompt: finalPrompt,
|
|
3361
|
-
// customPrompt,
|
|
3362
|
-
// promptSource: imagePrompt ? 'manual_override' : 'storyboard',
|
|
3363
|
-
// analysis: alignmentResult,
|
|
3364
|
-
// imageUrl,
|
|
3365
|
-
// nextActionSuggest:
|
|
3366
|
-
// '可根据分析结果调整提示词,修改storyboard后,重新生成图片。',
|
|
3367
|
-
// }),
|
|
3368
|
-
// },
|
|
3369
|
-
// ],
|
|
3370
|
-
// };
|
|
3371
|
-
// } catch (error) {
|
|
3372
|
-
// return createErrorResponse(error, 'image-aligner');
|
|
3373
|
-
// }
|
|
3374
|
-
// }
|
|
3375
|
-
// );
|
|
3376
3168
|
server.registerTool('audio-video-sync', {
|
|
3377
3169
|
title: 'Audio Video Sync',
|
|
3378
3170
|
description: 'Generate audio-video-synced video by matching video with audio. 还可以对口型。',
|
|
@@ -3399,7 +3191,7 @@ server.registerTool('audio-video-sync', {
|
|
|
3399
3191
|
.describe('The reference photo face for lip sync.'),
|
|
3400
3192
|
saveToFileName: zod_1.z
|
|
3401
3193
|
.string()
|
|
3402
|
-
.describe('The filename to save the audio-video-synced video.'),
|
|
3194
|
+
.describe('The filename to save the audio-video-synced video. 应该是mp4文件'),
|
|
3403
3195
|
},
|
|
3404
3196
|
}, async ({ lipSync, lipSyncType, lipSyncPadAudio, videoFileName, audioFileName, audioInMs, refPhotoFileName, saveToFileName, }, context) => {
|
|
3405
3197
|
try {
|
|
@@ -3527,7 +3319,7 @@ server.registerTool('audio-video-sync', {
|
|
|
3527
3319
|
}
|
|
3528
3320
|
});
|
|
3529
3321
|
server.registerTool('generate-video-by-ref', {
|
|
3530
|
-
title: '
|
|
3322
|
+
title: '参考生视频(包含文生视频)工具',
|
|
3531
3323
|
description: 'Generate video using reference images. Supports sora2, sora2-pro (1 image max), veo3.1, veo3.1-pro (1 image max), lite and pro (4 images max), vidu (7 images max). Can work without reference images (0 images).',
|
|
3532
3324
|
inputSchema: {
|
|
3533
3325
|
prompt: zod_1.z
|
|
@@ -3577,6 +3369,7 @@ server.registerTool('generate-video-by-ref', {
|
|
|
3577
3369
|
'veo3.1',
|
|
3578
3370
|
'veo3.1-pro',
|
|
3579
3371
|
'vidu',
|
|
3372
|
+
'vidu-uc',
|
|
3580
3373
|
'pixv',
|
|
3581
3374
|
])
|
|
3582
3375
|
.default('lite')
|
|
@@ -3588,7 +3381,7 @@ server.registerTool('generate-video-by-ref', {
|
|
|
3588
3381
|
.describe('Whether to mute the video (effective for sora2 and veo3.1).'),
|
|
3589
3382
|
saveToFileName: zod_1.z
|
|
3590
3383
|
.string()
|
|
3591
|
-
.describe('The filename to save the generated video.'),
|
|
3384
|
+
.describe('The filename to save the generated video. 应该是mp4文件'),
|
|
3592
3385
|
sceneIndex: zod_1.z
|
|
3593
3386
|
.number()
|
|
3594
3387
|
.min(1)
|
|
@@ -3794,7 +3587,54 @@ server.registerTool('generate-video-by-ref', {
|
|
|
3794
3587
|
if (promptPrefix) {
|
|
3795
3588
|
promptPrefix += '\n';
|
|
3796
3589
|
}
|
|
3797
|
-
|
|
3590
|
+
let finalPrompt = `${promptPrefix}${prompt}`;
|
|
3591
|
+
if (type === 'pixv') {
|
|
3592
|
+
const completion = await ai.getCompletions({
|
|
3593
|
+
model: 'Doubao-Seed-1.6',
|
|
3594
|
+
messages: [
|
|
3595
|
+
{
|
|
3596
|
+
role: 'system',
|
|
3597
|
+
content: `你根据主体信息,优化用户指令,使描述中的内容正确引用主体名称。
|
|
3598
|
+
|
|
3599
|
+
具体方式为,将用户指令中引用主体信息中主体名称的部分,用 “@主体名” 的形式替代,注意它和前后内容之间也需要用**空格**分隔。
|
|
3600
|
+
|
|
3601
|
+
## 例子
|
|
3602
|
+
|
|
3603
|
+
### 输入:
|
|
3604
|
+
|
|
3605
|
+
主体信息
|
|
3606
|
+
[
|
|
3607
|
+
{"type": "subject", "fileName": "dog.png", "ref_name": "狗"},
|
|
3608
|
+
{"type": "background", "fileName": "room.png", "ref_name": "房间"}
|
|
3609
|
+
]
|
|
3610
|
+
|
|
3611
|
+
用户指令
|
|
3612
|
+
一只狗在房间里玩耍
|
|
3613
|
+
|
|
3614
|
+
### 输出:
|
|
3615
|
+
一只 @狗 在 @房间 里玩耍
|
|
3616
|
+
|
|
3617
|
+
---
|
|
3618
|
+
|
|
3619
|
+
## 要求与约束
|
|
3620
|
+
|
|
3621
|
+
只输出替换主体名后的用户指令,不要输出其他任何额外内容
|
|
3622
|
+
`,
|
|
3623
|
+
},
|
|
3624
|
+
{
|
|
3625
|
+
role: 'user',
|
|
3626
|
+
content: `## 主体信息
|
|
3627
|
+
|
|
3628
|
+
${JSON.stringify(referenceImages)}
|
|
3629
|
+
|
|
3630
|
+
## 用户指令
|
|
3631
|
+
|
|
3632
|
+
${prompt.trim()}`,
|
|
3633
|
+
},
|
|
3634
|
+
],
|
|
3635
|
+
});
|
|
3636
|
+
finalPrompt = completion.choices[0]?.message?.content?.trim() || finalPrompt; // keep the prefixed prompt when the model returns no usable content
|
|
3637
|
+
}
|
|
3798
3638
|
// 调用 referencesToVideo 函数
|
|
3799
3639
|
const result = await currentSession.ai.referencesToVideo({
|
|
3800
3640
|
prompt: finalPrompt,
|
|
@@ -3823,7 +3663,7 @@ server.registerTool('generate-video-by-ref', {
|
|
|
3823
3663
|
type: 'text',
|
|
3824
3664
|
text: JSON.stringify({
|
|
3825
3665
|
success: true,
|
|
3826
|
-
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish
|
|
3666
|
+
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish 来等待任务结束,如果 wait-for-task-finish 工具调用超时,你应立即再次重新调用直到任务结束。',
|
|
3827
3667
|
taskUrl: result.taskUrl,
|
|
3828
3668
|
}),
|
|
3829
3669
|
},
|
|
@@ -3898,7 +3738,7 @@ server.registerTool('extend-video-duration', {
|
|
|
3898
3738
|
.describe('Optional end frame image file name in materials directory to guide the video extension.'),
|
|
3899
3739
|
saveToFileName: zod_1.z
|
|
3900
3740
|
.string()
|
|
3901
|
-
.describe('The filename to save the extended video.'),
|
|
3741
|
+
.describe('The filename to save the extended video. 应该是mp4文件'),
|
|
3902
3742
|
},
|
|
3903
3743
|
}, async ({ videoFileName, duration, resolution, prompt, type = 'turbo', endFrame, saveToFileName, }, context) => {
|
|
3904
3744
|
try {
|
|
@@ -3994,11 +3834,11 @@ server.registerTool('use-template', {
|
|
|
3994
3834
|
.describe('Optional materials to use in the template.'),
|
|
3995
3835
|
saveToFileName: zod_1.z
|
|
3996
3836
|
.string()
|
|
3997
|
-
.describe('The filename to save the generated material.'),
|
|
3837
|
+
.describe('The filename to save the generated material. 根据用户具体需求,应该是mp4或png文件'),
|
|
3998
3838
|
},
|
|
3999
3839
|
}, async ({ user_request, saveToFileName, materials }) => {
|
|
4000
3840
|
try {
|
|
4001
|
-
const currentSession = await validateSession('
|
|
3841
|
+
const currentSession = await validateSession('use-template');
|
|
4002
3842
|
const ai = currentSession.ai;
|
|
4003
3843
|
const data = await ai.listTemplates('all');
|
|
4004
3844
|
const templates = data.map(item => ({
|
|
@@ -4013,7 +3853,7 @@ server.registerTool('use-template', {
|
|
|
4013
3853
|
messages: [
|
|
4014
3854
|
{
|
|
4015
3855
|
role: 'system',
|
|
4016
|
-
content:
|
|
3856
|
+
content: `你根据用户需求,分析需求与模板描述(description)和触发器(trigger)的匹配程度,从以下模板中选择一个匹配的模板,返回模板ID:\n\n${JSON.stringify(templates)}\n\n**约束**:只输出模板ID,不需要其他解释,如果没有匹配的模版,输出"无匹配模版"`,
|
|
4017
3857
|
},
|
|
4018
3858
|
{
|
|
4019
3859
|
role: 'user',
|
|
@@ -4256,6 +4096,69 @@ server.registerTool('search-context', {
|
|
|
4256
4096
|
return createErrorResponse(error, 'search-context');
|
|
4257
4097
|
}
|
|
4258
4098
|
});
|
|
4099
|
+
// 列出项目下的所有文件
|
|
4100
|
+
server.registerTool('list-project-files', {
|
|
4101
|
+
title: 'List Project Files',
|
|
4102
|
+
description: 'List all files in the project root, materials, and output directories.',
|
|
4103
|
+
inputSchema: {},
|
|
4104
|
+
}, async () => {
|
|
4105
|
+
try {
|
|
4106
|
+
// 验证session状态
|
|
4107
|
+
const currentSession = await validateSession('list-project-files');
|
|
4108
|
+
console.log('Listing project files...');
|
|
4109
|
+
const terminal = currentSession.terminal;
|
|
4110
|
+
if (!terminal) {
|
|
4111
|
+
throw new Error('Terminal not available in current session');
|
|
4112
|
+
}
|
|
4113
|
+
let cwd;
|
|
4114
|
+
try {
|
|
4115
|
+
cwd = await terminal.getCwd();
|
|
4116
|
+
}
|
|
4117
|
+
catch (cwdError) {
|
|
4118
|
+
console.error('Failed to get current working directory:', cwdError);
|
|
4119
|
+
throw new Error('Failed to get current working directory');
|
|
4120
|
+
}
|
|
4121
|
+
console.log(`Current working directory: ${cwd}`);
|
|
4122
|
+
// 安全地列出各目录文件,失败时返回空数组
|
|
4123
|
+
const listFilesWithFallback = async (path, dirName) => {
|
|
4124
|
+
try {
|
|
4125
|
+
const files = await currentSession.files.listFiles(path);
|
|
4126
|
+
console.log(`Found ${files?.length || 0} files in ${dirName}`);
|
|
4127
|
+
return files || [];
|
|
4128
|
+
}
|
|
4129
|
+
catch (error) {
|
|
4130
|
+
console.warn(`Failed to list files in ${dirName} (${path}):`, error);
|
|
4131
|
+
return [];
|
|
4132
|
+
}
|
|
4133
|
+
};
|
|
4134
|
+
const [rootFiles, materialsFiles, outputFiles] = await Promise.all([
|
|
4135
|
+
listFilesWithFallback(cwd, 'root'),
|
|
4136
|
+
listFilesWithFallback(`${cwd}/materials`, 'materials'),
|
|
4137
|
+
listFilesWithFallback(`${cwd}/output`, 'output'),
|
|
4138
|
+
]);
|
|
4139
|
+
const result = {
|
|
4140
|
+
success: true,
|
|
4141
|
+
cwd,
|
|
4142
|
+
root: rootFiles,
|
|
4143
|
+
materials: materialsFiles,
|
|
4144
|
+
output: outputFiles,
|
|
4145
|
+
totalFiles: rootFiles.length + materialsFiles.length + outputFiles.length,
|
|
4146
|
+
timestamp: new Date().toISOString(),
|
|
4147
|
+
};
|
|
4148
|
+
console.log(`Total files found: ${result.totalFiles}`);
|
|
4149
|
+
return {
|
|
4150
|
+
content: [
|
|
4151
|
+
{
|
|
4152
|
+
type: 'text',
|
|
4153
|
+
text: JSON.stringify(result),
|
|
4154
|
+
},
|
|
4155
|
+
],
|
|
4156
|
+
};
|
|
4157
|
+
}
|
|
4158
|
+
catch (error) {
|
|
4159
|
+
return createErrorResponse(error, 'list-project-files');
|
|
4160
|
+
}
|
|
4161
|
+
});
|
|
4259
4162
|
server.registerTool('build-capcat-draft', {
|
|
4260
4163
|
title: 'Build CapCut Draft',
|
|
4261
4164
|
description: 'Read draft_content.json file, parse JSON and generate URIs for all assets in timeline tracks, then output the processed JSON string.',
|