cerevox 3.0.0-beta.9 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/ai.d.ts +25 -17
- package/dist/core/ai.d.ts.map +1 -1
- package/dist/core/ai.js +372 -151
- package/dist/core/ai.js.map +1 -1
- package/dist/mcp/servers/helper/doubao_voices_full.js +1 -1
- package/dist/mcp/servers/prompts/rules/creative-ad.md +2 -2
- package/dist/mcp/servers/prompts/rules/expert.md +1 -1
- package/dist/mcp/servers/prompts/rules/freeform.md +2 -2
- package/dist/mcp/servers/prompts/rules/general-video.md +10 -5
- package/dist/mcp/servers/prompts/rules/story-telling.md +3 -3
- package/dist/mcp/servers/prompts/skills/workflows/general-video.md +2 -2
- package/dist/mcp/servers/prompts/skills/workflows/music-video.md +1 -1
- package/dist/mcp/servers/zerocut.d.ts.map +1 -1
- package/dist/mcp/servers/zerocut.js +361 -523
- package/dist/mcp/servers/zerocut.js.map +1 -1
- package/dist/utils/coze.d.ts.map +1 -1
- package/dist/utils/coze.js +4 -1
- package/dist/utils/coze.js.map +1 -1
- package/package.json +1 -1
|
@@ -50,7 +50,6 @@ const constants_1 = require("../../utils/constants");
|
|
|
50
50
|
const videokit_1 = require("../../utils/videokit");
|
|
51
51
|
const promises_1 = require("node:fs/promises");
|
|
52
52
|
const node_path_1 = __importStar(require("node:path"));
|
|
53
|
-
const doubao_voices_full_1 = require("./helper/doubao_voices_full");
|
|
54
53
|
const node_fs_1 = require("node:fs");
|
|
55
54
|
const coze_1 = require("../../utils/coze");
|
|
56
55
|
const mp3_duration_1 = __importDefault(require("mp3-duration"));
|
|
@@ -94,10 +93,13 @@ function validateFileName(fileName) {
|
|
|
94
93
|
if (!fileName || fileName.trim() === '') {
|
|
95
94
|
throw new Error('File name cannot be empty');
|
|
96
95
|
}
|
|
96
|
+
if (fileName.startsWith('materials/')) {
|
|
97
|
+
fileName = fileName.replace('materials/', '');
|
|
98
|
+
}
|
|
97
99
|
if (fileName.includes('..') ||
|
|
98
100
|
fileName.includes('/') ||
|
|
99
101
|
fileName.includes('\\')) {
|
|
100
|
-
throw new Error('Invalid file name: contains illegal characters');
|
|
102
|
+
throw new Error('Invalid file name: contains illegal characters, cannot contain ".." or "/" or "\\"');
|
|
101
103
|
}
|
|
102
104
|
return fileName.trim();
|
|
103
105
|
}
|
|
@@ -130,6 +132,7 @@ async function initProject(session) {
|
|
|
130
132
|
return workDir;
|
|
131
133
|
}
|
|
132
134
|
async function saveMaterial(session, url, saveToFileName) {
|
|
135
|
+
saveToFileName = validateFileName(saveToFileName);
|
|
133
136
|
const terminal = session.terminal;
|
|
134
137
|
const saveToPath = `/home/user/cerevox-zerocut/projects/${terminal.id}/materials/${saveToFileName}`;
|
|
135
138
|
const saveLocalPath = (0, node_path_1.resolve)(projectLocalDir, 'materials', saveToFileName);
|
|
@@ -514,7 +517,7 @@ server.registerTool('project-close', {
|
|
|
514
517
|
.min(0)
|
|
515
518
|
.max(20)
|
|
516
519
|
.default(5)
|
|
517
|
-
.describe('Close the session after the specified number of minutes. Default is 5 minutes.
|
|
520
|
+
.describe('Close the session after the specified number of minutes. Default is 5 minutes. 除非用户要求立即关闭会话,将该参数设置为0,否则应默认设为5'),
|
|
518
521
|
},
|
|
519
522
|
}, async ({ inMinutes }) => {
|
|
520
523
|
try {
|
|
@@ -663,69 +666,6 @@ server.registerTool('retrieve-rules-context', {
|
|
|
663
666
|
return createErrorResponse(`Failed to load rules context prompt for ${prompt}: ${error}`, 'retrieve-rules-context');
|
|
664
667
|
}
|
|
665
668
|
});
|
|
666
|
-
// 列出项目下的所有文件
|
|
667
|
-
server.registerTool('list-project-files', {
|
|
668
|
-
title: 'List Project Files',
|
|
669
|
-
description: 'List all files in the materials directory.',
|
|
670
|
-
inputSchema: {},
|
|
671
|
-
}, async () => {
|
|
672
|
-
try {
|
|
673
|
-
// 验证session状态
|
|
674
|
-
const currentSession = await validateSession('list-project-files');
|
|
675
|
-
console.log('Listing project files...');
|
|
676
|
-
const terminal = currentSession.terminal;
|
|
677
|
-
if (!terminal) {
|
|
678
|
-
throw new Error('Terminal not available in current session');
|
|
679
|
-
}
|
|
680
|
-
let cwd;
|
|
681
|
-
try {
|
|
682
|
-
cwd = await terminal.getCwd();
|
|
683
|
-
}
|
|
684
|
-
catch (cwdError) {
|
|
685
|
-
console.error('Failed to get current working directory:', cwdError);
|
|
686
|
-
throw new Error('Failed to get current working directory');
|
|
687
|
-
}
|
|
688
|
-
console.log(`Current working directory: ${cwd}`);
|
|
689
|
-
// 安全地列出各目录文件,失败时返回空数组
|
|
690
|
-
const listFilesWithFallback = async (path, dirName) => {
|
|
691
|
-
try {
|
|
692
|
-
const files = await currentSession.files.listFiles(path);
|
|
693
|
-
console.log(`Found ${files?.length || 0} files in ${dirName}`);
|
|
694
|
-
return files || [];
|
|
695
|
-
}
|
|
696
|
-
catch (error) {
|
|
697
|
-
console.warn(`Failed to list files in ${dirName} (${path}):`, error);
|
|
698
|
-
return [];
|
|
699
|
-
}
|
|
700
|
-
};
|
|
701
|
-
const [rootFiles, materialsFiles, outputFiles] = await Promise.all([
|
|
702
|
-
listFilesWithFallback(cwd, 'root'),
|
|
703
|
-
listFilesWithFallback(`${cwd}/materials`, 'materials'),
|
|
704
|
-
listFilesWithFallback(`${cwd}/output`, 'output'),
|
|
705
|
-
]);
|
|
706
|
-
const result = {
|
|
707
|
-
success: true,
|
|
708
|
-
cwd,
|
|
709
|
-
root: rootFiles,
|
|
710
|
-
materials: materialsFiles,
|
|
711
|
-
output: outputFiles,
|
|
712
|
-
totalFiles: rootFiles.length + materialsFiles.length + outputFiles.length,
|
|
713
|
-
timestamp: new Date().toISOString(),
|
|
714
|
-
};
|
|
715
|
-
console.log(`Total files found: ${result.totalFiles}`);
|
|
716
|
-
return {
|
|
717
|
-
content: [
|
|
718
|
-
{
|
|
719
|
-
type: 'text',
|
|
720
|
-
text: JSON.stringify(result),
|
|
721
|
-
},
|
|
722
|
-
],
|
|
723
|
-
};
|
|
724
|
-
}
|
|
725
|
-
catch (error) {
|
|
726
|
-
return createErrorResponse(error, 'list-project-files');
|
|
727
|
-
}
|
|
728
|
-
});
|
|
729
669
|
server.registerTool('generate-character-image', {
|
|
730
670
|
title: 'Generate Character Image',
|
|
731
671
|
description: 'Generate a turnaround image or portrait for any character.',
|
|
@@ -932,49 +872,6 @@ ${roleDescriptionPrompt}
|
|
|
932
872
|
return createErrorResponse(error, 'generate-character-image');
|
|
933
873
|
}
|
|
934
874
|
});
|
|
935
|
-
server.registerTool('generate-line-sketch', {
|
|
936
|
-
title: 'Generate Line Sketch',
|
|
937
|
-
description: 'Generate line sketch material based on user prompt.',
|
|
938
|
-
inputSchema: {
|
|
939
|
-
prompt: zod_1.z.string().describe('The prompt to generate line sketch.'),
|
|
940
|
-
saveToFileName: zod_1.z
|
|
941
|
-
.string()
|
|
942
|
-
.describe('The filename to save the generated line sketch. 应该是png文件'),
|
|
943
|
-
},
|
|
944
|
-
}, async ({ prompt, saveToFileName }) => {
|
|
945
|
-
try {
|
|
946
|
-
// 验证session状态
|
|
947
|
-
await validateSession('generate-line-sketch');
|
|
948
|
-
// 验证文件名
|
|
949
|
-
validateFileName(saveToFileName);
|
|
950
|
-
// 调用AI生成线稿
|
|
951
|
-
const res = await session.ai.generateLineSketch({ prompt });
|
|
952
|
-
if (res && res.url) {
|
|
953
|
-
// 保存到本地
|
|
954
|
-
await saveMaterial(session, res.url, saveToFileName);
|
|
955
|
-
const result = {
|
|
956
|
-
success: true,
|
|
957
|
-
url: res.url,
|
|
958
|
-
localPath: getMaterialUri(session, saveToFileName),
|
|
959
|
-
timestamp: new Date().toISOString(),
|
|
960
|
-
};
|
|
961
|
-
return {
|
|
962
|
-
content: [
|
|
963
|
-
{
|
|
964
|
-
type: 'text',
|
|
965
|
-
text: JSON.stringify(result),
|
|
966
|
-
},
|
|
967
|
-
],
|
|
968
|
-
};
|
|
969
|
-
}
|
|
970
|
-
else {
|
|
971
|
-
throw new Error('No URL returned from AI service');
|
|
972
|
-
}
|
|
973
|
-
}
|
|
974
|
-
catch (error) {
|
|
975
|
-
return createErrorResponse(error, 'generate-line-sketch');
|
|
976
|
-
}
|
|
977
|
-
});
|
|
978
875
|
server.registerTool('upload-custom-material', {
|
|
979
876
|
title: 'Upload Custom Material',
|
|
980
877
|
description: 'Upload material files (images: jpeg/png, videos: mp4, audio: mp3) from the local filesystem to the materials directory. For video and audio files, duration information will be included in the response.',
|
|
@@ -1063,15 +960,21 @@ server.registerTool('upload-custom-material', {
|
|
|
1063
960
|
});
|
|
1064
961
|
server.registerTool('generate-image', {
|
|
1065
962
|
title: 'Generate Image',
|
|
1066
|
-
description:
|
|
963
|
+
description: `生成图片,支持批量生成1-9张图,若用户要求生成关联图片或组图,请一次生成,不要分几次生成`,
|
|
1067
964
|
inputSchema: {
|
|
1068
965
|
type: zod_1.z
|
|
1069
|
-
.enum([
|
|
966
|
+
.enum([
|
|
967
|
+
'banana',
|
|
968
|
+
'banana-pro',
|
|
969
|
+
'seedream',
|
|
970
|
+
'seedream-pro',
|
|
971
|
+
'line-sketch',
|
|
972
|
+
])
|
|
1070
973
|
.optional()
|
|
1071
974
|
.default('seedream'),
|
|
1072
975
|
prompt: zod_1.z
|
|
1073
976
|
.string()
|
|
1074
|
-
.describe('The prompt to generate. 一般要严格对应 storyboard 中当前场景的 start_frame 或 end_frame
|
|
977
|
+
.describe('The prompt to generate. 一般要严格对应 storyboard 中当前场景的 start_frame 或 end_frame 中的字段描述,如果是生成线稿,则 type 使用 line-sketch'),
|
|
1075
978
|
sceneIndex: zod_1.z
|
|
1076
979
|
.number()
|
|
1077
980
|
.min(1)
|
|
@@ -1126,9 +1029,16 @@ server.registerTool('generate-image', {
|
|
|
1126
1029
|
])
|
|
1127
1030
|
.default('720x1280')
|
|
1128
1031
|
.describe('The size of the image.'),
|
|
1129
|
-
|
|
1130
|
-
.
|
|
1131
|
-
.
|
|
1032
|
+
imageCount: zod_1.z
|
|
1033
|
+
.number()
|
|
1034
|
+
.min(1)
|
|
1035
|
+
.max(9)
|
|
1036
|
+
.optional()
|
|
1037
|
+
.default(1)
|
|
1038
|
+
.describe('The number of images to generate. 最多支持9张图,多了容易超时'),
|
|
1039
|
+
saveToFileNames: zod_1.z
|
|
1040
|
+
.array(zod_1.z.string())
|
|
1041
|
+
.describe('The filenames to save. 数量要和imageCount对应,应该是png文件'),
|
|
1132
1042
|
watermark: zod_1.z
|
|
1133
1043
|
.boolean()
|
|
1134
1044
|
.optional()
|
|
@@ -1162,7 +1072,7 @@ server.registerTool('generate-image', {
|
|
|
1162
1072
|
\`\`\`
|
|
1163
1073
|
`),
|
|
1164
1074
|
},
|
|
1165
|
-
}, async ({ type = 'seedream', prompt, sceneIndex, storyBoardFile = 'storyboard.json', skipConsistencyCheck = false, size = '720x1280',
|
|
1075
|
+
}, async ({ type = 'seedream', prompt, sceneIndex, storyBoardFile = 'storyboard.json', skipConsistencyCheck = false, size = '720x1280', imageCount = 1, saveToFileNames, watermark, referenceImages, }, context) => {
|
|
1166
1076
|
try {
|
|
1167
1077
|
// 验证session状态
|
|
1168
1078
|
const currentSession = await validateSession('generate-image');
|
|
@@ -1172,7 +1082,6 @@ server.registerTool('generate-image', {
|
|
|
1172
1082
|
checkStoryboardFlag = true;
|
|
1173
1083
|
return createErrorResponse('必须先审查生成的 storyboard.json 内容,确保每个场景中的stage_atmosphere内容按照规则被正确融合到start_frame和video_prompt中,不得遗漏,检查完成后先汇报,如果有问题,应当先修改 storyboard.json 内容,然后再调用 generate-image 生成图片。注意修改 storyboard 内容时,仅修改相应字段的字符串值,不要破坏JSON格式!', 'generate-image');
|
|
1174
1084
|
}
|
|
1175
|
-
const validatedFileName = validateFileName(saveToFileName);
|
|
1176
1085
|
// 校验 prompt 与 storyboard.json 中场景设定的一致性
|
|
1177
1086
|
if (sceneIndex && !skipConsistencyCheck) {
|
|
1178
1087
|
try {
|
|
@@ -1321,6 +1230,9 @@ ${referenceImages?.map((ref, index) => `图${index + 1}:${ref.image}`).join('\
|
|
|
1321
1230
|
catch (error) {
|
|
1322
1231
|
console.error('Failed to optimize prompt:', error);
|
|
1323
1232
|
}
|
|
1233
|
+
if (imageCount > 1) {
|
|
1234
|
+
processedPrompt = `请生成${imageCount}张相关图片(每张都是大图,非拼接) ${processedPrompt}`;
|
|
1235
|
+
}
|
|
1324
1236
|
console.log(`Generating image with prompt: ${processedPrompt.substring(0, 100)}...`);
|
|
1325
1237
|
// 处理参考图片
|
|
1326
1238
|
let imageBase64Array;
|
|
@@ -1392,23 +1304,55 @@ ${processedPrompt}`.trim();
|
|
|
1392
1304
|
}
|
|
1393
1305
|
}
|
|
1394
1306
|
const ai = currentSession.ai;
|
|
1395
|
-
const
|
|
1307
|
+
const { taskUrl } = await ai.generateImage({
|
|
1396
1308
|
type,
|
|
1397
1309
|
prompt: processedPrompt,
|
|
1398
1310
|
size,
|
|
1399
1311
|
watermark,
|
|
1400
1312
|
image: imageBase64Array,
|
|
1313
|
+
async: true,
|
|
1314
|
+
});
|
|
1315
|
+
let progress = 0;
|
|
1316
|
+
const res = await ai.waitForTaskComplete({
|
|
1317
|
+
taskUrl,
|
|
1318
|
+
onProgress: async (metaData) => {
|
|
1319
|
+
try {
|
|
1320
|
+
await sendProgress(context, ++progress, undefined, JSON.stringify(metaData));
|
|
1321
|
+
}
|
|
1322
|
+
catch (progressError) {
|
|
1323
|
+
console.warn('Failed to send progress update:', progressError);
|
|
1324
|
+
}
|
|
1325
|
+
},
|
|
1401
1326
|
});
|
|
1402
1327
|
if (!res) {
|
|
1403
1328
|
throw new Error('Failed to generate image: no response from AI service');
|
|
1404
1329
|
}
|
|
1405
|
-
if (res.
|
|
1330
|
+
if (res.urls && res.urls.length > 0) {
|
|
1406
1331
|
console.log('Image generated successfully, saving to materials...');
|
|
1407
|
-
|
|
1332
|
+
let uris = [];
|
|
1333
|
+
if (res.urls.length === 1 && res.urls[0]) {
|
|
1334
|
+
uris = [
|
|
1335
|
+
await saveMaterial(currentSession, res.urls[0], validateFileName(saveToFileNames[0])),
|
|
1336
|
+
];
|
|
1337
|
+
}
|
|
1338
|
+
else {
|
|
1339
|
+
// 多图场景
|
|
1340
|
+
uris = await Promise.all(res.urls.map((url, i) => {
|
|
1341
|
+
if (!url)
|
|
1342
|
+
return '';
|
|
1343
|
+
try {
|
|
1344
|
+
const fileName = validateFileName(saveToFileNames[i]);
|
|
1345
|
+
return saveMaterial(currentSession, url, fileName);
|
|
1346
|
+
}
|
|
1347
|
+
catch (ex) {
|
|
1348
|
+
return '';
|
|
1349
|
+
}
|
|
1350
|
+
}));
|
|
1351
|
+
}
|
|
1408
1352
|
const result = {
|
|
1409
1353
|
success: true,
|
|
1410
1354
|
// source: res.url,
|
|
1411
|
-
|
|
1355
|
+
uris,
|
|
1412
1356
|
prompt: processedPrompt,
|
|
1413
1357
|
size,
|
|
1414
1358
|
timestamp: new Date().toISOString(),
|
|
@@ -1579,7 +1523,7 @@ server.registerTool('edit-image', {
|
|
|
1579
1523
|
let lastEffect = '';
|
|
1580
1524
|
server.registerTool('generate-video', {
|
|
1581
1525
|
title: 'Generate Video',
|
|
1582
|
-
description:
|
|
1526
|
+
description: `图生视频和首尾帧生视频工具`,
|
|
1583
1527
|
inputSchema: {
|
|
1584
1528
|
prompt: zod_1.z
|
|
1585
1529
|
.string()
|
|
@@ -1611,6 +1555,8 @@ server.registerTool('generate-video', {
|
|
|
1611
1555
|
'hailuo-fast',
|
|
1612
1556
|
'vidu',
|
|
1613
1557
|
'vidu-pro',
|
|
1558
|
+
'vidu-uc',
|
|
1559
|
+
'vidu-uc-pro',
|
|
1614
1560
|
'kling',
|
|
1615
1561
|
'kling-pro',
|
|
1616
1562
|
'pixv',
|
|
@@ -2029,7 +1975,7 @@ server.registerTool('generate-video', {
|
|
|
2029
1975
|
console.warn('Failed to send progress update:', progressError);
|
|
2030
1976
|
}
|
|
2031
1977
|
},
|
|
2032
|
-
waitForFinish:
|
|
1978
|
+
waitForFinish: type !== 'zero',
|
|
2033
1979
|
});
|
|
2034
1980
|
if (!res) {
|
|
2035
1981
|
throw new Error('Failed to generate video: no response from AI service');
|
|
@@ -2077,7 +2023,7 @@ server.registerTool('generate-video', {
|
|
|
2077
2023
|
type: 'text',
|
|
2078
2024
|
text: JSON.stringify({
|
|
2079
2025
|
success: true,
|
|
2080
|
-
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish
|
|
2026
|
+
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish 来等待任务结束,如 wait-for-task-finish 工具调用超时,你应立即再次重新调用直到任务结束。',
|
|
2081
2027
|
taskUrl: res.taskUrl,
|
|
2082
2028
|
}),
|
|
2083
2029
|
},
|
|
@@ -2107,7 +2053,7 @@ server.registerTool('generate-video', {
|
|
|
2107
2053
|
}
|
|
2108
2054
|
});
|
|
2109
2055
|
server.registerTool('wait-for-task-finish', {
|
|
2110
|
-
title: 'Wait Workflow or VideoTask Done',
|
|
2056
|
+
title: 'Wait Workflow or VideoTask Done;只有正在运行Coze工作流或者有异步生成视频任务时才需要执行这个工具',
|
|
2111
2057
|
description: 'Wait for a workflow to complete.',
|
|
2112
2058
|
inputSchema: {
|
|
2113
2059
|
taskUrl: zod_1.z
|
|
@@ -2122,7 +2068,7 @@ server.registerTool('wait-for-task-finish', {
|
|
|
2122
2068
|
const currentSession = await validateSession('wait-for-task-finish');
|
|
2123
2069
|
const ai = currentSession.ai;
|
|
2124
2070
|
let progress = 0;
|
|
2125
|
-
const res = await ai.
|
|
2071
|
+
const res = await ai.waitForTaskComplete({
|
|
2126
2072
|
taskUrl,
|
|
2127
2073
|
onProgress: async (metaData) => {
|
|
2128
2074
|
try {
|
|
@@ -2237,14 +2183,32 @@ server.registerTool('generate-sound-effect', {
|
|
|
2237
2183
|
return createErrorResponse(error, 'generate-sound-effect');
|
|
2238
2184
|
}
|
|
2239
2185
|
});
|
|
2240
|
-
server.registerTool('generate-music', {
|
|
2241
|
-
title: '
|
|
2242
|
-
description: '
|
|
2186
|
+
server.registerTool('generate-music-or-mv', {
|
|
2187
|
+
title: '创作音乐(Music)或音乐视频(Music Video)',
|
|
2188
|
+
description: '生成音乐,包括MV(music video)、BGM 或 歌曲',
|
|
2243
2189
|
inputSchema: {
|
|
2244
2190
|
prompt: zod_1.z.string().describe('The prompt to generate.'),
|
|
2191
|
+
singerPhoto: zod_1.z
|
|
2192
|
+
.string()
|
|
2193
|
+
.optional()
|
|
2194
|
+
.describe('The singer photo to use. 只有type为music_video的时候才生效,也可以不传,模型会自动生成'),
|
|
2195
|
+
mvOrientation: zod_1.z
|
|
2196
|
+
.enum(['portrait', 'landscape'])
|
|
2197
|
+
.optional()
|
|
2198
|
+
.describe('The orientation of the music video. Defaults to portrait.')
|
|
2199
|
+
.default('portrait'),
|
|
2200
|
+
mvOriginalSong: zod_1.z
|
|
2201
|
+
.string()
|
|
2202
|
+
.optional()
|
|
2203
|
+
.describe('用于生成mv的音乐. 只有type为music_video的时候才生效,也可以不传,模型会自动创作'),
|
|
2204
|
+
mvGenSubtitles: zod_1.z
|
|
2205
|
+
.boolean()
|
|
2206
|
+
.optional()
|
|
2207
|
+
.default(false)
|
|
2208
|
+
.describe('是否生成mv的字幕. 默认为false,只有type为music_video的时候才生效'),
|
|
2245
2209
|
type: zod_1.z
|
|
2246
|
-
.enum(['bgm', 'song'])
|
|
2247
|
-
.describe('The type of music. Defaults to
|
|
2210
|
+
.enum(['bgm', 'song', 'music_video'])
|
|
2211
|
+
.describe('The type of music. Defaults to BGM. ⚠️ 如果 type 是 music_video,会直接生成音频和视频,**不需要**额外专门生成歌曲')
|
|
2248
2212
|
.default('bgm'),
|
|
2249
2213
|
model: zod_1.z
|
|
2250
2214
|
.enum(['doubao', 'minimax'])
|
|
@@ -2262,9 +2226,9 @@ server.registerTool('generate-music', {
|
|
|
2262
2226
|
.describe('Whether to skip copyright check.'),
|
|
2263
2227
|
saveToFileName: zod_1.z
|
|
2264
2228
|
.string()
|
|
2265
|
-
.describe('The filename to save.
|
|
2229
|
+
.describe('The filename to save. 如果type是music video,应该是mp4文件,否则应该是mp3文件'),
|
|
2266
2230
|
},
|
|
2267
|
-
}, async ({ prompt, type, model, duration, skipCopyCheck, saveToFileName }, context) => {
|
|
2231
|
+
}, async ({ prompt, singerPhoto, mvOrientation, mvOriginalSong, mvGenSubtitles, type, model, duration, skipCopyCheck, saveToFileName, }, context) => {
|
|
2268
2232
|
try {
|
|
2269
2233
|
// 验证session状态
|
|
2270
2234
|
const currentSession = await validateSession('generate-music');
|
|
@@ -2275,19 +2239,49 @@ server.registerTool('generate-music', {
|
|
|
2275
2239
|
if (type === 'bgm' && duration > 120) {
|
|
2276
2240
|
throw new Error('BGM duration must be at most 120 seconds.');
|
|
2277
2241
|
}
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
|
|
2284
|
-
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
|
|
2290
|
-
|
|
2242
|
+
let res;
|
|
2243
|
+
if (type === 'music_video') {
|
|
2244
|
+
const singer_photo = singerPhoto
|
|
2245
|
+
? await getMaterialUri(currentSession, singerPhoto)
|
|
2246
|
+
: undefined;
|
|
2247
|
+
const original_song = mvOriginalSong
|
|
2248
|
+
? await getMaterialUri(currentSession, mvOriginalSong)
|
|
2249
|
+
: undefined;
|
|
2250
|
+
res = await ai.generateZeroCutMusicVideo({
|
|
2251
|
+
// prompt: `${prompt.trim()} 音乐时长${duration}秒`,
|
|
2252
|
+
prompt,
|
|
2253
|
+
singerPhoto: singer_photo,
|
|
2254
|
+
orientation: mvOrientation,
|
|
2255
|
+
genSubtitles: mvGenSubtitles,
|
|
2256
|
+
originalSong: original_song,
|
|
2257
|
+
duration,
|
|
2258
|
+
resolution: '720p',
|
|
2259
|
+
onProgress: async (metaData) => {
|
|
2260
|
+
try {
|
|
2261
|
+
await sendProgress(context, metaData.Result?.Progress ?? ++progress, metaData.Result?.Progress ? 100 : undefined, JSON.stringify(metaData));
|
|
2262
|
+
}
|
|
2263
|
+
catch (progressError) {
|
|
2264
|
+
console.warn('Failed to send progress update:', progressError);
|
|
2265
|
+
}
|
|
2266
|
+
},
|
|
2267
|
+
waitForFinish: false,
|
|
2268
|
+
});
|
|
2269
|
+
}
|
|
2270
|
+
else {
|
|
2271
|
+
const finalPrompt = `${prompt.trim()} ${type === 'bgm' ? `纯音乐无歌词,时长${duration}秒` : `时长${duration}秒,使用${model}模型`}`;
|
|
2272
|
+
res = await ai.generateMusic({
|
|
2273
|
+
prompt: finalPrompt,
|
|
2274
|
+
skipCopyCheck,
|
|
2275
|
+
onProgress: async (metaData) => {
|
|
2276
|
+
try {
|
|
2277
|
+
await sendProgress(context, metaData.Result?.Progress ?? ++progress, metaData.Result?.Progress ? 100 : undefined, JSON.stringify(metaData));
|
|
2278
|
+
}
|
|
2279
|
+
catch (progressError) {
|
|
2280
|
+
console.warn('Failed to send progress update:', progressError);
|
|
2281
|
+
}
|
|
2282
|
+
},
|
|
2283
|
+
});
|
|
2284
|
+
}
|
|
2291
2285
|
if (!res) {
|
|
2292
2286
|
throw new Error('Failed to generate Music: no response from AI service');
|
|
2293
2287
|
}
|
|
@@ -2330,6 +2324,20 @@ server.registerTool('generate-music', {
|
|
|
2330
2324
|
],
|
|
2331
2325
|
};
|
|
2332
2326
|
}
|
|
2327
|
+
else if (res.taskUrl) {
|
|
2328
|
+
return {
|
|
2329
|
+
content: [
|
|
2330
|
+
{
|
|
2331
|
+
type: 'text',
|
|
2332
|
+
text: JSON.stringify({
|
|
2333
|
+
success: true,
|
|
2334
|
+
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish 来等待任务结束,如如 wait-for-task-finish 工具调用超时,你应立即再次重新调用直到任务结束。',
|
|
2335
|
+
taskUrl: res.taskUrl,
|
|
2336
|
+
}),
|
|
2337
|
+
},
|
|
2338
|
+
],
|
|
2339
|
+
};
|
|
2340
|
+
}
|
|
2333
2341
|
else {
|
|
2334
2342
|
console.warn('Music generation completed but no URL returned');
|
|
2335
2343
|
return {
|
|
@@ -2399,44 +2407,44 @@ server.registerTool('generate-scene-tts', {
|
|
|
2399
2407
|
.optional()
|
|
2400
2408
|
.default(1.0)
|
|
2401
2409
|
.describe('The volume of the tts.'),
|
|
2402
|
-
emotion: zod_1.z
|
|
2403
|
-
.enum([
|
|
2404
|
-
'storytelling',
|
|
2405
|
-
'neutral',
|
|
2406
|
-
'excited',
|
|
2407
|
-
'coldness',
|
|
2408
|
-
'angry',
|
|
2409
|
-
'sad',
|
|
2410
|
-
'happy',
|
|
2411
|
-
'surprised',
|
|
2412
|
-
'fear',
|
|
2413
|
-
'depressed',
|
|
2414
|
-
'lovey-dovey',
|
|
2415
|
-
'shy',
|
|
2416
|
-
'comfort',
|
|
2417
|
-
'tension',
|
|
2418
|
-
'tender',
|
|
2419
|
-
'magnetic',
|
|
2420
|
-
'vocal - fry',
|
|
2421
|
-
'ASMR',
|
|
2422
|
-
])
|
|
2423
|
-
.optional(),
|
|
2424
2410
|
voiceID: zod_1.z
|
|
2425
2411
|
.string()
|
|
2426
|
-
.describe(`适合作为视频配音的音色ID
|
|
2412
|
+
.describe(`适合作为视频配音的音色ID,除非用户指定,否则你必须确保已通过 pick-voice 工具挑选出真实存在的音色。`),
|
|
2413
|
+
context_texts: zod_1.z
|
|
2414
|
+
.array(zod_1.z.string())
|
|
2415
|
+
.default([])
|
|
2416
|
+
.describe(`语音合成的辅助信息,用于模型对话式合成,能更好的体现语音情感
|
|
2417
|
+
|
|
2418
|
+
可以探索,比如常见示例有以下几种:
|
|
2419
|
+
|
|
2420
|
+
1. 语速调整
|
|
2421
|
+
- context_texts: ["你可以说慢一点吗?"]
|
|
2422
|
+
2. 情绪/语气调整
|
|
2423
|
+
- context_texts=["你可以用特别特别痛心的语气说话吗?"]
|
|
2424
|
+
- context_texts=["嗯,你的语气再欢乐一点"]
|
|
2425
|
+
3. 音量调整
|
|
2426
|
+
- context_texts=["你嗓门再小点。"]
|
|
2427
|
+
4. 音感调整
|
|
2428
|
+
- context_texts=["你能用骄傲的语气来说话吗?"]
|
|
2429
|
+
`),
|
|
2427
2430
|
explicit_language: zod_1.z.enum(['zh', 'en', 'ja']).optional().default('zh'),
|
|
2428
2431
|
},
|
|
2429
|
-
}, async ({ text, sceneIndex, storyBoardFile, skipConsistencyCheck, voiceID, saveToFileName, speed, pitch, volume,
|
|
2432
|
+
}, async ({ text, sceneIndex, storyBoardFile, skipConsistencyCheck, voiceID, saveToFileName, speed, pitch, volume, context_texts, explicit_language, }) => {
|
|
2430
2433
|
try {
|
|
2431
2434
|
// 验证session状态
|
|
2432
2435
|
const currentSession = await validateSession('generate-scene-tts');
|
|
2433
2436
|
const validatedFileName = validateFileName(saveToFileName);
|
|
2434
2437
|
const finalSpeed = speed ?? 1;
|
|
2435
2438
|
volume = volume ?? 1;
|
|
2439
|
+
const ai = currentSession.ai;
|
|
2436
2440
|
let scene = null;
|
|
2437
2441
|
// 校验 text 与 storyboard.json 中场景设定的一致性
|
|
2438
2442
|
if (sceneIndex && !skipConsistencyCheck) {
|
|
2439
2443
|
try {
|
|
2444
|
+
const voice = (await ai.listVoices()).find(v => v.id === voiceID);
|
|
2445
|
+
if (!voice) {
|
|
2446
|
+
return createErrorResponse(`Voice ${voiceID} not found in voice-list. Use pick-voice tool to pick an available voice. 若用户坚持要使用该音色,需跳过一致性检查。`, 'generate-scene-tts');
|
|
2447
|
+
}
|
|
2440
2448
|
const storyBoardPath = (0, node_path_1.resolve)(process.env.ZEROCUT_PROJECT_CWD || process.cwd(), projectLocalDir, storyBoardFile);
|
|
2441
2449
|
if ((0, node_fs_1.existsSync)(storyBoardPath)) {
|
|
2442
2450
|
const storyBoardContent = await (0, promises_1.readFile)(storyBoardPath, 'utf8');
|
|
@@ -2487,9 +2495,8 @@ server.registerTool('generate-scene-tts', {
|
|
|
2487
2495
|
}
|
|
2488
2496
|
}
|
|
2489
2497
|
console.log(`Generating TTS with voice: ${voiceID}, speed: ${finalSpeed}, text: ${text.substring(0, 100)}...`);
|
|
2490
|
-
const ai = currentSession.ai;
|
|
2491
2498
|
if (voiceID.startsWith('BV0')) {
|
|
2492
|
-
throw new Error(`BV0*
|
|
2499
|
+
throw new Error(`BV0* 系列音色已弃用,你必须通过 pick-voice 工具挑选一个真实存在的音色。`);
|
|
2493
2500
|
}
|
|
2494
2501
|
const type = voiceID.startsWith('zh_') ||
|
|
2495
2502
|
voiceID.startsWith('en_') ||
|
|
@@ -2499,30 +2506,80 @@ server.registerTool('generate-scene-tts', {
|
|
|
2499
2506
|
? 'volcano'
|
|
2500
2507
|
: 'minimax';
|
|
2501
2508
|
let res;
|
|
2509
|
+
let emotion = 'auto';
|
|
2502
2510
|
if (type === 'volcano') {
|
|
2503
|
-
const voice = doubao_voices_full_1.doubaoVoicesFull.find(v => v.voiceID === voiceID);
|
|
2504
|
-
if (!voice) {
|
|
2505
|
-
return createErrorResponse(`Voice ${voiceID} not found in Doubao voices. Use search-voices tool to find available voices.`, 'generate-scene-tts');
|
|
2506
|
-
}
|
|
2507
|
-
const emotions = voice.emotions || [];
|
|
2508
|
-
if (emotion && !emotions.includes(emotion)) {
|
|
2509
|
-
emotion = 'neutral';
|
|
2510
|
-
}
|
|
2511
|
-
// 修复可能的 emotion 错误情况
|
|
2512
|
-
emotion = emotion || 'neutral';
|
|
2513
2511
|
volume = Math.max(Math.min(volume, 2.0), 0.5);
|
|
2514
2512
|
res = await ai.textToSpeechVolc({
|
|
2515
2513
|
text: text.trim(),
|
|
2516
2514
|
speaker: voiceID,
|
|
2517
2515
|
speed: Math.floor(100 * (finalSpeed - 1)),
|
|
2518
2516
|
volume: Math.floor(100 * (volume - 1)),
|
|
2519
|
-
|
|
2517
|
+
context_texts,
|
|
2520
2518
|
explicit_language,
|
|
2521
2519
|
voice_to_caption: explicit_language === 'zh' || explicit_language === 'en',
|
|
2522
2520
|
});
|
|
2523
2521
|
}
|
|
2524
2522
|
else {
|
|
2525
|
-
emotion =
|
|
2523
|
+
emotion = 'neutral';
|
|
2524
|
+
if (context_texts.length > 0) {
|
|
2525
|
+
const prompt = `根据用户输入语音内容和上下文内容,从文字判断语音合理的情感,然后选择以下情感**之一**返回结果:
|
|
2526
|
+
|
|
2527
|
+
"happy", "sad", "angry", "fearful", "disgusted", "surprised", "calm", "fluent", "whisper", "neutral"
|
|
2528
|
+
|
|
2529
|
+
## 要求
|
|
2530
|
+
输出 JSON 格式,包含一个 emotion 字段,值为以上情感之一。
|
|
2531
|
+
`;
|
|
2532
|
+
const schema = {
|
|
2533
|
+
name: 'emotion_schema',
|
|
2534
|
+
schema: {
|
|
2535
|
+
type: 'object',
|
|
2536
|
+
properties: {
|
|
2537
|
+
emotion: {
|
|
2538
|
+
type: 'string',
|
|
2539
|
+
enum: [
|
|
2540
|
+
'neutral',
|
|
2541
|
+
'happy',
|
|
2542
|
+
'sad',
|
|
2543
|
+
'angry',
|
|
2544
|
+
'fearful',
|
|
2545
|
+
'disgusted',
|
|
2546
|
+
'surprised',
|
|
2547
|
+
'calm',
|
|
2548
|
+
'fluent',
|
|
2549
|
+
'whisper',
|
|
2550
|
+
],
|
|
2551
|
+
description: '用户输入语音的情感',
|
|
2552
|
+
},
|
|
2553
|
+
},
|
|
2554
|
+
required: ['emotion'],
|
|
2555
|
+
},
|
|
2556
|
+
};
|
|
2557
|
+
const payload = {
|
|
2558
|
+
model: 'Doubao-Seed-1.6',
|
|
2559
|
+
messages: [
|
|
2560
|
+
{
|
|
2561
|
+
role: 'system',
|
|
2562
|
+
content: prompt,
|
|
2563
|
+
},
|
|
2564
|
+
{
|
|
2565
|
+
role: 'user',
|
|
2566
|
+
content: `## 语音内容:
|
|
2567
|
+
${text.trim()}
|
|
2568
|
+
|
|
2569
|
+
## 语音上下文
|
|
2570
|
+
${context_texts.join('\n')}
|
|
2571
|
+
`,
|
|
2572
|
+
},
|
|
2573
|
+
],
|
|
2574
|
+
response_format: {
|
|
2575
|
+
type: 'json_schema',
|
|
2576
|
+
json_schema: schema,
|
|
2577
|
+
},
|
|
2578
|
+
};
|
|
2579
|
+
const completion = await ai.getCompletions(payload);
|
|
2580
|
+
const emotionObj = JSON.parse(completion.choices[0]?.message?.content ?? '{}');
|
|
2581
|
+
emotion = emotionObj.emotion ?? 'neutral';
|
|
2582
|
+
}
|
|
2526
2583
|
res = await ai.textToSpeech({
|
|
2527
2584
|
text: text.trim(),
|
|
2528
2585
|
voiceName: voiceID,
|
|
@@ -2563,6 +2620,8 @@ server.registerTool('generate-scene-tts', {
|
|
|
2563
2620
|
uri,
|
|
2564
2621
|
durationMs: Math.floor((duration || 0) * 1000),
|
|
2565
2622
|
text,
|
|
2623
|
+
emotion,
|
|
2624
|
+
context_texts,
|
|
2566
2625
|
voiceName: voiceID,
|
|
2567
2626
|
speed: finalSpeed,
|
|
2568
2627
|
timestamp: new Date().toISOString(),
|
|
@@ -2592,7 +2651,7 @@ server.registerTool('generate-scene-tts', {
|
|
|
2592
2651
|
type: 'text',
|
|
2593
2652
|
text: JSON.stringify({
|
|
2594
2653
|
success: false,
|
|
2595
|
-
error: 'No TTS URL returned from AI service. You should use
|
|
2654
|
+
error: 'No TTS URL returned from AI service. You should use pick-voice tool to pick an available voice.',
|
|
2596
2655
|
response: res,
|
|
2597
2656
|
timestamp: new Date().toISOString(),
|
|
2598
2657
|
}),
|
|
@@ -2825,111 +2884,71 @@ server.registerTool('get-schema', {
|
|
|
2825
2884
|
return createErrorResponse(error, 'get-schema');
|
|
2826
2885
|
}
|
|
2827
2886
|
});
|
|
2828
|
-
server.registerTool('
|
|
2829
|
-
title: '
|
|
2830
|
-
description: '
|
|
2887
|
+
server.registerTool('pick-voice', {
|
|
2888
|
+
title: 'Pick Voice',
|
|
2889
|
+
description: '根据用户需求,选择尽可能符合要求的语音,在合适的情况下,优先采用 volcano_tts_2 类型的语音',
|
|
2831
2890
|
inputSchema: {
|
|
2832
|
-
|
|
2833
|
-
.
|
|
2834
|
-
'
|
|
2835
|
-
|
|
2836
|
-
|
|
2837
|
-
'dialect_fun',
|
|
2838
|
-
'dialogue',
|
|
2839
|
-
'kids_content',
|
|
2840
|
-
'news_explainer',
|
|
2841
|
-
'podcast_voiceover',
|
|
2842
|
-
'product_ad',
|
|
2843
|
-
'promo_trailer',
|
|
2844
|
-
'roleplay_drama',
|
|
2845
|
-
'story_narration',
|
|
2846
|
-
'storytelling',
|
|
2847
|
-
'tutorial',
|
|
2848
|
-
]))
|
|
2849
|
-
.optional()
|
|
2850
|
-
.describe('Filter by scenes (e.g., ["product_ad", "tutorial"]). If not provided, no scene filtering is applied.'),
|
|
2851
|
-
emotions: zod_1.z
|
|
2852
|
-
.array(zod_1.z.enum([
|
|
2853
|
-
'ASMR',
|
|
2854
|
-
'affectionate',
|
|
2855
|
-
'angry',
|
|
2856
|
-
'authoritative',
|
|
2857
|
-
'chat',
|
|
2858
|
-
'coldness',
|
|
2859
|
-
'depressed',
|
|
2860
|
-
'excited',
|
|
2861
|
-
'fear',
|
|
2862
|
-
'happy',
|
|
2863
|
-
'hate',
|
|
2864
|
-
'neutral',
|
|
2865
|
-
'sad',
|
|
2866
|
-
'surprised',
|
|
2867
|
-
'warm',
|
|
2868
|
-
]))
|
|
2891
|
+
prompt: zod_1.z
|
|
2892
|
+
.string()
|
|
2893
|
+
.describe('用户需求描述,例如:一个有亲和力的,适合给孩子讲故事的语音'),
|
|
2894
|
+
custom_design: zod_1.z
|
|
2895
|
+
.boolean()
|
|
2869
2896
|
.optional()
|
|
2870
|
-
.describe('
|
|
2871
|
-
|
|
2872
|
-
.
|
|
2897
|
+
.describe('是否自定义语音,由于要消耗较多积分,因此**只有用户明确要求自己设计语音**,才将该参数设为true'),
|
|
2898
|
+
custom_design_preview: zod_1.z
|
|
2899
|
+
.string()
|
|
2873
2900
|
.optional()
|
|
2874
|
-
.describe('
|
|
2875
|
-
|
|
2876
|
-
.
|
|
2901
|
+
.describe('用户自定义语音的预览文本,用于展示自定义语音的效果,只有 custom_design 为 true 时才需要'),
|
|
2902
|
+
custom_design_save_to: zod_1.z
|
|
2903
|
+
.string()
|
|
2877
2904
|
.optional()
|
|
2878
|
-
.describe('
|
|
2905
|
+
.describe('自定义语音的保存路径,例如:custom_voice.mp3 custom_voice_{id}.mp3'),
|
|
2879
2906
|
},
|
|
2880
|
-
}, async ({
|
|
2907
|
+
}, async ({ prompt, custom_design, custom_design_preview, custom_design_save_to, }) => {
|
|
2881
2908
|
try {
|
|
2882
|
-
|
|
2883
|
-
|
|
2884
|
-
|
|
2885
|
-
|
|
2886
|
-
|
|
2887
|
-
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
|
|
2892
|
-
if (emotions.includes('neutral') && !voice.emotions) {
|
|
2893
|
-
return true;
|
|
2894
|
-
}
|
|
2895
|
-
return (voice.emotions &&
|
|
2896
|
-
voice.emotions.some(emotion => emotions.includes(emotion)));
|
|
2909
|
+
// 验证session状态
|
|
2910
|
+
const currentSession = await validateSession('pick-voice');
|
|
2911
|
+
const ai = currentSession.ai;
|
|
2912
|
+
if (custom_design) {
|
|
2913
|
+
if (!custom_design_preview) {
|
|
2914
|
+
throw new Error('custom_design_preview is required when custom_design is true');
|
|
2915
|
+
}
|
|
2916
|
+
const data = await currentSession.ai.voiceDesign({
|
|
2917
|
+
prompt,
|
|
2918
|
+
previewText: custom_design_preview,
|
|
2897
2919
|
});
|
|
2898
|
-
|
|
2899
|
-
|
|
2900
|
-
|
|
2901
|
-
|
|
2902
|
-
|
|
2903
|
-
}
|
|
2904
|
-
// Filter by gender
|
|
2905
|
-
if (gender) {
|
|
2906
|
-
filteredVoices = filteredVoices.filter(voice => {
|
|
2907
|
-
const voiceId = voice.voiceID.toLowerCase();
|
|
2908
|
-
if (gender === 'male') {
|
|
2909
|
-
return voiceId.includes('_male_');
|
|
2920
|
+
if (data.voice_id) {
|
|
2921
|
+
const trial_audio = data.trial_audio;
|
|
2922
|
+
let uri = '';
|
|
2923
|
+
if (trial_audio) {
|
|
2924
|
+
uri = await saveMaterial(currentSession, trial_audio, custom_design_save_to || `custom_voice_${data.voice_id}.mp3`);
|
|
2910
2925
|
}
|
|
2911
|
-
|
|
2912
|
-
|
|
2913
|
-
|
|
2914
|
-
|
|
2915
|
-
|
|
2926
|
+
return {
|
|
2927
|
+
content: [
|
|
2928
|
+
{
|
|
2929
|
+
type: 'text',
|
|
2930
|
+
text: JSON.stringify({
|
|
2931
|
+
success: true,
|
|
2932
|
+
...data,
|
|
2933
|
+
uri,
|
|
2934
|
+
timestamp: new Date().toISOString(),
|
|
2935
|
+
}),
|
|
2936
|
+
},
|
|
2937
|
+
],
|
|
2938
|
+
};
|
|
2939
|
+
}
|
|
2940
|
+
else {
|
|
2941
|
+
throw new Error(`Voice design failed, ${JSON.stringify(data)}`);
|
|
2942
|
+
}
|
|
2916
2943
|
}
|
|
2944
|
+
const data = await ai.pickVoice({ prompt });
|
|
2917
2945
|
return {
|
|
2918
2946
|
content: [
|
|
2919
2947
|
{
|
|
2920
2948
|
type: 'text',
|
|
2921
2949
|
text: JSON.stringify({
|
|
2922
2950
|
success: true,
|
|
2923
|
-
data
|
|
2924
|
-
totalCount: filteredVoices.length,
|
|
2925
|
-
voices: filteredVoices,
|
|
2926
|
-
filters: {
|
|
2927
|
-
scenes: scenes || null,
|
|
2928
|
-
emotions: emotions || null,
|
|
2929
|
-
languages: languages || null,
|
|
2930
|
-
gender: gender || null,
|
|
2931
|
-
},
|
|
2932
|
-
},
|
|
2951
|
+
...data,
|
|
2933
2952
|
timestamp: new Date().toISOString(),
|
|
2934
2953
|
}),
|
|
2935
2954
|
},
|
|
@@ -2937,52 +2956,7 @@ server.registerTool('search-voices', {
|
|
|
2937
2956
|
};
|
|
2938
2957
|
}
|
|
2939
2958
|
catch (error) {
|
|
2940
|
-
return createErrorResponse(error, '
|
|
2941
|
-
}
|
|
2942
|
-
});
|
|
2943
|
-
server.registerTool('voice-design', {
|
|
2944
|
-
title: 'Voice Design',
|
|
2945
|
-
description: 'Design a voice based on a prompt. The voice will be designed based on the prompt and preview text.',
|
|
2946
|
-
inputSchema: {
|
|
2947
|
-
prompt: zod_1.z.string().describe('The prompt to design the voice.'),
|
|
2948
|
-
previewText: zod_1.z.string().describe('The preview text to design the voice.'),
|
|
2949
|
-
saveToFileName: zod_1.z
|
|
2950
|
-
.string()
|
|
2951
|
-
.describe('The file name to save the designed voice. 应该是mp3文件'),
|
|
2952
|
-
},
|
|
2953
|
-
}, async ({ prompt, previewText, saveToFileName }) => {
|
|
2954
|
-
try {
|
|
2955
|
-
const currentSession = await validateSession('voice-design');
|
|
2956
|
-
const data = await currentSession.ai.voiceDesign({
|
|
2957
|
-
prompt,
|
|
2958
|
-
previewText,
|
|
2959
|
-
});
|
|
2960
|
-
if (data.voice_id) {
|
|
2961
|
-
const trial_audio = data.trial_audio;
|
|
2962
|
-
let uri = '';
|
|
2963
|
-
if (trial_audio) {
|
|
2964
|
-
uri = await saveMaterial(currentSession, trial_audio, saveToFileName);
|
|
2965
|
-
}
|
|
2966
|
-
return {
|
|
2967
|
-
content: [
|
|
2968
|
-
{
|
|
2969
|
-
type: 'text',
|
|
2970
|
-
text: JSON.stringify({
|
|
2971
|
-
success: true,
|
|
2972
|
-
...data,
|
|
2973
|
-
uri,
|
|
2974
|
-
timestamp: new Date().toISOString(),
|
|
2975
|
-
}),
|
|
2976
|
-
},
|
|
2977
|
-
],
|
|
2978
|
-
};
|
|
2979
|
-
}
|
|
2980
|
-
else {
|
|
2981
|
-
throw new Error(`Voice design failed, ${JSON.stringify(data)}`);
|
|
2982
|
-
}
|
|
2983
|
-
}
|
|
2984
|
-
catch (error) {
|
|
2985
|
-
return createErrorResponse(error, 'voice-design');
|
|
2959
|
+
return createErrorResponse(error, 'pick-voice');
|
|
2986
2960
|
}
|
|
2987
2961
|
});
|
|
2988
2962
|
server.registerTool('media-analyzer', {
|
|
@@ -3198,206 +3172,6 @@ server.registerTool('media-analyzer', {
|
|
|
3198
3172
|
return createErrorResponse(error, 'media-analyzer');
|
|
3199
3173
|
}
|
|
3200
3174
|
});
|
|
3201
|
-
// server.registerTool(
|
|
3202
|
-
// 'image-aligner',
|
|
3203
|
-
// {
|
|
3204
|
-
// title: 'Image Aligner',
|
|
3205
|
-
// description:
|
|
3206
|
-
// 'Analyze image quality and alignment with prompt using AI Image Quality Inspector.',
|
|
3207
|
-
// inputSchema: {
|
|
3208
|
-
// imageFileName: z
|
|
3209
|
-
// .string()
|
|
3210
|
-
// .describe('The image file name in materials directory to analyze.'),
|
|
3211
|
-
// sceneIndex: z.number().min(1).describe('场景索引,从1开始的下标'),
|
|
3212
|
-
// storyBoardFile: z
|
|
3213
|
-
// .string()
|
|
3214
|
-
// .optional()
|
|
3215
|
-
// .default('storyboard.json')
|
|
3216
|
-
// .describe('故事板文件路径'),
|
|
3217
|
-
// imagePrompt: z
|
|
3218
|
-
// .string()
|
|
3219
|
-
// .optional()
|
|
3220
|
-
// .describe('可选的图片提示词,如果提供则覆盖storyboard中的提示词'),
|
|
3221
|
-
// customPrompt: z
|
|
3222
|
-
// .string()
|
|
3223
|
-
// .optional()
|
|
3224
|
-
// .describe('可选的额外用户要求,用于补充图片质量评估的特定需求'),
|
|
3225
|
-
// },
|
|
3226
|
-
// },
|
|
3227
|
-
// async ({
|
|
3228
|
-
// imageFileName,
|
|
3229
|
-
// sceneIndex,
|
|
3230
|
-
// storyBoardFile = 'storyboard.json',
|
|
3231
|
-
// imagePrompt,
|
|
3232
|
-
// customPrompt,
|
|
3233
|
-
// }) => {
|
|
3234
|
-
// try {
|
|
3235
|
-
// const currentSession = await validateSession('image-aligner');
|
|
3236
|
-
// // 验证图片文件
|
|
3237
|
-
// validateImageFile(imageFileName);
|
|
3238
|
-
// // 获取图片 URL
|
|
3239
|
-
// const imageUrl = getMaterialUri(currentSession, imageFileName);
|
|
3240
|
-
// // 确定要使用的提示词
|
|
3241
|
-
// let finalPrompt = imagePrompt;
|
|
3242
|
-
// // 如果没有提供imagePrompt,则从storyboard中获取
|
|
3243
|
-
// if (!imagePrompt) {
|
|
3244
|
-
// try {
|
|
3245
|
-
// const storyBoardPath = resolve(
|
|
3246
|
-
// process.env.ZEROCUT_PROJECT_CWD || process.cwd(),
|
|
3247
|
-
// projectLocalDir,
|
|
3248
|
-
// storyBoardFile
|
|
3249
|
-
// );
|
|
3250
|
-
// if (existsSync(storyBoardPath)) {
|
|
3251
|
-
// const storyBoardContent = await readFile(storyBoardPath, 'utf8');
|
|
3252
|
-
// const storyBoard = JSON.parse(storyBoardContent);
|
|
3253
|
-
// if (storyBoard.scenes && Array.isArray(storyBoard.scenes)) {
|
|
3254
|
-
// const scene = storyBoard.scenes[sceneIndex - 1]; // sceneIndex 从1开始,数组从0开始
|
|
3255
|
-
// if (scene) {
|
|
3256
|
-
// // 根据文件名判断优先级:若end_frame存在且imageFileName包含"_end"则优先取end_frame,否则取start_frame
|
|
3257
|
-
// if (scene.end_frame && imageFileName.includes('_end')) {
|
|
3258
|
-
// finalPrompt = scene.end_frame;
|
|
3259
|
-
// } else {
|
|
3260
|
-
// finalPrompt = scene.start_frame || scene.end_frame;
|
|
3261
|
-
// }
|
|
3262
|
-
// if (!finalPrompt) {
|
|
3263
|
-
// return createErrorResponse(
|
|
3264
|
-
// `场景 ${sceneIndex} 中未找到 start_frame 或 end_frame 提示词`,
|
|
3265
|
-
// 'image-aligner'
|
|
3266
|
-
// );
|
|
3267
|
-
// }
|
|
3268
|
-
// } else {
|
|
3269
|
-
// return createErrorResponse(
|
|
3270
|
-
// `在 ${storyBoardFile} 中未找到场景索引 ${sceneIndex}`,
|
|
3271
|
-
// 'image-aligner'
|
|
3272
|
-
// );
|
|
3273
|
-
// }
|
|
3274
|
-
// } else {
|
|
3275
|
-
// return createErrorResponse(
|
|
3276
|
-
// `${storyBoardFile} 文件格式不正确,缺少 scenes 数组`,
|
|
3277
|
-
// 'image-aligner'
|
|
3278
|
-
// );
|
|
3279
|
-
// }
|
|
3280
|
-
// } else {
|
|
3281
|
-
// return createErrorResponse(
|
|
3282
|
-
// `故事板文件不存在: ${storyBoardPath}`,
|
|
3283
|
-
// 'image-aligner'
|
|
3284
|
-
// );
|
|
3285
|
-
// }
|
|
3286
|
-
// } catch (error) {
|
|
3287
|
-
// return createErrorResponse(
|
|
3288
|
-
// `读取或解析故事板文件失败: ${error}`,
|
|
3289
|
-
// 'image-aligner'
|
|
3290
|
-
// );
|
|
3291
|
-
// }
|
|
3292
|
-
// }
|
|
3293
|
-
// // 如果仍然没有提示词,返回错误
|
|
3294
|
-
// if (!finalPrompt) {
|
|
3295
|
-
// return createErrorResponse(
|
|
3296
|
-
// '未提供 imagePrompt 且无法从故事板中获取提示词',
|
|
3297
|
-
// 'image-aligner'
|
|
3298
|
-
// );
|
|
3299
|
-
// }
|
|
3300
|
-
// // 读取图片质量检查指南
|
|
3301
|
-
// const alignerGuidelinePath = resolve(
|
|
3302
|
-
// __dirname,
|
|
3303
|
-
// './prompts/reasonings/image_aligner.md'
|
|
3304
|
-
// );
|
|
3305
|
-
// let alignerGuideline = '';
|
|
3306
|
-
// try {
|
|
3307
|
-
// alignerGuideline = await readFile(alignerGuidelinePath, 'utf8');
|
|
3308
|
-
// } catch (error) {
|
|
3309
|
-
// console.warn('无法读取图片质量检查指南:', error);
|
|
3310
|
-
// alignerGuideline =
|
|
3311
|
-
// '请对图片质量进行评估,包括构图、色彩、清晰度等方面。';
|
|
3312
|
-
// }
|
|
3313
|
-
// // 构建系统提示
|
|
3314
|
-
// const systemPrompt = `你是一个专业的AI图片质量检查员。请根据以下指南对图片进行评估:
|
|
3315
|
-
// ${alignerGuideline}
|
|
3316
|
-
// 请严格按照指南中的JSON格式返回评估结果。`;
|
|
3317
|
-
// // 构建用户提示
|
|
3318
|
-
// const userPrompt = `请对这张图片进行质量评估。
|
|
3319
|
-
// 原始提示词:${finalPrompt}${
|
|
3320
|
-
// customPrompt
|
|
3321
|
-
// ? `
|
|
3322
|
-
// 额外要求:${customPrompt}`
|
|
3323
|
-
// : ''
|
|
3324
|
-
// }
|
|
3325
|
-
// 请按照指南要求,返回包含评分、问题列表和优化建议的JSON格式结果。`;
|
|
3326
|
-
// // 调用AI模型进行图片质量评估
|
|
3327
|
-
// const ai = currentSession.ai;
|
|
3328
|
-
// const completion = await ai.getCompletions({
|
|
3329
|
-
// model: 'Doubao-Seed-1.6',
|
|
3330
|
-
// messages: [
|
|
3331
|
-
// {
|
|
3332
|
-
// role: 'system',
|
|
3333
|
-
// content: systemPrompt,
|
|
3334
|
-
// },
|
|
3335
|
-
// {
|
|
3336
|
-
// role: 'user',
|
|
3337
|
-
// content: [
|
|
3338
|
-
// {
|
|
3339
|
-
// type: 'image_url',
|
|
3340
|
-
// image_url: {
|
|
3341
|
-
// url: imageUrl,
|
|
3342
|
-
// },
|
|
3343
|
-
// },
|
|
3344
|
-
// {
|
|
3345
|
-
// type: 'text',
|
|
3346
|
-
// text: userPrompt,
|
|
3347
|
-
// },
|
|
3348
|
-
// ],
|
|
3349
|
-
// },
|
|
3350
|
-
// ],
|
|
3351
|
-
// });
|
|
3352
|
-
// const result = completion.choices[0]?.message?.content;
|
|
3353
|
-
// if (!result) {
|
|
3354
|
-
// throw new Error('No response from AI model');
|
|
3355
|
-
// }
|
|
3356
|
-
// // 解析AI响应
|
|
3357
|
-
// let alignmentResult;
|
|
3358
|
-
// try {
|
|
3359
|
-
// // 尝试从响应中提取JSON
|
|
3360
|
-
// const jsonMatch =
|
|
3361
|
-
// result.match(/```json\s*([\s\S]*?)\s*```/) ||
|
|
3362
|
-
// result.match(/\{[\s\S]*\}/);
|
|
3363
|
-
// if (jsonMatch) {
|
|
3364
|
-
// alignmentResult = JSON.parse(jsonMatch[1] || jsonMatch[0]);
|
|
3365
|
-
// } else {
|
|
3366
|
-
// // 如果没有找到JSON格式,尝试直接解析整个响应
|
|
3367
|
-
// alignmentResult = JSON.parse(result);
|
|
3368
|
-
// }
|
|
3369
|
-
// } catch (error) {
|
|
3370
|
-
// // 如果解析失败,返回原始响应
|
|
3371
|
-
// alignmentResult = {
|
|
3372
|
-
// error: 'JSON解析失败',
|
|
3373
|
-
// raw_response: result,
|
|
3374
|
-
// };
|
|
3375
|
-
// }
|
|
3376
|
-
// return {
|
|
3377
|
-
// content: [
|
|
3378
|
-
// {
|
|
3379
|
-
// type: 'text',
|
|
3380
|
-
// text: JSON.stringify({
|
|
3381
|
-
// success: true,
|
|
3382
|
-
// imageFileName,
|
|
3383
|
-
// sceneIndex,
|
|
3384
|
-
// storyBoardFile,
|
|
3385
|
-
// imagePrompt: finalPrompt,
|
|
3386
|
-
// customPrompt,
|
|
3387
|
-
// promptSource: imagePrompt ? 'manual_override' : 'storyboard',
|
|
3388
|
-
// analysis: alignmentResult,
|
|
3389
|
-
// imageUrl,
|
|
3390
|
-
// nextActionSuggest:
|
|
3391
|
-
// '可根据分析结果调整提示词,修改storyboard后,重新生成图片。',
|
|
3392
|
-
// }),
|
|
3393
|
-
// },
|
|
3394
|
-
// ],
|
|
3395
|
-
// };
|
|
3396
|
-
// } catch (error) {
|
|
3397
|
-
// return createErrorResponse(error, 'image-aligner');
|
|
3398
|
-
// }
|
|
3399
|
-
// }
|
|
3400
|
-
// );
|
|
3401
3175
|
server.registerTool('audio-video-sync', {
|
|
3402
3176
|
title: 'Audio Video Sync',
|
|
3403
3177
|
description: 'Generate audio-video-synced video by matching video with audio. 还可以对口型。',
|
|
@@ -3552,7 +3326,7 @@ server.registerTool('audio-video-sync', {
|
|
|
3552
3326
|
}
|
|
3553
3327
|
});
|
|
3554
3328
|
server.registerTool('generate-video-by-ref', {
|
|
3555
|
-
title: '
|
|
3329
|
+
title: '参考生视频(包含文生视频)工具',
|
|
3556
3330
|
description: 'Generate video using reference images. Supports sora2, sora2-pro (1 image max), veo3.1, veo3.1-pro (1 image max), lite and pro (4 images max), vidu (7 images max). Can work without reference images (0 images).',
|
|
3557
3331
|
inputSchema: {
|
|
3558
3332
|
prompt: zod_1.z
|
|
@@ -3602,6 +3376,7 @@ server.registerTool('generate-video-by-ref', {
|
|
|
3602
3376
|
'veo3.1',
|
|
3603
3377
|
'veo3.1-pro',
|
|
3604
3378
|
'vidu',
|
|
3379
|
+
'vidu-uc',
|
|
3605
3380
|
'pixv',
|
|
3606
3381
|
])
|
|
3607
3382
|
.default('lite')
|
|
@@ -3895,7 +3670,7 @@ ${prompt.trim()}`,
|
|
|
3895
3670
|
type: 'text',
|
|
3896
3671
|
text: JSON.stringify({
|
|
3897
3672
|
success: true,
|
|
3898
|
-
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish
|
|
3673
|
+
message: '该视频生成任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish 来等待任务结束,如如 wait-for-task-finish 工具调用超时,你应立即再次重新调用直到任务结束。',
|
|
3899
3674
|
taskUrl: result.taskUrl,
|
|
3900
3675
|
}),
|
|
3901
3676
|
},
|
|
@@ -4328,6 +4103,69 @@ server.registerTool('search-context', {
|
|
|
4328
4103
|
return createErrorResponse(error, 'search-context');
|
|
4329
4104
|
}
|
|
4330
4105
|
});
|
|
4106
|
+
// 列出项目下的所有文件
|
|
4107
|
+
server.registerTool('list-project-files', {
|
|
4108
|
+
title: 'List Project Files',
|
|
4109
|
+
description: 'List all files in the materials directory.',
|
|
4110
|
+
inputSchema: {},
|
|
4111
|
+
}, async () => {
|
|
4112
|
+
try {
|
|
4113
|
+
// 验证session状态
|
|
4114
|
+
const currentSession = await validateSession('list-project-files');
|
|
4115
|
+
console.log('Listing project files...');
|
|
4116
|
+
const terminal = currentSession.terminal;
|
|
4117
|
+
if (!terminal) {
|
|
4118
|
+
throw new Error('Terminal not available in current session');
|
|
4119
|
+
}
|
|
4120
|
+
let cwd;
|
|
4121
|
+
try {
|
|
4122
|
+
cwd = await terminal.getCwd();
|
|
4123
|
+
}
|
|
4124
|
+
catch (cwdError) {
|
|
4125
|
+
console.error('Failed to get current working directory:', cwdError);
|
|
4126
|
+
throw new Error('Failed to get current working directory');
|
|
4127
|
+
}
|
|
4128
|
+
console.log(`Current working directory: ${cwd}`);
|
|
4129
|
+
// 安全地列出各目录文件,失败时返回空数组
|
|
4130
|
+
const listFilesWithFallback = async (path, dirName) => {
|
|
4131
|
+
try {
|
|
4132
|
+
const files = await currentSession.files.listFiles(path);
|
|
4133
|
+
console.log(`Found ${files?.length || 0} files in ${dirName}`);
|
|
4134
|
+
return files || [];
|
|
4135
|
+
}
|
|
4136
|
+
catch (error) {
|
|
4137
|
+
console.warn(`Failed to list files in ${dirName} (${path}):`, error);
|
|
4138
|
+
return [];
|
|
4139
|
+
}
|
|
4140
|
+
};
|
|
4141
|
+
const [rootFiles, materialsFiles, outputFiles] = await Promise.all([
|
|
4142
|
+
listFilesWithFallback(cwd, 'root'),
|
|
4143
|
+
listFilesWithFallback(`${cwd}/materials`, 'materials'),
|
|
4144
|
+
listFilesWithFallback(`${cwd}/output`, 'output'),
|
|
4145
|
+
]);
|
|
4146
|
+
const result = {
|
|
4147
|
+
success: true,
|
|
4148
|
+
cwd,
|
|
4149
|
+
root: rootFiles,
|
|
4150
|
+
materials: materialsFiles,
|
|
4151
|
+
output: outputFiles,
|
|
4152
|
+
totalFiles: rootFiles.length + materialsFiles.length + outputFiles.length,
|
|
4153
|
+
timestamp: new Date().toISOString(),
|
|
4154
|
+
};
|
|
4155
|
+
console.log(`Total files found: ${result.totalFiles}`);
|
|
4156
|
+
return {
|
|
4157
|
+
content: [
|
|
4158
|
+
{
|
|
4159
|
+
type: 'text',
|
|
4160
|
+
text: JSON.stringify(result),
|
|
4161
|
+
},
|
|
4162
|
+
],
|
|
4163
|
+
};
|
|
4164
|
+
}
|
|
4165
|
+
catch (error) {
|
|
4166
|
+
return createErrorResponse(error, 'list-project-files');
|
|
4167
|
+
}
|
|
4168
|
+
});
|
|
4331
4169
|
server.registerTool('build-capcat-draft', {
|
|
4332
4170
|
title: 'Build CapCut Draft',
|
|
4333
4171
|
description: 'Read draft_content.json file, parse JSON and generate URIs for all assets in timeline tracks, then output the processed JSON string.',
|