cerevox 3.6.1 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,9 +51,9 @@ const videokit_1 = require("../../utils/videokit");
51
51
  const promises_1 = require("node:fs/promises");
52
52
  const node_path_1 = __importStar(require("node:path"));
53
53
  const node_fs_1 = require("node:fs");
54
- const coze_1 = require("../../utils/coze");
55
54
  const mp3_duration_1 = __importDefault(require("mp3-duration"));
56
55
  const image_size_1 = __importDefault(require("image-size"));
56
+ const seed_1 = require("../../utils/seed");
57
57
  function createErrorResponse(error, operation, details) {
58
58
  const errorMessage = error instanceof Error ? error.message : String(error);
59
59
  console.error(`[${operation}] Error:`, error);
@@ -971,7 +971,7 @@ server.registerTool('generate-image', {
971
971
  'line-sketch',
972
972
  ])
973
973
  .optional()
974
- .default('seedream'),
974
+ .default('seedream-pro'),
975
975
  prompt: zod_1.z
976
976
  .string()
977
977
  .describe('The prompt to generate. 一般要严格对应 storyboard 中当前场景的 start_frame 或 end_frame 中的字段描述,如果是生成线稿,则 type 使用 line-sketch'),
@@ -1048,8 +1048,14 @@ server.registerTool('generate-image', {
1048
1048
  .array(zod_1.z.object({
1049
1049
  image: zod_1.z.string().describe('Local image file path'),
1050
1050
  type: zod_1.z
1051
- .enum(['character', 'object', 'background', 'linesketch'])
1052
- .describe('Type of the reference image. 必须传,如果是参考角色三视图,传character,如果是参考背景图,传background,如果是参考线稿,传linesketch,否则传object'),
1051
+ .enum([
1052
+ 'character',
1053
+ 'subject',
1054
+ 'background',
1055
+ 'linesketch',
1056
+ 'normal',
1057
+ ])
1058
+ .describe('Type of the reference image. 必须传,如果是参考角色三视图,传character,如果是参考背景图,传background,如果是参考线稿,传linesketch,如果是主体,传subject,否则传normal'),
1053
1059
  name: zod_1.z.string().describe('Name for this reference image'),
1054
1060
  description: zod_1.z
1055
1061
  .string()
@@ -1060,13 +1066,13 @@ server.registerTool('generate-image', {
1060
1066
  .describe('Whether this is a turnaround image.如果是三视图,这个参数务必传true'),
1061
1067
  }))
1062
1068
  .optional()
1063
- .describe(`Array of reference images with character or object names.如果stage_atmosphere中有角色apply_reference_image,那么必须要传这个参数生成分镜图片
1069
+ .describe(`Array of reference images with character or subject names.如果stage_atmosphere中有角色apply_reference_image,那么必须要传这个参数生成分镜图片
1064
1070
 
1065
1071
  传参示例
1066
1072
  \`\`\`
1067
1073
  {
1068
1074
  "image": "latiao.jpeg",
1069
- "type": "object",
1075
+ "type": "subject",
1070
1076
  "name": "卫龙辣条",
1071
1077
  }
1072
1078
  \`\`\`
@@ -1077,8 +1083,12 @@ server.registerTool('generate-image', {
1077
1083
  // 验证session状态
1078
1084
  const currentSession = await validateSession('generate-image');
1079
1085
  const storyBoardPath = (0, node_path_1.resolve)(process.env.ZEROCUT_PROJECT_CWD || process.cwd(), projectLocalDir, storyBoardFile);
1086
+ const outlineSheetImagePath = (0, node_path_1.resolve)(process.env.ZEROCUT_PROJECT_CWD || process.cwd(), projectLocalDir, 'materials', 'outline_sheet.png');
1087
+ const hasOutlineSheet = (0, node_fs_1.existsSync)(outlineSheetImagePath);
1080
1088
  // 检查 storyboard 标志
1081
- if (!checkStoryboardFlag && (0, node_fs_1.existsSync)(storyBoardPath)) {
1089
+ if (!hasOutlineSheet &&
1090
+ !checkStoryboardFlag &&
1091
+ (0, node_fs_1.existsSync)(storyBoardPath)) {
1082
1092
  checkStoryboardFlag = true;
1083
1093
  return createErrorResponse('必须先审查生成的 storyboard.json 内容,确保每个场景中的stage_atmosphere内容按照规则被正确融合到start_frame和video_prompt中,不得遗漏,检查完成后先汇报,如果有问题,应当先修改 storyboard.json 内容,然后再调用 generate-image 生成图片。注意修改 storyboard 内容时,仅修改相应字段的字符串值,不要破坏JSON格式!', 'generate-image');
1084
1094
  }
@@ -1098,12 +1108,17 @@ server.registerTool('generate-image', {
1098
1108
  if (storyBoard.scenes && Array.isArray(storyBoard.scenes)) {
1099
1109
  const scene = storyBoard.scenes[sceneIndex - 1]; // sceneIndex 从1开始,数组从0开始
1100
1110
  if (scene) {
1101
- const startFrame = scene.start_frame;
1111
+ const startFrame = scene.start_frame || scene.first_shot;
1102
1112
  const endFrame = scene.end_frame;
1103
1113
  // 检查 prompt 是否严格等于 start_frame 或 end_frame
1104
1114
  if (prompt !== startFrame && prompt !== endFrame) {
1105
1115
  return createErrorResponse('图片提示词必须严格遵照storyboard的设定,如果用户明确指出不需要遵守,请将skipConsistencyCheck设置为true后再次调用', 'generate-image');
1106
1116
  }
1117
+ if (hasOutlineSheet &&
1118
+ (!referenceImages ||
1119
+ !referenceImages.find(item => item.image.includes('outline_sheet.png')))) {
1120
+ return createErrorResponse('如果存在outline_sheet.png这张图,那么必须要传referenceImages参数,且referenceImages中必须要包含outline_sheet.png这张图', 'generate-image');
1121
+ }
1107
1122
  // 校验 size 参数与 storyboard 的 orientation 属性一致性
1108
1123
  if (size && storyBoard.orientation) {
1109
1124
  const isLandscapeSize = [
@@ -1168,62 +1183,64 @@ server.registerTool('generate-image', {
1168
1183
  try {
1169
1184
  const ai = currentSession.ai;
1170
1185
  const promptOptimizer = await (0, promises_1.readFile)((0, node_path_1.resolve)(__dirname, './prompts/image-prompt-optimizer.md'), 'utf8');
1171
- const schema = {
1172
- name: 'optimize_image_prompt',
1173
- schema: {
1174
- type: 'object',
1175
- properties: {
1176
- prompt_optimized: {
1177
- type: 'string',
1178
- description: '优化后的提示词',
1179
- },
1180
- metaphor_modifiers: {
1181
- type: 'array',
1182
- description: '从 prompt_optimized 中抽取的所有比喻修饰词(字符串数组)',
1183
- items: {
1186
+ if (!hasOutlineSheet) {
1187
+ const schema = {
1188
+ name: 'optimize_image_prompt',
1189
+ schema: {
1190
+ type: 'object',
1191
+ properties: {
1192
+ prompt_optimized: {
1184
1193
  type: 'string',
1185
- description: '比喻性修饰词,例如 “如羽毛般轻盈”、“像晨雾一样柔和”',
1194
+ description: '优化后的提示词',
1195
+ },
1196
+ metaphor_modifiers: {
1197
+ type: 'array',
1198
+ description: '从 prompt_optimized 中抽取的所有比喻修饰词(字符串数组)',
1199
+ items: {
1200
+ type: 'string',
1201
+ description: '比喻性修饰词,例如 “如羽毛般轻盈”、“像晨雾一样柔和”',
1202
+ },
1186
1203
  },
1187
1204
  },
1205
+ required: ['prompt_optimized', 'metaphor_modifiers'],
1188
1206
  },
1189
- required: ['prompt_optimized', 'metaphor_modifiers'],
1190
- },
1191
- };
1192
- const completion = await ai.getCompletions({
1193
- model: 'Doubao-Seed-1.6',
1194
- messages: [
1195
- {
1196
- role: 'system',
1197
- content: promptOptimizer,
1198
- },
1199
- {
1200
- role: 'user',
1201
- content: `## 用户指令
1207
+ };
1208
+ const completion = await ai.getCompletions({
1209
+ model: 'Doubao-Seed-1.6',
1210
+ messages: [
1211
+ {
1212
+ role: 'system',
1213
+ content: promptOptimizer,
1214
+ },
1215
+ {
1216
+ role: 'user',
1217
+ content: `## 用户指令
1202
1218
 
1203
- ${processedPrompt.trim()}
1219
+ ${processedPrompt.trim()}
1204
1220
 
1205
- ## 参考图
1221
+ ## 参考图
1206
1222
 
1207
- ${referenceImages?.map((ref, index) => `图${index + 1}:${ref.image}`).join('\n') || '无'}`,
1223
+ ${referenceImages?.map((ref, index) => `图${index + 1}:${ref.image}`).join('\n') || '无'}`,
1224
+ },
1225
+ ],
1226
+ response_format: {
1227
+ type: 'json_schema',
1228
+ json_schema: schema,
1208
1229
  },
1209
- ],
1210
- response_format: {
1211
- type: 'json_schema',
1212
- json_schema: schema,
1213
- },
1214
- });
1215
- const optimizedPrompt = completion.choices[0]?.message?.content.trim();
1216
- if (optimizedPrompt) {
1217
- try {
1218
- const { prompt_optimized, metaphor_modifiers } = JSON.parse(optimizedPrompt);
1219
- processedPrompt = `${prompt_optimized}`;
1220
- if (metaphor_modifiers?.length) {
1221
- processedPrompt += `\n\n注意:下面这些是形象比喻,并不是输出内容。\n${metaphor_modifiers}`;
1230
+ });
1231
+ const optimizedPrompt = completion.choices[0]?.message?.content.trim();
1232
+ if (optimizedPrompt) {
1233
+ try {
1234
+ const { prompt_optimized, metaphor_modifiers } = JSON.parse(optimizedPrompt);
1235
+ processedPrompt = `${prompt_optimized}`;
1236
+ if (metaphor_modifiers?.length) {
1237
+ processedPrompt += `\n\n注意:下面这些是形象比喻,并不是输出内容。\n${metaphor_modifiers}`;
1238
+ }
1239
+ }
1240
+ catch (ex) {
1241
+ console.error('Failed to parse optimized prompt:', ex);
1242
+ processedPrompt = optimizedPrompt;
1222
1243
  }
1223
- }
1224
- catch (ex) {
1225
- console.error('Failed to parse optimized prompt:', ex);
1226
- processedPrompt = optimizedPrompt;
1227
1244
  }
1228
1245
  }
1229
1246
  }
@@ -1272,13 +1289,13 @@ ${referenceImages?.map((ref, index) => `图${index + 1}:${ref.image}`).join('\
1272
1289
  objectPrefix.push(`[图${++index}]是名为“${refImage.name}”的人物角色形象${refImage.description ? `,${refImage.description}` : ''}`);
1273
1290
  }
1274
1291
  }
1275
- else if (refImage.type === 'object') {
1292
+ else if (refImage.type === 'subject') {
1276
1293
  if (refImage.isTurnaround) {
1277
- objectPrefix.push(`[图${++index}]是名为“${refImage.name}”的物件三视图${refImage.description ? `,${refImage.description}` : ''}`);
1294
+ objectPrefix.push(`[图${++index}]是名为“${refImage.name}”的主体三视图${refImage.description ? `,${refImage.description}` : ''}`);
1278
1295
  hasTurnaround = true;
1279
1296
  }
1280
1297
  else {
1281
- objectPrefix.push(`[图${++index}]是名为“${refImage.name}”的物件${refImage.description ? `,${refImage.description}` : ''}`);
1298
+ objectPrefix.push(`[图${++index}]是名为“${refImage.name}”的主体${refImage.description ? `,${refImage.description}` : ''}`);
1282
1299
  }
1283
1300
  }
1284
1301
  else if (refImage.type === 'background') {
@@ -1287,6 +1304,9 @@ ${referenceImages?.map((ref, index) => `图${index + 1}:${ref.image}`).join('\
1287
1304
  else if (refImage.type === 'linesketch') {
1288
1305
  objectPrefix.push(`[图${++index}]是参考线稿,仅参考其中主体位置和形态${refImage.description ? `,${refImage.description}` : ''}`);
1289
1306
  }
1307
+ else {
1308
+ objectPrefix.push(`[图${++index}]是参考图${refImage.name}`);
1309
+ }
1290
1310
  }
1291
1311
  catch (error) {
1292
1312
  console.error(`Failed to load reference image ${imageFilePath} for ${refImage.name}:`, error);
@@ -1520,6 +1540,130 @@ server.registerTool('edit-image', {
1520
1540
  return createErrorResponse(error, 'edit-image');
1521
1541
  }
1522
1542
  });
// Registers the `generate-short-video-outlines` tool on `server`.
// The handler asks the session's AI service for a short-video outline based on
// the user's description. It has three outcomes:
//   1. the service returns `taskUrl`  -> the task URL is returned to the caller;
//   2. the service returns `url`      -> the URL is handed to saveMaterial as
//      'outline_sheet.png' and a storyboard.json is written from `res.data`;
//   3. neither field is present       -> a `success: false` payload is returned.
// Thrown errors are converted into a response by createErrorResponse.
1543
+ server.registerTool('generate-short-video-outlines', {
1544
+ title: 'Generate Short Video Outlines',
1545
+ description: `根据用户描述生成短视频的大纲`,
1546
+ inputSchema: {
1547
+ prompt: zod_1.z.string().describe('用户描述'),
// NOTE(review): 'slient' looks like a misspelling of 'silent'. It is part of the
// accepted input enum (and repeated in the description string), so renaming it
// would change the tool's accepted values — confirm with callers before changing.
1548
+ voiceType: zod_1.z
1549
+ .enum(['slient', 'voiceover', 'dialogue'])
1550
+ .optional()
1551
+ .default('voiceover')
1552
+ .describe('语音类型,枚举:无声(slient)、画外音(voiceover)、对话(dialogue)'),
1553
+ language: zod_1.z.string().optional().default('中文').describe('语言'),
1554
+ images: zod_1.z.array(zod_1.z.string().url()).optional().describe('参考图片的URL'),
// `orientation` is only copied into the storyboard object written below; it is
// not part of the generateShortVideoOutlines request.
1555
+ orientation: zod_1.z
1556
+ .enum(['portrait', 'landscape'])
1557
+ .optional()
1558
+ .default('portrait')
1559
+ .describe('视频方向,枚举:竖屏(portrait)、横屏(landscape)'),
// `model` is forwarded to the AI service as `videoModel` and is also stamped on
// every scene as `use_video_model`.
1560
+ model: zod_1.z
1561
+ .enum([
1562
+ 'pro',
1563
+ 'hailuo',
1564
+ 'hailuo-fast',
1565
+ 'vidu',
1566
+ 'vidu-turbo',
1567
+ 'vidu-pro',
1568
+ 'kling',
1569
+ 'pixv',
1570
+ 'veo3.1',
1571
+ 'veo3.1-pro',
1572
+ 'zero',
1573
+ 'zero-fast',
1574
+ ])
1575
+ .default('vidu')
1576
+ .describe('除非用户明确提出使用其他模型,否则一律用vidu模型'),
1577
+ },
1578
+ }, async ({ prompt, voiceType, language, images, orientation, model }) => {
1579
+ try {
1580
+ // Validate the session state
1581
+ const currentSession = await validateSession('generate-short-video-outlines');
1582
+ const ai = currentSession.ai;
1583
+ const res = await ai.generateShortVideoOutlines({
1584
+ prompt,
1585
+ voiceType,
1586
+ language,
1587
+ images,
1588
+ videoModel: model,
1589
+ });
1590
+ if (!res) {
1591
+ throw new Error('Failed to generate short video outlines: no response from AI service');
1592
+ }
// Task-URL case: the service answered with `taskUrl` rather than a result. The
// (Chinese) message tells the caller that the task is asynchronous and
// long-running, to call the wait-for-task-finish tool immediately, and to call
// it again if that tool times out, until the task ends.
1593
+ if (res.taskUrl) {
1594
+ return {
1595
+ content: [
1596
+ {
1597
+ type: 'text',
1598
+ text: JSON.stringify({
1599
+ success: true,
1600
+ message: '该任务正在运行中,它是异步任务,且执行时间较长,你应立即调用工具 wait-for-task-finish 来等待任务结束,如 wait-for-task-finish 工具调用超时,你应立即再次重新调用直到任务结束。',
1601
+ taskUrl: res.taskUrl,
1602
+ }),
1603
+ },
1604
+ ],
1605
+ };
1606
+ }
// Result case: `res.url` is present and is passed to saveMaterial under the
// name 'outline_sheet.png'.
1607
+ else if (res.url) {
1608
+ const url = res.url;
1609
+ await saveMaterial(currentSession, url, 'outline_sheet.png');
// `voice_type` here comes from the service response (`res.data`), not from the
// `voiceType` input argument.
// NOTE(review): when `res.data` is missing, `scenes` is undefined and
// `scenes.map` below throws a TypeError; the outer catch turns that into a
// generic error response. Confirm whether an explicit check is wanted.
1610
+ const { scenes, video_type, voice_type, voiceover_tone, bgm_prompt } = res.data || {};
// The seed is obtained once per call and the same value is assigned to every scene.
1611
+ const seed = (0, seed_1.getRandomSeed)();
1612
+ const storyboard = {
1613
+ orientation,
1614
+ video_type,
1615
+ outline_sheet: 'outline_sheet.png',
// For each scene: when voice_type is 'voiceover' and the prompt contains
// '画外音' (voice-over), append '视频中人物不说话' ("the people in the video
// do not speak"); then, if voiceover_tone is set and not already contained in
// the prompt, prefix the prompt with it.
// NOTE(review): `video_prompt.includes` assumes scene.video_prompt is a string
// whenever voice_type is 'voiceover' — confirm the service guarantees this.
1616
+ scenes: scenes.map((scene) => {
1617
+ let video_prompt = scene.video_prompt;
1618
+ if (voice_type === 'voiceover' && video_prompt.includes('画外音')) {
1619
+ video_prompt = `${video_prompt} 视频中人物不说话`;
1620
+ if (voiceover_tone && !video_prompt.includes(voiceover_tone)) {
1621
+ video_prompt = `${voiceover_tone} ${video_prompt}`;
1622
+ }
1623
+ }
1624
+ return {
1625
+ ...scene,
1626
+ video_prompt,
1627
+ bgm_prompt,
1628
+ use_video_model: model,
1629
+ voice_type,
1630
+ seed,
1631
+ };
1632
+ }),
1633
+ };
// NOTE(review): this path is resolved from projectLocalDir alone, whereas the
// generate-image handler resolves its storyboard path against
// process.env.ZEROCUT_PROJECT_CWD || process.cwd() first — confirm both refer to
// the same file when projectLocalDir is relative.
1634
+ const saveLocalPath = (0, node_path_1.resolve)(projectLocalDir, 'storyboard.json');
1635
+ await (0, promises_1.writeFile)(saveLocalPath, JSON.stringify(storyboard, null, 2));
// Success payload; the message '视频大纲生成成功' means "video outline generated successfully".
1636
+ return {
1637
+ content: [
1638
+ {
1639
+ type: 'text',
1640
+ text: JSON.stringify({
1641
+ success: true,
1642
+ message: '视频大纲生成成功',
1643
+ storyboard,
1644
+ }),
1645
+ },
1646
+ ],
1647
+ };
1648
+ }
// Neither `taskUrl` nor `url` was present: report failure and echo the raw
// service response together with a timestamp.
1649
+ return {
1650
+ content: [
1651
+ {
1652
+ type: 'text',
1653
+ text: JSON.stringify({
1654
+ success: false,
1655
+ error: 'No image URL returned from AI service',
1656
+ response: res,
1657
+ timestamp: new Date().toISOString(),
1658
+ }),
1659
+ },
1660
+ ],
1661
+ };
1662
+ }
// Any error thrown above (including the explicit "no response" error) is
// converted into a response by createErrorResponse.
1663
+ catch (error) {
1664
+ return createErrorResponse(error, 'generate-short-video-outlines');
1665
+ }
1666
+ });
1523
1667
  let lastEffect = '';
1524
1668
  server.registerTool('generate-video', {
1525
1669
  title: 'Generate Video',
@@ -1585,7 +1729,7 @@ server.registerTool('generate-video', {
1585
1729
  .describe('The image file name of the start frame.'),
1586
1730
  duration: zod_1.z
1587
1731
  .number()
1588
- .min(1)
1732
+ .min(0)
1589
1733
  .max(30)
1590
1734
  .describe('The duration of the video. 一般与 tts 配音音频时长向上取整秒(ceil)一致,或者与 timeline_analysis 中确定的歌曲片段时长一致'),
1591
1735
  end_frame: zod_1.z
@@ -1615,6 +1759,9 @@ server.registerTool('generate-video', {
1615
1759
  if (!start_frame && !isZeroModel) {
1616
1760
  return createErrorResponse('start_frame 不能为空', 'generate-video');
1617
1761
  }
1762
+ if (type !== 'pro' && duration === 0) {
1763
+ return createErrorResponse('非 pro 模型的视频时长不能为 0', 'generate-video');
1764
+ }
1618
1765
  if (!isZeroModel && duration > 16) {
1619
1766
  return createErrorResponse('非 zero 系列模型的视频仅支持 16 秒以下时长', 'generate-video');
1620
1767
  }
@@ -3200,59 +3347,80 @@ server.registerTool('media-analyzer', {
3200
3347
  });
3201
3348
  server.registerTool('audio-video-sync', {
3202
3349
  title: 'Audio Video Sync',
3203
- description: 'Generate audio-video-synced video by matching video with audio. 还可以对口型。',
3350
+ description: 'Generate audio-video-synced video by matching video with audio or lip sync.',
3204
3351
  inputSchema: {
3205
- lipSync: zod_1.z.boolean().default(false),
3206
- lipSyncType: zod_1.z
3207
- .enum(['pixv', 'vidu', 'basic', 'lite'])
3352
+ lipSync: zod_1.z
3353
+ .object({
3354
+ type: zod_1.z
3355
+ .enum(['pixv', 'vidu', 'basic', 'lite'])
3356
+ .optional()
3357
+ .default('pixv'),
3358
+ padAudio: zod_1.z.boolean().optional().default(true),
3359
+ })
3208
3360
  .optional()
3209
- .default('pixv'),
3210
- lipSyncPadAudio: zod_1.z.boolean().optional().default(true),
3211
- videoFileName: zod_1.z
3212
- .string()
3213
- .describe('The video file name in materials directory.'),
3214
- audioFileName: zod_1.z
3215
- .string()
3216
- .describe('The audio file name in materials directory.'),
3361
+ .describe('默认不用对口型,除非用户明确指定需要对口型。'),
3362
+ videos: zod_1.z
3363
+ .array(zod_1.z.string())
3364
+ .describe('The video file names in materials directory. 如果多个视频,将会按顺序拼接'),
3365
+ audio: zod_1.z.string().describe('The audio file name in materials directory.'),
3217
3366
  audioInMs: zod_1.z
3218
3367
  .number()
3219
3368
  .default(0)
3220
3369
  .describe('The start time of audio in milliseconds.'),
3221
- refPhotoFileName: zod_1.z
3222
- .string()
3223
- .optional()
3224
- .describe('The reference photo face for lip sync.'),
3370
+ audioFadeOutMs: zod_1.z
3371
+ .number()
3372
+ .default(2000)
3373
+ .describe('The fade out time of audio in milliseconds.'),
3374
+ audioVolume: zod_1.z
3375
+ .number()
3376
+ .default(0.177) // -15db
3377
+ .describe('The volume of audio. 0.0 to 2.0.'),
3378
+ videoAudioVolume: zod_1.z
3379
+ .number()
3380
+ .default(1.0)
3381
+ .describe('The volume of video audio. 0.0 to 2.0.'),
3382
+ loopAudio: zod_1.z.boolean().optional().default(true),
3383
+ addSubtitles: zod_1.z.boolean().optional().default(false),
3225
3384
  saveToFileName: zod_1.z
3226
3385
  .string()
3227
3386
  .describe('The filename to save the audio-video-synced video. 应该是mp4文件'),
3228
3387
  },
3229
- }, async ({ lipSync, lipSyncType, lipSyncPadAudio, videoFileName, audioFileName, audioInMs, refPhotoFileName, saveToFileName, }, context) => {
3388
+ }, async ({ lipSync, videos, audio, audioInMs, audioFadeOutMs, audioVolume, videoAudioVolume, saveToFileName, loopAudio, addSubtitles, }, context) => {
3230
3389
  try {
3231
3390
  // 验证session状态
3232
3391
  const currentSession = await validateSession('audio-video-sync');
3233
3392
  const validatedFileName = validateFileName(saveToFileName);
3234
- console.log(`Starting lip sync: ${videoFileName} + ${audioFileName}`);
3235
3393
  // 验证terminal可用性
3236
3394
  const terminal = currentSession.terminal;
3237
3395
  if (!terminal) {
3238
3396
  throw new Error('Terminal not available in current session');
3239
3397
  }
3240
3398
  // 将materials下的文件名转换为URL
3241
- const videoUrl = getMaterialUri(currentSession, videoFileName);
3242
- const audioUrl = getMaterialUri(currentSession, audioFileName // processedAudioFileName
3399
+ const videoUrls = videos.map(videoFileName => getMaterialUri(currentSession, videoFileName));
3400
+ const audioUrl = getMaterialUri(currentSession, audio // processedAudioFileName
3243
3401
  );
3244
- const refPhotoUrl = refPhotoFileName
3245
- ? getMaterialUri(currentSession, refPhotoFileName)
3246
- : undefined;
3247
- console.log(`Video URL: ${videoUrl}`);
3248
- console.log(`Audio URL: ${audioUrl}`);
3249
- console.log(`Ref Photo URL: ${refPhotoUrl}`);
3402
+ if (videoUrls.length === 0) {
3403
+ throw new Error('No video files provided');
3404
+ }
3405
+ let videoUrl = videoUrls[0];
3406
+ const ai = currentSession.ai;
3407
+ if (videoUrls.length > 1) {
3408
+ videoUrl = await ai.concatMedia({
3409
+ mediaUrls: videoUrls,
3410
+ });
3411
+ }
3250
3412
  if (!lipSync) {
3251
- // 简单同步
3252
- const result = await currentSession.ai.voSync({
3413
+ // 不需要对口型
3414
+ const result = await ai.voSync({
3253
3415
  videoUrl,
3254
3416
  audioUrl,
3255
3417
  audioInMs,
3418
+ audioFadeOutMs,
3419
+ keepVideoAudio: true,
3420
+ audioVolume,
3421
+ videoAudioVolume,
3422
+ loopAudio,
3423
+ subtitles: addSubtitles,
3256
3424
  });
3257
3425
  if (result.url) {
3258
3426
  console.log('Audio sync completed successfully');
@@ -3292,13 +3460,12 @@ server.registerTool('audio-video-sync', {
3292
3460
  }
3293
3461
  // 调用AI的lipSync方法,使用处理后的音频
3294
3462
  let progress = 0;
3295
- const result = await currentSession.ai.lipSync({
3296
- type: lipSyncType,
3463
+ const result = await ai.lipSync({
3464
+ type: lipSync.type,
3297
3465
  videoUrl,
3298
3466
  audioUrl,
3299
3467
  audioInMs,
3300
- ref_photo_url: refPhotoUrl,
3301
- pad_audio: lipSyncPadAudio,
3468
+ pad_audio: lipSync.padAudio,
3302
3469
  onProgress: async (metaData) => {
3303
3470
  console.log('Lip sync progress:', metaData);
3304
3471
  try {
@@ -3311,6 +3478,12 @@ server.registerTool('audio-video-sync', {
3311
3478
  });
3312
3479
  if (result.url) {
3313
3480
  console.log('Lip sync completed successfully');
3481
+ if (addSubtitles) {
3482
+ const addSubtitlesRes = await ai.addVideoSubtitles({
3483
+ videoUrl: result.url,
3484
+ });
3485
+ result.url = addSubtitlesRes.url || result.url;
3486
+ }
3314
3487
  // 保存到项目材料目录
3315
3488
  const uri = await saveMaterial(currentSession, result.url, validatedFileName);
3316
3489
  return {
@@ -3378,7 +3551,7 @@ server.registerTool('generate-video-by-ref', {
3378
3551
  .describe('Array of reference image objects with name, url and type. Can be empty for text-only generation.'),
3379
3552
  duration: zod_1.z
3380
3553
  .number()
3381
- .min(1)
3554
+ .min(0)
3382
3555
  .max(16)
3383
3556
  .optional()
3384
3557
  .default(5)
@@ -3444,8 +3617,18 @@ server.registerTool('generate-video-by-ref', {
3444
3617
  // 验证session状态
3445
3618
  const currentSession = await validateSession('generate-video-by-ref');
3446
3619
  const storyBoardPath = (0, node_path_1.resolve)(process.env.ZEROCUT_PROJECT_CWD || process.cwd(), projectLocalDir, storyBoardFile);
3620
+ if (type !== 'pro' && duration === 0) {
3621
+ return createErrorResponse('非 pro 模型的视频时长不能为 0', 'generate-video');
3622
+ }
3623
+ const outlineSheetImagePath = (0, node_path_1.resolve)(process.env.ZEROCUT_PROJECT_CWD || process.cwd(), projectLocalDir, 'materials', 'outline_sheet.png');
3624
+ const hasOutlineSheet = (0, node_fs_1.existsSync)(outlineSheetImagePath);
3625
+ if (hasOutlineSheet && !skipConsistencyCheck) {
3626
+ return createErrorResponse('如果提供了 outline_sheet.png,应采用 generate-video 图生视频。若用户明确要用参考生视频,则跳过一致性检查。', 'generate-video');
3627
+ }
3447
3628
  // 检查 storyboard 标志
3448
- if (!checkStoryboardFlag && (0, node_fs_1.existsSync)(storyBoardPath)) {
3629
+ if (!hasOutlineSheet &&
3630
+ !checkStoryboardFlag &&
3631
+ (0, node_fs_1.existsSync)(storyBoardPath)) {
3449
3632
  checkStoryboardFlag = true;
3450
3633
  return createErrorResponse(`必须先审查生成的 storyboard.json 内容,按照如下步骤:
3451
3634
 
@@ -4190,127 +4373,146 @@ server.registerTool('list-project-files', {
4190
4373
  return createErrorResponse(error, 'list-project-files');
4191
4374
  }
4192
4375
  });
4193
- server.registerTool('build-capcat-draft', {
4194
- title: 'Build CapCut Draft',
4195
- description: 'Read draft_content.json file, parse JSON and generate URIs for all assets in timeline tracks, then output the processed JSON string.',
4196
- inputSchema: {
4197
- draftContentFile: zod_1.z
4198
- .string()
4199
- .optional()
4200
- .default('draft_content.json')
4201
- .describe('The draft content file name to read (defaults to draft_content.json).'),
4202
- },
4203
- }, async ({ draftContentFile }) => {
4204
- try {
4205
- await validateSession('build-capcat-draft');
4206
- if (!session) {
4207
- throw new Error('No active session');
4208
- }
4209
- // 读取 draft_content.json 文件
4210
- const draftContentPath = (0, node_path_1.join)(projectLocalDir, draftContentFile);
4211
- if (!(0, node_fs_1.existsSync)(draftContentPath)) {
4212
- throw new Error(`${draftContentFile} file not found in project directory`);
4213
- }
4214
- const draftContentRaw = await (0, promises_1.readFile)(draftContentPath, 'utf-8');
4215
- const draftContent = JSON.parse(draftContentRaw);
4216
- const videoInfos = [];
4217
- const audioInfos = [];
4218
- // timeline 中的所有视频资源生成 URI 和视频信息
4219
- if (draftContent.timeline && draftContent.timeline.tracks) {
4220
- for (const track of draftContent.timeline.tracks) {
4221
- if (track.type === 'video' && track.clips) {
4222
- for (const clip of track.clips) {
4223
- if (clip.assetId) {
4224
- // assets 中找到对应的资源
4225
- const asset = draftContent.assets?.find((a) => a.id === clip.assetId);
4226
- if (asset && asset.uri && asset.type === 'video') {
4227
- // 获取本地文件路径并上传到 coze
4228
- const localPath = (0, node_path_1.join)(projectLocalDir, 'materials', (0, node_path_1.basename)(asset.uri));
4229
- const uploadResult = await (0, coze_1.uploadFile)(localPath);
4230
- const videoUrl = uploadResult.url;
4231
- // 构建视频信息对象
4232
- const videoInfo = {
4233
- video_url: videoUrl,
4234
- duration: clip.durationMs
4235
- ? clip.durationMs * 1000
4236
- : asset.durationMs
4237
- ? asset.durationMs * 1000
4238
- : 0, // 转换为微秒
4239
- width: draftContent.settings?.resolution?.width || 1920,
4240
- height: draftContent.settings?.resolution?.height || 1080,
4241
- start: clip.startMs ? clip.startMs * 1000 : 0, // 转换为微秒
4242
- end: ((clip.startMs ?? 0) + (clip.durationMs ?? 0)) * 1000,
4243
- };
4244
- videoInfos.push(videoInfo);
4245
- }
4246
- }
4247
- }
4248
- }
4249
- else if (track.type === 'audio' && track.clips) {
4250
- for (const clip of track.clips) {
4251
- if (clip.assetId) {
4252
- // assets 中找到对应的资源
4253
- const asset = draftContent.assets?.find((a) => a.id === clip.assetId);
4254
- if (asset && asset.uri && asset.type === 'audio') {
4255
- // 获取本地文件路径并上传到 coze
4256
- const localPath = (0, node_path_1.join)(projectLocalDir, 'materials', (0, node_path_1.basename)(asset.uri));
4257
- const uploadResult = await (0, coze_1.uploadFile)(localPath);
4258
- const audioUrl = uploadResult.url;
4259
- // 构建音频信息对象
4260
- const audioInfo = {
4261
- audio_url: audioUrl,
4262
- duration: clip.durationMs
4263
- ? Math.round(clip.durationMs / 1000)
4264
- : asset.durationMs
4265
- ? Math.round(asset.durationMs / 1000)
4266
- : 0, // 转换为秒
4267
- start: clip.startMs ? clip.startMs * 1000 : 0, // 转换为微秒
4268
- end: ((clip.startMs ?? 0) + (clip.durationMs ?? 0)) * 1000,
4269
- audio_effect: '',
4270
- };
4271
- audioInfos.push(audioInfo);
4272
- }
4273
- }
4274
- }
4275
- }
4276
- }
4277
- }
4278
- // 处理字幕信息
4279
- const captionInfos = [];
4280
- if (draftContent.subtitles && Array.isArray(draftContent.subtitles)) {
4281
- for (const subtitle of draftContent.subtitles) {
4282
- captionInfos.push({
4283
- text: subtitle.text || '',
4284
- start: (subtitle.startMs || 0) * 1000, // 转换为微秒
4285
- end: (subtitle.endMs || 0) * 1000, // 转换为微秒
4286
- keyword: '',
4287
- });
4288
- }
4289
- }
4290
- const parameters = {
4291
- video_infos: JSON.stringify(videoInfos),
4292
- audio_infos: JSON.stringify(audioInfos),
4293
- caption_infos: JSON.stringify(captionInfos),
4294
- width: draftContent.settings?.resolution?.width || 720,
4295
- height: draftContent.settings?.resolution?.height || 1280,
4296
- };
4297
- const workflow_id = '7559885633272758313';
4298
- const result = await (0, coze_1.runWorkflow)(workflow_id, parameters);
4299
- // 返回 video_infos、audio_infos、captions、width height 对象
4300
- return {
4301
- content: [
4302
- {
4303
- type: 'text',
4304
- text: JSON.stringify(result, null, 2),
4305
- },
4306
- ],
4307
- };
4308
- }
4309
- catch (error) {
4310
- console.error('Error building CapCut draft:', error);
4311
- return createErrorResponse(error, 'build-capcat-draft');
4312
- }
4313
- });
4376
+ // server.registerTool(
4377
+ // 'build-capcat-draft',
4378
+ // {
4379
+ // title: 'Build CapCut Draft',
4380
+ // description:
4381
+ // 'Read draft_content.json file, parse JSON and generate URIs for all assets in timeline tracks, then output the processed JSON string.',
4382
+ // inputSchema: {
4383
+ // draftContentFile: z
4384
+ // .string()
4385
+ // .optional()
4386
+ // .default('draft_content.json')
4387
+ // .describe(
4388
+ // 'The draft content file name to read (defaults to draft_content.json).'
4389
+ // ),
4390
+ // },
4391
+ // },
4392
+ // async ({ draftContentFile }) => {
4393
+ // try {
4394
+ // await validateSession('build-capcat-draft');
4395
+ // if (!session) {
4396
+ // throw new Error('No active session');
4397
+ // }
4398
+ // // 读取 draft_content.json 文件
4399
+ // const draftContentPath = join(projectLocalDir, draftContentFile);
4400
+ // if (!existsSync(draftContentPath)) {
4401
+ // throw new Error(
4402
+ // `${draftContentFile} file not found in project directory`
4403
+ // );
4404
+ // }
4405
+ // const draftContentRaw = await readFile(draftContentPath, 'utf-8');
4406
+ // const draftContent = JSON.parse(draftContentRaw);
4407
+ // const videoInfos: any[] = [];
4408
+ // const audioInfos: any[] = [];
4409
+ // // timeline 中的所有视频资源生成 URI 和视频信息
4410
+ // if (draftContent.timeline && draftContent.timeline.tracks) {
4411
+ // for (const track of draftContent.timeline.tracks) {
4412
+ // if (track.type === 'video' && track.clips) {
4413
+ // for (const clip of track.clips) {
4414
+ // if (clip.assetId) {
4415
+ // // assets 中找到对应的资源
4416
+ // const asset = draftContent.assets?.find(
4417
+ // (a: any) => a.id === clip.assetId
4418
+ // );
4419
+ // if (asset && asset.uri && asset.type === 'video') {
4420
+ // // 获取本地文件路径并上传到 coze
4421
+ // const localPath = join(
4422
+ // projectLocalDir,
4423
+ // 'materials',
4424
+ // basename(asset.uri)
4425
+ // );
4426
+ // const uploadResult = await uploadFile(localPath);
4427
+ // const videoUrl = uploadResult.url;
4428
+ // // 构建视频信息对象
4429
+ // const videoInfo = {
4430
+ // video_url: videoUrl,
4431
+ // duration: clip.durationMs
4432
+ // ? clip.durationMs * 1000
4433
+ // : asset.durationMs
4434
+ // ? asset.durationMs * 1000
4435
+ // : 0, // 转换为微秒
4436
+ // width: draftContent.settings?.resolution?.width || 1920,
4437
+ // height: draftContent.settings?.resolution?.height || 1080,
4438
+ // start: clip.startMs ? clip.startMs * 1000 : 0, // 转换为微秒
4439
+ // end: ((clip.startMs ?? 0) + (clip.durationMs ?? 0)) * 1000,
4440
+ // };
4441
+ // videoInfos.push(videoInfo);
4442
+ // }
4443
+ // }
4444
+ // }
4445
+ // } else if (track.type === 'audio' && track.clips) {
4446
+ // for (const clip of track.clips) {
4447
+ // if (clip.assetId) {
4448
+ // // assets 中找到对应的资源
4449
+ // const asset = draftContent.assets?.find(
4450
+ // (a: any) => a.id === clip.assetId
4451
+ // );
4452
+ // if (asset && asset.uri && asset.type === 'audio') {
4453
+ // // 获取本地文件路径并上传到 coze
4454
+ // const localPath = join(
4455
+ // projectLocalDir,
4456
+ // 'materials',
4457
+ // basename(asset.uri)
4458
+ // );
4459
+ // const uploadResult = await uploadFile(localPath);
4460
+ // const audioUrl = uploadResult.url;
4461
+ // // 构建音频信息对象
4462
+ // const audioInfo = {
4463
+ // audio_url: audioUrl,
4464
+ // duration: clip.durationMs
4465
+ // ? Math.round(clip.durationMs / 1000)
4466
+ // : asset.durationMs
4467
+ // ? Math.round(asset.durationMs / 1000)
4468
+ // : 0, // 转换为秒
4469
+ // start: clip.startMs ? clip.startMs * 1000 : 0, // 转换为微秒
4470
+ // end: ((clip.startMs ?? 0) + (clip.durationMs ?? 0)) * 1000,
4471
+ // audio_effect: '',
4472
+ // };
4473
+ // audioInfos.push(audioInfo);
4474
+ // }
4475
+ // }
4476
+ // }
4477
+ // }
4478
+ // }
4479
+ // }
4480
+ // // 处理字幕信息
4481
+ // const captionInfos: any[] = [];
4482
+ // if (draftContent.subtitles && Array.isArray(draftContent.subtitles)) {
4483
+ // for (const subtitle of draftContent.subtitles) {
4484
+ // captionInfos.push({
4485
+ // text: subtitle.text || '',
4486
+ // start: (subtitle.startMs || 0) * 1000, // 转换为微秒
4487
+ // end: (subtitle.endMs || 0) * 1000, // 转换为微秒
4488
+ // keyword: '',
4489
+ // });
4490
+ // }
4491
+ // }
4492
+ // const parameters = {
4493
+ // video_infos: JSON.stringify(videoInfos),
4494
+ // audio_infos: JSON.stringify(audioInfos),
4495
+ // caption_infos: JSON.stringify(captionInfos),
4496
+ // width: draftContent.settings?.resolution?.width || 720,
4497
+ // height: draftContent.settings?.resolution?.height || 1280,
4498
+ // };
4499
+ // const workflow_id = '7559885633272758313';
4500
+ // const result = await runWorkflow(workflow_id, parameters);
4501
+ // // 返回 video_infos、audio_infos、captions、width 和 height 对象
4502
+ // return {
4503
+ // content: [
4504
+ // {
4505
+ // type: 'text',
4506
+ // text: JSON.stringify(result, null, 2),
4507
+ // },
4508
+ // ],
4509
+ // };
4510
+ // } catch (error) {
4511
+ // console.error('Error building CapCut draft:', error);
4512
+ // return createErrorResponse(error, 'build-capcat-draft');
4513
+ // }
4514
+ // }
4515
+ // );
4314
4516
  async function run() {
4315
4517
  // Start receiving messages on stdin and sending messages on stdout
4316
4518
  const transport = new stdio_js_1.StdioServerTransport();