cerevox 3.0.0-beta.26 → 3.0.0-beta.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"zerocut.d.ts","sourceRoot":"","sources":["../../../src/mcp/servers/zerocut.ts"],"names":[],"mappings":";AA64KA,wBAAsB,GAAG,kBAKxB"}
1
+ {"version":3,"file":"zerocut.d.ts","sourceRoot":"","sources":["../../../src/mcp/servers/zerocut.ts"],"names":[],"mappings":";AAk4KA,wBAAsB,GAAG,kBAKxB"}
@@ -956,7 +956,7 @@ server.registerTool('upload-custom-material', {
956
956
  });
957
957
  server.registerTool('generate-image', {
958
958
  title: 'Generate Image',
959
- description: `生成图片`,
959
+ description: `生成图片,支持批量生成1-4张图`,
960
960
  inputSchema: {
961
961
  type: zod_1.z
962
962
  .enum([
@@ -1025,9 +1025,16 @@ server.registerTool('generate-image', {
1025
1025
  ])
1026
1026
  .default('720x1280')
1027
1027
  .describe('The size of the image.'),
1028
- saveToFileName: zod_1.z
1029
- .string()
1030
- .describe('The filename to save. 应该是png文件'),
1028
+ imageCount: zod_1.z
1029
+ .number()
1030
+ .min(1)
1031
+ .max(4)
1032
+ .optional()
1033
+ .default(1)
1034
+ .describe('The number of images to generate. 暂时最多支持4张图,多了容易超时'),
1035
+ saveToFileNames: zod_1.z
1036
+ .array(zod_1.z.string())
1037
+ .describe('The filenames to save. 数量要和imageCount对应,应该是png文件'),
1031
1038
  watermark: zod_1.z
1032
1039
  .boolean()
1033
1040
  .optional()
@@ -1061,7 +1068,7 @@ server.registerTool('generate-image', {
1061
1068
  \`\`\`
1062
1069
  `),
1063
1070
  },
1064
- }, async ({ type = 'seedream', prompt, sceneIndex, storyBoardFile = 'storyboard.json', skipConsistencyCheck = false, size = '720x1280', saveToFileName, watermark, referenceImages, }) => {
1071
+ }, async ({ type = 'seedream', prompt, sceneIndex, storyBoardFile = 'storyboard.json', skipConsistencyCheck = false, size = '720x1280', imageCount = 1, saveToFileNames, watermark, referenceImages, }) => {
1065
1072
  try {
1066
1073
  // 验证session状态
1067
1074
  const currentSession = await validateSession('generate-image');
@@ -1071,7 +1078,6 @@ server.registerTool('generate-image', {
1071
1078
  checkStoryboardFlag = true;
1072
1079
  return createErrorResponse('必须先审查生成的 storyboard.json 内容,确保每个场景中的stage_atmosphere内容按照规则被正确融合到start_frame和video_prompt中,不得遗漏,检查完成后先汇报,如果有问题,应当先修改 storyboard.json 内容,然后再调用 generate-image 生成图片。注意修改 storyboard 内容时,仅修改相应字段的字符串值,不要破坏JSON格式!', 'generate-image');
1073
1080
  }
1074
- const validatedFileName = validateFileName(saveToFileName);
1075
1081
  // 校验 prompt 与 storyboard.json 中场景设定的一致性
1076
1082
  if (sceneIndex && !skipConsistencyCheck) {
1077
1083
  try {
@@ -1220,6 +1226,9 @@ ${referenceImages?.map((ref, index) => `图${index + 1}:${ref.image}`).join('\
1220
1226
  catch (error) {
1221
1227
  console.error('Failed to optimize prompt:', error);
1222
1228
  }
1229
+ if (imageCount > 1) {
1230
+ processedPrompt = `$请生成${imageCount}张相关图片 ${processedPrompt}`;
1231
+ }
1223
1232
  console.log(`Generating image with prompt: ${processedPrompt.substring(0, 100)}...`);
1224
1233
  // 处理参考图片
1225
1234
  let imageBase64Array;
@@ -1301,13 +1310,25 @@ ${processedPrompt}`.trim();
1301
1310
  if (!res) {
1302
1311
  throw new Error('Failed to generate image: no response from AI service');
1303
1312
  }
1304
- if (res.url) {
1313
+ if (res.urls && res.urls.length > 0) {
1305
1314
  console.log('Image generated successfully, saving to materials...');
1306
- const uri = await saveMaterial(currentSession, res.url, validatedFileName);
1315
+ let uris = [];
1316
+ if (res.urls.length === 1) {
1317
+ uris = [
1318
+ await saveMaterial(currentSession, res.urls[0], validateFileName(saveToFileNames[0])),
1319
+ ];
1320
+ }
1321
+ else {
1322
+ // 多图场景
1323
+ uris = await Promise.all(res.urls.map((url, i) => {
1324
+ const fileName = validateFileName(saveToFileNames[i]);
1325
+ return saveMaterial(currentSession, url, fileName);
1326
+ }));
1327
+ }
1307
1328
  const result = {
1308
1329
  success: true,
1309
1330
  // source: res.url,
1310
- uri,
1331
+ uris,
1311
1332
  prompt: processedPrompt,
1312
1333
  size,
1313
1334
  timestamp: new Date().toISOString(),
@@ -2364,7 +2385,7 @@ server.registerTool('generate-scene-tts', {
2364
2385
  .describe('The volume of the tts.'),
2365
2386
  voiceID: zod_1.z
2366
2387
  .string()
2367
- .describe(`适合作为视频配音的音色ID,除非用户指定,否则你必须已通过 search_voice 工具检查确定该音色确实是存在的。`),
2388
+ .describe(`适合作为视频配音的音色ID,除非用户指定,否则你必须确保已通过 pick-voice 工具挑选出真实存在的音色。`),
2368
2389
  context_texts: zod_1.z
2369
2390
  .array(zod_1.z.string())
2370
2391
  .default([])
@@ -2398,7 +2419,7 @@ server.registerTool('generate-scene-tts', {
2398
2419
  try {
2399
2420
  const voice = (await ai.listVoices()).find(v => v.id === voiceID);
2400
2421
  if (!voice) {
2401
- return createErrorResponse(`Voice ${voiceID} not found in voice-list. Use search-voices tool to find available voices. 若用户坚持要使用该音色,需跳过一致性检查。`, 'generate-scene-tts');
2422
+ return createErrorResponse(`Voice ${voiceID} not found in voice-list. Use pick-voice tool to pick an available voice. 若用户坚持要使用该音色,需跳过一致性检查。`, 'generate-scene-tts');
2402
2423
  }
2403
2424
  const storyBoardPath = (0, node_path_1.resolve)(process.env.ZEROCUT_PROJECT_CWD || process.cwd(), projectLocalDir, storyBoardFile);
2404
2425
  if ((0, node_fs_1.existsSync)(storyBoardPath)) {
@@ -2451,7 +2472,7 @@ server.registerTool('generate-scene-tts', {
2451
2472
  }
2452
2473
  console.log(`Generating TTS with voice: ${voiceID}, speed: ${finalSpeed}, text: ${text.substring(0, 100)}...`);
2453
2474
  if (voiceID.startsWith('BV0')) {
2454
- throw new Error(`BV0* 系列音色已弃用,你必须已通过 search_voice 工具检查确定该音色确实是存在的。`);
2475
+ throw new Error(`BV0* 系列音色已弃用,你必须通过 pick-voice 工具挑选一个真实存在的音色。`);
2455
2476
  }
2456
2477
  const type = voiceID.startsWith('zh_') ||
2457
2478
  voiceID.startsWith('en_') ||
@@ -2606,7 +2627,7 @@ ${context_texts.join('\n')}
2606
2627
  type: 'text',
2607
2628
  text: JSON.stringify({
2608
2629
  success: false,
2609
- error: 'No TTS URL returned from AI service. You should use search-voices tool to find available voices.',
2630
+ error: 'No TTS URL returned from AI service. You should use pick-voice tool to pick an available voice.',
2610
2631
  response: res,
2611
2632
  timestamp: new Date().toISOString(),
2612
2633
  }),
@@ -2839,67 +2860,71 @@ server.registerTool('get-schema', {
2839
2860
  return createErrorResponse(error, 'get-schema');
2840
2861
  }
2841
2862
  });
2842
- server.registerTool('search-voices', {
2843
- title: 'Search Voices',
2844
- description: 'Search voices from doubao_voices_full based on languages, and gender. 搜索并选择符合要求的语音,在合适的情况下,优先采用 volcano_tts_2 类型的语音',
2863
+ server.registerTool('pick-voice', {
2864
+ title: 'Pick Voice',
2865
+ description: '根据用户需求,选择尽可能符合要求的语音,在合适的情况下,优先采用 volcano_tts_2 类型的语音',
2845
2866
  inputSchema: {
2846
- languages: zod_1.z
2847
- .array(zod_1.z.enum([
2848
- 'zh',
2849
- 'en',
2850
- 'ja',
2851
- 'ko',
2852
- 'es',
2853
- 'pt',
2854
- 'nl',
2855
- 'vi',
2856
- 'ru',
2857
- 'id',
2858
- 'de',
2859
- 'fr',
2860
- 'it',
2861
- 'ar',
2862
- 'tr',
2863
- 'uk',
2864
- ]))
2867
+ prompt: zod_1.z
2868
+ .string()
2869
+ .describe('用户需求描述,例如:一个有亲和力的,适合给孩子讲故事的语音'),
2870
+ custom_design: zod_1.z
2871
+ .boolean()
2865
2872
  .optional()
2866
- .describe('Filter by languages (e.g., ["zh", "en"]). If not provided, no language filtering is applied.'),
2867
- gender: zod_1.z
2868
- .enum(['male', 'female'])
2873
+ .describe('是否自定义语音,由于要消耗较多积分,因此**只有用户明确要求自己设计语音**,才将该参数设为true'),
2874
+ custom_design_preview: zod_1.z
2875
+ .string()
2869
2876
  .optional()
2870
- .describe('Filter by gender (male or female). If not provided, no gender filtering is applied.'),
2877
+ .describe('用户自定义语音的预览文本,用于展示自定义语音的效果,只有 custom_design true 时才需要'),
2878
+ custom_design_save_to: zod_1.z
2879
+ .string()
2880
+ .optional()
2881
+ .describe('自定义语音的保存路径,例如:custom_voice.mp3 custom_voice_{id}.mp3'),
2871
2882
  },
2872
- }, async ({ languages, gender }) => {
2883
+ }, async ({ prompt, custom_design, custom_design_preview, custom_design_save_to, }) => {
2873
2884
  try {
2874
2885
  // 验证session状态
2875
- const currentSession = await validateSession('search-voices');
2886
+ const currentSession = await validateSession('pick-voice');
2876
2887
  const ai = currentSession.ai;
2877
- let filteredVoices = await ai.listVoices();
2878
- // Filter by languages
2879
- if (languages && languages.length > 0) {
2880
- filteredVoices = filteredVoices.filter(voice => voice.languages &&
2881
- voice.languages.some((lang) => languages.includes(lang)));
2882
- }
2883
- // Filter by gender
2884
- if (gender) {
2885
- filteredVoices = filteredVoices.filter(voice => {
2886
- return voice.gender === gender;
2888
+ if (custom_design) {
2889
+ if (!custom_design_preview) {
2890
+ throw new Error('custom_design_preview is required when custom_design is true');
2891
+ }
2892
+ const data = await currentSession.ai.voiceDesign({
2893
+ prompt,
2894
+ previewText: custom_design_preview,
2887
2895
  });
2896
+ if (data.voice_id) {
2897
+ const trial_audio = data.trial_audio;
2898
+ let uri = '';
2899
+ if (trial_audio) {
2900
+ uri = await saveMaterial(currentSession, trial_audio, custom_design_save_to || `custom_voice_${data.voice_id}.mp3`);
2901
+ }
2902
+ return {
2903
+ content: [
2904
+ {
2905
+ type: 'text',
2906
+ text: JSON.stringify({
2907
+ success: true,
2908
+ ...data,
2909
+ uri,
2910
+ timestamp: new Date().toISOString(),
2911
+ }),
2912
+ },
2913
+ ],
2914
+ };
2915
+ }
2916
+ else {
2917
+ throw new Error(`Voice design failed, ${JSON.stringify(data)}`);
2918
+ }
2888
2919
  }
2920
+ const data = await ai.pickVoice({ prompt });
2889
2921
  return {
2890
2922
  content: [
2891
2923
  {
2892
2924
  type: 'text',
2893
2925
  text: JSON.stringify({
2894
2926
  success: true,
2895
- data: {
2896
- totalCount: filteredVoices.length,
2897
- voices: filteredVoices,
2898
- filters: {
2899
- languages: languages || null,
2900
- gender: gender || null,
2901
- },
2902
- },
2927
+ ...data,
2903
2928
  timestamp: new Date().toISOString(),
2904
2929
  }),
2905
2930
  },
@@ -2907,52 +2932,7 @@ server.registerTool('search-voices', {
2907
2932
  };
2908
2933
  }
2909
2934
  catch (error) {
2910
- return createErrorResponse(error, 'search-voices');
2911
- }
2912
- });
2913
- server.registerTool('voice-design', {
2914
- title: 'Voice Design',
2915
- description: 'Design a voice based on a prompt. The voice will be designed based on the prompt and preview text.',
2916
- inputSchema: {
2917
- prompt: zod_1.z.string().describe('The prompt to design the voice.'),
2918
- previewText: zod_1.z.string().describe('The preview text to design the voice.'),
2919
- saveToFileName: zod_1.z
2920
- .string()
2921
- .describe('The file name to save the designed voice. 应该是mp3文件'),
2922
- },
2923
- }, async ({ prompt, previewText, saveToFileName }) => {
2924
- try {
2925
- const currentSession = await validateSession('voice-design');
2926
- const data = await currentSession.ai.voiceDesign({
2927
- prompt,
2928
- previewText,
2929
- });
2930
- if (data.voice_id) {
2931
- const trial_audio = data.trial_audio;
2932
- let uri = '';
2933
- if (trial_audio) {
2934
- uri = await saveMaterial(currentSession, trial_audio, saveToFileName);
2935
- }
2936
- return {
2937
- content: [
2938
- {
2939
- type: 'text',
2940
- text: JSON.stringify({
2941
- success: true,
2942
- ...data,
2943
- uri,
2944
- timestamp: new Date().toISOString(),
2945
- }),
2946
- },
2947
- ],
2948
- };
2949
- }
2950
- else {
2951
- throw new Error(`Voice design failed, ${JSON.stringify(data)}`);
2952
- }
2953
- }
2954
- catch (error) {
2955
- return createErrorResponse(error, 'voice-design');
2935
+ return createErrorResponse(error, 'pick-voice');
2956
2936
  }
2957
2937
  });
2958
2938
  server.registerTool('media-analyzer', {