cerevox 3.11.2 → 3.12.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -689,7 +689,7 @@ server.registerTool('upload-custom-material', {
689
689
  const validatedFileName = validateFileName(fileName);
690
690
  // 检查文件格式
691
691
  const fileExtension = fileName.toLowerCase().split('.').pop();
692
- const allowedFormats = ['jpeg', 'jpg', 'png', 'mp4', 'mp3'];
692
+ const allowedFormats = ['jpeg', 'jpg', 'png', 'mp4', 'mp3', 'ass'];
693
693
  if (!fileExtension || !allowedFormats.includes(fileExtension)) {
694
694
  throw new Error(`Unsupported file format: ${fileExtension}. Allowed formats: ${allowedFormats.join(', ')}`);
695
695
  }
@@ -899,7 +899,7 @@ server.registerTool('generate-character-image', {
899
899
  try {
900
900
  const promptOptimizer = await (0, promises_1.readFile)((0, node_path_1.resolve)(__dirname, './prompts/character-prompt-optimizer.md'), 'utf8');
901
901
  const completion = await ai.getCompletions({
902
- model: 'Doubao-Seed-1.6-flash',
902
+ model: 'Doubao-Seed-1.8',
903
903
  messages: [
904
904
  {
905
905
  role: 'system',
@@ -1003,6 +1003,7 @@ ${roleDescriptionPrompt}
1003
1003
  if (!res) {
1004
1004
  throw new Error('Failed to generate image: no response from AI service');
1005
1005
  }
1006
+ res.url = res.url || res.urls[0];
1006
1007
  if (res.url) {
1007
1008
  console.log('Image generated successfully, saving to materials...');
1008
1009
  const uri = await saveMaterial(currentSession, res.url, validatedFileName);
@@ -1116,7 +1117,7 @@ server.registerTool('generate-image', {
1116
1117
  '4096x1968',
1117
1118
  ])
1118
1119
  .default('1440x2560')
1119
- .describe('The size of the image.'),
1120
+ .describe('The size of the image. shot-grid 默认为 2560x1440,其余默认 1440x2560'),
1120
1121
  imageCount: zod_1.z
1121
1122
  .number()
1122
1123
  .min(1)
@@ -3462,90 +3463,43 @@ server.registerTool('media-analyzer', {
3462
3463
  const currentSession = await validateSession('media-analyzer');
3463
3464
  // 验证文件格式
3464
3465
  const fileExtension = mediaFileName.toLowerCase().split('.').pop();
3465
- const supportedFormats = ['jpeg', 'jpg', 'png', 'mp4', 'mp3'];
3466
+ const supportedFormats = ['jpeg', 'jpg', 'png', 'webp', 'mp4', 'mp3'];
3466
3467
  if (!fileExtension || !supportedFormats.includes(fileExtension)) {
3467
3468
  throw new Error(`Unsupported file format. Supported formats: ${supportedFormats.join(', ')}`);
3468
3469
  }
3469
3470
  // 获取媒体文件 URL
3470
3471
  const mediaUrl = getMaterialUri(currentSession, mediaFileName);
3471
- // 构建系统提示
3472
- const systemPrompt = `你是一个专业的媒体内容分析师。请仔细分析媒体文件并根据用户的具体需求进行详细分析。
3473
-
3474
- 分析要求:
3475
- 1. 准确描述媒体中的具体内容
3476
- 2. 注意细节,包括颜色、构图、风格、氛围、动作、声音等
3477
- 3. 根据用户的具体需求提供针对性的分析
3478
- 4. 如果是图片或视频,请详细描述视觉风格、色彩搭配、视觉效果等
3479
- 5. 如果是视频,请拆解分镜,然后描述各分镜动作、场景变化、镜头运动等
3480
- 6. 如果是音频,请描述音质、节奏、情感表达等
3481
- 7. 提供清晰、有用的分析结果,便于后续创作工作
3482
-
3483
- 请用中文回答,内容要详细且实用。`;
3484
- // 构建用户提示
3485
- const userPrompt = `请分析这个媒体文件:${analysisRequest}
3472
+ if (fileExtension === 'mp3') {
3473
+ // 音频文件 - 生成字幕并进行分析
3474
+ // 调用AI服务生成字幕
3475
+ const ai = currentSession.ai;
3476
+ const captionsResult = await ai.voiceToCaptions({
3477
+ url: mediaUrl,
3478
+ });
3479
+ console.log(mediaUrl, captionsResult);
3480
+ if (!captionsResult || !captionsResult.utterances) {
3481
+ throw new Error('Failed to generate captions from audio');
3482
+ }
3483
+ const captionsFileName = `${mediaFileName}.captions.json`;
3484
+ // 保存字幕文件到本地材料目录
3485
+ const localDir = node_path_1.default.resolve(projectLocalDir, 'materials');
3486
+ if (!(0, node_fs_1.existsSync)(localDir)) {
3487
+ (0, node_fs_1.mkdirSync)(localDir, { recursive: true });
3488
+ }
3489
+ const captionsFilePath = node_path_1.default.join(localDir, captionsFileName);
3490
+ await (0, promises_1.writeFile)(captionsFilePath, JSON.stringify(captionsResult, null, 2), 'utf-8');
3491
+ // 提取字幕文本内容用于分析
3492
+ const captionsText = captionsResult.utterances
3493
+ .map((caption) => caption.text)
3494
+ .join(' ');
3495
+ const userPrompt = `请分析这个媒体文件:${prompt}
3486
3496
 
3487
3497
  请提供详细的分析结果,包括媒体的具体内容、风格特征、技术特点等相关信息。`;
3488
- // 根据文件类型构建消息内容
3489
- let messageContent;
3490
- if (['jpeg', 'jpg', 'png'].includes(fileExtension)) {
3491
- // 图片文件
3492
- messageContent = [
3498
+ // 构建包含字幕内容的分析提示
3499
+ const messageContent = [
3493
3500
  {
3494
- type: 'image_url',
3495
- image_url: {
3496
- url: mediaUrl,
3497
- },
3498
- },
3499
- {
3500
- type: 'text',
3501
- text: userPrompt,
3502
- },
3503
- ];
3504
- }
3505
- else if (fileExtension === 'mp4') {
3506
- // 视频文件
3507
- messageContent = [
3508
- {
3509
- type: 'video_url',
3510
- video_url: {
3511
- url: mediaUrl,
3512
- },
3513
- },
3514
- {
3515
- type: 'text',
3516
- text: userPrompt,
3517
- },
3518
- ];
3519
- }
3520
- else if (fileExtension === 'mp3') {
3521
- // 音频文件 - 生成字幕并进行分析
3522
- try {
3523
- // 调用AI服务生成字幕
3524
- const ai = currentSession.ai;
3525
- const captionsResult = await ai.voiceToCaptions({
3526
- url: mediaUrl,
3527
- });
3528
- console.log(mediaUrl, captionsResult);
3529
- if (!captionsResult || !captionsResult.utterances) {
3530
- throw new Error('Failed to generate captions from audio');
3531
- }
3532
- const captionsFileName = `${mediaFileName}.captions.json`;
3533
- // 保存字幕文件到本地材料目录
3534
- const localDir = node_path_1.default.resolve(projectLocalDir, 'materials');
3535
- if (!(0, node_fs_1.existsSync)(localDir)) {
3536
- (0, node_fs_1.mkdirSync)(localDir, { recursive: true });
3537
- }
3538
- const captionsFilePath = node_path_1.default.join(localDir, captionsFileName);
3539
- await (0, promises_1.writeFile)(captionsFilePath, JSON.stringify(captionsResult, null, 2), 'utf-8');
3540
- // 提取字幕文本内容用于分析
3541
- const captionsText = captionsResult.utterances
3542
- .map((caption) => caption.text)
3543
- .join(' ');
3544
- // 构建包含字幕内容的分析提示
3545
- messageContent = [
3546
- {
3547
- type: 'text',
3548
- text: `${userPrompt}
3501
+ type: 'input_text',
3502
+ text: `${userPrompt}
3549
3503
 
3550
3504
  音频文件:${mediaFileName}
3551
3505
  字幕内容:${captionsText}
@@ -3558,83 +3512,54 @@ server.registerTool('media-analyzer', {
3558
3512
  5. 创作建议和后续应用方向
3559
3513
 
3560
3514
  字幕文件已保存为:${captionsFileName}`,
3515
+ },
3516
+ ];
3517
+ // 在返回结果中包含字幕文件信息
3518
+ const analysisPayload = {
3519
+ model: 'Doubao-Seed-1.8',
3520
+ input: [
3521
+ {
3522
+ role: 'user',
3523
+ content: messageContent,
3561
3524
  },
3562
- ];
3563
- // 在返回结果中包含字幕文件信息
3564
- const analysisPayload = {
3565
- model: 'Doubao-Seed-1.8',
3566
- messages: [
3567
- {
3568
- role: 'system',
3569
- content: systemPrompt,
3570
- },
3571
- {
3572
- role: 'user',
3573
- content: messageContent,
3574
- },
3575
- ],
3576
- };
3577
- console.log(JSON.stringify(analysisPayload, null, 2));
3578
- const completion = await ai.getCompletions(analysisPayload);
3579
- const analysisResult = completion.choices[0]?.message?.content;
3580
- if (!analysisResult) {
3581
- throw new Error('No response from AI model');
3582
- }
3583
- return {
3584
- content: [
3585
- {
3586
- type: 'text',
3587
- text: JSON.stringify({
3588
- success: true,
3589
- mediaFileName,
3590
- mediaType: fileExtension,
3591
- analysisRequest,
3592
- captionsFileName,
3593
- captionsContent: captionsText,
3594
- analysis: analysisResult,
3595
- mediaUrl,
3596
- timestamp: new Date().toISOString(),
3597
- nextActionSuggest: '可根据字幕内容和分析结果进行后续创作,如生成相关视频、配音或其他素材。',
3598
- }),
3599
- },
3600
- ],
3601
- };
3525
+ ],
3526
+ };
3527
+ console.log(JSON.stringify(analysisPayload, null, 2));
3528
+ const responses = await ai.getResponses(analysisPayload);
3529
+ const analysisResult = responses.output?.find((item) => item.type === 'message')?.content?.[0]?.text;
3530
+ if (!analysisResult) {
3531
+ throw new Error('No response from AI model');
3602
3532
  }
3603
- catch (captionError) {
3604
- // 如果字幕生成失败,回退到原有逻辑
3605
- console.warn('Failed to generate captions:', captionError);
3606
- messageContent = [
3533
+ return {
3534
+ content: [
3607
3535
  {
3608
3536
  type: 'text',
3609
- text: `${userPrompt}\n\n注意:这是一个音频文件 (${mediaFileName}),字幕生成失败,请根据文件名和用户需求提供分析建议。错误信息:${captionError}`,
3537
+ text: JSON.stringify({
3538
+ success: true,
3539
+ mediaFileName,
3540
+ mediaType: fileExtension,
3541
+ analysisRequest,
3542
+ captionsFileName,
3543
+ captionsContent: captionsText,
3544
+ analysis: analysisResult,
3545
+ mediaUrl,
3546
+ timestamp: new Date().toISOString(),
3547
+ nextActionSuggest: '可根据字幕内容和分析结果进行后续创作,如生成相关视频、配音或其他素材。',
3548
+ }),
3610
3549
  },
3611
- ];
3612
- }
3613
- }
3614
- else {
3615
- throw new Error(`Unsupported media type: ${fileExtension}`);
3550
+ ],
3551
+ };
3616
3552
  }
3617
- // 调用AI模型进行媒体内容分析
3618
3553
  const ai = currentSession.ai;
3619
- const payload = {
3620
- model: 'Doubao-Seed-1.6',
3621
- messages: [
3622
- {
3623
- role: 'system',
3624
- content: systemPrompt,
3625
- },
3626
- {
3627
- role: 'user',
3628
- content: messageContent,
3629
- },
3630
- ],
3631
- };
3632
- console.log(JSON.stringify(payload, null, 2));
3633
- const completion = await ai.getCompletions(payload);
3634
- const result = completion.choices[0]?.message?.content;
3635
- if (!result) {
3636
- throw new Error('No response from AI model');
3554
+ let type = 'video';
3555
+ if (['jpeg', 'jpg', 'png', 'webp'].includes(fileExtension)) {
3556
+ type = 'image';
3637
3557
  }
3558
+ const result = await ai.analyzeMedia({
3559
+ mediaUrl,
3560
+ type,
3561
+ prompt: analysisRequest,
3562
+ });
3638
3563
  const metadata = await ai.getMediaMetadata(mediaUrl);
3639
3564
  return {
3640
3565
  content: [