@idk500/video-vision-mcp 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +136 -0
- package/dist/frame-extractor.d.ts +28 -0
- package/dist/frame-extractor.js +246 -0
- package/dist/hunyuan-client.d.ts +95 -0
- package/dist/hunyuan-client.js +319 -0
- package/dist/index.d.ts +24 -0
- package/dist/index.js +813 -0
- package/dist/video-processor.d.ts +68 -0
- package/dist/video-processor.js +478 -0
- package/package.json +67 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,813 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
3
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
|
+
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
5
|
+
import { HunyuanClient } from './hunyuan-client.js';
|
|
6
|
+
import { VideoProcessor } from './video-processor.js';
|
|
7
|
+
import { FrameExtractor } from './frame-extractor.js';
|
|
8
|
+
/**
 * MCP server that exposes video / image analysis tools over stdio.
 *
 * Tools registered: extract_video_frames, analyze_video_content,
 * analyze_image_batch, get_video_info, generate_video_script and
 * generate_image_script. Frame extraction is delegated to FrameExtractor,
 * video analysis / script generation to VideoProcessor, and batch image
 * analysis to HunyuanClient.
 */
class VideoMCPServer {
    server;
    videoProcessor;
    frameExtractor;
    // The OpenAI-compatible API key for the vision/text backend. Historically
    // held Tencent credentials; now it is a single bearer token (e.g. Bigmodel).
    secretId;
    secretKey;
    region;
    /**
     * @param {{secretId?: string, secretKey?: string, region?: string}} [options]
     *        Start-up values (typically parsed from the command line). Environment
     *        variables take precedence over these.
     */
    constructor(options) {
        // API key for the vision/text backend. Priority: environment > start-up option.
        // The legacy TENCENT_SECRET_ID/KEY names are still honoured next to VISION_API_KEY.
        this.secretId =
            process.env.VISION_API_KEY || process.env.TENCENT_SECRET_ID || options?.secretId;
        this.secretKey = process.env.TENCENT_SECRET_KEY || options?.secretKey;
        this.region = process.env.VISION_ENDPOINT || process.env.TENCENT_REGION || options?.region;
        this.server = new Server({
            name: 'video-mcp',
            version: '1.0.0',
        }, {
            capabilities: {
                tools: {},
            },
        });
        this.videoProcessor = new VideoProcessor();
        this.frameExtractor = new FrameExtractor();
        this.setupToolHandlers();
    }
    /**
     * Builds the tool descriptors returned by the ListTools request.
     * The credential and script-related property schemas are shared between
     * tools, so they are defined once and spread into each schema.
     *
     * @returns {Array<object>} One descriptor (name, description, inputSchema) per tool.
     */
    buildToolDefinitions() {
        const videoPath = {
            type: 'string',
            description: '视频文件路径',
        };
        const imagePaths = {
            type: 'array',
            items: { type: 'string' },
            description: '图片文件路径数组',
        };
        const analysisFrameOptions = {
            maxFrames: {
                type: 'number',
                description: '最大分析帧数(默认5帧以控制成本)',
                default: 5,
            },
            strategy: {
                type: 'string',
                enum: ['uniform', 'keyframe', 'scene_change'],
                description: '帧提取策略',
                default: 'keyframe',
            },
        };
        const scriptOptions = {
            scriptType: {
                type: 'string',
                enum: ['commercial', 'documentary', 'tutorial', 'narrative', 'custom'],
                description: '脚本类型:commercial(商业广告), documentary(纪录片), tutorial(教学), narrative(叙事), custom(自定义)',
                default: 'commercial',
            },
            targetDuration: {
                type: 'number',
                description: '目标脚本时长(秒)',
            },
            targetAudience: {
                type: 'string',
                description: '目标受众(默认:一般观众)',
                default: '一般观众',
            },
            style: {
                type: 'string',
                description: '拍摄风格(默认:专业、吸引人)',
                default: '专业、吸引人',
            },
        };
        const credentialOptions = {
            secretId: {
                type: 'string',
                description: '视觉模型 API Key(可选,优先使用环境变量 VISION_API_KEY)',
            },
            secretKey: {
                type: 'string',
                description: '保留字段(视觉模型仅需 secretId/apiKey),可选',
            },
            region: {
                type: 'string',
                description: '视觉模型端点 baseURL(可选,默认智谱 Bigmodel paas/v4)',
            },
        };
        return [
            {
                name: 'extract_video_frames',
                description: '从视频中提取关键帧图像',
                inputSchema: {
                    type: 'object',
                    properties: {
                        videoPath,
                        maxFrames: {
                            type: 'number',
                            description: '最大提取帧数(默认10帧)',
                            default: 10,
                        },
                        outputDir: {
                            type: 'string',
                            description: '输出目录路径(可选)',
                        },
                        strategy: {
                            type: 'string',
                            enum: ['uniform', 'keyframe', 'scene_change'],
                            description: '提取策略:uniform(均匀间隔), keyframe(关键帧), scene_change(场景变化)',
                            default: 'uniform',
                        },
                    },
                    required: ['videoPath'],
                },
            },
            {
                name: 'analyze_video_content',
                description: '使用视觉模型分析视频内容(抽帧后送入多模态模型)',
                inputSchema: {
                    type: 'object',
                    properties: {
                        videoPath,
                        prompt: {
                            type: 'string',
                            description: '分析提示词(可选)',
                            default: '请基于这些视频关键帧,用100-200字简洁描述视频的主要内容、场景、人物和动作,不需要逐帧分析。',
                        },
                        ...analysisFrameOptions,
                        ...credentialOptions,
                    },
                    required: ['videoPath'],
                },
            },
            {
                name: 'analyze_image_batch',
                description: '批量分析图片内容',
                inputSchema: {
                    type: 'object',
                    properties: {
                        imagePaths,
                        prompt: {
                            type: 'string',
                            description: '分析提示词(可选)',
                            default: '请描述这张图片的内容。',
                        },
                        ...credentialOptions,
                    },
                    required: ['imagePaths'],
                },
            },
            {
                name: 'get_video_info',
                description: '获取视频文件基本信息',
                inputSchema: {
                    type: 'object',
                    properties: {
                        videoPath,
                    },
                    required: ['videoPath'],
                },
            },
            {
                name: 'generate_video_script',
                description: '基于视频内容生成专业拍摄脚本',
                inputSchema: {
                    type: 'object',
                    properties: {
                        videoPath,
                        prompt: {
                            type: 'string',
                            description: '自定义脚本生成要求(可选)',
                        },
                        ...analysisFrameOptions,
                        ...scriptOptions,
                        ...credentialOptions,
                    },
                    required: ['videoPath'],
                },
            },
            {
                name: 'generate_image_script',
                description: '基于批量图片内容生成专业拍摄脚本',
                inputSchema: {
                    type: 'object',
                    properties: {
                        imagePaths,
                        prompt: {
                            type: 'string',
                            description: '自定义脚本生成要求(可选)',
                        },
                        ...scriptOptions,
                        ...credentialOptions,
                    },
                    required: ['imagePaths'],
                },
            },
        ];
    }
    /**
     * Registers the ListTools and CallTool request handlers on the MCP server.
     */
    setupToolHandlers() {
        // List every available tool.
        this.server.setRequestHandler(ListToolsRequestSchema, async () => {
            return { tools: this.buildToolDefinitions() };
        });
        // Dispatch tool calls.
        this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
            const { name, arguments: args } = request.params;
            try {
                switch (name) {
                    case 'extract_video_frames':
                        return await this.handleExtractFrames(args);
                    case 'analyze_video_content':
                        return await this.handleAnalyzeVideo(args);
                    case 'analyze_image_batch':
                        return await this.handleAnalyzeImageBatch(args);
                    case 'get_video_info':
                        return await this.handleGetVideoInfo(args);
                    case 'generate_video_script':
                        return await this.handleGenerateVideoScript(args);
                    case 'generate_image_script':
                        return await this.handleGenerateImageScript(args);
                    default:
                        throw new Error(`未知的工具: ${name}。支持的工具包括: extract_video_frames, analyze_video_content, analyze_image_batch, get_video_info, generate_video_script, generate_image_script`);
                }
            }
            catch (error) {
                // Failures are reported inside the tool result; isError lets the
                // client distinguish them from a successful call.
                return {
                    content: [
                        {
                            type: 'text',
                            text: this.formatError(error, name, args),
                        },
                    ],
                    isError: true,
                };
            }
        });
    }
    /**
     * Turns a thrown value into a user-facing message made of the base error,
     * the call context (with credentials masked) and troubleshooting hints
     * selected by matching substrings of the error text.
     *
     * @param {unknown} error - The thrown value.
     * @param {string} toolName - Name of the tool that was invoked.
     * @param {object} [args] - Arguments of the call; never mutated.
     * @returns {string} The formatted message.
     */
    formatError(error, toolName, args) {
        const baseError = error instanceof Error ? error.message : String(error);
        // Context block.
        let contextInfo = `\n\n[错误上下文]`;
        contextInfo += `\n- 工具名称: ${toolName}`;
        // Echo the arguments with the sensitive fields masked.
        const sanitizedArgs = { ...args };
        if (sanitizedArgs.secretId) {
            const rawKey = sanitizedArgs.secretId;
            // Only show a 4-character tail when the key is a string long enough
            // that the tail does not give most of it away; a non-string value
            // must not make the error formatter itself throw.
            sanitizedArgs.secretId = typeof rawKey === 'string' && rawKey.length > 8
                ? `***${rawKey.slice(-4)}`
                : '***';
        }
        if (sanitizedArgs.secretKey) {
            sanitizedArgs.secretKey = '***hidden***';
        }
        contextInfo += `\n- 调用参数: ${JSON.stringify(sanitizedArgs, null, 2)}`;
        // Pick suggestions that match the kind of failure.
        const mentions = (...needles) => needles.some((needle) => baseError.includes(needle));
        let suggestions = `\n\n[解决建议]`;
        if (mentions('API Key 缺失', '身份验证失败')) {
            suggestions += `\n1. 检查 VISION_API_KEY 环境变量或 secretId 参数是否设置了正确的 API Key`;
            suggestions += `\n2. 确认密钥未过期、未被禁用`;
            suggestions += `\n3. 在 https://open.bigmodel.cn/usercenter/apikeys 重新生成密钥`;
        }
        else if (mentions('访问量过大', '429', '调用频率')) {
            suggestions += `\n1. 免费视觉模型 glm-4.6v-flash 有限流,稍等片刻后重试`;
            suggestions += `\n2. 减少抽帧数量(maxFrames)以降低单次请求数`;
            suggestions += `\n3. 如需更高配额,在 Bigmodel 控制台升级套餐`;
        }
        else if (mentions('No such file', '不存在')) {
            suggestions += `\n1. 检查文件路径是否正确`;
            suggestions += `\n2. 确认文件是否存在`;
            suggestions += `\n3. 检查文件权限是否允许读取`;
        }
        else if (mentions('ffmpeg', 'Failed to get video info')) {
            suggestions += `\n1. 检查 FFmpeg 是否正确安装`;
            suggestions += `\n2. 确认视频文件格式是否受支持`;
            suggestions += `\n3. 检查视频文件是否损坏`;
            suggestions += `\n4. 尝试使用其他视频格式进行测试`;
        }
        else if (mentions('网络', 'timeout', 'ECONNREFUSED')) {
            suggestions += `\n1. 检查网络连接是否正常`;
            suggestions += `\n2. 确认防火墙设置不会阻止API调用`;
            suggestions += `\n3. 尝试稍后重试`;
            suggestions += `\n4. 检查代理设置(如果使用代理)`;
        }
        else {
            suggestions += `\n1. 检查所有输入参数是否正确`;
            suggestions += `\n2. 查看详细错误信息确定问题所在`;
            suggestions += `\n3. 如果问题持续,请联系技术支持`;
        }
        return `❌ 操作失败: ${baseError}${contextInfo}${suggestions}`;
    }
    /**
     * Resolves the credentials for one tool call.
     * API key: environment, then call argument, then the value captured at
     * construction. Endpoint: VISION_ENDPOINT, then call argument, then the
     * constructor value. secretKey: call argument, then the constructor value.
     *
     * @param {{secretId?: string, secretKey?: string, region?: string}} [callArgs]
     * @returns {{secretId: string, secretKey: (string|undefined), region: (string|undefined)}}
     * @throws {Error} When no API key can be found anywhere.
     */
    resolveCredentials({ secretId, secretKey, region } = {}) {
        const resolved = {
            secretId: process.env.VISION_API_KEY || process.env.TENCENT_SECRET_ID || secretId || this.secretId,
            secretKey: secretKey || this.secretKey,
            region: process.env.VISION_ENDPOINT || region || this.region,
        };
        if (!resolved.secretId) {
            throw new Error([
                '视觉模型 API Key 缺失。请通过以下方式之一提供:',
                '1. 环境变量:VISION_API_KEY',
                '2. 启动参数:--secret-id <key>',
                '3. 调用参数:secretId(即 API Key)',
                '',
                '默认后端:智谱 Bigmodel glm-4.6v-flash(OpenAI 兼容)。获取 key:https://open.bigmodel.cn/usercenter/apikeys',
            ].join('\n'));
        }
        return resolved;
    }
    /**
     * Validates the videoPath argument and checks that the file is accessible.
     *
     * @param {string} videoPath
     * @throws {Error} When the argument is missing or the file cannot be accessed.
     */
    async ensureVideoAccessible(videoPath) {
        this.requireVideoPath(videoPath);
        const fs = await import('fs/promises');
        try {
            await fs.access(videoPath);
        }
        catch {
            throw new Error(`视频文件不存在或无法访问: ${videoPath}`);
        }
    }
    /**
     * @param {string} videoPath
     * @throws {Error} When videoPath is missing or empty.
     */
    requireVideoPath(videoPath) {
        if (!videoPath) {
            throw new Error('视频路径参数(videoPath)是必需的');
        }
    }
    /**
     * @param {unknown} imagePaths
     * @throws {Error} When imagePaths is not a non-empty array.
     */
    requireImagePaths(imagePaths) {
        if (!imagePaths || !Array.isArray(imagePaths) || imagePaths.length === 0) {
            throw new Error('图片路径数组参数(imagePaths)是必需的,且不能为空');
        }
    }
    /**
     * Checks that every image path is accessible and reports all failing
     * paths at once, one per line, in their original order.
     *
     * @param {string[]} imagePaths
     * @throws {Error} Listing every path that could not be accessed.
     */
    async ensureImagesAccessible(imagePaths) {
        const fs = await import('fs/promises');
        const checks = await Promise.all(imagePaths.map(async (imagePath) => {
            try {
                await fs.access(imagePath);
                return { imagePath, ok: true };
            }
            catch {
                return { imagePath, ok: false };
            }
        }));
        const invalidPaths = checks.filter((check) => !check.ok).map((check) => check.imagePath);
        if (invalidPaths.length > 0) {
            throw new Error(`以下图片文件不存在或无法访问:\n${invalidPaths.join('\n')}`);
        }
    }
    /**
     * Runs a handler body, logging any failure to stderr under the given
     * label before rethrowing it unchanged.
     *
     * @template T
     * @param {string} failureLabel - Log prefix used when the task throws.
     * @param {() => Promise<T>} task
     * @returns {Promise<T>}
     */
    async runLogged(failureLabel, task) {
        try {
            return await task();
        }
        catch (error) {
            console.error(failureLabel, error);
            throw error;
        }
    }
    /**
     * Tool `extract_video_frames`: extracts frames from a video file.
     *
     * @param {{videoPath: string, maxFrames?: number, outputDir?: string, strategy?: string}} args
     * @returns {Promise<{content: Array<{type: string, text: string}>}>}
     * @throws {Error} On a missing/inaccessible video or when no frame was extracted.
     */
    async handleExtractFrames(args) {
        const { videoPath, maxFrames = 10, outputDir, strategy = 'uniform' } = args;
        return this.runLogged(`视频帧提取失败:`, async () => {
            await this.ensureVideoAccessible(videoPath);
            console.error(`开始提取视频帧: ${videoPath}`);
            console.error(`参数设置 - 最大帧数: ${maxFrames}, 策略: ${strategy}`);
            const frames = await this.frameExtractor.extractFrames(videoPath, {
                maxFrames,
                outputDir,
                strategy,
            });
            if (frames.length === 0) {
                throw new Error('未能从视频中提取到任何帧,请检查视频文件是否有效');
            }
            console.error(`成功提取 ${frames.length} 个视频帧`);
            const frameList = frames.map((frame, index) => `${index + 1}. ${frame}`).join('\n');
            return {
                content: [
                    {
                        type: 'text',
                        text: `✅ 成功从视频中提取了 ${frames.length} 个帧: ${videoPath}`,
                    },
                    {
                        type: 'text',
                        text: `📁 帧文件路径:\n${frameList}`,
                    },
                ],
            };
        });
    }
    /**
     * Tool `analyze_video_content`: samples frames and has the vision model
     * summarise the video.
     *
     * @param {{videoPath: string, prompt?: string, maxFrames?: number, strategy?: string,
     *          secretId?: string, secretKey?: string, region?: string}} args
     * @returns {Promise<{content: Array<{type: string, text: string}>}>}
     * @throws {Error} On a missing path, missing API key or inaccessible file.
     */
    async handleAnalyzeVideo(args) {
        const { videoPath, prompt, maxFrames = 5, strategy = 'keyframe' } = args;
        return this.runLogged(`视频分析失败:`, async () => {
            // Same validation order as before: argument, credentials, then file.
            this.requireVideoPath(videoPath);
            const credentials = this.resolveCredentials(args);
            await this.ensureVideoAccessible(videoPath);
            console.error(`开始分析视频内容: ${videoPath}`);
            console.error(`分析参数 - 最大帧数: ${maxFrames}, 策略: ${strategy}, 地域: ${credentials.region}`);
            const result = await this.videoProcessor.analyzeVideo(videoPath, {
                prompt,
                maxFrames,
                strategy,
                secretId: credentials.secretId,
                secretKey: credentials.secretKey,
                region: credentials.region,
            });
            console.error(`视频分析完成`);
            return {
                content: [
                    {
                        type: 'text',
                        text: `✅ 视频内容分析完成: ${videoPath}`,
                    },
                    {
                        type: 'text',
                        text: `📋 视频内容总结:\n${result.summary}`,
                    },
                ],
            };
        });
    }
    /**
     * Tool `analyze_image_batch`: analyses each image with the vision model
     * and returns per-image results plus aggregate statistics.
     *
     * @param {{imagePaths: string[], prompt?: string,
     *          secretId?: string, secretKey?: string, region?: string}} args
     * @returns {Promise<{content: Array<{type: string, text: string}>}>}
     * @throws {Error} On an empty path list, missing API key or inaccessible files.
     */
    async handleAnalyzeImageBatch(args) {
        const { imagePaths, prompt } = args;
        return this.runLogged(`批量图片分析失败:`, async () => {
            this.requireImagePaths(imagePaths);
            const credentials = this.resolveCredentials(args);
            await this.ensureImagesAccessible(imagePaths);
            console.error(`开始批量分析图片,共 ${imagePaths.length} 张图片`);
            console.error(`视觉后端端点: ${credentials.region || '(默认智谱 Bigmodel)'}`);
            const hunyuanClient = new HunyuanClient({
                secretId: credentials.secretId,
                secretKey: credentials.secretKey,
                region: credentials.region,
            });
            const results = await hunyuanClient.analyzeImageBatch(imagePaths, prompt);
            // A result whose content starts with 'Error' is counted as a failure.
            const successCount = results.filter((result) => !result.content.startsWith('Error')).length;
            // NOTE(review): failed results may not carry usage data (the client is
            // not visible here), so a missing usage counts as 0 instead of throwing.
            const totalTokens = results.reduce((sum, result) => sum + (result.usage?.totalTokens ?? 0), 0);
            console.error(`批量图片分析完成 - 成功: ${successCount}/${results.length}, 总计使用 ${totalTokens} 个token`);
            return {
                content: [
                    {
                        type: 'text',
                        text: `✅ 批量图片分析完成`,
                    },
                    {
                        type: 'text',
                        text: `📊 分析统计:\n- 总图片数: ${imagePaths.length}\n- 成功分析: ${successCount}\n- 失败数量: ${imagePaths.length - successCount}\n- Token使用: ${totalTokens}`,
                    },
                    {
                        type: 'text',
                        text: `🖼️ 详细分析结果:`,
                    },
                    ...results.map((result, index) => ({
                        type: 'text',
                        text: `\n📸 图片 ${index + 1} (${imagePaths[index]}):\n${result.content}`,
                    })),
                ],
            };
        });
    }
    /**
     * Tool `get_video_info`: reports duration, resolution, frame rate, frame
     * count and container format of a video file.
     *
     * @param {{videoPath: string}} args
     * @returns {Promise<{content: Array<{type: string, text: string}>}>}
     * @throws {Error} On a missing or inaccessible video.
     */
    async handleGetVideoInfo(args) {
        const { videoPath } = args;
        return this.runLogged(`获取视频信息失败:`, async () => {
            await this.ensureVideoAccessible(videoPath);
            console.error(`获取视频信息: ${videoPath}`);
            const info = await this.videoProcessor.getVideoInfo(videoPath);
            console.error(`视频信息获取完成 - 时长: ${info.duration}s, 分辨率: ${info.width}x${info.height}`);
            const minutes = Math.floor(info.duration / 60);
            const seconds = Math.floor(info.duration % 60);
            const lines = [
                `✅ 视频信息获取成功: ${videoPath}`,
                `📹 视频详细信息:`,
                `⏱️ 时长: ${info.duration.toFixed(2)} 秒 (${minutes}分${seconds}秒)`,
                `📐 分辨率: ${info.width} × ${info.height} 像素`,
                `🎞️ 帧率: ${info.frameRate.toFixed(2)} fps`,
                `🎬 总帧数: ${info.frameCount} 帧`,
                `📁 格式: ${info.format}`,
            ];
            return {
                content: lines.map((text) => ({ type: 'text', text })),
            };
        });
    }
    /**
     * Tool `generate_video_script`: analyses a video and generates a shooting
     * script from that analysis.
     *
     * @param {{videoPath: string, prompt?: string, maxFrames?: number, strategy?: string,
     *          scriptType?: string, targetDuration?: number, targetAudience?: string,
     *          style?: string, secretId?: string, secretKey?: string, region?: string}} args
     * @returns {Promise<{content: Array<{type: string, text: string}>}>}
     * @throws {Error} On a missing path, missing API key or inaccessible file.
     */
    async handleGenerateVideoScript(args) {
        const { videoPath, prompt, maxFrames = 5, strategy = 'keyframe', scriptType = 'commercial', targetDuration, targetAudience = '一般观众', style = '专业、吸引人' } = args;
        return this.runLogged(`视频脚本生成失败:`, async () => {
            this.requireVideoPath(videoPath);
            const credentials = this.resolveCredentials(args);
            await this.ensureVideoAccessible(videoPath);
            console.error(`开始生成视频拍摄脚本: ${videoPath}`);
            console.error(`脚本参数 - 类型: ${scriptType}, 最大帧数: ${maxFrames}, 策略: ${strategy}, 目标受众: ${targetAudience}`);
            const scriptOptions = {
                prompt,
                maxFrames,
                strategy,
                scriptType,
                targetDuration,
                targetAudience,
                style,
                secretId: credentials.secretId,
                secretKey: credentials.secretKey,
                region: credentials.region,
            };
            const result = await this.videoProcessor.generateVideoScript(videoPath, scriptOptions);
            const { totalTokens, analysisTokens, scriptTokens } = result.usage;
            console.error(`视频脚本生成完成 - 总Token使用: ${totalTokens} (分析: ${analysisTokens}, 脚本: ${scriptTokens})`);
            return {
                content: [
                    {
                        type: 'text',
                        text: `✅ 视频拍摄脚本生成完成: ${videoPath}`,
                    },
                    {
                        type: 'text',
                        text: `📊 Token使用统计:\n- 视频分析: ${analysisTokens} tokens\n- 脚本生成: ${scriptTokens} tokens\n- 总计: ${totalTokens} tokens`,
                    },
                    {
                        type: 'text',
                        text: `🎬 专业拍摄脚本:\n${result.script}`,
                    },
                    {
                        type: 'text',
                        text: `📝 原始视频分析:\n${result.videoAnalysis}`,
                    },
                ],
            };
        });
    }
    /**
     * Tool `generate_image_script`: analyses a batch of images and generates a
     * shooting script from that analysis.
     *
     * @param {{imagePaths: string[], prompt?: string, scriptType?: string,
     *          targetDuration?: number, targetAudience?: string, style?: string,
     *          secretId?: string, secretKey?: string, region?: string}} args
     * @returns {Promise<{content: Array<{type: string, text: string}>}>}
     * @throws {Error} On an empty path list, missing API key or inaccessible files.
     */
    async handleGenerateImageScript(args) {
        const { imagePaths, prompt, scriptType = 'commercial', targetDuration, targetAudience = '一般观众', style = '专业、吸引人' } = args;
        return this.runLogged('图片脚本生成失败:', async () => {
            this.requireImagePaths(imagePaths);
            const credentials = this.resolveCredentials(args);
            await this.ensureImagesAccessible(imagePaths);
            console.error(`开始基于 ${imagePaths.length} 张图片生成拍摄脚本`);
            console.error(`脚本参数 - 类型: ${scriptType}, 目标受众: ${targetAudience}, 风格: ${style}`);
            const result = await this.videoProcessor.generateImageScript(imagePaths, {
                prompt,
                scriptType,
                targetDuration,
                targetAudience,
                style,
                secretId: credentials.secretId,
                secretKey: credentials.secretKey,
                region: credentials.region,
            });
            const { totalTokens, analysisTokens, scriptTokens } = result.usage;
            console.error(`图片脚本生成完成 - 总Token使用: ${totalTokens} (分析: ${analysisTokens}, 脚本: ${scriptTokens})`);
            // These texts use real "\n" escapes. A backslash followed by a line
            // break inside a quoted string is a line continuation and would emit
            // no newline at all, collapsing every section onto a single line.
            return {
                content: [
                    {
                        type: 'text',
                        text: '✅ 基于图片的拍摄脚本生成完成',
                    },
                    {
                        type: 'text',
                        text: `📊 处理统计:\n- 图片数量: ${imagePaths.length}\n- 脚本类型: ${scriptType}\n- 目标受众: ${targetAudience}`,
                    },
                    {
                        type: 'text',
                        text: `📊 Token使用统计:\n- 图片分析: ${analysisTokens} tokens\n- 脚本生成: ${scriptTokens} tokens\n- 总计: ${totalTokens} tokens`,
                    },
                    {
                        type: 'text',
                        text: `🎬 专业拍摄脚本:\n${result.script}`,
                    },
                    {
                        type: 'text',
                        text: `📝 原始图片分析:\n${result.imageAnalysis}`,
                    },
                ],
            };
        });
    }
    /**
     * Connects the server to a stdio transport. The banner goes to stderr.
     */
    async run() {
        const transport = new StdioServerTransport();
        await this.server.connect(transport);
        console.error('Video MCP Server running on stdio');
    }
}
|
|
749
|
+
/**
 * Command-line entry point: parses the start-up options, then creates and
 * runs the server.
 *
 * Recognised flags: --secret-id, --secret-key, --region (each taking a value)
 * and --help / -h. A value may be given either as the next argument
 * (`--secret-id KEY`) or inline (`--secret-id=KEY`); the inline form is the
 * one shown in the built-in help example. Unknown arguments are ignored.
 *
 * @returns {Promise<void>} Resolves once the server is connected.
 */
async function main() {
    // Map each value-taking flag to the option name it fills in.
    const valueFlags = new Map([
        ['--secret-id', 'secretId'],
        ['--secret-key', 'secretKey'],
        ['--region', 'region'],
    ]);
    const cliArgs = process.argv.slice(2);
    const options = {};
    for (let i = 0; i < cliArgs.length; i++) {
        const token = cliArgs[i];
        if (token === '--help' || token === '-h') {
            console.error(`
Video MCP Server - 视频处理和分析服务

用法:
  node dist/index.js [选项]

选项:
  --secret-id <key>    视觉模型 API Key(也可用环境变量 VISION_API_KEY)
  --secret-key <key>   保留字段(OpenAI 兼容后端不需要)
  --region <url>       视觉模型端点 baseURL(默认智谱 Bigmodel paas/v4)
  --help, -h           显示帮助信息

环境变量:
  VISION_API_KEY       视觉/文本模型 API Key
  VISION_ENDPOINT      端点 baseURL(默认 https://open.bigmodel.cn/api/paas/v4)
  VISION_MODEL         视觉模型名(默认 glm-4.6v-flash)
  TEXT_MODEL           文本模型名(默认 glm-4.6)

示例:
  node dist/index.js --secret-id=your-api-key
`);
            process.exit(0);
            continue;
        }
        const equalsAt = token.indexOf('=');
        const flag = equalsAt === -1 ? token : token.slice(0, equalsAt);
        const optionName = valueFlags.get(flag);
        if (optionName === undefined) {
            continue;
        }
        if (equalsAt !== -1) {
            // Inline form: --flag=value. An empty value is treated as absent.
            const inlineValue = token.slice(equalsAt + 1);
            if (inlineValue) {
                options[optionName] = inlineValue;
            }
            continue;
        }
        // Separate form: --flag value. The next token is only consumed when it
        // does not look like another flag.
        const nextArg = cliArgs[i + 1];
        if (nextArg && !nextArg.startsWith('--')) {
            options[optionName] = nextArg;
            i++; // skip the value that was just consumed
        }
    }
    const server = new VideoMCPServer(options);
    await server.run();
}
|
|
805
|
+
// Start the server when this file is the script Node was launched with.
{
    // process.argv[1] can be undefined (for example under `node -e`), so guard
    // it instead of calling a string method on it. Backslashes are normalised
    // so Windows paths compare against the URL-style module path.
    const entryScript = process.argv[1]?.replace(/\\/g, '/') ?? '';
    // import.meta.url is percent-encoded; decode its path so that locations
    // containing spaces or non-ASCII characters still match the plain path.
    const modulePath = decodeURIComponent(new URL(import.meta.url).pathname);
    // The 'index.js' substring check is kept as the original fallback.
    if (entryScript !== '' &&
        (modulePath.endsWith(entryScript) || entryScript.includes('index.js'))) {
        main().catch((error) => {
            console.error('Server failed to start:', error);
            process.exit(1);
        });
    }
}
|
|
813
|
+
export { VideoMCPServer };
|