multi-modal-mcp 0.0.1 → 0.0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @juzi/multi-modal-mcp
1
+ # multi-modal-mcp
2
2
 
3
3
  基于智谱 AI 的多模态 MCP 服务器,提供文本生成、图片生成、视频生成和多模态理解等功能。
4
4
 
@@ -12,13 +12,13 @@
12
12
  ## 安装
13
13
 
14
14
  ```bash
15
- npm install -g @juzi/multi-modal-mcp
15
+ npm install -g multi-modal-mcp
16
16
  ```
17
17
 
18
18
  或使用 npx 直接运行:
19
19
 
20
20
  ```bash
21
- npx @juzi/multi-modal-mcp@latest
21
+ npx multi-modal-mcp@latest
22
22
  ```
23
23
 
24
24
  ## 配置
@@ -36,13 +36,13 @@ export KEY="your-api-key"
36
36
  ### 直接运行
37
37
 
38
38
  ```bash
39
- npx @juzi/multi-modal-mcp@latest
39
+ npx multi-modal-mcp@latest
40
40
  ```
41
41
 
42
42
  ### 使用 MCP Inspector 调试
43
43
 
44
44
  ```bash
45
- npx @modelcontextprotocol/inspector npx @juzi/multi-modal-mcp@latest
45
+ npx @modelcontextprotocol/inspector npx multi-modal-mcp@latest
46
46
  ```
47
47
 
48
48
  ### 在 MCP 客户端中配置
@@ -54,7 +54,7 @@ npx @modelcontextprotocol/inspector npx @juzi/multi-modal-mcp@latest
54
54
  "mcpServers": {
55
55
  "multi-modal": {
56
56
  "command": "npx",
57
- "args": ["-y", "@juzi/multi-modal-mcp@latest"],
57
+ "args": ["-y", "multi-modal-mcp@latest"],
58
58
  "env": {
59
59
  "KEY": "your-api-key"
60
60
  }
package/dist/config.js CHANGED
@@ -20,5 +20,5 @@ export const config = {
20
20
  /** MCP 服务器名称 */
21
21
  serverName: 'multi-modal-mcp',
22
22
  /** MCP 服务器版本 */
23
- serverVersion: '1.0.0',
23
+ serverVersion: '0.0.3',
24
24
  };
@@ -3,7 +3,7 @@
3
3
  * @Project_description: 智谱AI图片生成工具
4
4
  * @Description: 代码是我抄的,不会也是真的
5
5
  */
6
- import { MCPTool, logger } from 'mcp-framework';
6
+ import { MCPTool } from 'mcp-framework';
7
7
  import { z } from 'zod';
8
8
  import { http } from '../utils/http.js';
9
9
  import { config } from '../config.js';
@@ -33,7 +33,6 @@ class ImageGenerationTool extends MCPTool {
33
33
  * 执行图片生成
34
34
  */
35
35
  async execute(input) {
36
- logger.info(`开始执行图片生成,描述: ${input.prompt},尺寸: ${input.size || '1024x1024'}`);
37
36
  try {
38
37
  const requestData = {
39
38
  model: config.imageModel,
@@ -41,17 +40,14 @@ class ImageGenerationTool extends MCPTool {
41
40
  size: input.size || '1024x1024',
42
41
  watermark_enabled: input.watermark_enabled ?? false,
43
42
  };
44
- logger.info('调用智谱AI图片生成API');
45
43
  const apiResponse = (await http.post('/images/generations', requestData));
46
44
  const imageUrl = apiResponse.data?.[0]?.url;
47
45
  if (!imageUrl)
48
46
  throw new Error('生成图片失败,未返回图片URL');
49
- logger.info('图片生成成功');
50
47
  return imageUrl;
51
48
  }
52
49
  catch (error) {
53
- logger.error(`图片生成失败: ${error}`);
54
- throw new Error(`生成图片时发生错误: ${error}`);
50
+ return `生成图片时发生错误: ${error}`;
55
51
  }
56
52
  }
57
53
  }
@@ -3,7 +3,7 @@
3
3
  * @Project_description: 智谱AI多模态理解工具
4
4
  * @Description: 代码是我抄的,不会也是真的
5
5
  */
6
- import { MCPTool, logger } from 'mcp-framework';
6
+ import { MCPTool } from 'mcp-framework';
7
7
  import { z } from 'zod';
8
8
  import { http } from '../utils/http.js';
9
9
  import { config } from '../config.js';
@@ -17,29 +17,12 @@ class MultiModalUnderstandingTool extends MCPTool {
17
17
  description = '基于GLM-4.6V-Flash模型的多模态理解工具,支持图片、视频、文档等多种媒体类型的智能分析与理解。可执行OCR文字识别、表格解析、内容分析、缺陷检测、图像转提示词、视频标签提取、关键帧提取、时间线生成、脚本生成、视频问答、文档问答、文档对比等多种任务。支持同时处理多种媒体类型。';
18
18
  /** 参数模式定义 */
19
19
  schema = z.object({
20
- content: z
20
+ media_content: z
21
21
  .array(z.object({
22
22
  type: z
23
23
  .enum(['image_url', 'video_url', 'file_url'])
24
24
  .describe('媒体类型:image_url-图片,video_url-视频,file_url-文档'),
25
- image_url: z
26
- .object({
27
- url: z.string().describe('图片的URL地址或Base64编码'),
28
- })
29
- .optional()
30
- .describe('图片URL对象,当type为image_url时必填'),
31
- video_url: z
32
- .object({
33
- url: z.string().describe('视频的URL地址'),
34
- })
35
- .optional()
36
- .describe('视频URL对象,当type为video_url时必填'),
37
- file_url: z
38
- .object({
39
- url: z.string().describe('文件的URL地址'),
40
- })
41
- .optional()
42
- .describe('文件URL对象,当type为file_url时必填'),
25
+ url: z.string().describe('图片、视频、文档的URL地址'),
43
26
  }))
44
27
  .describe('媒体内容列表,支持混合图片、视频、文档'),
45
28
  question: z
@@ -58,9 +41,8 @@ class MultiModalUnderstandingTool extends MCPTool {
58
41
  * 执行多模态理解
59
42
  */
60
43
  async execute(input) {
61
- logger.info(`开始执行多模态理解,问题: ${input.question},媒体数量: ${input.content.length}`);
62
44
  try {
63
- const messageContent = this.buildMessageContent(input.content, input.question);
45
+ const messageContent = this.buildMessageContent(input.media_content, input.question);
64
46
  const requestData = {
65
47
  model: config.visualModel,
66
48
  messages: [
@@ -75,7 +57,6 @@ class MultiModalUnderstandingTool extends MCPTool {
75
57
  temperature: input.temperature,
76
58
  stream: false,
77
59
  };
78
- logger.info('调用智谱AI多模态理解API');
79
60
  const apiResponse = (await http.post('/chat/completions', requestData));
80
61
  const messageContentResult = apiResponse.choices?.[0]?.message?.content;
81
62
  let generatedText = '';
@@ -86,12 +67,10 @@ class MultiModalUnderstandingTool extends MCPTool {
86
67
  const textItem = messageContentResult.find((item) => item.type === 'text');
87
68
  generatedText = textItem?.text || '';
88
69
  }
89
- logger.info('多模态理解成功');
90
70
  return generatedText;
91
71
  }
92
72
  catch (error) {
93
- logger.error(`多模态理解失败: ${error}`);
94
- throw new Error(`多模态理解时发生错误: ${error}`);
73
+ return `多模态理解时发生错误: ${error}`;
95
74
  }
96
75
  }
97
76
  /**
@@ -100,27 +79,27 @@ class MultiModalUnderstandingTool extends MCPTool {
100
79
  buildMessageContent(content, question) {
101
80
  const result = [];
102
81
  for (const item of content) {
103
- if (item.type === 'image_url' && item.image_url) {
82
+ if (item.type === 'file_url') {
104
83
  result.push({
105
- type: 'image_url',
106
- image_url: {
107
- url: item.image_url.url,
84
+ type: item.type,
85
+ file_url: {
86
+ url: item.url,
108
87
  },
109
88
  });
110
89
  }
111
- else if (item.type === 'video_url' && item.video_url) {
90
+ if (item.type === 'image_url') {
112
91
  result.push({
113
- type: 'video_url',
114
- video_url: {
115
- url: item.video_url.url,
92
+ type: item.type,
93
+ image_url: {
94
+ url: item.url,
116
95
  },
117
96
  });
118
97
  }
119
- else if (item.type === 'file_url' && item.file_url) {
98
+ if (item.type === 'video_url') {
120
99
  result.push({
121
- type: 'file_url',
122
- file_url: {
123
- url: item.file_url.url,
100
+ type: item.type,
101
+ video_url: {
102
+ url: item.url,
124
103
  },
125
104
  });
126
105
  }
@@ -3,7 +3,7 @@
3
3
  * @Project_description: 智谱AI文本生成工具
4
4
  * @Description: 代码是我抄的,不会也是真的
5
5
  */
6
- import { MCPTool, logger } from 'mcp-framework';
6
+ import { MCPTool } from 'mcp-framework';
7
7
  import { z } from 'zod';
8
8
  import { http } from '../utils/http.js';
9
9
  import { config } from '../config.js';
@@ -31,7 +31,6 @@ class TextGenerationTool extends MCPTool {
31
31
  * 执行文本生成
32
32
  */
33
33
  async execute(input) {
34
- logger.info(`开始执行文本生成,输入: ${input.messages}`);
35
34
  try {
36
35
  // 构建请求参数
37
36
  const requestData = {
@@ -48,7 +47,6 @@ class TextGenerationTool extends MCPTool {
48
47
  temperature: input.temperature,
49
48
  stream: false, // 确保非流式返回
50
49
  };
51
- logger.info('调用智谱AI文本生成API');
52
50
  // 调用智谱AI API
53
51
  const apiResponse = (await http.post('/chat/completions', requestData));
54
52
  // 提取生成的文本
@@ -63,12 +61,10 @@ class TextGenerationTool extends MCPTool {
63
61
  const textItem = messageContent.find((item) => item.type === 'text');
64
62
  generatedText = textItem?.text || '';
65
63
  }
66
- logger.info('文本生成成功');
67
64
  return generatedText;
68
65
  }
69
66
  catch (error) {
70
- logger.error(`文本生成失败: ${error}`);
71
- throw new Error(`生成文本时发生错误: ${error}`);
67
+ return `生成文本时发生错误: ${error}`;
72
68
  }
73
69
  }
74
70
  }
@@ -3,7 +3,7 @@
3
3
  * @Project_description: 智谱AI视频生成工具
4
4
  * @Description: 代码是我抄的,不会也是真的
5
5
  */
6
- import { MCPTool, logger } from 'mcp-framework';
6
+ import { MCPTool } from 'mcp-framework';
7
7
  import { z } from 'zod';
8
8
  import { http } from '../utils/http.js';
9
9
  import { config } from '../config.js';
@@ -63,7 +63,6 @@ class VideoGenerationTool extends MCPTool {
63
63
  * 执行视频生成
64
64
  */
65
65
  async execute(input) {
66
- logger.info(`开始执行视频生成,描述: ${input.prompt},尺寸: ${input.size || '1024x1024'},帧率: ${input.fps || 30}`);
67
66
  try {
68
67
  const requestData = {
69
68
  model: config.videoModel,
@@ -75,19 +74,15 @@ class VideoGenerationTool extends MCPTool {
75
74
  size: input.size,
76
75
  fps: input.fps || 30,
77
76
  };
78
- logger.info('调用智谱AI视频生成API,创建任务');
79
77
  const apiResponse = (await http.post('/videos/generations', requestData));
80
78
  const taskId = apiResponse.id;
81
79
  if (!taskId)
82
80
  throw new Error('视频生成任务创建失败,未返回任务ID');
83
- logger.info(`视频生成任务创建成功,任务ID: ${taskId},开始轮询任务状态`);
84
81
  const videoUrl = await this.pollTaskStatus(taskId);
85
- logger.info('视频生成成功');
86
82
  return videoUrl;
87
83
  }
88
84
  catch (error) {
89
- logger.error(`视频生成失败: ${error}`);
90
- throw new Error(`视频生成时发生错误: ${error}`);
85
+ return `视频生成时发生错误: ${error}`;
91
86
  }
92
87
  }
93
88
  /**
@@ -96,25 +91,17 @@ class VideoGenerationTool extends MCPTool {
96
91
  async pollTaskStatus(taskId) {
97
92
  const startTime = Date.now();
98
93
  const timeout = 60000;
99
- logger.info(`开始轮询任务状态,任务ID: ${taskId}`);
100
94
  while (true) {
101
95
  const elapsedTime = Date.now() - startTime;
102
96
  if (elapsedTime >= timeout) {
103
- logger.error(`视频生成超时,超过60秒未完成,任务ID: ${taskId}`);
104
97
  throw new Error('视频生成超时,超过60秒未完成');
105
98
  }
106
99
  const apiResponse = (await http.get(`/async-result/${taskId}`));
107
100
  const taskData = apiResponse;
108
101
  if (taskData.task_status === 'SUCCESS' &&
109
102
  taskData.video_result?.[0]?.url) {
110
- logger.info(`任务完成,任务ID: ${taskId},状态: ${taskData.task_status}`);
111
103
  return taskData.video_result[0].url;
112
104
  }
113
- if (taskData.task_status === 'FAILED') {
114
- logger.error(`任务失败,任务ID: ${taskId},状态: ${taskData.task_status}`);
115
- throw new Error('视频生成失败');
116
- }
117
- logger.debug(`任务进行中,任务ID: ${taskId},状态: ${taskData.task_status},已等待: ${Math.floor(elapsedTime / 1000)}秒`);
118
105
  await new Promise((resolve) => setTimeout(resolve, 1000));
119
106
  }
120
107
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "multi-modal-mcp",
3
- "version": "0.0.1",
3
+ "version": "0.0.3",
4
4
  "description": "multi-modal MCP server",
5
5
  "type": "module",
6
6
  "bin": {
@@ -48,4 +48,4 @@
48
48
  ],
49
49
  "author": "橘子",
50
50
  "license": "MIT"
51
- }
51
+ }