npm - multi-modal-mcp - Version diff - 0.0.1 → 0.0.3 - Mend

multi-modal-mcp 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md +6 -6
package/dist/config.js +1 -1
package/dist/tools/ImageGenerationTool.js +2 -6
package/dist/tools/MultiModalUnderstandingTool.js +17 -38
package/dist/tools/TextGenerationTool.js +2 -6
package/dist/tools/VideoGenerationTool.js +2 -15
package/package.json +2 -2

package/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# @juzi/multi-modal-mcp
+# multi-modal-mcp
 基于智谱 AI 的多模态 MCP 服务器，提供文本生成、图片生成、视频生成和多模态理解等功能。
@@ -12,13 +12,13 @@
 ## 安装
 ```bash
-npm install -g @juzi/multi-modal-mcp
+npm install -g multi-modal-mcp
 ```
 或使用 npx 直接运行：
 ```bash
-npx @juzi/multi-modal-mcp@latest
+npx multi-modal-mcp@latest
 ```
 ## 配置
@@ -36,13 +36,13 @@ export KEY="your-api-key"
 ### 直接运行
 ```bash
-npx @juzi/multi-modal-mcp@latest
+npx multi-modal-mcp@latest
 ```
 ### 使用 MCP Inspector 调试
 ```bash
-npx @modelcontextprotocol/inspector npx @juzi/multi-modal-mcp@latest
+npx @modelcontextprotocol/inspector npx multi-modal-mcp@latest
 ```
 ### 在 MCP 客户端中配置
@@ -54,7 +54,7 @@ npx @modelcontextprotocol/inspector npx @juzi/multi-modal-mcp@latest
   "mcpServers": {
     "multi-modal": {
       "command": "npx",
-      "args": ["-y", "@juzi/multi-modal-mcp@latest"],
+      "args": ["-y", "multi-modal-mcp@latest"],
       "env": {
         "KEY": "your-api-key"
       }

package/dist/config.js CHANGED Viewed

@@ -20,5 +20,5 @@ export const config = {
     /** MCP 服务器名称 */
     serverName: 'multi-modal-mcp',
     /** MCP 服务器版本 */
-    serverVersion: '1.0.0',
+    serverVersion: '0.0.3',
 };

package/dist/tools/ImageGenerationTool.js CHANGED Viewed

@@ -3,7 +3,7 @@
  * @Project_description: 智谱AI图片生成工具
  * @Description: 代码是我抄的，不会也是真的
  */
-import { MCPTool, logger } from 'mcp-framework';
+import { MCPTool } from 'mcp-framework';
 import { z } from 'zod';
 import { http } from '../utils/http.js';
 import { config } from '../config.js';
@@ -33,7 +33,6 @@ class ImageGenerationTool extends MCPTool {
      * 执行图片生成
      */
     async execute(input) {
-        logger.info(`开始执行图片生成，描述: ${input.prompt}，尺寸: ${input.size || '1024x1024'}`);
         try {
             const requestData = {
                 model: config.imageModel,
@@ -41,17 +40,14 @@ class ImageGenerationTool extends MCPTool {
                 size: input.size || '1024x1024',
                 watermark_enabled: input.watermark_enabled ?? false,
             };
-            logger.info('调用智谱AI图片生成API');
             const apiResponse = (await http.post('/images/generations', requestData));
             const imageUrl = apiResponse.data?.[0]?.url;
             if (!imageUrl)
                 throw new Error('生成图片失败，未返回图片URL');
-            logger.info('图片生成成功');
             return imageUrl;
         }
         catch (error) {
-            logger.error(`图片生成失败: ${error}`);
-            throw new Error(`生成图片时发生错误: ${error}`);
+            return `生成图片时发生错误: ${error}`;
         }
     }
 }

package/dist/tools/MultiModalUnderstandingTool.js CHANGED Viewed

@@ -3,7 +3,7 @@
  * @Project_description: 智谱AI多模态理解工具
  * @Description: 代码是我抄的，不会也是真的
  */
-import { MCPTool, logger } from 'mcp-framework';
+import { MCPTool } from 'mcp-framework';
 import { z } from 'zod';
 import { http } from '../utils/http.js';
 import { config } from '../config.js';
@@ -17,29 +17,12 @@ class MultiModalUnderstandingTool extends MCPTool {
     description = '基于GLM-4.6V-Flash模型的多模态理解工具，支持图片、视频、文档等多种媒体类型的智能分析与理解。可执行OCR文字识别、表格解析、内容分析、缺陷检测、图像转提示词、视频标签提取、关键帧提取、时间线生成、脚本生成、视频问答、文档问答、文档对比等多种任务。支持同时处理多种媒体类型。';
     /** 参数模式定义 */
     schema = z.object({
-        content: z
+        media_content: z
             .array(z.object({
             type: z
                 .enum(['image_url', 'video_url', 'file_url'])
                 .describe('媒体类型：image_url-图片，video_url-视频，file_url-文档'),
-            image_url: z
-                .object({
-                url: z.string().describe('图片的URL地址或Base64编码'),
-            })
-                .optional()
-                .describe('图片URL对象，当type为image_url时必填'),
-            video_url: z
-                .object({
-                url: z.string().describe('视频的URL地址'),
-            })
-                .optional()
-                .describe('视频URL对象，当type为video_url时必填'),
-            file_url: z
-                .object({
-                url: z.string().describe('文件的URL地址'),
-            })
-                .optional()
-                .describe('文件URL对象，当type为file_url时必填'),
+            url: z.string().describe('图片、视频、文档的URL地址'),
         }))
             .describe('媒体内容列表，支持混合图片、视频、文档'),
         question: z
@@ -58,9 +41,8 @@ class MultiModalUnderstandingTool extends MCPTool {
      * 执行多模态理解
      */
     async execute(input) {
-        logger.info(`开始执行多模态理解，问题: ${input.question}，媒体数量: ${input.content.length}`);
         try {
-            const messageContent = this.buildMessageContent(input.content, input.question);
+            const messageContent = this.buildMessageContent(input.media_content, input.question);
             const requestData = {
                 model: config.visualModel,
                 messages: [
@@ -75,7 +57,6 @@ class MultiModalUnderstandingTool extends MCPTool {
                 temperature: input.temperature,
                 stream: false,
             };
-            logger.info('调用智谱AI多模态理解API');
             const apiResponse = (await http.post('/chat/completions', requestData));
             const messageContentResult = apiResponse.choices?.[0]?.message?.content;
             let generatedText = '';
@@ -86,12 +67,10 @@ class MultiModalUnderstandingTool extends MCPTool {
                 const textItem = messageContentResult.find((item) => item.type === 'text');
                 generatedText = textItem?.text || '';
             }
-            logger.info('多模态理解成功');
             return generatedText;
         }
         catch (error) {
-            logger.error(`多模态理解失败: ${error}`);
-            throw new Error(`多模态理解时发生错误: ${error}`);
+            return `多模态理解时发生错误: ${error}`;
         }
     }
     /**
@@ -100,27 +79,27 @@ class MultiModalUnderstandingTool extends MCPTool {
     buildMessageContent(content, question) {
         const result = [];
         for (const item of content) {
-            if (item.type === 'image_url' && item.image_url) {
+            if (item.type === 'file_url') {
                 result.push({
-                    type: 'image_url',
-                    image_url: {
-                        url: item.image_url.url,
+                    type: item.type,
+                    file_url: {
+                        url: item.url,
                     },
                 });
             }
-            else if (item.type === 'video_url' && item.video_url) {
+            if (item.type === 'image_url') {
                 result.push({
-                    type: 'video_url',
-                    video_url: {
-                        url: item.video_url.url,
+                    type: item.type,
+                    image_url: {
+                        url: item.url,
                     },
                 });
             }
-            else if (item.type === 'file_url' && item.file_url) {
+            if (item.type === 'video_url') {
                 result.push({
-                    type: 'file_url',
-                    file_url: {
-                        url: item.file_url.url,
+                    type: item.type,
+                    video_url: {
+                        url: item.url,
                     },
                 });
             }

package/dist/tools/TextGenerationTool.js CHANGED Viewed

@@ -3,7 +3,7 @@
  * @Project_description: 智谱AI文本生成工具
  * @Description: 代码是我抄的，不会也是真的
  */
-import { MCPTool, logger } from 'mcp-framework';
+import { MCPTool } from 'mcp-framework';
 import { z } from 'zod';
 import { http } from '../utils/http.js';
 import { config } from '../config.js';
@@ -31,7 +31,6 @@ class TextGenerationTool extends MCPTool {
      * 执行文本生成
      */
     async execute(input) {
-        logger.info(`开始执行文本生成，输入: ${input.messages}`);
         try {
             // 构建请求参数
             const requestData = {
@@ -48,7 +47,6 @@ class TextGenerationTool extends MCPTool {
                 temperature: input.temperature,
                 stream: false, // 确保非流式返回
             };
-            logger.info('调用智谱AI文本生成API');
             // 调用智谱AI API
             const apiResponse = (await http.post('/chat/completions', requestData));
             // 提取生成的文本
@@ -63,12 +61,10 @@ class TextGenerationTool extends MCPTool {
                 const textItem = messageContent.find((item) => item.type === 'text');
                 generatedText = textItem?.text || '';
             }
-            logger.info('文本生成成功');
             return generatedText;
         }
         catch (error) {
-            logger.error(`文本生成失败: ${error}`);
-            throw new Error(`生成文本时发生错误: ${error}`);
+            return `生成文本时发生错误: ${error}`;
         }
     }
 }

package/dist/tools/VideoGenerationTool.js CHANGED Viewed

@@ -3,7 +3,7 @@
  * @Project_description: 智谱AI视频生成工具
  * @Description: 代码是我抄的，不会也是真的
  */
-import { MCPTool, logger } from 'mcp-framework';
+import { MCPTool } from 'mcp-framework';
 import { z } from 'zod';
 import { http } from '../utils/http.js';
 import { config } from '../config.js';
@@ -63,7 +63,6 @@ class VideoGenerationTool extends MCPTool {
      * 执行视频生成
      */
     async execute(input) {
-        logger.info(`开始执行视频生成，描述: ${input.prompt}，尺寸: ${input.size || '1024x1024'}，帧率: ${input.fps || 30}`);
         try {
             const requestData = {
                 model: config.videoModel,
@@ -75,19 +74,15 @@ class VideoGenerationTool extends MCPTool {
                 size: input.size,
                 fps: input.fps || 30,
             };
-            logger.info('调用智谱AI视频生成API，创建任务');
             const apiResponse = (await http.post('/videos/generations', requestData));
             const taskId = apiResponse.id;
             if (!taskId)
                 throw new Error('视频生成任务创建失败，未返回任务ID');
-            logger.info(`视频生成任务创建成功，任务ID: ${taskId}，开始轮询任务状态`);
             const videoUrl = await this.pollTaskStatus(taskId);
-            logger.info('视频生成成功');
             return videoUrl;
         }
         catch (error) {
-            logger.error(`视频生成失败: ${error}`);
-            throw new Error(`视频生成时发生错误: ${error}`);
+            return `视频生成时发生错误: ${error}`;
         }
     }
     /**
@@ -96,25 +91,17 @@ class VideoGenerationTool extends MCPTool {
     async pollTaskStatus(taskId) {
         const startTime = Date.now();
         const timeout = 60000;
-        logger.info(`开始轮询任务状态，任务ID: ${taskId}`);
         while (true) {
             const elapsedTime = Date.now() - startTime;
             if (elapsedTime >= timeout) {
-                logger.error(`视频生成超时，超过60秒未完成，任务ID: ${taskId}`);
                 throw new Error('视频生成超时，超过60秒未完成');
             }
             const apiResponse = (await http.get(`/async-result/${taskId}`));
             const taskData = apiResponse;
             if (taskData.task_status === 'SUCCESS' &&
                 taskData.video_result?.[0]?.url) {
-                logger.info(`任务完成，任务ID: ${taskId}，状态: ${taskData.task_status}`);
                 return taskData.video_result[0].url;
             }
-            if (taskData.task_status === 'FAILED') {
-                logger.error(`任务失败，任务ID: ${taskId}，状态: ${taskData.task_status}`);
-                throw new Error('视频生成失败');
-            }
-            logger.debug(`任务进行中，任务ID: ${taskId}，状态: ${taskData.task_status}，已等待: ${Math.floor(elapsedTime / 1000)}秒`);
             await new Promise((resolve) => setTimeout(resolve, 1000));
         }
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "multi-modal-mcp",
-  "version": "0.0.1",
+  "version": "0.0.3",
   "description": "multi-modal MCP server",
   "type": "module",
   "bin": {
@@ -48,4 +48,4 @@
   ],
   "author": "橘子",
   "license": "MIT"
-}
+}