multi-modal-mcp 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/config.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* @Project_description: 智谱AI图片生成工具
|
|
4
4
|
* @Description: 代码是我抄的,不会也是真的
|
|
5
5
|
*/
|
|
6
|
-
import { MCPTool
|
|
6
|
+
import { MCPTool } from 'mcp-framework';
|
|
7
7
|
import { z } from 'zod';
|
|
8
8
|
import { http } from '../utils/http.js';
|
|
9
9
|
import { config } from '../config.js';
|
|
@@ -33,7 +33,6 @@ class ImageGenerationTool extends MCPTool {
|
|
|
33
33
|
* 执行图片生成
|
|
34
34
|
*/
|
|
35
35
|
async execute(input) {
|
|
36
|
-
logger.info(`开始执行图片生成,描述: ${input.prompt},尺寸: ${input.size || '1024x1024'}`);
|
|
37
36
|
try {
|
|
38
37
|
const requestData = {
|
|
39
38
|
model: config.imageModel,
|
|
@@ -41,17 +40,14 @@ class ImageGenerationTool extends MCPTool {
|
|
|
41
40
|
size: input.size || '1024x1024',
|
|
42
41
|
watermark_enabled: input.watermark_enabled ?? false,
|
|
43
42
|
};
|
|
44
|
-
logger.info('调用智谱AI图片生成API');
|
|
45
43
|
const apiResponse = (await http.post('/images/generations', requestData));
|
|
46
44
|
const imageUrl = apiResponse.data?.[0]?.url;
|
|
47
45
|
if (!imageUrl)
|
|
48
46
|
throw new Error('生成图片失败,未返回图片URL');
|
|
49
|
-
logger.info('图片生成成功');
|
|
50
47
|
return imageUrl;
|
|
51
48
|
}
|
|
52
49
|
catch (error) {
|
|
53
|
-
|
|
54
|
-
throw new Error(`生成图片时发生错误: ${error}`);
|
|
50
|
+
return `生成图片时发生错误: ${error}`;
|
|
55
51
|
}
|
|
56
52
|
}
|
|
57
53
|
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* @Project_description: 智谱AI多模态理解工具
|
|
4
4
|
* @Description: 代码是我抄的,不会也是真的
|
|
5
5
|
*/
|
|
6
|
-
import { MCPTool
|
|
6
|
+
import { MCPTool } from 'mcp-framework';
|
|
7
7
|
import { z } from 'zod';
|
|
8
8
|
import { http } from '../utils/http.js';
|
|
9
9
|
import { config } from '../config.js';
|
|
@@ -17,29 +17,12 @@ class MultiModalUnderstandingTool extends MCPTool {
|
|
|
17
17
|
description = '基于GLM-4.6V-Flash模型的多模态理解工具,支持图片、视频、文档等多种媒体类型的智能分析与理解。可执行OCR文字识别、表格解析、内容分析、缺陷检测、图像转提示词、视频标签提取、关键帧提取、时间线生成、脚本生成、视频问答、文档问答、文档对比等多种任务。支持同时处理多种媒体类型。';
|
|
18
18
|
/** 参数模式定义 */
|
|
19
19
|
schema = z.object({
|
|
20
|
-
|
|
20
|
+
media_content: z
|
|
21
21
|
.array(z.object({
|
|
22
22
|
type: z
|
|
23
23
|
.enum(['image_url', 'video_url', 'file_url'])
|
|
24
24
|
.describe('媒体类型:image_url-图片,video_url-视频,file_url-文档'),
|
|
25
|
-
|
|
26
|
-
.object({
|
|
27
|
-
url: z.string().describe('图片的URL地址或Base64编码'),
|
|
28
|
-
})
|
|
29
|
-
.optional()
|
|
30
|
-
.describe('图片URL对象,当type为image_url时必填'),
|
|
31
|
-
video_url: z
|
|
32
|
-
.object({
|
|
33
|
-
url: z.string().describe('视频的URL地址'),
|
|
34
|
-
})
|
|
35
|
-
.optional()
|
|
36
|
-
.describe('视频URL对象,当type为video_url时必填'),
|
|
37
|
-
file_url: z
|
|
38
|
-
.object({
|
|
39
|
-
url: z.string().describe('文件的URL地址'),
|
|
40
|
-
})
|
|
41
|
-
.optional()
|
|
42
|
-
.describe('文件URL对象,当type为file_url时必填'),
|
|
25
|
+
url: z.string().describe('图片、视频、文档的URL地址'),
|
|
43
26
|
}))
|
|
44
27
|
.describe('媒体内容列表,支持混合图片、视频、文档'),
|
|
45
28
|
question: z
|
|
@@ -58,9 +41,8 @@ class MultiModalUnderstandingTool extends MCPTool {
|
|
|
58
41
|
* 执行多模态理解
|
|
59
42
|
*/
|
|
60
43
|
async execute(input) {
|
|
61
|
-
logger.info(`开始执行多模态理解,问题: ${input.question},媒体数量: ${input.content.length}`);
|
|
62
44
|
try {
|
|
63
|
-
const messageContent = this.buildMessageContent(input.
|
|
45
|
+
const messageContent = this.buildMessageContent(input.media_content, input.question);
|
|
64
46
|
const requestData = {
|
|
65
47
|
model: config.visualModel,
|
|
66
48
|
messages: [
|
|
@@ -75,7 +57,6 @@ class MultiModalUnderstandingTool extends MCPTool {
|
|
|
75
57
|
temperature: input.temperature,
|
|
76
58
|
stream: false,
|
|
77
59
|
};
|
|
78
|
-
logger.info('调用智谱AI多模态理解API');
|
|
79
60
|
const apiResponse = (await http.post('/chat/completions', requestData));
|
|
80
61
|
const messageContentResult = apiResponse.choices?.[0]?.message?.content;
|
|
81
62
|
let generatedText = '';
|
|
@@ -86,12 +67,10 @@ class MultiModalUnderstandingTool extends MCPTool {
|
|
|
86
67
|
const textItem = messageContentResult.find((item) => item.type === 'text');
|
|
87
68
|
generatedText = textItem?.text || '';
|
|
88
69
|
}
|
|
89
|
-
logger.info('多模态理解成功');
|
|
90
70
|
return generatedText;
|
|
91
71
|
}
|
|
92
72
|
catch (error) {
|
|
93
|
-
|
|
94
|
-
throw new Error(`多模态理解时发生错误: ${error}`);
|
|
73
|
+
return `多模态理解时发生错误: ${error}`;
|
|
95
74
|
}
|
|
96
75
|
}
|
|
97
76
|
/**
|
|
@@ -100,27 +79,27 @@ class MultiModalUnderstandingTool extends MCPTool {
|
|
|
100
79
|
buildMessageContent(content, question) {
|
|
101
80
|
const result = [];
|
|
102
81
|
for (const item of content) {
|
|
103
|
-
if (item.type === '
|
|
82
|
+
if (item.type === 'file_url') {
|
|
104
83
|
result.push({
|
|
105
|
-
type:
|
|
106
|
-
|
|
107
|
-
url: item.
|
|
84
|
+
type: item.type,
|
|
85
|
+
file_url: {
|
|
86
|
+
url: item.url,
|
|
108
87
|
},
|
|
109
88
|
});
|
|
110
89
|
}
|
|
111
|
-
|
|
90
|
+
if (item.type === 'image_url') {
|
|
112
91
|
result.push({
|
|
113
|
-
type:
|
|
114
|
-
|
|
115
|
-
url: item.
|
|
92
|
+
type: item.type,
|
|
93
|
+
image_url: {
|
|
94
|
+
url: item.url,
|
|
116
95
|
},
|
|
117
96
|
});
|
|
118
97
|
}
|
|
119
|
-
|
|
98
|
+
if (item.type === 'video_url') {
|
|
120
99
|
result.push({
|
|
121
|
-
type:
|
|
122
|
-
|
|
123
|
-
url: item.
|
|
100
|
+
type: item.type,
|
|
101
|
+
video_url: {
|
|
102
|
+
url: item.url,
|
|
124
103
|
},
|
|
125
104
|
});
|
|
126
105
|
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* @Project_description: 智谱AI文本生成工具
|
|
4
4
|
* @Description: 代码是我抄的,不会也是真的
|
|
5
5
|
*/
|
|
6
|
-
import { MCPTool
|
|
6
|
+
import { MCPTool } from 'mcp-framework';
|
|
7
7
|
import { z } from 'zod';
|
|
8
8
|
import { http } from '../utils/http.js';
|
|
9
9
|
import { config } from '../config.js';
|
|
@@ -31,7 +31,6 @@ class TextGenerationTool extends MCPTool {
|
|
|
31
31
|
* 执行文本生成
|
|
32
32
|
*/
|
|
33
33
|
async execute(input) {
|
|
34
|
-
logger.info(`开始执行文本生成,输入: ${input.messages}`);
|
|
35
34
|
try {
|
|
36
35
|
// 构建请求参数
|
|
37
36
|
const requestData = {
|
|
@@ -48,7 +47,6 @@ class TextGenerationTool extends MCPTool {
|
|
|
48
47
|
temperature: input.temperature,
|
|
49
48
|
stream: false, // 确保非流式返回
|
|
50
49
|
};
|
|
51
|
-
logger.info('调用智谱AI文本生成API');
|
|
52
50
|
// 调用智谱AI API
|
|
53
51
|
const apiResponse = (await http.post('/chat/completions', requestData));
|
|
54
52
|
// 提取生成的文本
|
|
@@ -63,12 +61,10 @@ class TextGenerationTool extends MCPTool {
|
|
|
63
61
|
const textItem = messageContent.find((item) => item.type === 'text');
|
|
64
62
|
generatedText = textItem?.text || '';
|
|
65
63
|
}
|
|
66
|
-
logger.info('文本生成成功');
|
|
67
64
|
return generatedText;
|
|
68
65
|
}
|
|
69
66
|
catch (error) {
|
|
70
|
-
|
|
71
|
-
throw new Error(`生成文本时发生错误: ${error}`);
|
|
67
|
+
return `生成文本时发生错误: ${error}`;
|
|
72
68
|
}
|
|
73
69
|
}
|
|
74
70
|
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* @Project_description: 智谱AI视频生成工具
|
|
4
4
|
* @Description: 代码是我抄的,不会也是真的
|
|
5
5
|
*/
|
|
6
|
-
import { MCPTool
|
|
6
|
+
import { MCPTool } from 'mcp-framework';
|
|
7
7
|
import { z } from 'zod';
|
|
8
8
|
import { http } from '../utils/http.js';
|
|
9
9
|
import { config } from '../config.js';
|
|
@@ -63,7 +63,6 @@ class VideoGenerationTool extends MCPTool {
|
|
|
63
63
|
* 执行视频生成
|
|
64
64
|
*/
|
|
65
65
|
async execute(input) {
|
|
66
|
-
logger.info(`开始执行视频生成,描述: ${input.prompt},尺寸: ${input.size || '1024x1024'},帧率: ${input.fps || 30}`);
|
|
67
66
|
try {
|
|
68
67
|
const requestData = {
|
|
69
68
|
model: config.videoModel,
|
|
@@ -75,19 +74,15 @@ class VideoGenerationTool extends MCPTool {
|
|
|
75
74
|
size: input.size,
|
|
76
75
|
fps: input.fps || 30,
|
|
77
76
|
};
|
|
78
|
-
logger.info('调用智谱AI视频生成API,创建任务');
|
|
79
77
|
const apiResponse = (await http.post('/videos/generations', requestData));
|
|
80
78
|
const taskId = apiResponse.id;
|
|
81
79
|
if (!taskId)
|
|
82
80
|
throw new Error('视频生成任务创建失败,未返回任务ID');
|
|
83
|
-
logger.info(`视频生成任务创建成功,任务ID: ${taskId},开始轮询任务状态`);
|
|
84
81
|
const videoUrl = await this.pollTaskStatus(taskId);
|
|
85
|
-
logger.info('视频生成成功');
|
|
86
82
|
return videoUrl;
|
|
87
83
|
}
|
|
88
84
|
catch (error) {
|
|
89
|
-
|
|
90
|
-
throw new Error(`视频生成时发生错误: ${error}`);
|
|
85
|
+
return `视频生成时发生错误: ${error}`;
|
|
91
86
|
}
|
|
92
87
|
}
|
|
93
88
|
/**
|
|
@@ -96,25 +91,17 @@ class VideoGenerationTool extends MCPTool {
|
|
|
96
91
|
async pollTaskStatus(taskId) {
|
|
97
92
|
const startTime = Date.now();
|
|
98
93
|
const timeout = 60000;
|
|
99
|
-
logger.info(`开始轮询任务状态,任务ID: ${taskId}`);
|
|
100
94
|
while (true) {
|
|
101
95
|
const elapsedTime = Date.now() - startTime;
|
|
102
96
|
if (elapsedTime >= timeout) {
|
|
103
|
-
logger.error(`视频生成超时,超过60秒未完成,任务ID: ${taskId}`);
|
|
104
97
|
throw new Error('视频生成超时,超过60秒未完成');
|
|
105
98
|
}
|
|
106
99
|
const apiResponse = (await http.get(`/async-result/${taskId}`));
|
|
107
100
|
const taskData = apiResponse;
|
|
108
101
|
if (taskData.task_status === 'SUCCESS' &&
|
|
109
102
|
taskData.video_result?.[0]?.url) {
|
|
110
|
-
logger.info(`任务完成,任务ID: ${taskId},状态: ${taskData.task_status}`);
|
|
111
103
|
return taskData.video_result[0].url;
|
|
112
104
|
}
|
|
113
|
-
if (taskData.task_status === 'FAILED') {
|
|
114
|
-
logger.error(`任务失败,任务ID: ${taskId},状态: ${taskData.task_status}`);
|
|
115
|
-
throw new Error('视频生成失败');
|
|
116
|
-
}
|
|
117
|
-
logger.debug(`任务进行中,任务ID: ${taskId},状态: ${taskData.task_status},已等待: ${Math.floor(elapsedTime / 1000)}秒`);
|
|
118
105
|
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
119
106
|
}
|
|
120
107
|
}
|