mcp-hydrocoder-vision 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +129 -0
- package/README_CN.md +136 -0
- package/package.json +39 -0
- package/src/index.ts +283 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 MCP HydroCoder Vision
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# MCP HydroCoder Vision
|
|
2
|
+
|
|
3
|
+
A vision-language MCP server that enables Claude Code to analyze images using **Qwen3 VL 4B** model running locally via LM Studio.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🔍 **Image Analysis** - Describe images in detail
|
|
8
|
+
- 📝 **Text Extraction (OCR)** - Extract text from images in multiple languages
|
|
9
|
+
- 💻 **UI to Code** - Generate HTML/CSS/JS code from UI/design screenshots
|
|
10
|
+
- 🏠 **100% Local** - All processing happens on your machine, no cloud API needed
|
|
11
|
+
- ⚡ **Fast** - Qwen3 VL 4B runs efficiently on 8GB VRAM
|
|
12
|
+
|
|
13
|
+
## Prerequisites
|
|
14
|
+
|
|
15
|
+
1. **LM Studio** installed and running
|
|
16
|
+
2. **Qwen3 VL 4B** model loaded in LM Studio
|
|
17
|
+
3. **Node.js 18+**
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# Navigate to the project directory
|
|
23
|
+
cd C:\workspace\develop\ccExtensions\mcpHydroVision
|
|
24
|
+
|
|
25
|
+
# Install dependencies
|
|
26
|
+
npm install
|
|
27
|
+
|
|
28
|
+
# Build the project
|
|
29
|
+
npm run build
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Configuration
|
|
33
|
+
|
|
34
|
+
### 1. Start LM Studio
|
|
35
|
+
|
|
36
|
+
1. Open LM Studio
|
|
37
|
+
2. Download and load `Qwen3-VL-4B-Instruct` model
|
|
38
|
+
3. Start the local server (default: `http://localhost:1234`)
|
|
39
|
+
|
|
40
|
+
### 2. Configure Claude Code
|
|
41
|
+
|
|
42
|
+
Add to your `~/.claude/settings.json`:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"mcpServers": {
|
|
47
|
+
"hydrocoder-vision": {
|
|
48
|
+
"command": "node",
|
|
49
|
+
"args": ["C:/workspace/develop/ccExtensions/mcpHydroVision/dist/index.js"],
|
|
50
|
+
"env": {
|
|
51
|
+
"LM_STUDIO_URL": "http://localhost:1234/v1/chat/completions",
|
|
52
|
+
"VISION_MODEL": "Qwen3-VL-4B-Instruct"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Usage
|
|
60
|
+
|
|
61
|
+
### Available Tools
|
|
62
|
+
|
|
63
|
+
#### `analyzeImage`
|
|
64
|
+
|
|
65
|
+
Analyze an image and get a detailed description.
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
/analyzeImage imagePath: "C:/path/to/image.png" prompt: "What's in this image?"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
#### `extractText`
|
|
72
|
+
|
|
73
|
+
Extract text from an image (OCR).
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
/extractText imagePath: "C:/path/to/document.png" language: "English"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
#### `describeForCode`
|
|
80
|
+
|
|
81
|
+
Generate code from a UI/design screenshot.
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
/describeForCode imagePath: "C:/path/to/design.png" framework: "Vue"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Environment Variables
|
|
88
|
+
|
|
89
|
+
| Variable | Default | Description |
|
|
90
|
+
|----------|---------|-------------|
|
|
91
|
+
| `LM_STUDIO_URL` | `http://localhost:1234/v1/chat/completions` | LM Studio API endpoint |
|
|
92
|
+
| `VISION_MODEL` | `Qwen3-VL-4B-Instruct` | Model name to use |
|
|
93
|
+
|
|
94
|
+
## Development
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# Run in development mode (watch mode)
|
|
98
|
+
npm run dev
|
|
99
|
+
|
|
100
|
+
# Build for production
|
|
101
|
+
npm run build
|
|
102
|
+
|
|
103
|
+
# Start the built server
|
|
104
|
+
npm start
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Troubleshooting
|
|
108
|
+
|
|
109
|
+
### "Request failed: ECONNREFUSED"
|
|
110
|
+
|
|
111
|
+
- Make sure LM Studio is running
|
|
112
|
+
- Check that the local server is enabled
|
|
113
|
+
- Verify the `LM_STUDIO_URL` is correct
|
|
114
|
+
|
|
115
|
+
### "No response from model"
|
|
116
|
+
|
|
117
|
+
- Ensure Qwen3 VL 4B model is loaded in LM Studio
|
|
118
|
+
- Check LM Studio logs for errors
|
|
119
|
+
- Try a simpler prompt first
|
|
120
|
+
|
|
121
|
+
### Image not found
|
|
122
|
+
|
|
123
|
+
- Use absolute paths
|
|
124
|
+
- Ensure the file exists and is accessible
|
|
125
|
+
- Check file permissions
|
|
126
|
+
|
|
127
|
+
## License
|
|
128
|
+
|
|
129
|
+
MIT
|
package/README_CN.md
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# MCP HydroCoder Vision
|
|
2
|
+
|
|
3
|
+
基于 **Qwen3 VL 4B** 模型的本地视觉语言 MCP 服务器,让 Claude Code 能够识别和分析图像。
|
|
4
|
+
|
|
5
|
+
## 功能特性
|
|
6
|
+
|
|
7
|
+
- 🔍 **图像分析** - 详细描述图像内容
|
|
8
|
+
- 📝 **文字提取 (OCR)** - 支持多种语言的文字提取
|
|
9
|
+
- 💻 **UI 转代码** - 从 UI/设计截图生成 HTML/CSS/JS 代码
|
|
10
|
+
- 🏠 **100% 本地** - 所有处理在本地完成,无需云端 API
|
|
11
|
+
- ⚡ **快速** - Qwen3 VL 4B 在 8GB 显存上高效运行
|
|
12
|
+
|
|
13
|
+
## 前置要求
|
|
14
|
+
|
|
15
|
+
1. **LM Studio** 已安装并运行
|
|
16
|
+
2. **Qwen3 VL 4B** 模型已加载到 LM Studio
|
|
17
|
+
3. **Node.js 18+**
|
|
18
|
+
|
|
19
|
+
## 安装步骤
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# 进入项目目录
|
|
23
|
+
cd C:\workspace\develop\ccExtensions\mcpHydroVision
|
|
24
|
+
|
|
25
|
+
# 安装依赖
|
|
26
|
+
npm install
|
|
27
|
+
|
|
28
|
+
# 构建项目
|
|
29
|
+
npm run build
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## 配置说明
|
|
33
|
+
|
|
34
|
+
### 1. 启动 LM Studio
|
|
35
|
+
|
|
36
|
+
1. 打开 LM Studio
|
|
37
|
+
2. 下载并加载 `Qwen3-VL-4B-Instruct` 模型
|
|
38
|
+
3. 启动本地服务器(默认:`http://localhost:1234`)
|
|
39
|
+
|
|
40
|
+
### 2. 配置 Claude Code
|
|
41
|
+
|
|
42
|
+
在 `~/.claude/settings.json` 中添加:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"mcpServers": {
|
|
47
|
+
"hydrocoder-vision": {
|
|
48
|
+
"command": "node",
|
|
49
|
+
"args": ["C:/workspace/develop/ccExtensions/mcpHydroVision/dist/index.js"],
|
|
50
|
+
"env": {
|
|
51
|
+
"LM_STUDIO_URL": "http://localhost:1234/v1/chat/completions",
|
|
52
|
+
"VISION_MODEL": "Qwen3-VL-4B-Instruct"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## 使用方法
|
|
60
|
+
|
|
61
|
+
### 可用工具
|
|
62
|
+
|
|
63
|
+
#### `analyzeImage` - 图像分析
|
|
64
|
+
|
|
65
|
+
分析图像并获取详细描述。
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
/analyzeImage imagePath: "C:/path/to/image.png" prompt: "这张图片里有什么?"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
#### `extractText` - 文字提取
|
|
72
|
+
|
|
73
|
+
从图像中提取文字(OCR)。
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
/extractText imagePath: "C:/path/to/document.png" language: "Chinese"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
#### `describeForCode` - UI 转代码
|
|
80
|
+
|
|
81
|
+
从 UI/设计截图生成代码。
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
/describeForCode imagePath: "C:/path/to/design.png" framework: "Vue"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## 环境变量
|
|
88
|
+
|
|
89
|
+
| 变量 | 默认值 | 说明 |
|
|
90
|
+
|----------|---------|-------------|
|
|
91
|
+
| `LM_STUDIO_URL` | `http://localhost:1234/v1/chat/completions` | LM Studio API 端点 |
|
|
92
|
+
| `VISION_MODEL` | `Qwen3-VL-4B-Instruct` | 使用的模型名称 |
|
|
93
|
+
|
|
94
|
+
## 开发命令
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# 开发模式(监听模式)
|
|
98
|
+
npm run dev
|
|
99
|
+
|
|
100
|
+
# 生产构建
|
|
101
|
+
npm run build
|
|
102
|
+
|
|
103
|
+
# 启动服务器
|
|
104
|
+
npm start
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## 常见问题
|
|
108
|
+
|
|
109
|
+
### "Request failed: ECONNREFUSED"
|
|
110
|
+
|
|
111
|
+
- 确保 LM Studio 正在运行
|
|
112
|
+
- 检查本地服务器已启用
|
|
113
|
+
- 验证 `LM_STUDIO_URL` 配置正确
|
|
114
|
+
|
|
115
|
+
### "No response from model"
|
|
116
|
+
|
|
117
|
+
- 确保 Qwen3 VL 4B 模型已在 LM Studio 中加载
|
|
118
|
+
- 检查 LM Studio 日志
|
|
119
|
+
- 先尝试简单的提示
|
|
120
|
+
|
|
121
|
+
### 图像未找到
|
|
122
|
+
|
|
123
|
+
- 使用绝对路径
|
|
124
|
+
- 确保文件存在且可访问
|
|
125
|
+
- 检查文件权限
|
|
126
|
+
|
|
127
|
+
## 技术栈
|
|
128
|
+
|
|
129
|
+
- **MCP SDK** - Model Context Protocol
|
|
130
|
+
- **Qwen3 VL 4B** - 视觉语言模型
|
|
131
|
+
- **LM Studio** - 本地模型推理
|
|
132
|
+
- **TypeScript** - 类型安全
|
|
133
|
+
|
|
134
|
+
## 许可证
|
|
135
|
+
|
|
136
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mcp-hydrocoder-vision",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Vision MCP Server for Claude Code - Qwen3 VL 4B integration",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"mcp-hydrocoder-vision": "./dist/index.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist/",
"src/",
|
|
12
|
+
"README.md",
|
|
13
|
+
"README_CN.md",
|
|
14
|
+
"LICENSE"
|
|
15
|
+
],
|
|
16
|
+
"scripts": {
|
|
17
|
+
"build": "tsc",
|
|
18
|
+
"dev": "tsx watch src/index.ts",
|
|
19
|
+
"start": "tsx src/index.ts"
|
|
20
|
+
},
|
|
21
|
+
"keywords": [
|
|
22
|
+
"mcp",
|
|
23
|
+
"vision",
|
|
24
|
+
"image",
|
|
25
|
+
"claude",
|
|
26
|
+
"qwen"
|
|
27
|
+
],
|
|
28
|
+
"author": "",
|
|
29
|
+
"license": "MIT",
|
|
30
|
+
"dependencies": {
|
|
31
|
+
"@modelcontextprotocol/sdk": "^1.0.1",
|
|
32
|
+
"tsx": "^4.7.0",
|
|
33
|
+
"zod": "^3.22.4"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@types/node": "^20.11.0",
|
|
37
|
+
"typescript": "^5.3.3"
|
|
38
|
+
}
|
|
39
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* MCP HydroCoder Vision Server
|
|
5
|
+
*
|
|
6
|
+
* A vision-language MCP server that connects to LM Studio running Qwen3 VL 4B
|
|
7
|
+
* for local image analysis and understanding.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
11
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
12
|
+
import {
|
|
13
|
+
CallToolRequestSchema,
|
|
14
|
+
ListToolsRequestSchema,
|
|
15
|
+
} from '@modelcontextprotocol/sdk/types.js';
|
|
16
|
+
import { z } from 'zod';
|
|
17
|
+
|
|
18
|
+
// LM Studio API configuration
// Full URL of LM Studio's OpenAI-compatible chat-completions endpoint;
// overridable via the LM_STUDIO_URL environment variable.
const LM_STUDIO_URL = process.env.LM_STUDIO_URL || 'http://localhost:1234/v1/chat/completions';
// Model identifier sent with every request; overridable via VISION_MODEL.
const DEFAULT_MODEL = process.env.VISION_MODEL || 'Qwen3-VL-4B-Instruct';
|
|
21
|
+
|
|
22
|
+
// Image MIME type mapping
|
|
23
|
+
const MIME_TYPES: Record<string, string> = {
|
|
24
|
+
'.png': 'image/png',
|
|
25
|
+
'.jpg': 'image/jpeg',
|
|
26
|
+
'.jpeg': 'image/jpeg',
|
|
27
|
+
'.gif': 'image/gif',
|
|
28
|
+
'.webp': 'image/webp',
|
|
29
|
+
'.bmp': 'image/bmp',
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Get MIME type from file extension
|
|
34
|
+
*/
|
|
35
|
+
function getMimeType(filePath: string): string {
|
|
36
|
+
const ext = '.' + filePath.split('.').pop()?.toLowerCase();
|
|
37
|
+
return MIME_TYPES[ext] || 'image/png';
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Read file as base64 (Node.js)
|
|
42
|
+
*/
|
|
43
|
+
async function fileToBase64(filePath: string): Promise<string> {
|
|
44
|
+
const fs = await import('fs');
|
|
45
|
+
const path = await import('path');
|
|
46
|
+
|
|
47
|
+
// Handle Windows paths
|
|
48
|
+
const normalizedPath = filePath.replace(/\//g, '\\');
|
|
49
|
+
const absolutePath = path.isAbsolute(normalizedPath)
|
|
50
|
+
? normalizedPath
|
|
51
|
+
: path.resolve(process.cwd(), normalizedPath);
|
|
52
|
+
|
|
53
|
+
const buffer = fs.readFileSync(absolutePath);
|
|
54
|
+
return buffer.toString('base64');
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Call LM Studio API for image analysis
|
|
59
|
+
*/
|
|
60
|
+
async function analyzeImageWithLMStudio(
|
|
61
|
+
imageDataBase64: string,
|
|
62
|
+
mimeType: string,
|
|
63
|
+
prompt: string
|
|
64
|
+
): Promise<string> {
|
|
65
|
+
const http = await import('http');
|
|
66
|
+
const https = await import('https');
|
|
67
|
+
const { URL } = await import('url');
|
|
68
|
+
|
|
69
|
+
return new Promise((resolve, reject) => {
|
|
70
|
+
const url = new URL(LM_STUDIO_URL);
|
|
71
|
+
const client = url.protocol === 'https:' ? https : http;
|
|
72
|
+
|
|
73
|
+
const requestBody = JSON.stringify({
|
|
74
|
+
model: DEFAULT_MODEL,
|
|
75
|
+
messages: [{
|
|
76
|
+
role: 'user',
|
|
77
|
+
content: [
|
|
78
|
+
{ type: 'text', text: prompt },
|
|
79
|
+
{
|
|
80
|
+
type: 'image_url',
|
|
81
|
+
image_url: {
|
|
82
|
+
url: `data:${mimeType};base64,${imageDataBase64}`
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
]
|
|
86
|
+
}],
|
|
87
|
+
max_tokens: 2048,
|
|
88
|
+
temperature: 0.7,
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
const options = {
|
|
92
|
+
hostname: url.hostname,
|
|
93
|
+
port: url.port || (url.protocol === 'https:' ? 443 : 80),
|
|
94
|
+
path: url.pathname + url.search,
|
|
95
|
+
method: 'POST',
|
|
96
|
+
headers: {
|
|
97
|
+
'Content-Type': 'application/json',
|
|
98
|
+
'Content-Length': Buffer.byteLength(requestBody),
|
|
99
|
+
},
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
const req = client.request(options, (res) => {
|
|
103
|
+
let data = '';
|
|
104
|
+
res.on('data', (chunk) => { data += chunk; });
|
|
105
|
+
res.on('end', () => {
|
|
106
|
+
try {
|
|
107
|
+
const result = JSON.parse(data);
|
|
108
|
+
resolve(result.choices?.[0]?.message?.content || 'No response from model');
|
|
109
|
+
} catch (e) {
|
|
110
|
+
reject(new Error(`Failed to parse response: ${e}`));
|
|
111
|
+
}
|
|
112
|
+
});
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
req.on('error', (e) => {
|
|
116
|
+
reject(new Error(`Request failed: ${e.message}. Make sure LM Studio is running with Qwen3 VL 4B loaded.`));
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
req.write(requestBody);
|
|
120
|
+
req.end();
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Input schemas
// (zod validation for each tool's arguments; parsed in the CallTool handler.
// Each mirrors the JSON schema advertised by the ListTools handler.)

// analyzeImage: free-form description of an image; prompt is optional.
const AnalyzeImageInputSchema = z.object({
  imagePath: z.string().describe('Path to the image file to analyze'),
  prompt: z.string().optional().describe('Optional analysis prompt (default: "Describe this image in detail")'),
});

// extractText: OCR with an optional language hint.
const ExtractTextInputSchema = z.object({
  imagePath: z.string().describe('Path to the image file containing text'),
  language: z.string().optional().describe('Expected language of the text (optional)'),
});

// describeForCode: turn a UI screenshot into code for a target framework.
const DescribeForCodeInputSchema = z.object({
  imagePath: z.string().describe('Path to the UI/design image'),
  framework: z.string().optional().describe('Target framework (e.g., "React", "Vue", "HTML/CSS")'),
});
|
|
139
|
+
|
|
140
|
+
// Create server instance
|
|
141
|
+
const server = new Server(
|
|
142
|
+
{
|
|
143
|
+
name: 'mcp-hydrocoder-vision',
|
|
144
|
+
version: '0.1.0',
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
capabilities: {
|
|
148
|
+
tools: {},
|
|
149
|
+
},
|
|
150
|
+
}
|
|
151
|
+
);
|
|
152
|
+
|
|
153
|
+
// Tool handler
|
|
154
|
+
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
155
|
+
const { name, arguments: args } = request.params;
|
|
156
|
+
|
|
157
|
+
try {
|
|
158
|
+
switch (name) {
|
|
159
|
+
case 'analyzeImage': {
|
|
160
|
+
const validated = AnalyzeImageInputSchema.parse(args);
|
|
161
|
+
const mimeType = getMimeType(validated.imagePath);
|
|
162
|
+
const imageData = await fileToBase64(validated.imagePath);
|
|
163
|
+
const prompt = validated.prompt || 'Describe this image in detail.';
|
|
164
|
+
|
|
165
|
+
const result = await analyzeImageWithLMStudio(imageData, mimeType, prompt);
|
|
166
|
+
return {
|
|
167
|
+
content: [{ type: 'text', text: result }],
|
|
168
|
+
};
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
case 'extractText': {
|
|
172
|
+
const validated = ExtractTextInputSchema.parse(args);
|
|
173
|
+
const mimeType = getMimeType(validated.imagePath);
|
|
174
|
+
const imageData = await fileToBase64(validated.imagePath);
|
|
175
|
+
const prompt = validated.language
|
|
176
|
+
? `Extract all text from this image. The text is in ${validated.language}.`
|
|
177
|
+
: 'Extract all text from this image (OCR).';
|
|
178
|
+
|
|
179
|
+
const result = await analyzeImageWithLMStudio(imageData, mimeType, prompt);
|
|
180
|
+
return {
|
|
181
|
+
content: [{ type: 'text', text: result }],
|
|
182
|
+
};
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
case 'describeForCode': {
|
|
186
|
+
const validated = DescribeForCodeInputSchema.parse(args);
|
|
187
|
+
const mimeType = getMimeType(validated.imagePath);
|
|
188
|
+
const imageData = await fileToBase64(validated.imagePath);
|
|
189
|
+
const framework = validated.framework || 'HTML/CSS/JavaScript';
|
|
190
|
+
const prompt = `Analyze this UI/design image and generate ${framework} code that replicates it. Focus on structure, styling, and layout.`;
|
|
191
|
+
|
|
192
|
+
const result = await analyzeImageWithLMStudio(imageData, mimeType, prompt);
|
|
193
|
+
return {
|
|
194
|
+
content: [{ type: 'text', text: result }],
|
|
195
|
+
};
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
default:
|
|
199
|
+
throw new Error(`Unknown tool: ${name}`);
|
|
200
|
+
}
|
|
201
|
+
} catch (error) {
|
|
202
|
+
if (error instanceof z.ZodError) {
|
|
203
|
+
throw new Error(`Invalid input: ${error.errors.map(e => e.message).join(', ')}`);
|
|
204
|
+
}
|
|
205
|
+
throw error;
|
|
206
|
+
}
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
// List available tools
|
|
210
|
+
server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
211
|
+
return {
|
|
212
|
+
tools: [
|
|
213
|
+
{
|
|
214
|
+
name: 'analyzeImage',
|
|
215
|
+
description: 'Analyze an image and return a detailed description. Uses local Qwen3 VL 4B model via LM Studio.',
|
|
216
|
+
inputSchema: {
|
|
217
|
+
type: 'object',
|
|
218
|
+
properties: {
|
|
219
|
+
imagePath: {
|
|
220
|
+
type: 'string',
|
|
221
|
+
description: 'Path to the image file to analyze',
|
|
222
|
+
},
|
|
223
|
+
prompt: {
|
|
224
|
+
type: 'string',
|
|
225
|
+
description: 'Optional analysis prompt (default: "Describe this image in detail")',
|
|
226
|
+
},
|
|
227
|
+
},
|
|
228
|
+
required: ['imagePath'],
|
|
229
|
+
},
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
name: 'extractText',
|
|
233
|
+
description: 'Extract text from an image (OCR). Supports multiple languages.',
|
|
234
|
+
inputSchema: {
|
|
235
|
+
type: 'object',
|
|
236
|
+
properties: {
|
|
237
|
+
imagePath: {
|
|
238
|
+
type: 'string',
|
|
239
|
+
description: 'Path to the image file containing text',
|
|
240
|
+
},
|
|
241
|
+
language: {
|
|
242
|
+
type: 'string',
|
|
243
|
+
description: 'Expected language of the text (optional)',
|
|
244
|
+
},
|
|
245
|
+
},
|
|
246
|
+
required: ['imagePath'],
|
|
247
|
+
},
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
name: 'describeForCode',
|
|
251
|
+
description: 'Analyze a UI/design image and generate corresponding code (HTML/CSS/JS, Vue, React, etc.).',
|
|
252
|
+
inputSchema: {
|
|
253
|
+
type: 'object',
|
|
254
|
+
properties: {
|
|
255
|
+
imagePath: {
|
|
256
|
+
type: 'string',
|
|
257
|
+
description: 'Path to the UI/design image',
|
|
258
|
+
},
|
|
259
|
+
framework: {
|
|
260
|
+
type: 'string',
|
|
261
|
+
description: 'Target framework (e.g., "React", "Vue", "HTML/CSS")',
|
|
262
|
+
},
|
|
263
|
+
},
|
|
264
|
+
required: ['imagePath'],
|
|
265
|
+
},
|
|
266
|
+
},
|
|
267
|
+
],
|
|
268
|
+
};
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
// Start server
|
|
272
|
+
async function main() {
|
|
273
|
+
const transport = new StdioServerTransport();
|
|
274
|
+
await server.connect(transport);
|
|
275
|
+
console.error('MCP HydroCoder Vision Server running on stdio');
|
|
276
|
+
console.error(`LM Studio URL: ${LM_STUDIO_URL}`);
|
|
277
|
+
console.error(`Model: ${DEFAULT_MODEL}`);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
main().catch((error) => {
|
|
281
|
+
console.error('Fatal error:', error);
|
|
282
|
+
process.exit(1);
|
|
283
|
+
});
|