luma-mcp 1.1.1 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +7 -1
- package/.github/workflows/release.yml +66 -0
- package/CHANGELOG.md +56 -8
- package/README.md +102 -71
- package/build/config.d.ts +1 -1
- package/build/config.d.ts.map +1 -1
- package/build/config.js +7 -0
- package/build/config.js.map +1 -1
- package/build/index.js +17 -5
- package/build/index.js.map +1 -1
- package/build/prompts.d.ts +1 -2
- package/build/prompts.d.ts.map +1 -1
- package/build/prompts.js +1 -2
- package/build/prompts.js.map +1 -1
- package/build/qwen-client.d.ts +17 -0
- package/build/qwen-client.d.ts.map +1 -0
- package/build/qwen-client.js +80 -0
- package/build/qwen-client.js.map +1 -0
- package/build/siliconflow-client.d.ts +5 -3
- package/build/siliconflow-client.d.ts.map +1 -1
- package/build/siliconflow-client.js +15 -10
- package/build/siliconflow-client.js.map +1 -1
- package/build/zhipu-client.d.ts +6 -3
- package/build/zhipu-client.d.ts.map +1 -1
- package/build/zhipu-client.js +19 -11
- package/build/zhipu-client.js.map +1 -1
- package/package.json +5 -2
- package/test/test-deepseek-raw.ts +7 -1
- package/test/test-qwen.ts +88 -0
package/.env.example
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# ==========================================
|
|
4
4
|
# 模型提供商选择
|
|
5
5
|
# ==========================================
|
|
6
|
-
# 可选值: zhipu, siliconflow
|
|
6
|
+
# 可选值: zhipu, siliconflow, qwen
|
|
7
7
|
# 默认: zhipu
|
|
8
8
|
MODEL_PROVIDER=zhipu
|
|
9
9
|
|
|
@@ -17,12 +17,18 @@ ZHIPU_API_KEY=your-zhipu-api-key-here
|
|
|
17
17
|
# ==========================================
|
|
18
18
|
# SILICONFLOW_API_KEY=your-siliconflow-api-key-here
|
|
19
19
|
|
|
20
|
+
# ==========================================
|
|
21
|
+
# 阿里云通义千问 Qwen3-VL 配置(使用阿里云时需要)
|
|
22
|
+
# ==========================================
|
|
23
|
+
# DASHSCOPE_API_KEY=your-dashscope-api-key-here
|
|
24
|
+
|
|
20
25
|
# ==========================================
|
|
21
26
|
# 通用配置(可选)
|
|
22
27
|
# ==========================================
|
|
23
28
|
# 模型名称(留空则使用默认值)
|
|
24
29
|
# zhipu 默认: glm-4.5v
|
|
25
30
|
# siliconflow 默认: deepseek-ai/DeepSeek-OCR
|
|
31
|
+
# qwen 默认: qwen3-vl-flash
|
|
26
32
|
# MODEL_NAME=
|
|
27
33
|
|
|
28
34
|
# 最大生成 tokens(默认: 4096)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
name: Create Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*.*.*'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
release:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
contents: write
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- name: Checkout code
|
|
16
|
+
uses: actions/checkout@v4
|
|
17
|
+
with:
|
|
18
|
+
fetch-depth: 0
|
|
19
|
+
|
|
20
|
+
- name: Setup Node.js
|
|
21
|
+
uses: actions/setup-node@v4
|
|
22
|
+
with:
|
|
23
|
+
node-version: '18'
|
|
24
|
+
registry-url: 'https://registry.npmjs.org'
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: npm ci
|
|
28
|
+
|
|
29
|
+
- name: Build
|
|
30
|
+
run: npm run build
|
|
31
|
+
|
|
32
|
+
- name: Extract version from tag
|
|
33
|
+
id: version
|
|
34
|
+
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
|
|
35
|
+
|
|
36
|
+
- name: Extract changelog for this version
|
|
37
|
+
id: changelog
|
|
38
|
+
run: |
|
|
39
|
+
VERSION=${{ steps.version.outputs.VERSION }}
|
|
40
|
+
echo "Extracting changelog for version $VERSION"
|
|
41
|
+
|
|
42
|
+
# Extract changelog content between version headers
|
|
43
|
+
sed -n "/## \[${VERSION}\]/,/## \[/p" CHANGELOG.md | sed '$d' > release_notes.md
|
|
44
|
+
|
|
45
|
+
# If empty, use a default message
|
|
46
|
+
if [ ! -s release_notes.md ]; then
|
|
47
|
+
echo "Release version ${VERSION}" > release_notes.md
|
|
48
|
+
fi
|
|
49
|
+
|
|
50
|
+
cat release_notes.md
|
|
51
|
+
|
|
52
|
+
- name: Create GitHub Release
|
|
53
|
+
uses: softprops/action-gh-release@v1
|
|
54
|
+
with:
|
|
55
|
+
tag_name: v${{ steps.version.outputs.VERSION }}
|
|
56
|
+
name: Release v${{ steps.version.outputs.VERSION }}
|
|
57
|
+
body_path: release_notes.md
|
|
58
|
+
draft: false
|
|
59
|
+
prerelease: false
|
|
60
|
+
env:
|
|
61
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
62
|
+
|
|
63
|
+
- name: Publish to npm
|
|
64
|
+
run: npm publish --access public
|
|
65
|
+
env:
|
|
66
|
+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,47 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [1.2.1] - 2025-11-18
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
- 📝 **文档优化**: 精简 README,移除冗余配置文件路径说明
|
|
9
|
+
- 📝 **更新日志简化**: 将 README 中的详细更新日志替换为 CHANGELOG.md 链接
|
|
10
|
+
- ✨ **Qwen 测试示例**: 添加 Qwen3-VL-Flash 本地测试命令
|
|
11
|
+
- 💰 **定价信息**: 添加阿里云通义千问定价参考链接
|
|
12
|
+
- 📋 **模型对比**: 更新模型选择表,完善 Qwen3-VL-Flash 信息
|
|
13
|
+
- 🔗 **API Key 获取**: 添加阿里云百炼 API Key 获取指南
|
|
14
|
+
- 📚 **相关链接**: 新增阿里云百炼平台和 Qwen3-VL 文档链接
|
|
15
|
+
- 🐛 **错误信息**: 优化 API 调用失败排查提示,包含阿里云账户
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
- 🐛 **描述修正**: 修正 package.json 中模型名称为 qwen3-vl-flash
|
|
19
|
+
- 📝 **注释精简**: 简化 prompts.ts 注释头
|
|
20
|
+
|
|
21
|
+
## [1.2.0] - 2025-11-17
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
- 🎉 **第三个视觉模型**: 新增阿里云通义千问 Qwen3-VL-Flash 支持
|
|
25
|
+
- 💡 **思考模式**: Qwen3-VL-Flash 支持深度思考模式(enable_thinking),提升复杂场景分析准确性
|
|
26
|
+
- ⚡ **高性价比**: Flash 版本速度更快、成本更低,适合大量使用
|
|
27
|
+
- 🔌 **OpenAI 兼容**: 使用阿里云百炼的 OpenAI 兼容 API,统一接口设计
|
|
28
|
+
- 🌐 **多地域支持**: 默认使用北京地域,支持新加坡地域配置
|
|
29
|
+
|
|
30
|
+
### Changed
|
|
31
|
+
- ⚙️ 新增 `MODEL_PROVIDER=qwen` 和 `DASHSCOPE_API_KEY` 环境变量配置
|
|
32
|
+
- 📝 更新所有文档(中英文),添加 Qwen3-VL-Flash 配置示例
|
|
33
|
+
- 💰 默认使用 qwen3-vl-flash 模型,兼顾性能与成本
|
|
34
|
+
- 🏗️ 重构客户端构造函数,统一参数传递方式
|
|
35
|
+
|
|
36
|
+
### Technical Details
|
|
37
|
+
- 新增文件:
|
|
38
|
+
- `src/qwen-client.ts` - 阿里云通义千问 VL API 客户端实现
|
|
39
|
+
- 修改文件:
|
|
40
|
+
- `src/config.ts` - 添加 'qwen' 提供商支持
|
|
41
|
+
- `src/zhipu-client.ts` - 重构构造函数,支持独立参数
|
|
42
|
+
- `src/siliconflow-client.ts` - 重构构造函数,支持独立参数
|
|
43
|
+
- `src/index.ts` - 添加 Qwen 客户端初始化逻辑
|
|
44
|
+
- `package.json` - 更新版本至 1.2.0,添加 qwen/aliyun/dashscope 关键词
|
|
45
|
+
|
|
5
46
|
## [1.1.1] - 2025-11-13
|
|
6
47
|
|
|
7
48
|
### Added
|
|
@@ -36,7 +77,7 @@ All notable changes to this project will be documented in this file.
|
|
|
36
77
|
- `src/index.ts` - 根据配置动态选择客户端
|
|
37
78
|
- `README.md` - 完整的双模型使用文档
|
|
38
79
|
|
|
39
|
-
## [1.0.3] - 2025-11-
|
|
80
|
+
## [1.0.3] - 2025-11-12
|
|
40
81
|
|
|
41
82
|
### Features
|
|
42
83
|
- 基于智谱 GLM-4.5V 的视觉理解能力
|
|
@@ -48,13 +89,20 @@ All notable changes to this project will be documented in this file.
|
|
|
48
89
|
|
|
49
90
|
**模型对比**:
|
|
50
91
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
|
54
|
-
|
|
|
55
|
-
|
|
56
|
-
|
|
92
|
+
|| 特性 | GLM-4.5V | DeepSeek-OCR | Qwen3-VL-Flash |
|
|
93
|
+
||----------|----------|--------------|----------------|
|
|
94
|
+
|| 提供商 | 智谱清言 | 硅基流动 | 阿里云百炼 |
|
|
95
|
+
|| 费用 | 收费 | **免费** | 收费 |
|
|
96
|
+
|| 中文理解 | 优秀 | 良好 | **优秀** |
|
|
97
|
+
|| OCR 能力 | 良好 | **优秀** | 优秀 |
|
|
98
|
+
|| 思考模式 | ✅ | ❌ | ✅ |
|
|
99
|
+
|| 速度/成本 | 中等 | 免费 | **快/低** |
|
|
100
|
+
|| 综合能力 | 良好 | OCR专精 | **优秀** |
|
|
101
|
+
|| 3D定位 | ❌ | ❌ | ✅ |
|
|
57
102
|
|
|
58
103
|
**推荐使用场景**:
|
|
59
104
|
- 需要 OCR/文字识别 → **DeepSeek-OCR** (免费)
|
|
60
|
-
- 需要深度图片理解 → **GLM-4.5V**
|
|
105
|
+
- 需要深度图片理解 → **Qwen3-VL-Flash** 或 **GLM-4.5V**
|
|
106
|
+
- 需要思考模式 → **Qwen3-VL-Flash** 或 **GLM-4.5V**
|
|
107
|
+
- 需要高性价比 → **Qwen3-VL-Flash** (速度快、成本低)
|
|
108
|
+
- 需要 3D 定位/复杂分析 → **Qwen3-VL-Flash**
|
package/README.md
CHANGED
|
@@ -6,12 +6,14 @@
|
|
|
6
6
|
|
|
7
7
|
## 特性
|
|
8
8
|
|
|
9
|
-
- **多模型支持**:
|
|
9
|
+
- **多模型支持**: 支持三个视觉模型
|
|
10
|
+
- GLM-4.5V(智谱清言)- 付费,中文理解优秀
|
|
11
|
+
- DeepSeek-OCR(硅基流动)- **免费使用**,OCR能力强
|
|
12
|
+
- Qwen3-VL-Flash(阿里云通义千问)- 付费,速度快成本低,支持思考模式
|
|
10
13
|
- **简单设计**: 单一 `analyze_image` 工具处理所有图片分析任务
|
|
11
14
|
- **智能理解**: 自动识别代码、UI、错误等不同场景
|
|
12
15
|
- **全面支持**: 代码截图、界面设计、错误诊断、OCR 文字识别
|
|
13
16
|
- **标准 MCP 协议**: 无缝集成 Claude Desktop、Cline 等 MCP 客户端
|
|
14
|
-
- **免费选项**: DeepSeek-OCR 通过硅基流动提供免费调用
|
|
15
17
|
- **URL 支持**: 支持本地文件和远程图片 URL
|
|
16
18
|
- **重试机制**: 内置指数退避重试,提高可靠性
|
|
17
19
|
|
|
@@ -23,6 +25,7 @@
|
|
|
23
25
|
- **选择一种模型**:
|
|
24
26
|
- **方案 A**: 智谱 AI API Key ([获取地址](https://open.bigmodel.cn/)) - 中文理解优秀
|
|
25
27
|
- **方案 B**: 硅基流动 API Key ([获取地址](https://cloud.siliconflow.cn/)) - **免费使用**,OCR 能力强
|
|
28
|
+
- **方案 C**: 阿里云百炼 API Key ([获取地址](https://bailian.console.aliyun.com/)) - 速度快成本低,支持思考模式
|
|
26
29
|
|
|
27
30
|
### 安装
|
|
28
31
|
|
|
@@ -45,10 +48,6 @@ npx luma-mcp
|
|
|
45
48
|
|
|
46
49
|
#### Claude Desktop
|
|
47
50
|
|
|
48
|
-
**Windows 配置文件位置**: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
49
|
-
|
|
50
|
-
**macOS 配置文件位置**: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
51
|
-
|
|
52
51
|
**方案 A: 使用智谱 GLM-4.5V**:
|
|
53
52
|
|
|
54
53
|
```json
|
|
@@ -82,6 +81,23 @@ npx luma-mcp
|
|
|
82
81
|
}
|
|
83
82
|
```
|
|
84
83
|
|
|
84
|
+
**方案 C: 使用阿里云通义千问 Qwen3-VL-Flash**:
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
{
|
|
88
|
+
"mcpServers": {
|
|
89
|
+
"luma": {
|
|
90
|
+
"command": "npx",
|
|
91
|
+
"args": ["-y", "luma-mcp"],
|
|
92
|
+
"env": {
|
|
93
|
+
"MODEL_PROVIDER": "qwen",
|
|
94
|
+
"DASHSCOPE_API_KEY": "your-dashscope-api-key"
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
```
|
|
100
|
+
|
|
85
101
|
**本地开发(智谱)**:
|
|
86
102
|
|
|
87
103
|
```json
|
|
@@ -154,6 +170,23 @@ npx luma-mcp
|
|
|
154
170
|
}
|
|
155
171
|
```
|
|
156
172
|
|
|
173
|
+
**方案 C: 使用阿里云通义千问 Qwen3-VL-Flash**:
|
|
174
|
+
|
|
175
|
+
```json
|
|
176
|
+
{
|
|
177
|
+
"mcpServers": {
|
|
178
|
+
"luma": {
|
|
179
|
+
"command": "npx",
|
|
180
|
+
"args": ["-y", "luma-mcp"],
|
|
181
|
+
"env": {
|
|
182
|
+
"MODEL_PROVIDER": "qwen",
|
|
183
|
+
"DASHSCOPE_API_KEY": "your-dashscope-api-key"
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
157
190
|
#### Claude Code (命令行)
|
|
158
191
|
|
|
159
192
|
**使用智谱 GLM-4.5V**:
|
|
@@ -166,6 +199,11 @@ claude mcp add -s user luma-mcp --env ZHIPU_API_KEY=your-api-key -- npx -y luma-
|
|
|
166
199
|
claude mcp add -s user luma-mcp --env MODEL_PROVIDER=siliconflow --env SILICONFLOW_API_KEY=your-api-key -- npx -y luma-mcp
|
|
167
200
|
```
|
|
168
201
|
|
|
202
|
+
**使用阿里云通义千问 Qwen3-VL-Flash**:
|
|
203
|
+
```bash
|
|
204
|
+
claude mcp add -s user luma-mcp --env MODEL_PROVIDER=qwen --env DASHSCOPE_API_KEY=your-api-key -- npx -y luma-mcp
|
|
205
|
+
```
|
|
206
|
+
|
|
169
207
|
#### 其他工具
|
|
170
208
|
|
|
171
209
|
更多 MCP 客户端配置方法请参考[智谱官方文档](https://docs.bigmodel.cn/cn/coding-plan/mcp/vision-mcp-server#claude-code)
|
|
@@ -187,7 +225,7 @@ claude mcp add -s user luma-mcp --env MODEL_PROVIDER=siliconflow --env SILICONFL
|
|
|
187
225
|
|
|
188
226
|
**注意**: 直接在聊天框粘贴图片,非视觉模型不会自动调用 Luma,需要明确指示。
|
|
189
227
|
|
|
190
|
-
### 在 Claude
|
|
228
|
+
### 在 Claude Code 中使用
|
|
191
229
|
|
|
192
230
|
配置完成后,在 Claude 对话中可以这样使用:
|
|
193
231
|
|
|
@@ -230,6 +268,19 @@ $env:SILICONFLOW_API_KEY="your-api-key" # Windows PowerShell
|
|
|
230
268
|
npm run test:local ./test.png
|
|
231
269
|
```
|
|
232
270
|
|
|
271
|
+
**测试阿里云通义千问 Qwen3-VL-Flash**:
|
|
272
|
+
```bash
|
|
273
|
+
# 设置 API Key 和提供商
|
|
274
|
+
export MODEL_PROVIDER=qwen
|
|
275
|
+
export DASHSCOPE_API_KEY="your-api-key" # macOS/Linux
|
|
276
|
+
|
|
277
|
+
$env:MODEL_PROVIDER="qwen"
|
|
278
|
+
$env:DASHSCOPE_API_KEY="your-api-key" # Windows PowerShell
|
|
279
|
+
|
|
280
|
+
# 测试本地图片
|
|
281
|
+
npm run test:local ./test.png
|
|
282
|
+
```
|
|
283
|
+
|
|
233
284
|
**其他测试命令**:
|
|
234
285
|
```bash
|
|
235
286
|
# 测试并提问
|
|
@@ -286,31 +337,39 @@ analyze_image({
|
|
|
286
337
|
|
|
287
338
|
### 通用配置
|
|
288
339
|
|
|
289
|
-
| 变量名
|
|
290
|
-
|
|
291
|
-
| `MODEL_PROVIDER`
|
|
292
|
-
| `MODEL_NAME`
|
|
293
|
-
| `MAX_TOKENS`
|
|
294
|
-
| `TEMPERATURE`
|
|
295
|
-
| `TOP_P`
|
|
296
|
-
| `ENABLE_THINKING
|
|
340
|
+
| 变量名 | 必需 | 默认值 | 说明 |
|
|
341
|
+
|-------------------|------|---------|----------------------------------------------|
|
|
342
|
+
| `MODEL_PROVIDER` | 否 | `zhipu` | 模型提供商:`zhipu`、`siliconflow` 或 `qwen` |
|
|
343
|
+
| `MODEL_NAME` | 否 | 见下文 | 模型名称(自动根据提供商选择) |
|
|
344
|
+
| `MAX_TOKENS` | 否 | `4096` | 最大生成 tokens |
|
|
345
|
+
| `TEMPERATURE` | 否 | `0.7` | 温度参数 (0-1) |
|
|
346
|
+
| `TOP_P` | 否 | `0.7` | Top-p 参数 (0-1) |
|
|
347
|
+
| `ENABLE_THINKING` | 否 | `false` | 是否启用思考模式(GLM-4.5V 和 Qwen3-VL-Flash) |
|
|
297
348
|
|
|
298
349
|
### 智谱 GLM-4.5V 专用
|
|
299
350
|
|
|
300
|
-
| 变量名 | 必需
|
|
301
|
-
|
|
302
|
-
| `ZHIPU_API_KEY`
|
|
351
|
+
| 变量名 | 必需 | 默认值 | 说明 |
|
|
352
|
+
|-----------------|----------------|--------|---------------------|
|
|
353
|
+
| `ZHIPU_API_KEY` | 是(使用智谱时) | - | 智谱 AI 的 API 密钥 |
|
|
303
354
|
|
|
304
355
|
默认模型:`glm-4.5v`
|
|
305
356
|
|
|
306
357
|
### 硅基流动 DeepSeek-OCR 专用
|
|
307
358
|
|
|
308
|
-
| 变量名 | 必需
|
|
309
|
-
|
|
310
|
-
| `SILICONFLOW_API_KEY`
|
|
359
|
+
| 变量名 | 必需 | 默认值 | 说明 |
|
|
360
|
+
|-----------------------|--------------------|--------|---------------------|
|
|
361
|
+
| `SILICONFLOW_API_KEY` | 是(使用硅基流动时) | - | 硅基流动的 API 密钥 |
|
|
311
362
|
|
|
312
363
|
默认模型:`deepseek-ai/DeepSeek-OCR`
|
|
313
364
|
|
|
365
|
+
### 阿里云通义千问 Qwen3-VL-Flash 专用
|
|
366
|
+
|
|
367
|
+
| 变量名 | 必需 | 默认值 | 说明 |
|
|
368
|
+
|-------------------|------------------|--------|------------------------|
|
|
369
|
+
| `DASHSCOPE_API_KEY` | 是(使用千问时) | - | 阿里云百炼的 API 密钥 |
|
|
370
|
+
|
|
371
|
+
默认模型:`qwen3-vl-flash`
|
|
372
|
+
|
|
314
373
|
**思考模式说明**:
|
|
315
374
|
- 默认开启,提高图片分析的准确性和详细程度
|
|
316
375
|
- 如需关闭(提高速度、降低成本),请在配置文件中设置:
|
|
@@ -384,6 +443,12 @@ luma-mcp/
|
|
|
384
443
|
3. 进入 API 管理创建 API Key
|
|
385
444
|
4. 复制 API Key 到配置文件
|
|
386
445
|
|
|
446
|
+
**阿里云通义千问 Qwen3-VL-Flash**:
|
|
447
|
+
1. 访问 [阿里云百炼平台](https://bailian.console.aliyun.com/)
|
|
448
|
+
2. 注册/登录账号
|
|
449
|
+
3. 进入 API-KEY 管理创建 API Key
|
|
450
|
+
4. 复制 API Key 到配置文件
|
|
451
|
+
|
|
387
452
|
### 支持哪些图片格式?
|
|
388
453
|
|
|
389
454
|
支持 JPG、PNG、WebP、GIF 格式。建议使用 JPG 格式以获得更好的压缩率。
|
|
@@ -413,7 +478,7 @@ data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
|
|
|
413
478
|
### API 调用失败怎么办?
|
|
414
479
|
|
|
415
480
|
1. 检查 API Key 是否正确
|
|
416
|
-
2.
|
|
481
|
+
2. 确认账户余额充足(智谱/阿里云)
|
|
417
482
|
3. 检查网络连接
|
|
418
483
|
4. 查看日志文件了解详细错误信息
|
|
419
484
|
|
|
@@ -423,25 +488,29 @@ data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA...
|
|
|
423
488
|
|
|
424
489
|
**智谱 GLM-4.5V**: 定价请参考[智谱官方定价](https://open.bigmodel.cn/pricing)。
|
|
425
490
|
|
|
426
|
-
|
|
491
|
+
**阿里云通义千问 Qwen3-VL-Flash**: 定价请参考[阿里云百炼定价](https://help.aliyun.com/zh/model-studio/getting-started/models)。
|
|
492
|
+
|
|
493
|
+
典型场景估算:
|
|
427
494
|
- 简单图片理解: 500-1000 tokens
|
|
428
495
|
- 代码截图分析: 1500-2500 tokens
|
|
429
496
|
- 详细 UI 分析: 2000-3000 tokens
|
|
430
497
|
|
|
431
|
-
|
|
498
|
+
启用思考模式(GLM-4.5V/Qwen3-VL-Flash)会增加约 20-30% tokens。
|
|
432
499
|
|
|
433
500
|
### 如何选择模型?
|
|
434
501
|
|
|
435
|
-
| 特性
|
|
436
|
-
|
|
437
|
-
| **费用**
|
|
438
|
-
| **中文理解** | 优秀 | 良好 |
|
|
439
|
-
| **OCR 能力** | 良好 | **优秀** |
|
|
440
|
-
| **思考模式** | 支持 | 不支持 |
|
|
441
|
-
|
|
|
502
|
+
| 特性 | GLM-4.5V(智谱) | DeepSeek-OCR(硅基流动) | Qwen3-VL-Flash(阿里云) |
|
|
503
|
+
|--------------|----------------|------------------------|------------------------|
|
|
504
|
+
| **费用** | 收费 | **完全免费** | 收费 |
|
|
505
|
+
| **中文理解** | 优秀 | 良好 | **优秀** |
|
|
506
|
+
| **OCR 能力** | 良好 | **优秀** | 优秀 |
|
|
507
|
+
| **思考模式** | 支持 | 不支持 | 支持 |
|
|
508
|
+
| **速度/成本** | 中等 | 免费 | **快速/低成本** |
|
|
509
|
+
| **适用场景** | 通用图片分析 | OCR、文字识别 | 快速分析、3D定位 |
|
|
442
510
|
|
|
443
511
|
**推荐**:
|
|
444
512
|
- 需要 OCR 或文字识别:选择 **DeepSeek-OCR**(免费)
|
|
513
|
+
- 需要快速低成本分析:选择 **Qwen3-VL-Flash**
|
|
445
514
|
- 需要深度图片理解:选择 **GLM-4.5V**
|
|
446
515
|
|
|
447
516
|
## 贡献
|
|
@@ -458,50 +527,12 @@ MIT License
|
|
|
458
527
|
- [GLM-4.5V 文档](https://docs.bigmodel.cn/cn/guide/models/vlm/glm-4.5v)
|
|
459
528
|
- [硅基流动平台](https://cloud.siliconflow.cn/)
|
|
460
529
|
- [DeepSeek-OCR 文档](https://docs.siliconflow.cn/cn/api-reference/chat-completions/chat-completions)
|
|
530
|
+
- [阿里云百炼平台](https://bailian.console.aliyun.com/)
|
|
531
|
+
- [Qwen3-VL 文档](https://help.aliyun.com/zh/model-studio/getting-started/models)
|
|
461
532
|
- [MCP 协议文档](https://modelcontextprotocol.io/)
|
|
462
533
|
|
|
463
534
|
## 更新日志
|
|
464
535
|
|
|
465
|
-
### [1.1.1] - 2025-11-13
|
|
466
|
-
|
|
467
|
-
#### 新增
|
|
468
|
-
- 🖼️ **Data URI 支持**: 支持接收 base64 编码的图片数据(`data:image/png;base64,...`)
|
|
469
|
-
- 🚀 **为未来做准备**: 当 MCP 客户端支持时,可直接传递用户粘贴的图片
|
|
470
|
-
|
|
471
|
-
#### 修改
|
|
472
|
-
- 更新工具描述,说明支持三种输入格式:本地路径、URL、Data URI
|
|
473
|
-
- 新增 Data URI 格式验证(MIME 类型、大小限制)
|
|
474
|
-
|
|
475
|
-
### [1.1.0] - 2025-11-13
|
|
476
|
-
|
|
477
|
-
#### 新增
|
|
478
|
-
- 🎉 **多模型支持**: 新增硅基流动 DeepSeek-OCR 支持
|
|
479
|
-
- 🆓 **免费选项**: DeepSeek-OCR 通过硅基流动提供完全免费的 OCR 服务
|
|
480
|
-
- 📐 **统一接口**: 创建 VisionClient 接口,支持灵活扩展更多视觉模型
|
|
481
|
-
- ⚙️ **灵活配置**: 通过 `MODEL_PROVIDER` 环境变量轻松切换模型
|
|
482
|
-
|
|
483
|
-
#### 修改
|
|
484
|
-
- 🔧 环境变量命名优化,支持通用配置(`MODEL_NAME`、`MAX_TOKENS` 等)
|
|
485
|
-
- 📝 更新文档,提供双模型配置说明和选择建议
|
|
486
|
-
- 🏗️ 重构代码结构,提升可维护性
|
|
487
|
-
|
|
488
|
-
#### 技术细节
|
|
489
|
-
- 新增文件:
|
|
490
|
-
- `src/vision-client.ts` - 视觉模型客户端统一接口
|
|
491
|
-
- `src/siliconflow-client.ts` - 硅基流动 API 客户端实现
|
|
492
|
-
- `.env.example` - 配置示例文件
|
|
493
|
-
- 修改文件:
|
|
494
|
-
- `src/config.ts` - 支持多提供商配置
|
|
495
|
-
- `src/zhipu-client.ts` - 实现 VisionClient 接口
|
|
496
|
-
- `src/index.ts` - 根据配置动态选择客户端
|
|
497
|
-
|
|
498
|
-
### [1.0.3] - 2025-11-12
|
|
499
|
-
|
|
500
|
-
- 基于智谱 GLM-4.5V 的视觉理解能力
|
|
501
|
-
- 支持本地文件和远程 URL
|
|
502
|
-
- 内置重试机制
|
|
503
|
-
- 思考模式支持
|
|
504
|
-
|
|
505
536
|
更多更新历史请查看 [CHANGELOG.md](./CHANGELOG.md)
|
|
506
537
|
|
|
507
538
|
## 作者
|
package/build/config.d.ts
CHANGED
package/build/config.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,aAAa,GAAG,OAAO,GAAG,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,aAAa,GAAG,OAAO,GAAG,aAAa,GAAG,MAAM,CAAC;AAE7D,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,aAAa,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,OAAO,CAAC;CACzB;AAED;;GAEG;AACH,wBAAgB,UAAU,IAAI,UAAU,CAwCvC"}
|
package/build/config.js
CHANGED
|
@@ -18,6 +18,13 @@ export function loadConfig() {
|
|
|
18
18
|
throw new Error('SILICONFLOW_API_KEY environment variable is required when using SiliconFlow provider');
|
|
19
19
|
}
|
|
20
20
|
}
|
|
21
|
+
else if (provider === 'qwen') {
|
|
22
|
+
apiKey = process.env.DASHSCOPE_API_KEY;
|
|
23
|
+
defaultModel = 'qwen3-vl-flash';
|
|
24
|
+
if (!apiKey) {
|
|
25
|
+
throw new Error('DASHSCOPE_API_KEY environment variable is required when using Qwen provider');
|
|
26
|
+
}
|
|
27
|
+
}
|
|
21
28
|
else {
|
|
22
29
|
apiKey = process.env.ZHIPU_API_KEY;
|
|
23
30
|
defaultModel = 'glm-4.5v';
|
package/build/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAcH;;GAEG;AACH,MAAM,UAAU,UAAU;IACxB,aAAa;IACb,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,WAAW,EAAE,IAAI,OAAO,CAAkB,CAAC;IAEzF,kBAAkB;IAClB,IAAI,MAA0B,CAAC;IAC/B,IAAI,YAAoB,CAAC;IAEzB,IAAI,QAAQ,KAAK,aAAa,EAAE,CAAC;QAC/B,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC;QACzC,YAAY,GAAG,0BAA0B,CAAC;QAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,sFAAsF,CAAC,CAAC;QAC1G,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC;QACnC,YAAY,GAAG,UAAU,CAAC;QAE1B,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,0EAA0E,CAAC,CAAC;QAC9F,CAAC;IACH,CAAC;IAED,OAAO;QACL,QAAQ;QACR,MAAM;QACN,KAAK,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,YAAY;QAC7C,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,MAAM,EAAE,EAAE,CAAC;QACzD,WAAW,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,KAAK,CAAC;QACzD,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,KAAK,CAAC;QAC5C,cAAc,EAAE,OAAO,CAAC,GAAG,CAAC,eAAe,KAAK,MAAM;KACvD,CAAC;AACJ,CAAC"}
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAcH;;GAEG;AACH,MAAM,UAAU,UAAU;IACxB,aAAa;IACb,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,WAAW,EAAE,IAAI,OAAO,CAAkB,CAAC;IAEzF,kBAAkB;IAClB,IAAI,MAA0B,CAAC;IAC/B,IAAI,YAAoB,CAAC;IAEzB,IAAI,QAAQ,KAAK,aAAa,EAAE,CAAC;QAC/B,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC;QACzC,YAAY,GAAG,0BAA0B,CAAC;QAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,sFAAsF,CAAC,CAAC;QAC1G,CAAC;IACH,CAAC;SAAM,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QAC/B,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;QACvC,YAAY,GAAG,gBAAgB,CAAC;QAEhC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,6EAA6E,CAAC,CAAC;QACjG,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC;QACnC,YAAY,GAAG,UAAU,CAAC;QAE1B,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,0EAA0E,CAAC,CAAC;QAC9F,CAAC;IACH,CAAC;IAED,OAAO;QACL,QAAQ;QACR,MAAM;QACN,KAAK,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,YAAY;QAC7C,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,MAAM,EAAE,EAAE,CAAC;QACzD,WAAW,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,KAAK,CAAC;QACzD,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,KAAK,CAAC;QAC5C,cAAc,EAAE,OAAO,CAAC,GAAG,CAAC,eAAe,KAAK,MAAM;KACvD,CAAC;AACJ,CAAC"}
|
package/build/index.js
CHANGED
|
@@ -12,6 +12,7 @@ import { z } from 'zod';
|
|
|
12
12
|
import { loadConfig } from './config.js';
|
|
13
13
|
import { ZhipuClient } from './zhipu-client.js';
|
|
14
14
|
import { SiliconFlowClient } from './siliconflow-client.js';
|
|
15
|
+
import { QwenClient } from './qwen-client.js';
|
|
15
16
|
import { imageToBase64, validateImageSource } from './image-processor.js';
|
|
16
17
|
import { buildAnalysisPrompt } from './prompts.js';
|
|
17
18
|
import { withRetry, createSuccessResponse, createErrorResponse } from './utils/helpers.js';
|
|
@@ -23,9 +24,16 @@ async function createServer() {
|
|
|
23
24
|
// 加载配置
|
|
24
25
|
const config = loadConfig();
|
|
25
26
|
// 根据配置选择模型客户端
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
let visionClient;
|
|
28
|
+
if (config.provider === 'siliconflow') {
|
|
29
|
+
visionClient = new SiliconFlowClient(config.apiKey, config.model, config.maxTokens, config.temperature);
|
|
30
|
+
}
|
|
31
|
+
else if (config.provider === 'qwen') {
|
|
32
|
+
visionClient = new QwenClient(config.apiKey, config.model, config.maxTokens, config.temperature);
|
|
33
|
+
}
|
|
34
|
+
else {
|
|
35
|
+
visionClient = new ZhipuClient(config.apiKey, config.model, config.maxTokens, config.temperature, config.topP);
|
|
36
|
+
}
|
|
29
37
|
logger.info('Vision client initialized', {
|
|
30
38
|
provider: config.provider,
|
|
31
39
|
model: visionClient.getModelName()
|
|
@@ -47,16 +55,20 @@ async function createServer() {
|
|
|
47
55
|
const imageDataUrl = await imageToBase64(imageSource);
|
|
48
56
|
// 3. 构建提示词
|
|
49
57
|
// DeepSeek-OCR 需要简洁的 prompt,不支持复杂格式化
|
|
58
|
+
// Qwen/GLM 使用结构化 prompt
|
|
50
59
|
const fullPrompt = config.provider === 'siliconflow'
|
|
51
60
|
? prompt // DeepSeek-OCR: 直接使用原始 prompt
|
|
52
|
-
: buildAnalysisPrompt(prompt); // GLM
|
|
61
|
+
: buildAnalysisPrompt(prompt); // Qwen/GLM: 使用结构化 prompt
|
|
53
62
|
// 4. 调用视觉模型分析图片
|
|
54
63
|
return await visionClient.analyzeImage(imageDataUrl, fullPrompt);
|
|
55
64
|
}, 2, // 最多重试2次
|
|
56
65
|
1000 // 初始延迟1秒
|
|
57
66
|
);
|
|
58
67
|
// 注册工具 - 使用 McpServer.tool() API
|
|
59
|
-
server.tool('analyze_image',
|
|
68
|
+
server.tool('analyze_image', `使用视觉模型分析图片内容。支持三个视觉模型:
|
|
69
|
+
- GLM-4.5V(智谱清言)- 付费,中文理解优秀
|
|
70
|
+
- DeepSeek-OCR(硅基流动)- 免费,OCR能力强
|
|
71
|
+
- Qwen3-VL-Flash(阿里云通义千问)- 付费,速度快成本低,支持思考模式
|
|
60
72
|
|
|
61
73
|
**何时自动调用此工具**:
|
|
62
74
|
1. 用户提供了图片文件路径(包括临时路径、相对路径、绝对路径)
|
package/build/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;GAGG;AAEH,wCAAwC;AACxC,OAAO,EAAE,uBAAuB,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AACpE,uBAAuB,EAAE,CAAC;AAE1B,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE3F;;GAEG;AACH,KAAK,UAAU,YAAY;IACzB,MAAM,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAE5C,OAAO;IACP,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,cAAc;IACd,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;GAGG;AAEH,wCAAwC;AACxC,OAAO,EAAE,uBAAuB,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AACpE,uBAAuB,EAAE,CAAC;AAE1B,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE3F;;GAEG;AACH,KAAK,UAAU,YAAY;IACzB,MAAM,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAE5C,OAAO;IACP,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,cAAc;IACd,IAAI,YAA0B,CAAC;IAE/B,IAAI,MAAM,CAAC,QAAQ,KAAK,aAAa,EAAE,CAAC;QACtC,YAAY,GAAG,IAAI,iBAAiB,CAClC,MAAM,CAAC,MAAM,EACb,MAAM,CAAC,KAAK,EACZ,MAAM,CAAC,SAAS,EAChB,MAAM,CAAC,WAAW,CACnB,CAAC;IACJ,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;QACtC,YAAY,GAAG,IAAI,UAAU,CAC3B,MAAM,CAAC,MAAM,EACb,MAAM,CAAC,KAAK,EACZ,MAAM,CAAC,SAAS,EAChB,MAAM,CAAC,WAAW,CACnB,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,YAAY,GAAG,IAAI,WAAW,CAC5B,MAAM,CAAC,MAAM,EACb,MAAM,CAAC,KAAK,EACZ,MAAM,CAAC,SAAS,EAChB,MAAM,CAAC,WAAW,EAClB,MAAM,CAAC,IAAI,CACZ,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,2BAA2B,EAAE;QACvC,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,KAAK,EAAE,YAAY,CAAC,YAAY,EAAE;KACnC,CAAC,CAAC;IAEH,uBAAuB;IACvB,MAAM,MAAM,GAAG,IAAI,SAAS,CAC1B;QACE,IAAI,EAAE,UAAU;QAChB,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE;YACZ,KAAK,EAAE,EAAE;SACV;KACF,CACF,CAAC;IAEF,aAAa;IACb,MAAM,gBAAgB,GAAG,SAAS,CAChC,KAAK,EAAE,WAAmB,EAAE,MAAc,EAAE,EAAE;QAC5C,YAAY;QACZ,MAAM,mBAAmB,CAAC,WAAW,CAAC,CAAC;QAEvC,oBAAoB;QACpB,MAAM,YAAY,GAAG,MAAM,aAAa,CAAC,WAAW,CAAC,CAAC;QAEtD,WAAW;QACX,qCAAqC;QACrC,wBAAwB;QACxB,MAAM,UAAU,GAAG,MAAM,CAAC,QAAQ,KAAK,aAAa;YAClD,CAAC,CAAC,MAAM,CAAE,8BAA8B;YACxC,CAAC,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAE,yBAAyB;QAE3D,gBAAgB;QAChB,OAAO,MAAM,YAA
Y,CAAC,YAAY,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IACnE,CAAC,EACD,CAAC,EAAE,SAAS;IACZ,IAAI,CAAC,SAAS;KACf,CAAC;IAEF,iCAAiC;IACjC,MAAM,CAAC,IAAI,CACT,eAAe,EACf;;;;;;;;;;;;;;;;uCAgBmC,EACnC;QACE,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,kPAAkP,CAAC;QACrR,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,8HAA8H,CAAC;KAC5J,EACD,KAAK,EAAE,MAAM,EAAE,EAAE;QACf,IAAI,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,iBAAiB,EAAE;gBAC7B,MAAM,EAAE,MAAM,CAAC,YAAY;gBAC3B,MAAM,EAAE,MAAM,CAAC,MAAM;aACtB,CAAC,CAAC;YAEH,YAAY;YACZ,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YAE1E,MAAM,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;YACrD,OAAO,qBAAqB,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE;gBACpC,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;YAEH,OAAO,mBAAmB,CACxB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CACzD,CAAC;QACJ,CAAC;IACH,CAAC,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,IAAI;IACjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;QAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAEhC,MAAM,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;IAC/D,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,KAAK,CAAC,iCAAiC,EAAE;YAC9C,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,SAAS;AACT,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,KAAK,EAAE,EAAE;IACxC,MAAM,CAAC,KAAK,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;IACjF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAM,EAAE,EAAE;IAC1C,MAAM,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE;IACxB,MAAM,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,
CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE;IACzB,MAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,IAAI,EAAE,CAAC"}
|
package/build/prompts.d.ts
CHANGED
package/build/prompts.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../src/prompts.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../src/prompts.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,CAkC7D"}
|
package/build/prompts.js
CHANGED
package/build/prompts.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompts.js","sourceRoot":"","sources":["../src/prompts.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"prompts.js","sourceRoot":"","sources":["../src/prompts.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,QAAiB;IACnD,IAAI,QAAQ,EAAE,CAAC;QACb,gBAAgB;QAChB,OAAO;;;;EAIT,QAAQ;;;;;;;;;;;CAWT,CAAC,IAAI,EAAE,CAAC;IACP,CAAC;SAAM,CAAC;QACN,aAAa;QACb,OAAO;;;;;;;;;;;CAWV,CAAC,IAAI,EAAE,CAAC;IACP,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 阿里云通义千问VL客户端
|
|
3
|
+
* 使用 OpenAI 兼容接口
|
|
4
|
+
* API 文档: https://help.aliyun.com/zh/model-studio/vision
|
|
5
|
+
*/
|
|
6
|
+
import { VisionClient } from './vision-client.js';
|
|
7
|
+
export declare class QwenClient implements VisionClient {
|
|
8
|
+
private client;
|
|
9
|
+
private apiKey;
|
|
10
|
+
private model;
|
|
11
|
+
private maxTokens;
|
|
12
|
+
private temperature;
|
|
13
|
+
constructor(apiKey: string, model?: string, maxTokens?: number, temperature?: number);
|
|
14
|
+
analyzeImage(imageDataUrl: string, prompt: string, enableThinking?: boolean): Promise<string>;
|
|
15
|
+
getModelName(): string;
|
|
16
|
+
}
|
|
17
|
+
//# sourceMappingURL=qwen-client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"qwen-client.d.ts","sourceRoot":"","sources":["../src/qwen-client.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGlD,qBAAa,UAAW,YAAW,YAAY;IAC7C,OAAO,CAAC,MAAM,CAAgB;IAC9B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,EAAE,MAAM,EAAE,KAAK,GAAE,MAAyB,EAAE,SAAS,GAAE,MAAa,EAAE,WAAW,GAAE,MAAY;IAiB3G,YAAY,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IAoDnG,YAAY,IAAI,MAAM;CAGvB"}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 阿里云通义千问VL客户端
|
|
3
|
+
* 使用 OpenAI 兼容接口
|
|
4
|
+
* API 文档: https://help.aliyun.com/zh/model-studio/vision
|
|
5
|
+
*/
|
|
6
|
+
import axios from 'axios';
|
|
7
|
+
import { buildAnalysisPrompt } from './prompts.js';
|
|
8
|
+
/**
 * Client for Alibaba Cloud Qwen-VL vision models, talking to the DashScope
 * OpenAI-compatible endpoint over plain HTTP (axios).
 */
export class QwenClient {
    client;
    apiKey;
    model;
    maxTokens;
    temperature;
    /**
     * @param {string} apiKey - DashScope API key, sent as a Bearer token.
     * @param {string} [model='qwen3-vl-flash'] - Model identifier to request.
     * @param {number} [maxTokens=4096] - Value sent as `max_tokens`.
     * @param {number} [temperature=0.7] - Value sent as `temperature`.
     */
    constructor(apiKey, model = 'qwen3-vl-flash', maxTokens = 4096, temperature = 0.7) {
        this.apiKey = apiKey;
        this.model = model;
        this.maxTokens = maxTokens;
        this.temperature = temperature;
        // Pre-configured HTTP client for the DashScope OpenAI-compatible API.
        this.client = axios.create({
            baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
            headers: {
                'Authorization': `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
            },
            timeout: 180000, // 180 seconds
        });
    }
    /**
     * Ask the model to analyze a single image.
     *
     * @param {string} imageDataUrl - Image passed through as `image_url.url`.
     * @param {string} prompt - User prompt; wrapped by buildAnalysisPrompt().
     * @param {boolean} [enableThinking] - When truthy, request thinking mode.
     * @returns {Promise<string>} The content of the first choice's message.
     * @throws {Error} `Qwen API error: ...` for axios failures (original error
     *   attached as `cause`), or `Invalid response format from Qwen API` when
     *   the response carries no message content.
     */
    async analyzeImage(imageDataUrl, prompt, enableThinking) {
        try {
            const requestBody = {
                model: this.model,
                messages: [
                    {
                        role: 'user',
                        content: [
                            {
                                type: 'image_url',
                                image_url: {
                                    url: imageDataUrl,
                                },
                            },
                            {
                                type: 'text',
                                text: buildAnalysisPrompt(prompt),
                            },
                        ],
                    },
                ],
                max_tokens: this.maxTokens,
                temperature: this.temperature,
                stream: false,
            };
            if (enableThinking) {
                // `extra_body` is an OpenAI *Python SDK* convenience that the SDK
                // merges into the request. This client posts raw JSON, so the
                // non-standard fields must sit at the top level of the body;
                // nesting them under `extra_body` would send that key verbatim.
                requestBody.enable_thinking = true;
                requestBody.thinking_budget = 81920; // upper bound on thinking tokens
            }
            const response = await this.client.post('/chat/completions', requestBody);
            if (!response.data?.choices?.[0]?.message?.content) {
                throw new Error('Invalid response format from Qwen API');
            }
            return response.data.choices[0].message.content;
        }
        catch (error) {
            if (axios.isAxiosError(error)) {
                // Prefer the API's own error message; fall back to the transport error.
                const errorMessage = error.response?.data?.error?.message || error.message;
                throw new Error(`Qwen API error: ${errorMessage}`, { cause: error });
            }
            throw error;
        }
    }
    /**
     * @returns {string} Display name in the form `Qwen (<model>)`.
     */
    getModelName() {
        return `Qwen (${this.model})`;
    }
}
|
|
80
|
+
//# sourceMappingURL=qwen-client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"qwen-client.js","sourceRoot":"","sources":["../src/qwen-client.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAwB,MAAM,OAAO,CAAC;AAE7C,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAEnD,MAAM,OAAO,UAAU;IACb,MAAM,CAAgB;IACtB,MAAM,CAAS;IACf,KAAK,CAAS;IACd,SAAS,CAAS;IAClB,WAAW,CAAS;IAE5B,YAAY,MAAc,EAAE,QAAgB,gBAAgB,EAAE,YAAoB,IAAI,EAAE,cAAsB,GAAG;QAC/G,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAE/B,uBAAuB;QACvB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,mDAAmD;YAC5D,OAAO,EAAE;gBACP,eAAe,EAAE,UAAU,MAAM,EAAE;gBACnC,cAAc,EAAE,kBAAkB;aACnC;YACD,OAAO,EAAE,MAAM,EAAE,SAAS;SAC3B,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,YAAoB,EAAE,MAAc,EAAE,cAAwB;QAC/E,IAAI,CAAC;YACH,wCAAwC;YACxC,MAAM,WAAW,GAAQ;gBACvB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,WAAW;gCACjB,SAAS,EAAE;oCACT,GAAG,EAAE,YAAY;iCAClB;6BACF;4BACD;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,mBAAmB,CAAC,MAAM,CAAC;6BAClC;yBACF;qBACF;iBACF;gBACD,UAAU,EAAE,IAAI,CAAC,SAAS;gBAC1B,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,MAAM,EAAE,KAAK;aACd,CAAC;YAEF,4BAA4B;YAC5B,IAAI,cAAc,EAAE,CAAC;gBACnB,WAAW,CAAC,UAAU,GAAG;oBACvB,eAAe,EAAE,IAAI;oBACrB,eAAe,EAAE,KAAK,CAAE,eAAe;iBACxC,CAAC;YACJ,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAC;YAE1E,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC;gBACnD,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;YAC3D,CAAC;YAED,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;QAElD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,MAAM,YAAY,GAAG,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC;gBAC3E,MAAM,IAAI,KAAK,CAAC,mBAAmB,YAAY,EAAE,CAAC,CAAC;YACrD,CAAC;YACD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,YAAY;QACV,OAAO,SAAS,IAAI,CAAC,KAAK,GAAG,CAAC;IAChC,CAAC;CACF"}
|
|
@@ -2,15 +2,17 @@
|
|
|
2
2
|
* 硅基流动 DeepSeek-OCR API 客户端
|
|
3
3
|
* 基于 OpenAI 兼容 API
|
|
4
4
|
*/
|
|
5
|
-
import type { LumaConfig } from './config.js';
|
|
6
5
|
import type { VisionClient } from './vision-client.js';
|
|
7
6
|
/**
|
|
8
7
|
* 硅基流动 API 客户端
|
|
9
8
|
*/
|
|
10
9
|
export declare class SiliconFlowClient implements VisionClient {
|
|
11
|
-
private
|
|
10
|
+
private apiKey;
|
|
11
|
+
private model;
|
|
12
|
+
private maxTokens;
|
|
13
|
+
private temperature;
|
|
12
14
|
private apiEndpoint;
|
|
13
|
-
constructor(
|
|
15
|
+
constructor(apiKey: string, model?: string, maxTokens?: number, temperature?: number);
|
|
14
16
|
/**
|
|
15
17
|
* 分析图片
|
|
16
18
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"siliconflow-client.d.ts","sourceRoot":"","sources":["../src/siliconflow-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;
|
|
1
|
+
{"version":3,"file":"siliconflow-client.d.ts","sourceRoot":"","sources":["../src/siliconflow-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AA2CvD;;GAEG;AACH,qBAAa,iBAAkB,YAAW,YAAY;IACpD,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAAoD;gBAE3D,MAAM,EAAE,MAAM,EAAE,KAAK,GAAE,MAAmC,EAAE,SAAS,GAAE,MAAa,EAAE,WAAW,GAAE,MAAY;IAO3H;;OAEG;IACG,YAAY,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IAqEnG;;OAEG;IACH,YAAY,IAAI,MAAM;CAGvB"}
|
|
@@ -8,17 +8,23 @@ import { logger } from './utils/logger.js';
|
|
|
8
8
|
* 硅基流动 API 客户端
|
|
9
9
|
*/
|
|
10
10
|
export class SiliconFlowClient {
|
|
11
|
-
|
|
11
|
+
apiKey;
|
|
12
|
+
model;
|
|
13
|
+
maxTokens;
|
|
14
|
+
temperature;
|
|
12
15
|
apiEndpoint = 'https://api.siliconflow.cn/v1/chat/completions';
|
|
13
|
-
constructor(
|
|
14
|
-
this.
|
|
16
|
+
constructor(apiKey, model = 'deepseek-ai/DeepSeek-OCR', maxTokens = 4096, temperature = 0.7) {
|
|
17
|
+
this.apiKey = apiKey;
|
|
18
|
+
this.model = model;
|
|
19
|
+
this.maxTokens = maxTokens;
|
|
20
|
+
this.temperature = temperature;
|
|
15
21
|
}
|
|
16
22
|
/**
|
|
17
23
|
* 分析图片
|
|
18
24
|
*/
|
|
19
25
|
async analyzeImage(imageDataUrl, prompt, enableThinking) {
|
|
20
26
|
const requestBody = {
|
|
21
|
-
model: this.
|
|
27
|
+
model: this.model,
|
|
22
28
|
messages: [
|
|
23
29
|
{
|
|
24
30
|
role: 'user',
|
|
@@ -36,18 +42,17 @@ export class SiliconFlowClient {
|
|
|
36
42
|
],
|
|
37
43
|
},
|
|
38
44
|
],
|
|
39
|
-
temperature: this.
|
|
40
|
-
max_tokens: this.
|
|
41
|
-
top_p: this.config.topP,
|
|
45
|
+
temperature: this.temperature,
|
|
46
|
+
max_tokens: this.maxTokens,
|
|
42
47
|
stream: false,
|
|
43
48
|
};
|
|
44
49
|
logger.info('Calling SiliconFlow DeepSeek-OCR API', {
|
|
45
|
-
model: this.
|
|
50
|
+
model: this.model,
|
|
46
51
|
});
|
|
47
52
|
try {
|
|
48
53
|
const response = await axios.post(this.apiEndpoint, requestBody, {
|
|
49
54
|
headers: {
|
|
50
|
-
'Authorization': `Bearer ${this.
|
|
55
|
+
'Authorization': `Bearer ${this.apiKey}`,
|
|
51
56
|
'Content-Type': 'application/json',
|
|
52
57
|
},
|
|
53
58
|
timeout: 60000, // 60秒超时
|
|
@@ -79,7 +84,7 @@ export class SiliconFlowClient {
|
|
|
79
84
|
* 获取模型名称
|
|
80
85
|
*/
|
|
81
86
|
getModelName() {
|
|
82
|
-
return this.
|
|
87
|
+
return `DeepSeek (${this.model})`;
|
|
83
88
|
}
|
|
84
89
|
}
|
|
85
90
|
//# sourceMappingURL=siliconflow-client.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"siliconflow-client.js","sourceRoot":"","sources":["../src/siliconflow-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AA0C3C;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,
|
|
1
|
+
{"version":3,"file":"siliconflow-client.js","sourceRoot":"","sources":["../src/siliconflow-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AA0C3C;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAS;IACf,KAAK,CAAS;IACd,SAAS,CAAS;IAClB,WAAW,CAAS;IACpB,WAAW,GAAG,gDAAgD,CAAC;IAEvE,YAAY,MAAc,EAAE,QAAgB,0BAA0B,EAAE,YAAoB,IAAI,EAAE,cAAsB,GAAG;QACzH,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,YAAoB,EAAE,MAAc,EAAE,cAAwB;QAC/E,MAAM,WAAW,GAAuB;YACtC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,WAAW;4BACjB,SAAS,EAAE;gCACT,GAAG,EAAE,YAAY;6BAClB;yBACF;wBACD;4BACE,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,MAAM;yBACb;qBACF;iBACF;aACF;YACD,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,UAAU,EAAE,IAAI,CAAC,SAAS;YAC1B,MAAM,EAAE,KAAK;SACd,CAAC;QAEF,MAAM,CAAC,IAAI,CAAC,sCAAsC,EAAE;YAClD,KAAK,EAAE,IAAI,CAAC,KAAK;SAClB,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,WAAW,EAChB,WAAW,EACX;gBACE,OAAO,EAAE;oBACP,eAAe,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;oBACxC,cAAc,EAAE,kBAAkB;iBACnC;gBACD,OAAO,EAAE,KAAK,EAAE,QAAQ;aACzB,CACF,CAAC;YAEF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjE,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;YACnD,CAAC;YAED,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;YACxD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC;YAElC,MAAM,CAAC,IAAI,CAAC,iCAAiC,EAAE;gBAC7C,MAAM,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;gBAChC,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,KAAK;aAC3B,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,6BAA6B,EAAE;gBAC1C,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,MAAM,OAAO,GAAG,KAAK,CAAC
,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC;gBACtE,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC;gBACtC,MAAM,IAAI,KAAK,CAAC,0BAA0B,MAAM,IAAI,SAAS,MAAM,OAAO,EAAE,CAAC,CAAC;YAChF,CAAC;YACD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,aAAa,IAAI,CAAC,KAAK,GAAG,CAAC;IACpC,CAAC;CACF"}
|
package/build/zhipu-client.d.ts
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* 智谱 GLM-4.5V API 客户端
|
|
3
3
|
*/
|
|
4
|
-
import type { LumaConfig } from './config.js';
|
|
5
4
|
import type { VisionClient } from './vision-client.js';
|
|
6
5
|
/**
|
|
7
6
|
* 智谱 API 客户端
|
|
8
7
|
*/
|
|
9
8
|
export declare class ZhipuClient implements VisionClient {
|
|
10
|
-
private
|
|
9
|
+
private apiKey;
|
|
10
|
+
private model;
|
|
11
|
+
private maxTokens;
|
|
12
|
+
private temperature;
|
|
13
|
+
private topP;
|
|
11
14
|
private apiEndpoint;
|
|
12
|
-
constructor(
|
|
15
|
+
constructor(apiKey: string, model?: string, maxTokens?: number, temperature?: number, topP?: number);
|
|
13
16
|
/**
|
|
14
17
|
* 分析图片
|
|
15
18
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"zhipu-client.d.ts","sourceRoot":"","sources":["../src/zhipu-client.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"zhipu-client.d.ts","sourceRoot":"","sources":["../src/zhipu-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AA4CvD;;GAEG;AACH,qBAAa,WAAY,YAAW,YAAY;IAC9C,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,IAAI,CAAS;IACrB,OAAO,CAAC,WAAW,CAA2D;gBAElE,MAAM,EAAE,MAAM,EAAE,KAAK,GAAE,MAAmB,EAAE,SAAS,GAAE,MAAa,EAAE,WAAW,GAAE,MAAY,EAAE,IAAI,GAAE,MAAY;IAQ/H;;OAEG;IACG,YAAY,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IA4EnG;;OAEG;IACH,YAAY,IAAI,MAAM;CAGvB"}
|
package/build/zhipu-client.js
CHANGED
|
@@ -7,17 +7,25 @@ import { logger } from './utils/logger.js';
|
|
|
7
7
|
* 智谱 API 客户端
|
|
8
8
|
*/
|
|
9
9
|
export class ZhipuClient {
|
|
10
|
-
|
|
10
|
+
apiKey;
|
|
11
|
+
model;
|
|
12
|
+
maxTokens;
|
|
13
|
+
temperature;
|
|
14
|
+
topP;
|
|
11
15
|
apiEndpoint = 'https://open.bigmodel.cn/api/paas/v4/chat/completions';
|
|
12
|
-
constructor(
|
|
13
|
-
this.
|
|
16
|
+
constructor(apiKey, model = 'glm-4.5v', maxTokens = 4096, temperature = 0.7, topP = 0.7) {
|
|
17
|
+
this.apiKey = apiKey;
|
|
18
|
+
this.model = model;
|
|
19
|
+
this.maxTokens = maxTokens;
|
|
20
|
+
this.temperature = temperature;
|
|
21
|
+
this.topP = topP;
|
|
14
22
|
}
|
|
15
23
|
/**
|
|
16
24
|
* 分析图片
|
|
17
25
|
*/
|
|
18
26
|
async analyzeImage(imageDataUrl, prompt, enableThinking) {
|
|
19
27
|
const requestBody = {
|
|
20
|
-
model: this.
|
|
28
|
+
model: this.model,
|
|
21
29
|
messages: [
|
|
22
30
|
{
|
|
23
31
|
role: 'user',
|
|
@@ -35,23 +43,23 @@ export class ZhipuClient {
|
|
|
35
43
|
],
|
|
36
44
|
},
|
|
37
45
|
],
|
|
38
|
-
temperature: this.
|
|
39
|
-
max_tokens: this.
|
|
40
|
-
top_p: this.
|
|
46
|
+
temperature: this.temperature,
|
|
47
|
+
max_tokens: this.maxTokens,
|
|
48
|
+
top_p: this.topP,
|
|
41
49
|
thinking: { type: 'enabled' }, // 默认启用思考模式,提高分析准确性
|
|
42
50
|
};
|
|
43
51
|
// 允许显式禁用 thinking(如需要更快速度)
|
|
44
|
-
if (
|
|
52
|
+
if (enableThinking === false) {
|
|
45
53
|
delete requestBody.thinking;
|
|
46
54
|
}
|
|
47
55
|
logger.info('Calling GLM-4.5V API', {
|
|
48
|
-
model: this.
|
|
56
|
+
model: this.model,
|
|
49
57
|
thinking: !!requestBody.thinking
|
|
50
58
|
});
|
|
51
59
|
try {
|
|
52
60
|
const response = await axios.post(this.apiEndpoint, requestBody, {
|
|
53
61
|
headers: {
|
|
54
|
-
'Authorization': `Bearer ${this.
|
|
62
|
+
'Authorization': `Bearer ${this.apiKey}`,
|
|
55
63
|
'Content-Type': 'application/json',
|
|
56
64
|
},
|
|
57
65
|
timeout: 60000, // 60秒超时
|
|
@@ -83,7 +91,7 @@ export class ZhipuClient {
|
|
|
83
91
|
* 获取模型名称
|
|
84
92
|
*/
|
|
85
93
|
getModelName() {
|
|
86
|
-
return this.
|
|
94
|
+
return `GLM (${this.model})`;
|
|
87
95
|
}
|
|
88
96
|
}
|
|
89
97
|
//# sourceMappingURL=zhipu-client.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"zhipu-client.js","sourceRoot":"","sources":["../src/zhipu-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AA2C3C;;GAEG;AACH,MAAM,OAAO,WAAW;IACd,MAAM,
|
|
1
|
+
{"version":3,"file":"zhipu-client.js","sourceRoot":"","sources":["../src/zhipu-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AA2C3C;;GAEG;AACH,MAAM,OAAO,WAAW;IACd,MAAM,CAAS;IACf,KAAK,CAAS;IACd,SAAS,CAAS;IAClB,WAAW,CAAS;IACpB,IAAI,CAAS;IACb,WAAW,GAAG,uDAAuD,CAAC;IAE9E,YAAY,MAAc,EAAE,QAAgB,UAAU,EAAE,YAAoB,IAAI,EAAE,cAAsB,GAAG,EAAE,OAAe,GAAG;QAC7H,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,YAAoB,EAAE,MAAc,EAAE,cAAwB;QAC/E,MAAM,WAAW,GAAiB;YAChC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,WAAW;4BACjB,SAAS,EAAE;gCACT,GAAG,EAAE,YAAY;6BAClB;yBACF;wBACD;4BACE,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,MAAM;yBACb;qBACF;iBACF;aACF;YACD,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,UAAU,EAAE,IAAI,CAAC,SAAS;YAC1B,KAAK,EAAE,IAAI,CAAC,IAAI;YAChB,QAAQ,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,mBAAmB;SACnD,CAAC;QAEF,2BAA2B;QAC3B,IAAI,cAAc,KAAK,KAAK,EAAE,CAAC;YAC7B,OAAO,WAAW,CAAC,QAAQ,CAAC;QAC9B,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,sBAAsB,EAAE;YAClC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE,CAAC,CAAC,WAAW,CAAC,QAAQ;SACjC,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,WAAW,EAChB,WAAW,EACX;gBACE,OAAO,EAAE;oBACP,eAAe,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;oBACxC,cAAc,EAAE,kBAAkB;iBACnC;gBACD,OAAO,EAAE,KAAK,EAAE,QAAQ;aACzB,CACF,CAAC;YAEF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjE,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;YAC/C,CAAC;YAED,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;YACxD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC;YAElC,MAAM,CAAC,IAAI,CAAC,8BAA8B,EAAE;gBAC1C,MAAM,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;gBAChC,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,KAAK;aAC3B,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAA
K,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,0BAA0B,EAAE;gBACvC,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC;gBACtE,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC;gBACtC,MAAM,IAAI,KAAK,CAAC,uBAAuB,MAAM,IAAI,SAAS,MAAM,OAAO,EAAE,CAAC,CAAC;YAC7E,CAAC;YACD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,QAAQ,IAAI,CAAC,KAAK,GAAG,CAAC;IAC/B,CAAC;CACF"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "luma-mcp",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "Multi-model vision understanding MCP server. Supports GLM-4.5V (Zhipu)
|
|
3
|
+
"version": "1.2.1",
|
|
4
|
+
"description": "Multi-model vision understanding MCP server. Supports GLM-4.5V (Zhipu), DeepSeek-OCR (SiliconFlow - Free), and Qwen3-VL-Flash (Aliyun)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"luma-mcp": "build/index.js"
|
|
@@ -21,6 +21,9 @@
|
|
|
21
21
|
"zhipu",
|
|
22
22
|
"deepseek-ocr",
|
|
23
23
|
"siliconflow",
|
|
24
|
+
"qwen3-vl",
|
|
25
|
+
"aliyun",
|
|
26
|
+
"dashscope",
|
|
24
27
|
"ocr",
|
|
25
28
|
"free",
|
|
26
29
|
"image-understanding",
|
|
@@ -9,7 +9,13 @@ import * as path from 'path';
|
|
|
9
9
|
async function testDeepSeekOCR(imagePath: string) {
|
|
10
10
|
console.log('\n🧪 测试 DeepSeek-OCR API(原始调用)\n');
|
|
11
11
|
|
|
12
|
-
const apiKey =
|
|
12
|
+
const apiKey = process.env.SILICONFLOW_API_KEY;
|
|
13
|
+
|
|
14
|
+
if (!apiKey) {
|
|
15
|
+
console.error('❌ 错误: 需要设置 SILICONFLOW_API_KEY 环境变量');
|
|
16
|
+
console.error('示例: $env:SILICONFLOW_API_KEY="your-api-key"');
|
|
17
|
+
process.exit(1);
|
|
18
|
+
}
|
|
13
19
|
|
|
14
20
|
// 读取图片并转为 base64
|
|
15
21
|
const imageBuffer = fs.readFileSync(imagePath);
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Qwen 客户端测试
|
|
3
|
+
* 测试阿里云通义千问VL视觉理解
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { QwenClient } from '../src/qwen-client.js';
|
|
7
|
+
import { imageToBase64 } from '../src/image-processor.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Manual smoke test for QwenClient.
 *
 * Reads the API key from DASHSCOPE_API_KEY and the image path from the first
 * CLI argument, then runs three analyses in order (basic, thinking mode, OCR)
 * and prints each result. Exits with status 1 on missing input or on failure.
 */
async function testQwen() {
  const dashscopeKey = process.env.DASHSCOPE_API_KEY;
  if (!dashscopeKey) {
    console.error('❌ 错误: 需要设置 DASHSCOPE_API_KEY 环境变量');
    const setupHints = [
      '设置方法:',
      ' macOS/Linux: export DASHSCOPE_API_KEY="your-api-key"',
      ' Windows: $env:DASHSCOPE_API_KEY="your-api-key"',
    ];
    for (const hint of setupHints) {
      console.log(hint);
    }
    process.exit(1);
  }

  // Image path comes from the first command-line argument.
  const targetImage = process.argv[2];
  if (!targetImage) {
    console.error('❌ 错误: 请提供图片路径');
    const usageHints = [
      '用法: tsx test/test-qwen.ts <图片路径>',
      '示例: tsx test/test-qwen.ts ./test.png',
    ];
    for (const hint of usageHints) {
      console.log(hint);
    }
    process.exit(1);
  }

  console.log('🚀 开始测试 Qwen3-VL-Flash...\n');

  // Each scenario: banner printed before the call, prompt sent to the model,
  // thinking flag, and the label printed ahead of the result.
  const scenarios = [
    {
      banner: '3️⃣ 测试基础分析(不启用思考模式)...',
      prompt: '请详细分析这张图片的内容',
      thinking: false,
      label: '📊 基础分析结果:',
    },
    {
      banner: '4️⃣ 测试思考模式(enable_thinking=true)...',
      prompt: '请详细分析这张图片的内容,包括所有细节',
      thinking: true,
      label: '🧠 思考模式分析结果:',
    },
    {
      banner: '5️⃣ 测试 OCR 能力...',
      prompt: '识别图片中的所有文字',
      thinking: false,
      label: '📝 OCR 结果:',
    },
  ];

  try {
    // Step 1: build the client (Flash model, 4096 max tokens, temperature 0.7).
    console.log('1️⃣ 初始化 Qwen 客户端...');
    const qwen = new QwenClient(dashscopeKey, 'qwen3-vl-flash', 4096, 0.7);
    console.log(`✅ 客户端初始化成功: ${qwen.getModelName()}\n`);

    // Step 2: load the image once and reuse it for every scenario.
    console.log('2️⃣ 读取图片...');
    const encodedImage = await imageToBase64(targetImage);
    console.log(`✅ 图片读取成功 (${targetImage})\n`);

    // Steps 3-5: run the scenarios sequentially, in declaration order.
    for (const { banner, prompt, thinking, label } of scenarios) {
      console.log(banner);
      const answer = await qwen.analyzeImage(encodedImage, prompt, thinking);
      console.log(label);
      console.log(answer);
      console.log('\n');
    }

    console.log('✅ 所有测试完成!');
  } catch (failure) {
    const detail = failure instanceof Error ? failure.message : failure;
    console.error('❌ 测试失败:', detail);
    process.exit(1);
  }
}
|
|
87
|
+
|
|
88
|
+
testQwen();
|