luma-mcp 1.0.3 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example ADDED
@@ -0,0 +1,39 @@
1
+ # Luma MCP 配置示例
2
+
3
+ # ==========================================
4
+ # 模型提供商选择
5
+ # ==========================================
6
+ # 可选值: zhipu, siliconflow
7
+ # 默认: zhipu
8
+ MODEL_PROVIDER=zhipu
9
+
10
+ # ==========================================
11
+ # 智谱 GLM-4.5V 配置(使用智谱时需要)
12
+ # ==========================================
13
+ ZHIPU_API_KEY=your-zhipu-api-key-here
14
+
15
+ # ==========================================
16
+ # 硅基流动 DeepSeek-OCR 配置(使用硅基流动时需要)
17
+ # ==========================================
18
+ # SILICONFLOW_API_KEY=your-siliconflow-api-key-here
19
+
20
+ # ==========================================
21
+ # 通用配置(可选)
22
+ # ==========================================
23
+ # 模型名称(留空则使用默认值)
24
+ # zhipu 默认: glm-4.5v
25
+ # siliconflow 默认: deepseek-ai/DeepSeek-OCR
26
+ # MODEL_NAME=
27
+
28
+ # 最大生成 tokens(默认: 4096)
29
+ # MAX_TOKENS=4096
30
+
31
+ # 温度参数 0-1(默认: 0.7)
32
+ # TEMPERATURE=0.7
33
+
34
+ # Top-p 参数 0-1(默认: 0.7)
35
+ # TOP_P=0.7
36
+
37
+ # 是否启用思考模式(仅 GLM-4.5V 支持,默认: false)
38
+ # 启用后可提高分析准确性,但会增加 20-30% tokens 消耗
39
+ # ENABLE_THINKING=false
package/CHANGELOG.md ADDED
@@ -0,0 +1,60 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [1.1.1] - 2025-11-13
6
+
7
+ ### Added
8
+ - 🖼️ **Data URI 支持**: 支持接收 base64 编码的图片数据 (data:image/png;base64,...)
9
+ - 🚀 **为未来做准备**: 当 MCP 客户端支持时,可直接传递用户粘贴的图片
10
+
11
+ ### Changed
12
+ - 📝 更新工具描述,说明支持三种输入格式:本地路径、URL、Data URI
13
+ - ✅ 新增 Data URI 格式验证(MIME 类型、大小限制)
14
+
15
+ ## [1.1.0] - 2025-11-13
16
+
17
+ ### Added
18
+ - 🎉 **多模型支持**: 新增硅基流动 DeepSeek-OCR 支持
19
+ - 🆓 **免费选项**: DeepSeek-OCR 通过硅基流动提供完全免费的 OCR 服务
20
+ - 📐 **统一接口**: 创建 VisionClient 接口,支持灵活扩展更多视觉模型
21
+ - ⚙️ **灵活配置**: 通过 `MODEL_PROVIDER` 环境变量轻松切换模型
22
+
23
+ ### Changed
24
+ - 🔧 环境变量命名优化,支持通用配置(`MODEL_NAME`、`MAX_TOKENS` 等)
25
+ - 📝 更新文档,提供双模型配置说明和选择建议
26
+ - 🏗️ 重构代码结构,提升可维护性
27
+
28
+ ### Technical Details
29
+ - 新增文件:
30
+ - `src/vision-client.ts` - 视觉模型客户端统一接口
31
+ - `src/siliconflow-client.ts` - 硅基流动 API 客户端实现
32
+ - `.env.example` - 配置示例文件
33
+ - 修改文件:
34
+ - `src/config.ts` - 支持多提供商配置
35
+ - `src/zhipu-client.ts` - 实现 VisionClient 接口
36
+ - `src/index.ts` - 根据配置动态选择客户端
37
+ - `README.md` - 完整的双模型使用文档
38
+
39
+ ## [1.0.3] - 2025-11-12
40
+
41
+ ### Features
42
+ - 基于智谱 GLM-4.5V 的视觉理解能力
43
+ - 支持本地文件和远程 URL
44
+ - 内置重试机制
45
+ - 思考模式支持
46
+
47
+ ---
48
+
49
+ **模型对比**:
50
+
51
+ | 特性 | GLM-4.5V | DeepSeek-OCR |
52
+ |----------|----------|--------------|
53
+ | 费用 | 收费 | **免费** |
54
+ | 中文理解 | 优秀 | 良好 |
55
+ | OCR 能力 | 良好 | **优秀** |
56
+ | 思考模式 | ✅ | ❌ |
57
+
58
+ **推荐使用场景**:
59
+ - 需要 OCR/文字识别 → **DeepSeek-OCR** (免费)
60
+ - 需要深度图片理解 → **GLM-4.5V**
package/README.md CHANGED
@@ -1,26 +1,28 @@
1
1
  # Luma MCP
2
2
 
3
- 基于智谱 GLM-4.5V 的视觉理解 MCP 服务器,为不支持图片理解的 AI 助手提供视觉能力。
3
+ 多模型视觉理解 MCP 服务器,为不支持图片理解的 AI 助手提供视觉能力。
4
4
 
5
5
  [English](./docs/README_EN.md) | 中文
6
6
 
7
7
  ## 特性
8
8
 
9
+ - **多模型支持**: 支持 GLM-4.5V(智谱)和 DeepSeek-OCR(硅基流动)
9
10
  - **简单设计**: 单一 `analyze_image` 工具处理所有图片分析任务
10
11
  - **智能理解**: 自动识别代码、UI、错误等不同场景
11
- - **全面支持**: 代码截图、界面设计、错误诊断、通用图片
12
+ - **全面支持**: 代码截图、界面设计、错误诊断、OCR 文字识别
12
13
  - **标准 MCP 协议**: 无缝集成 Claude Desktop、Cline 等 MCP 客户端
13
- - **GLM-4.5V 驱动**: 中文理解优秀,API 性价比高
14
+ - **免费选项**: DeepSeek-OCR 通过硅基流动提供免费调用
14
15
  - **URL 支持**: 支持本地文件和远程图片 URL
15
16
  - **重试机制**: 内置指数退避重试,提高可靠性
16
- - **思考模式**: 默认启用深度分析
17
17
 
18
18
  ## 快速开始
19
19
 
20
20
  ### 前置要求
21
21
 
22
22
  - Node.js >= 18.0.0
23
- - 智谱 AI API Key ([获取地址](https://open.bigmodel.cn/))
23
+ - **选择一种模型**:
24
+ - **方案 A**: 智谱 AI API Key ([获取地址](https://open.bigmodel.cn/)) - 中文理解优秀
25
+ - **方案 B**: 硅基流动 API Key ([获取地址](https://cloud.siliconflow.cn/)) - **免费使用**,OCR 能力强
24
26
 
25
27
  ### 安装
26
28
 
@@ -47,7 +49,7 @@ npx luma-mcp
47
49
 
48
50
  **macOS 配置文件位置**: `~/Library/Application Support/Claude/claude_desktop_config.json`
49
51
 
50
- **使用 npx(推荐)**:
52
+ **方案 A: 使用智谱 GLM-4.5V**:
51
53
 
52
54
  ```json
53
55
  {
@@ -63,7 +65,24 @@ npx luma-mcp
63
65
  }
64
66
  ```
65
67
 
66
- **本地开发**:
68
+ **方案 B: 使用硅基流动 DeepSeek-OCR(免费)**:
69
+
70
+ ```json
71
+ {
72
+ "mcpServers": {
73
+ "luma": {
74
+ "command": "npx",
75
+ "args": ["-y", "luma-mcp"],
76
+ "env": {
77
+ "MODEL_PROVIDER": "siliconflow",
78
+ "SILICONFLOW_API_KEY": "your-siliconflow-api-key"
79
+ }
80
+ }
81
+ }
82
+ }
83
+ ```
84
+
85
+ **本地开发(智谱)**:
67
86
 
68
87
  ```json
69
88
  {
@@ -79,13 +98,30 @@ npx luma-mcp
79
98
  }
80
99
  ```
81
100
 
101
+ **本地开发(硅基流动)**:
102
+
103
+ ```json
104
+ {
105
+ "mcpServers": {
106
+ "luma": {
107
+ "command": "node",
108
+ "args": ["D:\\codes\\Luma_mcp\\build\\index.js"],
109
+ "env": {
110
+ "MODEL_PROVIDER": "siliconflow",
111
+ "SILICONFLOW_API_KEY": "your-siliconflow-api-key"
112
+ }
113
+ }
114
+ }
115
+ }
116
+ ```
117
+
82
118
  配置完成后重启 Claude Desktop。
83
119
 
84
120
  #### Cline (VSCode)
85
121
 
86
- **使用 npx(推荐)**:
122
+ 在项目根目录或 `.vscode/` 目录下创建 `mcp.json`
87
123
 
88
- 在项目根目录或 `.vscode/` 目录下创建 `mcp.json`:
124
+ **方案 A: 使用智谱 GLM-4.5V**:
89
125
 
90
126
  ```json
91
127
  {
@@ -101,16 +137,17 @@ npx luma-mcp
101
137
  }
102
138
  ```
103
139
 
104
- **本地开发**:
140
+ **方案 B: 使用硅基流动 DeepSeek-OCR(免费)**:
105
141
 
106
142
  ```json
107
143
  {
108
144
  "mcpServers": {
109
145
  "luma": {
110
- "command": "node",
111
- "args": ["D:\\codes\\Luma_mcp\\build\\index.js"],
146
+ "command": "npx",
147
+ "args": ["-y", "luma-mcp"],
112
148
  "env": {
113
- "ZHIPU_API_KEY": "your-zhipu-api-key"
149
+ "MODEL_PROVIDER": "siliconflow",
150
+ "SILICONFLOW_API_KEY": "your-siliconflow-api-key"
114
151
  }
115
152
  }
116
153
  }
@@ -119,10 +156,16 @@ npx luma-mcp
119
156
 
120
157
  #### Claude Code (命令行)
121
158
 
159
+ **使用智谱 GLM-4.5V**:
122
160
  ```bash
123
161
  claude mcp add -s user luma-mcp --env ZHIPU_API_KEY=your-api-key -- npx -y luma-mcp
124
162
  ```
125
163
 
164
+ **使用硅基流动 DeepSeek-OCR(免费)**:
165
+ ```bash
166
+ claude mcp add -s user luma-mcp --env MODEL_PROVIDER=siliconflow --env SILICONFLOW_API_KEY=your-api-key -- npx -y luma-mcp
167
+ ```
168
+
126
169
  #### 其他工具
127
170
 
128
171
  更多 MCP 客户端配置方法请参考[智谱官方文档](https://docs.bigmodel.cn/cn/coding-plan/mcp/vision-mcp-server#claude-code)
@@ -164,6 +207,7 @@ Claude: [自动调用 analyze_image 工具]
164
207
 
165
208
  不需要 MCP 客户端即可测试:
166
209
 
210
+ **测试智谱 GLM-4.5V**:
167
211
  ```bash
168
212
  # 设置 API Key
169
213
  export ZHIPU_API_KEY="your-api-key" # macOS/Linux
@@ -171,7 +215,23 @@ $env:ZHIPU_API_KEY="your-api-key" # Windows PowerShell
171
215
 
172
216
  # 测试本地图片
173
217
  npm run test:local ./test.png
218
+ ```
219
+
220
+ **测试硅基流动 DeepSeek-OCR**:
221
+ ```bash
222
+ # 设置 API Key 和提供商
223
+ export MODEL_PROVIDER=siliconflow
224
+ export SILICONFLOW_API_KEY="your-api-key" # macOS/Linux
225
+
226
+ $env:MODEL_PROVIDER="siliconflow"
227
+ $env:SILICONFLOW_API_KEY="your-api-key" # Windows PowerShell
228
+
229
+ # 测试本地图片
230
+ npm run test:local ./test.png
231
+ ```
174
232
 
233
+ **其他测试命令**:
234
+ ```bash
175
235
  # 测试并提问
176
236
  npm run test:local ./code-error.png "这段代码有什么问题?"
177
237
 
@@ -187,10 +247,11 @@ npm run test:local https://example.com/image.jpg
187
247
 
188
248
  **参数**:
189
249
 
190
- - `image_source` (必需): 图片路径或 URL
250
+ - `image_source` (必需): 图片来源,支持三种格式
251
+ - **本地文件**: 绝对路径或相对路径(例:`./image.png`, `C:\Users\...\image.jpg`)
252
+ - **远程 URL**: https:// 开头的 URL(例:`https://example.com/pic.jpg`)
253
+ - **Data URI**: Base64 编码的图片数据(例:`data:image/png;base64,iVBORw0KGgo...`)
191
254
  - 支持格式: JPG, PNG, WebP, GIF
192
- - 本地文件: 绝对路径或相对路径
193
- - 远程图片: https:// 开头的 URL
194
255
  - `prompt` (必需): 分析指令或问题
195
256
 
196
257
  **示例**:
@@ -213,18 +274,42 @@ analyze_image({
213
274
  image_source: "https://example.com/ui.png",
214
275
  prompt: "分析这个界面的布局和可用性问题"
215
276
  })
277
+
278
+ // Data URI (当客户端支持时)
279
+ analyze_image({
280
+ image_source: "data:image/png;base64,iVBORw0KGgo...",
281
+ prompt: "识别图片中的所有文字"
282
+ })
216
283
  ```
217
284
 
218
285
  ## 环境变量
219
286
 
220
- | 变量名 | 必需 | 默认值 | 说明 |
221
- |-------------------------|------|------------|----------------------|
222
- | `ZHIPU_API_KEY` | | - | 智谱 AI 的 API 密钥 |
223
- | `ZHIPU_MODEL` | 否 | `glm-4.5v` | 使用的模型 |
224
- | `ZHIPU_MAX_TOKENS` | 否 | `4096` | 最大生成 tokens |
225
- | `ZHIPU_TEMPERATURE` | 否 | `0.7` | 温度参数 (0-1) |
226
- | `ZHIPU_TOP_P` | 否 | `0.7` | Top-p 参数 (0-1) |
227
- | `ZHIPU_ENABLE_THINKING` | 否 | `true` | 是否启用思考模式 |
287
+ ### 通用配置
288
+
289
+ | 变量名 | 必需 | 默认值 | 说明 |
290
+ |------------------|------|-------------|---------------------------------------|
291
+ | `MODEL_PROVIDER` | 否 | `zhipu` | 模型提供商:`zhipu` 或 `siliconflow` |
292
+ | `MODEL_NAME` | 否 | 见下文 | 模型名称(自动根据提供商选择) |
293
+ | `MAX_TOKENS` | 否 | `4096` | 最大生成 tokens |
294
+ | `TEMPERATURE` | 否 | `0.7` | 温度参数 (0-1) |
295
+ | `TOP_P` | 否 | `0.7` | Top-p 参数 (0-1) |
296
+ | `ENABLE_THINKING`| 否 | `false` | 是否启用思考模式(仅 GLM-4.5V) |
297
+
298
+ ### 智谱 GLM-4.5V 专用
299
+
300
+ | 变量名 | 必需 | 默认值 | 说明 |
301
+ |------------------|---------------------|-------------|----------------------|
302
+ | `ZHIPU_API_KEY` | 是(使用智谱时) | - | 智谱 AI 的 API 密钥 |
303
+
304
+ 默认模型:`glm-4.5v`
305
+
306
+ ### 硅基流动 DeepSeek-OCR 专用
307
+
308
+ | 变量名 | 必需 | 默认值 | 说明 |
309
+ |------------------------|-------------------------|---------------------------------|----------------------------|
310
+ | `SILICONFLOW_API_KEY` | 是(使用硅基流动时) | - | 硅基流动的 API 密钥 |
311
+
312
+ 默认模型:`deepseek-ai/DeepSeek-OCR`
228
313
 
229
314
  **思考模式说明**:
230
315
  - 默认关闭(`ENABLE_THINKING` 默认为 `false`),启用后可提高图片分析的准确性和详细程度
@@ -264,8 +349,10 @@ npm run test:local <图片路径> [问题]
264
349
  luma-mcp/
265
350
  ├── src/
266
351
  │ ├── index.ts # MCP 服务器入口
267
- │ ├── config.ts # 配置管理
352
+ │ ├── config.ts # 配置管理(支持多模型)
353
+ │ ├── vision-client.ts # 视觉模型客户端接口
268
354
  │ ├── zhipu-client.ts # GLM-4.5V API 客户端
355
+ │ ├── siliconflow-client.ts # DeepSeek-OCR API 客户端
269
356
  │ ├── image-processor.ts # 图片处理
270
357
  │ ├── prompts.ts # 提示词模板
271
358
  │ └── utils/
@@ -285,15 +372,34 @@ luma-mcp/
285
372
 
286
373
  ### 如何获取 API Key?
287
374
 
375
+ **智谱 GLM-4.5V**:
288
376
  1. 访问 [智谱开放平台](https://open.bigmodel.cn/)
289
377
  2. 注册/登录账号
290
378
  3. 进入控制台创建 API Key
291
379
  4. 复制 API Key 到配置文件
292
380
 
381
+ **硅基流动 DeepSeek-OCR(免费)**:
382
+ 1. 访问 [硅基流动平台](https://cloud.siliconflow.cn/)
383
+ 2. 注册/登录账号
384
+ 3. 进入 API 管理创建 API Key
385
+ 4. 复制 API Key 到配置文件
386
+
293
387
  ### 支持哪些图片格式?
294
388
 
295
389
  支持 JPG、PNG、WebP、GIF 格式。建议使用 JPG 格式以获得更好的压缩率。
296
390
 
391
+ ### 什么是 Data URI?
392
+
393
+ Data URI 是一种将图片数据嵌入字符串的方式,格式为:
394
+ ```
395
+ data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...
396
+ ```
397
+
398
+ **使用场景**:
399
+ - 当 MCP 客户端(如 Claude Desktop)支持时,可以直接传递用户粘贴的图片
400
+ - 无需保存为临时文件,更加高效
401
+ - 当前支持状态:**服务器已支持**,等待客户端实现
402
+
297
403
  ### 图片大小限制?
298
404
 
299
405
  - 最大文件大小: 10MB
@@ -313,15 +419,31 @@ luma-mcp/
313
419
 
314
420
  ### 成本如何?
315
421
 
316
- GLM-4.5V 定价请参考[智谱官方定价](https://open.bigmodel.cn/pricing)。
422
+ **硅基流动 DeepSeek-OCR**: **完全免费**,无需付费!
423
+
424
+ **智谱 GLM-4.5V**: 定价请参考[智谱官方定价](https://open.bigmodel.cn/pricing)。
317
425
 
318
- 典型场景估算:
426
+ 典型场景估算(GLM-4.5V):
319
427
  - 简单图片理解: 500-1000 tokens
320
428
  - 代码截图分析: 1500-2500 tokens
321
429
  - 详细 UI 分析: 2000-3000 tokens
322
430
 
323
431
  启用思考模式会增加约 20-30% tokens。
324
432
 
433
+ ### 如何选择模型?
434
+
435
+ | 特性 | GLM-4.5V(智谱) | DeepSeek-OCR(硅基流动) |
436
+ |------------|----------------|------------------------|
437
+ | **费用** | 收费 | **完全免费** |
438
+ | **中文理解** | 优秀 | 良好 |
439
+ | **OCR 能力** | 良好 | **优秀** |
440
+ | **思考模式** | 支持 | 不支持 |
441
+ | **适用场景** | 通用图片分析 | OCR、文字识别 |
442
+
443
+ **推荐**:
444
+ - 需要 OCR 或文字识别:选择 **DeepSeek-OCR**(免费)
445
+ - 需要深度图片理解:选择 **GLM-4.5V**
446
+
325
447
  ## 贡献
326
448
 
327
449
  欢迎提交 Issue 和 Pull Request!
@@ -334,8 +456,54 @@ MIT License
334
456
 
335
457
  - [智谱 AI 开放平台](https://open.bigmodel.cn/)
336
458
  - [GLM-4.5V 文档](https://docs.bigmodel.cn/cn/guide/models/vlm/glm-4.5v)
459
+ - [硅基流动平台](https://cloud.siliconflow.cn/)
460
+ - [DeepSeek-OCR 文档](https://docs.siliconflow.cn/cn/api-reference/chat-completions/chat-completions)
337
461
  - [MCP 协议文档](https://modelcontextprotocol.io/)
338
462
 
463
+ ## 更新日志
464
+
465
+ ### [1.1.1] - 2025-11-13
466
+
467
+ #### 新增
468
+ - 🖼️ **Data URI 支持**: 支持接收 base64 编码的图片数据(`data:image/png;base64,...`)
469
+ - 🚀 **为未来做准备**: 当 MCP 客户端支持时,可直接传递用户粘贴的图片
470
+
471
+ #### 修改
472
+ - 更新工具描述,说明支持三种输入格式:本地路径、URL、Data URI
473
+ - 新增 Data URI 格式验证(MIME 类型、大小限制)
474
+
475
+ ### [1.1.0] - 2025-11-13
476
+
477
+ #### 新增
478
+ - 🎉 **多模型支持**: 新增硅基流动 DeepSeek-OCR 支持
479
+ - 🆓 **免费选项**: DeepSeek-OCR 通过硅基流动提供完全免费的 OCR 服务
480
+ - 📐 **统一接口**: 创建 VisionClient 接口,支持灵活扩展更多视觉模型
481
+ - ⚙️ **灵活配置**: 通过 `MODEL_PROVIDER` 环境变量轻松切换模型
482
+
483
+ #### 修改
484
+ - 🔧 环境变量命名优化,支持通用配置(`MODEL_NAME`、`MAX_TOKENS` 等)
485
+ - 📝 更新文档,提供双模型配置说明和选择建议
486
+ - 🏗️ 重构代码结构,提升可维护性
487
+
488
+ #### 技术细节
489
+ - 新增文件:
490
+ - `src/vision-client.ts` - 视觉模型客户端统一接口
491
+ - `src/siliconflow-client.ts` - 硅基流动 API 客户端实现
492
+ - `.env.example` - 配置示例文件
493
+ - 修改文件:
494
+ - `src/config.ts` - 支持多提供商配置
495
+ - `src/zhipu-client.ts` - 实现 VisionClient 接口
496
+ - `src/index.ts` - 根据配置动态选择客户端
497
+
498
+ ### [1.0.3] - 2025-11-12
499
+
500
+ - 基于智谱 GLM-4.5V 的视觉理解能力
501
+ - 支持本地文件和远程 URL
502
+ - 内置重试机制
503
+ - 思考模式支持
504
+
505
+ 更多更新历史请查看 [CHANGELOG.md](./CHANGELOG.md)
506
+
339
507
  ## 作者
340
508
 
341
509
  Jochen
package/build/config.d.ts CHANGED
@@ -2,7 +2,9 @@
2
2
  * 配置管理模块
3
3
  * 从环境变量读取配置
4
4
  */
5
+ export type ModelProvider = 'zhipu' | 'siliconflow';
5
6
  export interface LumaConfig {
7
+ provider: ModelProvider;
6
8
  apiKey: string;
7
9
  model: string;
8
10
  maxTokens: number;
@@ -1 +1 @@
1
- {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,OAAO,CAAC;CACzB;AAED;;GAEG;AACH,wBAAgB,UAAU,IAAI,UAAU,CAevC"}
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,aAAa,GAAG,OAAO,GAAG,aAAa,CAAC;AAEpD,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,aAAa,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,OAAO,CAAC;CACzB;AAED;;GAEG;AACH,wBAAgB,UAAU,IAAI,UAAU,CAiCvC"}
package/build/config.js CHANGED
@@ -6,17 +6,33 @@
6
6
  * 从环境变量加载配置
7
7
  */
8
8
  export function loadConfig() {
9
- const apiKey = process.env.ZHIPU_API_KEY;
10
- if (!apiKey) {
11
- throw new Error('ZHIPU_API_KEY environment variable is required');
9
+ // 确定使用的模型提供商
10
+ const provider = (process.env.MODEL_PROVIDER?.toLowerCase() || 'zhipu');
11
+ // 根据提供商获取 API Key
12
+ let apiKey;
13
+ let defaultModel;
14
+ if (provider === 'siliconflow') {
15
+ apiKey = process.env.SILICONFLOW_API_KEY;
16
+ defaultModel = 'deepseek-ai/DeepSeek-OCR';
17
+ if (!apiKey) {
18
+ throw new Error('SILICONFLOW_API_KEY environment variable is required when using SiliconFlow provider');
19
+ }
20
+ }
21
+ else {
22
+ apiKey = process.env.ZHIPU_API_KEY;
23
+ defaultModel = 'glm-4.5v';
24
+ if (!apiKey) {
25
+ throw new Error('ZHIPU_API_KEY environment variable is required when using Zhipu provider');
26
+ }
12
27
  }
13
28
  return {
29
+ provider,
14
30
  apiKey,
15
- model: process.env.ZHIPU_MODEL || 'glm-4.5v',
16
- maxTokens: parseInt(process.env.ZHIPU_MAX_TOKENS || '4096', 10),
17
- temperature: parseFloat(process.env.ZHIPU_TEMPERATURE || '0.7'),
18
- topP: parseFloat(process.env.ZHIPU_TOP_P || '0.7'),
19
- enableThinking: process.env.ZHIPU_ENABLE_THINKING === 'true',
31
+ model: process.env.MODEL_NAME || defaultModel,
32
+ maxTokens: parseInt(process.env.MAX_TOKENS || '4096', 10),
33
+ temperature: parseFloat(process.env.TEMPERATURE || '0.7'),
34
+ topP: parseFloat(process.env.TOP_P || '0.7'),
35
+ enableThinking: process.env.ENABLE_THINKING === 'true',
20
36
  };
21
37
  }
22
38
  //# sourceMappingURL=config.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAWH;;GAEG;AACH,MAAM,UAAU,UAAU;IACxB,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC;IAEzC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;IACpE,CAAC;IAED,OAAO;QACL,MAAM;QACN,KAAK,EAAE,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,UAAU;QAC5C,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,MAAM,EAAE,EAAE,CAAC;QAC/D,WAAW,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,KAAK,CAAC;QAC/D,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,KAAK,CAAC;QAClD,cAAc,EAAE,OAAO,CAAC,GAAG,CAAC,qBAAqB,KAAK,MAAM;KAC7D,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAcH;;GAEG;AACH,MAAM,UAAU,UAAU;IACxB,aAAa;IACb,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,WAAW,EAAE,IAAI,OAAO,CAAkB,CAAC;IAEzF,kBAAkB;IAClB,IAAI,MAA0B,CAAC;IAC/B,IAAI,YAAoB,CAAC;IAEzB,IAAI,QAAQ,KAAK,aAAa,EAAE,CAAC;QAC/B,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC;QACzC,YAAY,GAAG,0BAA0B,CAAC;QAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,sFAAsF,CAAC,CAAC;QAC1G,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC;QACnC,YAAY,GAAG,UAAU,CAAC;QAE1B,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,0EAA0E,CAAC,CAAC;QAC9F,CAAC;IACH,CAAC;IAED,OAAO;QACL,QAAQ;QACR,MAAM;QACN,KAAK,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,YAAY;QAC7C,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,MAAM,EAAE,EAAE,CAAC;QACzD,WAAW,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,KAAK,CAAC;QACzD,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,KAAK,CAAC;QAC5C,cAAc,EAAE,OAAO,CAAC,GAAG,CAAC,eAAe,KAAK,MAAM;KACvD,CAAC;AACJ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"image-processor.d.ts","sourceRoot":"","sources":["../src/image-processor.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAOH;;GAEG;AACH,wBAAsB,mBAAmB,CAAC,WAAW,EAAE,MAAM,EAAE,SAAS,GAAE,MAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CA6BpG;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAyBtE"}
1
+ {"version":3,"file":"image-processor.d.ts","sourceRoot":"","sources":["../src/image-processor.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AA6BH;;GAEG;AACH,wBAAsB,mBAAmB,CAAC,WAAW,EAAE,MAAM,EAAE,SAAS,GAAE,MAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CA6CpG;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CA+BtE"}
@@ -7,10 +7,45 @@ import { readFile, stat } from 'fs/promises';
7
7
  import sharp from 'sharp';
8
8
  import { isUrl } from './utils/helpers.js';
9
9
  import { logger } from './utils/logger.js';
10
+ // 判断是否为 Data URI(data:image/png;base64,....)
11
+ function isDataUri(input) {
12
+ return typeof input === 'string' && input.startsWith('data:') && /;base64,/.test(input);
13
+ }
14
+ // 从 Data URI 获取 mimeType
15
+ function getMimeFromDataUri(input) {
16
+ const match = input.match(/^data:([^;]+);base64,/i);
17
+ return match ? match[1].toLowerCase() : null;
18
+ }
19
+ // 估算 Data URI 的原始字节大小(不含头部)
20
+ function estimateBytesFromDataUri(input) {
21
+ try {
22
+ const base64 = input.split(',')[1] || '';
23
+ // base64 长度 * 3/4,忽略 padding 近似即可
24
+ return Math.floor((base64.length * 3) / 4);
25
+ }
26
+ catch {
27
+ return 0;
28
+ }
29
+ }
10
30
  /**
11
31
  * 验证图片来源(文件或URL)
12
32
  */
13
33
  export async function validateImageSource(imageSource, maxSizeMB = 10) {
34
+ // 如果是 Data URI,则验证 mime 与大小后直接返回
35
+ if (isDataUri(imageSource)) {
36
+ const mime = getMimeFromDataUri(imageSource);
37
+ const allowed = ['image/png', 'image/jpeg', 'image/jpg', 'image/webp', 'image/gif'];
38
+ if (!mime || !allowed.includes(mime)) {
39
+ throw new Error(`Unsupported data URI mimeType: ${mime || 'unknown'}. Supported: ${allowed.join(', ')}`);
40
+ }
41
+ const bytes = estimateBytesFromDataUri(imageSource);
42
+ const sizeMB = bytes / (1024 * 1024);
43
+ if (sizeMB > maxSizeMB) {
44
+ throw new Error(`Image data URI too large: ${sizeMB.toFixed(2)}MB (max: ${maxSizeMB}MB)`);
45
+ }
46
+ logger.debug('Image source is data URI, validated', { mime, sizeMB: sizeMB.toFixed(2) });
47
+ return;
48
+ }
14
49
  // 如果是URL,直接返回
15
50
  if (isUrl(imageSource)) {
16
51
  logger.debug('Image source is URL, skipping validation', { imageSource });
@@ -42,6 +77,11 @@ export async function validateImageSource(imageSource, maxSizeMB = 10) {
42
77
  */
43
78
  export async function imageToBase64(imagePath) {
44
79
  try {
80
+ // 如果是 Data URI,直接返回(已是 data:*;base64, 格式)
81
+ if (isDataUri(imagePath)) {
82
+ logger.info('Using data URI image');
83
+ return imagePath;
84
+ }
45
85
  // 如果是URL,直接返回
46
86
  if (isUrl(imagePath)) {
47
87
  logger.info('Using remote image URL', { url: imagePath });
@@ -1 +1 @@
1
- {"version":3,"file":"image-processor.js","sourceRoot":"","sources":["../src/image-processor.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,WAAmB,EAAE,YAAoB,EAAE;IACnF,cAAc;IACd,IAAI,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;QACvB,MAAM,CAAC,KAAK,CAAC,0CAA0C,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;QAC1E,OAAO;IACT,CAAC;IAED,SAAS;IACT,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAE9C,IAAI,UAAU,GAAG,SAAS,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,yBAAyB,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,SAAS,KAAK,CAAC,CAAC;QAC5F,CAAC;QAED,SAAS;QACT,MAAM,GAAG,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;QACvD,MAAM,gBAAgB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;QAE/D,IAAI,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjG,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAK,KAAa,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACrC,MAAM,IAAI,KAAK,CAAC,yBAAyB,WAAW,EAAE,CAAC,CAAC;QAC1D,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,SAAiB;IACnD,IAAI,CAAC;QACH,cAAc;QACd,IAAI,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC,CAAC;YAC1D,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,aAAa;QACb,IAAI,WAAW,GAAW,MAAM,QAAQ,CAAC,SAAS,CAAC,CAAC;QAEpD,sBAAsB;QACtB,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;YACzC,MAAM,CAAC,IAAI,CAAC,yBAAyB,EAAE,EAAE,YAAY,EAAE,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACjH,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,aAAa,CAAC,WAAW,CAAC,CAAC,CAAC;QAC9D,CAAC;QAED,aAAa;QACb,MAAM,MAAM,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC9C,
MAAM,QAAQ,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;QAExC,OAAO,QAAQ,QAAQ,WAAW,MAAM,EAAE,CAAC;IAC7C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAC1G,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,aAAa,CAAC,WAAmB;IAC9C,OAAO,KAAK,CAAC,WAAW,CAAC;SACtB,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE;QAClB,GAAG,EAAE,QAAQ;QACb,kBAAkB,EAAE,IAAI;KACzB,CAAC;SACD,IAAI,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;SACrB,QAAQ,EAAE,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,QAAgB;IACnC,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;IAEpD,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,KAAK,CAAC;QACX,KAAK,MAAM;YACT,OAAO,YAAY,CAAC;QACtB,KAAK,KAAK;YACR,OAAO,WAAW,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,YAAY,CAAC;QACtB,KAAK,KAAK;YACR,OAAO,WAAW,CAAC;QACrB;YACE,OAAO,YAAY,CAAC,CAAC,YAAY;IACrC,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"image-processor.js","sourceRoot":"","sources":["../src/image-processor.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,6CAA6C;AAC7C,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC1F,CAAC;AAED,yBAAyB;AACzB,SAAS,kBAAkB,CAAC,KAAa;IACvC,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;IACpD,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;AAC/C,CAAC;AAED,4BAA4B;AAC5B,SAAS,wBAAwB,CAAC,KAAa;IAC7C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzC,kCAAkC;QAClC,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,CAAC;IACX,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,WAAmB,EAAE,YAAoB,EAAE;IACnF,iCAAiC;IACjC,IAAI,SAAS,CAAC,WAAW,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,CAAC,WAAW,EAAE,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC;QACpF,IAAI,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACrC,MAAM,IAAI,KAAK,CAAC,kCAAkC,IAAI,IAAI,SAAS,gBAAgB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC3G,CAAC;QACD,MAAM,KAAK,GAAG,wBAAwB,CAAC,WAAW,CAAC,CAAC;QACpD,MAAM,MAAM,GAAG,KAAK,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QACrC,IAAI,MAAM,GAAG,SAAS,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,6BAA6B,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,SAAS,KAAK,CAAC,CAAC;QAC5F,CAAC;QACD,MAAM,CAAC,KAAK,CAAC,qCAAqC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACzF,OAAO;IACT,CAAC;IAED,cAAc;IACd,IAAI,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;QACvB,MAAM,CAAC,KAAK,CAAC,0CAA0C,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;QAC1E,OAAO;IACT,CAAC;IAED,SAAS;IACT,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,UAAU,GAAG,KAAK,
CAAC,IAAI,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;QAE9C,IAAI,UAAU,GAAG,SAAS,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,yBAAyB,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,SAAS,KAAK,CAAC,CAAC;QAC5F,CAAC;QAED,SAAS;QACT,MAAM,GAAG,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;QACvD,MAAM,gBAAgB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;QAE/D,IAAI,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5C,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjG,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAK,KAAa,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACrC,MAAM,IAAI,KAAK,CAAC,yBAAyB,WAAW,EAAE,CAAC,CAAC;QAC1D,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,SAAiB;IACnD,IAAI,CAAC;QACH,0CAA0C;QAC1C,IAAI,SAAS,CAAC,SAAS,CAAC,EAAE,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;YACpC,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,cAAc;QACd,IAAI,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC,CAAC;YAC1D,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,aAAa;QACb,IAAI,WAAW,GAAW,MAAM,QAAQ,CAAC,SAAS,CAAC,CAAC;QAEpD,sBAAsB;QACtB,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;YACzC,MAAM,CAAC,IAAI,CAAC,yBAAyB,EAAE,EAAE,YAAY,EAAE,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACjH,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,aAAa,CAAC,WAAW,CAAC,CAAC,CAAC;QAC9D,CAAC;QAED,aAAa;QACb,MAAM,MAAM,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;QAExC,OAAO,QAAQ,QAAQ,WAAW,MAAM,EAAE,CAAC;IAC7C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;IAC1G,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,aAAa,CAAC,WAAmB;IAC9C,OAAO,KAAK,CAAC,WAAW,CAAC;SACtB,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE;QAClB,GAAG,EAAE,QAAQ;QACb,kBAAkB,EAAE,IAAI;KACzB,CAAC;SACD,IAAI,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;SACrB,QAAQ,EAAE,CAAC;AA
ChB,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,QAAgB;IACnC,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;IAEpD,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,KAAK,CAAC;QACX,KAAK,MAAM;YACT,OAAO,YAAY,CAAC;QACtB,KAAK,KAAK;YACR,OAAO,WAAW,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,YAAY,CAAC;QACtB,KAAK,KAAK;YACR,OAAO,WAAW,CAAC;QACrB;YACE,OAAO,YAAY,CAAC,CAAC,YAAY;IACrC,CAAC;AACH,CAAC"}
package/build/index.js CHANGED
@@ -11,6 +11,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
11
11
  import { z } from 'zod';
12
12
  import { loadConfig } from './config.js';
13
13
  import { ZhipuClient } from './zhipu-client.js';
14
+ import { SiliconFlowClient } from './siliconflow-client.js';
14
15
  import { imageToBase64, validateImageSource } from './image-processor.js';
15
16
  import { buildAnalysisPrompt } from './prompts.js';
16
17
  import { withRetry, createSuccessResponse, createErrorResponse } from './utils/helpers.js';
@@ -21,7 +22,14 @@ async function createServer() {
21
22
  logger.info('Initializing Luma MCP Server');
22
23
  // 加载配置
23
24
  const config = loadConfig();
24
- const zhipuClient = new ZhipuClient(config);
25
+ // 根据配置选择模型客户端
26
+ const visionClient = config.provider === 'siliconflow'
27
+ ? new SiliconFlowClient(config)
28
+ : new ZhipuClient(config);
29
+ logger.info('Vision client initialized', {
30
+ provider: config.provider,
31
+ model: visionClient.getModelName()
32
+ });
25
33
  // 创建服务器 - 使用 McpServer
26
34
  const server = new McpServer({
27
35
  name: 'luma-mcp',
@@ -37,15 +45,18 @@ async function createServer() {
37
45
  await validateImageSource(imageSource);
38
46
  // 2. 处理图片(读取或返回URL)
39
47
  const imageDataUrl = await imageToBase64(imageSource);
40
- // 3. 构建提示词(直接使用prompt)
41
- const fullPrompt = buildAnalysisPrompt(prompt);
42
- // 4. 调用 GLM-4.5V 分析图片
43
- return await zhipuClient.analyzeImage(imageDataUrl, fullPrompt);
48
+ // 3. 构建提示词
49
+ // DeepSeek-OCR 需要简洁的 prompt,不支持复杂格式化
50
+ const fullPrompt = config.provider === 'siliconflow'
51
+ ? prompt // DeepSeek-OCR: 直接使用原始 prompt
52
+ : buildAnalysisPrompt(prompt); // GLM-4.5V: 使用结构化 prompt
53
+ // 4. 调用视觉模型分析图片
54
+ return await visionClient.analyzeImage(imageDataUrl, fullPrompt);
44
55
  }, 2, // 最多重试2次
45
56
  1000 // 初始延迟1秒
46
57
  );
47
58
  // 注册工具 - 使用 McpServer.tool() API
48
- server.tool('analyze_image', `使用智谱GLM-4.5V视觉模型分析图片内容。
59
+ server.tool('analyze_image', `使用视觉模型分析图片内容。支持 GLM-4.5V(智谱)和 DeepSeek-OCR(硅基流动)。
49
60
 
50
61
  **何时自动调用此工具**:
51
62
  1. 用户提供了图片文件路径(包括临时路径、相对路径、绝对路径)
@@ -59,7 +70,7 @@ async function createServer() {
59
70
  **支持来源**:本地文件、远程URL、临时文件(包括截图)。
60
71
 
61
72
  如果你是不支持视觉的AI模型,看到图片路径时应主动调用此工具来分析图片内容。`, {
62
- image_source: z.string().describe('图片来源:本地文件路径(含临时路径)、远程URL。支持 PNG/JPG/WebP/GIF。例如:"./image.png"、"/tmp/screenshot.png"、"C:\\Users\\...\\image.jpg"、"https://example.com/pic.jpg"'),
73
+ image_source: z.string().describe('图片来源:支持三种格式:1) 本地文件路径(含临时路径)、2) 远程URL(https://)、3) Data URI(data:image/png;base64,...)。支持 PNG/JPG/WebP/GIF。例如:"./image.png"、"/tmp/screenshot.png"、"C:\\Users\\...\\image.jpg"、"https://example.com/pic.jpg"、"..."'),
63
74
  prompt: z.string().describe('必须:详细的分析指令。如果用户没有提供具体问题,默认使用:"请详细分析这张图片的内容"。对于具体任务:代码分析、UI设计、错误诊断、文字识别等,应提供明确的指令。例如:"这段代码为什么报错?"、"分析这个UI的布局和风格"、"识别图片中的所有文字"'),
64
75
  }, async (params) => {
65
76
  try {
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;GAGG;AAEH,wCAAwC;AACxC,OAAO,EAAE,uBAAuB,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AACpE,uBAAuB,EAAE,CAAC;AAE1B,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE3F;;GAEG;AACH,KAAK,UAAU,YAAY;IACzB,MAAM,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAE5C,OAAO;IACP,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAC5B,MAAM,WAAW,GAAG,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IAE5C,uBAAuB;IACvB,MAAM,MAAM,GAAG,IAAI,SAAS,CAC1B;QACE,IAAI,EAAE,UAAU;QAChB,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE;YACZ,KAAK,EAAE,EAAE;SACV;KACF,CACF,CAAC;IAEF,aAAa;IACb,MAAM,gBAAgB,GAAG,SAAS,CAChC,KAAK,EAAE,WAAmB,EAAE,MAAc,EAAE,EAAE;QAC5C,YAAY;QACZ,MAAM,mBAAmB,CAAC,WAAW,CAAC,CAAC;QAEvC,oBAAoB;QACpB,MAAM,YAAY,GAAG,MAAM,aAAa,CAAC,WAAW,CAAC,CAAC;QAEtD,uBAAuB;QACvB,MAAM,UAAU,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC;QAE/C,sBAAsB;QACtB,OAAO,MAAM,WAAW,CAAC,YAAY,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IAClE,CAAC,EACD,CAAC,EAAE,SAAS;IACZ,IAAI,CAAC,SAAS;KACf,CAAC;IAEF,iCAAiC;IACjC,MAAM,CAAC,IAAI,CACT,eAAe,EACf;;;;;;;;;;;;;uCAamC,EACnC;QACE,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+IAA+I,CAAC;QAClL,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,8HAA8H,CAAC;KAC5J,EACD,KAAK,EAAE,MAAM,EAAE,EAAE;QACf,IAAI,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,iBAAiB,EAAE;gBAC7B,MAAM,EAAE,MAAM,CAAC,YAAY;gBAC3B,MAAM,EAAE,MAAM,CAAC,MAAM;aACtB,CAAC,CAAC;YAEH,YAAY;YACZ,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YAE1E,MAAM,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;YACrD,OAAO,qBAAqB,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE;gBACpC,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KA
AK,CAAC;aAC9D,CAAC,CAAC;YAEH,OAAO,mBAAmB,CACxB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CACzD,CAAC;QACJ,CAAC;IACH,CAAC,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,IAAI;IACjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;QAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAEhC,MAAM,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;IAC/D,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,KAAK,CAAC,iCAAiC,EAAE;YAC9C,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,SAAS;AACT,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,KAAK,EAAE,EAAE;IACxC,MAAM,CAAC,KAAK,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;IACjF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAM,EAAE,EAAE;IAC1C,MAAM,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE;IACxB,MAAM,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE;IACzB,MAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,IAAI,EAAE,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;GAGG;AAEH,wCAAwC;AACxC,OAAO,EAAE,uBAAuB,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AACpE,uBAAuB,EAAE,CAAC;AAE1B,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,qBAAqB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE3F;;GAEG;AACH,KAAK,UAAU,YAAY;IACzB,MAAM,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAE5C,OAAO;IACP,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,cAAc;IACd,MAAM,YAAY,GAAiB,MAAM,CAAC,QAAQ,KAAK,aAAa;QAClE,CAAC,CAAC,IAAI,iBAAiB,CAAC,MAAM,CAAC;QAC/B,CAAC,CAAC,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IAE5B,MAAM,CAAC,IAAI,CAAC,2BAA2B,EAAE;QACvC,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,KAAK,EAAE,YAAY,CAAC,YAAY,EAAE;KACnC,CAAC,CAAC;IAEH,uBAAuB;IACvB,MAAM,MAAM,GAAG,IAAI,SAAS,CAC1B;QACE,IAAI,EAAE,UAAU;QAChB,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE;YACZ,KAAK,EAAE,EAAE;SACV;KACF,CACF,CAAC;IAEF,aAAa;IACb,MAAM,gBAAgB,GAAG,SAAS,CAChC,KAAK,EAAE,WAAmB,EAAE,MAAc,EAAE,EAAE;QAC5C,YAAY;QACZ,MAAM,mBAAmB,CAAC,WAAW,CAAC,CAAC;QAEvC,oBAAoB;QACpB,MAAM,YAAY,GAAG,MAAM,aAAa,CAAC,WAAW,CAAC,CAAC;QAEtD,WAAW;QACX,qCAAqC;QACrC,MAAM,UAAU,GAAG,MAAM,CAAC,QAAQ,KAAK,aAAa;YAClD,CAAC,CAAC,MAAM,CAAE,8BAA8B;YACxC,CAAC,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAE,yBAAyB;QAE3D,gBAAgB;QAChB,OAAO,MAAM,YAAY,CAAC,YAAY,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IACnE,CAAC,EACD,CAAC,EAAE,SAAS;IACZ,IAAI,CAAC,SAAS;KACf,CAAC;IAEF,iCAAiC;IACjC,MAAM,CAAC,IAAI,CACT,eAAe,EACf;;;;;;;;;;;;;uCAamC,EACnC;QACE,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,kPAAkP,CAAC;QACrR,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,8HAA8H,CAAC;KAC5J,EACD,KAAK,EAAE,MAAM,EAAE,EAAE;QACf,IAAI,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,iBAAiB,EAAE;gBAC7B,MAAM,EAAE,MAAM,CAAC,YAAY;gBAC3B,MAAM,EAAE,MAAM,CAAC,
MAAM;aACtB,CAAC,CAAC;YAEH,YAAY;YACZ,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YAE1E,MAAM,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;YACrD,OAAO,qBAAqB,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,uBAAuB,EAAE;gBACpC,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;YAEH,OAAO,mBAAmB,CACxB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CACzD,CAAC;QACJ,CAAC;IACH,CAAC,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,IAAI;IACjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;QAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAEhC,MAAM,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;IAC/D,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,KAAK,CAAC,iCAAiC,EAAE;YAC9C,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;SAC9D,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,SAAS;AACT,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,KAAK,EAAE,EAAE;IACxC,MAAM,CAAC,KAAK,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;IACjF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAM,EAAE,EAAE;IAC1C,MAAM,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE;IACxB,MAAM,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE;IACzB,MAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC;AAEH,IAAI,EAAE,CAAC"}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * 硅基流动 DeepSeek-OCR API 客户端
3
+ * 基于 OpenAI 兼容 API
4
+ */
5
+ import type { LumaConfig } from './config.js';
6
+ import type { VisionClient } from './vision-client.js';
7
+ /**
8
+ * SiliconFlow API client; declared as an implementation of the VisionClient interface.
9
+ */
10
+ export declare class SiliconFlowClient implements VisionClient {
11
+ private config;
12
+ private apiEndpoint;
13
+ constructor(config: LumaConfig);
14
+ /**
15
+ * Analyze an image with the given prompt; resolves to a string. enableThinking is optional.
16
+ */
17
+ analyzeImage(imageDataUrl: string, prompt: string, enableThinking?: boolean): Promise<string>;
18
+ /**
19
+ * Get the model name.
20
+ */
21
+ getModelName(): string;
22
+ }
23
+ //# sourceMappingURL=siliconflow-client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"siliconflow-client.d.ts","sourceRoot":"","sources":["../src/siliconflow-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AA2CvD;;GAEG;AACH,qBAAa,iBAAkB,YAAW,YAAY;IACpD,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,WAAW,CAAoD;gBAE3D,MAAM,EAAE,UAAU;IAI9B;;OAEG;IACG,YAAY,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IAsEnG;;OAEG;IACH,YAAY,IAAI,MAAM;CAGvB"}
@@ -0,0 +1,85 @@
1
+ /**
2
+ * 硅基流动 DeepSeek-OCR API 客户端
3
+ * 基于 OpenAI 兼容 API
4
+ */
5
+ import axios from 'axios';
6
+ import { logger } from './utils/logger.js';
7
/**
 * SiliconFlow API client.
 *
 * Posts a single-turn chat-completions request (one image plus one text
 * prompt) to the SiliconFlow endpoint and returns the text of the first
 * choice.
 */
export class SiliconFlowClient {
    config;
    apiEndpoint = 'https://api.siliconflow.cn/v1/chat/completions';
    /**
     * @param config Client configuration. This class reads `model`,
     *   `temperature`, `maxTokens`, `topP` and `apiKey` from it.
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Analyze an image.
     *
     * @param imageDataUrl Value sent as the `image_url.url` of the request.
     * @param prompt Text sent alongside the image.
     * @param enableThinking Accepted for signature compatibility; this client
     *   does not read it.
     * @returns The `message.content` string of the first returned choice.
     * @throws Error when the response has no choices, when the first choice
     *   carries no string content, or (wrapped with the HTTP status) when the
     *   request itself fails.
     */
    async analyzeImage(imageDataUrl, prompt, enableThinking) {
        const requestBody = {
            model: this.config.model,
            messages: [
                {
                    role: 'user',
                    content: [
                        {
                            type: 'image_url',
                            image_url: {
                                url: imageDataUrl,
                            },
                        },
                        {
                            type: 'text',
                            text: prompt,
                        },
                    ],
                },
            ],
            temperature: this.config.temperature,
            max_tokens: this.config.maxTokens,
            top_p: this.config.topP,
            stream: false,
        };
        logger.info('Calling SiliconFlow DeepSeek-OCR API', {
            model: this.config.model,
        });
        try {
            const response = await axios.post(this.apiEndpoint, requestBody, {
                headers: {
                    'Authorization': `Bearer ${this.config.apiKey}`,
                    'Content-Type': 'application/json',
                },
                timeout: 60000, // 60-second timeout
            });
            const choices = response.data?.choices;
            if (!choices || choices.length === 0) {
                throw new Error('No response from DeepSeek-OCR');
            }
            // Guard the nested access: a choice without message/content would
            // otherwise surface as an opaque TypeError or leak `undefined`
            // to callers that expect a string.
            const result = choices[0]?.message?.content;
            if (typeof result !== 'string') {
                throw new Error('Malformed response from DeepSeek-OCR: missing message content');
            }
            const usage = response.data.usage;
            logger.info('SiliconFlow API call successful', {
                tokens: usage?.total_tokens ?? 0,
                model: response.data.model
            });
            return result;
        }
        catch (error) {
            logger.error('SiliconFlow API call failed', {
                error: error instanceof Error ? error.message : String(error)
            });
            if (axios.isAxiosError(error)) {
                // Prefer the API's own error message over the transport-level one.
                const message = error.response?.data?.error?.message || error.message;
                const status = error.response?.status;
                throw new Error(`SiliconFlow API error (${status || 'unknown'}): ${message}`);
            }
            throw error;
        }
    }
    /**
     * Get the model name.
     *
     * @returns The `model` value from the configuration passed to the constructor.
     */
    getModelName() {
        return this.config.model;
    }
}
85
+ //# sourceMappingURL=siliconflow-client.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"siliconflow-client.js","sourceRoot":"","sources":["../src/siliconflow-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AA0C3C;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAa;IACnB,WAAW,GAAG,gDAAgD,CAAC;IAEvE,YAAY,MAAkB;QAC5B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,YAAoB,EAAE,MAAc,EAAE,cAAwB;QAC/E,MAAM,WAAW,GAAuB;YACtC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;YACxB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,WAAW;4BACjB,SAAS,EAAE;gCACT,GAAG,EAAE,YAAY;6BAClB;yBACF;wBACD;4BACE,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,MAAM;yBACb;qBACF;iBACF;aACF;YACD,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACpC,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YACjC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI;YACvB,MAAM,EAAE,KAAK;SACd,CAAC;QAEF,MAAM,CAAC,IAAI,CAAC,sCAAsC,EAAE;YAClD,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;SACzB,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,WAAW,EAChB,WAAW,EACX;gBACE,OAAO,EAAE;oBACP,eAAe,EAAE,UAAU,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE;oBAC/C,cAAc,EAAE,kBAAkB;iBACnC;gBACD,OAAO,EAAE,KAAK,EAAE,QAAQ;aACzB,CACF,CAAC;YAEF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjE,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;YACnD,CAAC;YAED,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;YACxD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC;YAElC,MAAM,CAAC,IAAI,CAAC,iCAAiC,EAAE;gBAC7C,MAAM,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;gBAChC,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,KAAK;aAC3B,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,6BAA6B,EAAE;gBAC1C,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC;gBACtE,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,EAAE,MAAM,CA
AC;gBACtC,MAAM,IAAI,KAAK,CAAC,0BAA0B,MAAM,IAAI,SAAS,MAAM,OAAO,EAAE,CAAC,CAAC;YAChF,CAAC;YACD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;IAC3B,CAAC;CACF"}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Unified interface for vision model clients.
3
+ */
4
+ export interface VisionClient {
5
+ /**
6
+ * Analyze an image.
7
+ * @param imageDataUrl Image Data URL or URL
8
+ * @param prompt Analysis prompt
9
+ * @param enableThinking Whether to enable thinking mode (if the model supports it)
10
+ * @returns Analysis result text
11
+ */
12
+ analyzeImage(imageDataUrl: string, prompt: string, enableThinking?: boolean): Promise<string>;
13
+ /**
14
+ * Get the model name.
15
+ */
16
+ getModelName(): string;
17
+ }
18
+ //# sourceMappingURL=vision-client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vision-client.d.ts","sourceRoot":"","sources":["../src/vision-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,YAAY;IAC3B;;;;;;OAMG;IACH,YAAY,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAE9F;;OAEG;IACH,YAAY,IAAI,MAAM,CAAC;CACxB"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * 视觉模型客户端统一接口
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=vision-client.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vision-client.js","sourceRoot":"","sources":["../src/vision-client.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -2,10 +2,11 @@
2
2
  * 智谱 GLM-4.5V API 客户端
3
3
  */
4
4
  import type { LumaConfig } from './config.js';
5
+ import type { VisionClient } from './vision-client.js';
5
6
  /**
6
7
  * 智谱 API 客户端
7
8
  */
8
- export declare class ZhipuClient {
9
+ export declare class ZhipuClient implements VisionClient {
9
10
  private config;
10
11
  private apiEndpoint;
11
12
  constructor(config: LumaConfig);
@@ -13,5 +14,9 @@ export declare class ZhipuClient {
13
14
  * 分析图片
14
15
  */
15
16
  analyzeImage(imageDataUrl: string, prompt: string, enableThinking?: boolean): Promise<string>;
17
+ /**
18
+ * 获取模型名称
19
+ */
20
+ getModelName(): string;
16
21
  }
17
22
  //# sourceMappingURL=zhipu-client.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"zhipu-client.d.ts","sourceRoot":"","sources":["../src/zhipu-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AA4C9C;;GAEG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,WAAW,CAA2D;gBAElE,MAAM,EAAE,UAAU;IAI9B;;OAEG;IACG,YAAY,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;CA2EpG"}
1
+ {"version":3,"file":"zhipu-client.d.ts","sourceRoot":"","sources":["../src/zhipu-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AA4CvD;;GAEG;AACH,qBAAa,WAAY,YAAW,YAAY;IAC9C,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,WAAW,CAA2D;gBAElE,MAAM,EAAE,UAAU;IAI9B;;OAEG;IACG,YAAY,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;IA4EnG;;OAEG;IACH,YAAY,IAAI,MAAM;CAGvB"}
@@ -79,5 +79,11 @@ export class ZhipuClient {
79
79
  throw error;
80
80
  }
81
81
  }
82
+ /**
83
+ * Get the model name (returns the `model` field of this client's config).
84
+ */
85
+ getModelName() {
86
+ return this.config.model;
87
+ }
82
88
  }
83
89
  //# sourceMappingURL=zhipu-client.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"zhipu-client.js","sourceRoot":"","sources":["../src/zhipu-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AA2C3C;;GAEG;AACH,MAAM,OAAO,WAAW;IACd,MAAM,CAAa;IACnB,WAAW,GAAG,uDAAuD,CAAC;IAE9E,YAAY,MAAkB;QAC5B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,YAAoB,EAAE,MAAc,EAAE,cAAwB;QAC/E,MAAM,WAAW,GAAiB;YAChC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;YACxB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,WAAW;4BACjB,SAAS,EAAE;gCACT,GAAG,EAAE,YAAY;6BAClB;yBACF;wBACD;4BACE,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,MAAM;yBACb;qBACF;iBACF;aACF;YACD,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACpC,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YACjC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI;YACvB,QAAQ,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,mBAAmB;SACnD,CAAC;QAEF,2BAA2B;QAC3B,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,KAAK,KAAK,IAAI,cAAc,KAAK,KAAK,EAAE,CAAC;YACrE,OAAO,WAAW,CAAC,QAAQ,CAAC;QAC9B,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,sBAAsB,EAAE;YAClC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;YACxB,QAAQ,EAAE,CAAC,CAAC,WAAW,CAAC,QAAQ;SACjC,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,WAAW,EAChB,WAAW,EACX;gBACE,OAAO,EAAE;oBACP,eAAe,EAAE,UAAU,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE;oBAC/C,cAAc,EAAE,kBAAkB;iBACnC;gBACD,OAAO,EAAE,KAAK,EAAE,QAAQ;aACzB,CACF,CAAC;YAEF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjE,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;YAC/C,CAAC;YAED,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;YACxD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC;YAElC,MAAM,CAAC,IAAI,CAAC,8BAA8B,EAAE;gBAC1C,MAAM,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;gBAChC,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,KAAK;aAC3B,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,0BAA0B,EAAE;gBACvC,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;YAE
H,IAAI,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC;gBACtE,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC;gBACtC,MAAM,IAAI,KAAK,CAAC,uBAAuB,MAAM,IAAI,SAAS,MAAM,OAAO,EAAE,CAAC,CAAC;YAC7E,CAAC;YACD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;CACF"}
1
+ {"version":3,"file":"zhipu-client.js","sourceRoot":"","sources":["../src/zhipu-client.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AA2C3C;;GAEG;AACH,MAAM,OAAO,WAAW;IACd,MAAM,CAAa;IACnB,WAAW,GAAG,uDAAuD,CAAC;IAE9E,YAAY,MAAkB;QAC5B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,YAAoB,EAAE,MAAc,EAAE,cAAwB;QAC/E,MAAM,WAAW,GAAiB;YAChC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;YACxB,QAAQ,EAAE;gBACR;oBACE,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,WAAW;4BACjB,SAAS,EAAE;gCACT,GAAG,EAAE,YAAY;6BAClB;yBACF;wBACD;4BACE,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,MAAM;yBACb;qBACF;iBACF;aACF;YACD,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACpC,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YACjC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI;YACvB,QAAQ,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,mBAAmB;SACnD,CAAC;QAEF,2BAA2B;QAC3B,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,KAAK,KAAK,IAAI,cAAc,KAAK,KAAK,EAAE,CAAC;YACrE,OAAO,WAAW,CAAC,QAAQ,CAAC;QAC9B,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,sBAAsB,EAAE;YAClC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;YACxB,QAAQ,EAAE,CAAC,CAAC,WAAW,CAAC,QAAQ;SACjC,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,WAAW,EAChB,WAAW,EACX;gBACE,OAAO,EAAE;oBACP,eAAe,EAAE,UAAU,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE;oBAC/C,cAAc,EAAE,kBAAkB;iBACnC;gBACD,OAAO,EAAE,KAAK,EAAE,QAAQ;aACzB,CACF,CAAC;YAEF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjE,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;YAC/C,CAAC;YAED,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;YACxD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC;YAElC,MAAM,CAAC,IAAI,CAAC,8BAA8B,EAAE;gBAC1C,MAAM,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;gBAChC,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,KAAK;aAC3B,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,0BAA0B,EAAE;gBACvC,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;YAE
H,IAAI,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC9B,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC;gBACtE,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC;gBACtC,MAAM,IAAI,KAAK,CAAC,uBAAuB,MAAM,IAAI,SAAS,MAAM,OAAO,EAAE,CAAC,CAAC;YAC7E,CAAC;YACD,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;IAC3B,CAAC;CACF"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "luma-mcp",
3
- "version": "1.0.3",
4
- "description": "A vision understanding MCP server powered by GLM-4.5V",
3
+ "version": "1.1.1",
4
+ "description": "Multi-model vision understanding MCP server. Supports GLM-4.5V (Zhipu) and DeepSeek-OCR (SiliconFlow - Free)",
5
5
  "type": "module",
6
6
  "bin": {
7
7
  "luma-mcp": "build/index.js"
@@ -19,7 +19,12 @@
19
19
  "ai",
20
20
  "glm-4.5v",
21
21
  "zhipu",
22
- "image-understanding"
22
+ "deepseek-ocr",
23
+ "siliconflow",
24
+ "ocr",
25
+ "free",
26
+ "image-understanding",
27
+ "multi-model"
23
28
  ],
24
29
  "author": "Jochen",
25
30
  "license": "MIT",
@@ -0,0 +1,64 @@
1
+ /**
2
+ * 测试 Data URI 支持
3
+ */
4
+
5
+ import { validateImageSource, imageToBase64 } from '../src/image-processor.js';
6
+
7
+ // A valid Data URI for a 1x1-pixel PNG image. NOTE(review): the literal is empty in this view; presumably the payload was stripped - confirm against the repository.
8
+ const validDataUri = '';
9
+
10
+ // An invalid Data URI (unsupported format). NOTE(review): this literal is also empty in this view - confirm against the repository.
11
+ const invalidDataUri = '';
12
+
13
/**
 * Manual smoke test for Data URI handling.
 *
 * Runs four checks in order and prints one pass/fail line per check:
 *   1. a valid Data URI passes validateImageSource;
 *   2. an unsupported format is rejected by validateImageSource;
 *   3. imageToBase64 returns a Data URI unchanged;
 *   4. an oversized Data URI is rejected by validateImageSource.
 * Failures are only printed; nothing is thrown to the caller.
 */
async function testDataUri() {
  // Turn any thrown value into a printable message.
  const reasonOf = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  console.log('🧪 测试 Data URI 支持\n');

  // Check 1: validation of the valid Data URI is expected to succeed.
  try {
    console.log('测试 1: 验证有效的 PNG Data URI');
    await validateImageSource(validDataUri);
    console.log('✅ 通过:有效的 Data URI 验证成功\n');
  } catch (err) {
    console.log(`❌ 失败: ${reasonOf(err)}\n`);
  }

  // Check 2: validation of the unsupported format is expected to throw.
  try {
    console.log('测试 2: 验证不支持的格式 (SVG)');
    await validateImageSource(invalidDataUri);
    console.log('❌ 失败:应该抛出错误\n');
  } catch (err) {
    console.log(`✅ 通过:正确拒绝不支持的格式 - ${reasonOf(err)}\n`);
  }

  // Check 3: conversion is expected to hand the Data URI back as-is.
  try {
    console.log('测试 3: Data URI 转换');
    const passedThrough = (await imageToBase64(validDataUri)) === validDataUri;
    console.log(
      passedThrough
        ? '✅ 通过:Data URI 正确传递(未修改)\n'
        : '❌ 失败:Data URI 被修改了\n'
    );
  } catch (err) {
    console.log(`❌ 失败: ${reasonOf(err)}\n`);
  }

  // Check 4: a payload of 20 Mi base64 characters (about 15 MB decoded)
  // is expected to be rejected by the size limit.
  try {
    console.log('测试 4: 验证大小限制 (>10MB)');
    const oversizedUri = 'data:image/png;base64,' + 'A'.repeat(20 * 1024 * 1024);
    await validateImageSource(oversizedUri);
    console.log('❌ 失败:应该拒绝过大的文件\n');
  } catch (err) {
    console.log(`✅ 通过:正确拒绝超大文件 - ${reasonOf(err)}\n`);
  }

  console.log('==========================================');
  console.log('✅ Data URI 测试完成!');
  console.log('==========================================\n');
}
63
+
64
+ testDataUri().catch(console.error);
@@ -0,0 +1,94 @@
1
+ /**
2
+ * 直接测试 DeepSeek-OCR API(无任何包装)
3
+ */
4
+
5
+ import axios from 'axios';
6
+ import * as fs from 'fs';
7
+ import * as path from 'path';
8
+
9
/**
 * Calls the SiliconFlow DeepSeek-OCR chat-completions endpoint directly
 * (no client wrapper) with a series of prompts, stopping at the first
 * prompt that yields a non-empty response.
 *
 * The API key is read from the SILICONFLOW_API_KEY environment variable;
 * credentials must never be committed to a published test script.
 *
 * @param imagePath Path of the image file to read and send as a Data URI.
 * @throws Error when SILICONFLOW_API_KEY is not set, or when the image file
 *   cannot be read. Per-prompt request failures are printed, not thrown.
 */
async function testDeepSeekOCR(imagePath: string): Promise<void> {
  console.log('\n🧪 测试 DeepSeek-OCR API(原始调用)\n');

  const apiKey = process.env.SILICONFLOW_API_KEY;
  if (!apiKey) {
    throw new Error('SILICONFLOW_API_KEY environment variable is required');
  }

  // Read the image and encode it as base64.
  const imageBuffer = fs.readFileSync(imagePath);
  const base64Image = imageBuffer.toString('base64');

  // Case-insensitive extension lookup; unknown extensions keep the
  // previous default of image/jpeg.
  const mimeTypes: Record<string, string> = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.webp': 'image/webp',
    '.gif': 'image/gif',
  };
  const mimeType = mimeTypes[path.extname(imagePath).toLowerCase()] ?? 'image/jpeg';
  const imageDataUrl = `data:${mimeType};base64,${base64Image}`;

  console.log(`📸 图片: ${imagePath}`);
  console.log(`📦 大小: ${(imageBuffer.length / 1024).toFixed(2)} KB\n`);

  // Prompts to try, in order.
  const prompts = [
    '识别图片中的所有文字',
    'OCR',
    'Extract all text from this image',
    'What do you see in this image?',
    '请详细描述这张图片'
  ];

  for (const prompt of prompts) {
    console.log(`\n🔍 测试 Prompt: "${prompt}"`);
    console.log('─'.repeat(50));

    try {
      const response = await axios.post(
        'https://api.siliconflow.cn/v1/chat/completions',
        {
          model: 'deepseek-ai/DeepSeek-OCR',
          messages: [
            {
              role: 'user',
              content: [
                {
                  type: 'image_url',
                  image_url: {
                    url: imageDataUrl,
                  },
                },
                {
                  type: 'text',
                  text: prompt,
                },
              ],
            },
          ],
          temperature: 0.7,
          max_tokens: 4096,
        },
        {
          headers: {
            'Authorization': `Bearer ${apiKey}`,
            'Content-Type': 'application/json',
          },
          timeout: 60000,
        }
      );

      // Guard every level: a missing choice/message/usage must not turn a
      // usable response into a reported error.
      const content: unknown = response.data?.choices?.[0]?.message?.content;
      const result = typeof content === 'string' ? content : '';
      const usage = response.data?.usage;

      console.log(`✅ Tokens: ${usage?.total_tokens ?? '?'} (prompt: ${usage?.prompt_tokens ?? '?'}, completion: ${usage?.completion_tokens ?? '?'})`);
      console.log(`📝 响应长度: ${result.length} 字符`);

      if (result.trim().length > 0) {
        console.log('\n📊 结果:');
        console.log('─'.repeat(50));
        console.log(result);
        console.log('─'.repeat(50));
        console.log('\n✅ 找到有效响应!');
        break;
      } else {
        console.log('❌ 空响应');
      }
    } catch (error: unknown) {
      console.log(`❌ 错误: ${error instanceof Error ? error.message : String(error)}`);
    }
  }
}
91
+
92
+ // Run the test against test.png in the current working directory; a rejection is printed via console.error.
93
+ const imagePath = path.join(process.cwd(), 'test.png');
94
+ testDeepSeekOCR(imagePath).catch(console.error);
@@ -4,7 +4,9 @@
4
4
  */
5
5
 
6
6
  import { loadConfig } from '../src/config.js';
7
+ import type { VisionClient } from '../src/vision-client.js';
7
8
  import { ZhipuClient } from '../src/zhipu-client.js';
9
+ import { SiliconFlowClient } from '../src/siliconflow-client.js';
8
10
  import { imageToBase64, validateImageSource } from '../src/image-processor.js';
9
11
  import { buildAnalysisPrompt } from '../src/prompts.js';
10
12
  import { logger } from '../src/utils/logger.js';
@@ -18,7 +20,7 @@ async function testImageAnalysis(imagePath: string, question?: string) {
18
20
  // 1. 加载配置
19
21
  console.log('📝 加载配置...');
20
22
  const config = loadConfig();
21
- console.log(`✅ 配置加载成功: 模型 ${config.model}\n`);
23
+ console.log(`✅ 配置加载成功: 提供商 ${config.provider}, 模型 ${config.model}\n`);
22
24
 
23
25
  // 2. 验证图片
24
26
  console.log('🔍 验证图片来源...');
@@ -33,12 +35,19 @@ async function testImageAnalysis(imagePath: string, question?: string) {
33
35
 
34
36
  // 4. 构建提示词
35
37
  console.log('💬 构建提示词...');
36
- const prompt = buildAnalysisPrompt(question);
38
+ // DeepSeek-OCR 需要简洁 prompt
39
+ const prompt = config.provider === 'siliconflow'
40
+ ? (question || '请详细分析这张图片的内容')
41
+ : buildAnalysisPrompt(question);
37
42
  console.log(`✅ 提示词: ${question || '通用描述'}\n`);
38
43
 
39
- // 5. 调用API
40
- console.log('🤖 调用 GLM-4.5V API...');
41
- const client = new ZhipuClient(config);
44
+ // 5. 创建客户端并调用API
45
+ const client: VisionClient = config.provider === 'siliconflow'
46
+ ? new SiliconFlowClient(config)
47
+ : new ZhipuClient(config);
48
+
49
+ const modelName = config.provider === 'siliconflow' ? 'DeepSeek-OCR' : 'GLM-4.5V';
50
+ console.log(`🤖 调用 ${modelName} API...`);
42
51
  const result = await client.analyzeImage(imageDataUrl, prompt);
43
52
 
44
53
  // 6. 显示结果
@@ -76,8 +85,12 @@ if (args.length === 0) {
76
85
  npm run test:local https://example.com/image.jpg
77
86
 
78
87
  环境变量:
79
- ZHIPU_API_KEY=your-api-key # 必需
80
- ZHIPU_MODEL=glm-4.5v # 可选
88
+ # 使用智谱 GLM-4.5V
89
+ ZHIPU_API_KEY=your-api-key
90
+
91
+ # 使用硅基流动 DeepSeek-OCR
92
+ MODEL_PROVIDER=siliconflow
93
+ SILICONFLOW_API_KEY=your-api-key
81
94
  `);
82
95
  process.exit(1);
83
96
  }