@lutery/vision-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/README.md +428 -0
  2. package/dist/adapters/base-adapter.d.ts +69 -0
  3. package/dist/adapters/base-adapter.d.ts.map +1 -0
  4. package/dist/adapters/base-adapter.js +143 -0
  5. package/dist/adapters/base-adapter.js.map +1 -0
  6. package/dist/adapters/claude-adapter.d.ts +38 -0
  7. package/dist/adapters/claude-adapter.d.ts.map +1 -0
  8. package/dist/adapters/claude-adapter.js +251 -0
  9. package/dist/adapters/claude-adapter.js.map +1 -0
  10. package/dist/adapters/glm-adapter.d.ts +15 -0
  11. package/dist/adapters/glm-adapter.d.ts.map +1 -0
  12. package/dist/adapters/glm-adapter.js +131 -0
  13. package/dist/adapters/glm-adapter.js.map +1 -0
  14. package/dist/adapters/modelscope-adapter.d.ts +20 -0
  15. package/dist/adapters/modelscope-adapter.d.ts.map +1 -0
  16. package/dist/adapters/modelscope-adapter.js +142 -0
  17. package/dist/adapters/modelscope-adapter.js.map +1 -0
  18. package/dist/adapters/openai-adapter.d.ts +20 -0
  19. package/dist/adapters/openai-adapter.d.ts.map +1 -0
  20. package/dist/adapters/openai-adapter.js +194 -0
  21. package/dist/adapters/openai-adapter.js.map +1 -0
  22. package/dist/adapters/siliconflow-adapter.d.ts +21 -0
  23. package/dist/adapters/siliconflow-adapter.d.ts.map +1 -0
  24. package/dist/adapters/siliconflow-adapter.js +145 -0
  25. package/dist/adapters/siliconflow-adapter.js.map +1 -0
  26. package/dist/config/model-config.d.ts +39 -0
  27. package/dist/config/model-config.d.ts.map +1 -0
  28. package/dist/config/model-config.js +115 -0
  29. package/dist/config/model-config.js.map +1 -0
  30. package/dist/index.d.ts +17 -0
  31. package/dist/index.d.ts.map +1 -0
  32. package/dist/index.js +186 -0
  33. package/dist/index.js.map +1 -0
  34. package/dist/prompts/system.d.ts +75 -0
  35. package/dist/prompts/system.d.ts.map +1 -0
  36. package/dist/prompts/system.js +272 -0
  37. package/dist/prompts/system.js.map +1 -0
  38. package/dist/providers/provider-registry.d.ts +58 -0
  39. package/dist/providers/provider-registry.d.ts.map +1 -0
  40. package/dist/providers/provider-registry.js +173 -0
  41. package/dist/providers/provider-registry.js.map +1 -0
  42. package/dist/src/adapters/base-adapter.d.ts +59 -0
  43. package/dist/src/adapters/base-adapter.d.ts.map +1 -0
  44. package/dist/src/adapters/base-adapter.js +83 -0
  45. package/dist/src/adapters/base-adapter.js.map +1 -0
  46. package/dist/src/adapters/glm-adapter.d.ts +15 -0
  47. package/dist/src/adapters/glm-adapter.d.ts.map +1 -0
  48. package/dist/src/adapters/glm-adapter.js +116 -0
  49. package/dist/src/adapters/glm-adapter.js.map +1 -0
  50. package/dist/src/adapters/siliconflow-adapter.d.ts +21 -0
  51. package/dist/src/adapters/siliconflow-adapter.d.ts.map +1 -0
  52. package/dist/src/adapters/siliconflow-adapter.js +130 -0
  53. package/dist/src/adapters/siliconflow-adapter.js.map +1 -0
  54. package/dist/src/config/model-config.d.ts +40 -0
  55. package/dist/src/config/model-config.d.ts.map +1 -0
  56. package/dist/src/config/model-config.js +126 -0
  57. package/dist/src/config/model-config.js.map +1 -0
  58. package/dist/src/index.d.ts +17 -0
  59. package/dist/src/index.d.ts.map +1 -0
  60. package/dist/src/index.js +188 -0
  61. package/dist/src/index.js.map +1 -0
  62. package/dist/src/prompts/system.d.ts +75 -0
  63. package/dist/src/prompts/system.d.ts.map +1 -0
  64. package/dist/src/prompts/system.js +272 -0
  65. package/dist/src/prompts/system.js.map +1 -0
  66. package/dist/src/tools/vision-tool.d.ts +91 -0
  67. package/dist/src/tools/vision-tool.d.ts.map +1 -0
  68. package/dist/src/tools/vision-tool.js +171 -0
  69. package/dist/src/tools/vision-tool.js.map +1 -0
  70. package/dist/src/utils/errors.d.ts +65 -0
  71. package/dist/src/utils/errors.d.ts.map +1 -0
  72. package/dist/src/utils/errors.js +146 -0
  73. package/dist/src/utils/errors.js.map +1 -0
  74. package/dist/src/utils/image-input.d.ts +45 -0
  75. package/dist/src/utils/image-input.d.ts.map +1 -0
  76. package/dist/src/utils/image-input.js +226 -0
  77. package/dist/src/utils/image-input.js.map +1 -0
  78. package/dist/src/utils/logger.d.ts +63 -0
  79. package/dist/src/utils/logger.d.ts.map +1 -0
  80. package/dist/src/utils/logger.js +157 -0
  81. package/dist/src/utils/logger.js.map +1 -0
  82. package/dist/test/integration.test.d.ts +10 -0
  83. package/dist/test/integration.test.d.ts.map +1 -0
  84. package/dist/test/integration.test.js +270 -0
  85. package/dist/test/integration.test.js.map +1 -0
  86. package/dist/test/test-utils.d.ts +45 -0
  87. package/dist/test/test-utils.d.ts.map +1 -0
  88. package/dist/test/test-utils.js +107 -0
  89. package/dist/test/test-utils.js.map +1 -0
  90. package/dist/test/vision-tool.test.d.ts +9 -0
  91. package/dist/test/vision-tool.test.d.ts.map +1 -0
  92. package/dist/test/vision-tool.test.js +167 -0
  93. package/dist/test/vision-tool.test.js.map +1 -0
  94. package/dist/tools/vision-tool.d.ts +91 -0
  95. package/dist/tools/vision-tool.d.ts.map +1 -0
  96. package/dist/tools/vision-tool.js +167 -0
  97. package/dist/tools/vision-tool.js.map +1 -0
  98. package/dist/utils/data-url-parser.d.ts +27 -0
  99. package/dist/utils/data-url-parser.d.ts.map +1 -0
  100. package/dist/utils/data-url-parser.js +53 -0
  101. package/dist/utils/data-url-parser.js.map +1 -0
  102. package/dist/utils/errors.d.ts +65 -0
  103. package/dist/utils/errors.d.ts.map +1 -0
  104. package/dist/utils/errors.js +146 -0
  105. package/dist/utils/errors.js.map +1 -0
  106. package/dist/utils/image-input.d.ts +45 -0
  107. package/dist/utils/image-input.d.ts.map +1 -0
  108. package/dist/utils/image-input.js +238 -0
  109. package/dist/utils/image-input.js.map +1 -0
  110. package/dist/utils/logger.d.ts +63 -0
  111. package/dist/utils/logger.d.ts.map +1 -0
  112. package/dist/utils/logger.js +157 -0
  113. package/dist/utils/logger.js.map +1 -0
  114. package/dist/utils/thinking-extractors.d.ts +34 -0
  115. package/dist/utils/thinking-extractors.d.ts.map +1 -0
  116. package/dist/utils/thinking-extractors.js +83 -0
  117. package/dist/utils/thinking-extractors.js.map +1 -0
  118. package/dist/utils/thinking-filter.d.ts +32 -0
  119. package/dist/utils/thinking-filter.d.ts.map +1 -0
  120. package/dist/utils/thinking-filter.js +147 -0
  121. package/dist/utils/thinking-filter.js.map +1 -0
  122. package/package.json +41 -0
package/README.md ADDED
@@ -0,0 +1,428 @@
1
+ # Vision MCP
2
+
3
+ MCP Server providing vision capabilities for LLMs via GLM-4.6V, SiliconFlow, and ModelScope. This server enables LLMs without native vision support or with expensive vision models to access cost-effective visual analysis capabilities.
4
+
5
+ ## Features
6
+
7
+ - 🤖 **Multiple Model Support**: GLM-4.6V, SiliconFlow, and ModelScope vision models
8
+ - 🖼️ **Flexible Image Input**: URL, base64 data URL, or local file paths
9
+ - 📊 **Multiple Analysis Types**: Image description, UI analysis, object detection, OCR, and structured extraction
10
+ - 🔧 **System Prompt Templates**: Built-in templates for common vision tasks
11
+ - 📦 **Easy Deployment**: STDIO MCP Server, runs with npx
12
+ - 🔒 **Secure**: Environment-based configuration, sensitive data masking in logs
13
+
14
+ ### Streaming Response Support
15
+
16
+ Current adapters explicitly disable streaming responses (`stream: false`) and are designed for complete JSON responses. This ensures compatibility with both GLM-4.6V and SiliconFlow APIs.
17
+
18
+ **Note**: Streaming-only providers are not currently supported. If a provider only supports streaming responses (Server-Sent Events/text/event-stream format), the adapter will fail as it expects a complete JSON response. To add support for streaming providers, a streaming response parser would need to be implemented.
19
+
20
+ ## Quick Start
21
+
22
+ ### Installation
23
+
24
+ 1. Clone or download this repository
25
+ 2. Install dependencies:
26
+
27
+ ```bash
28
+ cd vision_mcp
29
+ npm install
30
+ ```
31
+
32
+ ### Configuration
33
+
34
+ Create a `.env` file in the project root:
35
+
36
+ #### Option 1: GLM-4.6V
37
+
38
+ ```bash
39
+ VISION_MODEL_TYPE=glm-4.6v
40
+ VISION_MODEL_NAME=glm-4.6v
41
+ VISION_API_BASE_URL=https://open.bigmodel.cn/api/paas/v4
42
+ VISION_API_KEY=your-glm-api-key
43
+ ```
44
+
45
+ #### Option 2: SiliconFlow
46
+
47
+ ```bash
48
+ VISION_MODEL_TYPE=siliconflow
49
+ VISION_MODEL_NAME=Qwen/Qwen2-VL-72B-Instruct
50
+ VISION_API_BASE_URL=https://api.siliconflow.cn/v1
51
+ VISION_API_KEY=your-siliconflow-api-key
52
+ ```
53
+
54
+ #### Option 3: ModelScope API-Inference
55
+
56
+ ```bash
57
+ VISION_MODEL_TYPE=modelscope
58
+ VISION_MODEL_NAME=ZhipuAI/GLM-4.6V
59
+ VISION_API_BASE_URL=https://api-inference.modelscope.cn/v1
60
+ VISION_API_KEY=your-modelscope-token
61
+ ```
62
+
63
+ **Note**: ModelScope requires:
64
+ - Real-name authentication on your ModelScope account
65
+ - Aliyun account binding
66
+ - API usage limits apply (see [API Limits](https://www.modelscope.cn/docs/model-service/API-Inference/limits))
67
+
68
+ ### Build
69
+
70
+ ```bash
71
+ npm run build
72
+ ```
73
+
74
+ ### Run (local)
75
+
76
+ ```bash
77
+ node dist/index.js
78
+ ```
79
+
80
+ If successful, you'll see: `Vision MCP Server is running on stdio` in stderr.
81
+
82
+ ### Run (npx)
83
+
84
+ ```bash
85
+ # Local package (requires build first)
86
+ npx .
87
+
88
+ # Published package
89
+ npx -y @lutery/vision-mcp
90
+ ```
91
+
92
+ ## MCP Client Configuration
93
+
94
+ ### Claude Desktop
95
+
96
+ Add to your Claude Desktop configuration:
97
+
98
+ ```json
99
+ {
100
+ "mcpServers": {
101
+ "vision-mcp": {
102
+ "command": "npx",
103
+ "args": ["-y", "@lutery/vision-mcp"],
104
+ "env": {
105
+ "VISION_MODEL_TYPE": "glm-4.6v",
106
+ "VISION_MODEL_NAME": "glm-4.6v",
107
+ "VISION_API_BASE_URL": "https://open.bigmodel.cn/api/paas/v4",
108
+ "VISION_API_KEY": "your-api-key"
109
+ }
110
+ }
111
+ }
112
+ }
113
+ ```
114
+
115
+ Or with a local installation:
116
+
117
+ ```json
118
+ {
119
+ "mcpServers": {
120
+ "vision-mcp": {
121
+ "command": "node",
122
+ "args": ["/path/to/vision_mcp/dist/index.js"],
123
+ "env": {
124
+ "VISION_MODEL_TYPE": "glm-4.6v",
125
+ "VISION_MODEL_NAME": "glm-4.6v",
126
+ "VISION_API_BASE_URL": "https://open.bigmodel.cn/api/paas/v4",
127
+ "VISION_API_KEY": "your-api-key"
128
+ }
129
+ }
130
+ }
131
+ }
132
+ ```
133
+
134
+ ### Cursor/Codex CLI
135
+
136
+ Similar configuration for other MCP-compatible clients.
137
+
138
+ ## Using the Tools
139
+
140
+ ### 1. Analyze Image
141
+
142
+ Main tool for image analysis:
143
+
144
+ ```javascript
145
+ // Tool: analyze_image
146
+ // Parameters:
147
+ {
148
+ "image": "https://example.com/image.jpg", // Image URL, base64, or local path
149
+ "prompt": "Describe this UI design in detail", // Analysis prompt
150
+ "output_format": "text", // Optional: "text" or "json"
151
+ "template": "ui-analysis" // Optional: see templates below
152
+ }
153
+ ```
154
+
155
+ #### Example Prompts
156
+
157
+ **UI Analysis:**
158
+ ```json
159
+ {
160
+ "image": "./screenshot.png",
161
+ "prompt": "Analyze this UI design and extract all UI components with their positions and styles",
162
+ "template": "ui-analysis"
163
+ }
164
+ ```
165
+
166
+ **Object Detection:**
167
+ ```json
168
+ {
169
+ "image": "https://example.com/photo.jpg",
170
+ "prompt": "Detect all objects and provide their coordinates",
171
+ "template": "object-detection"
172
+ }
173
+ ```
174
+
175
+ **OCR:**
176
+ ```json
177
+ {
178
+ "image": "...",
179
+ "prompt": "Extract all text from this image",
180
+ "template": "ocr"
181
+ }
182
+ ```
183
+
184
+ **Structured Extraction:**
185
+ ```json
186
+ {
187
+ "image": "./form.jpg",
188
+ "prompt": "Extract all form fields and values as JSON",
189
+ "output_format": "json"
190
+ }
191
+ ```
192
+
193
+ ### 2. List Templates
194
+
195
+ List available system prompt templates:
196
+
197
+ ```javascript
198
+ // Tool: list_templates
199
+ // Parameters: none
200
+ ```
201
+
202
+ Available templates:
203
+ - `general-description` - General image description
204
+ - `ui-analysis` - UI prototype and interface analysis
205
+ - `object-detection` - Object detection and localization
206
+ - `ocr` - Text extraction (OCR)
207
+ - `structured-extraction` - Structured data extraction
208
+
209
+ ### 3. Get Config
210
+
211
+ Get current model configuration:
212
+
213
+ ```javascript
214
+ // Tool: get_config
215
+ // Parameters: none
216
+ ```
217
+
218
+ ## Image Input Formats
219
+
220
+ ### 1. URL
221
+
222
+ ```
223
+ https://example.com/image.jpg
224
+ ```
225
+
226
+ ### 2. Base64 Data URL
227
+
228
+ ```
229
+ ...
230
+ ```
231
+
232
+ ### 3. Local File Path
233
+
234
+ ```
235
+ /path/to/image.png
236
+ ./relative/path/image.jpg
237
+ ```
238
+
239
+ Note: Local paths only work if the MCP server has access to the filesystem.
240
+ Note: URL validation is strict by default (see `VISION_STRICT_URL_VALIDATION`).
241
+
242
+ ## Environment Variables
243
+
244
+ | Variable | Description | Default | Required |
245
+ |----------|-------------|---------|----------|
246
+ | `VISION_MODEL_TYPE` | Model type: `glm` (alias for `glm-4.6v`), `glm-4.6v`, `siliconflow`, or `modelscope` | - | Yes |
247
+ | `VISION_MODEL_NAME` | Model name for the API | See defaults below | Yes |
248
+ | `VISION_API_BASE_URL` | API base URL (must be base path, no `/chat/completions`) | See defaults below | Yes |
249
+ | `VISION_API_KEY` | API key for authentication | - | Yes |
250
+ | `VISION_API_TIMEOUT` | Request timeout in milliseconds | 60000 | No |
251
+ | `VISION_MAX_RETRIES` | Maximum retry attempts | 2 | No |
252
+ | `VISION_STRICT_URL_VALIDATION` | Enforce strict image URL validation | `true` | No |
253
+ | `LOG_LEVEL` | Log level: `debug`, `info`, `warn`, `error` | `info` | No |
254
+
255
+ **Notes**:
256
+ - `VISION_STRICT_URL_VALIDATION` defaults to `true`, enforcing strict validation that URLs must end with supported image extensions (`.jpg`, `.jpeg`, `.png`, `.webp`). Set to `false` to allow non-image URLs with a warning only.
257
+ - For GLM-4.6V provider, both `glm` and `glm-4.6v` values work for `VISION_MODEL_TYPE`. `glm` is provided as a convenient alias.
258
+
259
+ ### Model Defaults
260
+
261
+ **GLM-4.6V:**
262
+ ```bash
263
+ VISION_MODEL_NAME=glm-4.6v
264
+ VISION_API_BASE_URL=https://open.bigmodel.cn/api/paas/v4
265
+ ```
266
+
267
+ **SiliconFlow:**
268
+ ```bash
269
+ VISION_MODEL_NAME=Qwen/Qwen2-VL-72B-Instruct
270
+ VISION_API_BASE_URL=https://api.siliconflow.cn/v1
271
+ ```
272
+
273
+ ## API Keys
274
+
275
+ ### GLM-4.6V
276
+
277
+ Get your API key from: [智谱 AI 开放平台](https://open.bigmodel.cn/)
278
+
279
+ Format: `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxx`
280
+
281
+ ### SiliconFlow
282
+
283
+ Get your API key from: [SiliconFlow](https://cloud.siliconflow.cn/)
284
+
285
+ Format: `sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
286
+
287
+ ## MCP Protocol Note
288
+
289
+ **IMPORTANT**: This is a STDIO-based MCP Server. According to MCP protocol:
290
+
291
+ - **DO NOT** use `console.log()` or write to stdout
292
+ - **USE ONLY** `console.error()` for logging (stderr)
293
+ - stdout is reserved for JSON-RPC communication
294
+
295
+ The server handles this automatically. If you fork this project, ensure you follow this rule.
296
+
297
+ ## Development
298
+
299
+ ### Project Structure
300
+
301
+ ```
302
+ vision_mcp/
303
+ ├── src/
304
+ │ ├── index.ts # MCP Server entry point
305
+ │ ├── config/
306
+ │ │ └── model-config.ts # Configuration management
307
+ │ ├── tools/
308
+ │ │ └── vision-tool.ts # Vision analysis tool
309
+ │ ├── adapters/
310
+ │ │ ├── base-adapter.ts # Base adapter class
311
+ │ │ ├── glm-adapter.ts # GLM-4.6V adapter
312
+ │ │ └── siliconflow-adapter.ts # SiliconFlow adapter
313
+ │ ├── prompts/
314
+ │ │ └── system.ts # System prompt templates
315
+ │ └── utils/
316
+ │ ├── errors.ts # Error handling
317
+ │ ├── logger.ts # Logging utilities
318
+ │ └── image-input.ts # Image input normalization
319
+ ├── package.json
320
+ ├── tsconfig.json
321
+ └── README.md
322
+ ```
323
+
324
+ ### Building
325
+
326
+ ```bash
327
+ # Install dependencies
328
+ npm install
329
+
330
+ # Build TypeScript
331
+ npm run build
332
+
333
+ # Run tests
334
+ npm test
335
+ ```
336
+
337
+ ### Testing Notes
338
+
339
+ - `npm test` uses `VISION_API_KEY` (default) or provider-specific keys in the test script:
340
+ - `SILICONFLOW_API_KEY`
341
+ - `GLM_API_KEY`
342
+ - If no API key is set, the tests will exit with a clear error message.
343
+
344
+ ## Troubleshooting
345
+
346
+ ### 1. "Failed to load model configuration"
347
+
348
+ - Check all required environment variables are set
349
+ - Verify `VISION_MODEL_TYPE` is one of `glm`, `glm-4.6v`, `siliconflow`, or `modelscope`
350
+
351
+ ### 2. "API Key not found"
352
+
353
+ - Set `VISION_API_KEY` in your environment
354
+ - Verify the API key format matches the model requirements
355
+
356
+ ### 3. "Connection timeout"
357
+
358
+ - Increase `VISION_API_TIMEOUT` value
359
+ - Check network connectivity to the API endpoint
360
+ - Verify API endpoint URL is correct
361
+
362
+ ### 4. "Invalid image URL"
363
+
364
+ - Ensure URL is publicly accessible
365
+ - Check URL format (http:// or https://)
366
+ - Verify image format is supported
367
+
368
+ ### 5. "Permission denied reading file"
369
+
370
+ - MCP server needs filesystem access for local files
371
+ - Use absolute paths or ensure relative paths are accessible
372
+ - Check file permissions
373
+
374
+ ### 6. "Invalid API endpoint" or "404 Not Found"
375
+
376
+ - Ensure `VISION_API_BASE_URL` is the base path only, without `/chat/completions`
377
+ - Correct: `https://api.siliconflow.cn/v1`
378
+ - Incorrect: `https://api.siliconflow.cn/v1/chat/completions`
379
+ - Check the error details for the full request URL to diagnose endpoint issues
380
+
381
+ ## Security Notes
382
+
383
+ - API keys are loaded from environment variables, never hardcoded
384
+ - API keys are masked in logs
385
+ - Images are not persisted by default
386
+ - MCP server should run in trusted environments only (no built-in auth)
387
+ - **Thinking/Reasoning Content Filtering**: Model thinking/reasoning content is automatically filtered from responses to prevent exposing internal reasoning to MCP clients. This filtering is unconditional and applied to all supported models regardless of configuration.
388
+
389
+ ## Security Best Practices
390
+
391
+ ⚠️ **IMPORTANT**: Never commit API keys or credentials to the repository!
392
+
393
+ - **Use environment variables** for sensitive data (`.env` file)
394
+ - **Keep local test credentials** in `.gitignore`'d files (e.g., `test_key.local.md`)
395
+ - **Rotate keys immediately** if accidentally exposed or committed
396
+ - **See** `doc/test_key.example.md` for test setup template
397
+ - **Never** copy real API keys into documentation, code comments, or issue trackers
398
+
399
+ **Key Protection Checklist**:
400
+ - [ ] `.env` is in `.gitignore`
401
+ - [ ] `.env.local` is in `.gitignore`
402
+ - [ ] No real keys in `test_key.md` (use `test_key.example.md` instead)
403
+ - [ ] No keys in documentation or comments
404
+ - [ ] Review git history for accidental key commits (`git log --all --full-history --source -- "*secret*" "*key*" "*password*" "test_key.md"`)
405
+
406
+ ## License
407
+
408
+ MIT
409
+
410
+ ## Contributing
411
+
412
+ 1. Fork the repository
413
+ 2. Create a feature branch
414
+ 3. Make your changes
415
+ 4. Add tests
416
+ 5. Submit a pull request
417
+
418
+ ## Support
419
+
420
+ For issues and questions:
421
+ - Open an issue on the repository
422
+ - Check model documentation:
423
+ - [GLM-4.6V Docs](https://docs.bigmodel.cn/)
424
+ - [SiliconFlow Docs](https://docs.siliconflow.cn/)
425
+
426
+
427
+ ## TODO
428
+ - [ ] Adapt the vision model API requests for ModelScope: https://www.modelscope.cn/docs/model-service/API-Inference/intro
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Base Vision Model Adapter
3
+ *
4
+ * @description Defines the unified interface and abstract base class for model adapters
5
+ */
6
+ import { ModelConfig } from '../config/model-config.js';
7
+ /**
8
+ * Model response: the shape resolved by analyzeWithResponse()
9
+ */
10
+ export interface VisionModelResponse {
11
+ content: string;
12
+ usage?: {
13
+ promptTokens?: number;
14
+ completionTokens?: number;
15
+ totalTokens?: number;
16
+ };
17
+ model?: string;
18
+ }
19
+ /**
20
+ * Model adapter interface
21
+ */
22
+ export interface VisionModelAdapter {
23
+ config: ModelConfig;
24
+ /**
25
+ * Analyze an image
26
+ * @param imageData - Image data (URL or base64)
27
+ * @param prompt - The prompt
28
+ * @returns The model response
29
+ */
30
+ analyze(imageData: string, prompt: string): Promise<string>;
31
+ /**
32
+ * Analyze an image (returning the full response)
33
+ * @param imageData - Image data (URL or base64)
34
+ * @param prompt - The prompt
35
+ * @returns The full response
36
+ */
37
+ analyzeWithResponse(imageData: string, prompt: string): Promise<VisionModelResponse>;
38
+ }
39
+ /**
40
+ * Abstract base class providing common functionality
41
+ */
42
+ export declare abstract class BaseVisionModelAdapter implements VisionModelAdapter {
43
+ config: ModelConfig;
44
+ constructor(config: ModelConfig);
45
+ abstract analyze(imageData: string, prompt: string): Promise<string>;
46
+ abstract analyzeWithResponse(imageData: string, prompt: string): Promise<VisionModelResponse>;
47
+ /**
48
+ * Non-retryable HTTP status codes
49
+ * These errors indicate a client-side configuration problem; retrying will not change the outcome
50
+ */
51
+ private readonly NON_RETRYABLE_STATUS_CODES;
52
+ /**
53
+ * Request wrapper with retry and timeout control
54
+ *
55
+ * @note The current implementation forces non-streaming responses (stream: false).
56
+ * If a provider only supports streaming responses, a streaming parser needs to be added.
57
+ */
58
+ protected withRetry<T>(operation: (signal: AbortSignal) => Promise<T>, config?: {
59
+ maxRetries?: number;
60
+ timeout?: number;
61
+ }): Promise<T>;
62
+ /**
63
+ * Parse the model response
64
+ * @param response - The raw response
65
+ * @param modelType - The model type (required; used to filter thinking content)
66
+ */
67
+ protected parseResponse(response: unknown, modelType: string): VisionModelResponse;
68
+ }
69
+ //# sourceMappingURL=base-adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-adapter.d.ts","sourceRoot":"","sources":["../../src/adapters/base-adapter.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AAKxD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE;QACN,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,WAAW,CAAC;IAEpB;;;;;OAKG;IACH,OAAO,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAE5D;;;;;OAKG;IACH,mBAAmB,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;CACtF;AAED;;GAEG;AACH,8BAAsB,sBAAuB,YAAW,kBAAkB;IACrD,MAAM,EAAE,WAAW;gBAAnB,MAAM,EAAE,WAAW;IAEtC,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IACpE,QAAQ,CAAC,mBAAmB,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,CAAC;IAE7F;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,0BAA0B,CAAiC;IAE5E;;;;;OAKG;cACa,SAAS,CAAC,CAAC,EACzB,SAAS,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,OAAO,CAAC,CAAC,CAAC,EAC9C,MAAM,GAAE;QAAE,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAO,GACrD,OAAO,CAAC,CAAC,CAAC;IA+Eb;;;;OAIG;IACH,SAAS,CAAC,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,GAAG,mBAAmB;CAuDnF"}
@@ -0,0 +1,143 @@
1
+ /**
2
+ * Base Vision Model Adapter
3
+ *
4
+ * @description Defines the unified interface and abstract base class for model adapters
5
+ */
6
+ import { ModelAPIError, TimeoutError, VisionMCPError } from '../utils/errors.js';
7
+ import { logger } from '../utils/logger.js';
8
+ import { filterThinkingContent } from '../utils/thinking-filter.js';
9
+ /**
10
+ * Abstract base class providing common functionality
11
+ */
12
+ export class BaseVisionModelAdapter {
13
+ config;
14
+ constructor(config) {
15
+ this.config = config;
16
+ }
17
+ /**
18
+ * Non-retryable HTTP status codes
19
+ * These errors indicate a client-side configuration problem; retrying will not change the outcome
20
+ */
21
+ NON_RETRYABLE_STATUS_CODES = new Set([400, 401, 403, 404]);
22
+ /**
23
+ * Request wrapper with retry and timeout control
24
+ *
25
+ * @note The current implementation forces non-streaming responses (stream: false).
26
+ * If a provider only supports streaming responses, a streaming parser needs to be added.
27
+ */
28
+ async withRetry(operation, config = {}) {
29
+ const { maxRetries = this.config.maxRetries || 2, timeout = this.config.timeout || 60000 } = config; // NOTE(review): `||` treats a configured 0 as unset and falls back to the default
30
+ let lastError;
31
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
32
+ try {
33
+ logger.debug(`Attempt ${attempt + 1}/${maxRetries + 1}`);
34
+ // Implement the timeout with an AbortController
35
+ const controller = new AbortController();
36
+ const timeoutId = setTimeout(() => controller.abort(), timeout);
37
+ try {
38
+ const result = await operation(controller.signal);
39
+ clearTimeout(timeoutId);
40
+ return result;
41
+ }
42
+ catch (error) {
43
+ clearTimeout(timeoutId);
44
+ throw error;
45
+ }
46
+ }
47
+ catch (error) {
48
+ lastError = error;
49
+ // An AbortError is converted to a TimeoutError and thrown immediately (no retry)
50
+ if (error instanceof Error && error.name === 'AbortError') {
51
+ throw new TimeoutError(`Request timed out after ${timeout}ms`);
52
+ }
53
+ // Check whether this is a non-retryable error (400/401/403/404)
54
+ if (error instanceof ModelAPIError) {
55
+ const status = error.details?.status;
56
+ if (status && this.NON_RETRYABLE_STATUS_CODES.has(status)) {
57
+ logger.error('Non-retryable error, failing immediately', {
58
+ status,
59
+ attempt: attempt + 1
60
+ });
61
+ throw error; // rethrow directly, no retry
62
+ }
63
+ }
64
+ // Last attempt failed: stop retrying (the error is thrown after the loop)
65
+ if (attempt === maxRetries) {
66
+ break;
67
+ }
68
+ // Compute the backoff time (exponential backoff, capped at 5000 ms)
69
+ const backoffTime = Math.min(1000 * Math.pow(2, attempt), 5000);
70
+ logger.warn(`Attempt ${attempt + 1} failed, retrying in ${backoffTime}ms`, {
71
+ error: lastError.message
72
+ });
73
+ // Wait for the backoff time
74
+ await new Promise(resolve => setTimeout(resolve, backoffTime));
75
+ }
76
+ }
77
+ // Preserve the full error details of the last failure (if available)
78
+ let errorDetails;
79
+ if (lastError !== undefined && lastError instanceof VisionMCPError && lastError.details) {
80
+ // If it is a VisionMCPError, keep all of its details
81
+ errorDetails = {
82
+ ...lastError.details,
83
+ lastError: lastError.message
84
+ };
85
+ }
86
+ else {
87
+ // Otherwise keep only the message
88
+ errorDetails = { lastError: lastError?.message };
89
+ }
90
+ throw new ModelAPIError(`Failed after ${maxRetries + 1} attempts`, errorDetails);
91
+ }
92
+ /**
93
+ * Parse the model response
94
+ * @param response - The raw response
95
+ * @param modelType - The model type (required; used to filter thinking content)
96
+ */
97
+ parseResponse(response, modelType) {
98
+ // modelType is required
99
+ if (!modelType) {
100
+ throw new Error('modelType is required for parseResponse');
101
+ }
102
+ try {
103
+ // @ts-ignore - check the response structure
104
+ const content = response?.choices?.[0]?.message?.content;
105
+ if (!content || typeof content !== 'string') {
106
+ throw new ModelAPIError('Invalid response format: missing or invalid content', { response });
107
+ }
108
+ // Filter thinking/reasoning content (always performed)
109
+ let filteredContent;
110
+ try {
111
+ filteredContent = filterThinkingContent(response, modelType);
112
+ // If any content was filtered out, log it
113
+ if (filteredContent.length < content.length) {
114
+ logger.debug('Filtered thinking content from response', {
115
+ modelType,
116
+ originalLength: content.length,
117
+ filteredLength: filteredContent.length,
118
+ reduction: content.length - filteredContent.length
119
+ });
120
+ }
121
+ }
122
+ catch (error) {
123
+ logger.warn('Failed to filter thinking content, returning raw content', {
124
+ modelType,
125
+ error: error instanceof Error ? error.message : error
126
+ });
127
+ filteredContent = content;
128
+ }
129
+ return {
130
+ content: filteredContent,
131
+ // @ts-ignore
132
+ usage: response?.usage,
133
+ // @ts-ignore
134
+ model: response?.model
135
+ };
136
+ }
137
+ catch (error) { // NOTE(review): also catches the 'Invalid response format' ModelAPIError thrown above and re-wraps it
138
+ logger.error('Failed to parse model response', error);
139
+ throw new ModelAPIError('Failed to parse model response', { response }, error);
140
+ }
141
+ }
142
+ }
143
+ //# sourceMappingURL=base-adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-adapter.js","sourceRoot":"","sources":["../../src/adapters/base-adapter.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACjF,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,qBAAqB,EAAE,MAAM,6BAA6B,CAAC;AAsCpE;;GAEG;AACH,MAAM,OAAgB,sBAAsB;IACvB;IAAnB,YAAmB,MAAmB;QAAnB,WAAM,GAAN,MAAM,CAAa;IAAG,CAAC;IAK1C;;;OAGG;IACc,0BAA0B,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IAE5E;;;;;OAKG;IACO,KAAK,CAAC,SAAS,CACvB,SAA8C,EAC9C,SAAoD,EAAE;QAEtD,MAAM,EACJ,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC,EACxC,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,KAAK,EACvC,GAAG,MAAM,CAAC;QAEX,IAAI,SAA4B,CAAC;QAEjC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;YACvD,IAAI,CAAC;gBACH,MAAM,CAAC,KAAK,CAAC,WAAW,OAAO,GAAG,CAAC,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC,CAAC;gBAEzD,0BAA0B;gBAC1B,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;gBAEhE,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;oBAClD,YAAY,CAAC,SAAS,CAAC,CAAC;oBACxB,OAAO,MAAM,CAAC;gBAChB,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,YAAY,CAAC,SAAS,CAAC,CAAC;oBACxB,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,SAAS,GAAG,KAAc,CAAC;gBAE3B,kCAAkC;gBAClC,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;oBAC1D,MAAM,IAAI,YAAY,CAAC,2BAA2B,OAAO,IAAI,CAAC,CAAC;gBACjE,CAAC;gBAED,+BAA+B;gBAC/B,IAAI,KAAK,YAAY,aAAa,EAAE,CAAC;oBACnC,MAAM,MAAM,GAAI,KAAK,CAAC,OAAe,EAAE,MAAM,CAAC;oBAC9C,IAAI,MAAM,IAAI,IAAI,CAAC,0BAA0B,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;wBAC1D,MAAM,CAAC,KAAK,CAAC,0CAA0C,EAAE;4BACvD,MAAM;4BACN,OAAO,EAAE,OAAO,GAAG,CAAC;yBACrB,CAAC,CAAC;wBACH,MAAM,KAAK,CAAC,CAAC,WAAW;oBAC1B,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,IAAI,OAAO,KAAK,UAAU,EAAE,CAAC;oBAC3B,MAAM;gBACR,CAAC;gBAED,eAAe;gBACf,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC,CAAC;gBAChE,MAAM,C
AAC,IAAI,CAAC,WAAW,OAAO,GAAG,CAAC,wBAAwB,WAAW,IAAI,EAAE;oBACzE,KAAK,EAAE,SAAS,CAAC,OAAO;iBACzB,CAAC,CAAC;gBAEH,SAAS;gBACT,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC,CAAC;YACjE,CAAC;QACH,CAAC;QAED,sBAAsB;QACtB,IAAI,YAAiC,CAAC;QACtC,IAAI,SAAS,KAAK,SAAS,IAAI,SAAS,YAAY,cAAc,IAAI,SAAS,CAAC,OAAO,EAAE,CAAC;YACxF,kCAAkC;YAClC,YAAY,GAAG;gBACb,GAAG,SAAS,CAAC,OAAO;gBACpB,SAAS,EAAE,SAAS,CAAC,OAAO;aAC7B,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,UAAU;YACV,YAAY,GAAG,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC;QACnD,CAAC;QAED,MAAM,IAAI,aAAa,CACrB,gBAAgB,UAAU,GAAG,CAAC,WAAW,EACzC,YAAY,CACb,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACO,aAAa,CAAC,QAAiB,EAAE,SAAiB;QAC1D,wBAAwB;QACxB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC7D,CAAC;QAED,IAAI,CAAC;YACH,sBAAsB;YACtB,MAAM,OAAO,GAAG,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;YAEzD,IAAI,CAAC,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAC5C,MAAM,IAAI,aAAa,CACrB,qDAAqD,EACrD,EAAE,QAAQ,EAAE,CACb,CAAC;YACJ,CAAC;YAED,uCAAuC;YACvC,IAAI,eAAuB,CAAC;YAC5B,IAAI,CAAC;gBACH,eAAe,GAAG,qBAAqB,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;gBAE7D,uBAAuB;gBACvB,IAAI,eAAe,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;oBAC5C,MAAM,CAAC,KAAK,CAAC,yCAAyC,EAAE;wBACtD,SAAS;wBACT,cAAc,EAAE,OAAO,CAAC,MAAM;wBAC9B,cAAc,EAAE,eAAe,CAAC,MAAM;wBACtC,SAAS,EAAE,OAAO,CAAC,MAAM,GAAG,eAAe,CAAC,MAAM;qBACnD,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,0DAA0D,EAAE;oBACtE,SAAS;oBACT,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;iBACtD,CAAC,CAAC;gBACH,eAAe,GAAG,OAAO,CAAC;YAC5B,CAAC;YAED,OAAO;gBACL,OAAO,EAAE,eAAe;gBACxB,aAAa;gBACb,KAAK,EAAE,QAAQ,EAAE,KAAK;gBACtB,aAAa;gBACb,KAAK,EAAE,QAAQ,EAAE,KAAK;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,gCAAgC,EAAE,KAAK,CAAC,CAAC;YACtD,MAAM,IAAI,aAAa,CACrB,gCAAgC,EAChC,EAAE,QAAQ,EAAE,EACZ,KAAK,CACN,CAAC;QACJ,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Claude (Anthropic Messages API) Adapter
3
+ *
4
+ * @description Claude Messages API adapter implementation, supporting Claude multimodal vision models
5
+ * @see https://docs.anthropic.com/claude/reference/messages-post
6
+ */
7
+ import { BaseVisionModelAdapter, VisionModelResponse } from './base-adapter.js';
8
+ import { ModelConfig } from '../config/model-config.js';
9
+ export interface ClaudeAdapterOptions { // Optional settings accepted by the ClaudeAdapter constructor
10
+ maxTokens?: number;
11
+ apiVersion?: string;
12
+ }
13
+ export declare class ClaudeAdapter extends BaseVisionModelAdapter {
14
+ private options;
15
+ constructor(config: ModelConfig, options?: ClaudeAdapterOptions);
16
+ analyze(imageData: string, prompt: string): Promise<string>;
17
+ analyzeWithResponse(imageData: string, prompt: string): Promise<VisionModelResponse>;
18
+ private callClaudeAPI;
19
+ /**
20
+ * Build the image content block
21
+ * Supports two formats: URL and base64 data URL
22
+ */
23
+ private buildImageBlock;
24
+ /**
25
+ * Handle an error response
26
+ */
27
+ private handleErrorResponse;
28
+ /**
29
+ * Parse the response data
30
+ */
31
+ private parseResponseData;
32
+ /**
33
+ * Normalize a Claude response into a VisionModelResponse
34
+ * Claude response format: { content: [{type: "text", text: "..."}], usage: {...} }
35
+ */
36
+ private normalizeResponse;
37
+ }
38
+ //# sourceMappingURL=claude-adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claude-adapter.d.ts","sourceRoot":"","sources":["../../src/adapters/claude-adapter.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAChF,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AAKxD,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,qBAAa,aAAc,SAAQ,sBAAsB;IACvD,OAAO,CAAC,OAAO,CAAiC;gBAEpC,MAAM,EAAE,WAAW,EAAE,OAAO,GAAE,oBAAyB;IAyB7D,OAAO,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAyB3D,mBAAmB,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA0B5E,aAAa;IAgE3B;;;OAGG;IACH,OAAO,CAAC,eAAe;IAwBvB;;OAEG;YACW,mBAAmB;IAwCjC;;OAEG;YACW,iBAAiB;IAiC/B;;;OAGG;IACH,OAAO,CAAC,iBAAiB;CA+B1B"}