@lutery/vision-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +428 -0
- package/dist/adapters/base-adapter.d.ts +69 -0
- package/dist/adapters/base-adapter.d.ts.map +1 -0
- package/dist/adapters/base-adapter.js +143 -0
- package/dist/adapters/base-adapter.js.map +1 -0
- package/dist/adapters/claude-adapter.d.ts +38 -0
- package/dist/adapters/claude-adapter.d.ts.map +1 -0
- package/dist/adapters/claude-adapter.js +251 -0
- package/dist/adapters/claude-adapter.js.map +1 -0
- package/dist/adapters/glm-adapter.d.ts +15 -0
- package/dist/adapters/glm-adapter.d.ts.map +1 -0
- package/dist/adapters/glm-adapter.js +131 -0
- package/dist/adapters/glm-adapter.js.map +1 -0
- package/dist/adapters/modelscope-adapter.d.ts +20 -0
- package/dist/adapters/modelscope-adapter.d.ts.map +1 -0
- package/dist/adapters/modelscope-adapter.js +142 -0
- package/dist/adapters/modelscope-adapter.js.map +1 -0
- package/dist/adapters/openai-adapter.d.ts +20 -0
- package/dist/adapters/openai-adapter.d.ts.map +1 -0
- package/dist/adapters/openai-adapter.js +194 -0
- package/dist/adapters/openai-adapter.js.map +1 -0
- package/dist/adapters/siliconflow-adapter.d.ts +21 -0
- package/dist/adapters/siliconflow-adapter.d.ts.map +1 -0
- package/dist/adapters/siliconflow-adapter.js +145 -0
- package/dist/adapters/siliconflow-adapter.js.map +1 -0
- package/dist/config/model-config.d.ts +39 -0
- package/dist/config/model-config.d.ts.map +1 -0
- package/dist/config/model-config.js +115 -0
- package/dist/config/model-config.js.map +1 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +186 -0
- package/dist/index.js.map +1 -0
- package/dist/prompts/system.d.ts +75 -0
- package/dist/prompts/system.d.ts.map +1 -0
- package/dist/prompts/system.js +272 -0
- package/dist/prompts/system.js.map +1 -0
- package/dist/providers/provider-registry.d.ts +58 -0
- package/dist/providers/provider-registry.d.ts.map +1 -0
- package/dist/providers/provider-registry.js +173 -0
- package/dist/providers/provider-registry.js.map +1 -0
- package/dist/src/adapters/base-adapter.d.ts +59 -0
- package/dist/src/adapters/base-adapter.d.ts.map +1 -0
- package/dist/src/adapters/base-adapter.js +83 -0
- package/dist/src/adapters/base-adapter.js.map +1 -0
- package/dist/src/adapters/glm-adapter.d.ts +15 -0
- package/dist/src/adapters/glm-adapter.d.ts.map +1 -0
- package/dist/src/adapters/glm-adapter.js +116 -0
- package/dist/src/adapters/glm-adapter.js.map +1 -0
- package/dist/src/adapters/siliconflow-adapter.d.ts +21 -0
- package/dist/src/adapters/siliconflow-adapter.d.ts.map +1 -0
- package/dist/src/adapters/siliconflow-adapter.js +130 -0
- package/dist/src/adapters/siliconflow-adapter.js.map +1 -0
- package/dist/src/config/model-config.d.ts +40 -0
- package/dist/src/config/model-config.d.ts.map +1 -0
- package/dist/src/config/model-config.js +126 -0
- package/dist/src/config/model-config.js.map +1 -0
- package/dist/src/index.d.ts +17 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +188 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/prompts/system.d.ts +75 -0
- package/dist/src/prompts/system.d.ts.map +1 -0
- package/dist/src/prompts/system.js +272 -0
- package/dist/src/prompts/system.js.map +1 -0
- package/dist/src/tools/vision-tool.d.ts +91 -0
- package/dist/src/tools/vision-tool.d.ts.map +1 -0
- package/dist/src/tools/vision-tool.js +171 -0
- package/dist/src/tools/vision-tool.js.map +1 -0
- package/dist/src/utils/errors.d.ts +65 -0
- package/dist/src/utils/errors.d.ts.map +1 -0
- package/dist/src/utils/errors.js +146 -0
- package/dist/src/utils/errors.js.map +1 -0
- package/dist/src/utils/image-input.d.ts +45 -0
- package/dist/src/utils/image-input.d.ts.map +1 -0
- package/dist/src/utils/image-input.js +226 -0
- package/dist/src/utils/image-input.js.map +1 -0
- package/dist/src/utils/logger.d.ts +63 -0
- package/dist/src/utils/logger.d.ts.map +1 -0
- package/dist/src/utils/logger.js +157 -0
- package/dist/src/utils/logger.js.map +1 -0
- package/dist/test/integration.test.d.ts +10 -0
- package/dist/test/integration.test.d.ts.map +1 -0
- package/dist/test/integration.test.js +270 -0
- package/dist/test/integration.test.js.map +1 -0
- package/dist/test/test-utils.d.ts +45 -0
- package/dist/test/test-utils.d.ts.map +1 -0
- package/dist/test/test-utils.js +107 -0
- package/dist/test/test-utils.js.map +1 -0
- package/dist/test/vision-tool.test.d.ts +9 -0
- package/dist/test/vision-tool.test.d.ts.map +1 -0
- package/dist/test/vision-tool.test.js +167 -0
- package/dist/test/vision-tool.test.js.map +1 -0
- package/dist/tools/vision-tool.d.ts +91 -0
- package/dist/tools/vision-tool.d.ts.map +1 -0
- package/dist/tools/vision-tool.js +167 -0
- package/dist/tools/vision-tool.js.map +1 -0
- package/dist/utils/data-url-parser.d.ts +27 -0
- package/dist/utils/data-url-parser.d.ts.map +1 -0
- package/dist/utils/data-url-parser.js +53 -0
- package/dist/utils/data-url-parser.js.map +1 -0
- package/dist/utils/errors.d.ts +65 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/errors.js +146 -0
- package/dist/utils/errors.js.map +1 -0
- package/dist/utils/image-input.d.ts +45 -0
- package/dist/utils/image-input.d.ts.map +1 -0
- package/dist/utils/image-input.js +238 -0
- package/dist/utils/image-input.js.map +1 -0
- package/dist/utils/logger.d.ts +63 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +157 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/thinking-extractors.d.ts +34 -0
- package/dist/utils/thinking-extractors.d.ts.map +1 -0
- package/dist/utils/thinking-extractors.js +83 -0
- package/dist/utils/thinking-extractors.js.map +1 -0
- package/dist/utils/thinking-filter.d.ts +32 -0
- package/dist/utils/thinking-filter.d.ts.map +1 -0
- package/dist/utils/thinking-filter.js +147 -0
- package/dist/utils/thinking-filter.js.map +1 -0
- package/package.json +41 -0
package/README.md
ADDED
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
# Vision MCP
|
|
2
|
+
|
|
3
|
+
MCP Server providing vision capabilities for LLMs via GLM-4.6V, SiliconFlow, and ModelScope. It gives LLMs that lack native vision support, or whose native vision models are expensive, access to cost-effective visual analysis.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🤖 **Multiple Model Support**: GLM-4.6V, SiliconFlow, and ModelScope vision models
|
|
8
|
+
- 🖼️ **Flexible Image Input**: URL, base64 data URL, or local file paths
|
|
9
|
+
- 📊 **Multiple Analysis Types**: Image description, UI analysis, object detection, OCR, and structured extraction
|
|
10
|
+
- 🔧 **System Prompt Templates**: Built-in templates for common vision tasks
|
|
11
|
+
- 📦 **Easy Deployment**: STDIO MCP Server, runs with npx
|
|
12
|
+
- 🔒 **Secure**: Environment-based configuration, sensitive data masking in logs
|
|
13
|
+
|
|
14
|
+
### Streaming Response Support
|
|
15
|
+
|
|
16
|
+
Current adapters explicitly disable streaming responses (`stream: false`) and are designed for complete JSON responses. This ensures compatibility with both GLM-4.6V and SiliconFlow APIs.
|
|
17
|
+
|
|
18
|
+
**Note**: Streaming-only providers are not currently supported. If a provider only supports streaming responses (Server-Sent Events, i.e. `text/event-stream`), the adapter will fail because it expects a complete JSON response. Supporting such providers would require implementing a streaming response parser.
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
### Installation
|
|
23
|
+
|
|
24
|
+
1. Clone or download this repository
|
|
25
|
+
2. Install dependencies:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
cd vision_mcp
|
|
29
|
+
npm install
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Configuration
|
|
33
|
+
|
|
34
|
+
Create a `.env` file in the project root:
|
|
35
|
+
|
|
36
|
+
#### Option 1: GLM-4.6V
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
VISION_MODEL_TYPE=glm-4.6v
|
|
40
|
+
VISION_MODEL_NAME=glm-4.6v
|
|
41
|
+
VISION_API_BASE_URL=https://open.bigmodel.cn/api/paas/v4
|
|
42
|
+
VISION_API_KEY=your-glm-api-key
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
#### Option 2: SiliconFlow
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
VISION_MODEL_TYPE=siliconflow
|
|
49
|
+
VISION_MODEL_NAME=Qwen/Qwen2-VL-72B-Instruct
|
|
50
|
+
VISION_API_BASE_URL=https://api.siliconflow.cn/v1
|
|
51
|
+
VISION_API_KEY=your-siliconflow-api-key
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
#### Option 3: ModelScope API-Inference
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
VISION_MODEL_TYPE=modelscope
|
|
58
|
+
VISION_MODEL_NAME=ZhipuAI/GLM-4.6V
|
|
59
|
+
VISION_API_BASE_URL=https://api-inference.modelscope.cn/v1
|
|
60
|
+
VISION_API_KEY=your-modelscope-token
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Note**: ModelScope requires:
|
|
64
|
+
- Real-name authentication on your ModelScope account
|
|
65
|
+
- Aliyun account binding
|
|
66
|
+
- API usage limits apply (see [API Limits](https://www.modelscope.cn/docs/model-service/API-Inference/limits))
|
|
67
|
+
|
|
68
|
+
### Build
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
npm run build
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Run (local)
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
node dist/index.js
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
If successful, you'll see: `Vision MCP Server is running on stdio` in stderr.
|
|
81
|
+
|
|
82
|
+
### Run (npx)
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Local package (requires build first)
|
|
86
|
+
npx .
|
|
87
|
+
|
|
88
|
+
# Published package
|
|
89
|
+
npx -y @lutery/vision-mcp
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## MCP Client Configuration
|
|
93
|
+
|
|
94
|
+
### Claude Desktop
|
|
95
|
+
|
|
96
|
+
Add to your Claude Desktop configuration:
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
{
|
|
100
|
+
"mcpServers": {
|
|
101
|
+
"vision-mcp": {
|
|
102
|
+
"command": "npx",
|
|
103
|
+
"args": ["-y", "@lutery/vision-mcp"],
|
|
104
|
+
"env": {
|
|
105
|
+
"VISION_MODEL_TYPE": "glm-4.6v",
|
|
106
|
+
"VISION_MODEL_NAME": "glm-4.6v",
|
|
107
|
+
"VISION_API_BASE_URL": "https://open.bigmodel.cn/api/paas/v4",
|
|
108
|
+
"VISION_API_KEY": "your-api-key"
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Or with a local installation:
|
|
116
|
+
|
|
117
|
+
```json
|
|
118
|
+
{
|
|
119
|
+
"mcpServers": {
|
|
120
|
+
"vision-mcp": {
|
|
121
|
+
"command": "node",
|
|
122
|
+
"args": ["/path/to/vision_mcp/dist/index.js"],
|
|
123
|
+
"env": {
|
|
124
|
+
"VISION_MODEL_TYPE": "glm-4.6v",
|
|
125
|
+
"VISION_MODEL_NAME": "glm-4.6v",
|
|
126
|
+
"VISION_API_BASE_URL": "https://open.bigmodel.cn/api/paas/v4",
|
|
127
|
+
"VISION_API_KEY": "your-api-key"
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Cursor/Codex CLI
|
|
135
|
+
|
|
136
|
+
Similar configuration for other MCP-compatible clients.
|
|
137
|
+
|
|
138
|
+
## Using the Tools
|
|
139
|
+
|
|
140
|
+
### 1. Analyze Image
|
|
141
|
+
|
|
142
|
+
Main tool for image analysis:
|
|
143
|
+
|
|
144
|
+
```javascript
|
|
145
|
+
// Tool: analyze_image
|
|
146
|
+
// Parameters:
|
|
147
|
+
{
|
|
148
|
+
"image": "https://example.com/image.jpg", // Image URL, base64, or local path
|
|
149
|
+
"prompt": "Describe this UI design in detail", // Analysis prompt
|
|
150
|
+
"output_format": "text", // Optional: "text" or "json"
|
|
151
|
+
"template": "ui-analysis" // Optional: see templates below
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
#### Example Prompts
|
|
156
|
+
|
|
157
|
+
**UI Analysis:**
|
|
158
|
+
```json
|
|
159
|
+
{
|
|
160
|
+
"image": "./screenshot.png",
|
|
161
|
+
"prompt": "Analyze this UI design and extract all UI components with their positions and styles",
|
|
162
|
+
"template": "ui-analysis"
|
|
163
|
+
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Object Detection:**
|
|
167
|
+
```json
|
|
168
|
+
{
|
|
169
|
+
"image": "https://example.com/photo.jpg",
|
|
170
|
+
"prompt": "Detect all objects and provide their coordinates",
|
|
171
|
+
"template": "object-detection"
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**OCR:**
|
|
176
|
+
```json
|
|
177
|
+
{
|
|
178
|
+
"image": "data:image/png;base64,iVBORw0KGgo...",
|
|
179
|
+
"prompt": "Extract all text from this image",
|
|
180
|
+
"template": "ocr"
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**Structured Extraction:**
|
|
185
|
+
```json
|
|
186
|
+
{
|
|
187
|
+
"image": "./form.jpg",
|
|
188
|
+
"prompt": "Extract all form fields and values as JSON",
|
|
189
|
+
"output_format": "json"
|
|
190
|
+
}
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### 2. List Templates
|
|
194
|
+
|
|
195
|
+
List available system prompt templates:
|
|
196
|
+
|
|
197
|
+
```javascript
|
|
198
|
+
// Tool: list_templates
|
|
199
|
+
// Parameters: none
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Available templates:
|
|
203
|
+
- `general-description` - General image description
|
|
204
|
+
- `ui-analysis` - UI prototype and interface analysis
|
|
205
|
+
- `object-detection` - Object detection and localization
|
|
206
|
+
- `ocr` - Text extraction (OCR)
|
|
207
|
+
- `structured-extraction` - Structured data extraction
|
|
208
|
+
|
|
209
|
+
### 3. Get Config
|
|
210
|
+
|
|
211
|
+
Get current model configuration:
|
|
212
|
+
|
|
213
|
+
```javascript
|
|
214
|
+
// Tool: get_config
|
|
215
|
+
// Parameters: none
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Image Input Formats
|
|
219
|
+
|
|
220
|
+
### 1. URL
|
|
221
|
+
|
|
222
|
+
```
|
|
223
|
+
https://example.com/image.jpg
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### 2. Base64 Data URL
|
|
227
|
+
|
|
228
|
+
```
|
|
229
|
+
data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD...
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### 3. Local File Path
|
|
233
|
+
|
|
234
|
+
```
|
|
235
|
+
/path/to/image.png
|
|
236
|
+
./relative/path/image.jpg
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Note: Local paths only work if the MCP server has access to the filesystem.
|
|
240
|
+
Note: URL validation is strict by default (see `VISION_STRICT_URL_VALIDATION`).
|
|
241
|
+
|
|
242
|
+
## Environment Variables
|
|
243
|
+
|
|
244
|
+
| Variable | Description | Default | Required |
|
|
245
|
+
|----------|-------------|---------|----------|
|
|
246
|
+
| `VISION_MODEL_TYPE` | Model type: `glm` (alias for `glm-4.6v`), `glm-4.6v`, `siliconflow`, or `modelscope` | - | Yes |
|
|
247
|
+
| `VISION_MODEL_NAME` | Model name for the API | See defaults below | Yes |
|
|
248
|
+
| `VISION_API_BASE_URL` | API base URL (must be base path, no `/chat/completions`) | See defaults below | Yes |
|
|
249
|
+
| `VISION_API_KEY` | API key for authentication | - | Yes |
|
|
250
|
+
| `VISION_API_TIMEOUT` | Request timeout in milliseconds | 60000 | No |
|
|
251
|
+
| `VISION_MAX_RETRIES` | Maximum retry attempts | 2 | No |
|
|
252
|
+
| `VISION_STRICT_URL_VALIDATION` | Enforce strict image URL validation | `true` | No |
|
|
253
|
+
| `LOG_LEVEL` | Log level: `debug`, `info`, `warn`, `error` | `info` | No |
|
|
254
|
+
|
|
255
|
+
**Notes**:
|
|
256
|
+
- `VISION_STRICT_URL_VALIDATION` defaults to `true`, which requires image URLs to end with a supported image extension (`.jpg`, `.jpeg`, `.png`, `.webp`). Set it to `false` to accept URLs without such an extension; a warning is logged instead of an error.
|
|
257
|
+
- For GLM-4.6V provider, both `glm` and `glm-4.6v` values work for `VISION_MODEL_TYPE`. `glm` is provided as a convenient alias.
|
|
258
|
+
|
|
259
|
+
### Model Defaults
|
|
260
|
+
|
|
261
|
+
**GLM-4.6V:**
|
|
262
|
+
```bash
|
|
263
|
+
VISION_MODEL_NAME=glm-4.6v
|
|
264
|
+
VISION_API_BASE_URL=https://open.bigmodel.cn/api/paas/v4
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
**SiliconFlow:**
|
|
268
|
+
```bash
|
|
269
|
+
VISION_MODEL_NAME=Qwen/Qwen2-VL-72B-Instruct
|
|
270
|
+
VISION_API_BASE_URL=https://api.siliconflow.cn/v1
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## API Keys
|
|
274
|
+
|
|
275
|
+
### GLM-4.6V
|
|
276
|
+
|
|
277
|
+
Get your API key from: [智谱 AI 开放平台](https://open.bigmodel.cn/)
|
|
278
|
+
|
|
279
|
+
Format: `xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxx`
|
|
280
|
+
|
|
281
|
+
### SiliconFlow
|
|
282
|
+
|
|
283
|
+
Get your API key from: [SiliconFlow](https://cloud.siliconflow.cn/)
|
|
284
|
+
|
|
285
|
+
Format: `sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
|
|
286
|
+
|
|
287
|
+
## MCP Protocol Note
|
|
288
|
+
|
|
289
|
+
**IMPORTANT**: This is a STDIO-based MCP Server. According to MCP protocol:
|
|
290
|
+
|
|
291
|
+
- **DO NOT** use `console.log()` or write to stdout
|
|
292
|
+
- **USE ONLY** `console.error()` for logging (stderr)
|
|
293
|
+
- stdout is reserved for JSON-RPC communication
|
|
294
|
+
|
|
295
|
+
The server handles this automatically. If you fork this project, ensure you follow this rule.
|
|
296
|
+
|
|
297
|
+
## Development
|
|
298
|
+
|
|
299
|
+
### Project Structure
|
|
300
|
+
|
|
301
|
+
```
|
|
302
|
+
vision_mcp/
|
|
303
|
+
├── src/
|
|
304
|
+
│ ├── index.ts # MCP Server entry point
|
|
305
|
+
│ ├── config/
|
|
306
|
+
│ │ └── model-config.ts # Configuration management
|
|
307
|
+
│ ├── tools/
|
|
308
|
+
│ │ └── vision-tool.ts # Vision analysis tool
|
|
309
|
+
│ ├── adapters/
|
|
310
|
+
│ │ ├── base-adapter.ts # Base adapter class
|
|
311
|
+
│ │ ├── glm-adapter.ts # GLM-4.6V adapter
|
|
312
|
+
│ │ └── siliconflow-adapter.ts # SiliconFlow adapter
|
|
313
|
+
│ ├── prompts/
|
|
314
|
+
│ │ └── system.ts # System prompt templates
|
|
315
|
+
│ └── utils/
|
|
316
|
+
│ ├── errors.ts # Error handling
|
|
317
|
+
│ ├── logger.ts # Logging utilities
|
|
318
|
+
│ └── image-input.ts # Image input normalization
|
|
319
|
+
├── package.json
|
|
320
|
+
├── tsconfig.json
|
|
321
|
+
└── README.md
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Building
|
|
325
|
+
|
|
326
|
+
```bash
|
|
327
|
+
# Install dependencies
|
|
328
|
+
npm install
|
|
329
|
+
|
|
330
|
+
# Build TypeScript
|
|
331
|
+
npm run build
|
|
332
|
+
|
|
333
|
+
# Run tests
|
|
334
|
+
npm test
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
### Testing Notes
|
|
338
|
+
|
|
339
|
+
- `npm test` uses `VISION_API_KEY` (default) or provider-specific keys in the test script:
|
|
340
|
+
- `SILICONFLOW_API_KEY`
|
|
341
|
+
- `GLM_API_KEY`
|
|
342
|
+
- If no API key is set, the tests will exit with a clear error message.
|
|
343
|
+
|
|
344
|
+
## Troubleshooting
|
|
345
|
+
|
|
346
|
+
### 1. "Failed to load model configuration"
|
|
347
|
+
|
|
348
|
+
- Check all required environment variables are set
|
|
349
|
+
- Verify `VISION_MODEL_TYPE` is one of `glm`, `glm-4.6v`, `siliconflow`, or `modelscope`
|
|
350
|
+
|
|
351
|
+
### 2. "API Key not found"
|
|
352
|
+
|
|
353
|
+
- Set `VISION_API_KEY` in your environment
|
|
354
|
+
- Verify the API key format matches the model requirements
|
|
355
|
+
|
|
356
|
+
### 3. "Connection timeout"
|
|
357
|
+
|
|
358
|
+
- Increase `VISION_API_TIMEOUT` value
|
|
359
|
+
- Check network connectivity to the API endpoint
|
|
360
|
+
- Verify API endpoint URL is correct
|
|
361
|
+
|
|
362
|
+
### 4. "Invalid image URL"
|
|
363
|
+
|
|
364
|
+
- Ensure URL is publicly accessible
|
|
365
|
+
- Check URL format (http:// or https://)
|
|
366
|
+
- Verify image format is supported
|
|
367
|
+
|
|
368
|
+
### 5. "Permission denied reading file"
|
|
369
|
+
|
|
370
|
+
- MCP server needs filesystem access for local files
|
|
371
|
+
- Use absolute paths or ensure relative paths are accessible
|
|
372
|
+
- Check file permissions
|
|
373
|
+
|
|
374
|
+
### 6. "Invalid API endpoint" or "404 Not Found"
|
|
375
|
+
|
|
376
|
+
- Ensure `VISION_API_BASE_URL` is the base path only, without `/chat/completions`
|
|
377
|
+
- Correct: `https://api.siliconflow.cn/v1`
|
|
378
|
+
- Incorrect: `https://api.siliconflow.cn/v1/chat/completions`
|
|
379
|
+
- Check the error details for the full request URL to diagnose endpoint issues
|
|
380
|
+
|
|
381
|
+
## Security Notes
|
|
382
|
+
|
|
383
|
+
- API keys are loaded from environment variables, never hardcoded
|
|
384
|
+
- API keys are masked in logs
|
|
385
|
+
- Images are not persisted by default
|
|
386
|
+
- MCP server should run in trusted environments only (no built-in auth)
|
|
387
|
+
- **Thinking/Reasoning Content Filtering**: Model thinking/reasoning content is automatically filtered from responses to prevent exposing internal reasoning to MCP clients. This filtering is unconditional and applied to all supported models regardless of configuration.
|
|
388
|
+
|
|
389
|
+
## Security Best Practices
|
|
390
|
+
|
|
391
|
+
⚠️ **IMPORTANT**: Never commit API keys or credentials to the repository!
|
|
392
|
+
|
|
393
|
+
- **Use environment variables** for sensitive data (`.env` file)
|
|
394
|
+
- **Keep local test credentials** in `.gitignore`'d files (e.g., `test_key.local.md`)
|
|
395
|
+
- **Rotate keys immediately** if accidentally exposed or committed
|
|
396
|
+
- **See** `doc/test_key.example.md` for test setup template
|
|
397
|
+
- **Never** copy real API keys into documentation, code comments, or issue trackers
|
|
398
|
+
|
|
399
|
+
**Key Protection Checklist**:
|
|
400
|
+
- [ ] `.env` is in `.gitignore`
|
|
401
|
+
- [ ] `.env.local` is in `.gitignore`
|
|
402
|
+
- [ ] No real keys in `test_key.md` (use `test_key.example.md` instead)
|
|
403
|
+
- [ ] No keys in documentation or comments
|
|
404
|
+
- [ ] Review git history for accidental key commits (`git log --all --full-history --source -- "*secret*" "*key*" "*password*" "test_key.md"`)
|
|
405
|
+
|
|
406
|
+
## License
|
|
407
|
+
|
|
408
|
+
MIT
|
|
409
|
+
|
|
410
|
+
## Contributing
|
|
411
|
+
|
|
412
|
+
1. Fork the repository
|
|
413
|
+
2. Create a feature branch
|
|
414
|
+
3. Make your changes
|
|
415
|
+
4. Add tests
|
|
416
|
+
5. Submit a pull request
|
|
417
|
+
|
|
418
|
+
## Support
|
|
419
|
+
|
|
420
|
+
For issues and questions:
|
|
421
|
+
- Open an issue on the repository
|
|
422
|
+
- Check model documentation:
|
|
423
|
+
- [GLM-4.6V Docs](https://docs.bigmodel.cn/)
|
|
424
|
+
- [SiliconFlow Docs](https://docs.siliconflow.cn/)
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
## TODO
|
|
428
|
+
- [ ] Adapt requests to ModelScope's vision model API: https://www.modelscope.cn/docs/model-service/API-Inference/intro
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Vision Model Adapter
|
|
3
|
+
*
|
|
4
|
+
* @description Defines the unified interface and abstract base class for model adapters
|
|
5
|
+
*/
|
|
6
|
+
import { ModelConfig } from '../config/model-config.js';
|
|
7
|
+
/**
|
|
8
|
+
* Model response: the normalized result an adapter returns from analyzeWithResponse
|
|
9
|
+
*/
|
|
10
|
+
export interface VisionModelResponse {
|
|
11
|
+
content: string; // reply text extracted from the model response
|
|
12
|
+
usage?: { // token usage, present only when the response carries it
|
|
13
|
+
promptTokens?: number;
|
|
14
|
+
completionTokens?: number;
|
|
15
|
+
totalTokens?: number;
|
|
16
|
+
};
|
|
17
|
+
model?: string; // model identifier, present only when the response carries it
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Model adapter interface: the contract every vision model adapter implements
|
|
21
|
+
*/
|
|
22
|
+
export interface VisionModelAdapter {
|
|
23
|
+
config: ModelConfig;
|
|
24
|
+
/**
|
|
25
|
+
* Analyze an image
|
|
26
|
+
* @param imageData - Image data (URL or base64)
|
|
27
|
+
* @param prompt - Prompt text
|
|
28
|
+
* @returns The model's reply as a string
|
|
29
|
+
*/
|
|
30
|
+
analyze(imageData: string, prompt: string): Promise<string>;
|
|
31
|
+
/**
|
|
32
|
+
* Analyze an image and return the full response
|
|
33
|
+
* @param imageData - Image data (URL or base64)
|
|
34
|
+
* @param prompt - Prompt text
|
|
35
|
+
* @returns The full response (content plus optional usage and model)
|
|
36
|
+
*/
|
|
37
|
+
analyzeWithResponse(imageData: string, prompt: string): Promise<VisionModelResponse>;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Abstract base class implementing functionality shared by adapters (retry/timeout wrapper and response parsing)
|
|
41
|
+
*/
|
|
42
|
+
export declare abstract class BaseVisionModelAdapter implements VisionModelAdapter {
|
|
43
|
+
config: ModelConfig;
|
|
44
|
+
constructor(config: ModelConfig);
|
|
45
|
+
abstract analyze(imageData: string, prompt: string): Promise<string>;
|
|
46
|
+
abstract analyzeWithResponse(imageData: string, prompt: string): Promise<VisionModelResponse>;
|
|
47
|
+
/**
|
|
48
|
+
* HTTP status codes that are not retried
|
|
49
|
+
* These errors indicate a client-side configuration problem; retrying would not change the result
|
|
50
|
+
*/
|
|
51
|
+
private readonly NON_RETRYABLE_STATUS_CODES;
|
|
52
|
+
/**
|
|
53
|
+
* Request wrapper with retry and timeout control
|
|
54
|
+
*
|
|
55
|
+
* @note The current implementation forces non-streaming responses (stream: false).
|
|
56
|
+
* If a provider only supports streaming responses, a streaming parser must be added.
|
|
57
|
+
*/
|
|
58
|
+
protected withRetry<T>(operation: (signal: AbortSignal) => Promise<T>, config?: {
|
|
59
|
+
maxRetries?: number; // retries after the first attempt; the implementation falls back to config.maxRetries, then 2
|
|
60
|
+
timeout?: number; // per-attempt timeout in milliseconds; the implementation falls back to config.timeout, then 60000
|
|
61
|
+
}): Promise<T>;
|
|
62
|
+
/**
|
|
63
|
+
* Parse the model response
|
|
64
|
+
* @param response - Raw response
|
|
65
|
+
* @param modelType - Model type (required; used to filter thinking content)
|
|
66
|
+
*/
|
|
67
|
+
protected parseResponse(response: unknown, modelType: string): VisionModelResponse;
|
|
68
|
+
}
|
|
69
|
+
//# sourceMappingURL=base-adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base-adapter.d.ts","sourceRoot":"","sources":["../../src/adapters/base-adapter.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AAKxD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE;QACN,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,WAAW,CAAC;IAEpB;;;;;OAKG;IACH,OAAO,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAE5D;;;;;OAKG;IACH,mBAAmB,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;CACtF;AAED;;GAEG;AACH,8BAAsB,sBAAuB,YAAW,kBAAkB;IACrD,MAAM,EAAE,WAAW;gBAAnB,MAAM,EAAE,WAAW;IAEtC,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IACpE,QAAQ,CAAC,mBAAmB,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,CAAC;IAE7F;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,0BAA0B,CAAiC;IAE5E;;;;;OAKG;cACa,SAAS,CAAC,CAAC,EACzB,SAAS,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,OAAO,CAAC,CAAC,CAAC,EAC9C,MAAM,GAAE;QAAE,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAO,GACrD,OAAO,CAAC,CAAC,CAAC;IA+Eb;;;;OAIG;IACH,SAAS,CAAC,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,GAAG,mBAAmB;CAuDnF"}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Vision Model Adapter
|
|
3
|
+
*
|
|
4
|
+
* @description Defines the unified interface and abstract base class for model adapters
|
|
5
|
+
*/
|
|
6
|
+
import { ModelAPIError, TimeoutError, VisionMCPError } from '../utils/errors.js';
|
|
7
|
+
import { logger } from '../utils/logger.js';
|
|
8
|
+
import { filterThinkingContent } from '../utils/thinking-filter.js';
|
|
9
|
+
/**
 * Abstract base class implementing functionality shared by all vision model adapters:
 * a retry/timeout wrapper for API calls and parsing of chat-completion style responses.
 */
export class BaseVisionModelAdapter {
    /** Model configuration; `maxRetries` and `timeout` are read from it as defaults. */
    config;
    /**
     * HTTP status codes that are never retried.
     * They indicate a client-side configuration problem, so retrying cannot change the result.
     */
    NON_RETRYABLE_STATUS_CODES = new Set([400, 401, 403, 404]);
    /**
     * @param config - Model configuration stored on the adapter.
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Runs `operation` with a per-attempt timeout and exponential backoff between attempts.
     *
     * @note The current implementation forces non-streaming responses (stream: false).
     *       If a provider only supports streaming responses, a streaming parser must be added.
     *
     * @param operation - Callback receiving an AbortSignal that is aborted when the attempt times out.
     * @param config - Optional overrides: `maxRetries` (retries after the first attempt) and
     *                 `timeout` (milliseconds per attempt).
     * @returns The value resolved by the first successful attempt.
     * @throws TimeoutError when an attempt is aborted (AbortError); this is not retried.
     * @throws ModelAPIError immediately for non-retryable HTTP statuses, or after all attempts fail.
     */
    async withRetry(operation, config = {}) {
        const { maxRetries: requestedRetries = this.config.maxRetries, timeout = this.config.timeout || 60000 } = config;
        // A configured value of 0 means "never retry" and must be honoured; only an unset,
        // non-numeric or negative value falls back to the default of 2 retries.
        const retryCount = requestedRetries == null ? NaN : Number(requestedRetries);
        const maxRetries = Number.isFinite(retryCount) && retryCount >= 0 ? retryCount : 2;
        let lastError;
        for (let attempt = 0; attempt <= maxRetries; attempt++) {
            try {
                logger.debug(`Attempt ${attempt + 1}/${maxRetries + 1}`);
                // The timeout is implemented by aborting the signal handed to the operation
                const controller = new AbortController();
                const timeoutId = setTimeout(() => controller.abort(), timeout);
                try {
                    return await operation(controller.signal);
                }
                finally {
                    // Always release the timer, whether the attempt succeeded or failed
                    clearTimeout(timeoutId);
                }
            }
            catch (error) {
                lastError = error;
                // An AbortError means our own timeout fired: surface it as a TimeoutError
                if (error instanceof Error && error.name === 'AbortError') {
                    throw new TimeoutError(`Request timed out after ${timeout}ms`);
                }
                // Fail fast on non-retryable statuses (400/401/403/404)
                if (error instanceof ModelAPIError) {
                    const status = error.details?.status;
                    if (status && this.NON_RETRYABLE_STATUS_CODES.has(status)) {
                        logger.error('Non-retryable error, failing immediately', {
                            status,
                            attempt: attempt + 1
                        });
                        throw error;
                    }
                }
                // Last attempt failed: leave the loop and report below
                if (attempt === maxRetries) {
                    break;
                }
                // Exponential backoff (1s, 2s, 4s, ...) capped at 5s
                const backoffTime = Math.min(1000 * Math.pow(2, attempt), 5000);
                logger.warn(`Attempt ${attempt + 1} failed, retrying in ${backoffTime}ms`, {
                    error: error instanceof Error ? error.message : String(error)
                });
                await new Promise(resolve => setTimeout(resolve, backoffTime));
            }
        }
        // Describe the last failure even when something other than an Error was thrown
        let lastMessage;
        if (lastError instanceof Error) {
            lastMessage = lastError.message;
        }
        else if (lastError !== undefined) {
            lastMessage = String(lastError);
        }
        // Keep the full details of the last failure when it was a VisionMCPError
        const errorDetails = lastError instanceof VisionMCPError && lastError.details
            ? { ...lastError.details, lastError: lastError.message }
            : { lastError: lastMessage };
        throw new ModelAPIError(`Failed after ${maxRetries + 1} attempts`, errorDetails);
    }
    /**
     * Parses a chat-completion style response (`choices[0].message.content`).
     *
     * Thinking/reasoning content is always filtered by delegating to `filterThinkingContent`;
     * if that call throws, the raw content is returned and a warning is logged.
     *
     * @param response - Raw response
     * @param modelType - Model type (required; used to filter thinking content)
     * @returns Object with `content`, plus `usage` and `model` copied from the response.
     * @throws Error when `modelType` is missing.
     * @throws ModelAPIError ('Failed to parse model response') when the content is missing,
     *         empty or not a string; the underlying error is passed as the third argument.
     */
    parseResponse(response, modelType) {
        if (!modelType) {
            throw new Error('modelType is required for parseResponse');
        }
        try {
            const content = response?.choices?.[0]?.message?.content;
            if (!content || typeof content !== 'string') {
                throw new ModelAPIError('Invalid response format: missing or invalid content', { response });
            }
            let filteredContent;
            try {
                filteredContent = filterThinkingContent(response, modelType);
                // Log only when the filter actually removed something
                if (filteredContent.length < content.length) {
                    logger.debug('Filtered thinking content from response', {
                        modelType,
                        originalLength: content.length,
                        filteredLength: filteredContent.length,
                        reduction: content.length - filteredContent.length
                    });
                }
            }
            catch (error) {
                // Best effort: a filter failure must not lose the model's answer
                logger.warn('Failed to filter thinking content, returning raw content', {
                    modelType,
                    error: error instanceof Error ? error.message : error
                });
                filteredContent = content;
            }
            return {
                content: filteredContent,
                usage: response?.usage,
                model: response?.model
            };
        }
        catch (error) {
            logger.error('Failed to parse model response', error);
            throw new ModelAPIError('Failed to parse model response', { response }, error);
        }
    }
}
|
|
143
|
+
//# sourceMappingURL=base-adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base-adapter.js","sourceRoot":"","sources":["../../src/adapters/base-adapter.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACjF,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,qBAAqB,EAAE,MAAM,6BAA6B,CAAC;AAsCpE;;GAEG;AACH,MAAM,OAAgB,sBAAsB;IACvB;IAAnB,YAAmB,MAAmB;QAAnB,WAAM,GAAN,MAAM,CAAa;IAAG,CAAC;IAK1C;;;OAGG;IACc,0BAA0B,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IAE5E;;;;;OAKG;IACO,KAAK,CAAC,SAAS,CACvB,SAA8C,EAC9C,SAAoD,EAAE;QAEtD,MAAM,EACJ,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC,EACxC,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,KAAK,EACvC,GAAG,MAAM,CAAC;QAEX,IAAI,SAA4B,CAAC;QAEjC,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;YACvD,IAAI,CAAC;gBACH,MAAM,CAAC,KAAK,CAAC,WAAW,OAAO,GAAG,CAAC,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC,CAAC;gBAEzD,0BAA0B;gBAC1B,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;gBAEhE,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;oBAClD,YAAY,CAAC,SAAS,CAAC,CAAC;oBACxB,OAAO,MAAM,CAAC;gBAChB,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,YAAY,CAAC,SAAS,CAAC,CAAC;oBACxB,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,SAAS,GAAG,KAAc,CAAC;gBAE3B,kCAAkC;gBAClC,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;oBAC1D,MAAM,IAAI,YAAY,CAAC,2BAA2B,OAAO,IAAI,CAAC,CAAC;gBACjE,CAAC;gBAED,+BAA+B;gBAC/B,IAAI,KAAK,YAAY,aAAa,EAAE,CAAC;oBACnC,MAAM,MAAM,GAAI,KAAK,CAAC,OAAe,EAAE,MAAM,CAAC;oBAC9C,IAAI,MAAM,IAAI,IAAI,CAAC,0BAA0B,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;wBAC1D,MAAM,CAAC,KAAK,CAAC,0CAA0C,EAAE;4BACvD,MAAM;4BACN,OAAO,EAAE,OAAO,GAAG,CAAC;yBACrB,CAAC,CAAC;wBACH,MAAM,KAAK,CAAC,CAAC,WAAW;oBAC1B,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,IAAI,OAAO,KAAK,UAAU,EAAE,CAAC;oBAC3B,MAAM;gBACR,CAAC;gBAED,eAAe;gBACf,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC,CAAC;gBAChE,MAAM,CAA
C,IAAI,CAAC,WAAW,OAAO,GAAG,CAAC,wBAAwB,WAAW,IAAI,EAAE;oBACzE,KAAK,EAAE,SAAS,CAAC,OAAO;iBACzB,CAAC,CAAC;gBAEH,SAAS;gBACT,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC,CAAC;YACjE,CAAC;QACH,CAAC;QAED,sBAAsB;QACtB,IAAI,YAAiC,CAAC;QACtC,IAAI,SAAS,KAAK,SAAS,IAAI,SAAS,YAAY,cAAc,IAAI,SAAS,CAAC,OAAO,EAAE,CAAC;YACxF,kCAAkC;YAClC,YAAY,GAAG;gBACb,GAAG,SAAS,CAAC,OAAO;gBACpB,SAAS,EAAE,SAAS,CAAC,OAAO;aAC7B,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,UAAU;YACV,YAAY,GAAG,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC;QACnD,CAAC;QAED,MAAM,IAAI,aAAa,CACrB,gBAAgB,UAAU,GAAG,CAAC,WAAW,EACzC,YAAY,CACb,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACO,aAAa,CAAC,QAAiB,EAAE,SAAiB;QAC1D,wBAAwB;QACxB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC7D,CAAC;QAED,IAAI,CAAC;YACH,sBAAsB;YACtB,MAAM,OAAO,GAAG,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;YAEzD,IAAI,CAAC,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAC5C,MAAM,IAAI,aAAa,CACrB,qDAAqD,EACrD,EAAE,QAAQ,EAAE,CACb,CAAC;YACJ,CAAC;YAED,uCAAuC;YACvC,IAAI,eAAuB,CAAC;YAC5B,IAAI,CAAC;gBACH,eAAe,GAAG,qBAAqB,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;gBAE7D,uBAAuB;gBACvB,IAAI,eAAe,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;oBAC5C,MAAM,CAAC,KAAK,CAAC,yCAAyC,EAAE;wBACtD,SAAS;wBACT,cAAc,EAAE,OAAO,CAAC,MAAM;wBAC9B,cAAc,EAAE,eAAe,CAAC,MAAM;wBACtC,SAAS,EAAE,OAAO,CAAC,MAAM,GAAG,eAAe,CAAC,MAAM;qBACnD,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,0DAA0D,EAAE;oBACtE,SAAS;oBACT,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;iBACtD,CAAC,CAAC;gBACH,eAAe,GAAG,OAAO,CAAC;YAC5B,CAAC;YAED,OAAO;gBACL,OAAO,EAAE,eAAe;gBACxB,aAAa;gBACb,KAAK,EAAE,QAAQ,EAAE,KAAK;gBACtB,aAAa;gBACb,KAAK,EAAE,QAAQ,EAAE,KAAK;aACvB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,gCAAgC,EAAE,KAAK,CAAC,CAAC;YACtD,MAAM,IAAI,aAAa,CACrB,gCAAgC,EAChC,EAAE,QAAQ,EAAE,EACZ,KAAK,CACN,CAAC;QACJ,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claude (Anthropic Messages API) Adapter
|
|
3
|
+
*
|
|
4
|
+
* @description Adapter implementation for the Claude Messages API, supporting Claude multimodal vision models
|
|
5
|
+
* @see https://docs.anthropic.com/claude/reference/messages-post
|
|
6
|
+
*/
|
|
7
|
+
import { BaseVisionModelAdapter, VisionModelResponse } from './base-adapter.js';
|
|
8
|
+
import { ModelConfig } from '../config/model-config.js';
|
|
9
|
+
export interface ClaudeAdapterOptions { // Optional settings accepted as the second argument of the ClaudeAdapter constructor.
|
|
10
|
+
maxTokens?: number; // NOTE(review): presumably the response token limit sent with each request — confirm in src/adapters/claude-adapter.ts
|
|
11
|
+
apiVersion?: string; // NOTE(review): presumably the Anthropic API version identifier — confirm in src/adapters/claude-adapter.ts
|
|
12
|
+
}
|
|
13
|
+
export declare class ClaudeAdapter extends BaseVisionModelAdapter { // Vision-model adapter for Claude (Anthropic Messages API).
|
|
14
|
+
private options;
|
|
15
|
+
constructor(config: ModelConfig, options?: ClaudeAdapterOptions); // `options` may be omitted.
|
|
16
|
+
analyze(imageData: string, prompt: string): Promise<string>; // NOTE(review): presumably resolves to the model's text answer — confirm in the implementation
|
|
17
|
+
analyzeWithResponse(imageData: string, prompt: string): Promise<VisionModelResponse>; // Same inputs as analyze(), but resolves to a VisionModelResponse.
|
|
18
|
+
private callClaudeAPI;
|
|
19
|
+
/**
|
|
20
|
+
* Builds the image content block.
|
|
21
|
+
* Supports two formats: a URL and a base64 data URL.
|
|
22
|
+
*/
|
|
23
|
+
private buildImageBlock;
|
|
24
|
+
/**
|
|
25
|
+
* Handles an error response.
|
|
26
|
+
*/
|
|
27
|
+
private handleErrorResponse;
|
|
28
|
+
/**
|
|
29
|
+
* Parses the response data.
|
|
30
|
+
*/
|
|
31
|
+
private parseResponseData;
|
|
32
|
+
/**
|
|
33
|
+
* Normalizes a Claude response into a VisionModelResponse.
|
|
34
|
+
* Claude response format: { content: [{type: "text", text: "..."}], usage: {...} }
|
|
35
|
+
*/
|
|
36
|
+
private normalizeResponse;
|
|
37
|
+
}
|
|
38
|
+
//# sourceMappingURL=claude-adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claude-adapter.d.ts","sourceRoot":"","sources":["../../src/adapters/claude-adapter.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAChF,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AAKxD,MAAM,WAAW,oBAAoB;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,qBAAa,aAAc,SAAQ,sBAAsB;IACvD,OAAO,CAAC,OAAO,CAAiC;gBAEpC,MAAM,EAAE,WAAW,EAAE,OAAO,GAAE,oBAAyB;IAyB7D,OAAO,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAyB3D,mBAAmB,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA0B5E,aAAa;IAgE3B;;;OAGG;IACH,OAAO,CAAC,eAAe;IAwBvB;;OAEG;YACW,mBAAmB;IAwCjC;;OAEG;YACW,iBAAiB;IAiC/B;;;OAGG;IACH,OAAO,CAAC,iBAAiB;CA+B1B"}
|