@showlotus/opencode-image-vision 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,6 @@
1
1
  # opencode-image-vision
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/@showlotus/opencode-image-vision.svg)](https://www.npmjs.com/package/@showlotus/opencode-image-vision)
3
4
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
4
5
  [![MCP](https://img.shields.io/badge/MCP-Server-blue.svg)](https://modelcontextprotocol.io/)
5
6
  [![Node](https://img.shields.io/badge/Node.js-22%2B-green.svg)](https://nodejs.org/)
@@ -57,7 +58,7 @@ No clone or install needed. Just add to your `opencode.jsonc`:
57
58
  "mcp": {
58
59
  "image-vision": {
59
60
  "type": "local",
60
- "command": ["npx", "-y", "opencode-image-vision"],
61
+ "command": ["npx", "-y", "@showlotus/opencode-image-vision"],
61
62
  "environment": {
62
63
  "model": "zhipuai-coding-plan/glm-4.6v",
63
64
  },
@@ -211,12 +212,30 @@ The screenshot shows a terminal with the following error message...
211
212
 
212
213
  ### Supported providers
213
214
 
214
- | Provider ID | Base URL | Models |
215
- | --------------------- | -------------------------------------- | ---------- |
216
- | `zhipuai-coding-plan` | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
217
- | `zai-coding-plan` | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
218
- | `z-ai` | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
219
- | `zhipuai` | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
215
+ **OpenAI-compatible** (reuse `OpenAICompatibleProvider`):
216
+
217
+ | Provider ID | Base URL | Example Models |
218
+ | --------------------- | -------------------------------------------------------- | ------------------------------------------- |
219
+ | `zhipuai-coding-plan` | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
220
+ | `zai-coding-plan` | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
221
+ | `z-ai` | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
222
+ | `zhipuai` | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
223
+ | `moonshot` / `kimi` | `https://api.moonshot.cn/v1` | `moonshot-v1-32k-vision-preview` |
224
+ | `minimax` / `minimax-cn-coding-plan` | `https://api.minimaxi.chat/v1` | `MiniMax-Text-01` |
225
+ | `openai` | `https://api.openai.com/v1` | `gpt-4o`, `gpt-4o-mini` |
226
+ | `qwen` / `dashscope` | `https://dashscope.aliyuncs.com/compatible-mode/v1` | `qwen-vl-max`, `qwen-vl-plus` |
227
+ | `doubao` / `volcengine` | `https://ark.cn-beijing.volces.com/api/v3` | `doubao-vision-pro-32k` |
228
+ | `yi` / `lingyiwanwu` | `https://api.lingyiwanwu.com/v1` | `yi-vision-v2` |
229
+ | `gemini` / `google` | `https://generativelanguage.googleapis.com/v1beta/openai` | `gemini-2.0-flash`, `gemini-1.5-pro` |
230
+ | `stepfun` | `https://api.stepfun.com/v1` | `step-1v-32k` |
231
+ | `baichuan` | `https://api.baichuan-ai.com/v1` | `Baichuan4-Vision` |
232
+ | `hunyuan` | `https://api.hunyuan.cloud.tencent.com/v1` | `hunyuan-vision` |
233
+
234
+ **Custom API format**:
235
+
236
+ | Provider ID | Base URL | Provider Class | Example Models |
237
+ | --------------------- | ---------------------------------- | ---------------- | ---------------------------------------- |
238
+ | `anthropic` / `claude` | `https://api.anthropic.com/v1` | `ClaudeProvider` | `claude-3-5-sonnet-20241022` |
220
239
 
221
240
  ---
222
241
 
@@ -231,49 +250,56 @@ Model: [calls analyze_images with session_id]
231
250
  PostgreSQL isn't running on port 5432. Start it with: brew services start postgresql"
232
251
  ```
233
252
 
234
- The text-only model never sees pixels — it reads the description returned by GLM-4.6V and reasons over it.
253
+ The text-only model never sees pixels — it reads the description returned by the vision model and reasons over it.
235
254
 
236
255
  ---
237
256
 
238
257
  ## Extending with new providers
239
258
 
240
- Adding a new vision provider takes 3 steps:
259
+ Most vision model providers use the **OpenAI-compatible chat completions API** — for those you only need to add two registry entries (no new provider class). Only providers with a **different API format** need a custom class.
241
260
 
242
- **1. Add base URL to the registry** (`src/opencode.js`):
261
+ ### Adding an OpenAI-compatible provider (e.g. OpenAI, Qwen, Doubao)
262
+
263
+ **1. Add base URL** (`src/opencode.js` → `PROVIDER_REGISTRY`):
243
264
 
244
265
  ```javascript
245
- const PROVIDER_REGISTRY = {
246
- 'zhipuai-coding-plan': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
247
- // Add new provider:
248
- openai: { baseUrl: 'https://api.openai.com/v1', format: 'openai' },
249
- }
266
+ 'my-provider': { baseUrl: 'https://api.example.com/v1', format: 'openai' },
267
+ ```
268
+
269
+ **2. Add provider mapping** (`src/providers/index.js` → `OPENAI_COMPATIBLE`):
270
+
271
+ ```javascript
272
+ 'my-provider': OpenAICompatibleProvider,
273
+ ```
274
+
275
+ Done. Set `"model": "my-provider/my-vision-model"` in config.
276
+
277
+ ### Adding a custom-format provider (e.g. Anthropic Claude)
278
+
279
+ **1. Add base URL** (`src/opencode.js` → `PROVIDER_REGISTRY`):
280
+
281
+ ```javascript
282
+ 'my-provider': { baseUrl: 'https://api.example.com/v1', format: 'custom' },
250
283
  ```
251
284
 
252
- **2. Create a provider class** (`src/providers/openai.js`) — only needed if the API format differs:
285
+ **2. Create a provider class** (`src/providers/my-provider.js`):
253
286
 
254
287
  ```javascript
255
288
  import { VisionProvider } from './base.js'
256
289
 
257
- export class OpenAIProvider extends VisionProvider {
290
+ export class MyProvider extends VisionProvider {
258
291
  async analyze(base64, mime, prompt) {
259
292
  // Implement provider-specific API call
260
293
  }
261
294
  }
262
295
  ```
263
296
 
264
- **3. Register the mapping** (`src/providers/index.js`):
297
+ See `src/providers/claude.js` for a working example (Anthropic uses `x-api-key` authentication and the `/messages` endpoint).
265
298
 
266
- ```javascript
267
- const PROVIDER_MAP = {
268
- 'zhipuai-coding-plan': GLMProvider,
269
- openai: OpenAIProvider,
270
- }
271
- ```
272
-
273
- Then set the `model` environment variable:
299
+ **3. Add provider mapping** (`src/providers/index.js` → `PROVIDER_MAP`):
274
300
 
275
- ```jsonc
276
- "environment": { "model": "openai/gpt-4o" }
301
+ ```javascript
302
+ 'my-provider': MyProvider,
277
303
  ```
278
304
 
279
305
  ---
package/package.json CHANGED
@@ -1,8 +1,9 @@
1
1
  {
2
2
  "name": "@showlotus/opencode-image-vision",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "MCP server that reads images from OpenCode's SQLite database and analyzes them via vision AI providers",
5
5
  "type": "module",
6
+ "packageManager": "pnpm@8.11.0",
6
7
  "main": "src/index.js",
7
8
  "bin": {
8
9
  "opencode-image-vision": "./src/index.js"
@@ -0,0 +1,19 @@
1
/**
 * Runs `worker` over every item with a bounded number of calls in flight and
 * returns the results in the same order as `items`.
 *
 * @param {Array} items - Values to process.
 * @param {number} concurrency - Maximum number of simultaneous `worker` calls.
 *   Values that are not a number >= 1 (0, negatives, NaN, undefined) fall back
 *   to 1 so that every item is still processed instead of being silently skipped.
 * @param {(item: any, index: number) => Promise<any>} worker - Async function
 *   invoked with each item and its index.
 * @returns {Promise<Array>} Results positioned at the index of their input item.
 *   Rejects with the first error thrown by `worker`; executors that are already
 *   running are not cancelled.
 */
export async function mapWithConcurrency(items, concurrency, worker) {
  const results = new Array(items.length)
  let cursor = 0

  // Guard against a non-positive or non-numeric limit: without this, zero
  // executors would start and the caller would get back an array of holes.
  const requested = Math.floor(Number(concurrency))
  const limit = Number.isNaN(requested) || requested < 1 ? 1 : requested

  // One executor: keep claiming the next unprocessed index until none remain.
  // `cursor++` is synchronous, so two executors can never claim the same index.
  const run = async () => {
    while (cursor < items.length) {
      const i = cursor++
      results[i] = await worker(items[i], i)
    }
  }

  // Never start more executors than there are items.
  const executorCount = Math.min(limit, items.length)
  await Promise.all(Array.from({ length: executorCount }, () => run()))
  return results
}
package/src/index.js CHANGED
@@ -5,6 +5,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
5
5
  import { z } from 'zod'
6
6
  import { getDatabase, getImages } from './db.js'
7
7
  import { createProvider } from './providers/index.js'
8
+ import { mapWithConcurrency } from './concurrency.js'
8
9
 
9
10
  const DEFAULT_PROMPT =
10
11
  process.env.prompt ||
@@ -17,6 +18,9 @@ const DEFAULT_PROMPT =
17
18
  const DEFAULT_LIMIT = Number(process.env.limit) || 5
18
19
  const MAX_LIMIT = Number(process.env.max_limit) || 20
19
20
 
21
+ // 并发分析图片的最大并发数,可通过环境变量 concurrency 覆盖
22
+ const DEFAULT_CONCURRENCY = Number(process.env.concurrency) || 5
23
+
20
24
  let provider
21
25
  try {
22
26
  provider = createProvider()
@@ -60,16 +64,19 @@ server.tool(
60
64
  }
61
65
  }
62
66
 
63
- const results = []
64
- for (let i = 0; i < images.length; i++) {
65
- const img = images[i]
66
- try {
67
- const desc = await provider.analyze(img.base64, img.mime, analysisPrompt)
68
- results.push(`### Image ${i + 1}: ${img.filename}\n\n${desc}`)
69
- } catch (e) {
70
- results.push(`### Image ${i + 1}: ${img.filename}\n\n[Analysis failed: ${e.message}]`)
71
- }
72
- }
67
+ // 并发分析图片,单张失败不影响其他图片,结果保持原顺序
68
+ const results = await mapWithConcurrency(
69
+ images,
70
+ DEFAULT_CONCURRENCY,
71
+ async (img, i) => {
72
+ try {
73
+ const desc = await provider.analyze(img.base64, img.mime, analysisPrompt)
74
+ return `### Image ${i + 1}: ${img.filename}\n\n${desc}`
75
+ } catch (e) {
76
+ return `### Image ${i + 1}: ${img.filename}\n\n[Analysis failed: ${e.message}]`
77
+ }
78
+ },
79
+ )
73
80
 
74
81
  return {
75
82
  content: [
package/src/opencode.js CHANGED
@@ -5,10 +5,28 @@ import { join } from 'node:path';
5
5
  // Provider ID → base URL mapping
6
6
  // Future providers can be added here
7
7
  const PROVIDER_REGISTRY = {
8
- 'zhipuai-coding-plan': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
9
- 'zai-coding-plan': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
10
- 'z-ai': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
11
- 'zhipuai': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
8
+ 'zhipuai-coding-plan': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
9
+ 'zai-coding-plan': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
10
+ 'z-ai': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
11
+ 'zhipuai': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
12
+ 'moonshot': { baseUrl: 'https://api.moonshot.cn/v1', format: 'openai' },
13
+ 'kimi': { baseUrl: 'https://api.moonshot.cn/v1', format: 'openai' },
14
+ 'minimax': { baseUrl: 'https://api.minimaxi.chat/v1', format: 'openai' },
15
+ 'minimax-cn-coding-plan': { baseUrl: 'https://api.minimaxi.chat/v1', format: 'openai' },
16
+ 'openai': { baseUrl: 'https://api.openai.com/v1', format: 'openai' },
17
+ 'qwen': { baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1', format: 'openai' },
18
+ 'dashscope': { baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1', format: 'openai' },
19
+ 'doubao': { baseUrl: 'https://ark.cn-beijing.volces.com/api/v3', format: 'openai' },
20
+ 'volcengine': { baseUrl: 'https://ark.cn-beijing.volces.com/api/v3', format: 'openai' },
21
+ 'yi': { baseUrl: 'https://api.lingyiwanwu.com/v1', format: 'openai' },
22
+ 'lingyiwanwu': { baseUrl: 'https://api.lingyiwanwu.com/v1', format: 'openai' },
23
+ 'gemini': { baseUrl: 'https://generativelanguage.googleapis.com/v1beta/openai', format: 'openai' },
24
+ 'google': { baseUrl: 'https://generativelanguage.googleapis.com/v1beta/openai', format: 'openai' },
25
+ 'stepfun': { baseUrl: 'https://api.stepfun.com/v1', format: 'openai' },
26
+ 'baichuan': { baseUrl: 'https://api.baichuan-ai.com/v1', format: 'openai' },
27
+ 'hunyuan': { baseUrl: 'https://api.hunyuan.cloud.tencent.com/v1', format: 'openai' },
28
+ 'anthropic': { baseUrl: 'https://api.anthropic.com/v1', format: 'anthropic' },
29
+ 'claude': { baseUrl: 'https://api.anthropic.com/v1', format: 'anthropic' },
12
30
  };
13
31
 
14
32
  export function resolveProviderConfig(providerId, modelId) {
@@ -0,0 +1,64 @@
1
+ import { VisionProvider } from './base.js'
2
+
3
/**
 * Vision provider that sends an image plus a prompt to an Anthropic-style
 * `/messages` endpoint and returns the text of the reply.
 */
export class ClaudeProvider extends VisionProvider {
  /**
   * @param {object} config
   * @param {string} config.apiKey - Sent in the `x-api-key` header.
   * @param {string} config.baseUrl - API root; `/messages` is appended to it.
   * @param {string} config.model - Model identifier sent in the request body.
   * @param {number} [config.timeout=60000] - Milliseconds before the request is aborted.
   * @param {number} [config.maxTokens=1024] - `max_tokens` value sent with each request.
   * @throws {Error} If the API key, base URL or model is missing.
   */
  constructor(config) {
    super(config)
    this.apiKey = config.apiKey
    this.baseUrl = config.baseUrl
    this.model = config.model
    this.timeout = config.timeout || 60_000
    this.maxTokens = config.maxTokens || 1024

    if (!this.apiKey) {
      throw new Error('Anthropic API key not configured.')
    }
    if (!this.baseUrl) {
      throw new Error('Anthropic base URL not configured.')
    }
    if (!this.model) {
      throw new Error('Anthropic model not configured.')
    }
  }

  /**
   * Analyzes a single image.
   *
   * @param {string} base64 - Base64-encoded image data.
   * @param {string} mime - Media type of the image, sent as `media_type`.
   * @param {string} prompt - Instruction text sent alongside the image.
   * @returns {Promise<string>} Concatenated text of the reply, or
   *   `'[No content returned]'` when the reply contains no text.
   * @throws {Error} On a non-2xx response (message includes the status and the
   *   first 200 characters of the body), or when the request is aborted after
   *   `this.timeout` milliseconds.
   */
  async analyze(base64, mime, prompt) {
    const ctrl = new AbortController()
    const timer = setTimeout(() => ctrl.abort(), this.timeout)

    // Tolerate a base URL configured with a trailing slash.
    const endpoint = `${String(this.baseUrl).replace(/\/+$/, '')}/messages`

    try {
      const res = await fetch(endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': this.apiKey,
          'anthropic-version': '2023-06-01',
        },
        body: JSON.stringify({
          model: this.model,
          max_tokens: this.maxTokens,
          messages: [
            {
              role: 'user',
              content: [
                {
                  type: 'image',
                  source: { type: 'base64', media_type: mime, data: base64 },
                },
                { type: 'text', text: prompt },
              ],
            },
          ],
        }),
        signal: ctrl.signal,
      })

      if (!res.ok) {
        const t = await res.text().catch(() => '')
        throw new Error(`Anthropic API ${res.status}: ${t.slice(0, 200)}`)
      }

      const json = await res.json()
      // The reply is a list of content blocks; collect every block that carries
      // text rather than only the first one, so nothing is dropped when the
      // text is split across blocks or preceded by a non-text block.
      const blocks = Array.isArray(json?.content) ? json.content : []
      const text = blocks
        .filter((block) => typeof block?.text === 'string')
        .map((block) => block.text)
        .join('\n')
        .trim()
      return text || '[No content returned]'
    } finally {
      // Always clear the timer so it cannot fire after the request settles.
      clearTimeout(timer)
    }
  }
}
@@ -1,12 +1,34 @@
1
- import { GLMProvider } from './glm.js'
1
+ import { OpenAICompatibleProvider } from './openai-compatible.js'
2
+ import { ClaudeProvider } from './claude.js'
2
3
  import { resolveProviderConfig } from '../opencode.js'
3
4
 
4
- // Provider ID → provider class mapping
5
+ const OPENAI_COMPATIBLE = {
6
+ 'zhipuai-coding-plan': OpenAICompatibleProvider,
7
+ 'zai-coding-plan': OpenAICompatibleProvider,
8
+ 'z-ai': OpenAICompatibleProvider,
9
+ 'zhipuai': OpenAICompatibleProvider,
10
+ 'moonshot': OpenAICompatibleProvider,
11
+ 'kimi': OpenAICompatibleProvider,
12
+ 'minimax': OpenAICompatibleProvider,
13
+ 'minimax-cn-coding-plan': OpenAICompatibleProvider,
14
+ 'openai': OpenAICompatibleProvider,
15
+ 'qwen': OpenAICompatibleProvider,
16
+ 'dashscope': OpenAICompatibleProvider,
17
+ 'doubao': OpenAICompatibleProvider,
18
+ 'volcengine': OpenAICompatibleProvider,
19
+ 'yi': OpenAICompatibleProvider,
20
+ 'lingyiwanwu': OpenAICompatibleProvider,
21
+ 'gemini': OpenAICompatibleProvider,
22
+ 'google': OpenAICompatibleProvider,
23
+ 'stepfun': OpenAICompatibleProvider,
24
+ 'baichuan': OpenAICompatibleProvider,
25
+ 'hunyuan': OpenAICompatibleProvider,
26
+ }
27
+
5
28
  const PROVIDER_MAP = {
6
- 'zhipuai-coding-plan': GLMProvider,
7
- 'zai-coding-plan': GLMProvider,
8
- 'z-ai': GLMProvider,
9
- 'zhipuai': GLMProvider,
29
+ ...OPENAI_COMPATIBLE,
30
+ 'anthropic': ClaudeProvider,
31
+ 'claude': ClaudeProvider,
10
32
  }
11
33
 
12
34
  export function createProvider() {
@@ -14,7 +36,7 @@ export function createProvider() {
14
36
  const slashIdx = raw.indexOf('/')
15
37
  if (slashIdx === -1) {
16
38
  throw new Error(
17
- `Invalid VISION_MODEL format: "${raw}". Expected "provider/model", e.g. "zhipuai-coding-plan/glm-4.6v"`,
39
+ `Invalid model format: "${raw}". Expected "provider/model", e.g. "zhipuai-coding-plan/glm-4.6v"`,
18
40
  )
19
41
  }
20
42
  const providerId = raw.slice(0, slashIdx)
@@ -1,6 +1,6 @@
1
1
  import { VisionProvider } from './base.js'
2
2
 
3
- export class GLMProvider extends VisionProvider {
3
+ export class OpenAICompatibleProvider extends VisionProvider {
4
4
  constructor(config) {
5
5
  super(config)
6
6
  this.apiKey = config.apiKey
@@ -9,13 +9,13 @@ export class GLMProvider extends VisionProvider {
9
9
  this.timeout = config.timeout || 60_000
10
10
 
11
11
  if (!this.apiKey) {
12
- throw new Error('GLM API key not configured.')
12
+ throw new Error('API key not configured.')
13
13
  }
14
14
  if (!this.baseUrl) {
15
- throw new Error('GLM base URL not configured.')
15
+ throw new Error('Base URL not configured.')
16
16
  }
17
17
  if (!this.model) {
18
- throw new Error('GLM model not configured.')
18
+ throw new Error('Model not configured.')
19
19
  }
20
20
  }
21
21
 
@@ -48,7 +48,7 @@ export class GLMProvider extends VisionProvider {
48
48
 
49
49
  if (!res.ok) {
50
50
  const t = await res.text().catch(() => '')
51
- throw new Error(`GLM API ${res.status}: ${t.slice(0, 200)}`)
51
+ throw new Error(`Vision API ${res.status}: ${t.slice(0, 200)}`)
52
52
  }
53
53
 
54
54
  const json = await res.json()