@jochenyang/opencode-vision 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -62,7 +62,7 @@ vision 工具调用视觉 API 返回图片描述
62
62
  ## 前置要求
63
63
 
64
64
  - [OpenCode](https://github.com/opencode-ai/opencode) 已安装
65
- - 一个兼容 OpenAI 格式的视觉 API(如阿里云 DashScope 通义千问等)
65
+ - 一个支持视觉识别的 API(兼容 OpenAI Chat Completions 格式,或 MiniMax VLM 接口)
66
66
  - 环境变量(建议配置到系统级,避免每次启动重复输入)
67
67
 
68
68
  ## 环境变量
@@ -70,29 +70,54 @@ vision 工具调用视觉 API 返回图片描述
70
70
  | 变量 | 说明 | 示例值 |
71
71
  | ------------------ | -------------------------------------- | ------------------------------------------------------------- |
72
72
  | `VISION_API_KEY` | 视觉 API 的密钥 | `sk-your-api-key` |
73
- | `VISION_API_URL` | 视觉 API 的基础地址<br>(工具自动补全 `/chat/completions`) | `https://your-api-endpoint/v1` |
74
- | `VISION_MODEL` | 视觉模型名称 | `your-vision-model` |
73
+ | `VISION_API_URL` | 视觉 API 的基础地址 | `https://your-api-endpoint/v1` |
74
+ | `VISION_MODEL` | 视觉模型名称<br>(MiniMax 无需设置) | `your-vision-model` |
75
+ | `VISION_API_TYPE` | 可选,强制指定 API 类型<br>`openai` / `minimax` | `minimax` |
75
76
 
76
- ### Windows 系统级配置
77
+ > `VISION_API_URL`:OpenAI 兼容接口会自动补全 `/chat/completions`;MiniMax 会自动使用 `/v1/coding_plan/vlm` 端点。
78
+ >
79
+ > `VISION_API_TYPE`:默认自动检测(URL 含 `minimax` 自动切换),设此变量可显式指定。
77
80
 
81
+ ### 示例一:OpenAI 兼容接口(如阿里云 DashScope 通义千问)
82
+
83
+ **Windows 系统级配置:**
78
84
  ```powershell
79
85
  [System.Environment]::SetEnvironmentVariable('VISION_API_KEY', 'sk-your-api-key', 'User')
80
86
  [System.Environment]::SetEnvironmentVariable('VISION_API_URL', 'https://your-api-endpoint/v1', 'User')
81
87
  [System.Environment]::SetEnvironmentVariable('VISION_MODEL', 'your-vision-model', 'User')
82
88
  ```
83
89
 
84
- 设置后**重启终端**生效。
85
-
86
- ### macOS / Linux
87
-
88
- 在 `~/.zshrc` 或 `~/.bashrc` 中添加:
89
-
90
+ **macOS / Linux:**
90
91
  ```bash
91
92
  export VISION_API_KEY="sk-your-api-key"
92
93
  export VISION_API_URL="https://your-api-endpoint/v1"
93
94
  export VISION_MODEL="your-vision-model"
94
95
  ```
95
96
 
97
+ ### 示例二:MiniMax VLM
98
+
99
+ MiniMax 的 VLM 接口属于 **Token Plan** 服务,需要使用具备 Token Plan 访问权限的 API Key(Group API Key),而非普通的 Chat API Key。
100
+
101
+ > 如何获取:登录 [MiniMax 平台](https://platform.minimaxi.com) → Token Plan → 创建/查看 Group API Key。
102
+
103
+ **Windows 系统级配置:**
104
+ ```powershell
105
+ [System.Environment]::SetEnvironmentVariable('VISION_API_KEY', 'your-minimax-group-api-key', 'User')
106
+ [System.Environment]::SetEnvironmentVariable('VISION_API_URL', 'https://api.minimaxi.com', 'User')
107
+ # VISION_MODEL 不需要设置,MiniMax 自动识别
108
+ ```
109
+
110
+ **macOS / Linux:**
111
+ ```bash
112
+ export VISION_API_KEY="your-minimax-group-api-key"
113
+ export VISION_API_URL="https://api.minimaxi.com"
114
+ # VISION_MODEL 不需要设置,MiniMax 自动识别
115
+ ```
116
+
117
+ > 提示:国内站使用 `https://api.minimaxi.com`,国际站使用 `https://api.minimax.io`。
118
+
119
+ 设置后**重启终端**生效。
120
+
96
121
  ## 安装
97
122
 
98
123
  ### 手动安装
@@ -157,7 +182,7 @@ opencode-vision/
157
182
 
158
183
  - 读取本地图片文件,通过视觉 API 识别内容
159
184
  - 支持 `path`(单图)和 `paths`(多图数组)两个参数
160
- - 兼容 OpenAI Chat Completions 格式的 API
185
+ - 支持两种后端:OpenAI Chat Completions 格式 / MiniMax VLM(自动检测)
161
186
 
162
187
  ### 插件:`plugins/vision-helper.ts`
163
188
 
@@ -176,7 +201,11 @@ opencode-vision/
176
201
 
177
202
  ## 自定义视觉 API
178
203
 
179
- 本工具兼容任何 OpenAI Chat Completions 格式的视觉 API。只需更换环境变量即可:
204
+ 本工具支持两种后端,自动检测或显式指定。
205
+
206
+ ### OpenAI Chat Completions 格式
207
+
208
+ 兼容任何 OpenAI Chat Completions 格式的视觉 API:
180
209
 
181
210
  ```powershell
182
211
  $env:VISION_API_KEY = 'sk-your-api-key'
@@ -184,6 +213,18 @@ $env:VISION_API_URL = 'https://your-api-endpoint/v1'
184
213
  $env:VISION_MODEL = 'your-vision-model'
185
214
  ```
186
215
 
216
+ ### MiniMax VLM
217
+
218
+ 工具自动检测 URL 是否含 `minimax`/`minimaxi`,自动切换为 MiniMax VLM 接口。也可通过 `VISION_API_TYPE=minimax` 强制指定。
219
+
220
+ > ⚠️ 需要具备 **Token Plan** 权限的 Group API Key。普通 Chat API Key 无法使用。
221
+
222
+ ```powershell
223
+ $env:VISION_API_KEY = 'your-minimax-group-api-key'
224
+ $env:VISION_API_URL = 'https://api.minimaxi.com'
225
+ # VISION_MODEL 不需要
226
+ ```
227
+
187
228
  ## 许可证
188
229
 
189
230
  [MIT](LICENSE)
package/README_en.md CHANGED
@@ -78,37 +78,62 @@ vision tool calls the vision API → returns image description
78
78
  ## Prerequisites
79
79
 
80
80
  - [OpenCode](https://github.com/opencode-ai/opencode) installed
81
- - An OpenAI-compatible vision API (e.g., Aliyun DashScope, OpenAI, etc.)
81
+ - A vision-capable API (OpenAI Chat Completions format or MiniMax VLM)
82
82
  - Environment variables configured (recommended system-wide)
83
83
 
84
84
  ## Environment Variables
85
85
 
86
- | Variable | Description | Example |
87
- | ----------------- | ------------------------------------------------------ | ------------------------------- |
88
- | `VISION_API_KEY` | Vision API key | `sk-your-api-key` |
89
- | `VISION_API_URL` | Vision API base URL<br>(tool auto-appends `/chat/completions`) | `https://your-api-endpoint/v1` |
90
- | `VISION_MODEL` | Vision model name | `your-vision-model` |
86
+ | Variable | Description | Example |
87
+ | ----------------- | ------------------------------------------------------------------ | ------------------------------- |
88
+ | `VISION_API_KEY` | Vision API key | `sk-your-api-key` |
89
+ | `VISION_API_URL` | Vision API base URL | `https://your-api-endpoint/v1` |
90
+ | `VISION_MODEL` | Vision model name<br>(not needed for MiniMax) | `your-vision-model` |
91
+ | `VISION_API_TYPE` | Optional, force API type<br>`openai` / `minimax` | `minimax` |
91
92
 
92
- ### Windows (System-wide)
93
+ > `VISION_API_URL`: for OpenAI-compatible backends the tool auto-appends `/chat/completions`; for MiniMax it automatically uses the `/v1/coding_plan/vlm` endpoint.
94
+ >
95
+ > `VISION_API_TYPE`: Auto-detected by default (URL containing `minimax` triggers MiniMax mode). Can be explicitly set.
93
96
 
97
+ ### Example 1: OpenAI-compatible (e.g., Aliyun DashScope)
98
+
99
+ **Windows:**
94
100
  ```powershell
95
101
  [System.Environment]::SetEnvironmentVariable('VISION_API_KEY', 'sk-your-api-key', 'User')
96
102
  [System.Environment]::SetEnvironmentVariable('VISION_API_URL', 'https://your-api-endpoint/v1', 'User')
97
103
  [System.Environment]::SetEnvironmentVariable('VISION_MODEL', 'your-vision-model', 'User')
98
104
  ```
99
105
 
100
- **Restart your terminal** after setting.
101
-
102
- ### macOS / Linux
103
-
104
- Add to `~/.zshrc` or `~/.bashrc`:
105
-
106
+ **macOS / Linux:**
106
107
  ```bash
107
108
  export VISION_API_KEY="sk-your-api-key"
108
109
  export VISION_API_URL="https://your-api-endpoint/v1"
109
110
  export VISION_MODEL="your-vision-model"
110
111
  ```
111
112
 
113
+ ### Example 2: MiniMax VLM
114
+
115
+ MiniMax's VLM endpoint is part of the **Token Plan** service and requires a Group API Key with Token Plan access — a regular Chat API Key won't work.
116
+
117
+ > How to get one: Log in to the [MiniMax platform](https://platform.minimaxi.com) → Token Plan → Create/view a Group API Key.
118
+
119
+ **Windows:**
120
+ ```powershell
121
+ [System.Environment]::SetEnvironmentVariable('VISION_API_KEY', 'your-minimax-group-api-key', 'User')
122
+ [System.Environment]::SetEnvironmentVariable('VISION_API_URL', 'https://api.minimaxi.com', 'User')
123
+ # VISION_MODEL is not needed — MiniMax is auto-detected
124
+ ```
125
+
126
+ **macOS / Linux:**
127
+ ```bash
128
+ export VISION_API_KEY="your-minimax-group-api-key"
129
+ export VISION_API_URL="https://api.minimaxi.com"
130
+ # VISION_MODEL is not needed — MiniMax auto-detected
131
+ ```
132
+
133
+ > Note: Use `https://api.minimaxi.com` for the China region and `https://api.minimax.io` for the global site.
134
+
135
+ **Restart your terminal** after setting.
136
+
112
137
  ## Installation
113
138
 
114
139
  ### Manual
@@ -178,7 +203,7 @@ opencode-vision/
178
203
 
179
204
  - Reads local image files and describes them via a vision API
180
205
  - Supports `path` (single) and `paths` (multiple) parameters
181
- - Compatible with any OpenAI Chat Completions API
206
+ - Supports two backends: OpenAI Chat Completions / MiniMax VLM (auto-detected)
182
207
 
183
208
  ### Plugin: `plugins/vision-helper.ts`
184
209
 
@@ -197,7 +222,11 @@ opencode-vision/
197
222
 
198
223
  ## Custom Vision API
199
224
 
200
- Compatible with any OpenAI Chat Completions vision API. Just change the environment variables:
225
+ The tool supports two backends with auto-detection or explicit override.
226
+
227
+ ### OpenAI Chat Completions Format
228
+
229
+ Works with any OpenAI Chat Completions vision API:
201
230
 
202
231
  ```bash
203
232
  export VISION_API_KEY="sk-your-api-key"
@@ -205,6 +234,18 @@ export VISION_API_URL="https://your-api-endpoint/v1"
205
234
  export VISION_MODEL="your-vision-model"
206
235
  ```
207
236
 
237
+ ### MiniMax VLM
238
+
239
+ Auto-detected when the URL contains `minimax`/`minimaxi`. Can also be forced with `VISION_API_TYPE=minimax`.
240
+
241
+ > ⚠️ Requires a **Group API Key** with Token Plan access. Regular Chat API Keys won't work.
242
+
243
+ ```bash
244
+ export VISION_API_KEY="your-minimax-group-api-key"
245
+ export VISION_API_URL="https://api.minimaxi.com"
246
+ # VISION_MODEL is not needed
247
+ ```
248
+
208
249
  ## License
209
250
 
210
251
  [MIT](LICENSE)
package/bin/install.js CHANGED
@@ -14,8 +14,8 @@ const FILES = [
14
14
 
15
15
  const ENV_VARS = [
16
16
  { name: "VISION_API_KEY", desc: "视觉 API 密钥 / Vision API key", example: "sk-your-api-key" },
17
- { name: "VISION_API_URL", desc: "视觉 API 地址 / Vision API base URL", example: "https://your-api-endpoint/v1" },
18
- { name: "VISION_MODEL", desc: "视觉模型名称 / Vision model name", example: "your-vision-model" },
17
+ { name: "VISION_API_URL", desc: "视觉 API 地址 / Vision API base URL(MiniMax 也可用)", example: "https://api.minimax.chat" },
18
+ { name: "VISION_MODEL", desc: "视觉模型名称 / Vision model name(MiniMax 无需此项)", example: "your-vision-model" },
19
19
  ]
20
20
 
21
21
  function log(msg, ok = true) {
@@ -38,6 +38,12 @@ function printEnvGuide() {
38
38
  console.log()
39
39
  }
40
40
 
41
+ console.log(" \x1b[36mMiniMax 用户注意:\x1b[0m")
42
+ console.log(" VISION_API_URL 设为你的 MiniMax API 基础地址即可。")
43
+ console.log(" 工具自动检测 MiniMax 并使用 VLM 接口,不需要 VISION_MODEL。")
44
+ console.log(" 也可显式设置 VISION_API_TYPE=minimax。")
45
+ console.log()
46
+
41
47
  if (isWin) {
42
48
  console.log(" \x1b[36mWindows 系统级配置(管理员 PowerShell):\x1b[0m")
43
49
  console.log()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jochenyang/opencode-vision",
3
- "version": "1.0.2",
3
+ "version": "1.1.0",
4
4
  "description": "Vision plugin + tool for OpenCode — automatically handles pasted images for non-vision models",
5
5
  "keywords": [
6
6
  "opencode",
@@ -8,7 +8,8 @@
8
8
  "image",
9
9
  "ai",
10
10
  "plugin",
11
- "tool"
11
+ "tool",
12
+ "minimax"
12
13
  ],
13
14
  "homepage": "https://github.com/jochenyang/opencode-vision",
14
15
  "license": "MIT",
package/tools/vision.ts CHANGED
@@ -11,7 +11,10 @@ Use this when the user pastes images but the current model cannot view images di
11
11
  The image(s) will have been auto-saved with a path hint like "[Image auto-saved to ...]" in the conversation.
12
12
  For multiple images, use the "paths" parameter.
13
13
 
14
- Requires VISION_API_KEY, VISION_API_URL and VISION_MODEL environment variables.`,
14
+ Requires VISION_API_KEY and VISION_API_URL.
15
+ VISION_MODEL is required for OpenAI-compatible backends.
16
+ MiniMax is auto-detected — set VISION_API_URL to your MiniMax base URL and VISION_MODEL is optional.
17
+ Override with VISION_API_TYPE=openai|minimax.`,
15
18
  args: {
16
19
  paths: tool.schema
17
20
  .array(tool.schema.string())
@@ -56,46 +59,113 @@ Requires VISION_API_KEY, VISION_API_URL and VISION_MODEL environment variables.`
56
59
 
57
60
  const apiKey = process.env["VISION_API_KEY"]
58
61
  const baseUrl = process.env["VISION_API_URL"]
59
- const model = process.env["VISION_MODEL"]
60
62
  if (!apiKey) return "Error: VISION_API_KEY not set"
61
63
  if (!baseUrl) return "Error: VISION_API_URL not set"
62
- if (!model) return "Error: VISION_MODEL not set"
63
64
 
64
- const apiUrl = `${baseUrl.replace(/\/+$/, "")}/chat/completions`
65
+ // Determine API type: explicit override or auto-detect from URL
66
+ const apiType = (process.env["VISION_API_TYPE"] || "").toLowerCase()
67
+ const isMiniMax = apiType === "minimax" || (!apiType && /minimax/i.test(baseUrl))
65
68
 
66
- const content: Record<string, unknown>[] = []
67
- if (args.question) {
68
- content.push({ type: "text", text: args.question })
69
- } else if (resolved.length > 1) {
70
- content.push({ type: "text", text: `Describe each of these ${resolved.length} images in detail, labeling which description corresponds to which file.` })
71
- } else {
72
- content.push({ type: "text", text: "Please describe this image in detail" })
69
+ if (isMiniMax) {
70
+ return await callMiniMax(apiKey, baseUrl, resolved, args.question)
73
71
  }
72
+ return await callOpenAI(apiKey, baseUrl, resolved, args.question)
73
+ },
74
+ })
74
75
 
75
- for (const filePath of resolved) {
76
- const file = Bun.file(filePath)
77
- const mime = file.type || "image/png"
78
- const buffer = await file.arrayBuffer()
79
- const base64 = Buffer.from(buffer).toString("base64")
80
- content.push({ type: "image_url", image_url: { url: `data:${mime};base64,${base64}` } })
81
- }
76
+ // ── OpenAI-compatible backend ──
77
+
78
+ async function callOpenAI(apiKey: string, baseUrl: string, resolved: string[], question?: string) {
79
+ const model = process.env["VISION_MODEL"]
80
+ if (!model) return "Error: VISION_MODEL not set (required for OpenAI-compatible backends)"
81
+
82
+ const apiUrl = `${baseUrl.replace(/\/+$/, "")}/chat/completions`
83
+
84
+ const content: Record<string, unknown>[] = []
85
+ if (question) {
86
+ content.push({ type: "text", text: question })
87
+ } else if (resolved.length > 1) {
88
+ content.push({
89
+ type: "text",
90
+ text: `Describe each of these ${resolved.length} images in detail, labeling which description corresponds to which file.`,
91
+ })
92
+ } else {
93
+ content.push({ type: "text", text: "Please describe this image in detail" })
94
+ }
95
+
96
+ for (const filePath of resolved) {
97
+ const file = Bun.file(filePath)
98
+ const mime = file.type || "image/png"
99
+ const buffer = await file.arrayBuffer()
100
+ const base64 = Buffer.from(buffer).toString("base64")
101
+ content.push({ type: "image_url", image_url: { url: `data:${mime};base64,${base64}` } })
102
+ }
103
+
104
+ const response = await fetch(apiUrl, {
105
+ method: "POST",
106
+ headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
107
+ body: JSON.stringify({
108
+ model,
109
+ messages: [{ role: "user", content }],
110
+ max_tokens: 4096,
111
+ }),
112
+ })
113
+
114
+ if (!response.ok) {
115
+ const text = await response.text()
116
+ return `Vision API error (${response.status}): ${text}`
117
+ }
118
+
119
+ const data = (await response.json()) as { choices: { message: { content: string } }[] }
120
+ return data.choices?.[0]?.message?.content ?? "No description returned."
121
+ }
122
+
123
+ // ── MiniMax VLM backend ──
124
+
125
+ interface MiniMaxBaseResp {
126
+ status_code?: number
127
+ status_msg?: string
128
+ }
129
+
130
+ interface MiniMaxVlmResponse {
131
+ base_resp?: MiniMaxBaseResp
132
+ content?: string
133
+ }
134
+
135
+ async function callMiniMax(apiKey: string, baseUrl: string, resolved: string[], question?: string) {
136
+ const apiUrl = `${baseUrl.replace(/\/+$/, "")}/v1/coding_plan/vlm`
137
+
138
+ const descriptions: string[] = []
139
+ for (const filePath of resolved) {
140
+ const file = Bun.file(filePath)
141
+ const mime = file.type || "image/png"
142
+ const buffer = await file.arrayBuffer()
143
+ const base64 = Buffer.from(buffer).toString("base64")
144
+ const imageUrl = `data:${mime};base64,${base64}`
145
+
146
+ const prompt = question || "Please describe this image in detail"
82
147
 
83
148
  const response = await fetch(apiUrl, {
84
149
  method: "POST",
85
150
  headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
86
- body: JSON.stringify({
87
- model,
88
- messages: [{ role: "user", content }],
89
- max_tokens: 4096,
90
- }),
151
+ body: JSON.stringify({ prompt, image_url: imageUrl }),
91
152
  })
92
153
 
93
154
  if (!response.ok) {
94
155
  const text = await response.text()
95
- return `Vision API error (${response.status}): ${text}`
156
+ return `MiniMax Vision API error (${response.status}): ${text}`
96
157
  }
97
158
 
98
- const data = (await response.json()) as { choices: { message: { content: string } }[] }
99
- return data.choices?.[0]?.message?.content ?? "No description returned."
100
- },
101
- })
159
+ const data = (await response.json()) as MiniMaxVlmResponse
160
+
161
+ // MiniMax wraps errors in base_resp even on HTTP 200
162
+ if (data.base_resp?.status_code && data.base_resp.status_code !== 0) {
163
+ return `MiniMax Vision API error: ${data.base_resp.status_msg || `status_code ${data.base_resp.status_code}`}`
164
+ }
165
+
166
+ descriptions.push(data.content || "No description returned.")
167
+ }
168
+
169
+ if (descriptions.length === 1) return descriptions[0]
170
+ return descriptions.map((d, i) => `--- Image ${i + 1} ---\n${d}`).join("\n\n")
171
+ }