@jochenyang/opencode-vision 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jochen Yang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,148 @@
1
+ # opencode-vision
2
+
3
+ 为 [OpenCode](https://github.com/opencode-ai/opencode) 提供视觉识别能力的插件 + 工具。
4
+
5
+ 当模型本身不支持多模态输入时,自动将用户粘贴的图片保存到临时目录,并引导模型调用 vision 工具进行识别。支持单图和多图。
6
+
7
+ ## 原理
8
+
9
+ ```
10
+ 用户粘贴图片 + "这是什么?"
11
+
12
+ vision-helper 插件 (experimental.chat.messages.transform)
13
+ ├─ 解 base64 → 保存到临时目录
14
+ ├─ 用简短占位替换原始图片部分(消除不支持的模型的 ERROR 噪音)
15
+ └─ 路径提示注入到用户文本前
16
+
17
+ 模型看到路径提示 → 自动调用 vision 工具
18
+
19
+ vision 工具调用视觉 API 返回图片描述
20
+ ```
21
+
22
+ - **单图** → 模型调用 `vision(path)` 读取单张图片
23
+ - **多图** → 模型调用 `vision(paths=[...])` 一次 API 调用处理全部图片
24
+
25
+ ## 前置要求
26
+
27
+ - [OpenCode](https://github.com/opencode-ai/opencode) 已安装
28
+ - 一个兼容 OpenAI 格式的视觉 API(如阿里云 DashScope 通义千问等)
29
+ - 环境变量(建议配置到系统级,避免每次启动重复输入)
30
+
31
+ ## 环境变量
32
+
33
+ | 变量 | 说明 | 示例值 |
34
+ | ------------------ | -------------------------------------- | ------------------------------------------------------------- |
35
+ | `VISION_API_KEY` | 视觉 API 的密钥 | `sk-your-api-key` |
36
+ | `VISION_API_URL` | 视觉 API 的基础地址<br>(工具自动补全 `/chat/completions`) | `https://your-api-endpoint/v1` |
37
+ | `VISION_MODEL` | 视觉模型名称 | `your-vision-model` |
38
+
39
+ ### Windows 系统级配置
40
+
41
+ ```powershell
42
+ [System.Environment]::SetEnvironmentVariable('VISION_API_KEY', 'sk-your-api-key', 'User')
43
+ [System.Environment]::SetEnvironmentVariable('VISION_API_URL', 'https://your-api-endpoint/v1', 'User')
44
+ [System.Environment]::SetEnvironmentVariable('VISION_MODEL', 'your-vision-model', 'User')
45
+ ```
46
+
47
+ 设置后**重启终端**生效。
48
+
49
+ ### macOS / Linux
50
+
51
+ 在 `~/.zshrc` 或 `~/.bashrc` 中添加:
52
+
53
+ ```bash
54
+ export VISION_API_KEY="sk-your-api-key"
55
+ export VISION_API_URL="https://your-api-endpoint/v1"
56
+ export VISION_MODEL="your-vision-model"
57
+ ```
58
+
59
+ ## 安装
60
+
61
+ ### 手动安装
62
+
63
+ 将两个文件复制到 OpenCode 的全局配置目录:
64
+
65
+ ```powershell
66
+ # 工具文件
67
+ copy tools\vision.ts $env:USERPROFILE\.config\opencode\tools\
68
+
69
+ # 插件文件
70
+ copy plugins\vision-helper.ts $env:USERPROFILE\.config\opencode\plugins\
71
+ ```
72
+
73
+ OpenCode 会自动发现 `~/.config/opencode/tools/` 和 `~/.config/opencode/plugins/` 下的文件,**无需修改 `opencode.json`**。
74
+
75
+ > 如果对应目录不存在,手动创建即可。
76
+
77
+ ### 通过 npx
78
+
79
+ ```bash
80
+ npx @jochenyang/opencode-vision install
81
+ ```
82
+
83
+ ## 验证
84
+
85
+ 启动 OpenCode:
86
+
87
+ ```powershell
88
+ opencode
89
+ ```
90
+
91
+ 粘贴一张图片并提问:
92
+
93
+ ```
94
+ [图片] 这是什么?
95
+ ```
96
+
97
+ 预期行为:
98
+
99
+ 1. 模型无法直接读取图片(当前模型不支持多模态)
100
+ 2. 插件自动保存图片到临时目录并注入路径提示
101
+ 3. 模型自动调用 `vision` 工具读取图片
102
+ 4. 模型返回图片描述
103
+
104
+ ## 项目结构
105
+
106
+ ```
107
+ opencode-vision/
108
+ ├── tools/
109
+ │ └── vision.ts # vision 工具定义,调用视觉 API
110
+ ├── plugins/
111
+ │ └── vision-helper.ts # 插件:自动存图、注入提示、消除 ERROR 噪音
112
+ └── README.md
113
+ ```
114
+
115
+ ### 工具:`tools/vision.ts`
116
+
117
+ - 读取本地图片文件,通过视觉 API 识别内容
118
+ - 支持 `path`(单图)和 `paths`(多图数组)两个参数
119
+ - 兼容 OpenAI Chat Completions 格式的 API
120
+
121
+ ### 插件:`plugins/vision-helper.ts`
122
+
123
+ - 钩子:`experimental.chat.messages.transform`
124
+ - 在消息发送给模型前一刻处理
125
+ - 将图片保存到 `os.tmpdir()/opencode-vision/`
126
+ - 路径提示注入到用户文本前(不会持久化到聊天记录)
127
+ - 用简短占位替换原始图片部分,消除 `unsupportedParts` 产生的 ERROR 噪音
128
+
129
+ ## 注意事项
130
+
131
+ - 图片保存到系统临时目录 `os.tmpdir()/opencode-vision/`,由操作系统按各自的临时文件策略清理(并非所有系统都会在重启后自动清理,必要时可手动删除该目录)
132
+ - 临时文件以 `pasted-{timestamp}-{random}.{ext}` 命名
133
+ - 同一会话中多次粘贴同一张图会产生多个临时文件
134
+ - 视觉 API 调用使用 `max_tokens: 4096`,多图场景下足够返回详细描述
135
+
136
+ ## 自定义视觉 API
137
+
138
+ 本工具兼容任何 OpenAI Chat Completions 格式的视觉 API。只需更换环境变量即可:
139
+
140
+ ```powershell
141
+ $env:VISION_API_KEY = 'sk-your-api-key'
142
+ $env:VISION_API_URL = 'https://your-api-endpoint/v1'
143
+ $env:VISION_MODEL = 'your-vision-model'
144
+ ```
145
+
146
+ ## 许可证
147
+
148
+ MIT
package/bin/install.js ADDED
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env node
2
+ const fs = require("fs")
3
+ const path = require("path")
4
+ const os = require("os")
5
+
6
// Package root: the directory one level above this script's own folder.
const SRC = path.resolve(__dirname, "..")
// Target directory under the user's home: ~/.config/opencode
const DST = path.join(os.homedir(), ".config", "opencode")
8
+
9
// Pairs of [source path inside the package, destination path inside DST] for
// every file this installer manages. Both sides currently use the same
// relative path.
const FILES = ["tools/vision.ts", "plugins/vision-helper.ts"].map((rel) => [rel, rel])
13
+
14
+ function log(msg, ok = true) {
15
+ const prefix = ok ? "\x1b[32m ✓\x1b[0m" : "\x1b[31m ✗\x1b[0m"
16
+ console.log(`${prefix} ${msg}`)
17
+ }
18
+
19
+ function title(msg) {
20
+ console.log(`\n\x1b[36m═══ ${msg} \x1b[0m\n`)
21
+ }
22
+
23
/**
 * Installs the tool and plugin files into the OpenCode config directory, then
 * reports which VISION_* environment variables are set and whether the
 * `opencode` CLI can be invoked.
 *
 * Missing source files are reported and skipped; filesystem errors from
 * creating directories or copying propagate to the caller.
 */
async function doInstall() {
  title("opencode-vision 安装")

  // Make sure every destination directory exists. A recursive mkdir is a
  // no-op for directories that are already there, so no existence check is needed.
  for (const [, rel] of FILES) {
    fs.mkdirSync(path.join(DST, path.dirname(rel)), { recursive: true })
  }

  for (const [srcRel, dstRel] of FILES) {
    const src = path.join(SRC, srcRel)
    const dst = path.join(DST, dstRel)

    if (!fs.existsSync(src)) {
      log(`源文件不存在: ${srcRel}`, false)
      continue
    }

    fs.copyFileSync(src, dst)
    log(`安装 ${dstRel}`)
  }

  title("环境变量检查")
  const vars = {
    VISION_API_KEY: process.env.VISION_API_KEY,
    VISION_API_URL: process.env.VISION_API_URL,
    VISION_MODEL: process.env.VISION_MODEL,
  }

  for (const [name, val] of Object.entries(vars)) {
    if (val) {
      // Only the first characters of the API key are echoed.
      const masked = name === "VISION_API_KEY" ? val.slice(0, 6) + "****" : val
      log(`${name} = ${masked}`)
    } else {
      log(`${name} 未设置 — 请配置后再使用`, false)
    }
  }

  title("OpenCode 检测")
  const { execSync } = require("child_process")
  // Try `opencode --version` first, then `opencode version`. stderr is already
  // discarded through `stdio`, so no shell redirection is used: `2>nul` would
  // create a file literally named "nul" on macOS/Linux, and `2>/dev/null` is
  // not a valid target on Windows.
  let ver = null
  for (const cmd of ["opencode --version", "opencode version"]) {
    try {
      ver = execSync(cmd, {
        encoding: "utf8",
        stdio: ["ignore", "pipe", "ignore"],
        timeout: 5000,
      }).trim()
      break
    } catch {
      // This invocation failed or timed out; fall through to the next candidate.
    }
  }
  if (ver !== null) {
    log(`OpenCode ${ver || "已安装"}`)
  } else {
    log("未检测到 OpenCode — 请先安装 https://github.com/opencode-ai/opencode", false)
  }

  title("安装完成")
  console.log(" 重启 OpenCode 后即可使用。")
  console.log(" 粘贴一张图片试试看:")
  console.log(' [图片] "这是什么?"')
}
80
+
81
/**
 * Removes every installed file listed in FILES from the OpenCode config
 * directory, deletes a containing directory once it is left empty, and prints
 * a summary of what happened.
 */
async function doUninstall() {
  title("opencode-vision 卸载")

  let deletedCount = 0
  FILES.forEach(([, relPath]) => {
    const target = path.join(DST, relPath)

    if (fs.existsSync(target)) {
      fs.unlinkSync(target)
      log(`已删除 ${relPath}`)
      deletedCount += 1

      // Remove the parent directory as well when nothing is left inside it.
      const parent = path.dirname(target)
      const parentIsEmpty = fs.existsSync(parent) && fs.readdirSync(parent).length === 0
      if (parentIsEmpty) {
        fs.rmdirSync(parent)
        log(`已清理空目录 ${path.relative(os.homedir(), parent)}`)
      }
    } else {
      log(`未安装: ${relPath}`)
    }
  })

  title("卸载完成")
  const summary =
    deletedCount > 0
      ? [" 已删除 opencode-vision 相关文件。", " 重启 OpenCode 即可生效。"]
      : [" 没有找到已安装的文件。"]
  for (const line of summary) {
    console.log(line)
  }
}
111
+
112
/**
 * CLI entry point: runs the uninstaller when `--uninstall` or `uninstall`
 * appears anywhere in the process arguments, otherwise runs the installer,
 * then prints a trailing blank line.
 */
async function main() {
  const wantsUninstall = ["--uninstall", "uninstall"].some((flag) => process.argv.includes(flag))
  const action = wantsUninstall ? doUninstall : doInstall
  await action()
  console.log()
}
121
+
122
// Start the CLI. A rejected run is reported in red on stderr and the process
// exits with status 1.
main().then(undefined, (failure) => {
  console.error("\x1b[31m操作失败:\x1b[0m", failure.message)
  process.exit(1)
})
package/package.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "name": "@jochenyang/opencode-vision",
3
+ "version": "1.0.0",
4
+ "description": "Vision plugin + tool for OpenCode — automatically handles pasted images for non-vision models",
5
+ "keywords": ["opencode", "vision", "image", "ai", "plugin", "tool"],
6
+ "homepage": "https://github.com/jochenyang/opencode-vision",
7
+ "license": "MIT",
8
+ "author": "Jochen Yang",
9
+ "bin": {
10
+ "opencode-vision": "./bin/install.js"
11
+ },
12
+ "files": [
13
+ "bin/",
14
+ "tools/",
15
+ "plugins/",
16
+ "README.md"
17
+ ],
18
+ "engines": {
19
+ "node": ">=18"
20
+ },
21
+ "publishConfig": {
22
+ "access": "public"
23
+ }
24
+ }
@@ -0,0 +1,65 @@
1
+ import type { Plugin } from "@opencode-ai/plugin"
2
+ import { tmpdir } from "os"
3
+ import path from "path"
4
+
5
+ const TMP_DIR = path.join(tmpdir(), "opencode-vision")
6
+
7
/**
 * OpenCode plugin that makes pasted images usable by models without vision support.
 *
 * Right before messages are sent to the model, every user message is scanned for
 * image file parts carried as base64 data URLs. Each such image is:
 *   1. written to the `opencode-vision` folder under the OS temp directory,
 *   2. replaced in the message by a short text placeholder, so the unsupported
 *      part does not produce noisy ERROR text, and
 *   3. announced through a path hint prepended to the first non-synthetic text
 *      part, steering the model towards the vision tool.
 */
export default (async () => {
  // Best-effort write of a marker file so the temp folder exists up front.
  await Bun.write(path.join(TMP_DIR, ".check"), "").catch(() => {})

  // Content fingerprint -> file path already written by this plugin instance.
  // The same image parts show up again on later requests (the replacements made
  // here are applied to the outgoing copy only — per the README they are not
  // persisted), so without this cache every request would write a fresh copy of
  // every historical image and change the paths quoted in earlier hints.
  const savedByFingerprint = new Map<string, string>()

  return {
    "experimental.chat.messages.transform": async (_input, output) => {
      for (const msg of output.messages) {
        if (msg.info.role !== "user") continue

        // Locate every inline image and make sure it exists on disk.
        const saved: { index: number; filePath: string }[] = []

        for (let i = 0; i < msg.parts.length; i++) {
          const part = msg.parts[i]
          if (part.type !== "file" || typeof part.mime !== "string" || !part.mime.startsWith("image/")) continue

          // Only base64 data URLs carry the image bytes inline.
          const marker = ";base64,"
          const markerAt = part.url.indexOf(marker)
          if (markerAt === -1) continue
          const base64 = part.url.slice(markerAt + marker.length)
          if (!base64) continue

          const fingerprint = `${base64.length}:${Bun.hash(base64)}`
          let filePath = savedByFingerprint.get(fingerprint)

          // Write the file when it is new, or when a previously saved copy has
          // been removed from the temp folder in the meantime.
          if (filePath === undefined || !(await Bun.file(filePath).exists())) {
            // Derive the extension from the MIME subtype: drop any "+suffix" or
            // ";parameter" (image/svg+xml -> svg) and strip characters that do
            // not belong in a file extension, path separators included.
            const subtype = part.mime
              .slice("image/".length)
              .split(/[+;]/)[0]
              .replace(/[^a-z0-9.-]/gi, "")
            const ext = subtype || "png"
            const name = `pasted-${Date.now()}-${Math.random().toString(36).slice(2, 8)}.${ext}`
            filePath = path.join(TMP_DIR, name)

            await Bun.write(filePath, Buffer.from(base64, "base64"))
            savedByFingerprint.set(fingerprint, filePath)
          }

          saved.push({ index: i, filePath })
        }

        if (saved.length === 0) continue

        // Swap each image part for a short text placeholder. This is a
        // one-for-one replacement, so indices never shift.
        for (const { index, filePath } of saved) {
          // The placeholder carries only `type` and `text`; the cast silences
          // the mismatch with the host's part type.
          msg.parts[index] = {
            type: "text",
            text: `[vision: ${path.basename(filePath)}]`,
          } as never
        }

        // Build the hint that tells the model where the image(s) were saved.
        const hintText = saved.length === 1
          ? `[Image auto-saved to ${saved[0].filePath} — use the vision tool to read it]`
          : `[Images auto-saved to:\n${saved.map((s) => ` ${s.filePath}`).join("\n")}\n— use the vision tool with paths=[...] to read them all at once]`

        // Prepend the hint to the first non-synthetic text part. That may be one
        // of the placeholders inserted above when it precedes the user's text.
        const firstText = msg.parts.find((p) => p.type === "text" && !p.synthetic)
        if (firstText && typeof firstText.text === "string") {
          firstText.text = hintText + "\n" + firstText.text
        }
      }
    },
  }
}) satisfies Plugin
@@ -0,0 +1,101 @@
1
+ /// <reference path="../env.d.ts" />
2
+ import { tool } from "@opencode-ai/plugin"
3
+ import { tmpdir } from "os"
4
+ import path from "path"
5
+
6
+ const TMP_DIR = path.join(tmpdir(), "opencode-vision")
7
+
8
export default tool({
  description: `Reads one or more image files and returns a description of their contents.
Use this when the user pastes images but the current model cannot view images directly.
The image(s) will have been auto-saved with a path hint like "[Image auto-saved to ...]" in the conversation.
For multiple images, use the "paths" parameter.

Requires VISION_API_KEY, VISION_API_URL and VISION_MODEL environment variables.`,
  args: {
    paths: tool.schema
      .array(tool.schema.string())
      .describe("Absolute path(s) to the image file(s). Use this for one or multiple images.")
      .optional(),
    path: tool.schema
      .string()
      .describe("Deprecated: use 'paths' instead. Absolute path to a single image file.")
      .optional(),
    question: tool.schema
      .string()
      .describe("Optional specific question about the image(s)")
      .optional(),
  },
  /**
   * Sends the requested image(s) to the configured OpenAI-compatible chat
   * completions endpoint and returns the model's textual answer.
   *
   * Every failure (no path given, no file found, missing configuration, network
   * error, non-2xx status, unparsable body) is returned as a descriptive string
   * rather than thrown. Paths that cannot be found are listed in a note appended
   * to the answer instead of being dropped silently.
   */
  async execute(args) {
    // `paths` takes precedence; the deprecated single `path` is used only when
    // `paths` is absent or empty.
    const allPaths: string[] = []
    if (args.paths && args.paths.length > 0) {
      allPaths.push(...args.paths)
    } else if (args.path) {
      allPaths.push(args.path)
    }
    if (allPaths.length === 0) return "Error: no image path provided"

    // Resolve each path: use it as given when it exists, otherwise look for a
    // file with the same base name inside TMP_DIR. Keep track of the misses.
    const resolved: string[] = []
    const missing: string[] = []
    for (const p of allPaths) {
      if (await Bun.file(p).exists()) {
        resolved.push(p)
        continue
      }
      const fallback = path.join(TMP_DIR, path.basename(p))
      if (await Bun.file(fallback).exists()) {
        resolved.push(fallback)
      } else {
        missing.push(p)
      }
    }

    if (resolved.length === 0) {
      return `Error: none of the specified images were found (looked in: ${allPaths.join(", ")})`
    }

    const apiKey = process.env["VISION_API_KEY"]
    const baseUrl = process.env["VISION_API_URL"]
    const model = process.env["VISION_MODEL"]
    if (!apiKey) return "Error: VISION_API_KEY not set"
    if (!baseUrl) return "Error: VISION_API_URL not set"
    if (!model) return "Error: VISION_MODEL not set"

    // Trailing slashes are stripped so the endpoint path is appended exactly once.
    const apiUrl = `${baseUrl.replace(/\/+$/, "")}/chat/completions`

    const content: Record<string, unknown>[] = []
    if (args.question) {
      content.push({ type: "text", text: args.question })
    } else if (resolved.length > 1) {
      content.push({ type: "text", text: `Describe each of these ${resolved.length} images in detail, labeling which description corresponds to which file.` })
    } else {
      content.push({ type: "text", text: "Please describe this image in detail" })
    }

    for (const [i, filePath] of resolved.entries()) {
      // With several images, name each one right before its data so the model
      // can tie every description to a file — the data URL carries no name.
      if (resolved.length > 1) {
        content.push({ type: "text", text: `Image ${i + 1}: ${path.basename(filePath)}` })
      }
      const file = Bun.file(filePath)
      const mime = file.type || "image/png"
      const buffer = await file.arrayBuffer()
      const base64 = Buffer.from(buffer).toString("base64")
      content.push({ type: "image_url", image_url: { url: `data:${mime};base64,${base64}` } })
    }

    let response: Response
    try {
      response = await fetch(apiUrl, {
        method: "POST",
        headers: { "Content-Type": "application/json", Authorization: `Bearer ${apiKey}` },
        body: JSON.stringify({
          model,
          messages: [{ role: "user", content }],
          max_tokens: 4096,
        }),
      })
    } catch (err: unknown) {
      // Connection-level failures (DNS, refused connection, invalid URL, ...).
      const reason = err instanceof Error ? err.message : String(err)
      return `Vision API request failed: ${reason}`
    }

    if (!response.ok) {
      const text = await response.text()
      return `Vision API error (${response.status}): ${text}`
    }

    let data: { choices?: { message?: { content?: string } }[] }
    try {
      data = (await response.json()) as typeof data
    } catch {
      return "Vision API error: response body was not valid JSON"
    }

    const description = data?.choices?.[0]?.message?.content ?? "No description returned."
    if (missing.length === 0) return description

    // Tell the caller which requested images were left out of the answer.
    return `${description}\n\n[Note: ${missing.length} image(s) could not be found and were skipped: ${missing.join(", ")}]`
  },
})