opencode-see-image 0.9.1 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -1
- package/README.md +68 -67
- package/index.ts +72 -22
- package/package.json +2 -2
|
@@ -6,7 +6,13 @@
|
|
|
6
6
|
"Bash(FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --msg-filter 'sed \"/Co-Authored-By: Claude/d\" | sed -e :a -e \"/^\\\\n*$/{\\\\$d;N;ba\" -e \"}\"' HEAD~2..HEAD)",
|
|
7
7
|
"Bash(echo \"--- created, exit $? ---\")",
|
|
8
8
|
"Bash(node -p \"require\\('./package.json'\\).version\")",
|
|
9
|
-
"Bash(echo \"local package.json version: $\\(node -p \"require\\('./package.json'\\).version\" \\)\")"
|
|
9
|
+
"Bash(echo \"local package.json version: $\\(node -p \"require\\('./package.json'\\).version\" \\)\")",
|
|
10
|
+
"Bash(bun --version)",
|
|
11
|
+
"Bash(bun pm *)",
|
|
12
|
+
"Read(//Users/alfa/Documents/opencodeprojects/opencode-see-image/bun-types/**)",
|
|
13
|
+
"Bash(bun run *)",
|
|
14
|
+
"WebFetch(domain:docs.z.ai)",
|
|
15
|
+
"Bash(npm publish *)"
|
|
10
16
|
]
|
|
11
17
|
}
|
|
12
18
|
}
|
package/README.md
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
# opencode-see-image
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
give non-vision opencode models the ability to see images and screenshots by routing them to a vision-capable model.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
when a user attaches a screenshot to a text-only model, opencode rejects it with an error. This plugin intercepts that flow by registering a `see_image` tool that sends the image to a vision model and returns a textual description the primary model can reason about.
|
|
6
6
|
|
|
7
|
-
##
|
|
7
|
+
## install
|
|
8
8
|
|
|
9
|
-
**
|
|
9
|
+
**one command (recommended):**
|
|
10
10
|
```bash
|
|
11
11
|
opencode plugin opencode-see-image --global
|
|
12
12
|
```
|
|
13
13
|
This installs the package and adds it to your config. Then restart opencode.
|
|
14
14
|
|
|
15
|
-
**
|
|
15
|
+
**edit config manually:**
|
|
16
16
|
|
|
17
17
|
Add the plugin to your opencode config:
|
|
18
18
|
|
|
@@ -25,76 +25,76 @@ Add the plugin to your opencode config:
|
|
|
25
25
|
```
|
|
26
26
|
Then restart opencode.
|
|
27
27
|
|
|
28
|
-
##
|
|
28
|
+
## install via your agent (for some reason?)
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
ask your agent:
|
|
31
31
|
```
|
|
32
32
|
install the opencode-see-image plugin
|
|
33
33
|
```
|
|
34
|
-
|
|
34
|
+
it'll run `opencode plugin opencode-see-image --global` and tell you to restart.
|
|
35
35
|
|
|
36
|
-
##
|
|
36
|
+
## prerequisites
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
you need a connected vision-capable provider. The plugin auto-detects whichever you have connected; **either of these works**:
|
|
39
39
|
|
|
40
|
-
###
|
|
41
|
-
1.
|
|
42
|
-
2.
|
|
43
|
-
3.
|
|
40
|
+
### free (OpenCode Zen)
|
|
41
|
+
1. run `/connect` in opencode
|
|
42
|
+
2. select **opencode** (OpenCode Zen)
|
|
43
|
+
3. paste your API key from [opencode.ai/auth](https://opencode.ai/auth)
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
the plugin falls back to **mimo-v2.5-free**.
|
|
46
46
|
|
|
47
|
-
###
|
|
48
|
-
1.
|
|
49
|
-
2.
|
|
50
|
-
3.
|
|
47
|
+
### paid, w/ OpenCode Go
|
|
48
|
+
1. run `/connect` in opencode
|
|
49
|
+
2. select **opencode-go**
|
|
50
|
+
3. paste your API key from [opencode.ai/auth](https://opencode.ai/auth)
|
|
51
51
|
|
|
52
|
-
|
|
52
|
+
the plugin prefers **minimax-m3** via opencode-go when available.
|
|
53
53
|
|
|
54
|
-
###
|
|
54
|
+
### paid, w/ another provider
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
set the `SEE_IMAGE_*` env vars to point at any Anthropic-Messages-compatible endpoint. see [Configuration](#configuration) below.
|
|
57
57
|
|
|
58
|
-
**
|
|
58
|
+
**the resolve order:** explicit `SEE_IMAGE_API_KEY` env → configured `SEE_IMAGE_PROVIDER` → `opencode-go` (MiniMax M3) → `opencode` (mimo-v2.5-free).
|
|
59
59
|
|
|
60
|
-
##
|
|
60
|
+
## how the _eye surgery_ works
|
|
61
61
|
|
|
62
62
|
```
|
|
63
63
|
user attaches screenshot
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
|
|
|
65
|
+
v
|
|
66
66
|
opencode rejects it: 'this model does not support image input'
|
|
67
|
-
|
|
68
|
-
|
|
67
|
+
| (the model only sees the filename)
|
|
68
|
+
v
|
|
69
69
|
plugin's system-prompt instructions tell the model to call see_image
|
|
70
|
-
|
|
71
|
-
|
|
70
|
+
|
|
|
71
|
+
v
|
|
72
72
|
see_image tool:
|
|
73
|
-
1. queries opencode's SQLite DB for the image
|
|
73
|
+
1. queries opencode's SQLite DB for the image
|
|
74
74
|
2. falls back to filesystem search if not in DB
|
|
75
75
|
3. sends the image to the vision model via opencode's SDK
|
|
76
76
|
4. returns the textual description
|
|
77
|
-
|
|
78
|
-
|
|
77
|
+
|
|
|
78
|
+
v
|
|
79
79
|
primary model answers using the description
|
|
80
80
|
```
|
|
81
81
|
|
|
82
|
-
##
|
|
82
|
+
## the `see_image` tool
|
|
83
83
|
|
|
84
|
-
|
|
84
|
+
the plugin registers a `see_image` tool with two arguments:
|
|
85
85
|
|
|
86
|
-
|
|
|
86
|
+
| arg | type | required? | description |
|
|
87
87
|
|---|---|---|---|
|
|
88
|
-
| `filePath` | string |
|
|
89
|
-
| `question` | string |
|
|
88
|
+
| `filePath` | string | y | path to the image. Absolute path, or a bare filename like `"Screenshot 2026-06-18 at 17.32.24.png"` to auto-locate. |
|
|
89
|
+
| `question` | string | n | a specific question about the image. Defaults to a general detailed description. Use this to focus on a particular detail (e.g. `"What error is shown in the terminal?"`). |
|
|
90
90
|
|
|
91
|
-
|
|
91
|
+
your model calls this tool automatically when you attach a screenshot; you don't need to do anything special. The `question` arg is optional; the model uses it when you ask something specific about the image.
|
|
92
92
|
|
|
93
|
-
##
|
|
93
|
+
## configuration
|
|
94
94
|
|
|
95
|
-
|
|
95
|
+
all settings are env-var overrides. The plugin uses opencode's SDK client by default (handles auth automatically). Set `SEE_IMAGE_API_KEY` to bypass the SDK and call an HTTP endpoint directly.
|
|
96
96
|
|
|
97
|
-
|
|
|
97
|
+
| env var | default | description |
|
|
98
98
|
|---|---|---|
|
|
99
99
|
| `SEE_IMAGE_MODEL` | `minimax-m3` | Vision model ID |
|
|
100
100
|
| `SEE_IMAGE_PROVIDER` | `opencode-go` | Provider ID for SDK routing |
|
|
@@ -104,9 +104,9 @@ All settings are env-var overrides. The plugin uses opencode's SDK client by def
|
|
|
104
104
|
| `SEE_IMAGE_USER_AGENT` | _(Chrome UA)_ | User-Agent header (HTTP mode only) |
|
|
105
105
|
| `SEE_IMAGE_TIMEOUT` | `30000` | Per-candidate timeout in ms. Prevents hanging on slow models. |
|
|
106
106
|
|
|
107
|
-
###
|
|
107
|
+
### using a different vision model
|
|
108
108
|
|
|
109
|
-
|
|
109
|
+
any Anthropic-Messages-compatible endpoint works. for example, to use a direct MiniMax key:
|
|
110
110
|
|
|
111
111
|
```bash
|
|
112
112
|
export SEE_IMAGE_ENDPOINT="https://api.minimax.io/v1/messages"
|
|
@@ -114,60 +114,61 @@ export SEE_IMAGE_MODEL="minimax-m3"
|
|
|
114
114
|
export SEE_IMAGE_API_KEY="your-minimax-key"
|
|
115
115
|
```
|
|
116
116
|
|
|
117
|
-
|
|
117
|
+
to use a different opencode-go model (e.g. Kimi K2.7):
|
|
118
118
|
|
|
119
119
|
```bash
|
|
120
120
|
export SEE_IMAGE_MODEL="kimi-k2.7-code"
|
|
121
121
|
```
|
|
122
122
|
|
|
123
|
-
###
|
|
123
|
+
### verified vision-capable models
|
|
124
124
|
|
|
125
125
|
**Free (OpenCode Zen):**
|
|
126
126
|
|
|
127
|
-
|
|
|
128
|
-
|
|
129
|
-
| `mimo-v2.5-free` |
|
|
130
|
-
| `big-pickle` |
|
|
127
|
+
| model | Notes |
|
|
128
|
+
|---|---|
|
|
129
|
+
| `mimo-v2.5-free` | free. may be a bit slow. default fallback when only Zen is connected (routed via CLI). |
|
|
130
|
+
| `big-pickle` | for some reason, `big-pickle` works as an image-capable model when called through the SDK with an active OpenCode Go subscription. |
|
|
131
131
|
|
|
132
|
-
**
|
|
132
|
+
**paid (OpenCode Go):**
|
|
133
133
|
|
|
134
|
-
|
|
|
134
|
+
| model | speed | notes |
|
|
135
135
|
|---|---|---|
|
|
136
|
-
| `minimax-m3` | ~3000ms |
|
|
137
|
-
| `kimi-k2.7-code` | ~7000ms |
|
|
138
|
-
| `kimi-k2.6` | ~
|
|
139
|
-
| `qwen3.7-plus` | ~
|
|
136
|
+
| `minimax-m3` | ~3000ms | default. fast, clean, and accurate. |
|
|
137
|
+
| `kimi-k2.7-code` | ~7000ms | clean and accurate. |
|
|
138
|
+
| `kimi-k2.6` | ~12000ms | accurate but slow. |
|
|
139
|
+
| `qwen3.7-plus` | ~15000ms | slow; spends a few more tokens because of thinking. |
|
|
140
140
|
|
|
141
|
-
##
|
|
141
|
+
## updating
|
|
142
142
|
|
|
143
|
-
**
|
|
143
|
+
**auto-update (built in):** uses the opencode-plugin-update-kit and shows a toast: *"opencode-see-image updated to X.Y.Z, restart opencode to apply"*. You just need to restart opencode to load the new version.
|
|
144
144
|
|
|
145
|
-
**
|
|
145
|
+
**manual update**:
|
|
146
146
|
```bash
|
|
147
147
|
opencode plugin opencode-see-image --force --global
|
|
148
148
|
```
|
|
149
|
-
|
|
149
|
+
then restart opencode.
|
|
150
150
|
|
|
151
|
-
**
|
|
151
|
+
**pin a version** in your config to opt out of auto-updates:
|
|
152
152
|
```jsonc
|
|
153
153
|
"plugin": ["opencode-see-image@0.4.2"]
|
|
154
154
|
```
|
|
155
155
|
|
|
156
|
-
##
|
|
156
|
+
## limitations
|
|
157
157
|
|
|
158
|
-
- **macOS-only filesystem search
|
|
158
|
+
- **macOS-only filesystem search**. the filesystem fallback targets macOS screenshot temp dirs. Linux/Windows users should rely on the DB lookup (which is cross-platform) or pass absolute paths.
|
|
159
|
+
> if you can add compat for more platforms, i would love a pr.
|
|
159
160
|
|
|
160
|
-
##
|
|
161
|
+
## file search locations
|
|
161
162
|
|
|
162
|
-
|
|
163
|
+
when opencode rejects an image attachment, the model only receives a bare filename. `see_image` searches these locations in order:
|
|
163
164
|
|
|
164
165
|
1. `$TMPDIR/TemporaryItems/NSIRD_screencaptureui_*/` (where macOS stashes dragged screenshots)
|
|
165
166
|
2. `$TMPDIR/TemporaryItems/`
|
|
166
167
|
3. `~/Desktop` (default screenshot save location)
|
|
167
168
|
4. `~/Downloads`
|
|
168
|
-
5.
|
|
169
|
+
5. current working directory
|
|
169
170
|
|
|
170
|
-
|
|
171
|
+
pass an absolute `filePath` to skip the search.
|
|
171
172
|
|
|
172
173
|
## License
|
|
173
174
|
|
package/index.ts
CHANGED
|
@@ -218,7 +218,6 @@ function readProviderKey(providerID: string): string | null {
|
|
|
218
218
|
|
|
219
219
|
async function seeImageViaSDK(
|
|
220
220
|
client: any,
|
|
221
|
-
$: any,
|
|
222
221
|
dataUrl: string,
|
|
223
222
|
mediaType: string,
|
|
224
223
|
prompt: string,
|
|
@@ -226,28 +225,54 @@ async function seeImageViaSDK(
|
|
|
226
225
|
): Promise<{ text: string; model: string; provider: string }> {
|
|
227
226
|
const errors: string[] = []
|
|
228
227
|
|
|
229
|
-
// Write image to a temp file so the server can read it directly. Use the
|
|
230
|
-
// real extension so the CLI can sniff the type correctly.
|
|
231
228
|
const b64 = dataUrl.split(",")[1] || ""
|
|
232
229
|
const ext =
|
|
233
230
|
Object.entries(EXT_MEDIA).find(([, m]) => m === mediaType)?.[0] || "png"
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
231
|
+
|
|
232
|
+
// The free CLI fallback needs the image on disk. Write it lazily and only
|
|
233
|
+
// once, so the common SDK/dataURL path never touches the filesystem. Use the
|
|
234
|
+
// real extension so the CLI can sniff the type correctly.
|
|
235
|
+
let tmpPath: string | null = null
|
|
236
|
+
const ensureTmpFile = (): string | null => {
|
|
237
|
+
if (tmpPath) return tmpPath
|
|
238
|
+
const p = path.join(os.tmpdir(), `see-image-${Date.now()}.${ext}`)
|
|
239
|
+
try {
|
|
240
|
+
fs.writeFileSync(p, Buffer.from(b64, "base64"))
|
|
241
|
+
tmpPath = p
|
|
242
|
+
} catch {
|
|
243
|
+
return null
|
|
244
|
+
}
|
|
245
|
+
return tmpPath
|
|
246
|
+
}
|
|
238
247
|
|
|
239
248
|
// For free opencode models, use CLI instead of SDK (SDK returns empty).
|
|
240
|
-
// Bun
|
|
241
|
-
//
|
|
249
|
+
// Use Bun.spawn (not $) so we get a killable handle: Bun's $ ShellPromise
|
|
250
|
+
// has no .kill(), so racing it against a timeout would leak the process.
|
|
251
|
+
// We kill the child on both timeout and external abort.
|
|
242
252
|
const freeFallback = async (modelID: string, userPrompt: string): Promise<string | null> => {
|
|
253
|
+
const filePath = ensureTmpFile()
|
|
254
|
+
if (!filePath) return null
|
|
255
|
+
const proc = Bun.spawn(
|
|
256
|
+
[
|
|
257
|
+
"opencode",
|
|
258
|
+
"run",
|
|
259
|
+
"-f",
|
|
260
|
+
filePath,
|
|
261
|
+
"-m",
|
|
262
|
+
`opencode/${modelID}`,
|
|
263
|
+
userPrompt,
|
|
264
|
+
"--format",
|
|
265
|
+
"json",
|
|
266
|
+
"--dangerously-skip-permissions",
|
|
267
|
+
],
|
|
268
|
+
{ stdout: "pipe", stderr: "ignore" },
|
|
269
|
+
)
|
|
270
|
+
const timer = setTimeout(() => proc.kill(), TIMEOUT)
|
|
271
|
+
const onAbort = () => proc.kill()
|
|
272
|
+
abort?.addEventListener("abort", onAbort)
|
|
243
273
|
try {
|
|
244
|
-
const
|
|
245
|
-
|
|
246
|
-
proc.text(),
|
|
247
|
-
new Promise<never>((_, reject) =>
|
|
248
|
-
setTimeout(() => reject(new Error(`timed out after ${TIMEOUT}ms`)), TIMEOUT),
|
|
249
|
-
),
|
|
250
|
-
])
|
|
274
|
+
const out = await new Response(proc.stdout).text()
|
|
275
|
+
await proc.exited
|
|
251
276
|
for (const line of out.split("\n").filter(Boolean)) {
|
|
252
277
|
try {
|
|
253
278
|
const parsed = JSON.parse(line)
|
|
@@ -256,7 +281,10 @@ async function seeImageViaSDK(
|
|
|
256
281
|
}
|
|
257
282
|
} catch {}
|
|
258
283
|
}
|
|
259
|
-
} catch {}
|
|
284
|
+
} catch {} finally {
|
|
285
|
+
clearTimeout(timer)
|
|
286
|
+
abort?.removeEventListener("abort", onAbort)
|
|
287
|
+
}
|
|
260
288
|
return null
|
|
261
289
|
}
|
|
262
290
|
|
|
@@ -286,7 +314,15 @@ async function seeImageViaSDK(
|
|
|
286
314
|
|
|
287
315
|
let sessionID: string | undefined
|
|
288
316
|
try {
|
|
289
|
-
const sessionRes = await
|
|
317
|
+
const sessionRes = await Promise.race([
|
|
318
|
+
client.session.create({ body: {} }),
|
|
319
|
+
new Promise<never>((_, reject) =>
|
|
320
|
+
setTimeout(
|
|
321
|
+
() => reject(new Error(`session.create timed out after ${TIMEOUT}ms`)),
|
|
322
|
+
TIMEOUT,
|
|
323
|
+
),
|
|
324
|
+
),
|
|
325
|
+
])
|
|
290
326
|
sessionID = sessionRes.data?.id
|
|
291
327
|
if (!sessionID) {
|
|
292
328
|
errors.push(`${providerID}/${modelID}: no session ID`)
|
|
@@ -344,7 +380,10 @@ async function seeImageViaSDK(
|
|
|
344
380
|
|
|
345
381
|
if (!result) {
|
|
346
382
|
const apiKey =
|
|
347
|
-
process.env.SEE_IMAGE_API_KEY ||
|
|
383
|
+
process.env.SEE_IMAGE_API_KEY ||
|
|
384
|
+
(process.env.SEE_IMAGE_PROVIDER &&
|
|
385
|
+
readProviderKey(process.env.SEE_IMAGE_PROVIDER)) ||
|
|
386
|
+
readProviderKey("opencode-go")
|
|
348
387
|
if (apiKey) {
|
|
349
388
|
try {
|
|
350
389
|
result = await seeImageViaHTTP(b64, mediaType, prompt, abort, apiKey)
|
|
@@ -364,7 +403,9 @@ async function seeImageViaSDK(
|
|
|
364
403
|
`see_image: SDK vision call failed for all candidates. ${errMsg}.${hint}`,
|
|
365
404
|
)
|
|
366
405
|
} finally {
|
|
367
|
-
|
|
406
|
+
if (tmpPath) {
|
|
407
|
+
try { fs.unlinkSync(tmpPath) } catch {}
|
|
408
|
+
}
|
|
368
409
|
}
|
|
369
410
|
}
|
|
370
411
|
|
|
@@ -481,7 +522,17 @@ const SeeImagePlugin: Plugin = async (ctx) => {
|
|
|
481
522
|
.string()
|
|
482
523
|
.optional()
|
|
483
524
|
.describe(
|
|
484
|
-
|
|
525
|
+
[
|
|
526
|
+
"What to ask the vision model. Omit for a general detailed description.",
|
|
527
|
+
"Tailor it to the situation for much better results:",
|
|
528
|
+
'- Reading/transcribing text or code: "Transcribe all text exactly, preserving layout, line breaks, and code indentation."',
|
|
529
|
+
'- An error or stack trace screenshot: "Quote the exact error message and stack trace, then state the likely cause."',
|
|
530
|
+
'- Reproducing a UI as code: "Describe the layout, components, text, colors, and spacing precisely enough to rebuild this UI in code."',
|
|
531
|
+
'- A technical diagram/architecture: "Explain this diagram: list each component and the relationships and data/flow direction between them."',
|
|
532
|
+
'- A chart/graph/dashboard: "Read this visualization: axes, series, key values, and the main takeaway."',
|
|
533
|
+
'- Comparing against an expected design: "Describe this UI in detail so it can be diffed against an expected layout (note any visible defects or misalignment)."',
|
|
534
|
+
"Otherwise pass the user's own specific question verbatim.",
|
|
535
|
+
].join("\n"),
|
|
485
536
|
),
|
|
486
537
|
},
|
|
487
538
|
async execute(args, context) {
|
|
@@ -500,7 +551,6 @@ const SeeImagePlugin: Plugin = async (ctx) => {
|
|
|
500
551
|
} else {
|
|
501
552
|
result = await seeImageViaSDK(
|
|
502
553
|
client,
|
|
503
|
-
$,
|
|
504
554
|
resolved.dataUrl,
|
|
505
555
|
resolved.mediaType,
|
|
506
556
|
prompt,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "opencode-see-image",
|
|
3
|
-
"version": "0.9.
|
|
3
|
+
"version": "0.9.3",
|
|
4
4
|
"description": "Give non-vision opencode models the ability to see images/screenshots by routing them to a vision-capable model (MiniMax M3 via opencode-go by default).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
@@ -23,6 +23,6 @@
|
|
|
23
23
|
"license": "MIT",
|
|
24
24
|
"dependencies": {
|
|
25
25
|
"@opencode-ai/plugin": "^1.15.0",
|
|
26
|
-
"opencode-plugin-update-kit": "^0.
|
|
26
|
+
"opencode-plugin-update-kit": "^0.2.0"
|
|
27
27
|
}
|
|
28
28
|
}
|