opencode-see-image 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/index.ts +122 -50
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -70,9 +70,9 @@ plugin's system-prompt instructions tell the model to call see_image
|
|
|
70
70
|
│
|
|
71
71
|
▼
|
|
72
72
|
see_image tool:
|
|
73
|
-
1.
|
|
74
|
-
2.
|
|
75
|
-
3.
|
|
73
|
+
1. queries opencode's SQLite DB for the image (handles clipboard pastes, dragged files, screenshots)
|
|
74
|
+
2. falls back to filesystem search if not in DB
|
|
75
|
+
3. sends the image to the vision model via opencode's SDK
|
|
76
76
|
4. returns the textual description
|
|
77
77
|
│
|
|
78
78
|
▼
|
|
@@ -151,6 +151,10 @@ Then restart opencode. (No bun required, this uses opencode's own bun.)
|
|
|
151
151
|
"plugin": ["opencode-see-image@0.4.2"]
|
|
152
152
|
```
|
|
153
153
|
|
|
154
|
+
## Limitations
|
|
155
|
+
|
|
156
|
+
- **macOS-only filesystem search** — the filesystem fallback targets macOS screenshot temp dirs. Linux/Windows users should rely on the DB lookup (which is cross-platform) or pass absolute paths.
|
|
157
|
+
|
|
154
158
|
## File search locations
|
|
155
159
|
|
|
156
160
|
When opencode rejects an image attachment, the model only receives a bare filename. `see_image` searches these locations in order:
|
package/index.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { tool } from "@opencode-ai/plugin"
|
|
|
2
2
|
import path from "path"
|
|
3
3
|
import os from "os"
|
|
4
4
|
import fs from "fs"
|
|
5
|
+
import { Database } from "bun:sqlite"
|
|
5
6
|
import type { Plugin } from "@opencode-ai/plugin"
|
|
6
7
|
|
|
7
8
|
const ENDPOINT =
|
|
@@ -23,51 +24,125 @@ const EXT_MEDIA: Record<string, string> = {
|
|
|
23
24
|
bmp: "image/bmp",
|
|
24
25
|
}
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
|
|
27
|
+
/**
 * An image that has been located and loaded, ready to hand to a vision model.
 */
type ResolvedImage = {
  /** Full `data:` URL carrying the image payload. */
  dataUrl: string;
  /** MIME type of the image, e.g. `image/png`. */
  mediaType: string;
  /** Where the image came from: `"opencode-db"` or the absolute file path. */
  source: string;
}
|
|
28
32
|
|
|
29
|
-
|
|
30
|
-
|
|
33
|
+
/**
 * Resolve the path of opencode's SQLite database file (`opencode.db`).
 *
 * Lookup order for the directory that holds the database:
 * 1. `OPENCODE_DATA_DIR` — used as-is.
 * 2. `XDG_DATA_HOME` — this is the XDG *base* data directory, so the
 *    `opencode` application subdirectory is appended to it.
 * 3. `~/.local/share/opencode` — the XDG default when nothing is set.
 *
 * @returns Path to `opencode.db`; the file is not checked for existence.
 */
function opencodeDbPath(): string {
  const explicitDir = process.env.OPENCODE_DATA_DIR
  if (explicitDir) return path.join(explicitDir, "opencode.db")

  // XDG_DATA_HOME replaces "~/.local/share", not "~/.local/share/opencode",
  // so both branches must end in the same "opencode" subdirectory.
  const xdgDataHome = process.env.XDG_DATA_HOME
  const dataDir = xdgDataHome
    ? path.join(xdgDataHome, "opencode")
    : path.join(os.homedir(), ".local/share/opencode")

  return path.join(dataDir, "opencode.db")
}
|
|
31
40
|
|
|
32
|
-
|
|
33
|
-
const
|
|
41
|
+
/**
 * Look up an attached image in opencode's SQLite database.
 *
 * Selects the most recently created `part` row whose JSON `data` has
 * `type = 'file'` and the given `filename`, and returns its `url` when that
 * is an inline `data:` URL.
 *
 * @param filename Value matched against the part's `$.filename`.
 * @returns The stored image, or `null` when the database file is missing, no
 *   row matches, the stored URL is not a `data:` URL, or any error occurs
 *   (the lookup is best-effort and never throws).
 */
function resolveFromDb(filename: string): ResolvedImage | null {
  const dbPath = opencodeDbPath()
  if (!fs.existsSync(dbPath)) return null

  try {
    const db = new Database(dbPath, { readonly: true })
    let rows: Array<{ data: string }>
    try {
      rows = db
        .query(
          `SELECT data FROM part
           WHERE json_extract(data, '$.type') = 'file'
             AND json_extract(data, '$.filename') = ?
           ORDER BY time_created DESC LIMIT 1`,
        )
        .all(filename) as Array<{ data: string }>
    } finally {
      // Release the handle even when the query throws (e.g. schema mismatch).
      db.close()
    }

    const [latest] = rows
    if (!latest) return null

    // The stored JSON is external data: only trust fields that are strings.
    const part = JSON.parse(latest.data) as { url?: unknown; mime?: unknown }
    const url = typeof part.url === "string" ? part.url : ""
    if (!url.startsWith("data:")) return null

    return {
      dataUrl: url,
      mediaType:
        typeof part.mime === "string" && part.mime ? part.mime : "image/png",
      source: "opencode-db",
    }
  } catch {
    // Unreadable DB, malformed JSON, etc. — let the caller try other sources.
    return null
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Locate an image file on disk and load it as a base64 `data:` URL.
 *
 * `name` is tried as an absolute path (or relative to `cwd`) first. If that
 * is not a regular file, the bare name is looked up, in order, in:
 * `NSIRD_screencaptureui*` subdirectories of `$TMPDIR/TemporaryItems`,
 * `$TMPDIR/TemporaryItems` itself, `~/Desktop`, `~/Downloads`, and `cwd`.
 * `TMPDIR` defaults to `/tmp` when unset.
 *
 * @param name Absolute path, relative path, or bare filename of the image.
 * @param cwd Directory used to resolve relative names and as the last search dir.
 * @returns The loaded image, or `null` when no regular file was found.
 * @throws If the located file cannot be read.
 */
function resolveFromFilesystem(
  name: string,
  cwd: string,
): ResolvedImage | null {
  // existsSync() is also true for directories, which readFileSync() cannot
  // read (EISDIR), so every candidate must be a regular file.
  const isFile = (candidate: string): boolean => {
    try {
      return fs.statSync(candidate).isFile()
    } catch {
      return false
    }
  }

  const direct = path.isAbsolute(name) ? name : path.resolve(cwd, name)
  let absPath: string | null = isFile(direct) ? direct : null

  if (!absPath) {
    const tempItems = path.join(process.env.TMPDIR || "/tmp", "TemporaryItems")
    const searchDirs: string[] = []

    try {
      for (const sub of fs.readdirSync(tempItems, { withFileTypes: true })) {
        if (sub.isDirectory() && sub.name.startsWith("NSIRD_screencaptureui")) {
          searchDirs.push(path.join(tempItems, sub.name))
        }
      }
    } catch {
      // TemporaryItems is missing or unreadable; keep searching elsewhere.
    }

    searchDirs.push(
      tempItems,
      path.join(os.homedir(), "Desktop"),
      path.join(os.homedir(), "Downloads"),
      cwd,
    )

    absPath =
      searchDirs
        .filter((dir) => Boolean(dir))
        .map((dir) => path.join(dir, name))
        .find(isFile) ?? null
  }

  if (!absPath) return null

  const ext = path.extname(absPath).slice(1).toLowerCase()
  const mediaType = EXT_MEDIA[ext] || "image/png"
  const b64 = fs.readFileSync(absPath).toString("base64")

  return {
    dataUrl: `data:${mediaType};base64,${b64}`,
    mediaType,
    source: absPath,
  }
}
|
|
57
131
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if (entry.name === name) return path.join(dir, name)
|
|
63
|
-
}
|
|
64
|
-
} catch {}
|
|
132
|
+
/**
 * Resolve an image reference to loaded image data.
 *
 * Sources are tried in order and the first hit wins:
 * 1. only when `name` is exactly `"clipboard"`: a DB lookup with an empty
 *    filename (NOTE(review): presumably how pasted images are stored — confirm),
 * 2. a DB lookup by `name`,
 * 3. the filesystem search relative to `cwd`.
 *
 * @param name Absolute path, bare filename, or the literal `"clipboard"`.
 * @param cwd Working directory passed to the filesystem search.
 * @returns The first image found.
 * @throws Error when none of the sources yields an image.
 */
function resolveImage(name: string, cwd: string): ResolvedImage {
  const lookups: Array<() => ResolvedImage | null> = []

  if (name === "clipboard") lookups.push(() => resolveFromDb(""))
  lookups.push(
    () => resolveFromDb(name),
    () => resolveFromFilesystem(name, cwd),
  )

  for (const lookup of lookups) {
    const hit = lookup()
    if (hit) return hit
  }

  throw new Error(
    `see_image: could not find "${name}". Searched opencode DB and filesystem (cwd, ~/Desktop, ~/Downloads, temp). Pass an absolute filePath instead.`,
  )
}
|
|
73
148
|
|
|
@@ -276,16 +351,12 @@ async function maybeAutoUpdate(
|
|
|
276
351
|
|
|
277
352
|
log(`update available: ${current} -> ${latest}; updating`, "info")
|
|
278
353
|
|
|
279
|
-
// Use opencode's own plugin command to re-resolve from npm. This uses
|
|
280
|
-
// opencode's bundled bun, so it works even when bun isn't installed
|
|
281
|
-
// globally on the user's PATH.
|
|
282
354
|
const opencodeBin =
|
|
283
355
|
process.env.OPENCODE_BIN ||
|
|
284
356
|
path.join(os.homedir(), ".opencode/bin/opencode")
|
|
285
357
|
try {
|
|
286
358
|
await $`${opencodeBin} plugin ${PKG_NAME} --force --global`.quiet()
|
|
287
359
|
} catch (e: any) {
|
|
288
|
-
// Fallback: try bare `opencode` on PATH
|
|
289
360
|
try {
|
|
290
361
|
await $`opencode plugin ${PKG_NAME} --force --global`.quiet()
|
|
291
362
|
} catch (e2: any) {
|
|
@@ -319,7 +390,7 @@ const SeeImagePlugin: Plugin = async (ctx) => {
|
|
|
319
390
|
|
|
320
391
|
const seeImageTool = tool({
|
|
321
392
|
description:
|
|
322
|
-
'See an image/screenshot that the current model cannot view. Use when the user attaches an image and you get a "this model does not support image input" / "Cannot read" error, or when a screenshot/image is referenced ("see this", "can you see", .png/.jpg). Routes the image to a vision-capable model and returns a detailed textual description you can reason about as if you saw it. Pass filePath as an absolute path OR a bare filename (auto-located
|
|
393
|
+
'See an image/screenshot that the current model cannot view. Use when the user attaches an image and you get a "this model does not support image input" / "Cannot read" error, or when a screenshot/image is referenced ("see this", "can you see", .png/.jpg). Routes the image to a vision-capable model and returns a detailed textual description you can reason about as if you saw it. Pass filePath as an absolute path OR a bare filename (auto-located from opencode DB or filesystem).',
|
|
323
394
|
args: {
|
|
324
395
|
filePath: tool.schema
|
|
325
396
|
.string()
|
|
@@ -334,13 +405,7 @@ const SeeImagePlugin: Plugin = async (ctx) => {
|
|
|
334
405
|
),
|
|
335
406
|
},
|
|
336
407
|
async execute(args, context) {
|
|
337
|
-
const
|
|
338
|
-
const ext = path.extname(fullPath).slice(1).toLowerCase()
|
|
339
|
-
const mediaType = EXT_MEDIA[ext] || "image/png"
|
|
340
|
-
|
|
341
|
-
const buf = fs.readFileSync(fullPath)
|
|
342
|
-
const b64 = Buffer.from(buf).toString("base64")
|
|
343
|
-
const dataUrl = `data:${mediaType};base64,${b64}`
|
|
408
|
+
const resolved = resolveImage(args.filePath, context.directory)
|
|
344
409
|
|
|
345
410
|
const prompt =
|
|
346
411
|
args.question && args.question.trim().length > 0
|
|
@@ -350,17 +415,24 @@ const SeeImagePlugin: Plugin = async (ctx) => {
|
|
|
350
415
|
let result: { text: string; model: string; provider: string }
|
|
351
416
|
|
|
352
417
|
if (process.env.SEE_IMAGE_API_KEY) {
|
|
353
|
-
|
|
418
|
+
const b64 = resolved.dataUrl.split(",")[1] || ""
|
|
419
|
+
result = await seeImageViaHTTP(b64, resolved.mediaType, prompt, context.abort)
|
|
354
420
|
} else {
|
|
355
|
-
result = await seeImageViaSDK(
|
|
421
|
+
result = await seeImageViaSDK(
|
|
422
|
+
client,
|
|
423
|
+
resolved.dataUrl,
|
|
424
|
+
resolved.mediaType,
|
|
425
|
+
prompt,
|
|
426
|
+
context.abort,
|
|
427
|
+
)
|
|
356
428
|
}
|
|
357
429
|
|
|
358
430
|
context.metadata({
|
|
359
|
-
title: `see_image: ${
|
|
431
|
+
title: `see_image: ${args.filePath}`,
|
|
360
432
|
metadata: {
|
|
361
433
|
model: result.model,
|
|
362
434
|
provider: result.provider,
|
|
363
|
-
|
|
435
|
+
source: resolved.source,
|
|
364
436
|
},
|
|
365
437
|
})
|
|
366
438
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "opencode-see-image",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Give non-vision opencode models the ability to see images/screenshots by routing them to a vision-capable model (MiniMax M3 via opencode-go by default).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|