opencode-see-image 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +7 -3
  2. package/index.ts +122 -50
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -70,9 +70,9 @@ plugin's system-prompt instructions tell the model to call see_image
70
70
 
71
71
 
72
72
  see_image tool:
73
- 1. locates the file (macOS screenshot temp dirs, ~/Desktop, ~/Downloads, cwd)
74
- 2. base64-encodes it
75
- 3. routes it to the vision model via opencode's SDK (or direct HTTP if SEE_IMAGE_API_KEY is set)
73
+ 1. queries opencode's SQLite DB for the image (handles clipboard pastes, dragged files, screenshots)
74
+ 2. falls back to filesystem search if not in DB
75
+ 3. sends the image to the vision model via opencode's SDK
76
76
  4. returns the textual description
77
77
 
78
78
 
@@ -151,6 +151,10 @@ Then restart opencode. (No bun required, this uses opencode's own bun.)
151
151
  "plugin": ["opencode-see-image@0.4.2"]
152
152
  ```
153
153
 
154
+ ## Limitations
155
+
156
+ - **macOS-only filesystem search** — the filesystem fallback targets macOS screenshot temp dirs. Linux/Windows users should rely on the DB lookup (which is cross-platform) or pass absolute paths.
157
+
154
158
  ## File search locations
155
159
 
156
160
  When opencode rejects an image attachment, the model only receives a bare filename. `see_image` searches these locations in order:
package/index.ts CHANGED
@@ -2,6 +2,7 @@ import { tool } from "@opencode-ai/plugin"
2
2
  import path from "path"
3
3
  import os from "os"
4
4
  import fs from "fs"
5
+ import { Database } from "bun:sqlite"
5
6
  import type { Plugin } from "@opencode-ai/plugin"
6
7
 
7
8
  const ENDPOINT =
@@ -23,51 +24,125 @@ const EXT_MEDIA: Record<string, string> = {
23
24
  bmp: "image/bmp",
24
25
  }
25
26
 
26
- function resolveFilePath(name: string, cwd: string): string {
27
- if (path.isAbsolute(name) && fs.existsSync(name)) return name
27
+ type ResolvedImage = {
28
+ dataUrl: string
29
+ mediaType: string
30
+ source: string
31
+ }
28
32
 
29
- const resolved = path.resolve(cwd, name)
30
- if (fs.existsSync(resolved)) return resolved
33
+ function opencodeDbPath(): string {
34
+ const dataDir =
35
+ process.env.OPENCODE_DATA_DIR ||
36
+ process.env.XDG_DATA_HOME ||
37
+ path.join(os.homedir(), ".local/share/opencode")
38
+ return path.join(dataDir, "opencode.db")
39
+ }
31
40
 
32
- const tmpdir = process.env.TMPDIR || "/tmp"
33
- const searchDirs: string[] = []
41
+ function resolveFromDb(filename: string): ResolvedImage | null {
42
+ const dbPath = opencodeDbPath()
43
+ if (!fs.existsSync(dbPath)) return null
34
44
 
35
- const tempItems = path.join(tmpdir, "TemporaryItems")
36
- if (fs.existsSync(tempItems)) {
37
- try {
38
- for (const sub of fs.readdirSync(tempItems, { withFileTypes: true })) {
39
- if (sub.isDirectory() && sub.name.startsWith("NSIRD_screencaptureui")) {
40
- searchDirs.push(path.join(tempItems, sub.name))
45
+ try {
46
+ const db = new Database(dbPath, { readonly: true })
47
+ const rows = db
48
+ .query(
49
+ `SELECT data FROM part
50
+ WHERE json_extract(data, '$.type') = 'file'
51
+ AND json_extract(data, '$.filename') = ?
52
+ ORDER BY time_created DESC LIMIT 1`,
53
+ )
54
+ .all(filename) as Array<{ data: string }>
55
+
56
+ db.close()
57
+
58
+ if (!rows.length) return null
59
+ const part = JSON.parse(rows[0].data)
60
+ const url: string = part.url || ""
61
+ if (!url.startsWith("data:")) return null
62
+
63
+ return {
64
+ dataUrl: url,
65
+ mediaType: part.mime || "image/png",
66
+ source: "opencode-db",
67
+ }
68
+ } catch {
69
+ return null
70
+ }
71
+ }
72
+
73
+ function resolveFromFilesystem(
74
+ name: string,
75
+ cwd: string,
76
+ ): ResolvedImage | null {
77
+ let absPath: string | null = null
78
+
79
+ if (path.isAbsolute(name) && fs.existsSync(name)) {
80
+ absPath = name
81
+ } else {
82
+ const resolved = path.resolve(cwd, name)
83
+ if (fs.existsSync(resolved)) absPath = resolved
84
+ }
85
+
86
+ if (!absPath) {
87
+ const tmpdir = process.env.TMPDIR || "/tmp"
88
+ const searchDirs: string[] = []
89
+ const tempItems = path.join(tmpdir, "TemporaryItems")
90
+ if (fs.existsSync(tempItems)) {
91
+ try {
92
+ for (const sub of fs.readdirSync(tempItems, { withFileTypes: true })) {
93
+ if (
94
+ sub.isDirectory() &&
95
+ sub.name.startsWith("NSIRD_screencaptureui")
96
+ ) {
97
+ searchDirs.push(path.join(tempItems, sub.name))
98
+ }
41
99
  }
42
- }
43
- } catch {}
100
+ } catch {}
101
+ }
102
+ searchDirs.push(tempItems)
103
+ searchDirs.push(path.join(os.homedir(), "Desktop"))
104
+ searchDirs.push(path.join(os.homedir(), "Downloads"))
105
+ searchDirs.push(cwd)
106
+
107
+ for (const dir of searchDirs) {
108
+ if (!dir) continue
109
+ try {
110
+ const full = path.join(dir, name)
111
+ if (fs.existsSync(full)) {
112
+ absPath = full
113
+ break
114
+ }
115
+ } catch {}
116
+ }
44
117
  }
45
- searchDirs.push(tempItems)
46
- searchDirs.push(path.join(os.homedir(), "Desktop"))
47
- searchDirs.push(path.join(os.homedir(), "Downloads"))
48
- searchDirs.push(cwd)
49
118
 
50
- for (const dir of searchDirs) {
51
- if (!dir) continue
52
- try {
53
- const full = path.join(dir, name)
54
- if (fs.existsSync(full)) return full
55
- } catch {}
119
+ if (!absPath || !fs.existsSync(absPath)) return null
120
+
121
+ const ext = path.extname(absPath).slice(1).toLowerCase()
122
+ const mediaType = EXT_MEDIA[ext] || "image/png"
123
+ const b64 = Buffer.from(fs.readFileSync(absPath)).toString("base64")
124
+
125
+ return {
126
+ dataUrl: `data:${mediaType};base64,${b64}`,
127
+ mediaType,
128
+ source: absPath,
56
129
  }
130
+ }
57
131
 
58
- for (const dir of searchDirs) {
59
- if (!dir || !fs.existsSync(dir)) continue
60
- try {
61
- for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
62
- if (entry.name === name) return path.join(dir, name)
63
- }
64
- } catch {}
132
+ function resolveImage(name: string, cwd: string): ResolvedImage {
133
+ if (name === "clipboard") {
134
+ const fromDb = resolveFromDb("")
135
+ if (fromDb) return fromDb
65
136
  }
66
137
 
67
- const searched = searchDirs.filter(Boolean).join(", ")
138
+ const fromDb = resolveFromDb(name)
139
+ if (fromDb) return fromDb
140
+
141
+ const fromFs = resolveFromFilesystem(name, cwd)
142
+ if (fromFs) return fromFs
143
+
68
144
  throw new Error(
69
- `see_image: could not find "${name}". Searched: ${searched}. ` +
70
- `Pass an absolute filePath instead.`,
145
+ `see_image: could not find "${name}". Searched opencode DB and filesystem (cwd, ~/Desktop, ~/Downloads, temp). Pass an absolute filePath instead.`,
71
146
  )
72
147
  }
73
148
 
@@ -276,16 +351,12 @@ async function maybeAutoUpdate(
276
351
 
277
352
  log(`update available: ${current} -> ${latest}; updating`, "info")
278
353
 
279
- // Use opencode's own plugin command to re-resolve from npm. This uses
280
- // opencode's bundled bun, so it works even when bun isn't installed
281
- // globally on the user's PATH.
282
354
  const opencodeBin =
283
355
  process.env.OPENCODE_BIN ||
284
356
  path.join(os.homedir(), ".opencode/bin/opencode")
285
357
  try {
286
358
  await $`${opencodeBin} plugin ${PKG_NAME} --force --global`.quiet()
287
359
  } catch (e: any) {
288
- // Fallback: try bare `opencode` on PATH
289
360
  try {
290
361
  await $`opencode plugin ${PKG_NAME} --force --global`.quiet()
291
362
  } catch (e2: any) {
@@ -319,7 +390,7 @@ const SeeImagePlugin: Plugin = async (ctx) => {
319
390
 
320
391
  const seeImageTool = tool({
321
392
  description:
322
- 'See an image/screenshot that the current model cannot view. Use when the user attaches an image and you get a "this model does not support image input" / "Cannot read" error, or when a screenshot/image is referenced ("see this", "can you see", .png/.jpg). Routes the image to a vision-capable model and returns a detailed textual description you can reason about as if you saw it. Pass filePath as an absolute path OR a bare filename (auto-located in macOS screenshot temp dirs, ~/Desktop, ~/Downloads, cwd).',
393
+ 'See an image/screenshot that the current model cannot view. Use when the user attaches an image and you get a "this model does not support image input" / "Cannot read" error, or when a screenshot/image is referenced ("see this", "can you see", .png/.jpg). Routes the image to a vision-capable model and returns a detailed textual description you can reason about as if you saw it. Pass filePath as an absolute path OR a bare filename (auto-located from opencode DB or filesystem).',
323
394
  args: {
324
395
  filePath: tool.schema
325
396
  .string()
@@ -334,13 +405,7 @@ const SeeImagePlugin: Plugin = async (ctx) => {
334
405
  ),
335
406
  },
336
407
  async execute(args, context) {
337
- const fullPath = resolveFilePath(args.filePath, context.directory)
338
- const ext = path.extname(fullPath).slice(1).toLowerCase()
339
- const mediaType = EXT_MEDIA[ext] || "image/png"
340
-
341
- const buf = fs.readFileSync(fullPath)
342
- const b64 = Buffer.from(buf).toString("base64")
343
- const dataUrl = `data:${mediaType};base64,${b64}`
408
+ const resolved = resolveImage(args.filePath, context.directory)
344
409
 
345
410
  const prompt =
346
411
  args.question && args.question.trim().length > 0
@@ -350,17 +415,24 @@ const SeeImagePlugin: Plugin = async (ctx) => {
350
415
  let result: { text: string; model: string; provider: string }
351
416
 
352
417
  if (process.env.SEE_IMAGE_API_KEY) {
353
- result = await seeImageViaHTTP(b64, mediaType, prompt, context.abort)
418
+ const b64 = resolved.dataUrl.split(",")[1] || ""
419
+ result = await seeImageViaHTTP(b64, resolved.mediaType, prompt, context.abort)
354
420
  } else {
355
- result = await seeImageViaSDK(client, dataUrl, mediaType, prompt, context.abort)
421
+ result = await seeImageViaSDK(
422
+ client,
423
+ resolved.dataUrl,
424
+ resolved.mediaType,
425
+ prompt,
426
+ context.abort,
427
+ )
356
428
  }
357
429
 
358
430
  context.metadata({
359
- title: `see_image: ${path.basename(fullPath)}`,
431
+ title: `see_image: ${args.filePath}`,
360
432
  metadata: {
361
433
  model: result.model,
362
434
  provider: result.provider,
363
- file: fullPath,
435
+ source: resolved.source,
364
436
  },
365
437
  })
366
438
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-see-image",
3
- "version": "0.4.2",
3
+ "version": "0.5.0",
4
4
  "description": "Give non-vision opencode models the ability to see images/screenshots by routing them to a vision-capable model (MiniMax M3 via opencode-go by default).",
5
5
  "type": "module",
6
6
  "main": "index.ts",