opencode-see-image 0.8.6 → 0.9.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(bun build *)",
5
+ "Bash(npm view *)",
6
+ "Bash(FILTER_BRANCH_SQUELCH_WARNING=1 git filter-branch -f --msg-filter 'sed \"/Co-Authored-By: Claude/d\" | sed -e :a -e \"/^\\\\n*$/{\\\\$d;N;ba\" -e \"}\"' HEAD~2..HEAD)",
7
+ "Bash(echo \"--- created, exit $? ---\")",
8
+ "Bash(node -p \"require\\('./package.json'\\).version\")",
9
+ "Bash(echo \"local package.json version: $\\(node -p \"require\\('./package.json'\\).version\" \\)\")"
10
+ ]
11
+ }
12
+ }
package/README.md CHANGED
@@ -42,7 +42,7 @@ You need a connected vision-capable provider. The plugin auto-detects whichever
42
42
  2. Select **opencode** (OpenCode Zen)
43
43
  3. Paste your API key from [opencode.ai/auth](https://opencode.ai/auth)
44
44
 
45
- The plugin falls back to **big-pickle** (~12000ms). No subscription needed.
45
+ The plugin falls back to **mimo-v2.5-free**. No subscription needed.
46
46
 
47
47
  ### Paid, w/ OpenCode Go
48
48
  1. Run `/connect` in opencode
@@ -55,7 +55,7 @@ The plugin prefers **minimax-m3** via opencode-go (~3000ms) when available.
55
55
 
56
56
  Set the `SEE_IMAGE_*` env vars to point at any Anthropic-Messages-compatible endpoint. See [Configuration](#configuration) below.
57
57
 
58
- **Resolution order:** explicit `SEE_IMAGE_API_KEY` env → configured `SEE_IMAGE_PROVIDER` → `opencode-go` (MiniMax M3) → `opencode` (big-pickle, free).
58
+ **Resolution order:** explicit `SEE_IMAGE_API_KEY` env → configured `SEE_IMAGE_PROVIDER` → `opencode-go` (MiniMax M3) → `opencode` (`mimo-v2.5-free`, free).
59
59
 
60
60
  ## How it works
61
61
 
@@ -126,7 +126,8 @@ export SEE_IMAGE_MODEL="kimi-k2.7-code"
126
126
 
127
127
  | Model | Speed | Notes |
128
128
  |---|---|---|
129
- | `big-pickle` | ~12000ms | Free. Accurate. Default fallback when only Zen is connected. |
129
+ | `mimo-v2.5-free` | — | Free. Default fallback when only Zen is connected (routed via CLI). |
130
+ | `big-pickle` | ~12000ms | Free. Accurate. Alternative Zen fallback. |
130
131
 
131
132
  **Paid (OpenCode Go):**
132
133
 
package/index.ts CHANGED
@@ -11,7 +11,7 @@ const ENDPOINT =
11
11
  "https://opencode.ai/zen/go/v1/messages"
12
12
  const MODEL = process.env.SEE_IMAGE_MODEL || "minimax-m3"
13
13
  const PROVIDER_ID = process.env.SEE_IMAGE_PROVIDER || "opencode-go"
14
- const TIMEOUT = parseInt(process.env.SEE_IMAGE_TIMEOUT || "10000", 10)
14
+ const TIMEOUT = parseInt(process.env.SEE_IMAGE_TIMEOUT || "30000", 10)
15
15
  const API_VERSION = process.env.SEE_IMAGE_API_VERSION || "2023-06-01"
16
16
  const USER_AGENT =
17
17
  process.env.SEE_IMAGE_USER_AGENT ||
@@ -47,8 +47,9 @@ function resolveFromDb(
47
47
  const dbPath = opencodeDbPath()
48
48
  if (!fs.existsSync(dbPath)) return null
49
49
 
50
+ let db: Database | undefined
50
51
  try {
51
- const db = new Database(dbPath, { readonly: true })
52
+ db = new Database(dbPath, { readonly: true })
52
53
  let rows: Array<{ data: string }>
53
54
 
54
55
  if (!filename || filename === "clipboard") {
@@ -98,8 +99,6 @@ function resolveFromDb(
98
99
  }
99
100
  }
100
101
 
101
- db.close()
102
-
103
102
  if (!rows.length) return null
104
103
  const part = JSON.parse(rows[0].data)
105
104
  const url: string = part.url || ""
@@ -112,6 +111,8 @@ function resolveFromDb(
112
111
  }
113
112
  } catch {
114
113
  return null
114
+ } finally {
115
+ db?.close()
115
116
  }
116
117
  }
117
118
 
@@ -217,6 +218,7 @@ function readProviderKey(providerID: string): string | null {
217
218
 
218
219
  async function seeImageViaSDK(
219
220
  client: any,
221
+ $: any,
220
222
  dataUrl: string,
221
223
  mediaType: string,
222
224
  prompt: string,
@@ -224,14 +226,40 @@ async function seeImageViaSDK(
224
226
  ): Promise<{ text: string; model: string; provider: string }> {
225
227
  const errors: string[] = []
226
228
 
227
- // Write image to a temp file so the server can read it directly
229
+ // Write image to a temp file so the server can read it directly. Use the
230
+ // real extension so the CLI can sniff the type correctly.
228
231
  const b64 = dataUrl.split(",")[1] || ""
229
- const tmpPath = path.join(os.tmpdir(), `see-image-${Date.now()}.png`)
232
+ const ext =
233
+ Object.entries(EXT_MEDIA).find(([, m]) => m === mediaType)?.[0] || "png"
234
+ const tmpPath = path.join(os.tmpdir(), `see-image-${Date.now()}.${ext}`)
230
235
  try {
231
236
  fs.writeFileSync(tmpPath, Buffer.from(b64, "base64"))
232
237
  } catch {}
233
238
 
234
- const fileUrl = tmpPath
239
+ // For free opencode models, use CLI instead of SDK (SDK returns empty).
240
+ // Bun's $ doesn't accept an AbortSignal, so race the output against a
241
+ // timeout to actually bound how long a slow model can hang us.
242
+ const freeFallback = async (modelID: string, userPrompt: string): Promise<string | null> => {
243
+ try {
244
+ const proc = $`opencode run -f ${tmpPath} -m opencode/${modelID} ${userPrompt} --format json --dangerously-skip-permissions`.nothrow()
245
+ const out = await Promise.race([
246
+ proc.text(),
247
+ new Promise<never>((_, reject) =>
248
+ setTimeout(() => reject(new Error(`timed out after ${TIMEOUT}ms`)), TIMEOUT),
249
+ ),
250
+ ])
251
+ for (const line of out.split("\n").filter(Boolean)) {
252
+ try {
253
+ const parsed = JSON.parse(line)
254
+ if (parsed?.part?.type === "text" && parsed?.part?.text) {
255
+ return parsed.part.text
256
+ }
257
+ } catch {}
258
+ }
259
+ } catch {}
260
+ return null
261
+ }
262
+
235
263
  let result: { text: string; model: string; provider: string } | undefined
236
264
 
237
265
  try {
@@ -245,6 +273,17 @@ async function seeImageViaSDK(
245
273
  candidates.push({ providerID: "opencode", modelID: "mimo-v2.5-free" })
246
274
 
247
275
  for (const { providerID, modelID } of candidates) {
276
+ if (providerID === "opencode") {
277
+ // SDK session.prompt returns empty for free models; use CLI instead
278
+ const text = await freeFallback(modelID, prompt)
279
+ if (text) {
280
+ result = { text, model: modelID, provider: providerID }
281
+ break
282
+ }
283
+ errors.push(`${providerID}/${modelID}: no text from CLI fallback`)
284
+ continue
285
+ }
286
+
248
287
  let sessionID: string | undefined
249
288
  try {
250
289
  const sessionRes = await client.session.create({ body: {} })
@@ -255,22 +294,29 @@ async function seeImageViaSDK(
255
294
  }
256
295
 
257
296
  const controller = new AbortController()
297
+ const onAbort = () => controller.abort()
298
+ abort?.addEventListener("abort", onAbort)
258
299
  const timer = setTimeout(() => controller.abort(), TIMEOUT)
259
- const res = await client.session.prompt({
260
- path: { id: sessionID },
261
- body: {
262
- model: { providerID, modelID },
263
- parts: [
264
- { type: "file", mime: mediaType, url: providerID === "opencode" ? fileUrl : dataUrl },
265
- { type: "text", text: prompt },
266
- ],
267
- tools: {},
268
- system:
269
- "You are a vision assistant. Describe the image accurately and concisely. Answer with text only.",
270
- },
271
- signal: controller.signal,
272
- })
273
- clearTimeout(timer)
300
+ let res
301
+ try {
302
+ res = await client.session.prompt({
303
+ path: { id: sessionID },
304
+ body: {
305
+ model: { providerID, modelID },
306
+ parts: [
307
+ { type: "file", mime: mediaType, url: dataUrl },
308
+ { type: "text", text: prompt },
309
+ ],
310
+ tools: {},
311
+ system:
312
+ "You are a vision assistant. Describe the image accurately and concisely. Answer with text only.",
313
+ },
314
+ signal: controller.signal,
315
+ })
316
+ } finally {
317
+ clearTimeout(timer)
318
+ abort?.removeEventListener("abort", onAbort)
319
+ }
274
320
 
275
321
  const parts = res.data?.parts ?? []
276
322
  const text = (parts as any[])
@@ -312,7 +358,7 @@ async function seeImageViaSDK(
312
358
 
313
359
  const errMsg = errors.join("; ")
314
360
  const hint = errMsg.includes("usage limit")
315
- ? ` Enable usage from your balance at https://opencode.ai/workspace/wrk_01KVARG0A0Y87XV5JYBNJ0WRXB/go`
361
+ ? ` Enable usage from your balance in your opencode workspace at https://opencode.ai/workspace`
316
362
  : ""
317
363
  throw new Error(
318
364
  `see_image: SDK vision call failed for all candidates. ${errMsg}.${hint}`,
@@ -454,6 +500,7 @@ const SeeImagePlugin: Plugin = async (ctx) => {
454
500
  } else {
455
501
  result = await seeImageViaSDK(
456
502
  client,
503
+ $,
457
504
  resolved.dataUrl,
458
505
  resolved.mediaType,
459
506
  prompt,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-see-image",
3
- "version": "0.8.6",
3
+ "version": "0.9.1",
4
4
  "description": "Give non-vision opencode models the ability to see images/screenshots by routing them to a vision-capable model (MiniMax M3 via opencode-go by default).",
5
5
  "type": "module",
6
6
  "main": "index.ts",