opencode-see-image 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/index.ts +23 -13
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -102,6 +102,7 @@ All settings are env-var overrides. The plugin uses opencode's SDK client by def
|
|
|
102
102
|
| `SEE_IMAGE_ENDPOINT` | `https://opencode.ai/zen/go/v1/messages` | HTTP endpoint (only used if `SEE_IMAGE_API_KEY` is set) |
|
|
103
103
|
| `SEE_IMAGE_API_VERSION` | `2023-06-01` | `anthropic-version` header (HTTP mode only) |
|
|
104
104
|
| `SEE_IMAGE_USER_AGENT` | _(Chrome UA)_ | User-Agent header (HTTP mode only) |
|
|
105
|
+
| `SEE_IMAGE_TIMEOUT` | `30000` | Per-candidate timeout in ms (SDK mode only). Prevents hanging on slow models. |
|
|
105
106
|
|
|
106
107
|
### Using a different vision model
|
|
107
108
|
|
package/index.ts
CHANGED
|
@@ -11,6 +11,7 @@ const ENDPOINT =
|
|
|
11
11
|
"https://opencode.ai/zen/go/v1/messages"
|
|
12
12
|
const MODEL = process.env.SEE_IMAGE_MODEL || "minimax-m3"
|
|
13
13
|
const PROVIDER_ID = process.env.SEE_IMAGE_PROVIDER || "opencode-go"
|
|
14
|
+
/**
 * Parse a millisecond timeout override.
 *
 * Node's `setTimeout` coerces a delay that is `NaN`, below 1, or above
 * 2147483647 to 1 ms, so an invalid override would make the timer fire almost
 * immediately instead of after the intended wait. Such values are therefore
 * rejected or clamped here.
 *
 * @param raw - Raw environment value; may be undefined or empty.
 * @param fallback - Value returned when `raw` is missing, non-numeric, or not positive.
 * @returns A positive integer delay no larger than the maximum timer delay.
 */
function parseTimeoutMs(raw: string | undefined, fallback = 30000): number {
  // Largest delay setTimeout honours (signed 32-bit max).
  const maxTimerDelayMs = 2147483647
  const parsed = Number.parseInt(raw ?? "", 10)
  if (!Number.isFinite(parsed) || parsed <= 0) return fallback
  return Math.min(parsed, maxTimerDelayMs)
}

// Per-candidate timeout in ms, overridable via SEE_IMAGE_TIMEOUT (default 30000).
const TIMEOUT = parseTimeoutMs(process.env.SEE_IMAGE_TIMEOUT)
|
|
14
15
|
const API_VERSION = process.env.SEE_IMAGE_API_VERSION || "2023-06-01"
|
|
15
16
|
const USER_AGENT =
|
|
16
17
|
process.env.SEE_IMAGE_USER_AGENT ||
|
|
@@ -222,6 +223,10 @@ async function seeImageViaSDK(
|
|
|
222
223
|
continue
|
|
223
224
|
}
|
|
224
225
|
|
|
226
|
+
// Per-candidate timeout so a slow model doesn't hang forever
|
|
227
|
+
const controller = new AbortController()
|
|
228
|
+
const timer = setTimeout(() => controller.abort(), TIMEOUT)
|
|
229
|
+
|
|
225
230
|
const result = await client.session.prompt({
|
|
226
231
|
path: { id: sessionID },
|
|
227
232
|
body: {
|
|
@@ -234,7 +239,9 @@ async function seeImageViaSDK(
|
|
|
234
239
|
system:
|
|
235
240
|
"You are a vision assistant. Describe the image accurately and concisely. Answer with text only.",
|
|
236
241
|
},
|
|
242
|
+
signal: controller.signal,
|
|
237
243
|
})
|
|
244
|
+
clearTimeout(timer)
|
|
238
245
|
|
|
239
246
|
const parts = result.data?.parts ?? []
|
|
240
247
|
const text = (parts as any[])
|
|
@@ -325,28 +332,31 @@ async function seeImageViaHTTP(
|
|
|
325
332
|
|
|
326
333
|
const SYSTEM_INSTRUCTIONS = `# See Image (vision bridge), opencode-see-image plugin
|
|
327
334
|
|
|
328
|
-
You have
|
|
335
|
+
You have a \`see_image\` tool. When the user attaches an image, opencode rejects it and you receive an error string containing the **filename**. You MUST call \`see_image\` to actually view it — do NOT just tell the user about the error.
|
|
329
336
|
|
|
330
|
-
## When
|
|
337
|
+
## When you MUST call \`see_image\`
|
|
331
338
|
|
|
332
|
-
|
|
333
|
-
1. You receive an error like: \`Cannot read "Screenshot ....png" (this model does not support image input)\`
|
|
334
|
-
2. The user references an image/screenshot they expect you to see ("see this", "look at this", "can you see this", ".png"/".jpg")
|
|
335
|
-
3. The user pastes an image path they want you to inspect
|
|
339
|
+
Call \`see_image\` immediately in ALL these cases — do not inform the user, do not ask for confirmation:
|
|
336
340
|
|
|
337
|
-
|
|
341
|
+
1. You receive an error containing \`Cannot read "Screenshot ..."\` or \`Cannot read "...png"\` or \`Cannot read "...jpg"\`
|
|
342
|
+
2. The error message says \`this model does not support image input\`
|
|
343
|
+
3. The error starts with \`ERROR:\` and mentions an image file
|
|
344
|
+
4. The user references an image/screenshot ("see this", "look at this", ".png", ".jpg")
|
|
345
|
+
5. The user pastes an image path
|
|
338
346
|
|
|
339
347
|
## How to use it
|
|
340
348
|
|
|
341
|
-
1.
|
|
342
|
-
2.
|
|
343
|
-
3.
|
|
349
|
+
1. Extract the filename from the error (it's in quotes in the error message, e.g. \`Screenshot 2026-06-19 at 02.18.53.png\`)
|
|
350
|
+
2. Call \`see_image\` with \`filePath\` set to that bare filename
|
|
351
|
+
3. Optionally pass a \`question\` if the user asked something specific
|
|
352
|
+
4. Answer using the returned description as if you saw the image. Be natural.
|
|
344
353
|
|
|
345
354
|
## Important
|
|
346
355
|
|
|
347
|
-
-
|
|
348
|
-
- If
|
|
349
|
-
-
|
|
356
|
+
- NEVER just repeat the error to the user. Call the tool.
|
|
357
|
+
- If \`see_image\` cannot find the file, tell the user the filename and ask for an absolute path.
|
|
358
|
+
- Do NOT use \`see_image\` for text files (\`.ts\`, \`.md\`, \`.json\`, etc.) — use \`read\` instead.
|
|
359
|
+
- Never guess image contents. If you haven't called \`see_image\`, you haven't seen the image.`
|
|
350
360
|
|
|
351
361
|
const PKG_NAME = "opencode-see-image"
|
|
352
362
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "opencode-see-image",
|
|
3
|
-
"version": "0.6.0",
|
|
3
|
+
"version": "0.6.2",
|
|
4
4
|
"description": "Give non-vision opencode models the ability to see images/screenshots by routing them to a vision-capable model (MiniMax M3 via opencode-go by default).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|