opencode-see-image 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +1 -0
  2. package/index.ts +41 -1
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -102,6 +102,7 @@ All settings are env-var overrides. The plugin uses opencode's SDK client by def
102
102
  | `SEE_IMAGE_ENDPOINT` | `https://opencode.ai/zen/go/v1/messages` | HTTP endpoint (only used if `SEE_IMAGE_API_KEY` is set) |
103
103
  | `SEE_IMAGE_API_VERSION` | `2023-06-01` | `anthropic-version` header (HTTP mode only) |
104
104
  | `SEE_IMAGE_USER_AGENT` | _(Chrome UA)_ | User-Agent header (HTTP mode only) |
105
+ | `SEE_IMAGE_TIMEOUT` | `30000` | Per-candidate timeout in milliseconds for SDK vision calls. Prevents a slow model from hanging the request. |
105
106
 
106
107
  ### Using a different vision model
107
108
 
package/index.ts CHANGED
@@ -11,6 +11,7 @@ const ENDPOINT =
11
11
  "https://opencode.ai/zen/go/v1/messages"
12
12
  const MODEL = process.env.SEE_IMAGE_MODEL || "minimax-m3"
13
13
  const PROVIDER_ID = process.env.SEE_IMAGE_PROVIDER || "opencode-go"
14
/**
 * Parses a millisecond timeout taken from an environment variable.
 *
 * Node's `setTimeout` silently coerces `NaN`, values below 1, and values
 * above 2147483647 to a 1 ms delay, which would abort every request almost
 * immediately. Invalid or non-positive input therefore falls back to
 * `fallback`, and oversized input is clamped to the largest delay the
 * timer accepts.
 *
 * @param raw - Raw environment value; may be undefined or empty.
 * @param fallback - Timeout used when `raw` is missing or invalid.
 * @returns A whole number of milliseconds in the range 1..2147483647.
 */
function parseTimeoutMs(raw: string | undefined, fallback = 30000): number {
  const maxTimerDelay = 2147483647
  const parsed = Number.parseInt(raw ?? "", 10)
  if (!Number.isFinite(parsed) || parsed < 1) return fallback
  return Math.min(parsed, maxTimerDelay)
}

// Per-candidate timeout in ms (override with SEE_IMAGE_TIMEOUT).
const TIMEOUT = parseTimeoutMs(process.env.SEE_IMAGE_TIMEOUT)
14
15
  const API_VERSION = process.env.SEE_IMAGE_API_VERSION || "2023-06-01"
15
16
  const USER_AGENT =
16
17
  process.env.SEE_IMAGE_USER_AGENT ||
@@ -194,6 +195,26 @@ function resolveImage(name: string, cwd: string, sessionID?: string): ResolvedIm
194
195
  )
195
196
  }
196
197
 
198
/**
 * Looks up a stored API key for `providerID` in opencode's `auth.json`.
 *
 * The data directory is resolved in priority order: `OPENCODE_DATA_DIR`,
 * then `$XDG_DATA_HOME/opencode`, then `~/.local/share/opencode`.
 *
 * @param providerID - Name of the provider entry inside `auth.json`.
 * @returns The entry's `key` when the entry has `type === "api"` and a
 *   non-empty string key; otherwise `null`. Never throws: a missing file,
 *   an unreadable file, or malformed JSON all yield `null`.
 */
function readProviderKey(providerID: string): string | null {
  try {
    const xdgBase = process.env.XDG_DATA_HOME
    const dataDir =
      process.env.OPENCODE_DATA_DIR ||
      (xdgBase ? path.join(xdgBase, "opencode") : "") ||
      path.join(os.homedir(), ".local/share/opencode")
    const authPath = path.join(dataDir, "auth.json")
    if (!fs.existsSync(authPath)) return null

    // auth.json is external input: narrow it step by step instead of
    // trusting its shape, so a non-string `key` can never be returned.
    const auth: unknown = JSON.parse(fs.readFileSync(authPath, "utf8"))
    if (typeof auth !== "object" || auth === null) return null

    const entry = (auth as Record<string, unknown>)[providerID]
    if (typeof entry !== "object" || entry === null) return null

    const { type, key } = entry as { type?: unknown; key?: unknown }
    return type === "api" && typeof key === "string" && key !== "" ? key : null
  } catch {
    // Best-effort lookup: any I/O or parse failure means "no key available".
    return null
  }
}
217
+
197
218
  async function seeImageViaSDK(
198
219
  client: any,
199
220
  dataUrl: string,
@@ -222,6 +243,10 @@ async function seeImageViaSDK(
222
243
  continue
223
244
  }
224
245
 
246
+ // Per-candidate timeout so a slow model doesn't hang forever
247
+ const controller = new AbortController()
248
+ const timer = setTimeout(() => controller.abort(), TIMEOUT)
249
+
225
250
  const result = await client.session.prompt({
226
251
  path: { id: sessionID },
227
252
  body: {
@@ -234,7 +259,9 @@ async function seeImageViaSDK(
234
259
  system:
235
260
  "You are a vision assistant. Describe the image accurately and concisely. Answer with text only.",
236
261
  },
262
+ signal: controller.signal,
237
263
  })
264
+ clearTimeout(timer)
238
265
 
239
266
  const parts = result.data?.parts ?? []
240
267
  const text = (parts as any[])
@@ -259,6 +286,18 @@ async function seeImageViaSDK(
259
286
  }
260
287
  }
261
288
 
289
+ // If user has an API key configured (from auth.json or env), try HTTP fallback
290
+ const b64 = dataUrl.split(",")[1] || ""
291
+ const apiKey =
292
+ process.env.SEE_IMAGE_API_KEY || readProviderKey("opencode-go")
293
+ if (apiKey) {
294
+ try {
295
+ return await seeImageViaHTTP(b64, mediaType, prompt, abort, apiKey)
296
+ } catch (e: any) {
297
+ errors.push(`http-fallback: ${e?.message ?? e}`)
298
+ }
299
+ }
300
+
262
301
  throw new Error(
263
302
  `see_image: SDK vision call failed for all candidates. ${errors.join("; ")}`,
264
303
  )
@@ -269,8 +308,9 @@ async function seeImageViaHTTP(
269
308
  mediaType: string,
270
309
  prompt: string,
271
310
  abort?: AbortSignal,
311
+ keyOverride?: string,
272
312
  ): Promise<{ text: string; model: string; provider: string }> {
273
- const key = process.env.SEE_IMAGE_API_KEY!
313
+ const key = keyOverride || process.env.SEE_IMAGE_API_KEY!
274
314
  const body = {
275
315
  model: MODEL,
276
316
  max_tokens: 2048,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-see-image",
3
- "version": "0.6.1",
3
+ "version": "0.7.0",
4
4
  "description": "Give non-vision opencode models the ability to see images/screenshots by routing them to a vision-capable model (MiniMax M3 via opencode-go by default).",
5
5
  "type": "module",
6
6
  "main": "index.ts",