opencode-see-image 0.6.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.ts +81 -4
  2. package/package.json +1 -1
package/index.ts CHANGED
@@ -195,13 +195,74 @@ function resolveImage(name: string, cwd: string, sessionID?: string): ResolvedIm
195
195
  )
196
196
  }
197
197
 
198
/**
 * Looks up a stored API key for `providerID` in opencode's `auth.json`.
 *
 * The data directory is resolved in priority order:
 *   1. `OPENCODE_DATA_DIR`
 *   2. `$XDG_DATA_HOME/opencode`
 *   3. `~/.local/share/opencode`
 *
 * This is a best-effort lookup: a missing file, unreadable file, malformed
 * JSON, or an entry of the wrong shape all yield `null` instead of throwing.
 *
 * @param providerID - Property name of the provider entry inside `auth.json`.
 * @returns The key when the entry has `type: "api"` and a non-empty string
 *   `key`; otherwise `null`.
 */
function readProviderKey(providerID: string): string | null {
  try {
    // `||` (not `??`) is deliberate: an empty env var falls through to the next option.
    const xdgDataHome = process.env.XDG_DATA_HOME
      ? path.join(process.env.XDG_DATA_HOME, "opencode")
      : ""
    const dataDir =
      process.env.OPENCODE_DATA_DIR ||
      xdgDataHome ||
      path.join(os.homedir(), ".local/share/opencode")
    const authPath = path.join(dataDir, "auth.json")
    if (!fs.existsSync(authPath)) return null

    // Treat the parsed file as untrusted input and narrow it before use.
    const auth: unknown = JSON.parse(fs.readFileSync(authPath, "utf8"))
    if (typeof auth !== "object" || auth === null) return null
    // Own-property check so names like "constructor" never resolve via the prototype chain.
    if (!Object.prototype.hasOwnProperty.call(auth, providerID)) return null

    const entry = (auth as Record<string, unknown>)[providerID]
    if (typeof entry !== "object" || entry === null) return null

    const { type, key } = entry as { type?: unknown; key?: unknown }
    // Only a non-empty *string* key is usable as a credential; anything else is rejected.
    return type === "api" && typeof key === "string" && key.length > 0
      ? key
      : null
  } catch {
    // Unreadable file or malformed JSON: behave as if no key is configured.
    return null
  }
}
217
+
198
218
  async function seeImageViaSDK(
199
219
  client: any,
200
220
  dataUrl: string,
201
221
  mediaType: string,
202
222
  prompt: string,
203
223
  abort?: AbortSignal,
224
+ currentSessionID?: string,
204
225
  ): Promise<{ text: string; model: string; provider: string }> {
226
+ const errors: string[] = []
227
+
228
+ // Try current session first (uses its existing model — free big-pickle, no new session needed)
229
+ if (currentSessionID) {
230
+ try {
231
+ const controller = new AbortController()
232
+ const timer = setTimeout(() => controller.abort(), TIMEOUT)
233
+ const result = await client.session.prompt({
234
+ path: { id: currentSessionID },
235
+ body: {
236
+ parts: [
237
+ { type: "file", mime: mediaType, url: dataUrl },
238
+ { type: "text", text: prompt },
239
+ ],
240
+ tools: {},
241
+ system:
242
+ "You are a vision assistant. Describe the image accurately and concisely. Answer with text only.",
243
+ },
244
+ signal: controller.signal,
245
+ })
246
+ clearTimeout(timer)
247
+
248
+ const parts = result.data?.parts ?? []
249
+ const text = (parts as any[])
250
+ .filter((p: any) => p.type === "text")
251
+ .map((p: any) => p.text)
252
+ .filter((t: any) => typeof t === "string" && t.length > 0)
253
+ .join("\n")
254
+ .trim()
255
+
256
+ if (text) {
257
+ return { text, model: "", provider: "current" }
258
+ }
259
+ errors.push(`current-session: no text in response`)
260
+ } catch (e: any) {
261
+ errors.push(`current-session: ${e?.message ?? e}`)
262
+ }
263
+ }
264
+
265
+ // Fallback: create new sessions with specific provider/model candidates
205
266
  const envProvider = process.env.SEE_IMAGE_PROVIDER
206
267
  const envModel = process.env.SEE_IMAGE_MODEL
207
268
  const candidates: Array<{ providerID: string; modelID: string }> = []
@@ -211,8 +272,6 @@ async function seeImageViaSDK(
211
272
  candidates.push({ providerID: "opencode-go", modelID: "minimax-m3" })
212
273
  candidates.push({ providerID: "opencode", modelID: "big-pickle" })
213
274
 
214
- const errors: string[] = []
215
-
216
275
  for (const { providerID, modelID } of candidates) {
217
276
  let sessionID: string | undefined
218
277
  try {
@@ -266,8 +325,24 @@ async function seeImageViaSDK(
266
325
  }
267
326
  }
268
327
 
328
+ // If user has an API key configured (from auth.json or env), try HTTP fallback
329
+ const b64 = dataUrl.split(",")[1] || ""
330
+ const apiKey =
331
+ process.env.SEE_IMAGE_API_KEY || readProviderKey("opencode-go")
332
+ if (apiKey) {
333
+ try {
334
+ return await seeImageViaHTTP(b64, mediaType, prompt, abort, apiKey)
335
+ } catch (e: any) {
336
+ errors.push(`http-fallback: ${e?.message ?? e}`)
337
+ }
338
+ }
339
+
340
+ const errMsg = errors.join("; ")
341
+ const hint = errMsg.includes("usage limit")
342
+ ? ` Enable usage from your balance at https://opencode.ai/workspace/wrk_01KVARG0A0Y87XV5JYBNJ0WRXB/go`
343
+ : ""
269
344
  throw new Error(
270
- `see_image: SDK vision call failed for all candidates. ${errors.join("; ")}`,
345
+ `see_image: SDK vision call failed for all candidates. ${errMsg}.${hint}`,
271
346
  )
272
347
  }
273
348
 
@@ -276,8 +351,9 @@ async function seeImageViaHTTP(
276
351
  mediaType: string,
277
352
  prompt: string,
278
353
  abort?: AbortSignal,
354
+ keyOverride?: string,
279
355
  ): Promise<{ text: string; model: string; provider: string }> {
280
- const key = process.env.SEE_IMAGE_API_KEY!
356
+ const key = keyOverride || process.env.SEE_IMAGE_API_KEY!
281
357
  const body = {
282
358
  model: MODEL,
283
359
  max_tokens: 2048,
@@ -406,6 +482,7 @@ const SeeImagePlugin: Plugin = async (ctx) => {
406
482
  resolved.mediaType,
407
483
  prompt,
408
484
  context.abort,
485
+ context.sessionID,
409
486
  )
410
487
  }
411
488
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-see-image",
3
- "version": "0.6.2",
3
+ "version": "0.8.0",
4
4
  "description": "Give non-vision opencode models the ability to see images/screenshots by routing them to a vision-capable model (MiniMax M3 via opencode-go by default).",
5
5
  "type": "module",
6
6
  "main": "index.ts",