opencode-see-image 0.8.3 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +91 -77
- package/package.json +1 -1
package/index.ts
CHANGED
|
@@ -11,7 +11,7 @@ const ENDPOINT =
|
|
|
11
11
|
"https://opencode.ai/zen/go/v1/messages"
|
|
12
12
|
const MODEL = process.env.SEE_IMAGE_MODEL || "minimax-m3"
|
|
13
13
|
const PROVIDER_ID = process.env.SEE_IMAGE_PROVIDER || "opencode-go"
|
|
14
|
-
const TIMEOUT = parseInt(process.env.SEE_IMAGE_TIMEOUT || "
|
|
14
|
+
const TIMEOUT = parseInt(process.env.SEE_IMAGE_TIMEOUT || "10000", 10)
|
|
15
15
|
const API_VERSION = process.env.SEE_IMAGE_API_VERSION || "2023-06-01"
|
|
16
16
|
const USER_AGENT =
|
|
17
17
|
process.env.SEE_IMAGE_USER_AGENT ||
|
|
@@ -224,88 +224,102 @@ async function seeImageViaSDK(
|
|
|
224
224
|
): Promise<{ text: string; model: string; provider: string }> {
|
|
225
225
|
const errors: string[] = []
|
|
226
226
|
|
|
227
|
-
//
|
|
228
|
-
const
|
|
229
|
-
const
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
}
|
|
234
|
-
candidates.push({ providerID: "opencode-go", modelID: "minimax-m3" })
|
|
235
|
-
candidates.push({ providerID: "opencode", modelID: "mimo-v2.5-free" })
|
|
236
|
-
|
|
237
|
-
for (const { providerID, modelID } of candidates) {
|
|
238
|
-
let sessionID: string | undefined
|
|
239
|
-
try {
|
|
240
|
-
const sessionRes = await client.session.create({ body: {} })
|
|
241
|
-
sessionID = sessionRes.data?.id
|
|
242
|
-
if (!sessionID) {
|
|
243
|
-
errors.push(`${providerID}/${modelID}: no session ID`)
|
|
244
|
-
continue
|
|
245
|
-
}
|
|
227
|
+
// Write image to a temp file so the server can read it directly
|
|
228
|
+
const b64 = dataUrl.split(",")[1] || ""
|
|
229
|
+
const tmpPath = path.join(os.tmpdir(), `see-image-${Date.now()}.png`)
|
|
230
|
+
try {
|
|
231
|
+
fs.writeFileSync(tmpPath, Buffer.from(b64, "base64"))
|
|
232
|
+
} catch {}
|
|
246
233
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
.
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
234
|
+
const fileUrl = `file://${tmpPath}`
|
|
235
|
+
let result: { text: string; model: string; provider: string } | undefined
|
|
236
|
+
|
|
237
|
+
try {
|
|
238
|
+
const candidates: Array<{ providerID: string; modelID: string }> = []
|
|
239
|
+
const envProvider = process.env.SEE_IMAGE_PROVIDER
|
|
240
|
+
const envModel = process.env.SEE_IMAGE_MODEL
|
|
241
|
+
if (envProvider && envModel) {
|
|
242
|
+
candidates.push({ providerID: envProvider, modelID: envModel })
|
|
243
|
+
}
|
|
244
|
+
candidates.push({ providerID: "opencode-go", modelID: "minimax-m3" })
|
|
245
|
+
candidates.push({ providerID: "opencode", modelID: "mimo-v2.5-free" })
|
|
246
|
+
|
|
247
|
+
for (const { providerID, modelID } of candidates) {
|
|
248
|
+
let sessionID: string | undefined
|
|
249
|
+
try {
|
|
250
|
+
const sessionRes = await client.session.create({ body: {} })
|
|
251
|
+
sessionID = sessionRes.data?.id
|
|
252
|
+
if (!sessionID) {
|
|
253
|
+
errors.push(`${providerID}/${modelID}: no session ID`)
|
|
254
|
+
continue
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
const controller = new AbortController()
|
|
258
|
+
const timer = setTimeout(() => controller.abort(), TIMEOUT)
|
|
259
|
+
const res = await client.session.prompt({
|
|
260
|
+
path: { id: sessionID },
|
|
261
|
+
body: {
|
|
262
|
+
model: { providerID, modelID },
|
|
263
|
+
parts: [
|
|
264
|
+
{ type: "file", mime: mediaType, url: providerID === "opencode" ? fileUrl : dataUrl },
|
|
265
|
+
{ type: "text", text: prompt },
|
|
266
|
+
],
|
|
267
|
+
tools: {},
|
|
268
|
+
system:
|
|
269
|
+
"You are a vision assistant. Describe the image accurately and concisely. Answer with text only.",
|
|
270
|
+
},
|
|
271
|
+
signal: controller.signal,
|
|
272
|
+
})
|
|
273
|
+
clearTimeout(timer)
|
|
274
|
+
|
|
275
|
+
const parts = res.data?.parts ?? []
|
|
276
|
+
const text = (parts as any[])
|
|
277
|
+
.filter((p: any) => p.type === "text")
|
|
278
|
+
.map((p: any) => p.text)
|
|
279
|
+
.filter((t: any) => typeof t === "string" && t.length > 0)
|
|
280
|
+
.join("\n")
|
|
281
|
+
.trim()
|
|
282
|
+
|
|
283
|
+
if (text) {
|
|
284
|
+
result = { text, model: modelID, provider: providerID }
|
|
285
|
+
break
|
|
286
|
+
}
|
|
287
|
+
errors.push(`${providerID}/${modelID}: no text in response`)
|
|
288
|
+
} catch (e: any) {
|
|
289
|
+
errors.push(`${providerID}/${modelID}: ${e?.message ?? e}`)
|
|
290
|
+
} finally {
|
|
291
|
+
if (sessionID) {
|
|
292
|
+
await client.session
|
|
293
|
+
.delete({ path: { id: sessionID } })
|
|
294
|
+
.catch(() => {})
|
|
295
|
+
}
|
|
286
296
|
}
|
|
287
297
|
}
|
|
288
|
-
}
|
|
289
298
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
+
if (!result) {
|
|
300
|
+
const apiKey =
|
|
301
|
+
process.env.SEE_IMAGE_API_KEY || readProviderKey("opencode-go")
|
|
302
|
+
if (apiKey) {
|
|
303
|
+
try {
|
|
304
|
+
result = await seeImageViaHTTP(b64, mediaType, prompt, abort, apiKey)
|
|
305
|
+
} catch (e: any) {
|
|
306
|
+
errors.push(`http-fallback: ${e?.message ?? e}`)
|
|
307
|
+
}
|
|
308
|
+
}
|
|
299
309
|
}
|
|
300
|
-
}
|
|
301
310
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
311
|
+
if (result) return result
|
|
312
|
+
|
|
313
|
+
const errMsg = errors.join("; ")
|
|
314
|
+
const hint = errMsg.includes("usage limit")
|
|
315
|
+
? ` Enable usage from your balance at https://opencode.ai/workspace/wrk_01KVARG0A0Y87XV5JYBNJ0WRXB/go`
|
|
316
|
+
: ""
|
|
317
|
+
throw new Error(
|
|
318
|
+
`see_image: SDK vision call failed for all candidates. ${errMsg}.${hint}`,
|
|
319
|
+
)
|
|
320
|
+
} finally {
|
|
321
|
+
try { fs.unlinkSync(tmpPath) } catch {}
|
|
322
|
+
}
|
|
309
323
|
}
|
|
310
324
|
|
|
311
325
|
async function seeImageViaHTTP(
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "opencode-see-image",
|
|
3
|
-
"version": "0.8.3",
|
|
3
|
+
"version": "0.8.5",
|
|
4
4
|
"description": "Give non-vision opencode models the ability to see images/screenshots by routing them to a vision-capable model (MiniMax M3 via opencode-go by default).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|