opencode-see-image 0.9.3 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +19 -1
- package/README.md +9 -1
- package/index.ts +241 -98
- package/package.json +1 -1
|
@@ -12,7 +12,25 @@
|
|
|
12
12
|
"Read(//Users/alfa/Documents/opencodeprojects/opencode-see-image/bun-types/**)",
|
|
13
13
|
"Bash(bun run *)",
|
|
14
14
|
"WebFetch(domain:docs.z.ai)",
|
|
15
|
-
"Bash(npm publish *)"
|
|
15
|
+
"Bash(npm publish *)",
|
|
16
|
+
"Bash(python3 -c ' *)",
|
|
17
|
+
"Bash(open -a Preview \"/Users/alfa/.claude/image-cache/31fd2007-9418-45bb-a3e5-d273327f5f78/3.png\" \"/Users/alfa/.claude/image-cache/31fd2007-9418-45bb-a3e5-d273327f5f78/4.png\" \"/Users/alfa/.claude/image-cache/31fd2007-9418-45bb-a3e5-d273327f5f78/5.png\")",
|
|
18
|
+
"Bash(ps -o etime= -p 82196)",
|
|
19
|
+
"Bash(echo \"STILL RUNNING \\($\\(ps -o etime= -p 82196)",
|
|
20
|
+
"Bash(awk '/export type TextPart = \\\\{/,/\\\\};/' node_modules/@opencode-ai/sdk/dist/gen/types.gen.d.t)",
|
|
21
|
+
"Bash(awk '{print $2, $9, $11, $12, $13}')",
|
|
22
|
+
"Bash(pkill -f \"[o]pencode run\")",
|
|
23
|
+
"Bash(pkill -f \"[o]pencode-run\")",
|
|
24
|
+
"Bash(pkill -f \"seq 1 40\")",
|
|
25
|
+
"Bash(rm -f verify.json)",
|
|
26
|
+
"Bash(opencode run *)",
|
|
27
|
+
"Bash(echo \"exit=$? done=$\\(date +%T\\) bytes=$\\(wc -c < /tmp/verify.json\\)\")",
|
|
28
|
+
"Bash(pkill -9 -f \"opencode run\")",
|
|
29
|
+
"Bash(pkill -9 -f \"simple.json\\\\|verify.json\\\\|quick.json\\\\|strm\")",
|
|
30
|
+
"Bash(pkill -9 -f \"14.39.13\")",
|
|
31
|
+
"Bash(npm dist-tag *)",
|
|
32
|
+
"Bash(awk '{print $2, $11, $12, $13, $14}')",
|
|
33
|
+
"Bash(awk '/export type ToolState =/,/^};|^export \\(type|declare\\)/' sdk/dist/gen/types.gen.d.ts)"
|
|
16
34
|
]
|
|
17
35
|
}
|
|
18
36
|
}
|
package/README.md
CHANGED
|
@@ -102,7 +102,15 @@ all settings are env-var overrides. The plugin uses opencode's SDK client by def
|
|
|
102
102
|
| `SEE_IMAGE_ENDPOINT` | `https://opencode.ai/zen/go/v1/messages` | HTTP endpoint (only used if `SEE_IMAGE_API_KEY` is set) |
|
|
103
103
|
| `SEE_IMAGE_API_VERSION` | `2023-06-01` | `anthropic-version` header (HTTP mode only) |
|
|
104
104
|
| `SEE_IMAGE_USER_AGENT` | _(Chrome UA)_ | User-Agent header (HTTP mode only) |
|
|
105
|
-
| `SEE_IMAGE_TIMEOUT` | `30000` |
|
|
105
|
+
| `SEE_IMAGE_TIMEOUT` | `30000` | Timeout in ms for session setup and HTTP-mode calls. |
|
|
106
|
+
| `SEE_IMAGE_STALL_TIMEOUT` | `60000` | Stall timeout in ms (SDK streaming). The call is only aborted if the vision model produces no new tokens for this long — so long transcriptions keep running as long as they're progressing. |
|
|
107
|
+
| `SEE_IMAGE_MAX_TIMEOUT` | `0` | Absolute cap in ms on a single streaming call. `0` = no cap. |
|
|
108
|
+
|
|
109
|
+
### live progress
|
|
110
|
+
|
|
111
|
+
While the vision model works, the tool call shows an animated heartbeat bar plus live status, e.g. `see_image ░▒▓█▓▒░ reading… 1240 chars · 7s · minimax-m3`. The char count and a preview of the latest text update as tokens stream in, so you can see it's alive and watch the description form.
|
|
112
|
+
|
|
113
|
+
The preferred path streams from the vision model via opencode's event stream and uses a **stall timeout** (`SEE_IMAGE_STALL_TIMEOUT`) instead of a hard cutoff: a slow-but-progressing model (e.g. transcribing a huge table) runs to completion, while a genuinely silent/hung call is reaped. If streaming isn't available, or a call fails before any text has streamed in, the plugin falls back to a reliable non-streaming CLI call to the same model (full answer, no live preview), then to the free model. If a streaming call is cut short after some text has already arrived, the text received so far is returned.
|
|
106
114
|
|
|
107
115
|
### using a different vision model
|
|
108
116
|
|
package/index.ts
CHANGED
|
@@ -12,6 +12,22 @@ const ENDPOINT =
|
|
|
12
12
|
const MODEL = process.env.SEE_IMAGE_MODEL || "minimax-m3"
|
|
13
13
|
const PROVIDER_ID = process.env.SEE_IMAGE_PROVIDER || "opencode-go"
|
|
14
14
|
const TIMEOUT = parseInt(process.env.SEE_IMAGE_TIMEOUT || "30000", 10)
|
|
15
|
+
/**
 * Parse a millisecond duration from an environment-variable string.
 *
 * @param raw      Raw env value (may be undefined or empty).
 * @param fallback Value returned when `raw` is missing, not a base-10 integer,
 *                 or smaller than `min`.
 * @param min      Smallest accepted value.
 * @returns The parsed integer, or `fallback`.
 */
function envDurationMs(raw: string | undefined, fallback: number, min: number): number {
  const parsed = parseInt(raw ?? "", 10)
  return Number.isFinite(parsed) && parsed >= min ? parsed : fallback
}

// Stall timeout (SDK streaming path): abort only if the model produces no new
// tokens for this long. A slow-but-progressing call keeps running.
// A non-numeric value would parse to NaN and silently disable stall detection
// (any comparison against NaN is false), and a value <= 0 would reap every call
// on the first check, so both fall back to the 60000 ms default.
const STALL_TIMEOUT = envDurationMs(process.env.SEE_IMAGE_STALL_TIMEOUT, 60000, 1)
// Optional absolute cap on a single vision call, in ms (0 = no cap).
// Invalid or negative values are treated as 0 (no cap).
const MAX_TIMEOUT = envDurationMs(process.env.SEE_IMAGE_MAX_TIMEOUT, 0, 0)
|
|
20
|
+
|
|
21
|
+
// Animated heartbeat: a flowing gradient wave shown in the tool title while we
// wait, so the user can see the call is alive and not frozen.
const HEARTBEAT_FRAMES = ["░", "▒", "▓", "█", "▓", "▒", "░"]

/**
 * Render one frame of the heartbeat animation.
 *
 * Cell `i` of the bar shows frame `(i + tick) mod HEARTBEAT_FRAMES.length`, so
 * increasing `tick` by one shifts the whole pattern one cell to the left.
 *
 * @param tick  Frame counter. Fractional values are floored and negative
 *              values wrap around, so the index always lands inside the frame
 *              table (a raw `%` would go negative and emit "undefined").
 * @param width Number of cells to draw (default 14).
 * @returns A string of `width` block glyphs.
 */
function heartbeatBar(tick: number, width = 14): string {
  const frameCount = HEARTBEAT_FRAMES.length
  const offset = Number.isFinite(tick) ? Math.floor(tick) : 0
  let bar = ""
  for (let cell = 0; cell < width; cell++) {
    // Double modulo keeps the index non-negative for negative offsets.
    const idx = (((cell + offset) % frameCount) + frameCount) % frameCount
    bar += HEARTBEAT_FRAMES[idx]
  }
  return bar
}
|
|
15
31
|
const API_VERSION = process.env.SEE_IMAGE_API_VERSION || "2023-06-01"
|
|
16
32
|
const USER_AGENT =
|
|
17
33
|
process.env.SEE_IMAGE_USER_AGENT ||
|
|
@@ -216,12 +232,15 @@ function readProviderKey(providerID: string): string | null {
|
|
|
216
232
|
}
|
|
217
233
|
}
|
|
218
234
|
|
|
235
|
+
type ProgressFn = (info: { chars: number; preview: string; model: string }) => void
|
|
236
|
+
|
|
219
237
|
async function seeImageViaSDK(
|
|
220
238
|
client: any,
|
|
221
239
|
dataUrl: string,
|
|
222
240
|
mediaType: string,
|
|
223
241
|
prompt: string,
|
|
224
242
|
abort?: AbortSignal,
|
|
243
|
+
onProgress?: ProgressFn,
|
|
225
244
|
): Promise<{ text: string; model: string; provider: string }> {
|
|
226
245
|
const errors: string[] = []
|
|
227
246
|
|
|
@@ -245,13 +264,147 @@ async function seeImageViaSDK(
|
|
|
245
264
|
return tmpPath
|
|
246
265
|
}
|
|
247
266
|
|
|
248
|
-
//
|
|
249
|
-
//
|
|
250
|
-
//
|
|
251
|
-
//
|
|
252
|
-
|
|
267
|
+
// Two runners back the candidate list:
|
|
268
|
+
//
|
|
269
|
+
// streamViaSDK — subscribes to opencode's event stream so we get text
|
|
270
|
+
// token-by-token. This drives the live content preview AND token-based
|
|
271
|
+
// stall detection (abort only after STALL_TIMEOUT of silence). It also
|
|
272
|
+
// races the prompt against a stall/max rejection, so a hung call can't
|
|
273
|
+
// block past the stall window even if the abort signal is ignored. Only
|
|
274
|
+
// used when an event stream is actually available (its whole point).
|
|
275
|
+
//
|
|
276
|
+
// runViaCLI — `opencode run -m <provider>/<model>` via Bun.spawn (killable).
|
|
277
|
+
// The proven, reliable fallback. It buffers --format json output until
|
|
278
|
+
// exit, so it gives no live preview, but it returns the full answer.
|
|
279
|
+
/**
 * Run one vision call through the opencode SDK while following the event
 * stream, so text can be reported token-by-token via `onProgress`.
 *
 * Flow: create a throwaway session (bounded by TIMEOUT), subscribe to the
 * event stream, send the prompt, and race it against a guard that rejects
 * after STALL_TIMEOUT ms without a text update or after MAX_TIMEOUT ms overall
 * (when MAX_TIMEOUT > 0). The session is deleted on every exit path.
 *
 * @param providerID Provider to route the prompt to.
 * @param modelID    Model to route the prompt to.
 * @returns The final response text; otherwise whatever text had streamed in
 *          before the call ended; or `null` when no event stream is available
 *          or no text was produced.
 * @throws If session creation fails or times out, if no session ID comes
 *         back, or if the prompt fails/stalls/aborts before any text streamed.
 */
const streamViaSDK = async (
  providerID: string,
  modelID: string,
): Promise<string | null> => {
  // Bound session creation by TIMEOUT. The timer is cleared once the race
  // settles so it does not linger for the full TIMEOUT after a fast create.
  let createTimer: ReturnType<typeof setTimeout> | undefined
  const createPromise: Promise<any> = Promise.resolve(
    client.session.create({ body: {} }),
  )
  let sessionRes: any
  try {
    sessionRes = await Promise.race([
      createPromise,
      new Promise<never>((_, reject) => {
        createTimer = setTimeout(
          () => reject(new Error(`session.create timed out after ${TIMEOUT}ms`)),
          TIMEOUT,
        )
      }),
    ])
  } catch (e) {
    // If the create call succeeds after we gave up on it, delete the late
    // session so it is not left orphaned. Best-effort: failures are ignored.
    createPromise
      .then((late: any) => {
        const lateID = late?.data?.id
        return lateID
          ? client.session.delete({ path: { id: lateID } })
          : undefined
      })
      .catch(() => {})
    throw e
  } finally {
    if (createTimer) clearTimeout(createTimer)
  }
  const sessionID: string | undefined = sessionRes?.data?.id
  if (!sessionID) throw new Error("no session ID")

  // Best-effort delete; a failed cleanup must not mask the real result.
  const cleanupSession = () =>
    client.session.delete({ path: { id: sessionID } }).catch(() => {})

  // The SDK path exists for the live preview; if we can't get an event
  // stream there's nothing to preview or to measure stalls against, so bail
  // and let the loop fall through to the reliable CLI runner.
  let stream: AsyncGenerator<any> | undefined
  try {
    stream = (await client.event.subscribe())?.stream
  } catch {}
  if (!stream) {
    cleanupSession()
    return null
  }
  const events = stream

  // Internal controller: aborted by the caller's signal, the stall guard, or
  // the max-duration guard.
  const controller = new AbortController()
  const onAbort = () => controller.abort()
  // A signal that is already aborted never fires "abort" again, so propagate
  // that state by hand before listening.
  if (abort?.aborted) controller.abort()
  abort?.addEventListener("abort", onAbort)

  // Latest text of each text part, keyed by part id (events carry the part's
  // full current text, so later events overwrite earlier ones).
  const partsByID = new Map<string, string>()
  let streamedText = ""
  let lastActivity = Date.now()
  let finished = false

  // Background consumer: never awaited; it stops when `finished` is set or
  // the stream ends. Errors from the stream are ignored (best-effort preview).
  void (async () => {
    try {
      for await (const ev of events) {
        if (finished) break
        const p = ev?.properties?.part
        if (
          ev?.type === "message.part.updated" &&
          p?.type === "text" &&
          p.sessionID === sessionID
        ) {
          partsByID.set(p.id, typeof p.text === "string" ? p.text : "")
          streamedText = [...partsByID.values()].join("\n").trim()
          lastActivity = Date.now()
          onProgress?.({
            chars: streamedText.length,
            preview: streamedText.slice(-200),
            model: modelID,
          })
        }
      }
    } catch {}
  })()

  let stallTimer: ReturnType<typeof setInterval> | undefined
  let maxTimer: ReturnType<typeof setTimeout> | undefined
  // Rejects on stall or on exceeding the absolute cap; raced against the
  // prompt so a hung call cannot block even if the abort signal is ignored.
  const guard = new Promise<never>((_, reject) => {
    stallTimer = setInterval(() => {
      if (Date.now() - lastActivity > STALL_TIMEOUT) {
        controller.abort()
        reject(new Error(`stalled: no tokens for ${STALL_TIMEOUT}ms`))
      }
    }, 1000)
    if (MAX_TIMEOUT > 0) {
      maxTimer = setTimeout(() => {
        controller.abort()
        reject(new Error(`exceeded MAX_TIMEOUT ${MAX_TIMEOUT}ms`))
      }, MAX_TIMEOUT)
    }
  })

  let res: any
  try {
    res = await Promise.race([
      client.session.prompt({
        path: { id: sessionID },
        body: {
          model: { providerID, modelID },
          parts: [
            { type: "file", mime: mediaType, url: dataUrl },
            { type: "text", text: prompt },
          ],
          tools: {},
          system:
            "You are a vision assistant. Describe the image accurately and concisely. Answer with text only.",
        },
        signal: controller.signal,
      }),
      guard,
    ])
  } catch (e: any) {
    // Stalled / aborted / errored — keep whatever streamed in so far.
    if (!streamedText) throw e
  } finally {
    finished = true
    if (stallTimer) clearInterval(stallTimer)
    if (maxTimer) clearTimeout(maxTimer)
    abort?.removeEventListener("abort", onAbort)
    // Ask the event stream to close, but do NOT await it: return() on an
    // async generator that is suspended waiting for the next event only
    // settles once that wait ends, which on a silent stream (the stall case)
    // may be never. The consumer also exits on its own via `finished`.
    try {
      void Promise.resolve(events.return?.(undefined)).catch(() => {})
    } catch {}
    cleanupSession()
  }

  // Prefer the text in the prompt response; fall back to the streamed text.
  const finalText = (res?.data?.parts ?? [])
    .filter((p: any) => p.type === "text")
    .map((p: any) => p.text)
    .filter((t: any) => typeof t === "string" && t.length > 0)
    .join("\n")
    .trim()

  return finalText || streamedText || null
}
|
|
399
|
+
|
|
400
|
+
const runViaCLI = async (
|
|
401
|
+
providerID: string,
|
|
402
|
+
modelID: string,
|
|
403
|
+
): Promise<string | null> => {
|
|
253
404
|
const filePath = ensureTmpFile()
|
|
254
405
|
if (!filePath) return null
|
|
406
|
+
onProgress?.({ chars: 0, preview: "", model: modelID })
|
|
407
|
+
|
|
255
408
|
const proc = Bun.spawn(
|
|
256
409
|
[
|
|
257
410
|
"opencode",
|
|
@@ -259,122 +412,72 @@ async function seeImageViaSDK(
|
|
|
259
412
|
"-f",
|
|
260
413
|
filePath,
|
|
261
414
|
"-m",
|
|
262
|
-
|
|
263
|
-
|
|
415
|
+
`${providerID}/${modelID}`,
|
|
416
|
+
prompt,
|
|
264
417
|
"--format",
|
|
265
418
|
"json",
|
|
266
419
|
"--dangerously-skip-permissions",
|
|
267
420
|
],
|
|
268
421
|
{ stdout: "pipe", stderr: "ignore" },
|
|
269
422
|
)
|
|
270
|
-
const timer = setTimeout(() => proc.kill(), TIMEOUT)
|
|
271
423
|
const onAbort = () => proc.kill()
|
|
272
424
|
abort?.addEventListener("abort", onAbort)
|
|
425
|
+
const maxTimer =
|
|
426
|
+
MAX_TIMEOUT > 0 ? setTimeout(() => proc.kill(), MAX_TIMEOUT) : undefined
|
|
427
|
+
|
|
273
428
|
try {
|
|
274
429
|
const out = await new Response(proc.stdout).text()
|
|
275
430
|
await proc.exited
|
|
431
|
+
const parts = new Map<string, string>()
|
|
276
432
|
for (const line of out.split("\n").filter(Boolean)) {
|
|
277
433
|
try {
|
|
278
|
-
const
|
|
279
|
-
if (
|
|
280
|
-
|
|
434
|
+
const p = JSON.parse(line)?.part
|
|
435
|
+
if (p?.type === "text" && typeof p.text === "string") {
|
|
436
|
+
parts.set(p.id ?? String(parts.size), p.text)
|
|
281
437
|
}
|
|
282
438
|
} catch {}
|
|
283
439
|
}
|
|
284
|
-
|
|
285
|
-
|
|
440
|
+
return [...parts.values()].join("\n").trim() || null
|
|
441
|
+
} catch {
|
|
442
|
+
return null
|
|
443
|
+
} finally {
|
|
444
|
+
if (maxTimer) clearTimeout(maxTimer)
|
|
286
445
|
abort?.removeEventListener("abort", onAbort)
|
|
287
446
|
}
|
|
288
|
-
return null
|
|
289
447
|
}
|
|
290
448
|
|
|
291
449
|
let result: { text: string; model: string; provider: string } | undefined
|
|
292
450
|
|
|
293
451
|
try {
|
|
294
|
-
const candidates: Array<{
|
|
452
|
+
const candidates: Array<{
|
|
453
|
+
providerID: string
|
|
454
|
+
modelID: string
|
|
455
|
+
mode: "sdk" | "cli"
|
|
456
|
+
}> = []
|
|
295
457
|
const envProvider = process.env.SEE_IMAGE_PROVIDER
|
|
296
458
|
const envModel = process.env.SEE_IMAGE_MODEL
|
|
297
459
|
if (envProvider && envModel) {
|
|
298
|
-
candidates.push({ providerID: envProvider, modelID: envModel })
|
|
460
|
+
candidates.push({ providerID: envProvider, modelID: envModel, mode: "sdk" })
|
|
299
461
|
}
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
// SDK session.prompt returns empty for free models; use CLI instead
|
|
306
|
-
const text = await freeFallback(modelID, prompt)
|
|
307
|
-
if (text) {
|
|
308
|
-
result = { text, model: modelID, provider: providerID }
|
|
309
|
-
break
|
|
310
|
-
}
|
|
311
|
-
errors.push(`${providerID}/${modelID}: no text from CLI fallback`)
|
|
312
|
-
continue
|
|
313
|
-
}
|
|
462
|
+
// Prefer streaming minimax (live preview); fall back to the same model via
|
|
463
|
+
// the proven CLI runner; then the free model via CLI.
|
|
464
|
+
candidates.push({ providerID: "opencode-go", modelID: "minimax-m3", mode: "sdk" })
|
|
465
|
+
candidates.push({ providerID: "opencode-go", modelID: "minimax-m3", mode: "cli" })
|
|
466
|
+
candidates.push({ providerID: "opencode", modelID: "mimo-v2.5-free", mode: "cli" })
|
|
314
467
|
|
|
315
|
-
|
|
468
|
+
for (const { providerID, modelID, mode } of candidates) {
|
|
316
469
|
try {
|
|
317
|
-
const
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
() => reject(new Error(`session.create timed out after ${TIMEOUT}ms`)),
|
|
322
|
-
TIMEOUT,
|
|
323
|
-
),
|
|
324
|
-
),
|
|
325
|
-
])
|
|
326
|
-
sessionID = sessionRes.data?.id
|
|
327
|
-
if (!sessionID) {
|
|
328
|
-
errors.push(`${providerID}/${modelID}: no session ID`)
|
|
329
|
-
continue
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
const controller = new AbortController()
|
|
333
|
-
const onAbort = () => controller.abort()
|
|
334
|
-
abort?.addEventListener("abort", onAbort)
|
|
335
|
-
const timer = setTimeout(() => controller.abort(), TIMEOUT)
|
|
336
|
-
let res
|
|
337
|
-
try {
|
|
338
|
-
res = await client.session.prompt({
|
|
339
|
-
path: { id: sessionID },
|
|
340
|
-
body: {
|
|
341
|
-
model: { providerID, modelID },
|
|
342
|
-
parts: [
|
|
343
|
-
{ type: "file", mime: mediaType, url: dataUrl },
|
|
344
|
-
{ type: "text", text: prompt },
|
|
345
|
-
],
|
|
346
|
-
tools: {},
|
|
347
|
-
system:
|
|
348
|
-
"You are a vision assistant. Describe the image accurately and concisely. Answer with text only.",
|
|
349
|
-
},
|
|
350
|
-
signal: controller.signal,
|
|
351
|
-
})
|
|
352
|
-
} finally {
|
|
353
|
-
clearTimeout(timer)
|
|
354
|
-
abort?.removeEventListener("abort", onAbort)
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
const parts = res.data?.parts ?? []
|
|
358
|
-
const text = (parts as any[])
|
|
359
|
-
.filter((p: any) => p.type === "text")
|
|
360
|
-
.map((p: any) => p.text)
|
|
361
|
-
.filter((t: any) => typeof t === "string" && t.length > 0)
|
|
362
|
-
.join("\n")
|
|
363
|
-
.trim()
|
|
364
|
-
|
|
470
|
+
const text =
|
|
471
|
+
mode === "sdk"
|
|
472
|
+
? await streamViaSDK(providerID, modelID)
|
|
473
|
+
: await runViaCLI(providerID, modelID)
|
|
365
474
|
if (text) {
|
|
366
475
|
result = { text, model: modelID, provider: providerID }
|
|
367
476
|
break
|
|
368
477
|
}
|
|
369
|
-
errors.push(`${providerID}/${modelID}: no text
|
|
478
|
+
errors.push(`${providerID}/${modelID} (${mode}): no text`)
|
|
370
479
|
} catch (e: any) {
|
|
371
|
-
errors.push(`${providerID}/${modelID}: ${e?.message ?? e}`)
|
|
372
|
-
} finally {
|
|
373
|
-
if (sessionID) {
|
|
374
|
-
await client.session
|
|
375
|
-
.delete({ path: { id: sessionID } })
|
|
376
|
-
.catch(() => {})
|
|
377
|
-
}
|
|
480
|
+
errors.push(`${providerID}/${modelID} (${mode}): ${e?.message ?? e}`)
|
|
378
481
|
}
|
|
379
482
|
}
|
|
380
483
|
|
|
@@ -545,17 +648,57 @@ const SeeImagePlugin: Plugin = async (ctx) => {
|
|
|
545
648
|
|
|
546
649
|
let result: { text: string; model: string; provider: string }
|
|
547
650
|
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
)
|
|
651
|
+
// Live feedback while we wait: an animated heartbeat bar plus, once the
|
|
652
|
+
// vision model starts streaming, a growing char count and a preview of
|
|
653
|
+
// the latest text. The timer ticks independently so the bar animates
|
|
654
|
+
// even before any tokens arrive; onProgress feeds it streamed content.
|
|
655
|
+
const started = Date.now()
|
|
656
|
+
let tick = 0
|
|
657
|
+
const live = { chars: 0, preview: "", model: "" }
|
|
658
|
+
const onProgress: ProgressFn = (info) => {
|
|
659
|
+
live.chars = info.chars
|
|
660
|
+
live.preview = info.preview
|
|
661
|
+
if (info.model) live.model = info.model
|
|
662
|
+
}
|
|
663
|
+
const render = () => {
|
|
664
|
+
const secs = Math.round((Date.now() - started) / 1000)
|
|
665
|
+
const bar = heartbeatBar(++tick)
|
|
666
|
+
const label = live.chars > 0 ? `reading… ${live.chars} chars` : "looking…"
|
|
667
|
+
const model = live.model ? ` · ${live.model}` : ""
|
|
668
|
+
context.metadata({
|
|
669
|
+
title: `see_image ${bar} ${label} · ${secs}s${model}`,
|
|
670
|
+
metadata: {
|
|
671
|
+
elapsedSeconds: secs,
|
|
672
|
+
chars: live.chars,
|
|
673
|
+
preview: live.preview,
|
|
674
|
+
model: live.model,
|
|
675
|
+
},
|
|
676
|
+
})
|
|
677
|
+
}
|
|
678
|
+
render()
|
|
679
|
+
const heartbeat = setInterval(render, 500)
|
|
680
|
+
|
|
681
|
+
try {
|
|
682
|
+
if (process.env.SEE_IMAGE_API_KEY) {
|
|
683
|
+
const b64 = resolved.dataUrl.split(",")[1] || ""
|
|
684
|
+
result = await seeImageViaHTTP(
|
|
685
|
+
b64,
|
|
686
|
+
resolved.mediaType,
|
|
687
|
+
prompt,
|
|
688
|
+
context.abort,
|
|
689
|
+
)
|
|
690
|
+
} else {
|
|
691
|
+
result = await seeImageViaSDK(
|
|
692
|
+
client,
|
|
693
|
+
resolved.dataUrl,
|
|
694
|
+
resolved.mediaType,
|
|
695
|
+
prompt,
|
|
696
|
+
context.abort,
|
|
697
|
+
onProgress,
|
|
698
|
+
)
|
|
699
|
+
}
|
|
700
|
+
} finally {
|
|
701
|
+
clearInterval(heartbeat)
|
|
559
702
|
}
|
|
560
703
|
|
|
561
704
|
context.metadata({
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "opencode-see-image",
|
|
3
|
-
"version": "0.9.3",
|
|
3
|
+
"version": "0.10.1",
|
|
4
4
|
"description": "Give non-vision opencode models the ability to see images/screenshots by routing them to a vision-capable model (MiniMax M3 via opencode-go by default).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|