open-agents-ai 0.185.27 → 0.185.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +194 -8
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -26413,7 +26413,25 @@ If you're stuck, try a completely different approach. Do NOT repeat what failed
|
|
|
26413
26413
|
this.emit({ type: "error", content: `Model not available. Use /model to select a different model.`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26414
26414
|
break;
|
|
26415
26415
|
}
|
|
26416
|
-
|
|
26416
|
+
let imageRecovered = false;
|
|
26417
|
+
if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
|
|
26418
|
+
imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
|
|
26419
|
+
if (imageRecovered) {
|
|
26420
|
+
try {
|
|
26421
|
+
const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26422
|
+
response = imgRetry;
|
|
26423
|
+
} catch (imgRetryErr) {
|
|
26424
|
+
const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
|
|
26425
|
+
this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26426
|
+
imageRecovered = false;
|
|
26427
|
+
break;
|
|
26428
|
+
}
|
|
26429
|
+
} else {
|
|
26430
|
+
break;
|
|
26431
|
+
}
|
|
26432
|
+
}
|
|
26433
|
+
if (imageRecovered) {
|
|
26434
|
+
} else if (/does not support tools|HTTP 400.*tools/i.test(errMsg)) {
|
|
26417
26435
|
this.emit({
|
|
26418
26436
|
type: "status",
|
|
26419
26437
|
content: `Model lacks native tool support \u2014 switching to prompt-injected tool mode`,
|
|
@@ -26464,14 +26482,16 @@ If you're stuck, try a completely different approach. Do NOT repeat what failed
|
|
|
26464
26482
|
break;
|
|
26465
26483
|
}
|
|
26466
26484
|
}
|
|
26467
|
-
|
|
26468
|
-
|
|
26469
|
-
|
|
26470
|
-
|
|
26471
|
-
|
|
26472
|
-
|
|
26485
|
+
if (!imageRecovered) {
|
|
26486
|
+
this.emit({
|
|
26487
|
+
type: "error",
|
|
26488
|
+
content: `Backend unavailable \u2014 stopping task. Fix the issue and retry.`,
|
|
26489
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
26490
|
+
});
|
|
26491
|
+
break;
|
|
26492
|
+
}
|
|
26473
26493
|
}
|
|
26474
|
-
response = recovered;
|
|
26494
|
+
response = recovered ?? response;
|
|
26475
26495
|
}
|
|
26476
26496
|
totalTokens += response.usage?.totalTokens ?? 0;
|
|
26477
26497
|
promptTokens += response.usage?.promptTokens ?? 0;
|
|
@@ -28444,6 +28464,172 @@ ${transcript}`
|
|
|
28444
28464
|
return true;
|
|
28445
28465
|
return false;
|
|
28446
28466
|
}
|
|
28467
|
+
/**
|
|
28468
|
+
* Graceful image error recovery chain:
|
|
28469
|
+
* 1. Downconvert images (resize to ≤512px, JPEG compress) and retry inline
|
|
28470
|
+
* 2. Describe images via moondream/Ollama vision → replace image_url with text description
|
|
28471
|
+
* 3. Last resort: strip images, keep text context
|
|
28472
|
+
*
|
|
28473
|
+
* Mutates messages in-place. Returns true if messages were successfully transformed.
|
|
28474
|
+
*/
|
|
28475
|
+
async _recoverFromImageError(messages, chatRequest, turn) {
|
|
28476
|
+
const imageEntries = [];
|
|
28477
|
+
for (let mi = 0; mi < messages.length; mi++) {
|
|
28478
|
+
const msg = messages[mi];
|
|
28479
|
+
if (!Array.isArray(msg.content))
|
|
28480
|
+
continue;
|
|
28481
|
+
for (let pi = 0; pi < msg.content.length; pi++) {
|
|
28482
|
+
const part = msg.content[pi];
|
|
28483
|
+
if (part.type === "image_url" && part.image_url?.url) {
|
|
28484
|
+
imageEntries.push({ msgIdx: mi, partIdx: pi, dataUrl: part.image_url.url });
|
|
28485
|
+
}
|
|
28486
|
+
}
|
|
28487
|
+
}
|
|
28488
|
+
if (imageEntries.length === 0)
|
|
28489
|
+
return false;
|
|
28490
|
+
this.emit({ type: "status", content: `Image rejected \u2014 trying downconversion (${imageEntries.length} image(s))...`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28491
|
+
let downconverted = false;
|
|
28492
|
+
try {
|
|
28493
|
+
for (const entry of imageEntries) {
|
|
28494
|
+
const { dataUrl } = entry;
|
|
28495
|
+
if (!dataUrl.startsWith("data:"))
|
|
28496
|
+
continue;
|
|
28497
|
+
const commaIdx = dataUrl.indexOf(",");
|
|
28498
|
+
if (commaIdx < 0)
|
|
28499
|
+
continue;
|
|
28500
|
+
const rawBase64 = dataUrl.slice(commaIdx + 1);
|
|
28501
|
+
const buffer = Buffer.from(rawBase64, "base64");
|
|
28502
|
+
let resizedBase64 = null;
|
|
28503
|
+
try {
|
|
28504
|
+
const { execSync: execSync35 } = await import("node:child_process");
|
|
28505
|
+
const { writeFileSync: writeFileSync30, readFileSync: readFileSync45, unlinkSync: unlinkSync13 } = await import("node:fs");
|
|
28506
|
+
const { join: join77 } = await import("node:path");
|
|
28507
|
+
const { tmpdir: tmpdir11 } = await import("node:os");
|
|
28508
|
+
const tmpIn = join77(tmpdir11(), `oa_img_in_${Date.now()}.png`);
|
|
28509
|
+
const tmpOut = join77(tmpdir11(), `oa_img_out_${Date.now()}.jpg`);
|
|
28510
|
+
writeFileSync30(tmpIn, buffer);
|
|
28511
|
+
execSync35(`python3 -c "
|
|
28512
|
+
from PIL import Image
|
|
28513
|
+
img = Image.open('${tmpIn}')
|
|
28514
|
+
img.thumbnail((512, 512), Image.LANCZOS)
|
|
28515
|
+
img = img.convert('RGB')
|
|
28516
|
+
img.save('${tmpOut}', 'JPEG', quality=75)
|
|
28517
|
+
"`, { timeout: 1e4, stdio: "pipe" });
|
|
28518
|
+
const resizedBuf = readFileSync45(tmpOut);
|
|
28519
|
+
resizedBase64 = `data:image/jpeg;base64,${resizedBuf.toString("base64")}`;
|
|
28520
|
+
try {
|
|
28521
|
+
unlinkSync13(tmpIn);
|
|
28522
|
+
} catch {
|
|
28523
|
+
}
|
|
28524
|
+
try {
|
|
28525
|
+
unlinkSync13(tmpOut);
|
|
28526
|
+
} catch {
|
|
28527
|
+
}
|
|
28528
|
+
} catch {
|
|
28529
|
+
}
|
|
28530
|
+
if (resizedBase64) {
|
|
28531
|
+
const msg = messages[entry.msgIdx];
|
|
28532
|
+
const parts = msg.content;
|
|
28533
|
+
parts[entry.partIdx] = { type: "image_url", image_url: { url: resizedBase64 } };
|
|
28534
|
+
downconverted = true;
|
|
28535
|
+
}
|
|
28536
|
+
}
|
|
28537
|
+
} catch {
|
|
28538
|
+
}
|
|
28539
|
+
if (downconverted) {
|
|
28540
|
+
chatRequest.messages = messages;
|
|
28541
|
+
this.emit({ type: "status", content: `Downconverted images to 512px JPEG \u2014 retrying`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28542
|
+
return true;
|
|
28543
|
+
}
|
|
28544
|
+
this.emit({ type: "status", content: `Downconversion unavailable \u2014 describing images via vision model...`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28545
|
+
const ollamaHost = process.env["OLLAMA_HOST"] || "http://127.0.0.1:11434";
|
|
28546
|
+
let described = false;
|
|
28547
|
+
for (const entry of imageEntries) {
|
|
28548
|
+
const { dataUrl } = entry;
|
|
28549
|
+
if (!dataUrl.startsWith("data:"))
|
|
28550
|
+
continue;
|
|
28551
|
+
const commaIdx = dataUrl.indexOf(",");
|
|
28552
|
+
if (commaIdx < 0)
|
|
28553
|
+
continue;
|
|
28554
|
+
const rawBase64 = dataUrl.slice(commaIdx + 1);
|
|
28555
|
+
try {
|
|
28556
|
+
const model = process.env["OLLAMA_VISION_MODEL"] || "moondream";
|
|
28557
|
+
let res = await fetch(`${ollamaHost}/api/generate`, {
|
|
28558
|
+
method: "POST",
|
|
28559
|
+
headers: { "Content-Type": "application/json" },
|
|
28560
|
+
body: JSON.stringify({
|
|
28561
|
+
model,
|
|
28562
|
+
prompt: "Describe this image in detail. Include text content, UI elements, code, errors, and any relevant visual information.",
|
|
28563
|
+
images: [rawBase64],
|
|
28564
|
+
stream: false
|
|
28565
|
+
}),
|
|
28566
|
+
signal: AbortSignal.timeout(6e4)
|
|
28567
|
+
});
|
|
28568
|
+
if (!res.ok && model === "moondream" && res.status === 404) {
|
|
28569
|
+
this.emit({ type: "status", content: `Pulling moondream vision model...`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28570
|
+
try {
|
|
28571
|
+
const { execSync: execSync35 } = await import("node:child_process");
|
|
28572
|
+
execSync35("ollama pull moondream", { timeout: 3e5, stdio: "pipe" });
|
|
28573
|
+
res = await fetch(`${ollamaHost}/api/generate`, {
|
|
28574
|
+
method: "POST",
|
|
28575
|
+
headers: { "Content-Type": "application/json" },
|
|
28576
|
+
body: JSON.stringify({
|
|
28577
|
+
model,
|
|
28578
|
+
prompt: "Describe this image in detail. Include text content, UI elements, code, errors, and any relevant visual information.",
|
|
28579
|
+
images: [rawBase64],
|
|
28580
|
+
stream: false
|
|
28581
|
+
}),
|
|
28582
|
+
signal: AbortSignal.timeout(6e4)
|
|
28583
|
+
});
|
|
28584
|
+
} catch {
|
|
28585
|
+
}
|
|
28586
|
+
}
|
|
28587
|
+
if (res.ok) {
|
|
28588
|
+
const data = await res.json();
|
|
28589
|
+
const description = data.response?.trim();
|
|
28590
|
+
if (description && description.length > 20) {
|
|
28591
|
+
const msg = messages[entry.msgIdx];
|
|
28592
|
+
const parts = msg.content;
|
|
28593
|
+
parts[entry.partIdx] = {
|
|
28594
|
+
type: "text",
|
|
28595
|
+
text: `[Image description from vision model]:
|
|
28596
|
+
${description}`
|
|
28597
|
+
};
|
|
28598
|
+
described = true;
|
|
28599
|
+
this.emit({ type: "status", content: `Image described (${description.length} chars) \u2014 replacing inline`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28600
|
+
}
|
|
28601
|
+
}
|
|
28602
|
+
} catch {
|
|
28603
|
+
}
|
|
28604
|
+
}
|
|
28605
|
+
if (described) {
|
|
28606
|
+
for (const msg of messages) {
|
|
28607
|
+
if (Array.isArray(msg.content)) {
|
|
28608
|
+
const parts = msg.content;
|
|
28609
|
+
const allText = parts.every((p) => p.type === "text");
|
|
28610
|
+
if (allText && parts.length === 1 && parts[0].text) {
|
|
28611
|
+
msg.content = parts[0].text;
|
|
28612
|
+
}
|
|
28613
|
+
}
|
|
28614
|
+
}
|
|
28615
|
+
chatRequest.messages = messages;
|
|
28616
|
+
this.emit({ type: "status", content: `Images replaced with descriptions \u2014 retrying`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28617
|
+
return true;
|
|
28618
|
+
}
|
|
28619
|
+
this.emit({ type: "status", content: `No vision model available \u2014 stripping images (text context preserved)`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28620
|
+
for (const msg of messages) {
|
|
28621
|
+
if (Array.isArray(msg.content)) {
|
|
28622
|
+
const textParts = msg.content.filter((p) => p.type !== "image_url");
|
|
28623
|
+
if (textParts.length > 0) {
|
|
28624
|
+
msg.content = textParts.length === 1 && textParts[0].text ? textParts[0].text : textParts;
|
|
28625
|
+
} else {
|
|
28626
|
+
msg.content = "[Image was provided but could not be processed \u2014 no vision model available]";
|
|
28627
|
+
}
|
|
28628
|
+
}
|
|
28629
|
+
}
|
|
28630
|
+
chatRequest.messages = messages;
|
|
28631
|
+
return true;
|
|
28632
|
+
}
|
|
28447
28633
|
/**
|
|
28448
28634
|
* Retry a failed model request up to 3 times with exponential backoff.
|
|
28449
28635
|
* Returns the response on success, or null if all retries failed.
|
package/package.json
CHANGED