open-agents-ai 0.185.28 → 0.185.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +177 -22
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -26415,29 +26415,18 @@ If you're stuck, try a completely different approach. Do NOT repeat what failed
|
|
|
26415
26415
|
}
|
|
26416
26416
|
let imageRecovered = false;
|
|
26417
26417
|
if (/invalid image|image.*invalid|image_url.*unsupported|does not support.*image|image.*not supported/i.test(errMsg)) {
|
|
26418
|
-
this.
|
|
26419
|
-
|
|
26420
|
-
|
|
26421
|
-
|
|
26422
|
-
|
|
26423
|
-
|
|
26424
|
-
|
|
26425
|
-
|
|
26426
|
-
|
|
26427
|
-
|
|
26428
|
-
} else {
|
|
26429
|
-
msg2.content = "[Image was here but backend doesn't support images]";
|
|
26430
|
-
}
|
|
26418
|
+
imageRecovered = await this._recoverFromImageError(messages, chatRequest, turn);
|
|
26419
|
+
if (imageRecovered) {
|
|
26420
|
+
try {
|
|
26421
|
+
const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26422
|
+
response = imgRetry;
|
|
26423
|
+
} catch (imgRetryErr) {
|
|
26424
|
+
const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
|
|
26425
|
+
this.emit({ type: "error", content: `Retry after image fallback also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26426
|
+
imageRecovered = false;
|
|
26427
|
+
break;
|
|
26431
26428
|
}
|
|
26432
|
-
}
|
|
26433
|
-
chatRequest.messages = messages;
|
|
26434
|
-
try {
|
|
26435
|
-
const imgRetry = this.options.streamEnabled && this.hasStreamingSupport() ? await this.streamingRequest(chatRequest, turn) : await this.backend.chatCompletion(chatRequest);
|
|
26436
|
-
response = imgRetry;
|
|
26437
|
-
imageRecovered = true;
|
|
26438
|
-
} catch (imgRetryErr) {
|
|
26439
|
-
const msg2 = imgRetryErr instanceof Error ? imgRetryErr.message : String(imgRetryErr);
|
|
26440
|
-
this.emit({ type: "error", content: `Retry without images also failed: ${msg2}`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
26429
|
+
} else {
|
|
26441
26430
|
break;
|
|
26442
26431
|
}
|
|
26443
26432
|
}
|
|
@@ -28475,6 +28464,172 @@ ${transcript}`
|
|
|
28475
28464
|
return true;
|
|
28476
28465
|
return false;
|
|
28477
28466
|
}
|
|
28467
|
+
/**
|
|
28468
|
+
* Graceful image error recovery chain:
|
|
28469
|
+
* 1. Downconvert images (resize to ≤512px, JPEG compress) and retry inline
|
|
28470
|
+
* 2. Describe images via moondream/Ollama vision → replace image_url with text description
|
|
28471
|
+
* 3. Last resort: strip images, keep text context
|
|
28472
|
+
*
|
|
28473
|
+
* Mutates messages in-place. Returns true if messages were successfully transformed.
|
|
28474
|
+
*/
|
|
28475
|
+
async _recoverFromImageError(messages, chatRequest, turn) {
|
|
28476
|
+
const imageEntries = [];
|
|
28477
|
+
for (let mi = 0; mi < messages.length; mi++) {
|
|
28478
|
+
const msg = messages[mi];
|
|
28479
|
+
if (!Array.isArray(msg.content))
|
|
28480
|
+
continue;
|
|
28481
|
+
for (let pi = 0; pi < msg.content.length; pi++) {
|
|
28482
|
+
const part = msg.content[pi];
|
|
28483
|
+
if (part.type === "image_url" && part.image_url?.url) {
|
|
28484
|
+
imageEntries.push({ msgIdx: mi, partIdx: pi, dataUrl: part.image_url.url });
|
|
28485
|
+
}
|
|
28486
|
+
}
|
|
28487
|
+
}
|
|
28488
|
+
if (imageEntries.length === 0)
|
|
28489
|
+
return false;
|
|
28490
|
+
this.emit({ type: "status", content: `Image rejected \u2014 trying downconversion (${imageEntries.length} image(s))...`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28491
|
+
let downconverted = false;
|
|
28492
|
+
try {
|
|
28493
|
+
for (const entry of imageEntries) {
|
|
28494
|
+
const { dataUrl } = entry;
|
|
28495
|
+
if (!dataUrl.startsWith("data:"))
|
|
28496
|
+
continue;
|
|
28497
|
+
const commaIdx = dataUrl.indexOf(",");
|
|
28498
|
+
if (commaIdx < 0)
|
|
28499
|
+
continue;
|
|
28500
|
+
const rawBase64 = dataUrl.slice(commaIdx + 1);
|
|
28501
|
+
const buffer = Buffer.from(rawBase64, "base64");
|
|
28502
|
+
let resizedBase64 = null;
|
|
28503
|
+
try {
|
|
28504
|
+
const { execSync: execSync35 } = await import("node:child_process");
|
|
28505
|
+
const { writeFileSync: writeFileSync30, readFileSync: readFileSync45, unlinkSync: unlinkSync13 } = await import("node:fs");
|
|
28506
|
+
const { join: join77 } = await import("node:path");
|
|
28507
|
+
const { tmpdir: tmpdir11 } = await import("node:os");
|
|
28508
|
+
const tmpIn = join77(tmpdir11(), `oa_img_in_${Date.now()}.png`);
|
|
28509
|
+
const tmpOut = join77(tmpdir11(), `oa_img_out_${Date.now()}.jpg`);
|
|
28510
|
+
writeFileSync30(tmpIn, buffer);
|
|
28511
|
+
execSync35(`python3 -c "
|
|
28512
|
+
from PIL import Image
|
|
28513
|
+
img = Image.open('${tmpIn}')
|
|
28514
|
+
img.thumbnail((512, 512), Image.LANCZOS)
|
|
28515
|
+
img = img.convert('RGB')
|
|
28516
|
+
img.save('${tmpOut}', 'JPEG', quality=75)
|
|
28517
|
+
"`, { timeout: 1e4, stdio: "pipe" });
|
|
28518
|
+
const resizedBuf = readFileSync45(tmpOut);
|
|
28519
|
+
resizedBase64 = `data:image/jpeg;base64,${resizedBuf.toString("base64")}`;
|
|
28520
|
+
try {
|
|
28521
|
+
unlinkSync13(tmpIn);
|
|
28522
|
+
} catch {
|
|
28523
|
+
}
|
|
28524
|
+
try {
|
|
28525
|
+
unlinkSync13(tmpOut);
|
|
28526
|
+
} catch {
|
|
28527
|
+
}
|
|
28528
|
+
} catch {
|
|
28529
|
+
}
|
|
28530
|
+
if (resizedBase64) {
|
|
28531
|
+
const msg = messages[entry.msgIdx];
|
|
28532
|
+
const parts = msg.content;
|
|
28533
|
+
parts[entry.partIdx] = { type: "image_url", image_url: { url: resizedBase64 } };
|
|
28534
|
+
downconverted = true;
|
|
28535
|
+
}
|
|
28536
|
+
}
|
|
28537
|
+
} catch {
|
|
28538
|
+
}
|
|
28539
|
+
if (downconverted) {
|
|
28540
|
+
chatRequest.messages = messages;
|
|
28541
|
+
this.emit({ type: "status", content: `Downconverted images to 512px JPEG \u2014 retrying`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28542
|
+
return true;
|
|
28543
|
+
}
|
|
28544
|
+
this.emit({ type: "status", content: `Downconversion unavailable \u2014 describing images via vision model...`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28545
|
+
const ollamaHost = process.env["OLLAMA_HOST"] || "http://127.0.0.1:11434";
|
|
28546
|
+
let described = false;
|
|
28547
|
+
for (const entry of imageEntries) {
|
|
28548
|
+
const { dataUrl } = entry;
|
|
28549
|
+
if (!dataUrl.startsWith("data:"))
|
|
28550
|
+
continue;
|
|
28551
|
+
const commaIdx = dataUrl.indexOf(",");
|
|
28552
|
+
if (commaIdx < 0)
|
|
28553
|
+
continue;
|
|
28554
|
+
const rawBase64 = dataUrl.slice(commaIdx + 1);
|
|
28555
|
+
try {
|
|
28556
|
+
const model = process.env["OLLAMA_VISION_MODEL"] || "moondream";
|
|
28557
|
+
let res = await fetch(`${ollamaHost}/api/generate`, {
|
|
28558
|
+
method: "POST",
|
|
28559
|
+
headers: { "Content-Type": "application/json" },
|
|
28560
|
+
body: JSON.stringify({
|
|
28561
|
+
model,
|
|
28562
|
+
prompt: "Describe this image in detail. Include text content, UI elements, code, errors, and any relevant visual information.",
|
|
28563
|
+
images: [rawBase64],
|
|
28564
|
+
stream: false
|
|
28565
|
+
}),
|
|
28566
|
+
signal: AbortSignal.timeout(6e4)
|
|
28567
|
+
});
|
|
28568
|
+
if (!res.ok && model === "moondream" && res.status === 404) {
|
|
28569
|
+
this.emit({ type: "status", content: `Pulling moondream vision model...`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28570
|
+
try {
|
|
28571
|
+
const { execSync: execSync35 } = await import("node:child_process");
|
|
28572
|
+
execSync35("ollama pull moondream", { timeout: 3e5, stdio: "pipe" });
|
|
28573
|
+
res = await fetch(`${ollamaHost}/api/generate`, {
|
|
28574
|
+
method: "POST",
|
|
28575
|
+
headers: { "Content-Type": "application/json" },
|
|
28576
|
+
body: JSON.stringify({
|
|
28577
|
+
model,
|
|
28578
|
+
prompt: "Describe this image in detail. Include text content, UI elements, code, errors, and any relevant visual information.",
|
|
28579
|
+
images: [rawBase64],
|
|
28580
|
+
stream: false
|
|
28581
|
+
}),
|
|
28582
|
+
signal: AbortSignal.timeout(6e4)
|
|
28583
|
+
});
|
|
28584
|
+
} catch {
|
|
28585
|
+
}
|
|
28586
|
+
}
|
|
28587
|
+
if (res.ok) {
|
|
28588
|
+
const data = await res.json();
|
|
28589
|
+
const description = data.response?.trim();
|
|
28590
|
+
if (description && description.length > 20) {
|
|
28591
|
+
const msg = messages[entry.msgIdx];
|
|
28592
|
+
const parts = msg.content;
|
|
28593
|
+
parts[entry.partIdx] = {
|
|
28594
|
+
type: "text",
|
|
28595
|
+
text: `[Image description from vision model]:
|
|
28596
|
+
${description}`
|
|
28597
|
+
};
|
|
28598
|
+
described = true;
|
|
28599
|
+
this.emit({ type: "status", content: `Image described (${description.length} chars) \u2014 replacing inline`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28600
|
+
}
|
|
28601
|
+
}
|
|
28602
|
+
} catch {
|
|
28603
|
+
}
|
|
28604
|
+
}
|
|
28605
|
+
if (described) {
|
|
28606
|
+
for (const msg of messages) {
|
|
28607
|
+
if (Array.isArray(msg.content)) {
|
|
28608
|
+
const parts = msg.content;
|
|
28609
|
+
const allText = parts.every((p) => p.type === "text");
|
|
28610
|
+
if (allText && parts.length === 1 && parts[0].text) {
|
|
28611
|
+
msg.content = parts[0].text;
|
|
28612
|
+
}
|
|
28613
|
+
}
|
|
28614
|
+
}
|
|
28615
|
+
chatRequest.messages = messages;
|
|
28616
|
+
this.emit({ type: "status", content: `Images replaced with descriptions \u2014 retrying`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28617
|
+
return true;
|
|
28618
|
+
}
|
|
28619
|
+
this.emit({ type: "status", content: `No vision model available \u2014 stripping images (text context preserved)`, timestamp: (/* @__PURE__ */ new Date()).toISOString() });
|
|
28620
|
+
for (const msg of messages) {
|
|
28621
|
+
if (Array.isArray(msg.content)) {
|
|
28622
|
+
const textParts = msg.content.filter((p) => p.type !== "image_url");
|
|
28623
|
+
if (textParts.length > 0) {
|
|
28624
|
+
msg.content = textParts.length === 1 && textParts[0].text ? textParts[0].text : textParts;
|
|
28625
|
+
} else {
|
|
28626
|
+
msg.content = "[Image was provided but could not be processed \u2014 no vision model available]";
|
|
28627
|
+
}
|
|
28628
|
+
}
|
|
28629
|
+
}
|
|
28630
|
+
chatRequest.messages = messages;
|
|
28631
|
+
return true;
|
|
28632
|
+
}
|
|
28478
28633
|
/**
|
|
28479
28634
|
* Retry a failed model request up to 3 times with exponential backoff.
|
|
28480
28635
|
* Returns the response on success, or null if all retries failed.
|
package/package.json
CHANGED