open-agents-ai 0.187.593 → 0.187.594
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +147 -37
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -503992,6 +503992,13 @@ ${cameras.join("\n")}`,
|
|
|
503992
503992
|
}
|
|
503993
503993
|
const data = readFileSync29(filePath);
|
|
503994
503994
|
const sizeKB = Math.round(data.length / 1024);
|
|
503995
|
+
const outputPath = userOutputPath ?? join55(process.cwd(), ".oa", "camera-captures", `capture-${Date.now()}.jpg`);
|
|
503996
|
+
if (!userOutputPath) {
|
|
503997
|
+
mkdirSync14(join55(process.cwd(), ".oa", "camera-captures"), {
|
|
503998
|
+
recursive: true
|
|
503999
|
+
});
|
|
504000
|
+
writeFileSync15(outputPath, data);
|
|
504001
|
+
}
|
|
503995
504002
|
if (userOutputPath) {
|
|
503996
504003
|
return {
|
|
503997
504004
|
success: true,
|
|
@@ -504005,12 +504012,13 @@ Saved to: ${userOutputPath}`,
|
|
|
504005
504012
|
} catch {
|
|
504006
504013
|
}
|
|
504007
504014
|
const base642 = data.toString("base64");
|
|
504015
|
+
const display = `Captured ${resolution} frame from ${source} (${sizeKB}KB JPEG).
|
|
504016
|
+
Saved to: ${outputPath}`;
|
|
504008
504017
|
return {
|
|
504009
504018
|
success: true,
|
|
504010
|
-
output:
|
|
504011
|
-
|
|
504012
|
-
|
|
504013
|
-
data:image/jpeg;base64,${base642}`,
|
|
504019
|
+
output: display,
|
|
504020
|
+
llmContent: `${display}
|
|
504021
|
+
[IMAGE_BASE64:image/jpeg:${base642}]`,
|
|
504014
504022
|
durationMs: performance.now() - start2
|
|
504015
504023
|
};
|
|
504016
504024
|
}
|
|
@@ -531235,19 +531243,22 @@ TASK: ${task}` : task;
|
|
|
531235
531243
|
web_fetch: 4,
|
|
531236
531244
|
list_directory: 12,
|
|
531237
531245
|
find_files: 10,
|
|
531238
|
-
grep_search: 12
|
|
531246
|
+
grep_search: 12,
|
|
531247
|
+
camera_capture: 3
|
|
531239
531248
|
} : loopTier === "medium" ? {
|
|
531240
531249
|
web_search: 10,
|
|
531241
531250
|
web_fetch: 8,
|
|
531242
531251
|
list_directory: 18,
|
|
531243
531252
|
find_files: 14,
|
|
531244
|
-
grep_search: 18
|
|
531253
|
+
grep_search: 18,
|
|
531254
|
+
camera_capture: 4
|
|
531245
531255
|
} : {
|
|
531246
531256
|
web_search: 20,
|
|
531247
531257
|
web_fetch: 15,
|
|
531248
531258
|
list_directory: 30,
|
|
531249
531259
|
find_files: 20,
|
|
531250
|
-
grep_search: 30
|
|
531260
|
+
grep_search: 30,
|
|
531261
|
+
camera_capture: 5
|
|
531251
531262
|
};
|
|
531252
531263
|
for (const [tool, budget] of Object.entries(toolBudgets)) {
|
|
531253
531264
|
toolCallBudget.set(tool, budget);
|
|
@@ -534464,6 +534475,9 @@ Respond with EXACTLY this structure before your next tool call:
|
|
|
534464
534475
|
};
|
|
534465
534476
|
}
|
|
534466
534477
|
}
|
|
534478
|
+
if (result.success) {
|
|
534479
|
+
result = await this.offloadEmbeddedImageResult(result, tc.name, turn);
|
|
534480
|
+
}
|
|
534467
534481
|
let output = this.normalizeToolOutput(result, tc.name, tc.arguments, turn);
|
|
534468
534482
|
if (!result.success && (this.options.modelTier === "small" || this.options.modelTier === "medium")) {
|
|
534469
534483
|
const recovery = this.buildRecoveryGuidance(tc.name, result.error ?? "", tc.arguments);
|
|
@@ -536881,25 +536895,14 @@ Integrate this guidance into your current approach. Continue working on the task
|
|
|
536881
536895
|
turn,
|
|
536882
536896
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
536883
536897
|
});
|
|
536884
|
-
const
|
|
536885
|
-
|
|
536886
|
-
this.describeImageViaVisionSubagent(imageUrl, textContent),
|
|
536887
|
-
tmpImgPath ? this.extractImageOcrText(tmpImgPath) : Promise.resolve("")
|
|
536888
|
-
]);
|
|
536889
|
-
const visionDesc = visionOutcome.status === "fulfilled" ? visionOutcome.value.trim() : "";
|
|
536890
|
-
const ocrText = ocrOutcome.status === "fulfilled" ? ocrOutcome.value.trim() : "";
|
|
536891
|
-
if (visionDesc || ocrText) {
|
|
536892
|
-
const sections = [];
|
|
536893
|
-
if (visionDesc)
|
|
536894
|
-
sections.push(`[Image analysis]: ${visionDesc}`);
|
|
536895
|
-
if (ocrText)
|
|
536896
|
-
sections.push(`[OCR extracted text]: ${ocrText}`);
|
|
536898
|
+
const analysis = await this.analyzeImageDataForContext(mime, base642, textContent);
|
|
536899
|
+
if (analysis.contextBlock) {
|
|
536897
536900
|
const userPrefix = textContent ? `[User added context]: ${textContent}
|
|
536898
536901
|
|
|
536899
536902
|
` : "[User shared an image]. ";
|
|
536900
536903
|
messages2.push({
|
|
536901
536904
|
role: "user",
|
|
536902
|
-
content: userPrefix +
|
|
536905
|
+
content: userPrefix + analysis.contextBlock + "\n\nIntegrate this visual information into your current approach."
|
|
536903
536906
|
});
|
|
536904
536907
|
this.emit({
|
|
536905
536908
|
type: "status",
|
|
@@ -536909,7 +536912,7 @@ Integrate this guidance into your current approach. Continue working on the task
|
|
|
536909
536912
|
});
|
|
536910
536913
|
return;
|
|
536911
536914
|
}
|
|
536912
|
-
const reason =
|
|
536915
|
+
const reason = analysis.errorReason || "vision and OCR returned no text";
|
|
536913
536916
|
this.emit({
|
|
536914
536917
|
type: "status",
|
|
536915
536918
|
content: `Image offload unavailable (${reason}); falling back to inline image`,
|
|
@@ -536918,6 +536921,71 @@ Integrate this guidance into your current approach. Continue working on the task
|
|
|
536918
536921
|
});
|
|
536919
536922
|
this.appendInlineImageMessage(messages2, imageUrl, textContent);
|
|
536920
536923
|
}
|
|
536924
|
+
async offloadEmbeddedImageResult(result, toolName, turn) {
|
|
536925
|
+
const modelSource = result.llmContent ?? result.output;
|
|
536926
|
+
const image = this.extractFirstEmbeddedImage(modelSource);
|
|
536927
|
+
if (!image)
|
|
536928
|
+
return result;
|
|
536929
|
+
this.emit({
|
|
536930
|
+
type: "status",
|
|
536931
|
+
content: `${toolName}: offloading embedded image analysis outside main context`,
|
|
536932
|
+
turn,
|
|
536933
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
536934
|
+
});
|
|
536935
|
+
const analysis = await this.analyzeImageDataForContext(image.mime, image.base64, image.textWithoutImage.slice(0, 2e3));
|
|
536936
|
+
const imageNote = analysis.contextBlock ? `${analysis.contextBlock}
|
|
536937
|
+
|
|
536938
|
+
Use this image analysis. Do not repeat ${toolName} with the same arguments unless the scene has changed.` : `[Embedded image data omitted from model context; ${analysis.errorReason || "vision and OCR returned no text"}. Use any saved image path above with vision/image_read if further inspection is needed.]`;
|
|
536939
|
+
return {
|
|
536940
|
+
...result,
|
|
536941
|
+
llmContent: `${image.textWithoutImage.trim()}
|
|
536942
|
+
|
|
536943
|
+
${imageNote}`.trim()
|
|
536944
|
+
};
|
|
536945
|
+
}
|
|
536946
|
+
extractFirstEmbeddedImage(text) {
|
|
536947
|
+
const markerPattern = /\[IMAGE_BASE64:([^:\]]+):([^\]]+)\]/;
|
|
536948
|
+
const markerMatch = text.match(markerPattern);
|
|
536949
|
+
if (markerMatch) {
|
|
536950
|
+
const mime2 = markerMatch[1];
|
|
536951
|
+
const base643 = markerMatch[2];
|
|
536952
|
+
return {
|
|
536953
|
+
mime: mime2,
|
|
536954
|
+
base64: base643,
|
|
536955
|
+
textWithoutImage: text.replace(markerPattern, `[image data omitted: ${mime2}, ${base643.length} base64 chars]`).trim()
|
|
536956
|
+
};
|
|
536957
|
+
}
|
|
536958
|
+
const dataUrlPattern = /data:(image\/[a-zA-Z0-9.+-]+);base64,([A-Za-z0-9+/=]+)/;
|
|
536959
|
+
const dataUrlMatch = text.match(dataUrlPattern);
|
|
536960
|
+
if (!dataUrlMatch)
|
|
536961
|
+
return null;
|
|
536962
|
+
const mime = dataUrlMatch[1];
|
|
536963
|
+
const base642 = dataUrlMatch[2];
|
|
536964
|
+
return {
|
|
536965
|
+
mime,
|
|
536966
|
+
base64: base642,
|
|
536967
|
+
textWithoutImage: text.replace(dataUrlPattern, `[image data omitted: ${mime}, ${base642.length} base64 chars]`).trim()
|
|
536968
|
+
};
|
|
536969
|
+
}
|
|
536970
|
+
async analyzeImageDataForContext(mime, base642, textContent) {
|
|
536971
|
+
const imageUrl = `data:${mime};base64,${base642}`;
|
|
536972
|
+
const tmpImgPath = this.writeTempImageForOcr(mime, base642);
|
|
536973
|
+
const [visionOutcome, ocrOutcome] = await Promise.allSettled([
|
|
536974
|
+
this.describeImageViaVisionSubagent(imageUrl, textContent),
|
|
536975
|
+
tmpImgPath ? this.extractImageOcrText(tmpImgPath) : Promise.resolve("")
|
|
536976
|
+
]);
|
|
536977
|
+
const visionDesc = visionOutcome.status === "fulfilled" ? visionOutcome.value.trim() : "";
|
|
536978
|
+
const ocrText = ocrOutcome.status === "fulfilled" ? ocrOutcome.value.trim() : "";
|
|
536979
|
+
const sections = [];
|
|
536980
|
+
if (visionDesc)
|
|
536981
|
+
sections.push(`[Image analysis]: ${visionDesc}`);
|
|
536982
|
+
if (ocrText)
|
|
536983
|
+
sections.push(`[OCR extracted text]: ${ocrText}`);
|
|
536984
|
+
if (sections.length > 0)
|
|
536985
|
+
return { contextBlock: sections.join("\n\n") };
|
|
536986
|
+
const errorReason = visionOutcome.status === "rejected" ? String(visionOutcome.reason?.message ?? visionOutcome.reason) : void 0;
|
|
536987
|
+
return { contextBlock: "", errorReason };
|
|
536988
|
+
}
|
|
536921
536989
|
async describeImageViaVisionSubagent(imageUrl, textContent) {
|
|
536922
536990
|
const visionMessages = [
|
|
536923
536991
|
{
|
|
@@ -591936,6 +592004,36 @@ function normalizeTelegramMedia(message2) {
|
|
|
591936
592004
|
}
|
|
591937
592005
|
return void 0;
|
|
591938
592006
|
}
|
|
592007
|
+
function telegramMediaIsImage(media) {
|
|
592008
|
+
if (media.type === "photo") return true;
|
|
592009
|
+
if (media.mimeType?.toLowerCase().startsWith("image/")) return true;
|
|
592010
|
+
return /\.(png|jpe?g|gif|webp|bmp|tiff?)$/i.test(media.fileName ?? "");
|
|
592011
|
+
}
|
|
592012
|
+
function telegramImageExtension(media) {
|
|
592013
|
+
const fileName = media.fileName ?? "";
|
|
592014
|
+
const dotIdx = fileName.lastIndexOf(".");
|
|
592015
|
+
if (dotIdx >= 0) {
|
|
592016
|
+
const ext = fileName.slice(dotIdx).toLowerCase();
|
|
592017
|
+
if (/^\.(png|jpe?g|gif|webp|bmp|tiff?)$/.test(ext)) return ext;
|
|
592018
|
+
}
|
|
592019
|
+
const mime = media.mimeType?.toLowerCase() ?? "";
|
|
592020
|
+
if (mime.includes("png")) return ".png";
|
|
592021
|
+
if (mime.includes("webp")) return ".webp";
|
|
592022
|
+
if (mime.includes("gif")) return ".gif";
|
|
592023
|
+
if (mime.includes("bmp")) return ".bmp";
|
|
592024
|
+
if (mime.includes("tiff")) return ".tif";
|
|
592025
|
+
return ".jpg";
|
|
592026
|
+
}
|
|
592027
|
+
function telegramImageMime(media) {
|
|
592028
|
+
if (media.mimeType?.toLowerCase().startsWith("image/")) return media.mimeType;
|
|
592029
|
+
const ext = telegramImageExtension(media);
|
|
592030
|
+
if (ext === ".png") return "image/png";
|
|
592031
|
+
if (ext === ".webp") return "image/webp";
|
|
592032
|
+
if (ext === ".gif") return "image/gif";
|
|
592033
|
+
if (ext === ".bmp") return "image/bmp";
|
|
592034
|
+
if (ext === ".tif" || ext === ".tiff") return "image/tiff";
|
|
592035
|
+
return "image/jpeg";
|
|
592036
|
+
}
|
|
591939
592037
|
function normalizeTelegramUpdate(update2) {
|
|
591940
592038
|
const sourceUpdateType = update2.guest_message ? "guest_message" : update2.message ? "message" : null;
|
|
591941
592039
|
if (!sourceUpdateType) return null;
|
|
@@ -592794,12 +592892,22 @@ Join: ${newUrl}`);
|
|
|
592794
592892
|
}
|
|
592795
592893
|
const existing = this.subAgents.get(sessionKey);
|
|
592796
592894
|
if (existing && !existing.aborted) {
|
|
592797
|
-
|
|
592895
|
+
let steeringText = msg.text;
|
|
592896
|
+
if (msg.media) {
|
|
592897
|
+
const mediaContext = await this.processMedia(msg);
|
|
592898
|
+
if (mediaContext) {
|
|
592899
|
+
steeringText += `
|
|
592900
|
+
|
|
592901
|
+
[Media attached — processed content below]
|
|
592902
|
+
${mediaContext}`;
|
|
592903
|
+
}
|
|
592904
|
+
}
|
|
592905
|
+
this.recordChatHistory(sessionKey, { role: "user", text: steeringText, mode: "steering" });
|
|
592798
592906
|
if (existing.runner) {
|
|
592799
|
-
existing.runner.injectUserMessage(
|
|
592907
|
+
existing.runner.injectUserMessage(steeringText);
|
|
592800
592908
|
this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, "mid-conversation steering injected"));
|
|
592801
592909
|
} else {
|
|
592802
|
-
existing.pendingMessages.push(
|
|
592910
|
+
existing.pendingMessages.push(steeringText);
|
|
592803
592911
|
this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `queued (${existing.pendingMessages.length} pending)`));
|
|
592804
592912
|
}
|
|
592805
592913
|
return;
|
|
@@ -593494,8 +593602,9 @@ Todo/session id: ${sessionContext.sessionId}` : `Telegram ${isGroup ? "group" :
|
|
|
593494
593602
|
async processMedia(msg) {
|
|
593495
593603
|
if (!msg.media) return "";
|
|
593496
593604
|
const { type, fileId, fileUniqueId, mimeType, caption } = msg.media;
|
|
593605
|
+
const isImageMedia = telegramMediaIsImage(msg.media);
|
|
593497
593606
|
let ext = ".bin";
|
|
593498
|
-
if (
|
|
593607
|
+
if (isImageMedia) ext = telegramImageExtension(msg.media);
|
|
593499
593608
|
else if (type === "audio" || type === "voice") ext = ".ogg";
|
|
593500
593609
|
else if (type === "video" || type === "video_note" || type === "live_photo") ext = ".mp4";
|
|
593501
593610
|
else if (msg.media.fileName) {
|
|
@@ -593526,23 +593635,27 @@ Todo/session id: ${sessionContext.sessionId}` : `Telegram ${isGroup ? "group" :
|
|
|
593526
593635
|
username: msg.username
|
|
593527
593636
|
});
|
|
593528
593637
|
let description = `[${type}${caption ? `: ${caption}` : ""}]`;
|
|
593529
|
-
if (
|
|
593638
|
+
if (isImageMedia) {
|
|
593530
593639
|
let visionContext = "";
|
|
593531
593640
|
try {
|
|
593532
593641
|
const { runVisionIngress: runVisionIngress2, formatImageContextPrefix: formatImageContextPrefix2 } = await Promise.resolve().then(() => (init_vision_ingress(), vision_ingress_exports));
|
|
593533
593642
|
const ingressResult = await runVisionIngress2(
|
|
593534
|
-
{
|
|
593535
|
-
|
|
593643
|
+
{
|
|
593644
|
+
path: localPath,
|
|
593645
|
+
buffer: readFileSync84(localPath),
|
|
593646
|
+
mime: telegramImageMime(msg.media)
|
|
593647
|
+
},
|
|
593648
|
+
this.agentConfig?.model ?? ""
|
|
593536
593649
|
);
|
|
593537
593650
|
visionContext = formatImageContextPrefix2(ingressResult);
|
|
593538
593651
|
cacheEntry.extractedContent = ingressResult.contextBlock;
|
|
593539
593652
|
} catch {
|
|
593540
593653
|
}
|
|
593541
593654
|
if (visionContext) {
|
|
593542
|
-
description = `[
|
|
593655
|
+
description = `[Image received: ${localPath}${caption ? ` — caption: "${caption}"` : ""}
|
|
593543
593656
|
${visionContext}]`;
|
|
593544
593657
|
} else {
|
|
593545
|
-
description = `[
|
|
593658
|
+
description = `[Image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read or vision tools to analyze it if available.]`;
|
|
593546
593659
|
}
|
|
593547
593660
|
try {
|
|
593548
593661
|
await fetch("http://127.0.0.1:11435/v1/memory/ingest", {
|
|
@@ -593553,8 +593666,7 @@ ${visionContext}]`;
|
|
|
593553
593666
|
});
|
|
593554
593667
|
} catch {
|
|
593555
593668
|
}
|
|
593556
|
-
}
|
|
593557
|
-
if (type === "audio" || type === "voice") {
|
|
593669
|
+
} else if (type === "audio" || type === "voice") {
|
|
593558
593670
|
let transcription = null;
|
|
593559
593671
|
try {
|
|
593560
593672
|
const { getListenEngine: getListenEngine2 } = await Promise.resolve().then(() => (init_listen(), listen_exports));
|
|
@@ -593580,12 +593692,10 @@ ${visionContext}]`;
|
|
|
593580
593692
|
});
|
|
593581
593693
|
} catch {
|
|
593582
593694
|
}
|
|
593583
|
-
}
|
|
593584
|
-
if (type === "video" || type === "video_note" || type === "live_photo") {
|
|
593695
|
+
} else if (type === "video" || type === "video_note" || type === "live_photo") {
|
|
593585
593696
|
const label = type === "live_photo" ? "Live photo" : "Video";
|
|
593586
593697
|
description = `[${label} received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}.]`;
|
|
593587
|
-
}
|
|
593588
|
-
if (type === "document") {
|
|
593698
|
+
} else if (type === "document") {
|
|
593589
593699
|
description = `[Document received: ${msg.media.fileName || "unnamed"}${mimeType ? ` (${mimeType})` : ""}, saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}.]`;
|
|
593590
593700
|
}
|
|
593591
593701
|
cacheEntry.extractedContent = description;
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-agents-ai",
|
|
3
|
-
"version": "0.187.593",
|
|
3
|
+
"version": "0.187.594",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "open-agents-ai",
|
|
9
|
-
"version": "0.187.593",
|
|
9
|
+
"version": "0.187.594",
|
|
10
10
|
"hasInstallScript": true,
|
|
11
11
|
"license": "CC-BY-NC-4.0",
|
|
12
12
|
"dependencies": {
|
package/package.json
CHANGED