open-agents-ai 0.187.592 → 0.187.594

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -503992,6 +503992,13 @@ ${cameras.join("\n")}`,
503992
503992
  }
503993
503993
  const data = readFileSync29(filePath);
503994
503994
  const sizeKB = Math.round(data.length / 1024);
503995
+ const outputPath = userOutputPath ?? join55(process.cwd(), ".oa", "camera-captures", `capture-${Date.now()}.jpg`);
503996
+ if (!userOutputPath) {
503997
+ mkdirSync14(join55(process.cwd(), ".oa", "camera-captures"), {
503998
+ recursive: true
503999
+ });
504000
+ writeFileSync15(outputPath, data);
504001
+ }
503995
504002
  if (userOutputPath) {
503996
504003
  return {
503997
504004
  success: true,
@@ -504005,12 +504012,13 @@ Saved to: ${userOutputPath}`,
504005
504012
  } catch {
504006
504013
  }
504007
504014
  const base642 = data.toString("base64");
504015
+ const display = `Captured ${resolution} frame from ${source} (${sizeKB}KB JPEG).
504016
+ Saved to: ${outputPath}`;
504008
504017
  return {
504009
504018
  success: true,
504010
- output: `Captured ${resolution} frame from ${source} (${sizeKB}KB JPEG).
504011
-
504012
- Base64 image data (use with vision tools):
504013
- data:image/jpeg;base64,${base642}`,
504019
+ output: display,
504020
+ llmContent: `${display}
504021
+ [IMAGE_BASE64:image/jpeg:${base642}]`,
504014
504022
  durationMs: performance.now() - start2
504015
504023
  };
504016
504024
  }
@@ -527374,9 +527382,11 @@ RECOVERY: cd to the directory containing '${file}', run a plain install with no
527374
527382
  });
527375
527383
 
527376
527384
  // packages/orchestrator/dist/agenticRunner.js
527377
- import { existsSync as _fsExistsSync, readFileSync as _fsReadFileSync, writeFileSync as _fsWriteFileSync, mkdirSync as _fsMkdirSync } from "node:fs";
527385
+ import { existsSync as _fsExistsSync, readFileSync as _fsReadFileSync, writeFileSync as _fsWriteFileSync, unlinkSync as _fsUnlinkSync, mkdirSync as _fsMkdirSync } from "node:fs";
527386
+ import { execFile as _execFile } from "node:child_process";
527378
527387
  import { createHash as _createHash } from "node:crypto";
527379
527388
  import { join as _pathJoin } from "node:path";
527389
+ import { tmpdir as _osTmpdir } from "node:os";
527380
527390
  import { homedir as _osHomedir } from "node:os";
527381
527391
  import { z as z15 } from "zod";
527382
527392
  function repairJson(raw) {
@@ -531233,19 +531243,22 @@ TASK: ${task}` : task;
531233
531243
  web_fetch: 4,
531234
531244
  list_directory: 12,
531235
531245
  find_files: 10,
531236
- grep_search: 12
531246
+ grep_search: 12,
531247
+ camera_capture: 3
531237
531248
  } : loopTier === "medium" ? {
531238
531249
  web_search: 10,
531239
531250
  web_fetch: 8,
531240
531251
  list_directory: 18,
531241
531252
  find_files: 14,
531242
- grep_search: 18
531253
+ grep_search: 18,
531254
+ camera_capture: 4
531243
531255
  } : {
531244
531256
  web_search: 20,
531245
531257
  web_fetch: 15,
531246
531258
  list_directory: 30,
531247
531259
  find_files: 20,
531248
- grep_search: 30
531260
+ grep_search: 30,
531261
+ camera_capture: 5
531249
531262
  };
531250
531263
  for (const [tool, budget] of Object.entries(toolBudgets)) {
531251
531264
  toolCallBudget.set(tool, budget);
@@ -532289,45 +532302,7 @@ ${_staleSamples.join("\n")}` : ``,
532289
532302
  }
532290
532303
  while (this.pendingUserMessages.length > 0) {
532291
532304
  const userMsg = this.pendingUserMessages.shift();
532292
- const imagePattern = /\[IMAGE_BASE64:([^:]+):([^\]]+)\]/;
532293
- const imgMatch = userMsg.match(imagePattern);
532294
- if (imgMatch) {
532295
- const mime = imgMatch[1];
532296
- const base642 = imgMatch[2];
532297
- const textContent = userMsg.replace(imagePattern, "").trim();
532298
- const parts = [];
532299
- if (textContent) {
532300
- parts.push({
532301
- type: "text",
532302
- text: `[User added context]: ${textContent}
532303
-
532304
- Describe what you see and integrate this into your current approach.`
532305
- });
532306
- } else {
532307
- parts.push({
532308
- type: "text",
532309
- text: "[User shared an image]. Describe what you see and integrate this into your current approach."
532310
- });
532311
- }
532312
- parts.push({
532313
- type: "image_url",
532314
- image_url: { url: `data:${mime};base64,${base642}` }
532315
- });
532316
- messages2.push({ role: "user", content: parts });
532317
- } else {
532318
- messages2.push({
532319
- role: "user",
532320
- content: `[User added context]: ${userMsg}
532321
-
532322
- Integrate this guidance into your current approach. Continue working on the task.`
532323
- });
532324
- }
532325
- this.emit({
532326
- type: "user_interrupt",
532327
- content: userMsg.replace(/\[IMAGE_BASE64:[^\]]+\]/, "[image]").slice(0, 200),
532328
- turn,
532329
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
532330
- });
532305
+ await this.appendInjectedUserMessage(userMsg, messages2, turn);
532331
532306
  }
532332
532307
  {
532333
532308
  const maybeReminder = this.getTodoReminderContent(turn);
@@ -532648,12 +532623,28 @@ ${memoryLines.join("\n")}`
532648
532623
  timeoutMs: this.options.requestTimeoutMs
532649
532624
  };
532650
532625
  {
532626
+ const _imgB64Pat = /\[IMAGE_BASE64:[^\]]+\]/g;
532651
532627
  const ctxChars = compacted.reduce((s2, m2) => {
532652
- let c9 = typeof m2.content === "string" ? m2.content.length : 100;
532628
+ let c9 = 0;
532629
+ let imgCount = 0;
532630
+ if (typeof m2.content === "string") {
532631
+ const imgMatches = m2.content.match(_imgB64Pat);
532632
+ imgCount = imgMatches ? imgMatches.length : 0;
532633
+ c9 = m2.content.replace(_imgB64Pat, "").length;
532634
+ } else if (Array.isArray(m2.content)) {
532635
+ for (const p2 of m2.content) {
532636
+ if (p2.type === "text" && p2.text)
532637
+ c9 += p2.text.length;
532638
+ else if (p2.type === "image_url")
532639
+ imgCount++;
532640
+ }
532641
+ } else {
532642
+ c9 = 100;
532643
+ }
532653
532644
  if (m2.tool_calls)
532654
532645
  for (const tc of m2.tool_calls)
532655
532646
  c9 += tc.function.arguments?.length ?? 0;
532656
- return s2 + c9;
532647
+ return s2 + c9 + imgCount * 1500 * 4;
532657
532648
  }, 0);
532658
532649
  const estTokens = Math.ceil(ctxChars / 4);
532659
532650
  const limits = this.contextLimits();
@@ -532867,13 +532858,31 @@ ${memoryLines.join("\n")}`
532867
532858
  const choiceContent = response.choices[0]?.message?.content ?? "";
532868
532859
  const choiceArgs = response.choices[0]?.message?.toolCalls?.map((tc) => JSON.stringify(tc.arguments)).join("") ?? "";
532869
532860
  estimatedTokens += Math.ceil((choiceContent.length + choiceArgs.length) / 4);
532861
+ const IMAGE_TOKEN_ESTIMATE = 1500;
532862
+ const imageBase64Pattern = /\[IMAGE_BASE64:[^\]]+\]/g;
532870
532863
  const estimatedContextTokens = Math.ceil(compacted.reduce((sum, m2) => {
532871
- let chars = typeof m2.content === "string" ? m2.content.length : 100;
532864
+ let chars = 0;
532865
+ let imageCount = 0;
532866
+ if (typeof m2.content === "string") {
532867
+ const imageMatches = m2.content.match(imageBase64Pattern);
532868
+ imageCount = imageMatches ? imageMatches.length : 0;
532869
+ chars = m2.content.replace(imageBase64Pattern, "").length;
532870
+ } else if (Array.isArray(m2.content)) {
532871
+ for (const part of m2.content) {
532872
+ if (part.type === "text" && part.text) {
532873
+ chars += part.text.length;
532874
+ } else if (part.type === "image_url") {
532875
+ imageCount++;
532876
+ }
532877
+ }
532878
+ } else {
532879
+ chars = 100;
532880
+ }
532872
532881
  if (m2.tool_calls) {
532873
532882
  for (const tc of m2.tool_calls)
532874
532883
  chars += tc.function.arguments?.length ?? 0;
532875
532884
  }
532876
- return sum + chars;
532885
+ return sum + chars + imageCount * IMAGE_TOKEN_ESTIMATE * 4;
532877
532886
  }, 0) / 4);
532878
532887
  this.emit({
532879
532888
  type: "token_usage",
@@ -534466,6 +534475,9 @@ Respond with EXACTLY this structure before your next tool call:
534466
534475
  };
534467
534476
  }
534468
534477
  }
534478
+ if (result.success) {
534479
+ result = await this.offloadEmbeddedImageResult(result, tc.name, turn);
534480
+ }
534469
534481
  let output = this.normalizeToolOutput(result, tc.name, tc.arguments, turn);
534470
534482
  if (!result.success && (this.options.modelTier === "small" || this.options.modelTier === "medium")) {
534471
534483
  const recovery = this.buildRecoveryGuidance(tc.name, result.error ?? "", tc.arguments);
@@ -535327,45 +535339,7 @@ You have ${this.options.maxTurns} more turns. Continue making progress. Call tas
535327
535339
  }
535328
535340
  while (this.pendingUserMessages.length > 0) {
535329
535341
  const userMsg = this.pendingUserMessages.shift();
535330
- const imagePattern = /\[IMAGE_BASE64:([^:]+):([^\]]+)\]/;
535331
- const imgMatch = userMsg.match(imagePattern);
535332
- if (imgMatch) {
535333
- const mime = imgMatch[1];
535334
- const base642 = imgMatch[2];
535335
- const textContent = userMsg.replace(imagePattern, "").trim();
535336
- const parts = [];
535337
- if (textContent) {
535338
- parts.push({
535339
- type: "text",
535340
- text: `[User added context]: ${textContent}
535341
-
535342
- Describe what you see and integrate this into your current approach.`
535343
- });
535344
- } else {
535345
- parts.push({
535346
- type: "text",
535347
- text: "[User shared an image]. Describe what you see and integrate this into your current approach."
535348
- });
535349
- }
535350
- parts.push({
535351
- type: "image_url",
535352
- image_url: { url: `data:${mime};base64,${base642}` }
535353
- });
535354
- messages2.push({ role: "user", content: parts });
535355
- } else {
535356
- messages2.push({
535357
- role: "user",
535358
- content: `[User added context]: ${userMsg}
535359
-
535360
- Integrate this guidance into your current approach. Continue working on the task.`
535361
- });
535362
- }
535363
- this.emit({
535364
- type: "user_interrupt",
535365
- content: userMsg.replace(/\[IMAGE_BASE64:[^\]]+\]/, "[image]").slice(0, 200),
535366
- turn,
535367
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
535368
- });
535342
+ await this.appendInjectedUserMessage(userMsg, messages2, turn);
535369
535343
  }
535370
535344
  let compactedMsgs;
535371
535345
  if (this._pendingCompaction) {
@@ -535440,13 +535414,29 @@ Integrate this guidance into your current approach. Continue working on the task
535440
535414
  const choiceContent2 = response.choices[0]?.message?.content ?? "";
535441
535415
  const choiceArgs2 = response.choices[0]?.message?.toolCalls?.map((tc) => JSON.stringify(tc.arguments)).join("") ?? "";
535442
535416
  estimatedTokens += Math.ceil((choiceContent2.length + choiceArgs2.length) / 4);
535417
+ const _bfImgPat = /\[IMAGE_BASE64:[^\]]+\]/g;
535443
535418
  const bfEstCtx = Math.ceil(compactedMsgs.reduce((sum, m2) => {
535444
- let chars = typeof m2.content === "string" ? m2.content.length : 100;
535419
+ let chars = 0;
535420
+ let imgCount = 0;
535421
+ if (typeof m2.content === "string") {
535422
+ const imgMatches = m2.content.match(_bfImgPat);
535423
+ imgCount = imgMatches ? imgMatches.length : 0;
535424
+ chars = m2.content.replace(_bfImgPat, "").length;
535425
+ } else if (Array.isArray(m2.content)) {
535426
+ for (const p2 of m2.content) {
535427
+ if (p2.type === "text" && p2.text)
535428
+ chars += p2.text.length;
535429
+ else if (p2.type === "image_url")
535430
+ imgCount++;
535431
+ }
535432
+ } else {
535433
+ chars = 100;
535434
+ }
535445
535435
  if (m2.tool_calls) {
535446
535436
  for (const tc of m2.tool_calls)
535447
535437
  chars += tc.function.arguments?.length ?? 0;
535448
535438
  }
535449
- return sum + chars;
535439
+ return sum + chars + imgCount * 1500 * 4;
535450
535440
  }, 0) / 4);
535451
535441
  this.emit({
535452
535442
  type: "token_usage",
@@ -536874,18 +536864,241 @@ ${tail}`;
536874
536864
  }
536875
536865
  return folded;
536876
536866
  }
536867
+ async appendInjectedUserMessage(userMsg, messages2, turn) {
536868
+ const imagePattern = /\[IMAGE_BASE64:([^:]+):([^\]]+)\]/;
536869
+ const imgMatch = userMsg.match(imagePattern);
536870
+ if (imgMatch) {
536871
+ const mime = imgMatch[1];
536872
+ const base642 = imgMatch[2];
536873
+ const textContent = userMsg.replace(imagePattern, "").trim();
536874
+ await this.appendOffloadedImageMessage(messages2, mime, base642, textContent, turn);
536875
+ } else {
536876
+ messages2.push({
536877
+ role: "user",
536878
+ content: `[User added context]: ${userMsg}
536879
+
536880
+ Integrate this guidance into your current approach. Continue working on the task.`
536881
+ });
536882
+ }
536883
+ this.emit({
536884
+ type: "user_interrupt",
536885
+ content: userMsg.replace(/\[IMAGE_BASE64:[^\]]+\]/, "[image]").slice(0, 200),
536886
+ turn,
536887
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
536888
+ });
536889
+ }
536890
+ async appendOffloadedImageMessage(messages2, mime, base642, textContent, turn) {
536891
+ const imageUrl = `data:${mime};base64,${base642}`;
536892
+ this.emit({
536893
+ type: "status",
536894
+ content: "Image received; offloading visual analysis outside main context",
536895
+ turn,
536896
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
536897
+ });
536898
+ const analysis = await this.analyzeImageDataForContext(mime, base642, textContent);
536899
+ if (analysis.contextBlock) {
536900
+ const userPrefix = textContent ? `[User added context]: ${textContent}
536901
+
536902
+ ` : "[User shared an image]. ";
536903
+ messages2.push({
536904
+ role: "user",
536905
+ content: userPrefix + analysis.contextBlock + "\n\nIntegrate this visual information into your current approach."
536906
+ });
536907
+ this.emit({
536908
+ type: "status",
536909
+ content: "Image analysis added as text; base64 excluded from main context",
536910
+ turn,
536911
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
536912
+ });
536913
+ return;
536914
+ }
536915
+ const reason = analysis.errorReason || "vision and OCR returned no text";
536916
+ this.emit({
536917
+ type: "status",
536918
+ content: `Image offload unavailable (${reason}); falling back to inline image`,
536919
+ turn,
536920
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
536921
+ });
536922
+ this.appendInlineImageMessage(messages2, imageUrl, textContent);
536923
+ }
536924
+ async offloadEmbeddedImageResult(result, toolName, turn) {
536925
+ const modelSource = result.llmContent ?? result.output;
536926
+ const image = this.extractFirstEmbeddedImage(modelSource);
536927
+ if (!image)
536928
+ return result;
536929
+ this.emit({
536930
+ type: "status",
536931
+ content: `${toolName}: offloading embedded image analysis outside main context`,
536932
+ turn,
536933
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
536934
+ });
536935
+ const analysis = await this.analyzeImageDataForContext(image.mime, image.base64, image.textWithoutImage.slice(0, 2e3));
536936
+ const imageNote = analysis.contextBlock ? `${analysis.contextBlock}
536937
+
536938
+ Use this image analysis. Do not repeat ${toolName} with the same arguments unless the scene has changed.` : `[Embedded image data omitted from model context; ${analysis.errorReason || "vision and OCR returned no text"}. Use any saved image path above with vision/image_read if further inspection is needed.]`;
536939
+ return {
536940
+ ...result,
536941
+ llmContent: `${image.textWithoutImage.trim()}
536942
+
536943
+ ${imageNote}`.trim()
536944
+ };
536945
+ }
536946
+ extractFirstEmbeddedImage(text) {
536947
+ const markerPattern = /\[IMAGE_BASE64:([^:\]]+):([^\]]+)\]/;
536948
+ const markerMatch = text.match(markerPattern);
536949
+ if (markerMatch) {
536950
+ const mime2 = markerMatch[1];
536951
+ const base643 = markerMatch[2];
536952
+ return {
536953
+ mime: mime2,
536954
+ base64: base643,
536955
+ textWithoutImage: text.replace(markerPattern, `[image data omitted: ${mime2}, ${base643.length} base64 chars]`).trim()
536956
+ };
536957
+ }
536958
+ const dataUrlPattern = /data:(image\/[a-zA-Z0-9.+-]+);base64,([A-Za-z0-9+/=]+)/;
536959
+ const dataUrlMatch = text.match(dataUrlPattern);
536960
+ if (!dataUrlMatch)
536961
+ return null;
536962
+ const mime = dataUrlMatch[1];
536963
+ const base642 = dataUrlMatch[2];
536964
+ return {
536965
+ mime,
536966
+ base64: base642,
536967
+ textWithoutImage: text.replace(dataUrlPattern, `[image data omitted: ${mime}, ${base642.length} base64 chars]`).trim()
536968
+ };
536969
+ }
536970
+ async analyzeImageDataForContext(mime, base642, textContent) {
536971
+ const imageUrl = `data:${mime};base64,${base642}`;
536972
+ const tmpImgPath = this.writeTempImageForOcr(mime, base642);
536973
+ const [visionOutcome, ocrOutcome] = await Promise.allSettled([
536974
+ this.describeImageViaVisionSubagent(imageUrl, textContent),
536975
+ tmpImgPath ? this.extractImageOcrText(tmpImgPath) : Promise.resolve("")
536976
+ ]);
536977
+ const visionDesc = visionOutcome.status === "fulfilled" ? visionOutcome.value.trim() : "";
536978
+ const ocrText = ocrOutcome.status === "fulfilled" ? ocrOutcome.value.trim() : "";
536979
+ const sections = [];
536980
+ if (visionDesc)
536981
+ sections.push(`[Image analysis]: ${visionDesc}`);
536982
+ if (ocrText)
536983
+ sections.push(`[OCR extracted text]: ${ocrText}`);
536984
+ if (sections.length > 0)
536985
+ return { contextBlock: sections.join("\n\n") };
536986
+ const errorReason = visionOutcome.status === "rejected" ? String(visionOutcome.reason?.message ?? visionOutcome.reason) : void 0;
536987
+ return { contextBlock: "", errorReason };
536988
+ }
536989
+ async describeImageViaVisionSubagent(imageUrl, textContent) {
536990
+ const visionMessages = [
536991
+ {
536992
+ role: "system",
536993
+ content: "You are a visual analysis sub-agent. Describe the image in detail, including visible text, UI elements, code, diagrams, errors, and other task-relevant visual details. Be thorough but concise."
536994
+ },
536995
+ {
536996
+ role: "user",
536997
+ content: [
536998
+ {
536999
+ type: "text",
537000
+ text: textContent ? `Context from user: ${textContent}
537001
+
537002
+ Describe what you see in this image.` : "Describe what you see in this image in detail."
537003
+ },
537004
+ { type: "image_url", image_url: { url: imageUrl } }
537005
+ ]
537006
+ }
537007
+ ];
537008
+ const result = await this.backend.chatCompletion({
537009
+ messages: visionMessages,
537010
+ tools: [],
537011
+ temperature: 0.3,
537012
+ maxTokens: 2048,
537013
+ timeoutMs: 3e4,
537014
+ think: false
537015
+ });
537016
+ return result.choices[0]?.message?.content ?? "";
537017
+ }
537018
+ appendInlineImageMessage(messages2, imageUrl, textContent) {
537019
+ const parts = [
537020
+ {
537021
+ type: "text",
537022
+ text: textContent ? `[User added context]: ${textContent}
537023
+
537024
+ Describe what you see and integrate this into your current approach.` : "[User shared an image]. Describe what you see and integrate this into your current approach."
537025
+ },
537026
+ { type: "image_url", image_url: { url: imageUrl } }
537027
+ ];
537028
+ messages2.push({ role: "user", content: parts });
537029
+ }
537030
+ writeTempImageForOcr(mime, base642) {
537031
+ try {
537032
+ const ext = this.imageExtensionForMime(mime);
537033
+ const id = _createHash("sha256").update(`${process.pid}:${Date.now()}:`).update(base642.slice(0, 4096)).digest("hex").slice(0, 16);
537034
+ const tmpImgPath = _pathJoin(_osTmpdir(), `oa-img-${id}.${ext}`);
537035
+ _fsWriteFileSync(tmpImgPath, Buffer.from(base642, "base64"));
537036
+ return tmpImgPath;
537037
+ } catch {
537038
+ return null;
537039
+ }
537040
+ }
537041
+ imageExtensionForMime(mime) {
537042
+ const normalized = mime.toLowerCase();
537043
+ if (normalized.includes("jpeg") || normalized.includes("jpg"))
537044
+ return "jpg";
537045
+ if (normalized.includes("webp"))
537046
+ return "webp";
537047
+ if (normalized.includes("gif"))
537048
+ return "gif";
537049
+ if (normalized.includes("bmp"))
537050
+ return "bmp";
537051
+ if (normalized.includes("tiff"))
537052
+ return "tif";
537053
+ return "png";
537054
+ }
537055
+ async extractImageOcrText(tmpImgPath) {
537056
+ try {
537057
+ const stdout = await new Promise((resolve44, reject) => {
537058
+ _execFile("tesseract", [tmpImgPath, "stdout"], {
537059
+ encoding: "utf8",
537060
+ timeout: 15e3,
537061
+ maxBuffer: 2 * 1024 * 1024
537062
+ }, (err, out) => {
537063
+ if (err) {
537064
+ reject(err);
537065
+ return;
537066
+ }
537067
+ resolve44(out);
537068
+ });
537069
+ });
537070
+ return stdout.trim();
537071
+ } catch {
537072
+ return "";
537073
+ } finally {
537074
+ try {
537075
+ _fsUnlinkSync(tmpImgPath);
537076
+ } catch {
537077
+ }
537078
+ }
537079
+ }
536877
537080
  // -------------------------------------------------------------------------
536878
537081
  // Context compaction
536879
537082
  // -------------------------------------------------------------------------
536880
537083
  async compactMessages(messages2, strategy = "default", force = false) {
536881
537084
  if (messages2.length < 3)
536882
537085
  return messages2;
537086
+ const _compImgPat = /\[IMAGE_BASE64:[^\]]+\]/g;
536883
537087
  const totalChars = messages2.reduce((sum, m2) => {
536884
537088
  let chars = 0;
536885
- if (typeof m2.content === "string")
536886
- chars += m2.content.length;
536887
- else if (Array.isArray(m2.content)) {
536888
- chars += m2.content.reduce((s2, p2) => s2 + (p2.text?.length || 0) + (p2.image_url ? 1e3 : 0), 0);
537089
+ if (typeof m2.content === "string") {
537090
+ const imgMatches = m2.content.match(_compImgPat);
537091
+ const imgCount = imgMatches ? imgMatches.length : 0;
537092
+ chars += m2.content.replace(_compImgPat, "").length + imgCount * 1500 * 4;
537093
+ } else if (Array.isArray(m2.content)) {
537094
+ chars += m2.content.reduce((s2, p2) => {
537095
+ if (p2.type === "text" && p2.text)
537096
+ return s2 + p2.text.length;
537097
+ if (p2.type === "image_url" && p2.image_url?.url) {
537098
+ return s2 + 1500 * 4;
537099
+ }
537100
+ return s2;
537101
+ }, 0);
536889
537102
  }
536890
537103
  if (m2.tool_calls) {
536891
537104
  for (const tc of m2.tool_calls) {
@@ -536923,7 +537136,22 @@ ${tail}`;
536923
537136
  let budgetCut = messages2.length;
536924
537137
  for (let i2 = messages2.length - 1; i2 >= headEndIdx; i2--) {
536925
537138
  const msg = messages2[i2];
536926
- const msgChars = typeof msg.content === "string" ? msg.content.length : 100;
537139
+ let msgChars = 0;
537140
+ if (typeof msg.content === "string") {
537141
+ const _mImgPat = /\[IMAGE_BASE64:[^\]]+\]/g;
537142
+ const imgMatches = msg.content.match(_mImgPat);
537143
+ const imgCount = imgMatches ? imgMatches.length : 0;
537144
+ msgChars = msg.content.replace(_mImgPat, "").length + imgCount * 1500 * 4;
537145
+ } else if (Array.isArray(msg.content)) {
537146
+ for (const p2 of msg.content) {
537147
+ if (p2.type === "text" && p2.text)
537148
+ msgChars += p2.text.length;
537149
+ else if (p2.type === "image_url")
537150
+ msgChars += 1500 * 4;
537151
+ }
537152
+ } else {
537153
+ msgChars = 100;
537154
+ }
536927
537155
  const toolCallChars = (msg.tool_calls || []).reduce((s2, tc) => s2 + (tc.function?.arguments?.length || 0) + (tc.function?.name?.length || 0), 0);
536928
537156
  const msgTokens = Math.ceil((msgChars + toolCallChars) / 4) + 10;
536929
537157
  if (accumulated + msgTokens > tailTokenBudget && messages2.length - i2 >= 4) {
@@ -537032,7 +537260,25 @@ ${tail}`;
537032
537260
  const strategyLabel = strategy !== "default" ? ` (${strategy})` : "";
537033
537261
  const forceLabel = force ? " [manual]" : "";
537034
537262
  const preTokens = Math.ceil(totalChars / 4);
537035
- const postChars = combinedSummary.length + recent.reduce((s2, m2) => s2 + (typeof m2.content === "string" ? m2.content.length : 100), 0) + head.reduce((s2, m2) => s2 + (typeof m2.content === "string" ? m2.content.length : 100), 0);
537263
+ const _postImgPat = /\[IMAGE_BASE64:[^\]]+\]/g;
537264
+ const _estimateMsgChars = (m2) => {
537265
+ if (typeof m2.content === "string") {
537266
+ const imgMatches = m2.content.match(_postImgPat);
537267
+ const imgCount = imgMatches ? imgMatches.length : 0;
537268
+ return m2.content.replace(_postImgPat, "").length + imgCount * 1500 * 4;
537269
+ } else if (Array.isArray(m2.content)) {
537270
+ let c9 = 0;
537271
+ for (const p2 of m2.content) {
537272
+ if (p2.type === "text" && p2.text)
537273
+ c9 += p2.text.length;
537274
+ else if (p2.type === "image_url")
537275
+ c9 += 1500 * 4;
537276
+ }
537277
+ return c9;
537278
+ }
537279
+ return 100;
537280
+ };
537281
+ const postChars = combinedSummary.length + recent.reduce((s2, m2) => s2 + _estimateMsgChars(m2), 0) + head.reduce((s2, m2) => s2 + _estimateMsgChars(m2), 0);
537036
537282
  const postTokens = Math.ceil(postChars / 4);
537037
537283
  const savedTokens = preTokens - postTokens;
537038
537284
  this.emit({
@@ -537220,13 +537466,29 @@ ${content.slice(0, 8e3)}
537220
537466
  }
537221
537467
  const ctxWindow = this.options.contextWindowSize;
537222
537468
  if (ctxWindow > 0) {
537469
+ const _safetyImgPat = /\[IMAGE_BASE64:[^\]]+\]/g;
537223
537470
  const estimateResult = (msgs) => msgs.reduce((sum, m2) => {
537224
- let chars = typeof m2.content === "string" ? m2.content.length : 100;
537471
+ let chars = 0;
537472
+ let imgCount = 0;
537473
+ if (typeof m2.content === "string") {
537474
+ const imgMatches = m2.content.match(_safetyImgPat);
537475
+ imgCount = imgMatches ? imgMatches.length : 0;
537476
+ chars = m2.content.replace(_safetyImgPat, "").length;
537477
+ } else if (Array.isArray(m2.content)) {
537478
+ for (const p2 of m2.content) {
537479
+ if (p2.type === "text" && p2.text)
537480
+ chars += p2.text.length;
537481
+ else if (p2.type === "image_url")
537482
+ imgCount++;
537483
+ }
537484
+ } else {
537485
+ chars = 100;
537486
+ }
537225
537487
  if (m2.tool_calls) {
537226
537488
  for (const tc of m2.tool_calls)
537227
537489
  chars += tc.function.arguments?.length ?? 0;
537228
537490
  }
537229
- return sum + chars;
537491
+ return sum + chars + imgCount * 1500 * 4;
537230
537492
  }, 0) / 4;
537231
537493
  const safetyTarget = Math.floor(ctxWindow * 0.65);
537232
537494
  let trimmedRecent = [...filteredRecent];
@@ -538340,7 +538602,49 @@ ${transcript}`
538340
538602
  "pascal_case",
538341
538603
  "screaming_snake_case",
538342
538604
  "dot_notation",
538343
- "title_case"
538605
+ "title_case",
538606
+ // Conditionally-registered tools documented in the base system prompt
538607
+ // as available but only registered in TUI/desktop mode, NOT telegram:
538608
+ "background_run",
538609
+ // TUI-only: background shell execution
538610
+ "task_status",
538611
+ // TUI-only: check background task status
538612
+ "task_output",
538613
+ // TUI-only: read background task output
538614
+ "task_stop",
538615
+ // TUI-only: kill background task
538616
+ "skill_list",
538617
+ // TUI-only: discover available skills
538618
+ "skill_execute",
538619
+ // TUI-only: load and run a skill
538620
+ "skill_build",
538621
+ // TUI-only: generate a new skill
538622
+ "desktop_click",
538623
+ // TUI-only: click UI element by description
538624
+ "desktop_describe",
538625
+ // TUI-only: screenshot + describe desktop
538626
+ "repl_exec",
538627
+ // TUI-only: persistent Python REPL
538628
+ "cron_agent",
538629
+ // TUI-only: scheduled agent tasks
538630
+ "scheduler",
538631
+ // TUI-only: OS-level cron scheduling
538632
+ "reminder",
538633
+ // TUI-only: cross-session reminders
538634
+ "agenda",
538635
+ // TUI-only: attention directives
538636
+ "priority_classify",
538637
+ // TUI-only: task priority classification
538638
+ "priority_delegate",
538639
+ // TUI-only: delegate to sub-agent by priority
538640
+ "create_tool",
538641
+ // TUI-only: create custom tool
538642
+ "manage_tools",
538643
+ // TUI-only: list/inspect/delete custom tools
538644
+ "sub_agent",
538645
+ // TUI-only: delegate sub-task (telegram uses full_sub_agent)
538646
+ "nexus"
538647
+ // TUI-only: P2P networking
538344
538648
  ]);
538345
538649
  for (const tool of this.tools.values()) {
538346
538650
  const props = tool.parameters?.properties;
@@ -591700,6 +592004,36 @@ function normalizeTelegramMedia(message2) {
591700
592004
  }
591701
592005
  return void 0;
591702
592006
  }
592007
/**
 * Tells whether a normalized Telegram media object should be handled as an
 * image.
 *
 * It is an image when its type is `photo`, when its MIME type starts with
 * `image/` (case-insensitive), or when its file name ends in a known image
 * extension.
 *
 * @param {{type?: string, mimeType?: string, fileName?: string}} media
 * @returns {boolean}
 */
function telegramMediaIsImage(media) {
  const imageFileName = /\.(png|jpe?g|gif|webp|bmp|tiff?)$/i;
  return (
    media.type === "photo" ||
    media.mimeType?.toLowerCase().startsWith("image/") === true ||
    imageFileName.test(media.fileName ?? "")
  );
}
592012
/**
 * Chooses the file extension (with leading dot) for a Telegram image.
 *
 * A recognised image extension on the file name wins and is returned
 * lower-cased as written (so `.jpeg` and `.tiff` are kept). Otherwise the
 * MIME type is searched for a known format name, and `.jpg` is the final
 * fallback.
 *
 * @param {{mimeType?: string, fileName?: string}} media
 * @returns {string} Extension such as `.png` or `.jpg`.
 */
function telegramImageExtension(media) {
  const name = media.fileName ?? "";
  const lastDot = name.lastIndexOf(".");
  const suffix = lastDot < 0 ? "" : name.slice(lastDot).toLowerCase();
  if (/^\.(png|jpe?g|gif|webp|bmp|tiff?)$/.test(suffix)) {
    return suffix;
  }

  const declaredMime = media.mimeType?.toLowerCase() ?? "";
  // [substring of the MIME type, extension]; the first hit wins.
  const mimeHints = [
    ["png", ".png"],
    ["webp", ".webp"],
    ["gif", ".gif"],
    ["bmp", ".bmp"],
    ["tiff", ".tif"]
  ];
  for (const [needle, extension] of mimeHints) {
    if (declaredMime.includes(needle)) {
      return extension;
    }
  }
  return ".jpg";
}
592027
/**
 * Chooses the MIME type to report for a Telegram image.
 *
 * A declared MIME type that starts with `image/` (case-insensitive) is
 * returned exactly as given. Otherwise the type is derived from the
 * extension chosen by `telegramImageExtension`, defaulting to `image/jpeg`.
 *
 * @param {{mimeType?: string, fileName?: string}} media
 * @returns {string} An `image/*` MIME type.
 */
function telegramImageMime(media) {
  const declaresImage = media.mimeType?.toLowerCase().startsWith("image/");
  if (declaresImage) {
    return media.mimeType;
  }
  const mimeByExtension = new Map([
    [".png", "image/png"],
    [".webp", "image/webp"],
    [".gif", "image/gif"],
    [".bmp", "image/bmp"],
    [".tif", "image/tiff"],
    [".tiff", "image/tiff"]
  ]);
  return mimeByExtension.get(telegramImageExtension(media)) ?? "image/jpeg";
}
591703
592037
  function normalizeTelegramUpdate(update2) {
591704
592038
  const sourceUpdateType = update2.guest_message ? "guest_message" : update2.message ? "message" : null;
591705
592039
  if (!sourceUpdateType) return null;
@@ -592558,12 +592892,22 @@ Join: ${newUrl}`);
592558
592892
  }
592559
592893
  const existing = this.subAgents.get(sessionKey);
592560
592894
  if (existing && !existing.aborted) {
592561
- this.recordChatHistory(sessionKey, { role: "user", text: msg.text, mode: "steering" });
592895
+ let steeringText = msg.text;
592896
+ if (msg.media) {
592897
+ const mediaContext = await this.processMedia(msg);
592898
+ if (mediaContext) {
592899
+ steeringText += `
592900
+
592901
+ [Media attached — processed content below]
592902
+ ${mediaContext}`;
592903
+ }
592904
+ }
592905
+ this.recordChatHistory(sessionKey, { role: "user", text: steeringText, mode: "steering" });
592562
592906
  if (existing.runner) {
592563
- existing.runner.injectUserMessage(msg.text);
592907
+ existing.runner.injectUserMessage(steeringText);
592564
592908
  this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, "mid-conversation steering injected"));
592565
592909
  } else {
592566
- existing.pendingMessages.push(msg.text);
592910
+ existing.pendingMessages.push(steeringText);
592567
592911
  this.tuiWrite(() => renderTelegramSubAgentEvent(msg.username, `queued (${existing.pendingMessages.length} pending)`));
592568
592912
  }
592569
592913
  return;
@@ -593258,8 +593602,9 @@ Todo/session id: ${sessionContext.sessionId}` : `Telegram ${isGroup ? "group" :
593258
593602
  async processMedia(msg) {
593259
593603
  if (!msg.media) return "";
593260
593604
  const { type, fileId, fileUniqueId, mimeType, caption } = msg.media;
593605
+ const isImageMedia = telegramMediaIsImage(msg.media);
593261
593606
  let ext = ".bin";
593262
- if (type === "photo") ext = ".jpg";
593607
+ if (isImageMedia) ext = telegramImageExtension(msg.media);
593263
593608
  else if (type === "audio" || type === "voice") ext = ".ogg";
593264
593609
  else if (type === "video" || type === "video_note" || type === "live_photo") ext = ".mp4";
593265
593610
  else if (msg.media.fileName) {
@@ -593290,23 +593635,27 @@ Todo/session id: ${sessionContext.sessionId}` : `Telegram ${isGroup ? "group" :
593290
593635
  username: msg.username
593291
593636
  });
593292
593637
  let description = `[${type}${caption ? `: ${caption}` : ""}]`;
593293
- if (type === "photo") {
593638
+ if (isImageMedia) {
593294
593639
  let visionContext = "";
593295
593640
  try {
593296
593641
  const { runVisionIngress: runVisionIngress2, formatImageContextPrefix: formatImageContextPrefix2 } = await Promise.resolve().then(() => (init_vision_ingress(), vision_ingress_exports));
593297
593642
  const ingressResult = await runVisionIngress2(
593298
- { path: localPath, buffer: Buffer.from(""), mime: "image/png" },
593299
- ""
593643
+ {
593644
+ path: localPath,
593645
+ buffer: readFileSync84(localPath),
593646
+ mime: telegramImageMime(msg.media)
593647
+ },
593648
+ this.agentConfig?.model ?? ""
593300
593649
  );
593301
593650
  visionContext = formatImageContextPrefix2(ingressResult);
593302
593651
  cacheEntry.extractedContent = ingressResult.contextBlock;
593303
593652
  } catch {
593304
593653
  }
593305
593654
  if (visionContext) {
593306
- description = `[Photo received: ${localPath}${caption ? ` — caption: "${caption}"` : ""}
593655
+ description = `[Image received: ${localPath}${caption ? ` — caption: "${caption}"` : ""}
593307
593656
  ${visionContext}]`;
593308
593657
  } else {
593309
- description = `[Photo received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read or vision tools to analyze it if available.]`;
593658
+ description = `[Image received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}. You can use image_read or vision tools to analyze it if available.]`;
593310
593659
  }
593311
593660
  try {
593312
593661
  await fetch("http://127.0.0.1:11435/v1/memory/ingest", {
@@ -593317,8 +593666,7 @@ ${visionContext}]`;
593317
593666
  });
593318
593667
  } catch {
593319
593668
  }
593320
- }
593321
- if (type === "audio" || type === "voice") {
593669
+ } else if (type === "audio" || type === "voice") {
593322
593670
  let transcription = null;
593323
593671
  try {
593324
593672
  const { getListenEngine: getListenEngine2 } = await Promise.resolve().then(() => (init_listen(), listen_exports));
@@ -593344,12 +593692,10 @@ ${visionContext}]`;
593344
593692
  });
593345
593693
  } catch {
593346
593694
  }
593347
- }
593348
- if (type === "video" || type === "video_note" || type === "live_photo") {
593695
+ } else if (type === "video" || type === "video_note" || type === "live_photo") {
593349
593696
  const label = type === "live_photo" ? "Live photo" : "Video";
593350
593697
  description = `[${label} received and saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}.]`;
593351
- }
593352
- if (type === "document") {
593698
+ } else if (type === "document") {
593353
593699
  description = `[Document received: ${msg.media.fileName || "unnamed"}${mimeType ? ` (${mimeType})` : ""}, saved to ${localPath}${caption ? ` — caption: "${caption}"` : ""}.]`;
593354
593700
  }
593355
593701
  cacheEntry.extractedContent = description;
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.592",
3
+ "version": "0.187.594",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "open-agents-ai",
9
- "version": "0.187.592",
9
+ "version": "0.187.594",
10
10
  "hasInstallScript": true,
11
11
  "license": "CC-BY-NC-4.0",
12
12
  "dependencies": {
@@ -2049,9 +2049,9 @@
2049
2049
  }
2050
2050
  },
2051
2051
  "node_modules/aiwg": {
2052
- "version": "2026.5.4",
2053
- "resolved": "https://registry.npmjs.org/aiwg/-/aiwg-2026.5.4.tgz",
2054
- "integrity": "sha512-/10XfF6pD+7/I945vx1uhh37+N4NIp1NscGJUEAJAMwVVrHXdqZ4UotCfLBp6dnwOI3tI5jfg3zWJkr1yhUPOw==",
2052
+ "version": "2026.5.5",
2053
+ "resolved": "https://registry.npmjs.org/aiwg/-/aiwg-2026.5.5.tgz",
2054
+ "integrity": "sha512-bRMCp3qOAgZycb1Pyahmx3CgrMVtKU5gQhYpyY23r6B/LhNOP/+t0SHRuvPN4bOVOVsec/qKpma9OvQnlqrp0w==",
2055
2055
  "license": "MIT",
2056
2056
  "dependencies": {
2057
2057
  "@modelcontextprotocol/sdk": "^1.24.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-agents-ai",
3
- "version": "0.187.592",
3
+ "version": "0.187.594",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",