@opengeni/runtime 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -90,10 +90,10 @@ import {
90
90
  timeoutAgentError,
91
91
  timeoutControlResponse,
92
92
  verifyStreamToken
93
- } from "./chunk-2PO56VAL.js";
93
+ } from "./chunk-KNW7AMQB.js";
94
94
 
95
95
  // src/index.ts
96
- import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment as collectSandboxEnvironment2, contextServerCompactThreshold, firstPartyMcpBaseUrl, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
96
+ import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment as collectSandboxEnvironment2, contextInputBudgetTokens, contextServerCompactThreshold, firstPartyMcpBaseUrl, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
97
97
  import { CAPABILITY_DESCRIPTORS as CAPABILITY_DESCRIPTORS2, isClearedRunStateBlob, signDelegatedAccessToken } from "@opengeni/contracts";
98
98
  import {
99
99
  Agent,
@@ -509,6 +509,106 @@ function computerCallNormalizingFetch(base) {
509
509
  };
510
510
  }
511
511
 
512
+ // src/image-history.ts
513
+ var SCREENSHOT_OMITTED_PLACEHOLDER = "[screenshot omitted: an older desktop frame \u2014 the full image remains in the session event log]";
514
+ var DATA_IMAGE_BASE64_PATTERN = /data:image\/[a-z0-9.+-]+;base64,[a-z0-9+/=_-]+/i;
515
+ function elideStaleScreenshotImages(items, options = {}) {
516
+ const keepLast = Math.max(0, Math.floor(options.keepLast ?? 3));
517
+ const placeholder = options.placeholder ?? SCREENSHOT_OMITTED_PLACEHOLDER;
518
+ const occurrences = [];
519
+ for (let i = 0; i < items.length; i += 1) {
520
+ collectItemImageOccurrences(items[i], [i], placeholder, occurrences);
521
+ }
522
+ const elidedCount = Math.max(0, occurrences.length - keepLast);
523
+ if (elidedCount === 0) {
524
+ return { items: items.slice(), imageCount: occurrences.length, elidedCount: 0 };
525
+ }
526
+ const cloned = structuredClone(items);
527
+ for (const occurrence of occurrences.slice(0, elidedCount)) {
528
+ setPath(cloned, occurrence.path, occurrence.replacement);
529
+ }
530
+ return { items: cloned, imageCount: occurrences.length, elidedCount };
531
+ }
532
+ function collectItemImageOccurrences(item, path, placeholder, out) {
533
+ if (!isRecord(item)) {
534
+ return;
535
+ }
536
+ if (item.type === "message" && (item.role === "user" || item.role === "system")) {
537
+ return;
538
+ }
539
+ if (item.type === "computer_call_result" || item.type === "computer_call_output") {
540
+ collectComputerOutputImages(item, path, placeholder, out);
541
+ return;
542
+ }
543
+ if (item.type === "function_call_result" || item.type === "function_call_output") {
544
+ collectToolResultImages(item.output, [...path, "output"], placeholder, out);
545
+ }
546
+ }
547
+ function collectComputerOutputImages(item, path, placeholder, out) {
548
+ const output = item.output;
549
+ if (!isRecord(output) || output.type !== "computer_screenshot") {
550
+ return;
551
+ }
552
+ for (const key of ["data", "image_url", "imageUrl"]) {
553
+ if (isImageDataUrl(output[key])) {
554
+ out.push({ path: [...path, "output", key], replacement: placeholder });
555
+ return;
556
+ }
557
+ }
558
+ }
559
+ function collectToolResultImages(value, path, placeholder, out) {
560
+ if (typeof value === "string") {
561
+ if (isImageDataUrl(value)) {
562
+ out.push({ path, replacement: placeholder });
563
+ }
564
+ return;
565
+ }
566
+ if (Array.isArray(value)) {
567
+ for (let i = 0; i < value.length; i += 1) {
568
+ collectToolResultImages(value[i], [...path, i], placeholder, out);
569
+ }
570
+ return;
571
+ }
572
+ if (!isRecord(value)) {
573
+ return;
574
+ }
575
+ if (value.type === "input_image") {
576
+ for (const key of ["image", "imageUrl", "image_url"]) {
577
+ if (isImageDataUrl(value[key])) {
578
+ out.push({ path, replacement: { type: "input_text", text: placeholder } });
579
+ return;
580
+ }
581
+ }
582
+ }
583
+ for (const key of ["content", "text", "output"]) {
584
+ if (key in value) {
585
+ collectToolResultImages(value[key], [...path, key], placeholder, out);
586
+ }
587
+ }
588
+ }
589
+ function isImageDataUrl(value) {
590
+ return typeof value === "string" && DATA_IMAGE_BASE64_PATTERN.test(value);
591
+ }
592
+ function isRecord(value) {
593
+ return Boolean(value) && typeof value === "object" && !Array.isArray(value);
594
+ }
595
+ function setPath(root, path, value) {
596
+ if (path.length === 0) {
597
+ return;
598
+ }
599
+ let cursor = root;
600
+ for (let i = 0; i < path.length - 1; i += 1) {
601
+ const segment = path[i];
602
+ cursor = Array.isArray(cursor) ? cursor[segment] : cursor[segment];
603
+ }
604
+ const last = path[path.length - 1];
605
+ if (Array.isArray(cursor)) {
606
+ cursor[last] = value;
607
+ } else {
608
+ cursor[last] = value;
609
+ }
610
+ }
611
+
512
612
  // src/codex-tool-search.ts
513
613
  import { toolSearchTool } from "@openai/agents";
514
614
  var CODEX_APPS_TOOL_PREFIX = "codex_apps__";
@@ -685,13 +785,22 @@ function installCodexToolSearch(agent, connectorNamespaces = NO_NAMESPACES) {
685
785
 
686
786
  // src/context-compaction.ts
687
787
  var COMPACTION_SUMMARY_MARKER = "opengeni_context_summary";
688
- var SUMMARY_PREFIX = [
689
- "[CONTEXT CHECKPOINT] The earlier part of this conversation was automatically compacted to stay within the model context window.",
690
- "Durable facts already live in the workspace notebook / document bases (via MCP) \u2014 the summary below is a light working-memory bridge, not a full transcript.",
691
- "Trust it for current objective, decisions, blockers, deployed/infra state, and next steps; re-read the notebook for anything authoritative.",
788
+ var SUMMARY_BUFFER_TOKENS = 2e4;
789
+ var COMPACT_USER_MESSAGE_MAX_TOKENS = 2e4;
790
+ var CLIENT_COMPACTION_TRIGGER_FRACTION = 0.9;
791
+ var COMPACTION_PROMPT = [
792
+ "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.",
692
793
  "",
693
- "SUMMARY:"
794
+ "Include:",
795
+ "- Current progress and key decisions made",
796
+ "- Important context, constraints, or user preferences",
797
+ "- What remains to be done (clear next steps)",
798
+ "- Any critical data, examples, or references needed to continue",
799
+ "",
800
+ "Be concise, structured, and focused on helping the next LLM seamlessly continue the work."
694
801
  ].join("\n");
802
+ var SUMMARY_PREFIX = "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:";
803
+ var USER_MESSAGE_TRUNCATION_MARKER = "\n[... middle truncated for context compaction ...]\n";
695
804
  var RESULT_TYPE_BY_CALL_TYPE2 = {
696
805
  function_call: "function_call_result",
697
806
  computer_call: "computer_call_result",
@@ -735,6 +844,56 @@ function estimateTokens(items) {
735
844
  }
736
845
  return total;
737
846
  }
847
+ function clientCompactionThresholdTokens(input) {
848
+ const available = Math.max(
849
+ 0,
850
+ input.contextWindowTokens - input.contextReservedOutputTokens - SUMMARY_BUFFER_TOKENS
851
+ );
852
+ return Math.floor(available * CLIENT_COMPACTION_TRIGGER_FRACTION);
853
+ }
854
+ function decideClientCompaction(input) {
855
+ const thresholdTokens = clientCompactionThresholdTokens(input);
856
+ const recorded = typeof input.lastInputTokens === "number" && input.lastInputTokens > 0 ? input.lastInputTokens : 0;
857
+ const signalTokens = recorded > 0 ? recorded : estimateTokens(input.items);
858
+ if (input.items.length === 0) {
859
+ return { shouldCompact: false, reason: "no_history", signalTokens, thresholdTokens };
860
+ }
861
+ if (input.force) {
862
+ return { shouldCompact: true, reason: "force", signalTokens, thresholdTokens };
863
+ }
864
+ if (signalTokens > thresholdTokens) {
865
+ return { shouldCompact: true, reason: "above_threshold", signalTokens, thresholdTokens };
866
+ }
867
+ return { shouldCompact: false, reason: "below_threshold", signalTokens, thresholdTokens };
868
+ }
869
+ var CompactionNeededError = class extends Error {
870
+ signalTokens;
871
+ thresholdTokens;
872
+ signalSource;
873
+ constructor(input) {
874
+ super(
875
+ `Context compaction needed: signal ${input.signalTokens} tokens exceeded threshold ${input.thresholdTokens}`
876
+ );
877
+ this.name = "CompactionNeededError";
878
+ this.signalTokens = input.signalTokens;
879
+ this.thresholdTokens = input.thresholdTokens;
880
+ this.signalSource = input.signalSource;
881
+ }
882
+ };
883
+ function findCompactionNeededError(error, seen = /* @__PURE__ */ new WeakSet()) {
884
+ if (error instanceof CompactionNeededError) {
885
+ return error;
886
+ }
887
+ if (!error || typeof error !== "object") {
888
+ return null;
889
+ }
890
+ if (seen.has(error)) {
891
+ return null;
892
+ }
893
+ seen.add(error);
894
+ const record = error;
895
+ return findCompactionNeededError(record.cause, seen) ?? findCompactionNeededError(record.error, seen);
896
+ }
738
897
  function findKeepBoundary(items, keepRecentTokens) {
739
898
  const boundaries = [];
740
899
  for (let i = 0; i < items.length; i += 1) {
@@ -771,150 +930,116 @@ function enforceInputBudget(items, maxTokens, trailingTokens = 0) {
771
930
  estimatedTokens: estimateTokens(kept) + Math.max(0, trailingTokens)
772
931
  };
773
932
  }
774
- function planCompaction(input) {
775
- const softLimit = Math.floor(input.inputBudgetTokens * input.softFraction);
776
- const hardLimit = Math.floor(input.inputBudgetTokens * input.hardFraction);
777
- const recorded = typeof input.lastInputTokens === "number" && input.lastInputTokens > 0 ? input.lastInputTokens : 0;
778
- const signalTokens = Math.max(recorded, estimateTokens(input.items));
779
- const hardForced = signalTokens >= hardLimit;
780
- const empty = {
781
- shouldCompact: false,
782
- reason: "below_threshold",
783
- signalTokens,
784
- hardForced,
785
- boundaryIndex: input.items.length,
786
- prefixItems: [],
787
- priorSummaryItem: null,
788
- tailItems: [...input.items]
789
- };
790
- if (!input.force && signalTokens < softLimit) {
791
- return empty;
792
- }
793
- const effectiveKeepRecent = hardForced ? Math.min(
794
- Math.floor(input.keepRecentTokens / 2),
795
- Math.floor(input.inputBudgetTokens / 4)
796
- ) : input.keepRecentTokens;
797
- const boundaryIndex = findKeepBoundary(input.items, effectiveKeepRecent);
798
- if (boundaryIndex <= 0) {
799
- return { ...empty, reason: "no_boundary", boundaryIndex };
800
- }
801
- const prefix = input.items.slice(0, boundaryIndex);
802
- const tailItems = input.items.slice(boundaryIndex);
803
- let priorSummaryItem = null;
804
- const prefixItems = [];
805
- for (const item of prefix) {
806
- if (isCompactionSummary(item)) {
807
- priorSummaryItem = item;
933
+ function buildCompactionPromptInput(items) {
934
+ return [
935
+ ...items,
936
+ {
937
+ type: "message",
938
+ role: "user",
939
+ content: COMPACTION_PROMPT
940
+ }
941
+ ];
942
+ }
943
+ function buildCompactionReplacementHistory(items, summaryBody) {
944
+ const history = [];
945
+ for (const item of items) {
946
+ if (!isUserMessage(item) || isCompactionSummary(item)) {
808
947
  continue;
809
948
  }
810
- prefixItems.push(item);
811
- }
812
- if (prefixItems.length === 0) {
813
- return { ...empty, reason: "nothing_to_summarize", boundaryIndex };
949
+ history.push(compactUserMessage(item));
814
950
  }
951
+ history.push(buildSummaryItem(summaryBody));
952
+ return history;
953
+ }
954
+ function buildSummaryItem(summaryBody) {
955
+ const trimmed = summaryBody.trim();
815
956
  return {
816
- shouldCompact: true,
817
- reason: "compact",
818
- signalTokens,
819
- hardForced,
820
- boundaryIndex,
821
- prefixItems,
822
- priorSummaryItem,
823
- tailItems
957
+ type: "message",
958
+ role: "user",
959
+ content: `${SUMMARY_PREFIX}
960
+ ${trimmed}`,
961
+ [COMPACTION_SUMMARY_MARKER]: true
824
962
  };
825
963
  }
826
- function compactionSummaryText(item) {
827
- if (!item) {
828
- return "";
964
+ function compactUserMessage(item) {
965
+ const text = messageText(item);
966
+ const next = { ...item };
967
+ if (estimatedTextTokens(text) > COMPACT_USER_MESSAGE_MAX_TOKENS) {
968
+ next.content = truncateMiddleByEstimatedTokens(text, COMPACT_USER_MESSAGE_MAX_TOKENS);
969
+ return next;
970
+ }
971
+ next.content = contentWithoutImages(item);
972
+ return next;
973
+ }
974
+ function estimatedTextTokens(text) {
975
+ return Math.ceil(text.length / 4);
976
+ }
977
+ function truncateMiddleByEstimatedTokens(text, maxTokens) {
978
+ const maxChars = Math.max(0, maxTokens * 4);
979
+ if (text.length <= maxChars) {
980
+ return text;
829
981
  }
982
+ if (maxChars <= USER_MESSAGE_TRUNCATION_MARKER.length) {
983
+ return USER_MESSAGE_TRUNCATION_MARKER.slice(0, maxChars);
984
+ }
985
+ const keepChars = maxChars - USER_MESSAGE_TRUNCATION_MARKER.length;
986
+ const headChars = Math.ceil(keepChars / 2);
987
+ const tailChars = Math.floor(keepChars / 2);
988
+ return `${text.slice(0, headChars)}${USER_MESSAGE_TRUNCATION_MARKER}${text.slice(text.length - tailChars)}`;
989
+ }
990
+ function contentWithoutImages(item) {
991
+ const content = item.content;
992
+ if (!Array.isArray(content)) {
993
+ return content;
994
+ }
995
+ return content.filter((part) => {
996
+ if (!part || typeof part !== "object") {
997
+ return true;
998
+ }
999
+ const type = part.type;
1000
+ return type !== "input_image" && type !== "image_url";
1001
+ });
1002
+ }
1003
+ function messageText(item) {
830
1004
  const content = item.content;
831
1005
  if (typeof content === "string") {
832
- return stripSummaryPrefix(content);
1006
+ return content;
833
1007
  }
834
1008
  if (Array.isArray(content)) {
835
- const text = content.map((part) => {
1009
+ return content.map((part) => {
836
1010
  if (part && typeof part === "object") {
837
- const t = part.text;
838
- return typeof t === "string" ? t : "";
1011
+ const record = part;
1012
+ if (typeof record.text === "string") {
1013
+ return record.text;
1014
+ }
1015
+ if (typeof record.content === "string") {
1016
+ return record.content;
1017
+ }
839
1018
  }
840
1019
  return "";
841
1020
  }).join("");
842
- return stripSummaryPrefix(text);
843
1021
  }
844
1022
  return "";
845
1023
  }
846
- function stripSummaryPrefix(text) {
847
- const marker = "SUMMARY:";
848
- const idx = text.indexOf(marker);
849
- return idx >= 0 ? text.slice(idx + marker.length) : text;
850
- }
851
- function buildSummaryItem(summaryBody) {
852
- return {
853
- type: "message",
854
- role: "user",
855
- content: `${SUMMARY_PREFIX}${summaryBody}`,
856
- [COMPACTION_SUMMARY_MARKER]: true
857
- };
858
- }
859
- var SUMMARY_INSTRUCTIONS = [
860
- "You are compacting the earlier part of a long-running agent conversation into a compact working-memory checkpoint so the agent can continue past the model's context limit.",
861
- "Durable facts already live in the workspace notebook and document bases (via MCP). Do NOT re-derive or copy those; summarize POINTERS, not contents.",
862
- "Capture, concisely and factually:",
863
- "- The current objective and the key decisions made so far.",
864
- "- Open blockers and anything in-progress.",
865
- "- Deployed / infrastructure state that has changed (what exists now).",
866
- "- Environment and credential facts BY REFERENCE ONLY \u2014 name the env var keys, secret names, or notebook/document ids; NEVER copy a secret value, token, key, or password.",
867
- "- Concrete next steps.",
868
- "Say explicitly that durable facts are in the notebook and that this summary lists pointers, not contents.",
869
- "Output only the summary body \u2014 no preamble, no markdown headers, plain prose or terse bullets."
870
- ].join("\n");
871
- function renderPrefixTranscript(items, priorSummaryText) {
872
- const lines = [];
873
- if (priorSummaryText.trim().length > 0) {
874
- lines.push("PRIOR CHECKPOINT SUMMARY (fold this forward; it already replaced even older history):");
875
- lines.push(priorSummaryText.trim());
876
- lines.push("");
877
- lines.push("CONVERSATION SINCE THAT CHECKPOINT:");
878
- } else {
879
- lines.push("CONVERSATION TO SUMMARIZE:");
880
- }
881
- for (const item of items) {
882
- lines.push(renderItem(item));
883
- }
884
- return lines.join("\n");
1024
+ function renderCompactionPromptInputForChat(input) {
1025
+ return input.map(renderItem).join("\n");
885
1026
  }
886
1027
  function renderItem(item) {
887
1028
  const type = itemType2(item) ?? "unknown";
888
1029
  if (type === "message") {
889
1030
  const role = itemRole(item) ?? "assistant";
890
- return `[${role}] ${truncate(messageText(item), 4e3)}`;
1031
+ return `[${role}] ${truncateForTranscript(messageText(item), 4e3)}`;
891
1032
  }
892
1033
  if (type === "reasoning") {
893
1034
  return "[reasoning] (omitted)";
894
1035
  }
895
1036
  if (RESULT_TYPES2.has(type)) {
896
- return `[tool_result] ${truncate(resultText(item), 2e3)}`;
1037
+ return `[tool_result] ${truncateForTranscript(resultText(item), 2e3)}`;
897
1038
  }
898
1039
  if (RESULT_TYPE_BY_CALL_TYPE2[type]) {
899
- return `[tool_call ${type}] ${truncate(callText(item), 1e3)}`;
900
- }
901
- return `[${type}] ${truncate(safeStringify(item), 1e3)}`;
902
- }
903
- function messageText(item) {
904
- const content = item.content;
905
- if (typeof content === "string") {
906
- return content;
907
- }
908
- if (Array.isArray(content)) {
909
- return content.map((part) => {
910
- if (part && typeof part === "object") {
911
- const t = part.text;
912
- return typeof t === "string" ? t : "";
913
- }
914
- return "";
915
- }).join("");
1040
+ return `[tool_call ${type}] ${truncateForTranscript(callText(item), 1e3)}`;
916
1041
  }
917
- return "";
1042
+ return `[${type}] ${truncateForTranscript(safeStringify(item), 1e3)}`;
918
1043
  }
919
1044
  function resultText(item) {
920
1045
  const output = item.output;
@@ -937,18 +1062,11 @@ function safeStringify(value) {
937
1062
  return String(value);
938
1063
  }
939
1064
  }
940
- function truncate(text, max) {
1065
+ function truncateForTranscript(text, max) {
941
1066
  if (text.length <= max) {
942
1067
  return text;
943
1068
  }
944
- return `${text.slice(0, max)}\u2026 (${text.length - max} more chars)`;
945
- }
946
- function buildCompactionMessages(plan) {
947
- const priorText = compactionSummaryText(plan.priorSummaryItem);
948
- return {
949
- system: SUMMARY_INSTRUCTIONS,
950
- user: renderPrefixTranscript(plan.prefixItems, priorText)
951
- };
1069
+ return `${text.slice(0, max)}... (${text.length - max} more chars)`;
952
1070
  }
953
1071
 
954
1072
  // src/sandbox-computer.ts
@@ -966,8 +1084,8 @@ var DEFAULT_DIMENSIONS = [1280, 800];
966
1084
  var ACTION_YIELD_MS = 15e3;
967
1085
  var SCROLL_NOTCH_PIXELS = 100;
968
1086
  var SCROLL_MAX_CLICKS = 15;
969
- var SCREENSHOT_MAX_ATTEMPTS = 3;
970
- var SCREENSHOT_RETRY_DELAY_MS = 400;
1087
+ var SCREENSHOT_WARMUP_BUDGET_MS = 3e4;
1088
+ var SCREENSHOT_RETRY_DELAY_MS = 750;
971
1089
  var KEYSYM = {
972
1090
  ctrl: "ctrl",
973
1091
  control: "ctrl",
@@ -1036,6 +1154,8 @@ var SandboxComputer = class {
1036
1154
  typeDelayMs;
1037
1155
  readOnly;
1038
1156
  tmp;
1157
+ screenshotWarmupBudgetMs;
1158
+ screenshotRetryDelayMs;
1039
1159
  constructor(session, opts = {}) {
1040
1160
  this.session = session;
1041
1161
  this.display = opts.display ?? DEFAULT_DISPLAY;
@@ -1046,6 +1166,8 @@ var SandboxComputer = class {
1046
1166
  this.typeDelayMs = opts.typeDelayMs ?? 12;
1047
1167
  this.readOnly = opts.readOnly ?? false;
1048
1168
  this.tmp = opts.screenshotTmpDir ?? "/tmp";
1169
+ this.screenshotWarmupBudgetMs = opts.screenshotWarmupBudgetMs ?? SCREENSHOT_WARMUP_BUDGET_MS;
1170
+ this.screenshotRetryDelayMs = opts.screenshotRetryDelayMs ?? SCREENSHOT_RETRY_DELAY_MS;
1049
1171
  }
1050
1172
  /** Rebind to a freshly resumed-by-id session after a box rollover / re-establish. */
1051
1173
  rebind(session) {
@@ -1090,10 +1212,13 @@ var SandboxComputer = class {
1090
1212
  }
1091
1213
  async screenshot() {
1092
1214
  let lastError;
1093
- for (let attempt = 0; attempt < SCREENSHOT_MAX_ATTEMPTS; attempt++) {
1215
+ const deadline = Date.now() + this.screenshotWarmupBudgetMs;
1216
+ let attempt = 0;
1217
+ while (true) {
1094
1218
  if (attempt > 0) {
1095
- await new Promise((r) => setTimeout(r, SCREENSHOT_RETRY_DELAY_MS));
1219
+ await new Promise((r) => setTimeout(r, this.screenshotRetryDelayMs));
1096
1220
  }
1221
+ attempt++;
1097
1222
  const f = `${this.tmp}/og-shot-${Date.now()}-${Math.random().toString(36).slice(2)}.png`;
1098
1223
  try {
1099
1224
  await this.x(`scrot --pointer --overwrite ${f}`);
@@ -1107,6 +1232,9 @@ var SandboxComputer = class {
1107
1232
  } finally {
1108
1233
  await this.x(`rm -f ${f}`).catch(() => void 0);
1109
1234
  }
1235
+ if (Date.now() + this.screenshotRetryDelayMs >= deadline) {
1236
+ break;
1237
+ }
1110
1238
  }
1111
1239
  if (lastError instanceof Error) {
1112
1240
  throw lastError;
@@ -1449,16 +1577,29 @@ var ComputerUseCapability = class extends Capability {
1449
1577
  // The SDK base exposes the bound runAs as a protected field.
1450
1578
  ...typeof this._runAs === "string" ? { runAs: this._runAs } : {}
1451
1579
  });
1580
+ switch (this.args.toolMode) {
1581
+ case "hosted":
1582
+ return [this.hostedComputerTool(computer)];
1583
+ case "function-image":
1584
+ return computerFunctionTools(computer, this.args.readOnly ?? false, this.args.needsApproval, true);
1585
+ case "function-text":
1586
+ return computerFunctionTools(computer, this.args.readOnly ?? false, this.args.needsApproval, false);
1587
+ case void 0:
1588
+ break;
1589
+ }
1452
1590
  if (supportsStructuredToolOutputTransport(this._modelInstance)) {
1453
- return [
1454
- computerTool({
1455
- computer,
1456
- ...this.args.needsApproval !== void 0 ? { needsApproval: this.args.needsApproval } : {}
1457
- })
1458
- ];
1591
+ return [this.hostedComputerTool(computer)];
1459
1592
  }
1460
1593
  return computerFunctionTools(computer, this.args.readOnly ?? false, this.args.needsApproval, this.args.imageFunctionResults ?? false);
1461
1594
  }
1595
+ /** The single HOSTED `computer_use_preview` tool bound to `computer` — identical
1596
+ * construction for the explicit "hosted" mode and the legacy structured-sniff path. */
1597
+ hostedComputerTool(computer) {
1598
+ return computerTool({
1599
+ computer,
1600
+ ...this.args.needsApproval !== void 0 ? { needsApproval: this.args.needsApproval } : {}
1601
+ });
1602
+ }
1462
1603
  };
1463
1604
 
1464
1605
  // src/index.ts
@@ -1622,20 +1763,17 @@ function configureOpenAI(settings) {
1622
1763
  }
1623
1764
  setDefaultModelProvider(router);
1624
1765
  }
1625
- async function summarizeForCompaction(settings, messages, options = {}) {
1766
+ async function summarizeForCompaction(settings, input, options = {}) {
1626
1767
  const client = options.client ?? buildOpenAIClientFromSettings(settings);
1627
1768
  const api = options.api ?? "responses";
1628
1769
  const model = options.model ?? settings.openaiModel;
1629
- const maxTokens = options.maxOutputTokens ?? settings.contextSummaryMaxTokens;
1770
+ const maxTokens = options.maxOutputTokens ?? SUMMARY_BUFFER_TOKENS;
1630
1771
  try {
1631
1772
  if (api === "chat") {
1632
1773
  const completion = await client.chat.completions.create({
1633
1774
  model,
1634
1775
  max_tokens: maxTokens,
1635
- messages: [
1636
- { role: "system", content: messages.system },
1637
- { role: "user", content: messages.user }
1638
- ]
1776
+ messages: [{ role: "user", content: renderCompactionPromptInputForChat(input) }]
1639
1777
  });
1640
1778
  const text2 = completion.choices?.[0]?.message?.content;
1641
1779
  const trimmed2 = typeof text2 === "string" ? text2.trim() : "";
@@ -1648,10 +1786,7 @@ async function summarizeForCompaction(settings, messages, options = {}) {
1648
1786
  // built-in path (api "responses"), so gate it on the built-in provider.
1649
1787
  ...settings.openaiProvider === "azure" ? {} : { store: false },
1650
1788
  max_output_tokens: maxTokens,
1651
- input: [
1652
- { role: "system", content: messages.system },
1653
- { role: "user", content: messages.user }
1654
- ]
1789
+ input
1655
1790
  });
1656
1791
  const text = extractResponseOutputText(response);
1657
1792
  const trimmed = text.trim();
@@ -1786,7 +1921,8 @@ function buildOpenGeniAgent(settings, resources, options = {}) {
1786
1921
  capabilities: buildAgentCapabilities(settings, options.packSkills ?? [], {
1787
1922
  compactionMode,
1788
1923
  contextWindowTokens,
1789
- ...options.structuredToolTransport !== void 0 ? { structuredToolTransport: options.structuredToolTransport } : {}
1924
+ ...options.structuredToolTransport !== void 0 ? { structuredToolTransport: options.structuredToolTransport } : {},
1925
+ ...options.computerToolMode !== void 0 ? { computerToolMode: options.computerToolMode } : {}
1790
1926
  })
1791
1927
  });
1792
1928
  agentFileDownloads.set(agent, normalizeSandboxFileDownloads(options.fileResourceDownloads ?? []).filter((download) => !download.content));
@@ -1828,17 +1964,13 @@ function buildAgentCapabilities(settings, packSkills, options = {}) {
1828
1964
  }
1829
1965
  caps.push(skills({ lazyFrom: lazySkillSourceWithPackSkills(packSkills) }));
1830
1966
  if (settings.computerUseEnabled && settings.sandboxDesktopEnabled && desktopCapableBackend(settings.sandboxBackend)) {
1967
+ const explicitMode = options.computerToolMode;
1831
1968
  const computerCapability = computerUse({
1832
1969
  dimensions: [settings.streamResolutionWidth, settings.streamResolutionHeight],
1833
1970
  readOnly: settings.computerUseReadOnly,
1834
- // On the codex path the function tools deliver screenshots as a real image the
1835
- // model can see. The ChatGPT/Codex backend rejects HOSTED tool types but DOES
1836
- // accept `input_image` content items inside a `function_call_output` (proven by
1837
- // openai/codex codex-rs, whose view_image tool ships exactly that shape) — so a
1838
- // structured image tool result is seen, where a text data-URL would be unreadable.
1839
- ...options.structuredToolTransport === false ? { imageFunctionResults: true } : {}
1971
+ ...explicitMode ? { toolMode: explicitMode } : options.structuredToolTransport === false ? { imageFunctionResults: true } : {}
1840
1972
  });
1841
- if (options.structuredToolTransport === false) {
1973
+ if (!explicitMode && options.structuredToolTransport === false) {
1842
1974
  neutralizeStructuredToolTransport(computerCapability);
1843
1975
  }
1844
1976
  caps.push(computerCapability);
@@ -1899,7 +2031,7 @@ async function prepareAgentTools(settings, tools, options = {}) {
1899
2031
  }
1900
2032
  const error = connectedBestEffort.errors.get(failed);
1901
2033
  console.warn(
1902
- `[mcp] optional capability server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
2034
+ `[mcp] optional server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
1903
2035
  error instanceof Error ? error.message : error
1904
2036
  );
1905
2037
  }
@@ -2223,6 +2355,52 @@ var normalizeComputerCallsFilter = ({ modelData }) => ({
2223
2355
  modelData.input
2224
2356
  )
2225
2357
  });
2358
+ function contextRobustnessFilterForSettings(settings, options = {}) {
2359
+ const inputBudgetTokens = modelCallBudgetTokens(settings);
2360
+ const clientCompactionMode = resolveContextCompactionMode(settings) === "client";
2361
+ const compactionThresholdTokens = clientCompactionThresholdTokens(settings);
2362
+ return ({ modelData }) => {
2363
+ const images = elideStaleScreenshotImages(modelData.input);
2364
+ if (images.elidedCount > 0) {
2365
+ console.warn(
2366
+ `per-call image history policy elided ${images.elidedCount} older screenshot image(s), keeping the last ${Math.min(3, images.imageCount)} full image(s)`
2367
+ );
2368
+ }
2369
+ let input = images.items;
2370
+ if (inputBudgetTokens !== void 0) {
2371
+ const guarded = enforceInputBudget(
2372
+ input,
2373
+ inputBudgetTokens
2374
+ );
2375
+ if (guarded.trimmed) {
2376
+ console.warn(
2377
+ `per-call budget guard trimmed ${guarded.droppedCount} oldest history item(s) to fit input budget (${inputBudgetTokens} tokens); the over-budget model call was NOT sent`
2378
+ );
2379
+ input = guarded.items;
2380
+ }
2381
+ }
2382
+ if (clientCompactionMode && options.throwOnCompactionNeeded) {
2383
+ const reported = options.contextCompactionSignalTokens?.();
2384
+ const hasReported = typeof reported === "number" && reported > 0;
2385
+ const signalTokens = hasReported ? reported : estimateTokens(input);
2386
+ if (signalTokens > compactionThresholdTokens) {
2387
+ throw new CompactionNeededError({
2388
+ signalTokens,
2389
+ thresholdTokens: compactionThresholdTokens,
2390
+ signalSource: hasReported ? "provider" : "estimate"
2391
+ });
2392
+ }
2393
+ }
2394
+ return { ...modelData, input };
2395
+ };
2396
+ }
2397
+ function modelCallBudgetTokens(settings) {
2398
+ if (resolveContextCompactionMode(settings) !== "client") {
2399
+ return void 0;
2400
+ }
2401
+ const budget = contextInputBudgetTokens(settings);
2402
+ return budget > 0 ? budget : void 0;
2403
+ }
2226
2404
  function composeCallModelInputFilters(filters) {
2227
2405
  return async (args) => {
2228
2406
  let modelData = args.modelData;
@@ -2232,11 +2410,12 @@ function composeCallModelInputFilters(filters) {
2232
2410
  return modelData;
2233
2411
  };
2234
2412
  }
2235
- function callModelInputFilterForSettings(settings) {
2413
+ function callModelInputFilterForSettings(settings, options = {}) {
2236
2414
  const filters = [normalizeComputerCallsFilter];
2237
2415
  if (settings.openaiProviderItemIds === "strip") {
2238
2416
  filters.push(stripProviderItemIdsFilter);
2239
2417
  }
2418
+ filters.push(contextRobustnessFilterForSettings(settings, options));
2240
2419
  return composeCallModelInputFilters(filters);
2241
2420
  }
2242
2421
  async function runAgentStream(agent, input, settings, overrides = {}) {
@@ -2273,7 +2452,13 @@ async function runAgentStream(agent, input, settings, overrides = {}) {
2273
2452
  }
2274
2453
  const decoratedClient = withSandboxLifecycleHooks(resourceClient2, ownedHooks, ownedHookContext);
2275
2454
  const ownedFilter = composeCallModelInputFilters(
2276
- [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
2455
+ [
2456
+ callModelInputFilterForSettings(settings, {
2457
+ throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
2458
+ ...overrides.contextCompactionSignalTokens ? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens } : {}
2459
+ }),
2460
+ overrides.callModelInputFilter
2461
+ ].filter(
2277
2462
  (f) => Boolean(f)
2278
2463
  )
2279
2464
  );
@@ -2309,18 +2494,23 @@ async function runAgentStream(agent, input, settings, overrides = {}) {
2309
2494
  }) : void 0;
2310
2495
  const sandboxSessionState = prepared.sandboxSessionState ?? (prepared.serializedRunStateForSandbox && client ? await restoredSandboxSessionState(await RunState.fromString(agent, prepared.serializedRunStateForSandbox), client) : void 0);
2311
2496
  const callModelInputFilter = composeCallModelInputFilters(
2312
- [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
2497
+ [
2498
+ callModelInputFilterForSettings(settings, {
2499
+ throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
2500
+ ...overrides.contextCompactionSignalTokens ? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens } : {}
2501
+ }),
2502
+ overrides.callModelInputFilter
2503
+ ].filter(
2313
2504
  (f) => Boolean(f)
2314
2505
  )
2315
2506
  );
2316
2507
  const runOptions = {
2317
2508
  stream: true,
2318
2509
  maxTurns: settings.agentMaxModelCallsPerTurn,
2319
- // Strip provider-assigned item ids from every model call (turn-start
2320
- // history replay AND mid-turn follow-ups) so requests never depend on the
2321
- // provider's server-side response store. A stored response can vanish
2322
- // between two calls of the same turn, failing the run with 400 "Item with
2323
- // id 'rs_…' not found"; with the ids gone the request is self-contained.
2510
+ // Built-in per-call guard chain: normalize computer calls, optionally strip
2511
+ // provider ids, elide stale screenshots in every mode, and trim to the input
2512
+ // budget on the client-compaction path. This runs for turn-start replay AND
2513
+ // every mid-turn follow-up.
2324
2514
  callModelInputFilter
2325
2515
  };
2326
2516
  void settings.disableOpenaiTracing;
@@ -3493,12 +3683,16 @@ function approvalIdentifier(item) {
3493
3683
  export {
3494
3684
  ActiveBackendUnresolvableError,
3495
3685
  CAPABILITY_DESCRIPTORS,
3686
+ CLIENT_COMPACTION_TRIGGER_FRACTION,
3687
+ COMPACTION_PROMPT,
3496
3688
  COMPACTION_SUMMARY_MARKER,
3689
+ COMPACT_USER_MESSAGE_MAX_TOKENS,
3497
3690
  ChannelAConflictError,
3498
3691
  ChannelANotFoundError,
3499
3692
  ChannelAUnsupportedError,
3500
3693
  ChannelAValidationError,
3501
3694
  CodexSubscriptionUnavailableError,
3695
+ CompactionNeededError,
3502
3696
  ComputerActionError,
3503
3697
  ComputerReadOnlyError,
3504
3698
  ComputerUnavailableError,
@@ -3520,12 +3714,13 @@ export {
3520
3714
  RecordingUnavailableError,
3521
3715
  RoutingSandboxSession,
3522
3716
  RoutingUnsupportedError,
3717
+ SCREENSHOT_OMITTED_PLACEHOLDER,
3523
3718
  SELFHOSTED_DEFAULT_TIMEOUT_MS,
3524
3719
  SELFHOSTED_RECONNECT_WINDOW_MS,
3525
3720
  SELFHOSTED_RELAY_STREAM_PATH,
3526
3721
  STREAM_PORT,
3527
3722
  STREAM_TOKEN_DEFAULT_TTL_SECONDS,
3528
- SUMMARY_INSTRUCTIONS,
3723
+ SUMMARY_BUFFER_TOKENS,
3529
3724
  SUMMARY_PREFIX,
3530
3725
  SandboxChannelAService,
3531
3726
  SandboxComputer,
@@ -3540,6 +3735,7 @@ export {
3540
3735
  TERMINAL_STREAM_PORT,
3541
3736
  TerminalServerError,
3542
3737
  TerminalServerUnsupportedError,
3738
+ USER_MESSAGE_TRUNCATION_MARKER,
3543
3739
  agentErrorToControlError,
3544
3740
  agentsErrorRunState,
3545
3741
  applyMissingManifestEntries,
@@ -3550,7 +3746,8 @@ export {
3550
3746
  azureOpenAIDefaultQuery,
3551
3747
  backendSupportsOs,
3552
3748
  buildAgentCapabilities,
3553
- buildCompactionMessages,
3749
+ buildCompactionPromptInput,
3750
+ buildCompactionReplacementHistory,
3554
3751
  buildDisplayStackScript,
3555
3752
  buildManifest,
3556
3753
  buildModelInstance,
@@ -3562,21 +3759,24 @@ export {
3562
3759
  buildSummaryItem,
3563
3760
  buildTerminalServerScript,
3564
3761
  callModelInputFilterForSettings,
3762
+ clientCompactionThresholdTokens,
3565
3763
  collectSandboxEnvironment,
3566
- compactionSummaryText,
3567
3764
  composeAgentInstructions,
3568
3765
  computerUse,
3569
3766
  configureOpenAI,
3570
3767
  contentTypeForCodec,
3768
+ contextRobustnessFilterForSettings,
3571
3769
  coreInstructions,
3572
3770
  createProductionAgentRuntime,
3573
3771
  createSandboxClient,
3574
3772
  createSandboxClientForBackend,
3773
+ decideClientCompaction,
3575
3774
  decodeModalSnapshotId,
3576
3775
  deletePriorPersistedSnapshot,
3577
3776
  deleteRecordingArtifacts,
3578
3777
  deserializeSandboxSessionStateEnvelope,
3579
3778
  desktopCapableBackend,
3779
+ elideStaleScreenshotImages,
3580
3780
  enforceInputBudget,
3581
3781
  ensureDisplayStack,
3582
3782
  ensureReadableStreamFrom,
@@ -3587,6 +3787,7 @@ export {
3587
3787
  exposeStreamPort,
3588
3788
  extForCodec,
3589
3789
  extractResponseOutputText,
3790
+ findCompactionNeededError,
3590
3791
  findKeepBoundary,
3591
3792
  isCompactionSummary,
3592
3793
  isExecSessionLostBanner,
@@ -3613,14 +3814,13 @@ export {
3613
3814
  parseNumstatZ,
3614
3815
  parsePorcelainV2,
3615
3816
  parseUnifiedPatch,
3616
- planCompaction,
3617
3817
  prefixedMcpToolName,
3618
3818
  prepareAgentTools,
3619
3819
  prepareRunInput,
3620
3820
  readRecordingBytes,
3621
3821
  readWorkspaceArchiveFromEnvelopeSessionState,
3622
3822
  recordingStorageKey,
3623
- renderPrefixTranscript,
3823
+ renderCompactionPromptInputForChat,
3624
3824
  repositoryCloneCommand,
3625
3825
  repositoryUsesSandboxClone,
3626
3826
  resolveTurnModel,