@opengeni/runtime 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -63,6 +63,8 @@ import {
63
63
  isWorkspaceEscapeError,
64
64
  makeActiveBackendResolver,
65
65
  mintStreamToken,
66
+ modalSandboxAttributionEnvironment,
67
+ modalSandboxAttributionTags,
66
68
  negotiateCapabilities,
67
69
  negotiateSelfhostedCapabilities,
68
70
  offlineAgentError,
@@ -85,15 +87,18 @@ import {
85
87
  stopRecording,
86
88
  stripExecBanner,
87
89
  subjectFor,
90
+ sweepModalOrphanSandboxes,
91
+ tagModalSandbox,
88
92
  tearDownDisplayStack,
89
93
  tearDownTerminalServer,
94
+ terminateModalSandboxById,
90
95
  timeoutAgentError,
91
96
  timeoutControlResponse,
92
97
  verifyStreamToken
93
- } from "./chunk-2PO56VAL.js";
98
+ } from "./chunk-D5KU3QUC.js";
94
99
 
95
100
  // src/index.ts
96
- import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment as collectSandboxEnvironment2, contextServerCompactThreshold, firstPartyMcpBaseUrl, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
101
+ import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment as collectSandboxEnvironment2, contextInputBudgetTokens, contextServerCompactThreshold, firstPartyMcpBaseUrl, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
97
102
  import { CAPABILITY_DESCRIPTORS as CAPABILITY_DESCRIPTORS2, isClearedRunStateBlob, signDelegatedAccessToken } from "@opengeni/contracts";
98
103
  import {
99
104
  Agent,
@@ -509,6 +514,106 @@ function computerCallNormalizingFetch(base) {
509
514
  };
510
515
  }
511
516
 
517
// src/image-history.ts

// Text written in place of a screenshot image that has been elided from the model input.
var SCREENSHOT_OMITTED_PLACEHOLDER = "[screenshot omitted: an older desktop frame \u2014 the full image remains in the session event log]";

// Detects a base64 `data:image/...` URL. The pattern is unanchored and case-insensitive,
// so it matches a data URL appearing anywhere inside a longer string.
var DATA_IMAGE_BASE64_PATTERN = /data:image\/[a-z0-9.+-]+;base64,[a-z0-9+/=_-]+/i;
520
/**
 * Replace all but the newest screenshot images in a model-input history with a placeholder.
 *
 * Image occurrences are collected in item order; the first `count - keepLast` of them
 * (i.e. the oldest) are overwritten. The caller's `items` array is never mutated: when
 * something is elided the result is a `structuredClone`, otherwise a shallow copy.
 *
 * @param items   History items to scan.
 * @param options `keepLast` (default 3, floored, clamped to >= 0) and `placeholder`
 *                (default `SCREENSHOT_OMITTED_PLACEHOLDER`).
 * @returns `{ items, imageCount, elidedCount }`.
 */
function elideStaleScreenshotImages(items, options = {}) {
  const DEFAULT_KEEP_LAST = 3;
  const flooredKeep = Math.floor(options.keepLast ?? DEFAULT_KEEP_LAST);
  // A NaN / non-numeric keepLast used to leak into `elidedCount` as NaN and force a
  // pointless deep clone; treat it as "not specified" instead. Infinity still keeps all.
  const keepLast = Number.isNaN(flooredKeep) ? DEFAULT_KEEP_LAST : Math.max(0, flooredKeep);
  const placeholder = options.placeholder ?? SCREENSHOT_OMITTED_PLACEHOLDER;

  const occurrences = [];
  for (let i = 0; i < items.length; i += 1) {
    collectItemImageOccurrences(items[i], [i], placeholder, occurrences);
  }

  const elidedCount = Math.max(0, occurrences.length - keepLast);
  if (elidedCount === 0) {
    return { items: items.slice(), imageCount: occurrences.length, elidedCount: 0 };
  }

  // Deep-clone before writing so the original history objects stay untouched.
  const cloned = structuredClone(items);
  for (const occurrence of occurrences.slice(0, elidedCount)) {
    setPath(cloned, occurrence.path, occurrence.replacement);
  }
  return { items: cloned, imageCount: occurrences.length, elidedCount };
}
537
/**
 * Record the image occurrences found in one history item.
 *
 * Non-record items and `message` items with role `user` or `system` are ignored.
 * Computer-call results are scanned for a screenshot payload; function-call results
 * have their `output` scanned recursively. Matches are appended to `out`.
 */
function collectItemImageOccurrences(item, path, placeholder, out) {
  if (!isRecord(item)) {
    return;
  }
  const kind = item.type;
  if (kind === "message") {
    const role = item.role;
    if (role === "user" || role === "system") {
      return;
    }
  }
  switch (kind) {
    case "computer_call_result":
    case "computer_call_output":
      collectComputerOutputImages(item, path, placeholder, out);
      break;
    case "function_call_result":
    case "function_call_output":
      collectToolResultImages(item.output, [...path, "output"], placeholder, out);
      break;
    default:
      break;
  }
}
552
/**
 * Record the screenshot image carried by a computer-call result, if any.
 *
 * Only an `output` record of type `computer_screenshot` is considered. The first of
 * `data`, `image_url`, `imageUrl` holding an image data URL is recorded, with the
 * placeholder string as its replacement.
 */
function collectComputerOutputImages(item, path, placeholder, out) {
  const screenshot = item.output;
  const isScreenshot = isRecord(screenshot) && screenshot.type === "computer_screenshot";
  if (!isScreenshot) {
    return;
  }
  const imageKey = ["data", "image_url", "imageUrl"].find((key) => isImageDataUrl(screenshot[key]));
  if (imageKey !== undefined) {
    out.push({ path: [...path, "output", imageKey], replacement: placeholder });
  }
}
564
/**
 * Recursively record image occurrences inside a tool result value.
 *
 * - A string that contains an image data URL is recorded with the placeholder string.
 * - Arrays are walked element by element.
 * - An `input_image` record holding a data URL is recorded as a whole, to be replaced
 *   by an `input_text` part carrying the placeholder.
 * - Any other record is descended into through its `content`, `text` and `output` keys.
 */
function collectToolResultImages(value, path, placeholder, out) {
  if (typeof value === "string") {
    if (isImageDataUrl(value)) {
      out.push({ path, replacement: placeholder });
    }
    return;
  }
  if (Array.isArray(value)) {
    for (const [index, entry] of value.entries()) {
      collectToolResultImages(entry, [...path, index], placeholder, out);
    }
    return;
  }
  if (!isRecord(value)) {
    return;
  }
  if (value.type === "input_image") {
    const carriesDataUrl = ["image", "imageUrl", "image_url"].some((key) => isImageDataUrl(value[key]));
    if (carriesDataUrl) {
      out.push({ path, replacement: { type: "input_text", text: placeholder } });
      return;
    }
  }
  ["content", "text", "output"]
    .filter((key) => key in value)
    .forEach((key) => {
      collectToolResultImages(value[key], [...path, key], placeholder, out);
    });
}
594
/** True when `value` is a string in which `DATA_IMAGE_BASE64_PATTERN` finds a match. */
function isImageDataUrl(value) {
  if (typeof value !== "string") {
    return false;
  }
  return DATA_IMAGE_BASE64_PATTERN.test(value);
}
597
/** True for a non-null, non-array value whose `typeof` is "object". */
function isRecord(value) {
  if (!value) {
    return false;
  }
  if (Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
600
+ function setPath(root, path, value) {
601
+ if (path.length === 0) {
602
+ return;
603
+ }
604
+ let cursor = root;
605
+ for (let i = 0; i < path.length - 1; i += 1) {
606
+ const segment = path[i];
607
+ cursor = Array.isArray(cursor) ? cursor[segment] : cursor[segment];
608
+ }
609
+ const last = path[path.length - 1];
610
+ if (Array.isArray(cursor)) {
611
+ cursor[last] = value;
612
+ } else {
613
+ cursor[last] = value;
614
+ }
615
+ }
616
+
512
617
  // src/codex-tool-search.ts
513
618
  import { toolSearchTool } from "@openai/agents";
514
619
  var CODEX_APPS_TOOL_PREFIX = "codex_apps__";
@@ -685,13 +790,22 @@ function installCodexToolSearch(agent, connectorNamespaces = NO_NAMESPACES) {
685
790
 
686
791
  // src/context-compaction.ts
687
792
  var COMPACTION_SUMMARY_MARKER = "opengeni_context_summary";
688
- var SUMMARY_PREFIX = [
689
- "[CONTEXT CHECKPOINT] The earlier part of this conversation was automatically compacted to stay within the model context window.",
690
- "Durable facts already live in the workspace notebook / document bases (via MCP) \u2014 the summary below is a light working-memory bridge, not a full transcript.",
691
- "Trust it for current objective, decisions, blockers, deployed/infra state, and next steps; re-read the notebook for anything authoritative.",
793
+ var SUMMARY_BUFFER_TOKENS = 2e4;
794
+ var COMPACT_USER_MESSAGE_MAX_TOKENS = 2e4;
795
+ var CLIENT_COMPACTION_TRIGGER_FRACTION = 0.9;
796
+ var COMPACTION_PROMPT = [
797
+ "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.",
798
+ "",
799
+ "Include:",
800
+ "- Current progress and key decisions made",
801
+ "- Important context, constraints, or user preferences",
802
+ "- What remains to be done (clear next steps)",
803
+ "- Any critical data, examples, or references needed to continue",
692
804
  "",
693
- "SUMMARY:"
805
+ "Be concise, structured, and focused on helping the next LLM seamlessly continue the work."
694
806
  ].join("\n");
807
+ var SUMMARY_PREFIX = "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:";
808
+ var USER_MESSAGE_TRUNCATION_MARKER = "\n[... middle truncated for context compaction ...]\n";
695
809
  var RESULT_TYPE_BY_CALL_TYPE2 = {
696
810
  function_call: "function_call_result",
697
811
  computer_call: "computer_call_result",
@@ -735,6 +849,56 @@ function estimateTokens(items) {
735
849
  }
736
850
  return total;
737
851
  }
852
/**
 * Token count above which client-side compaction is triggered.
 *
 * Takes the context window, subtracts the reserved output tokens and
 * `SUMMARY_BUFFER_TOKENS`, clamps at zero, then applies
 * `CLIENT_COMPACTION_TRIGGER_FRACTION` and floors the result.
 */
function clientCompactionThresholdTokens(input) {
  const headroom = input.contextWindowTokens - input.contextReservedOutputTokens - SUMMARY_BUFFER_TOKENS;
  const available = Math.max(0, headroom);
  return Math.floor(available * CLIENT_COMPACTION_TRIGGER_FRACTION);
}
859
/**
 * Decide whether the history should be compacted on the client.
 *
 * The signal is `lastInputTokens` when it is a positive number, otherwise an estimate
 * of `items`. Checks run in this order: empty history -> "no_history"; `force` ->
 * "force"; signal strictly above the threshold -> "above_threshold"; else
 * "below_threshold".
 *
 * @returns `{ shouldCompact, reason, signalTokens, thresholdTokens }`.
 */
function decideClientCompaction(input) {
  const thresholdTokens = clientCompactionThresholdTokens(input);
  const hasRecordedUsage = typeof input.lastInputTokens === "number" && input.lastInputTokens > 0;
  const signalTokens = hasRecordedUsage ? input.lastInputTokens : estimateTokens(input.items);
  const verdict = (shouldCompact, reason) => ({ shouldCompact, reason, signalTokens, thresholdTokens });

  if (input.items.length === 0) {
    return verdict(false, "no_history");
  }
  if (input.force) {
    return verdict(true, "force");
  }
  return signalTokens > thresholdTokens
    ? verdict(true, "above_threshold")
    : verdict(false, "below_threshold");
}
874
+ var CompactionNeededError = class extends Error {
875
+ signalTokens;
876
+ thresholdTokens;
877
+ signalSource;
878
+ constructor(input) {
879
+ super(
880
+ `Context compaction needed: signal ${input.signalTokens} tokens exceeded threshold ${input.thresholdTokens}`
881
+ );
882
+ this.name = "CompactionNeededError";
883
+ this.signalTokens = input.signalTokens;
884
+ this.thresholdTokens = input.thresholdTokens;
885
+ this.signalSource = input.signalSource;
886
+ }
887
+ };
888
/**
 * Search an error and its wrapped errors for a `CompactionNeededError`.
 *
 * Follows `cause` first and then `error`, depth-first. `seen` records visited objects
 * so that cyclic chains terminate.
 *
 * @returns The first `CompactionNeededError` found, or `null`.
 */
function findCompactionNeededError(error, seen = /* @__PURE__ */ new WeakSet()) {
  if (error instanceof CompactionNeededError) {
    return error;
  }
  const traversable = Boolean(error) && typeof error === "object" && !seen.has(error);
  if (!traversable) {
    return null;
  }
  seen.add(error);
  for (const link of ["cause", "error"]) {
    const found = findCompactionNeededError(error[link], seen);
    if (found) {
      return found;
    }
  }
  return null;
}
738
902
  function findKeepBoundary(items, keepRecentTokens) {
739
903
  const boundaries = [];
740
904
  for (let i = 0; i < items.length; i += 1) {
@@ -771,150 +935,116 @@ function enforceInputBudget(items, maxTokens, trailingTokens = 0) {
771
935
  estimatedTokens: estimateTokens(kept) + Math.max(0, trailingTokens)
772
936
  };
773
937
  }
774
- function planCompaction(input) {
775
- const softLimit = Math.floor(input.inputBudgetTokens * input.softFraction);
776
- const hardLimit = Math.floor(input.inputBudgetTokens * input.hardFraction);
777
- const recorded = typeof input.lastInputTokens === "number" && input.lastInputTokens > 0 ? input.lastInputTokens : 0;
778
- const signalTokens = Math.max(recorded, estimateTokens(input.items));
779
- const hardForced = signalTokens >= hardLimit;
780
- const empty = {
781
- shouldCompact: false,
782
- reason: "below_threshold",
783
- signalTokens,
784
- hardForced,
785
- boundaryIndex: input.items.length,
786
- prefixItems: [],
787
- priorSummaryItem: null,
788
- tailItems: [...input.items]
789
- };
790
- if (!input.force && signalTokens < softLimit) {
791
- return empty;
792
- }
793
- const effectiveKeepRecent = hardForced ? Math.min(
794
- Math.floor(input.keepRecentTokens / 2),
795
- Math.floor(input.inputBudgetTokens / 4)
796
- ) : input.keepRecentTokens;
797
- const boundaryIndex = findKeepBoundary(input.items, effectiveKeepRecent);
798
- if (boundaryIndex <= 0) {
799
- return { ...empty, reason: "no_boundary", boundaryIndex };
800
- }
801
- const prefix = input.items.slice(0, boundaryIndex);
802
- const tailItems = input.items.slice(boundaryIndex);
803
- let priorSummaryItem = null;
804
- const prefixItems = [];
805
- for (const item of prefix) {
806
- if (isCompactionSummary(item)) {
807
- priorSummaryItem = item;
938
/**
 * Build the summarization request: a copy of the history followed by one user message
 * whose content is `COMPACTION_PROMPT`. `items` itself is not modified.
 */
function buildCompactionPromptInput(items) {
  const compactionRequest = {
    type: "message",
    role: "user",
    content: COMPACTION_PROMPT
  };
  return [...items, compactionRequest];
}
948
+ function buildCompactionReplacementHistory(items, summaryBody) {
949
+ const history = [];
950
+ for (const item of items) {
951
+ if (!isUserMessage(item) || isCompactionSummary(item)) {
808
952
  continue;
809
953
  }
810
- prefixItems.push(item);
811
- }
812
- if (prefixItems.length === 0) {
813
- return { ...empty, reason: "nothing_to_summarize", boundaryIndex };
954
+ history.push(compactUserMessage(item));
814
955
  }
956
+ history.push(buildSummaryItem(summaryBody));
957
+ return history;
958
+ }
959
+ function buildSummaryItem(summaryBody) {
960
+ const trimmed = summaryBody.trim();
815
961
  return {
816
- shouldCompact: true,
817
- reason: "compact",
818
- signalTokens,
819
- hardForced,
820
- boundaryIndex,
821
- prefixItems,
822
- priorSummaryItem,
823
- tailItems
962
+ type: "message",
963
+ role: "user",
964
+ content: `${SUMMARY_PREFIX}
965
+ ${trimmed}`,
966
+ [COMPACTION_SUMMARY_MARKER]: true
824
967
  };
825
968
  }
826
- function compactionSummaryText(item) {
827
- if (!item) {
828
- return "";
969
/**
 * Return a shallow copy of a user message with its content reduced for compaction.
 *
 * When the message text is estimated above `COMPACT_USER_MESSAGE_MAX_TOKENS`, the
 * content becomes that text truncated in the middle; otherwise the original content is
 * kept with image parts removed.
 */
function compactUserMessage(item) {
  const text = messageText(item);
  const oversized = estimatedTextTokens(text) > COMPACT_USER_MESSAGE_MAX_TOKENS;
  const content = oversized
    ? truncateMiddleByEstimatedTokens(text, COMPACT_USER_MESSAGE_MAX_TOKENS)
    : contentWithoutImages(item);
  return { ...item, content };
}
979
/** Estimate a token count as one token per four characters, rounded up. */
function estimatedTextTokens(text) {
  const charCount = text.length;
  return Math.ceil(charCount / 4);
}
982
+ function truncateMiddleByEstimatedTokens(text, maxTokens) {
983
+ const maxChars = Math.max(0, maxTokens * 4);
984
+ if (text.length <= maxChars) {
985
+ return text;
986
+ }
987
+ if (maxChars <= USER_MESSAGE_TRUNCATION_MARKER.length) {
988
+ return USER_MESSAGE_TRUNCATION_MARKER.slice(0, maxChars);
829
989
  }
990
+ const keepChars = maxChars - USER_MESSAGE_TRUNCATION_MARKER.length;
991
+ const headChars = Math.ceil(keepChars / 2);
992
+ const tailChars = Math.floor(keepChars / 2);
993
+ return `${text.slice(0, headChars)}${USER_MESSAGE_TRUNCATION_MARKER}${text.slice(text.length - tailChars)}`;
994
+ }
995
+ function contentWithoutImages(item) {
996
+ const content = item.content;
997
+ if (!Array.isArray(content)) {
998
+ return content;
999
+ }
1000
+ return content.filter((part) => {
1001
+ if (!part || typeof part !== "object") {
1002
+ return true;
1003
+ }
1004
+ const type = part.type;
1005
+ return type !== "input_image" && type !== "image_url";
1006
+ });
1007
+ }
1008
+ function messageText(item) {
830
1009
  const content = item.content;
831
1010
  if (typeof content === "string") {
832
- return stripSummaryPrefix(content);
1011
+ return content;
833
1012
  }
834
1013
  if (Array.isArray(content)) {
835
- const text = content.map((part) => {
1014
+ return content.map((part) => {
836
1015
  if (part && typeof part === "object") {
837
- const t = part.text;
838
- return typeof t === "string" ? t : "";
1016
+ const record = part;
1017
+ if (typeof record.text === "string") {
1018
+ return record.text;
1019
+ }
1020
+ if (typeof record.content === "string") {
1021
+ return record.content;
1022
+ }
839
1023
  }
840
1024
  return "";
841
1025
  }).join("");
842
- return stripSummaryPrefix(text);
843
1026
  }
844
1027
  return "";
845
1028
  }
846
- function stripSummaryPrefix(text) {
847
- const marker = "SUMMARY:";
848
- const idx = text.indexOf(marker);
849
- return idx >= 0 ? text.slice(idx + marker.length) : text;
850
- }
851
- function buildSummaryItem(summaryBody) {
852
- return {
853
- type: "message",
854
- role: "user",
855
- content: `${SUMMARY_PREFIX}${summaryBody}`,
856
- [COMPACTION_SUMMARY_MARKER]: true
857
- };
858
- }
859
- var SUMMARY_INSTRUCTIONS = [
860
- "You are compacting the earlier part of a long-running agent conversation into a compact working-memory checkpoint so the agent can continue past the model's context limit.",
861
- "Durable facts already live in the workspace notebook and document bases (via MCP). Do NOT re-derive or copy those; summarize POINTERS, not contents.",
862
- "Capture, concisely and factually:",
863
- "- The current objective and the key decisions made so far.",
864
- "- Open blockers and anything in-progress.",
865
- "- Deployed / infrastructure state that has changed (what exists now).",
866
- "- Environment and credential facts BY REFERENCE ONLY \u2014 name the env var keys, secret names, or notebook/document ids; NEVER copy a secret value, token, key, or password.",
867
- "- Concrete next steps.",
868
- "Say explicitly that durable facts are in the notebook and that this summary lists pointers, not contents.",
869
- "Output only the summary body \u2014 no preamble, no markdown headers, plain prose or terse bullets."
870
- ].join("\n");
871
- function renderPrefixTranscript(items, priorSummaryText) {
872
- const lines = [];
873
- if (priorSummaryText.trim().length > 0) {
874
- lines.push("PRIOR CHECKPOINT SUMMARY (fold this forward; it already replaced even older history):");
875
- lines.push(priorSummaryText.trim());
876
- lines.push("");
877
- lines.push("CONVERSATION SINCE THAT CHECKPOINT:");
878
- } else {
879
- lines.push("CONVERSATION TO SUMMARIZE:");
880
- }
881
- for (const item of items) {
882
- lines.push(renderItem(item));
883
- }
884
- return lines.join("\n");
1029
+ function renderCompactionPromptInputForChat(input) {
1030
+ return input.map(renderItem).join("\n");
885
1031
  }
886
1032
  function renderItem(item) {
887
1033
  const type = itemType2(item) ?? "unknown";
888
1034
  if (type === "message") {
889
1035
  const role = itemRole(item) ?? "assistant";
890
- return `[${role}] ${truncate(messageText(item), 4e3)}`;
1036
+ return `[${role}] ${truncateForTranscript(messageText(item), 4e3)}`;
891
1037
  }
892
1038
  if (type === "reasoning") {
893
1039
  return "[reasoning] (omitted)";
894
1040
  }
895
1041
  if (RESULT_TYPES2.has(type)) {
896
- return `[tool_result] ${truncate(resultText(item), 2e3)}`;
1042
+ return `[tool_result] ${truncateForTranscript(resultText(item), 2e3)}`;
897
1043
  }
898
1044
  if (RESULT_TYPE_BY_CALL_TYPE2[type]) {
899
- return `[tool_call ${type}] ${truncate(callText(item), 1e3)}`;
1045
+ return `[tool_call ${type}] ${truncateForTranscript(callText(item), 1e3)}`;
900
1046
  }
901
- return `[${type}] ${truncate(safeStringify(item), 1e3)}`;
902
- }
903
- function messageText(item) {
904
- const content = item.content;
905
- if (typeof content === "string") {
906
- return content;
907
- }
908
- if (Array.isArray(content)) {
909
- return content.map((part) => {
910
- if (part && typeof part === "object") {
911
- const t = part.text;
912
- return typeof t === "string" ? t : "";
913
- }
914
- return "";
915
- }).join("");
916
- }
917
- return "";
1047
+ return `[${type}] ${truncateForTranscript(safeStringify(item), 1e3)}`;
918
1048
  }
919
1049
  function resultText(item) {
920
1050
  const output = item.output;
@@ -937,18 +1067,11 @@ function safeStringify(value) {
937
1067
  return String(value);
938
1068
  }
939
1069
  }
940
- function truncate(text, max) {
1070
+ function truncateForTranscript(text, max) {
941
1071
  if (text.length <= max) {
942
1072
  return text;
943
1073
  }
944
- return `${text.slice(0, max)}\u2026 (${text.length - max} more chars)`;
945
- }
946
- function buildCompactionMessages(plan) {
947
- const priorText = compactionSummaryText(plan.priorSummaryItem);
948
- return {
949
- system: SUMMARY_INSTRUCTIONS,
950
- user: renderPrefixTranscript(plan.prefixItems, priorText)
951
- };
1074
+ return `${text.slice(0, max)}... (${text.length - max} more chars)`;
952
1075
  }
953
1076
 
954
1077
  // src/sandbox-computer.ts
@@ -966,8 +1089,8 @@ var DEFAULT_DIMENSIONS = [1280, 800];
966
1089
  var ACTION_YIELD_MS = 15e3;
967
1090
  var SCROLL_NOTCH_PIXELS = 100;
968
1091
  var SCROLL_MAX_CLICKS = 15;
969
- var SCREENSHOT_MAX_ATTEMPTS = 3;
970
- var SCREENSHOT_RETRY_DELAY_MS = 400;
1092
+ var SCREENSHOT_WARMUP_BUDGET_MS = 3e4;
1093
+ var SCREENSHOT_RETRY_DELAY_MS = 750;
971
1094
  var KEYSYM = {
972
1095
  ctrl: "ctrl",
973
1096
  control: "ctrl",
@@ -1036,6 +1159,8 @@ var SandboxComputer = class {
1036
1159
  typeDelayMs;
1037
1160
  readOnly;
1038
1161
  tmp;
1162
+ screenshotWarmupBudgetMs;
1163
+ screenshotRetryDelayMs;
1039
1164
  constructor(session, opts = {}) {
1040
1165
  this.session = session;
1041
1166
  this.display = opts.display ?? DEFAULT_DISPLAY;
@@ -1046,6 +1171,8 @@ var SandboxComputer = class {
1046
1171
  this.typeDelayMs = opts.typeDelayMs ?? 12;
1047
1172
  this.readOnly = opts.readOnly ?? false;
1048
1173
  this.tmp = opts.screenshotTmpDir ?? "/tmp";
1174
+ this.screenshotWarmupBudgetMs = opts.screenshotWarmupBudgetMs ?? SCREENSHOT_WARMUP_BUDGET_MS;
1175
+ this.screenshotRetryDelayMs = opts.screenshotRetryDelayMs ?? SCREENSHOT_RETRY_DELAY_MS;
1049
1176
  }
1050
1177
  /** Rebind to a freshly resumed-by-id session after a box rollover / re-establish. */
1051
1178
  rebind(session) {
@@ -1090,10 +1217,13 @@ var SandboxComputer = class {
1090
1217
  }
1091
1218
  async screenshot() {
1092
1219
  let lastError;
1093
- for (let attempt = 0; attempt < SCREENSHOT_MAX_ATTEMPTS; attempt++) {
1220
+ const deadline = Date.now() + this.screenshotWarmupBudgetMs;
1221
+ let attempt = 0;
1222
+ while (true) {
1094
1223
  if (attempt > 0) {
1095
- await new Promise((r) => setTimeout(r, SCREENSHOT_RETRY_DELAY_MS));
1224
+ await new Promise((r) => setTimeout(r, this.screenshotRetryDelayMs));
1096
1225
  }
1226
+ attempt++;
1097
1227
  const f = `${this.tmp}/og-shot-${Date.now()}-${Math.random().toString(36).slice(2)}.png`;
1098
1228
  try {
1099
1229
  await this.x(`scrot --pointer --overwrite ${f}`);
@@ -1107,6 +1237,9 @@ var SandboxComputer = class {
1107
1237
  } finally {
1108
1238
  await this.x(`rm -f ${f}`).catch(() => void 0);
1109
1239
  }
1240
+ if (Date.now() + this.screenshotRetryDelayMs >= deadline) {
1241
+ break;
1242
+ }
1110
1243
  }
1111
1244
  if (lastError instanceof Error) {
1112
1245
  throw lastError;
@@ -1200,6 +1333,15 @@ var NativeDesktopComputer = class {
1200
1333
  dimensions;
1201
1334
  session;
1202
1335
  readOnly;
1336
+ // The ENCODED vs NATIVE geometry of the MOST RECENT screenshot the model saw. The
1337
+ // model computes click coordinates in the encoded-pixel space of that screenshot;
1338
+ // when the agent downscaled the PNG to fit the transport budget, encoded < native,
1339
+ // so we scale coordinates back up to native pixels before injecting (the agent's
1340
+ // native inject — macOS CGEvent / Linux XTEST — expects native-pixel coordinates,
1341
+ // exactly as it received them pre-downscale). Null until the first screenshot;
1342
+ // equal encoded==native (or absent) ⇒ scale factor 1.0 ⇒ byte-identical behavior.
1343
+ lastEncoded = null;
1344
+ lastNative = null;
1203
1345
  constructor(session, opts = {}) {
1204
1346
  this.session = session;
1205
1347
  this.dimensions = opts.dimensions ?? DEFAULT_DIMENSIONS;
@@ -1213,14 +1355,33 @@ var NativeDesktopComputer = class {
1213
1355
  guardWrite() {
1214
1356
  if (this.readOnly) throw new ComputerReadOnlyError();
1215
1357
  }
1358
+ /** Scale a coordinate the model expressed in the MOST RECENT screenshot's
1359
+ * ENCODED pixel space back to NATIVE pixels. When the last frame was not
1360
+ * downscaled (encoded == native), or no screenshot has been taken yet, this is a
1361
+ * 1:1 identity — the byte-identical current behavior. The agent then applies its
1362
+ * own platform mapping (macOS divides native pixels by the backing scale to reach
1363
+ * CGEvent points; Linux XTEST is 1:1) exactly as it did pre-downscale. */
1364
+ toNative(x, y) {
1365
+ const enc = this.lastEncoded;
1366
+ const nat = this.lastNative;
1367
+ if (!enc || !nat || enc[0] <= 0 || enc[1] <= 0) return { x, y };
1368
+ if (enc[0] === nat[0] && enc[1] === nat[1]) return { x, y };
1369
+ return {
1370
+ x: Math.round(x * nat[0] / enc[0]),
1371
+ y: Math.round(y * nat[1] / enc[1])
1372
+ };
1373
+ }
1216
1374
  async pointer(x, y, action, button) {
1217
- await this.session.desktopInput({ $case: "pointer", pointer: { x, y, action, button } });
1375
+ const n = this.toNative(x, y);
1376
+ await this.session.desktopInput({ $case: "pointer", pointer: { x: n.x, y: n.y, action, button } });
1218
1377
  }
1219
1378
  async screenshot() {
1220
- const { png } = await this.session.screenshot();
1379
+ const { png, width, height, nativeWidth, nativeHeight } = await this.session.screenshot();
1221
1380
  if (png.length === 0) {
1222
1381
  throw new ComputerUnavailableError("native desktop screenshot returned an empty frame (display not up?)");
1223
1382
  }
1383
+ this.lastEncoded = [width, height];
1384
+ this.lastNative = [nativeWidth || width, nativeHeight || height];
1224
1385
  return Buffer.from(png).toString("base64");
1225
1386
  }
1226
1387
  async click(x, y, button) {
@@ -1237,7 +1398,8 @@ var NativeDesktopComputer = class {
1237
1398
  }
1238
1399
  async scroll(x, y, sx, sy) {
1239
1400
  this.guardWrite();
1240
- await this.session.desktopInput({ $case: "scroll", scroll: { x, y, deltaX: sx, deltaY: sy } });
1401
+ const n = this.toNative(x, y);
1402
+ await this.session.desktopInput({ $case: "scroll", scroll: { x: n.x, y: n.y, deltaX: sx, deltaY: sy } });
1241
1403
  }
1242
1404
  async type(text) {
1243
1405
  this.guardWrite();
@@ -1596,7 +1758,7 @@ var MultiProviderModelProvider = class {
1596
1758
  fallback;
1597
1759
  async getModel(modelName) {
1598
1760
  if (modelName) {
1599
- const resolved = resolveTurnModel(this.settings, modelName);
1761
+ const resolved = resolveTurnModel(settingsForRunScopedModelResolution(this.settings, modelName), modelName);
1600
1762
  if (resolved) {
1601
1763
  if (modelName.startsWith(CODEX_MODEL_ID_PREFIX) && resolved.provider.kind !== "codex-subscription") {
1602
1764
  throw new CodexSubscriptionUnavailableError(modelName);
@@ -1611,6 +1773,20 @@ var MultiProviderModelProvider = class {
1611
1773
  return this.fallback.getModel(modelName);
1612
1774
  }
1613
1775
  };
1776
/**
 * Choose the settings object used to resolve a run-scoped model name.
 *
 * The settings are returned untouched unless `modelName` equals `settings.openaiModel`
 * and is absent from a non-empty allowed-models list. In that case a copy is returned
 * whose `openaiModel` is the first allowed id that differs from `modelName`.
 */
function settingsForRunScopedModelResolution(settings, modelName) {
  const isConfiguredDefault = modelName === settings.openaiModel;
  if (!isConfiguredDefault) {
    return settings;
  }
  const allowedIds = splitOpenaiAllowedModels(settings.openaiAllowedModels);
  const fallbackBuiltin = allowedIds.find((id) => id !== modelName);
  const needsSwap = Boolean(fallbackBuiltin) && !allowedIds.includes(modelName);
  return needsSwap ? { ...settings, openaiModel: fallbackBuiltin } : settings;
}
1787
/**
 * Parse a comma-separated list of model ids.
 *
 * @param value Raw setting, e.g. "gpt-a, gpt-b".
 * @returns Trimmed, non-empty ids in their original order. A non-string value (such as
 *          an unset `undefined`/`null` setting) yields an empty list instead of
 *          throwing on `.split`.
 */
function splitOpenaiAllowedModels(value) {
  if (typeof value !== "string") {
    return [];
  }
  return value.split(",").map((item) => item.trim()).filter(Boolean);
}
1614
1790
  var CodexSubscriptionUnavailableError = class extends Error {
1615
1791
  constructor(modelName) {
1616
1792
  super(
@@ -1635,20 +1811,17 @@ function configureOpenAI(settings) {
1635
1811
  }
1636
1812
  setDefaultModelProvider(router);
1637
1813
  }
1638
- async function summarizeForCompaction(settings, messages, options = {}) {
1814
+ async function summarizeForCompaction(settings, input, options = {}) {
1639
1815
  const client = options.client ?? buildOpenAIClientFromSettings(settings);
1640
1816
  const api = options.api ?? "responses";
1641
1817
  const model = options.model ?? settings.openaiModel;
1642
- const maxTokens = options.maxOutputTokens ?? settings.contextSummaryMaxTokens;
1818
+ const maxTokens = options.maxOutputTokens ?? SUMMARY_BUFFER_TOKENS;
1643
1819
  try {
1644
1820
  if (api === "chat") {
1645
1821
  const completion = await client.chat.completions.create({
1646
1822
  model,
1647
1823
  max_tokens: maxTokens,
1648
- messages: [
1649
- { role: "system", content: messages.system },
1650
- { role: "user", content: messages.user }
1651
- ]
1824
+ messages: [{ role: "user", content: renderCompactionPromptInputForChat(input) }]
1652
1825
  });
1653
1826
  const text2 = completion.choices?.[0]?.message?.content;
1654
1827
  const trimmed2 = typeof text2 === "string" ? text2.trim() : "";
@@ -1661,10 +1834,7 @@ async function summarizeForCompaction(settings, messages, options = {}) {
1661
1834
  // built-in path (api "responses"), so gate it on the built-in provider.
1662
1835
  ...settings.openaiProvider === "azure" ? {} : { store: false },
1663
1836
  max_output_tokens: maxTokens,
1664
- input: [
1665
- { role: "system", content: messages.system },
1666
- { role: "user", content: messages.user }
1667
- ]
1837
+ input
1668
1838
  });
1669
1839
  const text = extractResponseOutputText(response);
1670
1840
  const trimmed = text.trim();
@@ -1736,6 +1906,13 @@ function composeAgentInstructions(template, workspaceEnvironment) {
1736
1906
  }
1737
1907
  return core ? `${template} ${core}` : template;
1738
1908
  }
1909
/**
 * Append trimmed per-session instructions to the composed instructions, separated by a
 * single space. Missing or whitespace-only session instructions leave `composed` as is.
 */
function appendSessionInstructions(composed, sessionInstructions) {
  const persona = sessionInstructions?.trim();
  if (!persona) {
    return composed;
  }
  return `${composed} ${persona}`;
}
1913
/**
 * Append `GENESIS_TITLE_DIRECTIVE` (space-separated) when `genesisTitleHint` is truthy;
 * otherwise return `instructions` unchanged.
 */
function appendGenesisTitleDirective(instructions, genesisTitleHint) {
  if (!genesisTitleHint) {
    return instructions;
  }
  return `${instructions} ${GENESIS_TITLE_DIRECTIVE}`;
}
1739
1916
  var agentFileDownloads = /* @__PURE__ */ new WeakMap();
1740
1917
  var agentRepositoryCloneHooks = /* @__PURE__ */ new WeakMap();
1741
1918
  var agentGitTokenSeed = /* @__PURE__ */ new WeakMap();
@@ -1757,7 +1934,21 @@ function buildOpenGeniAgent(settings, resources, options = {}) {
1757
1934
  // ownership + workspace-environment block) at the {{core}} marker, or
1758
1935
  // appends it when the template omits the marker. With the default template
1759
1936
  // and no environment this is byte-identical to the historical preamble.
1760
- instructions: options.genesisTitleHint ? `${composeAgentInstructions(options.instructionsTemplate ?? settings.agentInstructionsTemplate, options.workspaceEnvironment)} ${GENESIS_TITLE_DIRECTIVE}` : composeAgentInstructions(options.instructionsTemplate ?? settings.agentInstructionsTemplate, options.workspaceEnvironment),
1937
+ // Persona composition order (all one system-level instructions string):
1938
+ // 1. workspace instructionsTemplate (or deployment default) with the
1939
+ // non-bypassable CORE substituted at {{core}} — composeAgentInstructions,
1940
+ // 2. + the per-session persona instructions (session-specific, LAST so it
1941
+ // refines the workspace persona),
1942
+ // 3. + the one-shot genesis title directive (genesis turn only).
1943
+ // With no session instructions and no genesis hint this is byte-identical to
1944
+ // the historical composed instructions.
1945
+ instructions: appendGenesisTitleDirective(
1946
+ appendSessionInstructions(
1947
+ composeAgentInstructions(options.instructionsTemplate ?? settings.agentInstructionsTemplate, options.workspaceEnvironment),
1948
+ options.sessionInstructions
1949
+ ),
1950
+ options.genesisTitleHint
1951
+ ),
1761
1952
  modelSettings: {
1762
1953
  reasoning: { effort: options.reasoningEffort ?? settings.openaiReasoningEffort, summary: "detailed" },
1763
1954
  // Server-side compaction (OpenAI platform) requires store=false: the
@@ -2233,6 +2424,52 @@ var normalizeComputerCallsFilter = ({ modelData }) => ({
2233
2424
  modelData.input
2234
2425
  )
2235
2426
  });
2427
/**
 * Builds the per-call "context robustness" model-input filter.
 *
 * The budget, compaction mode and compaction threshold are resolved once from
 * `settings`; the returned filter then runs on every model call and:
 *   1. passes the input through `elideStaleScreenshotImages` and warns when
 *      anything was elided,
 *   2. when an input budget applies (see `modelCallBudgetTokens`), passes the
 *      result through `enforceInputBudget` and adopts the trimmed items only
 *      if the guard reports `trimmed`,
 *   3. in client compaction mode with `options.throwOnCompactionNeeded` set,
 *      compares a token signal against the compaction threshold and throws
 *      `CompactionNeededError` when the signal exceeds it.
 *
 * @param settings Runtime settings used to resolve budget/compaction values.
 * @param options Optional behavior switches:
 *   - `throwOnCompactionNeeded`: enables the compaction check in step 3.
 *   - `contextCompactionSignalTokens`: optional callback returning a reported
 *     token count; a positive number is used as the signal ("provider"),
 *     anything else falls back to `estimateTokens(input)` ("estimate").
 * @returns A filter `({ modelData }) => modelData'` where only `input` may differ.
 * @throws CompactionNeededError when the token signal exceeds the threshold.
 */
function contextRobustnessFilterForSettings(settings, options = {}) {
  const budgetTokens = modelCallBudgetTokens(settings);
  const isClientCompaction = resolveContextCompactionMode(settings) === "client";
  const thresholdTokens = clientCompactionThresholdTokens(settings);

  return ({ modelData }) => {
    // Step 1: screenshot elision always runs, regardless of compaction mode.
    const screenshotPass = elideStaleScreenshotImages(modelData.input);
    if (screenshotPass.elidedCount > 0) {
      console.warn(
        `per-call image history policy elided ${screenshotPass.elidedCount} older screenshot image(s), keeping the last ${Math.min(3, screenshotPass.imageCount)} full image(s)`
      );
    }
    let input = screenshotPass.items;

    // Step 2: the budget guard only applies when a budget was resolved.
    if (budgetTokens !== void 0) {
      const budgetPass = enforceInputBudget(input, budgetTokens);
      if (budgetPass.trimmed) {
        console.warn(
          `per-call budget guard trimmed ${budgetPass.droppedCount} oldest history item(s) to fit input budget (${budgetTokens} tokens); the over-budget model call was NOT sent`
        );
        input = budgetPass.items;
      }
    }

    // Step 3: the compaction check is opt-in and limited to client mode.
    if (!isClientCompaction || !options.throwOnCompactionNeeded) {
      return { ...modelData, input };
    }
    const reported = options.contextCompactionSignalTokens?.();
    const fromProvider = typeof reported === "number" && reported > 0;
    // Prefer the reported count; otherwise estimate from the (possibly trimmed) input.
    const signalTokens = fromProvider ? reported : estimateTokens(input);
    if (signalTokens > thresholdTokens) {
      throw new CompactionNeededError({
        signalTokens,
        thresholdTokens,
        signalSource: fromProvider ? "provider" : "estimate"
      });
    }
    return { ...modelData, input };
  };
}
2466
/**
 * Resolves the per-call input token budget for the given settings.
 *
 * A budget is only returned when the compaction mode resolves to "client"
 * and `contextInputBudgetTokens(settings)` is strictly positive.
 *
 * @param settings Runtime settings.
 * @returns The positive token budget, or `undefined` when no budget applies.
 */
function modelCallBudgetTokens(settings) {
  const clientMode = resolveContextCompactionMode(settings) === "client";
  if (clientMode) {
    const budget = contextInputBudgetTokens(settings);
    if (budget > 0) {
      return budget;
    }
  }
  // Non-client mode, or a non-positive budget: no per-call budget.
  return void 0;
}
2236
2473
  function composeCallModelInputFilters(filters) {
2237
2474
  return async (args) => {
2238
2475
  let modelData = args.modelData;
@@ -2242,11 +2479,12 @@ function composeCallModelInputFilters(filters) {
2242
2479
  return modelData;
2243
2480
  };
2244
2481
  }
2245
- function callModelInputFilterForSettings(settings) {
2482
/**
 * Builds the built-in model-input filter chain for the given settings.
 *
 * Chain order:
 *   1. `normalizeComputerCallsFilter` (always),
 *   2. `stripProviderItemIdsFilter` (only when
 *      `settings.openaiProviderItemIds === "strip"`),
 *   3. the filter from `contextRobustnessFilterForSettings(settings, options)`.
 *
 * @param settings Runtime settings.
 * @param options Forwarded unchanged to `contextRobustnessFilterForSettings`.
 * @returns The composed filter produced by `composeCallModelInputFilters`.
 */
function callModelInputFilterForSettings(settings, options = {}) {
  const stripProviderIds = settings.openaiProviderItemIds === "strip";
  return composeCallModelInputFilters([
    normalizeComputerCallsFilter,
    ...(stripProviderIds ? [stripProviderItemIdsFilter] : []),
    contextRobustnessFilterForSettings(settings, options)
  ]);
}
2252
2490
  async function runAgentStream(agent, input, settings, overrides = {}) {
@@ -2283,7 +2521,13 @@ async function runAgentStream(agent, input, settings, overrides = {}) {
2283
2521
  }
2284
2522
  const decoratedClient = withSandboxLifecycleHooks(resourceClient2, ownedHooks, ownedHookContext);
2285
2523
  const ownedFilter = composeCallModelInputFilters(
2286
- [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
2524
+ [
2525
+ callModelInputFilterForSettings(settings, {
2526
+ throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
2527
+ ...overrides.contextCompactionSignalTokens ? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens } : {}
2528
+ }),
2529
+ overrides.callModelInputFilter
2530
+ ].filter(
2287
2531
  (f) => Boolean(f)
2288
2532
  )
2289
2533
  );
@@ -2319,18 +2563,23 @@ async function runAgentStream(agent, input, settings, overrides = {}) {
2319
2563
  }) : void 0;
2320
2564
  const sandboxSessionState = prepared.sandboxSessionState ?? (prepared.serializedRunStateForSandbox && client ? await restoredSandboxSessionState(await RunState.fromString(agent, prepared.serializedRunStateForSandbox), client) : void 0);
2321
2565
  const callModelInputFilter = composeCallModelInputFilters(
2322
- [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
2566
+ [
2567
+ callModelInputFilterForSettings(settings, {
2568
+ throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
2569
+ ...overrides.contextCompactionSignalTokens ? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens } : {}
2570
+ }),
2571
+ overrides.callModelInputFilter
2572
+ ].filter(
2323
2573
  (f) => Boolean(f)
2324
2574
  )
2325
2575
  );
2326
2576
  const runOptions = {
2327
2577
  stream: true,
2328
2578
  maxTurns: settings.agentMaxModelCallsPerTurn,
2329
- // Strip provider-assigned item ids from every model call (turn-start
2330
- // history replay AND mid-turn follow-ups) so requests never depend on the
2331
- // provider's server-side response store. A stored response can vanish
2332
- // between two calls of the same turn, failing the run with 400 "Item with
2333
- // id 'rs_…' not found"; with the ids gone the request is self-contained.
2579
+ // Built-in per-call guard chain: normalize computer calls, optionally strip
2580
+ // provider ids, elide stale screenshots in every mode, and trim to the input
2581
+ // budget on the client-compaction path. This runs for turn-start replay AND
2582
+ // every mid-turn follow-up.
2334
2583
  callModelInputFilter
2335
2584
  };
2336
2585
  void settings.disableOpenaiTracing;
@@ -3503,12 +3752,16 @@ function approvalIdentifier(item) {
3503
3752
  export {
3504
3753
  ActiveBackendUnresolvableError,
3505
3754
  CAPABILITY_DESCRIPTORS,
3755
+ CLIENT_COMPACTION_TRIGGER_FRACTION,
3756
+ COMPACTION_PROMPT,
3506
3757
  COMPACTION_SUMMARY_MARKER,
3758
+ COMPACT_USER_MESSAGE_MAX_TOKENS,
3507
3759
  ChannelAConflictError,
3508
3760
  ChannelANotFoundError,
3509
3761
  ChannelAUnsupportedError,
3510
3762
  ChannelAValidationError,
3511
3763
  CodexSubscriptionUnavailableError,
3764
+ CompactionNeededError,
3512
3765
  ComputerActionError,
3513
3766
  ComputerReadOnlyError,
3514
3767
  ComputerUnavailableError,
@@ -3530,12 +3783,13 @@ export {
3530
3783
  RecordingUnavailableError,
3531
3784
  RoutingSandboxSession,
3532
3785
  RoutingUnsupportedError,
3786
+ SCREENSHOT_OMITTED_PLACEHOLDER,
3533
3787
  SELFHOSTED_DEFAULT_TIMEOUT_MS,
3534
3788
  SELFHOSTED_RECONNECT_WINDOW_MS,
3535
3789
  SELFHOSTED_RELAY_STREAM_PATH,
3536
3790
  STREAM_PORT,
3537
3791
  STREAM_TOKEN_DEFAULT_TTL_SECONDS,
3538
- SUMMARY_INSTRUCTIONS,
3792
+ SUMMARY_BUFFER_TOKENS,
3539
3793
  SUMMARY_PREFIX,
3540
3794
  SandboxChannelAService,
3541
3795
  SandboxComputer,
@@ -3550,8 +3804,11 @@ export {
3550
3804
  TERMINAL_STREAM_PORT,
3551
3805
  TerminalServerError,
3552
3806
  TerminalServerUnsupportedError,
3807
+ USER_MESSAGE_TRUNCATION_MARKER,
3553
3808
  agentErrorToControlError,
3554
3809
  agentsErrorRunState,
3810
+ appendGenesisTitleDirective,
3811
+ appendSessionInstructions,
3555
3812
  applyMissingManifestEntries,
3556
3813
  assertDescriptorRegistryInvariants,
3557
3814
  assertProviderRegistryInvariants,
@@ -3560,7 +3817,8 @@ export {
3560
3817
  azureOpenAIDefaultQuery,
3561
3818
  backendSupportsOs,
3562
3819
  buildAgentCapabilities,
3563
- buildCompactionMessages,
3820
+ buildCompactionPromptInput,
3821
+ buildCompactionReplacementHistory,
3564
3822
  buildDisplayStackScript,
3565
3823
  buildManifest,
3566
3824
  buildModelInstance,
@@ -3572,21 +3830,24 @@ export {
3572
3830
  buildSummaryItem,
3573
3831
  buildTerminalServerScript,
3574
3832
  callModelInputFilterForSettings,
3833
+ clientCompactionThresholdTokens,
3575
3834
  collectSandboxEnvironment,
3576
- compactionSummaryText,
3577
3835
  composeAgentInstructions,
3578
3836
  computerUse,
3579
3837
  configureOpenAI,
3580
3838
  contentTypeForCodec,
3839
+ contextRobustnessFilterForSettings,
3581
3840
  coreInstructions,
3582
3841
  createProductionAgentRuntime,
3583
3842
  createSandboxClient,
3584
3843
  createSandboxClientForBackend,
3844
+ decideClientCompaction,
3585
3845
  decodeModalSnapshotId,
3586
3846
  deletePriorPersistedSnapshot,
3587
3847
  deleteRecordingArtifacts,
3588
3848
  deserializeSandboxSessionStateEnvelope,
3589
3849
  desktopCapableBackend,
3850
+ elideStaleScreenshotImages,
3590
3851
  enforceInputBudget,
3591
3852
  ensureDisplayStack,
3592
3853
  ensureReadableStreamFrom,
@@ -3597,6 +3858,7 @@ export {
3597
3858
  exposeStreamPort,
3598
3859
  extForCodec,
3599
3860
  extractResponseOutputText,
3861
+ findCompactionNeededError,
3600
3862
  findKeepBoundary,
3601
3863
  isCompactionSummary,
3602
3864
  isExecSessionLostBanner,
@@ -3609,6 +3871,8 @@ export {
3609
3871
  materializeSandboxFileDownloads,
3610
3872
  maxTurnsExceededRunState,
3611
3873
  mintStreamToken,
3874
+ modalSandboxAttributionEnvironment,
3875
+ modalSandboxAttributionTags,
3612
3876
  modelResponseUsageFromSdkEvent,
3613
3877
  negotiateCapabilities,
3614
3878
  negotiateSelfhostedCapabilities,
@@ -3623,14 +3887,13 @@ export {
3623
3887
  parseNumstatZ,
3624
3888
  parsePorcelainV2,
3625
3889
  parseUnifiedPatch,
3626
- planCompaction,
3627
3890
  prefixedMcpToolName,
3628
3891
  prepareAgentTools,
3629
3892
  prepareRunInput,
3630
3893
  readRecordingBytes,
3631
3894
  readWorkspaceArchiveFromEnvelopeSessionState,
3632
3895
  recordingStorageKey,
3633
- renderPrefixTranscript,
3896
+ renderCompactionPromptInputForChat,
3634
3897
  repositoryCloneCommand,
3635
3898
  repositoryUsesSandboxClone,
3636
3899
  resolveTurnModel,
@@ -3660,8 +3923,11 @@ export {
3660
3923
  stripReasoningIdentityFromSerializedRunState,
3661
3924
  subjectFor,
3662
3925
  summarizeForCompaction,
3926
+ sweepModalOrphanSandboxes,
3927
+ tagModalSandbox,
3663
3928
  tearDownDisplayStack,
3664
3929
  tearDownTerminalServer,
3930
+ terminateModalSandboxById,
3665
3931
  timeoutAgentError,
3666
3932
  timeoutControlResponse,
3667
3933
  verifyStreamToken,