@opengeni/runtime 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,149 @@
1
+ import type { AgentInputItem } from "@openai/agents";
2
+
/**
 * Text written in place of a screenshot that has been dropped from the
 * per-call model input.
 */
export const SCREENSHOT_OMITTED_PLACEHOLDER =
  "[screenshot omitted: an older desktop frame — the full image remains in the session event log]";

/** Number of most-recent images kept when `keepLast` is omitted or is NaN. */
const DEFAULT_KEEP_LAST = 3;

/** Matches a base64 `data:image/...` URL anywhere inside a string (case-insensitive). */
const DATA_IMAGE_BASE64_PATTERN = /data:image\/[a-z0-9.+-]+;base64,[a-z0-9+/=_-]+/i;

/** Fields of a `computer_screenshot` output that are probed for an image, in order. */
const COMPUTER_SCREENSHOT_KEYS = ["data", "image_url", "imageUrl"];

/** Fields of an `input_image` record that are probed for an image, in order. */
const INPUT_IMAGE_KEYS = ["image", "imageUrl", "image_url"];

/** Container fields of a tool-result record that are searched recursively. */
const NESTED_RESULT_KEYS = ["content", "text", "output"];

type PathSegment = string | number;

/** One image found in the history: where it lives and what to write there if it is elided. */
type ImageOccurrence = {
  // Key/index path from the root items array down to the value to overwrite.
  path: PathSegment[];
  // Value stored at `path` when the image is elided.
  replacement: unknown;
};

export type ElideStaleScreenshotsResult<T> = {
  items: T[];
  imageCount: number;
  elidedCount: number;
};

export type ElideStaleScreenshotsOptions = {
  keepLast?: number;
  placeholder?: string;
};

/**
 * Replace all but the most recent `keepLast` images found in tool results with
 * a text placeholder.
 *
 * Images are discovered in history order inside `computer_call_result` /
 * `computer_call_output` screenshots and inside `function_call_result` /
 * `function_call_output` payloads; user and system messages are never touched.
 *
 * The input array and its items are not mutated. Only items that contain an
 * elided image are deep-cloned; every other item is returned by reference,
 * exactly as on the path where nothing is elided.
 *
 * @param items History items to scan.
 * @param options `keepLast` — how many of the newest images to keep (default 3;
 *   fractional values are floored, negatives clamp to 0, NaN falls back to the
 *   default). `placeholder` — replacement text (default
 *   {@link SCREENSHOT_OMITTED_PLACEHOLDER}).
 * @returns A new items array plus the number of images found and elided.
 */
export function elideStaleScreenshotImages<T extends AgentInputItem>(
  items: readonly T[],
  options: ElideStaleScreenshotsOptions = {},
): ElideStaleScreenshotsResult<T> {
  const keepLast = normalizeKeepLast(options.keepLast);
  const placeholder = options.placeholder ?? SCREENSHOT_OMITTED_PLACEHOLDER;

  const occurrences: ImageOccurrence[] = [];
  for (let i = 0; i < items.length; i += 1) {
    collectItemImageOccurrences(items[i], [i], placeholder, occurrences);
  }

  const imageCount = occurrences.length;
  const elidedCount = Math.max(0, imageCount - keepLast);
  const result = items.slice();
  if (elidedCount === 0) {
    return { items: result, imageCount, elidedCount: 0 };
  }

  // Deep-clone only the items that are about to change. Cloning the whole
  // history would copy every base64 payload on each call for no benefit.
  const clonedIndexes = new Set<number>();
  for (const occurrence of occurrences.slice(0, elidedCount)) {
    // Every collected path starts with the item's index in `items`.
    const itemIndex = occurrence.path[0] as number;
    if (!clonedIndexes.has(itemIndex)) {
      result[itemIndex] = structuredClone(result[itemIndex]) as T;
      clonedIndexes.add(itemIndex);
    }
    setPath(result, occurrence.path, occurrence.replacement);
  }
  return { items: result, imageCount, elidedCount };
}

/**
 * Turn the caller-supplied `keepLast` into a non-negative whole number.
 * A missing or NaN value yields the default, so a bad option can never leak
 * NaN into the returned counts.
 */
function normalizeKeepLast(requested: number | undefined): number {
  if (requested == null || Number.isNaN(requested)) {
    return DEFAULT_KEEP_LAST;
  }
  return Math.max(0, Math.floor(requested));
}

/**
 * Record the image occurrences of one history item into `out`.
 * User/system messages are skipped; computer-call outputs and function-call
 * outputs are delegated to their dedicated collectors; anything else is ignored.
 */
function collectItemImageOccurrences(
  item: unknown,
  path: PathSegment[],
  placeholder: string,
  out: ImageOccurrence[],
): void {
  if (!isRecord(item)) {
    return;
  }
  if (item.type === "message" && (item.role === "user" || item.role === "system")) {
    return;
  }
  if (item.type === "computer_call_result" || item.type === "computer_call_output") {
    collectComputerOutputImages(item, path, placeholder, out);
    return;
  }
  if (item.type === "function_call_result" || item.type === "function_call_output") {
    collectToolResultImages(item.output, [...path, "output"], placeholder, out);
  }
}

/**
 * Record the screenshot of a computer-call item, if it has one.
 * Only an `output` of type `computer_screenshot` is considered, and at most one
 * occurrence is recorded: the first probed field that holds an image data URL.
 */
function collectComputerOutputImages(
  item: Record<string, unknown>,
  path: PathSegment[],
  placeholder: string,
  out: ImageOccurrence[],
): void {
  const output = item.output;
  if (!isRecord(output) || output.type !== "computer_screenshot") {
    return;
  }
  for (const key of COMPUTER_SCREENSHOT_KEYS) {
    if (isImageDataUrl(output[key])) {
      out.push({ path: [...path, "output", key], replacement: placeholder });
      return;
    }
  }
}

/**
 * Walk a function-call output and record every image in it.
 *
 * - A string containing an image data URL is replaced wholesale by the placeholder.
 * - Arrays are searched element by element.
 * - An `input_image` record holding a data URL is replaced by an `input_text`
 *   record carrying the placeholder.
 * - Any other record is searched through its `content`, `text` and `output` fields.
 */
function collectToolResultImages(
  value: unknown,
  path: PathSegment[],
  placeholder: string,
  out: ImageOccurrence[],
): void {
  if (typeof value === "string") {
    if (isImageDataUrl(value)) {
      out.push({ path, replacement: placeholder });
    }
    return;
  }
  if (Array.isArray(value)) {
    for (let i = 0; i < value.length; i += 1) {
      collectToolResultImages(value[i], [...path, i], placeholder, out);
    }
    return;
  }
  if (!isRecord(value)) {
    return;
  }
  if (value.type === "input_image") {
    for (const key of INPUT_IMAGE_KEYS) {
      if (isImageDataUrl(value[key])) {
        out.push({ path, replacement: { type: "input_text", text: placeholder } });
        return;
      }
    }
    // No data URL in the probed fields: fall through to the nested search below.
  }
  for (const key of NESTED_RESULT_KEYS) {
    if (key in value) {
      collectToolResultImages(value[key], [...path, key], placeholder, out);
    }
  }
}

/** True when `value` is a string that contains a base64 image data URL. */
function isImageDataUrl(value: unknown): value is string {
  return typeof value === "string" && DATA_IMAGE_BASE64_PATTERN.test(value);
}

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Assign `value` at `path` inside `root`, mutating the containers in place.
 * An empty path is a no-op. Every intermediate segment must already exist;
 * callers only pass paths collected from the same structure.
 */
function setPath(root: unknown, path: PathSegment[], value: unknown): void {
  if (path.length === 0) {
    return;
  }
  let cursor = root;
  for (let i = 0; i < path.length - 1; i += 1) {
    const segment = path[i]!;
    cursor = Array.isArray(cursor)
      ? cursor[segment as number]
      : (cursor as Record<string, unknown>)[segment as string];
  }
  const last = path[path.length - 1]!;
  if (Array.isArray(cursor)) {
    cursor[last as number] = value;
  } else {
    (cursor as Record<string, unknown>)[last as string] = value;
  }
}
package/src/index.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { ConfiguredModel, ContextCompactionMode, ModelProviderApi, ResolvedModelProvider, Settings } from "@opengeni/config";
2
- import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment, contextServerCompactThreshold, firstPartyMcpBaseUrl, parseExposedPorts, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
2
+ import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment, contextInputBudgetTokens, contextServerCompactThreshold, firstPartyMcpBaseUrl, parseExposedPorts, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
3
3
  import { CAPABILITY_DESCRIPTORS, isClearedRunStateBlob, signDelegatedAccessToken, type Permission, type ReasoningEffort, type ResourceRef, type SessionEventType, type ToolRef } from "@opengeni/contracts";
4
4
  import {
5
5
  Agent,
@@ -82,8 +82,17 @@ import { dirname, isAbsolute, join, posix as posixPath, relative } from "node:pa
82
82
  import { fileURLToPath } from "node:url";
83
83
 
84
84
  import { computerCallNormalizingFetch, normalizeComputerCallActions, sanitizeHistoryItemsForModel } from "./history-sanitizer";
85
+ import { elideStaleScreenshotImages } from "./image-history";
85
86
  import { installCodexToolSearch } from "./codex-tool-search";
86
- import { enforceInputBudget, estimateItemTokens } from "./context-compaction";
87
+ import {
88
+ CompactionNeededError,
89
+ SUMMARY_BUFFER_TOKENS,
90
+ clientCompactionThresholdTokens,
91
+ enforceInputBudget,
92
+ estimateItemTokens,
93
+ estimateTokens,
94
+ renderCompactionPromptInputForChat,
95
+ } from "./context-compaction";
87
96
  import {
88
97
  createSandboxClient,
89
98
  deserializeSandboxSessionStateEnvelope,
@@ -91,7 +100,7 @@ import {
91
100
  restoredSandboxSessionStateFromEntry,
92
101
  setSelfhostedApplyDiff,
93
102
  } from "./sandbox";
94
- import { computerUse } from "./sandbox-computer";
103
+ import { computerUse, type ComputerToolMode } from "./sandbox-computer";
95
104
 
96
105
  // P4.3 computer-use surface (the agent's :0 driver). Re-exported from the barrel
97
106
  // so callers (the worker, live proofs) reach SandboxComputer/ComputerUseCapability
@@ -106,6 +115,7 @@ export {
106
115
  ComputerActionError,
107
116
  type SandboxComputerOptions,
108
117
  type ComputerUseArgs,
118
+ type ComputerToolMode,
109
119
  } from "./sandbox-computer";
110
120
 
111
121
  // The agent-loop-free sandbox leaf (createSandboxClient + resume/recovery
@@ -133,22 +143,34 @@ export type { HistoryItem } from "./history-sanitizer";
133
143
  export { OpenAIChatCompletionsModel, OpenAIResponsesModel } from "@openai/agents";
134
144
 
135
145
  export {
136
- planCompaction,
146
+ CompactionNeededError,
147
+ buildCompactionPromptInput,
148
+ buildCompactionReplacementHistory,
149
+ clientCompactionThresholdTokens,
150
+ decideClientCompaction,
137
151
  enforceInputBudget,
138
152
  buildSummaryItem,
139
- buildCompactionMessages,
153
+ findCompactionNeededError,
140
154
  isCompactionSummary,
141
155
  isUserMessage,
142
156
  findKeepBoundary,
143
157
  estimateTokens,
144
158
  estimateItemTokens,
145
- compactionSummaryText,
146
- renderPrefixTranscript,
159
+ renderCompactionPromptInputForChat,
147
160
  COMPACTION_SUMMARY_MARKER,
161
+ COMPACTION_PROMPT,
162
+ COMPACT_USER_MESSAGE_MAX_TOKENS,
163
+ CLIENT_COMPACTION_TRIGGER_FRACTION,
164
+ SUMMARY_BUFFER_TOKENS,
148
165
  SUMMARY_PREFIX,
149
- SUMMARY_INSTRUCTIONS,
166
+ USER_MESSAGE_TRUNCATION_MARKER,
150
167
  } from "./context-compaction";
151
- export type { CompactionItem, CompactionPlan, PlanCompactionInput } from "./context-compaction";
168
+ export type { ClientCompactionDecision, CompactionItem } from "./context-compaction";
169
+ export {
170
+ elideStaleScreenshotImages,
171
+ SCREENSHOT_OMITTED_PLACEHOLDER,
172
+ } from "./image-history";
173
+ export type { ElideStaleScreenshotsOptions, ElideStaleScreenshotsResult } from "./image-history";
152
174
 
153
175
  ensureReadableStreamFrom();
154
176
 
@@ -499,10 +521,10 @@ export function configureOpenAI(settings: Settings): void {
499
521
 
500
522
  /**
501
523
  * Run the compaction summarizer as one plain, tool-less, non-streaming model
502
- * call against the resolved provider. `system`/`user` come from
503
- * buildCompactionMessages. Returns the trimmed summary text, or null on any
524
+ * call against the resolved provider. `input` is the active history plus
525
+ * Codex's checkpoint prompt. Returns the trimmed summary text, or null on any
504
526
  * failure (the caller treats a failed summarize as "skip compaction this turn"
505
- * never fatal). The call deliberately does NOT request reasoning encryption,
527
+ * - never fatal). The call deliberately does NOT request reasoning encryption,
506
528
  * tools, or server-side compaction; it is a self-contained summarize.
507
529
  *
508
530
  * Provider-aware: the summary always runs on the SAME provider that serves the
@@ -516,22 +538,19 @@ export function configureOpenAI(settings: Settings): void {
516
538
  */
517
539
  export async function summarizeForCompaction(
518
540
  settings: Settings,
519
- messages: { system: string; user: string },
541
+ input: Array<Record<string, unknown>>,
520
542
  options: { client?: OpenAI; api?: ModelProviderApi; maxOutputTokens?: number; model?: string } = {},
521
543
  ): Promise<string | null> {
522
544
  const client = options.client ?? buildOpenAIClientFromSettings(settings);
523
545
  const api = options.api ?? "responses";
524
546
  const model = options.model ?? settings.openaiModel;
525
- const maxTokens = options.maxOutputTokens ?? settings.contextSummaryMaxTokens;
547
+ const maxTokens = options.maxOutputTokens ?? SUMMARY_BUFFER_TOKENS;
526
548
  try {
527
549
  if (api === "chat") {
528
550
  const completion = await client.chat.completions.create({
529
551
  model,
530
552
  max_tokens: maxTokens,
531
- messages: [
532
- { role: "system", content: messages.system },
533
- { role: "user", content: messages.user },
534
- ],
553
+ messages: [{ role: "user", content: renderCompactionPromptInputForChat(input) }],
535
554
  } as any);
536
555
  const text = (completion as { choices?: Array<{ message?: { content?: unknown } }> }).choices?.[0]?.message?.content;
537
556
  const trimmed = typeof text === "string" ? text.trim() : "";
@@ -544,10 +563,7 @@ export async function summarizeForCompaction(
544
563
  // built-in path (api "responses"), so gate it on the built-in provider.
545
564
  ...(settings.openaiProvider === "azure" ? {} : { store: false }),
546
565
  max_output_tokens: maxTokens,
547
- input: [
548
- { role: "system", content: messages.system },
549
- { role: "user", content: messages.user },
550
- ],
566
+ input,
551
567
  } as any);
552
568
  const text = extractResponseOutputText(response);
553
569
  const trimmed = text.trim();
@@ -643,6 +659,12 @@ export type BuildAgentOptions = {
643
659
  encryptedReasoning?: boolean;
644
660
  contextWindowTokens?: number;
645
661
  structuredToolTransport?: boolean;
662
+ // EXPLICIT computer-use tool transport, decided where provider identity is
663
+ // authoritative (the worker's model resolution — agent-turn.ts). Threaded into
664
+ // buildAgentCapabilities → computerUse({toolMode}) so tool selection never rests
665
+ // on the SDK's constructor-name sniff. When omitted, the legacy sniff +
666
+ // `structuredToolTransport` neutralize path is preserved byte-for-byte.
667
+ computerToolMode?: ComputerToolMode;
646
668
  // The LIVE, by-reference connector-namespace Set from prepareAgentTools
647
669
  // (codexConnectorNamespaces): fills during each turn's codex_apps tools/list,
648
670
  // read per model call by the codex tool_search description so the model sees
@@ -864,6 +886,7 @@ export function buildOpenGeniAgent(settings: Settings, resources: ResourceRef[],
864
886
  compactionMode,
865
887
  contextWindowTokens,
866
888
  ...(options.structuredToolTransport !== undefined ? { structuredToolTransport: options.structuredToolTransport } : {}),
889
+ ...(options.computerToolMode !== undefined ? { computerToolMode: options.computerToolMode } : {}),
867
890
  }),
868
891
  });
869
892
  agentFileDownloads.set(agent, normalizeSandboxFileDownloads(options.fileResourceDownloads ?? []).filter((download) => !download.content));
@@ -961,7 +984,16 @@ function neutralizeStructuredToolTransport(capability: ReturnType<typeof filesys
961
984
  export function buildAgentCapabilities(
962
985
  settings: Settings,
963
986
  packSkills: PackSkill[],
964
- options: { compactionMode?: ContextCompactionMode; contextWindowTokens?: number; structuredToolTransport?: boolean } = {},
987
+ options: {
988
+ compactionMode?: ContextCompactionMode;
989
+ contextWindowTokens?: number;
990
+ structuredToolTransport?: boolean;
991
+ // EXPLICIT computer-use transport (see BuildAgentOptions.computerToolMode). When
992
+ // present, computerUse() is handed the mode directly and its tools() obeys it
993
+ // without the constructor-name sniff. When absent, the legacy neutralize +
994
+ // imageFunctionResults path (driven by structuredToolTransport) is unchanged.
995
+ computerToolMode?: ComputerToolMode;
996
+ } = {},
965
997
  ): ReturnType<typeof Capabilities.default> {
966
998
  const mode = options.compactionMode ?? resolveContextCompactionMode(settings);
967
999
  const contextWindowTokens = options.contextWindowTokens ?? settings.contextWindowTokens;
@@ -996,25 +1028,37 @@ export function buildAgentCapabilities(
996
1028
  && settings.sandboxDesktopEnabled
997
1029
  && desktopCapableBackend(settings.sandboxBackend)
998
1030
  ) {
999
- // computer-use is now transport-aware, exactly like filesystem: its `tools()`
1000
- // emits the HOSTED `computer_use_preview` tool on the structured transport and a
1001
- // set of FUNCTION `computer_*` tools on the text transport. The ChatGPT/Codex
1002
- // backend rejects hosted tool types (only function/custom/web_search accepted),
1003
- // so on the codex path (structuredToolTransport === false) we neutralize the
1004
- // capability's model binding — the SAME trick used for filesystem above — so
1005
- // `tools()` sees no model instance and emits the function tools the backend can
1006
- // call, instead of suppressing the desktop tier entirely.
1031
+ // computer-use is transport-aware, exactly like filesystem: `tools()` emits the
1032
+ // HOSTED `computer_use_preview` tool on the structured transport and a set of
1033
+ // FUNCTION `computer_*` tools on the text transport. The ChatGPT/Codex backend
1034
+ // rejects hosted tool types (only function/custom/web_search accepted).
1035
+ //
1036
+ // HARDENING: when the caller declares an EXPLICIT `computerToolMode` (the worker
1037
+ // does, from its authoritative model resolution), thread it straight through —
1038
+ // tool selection then never depends on the SDK's model-instance constructor-name
1039
+ // sniff (which a wrapped/proxied model would defeat, silently 400ing a
1040
+ // chat-completions provider handed the hosted tool). When ABSENT, the legacy path
1041
+ // is preserved byte-for-byte: on the codex path (structuredToolTransport === false)
1042
+ // we set imageFunctionResults and neutralize the capability's model binding — the
1043
+ // SAME trick used for filesystem above — so `tools()` sees no model instance and
1044
+ // emits the function tools the backend can call, instead of suppressing the tier.
1045
+ const explicitMode = options.computerToolMode;
1007
1046
  const computerCapability = computerUse({
1008
1047
  dimensions: [settings.streamResolutionWidth, settings.streamResolutionHeight],
1009
1048
  readOnly: settings.computerUseReadOnly,
1010
- // On the codex path the function tools deliver screenshots as a real image the
1011
- // model can see. The ChatGPT/Codex backend rejects HOSTED tool types but DOES
1012
- // accept `input_image` content items inside a `function_call_output` (proven by
1013
- // openai/codex codex-rs, whose view_image tool ships exactly that shape) so a
1014
- // structured image tool result is seen, where a text data-URL would be unreadable.
1015
- ...(options.structuredToolTransport === false ? { imageFunctionResults: true } : {}),
1049
+ ...(explicitMode
1050
+ ? { toolMode: explicitMode }
1051
+ // Legacy (no explicit mode): on the codex path the function tools deliver
1052
+ // screenshots as a real image the model can see. The ChatGPT/Codex backend
1053
+ // rejects HOSTED tool types but DOES accept `input_image` content items inside a
1054
+ // `function_call_output` (proven by openai/codex codex-rs, whose view_image tool
1055
+ // ships exactly that shape) — so a structured image tool result is seen, where a
1056
+ // text data-URL would be unreadable.
1057
+ : options.structuredToolTransport === false ? { imageFunctionResults: true } : {}),
1016
1058
  });
1017
- if (options.structuredToolTransport === false) {
1059
+ // Neutralize ONLY on the legacy sniff path. With an explicit toolMode the mode
1060
+ // already forces the function tools, so the constructor-name override is moot.
1061
+ if (!explicitMode && options.structuredToolTransport === false) {
1018
1062
  neutralizeStructuredToolTransport(computerCapability);
1019
1063
  }
1020
1064
  caps.push(computerCapability as unknown as ReturnType<typeof Capabilities.default>[number]);
@@ -1088,19 +1132,19 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
1088
1132
  // device-code login may lack the connector scopes, and the backend can
1089
1133
  // reject the bearer at the initialize/tools-list handshake, so a 401/403
1090
1134
  // (or a missing/failed token) drops the server.
1091
- // - an AUTO-ATTACHED workspace-default capability MCP (ToolRef.optional):
1092
- // the caller never explicitly requested it, so a broken/expired
1093
- // capability credential must SKIP the server with a warning, never kill
1094
- // the turn before the model runs. An EXPLICITLY-requested tool omits
1095
- // `optional` and stays strict (below), preserving the fail-loud contract.
1135
+ // - an optional ToolRef: either an auto-attached workspace-default
1136
+ // capability MCP or a client/pack-selected portable ref. A
1137
+ // broken/expired credential or unavailable endpoint skips the server
1138
+ // with a warning, never killing the turn before the model runs. Bare
1139
+ // refs stay strict (below), preserving the fail-loud default.
1096
1140
  const optional = tool.optional === true;
1097
1141
  return { server, bestEffort: isCodexAppsMcpServer(config) || optional, optional };
1098
1142
  }));
1099
1143
  const requiredServers = servers.filter((entry) => !entry.bestEffort).map((entry) => entry.server);
1100
1144
  const bestEffortServers = servers.filter((entry) => entry.bestEffort).map((entry) => entry.server);
1101
- // Names of the OPTIONAL capability servers (not codex_apps) so a drop is
1102
- // surfaced as a warning; codex_apps keeps its historically-quiet drop (a
1103
- // not-logged-in ChatGPT plan is a normal, non-noteworthy state).
1145
+ // Names of the OPTIONAL servers (not codex_apps) so a drop is surfaced as a
1146
+ // warning; codex_apps keeps its historically-quiet drop (a not-logged-in
1147
+ // ChatGPT plan is a normal, non-noteworthy state).
1104
1148
  const optionalServerNames = new Set(
1105
1149
  servers.filter((entry) => entry.optional).map((entry) => entry.server.name),
1106
1150
  );
@@ -1121,7 +1165,7 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
1121
1165
  }
1122
1166
  const error = connectedBestEffort.errors.get(failed);
1123
1167
  console.warn(
1124
- `[mcp] optional capability server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
1168
+ `[mcp] optional server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
1125
1169
  error instanceof Error ? error.message : error,
1126
1170
  );
1127
1171
  }
@@ -1544,6 +1588,7 @@ export type RunAgentStreamOptions = {
1544
1588
  sandboxClient?: unknown;
1545
1589
  sandboxEnvironment?: Record<string, string>;
1546
1590
  onRuntimeEvent?: (event: NormalizedRuntimeEvent) => Promise<void> | void;
1591
+ contextCompactionSignalTokens?: () => number | null | undefined;
1547
1592
  // OWNERSHIP INVERSION (P1.2): an externally-owned, already-live sandbox
1548
1593
  // session resolved by the per-turn resume-by-id path. When present,
1549
1594
  // runAgentStream does NOT build (or resume, or discard) a client — it threads
@@ -1574,6 +1619,11 @@ export type RunAgentStreamOptions = {
1574
1619
  callModelInputFilter?: CallModelInputFilter;
1575
1620
  };
1576
1621
 
1622
+ export type ContextRobustnessFilterOptions = {
1623
+ contextCompactionSignalTokens?: () => number | null | undefined;
1624
+ throwOnCompactionNeeded?: boolean;
1625
+ };
1626
+
1577
1627
  // One-shot directive appended to the agent's system prompt on the genesis turn
1578
1628
  // (see buildOpenGeniAgent's genesisTitleHint). Delivered through the
1579
1629
  // authoritative instructions channel so the model reliably obeys; references
@@ -1627,6 +1677,59 @@ export const normalizeComputerCallsFilter: CallModelInputFilter = ({ modelData }
1627
1677
  ) as unknown as AgentInputItem[],
1628
1678
  });
1629
1679
 
/**
 * Build the per-call context guard applied to model input.
 *
 * On every call the returned filter:
 *  1. elides stale screenshot images (default policy of the image-history helper);
 *  2. when an input budget is configured (client compaction mode only), trims
 *     the input with `enforceInputBudget`;
 *  3. when in client compaction mode and `options.throwOnCompactionNeeded` is
 *     set, throws `CompactionNeededError` if the token signal exceeds the
 *     client compaction threshold. The signal is the positive number returned
 *     by `options.contextCompactionSignalTokens`, or else an estimate of the
 *     (already guarded) input.
 *
 * @throws CompactionNeededError when step 3 finds the signal above the threshold.
 */
export function contextRobustnessFilterForSettings(
  settings: Settings,
  options: ContextRobustnessFilterOptions = {},
): CallModelInputFilter {
  // Resolved once per filter; the closure below reuses them for every call.
  const inputBudgetTokens = modelCallBudgetTokens(settings);
  const clientCompactionMode = resolveContextCompactionMode(settings) === "client";
  const compactionThresholdTokens = clientCompactionThresholdTokens(settings);

  return ({ modelData }) => {
    // Step 1: drop older screenshots.
    const images = elideStaleScreenshotImages(modelData.input);
    if (images.elidedCount > 0) {
      console.warn(
        `per-call image history policy elided ${images.elidedCount} older screenshot image(s), keeping the last ${Math.min(3, images.imageCount)} full image(s)`,
      );
    }
    let input = images.items;

    // Step 2: hard budget trim, only when a budget applies.
    if (inputBudgetTokens !== undefined) {
      const guarded = enforceInputBudget(
        input as unknown as Array<Record<string, unknown>>,
        inputBudgetTokens,
      );
      if (guarded.trimmed) {
        console.warn(
          `per-call budget guard trimmed ${guarded.droppedCount} oldest history item(s) to fit input budget (${inputBudgetTokens} tokens); the over-budget model call was NOT sent`,
        );
        input = guarded.items as unknown as AgentInputItem[];
      }
    }

    // Step 3: compaction signal, only when the caller opted in.
    const shouldCheckCompaction = clientCompactionMode && options.throwOnCompactionNeeded;
    if (!shouldCheckCompaction) {
      return { ...modelData, input };
    }
    const reported = options.contextCompactionSignalTokens?.();
    const hasReported = typeof reported === "number" && reported > 0;
    const signalTokens = hasReported
      ? reported
      : estimateTokens(input as unknown as Array<Record<string, unknown>>);
    if (signalTokens > compactionThresholdTokens) {
      throw new CompactionNeededError({
        signalTokens,
        thresholdTokens: compactionThresholdTokens,
        signalSource: hasReported ? "provider" : "estimate",
      });
    }
    return { ...modelData, input };
  };
}
1724
+
/**
 * Input token budget for a single model call.
 * Returns `undefined` unless the compaction mode resolves to "client" and the
 * configured input budget is a positive number.
 */
function modelCallBudgetTokens(settings: Settings): number | undefined {
  const isClientMode = resolveContextCompactionMode(settings) === "client";
  if (!isClientMode) {
    return undefined;
  }
  const budget = contextInputBudgetTokens(settings);
  if (budget > 0) {
    return budget;
  }
  return undefined;
}
1732
+
1630
1733
  /**
1631
1734
  * Compose a list of callModelInputFilters into one, applied left-to-right so
1632
1735
  * each sees the prior filter's output.
@@ -1645,13 +1748,18 @@ function composeCallModelInputFilters(filters: CallModelInputFilter[]): CallMode
1645
1748
  * The model-input filter applied before every model call. The computer_call
1646
1749
  * action/actions normalizer is ALWAYS on (the Azure endpoint 400s without it);
1647
1750
  * the provider-item-id strip is layered on top when the configured policy
1648
- * selects it.
1751
+ * selects it; the context-robustness guard then elides stale screenshots on
1752
+ * every mode and applies hard budget trimming only on the client-compaction path.
1649
1753
  */
1650
- export function callModelInputFilterForSettings(settings: Settings): CallModelInputFilter | undefined {
1754
+ export function callModelInputFilterForSettings(
1755
+ settings: Settings,
1756
+ options: ContextRobustnessFilterOptions = {},
1757
+ ): CallModelInputFilter | undefined {
1651
1758
  const filters: CallModelInputFilter[] = [normalizeComputerCallsFilter];
1652
1759
  if (settings.openaiProviderItemIds === "strip") {
1653
1760
  filters.push(stripProviderItemIdsFilter);
1654
1761
  }
1762
+ filters.push(contextRobustnessFilterForSettings(settings, options));
1655
1763
  return composeCallModelInputFilters(filters);
1656
1764
  }
1657
1765
 
@@ -1730,7 +1838,15 @@ export async function runAgentStream(agent: Agent<any, any>, input: PreparedAgen
1730
1838
  // through the client during this run (it is inert for the provided session).
1731
1839
  const decoratedClient = withSandboxLifecycleHooks(resourceClient, ownedHooks, ownedHookContext);
1732
1840
  const ownedFilter = composeCallModelInputFilters(
1733
- [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
1841
+ [
1842
+ callModelInputFilterForSettings(settings, {
1843
+ throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
1844
+ ...(overrides.contextCompactionSignalTokens
1845
+ ? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens }
1846
+ : {}),
1847
+ }),
1848
+ overrides.callModelInputFilter,
1849
+ ].filter(
1734
1850
  (f): f is CallModelInputFilter => Boolean(f),
1735
1851
  ),
1736
1852
  );
@@ -1777,23 +1893,31 @@ export async function runAgentStream(agent: Agent<any, any>, input: PreparedAgen
1777
1893
  ?? (prepared.serializedRunStateForSandbox && client
1778
1894
  ? await restoredSandboxSessionState(await RunState.fromString(agent, prepared.serializedRunStateForSandbox), client)
1779
1895
  : undefined);
1780
- // Strip provider item ids first, then apply any per-turn filter (genesis
1781
- // title directive). Composed left-to-right so the directive lands on the
1782
- // already-id-stripped input. A callModelInputFilter only shapes the per-call
1783
- // model input, never the persisted run-state history.
1896
+ // Apply the built-in per-call filters (computer-call normalization, optional
1897
+ // provider-id stripping, image/budget guard), then any per-turn filter
1898
+ // (genesis title directive). A callModelInputFilter only shapes the per-call
1899
+ // model input; the SDK persists filtered clones into its session view, while
1900
+ // OpenGeni's durable conversation truth is still reconciled explicitly below.
1784
1901
  const callModelInputFilter = composeCallModelInputFilters(
1785
- [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
1902
+ [
1903
+ callModelInputFilterForSettings(settings, {
1904
+ throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
1905
+ ...(overrides.contextCompactionSignalTokens
1906
+ ? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens }
1907
+ : {}),
1908
+ }),
1909
+ overrides.callModelInputFilter,
1910
+ ].filter(
1786
1911
  (f): f is CallModelInputFilter => Boolean(f),
1787
1912
  ),
1788
1913
  );
1789
1914
  const runOptions: Parameters<typeof run>[2] = {
1790
1915
  stream: true,
1791
1916
  maxTurns: settings.agentMaxModelCallsPerTurn,
1792
- // Strip provider-assigned item ids from every model call (turn-start
1793
- // history replay AND mid-turn follow-ups) so requests never depend on the
1794
- // provider's server-side response store. A stored response can vanish
1795
- // between two calls of the same turn, failing the run with 400 "Item with
1796
- // id 'rs_…' not found"; with the ids gone the request is self-contained.
1917
+ // Built-in per-call guard chain: normalize computer calls, optionally strip
1918
+ // provider ids, elide stale screenshots in every mode, and trim to the input
1919
+ // budget on the client-compaction path. This runs for turn-start replay AND
1920
+ // every mid-turn follow-up.
1797
1921
  callModelInputFilter,
1798
1922
  };
1799
1923
  void settings.disableOpenaiTracing;