npm - @opengeni/runtime - Versions diffs - 0.2.1 → 0.2.3 - Mend

@opengeni/runtime 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/dist/{chunk-2PO56VAL.js → chunk-KNW7AMQB.js} +11 -4
package/dist/chunk-KNW7AMQB.js.map +1 -0
package/dist/index.d.ts +113 -177
package/dist/index.js +371 -171
package/dist/index.js.map +1 -1
package/dist/sandbox/index.d.ts +6 -4
package/dist/sandbox/index.js +1 -1
package/package.json +5 -5
package/src/context-compaction.ts +217 -348
package/src/image-history.ts +149 -0
package/src/index.ts +184 -60
package/src/sandbox/display-stack.ts +61 -12
package/src/sandbox-computer.ts +90 -18
package/dist/chunk-2PO56VAL.js.map +0 -1

package/src/image-history.ts ADDED Viewed

@@ -0,0 +1,149 @@
+import type { AgentInputItem } from "@openai/agents";
+// Default text substituted for a screenshot image that is elided from the history.
+export const SCREENSHOT_OMITTED_PLACEHOLDER =
+  "[screenshot omitted: an older desktop frame — the full image remains in the session event log]";
+// Matches a base64 `data:image/...` URL anywhere inside a string (the pattern is
+// unanchored and case-insensitive); the payload class also admits `_` and `-`.
+const DATA_IMAGE_BASE64_PATTERN = /data:image\/[a-z0-9.+-]+;base64,[a-z0-9+/=_-]+/i;
+// One step into a nested value: an object key (string) or an array index (number).
+type PathSegment = string | number;
+// A located image: `path` leads from the items array to the value to overwrite,
+// and `replacement` is what gets written there if the image is elided.
+type ImageOccurrence = {
+  path: PathSegment[];
+  replacement: unknown;
+};
+// Result of an elision pass: the returned items, how many images were found in
+// total, and how many of those were replaced.
+export type ElideStaleScreenshotsResult<T> = {
+  items: T[];
+  imageCount: number;
+  elidedCount: number;
+};
+// Tuning knobs for an elision pass. `keepLast` is how many of the most recently
+// found images stay intact (defaults to 3; floored and clamped to >= 0).
+// `placeholder` overrides SCREENSHOT_OMITTED_PLACEHOLDER.
+export type ElideStaleScreenshotsOptions = {
+  keepLast?: number;
+  placeholder?: string;
+};
+/**
+ * Replace all but the newest `keepLast` screenshot images in `items` with a
+ * placeholder and return the result as a copy.
+ *
+ * Images are located by `collectItemImageOccurrences` in item order, so the
+ * "newest" images are the ones found last. The input is never mutated: when
+ * nothing needs eliding the result is a shallow copy of `items` (the same item
+ * objects); otherwise it is a deep `structuredClone` with the stale images
+ * overwritten.
+ *
+ * @param items History items to scan.
+ * @param options `keepLast` (default 3; floored, negatives clamp to 0, `NaN`
+ *   falls back to the default) and `placeholder` (default
+ *   `SCREENSHOT_OMITTED_PLACEHOLDER`).
+ * @returns The (possibly rewritten) items, the number of images found, and the
+ *   number of images replaced.
+ */
+export function elideStaleScreenshotImages<T extends AgentInputItem>(
+  items: readonly T[],
+  options: ElideStaleScreenshotsOptions = {},
+): ElideStaleScreenshotsResult<T> {
+  const defaultKeepLast = 3;
+  const requestedKeepLast = options.keepLast ?? defaultKeepLast;
+  // NaN survives Math.floor/Math.max and would surface as `elidedCount: NaN`
+  // (plus a pointless deep clone), so treat it as "not specified".
+  const keepLast = Number.isNaN(requestedKeepLast)
+    ? defaultKeepLast
+    : Math.max(0, Math.floor(requestedKeepLast));
+  const placeholder = options.placeholder ?? SCREENSHOT_OMITTED_PLACEHOLDER;
+  const occurrences: ImageOccurrence[] = [];
+  for (let i = 0; i < items.length; i += 1) {
+    // Paths are rooted at the items array, so the first segment is the item index.
+    collectItemImageOccurrences(items[i], [i], placeholder, occurrences);
+  }
+  const elidedCount = Math.max(0, occurrences.length - keepLast);
+  if (elidedCount === 0) {
+    return { items: items.slice(), imageCount: occurrences.length, elidedCount: 0 };
+  }
+  // Clone before writing so the caller's history objects stay untouched.
+  const cloned = structuredClone(items) as T[];
+  // Occurrences are in discovery order; the oldest ones sit at the front.
+  for (const occurrence of occurrences.slice(0, elidedCount)) {
+    setPath(cloned, occurrence.path, occurrence.replacement);
+  }
+  return { items: cloned, imageCount: occurrences.length, elidedCount };
+}
+/**
+ * Record every elidable image carried by one history item.
+ *
+ * Only computer-call outputs and function-call outputs are inspected. Every
+ * other item, including user and system messages, contributes nothing.
+ *
+ * @param item The history item (any shape; non-objects are ignored).
+ * @param path Path from the items array to `item`.
+ * @param placeholder Text that stands in for an elided image.
+ * @param out Accumulator the found occurrences are appended to.
+ */
+function collectItemImageOccurrences(
+  item: unknown,
+  path: PathSegment[],
+  placeholder: string,
+  out: ImageOccurrence[],
+): void {
+  if (!isRecord(item)) {
+    return;
+  }
+  switch (item.type) {
+    case "computer_call_result":
+    case "computer_call_output":
+      collectComputerOutputImages(item, path, placeholder, out);
+      break;
+    case "function_call_result":
+    case "function_call_output":
+      collectToolResultImages(item.output, [...path, "output"], placeholder, out);
+      break;
+    default:
+      // Messages (user/system included) and any other item type are left alone.
+      break;
+  }
+}
+/**
+ * Record the screenshot image of a computer-call output item, if it has one.
+ *
+ * The item's `output` must be an object typed `computer_screenshot`. The first
+ * of `data`, `image_url`, `imageUrl` that holds a base64 image data URL is
+ * recorded (at most one occurrence per item) with the placeholder string as
+ * its replacement.
+ */
+function collectComputerOutputImages(
+  item: Record<string, unknown>,
+  path: PathSegment[],
+  placeholder: string,
+  out: ImageOccurrence[],
+): void {
+  const screenshot = item.output;
+  if (!isRecord(screenshot) || screenshot.type !== "computer_screenshot") {
+    return;
+  }
+  const imageKey = ["data", "image_url", "imageUrl"].find((key) => isImageDataUrl(screenshot[key]));
+  if (imageKey !== undefined) {
+    out.push({ path: [...path, "output", imageKey], replacement: placeholder });
+  }
+}
+/**
+ * Walk a function-call output recursively and record every inline image in it.
+ *
+ * - A string containing a base64 image data URL is recorded with the
+ *   placeholder string as its replacement.
+ * - Arrays are scanned element by element.
+ * - An `input_image` object whose `image`, `imageUrl` or `image_url` holds a
+ *   data URL is recorded as a whole, to be replaced by an `input_text` part.
+ * - Any other object is descended into through its `content`, `text` and
+ *   `output` fields, when present.
+ */
+function collectToolResultImages(
+  value: unknown,
+  path: PathSegment[],
+  placeholder: string,
+  out: ImageOccurrence[],
+): void {
+  if (typeof value === "string") {
+    if (!isImageDataUrl(value)) {
+      return;
+    }
+    out.push({ path, replacement: placeholder });
+    return;
+  }
+  if (Array.isArray(value)) {
+    for (const [index, entry] of value.entries()) {
+      collectToolResultImages(entry, [...path, index], placeholder, out);
+    }
+    return;
+  }
+  if (!isRecord(value)) {
+    return;
+  }
+  const record = value;
+  const isInlineInputImage =
+    record.type === "input_image"
+    && ["image", "imageUrl", "image_url"].some((key) => isImageDataUrl(record[key]));
+  if (isInlineInputImage) {
+    // The whole content part is swapped for text, not just its image field.
+    out.push({ path, replacement: { type: "input_text", text: placeholder } });
+    return;
+  }
+  for (const field of ["content", "text", "output"]) {
+    if (field in record) {
+      collectToolResultImages(record[field], [...path, field], placeholder, out);
+    }
+  }
+}
+/** True when `value` is a string that contains a base64 `data:image/...` URL. */
+function isImageDataUrl(value: unknown): value is string {
+  if (typeof value !== "string") {
+    return false;
+  }
+  return DATA_IMAGE_BASE64_PATTERN.test(value);
+}
+/** True when `value` is a non-null object that is not an array. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+/**
+ * Overwrite the value found at `path` inside `root`, in place.
+ *
+ * Every segment but the last is followed to reach the parent container; the
+ * last segment is then assigned on that parent. An empty path is a no-op. The
+ * path is assumed to exist: intermediate containers are not created.
+ */
+function setPath(root: unknown, path: PathSegment[], value: unknown): void {
+  if (path.length === 0) {
+    return;
+  }
+  const leafKey = path[path.length - 1]!;
+  let parent: unknown = root;
+  for (const segment of path.slice(0, -1)) {
+    parent = Array.isArray(parent)
+      ? parent[segment as number]
+      : (parent as Record<string, unknown>)[segment as string];
+  }
+  if (Array.isArray(parent)) {
+    parent[leafKey as number] = value;
+    return;
+  }
+  (parent as Record<string, unknown>)[leafKey as string] = value;
+}

package/src/index.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ConfiguredModel, ContextCompactionMode, ModelProviderApi, ResolvedModelProvider, Settings } from "@opengeni/config";
-import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment, contextServerCompactThreshold, firstPartyMcpBaseUrl, parseExposedPorts, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
+import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment, contextInputBudgetTokens, contextServerCompactThreshold, firstPartyMcpBaseUrl, parseExposedPorts, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
 import { CAPABILITY_DESCRIPTORS, isClearedRunStateBlob, signDelegatedAccessToken, type Permission, type ReasoningEffort, type ResourceRef, type SessionEventType, type ToolRef } from "@opengeni/contracts";
 import {
   Agent,
@@ -82,8 +82,17 @@ import { dirname, isAbsolute, join, posix as posixPath, relative } from "node:pa
 import { fileURLToPath } from "node:url";
 import { computerCallNormalizingFetch, normalizeComputerCallActions, sanitizeHistoryItemsForModel } from "./history-sanitizer";
+import { elideStaleScreenshotImages } from "./image-history";
 import { installCodexToolSearch } from "./codex-tool-search";
-import { enforceInputBudget, estimateItemTokens } from "./context-compaction";
+import {
+  CompactionNeededError,
+  SUMMARY_BUFFER_TOKENS,
+  clientCompactionThresholdTokens,
+  enforceInputBudget,
+  estimateItemTokens,
+  estimateTokens,
+  renderCompactionPromptInputForChat,
+} from "./context-compaction";
 import {
   createSandboxClient,
   deserializeSandboxSessionStateEnvelope,
@@ -91,7 +100,7 @@ import {
   restoredSandboxSessionStateFromEntry,
   setSelfhostedApplyDiff,
 } from "./sandbox";
-import { computerUse } from "./sandbox-computer";
+import { computerUse, type ComputerToolMode } from "./sandbox-computer";
 // P4.3 computer-use surface (the agent's :0 driver). Re-exported from the barrel
 // so callers (the worker, live proofs) reach SandboxComputer/ComputerUseCapability
@@ -106,6 +115,7 @@ export {
   ComputerActionError,
   type SandboxComputerOptions,
   type ComputerUseArgs,
+  type ComputerToolMode,
 } from "./sandbox-computer";
 // The agent-loop-free sandbox leaf (createSandboxClient + resume/recovery
@@ -133,22 +143,34 @@ export type { HistoryItem } from "./history-sanitizer";
 export { OpenAIChatCompletionsModel, OpenAIResponsesModel } from "@openai/agents";
 export {
-  planCompaction,
+  CompactionNeededError,
+  buildCompactionPromptInput,
+  buildCompactionReplacementHistory,
+  clientCompactionThresholdTokens,
+  decideClientCompaction,
   enforceInputBudget,
   buildSummaryItem,
-  buildCompactionMessages,
+  findCompactionNeededError,
   isCompactionSummary,
   isUserMessage,
   findKeepBoundary,
   estimateTokens,
   estimateItemTokens,
-  compactionSummaryText,
-  renderPrefixTranscript,
+  renderCompactionPromptInputForChat,
   COMPACTION_SUMMARY_MARKER,
+  COMPACTION_PROMPT,
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  CLIENT_COMPACTION_TRIGGER_FRACTION,
+  SUMMARY_BUFFER_TOKENS,
   SUMMARY_PREFIX,
-  SUMMARY_INSTRUCTIONS,
+  USER_MESSAGE_TRUNCATION_MARKER,
 } from "./context-compaction";
-export type { CompactionItem, CompactionPlan, PlanCompactionInput } from "./context-compaction";
+export type { ClientCompactionDecision, CompactionItem } from "./context-compaction";
+export {
+  elideStaleScreenshotImages,
+  SCREENSHOT_OMITTED_PLACEHOLDER,
+} from "./image-history";
+export type { ElideStaleScreenshotsOptions, ElideStaleScreenshotsResult } from "./image-history";
 ensureReadableStreamFrom();
@@ -499,10 +521,10 @@ export function configureOpenAI(settings: Settings): void {
 /**
  * Run the compaction summarizer as one plain, tool-less, non-streaming model
- * call against the resolved provider. `system`/`user` come from
- * buildCompactionMessages. Returns the trimmed summary text, or null on any
+ * call against the resolved provider. `input` is the active history plus
+ * Codex's checkpoint prompt. Returns the trimmed summary text, or null on any
  * failure (the caller treats a failed summarize as "skip compaction this turn"
- * — never fatal). The call deliberately does NOT request reasoning encryption,
+ * - never fatal). The call deliberately does NOT request reasoning encryption,
  * tools, or server-side compaction; it is a self-contained summarize.
  *
  * Provider-aware: the summary always runs on the SAME provider that serves the
@@ -516,22 +538,19 @@ export function configureOpenAI(settings: Settings): void {
  */
 export async function summarizeForCompaction(
   settings: Settings,
-  messages: { system: string; user: string },
+  input: Array<Record<string, unknown>>,
   options: { client?: OpenAI; api?: ModelProviderApi; maxOutputTokens?: number; model?: string } = {},
 ): Promise<string | null> {
   const client = options.client ?? buildOpenAIClientFromSettings(settings);
   const api = options.api ?? "responses";
   const model = options.model ?? settings.openaiModel;
-  const maxTokens = options.maxOutputTokens ?? settings.contextSummaryMaxTokens;
+  const maxTokens = options.maxOutputTokens ?? SUMMARY_BUFFER_TOKENS;
   try {
     if (api === "chat") {
       const completion = await client.chat.completions.create({
         model,
         max_tokens: maxTokens,
-        messages: [
-          { role: "system", content: messages.system },
-          { role: "user", content: messages.user },
-        ],
+        messages: [{ role: "user", content: renderCompactionPromptInputForChat(input) }],
       } as any);
       const text = (completion as { choices?: Array<{ message?: { content?: unknown } }> }).choices?.[0]?.message?.content;
       const trimmed = typeof text === "string" ? text.trim() : "";
@@ -544,10 +563,7 @@ export async function summarizeForCompaction(
       // built-in path (api "responses"), so gate it on the built-in provider.
       ...(settings.openaiProvider === "azure" ? {} : { store: false }),
       max_output_tokens: maxTokens,
-      input: [
-        { role: "system", content: messages.system },
-        { role: "user", content: messages.user },
-      ],
+      input,
     } as any);
     const text = extractResponseOutputText(response);
     const trimmed = text.trim();
@@ -643,6 +659,12 @@ export type BuildAgentOptions = {
   encryptedReasoning?: boolean;
   contextWindowTokens?: number;
   structuredToolTransport?: boolean;
+  // EXPLICIT computer-use tool transport, decided where provider identity is
+  // authoritative (the worker's model resolution — agent-turn.ts). Threaded into
+  // buildAgentCapabilities → computerUse({toolMode}) so tool selection never rests
+  // on the SDK's constructor-name sniff. When omitted, the legacy sniff +
+  // `structuredToolTransport` neutralize path is preserved byte-for-byte.
+  computerToolMode?: ComputerToolMode;
   // The LIVE, by-reference connector-namespace Set from prepareAgentTools
   // (codexConnectorNamespaces): fills during each turn's codex_apps tools/list,
   // read per model call by the codex tool_search description so the model sees
@@ -864,6 +886,7 @@ export function buildOpenGeniAgent(settings: Settings, resources: ResourceRef[],
       compactionMode,
       contextWindowTokens,
       ...(options.structuredToolTransport !== undefined ? { structuredToolTransport: options.structuredToolTransport } : {}),
+      ...(options.computerToolMode !== undefined ? { computerToolMode: options.computerToolMode } : {}),
     }),
   });
   agentFileDownloads.set(agent, normalizeSandboxFileDownloads(options.fileResourceDownloads ?? []).filter((download) => !download.content));
@@ -961,7 +984,16 @@ function neutralizeStructuredToolTransport(capability: ReturnType<typeof filesys
 export function buildAgentCapabilities(
   settings: Settings,
   packSkills: PackSkill[],
-  options: { compactionMode?: ContextCompactionMode; contextWindowTokens?: number; structuredToolTransport?: boolean } = {},
+  options: {
+    compactionMode?: ContextCompactionMode;
+    contextWindowTokens?: number;
+    structuredToolTransport?: boolean;
+    // EXPLICIT computer-use transport (see BuildAgentOptions.computerToolMode). When
+    // present, computerUse() is handed the mode directly and its tools() obeys it
+    // without the constructor-name sniff. When absent, the legacy neutralize +
+    // imageFunctionResults path (driven by structuredToolTransport) is unchanged.
+    computerToolMode?: ComputerToolMode;
+  } = {},
 ): ReturnType<typeof Capabilities.default> {
   const mode = options.compactionMode ?? resolveContextCompactionMode(settings);
   const contextWindowTokens = options.contextWindowTokens ?? settings.contextWindowTokens;
@@ -996,25 +1028,37 @@ export function buildAgentCapabilities(
     && settings.sandboxDesktopEnabled
     && desktopCapableBackend(settings.sandboxBackend)
   ) {
-    // computer-use is now transport-aware, exactly like filesystem: its `tools()`
-    // emits the HOSTED `computer_use_preview` tool on the structured transport and a
-    // set of FUNCTION `computer_*` tools on the text transport. The ChatGPT/Codex
-    // backend rejects hosted tool types (only function/custom/web_search accepted),
-    // so on the codex path (structuredToolTransport === false) we neutralize the
-    // capability's model binding — the SAME trick used for filesystem above — so
-    // `tools()` sees no model instance and emits the function tools the backend can
-    // call, instead of suppressing the desktop tier entirely.
+    // computer-use is transport-aware, exactly like filesystem: `tools()` emits the
+    // HOSTED `computer_use_preview` tool on the structured transport and a set of
+    // FUNCTION `computer_*` tools on the text transport. The ChatGPT/Codex backend
+    // rejects hosted tool types (only function/custom/web_search accepted).
+    //
+    // HARDENING: when the caller declares an EXPLICIT `computerToolMode` (the worker
+    // does, from its authoritative model resolution), thread it straight through —
+    // tool selection then never depends on the SDK's model-instance constructor-name
+    // sniff (which a wrapped/proxied model would defeat, silently 400ing a
+    // chat-completions provider handed the hosted tool). When ABSENT, the legacy path
+    // is preserved byte-for-byte: on the codex path (structuredToolTransport === false)
+    // we set imageFunctionResults and neutralize the capability's model binding — the
+    // SAME trick used for filesystem above — so `tools()` sees no model instance and
+    // emits the function tools the backend can call, instead of suppressing the tier.
+    const explicitMode = options.computerToolMode;
     const computerCapability = computerUse({
       dimensions: [settings.streamResolutionWidth, settings.streamResolutionHeight],
       readOnly: settings.computerUseReadOnly,
-      // On the codex path the function tools deliver screenshots as a real image the
-      // model can see. The ChatGPT/Codex backend rejects HOSTED tool types but DOES
-      // accept `input_image` content items inside a `function_call_output` (proven by
-      // openai/codex codex-rs, whose view_image tool ships exactly that shape) — so a
-      // structured image tool result is seen, where a text data-URL would be unreadable.
-      ...(options.structuredToolTransport === false ? { imageFunctionResults: true } : {}),
+      ...(explicitMode
+        ? { toolMode: explicitMode }
+        // Legacy (no explicit mode): on the codex path the function tools deliver
+        // screenshots as a real image the model can see. The ChatGPT/Codex backend
+        // rejects HOSTED tool types but DOES accept `input_image` content items inside a
+        // `function_call_output` (proven by openai/codex codex-rs, whose view_image tool
+        // ships exactly that shape) — so a structured image tool result is seen, where a
+        // text data-URL would be unreadable.
+        : options.structuredToolTransport === false ? { imageFunctionResults: true } : {}),
     });
-    if (options.structuredToolTransport === false) {
+    // Neutralize ONLY on the legacy sniff path. With an explicit toolMode the mode
+    // already forces the function tools, so the constructor-name override is moot.
+    if (!explicitMode && options.structuredToolTransport === false) {
       neutralizeStructuredToolTransport(computerCapability);
     }
     caps.push(computerCapability as unknown as ReturnType<typeof Capabilities.default>[number]);
@@ -1088,19 +1132,19 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
     //    device-code login may lack the connector scopes, and the backend can
     //    reject the bearer at the initialize/tools-list handshake, so a 401/403
     //    (or a missing/failed token) drops the server.
-    //  - an AUTO-ATTACHED workspace-default capability MCP (ToolRef.optional):
-    //    the caller never explicitly requested it, so a broken/expired
-    //    capability credential must SKIP the server with a warning, never kill
-    //    the turn before the model runs. An EXPLICITLY-requested tool omits
-    //    `optional` and stays strict (below), preserving the fail-loud contract.
+    //  - an optional ToolRef: either an auto-attached workspace-default
+    //    capability MCP or a client/pack-selected portable ref. A
+    //    broken/expired credential or unavailable endpoint skips the server
+    //    with a warning, never killing the turn before the model runs. Bare
+    //    refs stay strict (below), preserving the fail-loud default.
     const optional = tool.optional === true;
     return { server, bestEffort: isCodexAppsMcpServer(config) || optional, optional };
   }));
   const requiredServers = servers.filter((entry) => !entry.bestEffort).map((entry) => entry.server);
   const bestEffortServers = servers.filter((entry) => entry.bestEffort).map((entry) => entry.server);
-  // Names of the OPTIONAL capability servers (not codex_apps) so a drop is
-  // surfaced as a warning; codex_apps keeps its historically-quiet drop (a
-  // not-logged-in ChatGPT plan is a normal, non-noteworthy state).
+  // Names of the OPTIONAL servers (not codex_apps) so a drop is surfaced as a
+  // warning; codex_apps keeps its historically-quiet drop (a not-logged-in
+  // ChatGPT plan is a normal, non-noteworthy state).
   const optionalServerNames = new Set(
     servers.filter((entry) => entry.optional).map((entry) => entry.server.name),
   );
@@ -1121,7 +1165,7 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
       }
       const error = connectedBestEffort.errors.get(failed);
       console.warn(
-        `[mcp] optional capability server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
+        `[mcp] optional server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
         error instanceof Error ? error.message : error,
       );
     }
@@ -1544,6 +1588,7 @@ export type RunAgentStreamOptions = {
   sandboxClient?: unknown;
   sandboxEnvironment?: Record<string, string>;
   onRuntimeEvent?: (event: NormalizedRuntimeEvent) => Promise<void> | void;
+  contextCompactionSignalTokens?: () => number | null | undefined;
   // OWNERSHIP INVERSION (P1.2): an externally-owned, already-live sandbox
   // session resolved by the per-turn resume-by-id path. When present,
   // runAgentStream does NOT build (or resume, or discard) a client — it threads
@@ -1574,6 +1619,11 @@ export type RunAgentStreamOptions = {
   callModelInputFilter?: CallModelInputFilter;
 };
+// Options for the context-robustness model-input filter.
+// - contextCompactionSignalTokens: callback supplying a token count for the
+//   compaction check; a positive number is used as-is, anything else makes the
+//   filter fall back to its own estimate of the input.
+// - throwOnCompactionNeeded: when true (and compaction mode is "client"), the
+//   filter throws CompactionNeededError once the signal exceeds the threshold.
+export type ContextRobustnessFilterOptions = {
+  contextCompactionSignalTokens?: () => number | null | undefined;
+  throwOnCompactionNeeded?: boolean;
+};
 // One-shot directive appended to the agent's system prompt on the genesis turn
 // (see buildOpenGeniAgent's genesisTitleHint). Delivered through the
 // authoritative instructions channel so the model reliably obeys; references
@@ -1627,6 +1677,59 @@ export const normalizeComputerCallsFilter: CallModelInputFilter = ({ modelData }
   ) as unknown as AgentInputItem[],
 });
+/**
+ * Build the per-call filter that keeps model input within safe bounds.
+ *
+ * On every model call the returned filter:
+ *  1. elides older screenshot images (in every compaction mode);
+ *  2. trims the oldest history items to the input budget, but only when
+ *     `modelCallBudgetTokens` yields a budget (client compaction mode with a
+ *     positive budget);
+ *  3. when compaction mode is "client" and `options.throwOnCompactionNeeded` is
+ *     set, compares a token signal against the client compaction threshold and
+ *     throws `CompactionNeededError` if it is exceeded. The signal is the
+ *     positive number reported by `options.contextCompactionSignalTokens`, or
+ *     else an estimate of the (already elided/trimmed) input.
+ *
+ * @param settings Resolved runtime settings; read once when the filter is built.
+ * @param options Compaction-signal callback and throw switch.
+ * @returns A filter returning `modelData` with its `input` replaced.
+ * @throws CompactionNeededError from inside the filter, as described in step 3.
+ */
+export function contextRobustnessFilterForSettings(
+  settings: Settings,
+  options: ContextRobustnessFilterOptions = {},
+): CallModelInputFilter {
+  const inputBudgetTokens = modelCallBudgetTokens(settings);
+  const clientCompactionMode = resolveContextCompactionMode(settings) === "client";
+  const compactionThresholdTokens = clientCompactionThresholdTokens(settings);
+  return ({ modelData }) => {
+    const images = elideStaleScreenshotImages(modelData.input);
+    if (images.elidedCount > 0) {
+      // Report the number actually kept rather than repeating the elision
+      // policy's default here, so the log stays true if that policy changes.
+      console.warn(
+        `per-call image history policy elided ${images.elidedCount} older screenshot image(s), keeping the last ${images.imageCount - images.elidedCount} full image(s)`,
+      );
+    }
+    let input = images.items;
+    if (inputBudgetTokens !== undefined) {
+      const guarded = enforceInputBudget(
+        input as unknown as Array<Record<string, unknown>>,
+        inputBudgetTokens,
+      );
+      if (guarded.trimmed) {
+        console.warn(
+          `per-call budget guard trimmed ${guarded.droppedCount} oldest history item(s) to fit input budget (${inputBudgetTokens} tokens); the over-budget model call was NOT sent`,
+        );
+        input = guarded.items as unknown as AgentInputItem[];
+      }
+    }
+    if (clientCompactionMode && options.throwOnCompactionNeeded) {
+      const reported = options.contextCompactionSignalTokens?.();
+      // Only a positive number counts as a usable reported signal.
+      const hasReported = typeof reported === "number" && reported > 0;
+      const signalTokens = hasReported
+        ? reported
+        : estimateTokens(input as unknown as Array<Record<string, unknown>>);
+      if (signalTokens > compactionThresholdTokens) {
+        throw new CompactionNeededError({
+          signalTokens,
+          thresholdTokens: compactionThresholdTokens,
+          signalSource: hasReported ? "provider" : "estimate",
+        });
+      }
+    }
+    return { ...modelData, input };
+  };
+}
+/**
+ * Resolve the hard per-call input budget, in tokens.
+ *
+ * Returns `undefined` (no budget enforcement) unless the compaction mode is
+ * "client" and the configured input budget is positive.
+ */
+function modelCallBudgetTokens(settings: Settings): number | undefined {
+  const isClientMode = resolveContextCompactionMode(settings) === "client";
+  if (!isClientMode) {
+    return undefined;
+  }
+  const budgetTokens = contextInputBudgetTokens(settings);
+  if (budgetTokens > 0) {
+    return budgetTokens;
+  }
+  return undefined;
+}
 /**
  * Compose a list of callModelInputFilters into one, applied left-to-right so
  * each sees the prior filter's output.
@@ -1645,13 +1748,18 @@ function composeCallModelInputFilters(filters: CallModelInputFilter[]): CallMode
  * The model-input filter applied before every model call. The computer_call
  * action/actions normalizer is ALWAYS on (the Azure endpoint 400s without it);
  * the provider-item-id strip is layered on top when the configured policy
- * selects it.
+ * selects it; the context-robustness guard then elides stale screenshots on
+ * every mode and applies hard budget trimming only on the client-compaction path.
  */
-export function callModelInputFilterForSettings(settings: Settings): CallModelInputFilter | undefined {
+export function callModelInputFilterForSettings(
+  settings: Settings,
+  options: ContextRobustnessFilterOptions = {},
+): CallModelInputFilter | undefined {
   const filters: CallModelInputFilter[] = [normalizeComputerCallsFilter];
   if (settings.openaiProviderItemIds === "strip") {
     filters.push(stripProviderItemIdsFilter);
   }
+  filters.push(contextRobustnessFilterForSettings(settings, options));
   return composeCallModelInputFilters(filters);
 }
@@ -1730,7 +1838,15 @@ export async function runAgentStream(agent: Agent<any, any>, input: PreparedAgen
     // through the client during this run (it is inert for the provided session).
     const decoratedClient = withSandboxLifecycleHooks(resourceClient, ownedHooks, ownedHookContext);
     const ownedFilter = composeCallModelInputFilters(
-      [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
+      [
+        callModelInputFilterForSettings(settings, {
+          throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
+          ...(overrides.contextCompactionSignalTokens
+            ? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens }
+            : {}),
+        }),
+        overrides.callModelInputFilter,
+      ].filter(
         (f): f is CallModelInputFilter => Boolean(f),
       ),
     );
@@ -1777,23 +1893,31 @@ export async function runAgentStream(agent: Agent<any, any>, input: PreparedAgen
     ?? (prepared.serializedRunStateForSandbox && client
       ? await restoredSandboxSessionState(await RunState.fromString(agent, prepared.serializedRunStateForSandbox), client)
       : undefined);
-  // Strip provider item ids first, then apply any per-turn filter (genesis
-  // title directive). Composed left-to-right so the directive lands on the
-  // already-id-stripped input. A callModelInputFilter only shapes the per-call
-  // model input, never the persisted run-state history.
+  // Apply the built-in per-call filters (computer-call normalization, optional
+  // provider-id stripping, image/budget guard), then any per-turn filter
+  // (genesis title directive). A callModelInputFilter only shapes the per-call
+  // model input; the SDK persists filtered clones into its session view, while
+  // OpenGeni's durable conversation truth is still reconciled explicitly below.
   const callModelInputFilter = composeCallModelInputFilters(
-    [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
+    [
+      callModelInputFilterForSettings(settings, {
+        throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
+        ...(overrides.contextCompactionSignalTokens
+          ? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens }
+          : {}),
+      }),
+      overrides.callModelInputFilter,
+    ].filter(
       (f): f is CallModelInputFilter => Boolean(f),
     ),
   );
   const runOptions: Parameters<typeof run>[2] = {
     stream: true,
     maxTurns: settings.agentMaxModelCallsPerTurn,
-    // Strip provider-assigned item ids from every model call (turn-start
-    // history replay AND mid-turn follow-ups) so requests never depend on the
-    // provider's server-side response store. A stored response can vanish
-    // between two calls of the same turn, failing the run with 400 "Item with
-    // id 'rs_…' not found"; with the ids gone the request is self-contained.
+    // Built-in per-call guard chain: normalize computer calls, optionally strip
+    // provider ids, elide stale screenshots in every mode, and trim to the input
+    // budget on the client-compaction path. This runs for turn-start replay AND
+    // every mid-turn follow-up.
     callModelInputFilter,
   };
   void settings.disableOpenaiTracing;