@opengeni/runtime 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-2PO56VAL.js → chunk-KNW7AMQB.js} +11 -4
- package/dist/chunk-KNW7AMQB.js.map +1 -0
- package/dist/index.d.ts +113 -177
- package/dist/index.js +371 -171
- package/dist/index.js.map +1 -1
- package/dist/sandbox/index.d.ts +6 -4
- package/dist/sandbox/index.js +1 -1
- package/package.json +5 -5
- package/src/context-compaction.ts +217 -348
- package/src/image-history.ts +149 -0
- package/src/index.ts +184 -60
- package/src/sandbox/display-stack.ts +61 -12
- package/src/sandbox-computer.ts +90 -18
- package/dist/chunk-2PO56VAL.js.map +0 -1
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import type { AgentInputItem } from "@openai/agents";
|
|
2
|
+
|
|
3
|
+
/** Default text written in place of an elided screenshot image when the caller supplies no `placeholder` option. */
export const SCREENSHOT_OMITTED_PLACEHOLDER =
|
|
4
|
+
"[screenshot omitted: an older desktop frame — the full image remains in the session event log]";
|
|
5
|
+
|
|
6
|
+
// Unanchored, case-insensitive match for a base64 `data:image/...` URL anywhere inside a string.
// The payload class also admits the URL-safe base64 characters `_` and `-`.
const DATA_IMAGE_BASE64_PATTERN = /data:image\/[a-z0-9.+-]+;base64,[a-z0-9+/=_-]+/i;
|
|
7
|
+
|
|
8
|
+
// One step of a path into the history structure: an object key (string) or an array index (number).
type PathSegment = string | number;
|
|
9
|
+
|
|
10
|
+
// A single image found in the history, together with what to write over it if it gets elided.
type ImageOccurrence = {
|
|
11
|
+
// Location of the value to overwrite; the first segment is the index into the items array.
path: PathSegment[];
|
|
12
|
+
// Value stored at `path` on elision: the placeholder string, or an `input_text` item wrapping it.
replacement: unknown;
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
/** Outcome of `elideStaleScreenshotImages`. */
export type ElideStaleScreenshotsResult<T> = {
|
|
16
|
+
// The history to send: a shallow copy of the input when nothing was elided,
// otherwise a `structuredClone` of it with the older images replaced.
items: T[];
|
|
17
|
+
// Total number of screenshot image occurrences found in the input.
imageCount: number;
|
|
18
|
+
// Number of those occurrences that were replaced by the placeholder.
elidedCount: number;
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
/** Tuning knobs for `elideStaleScreenshotImages`. */
export type ElideStaleScreenshotsOptions = {
|
|
22
|
+
// How many of the last image occurrences (in history order) stay intact.
// Defaults to 3; the value is floored and clamped to be non-negative.
keepLast?: number;
|
|
23
|
+
// Text substituted for each elided image; defaults to SCREENSHOT_OMITTED_PLACEHOLDER.
placeholder?: string;
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
/**
 * Replace all but the last `keepLast` screenshot images in a model-input
 * history with a short text placeholder.
 *
 * Images are looked for in computer-call outputs and function-call outputs;
 * user and system messages are never inspected. The input array and its items
 * are never mutated: when nothing needs eliding a shallow copy is returned,
 * otherwise the history is deep-cloned and the older occurrences are
 * overwritten in the clone.
 *
 * @param items - History items, oldest first.
 * @param options - `keepLast` (default 3, floored, clamped to >= 0; NaN falls
 *   back to the default) and `placeholder` (default
 *   `SCREENSHOT_OMITTED_PLACEHOLDER`).
 * @returns The (possibly rewritten) items plus the number of images found and
 *   the number elided.
 */
export function elideStaleScreenshotImages<T extends AgentInputItem>(
  items: readonly T[],
  options: ElideStaleScreenshotsOptions = {},
): ElideStaleScreenshotsResult<T> {
  const defaultKeepLast = 3;
  const requestedKeepLast = options.keepLast ?? defaultKeepLast;
  // NaN survives both Math.floor and Math.max, which would make `elidedCount`
  // NaN: the zero-elision fast path below would be skipped, the whole history
  // deep-cloned for nothing, and NaN reported to the caller. Treat it as
  // "not specified" instead.
  const keepLast = Number.isNaN(requestedKeepLast)
    ? defaultKeepLast
    : Math.max(0, Math.floor(requestedKeepLast));
  const placeholder = options.placeholder ?? SCREENSHOT_OMITTED_PLACEHOLDER;

  // Gather every image occurrence in history order (oldest first).
  const occurrences: ImageOccurrence[] = [];
  for (let i = 0; i < items.length; i += 1) {
    collectItemImageOccurrences(items[i], [i], placeholder, occurrences);
  }

  const elidedCount = Math.max(0, occurrences.length - keepLast);
  if (elidedCount === 0) {
    // Nothing to rewrite: hand back a shallow copy so the caller still gets a fresh array.
    return { items: items.slice(), imageCount: occurrences.length, elidedCount: 0 };
  }

  // Deep-clone before writing so the caller's history objects stay untouched.
  const cloned = structuredClone(items) as T[];
  // The oldest `elidedCount` occurrences sit at the front of the list.
  for (const occurrence of occurrences.slice(0, elidedCount)) {
    setPath(cloned, occurrence.path, occurrence.replacement);
  }
  return { items: cloned, imageCount: occurrences.length, elidedCount };
}
|
|
48
|
+
|
|
49
|
+
/**
 * Record the image occurrences carried by one top-level history item.
 *
 * Only computer-call outputs and function-call outputs are inspected. Every
 * other item type, including all messages (user, system or otherwise),
 * contributes nothing.
 *
 * @param item - The history item; non-record values are ignored.
 * @param path - Path of `item` from the root of the history array.
 * @param placeholder - Text used when an occurrence is later elided.
 * @param out - Accumulator that receives the occurrences found.
 */
function collectItemImageOccurrences(
  item: unknown,
  path: PathSegment[],
  placeholder: string,
  out: ImageOccurrence[],
): void {
  if (!isRecord(item)) {
    return;
  }
  switch (item.type) {
    case "computer_call_result":
    case "computer_call_output":
      collectComputerOutputImages(item, path, placeholder, out);
      break;
    case "function_call_result":
    case "function_call_output":
      // Tool results can nest images arbitrarily deep under `output`.
      collectToolResultImages(item.output, [...path, "output"], placeholder, out);
      break;
    default:
      // Messages and any other item kinds are left alone.
      break;
  }
}
|
|
69
|
+
|
|
70
|
+
/**
 * Record the screenshot carried by a computer-call output item, if any.
 *
 * The item's `output` must be a `computer_screenshot` record. The first of
 * `data`, `image_url`, `imageUrl` that holds a base64 image data URL is
 * registered as one occurrence, to be replaced by the placeholder string.
 *
 * @param item - The computer-call output item.
 * @param path - Path of `item` from the root of the history array.
 * @param placeholder - Replacement text for the image field.
 * @param out - Accumulator that receives the occurrence.
 */
function collectComputerOutputImages(
  item: Record<string, unknown>,
  path: PathSegment[],
  placeholder: string,
  out: ImageOccurrence[],
): void {
  const screenshot = item.output;
  if (!isRecord(screenshot) || screenshot.type !== "computer_screenshot") {
    return;
  }
  // At most one occurrence per screenshot: stop at the first matching field.
  const imageKey = ["data", "image_url", "imageUrl"].find((candidate) =>
    isImageDataUrl(screenshot[candidate]),
  );
  if (imageKey !== undefined) {
    out.push({ path: [...path, "output", imageKey], replacement: placeholder });
  }
}
|
|
87
|
+
|
|
88
|
+
/**
 * Walk a function-call output and record every image found inside it.
 *
 * - A string containing a base64 image data URL is one occurrence, replaced
 *   wholesale by the placeholder text.
 * - Arrays are walked element by element.
 * - An `input_image` record whose `image`, `imageUrl` or `image_url` holds a
 *   data URL is one occurrence, replaced by an `input_text` item.
 * - Any other record is searched under its `content`, `text` and `output` keys.
 *
 * @param value - The value to inspect.
 * @param path - Path of `value` from the root of the history array.
 * @param placeholder - Replacement text for elided images.
 * @param out - Accumulator that receives the occurrences found.
 */
function collectToolResultImages(
  value: unknown,
  path: PathSegment[],
  placeholder: string,
  out: ImageOccurrence[],
): void {
  if (Array.isArray(value)) {
    value.forEach((entry, index) => {
      collectToolResultImages(entry, [...path, index], placeholder, out);
    });
    return;
  }

  if (typeof value === "string") {
    if (isImageDataUrl(value)) {
      out.push({ path, replacement: placeholder });
    }
    return;
  }

  if (!isRecord(value)) {
    return;
  }

  const carriesInlineImage =
    value.type === "input_image" &&
    ["image", "imageUrl", "image_url"].some((field) => isImageDataUrl(value[field]));
  if (carriesInlineImage) {
    // Swap the whole image item for a text item so the content list stays well-formed.
    out.push({ path, replacement: { type: "input_text", text: placeholder } });
    return;
  }

  // Descend only into the keys known to wrap nested tool output.
  for (const field of ["content", "text", "output"]) {
    if (!(field in value)) {
      continue;
    }
    collectToolResultImages(value[field], [...path, field], placeholder, out);
  }
}
|
|
123
|
+
|
|
124
|
+
/**
 * Type guard: true when `value` is a string that contains a base64
 * `data:image/...` URL (matched anywhere in the string).
 */
function isImageDataUrl(value: unknown): value is string {
  if (typeof value !== "string") {
    return false;
  }
  return DATA_IMAGE_BASE64_PATTERN.test(value);
}
|
|
127
|
+
|
|
128
|
+
/**
 * Type guard: true for any non-null, non-array object, which can then be
 * indexed by string keys.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
|
131
|
+
|
|
132
|
+
/**
 * Overwrite the value at `path` inside `root`, mutating `root` in place.
 *
 * Every segment but the last is followed to reach the parent container; the
 * last segment is the key or index that gets assigned. An empty path is a
 * no-op. Intermediate containers are assumed to exist.
 *
 * @param root - The structure to mutate.
 * @param path - Keys / indices leading to the slot to overwrite.
 * @param value - The value to store there.
 */
function setPath(root: unknown, path: PathSegment[], value: unknown): void {
  if (path.length === 0) {
    return;
  }

  const parentSteps = path.slice(0, -1);
  const finalStep = path[path.length - 1]!;

  // Walk down to the container that owns the final slot.
  const container = parentSteps.reduce<unknown>(
    (node, step) =>
      Array.isArray(node)
        ? node[step as number]
        : (node as Record<string, unknown>)[step as string],
    root,
  );

  if (Array.isArray(container)) {
    container[finalStep as number] = value;
    return;
  }
  (container as Record<string, unknown>)[finalStep as string] = value;
}
|
package/src/index.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { ConfiguredModel, ContextCompactionMode, ModelProviderApi, ResolvedModelProvider, Settings } from "@opengeni/config";
|
|
2
|
-
import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment, contextServerCompactThreshold, firstPartyMcpBaseUrl, parseExposedPorts, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
|
|
2
|
+
import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment, contextInputBudgetTokens, contextServerCompactThreshold, firstPartyMcpBaseUrl, parseExposedPorts, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
|
|
3
3
|
import { CAPABILITY_DESCRIPTORS, isClearedRunStateBlob, signDelegatedAccessToken, type Permission, type ReasoningEffort, type ResourceRef, type SessionEventType, type ToolRef } from "@opengeni/contracts";
|
|
4
4
|
import {
|
|
5
5
|
Agent,
|
|
@@ -82,8 +82,17 @@ import { dirname, isAbsolute, join, posix as posixPath, relative } from "node:pa
|
|
|
82
82
|
import { fileURLToPath } from "node:url";
|
|
83
83
|
|
|
84
84
|
import { computerCallNormalizingFetch, normalizeComputerCallActions, sanitizeHistoryItemsForModel } from "./history-sanitizer";
|
|
85
|
+
import { elideStaleScreenshotImages } from "./image-history";
|
|
85
86
|
import { installCodexToolSearch } from "./codex-tool-search";
|
|
86
|
-
import {
|
|
87
|
+
import {
|
|
88
|
+
CompactionNeededError,
|
|
89
|
+
SUMMARY_BUFFER_TOKENS,
|
|
90
|
+
clientCompactionThresholdTokens,
|
|
91
|
+
enforceInputBudget,
|
|
92
|
+
estimateItemTokens,
|
|
93
|
+
estimateTokens,
|
|
94
|
+
renderCompactionPromptInputForChat,
|
|
95
|
+
} from "./context-compaction";
|
|
87
96
|
import {
|
|
88
97
|
createSandboxClient,
|
|
89
98
|
deserializeSandboxSessionStateEnvelope,
|
|
@@ -91,7 +100,7 @@ import {
|
|
|
91
100
|
restoredSandboxSessionStateFromEntry,
|
|
92
101
|
setSelfhostedApplyDiff,
|
|
93
102
|
} from "./sandbox";
|
|
94
|
-
import { computerUse } from "./sandbox-computer";
|
|
103
|
+
import { computerUse, type ComputerToolMode } from "./sandbox-computer";
|
|
95
104
|
|
|
96
105
|
// P4.3 computer-use surface (the agent's :0 driver). Re-exported from the barrel
|
|
97
106
|
// so callers (the worker, live proofs) reach SandboxComputer/ComputerUseCapability
|
|
@@ -106,6 +115,7 @@ export {
|
|
|
106
115
|
ComputerActionError,
|
|
107
116
|
type SandboxComputerOptions,
|
|
108
117
|
type ComputerUseArgs,
|
|
118
|
+
type ComputerToolMode,
|
|
109
119
|
} from "./sandbox-computer";
|
|
110
120
|
|
|
111
121
|
// The agent-loop-free sandbox leaf (createSandboxClient + resume/recovery
|
|
@@ -133,22 +143,34 @@ export type { HistoryItem } from "./history-sanitizer";
|
|
|
133
143
|
export { OpenAIChatCompletionsModel, OpenAIResponsesModel } from "@openai/agents";
|
|
134
144
|
|
|
135
145
|
export {
|
|
136
|
-
|
|
146
|
+
CompactionNeededError,
|
|
147
|
+
buildCompactionPromptInput,
|
|
148
|
+
buildCompactionReplacementHistory,
|
|
149
|
+
clientCompactionThresholdTokens,
|
|
150
|
+
decideClientCompaction,
|
|
137
151
|
enforceInputBudget,
|
|
138
152
|
buildSummaryItem,
|
|
139
|
-
|
|
153
|
+
findCompactionNeededError,
|
|
140
154
|
isCompactionSummary,
|
|
141
155
|
isUserMessage,
|
|
142
156
|
findKeepBoundary,
|
|
143
157
|
estimateTokens,
|
|
144
158
|
estimateItemTokens,
|
|
145
|
-
|
|
146
|
-
renderPrefixTranscript,
|
|
159
|
+
renderCompactionPromptInputForChat,
|
|
147
160
|
COMPACTION_SUMMARY_MARKER,
|
|
161
|
+
COMPACTION_PROMPT,
|
|
162
|
+
COMPACT_USER_MESSAGE_MAX_TOKENS,
|
|
163
|
+
CLIENT_COMPACTION_TRIGGER_FRACTION,
|
|
164
|
+
SUMMARY_BUFFER_TOKENS,
|
|
148
165
|
SUMMARY_PREFIX,
|
|
149
|
-
|
|
166
|
+
USER_MESSAGE_TRUNCATION_MARKER,
|
|
150
167
|
} from "./context-compaction";
|
|
151
|
-
export type {
|
|
168
|
+
export type { ClientCompactionDecision, CompactionItem } from "./context-compaction";
|
|
169
|
+
export {
|
|
170
|
+
elideStaleScreenshotImages,
|
|
171
|
+
SCREENSHOT_OMITTED_PLACEHOLDER,
|
|
172
|
+
} from "./image-history";
|
|
173
|
+
export type { ElideStaleScreenshotsOptions, ElideStaleScreenshotsResult } from "./image-history";
|
|
152
174
|
|
|
153
175
|
ensureReadableStreamFrom();
|
|
154
176
|
|
|
@@ -499,10 +521,10 @@ export function configureOpenAI(settings: Settings): void {
|
|
|
499
521
|
|
|
500
522
|
/**
|
|
501
523
|
* Run the compaction summarizer as one plain, tool-less, non-streaming model
|
|
502
|
-
* call against the resolved provider. `
|
|
503
|
-
*
|
|
524
|
+
* call against the resolved provider. `input` is the active history plus
|
|
525
|
+
* Codex's checkpoint prompt. Returns the trimmed summary text, or null on any
|
|
504
526
|
* failure (the caller treats a failed summarize as "skip compaction this turn"
|
|
505
|
-
*
|
|
527
|
+
* - never fatal). The call deliberately does NOT request reasoning encryption,
|
|
506
528
|
* tools, or server-side compaction; it is a self-contained summarize.
|
|
507
529
|
*
|
|
508
530
|
* Provider-aware: the summary always runs on the SAME provider that serves the
|
|
@@ -516,22 +538,19 @@ export function configureOpenAI(settings: Settings): void {
|
|
|
516
538
|
*/
|
|
517
539
|
export async function summarizeForCompaction(
|
|
518
540
|
settings: Settings,
|
|
519
|
-
|
|
541
|
+
input: Array<Record<string, unknown>>,
|
|
520
542
|
options: { client?: OpenAI; api?: ModelProviderApi; maxOutputTokens?: number; model?: string } = {},
|
|
521
543
|
): Promise<string | null> {
|
|
522
544
|
const client = options.client ?? buildOpenAIClientFromSettings(settings);
|
|
523
545
|
const api = options.api ?? "responses";
|
|
524
546
|
const model = options.model ?? settings.openaiModel;
|
|
525
|
-
const maxTokens = options.maxOutputTokens ??
|
|
547
|
+
const maxTokens = options.maxOutputTokens ?? SUMMARY_BUFFER_TOKENS;
|
|
526
548
|
try {
|
|
527
549
|
if (api === "chat") {
|
|
528
550
|
const completion = await client.chat.completions.create({
|
|
529
551
|
model,
|
|
530
552
|
max_tokens: maxTokens,
|
|
531
|
-
messages: [
|
|
532
|
-
{ role: "system", content: messages.system },
|
|
533
|
-
{ role: "user", content: messages.user },
|
|
534
|
-
],
|
|
553
|
+
messages: [{ role: "user", content: renderCompactionPromptInputForChat(input) }],
|
|
535
554
|
} as any);
|
|
536
555
|
const text = (completion as { choices?: Array<{ message?: { content?: unknown } }> }).choices?.[0]?.message?.content;
|
|
537
556
|
const trimmed = typeof text === "string" ? text.trim() : "";
|
|
@@ -544,10 +563,7 @@ export async function summarizeForCompaction(
|
|
|
544
563
|
// built-in path (api "responses"), so gate it on the built-in provider.
|
|
545
564
|
...(settings.openaiProvider === "azure" ? {} : { store: false }),
|
|
546
565
|
max_output_tokens: maxTokens,
|
|
547
|
-
input
|
|
548
|
-
{ role: "system", content: messages.system },
|
|
549
|
-
{ role: "user", content: messages.user },
|
|
550
|
-
],
|
|
566
|
+
input,
|
|
551
567
|
} as any);
|
|
552
568
|
const text = extractResponseOutputText(response);
|
|
553
569
|
const trimmed = text.trim();
|
|
@@ -643,6 +659,12 @@ export type BuildAgentOptions = {
|
|
|
643
659
|
encryptedReasoning?: boolean;
|
|
644
660
|
contextWindowTokens?: number;
|
|
645
661
|
structuredToolTransport?: boolean;
|
|
662
|
+
// EXPLICIT computer-use tool transport, decided where provider identity is
|
|
663
|
+
// authoritative (the worker's model resolution — agent-turn.ts). Threaded into
|
|
664
|
+
// buildAgentCapabilities → computerUse({toolMode}) so tool selection never rests
|
|
665
|
+
// on the SDK's constructor-name sniff. When omitted, the legacy sniff +
|
|
666
|
+
// `structuredToolTransport` neutralize path is preserved byte-for-byte.
|
|
667
|
+
computerToolMode?: ComputerToolMode;
|
|
646
668
|
// The LIVE, by-reference connector-namespace Set from prepareAgentTools
|
|
647
669
|
// (codexConnectorNamespaces): fills during each turn's codex_apps tools/list,
|
|
648
670
|
// read per model call by the codex tool_search description so the model sees
|
|
@@ -864,6 +886,7 @@ export function buildOpenGeniAgent(settings: Settings, resources: ResourceRef[],
|
|
|
864
886
|
compactionMode,
|
|
865
887
|
contextWindowTokens,
|
|
866
888
|
...(options.structuredToolTransport !== undefined ? { structuredToolTransport: options.structuredToolTransport } : {}),
|
|
889
|
+
...(options.computerToolMode !== undefined ? { computerToolMode: options.computerToolMode } : {}),
|
|
867
890
|
}),
|
|
868
891
|
});
|
|
869
892
|
agentFileDownloads.set(agent, normalizeSandboxFileDownloads(options.fileResourceDownloads ?? []).filter((download) => !download.content));
|
|
@@ -961,7 +984,16 @@ function neutralizeStructuredToolTransport(capability: ReturnType<typeof filesys
|
|
|
961
984
|
export function buildAgentCapabilities(
|
|
962
985
|
settings: Settings,
|
|
963
986
|
packSkills: PackSkill[],
|
|
964
|
-
options: {
|
|
987
|
+
options: {
|
|
988
|
+
compactionMode?: ContextCompactionMode;
|
|
989
|
+
contextWindowTokens?: number;
|
|
990
|
+
structuredToolTransport?: boolean;
|
|
991
|
+
// EXPLICIT computer-use transport (see BuildAgentOptions.computerToolMode). When
|
|
992
|
+
// present, computerUse() is handed the mode directly and its tools() obeys it
|
|
993
|
+
// without the constructor-name sniff. When absent, the legacy neutralize +
|
|
994
|
+
// imageFunctionResults path (driven by structuredToolTransport) is unchanged.
|
|
995
|
+
computerToolMode?: ComputerToolMode;
|
|
996
|
+
} = {},
|
|
965
997
|
): ReturnType<typeof Capabilities.default> {
|
|
966
998
|
const mode = options.compactionMode ?? resolveContextCompactionMode(settings);
|
|
967
999
|
const contextWindowTokens = options.contextWindowTokens ?? settings.contextWindowTokens;
|
|
@@ -996,25 +1028,37 @@ export function buildAgentCapabilities(
|
|
|
996
1028
|
&& settings.sandboxDesktopEnabled
|
|
997
1029
|
&& desktopCapableBackend(settings.sandboxBackend)
|
|
998
1030
|
) {
|
|
999
|
-
// computer-use is
|
|
1000
|
-
//
|
|
1001
|
-
//
|
|
1002
|
-
//
|
|
1003
|
-
//
|
|
1004
|
-
//
|
|
1005
|
-
//
|
|
1006
|
-
//
|
|
1031
|
+
// computer-use is transport-aware, exactly like filesystem: `tools()` emits the
|
|
1032
|
+
// HOSTED `computer_use_preview` tool on the structured transport and a set of
|
|
1033
|
+
// FUNCTION `computer_*` tools on the text transport. The ChatGPT/Codex backend
|
|
1034
|
+
// rejects hosted tool types (only function/custom/web_search accepted).
|
|
1035
|
+
//
|
|
1036
|
+
// HARDENING: when the caller declares an EXPLICIT `computerToolMode` (the worker
|
|
1037
|
+
// does, from its authoritative model resolution), thread it straight through —
|
|
1038
|
+
// tool selection then never depends on the SDK's model-instance constructor-name
|
|
1039
|
+
// sniff (which a wrapped/proxied model would defeat, silently 400ing a
|
|
1040
|
+
// chat-completions provider handed the hosted tool). When ABSENT, the legacy path
|
|
1041
|
+
// is preserved byte-for-byte: on the codex path (structuredToolTransport === false)
|
|
1042
|
+
// we set imageFunctionResults and neutralize the capability's model binding — the
|
|
1043
|
+
// SAME trick used for filesystem above — so `tools()` sees no model instance and
|
|
1044
|
+
// emits the function tools the backend can call, instead of suppressing the tier.
|
|
1045
|
+
const explicitMode = options.computerToolMode;
|
|
1007
1046
|
const computerCapability = computerUse({
|
|
1008
1047
|
dimensions: [settings.streamResolutionWidth, settings.streamResolutionHeight],
|
|
1009
1048
|
readOnly: settings.computerUseReadOnly,
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1049
|
+
...(explicitMode
|
|
1050
|
+
? { toolMode: explicitMode }
|
|
1051
|
+
// Legacy (no explicit mode): on the codex path the function tools deliver
|
|
1052
|
+
// screenshots as a real image the model can see. The ChatGPT/Codex backend
|
|
1053
|
+
// rejects HOSTED tool types but DOES accept `input_image` content items inside a
|
|
1054
|
+
// `function_call_output` (proven by openai/codex codex-rs, whose view_image tool
|
|
1055
|
+
// ships exactly that shape) — so a structured image tool result is seen, where a
|
|
1056
|
+
// text data-URL would be unreadable.
|
|
1057
|
+
: options.structuredToolTransport === false ? { imageFunctionResults: true } : {}),
|
|
1016
1058
|
});
|
|
1017
|
-
|
|
1059
|
+
// Neutralize ONLY on the legacy sniff path. With an explicit toolMode the mode
|
|
1060
|
+
// already forces the function tools, so the constructor-name override is moot.
|
|
1061
|
+
if (!explicitMode && options.structuredToolTransport === false) {
|
|
1018
1062
|
neutralizeStructuredToolTransport(computerCapability);
|
|
1019
1063
|
}
|
|
1020
1064
|
caps.push(computerCapability as unknown as ReturnType<typeof Capabilities.default>[number]);
|
|
@@ -1088,19 +1132,19 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
|
|
|
1088
1132
|
// device-code login may lack the connector scopes, and the backend can
|
|
1089
1133
|
// reject the bearer at the initialize/tools-list handshake, so a 401/403
|
|
1090
1134
|
// (or a missing/failed token) drops the server.
|
|
1091
|
-
// - an
|
|
1092
|
-
//
|
|
1093
|
-
//
|
|
1094
|
-
// the turn before the model runs.
|
|
1095
|
-
//
|
|
1135
|
+
// - an optional ToolRef: either an auto-attached workspace-default
|
|
1136
|
+
// capability MCP or a client/pack-selected portable ref. A
|
|
1137
|
+
// broken/expired credential or unavailable endpoint skips the server
|
|
1138
|
+
// with a warning, never killing the turn before the model runs. Bare
|
|
1139
|
+
// refs stay strict (below), preserving the fail-loud default.
|
|
1096
1140
|
const optional = tool.optional === true;
|
|
1097
1141
|
return { server, bestEffort: isCodexAppsMcpServer(config) || optional, optional };
|
|
1098
1142
|
}));
|
|
1099
1143
|
const requiredServers = servers.filter((entry) => !entry.bestEffort).map((entry) => entry.server);
|
|
1100
1144
|
const bestEffortServers = servers.filter((entry) => entry.bestEffort).map((entry) => entry.server);
|
|
1101
|
-
// Names of the OPTIONAL
|
|
1102
|
-
//
|
|
1103
|
-
//
|
|
1145
|
+
// Names of the OPTIONAL servers (not codex_apps) so a drop is surfaced as a
|
|
1146
|
+
// warning; codex_apps keeps its historically-quiet drop (a not-logged-in
|
|
1147
|
+
// ChatGPT plan is a normal, non-noteworthy state).
|
|
1104
1148
|
const optionalServerNames = new Set(
|
|
1105
1149
|
servers.filter((entry) => entry.optional).map((entry) => entry.server.name),
|
|
1106
1150
|
);
|
|
@@ -1121,7 +1165,7 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
|
|
|
1121
1165
|
}
|
|
1122
1166
|
const error = connectedBestEffort.errors.get(failed);
|
|
1123
1167
|
console.warn(
|
|
1124
|
-
`[mcp] optional
|
|
1168
|
+
`[mcp] optional server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
|
|
1125
1169
|
error instanceof Error ? error.message : error,
|
|
1126
1170
|
);
|
|
1127
1171
|
}
|
|
@@ -1544,6 +1588,7 @@ export type RunAgentStreamOptions = {
|
|
|
1544
1588
|
sandboxClient?: unknown;
|
|
1545
1589
|
sandboxEnvironment?: Record<string, string>;
|
|
1546
1590
|
onRuntimeEvent?: (event: NormalizedRuntimeEvent) => Promise<void> | void;
|
|
1591
|
+
contextCompactionSignalTokens?: () => number | null | undefined;
|
|
1547
1592
|
// OWNERSHIP INVERSION (P1.2): an externally-owned, already-live sandbox
|
|
1548
1593
|
// session resolved by the per-turn resume-by-id path. When present,
|
|
1549
1594
|
// runAgentStream does NOT build (or resume, or discard) a client — it threads
|
|
@@ -1574,6 +1619,11 @@ export type RunAgentStreamOptions = {
|
|
|
1574
1619
|
callModelInputFilter?: CallModelInputFilter;
|
|
1575
1620
|
};
|
|
1576
1621
|
|
|
1622
|
+
/** Options for `contextRobustnessFilterForSettings`. */
export type ContextRobustnessFilterOptions = {
|
|
1623
|
+
// Callback consulted before each model call (client compaction mode with
// `throwOnCompactionNeeded` only). A positive number is used as the
// provider-reported token signal; anything else falls back to a local estimate.
contextCompactionSignalTokens?: () => number | null | undefined;
|
|
1624
|
+
// When true (and compaction mode is "client"), the filter throws
// CompactionNeededError once the token signal exceeds the compaction threshold.
throwOnCompactionNeeded?: boolean;
|
|
1625
|
+
};
|
|
1626
|
+
|
|
1577
1627
|
// One-shot directive appended to the agent's system prompt on the genesis turn
|
|
1578
1628
|
// (see buildOpenGeniAgent's genesisTitleHint). Delivered through the
|
|
1579
1629
|
// authoritative instructions channel so the model reliably obeys; references
|
|
@@ -1627,6 +1677,59 @@ export const normalizeComputerCallsFilter: CallModelInputFilter = ({ modelData }
|
|
|
1627
1677
|
) as unknown as AgentInputItem[],
|
|
1628
1678
|
});
|
|
1629
1679
|
|
|
1680
|
+
/**
 * Build the per-call context-robustness filter for the given settings.
 *
 * On every model call the returned filter:
 *  1. elides stale screenshot images from the input (all compaction modes);
 *  2. when an input budget applies (client compaction mode with a positive
 *     budget), trims the oldest history items to fit it;
 *  3. in client compaction mode with `throwOnCompactionNeeded`, throws
 *     `CompactionNeededError` when the token signal exceeds the compaction
 *     threshold. The signal is the provider-reported count when the callback
 *     returns a positive number, otherwise a local estimate of the input.
 *
 * @param settings - Resolved runtime settings.
 * @param options - Compaction signal callback and throw toggle.
 * @returns A filter that yields `modelData` with the guarded `input`.
 * @throws CompactionNeededError from the returned filter, as described above.
 */
export function contextRobustnessFilterForSettings(
  settings: Settings,
  options: ContextRobustnessFilterOptions = {},
): CallModelInputFilter {
  // Resolved once per filter, not per call.
  const inputBudgetTokens = modelCallBudgetTokens(settings);
  const clientCompactionMode = resolveContextCompactionMode(settings) === "client";
  const compactionThresholdTokens = clientCompactionThresholdTokens(settings);
  return ({ modelData }) => {
    const images = elideStaleScreenshotImages(modelData.input);
    if (images.elidedCount > 0) {
      // Derive the kept count from the elision result instead of repeating the
      // helper's default, so the message cannot drift from what actually happened.
      const keptCount = images.imageCount - images.elidedCount;
      console.warn(
        `per-call image history policy elided ${images.elidedCount} older screenshot image(s), keeping the last ${keptCount} full image(s)`,
      );
    }
    let input = images.items;
    if (inputBudgetTokens !== undefined) {
      const guarded = enforceInputBudget(
        input as unknown as Array<Record<string, unknown>>,
        inputBudgetTokens,
      );
      if (guarded.trimmed) {
        console.warn(
          `per-call budget guard trimmed ${guarded.droppedCount} oldest history item(s) to fit input budget (${inputBudgetTokens} tokens); the over-budget model call was NOT sent`,
        );
        input = guarded.items as unknown as AgentInputItem[];
      }
    }
    if (clientCompactionMode && options.throwOnCompactionNeeded) {
      const reported = options.contextCompactionSignalTokens?.();
      // Only a positive number counts as a real provider signal.
      const hasReported = typeof reported === "number" && reported > 0;
      const signalTokens = hasReported
        ? reported
        : estimateTokens(input as unknown as Array<Record<string, unknown>>);
      if (signalTokens > compactionThresholdTokens) {
        throw new CompactionNeededError({
          signalTokens,
          thresholdTokens: compactionThresholdTokens,
          signalSource: hasReported ? "provider" : "estimate",
        });
      }
    }
    return { ...modelData, input };
  };
}
|
|
1724
|
+
|
|
1725
|
+
/**
 * Resolve the hard per-call input budget, in tokens.
 *
 * A budget applies only in client compaction mode and only when the
 * configured value is positive; otherwise `undefined` is returned and no
 * trimming takes place.
 */
function modelCallBudgetTokens(settings: Settings): number | undefined {
  const usesClientCompaction = resolveContextCompactionMode(settings) === "client";
  if (!usesClientCompaction) {
    return undefined;
  }
  const configuredBudget = contextInputBudgetTokens(settings);
  if (configuredBudget > 0) {
    return configuredBudget;
  }
  return undefined;
}
|
|
1732
|
+
|
|
1630
1733
|
/**
|
|
1631
1734
|
* Compose a list of callModelInputFilters into one, applied left-to-right so
|
|
1632
1735
|
* each sees the prior filter's output.
|
|
@@ -1645,13 +1748,18 @@ function composeCallModelInputFilters(filters: CallModelInputFilter[]): CallMode
|
|
|
1645
1748
|
* The model-input filter applied before every model call. The computer_call
|
|
1646
1749
|
* action/actions normalizer is ALWAYS on (the Azure endpoint 400s without it);
|
|
1647
1750
|
* the provider-item-id strip is layered on top when the configured policy
|
|
1648
|
-
* selects it
|
|
1751
|
+
* selects it; the context-robustness guard then elides stale screenshots on
|
|
1752
|
+
* every mode and applies hard budget trimming only on the client-compaction path.
|
|
1649
1753
|
*/
|
|
1650
|
-
export function callModelInputFilterForSettings(
|
|
1754
|
+
export function callModelInputFilterForSettings(
  settings: Settings,
  options: ContextRobustnessFilterOptions = {},
): CallModelInputFilter | undefined {
  const stripProviderIds = settings.openaiProviderItemIds === "strip";
  // Order matters: normalize first, optionally strip ids, then apply the
  // image/budget guard to the already-normalized input.
  const chain: CallModelInputFilter[] = [
    normalizeComputerCallsFilter,
    ...(stripProviderIds ? [stripProviderItemIdsFilter] : []),
    contextRobustnessFilterForSettings(settings, options),
  ];
  return composeCallModelInputFilters(chain);
}
|
|
1657
1765
|
|
|
@@ -1730,7 +1838,15 @@ export async function runAgentStream(agent: Agent<any, any>, input: PreparedAgen
|
|
|
1730
1838
|
// through the client during this run (it is inert for the provided session).
|
|
1731
1839
|
const decoratedClient = withSandboxLifecycleHooks(resourceClient, ownedHooks, ownedHookContext);
|
|
1732
1840
|
const ownedFilter = composeCallModelInputFilters(
|
|
1733
|
-
[
|
|
1841
|
+
[
|
|
1842
|
+
callModelInputFilterForSettings(settings, {
|
|
1843
|
+
throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
|
|
1844
|
+
...(overrides.contextCompactionSignalTokens
|
|
1845
|
+
? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens }
|
|
1846
|
+
: {}),
|
|
1847
|
+
}),
|
|
1848
|
+
overrides.callModelInputFilter,
|
|
1849
|
+
].filter(
|
|
1734
1850
|
(f): f is CallModelInputFilter => Boolean(f),
|
|
1735
1851
|
),
|
|
1736
1852
|
);
|
|
@@ -1777,23 +1893,31 @@ export async function runAgentStream(agent: Agent<any, any>, input: PreparedAgen
|
|
|
1777
1893
|
?? (prepared.serializedRunStateForSandbox && client
|
|
1778
1894
|
? await restoredSandboxSessionState(await RunState.fromString(agent, prepared.serializedRunStateForSandbox), client)
|
|
1779
1895
|
: undefined);
|
|
1780
|
-
//
|
|
1781
|
-
//
|
|
1782
|
-
//
|
|
1783
|
-
// model input
|
|
1896
|
+
// Apply the built-in per-call filters (computer-call normalization, optional
|
|
1897
|
+
// provider-id stripping, image/budget guard), then any per-turn filter
|
|
1898
|
+
// (genesis title directive). A callModelInputFilter only shapes the per-call
|
|
1899
|
+
// model input; the SDK persists filtered clones into its session view, while
|
|
1900
|
+
// OpenGeni's durable conversation truth is still reconciled explicitly below.
|
|
1784
1901
|
const callModelInputFilter = composeCallModelInputFilters(
|
|
1785
|
-
[
|
|
1902
|
+
[
|
|
1903
|
+
callModelInputFilterForSettings(settings, {
|
|
1904
|
+
throwOnCompactionNeeded: Boolean(overrides.contextCompactionSignalTokens),
|
|
1905
|
+
...(overrides.contextCompactionSignalTokens
|
|
1906
|
+
? { contextCompactionSignalTokens: overrides.contextCompactionSignalTokens }
|
|
1907
|
+
: {}),
|
|
1908
|
+
}),
|
|
1909
|
+
overrides.callModelInputFilter,
|
|
1910
|
+
].filter(
|
|
1786
1911
|
(f): f is CallModelInputFilter => Boolean(f),
|
|
1787
1912
|
),
|
|
1788
1913
|
);
|
|
1789
1914
|
const runOptions: Parameters<typeof run>[2] = {
|
|
1790
1915
|
stream: true,
|
|
1791
1916
|
maxTurns: settings.agentMaxModelCallsPerTurn,
|
|
1792
|
-
//
|
|
1793
|
-
//
|
|
1794
|
-
//
|
|
1795
|
-
//
|
|
1796
|
-
// id 'rs_…' not found"; with the ids gone the request is self-contained.
|
|
1917
|
+
// Built-in per-call guard chain: normalize computer calls, optionally strip
|
|
1918
|
+
// provider ids, elide stale screenshots in every mode, and trim to the input
|
|
1919
|
+
// budget on the client-compaction path. This runs for turn-start replay AND
|
|
1920
|
+
// every mid-turn follow-up.
|
|
1797
1921
|
callModelInputFilter,
|
|
1798
1922
|
};
|
|
1799
1923
|
void settings.disableOpenaiTracing;
|