@opengeni/runtime 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2PO56VAL.js +3478 -0
- package/dist/chunk-2PO56VAL.js.map +1 -0
- package/dist/index.d.ts +912 -0
- package/dist/index.js +3663 -0
- package/dist/index.js.map +1 -0
- package/dist/sandbox/index.d.ts +1738 -0
- package/dist/sandbox/index.js +187 -0
- package/dist/sandbox/index.js.map +1 -0
- package/package.json +49 -0
- package/src/bundled_hashicorp_terraform_skills/LICENSE +373 -0
- package/src/bundled_hashicorp_terraform_skills/README.md +18 -0
- package/src/bundled_hashicorp_terraform_skills/UPSTREAM_GIT_SHA +1 -0
- package/src/bundled_hashicorp_terraform_skills/azure-verified-modules/SKILL.md +613 -0
- package/src/bundled_hashicorp_terraform_skills/checkov/SKILL.md +43 -0
- package/src/bundled_hashicorp_terraform_skills/refactor-module/SKILL.md +538 -0
- package/src/bundled_hashicorp_terraform_skills/social-media-marketing/SKILL.md +35 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-search-import/SKILL.md +372 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-search-import/references/MANUAL-IMPORT.md +113 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-search-import/scripts/list_resources.sh +38 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/SKILL.md +480 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/api-monitoring.md +543 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/component-blocks.md +476 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/deployment-blocks.md +391 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/examples.md +1529 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/linked-stacks.md +187 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/troubleshooting.md +671 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-style-guide/SKILL.md +353 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/SKILL.md +451 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/references/CI_CD.md +80 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/references/EXAMPLES.md +314 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/references/MOCK_PROVIDERS.md +171 -0
- package/src/codex-tool-search.ts +267 -0
- package/src/context-compaction.ts +538 -0
- package/src/history-sanitizer.ts +719 -0
- package/src/index.ts +3299 -0
- package/src/sandbox/capabilities.ts +69 -0
- package/src/sandbox/channel-a.ts +1031 -0
- package/src/sandbox/display-stack.ts +231 -0
- package/src/sandbox/errors.ts +34 -0
- package/src/sandbox/index.ts +832 -0
- package/src/sandbox/providers/blaxel.ts +35 -0
- package/src/sandbox/providers/cloudflare.ts +24 -0
- package/src/sandbox/providers/daytona.ts +34 -0
- package/src/sandbox/providers/docker.ts +17 -0
- package/src/sandbox/providers/e2b.ts +36 -0
- package/src/sandbox/providers/index.ts +107 -0
- package/src/sandbox/providers/local.ts +13 -0
- package/src/sandbox/providers/modal.ts +55 -0
- package/src/sandbox/providers/none.ts +13 -0
- package/src/sandbox/providers/runloop.ts +32 -0
- package/src/sandbox/providers/selfhosted.ts +96 -0
- package/src/sandbox/providers/types.ts +38 -0
- package/src/sandbox/providers/vercel.ts +29 -0
- package/src/sandbox/recording.ts +286 -0
- package/src/sandbox/routing/backend-resolver.ts +189 -0
- package/src/sandbox/routing/routing-session.ts +455 -0
- package/src/sandbox/select.ts +371 -0
- package/src/sandbox/selfhosted/capabilities.ts +255 -0
- package/src/sandbox/selfhosted/control-rpc.ts +351 -0
- package/src/sandbox/selfhosted/session.ts +930 -0
- package/src/sandbox/selfhosted/testing.ts +230 -0
- package/src/sandbox/stream-port.ts +185 -0
- package/src/sandbox/stream-token.ts +90 -0
- package/src/sandbox/terminal-server.ts +203 -0
- package/src/sandbox-computer.ts +835 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,3299 @@
|
|
|
1
|
+
import type { ConfiguredModel, ContextCompactionMode, ModelProviderApi, ResolvedModelProvider, Settings } from "@opengeni/config";
|
|
2
|
+
import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment, contextServerCompactThreshold, firstPartyMcpBaseUrl, parseExposedPorts, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
|
|
3
|
+
import { CAPABILITY_DESCRIPTORS, isClearedRunStateBlob, signDelegatedAccessToken, type Permission, type ReasoningEffort, type ResourceRef, type SessionEventType, type ToolRef } from "@opengeni/contracts";
|
|
4
|
+
import {
|
|
5
|
+
Agent,
|
|
6
|
+
AgentsError,
|
|
7
|
+
connectMcpServers,
|
|
8
|
+
OpenAIProvider,
|
|
9
|
+
setDefaultModelProvider,
|
|
10
|
+
MaxTurnsExceededError,
|
|
11
|
+
MCPServerStreamableHttp,
|
|
12
|
+
// Provider-bound Model instances. Both are re-exported from
|
|
13
|
+
// @openai/agents-openai via `export * from '@openai/agents-openai'` in
|
|
14
|
+
// @openai/agents' index (0.11.6), so the multi-provider routing imports them
|
|
15
|
+
// from the same entrypoint as the rest of the SDK rather than reaching into
|
|
16
|
+
// the openai subpackage. OpenAIChatCompletionsModel speaks /v1/chat/completions
|
|
17
|
+
// (the registry "chat" wire API, e.g. Fireworks); OpenAIResponsesModel speaks
|
|
18
|
+
// /v1/responses (the built-in OpenAI/Azure "responses" wire API). Both bind a
|
|
19
|
+
// model id to a specific OpenAI client, which is what routes a turn to its
|
|
20
|
+
// provider without touching the global default client.
|
|
21
|
+
OpenAIChatCompletionsModel,
|
|
22
|
+
OpenAIResponsesModel,
|
|
23
|
+
RunState,
|
|
24
|
+
isOpenAIResponsesRawModelStreamEvent,
|
|
25
|
+
run,
|
|
26
|
+
Runner,
|
|
27
|
+
setDefaultOpenAIClient,
|
|
28
|
+
setDefaultOpenAIKey,
|
|
29
|
+
setOpenAIResponsesTransport,
|
|
30
|
+
// Hosted web_search tool factory. Re-exported from @openai/agents-openai via
|
|
31
|
+
// `export * from '@openai/agents-openai'` in @openai/agents' index (0.11.6);
|
|
32
|
+
// it returns a { type: 'hosted_tool', providerData: { type: 'web_search' } }
|
|
33
|
+
// descriptor the OpenAI Responses model serializes into request.tools[].
|
|
34
|
+
webSearchTool,
|
|
35
|
+
// The SDK's V4A-diff applier — the apply_patch host the filesystem capability's
|
|
36
|
+
// editor uses. The agent-loop-free sandbox leaf cannot import it (it lives behind
|
|
37
|
+
// the `@openai/agents` root the leaf forbids), so the barrel imports it here and
|
|
38
|
+
// injects it into the selfhosted session's `createEditor` via setSelfhostedApplyDiff
|
|
39
|
+
// (below, right after the leaf re-export). This lets a selfhosted active backend
|
|
40
|
+
// apply file edits over its NATS fs ops using the SDK's exact diff semantics.
|
|
41
|
+
applyDiff,
|
|
42
|
+
type AgentInputItem,
|
|
43
|
+
type CallModelInputFilter,
|
|
44
|
+
type MCPServer,
|
|
45
|
+
type Model,
|
|
46
|
+
type ModelProvider,
|
|
47
|
+
type RunStreamEvent,
|
|
48
|
+
} from "@openai/agents";
|
|
49
|
+
import {
|
|
50
|
+
localDirLazySkillSource,
|
|
51
|
+
} from "@openai/agents/sandbox/local";
|
|
52
|
+
import {
|
|
53
|
+
Capabilities,
|
|
54
|
+
Manifest,
|
|
55
|
+
SandboxAgent,
|
|
56
|
+
StaticCompactionPolicy,
|
|
57
|
+
azureBlobMount,
|
|
58
|
+
compaction,
|
|
59
|
+
dir,
|
|
60
|
+
file,
|
|
61
|
+
filesystem,
|
|
62
|
+
gitRepo,
|
|
63
|
+
inContainerMountStrategy,
|
|
64
|
+
localDir,
|
|
65
|
+
s3Mount,
|
|
66
|
+
shell,
|
|
67
|
+
skills,
|
|
68
|
+
type Dir,
|
|
69
|
+
type Entry,
|
|
70
|
+
type LocalDirLazySkillSource,
|
|
71
|
+
type SandboxClient,
|
|
72
|
+
type SandboxSessionLike,
|
|
73
|
+
type SandboxSessionState,
|
|
74
|
+
type SandboxRunConfig,
|
|
75
|
+
type SkillIndexEntry,
|
|
76
|
+
} from "@openai/agents/sandbox";
|
|
77
|
+
import { ModalCloudBucketMountStrategy } from "@openai/agents-extensions/sandbox/modal";
|
|
78
|
+
import OpenAI from "openai";
|
|
79
|
+
import { CODEX_APPS_MCP_SERVER_ID, CODEX_MODEL_ID_PREFIX, CODEX_ORIGINATOR, codexAppsSanitizingFetch, codexRequestStorage, codexSubscriptionFetch } from "@opengeni/codex";
|
|
80
|
+
import { cpSync, existsSync, mkdirSync, readdirSync, renameSync, rmSync } from "node:fs";
|
|
81
|
+
import { dirname, isAbsolute, join, posix as posixPath, relative } from "node:path";
|
|
82
|
+
import { fileURLToPath } from "node:url";
|
|
83
|
+
|
|
84
|
+
import { computerCallNormalizingFetch, normalizeComputerCallActions, sanitizeHistoryItemsForModel } from "./history-sanitizer";
|
|
85
|
+
import { installCodexToolSearch } from "./codex-tool-search";
|
|
86
|
+
import { enforceInputBudget, estimateItemTokens } from "./context-compaction";
|
|
87
|
+
import {
|
|
88
|
+
createSandboxClient,
|
|
89
|
+
deserializeSandboxSessionStateEnvelope,
|
|
90
|
+
desktopCapableBackend,
|
|
91
|
+
restoredSandboxSessionStateFromEntry,
|
|
92
|
+
setSelfhostedApplyDiff,
|
|
93
|
+
} from "./sandbox";
|
|
94
|
+
import { computerUse } from "./sandbox-computer";
|
|
95
|
+
|
|
96
|
+
// P4.3 computer-use surface (the agent's :0 driver). Re-exported from the barrel
|
|
97
|
+
// so callers (the worker, live proofs) reach SandboxComputer/ComputerUseCapability
|
|
98
|
+
// alongside the rest of the runtime. NOT part of the agent-loop-free leaf (it
|
|
99
|
+
// imports computerTool from the @openai/agents root).
|
|
100
|
+
export {
|
|
101
|
+
SandboxComputer,
|
|
102
|
+
ComputerUseCapability,
|
|
103
|
+
computerUse,
|
|
104
|
+
ComputerUnavailableError,
|
|
105
|
+
ComputerReadOnlyError,
|
|
106
|
+
ComputerActionError,
|
|
107
|
+
type SandboxComputerOptions,
|
|
108
|
+
type ComputerUseArgs,
|
|
109
|
+
} from "./sandbox-computer";
|
|
110
|
+
|
|
111
|
+
// The agent-loop-free sandbox leaf (createSandboxClient + resume/recovery
|
|
112
|
+
// helpers + the config-owned env/port re-exports). Re-exported verbatim so the
|
|
113
|
+
// barrel surface is unchanged for apps/worker while @opengeni/runtime/sandbox
|
|
114
|
+
// stays importable by the API without the agent loop.
|
|
115
|
+
export * from "./sandbox";
|
|
116
|
+
|
|
117
|
+
// Inject the SDK's V4A `applyDiff` into the selfhosted session's apply_patch editor
|
|
118
|
+
// at module load. The leaf can't import `applyDiff` (agent-loop root), so the
|
|
119
|
+
// barrel — which already imports `@openai/agents` — wires it once. A selfhosted
|
|
120
|
+
// active backend can now apply file edits over its NATS fs ops with the SDK's exact
|
|
121
|
+
// diff semantics; without this, `createEditor()` throws a clear "not injected" error
|
|
122
|
+
// rather than mis-editing. Runs at import time, before any turn binds a capability.
|
|
123
|
+
setSelfhostedApplyDiff(applyDiff as unknown as (input: string, diff: string, mode?: "default" | "create") => string);
|
|
124
|
+
|
|
125
|
+
export { sanitizeHistoryItemsForModel, stripReasoningEncryptedContent, stripReasoningIdentityFromSerializedRunState, neutralizeToolSearchItemsInSerializedRunState } from "./history-sanitizer";
|
|
126
|
+
export type { HistoryItem } from "./history-sanitizer";
|
|
127
|
+
|
|
128
|
+
// The provider-bound Model classes used by buildModelInstance/resolveTurnModel.
|
|
129
|
+
// Re-exported so callers (and routing tests) can assert which wire API a
|
|
130
|
+
// resolved turn was bound to — OpenAIChatCompletionsModel for registry "chat"
|
|
131
|
+
// providers (Fireworks), OpenAIResponsesModel for the built-in "responses" path
|
|
132
|
+
// — without reaching into @openai/agents directly.
|
|
133
|
+
export { OpenAIChatCompletionsModel, OpenAIResponsesModel } from "@openai/agents";
|
|
134
|
+
|
|
135
|
+
export {
|
|
136
|
+
planCompaction,
|
|
137
|
+
enforceInputBudget,
|
|
138
|
+
buildSummaryItem,
|
|
139
|
+
buildCompactionMessages,
|
|
140
|
+
isCompactionSummary,
|
|
141
|
+
isUserMessage,
|
|
142
|
+
findKeepBoundary,
|
|
143
|
+
estimateTokens,
|
|
144
|
+
estimateItemTokens,
|
|
145
|
+
compactionSummaryText,
|
|
146
|
+
renderPrefixTranscript,
|
|
147
|
+
COMPACTION_SUMMARY_MARKER,
|
|
148
|
+
SUMMARY_PREFIX,
|
|
149
|
+
SUMMARY_INSTRUCTIONS,
|
|
150
|
+
} from "./context-compaction";
|
|
151
|
+
export type { CompactionItem, CompactionPlan, PlanCompactionInput } from "./context-compaction";
|
|
152
|
+
|
|
153
|
+
// Runs once at module load. The call appears before the function's declaration
// in the file; that is valid because function declarations are hoisted.
ensureReadableStreamFrom();
|
|
154
|
+
|
|
155
|
+
// 120 seconds, expressed in milliseconds.
// NOTE(review): presumably the per-command timeout for sandbox lifecycle
// commands — the usage site is outside this view; confirm.
const SANDBOX_LIFECYCLE_COMMAND_TIMEOUT_MS = 120_000;
|
|
156
|
+
|
|
157
|
+
/**
 * A runtime event reduced to a session event type tag plus its payload.
 * The payload is deliberately `unknown`: consumers must narrow it themselves.
 */
export type NormalizedRuntimeEvent = {
  /** Session event type tag for this event. */
  type: SessionEventType;
  /** Event body; shape is not constrained by this type. */
  payload: unknown;
};
|
|
161
|
+
|
|
162
|
+
/**
 * Token accounting attached to a single model response.
 * Every counter is optional.
 */
export type ModelResponseUsage = {
  /** Identifier of the response these counts belong to, when known. */
  responseId?: string;
  usage: {
    inputTokens?: number;
    outputTokens?: number;
    totalTokens?: number;
    /** Input-token breakdown: either one record or a list of records. */
    inputTokensDetails?: Record<string, number> | Record<string, number>[];
  };
};
|
|
171
|
+
|
|
172
|
+
/** Element type of the array that `MCPServer.listTools()` resolves to. */
type RuntimeMcpTool = Awaited<ReturnType<MCPServer["listTools"]>>[number];
|
|
173
|
+
|
|
174
|
+
/**
 * Install a minimal `ReadableStream.from` on the global `ReadableStream`
 * constructor when the host does not already provide one.
 *
 * Does nothing when `globalThis.ReadableStream` is absent or when it already
 * exposes a callable `from`. The installed property is configurable and
 * writable, so a later native or alternative implementation can replace it.
 *
 * The installed `from` accepts a sync or async iterable and returns a stream
 * that pulls one item per `pull()`, closes when the iterator reports `done`,
 * and forwards stream cancellation to the iterator's optional `return()`.
 */
export function ensureReadableStreamFrom(): void {
  type StreamCtorWithFrom = typeof ReadableStream & {
    from?: <T>(source: Iterable<T> | AsyncIterable<T>) => ReadableStream<T>;
  };

  const streamCtor = globalThis.ReadableStream as StreamCtorWithFrom | undefined;
  if (!streamCtor) {
    // No ReadableStream on this host: nothing to attach the static to.
    return;
  }
  if (typeof streamCtor.from === "function") {
    // An implementation is already present; leave it untouched.
    return;
  }

  const fromDescriptor: PropertyDescriptor = {
    configurable: true,
    writable: true,
    value<T>(source: Iterable<T> | AsyncIterable<T>): ReadableStream<T> {
      // Prefer the async protocol when the source supports it.
      const cursor = isAsyncIterable(source)
        ? source[Symbol.asyncIterator]()
        : source[Symbol.iterator]();

      return new ReadableStream<T>({
        async pull(controller) {
          const step = await cursor.next();
          if (step.done) {
            controller.close();
            return;
          }
          controller.enqueue(step.value);
        },
        async cancel() {
          // Give the iterator a chance to release resources, if it can.
          await cursor.return?.();
        },
      });
    },
  };

  Object.defineProperty(streamCtor, "from", fromDescriptor);
}
|
|
204
|
+
|
|
205
|
+
/**
 * Input for one agent segment, discriminated on `kind`:
 * - `"message"`: a new text message, optionally with prior state to resume from.
 * - `"approval"`: a decision on a pending approval, resuming a serialized run state.
 */
export type AgentSegmentInput =
  | {
      kind: "message";
      /** Message text for this segment. */
      text: string;
      /** Serialized run state to resume from, if any. */
      serializedRunState?: string | null;
      // Items-mode conversation truth (issue #35): when provided, turn input is
      // built from these verbatim AgentInputItems and the stored sandbox
      // envelope — no RunState deserialization, no SDK-version coupling.
      historyItems?: AgentInputItem[] | null;
      sandboxEnvelope?: Record<string, unknown> | null;
    }
  | {
      kind: "approval";
      /** Serialized run state holding the pending approval (required here). */
      serializedRunState: string;
      /** Identifier of the approval being decided. */
      approvalId: string;
      decision: "approve" | "reject";
      /** Optional message accompanying the decision. */
      message?: string;
    };
|
|
217
|
+
|
|
218
|
+
/**
 * Result of preparing a segment's input for a run: the value to hand to the
 * runner plus any sandbox state that goes with it.
 */
export type PreparedAgentInput = {
  /** Plain text, a list of input items, or a run state. */
  input: string | AgentInputItem[] | RunState<any, any>;
  /** Sandbox session state accompanying the input, when available. */
  sandboxSessionState?: SandboxSessionState;
  /** Serialized run state made available to the sandbox, when available. */
  serializedRunStateForSandbox?: string;
};
|
|
223
|
+
|
|
224
|
+
/**
 * Describes a file associated with a sandbox mount path. The bytes may be
 * referenced by `url`, carried inline as `content`, or both may be absent;
 * this type does not enforce that either is set.
 */
export type SandboxFileDownload = {
  fileId: string;
  /** Path associated with the file inside the sandbox. */
  mountPath: string;
  filename: string;
  /** Location of the file, when provided. */
  url?: string;
  /** Inline file bytes, when provided. */
  content?: Uint8Array;
  /** Expiry, as a `Date` or a string. */
  expiresAt?: Date | string;
  sizeBytes?: number;
};
|
|
233
|
+
|
|
234
|
+
/**
 * The runtime surface handed to callers: a table of functions covering
 * provider configuration, per-turn model routing, agent/tool/input
 * preparation, streamed execution, and approval serialization.
 */
export type OpenGeniRuntime = {
  /** Apply provider configuration derived from `settings`. */
  configure: (settings: Settings) => void;

  /**
   * Multi-provider per-turn model routing. Returns the resolved provider, its
   * (cached) client, the provider-bound Model instance, and the configured-model
   * shape; null when the turn's model is not in the registry, so the caller
   * falls back to the legacy global-client path (settings.openaiModel).
   */
  resolveTurnModel: (settings: Settings, modelId: string) => ReturnType<typeof resolveTurnModel>;

  /** Build the agent for the given settings and resources. */
  buildAgent: (settings: Settings, resources: ResourceRef[], options?: BuildAgentOptions) => Agent<any, any>;

  /** Asynchronously prepare the tools an agent will be given. */
  prepareTools: (settings: Settings, tools: ToolRef[], options?: PrepareToolsOptions) => Promise<PreparedAgentTools>;

  /** Turn a segment input into the prepared form `runStream` accepts. */
  prepareInput: (
    agent: Agent<any, any>,
    input: AgentSegmentInput,
    options?: PrepareInputOptions,
  ) => Promise<PreparedAgentInput>;

  /** Run the agent on prepared input; resolves to whatever `runAgentStream` resolves to. */
  runStream: (
    agent: Agent<any, any>,
    input: PreparedAgentInput,
    settings: Settings,
    options?: RunAgentStreamOptions,
  ) => Promise<Awaited<ReturnType<typeof runAgentStream>>>;

  /** Convert run interruptions into their serialized form. */
  serializeApprovals: (interruptions: unknown[]) => unknown[];
};
|
|
247
|
+
|
|
248
|
+
/** Optional substitutions accepted by `createProductionAgentRuntime`. */
export type ProductionRuntimeOverrides = {
  /** Model instance to use in place of registry-based routing. */
  model?: Model;
  /** Sandbox client forwarded to the stream runner; left untyped here. */
  sandboxClient?: unknown;
};
|
|
252
|
+
|
|
253
|
+
/**
 * Assemble the production `OpenGeniRuntime`, optionally substituting a model
 * and/or sandbox client.
 *
 * @param overrides Optional substitutions. They are read from the object at
 *   call time rather than snapshotted when the runtime is created.
 * @returns A runtime whose members delegate to this module's functions.
 */
export function createProductionAgentRuntime(overrides: ProductionRuntimeOverrides = {}): OpenGeniRuntime {
  // A test/override model shadows the registry routing entirely (the scripted
  // model used in worker tests is not in any provider's allow-list), so when
  // one is supplied this reports "no resolution" and the caller keeps the
  // legacy global-client path with the override model.
  const routeTurnModel: OpenGeniRuntime["resolveTurnModel"] = (settings, modelId) => {
    if (overrides.model) {
      return null;
    }
    return resolveTurnModel(settings, modelId);
  };

  // The override model, when present, wins over any `model` in `options`.
  const buildAgent: OpenGeniRuntime["buildAgent"] = (settings, resources, options) => {
    const agentOptions = overrides.model
      ? { ...options, model: overrides.model }
      : { ...options };
    return buildOpenGeniAgent(settings, resources, agentOptions);
  };

  // NOTE(review): unlike the model override above, `sandboxClient` is written
  // unconditionally, so an unset override replaces any caller-supplied
  // `options.sandboxClient` with `undefined`. Confirm this is intended.
  const runStream: OpenGeniRuntime["runStream"] = async (agent, input, settings, options) => {
    const streamOptions = { ...options, sandboxClient: overrides.sandboxClient };
    return await runAgentStream(agent, input, settings, streamOptions);
  };

  return {
    configure: configureOpenAI,
    resolveTurnModel: routeTurnModel,
    buildAgent,
    prepareTools: prepareAgentTools,
    prepareInput: prepareRunInput,
    runStream,
    serializeApprovals,
  };
}
|
|
274
|
+
|
|
275
|
+
/**
 * Build an OpenAI SDK client from `settings` for the configured built-in
 * provider. `configureOpenAI` installs the result of this same function as the
 * default client, so a direct API call (e.g. the compaction summarizer) uses
 * the same auth and base URL.
 *
 * Always returns a client; there is no null path.
 *
 * - Azure (`settings.openaiProvider === "azure"`): the base URL is
 *   `azureOpenaiBaseUrl`, else derived via `azureDeploymentBaseUrl`. The key is
 *   the API key, else the AD token, else a literal placeholder. When only an AD
 *   token is configured it is also sent as a `Bearer` Authorization header.
 *   Requests go through `computerCallNormalizingFetch`.
 * - Otherwise: the key is `settings.openaiApiKey`, else `OPENAI_API_KEY` from
 *   the environment; `baseURL` is set only when `openaiBaseUrl` is configured.
 *
 * @param settings Resolved runtime settings.
 * @returns A freshly constructed client (this function does no caching).
 */
export function buildOpenAIClientFromSettings(settings: Settings): OpenAI {
  if (settings.openaiProvider !== "azure") {
    const baseUrlOption = settings.openaiBaseUrl ? { baseURL: settings.openaiBaseUrl } : {};
    return new OpenAI({
      apiKey: settings.openaiApiKey ?? process.env.OPENAI_API_KEY,
      ...baseUrlOption,
      maxRetries: settings.openaiMaxRetries,
    });
  }

  const baseURL = settings.azureOpenaiBaseUrl ?? azureDeploymentBaseUrl(settings);
  const apiKey = settings.azureOpenaiApiKey ?? settings.azureOpenaiAdToken ?? "azure-ad-token";
  // Send the AD token as a bearer header only when it is the sole credential.
  const usesAdTokenOnly = settings.azureOpenaiAdToken && !settings.azureOpenaiApiKey;

  return new OpenAI({
    apiKey,
    baseURL,
    maxRetries: settings.openaiMaxRetries,
    defaultQuery: azureOpenAIDefaultQuery(settings, baseURL),
    defaultHeaders: usesAdTokenOnly
      ? { Authorization: `Bearer ${settings.azureOpenaiAdToken}` }
      : undefined,
    // Rewrite every outbound /responses computer_call to the ACTIONS-ONLY shape
    // the GA Azure computer tool (gpt-5.5) accepts. This is the lowest reachable
    // seam — below the SDK responses converter, which always re-synthesizes BOTH
    // `action` and `actions` (rejected 400 "exactly one of action or actions").
    // See computerCallNormalizingFetch / rewriteComputerCallsToActionsOnly.
    fetch: computerCallNormalizingFetch(globalThis.fetch),
  });
}
|
|
308
|
+
|
|
309
|
+
/**
 * Process-wide cache holding one OpenAI client per resolved provider id, so
 * concurrent multi-provider turns share one client per provider instead of
 * building a new one per turn.
 *
 * The key is `provider.id` alone: `settings` (and the provider's other fields)
 * are only consulted the first time an id is seen; later calls with the same
 * id get the cached client back regardless of what they pass.
 */
const providerClientCache = new Map<string, OpenAI>();

/**
 * Return the cached OpenAI client for `provider`, building and caching it on
 * first use.
 *
 * - Built-in provider: delegates to `buildOpenAIClientFromSettings`, so the
 *   construction is identical to the one `configureOpenAI` installs.
 * - `codex-subscription`: a client with `codexSubscriptionFetch` as its fetch.
 * - Any other registry provider: a plain client with the provider's resolved
 *   key, base URL, and declared `defaultQuery` / `defaultHeaders`.
 *
 * All three share `settings.openaiMaxRetries`.
 *
 * @param provider The resolved provider to build a client for.
 * @param settings Runtime settings; used only on a cache miss.
 */
export function buildProviderClient(provider: ResolvedModelProvider, settings: Settings): OpenAI {
  const existing = providerClientCache.get(provider.id);
  if (existing) {
    return existing;
  }

  let created: OpenAI;
  if (provider.builtin) {
    created = buildOpenAIClientFromSettings(settings);
  } else if (provider.kind === "codex-subscription") {
    // Codex subscription: the static apiKey is a placeholder — the real per-request
    // bearer + ChatGPT-Account-ID, the /responses->/codex/responses rewrite, and the
    // body normalization are all injected by codexSubscriptionFetch, which reads the
    // per-workspace token from codexRequestStorage (AsyncLocalStorage) at call time.
    // The provider id is constant ("codex-subscription"), so one cached client serves
    // every workspace without baking a token into it.
    created = new OpenAI({
      apiKey: provider.apiKey ?? "codex-subscription",
      ...(provider.baseUrl ? { baseURL: provider.baseUrl } : {}),
      maxRetries: settings.openaiMaxRetries,
      fetch: codexSubscriptionFetch(globalThis.fetch),
    });
  } else {
    // ResolvedModelProvider.apiKey is already the resolved key (configuredProviders
    // ran resolveProviderApiKey at config time, collapsing apiKey/apiKeyEnv), so it
    // is passed straight through here rather than re-resolved.
    created = new OpenAI({
      ...(provider.apiKey ? { apiKey: provider.apiKey } : {}),
      ...(provider.baseUrl ? { baseURL: provider.baseUrl } : {}),
      maxRetries: settings.openaiMaxRetries,
      ...(provider.defaultQuery ? { defaultQuery: provider.defaultQuery } : {}),
      ...(provider.defaultHeaders ? { defaultHeaders: provider.defaultHeaders } : {}),
    });
  }

  providerClientCache.set(provider.id, created);
  return created;
}
|
|
354
|
+
|
|
355
|
+
/**
 * Bind `modelId` to a provider's OpenAI client as an @openai/agents `Model`.
 *
 * The wire API follows the provider's declared `api`: `"chat"` yields an
 * `OpenAIChatCompletionsModel` (/v1/chat/completions, e.g. Fireworks); anything
 * else yields an `OpenAIResponsesModel` (/v1/responses, the built-in
 * OpenAI/Azure path). Handing this instance to the agent routes the turn to its
 * provider without mutating the global default client.
 *
 * @param provider Resolved provider; only `provider.api` is read here.
 * @param client   Client the model instance is bound to.
 * @param modelId  Model identifier passed to the model constructor.
 */
export function buildModelInstance(provider: ResolvedModelProvider, client: OpenAI, modelId: string): Model {
  if (provider.api === "chat") {
    return new OpenAIChatCompletionsModel(client, modelId);
  }
  return new OpenAIResponsesModel(client, modelId);
}
|
|
369
|
+
|
|
370
|
+
/**
 * Resolve per-turn model routing for `modelId`.
 *
 * @param settings Runtime settings used to look up the provider registry.
 * @param modelId  Model id requested for the turn.
 * @returns The provider serving the model, its (cached) OpenAI client, the
 *   provider-bound `Model` instance, and the configured-model shape
 *   (label/api/contextWindow/reasoningEffort/hostedWebSearch). Returns `null`
 *   when the model is not in the registry; the caller then falls back to the
 *   legacy global-client path (settings.openaiModel + the default client
 *   configured by configureOpenAI).
 */
export function resolveTurnModel(
  settings: Settings,
  modelId: string,
): { provider: ResolvedModelProvider; client: OpenAI; model: Model; configured: ConfiguredModel } | null {
  const match = resolveModelProvider(settings, modelId);
  if (!match) {
    return null;
  }

  const { provider, model: configured } = match;
  const client = buildProviderClient(provider, settings);
  // Bind the registry's own id for the model, not the caller-supplied string.
  const model = buildModelInstance(provider, client, configured.id);

  return { provider, client, model, configured };
}
|
|
394
|
+
|
|
395
|
+
/**
 * Routes a model *name* to its provider-bound Model via `resolveTurnModel`
 * (e.g. a Fireworks chat model for a registry id, the built-in OpenAI/Azure
 * responses model otherwise).
 *
 * Why it exists: a Model *instance* set as `agent.model` only survives the
 * in-process (`sandboxBackend: "none"`) run. On the SandboxAgent/Modal path the
 * instance is dropped and the *name* is re-resolved through the run's
 * `modelProvider` (or the global default). Without this router that
 * re-resolution hits the default client (e.g. Azure) and a registry model 404s
 * ("deployment does not exist").
 *
 * It is installed both as the run-scoped `Runner.config.modelProvider` (every
 * run in runAgentStream goes through `runScopedRunner(settings)`) and as the
 * process default (see configureOpenAI). The run-scoped instance is the
 * load-bearing one: a `Runner` resolves string model names against ITS OWN
 * modelProvider, so each concurrent turn routes against its own settings and a
 * foreign turn's setDefaultModelProvider cannot clobber it. The process default
 * remains only as a boot-time fallback.
 *
 * A non-codex model in no provider's allow-list falls back to the SDK default
 * provider.
 */
export class MultiProviderModelProvider implements ModelProvider {
  /** SDK default provider, created on first fallback and then reused. */
  private fallback: OpenAIProvider | undefined;

  constructor(private readonly settings: Settings) {}

  /**
   * Resolve `modelName` to a Model.
   *
   * @param modelName Model name to route; when empty or omitted the SDK default
   *   provider decides.
   * @throws CodexSubscriptionUnavailableError when a `codex/`-prefixed name is
   *   not served by the `codex-subscription` provider — either because nothing
   *   resolved it, or because some other provider kind did.
   */
  async getModel(modelName?: string): Promise<Model> {
    if (modelName) {
      const routed = resolveTurnModel(this.settings, modelName);
      const servedByCodex = routed?.provider.kind === "codex-subscription";

      // One guard covers both codex failure modes:
      //  - unresolved: the per-workspace worker overlay (settingsWithCodexCredential)
      //    injects the synthetic codex-subscription provider ONLY for a workspace
      //    with an *active* connected Codex subscription, so no resolution means
      //    the subscription is not connected. Falling through to the built-in
      //    OpenAIProvider would ship `codex/<slug>` to the global default (Azure)
      //    client as a deployment name and surface a misleading
      //    "DeploymentNotFound" 404.
      //  - resolved to another provider kind: fail-loud floor (defense in depth)
      //    so a codex id can never reach a non-codex client on ANY backend.
      // The thrown error propagates through the worker's agentRunFailurePayload as
      // the turn.failed message the session UI shows. Mirrors the codex-prefix
      // awareness of assertConfiguredModel at apps/api/src/domain/sessions.ts.
      if (modelName.startsWith(CODEX_MODEL_ID_PREFIX) && !servedByCodex) {
        throw new CodexSubscriptionUnavailableError(modelName);
      }

      if (routed) {
        return routed.model;
      }
    }

    // A non-codex model in no provider's allow-list falls back to the SDK's
    // default OpenAIProvider, which uses the global default client/key
    // configureOpenAI set up (the built-in OpenAI/Azure provider).
    if (this.fallback === undefined) {
      this.fallback = new OpenAIProvider();
    }
    return this.fallback.getModel(modelName);
  }
}
|
|
459
|
+
|
|
460
|
+
/**
 * Raised when a `codex/<slug>` turn reaches the model router but the workspace
 * has no active Codex subscription connected (the worker overlay never injected
 * the synthetic provider, so resolveTurnModel returned nothing).
 *
 * Thrown instead of silently routing the id to the built-in Azure/OpenAI
 * client, which produced an opaque "DeploymentNotFound" 404. The message is
 * user-actionable (connect/reconnect) and the error carries no status/code, so
 * agentRunFailurePayload surfaces it verbatim as a non-retryable turn.failed
 * the session UI shows.
 */
export class CodexSubscriptionUnavailableError extends Error {
  /**
   * @param modelName The requested model id, quoted verbatim in the message.
   */
  constructor(modelName: string) {
    const problem = `Codex subscription model "${modelName}" is unavailable: no active Codex subscription is connected for this workspace. `;
    const remedy = `Connect (or reconnect) your ChatGPT/Codex subscription in Settings, then retry.`;
    super(problem + remedy);
    this.name = "CodexSubscriptionUnavailableError";
  }
}
|
|
478
|
+
|
|
479
|
+
/**
 * Apply process-wide OpenAI SDK defaults from `settings`: the Responses
 * transport, the default client and/or key for the built-in provider, and the
 * registry-aware router as the default model provider.
 *
 * - Azure: installs the client built by `buildOpenAIClientFromSettings`.
 * - Otherwise: sets the default key when `openaiApiKey` is present, and
 *   installs a built client only when `openaiBaseUrl` is configured.
 *
 * @param settings Runtime settings to configure from.
 */
export function configureOpenAI(settings: Settings): void {
  setOpenAIResponsesTransport(settings.openaiResponsesTransport);

  // Install the registry-aware router as the process default model provider so a
  // model name re-resolved on the SandboxAgent/Modal path (where a Model instance
  // does not survive) routes to its provider instead of the built-in client.
  // Constructed before the default-client calls so it captures the same settings;
  // registered after them on every path.
  const router = new MultiProviderModelProvider(settings);

  if (settings.openaiProvider === "azure") {
    setDefaultOpenAIClient(buildOpenAIClientFromSettings(settings));
  } else {
    if (settings.openaiApiKey) {
      setDefaultOpenAIKey(settings.openaiApiKey);
    }
    if (settings.openaiBaseUrl) {
      setDefaultOpenAIClient(buildOpenAIClientFromSettings(settings));
    }
  }

  setDefaultModelProvider(router);
}
|
|
499
|
+
|
|
500
|
+
/**
 * Produce the context-compaction summary with one plain model call: no tools,
 * no streaming, no reasoning encryption and no server-side compaction.
 *
 * The call is provider-aware so the summary runs on the same provider that
 * serves the turn:
 * - `api: "chat"` (e.g. Fireworks) goes through chat completions and reads
 *   `choices[0].message.content`.
 * - `api: "responses"` (the default; built-in OpenAI/Azure) goes through the
 *   Responses API and reads the text via extractResponseOutputText.
 *
 * When `options.client` / `options.api` are omitted, the client built from
 * `settings` and the Responses path are used, so legacy global-client callers
 * keep their existing behaviour.
 *
 * @param settings Runtime settings; supplies the default model, the default
 *   summary token budget and the provider that gates `store: false`.
 * @param messages The `system` and `user` prompt text (produced by
 *   buildCompactionMessages).
 * @param options Optional overrides: the client to call, the wire API to
 *   speak, the output token budget and the model name.
 * @returns The trimmed summary, or `null` when the model produced no text or
 *   the request failed. A failed request is logged and swallowed because the
 *   caller treats `null` as "skip compaction this turn".
 */
export async function summarizeForCompaction(
  settings: Settings,
  messages: { system: string; user: string },
  options: { client?: OpenAI; api?: ModelProviderApi; maxOutputTokens?: number; model?: string } = {},
): Promise<string | null> {
  const client = options.client ?? buildOpenAIClientFromSettings(settings);
  const wireApi = options.api ?? "responses";
  const model = options.model ?? settings.openaiModel;
  const tokenBudget = options.maxOutputTokens ?? settings.contextSummaryMaxTokens;
  // Both wire APIs take the same two-message conversation.
  const conversation = [
    { role: "system", content: messages.system },
    { role: "user", content: messages.user },
  ];

  try {
    let rawText: string;
    if (wireApi === "chat") {
      const completion = await client.chat.completions.create({
        model,
        max_tokens: tokenBudget,
        messages: conversation,
      } as any);
      const content = (completion as { choices?: Array<{ message?: { content?: unknown } }> }).choices?.[0]?.message
        ?.content;
      rawText = typeof content === "string" ? content : "";
    } else {
      // store:false is the OpenAI-platform-only storeless precondition; Azure
      // rejects it, so it is left out when the built-in provider is Azure.
      const storeless = settings.openaiProvider === "azure" ? {} : { store: false };
      const response = await client.responses.create({
        model,
        ...storeless,
        max_output_tokens: tokenBudget,
        input: conversation,
      } as any);
      rawText = extractResponseOutputText(response);
    }
    const summary = rawText.trim();
    return summary.length > 0 ? summary : null;
  } catch (error) {
    console.error("context compaction summarize failed (compaction skipped this turn)", error);
    return null;
  }
}
|
|
560
|
+
|
|
561
|
+
/**
 * Pull the assistant text out of a Responses API result, shape-tolerant.
 *
 * Only `role === "assistant"` message items contribute. A provider whose
 * Responses endpoint echoes the user input back as an output `message` item
 * (Fireworks' beta /v1/responses does exactly this — see
 * docs/model-providers.md) would otherwise corrupt the summary with the prompt
 * it was given.
 *
 * The aggregated `output_text` convenience field is NOT role-aware, so it is
 * only used as a fallback: when `output` is missing, is not an array, or holds
 * no message item that carries a role to filter on. Whenever role-tagged
 * message items are present, the role-filtered scan is authoritative, even if
 * it yields an empty string. For OpenAI/Azure responses, which only emit
 * assistant messages, both sources agree.
 *
 * @param response The raw Responses API result; any non-object yields `""`.
 * @returns The concatenated text of every string `text` part found in
 *   assistant message items, or the `output_text` fallback, or `""`.
 */
export function extractResponseOutputText(response: unknown): string {
  if (!response || typeof response !== "object") {
    return "";
  }
  const { output_text: aggregatedText, output } = response as { output_text?: unknown; output?: unknown };

  if (Array.isArray(output)) {
    const parts: string[] = [];
    // True once a message item exposes a role, i.e. the filter below is meaningful.
    let sawRoleTaggedMessage = false;
    for (const item of output) {
      if (!item || typeof item !== "object") {
        continue;
      }
      const { type, role, content } = item as { type?: unknown; role?: unknown; content?: unknown };
      if (type !== "message") {
        continue;
      }
      if (typeof role === "string") {
        sawRoleTaggedMessage = true;
      }
      // Read assistant messages only; skip any input-echo (role "user"/"system").
      if (role !== "assistant" || !Array.isArray(content)) {
        continue;
      }
      for (const part of content) {
        if (part && typeof part === "object" && typeof (part as { text?: unknown }).text === "string") {
          parts.push((part as { text: string }).text);
        }
      }
    }
    if (sawRoleTaggedMessage) {
      return parts.join("");
    }
  }

  // No role information to filter on: trust the aggregated convenience field.
  return typeof aggregatedText === "string" ? aggregatedText : "";
}
|
|
605
|
+
|
|
606
|
+
/**
 * Per-call options for buildOpenGeniAgent. Every field is optional; the
 * per-turn gating overrides (compactionMode, hostedWebSearch,
 * encryptedReasoning, contextWindowTokens, structuredToolTransport) default to
 * today's settings-derived behaviour, so legacy global-client callers (no
 * model resolution) are unchanged.
 */
export type BuildAgentOptions = {
  /** Model to run the agent on. Default: settings.openaiModel. */
  model?: Model;
  /** Reasoning effort for the turn. Default: settings.openaiReasoningEffort. */
  reasoningEffort?: ReasoningEffort;
  /**
   * The resolved context-compaction path. Drives whether the sandbox
   * `compaction()` capability is attached AND whether `store: false` is set
   * (the OpenAI-platform-only storeless precondition). Registry providers
   * resolve to "client", so neither is applied to them.
   * Default: resolveContextCompactionMode(settings).
   */
  compactionMode?: ContextCompactionMode;
  /**
   * Attach the hosted web_search tool. Only providers that actually execute it
   * (built-in OpenAI/Azure; a registry model that opts in) should get it —
   * Fireworks accepts the param but no-ops it, which would hand the agent a
   * dead tool. Default: settings.webSearchEnabled.
   */
  hostedWebSearch?: boolean;
  /**
   * Round-trip reasoning.encrypted_content via providerData.include. Only the
   * Responses API carries it; the chat wire API has no such field, so registry
   * "chat" providers turn it off.
   * Default: settings.openaiReasoningEncryptedContent.
   */
  encryptedReasoning?: boolean;
  /**
   * The model's effective context window, used to derive the server-path
   * compaction threshold. A registry model can declare its own (e.g. GLM 5.2's
   * 1,048,576). Default: settings.contextWindowTokens.
   */
  contextWindowTokens?: number;
  /**
   * Whether the backend supports the Responses STRUCTURED/HOSTED sandbox-tool
   * transport — the hosted `apply_patch` tool type and structured `view_image`
   * output. The SDK's sandbox capabilities pick hosted-vs-function purely from
   * the bound model instance's constructor name (supportsApplyPatchTransport /
   * supportsStructuredToolOutputTransport). Codex turns run the
   * OpenAIResponsesModel — which the SDK reads as hosted-capable — but route it
   * to the ChatGPT/Codex backend, which REJECTS the hosted `apply_patch` type
   * ("Unsupported tool type: apply_patch", verified live). Set false for that
   * backend so filesystem emits the function `apply_patch` + text `view_image`
   * variants it accepts. Default true (let the SDK decide from the model
   * instance), leaving non-codex paths unchanged.
   */
  structuredToolTransport?: boolean;
  /**
   * The LIVE, by-reference connector-namespace Set from prepareAgentTools:
   * it fills during each turn's codex_apps tools/list and is read per model
   * call by the codex tool_search description, so the model sees the account's
   * ACTUALLY-connected sources (codex-rs parity). Only meaningful on the codex
   * tool-search path.
   */
  codexConnectorNamespaces?: ReadonlySet<string>;
  /** Environment variables handed to buildManifest for the sandbox manifest. */
  sandboxEnvironment?: Record<string, string>;
  /**
   * The EFFECTIVE/active compute backend for this turn.
   * `settings.sandboxBackend` is the session's HOME backend (the default cloud
   * group box it was created with). When a session has swapped its active
   * sandbox to a connected machine (active_sandbox_id → a selfhosted lease,
   * while the home backend stays the cloud default), the worker passes that
   * machine's backend here so filesystem-touching lifecycle hooks key off where
   * the agent ACTUALLY runs, not where it was created. The one such hook today
   * is the repository clone (sandboxRepositoryCloneHooks): a bring-your-own
   * machine owns its real disk, so the platform must NEVER `git clone` onto it.
   * Defaults to settings.sandboxBackend, so legacy cloud paths are unchanged
   * and a session whose HOME backend is "selfhosted" is gated with no caller
   * change.
   */
  activeSandboxBackend?: Settings["sandboxBackend"];
  /** File resources to download into the sandbox; forwarded to buildManifest. */
  fileResourceDownloads?: Array<SandboxFileDownload>;
  /** MCP servers attached to the agent; omitted from the config when empty. */
  mcpServers?: Array<MCPServer>;
  /** Metadata of the workspace environment attached to the run, surfaced in the instructions. */
  workspaceEnvironment?: WorkspaceEnvironmentContext;
  /**
   * TOKEN-BROKER (B1): the run-scoped GitHub App installation token, minted
   * ONCE per turn by the worker (sandboxEnvironmentForRun's `gitToken`).
   * Threaded here OFF-MANIFEST — it is NOT part of sandboxEnvironment (the
   * manifest env), so the token VALUE never triggers the SDK's provided-session
   * env-delta guard even though it rotates every turn. buildAgent stashes it
   * alongside the agent's repository-clone hooks; runStream forwards it into
   * the clone hook context, which seeds it to the box's token FILE before the
   * clone runs. Omitted on the selfhosted path (the machine uses its own git
   * creds) — a NO-OP there.
   */
  gitTokenSeed?: string;
  /**
   * Genesis turn only: append a one-shot instruction to the agent's system
   * prompt telling it to title the session via opengeni__set_session_title
   * before responding. Delivered through the instructions channel (where the
   * model actually obeys), appended AFTER the non-bypassable core so a
   * white-label persona template can't drop it.
   */
  genesisTitleHint?: boolean;
  /**
   * Per-call agent persona override (the white-label surface). Resolved by the
   * caller as session > workspace > deployment default; when omitted the
   * runtime falls back to settings.agentInstructionsTemplate. The runtime
   * substitutes the non-bypassable CORE at AGENT_INSTRUCTIONS_CORE_PLACEHOLDER
   * (or appends it when the template omits the marker), so an override can
   * restyle the persona but never drop the goal-loop contract or environment
   * block.
   */
  instructionsTemplate?: string;
  /**
   * Skills delivered by enabled capability packs. They join the bundled skills
   * in the sandbox skill index (mounted under .agents/) so skills/<name>
   * references resolve like any other indexed skill.
   */
  packSkills?: Array<PackSkill>;
};
|
|
697
|
+
|
|
698
|
+
/** One file shipped inside a capability-pack skill directory. */
export type PackSkillFile = {
  /**
   * Relative POSIX path inside the skill directory, e.g. "SKILL.md" or
   * "references/runbook.md".
   */
  path: string;
  /** The file's text content. */
  content: string;
};
|
|
704
|
+
|
|
705
|
+
/** A skill delivered by a capability pack: a name, an optional description and its files. */
export type PackSkill = {
  /** Skill name. */
  name: string;
  /** Optional description; may be absent or explicitly null. */
  description?: string | null;
  /** The files that make up the skill directory. */
  files: Array<PackSkillFile>;
};
|
|
710
|
+
|
|
711
|
+
/**
 * Operator-facing metadata describing the workspace environment attached to a
 * run. It is surfaced verbatim in the agent instructions: the description is
 * where operators document how the exported credentials are meant to be used
 * (e.g. which variable holds a deploy key and how to clone with it), so the
 * agent does not have to rediscover that by enumerating `env` and guessing.
 *
 * Only metadata belongs here — never variable values.
 */
export type WorkspaceEnvironmentContext = {
  /** Name of the environment, quoted in the instructions. */
  name: string;
  /** Operator notes on how to use the environment; may be absent or null. */
  description?: string | null;
  /** Names (not values) of the variables exported into the sandbox shell. */
  variableNames?: Array<string>;
};
|
|
724
|
+
|
|
725
|
+
/**
 * Render the instruction lines that describe an attached workspace environment.
 *
 * The first line always announces the environment by name. A second line lists
 * the non-empty variable names in sorted order when there are any, and a final
 * line carries the operator's description when it is non-blank after trimming.
 *
 * @param environment Metadata of the attached workspace environment.
 * @returns The instruction lines, in order; the input is not mutated.
 */
export function workspaceEnvironmentInstructions(environment: WorkspaceEnvironmentContext): string[] {
  const announcement = `A workspace environment named "${environment.name}" is attached to this session; its variables are exported in the sandbox shell environment.`;
  // filter() yields a fresh array, so the in-place sort leaves the caller's list alone.
  const exportedNames = (environment.variableNames ?? []).filter((entry) => entry.length > 0).sort();
  const operatorNotes = environment.description?.trim();

  return [
    announcement,
    ...(exportedNames.length > 0 ? [`Exported environment variables: ${exportedNames.join(", ")}.`] : []),
    ...(operatorNotes ? [`Environment notes from the operator: ${operatorNotes}`] : []),
  ];
}
|
|
739
|
+
|
|
740
|
+
/**
 * Build the non-bypassable CORE of the agent instructions as ordered lines:
 * first the goal-loop ownership line (it names the opengeni__goal_* tools and
 * is what keeps a long-running session driving itself), then the dynamic
 * workspace-environment block when an environment is attached.
 *
 * The caller joins these lines with the rest of the instructions by " ",
 * exactly as the historical preamble did. This is the slice a white-labelled
 * persona template must never be able to drop: composeAgentInstructions()
 * substitutes it at the template's {{core}} marker and appends it when the
 * marker is absent.
 *
 * @param workspaceEnvironment Optional metadata of the attached environment.
 * @returns The core instruction lines, in order.
 */
export function coreInstructions(workspaceEnvironment?: WorkspaceEnvironmentContext): string[] {
  const goalOwnership =
    "If the session has a goal, you own it: keep working until you call opengeni__goal_complete with concrete evidence or opengeni__goal_pause with a rationale; revise it with opengeni__goal_update; create one with opengeni__goal_set when given a long-running objective.";
  if (!workspaceEnvironment) {
    return [goalOwnership];
  }
  return [goalOwnership, ...workspaceEnvironmentInstructions(workspaceEnvironment)];
}
|
|
757
|
+
|
|
758
|
+
/**
 * Merge a (possibly white-labelled) persona template with the non-bypassable
 * CORE to produce the final agent instructions.
 *
 * Every occurrence of the {{core}} marker in the template is replaced by the
 * CORE. A template without the marker gets the CORE appended after it instead
 * (the non-bypassable fail-safe). Both the CORE lines and the append are joined
 * by " ", so the DEFAULT_AGENT_INSTRUCTIONS template with an empty environment
 * reproduces the historical preamble byte-for-byte.
 *
 * @param template The persona template, with or without the core marker.
 * @param workspaceEnvironment Optional environment metadata folded into the CORE.
 * @returns The composed instruction string.
 */
export function composeAgentInstructions(template: string, workspaceEnvironment?: WorkspaceEnvironmentContext): string {
  const coreText = coreInstructions(workspaceEnvironment).join(" ");

  if (!template.includes(AGENT_INSTRUCTIONS_CORE_PLACEHOLDER)) {
    // Marker omitted: append so the template cannot drop the core.
    return coreText ? `${template} ${coreText}` : template;
  }

  // split/join substitutes literally, with no special replacement patterns.
  const segments = template.split(AGENT_INSTRUCTIONS_CORE_PLACEHOLDER);
  return segments.join(coreText);
}
|
|
773
|
+
|
|
774
|
+
// Per-agent side tables, keyed by the agent object built in buildOpenGeniAgent.

// The normalized file downloads recorded for the agent (entries without inline content).
const agentFileDownloads: WeakMap<object, SandboxFileDownload[]> = new WeakMap();

// The repository-clone lifecycle hooks computed for the agent.
const agentRepositoryCloneHooks: WeakMap<object, SandboxLifecycleHook[]> = new WeakMap();

// TOKEN-BROKER (B1): the per-turn git token seed, a parallel map to the
// repository-clone hooks above. It is kept OFF the manifest/defaultManifest so
// the rotating value never rides the SDK's provided-session env; runStream
// reads it to build the clone hook context. Absent when no repo is attached or
// on the selfhosted path.
const agentGitTokenSeed: WeakMap<object, string> = new WeakMap();

// The EFFECTIVE backend the turn resolved for this agent (undefined -> the home
// backend). runStream's owned branch reads it to keep platform box-setup hooks
// off connected machines (a user's real computer).
const agentActiveSandboxBackend: WeakMap<object, Settings["sandboxBackend"]> = new WeakMap();
|
|
786
|
+
|
|
787
|
+
/**
 * Construct the OpenGeni agent for one turn.
 *
 * With `settings.sandboxBackend === "none"` a plain `Agent` is returned.
 * Otherwise a `SandboxAgent` is built with the manifest and capability set,
 * and the per-agent side tables (file downloads, repository-clone hooks,
 * effective backend, git token seed) are filled for runStream to read. In both
 * cases codex tool search is installed when it qualifies.
 *
 * Each per-turn gating override in `options` defaults to the settings-derived
 * value, so legacy global-client callers (no resolved model) build the same
 * agent as before; the multi-provider worker path passes the resolved
 * provider's mode/api/window/web-search instead.
 *
 * @param settings Runtime settings.
 * @param resources Resources attached to the run (used for the manifest and clone hooks).
 * @param options Per-call overrides; see BuildAgentOptions.
 * @returns The constructed agent.
 */
export function buildOpenGeniAgent(settings: Settings, resources: ResourceRef[], options: BuildAgentOptions = {}): Agent<any, any> {
  // Resolved per-turn gating: explicit override first, settings-derived default second.
  const contextWindowTokens = options.contextWindowTokens ?? settings.contextWindowTokens;
  const encryptedReasoning = options.encryptedReasoning ?? settings.openaiReasoningEncryptedContent;
  const hostedWebSearch = options.hostedWebSearch ?? settings.webSearchEnabled;
  const compactionMode = options.compactionMode ?? resolveContextCompactionMode(settings);

  // Native hosted tools. webSearchEnabled is ON by default and
  // provider-unconditional on the built-in path (the live Azure Responses path
  // executes the hosted web_search tool); a registry model only gets it when it
  // opts in via options.hostedWebSearch, since a provider that no-ops the param
  // would hand the agent a dead tool. The SDK merges this explicit `tools` array
  // with the MCP-server tools (Agent.getAllTools = [...mcpTools, ...tools]) and,
  // on the SandboxAgent path, with the sandbox capability tools
  // (prepareSandboxAgent: tools = [...agent.tools, ...capability.tools()]), so
  // hosted web_search coexists with both rather than overriding them.
  const hostedTools = hostedWebSearch ? [webSearchTool()] : [];

  // White-label persona composition. The effective template is the per-call
  // override (resolved by the caller as session > workspace), falling back to
  // the deployment default (settings.agentInstructionsTemplate).
  // composeAgentInstructions substitutes the non-bypassable CORE (goal-loop
  // ownership + workspace-environment block) at the {{core}} marker, or appends
  // it when the template omits the marker.
  const personaTemplate = options.instructionsTemplate ?? settings.agentInstructionsTemplate;
  const composedInstructions = composeAgentInstructions(personaTemplate, options.workspaceEnvironment);

  const baseConfig = {
    name: "OpenGeni Agent",
    model: options.model ?? settings.openaiModel,
    // On the genesis turn the title directive is appended AFTER the composed
    // instructions, i.e. after the non-bypassable core.
    instructions: options.genesisTitleHint
      ? `${composedInstructions} ${GENESIS_TITLE_DIRECTIVE}`
      : composedInstructions,
    modelSettings: {
      reasoning: { effort: options.reasoningEffort ?? settings.openaiReasoningEffort, summary: "detailed" },
      // Server-side compaction (OpenAI platform) requires store=false: the
      // server emits an opaque ENCRYPTED 'compaction' item that round-trips in
      // the request rather than being anchored to a stored response. Gated on
      // the RESOLVED compaction mode (registry providers resolve to "client",
      // so they never carry store:false).
      ...(compactionMode === "server" ? { store: false } : {}),
      // Round-trip the encrypted reasoning payload with every call so chains of
      // thought survive without provider-side response storage.
      // providerData.include replaces any tool-derived include entries;
      // OpenGeni's tools are MCP/sandbox function tools, which contribute none.
      // Gated on the resolved flag: the chat wire API has no encrypted_content
      // field, so registry "chat" providers turn it off.
      ...(encryptedReasoning
        ? { providerData: { include: ["reasoning.encrypted_content"] } }
        : {}),
    },
    // Explicit hosted tools (web_search when enabled). Shared by the
    // `new Agent(baseConfig)` path and the `new SandboxAgent({ ...baseConfig })`
    // path through this config object.
    ...(hostedTools.length ? { tools: hostedTools } : {}),
    ...(options.mcpServers?.length ? { mcpServers: options.mcpServers } : {}),
  } as const;

  if (settings.sandboxBackend === "none") {
    const plainAgent = new Agent(baseConfig);
    maybeInstallCodexToolSearch(plainAgent, settings, options);
    return plainAgent;
  }

  const runAs = sandboxRunAs(settings);
  const sandboxAgent = new SandboxAgent({
    ...baseConfig,
    defaultManifest: buildManifest(settings, resources, options.sandboxEnvironment, options.fileResourceDownloads),
    ...(runAs ? { runAs } : {}),
    capabilities: buildAgentCapabilities(settings, options.packSkills ?? [], {
      compactionMode,
      contextWindowTokens,
      ...(options.structuredToolTransport !== undefined ? { structuredToolTransport: options.structuredToolTransport } : {}),
    }),
  });

  // Only downloads without inline content are recorded for the agent.
  const pendingDownloads = normalizeSandboxFileDownloads(options.fileResourceDownloads ?? []).filter(
    (download) => !download.content,
  );
  agentFileDownloads.set(sandboxAgent, pendingDownloads);

  const cloneHooks = sandboxRepositoryCloneHooks(settings, resources, options.activeSandboxBackend);
  agentRepositoryCloneHooks.set(sandboxAgent, cloneHooks);

  // Stash the EFFECTIVE backend so runStream's owned branch can skip the direct
  // beforeAgentStart hook run on a connected machine: the box there is the
  // user's REAL computer — the platform must not run setup (az login) against
  // it. The clone hooks are already excluded for selfhosted at construction;
  // this keeps the built-in hooks equally out.
  if (options.activeSandboxBackend) {
    agentActiveSandboxBackend.set(sandboxAgent, options.activeSandboxBackend);
  }

  // TOKEN-BROKER (B1): stash the per-turn seed off-manifest so runStream can
  // seed the clone hook without the token ever touching defaultManifest /
  // sandboxEnvironment.
  if (options.gitTokenSeed) {
    agentGitTokenSeed.set(sandboxAgent, options.gitTokenSeed);
  }

  maybeInstallCodexToolSearch(sandboxAgent, settings, options);
  return sandboxAgent;
}
|
|
887
|
+
|
|
888
|
+
/**
 * Turn on Codex-CLI-style progressive connector disclosure for a codex turn
 * when the feature flag is set.
 *
 * The gate is `settings.codexToolSearchEnabled` together with
 * `structuredToolTransport === false` — the same signal that identifies a
 * codex-subscription turn (the ChatGPT backend that rejects hosted tools) — so
 * no non-codex turn is ever touched. On a qualifying turn it wraps
 * `getAllTools` (clone-survivingly — see {@link installCodexToolSearch}) to
 * defer codex_apps schemas and add the client tool_search tool, whose
 * description renders the live connector namespaces threaded from
 * prepareAgentTools.
 *
 * @param agent The agent to install tool search on.
 * @param settings Runtime settings carrying the feature flag.
 * @param options Build options carrying the transport signal and the live namespace set.
 */
function maybeInstallCodexToolSearch(agent: Agent<any, any>, settings: Settings, options: BuildAgentOptions): void {
  const isCodexTurn = options.structuredToolTransport === false;
  if (!settings.codexToolSearchEnabled || !isCodexTurn) {
    return;
  }
  // Without a caller-provided live set, start from an empty one.
  const connectorNamespaces = options.codexConnectorNamespaces ?? new Set<string>();
  installCodexToolSearch(agent as unknown as Parameters<typeof installCodexToolSearch>[0], connectorNamespaces);
}
|
|
905
|
+
|
|
906
|
+
/**
 * Make a sandbox capability emit its FUNCTION-transport tool variants instead
 * of the hosted ones by discarding the model instance that the SDK's transport
 * detection keys off.
 *
 * Why: codex routes the OpenAIResponsesModel to the ChatGPT backend, which
 * rejects the hosted `apply_patch` AND `computer_use_preview` tool types (see
 * {@link buildAgentCapabilities}). The SDK reads hosted-vs-function ONLY from
 * `_modelInstance` (set via `bindModel`). Replacing `bindModel` with a version
 * that drops the instance leaves `_modelInstance` undefined, so
 * `supportsApplyPatchTransport` / `supportsStructuredToolOutputTransport`
 * return false and `tools()` emits the function variants — `apply_patch` +
 * text `view_image` for filesystem, and the `computer_*` function tools + text
 * `computer_screenshot` for computer-use.
 *
 * @param capability The filesystem or computer-use capability to patch in place.
 */
function neutralizeStructuredToolTransport(capability: ReturnType<typeof filesystem> | ReturnType<typeof computerUse>): void {
  type ModelBinder = (this: Record<string, unknown>) => unknown;

  // Must be a `function` that works on `this`, NOT a closure over `capability`:
  // the SandboxAgent binds via
  // `cap.clone().bind(session).bindRunAs(runAs).bindModel(model, instance)` and
  // runs tools() on whatever the CHAIN returns. Capability.clone() copies this
  // override onto the fresh per-run instance, so it has to mutate and RETURN
  // `this` (the clone). A version that touched the ORIGINAL capability would
  // leave the clone (the one `.bind()` set `_session` on) out of the chain, and
  // tools() would then run on the unbound original and throw "Filesystem
  // capability is not bound to a SandboxSession".
  const dropModelInstance: ModelBinder = function () {
    this._modelInstance = undefined;
    // Returning the receiver keeps the SDK's bind chain intact.
    return this;
  };

  const patchable = capability as unknown as { bindModel: ModelBinder };
  patchable.bindModel = dropModelInstance;
}
|
|
936
|
+
|
|
937
|
+
/**
 * Assemble the SandboxAgent capability set, provider-aware.
 *
 * The SDK's `Capabilities.default()` force-includes `compaction()`, whose
 * sampling params emit `context_management:[{type:'compaction', …}]` to the
 * Responses transport. The OpenAI platform honors that (server-side
 * compaction); AZURE rejects it with `400 unsupported_parameter`. The
 * compaction capability therefore MUST NOT be attached on the Azure / client /
 * off paths.
 *
 * The base set is rebuilt explicitly (`filesystem()`, `shell()`) and
 * `compaction()` is added ONLY on the server path, with an explicit
 * `StaticCompactionPolicy(threshold)`: gpt-5.5 is absent from the SDK's
 * hardcoded context-window map and would otherwise hit the wrong 240k
 * fallback, and the SDK has no window-registration API.
 *
 * The compaction mode and effective context window are passed IN (the
 * multi-provider caller resolves them per provider/model); both default to the
 * settings-derived value so callers that don't route per-model keep today's
 * behaviour.
 *
 * @param settings Runtime settings.
 * @param packSkills Skills delivered by enabled capability packs.
 * @param options Optional resolved compaction mode, context window and
 *   structured-transport support flag.
 * @returns The capability list: filesystem, shell, optional compaction,
 *   skills, and optional computer-use.
 */
export function buildAgentCapabilities(
  settings: Settings,
  packSkills: PackSkill[],
  options: { compactionMode?: ContextCompactionMode; contextWindowTokens?: number; structuredToolTransport?: boolean } = {},
): ReturnType<typeof Capabilities.default> {
  const mode = options.compactionMode ?? resolveContextCompactionMode(settings);
  const contextWindowTokens = options.contextWindowTokens ?? settings.contextWindowTokens;
  // True when the caller declares the backend does NOT support the
  // structured/hosted tool transport (codex → the ChatGPT backend).
  const forceFunctionTransport = options.structuredToolTransport === false;

  // `filesystem()` picks hosted-vs-function tool variants from the bound model
  // instance. On a backend without structured transport its model binding is
  // neutralized so tools() falls to the function `apply_patch` + text
  // `view_image` variants; the SDK handles their function_call round-trip
  // natively. shell() (always function tools) and compaction() (a sampling
  // param, dropped by the codex normalizer) are left untouched.
  const filesystemCapability = filesystem();
  if (forceFunctionTransport) {
    neutralizeStructuredToolTransport(filesystemCapability);
  }

  const caps: ReturnType<typeof Capabilities.default> = [filesystemCapability, shell()];

  if (mode === "server") {
    const threshold = contextServerCompactThreshold({ ...settings, contextWindowTokens });
    caps.push(compaction({ policy: new StaticCompactionPolicy(threshold) }));
  }

  caps.push(skills({ lazyFrom: lazySkillSourceWithPackSkills(packSkills) }));

  // P4.3 computer-use: the agent drives the SAME :0 humans watch (xdotool/XTEST
  // + scrot), but only when the desktop tier is ON, computer-use is enabled, and
  // the backend is one whose image carries the X stack. Headless/dev backends
  // never get the tool, so a misconfigured non-desktop box can't register a tool
  // that always fails. The capability's tools() bind to the live
  // externally-owned session at run time; xdotool drives :0 whether or not a
  // viewer is attached, so there is no pixel-tunnel dependency.
  const desktopToolsAvailable =
    settings.computerUseEnabled
    && settings.sandboxDesktopEnabled
    && desktopCapableBackend(settings.sandboxBackend);

  if (desktopToolsAvailable) {
    // computer-use is transport-aware like filesystem: tools() emits the HOSTED
    // `computer_use_preview` tool on the structured transport and FUNCTION
    // `computer_*` tools on the text transport. The ChatGPT/Codex backend
    // rejects hosted tool types (only function/custom/web_search accepted), so
    // on the codex path the model binding is neutralized here too, instead of
    // suppressing the desktop tier entirely.
    const computerCapability = computerUse({
      dimensions: [settings.streamResolutionWidth, settings.streamResolutionHeight],
      readOnly: settings.computerUseReadOnly,
      // On the codex path screenshots are delivered as a real image: that
      // backend accepts `input_image` content items inside a
      // `function_call_output`, whereas a text data-URL would be unreadable.
      ...(forceFunctionTransport ? { imageFunctionResults: true } : {}),
    });
    if (forceFunctionTransport) {
      neutralizeStructuredToolTransport(computerCapability);
    }
    caps.push(computerCapability as unknown as ReturnType<typeof Capabilities.default>[number]);
  }

  return caps;
}
|
|
1024
|
+
|
|
1025
|
+
/**
 * Hook for selecting a sandbox "run as" identity.
 *
 * The settings argument is accepted but ignored: this implementation never
 * selects an identity and always yields `undefined`.
 * NOTE(review): how callers interpret `undefined` is not visible here.
 *
 * @param _settings Runtime settings (unused).
 * @returns Always `undefined`.
 */
export function sandboxRunAs(_settings: Settings): string | undefined {
  return undefined;
}
|
|
1028
|
+
|
|
1029
|
+
/** Result of `prepareAgentTools`: the usable MCP servers plus their teardown. */
export type PreparedAgentTools = {
  /** MCP servers that are available to the agent for this turn. */
  mcpServers: MCPServer[];
  /** Closes the connections backing `mcpServers`. */
  close: () => Promise<void>;
  /**
   * P4 (Part B.1): live, by-reference Set of the ORIGINAL-dotted connector
   * namespaces that the codex_apps transport observed across this turn's
   * tools/list calls. It keeps growing while the agent lists tools during the
   * run, so the worker reads it AFTER the turn (in its finally) to cache the
   * serving account's connector set. It stays empty when the turn has no
   * codex_apps server (or that server never listed a namespaced tool); the
   * worker only persists a non-empty set.
   */
  codexConnectorNamespaces: Set<string>;
};
|
|
1040
|
+
|
|
1041
|
+
/** Per-run context used when building MCP server connections. */
export type PrepareToolsOptions = {
  /** Account the delegated first-party token is issued for. */
  accountId?: string;
  /** Workspace used to scope first-party MCP URLs and the delegated token. */
  workspaceId?: string;
  /**
   * Worker-asserted session scope for first-party MCP calls; enables
   * session-scoped tools such as goal management on the API side.
   */
  sessionId?: string;
  /** Subject id placed in the delegated token (a worker default is used when absent). */
  subjectId?: string;
  /** Optional human-readable label for the subject, added to the token when set. */
  subjectLabel?: string;
  /**
   * Replaces the fixed first-party MCP permission set for this session's
   * delegated token (manager-style sessions). The caller is responsible for
   * having validated the set against the session creator's grant.
   */
  firstPartyPermissions?: Permission[];
};
|
|
1054
|
+
|
|
1055
|
+
/**
 * Builds and connects the MCP servers referenced by `tools`.
 *
 * Every tool id is looked up in `settings.mcpServers`, wrapped in a
 * `PrefixedMcpServer`, and then connected in one of two groups: required
 * servers (connected with `strict: true`) and best-effort servers (connected
 * with `strict: false`).
 *
 * @param settings Runtime settings; `settings.mcpServers` is the registry of known servers.
 * @param tools    Tool references selected for this run. An empty list short-circuits.
 * @param options  Per-run context (workspace/account/session) used for URLs and auth headers.
 * @returns The active servers, a `close` callback that releases BOTH connection
 *          groups, and the shared connector-namespace Set.
 * @throws Error when a tool id has no entry in `settings.mcpServers`.
 */
export async function prepareAgentTools(settings: Settings, tools: ToolRef[], options: PrepareToolsOptions = {}): Promise<PreparedAgentTools> {
  // P4 (Part B.1): one Set per prepareTools call, shared by reference into the
  // codex_apps sanitizing fetch so every tools/list this turn accumulates the
  // account's connector namespaces. Surfaced on PreparedAgentTools for the worker.
  const codexConnectorNamespaces = new Set<string>();
  if (tools.length === 0) {
    return { mcpServers: [], close: async () => {}, codexConnectorNamespaces };
  }
  const registry = new Map(settings.mcpServers.map((server) => [server.id, server]));
  const servers = await Promise.all(tools.map(async (tool) => {
    const config = registry.get(tool.id);
    if (!config) {
      throw new Error(`Unknown MCP server id: ${tool.id}`);
    }
    const url = firstPartyMcpServerUrlForRun(settings, config, options.workspaceId) ?? config.url;
    const server = new PrefixedMcpServer(new MCPServerStreamableHttp({
      url,
      name: config.name ?? config.id,
      cacheToolsList: config.cacheToolsList,
      // codex_apps returns connector tools with empty `outputSchema: {}` that the
      // MCP SDK's strict Tool schema rejects (fails the turn during tools/list);
      // sanitize the response on the wire before validation. The namespace Set
      // also captures each tool's original connector namespace (P4 Part B.1).
      ...(isCodexAppsMcpServer(config) ? { fetch: codexAppsSanitizingFetch(globalThis.fetch, codexConnectorNamespaces) } : {}),
      ...await mcpServerRequestInit(settings, config, options),
      ...(config.timeoutMs ? {
        timeout: config.timeoutMs,
        clientSessionTimeoutSeconds: Math.ceil(config.timeoutMs / 1000),
      } : {}),
    }), config.id, config.allowedTools);
    // A server is connected BEST-EFFORT (a connect / tools-list failure drops
    // it instead of failing the turn) in two cases:
    //  - codex_apps: connector availability is RUNTIME-DISCOVERED — the
    //    device-code login may lack the connector scopes, and the backend can
    //    reject the bearer at the initialize/tools-list handshake, so a 401/403
    //    (or a missing/failed token) drops the server.
    //  - an AUTO-ATTACHED workspace-default capability MCP (ToolRef.optional):
    //    the caller never explicitly requested it, so a broken/expired
    //    capability credential must SKIP the server with a warning, never kill
    //    the turn before the model runs. An EXPLICITLY-requested tool omits
    //    `optional` and stays strict (below), preserving the fail-loud contract.
    const optional = tool.optional === true;
    return { server, bestEffort: isCodexAppsMcpServer(config) || optional, optional };
  }));
  const requiredServers = servers.filter((entry) => !entry.bestEffort).map((entry) => entry.server);
  const bestEffortServers = servers.filter((entry) => entry.bestEffort).map((entry) => entry.server);
  // Names of the OPTIONAL capability servers (not codex_apps) so a drop is
  // surfaced as a warning; codex_apps keeps its historically-quiet drop (a
  // not-logged-in ChatGPT plan is a normal, non-noteworthy state).
  const optionalServerNames = new Set(
    servers.filter((entry) => entry.optional).map((entry) => entry.server.name),
  );
  const connectedRequired = await connectMcpServers(requiredServers, {
    connectInParallel: true,
    strict: true,
  });
  const connectedBestEffort = bestEffortServers.length
    ? await connectMcpServers(bestEffortServers, {
      connectInParallel: true,
      strict: false,
    })
    : null;
  if (connectedBestEffort) {
    for (const failed of connectedBestEffort.failed) {
      if (!optionalServerNames.has(failed.name)) {
        continue;
      }
      const error = connectedBestEffort.errors.get(failed);
      console.warn(
        `[mcp] optional capability server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
        error instanceof Error ? error.message : error,
      );
    }
  }
  return {
    mcpServers: [...connectedRequired.active, ...(connectedBestEffort?.active ?? [])],
    close: async () => {
      // Close both groups even if the first close rejects; otherwise a failure
      // while closing the required servers would leak every best-effort
      // connection. The rejection still propagates to the caller.
      try {
        await connectedRequired.close();
      } finally {
        if (connectedBestEffort) {
          await connectedBestEffort.close();
        }
      }
    },
    codexConnectorNamespaces,
  };
}
|
|
1140
|
+
|
|
1141
|
+
/**
 * Picks the connect-time request headers for one MCP server, by category.
 *
 * Resolution order: codex_apps, then first-party, then the server's own
 * configured headers. Returns `{}` when no headers apply.
 */
async function mcpServerRequestInit(settings: Settings, config: Settings["mcpServers"][number], options: PrepareToolsOptions): Promise<{ requestInit: { headers: Record<string, string> } } | {}> {
  // codex_apps goes FIRST so the static-headers branch below can never apply
  // to it: its refreshing ChatGPT/Codex bearer is resolved per-connect from the
  // codex ALS, never from a baked `config.headers` value.
  if (isCodexAppsMcpServer(config)) {
    return await codexAppsMcpRequestInit(settings);
  }
  if (isFirstPartyMcpServer(settings, config)) {
    return await firstPartyMcpRequestInit(settings, config, options);
  }
  // Third-party MCP servers receive only their configured credential headers
  // (for example workspace-enabled capability MCP credentials) — never
  // OpenGeni's own access key or delegated tokens.
  const configured = config.headers;
  if (!configured || Object.keys(configured).length === 0) {
    return {};
  }
  return { requestInit: { headers: { ...configured } } };
}
|
|
1159
|
+
|
|
1160
|
+
/**
 * Builds the auth headers for a first-party MCP server.
 *
 * Adds the access-key header when auth is required and a key is configured,
 * and a delegated bearer token (one hour expiry) when a delegation secret plus
 * account and workspace ids are available. Returns `{}` for non-first-party
 * servers or when neither header applies.
 */
async function firstPartyMcpRequestInit(settings: Settings, config: Settings["mcpServers"][number], options: PrepareToolsOptions): Promise<{ requestInit: { headers: Record<string, string> } } | {}> {
  if (!isFirstPartyMcpServer(settings, config)) {
    return {};
  }
  const headers: Record<string, string> = {};
  if (settings.authRequired && settings.accessKey) {
    headers["x-opengeni-access-key"] = settings.accessKey;
  }
  if (settings.delegationSecret && options.accountId && options.workspaceId) {
    // Claim key order is kept exactly as before; optional claims are only
    // present when the caller supplied them.
    const claims = {
      accountId: options.accountId,
      workspaceId: options.workspaceId,
      subjectId: options.subjectId ?? "worker:first-party-mcp",
      ...(options.subjectLabel ? { subjectLabel: options.subjectLabel } : {}),
      permissions: options.firstPartyPermissions ?? firstPartyMcpPermissions,
      ...(options.sessionId ? { sessionId: options.sessionId } : {}),
      exp: Math.floor(Date.now() / 1000) + 60 * 60,
    };
    const delegatedToken = await signDelegatedAccessToken(settings.delegationSecret, claims);
    headers.authorization = `Bearer ${delegatedToken}`;
  }
  return Object.keys(headers).length === 0 ? {} : { requestInit: { headers } };
}
|
|
1188
|
+
|
|
1189
|
+
/**
 * Produces the connect-time auth headers for the codex_apps connectors MCP.
 *
 * The bearer comes from codexRequestStorage — the SAME refreshing token source
 * the model fetch uses (proactive refresh + single-flight + db persist) — so it
 * is valid at connect time. When there is no store (a non-codex turn, or
 * prepareTools ran outside the ALS) or fetching the token fails
 * (needs_relogin), `{}` is returned so the best-effort connect drops the server
 * rather than crashing the turn.
 */
async function codexAppsMcpRequestInit(settings: Settings): Promise<{ requestInit: { headers: Record<string, string> } } | {}> {
  const store = codexRequestStorage.getStore();
  if (!store) {
    return {};
  }
  let token;
  try {
    token = await store.getToken();
  } catch {
    // Deliberately quiet: a token failure just means "no headers".
    return {};
  }
  const headers: Record<string, string> = {
    authorization: `Bearer ${token.accessToken}`,
    // The ChatGPT backend sits behind Cloudflare, which 403s requests bearing a
    // default runtime User-Agent (confirmed live: an HTML bot-block page, NOT an
    // auth failure). Send the codex client identity — the same originator/version/
    // User-Agent the model fetch uses — so the MCP connect handshake passes the edge.
    originator: CODEX_ORIGINATOR,
    "user-agent": `${CODEX_ORIGINATOR}/${store.clientVersion}`,
    version: store.clientVersion,
    // Optional headers are appended only when their source value is set.
    ...(token.chatgptAccountId ? { "chatgpt-account-id": token.chatgptAccountId } : {}),
    ...(settings.codexProductSku ? { "X-OpenAI-Product-Sku": settings.codexProductSku } : {}),
  };
  return { requestInit: { headers } };
}
|
|
1226
|
+
|
|
1227
|
+
/**
 * Default first-party MCP permission set signed into a worker's delegated
 * token when the session does not provide its own.
 *
 * POWERFUL BY DEFAULT: it carries every permission that unlocks a first-party
 * tool — session orchestration (sessions:*), workspace environments
 * (environments:*), and GitHub (github:use) — so agents are fully capable out
 * of the box. A user DEMOTES a specific session by setting a narrower
 * session.firstPartyMcpPermissions (the create-session permission picker),
 * which the worker uses instead. Account-level scopes
 * (billing/account/members/api_keys/workspace:admin) are intentionally left
 * out: they gate no first-party tool and are not agent capabilities. (A
 * finer-grained capability model comes later.)
 */
const firstPartyMcpPermissions: Permission[] = [
  "workspace:read",
  "files:read",
  "documents:search",
  "scheduled_tasks:manage",
  "scheduled_tasks:run",
  "goals:manage",
  "sessions:read",
  "sessions:create",
  "sessions:control",
  "environments:use",
  "environments:manage",
  "github:use",
];
|
|
1251
|
+
|
|
1252
|
+
/**
 * Tells whether a server config is the codex_apps connectors server, matched
 * purely by its registry id.
 *
 * codex_apps is third-party-by-trust (the external ChatGPT connectors backend)
 * but needs DYNAMIC auth, so it is its own category — deliberately NOT folded
 * into the first-party allowlist, which would wrongly sign an OpenGeni
 * delegated token to chatgpt.com.
 */
function isCodexAppsMcpServer(config: Settings["mcpServers"][number]): boolean {
  const { id } = config;
  return id === CODEX_APPS_MCP_SERVER_ID;
}
|
|
1259
|
+
|
|
1260
|
+
/**
 * Decides whether a server config is one of the first-party MCP servers.
 *
 * The id must be one of `opengeni`, `files`, `docs`. On top of that, either
 * the URL is a `{workspaceId}` template, or its normalized form equals one of
 * the URLs produced by `firstPartyMcpUrls`.
 */
function isFirstPartyMcpServer(settings: Settings, config: Settings["mcpServers"][number]): boolean {
  const firstPartyIds = ["opengeni", "files", "docs"];
  if (!firstPartyIds.includes(config.id)) {
    return false;
  }
  if (config.url.includes("{workspaceId}")) {
    return true;
  }
  const normalized = normalizeUrl(config.url);
  if (!normalized) {
    // An unparseable URL can never match a first-party URL.
    return false;
  }
  return firstPartyMcpUrls(settings).includes(normalized);
}
|
|
1273
|
+
|
|
1274
|
+
/**
 * Resolves the workspace-scoped URL to use for a first-party MCP server on
 * this run, or `null` when no rewrite applies (no workspace id, a non
 * first-party id, or a config that is not recognised as first-party).
 *
 * A `{workspaceId}` template in the server's own URL wins. Otherwise the base
 * comes from `settings.opengeniMcpUrl` (template or plain) or the shared
 * default, and the `docs` server gets a `/docs` path suffix.
 */
function firstPartyMcpServerUrlForRun(settings: Settings, config: Settings["mcpServers"][number], workspaceId: string | undefined): string | null {
  if (!workspaceId) {
    return null;
  }
  if (!["opengeni", "files", "docs"].includes(config.id)) {
    return null;
  }
  if (config.url.includes("{workspaceId}")) {
    return config.url.replaceAll("{workspaceId}", workspaceId);
  }
  if (!isFirstPartyMcpServer(settings, config)) {
    return null;
  }
  let rawBase: string;
  if (settings.opengeniMcpUrl?.includes("{workspaceId}")) {
    rawBase = settings.opengeniMcpUrl.replaceAll("{workspaceId}", workspaceId);
  } else if (settings.opengeniMcpUrl) {
    rawBase = scopedMcpUrlFromConfiguredBase(settings.opengeniMcpUrl, workspaceId);
  } else {
    // unset → the shared loopback default (a `{workspaceId}` template owned by
    // @opengeni/config's firstPartyMcpBaseUrl), scoped to this run's workspace.
    rawBase = firstPartyMcpBaseUrl(settings).replaceAll("{workspaceId}", workspaceId);
  }
  const scoped = new URL(rawBase);
  if (config.id === "docs") {
    // Drop trailing slashes first so the suffix never produces `//docs`.
    scoped.pathname = `${scoped.pathname.replace(/\/+$/, "")}/docs`;
  }
  return scoped.toString();
}
|
|
1297
|
+
|
|
1298
|
+
/**
 * Rewrites a configured base URL into the workspace-scoped MCP endpoint.
 *
 * Origin (scheme, host, port) is kept from `raw`; the path is replaced with
 * `/v1/workspaces/<workspaceId>/mcp` and any query string or fragment is
 * removed.
 *
 * @param raw         Absolute base URL to take the origin from.
 * @param workspaceId Workspace id placed into the path as ONE segment.
 * @returns The scoped URL as a string.
 * @throws TypeError when `raw` is not a valid absolute URL.
 */
function scopedMcpUrlFromConfiguredBase(raw: string, workspaceId: string): string {
  const url = new URL(raw);
  // Escape the id so characters such as `/`, `?` or `#` cannot add path
  // segments or otherwise reshape the URL. Ids made of unreserved characters
  // (letters, digits, `-`, `_`, `.`) are left exactly as they are.
  url.pathname = `/v1/workspaces/${encodeURIComponent(workspaceId)}/mcp`;
  url.search = "";
  url.hash = "";
  return url.toString();
}
|
|
1305
|
+
|
|
1306
|
+
/**
 * Lists the normalized URLs that count as first-party: the base MCP URL and
 * the same URL with a `/docs` path suffix. Returns an empty list when the base
 * cannot be parsed.
 */
function firstPartyMcpUrls(settings: Settings): string[] {
  // The unset case is routed through the shared loopback default so that
  // literal lives in exactly one place (@opengeni/config's firstPartyMcpBaseUrl).
  const base = normalizeUrl(settings.opengeniMcpUrl ?? firstPartyMcpBaseUrl(settings));
  if (!base) {
    return [];
  }
  const docsUrl = new URL(base);
  docsUrl.pathname = `${docsUrl.pathname.replace(/\/+$/, "")}/docs`;
  const docs = normalizeUrl(docsUrl.toString());
  return docs ? [base, docs] : [base];
}
|
|
1317
|
+
|
|
1318
|
+
/**
 * Canonicalizes a URL string for equality comparison: the fragment is removed
 * and trailing slashes are stripped from the path.
 *
 * @param raw Candidate URL.
 * @returns The canonical string, or `null` when `raw` does not parse as a URL.
 */
function normalizeUrl(raw: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(raw);
  } catch {
    return null;
  }
  parsed.hash = "";
  parsed.pathname = parsed.pathname.replace(/\/+$/, "");
  return parsed.toString();
}
|
|
1328
|
+
|
|
1329
|
+
/**
 * Builds the agent-visible tool name: the server's registry id and the tool's
 * own name, separated by a double underscore.
 */
export function prefixedMcpToolName(registryId: string, toolName: string): string {
  return [registryId, toolName].join("__");
}
|
|
1332
|
+
|
|
1333
|
+
/**
 * MCPServer decorator that namespaces tool names with the server's registry id
 * and optionally restricts the exposed tools to an allowlist.
 *
 * Tools are listed as `<registryId>__<tool>`; calls must use that prefixed name
 * and are forwarded to the wrapped server under the original name. All other
 * operations are forwarded unchanged.
 */
class PrefixedMcpServer implements MCPServer {
  readonly cacheToolsList: boolean;
  readonly name: string;
  readonly prefix: string;
  private readonly allowedTools: Set<string> | undefined;

  /**
   * @param inner        Server being wrapped.
   * @param registryId   Registry id; becomes both `name` and the tool prefix.
   * @param allowedTools Unprefixed tool names to expose; omit to expose all.
   */
  constructor(private readonly inner: MCPServer, registryId: string, allowedTools?: string[]) {
    this.name = registryId;
    this.prefix = prefixedMcpToolName(registryId, "");
    this.cacheToolsList = inner.cacheToolsList;
    this.allowedTools = allowedTools ? new Set(allowedTools) : undefined;
  }

  connect(): Promise<void> {
    return this.inner.connect();
  }

  close(): Promise<void> {
    return this.inner.close();
  }

  /** Lists the wrapped server's allowed tools under their prefixed names. */
  async listTools(): Promise<RuntimeMcpTool[]> {
    const exposed: RuntimeMcpTool[] = [];
    for (const tool of await this.inner.listTools()) {
      if (this.isAllowed(tool.name)) {
        exposed.push({ ...tool, name: prefixedMcpToolName(this.name, tool.name) });
      }
    }
    return exposed;
  }

  /**
   * Calls a tool by its prefixed name.
   * @throws Error when the prefix is missing or the tool is not allowlisted.
   */
  async callTool(toolName: string, args: Record<string, unknown> | null, meta?: Record<string, unknown> | null): Promise<any> {
    const bareName = this.unprefixToolName(toolName);
    if (!this.isAllowed(bareName)) {
      throw new Error(`MCP tool ${bareName} is not allowed for server ${this.name}`);
    }
    return await this.inner.callTool(bareName, args, meta);
  }

  invalidateToolsCache(): Promise<void> {
    return this.inner.invalidateToolsCache();
  }

  /** Forwards to the wrapped server's `listResources`, if it has one. */
  async listResources(params?: Record<string, unknown>): Promise<any> {
    const target = this.inner as MCPServer & { listResources?: (params?: Record<string, unknown>) => Promise<any> };
    if (!target.listResources) {
      throw new Error(`MCP server ${this.name} does not support resources`);
    }
    return await target.listResources(params);
  }

  /** Forwards to the wrapped server's `listResourceTemplates`, if it has one. */
  async listResourceTemplates(params?: Record<string, unknown>): Promise<any> {
    const target = this.inner as MCPServer & { listResourceTemplates?: (params?: Record<string, unknown>) => Promise<any> };
    if (!target.listResourceTemplates) {
      throw new Error(`MCP server ${this.name} does not support resource templates`);
    }
    return await target.listResourceTemplates(params);
  }

  /** Forwards to the wrapped server's `readResource`, if it has one. */
  async readResource(uri: string): Promise<any> {
    const target = this.inner as MCPServer & { readResource?: (uri: string) => Promise<any> };
    if (!target.readResource) {
      throw new Error(`MCP server ${this.name} does not support resource reads`);
    }
    return await target.readResource(uri);
  }

  /** With no allowlist every tool passes; otherwise the name must be listed. */
  private isAllowed(toolName: string): boolean {
    if (!this.allowedTools) {
      return true;
    }
    return this.allowedTools.has(toolName);
  }

  /** Removes this server's prefix from a tool name, rejecting foreign names. */
  private unprefixToolName(toolName: string): string {
    if (toolName.startsWith(this.prefix)) {
      return toolName.slice(this.prefix.length);
    }
    throw new Error(`MCP tool ${toolName} is missing expected ${this.name} prefix`);
  }
}
|
|
1408
|
+
|
|
1409
|
+
// createSandboxClient (+ withDockerNetwork / connectDockerNetwork) moved to the
|
|
1410
|
+
// agent-loop-free leaf ./sandbox; re-exported via `export * from "./sandbox"`.
|
|
1411
|
+
|
|
1412
|
+
/** Options accepted by `prepareRunInput`. */
export type PrepareInputOptions = {
  /** Sandbox client passed through when restoring a sandbox session state. */
  sandboxClient?: unknown;
  /**
   * Usable input-token budget B (window - reserved output). When provided, the
   * assembled history goes through `enforceInputBudget` so that a single
   * over-budget input can never be sent — the last-resort backstop behind the
   * best-effort pre-turn compaction. Leaving it undefined turns the guard off
   * (no behaviour change for callers that don't opt in).
   */
  inputBudgetTokens?: number;
};
|
|
1423
|
+
|
|
1424
|
+
/**
 * Read-path budget guard for an assembled model input.
 *
 * When a positive budget is supplied, the history is passed through
 * `enforceInputBudget` together with the token estimate of the trailing
 * message, and whatever history it keeps is returned with the trailing message
 * appended. Per the original contract this drops the oldest history at a clean
 * turn boundary (orphan-safe: cuts only at user-message boundaries). The
 * trailing user message counts against the budget but is never dropped.
 *
 * @param history           Prior conversation items, oldest first.
 * @param trailing          The new user message; always the last returned item.
 * @param inputBudgetTokens Budget in tokens; non-numbers and values `<= 0` disable the guard.
 * @returns The (possibly trimmed) history followed by `trailing`.
 */
function guardAssembledInput(
  history: AgentInputItem[],
  trailing: AgentInputItem,
  inputBudgetTokens: number | undefined,
): AgentInputItem[] {
  const withTrailing = (kept: AgentInputItem[]): AgentInputItem[] => [...kept, trailing];
  if (typeof inputBudgetTokens !== "number" || inputBudgetTokens <= 0) {
    return withTrailing(history);
  }
  const reservedForTrailing = estimateItemTokens(trailing as unknown as Record<string, unknown>);
  const outcome = enforceInputBudget(
    history as unknown as Array<Record<string, unknown>>,
    inputBudgetTokens,
    reservedForTrailing,
  );
  if (outcome.trimmed) {
    console.warn(
      `read-path budget guard trimmed ${outcome.droppedCount} oldest history item(s) to fit input budget (${inputBudgetTokens} tokens); the over-budget input was NOT sent`,
    );
  }
  return withTrailing(outcome.items as unknown as AgentInputItem[]);
}
|
|
1451
|
+
|
|
1452
|
+
/**
 * Turns a segment input into the value handed to the agent runner.
 *
 * Message inputs take one of three routes:
 *  1. items mode — `historyItems` is non-empty: sanitized history plus the new
 *     user message, budget-guarded;
 *  2. fresh start — no saved run state, or the cleared sentinel: just the text;
 *  3. run-state resume — the history is read from the deserialized RunState,
 *     sanitized and budget-guarded.
 * Any other input is treated as an approval decision applied to the saved run
 * state.
 *
 * @throws Error when an approval targets a cleared context or an approval id
 *         that is not among the saved interruptions.
 */
export async function prepareRunInput(agent: Agent<any, any>, input: AgentSegmentInput, options: PrepareInputOptions = {}): Promise<PreparedAgentInput> {
  if (input.kind === "message") {
    // The new user turn, appended after whatever history survives the guard.
    const userTurn = {
      type: "message",
      role: "user",
      content: input.text,
    } as AgentInputItem;

    if (input.historyItems && input.historyItems.length > 0) {
      // Items mode: conversation truth comes from the database, the sandbox
      // recovery descriptor from its own store. The RunState blob is not
      // touched at all on this path.
      const restoredSandbox = input.sandboxEnvelope
        ? await restoredSandboxSessionStateFromEntry(input.sandboxEnvelope, options.sandboxClient)
        : undefined;
      // History replayed verbatim from the database can contain a tool-call
      // pairing the Responses API rejects (most destructively an orphaned
      // function_call_result with no matching function_call — which 400s every
      // turn and bricks the session until the row is hand-deleted). Sanitizing
      // the in-memory copy before it reaches the model lets existing
      // corruption self-heal and makes a future write-path race non-fatal; the
      // stored rows are never touched.
      const cleanHistory = sanitizeHistoryItemsForModel(
        input.historyItems as unknown as Array<Record<string, unknown>>,
      ) as unknown as AgentInputItem[];
      // Read-path budget guard: even after the orphan sanitizer an assembled
      // input can exceed the model window (pre-turn compaction is best-effort
      // and can no-op). Trimming the oldest history at a clean turn boundary
      // keeps an over-budget request from being sent. No-op without a budget.
      const guardedInput = guardAssembledInput(cleanHistory, userTurn, options.inputBudgetTokens);
      return {
        input: guardedInput,
        ...(restoredSandbox ? { sandboxSessionState: restoredSandbox } : {}),
      };
    }

    // No prior state, or a cleared sentinel: start fresh. The clear sentinel
    // ({@link CLEARED_RUN_STATE_BLOB}) is not a real serialized run state — it
    // has no $schemaVersion, so RunState.fromString would throw on it. In
    // run_state history mode this message path is the one that reads the blob
    // after a /clear, so recognizing the sentinel here is what keeps the next
    // turn working (a fresh, empty context) instead of bricking on deserialize.
    if (!input.serializedRunState || isClearedRunStateBlob(input.serializedRunState)) {
      return { input: input.text };
    }

    const resumed = await RunState.fromString(agent, input.serializedRunState);
    const restoredSandbox = await restoredSandboxSessionState(resumed, options.sandboxClient);
    // state.history already runs the SDK's own orphan-tool-call pruning, but
    // applying the same sanitizer keeps the legacy run-state resume path under
    // one invariant with the items path and is defensive against a corrupt blob.
    const cleanHistory = sanitizeHistoryItemsForModel(
      resumed.history as unknown as Array<Record<string, unknown>>,
    ) as unknown as AgentInputItem[];
    // Same read-path budget guard as the items path: an over-budget resumed
    // history is kept off the wire by trimming the oldest turns when a budget
    // is supplied.
    const guardedInput = guardAssembledInput(cleanHistory, userTurn, options.inputBudgetTokens);
    return {
      input: guardedInput,
      ...(restoredSandbox ? { sandboxSessionState: restoredSandbox } : {}),
      serializedRunStateForSandbox: input.serializedRunState,
    };
  }

  // An approval can only be resumed against a real saved run state. If the
  // latest blob is the cleared sentinel the awaiting turn was wiped (the API
  // refuses clear in requires_action, so this is a defensive guard) — fail with
  // an honest message instead of the cryptic SDK "missing schema version".
  if (isClearedRunStateBlob(input.serializedRunState)) {
    throw new Error("Cannot resume an approval: the session context was cleared, so the awaiting run state no longer exists.");
  }
  const state = await RunState.fromString(agent, input.serializedRunState);
  const pending = state.getInterruptions();
  const target = pending.find((item: any) => approvalIdentifier(item) === input.approvalId);
  if (!target) {
    throw new Error(`Approval not found in saved run state: ${input.approvalId}`);
  }
  if (input.decision === "approve") {
    state.approve(target as any);
  } else {
    // Any decision other than "approve" is a rejection; the optional message
    // is forwarded only when present.
    const rejection = input.message ? { message: input.message } : undefined;
    state.reject(target as any, rejection);
  }
  return { input: state };
}
|
|
1542
|
+
|
|
1543
|
+
/** Options accepted by `runAgentStream`. */
export type RunAgentStreamOptions = {
  /** Sandbox client supplied by the caller (opaque at this level). */
  sandboxClient?: unknown;
  /** Environment variables for the sandbox, as name → value pairs. */
  sandboxEnvironment?: Record<string, string>;
  /** Callback invoked with each normalized runtime event; may be async. */
  onRuntimeEvent?: (event: NormalizedRuntimeEvent) => Promise<void> | void;
  /**
   * OWNERSHIP INVERSION (P1.2): an externally-owned, already-live sandbox
   * session resolved by the per-turn resume-by-id path. When present,
   * runAgentStream does NOT build (or resume, or discard) a client — it threads
   * these straight into runOptions.sandbox as a NON-OWNED session. The SDK
   * registers a provided session non-owned (manager.js) and NEVER reaps it on a
   * normal finish (proven by spikes/sdk-keystone) — that is the keystone: the
   * one box survives across turns. Mutually exclusive with the per-run
   * createSandboxClient path (the owned branch takes precedence when both set).
   * Agent-dependent decorators (file-downloads, lifecycle/repo-clone hooks) are
   * re-applied around the resumed client here; the live `session`/`sessionState`
   * carry the box, so no create()/resume() is re-invoked inside run().
   */
  ownedSandbox?: {
    client: unknown; // built by the per-turn resume path (the raw provider client)
    session: unknown; // SandboxSessionLike — the live, NON-OWNED handle (never reaped)
    sessionState?: unknown; // SandboxSessionState the box was resumed from
    /**
     * The UN-PROXIED established box for platform setup (lifecycle hooks + file
     * resource materialization). `session` may be the mid-turn routing proxy
     * whose every exec re-reads the active pointer — platform-initiated setup
     * must NOT follow a swap onto a connected machine (the user's real
     * computer), so it runs against this pinned handle instead. Absent -> falls
     * back to `session`.
     */
    setupSession?: unknown;
  };
  /**
   * A per-turn model-input filter chained AFTER the provider-item-id strip.
   * Used by the genesis-title injection to prepend a hidden, NON-PERSISTED
   * directive: a callModelInputFilter mutates only `modelData.input` for each
   * model call and never touches `state.history`/`originalInput`, so the
   * reconcile dual-write never sees it.
   */
  callModelInputFilter?: CallModelInputFilter;
};
|
|
1576
|
+
|
|
1577
|
+
/**
 * One-shot directive appended to the agent's system prompt on the genesis turn
 * (see buildOpenGeniAgent's genesisTitleHint).
 *
 * It travels through the authoritative instructions channel so the model
 * reliably obeys, and it names the prefixed tool the agent actually sees
 * (opengeni__set_session_title). It is appended after the non-bypassable core
 * so a white-label persona can't drop it.
 */
export const GENESIS_TITLE_DIRECTIVE =
  `This is the first turn of a new session. Before responding to the user, call the opengeni__set_session_title tool with a concise 3-7 word title that summarizes what this session is about, then address the user's request normally.`;
|
|
1584
|
+
|
|
1585
|
+
/**
 * callModelInputFilter that drops the provider-assigned item id (rs_/msg_/fc_…)
 * from every input item right before a model call.
 *
 * Why: Responses-API requests that carry item ids are resolved against the
 * provider's stored responses, and that store is not durable enough to anchor
 * long runs on — a response that streamed successfully can be gone on the very
 * next call, which then fails with 400 "Item with id ... not found" (observed
 * live on Azure OpenAI mid-turn). Every item's content, including the encrypted
 * reasoning payload carried in providerData when
 * `openaiReasoningEncryptedContent` is on, is sent inline, so the ids only add
 * fragility.
 *
 * Only the `id` property is removed: pairing fields (`call_id`/`callId`) are
 * separate properties and are left alone. Items that carry an id are shallow-
 * cloned without it; the incoming items are never mutated, and items without
 * an `id` are passed through as the same reference.
 */
export const stripProviderItemIdsFilter: CallModelInputFilter = ({ modelData }) => {
  const input = modelData.input.map((entry) => {
    // Non-object entries and objects without an `id` need no work.
    if (!entry || typeof entry !== "object" || !("id" in entry)) {
      return entry;
    }
    const { id: _discardedId, ...withoutId } = entry as Record<string, unknown>;
    return withoutId as AgentInputItem;
  });
  return { ...modelData, input };
};
|
|
1608
|
+
|
|
1609
|
+
/**
 * callModelInputFilter that reduces every `computer_call` carrying BOTH
 * `action` and `actions` to exactly one of them (`actions` is kept, `action`
 * is dropped) by delegating to `normalizeComputerCallActions`.
 *
 * Why: the Azure computer-use endpoint rejects a computer_call that has both
 * with `400 Computer call input must include exactly one of `action` or
 * `actions``, and (live-proven against gpt-5.5's GA computer tool) it also
 * rejects the `action`-only form, accepting ONLY the batched plural `actions`.
 * The SDK 0.11.6 schema allows both, so a freshly emitted screenshot call
 * carries the redundant pair.
 *
 * Because this runs before EVERY model call — the turn-start history replay
 * and every mid-turn follow-up — it also covers the just-emitted, non-replayed
 * computer_call of the current turn, which the turn-start `prepareRunInput`
 * sanitizer never sees. Items are cloned, never mutated.
 */
export const normalizeComputerCallsFilter: CallModelInputFilter = ({ modelData }) => {
  // The normalizer works on loosely-typed records; the casts only bridge types.
  const looseItems = modelData.input as unknown as Array<Record<string, unknown>>;
  const normalizedItems = normalizeComputerCallActions(looseItems) as unknown as AgentInputItem[];
  return { ...modelData, input: normalizedItems };
};
|
|
1629
|
+
|
|
1630
|
+
/**
 * Fold a list of callModelInputFilters into a single filter.
 *
 * Filters run strictly in array order (left to right); each one receives the
 * original call arguments with `modelData` replaced by the previous filter's
 * (awaited) result. An empty list yields a filter that resolves to the
 * incoming `modelData` unchanged.
 */
function composeCallModelInputFilters(filters: CallModelInputFilter[]): CallModelInputFilter {
  const chained: CallModelInputFilter = async (args) => {
    let current = args.modelData;
    for (const step of filters) {
      // Sequential on purpose: every step must see the prior step's output.
      current = await step({ ...args, modelData: current });
    }
    return current;
  };
  return chained;
}
|
|
1643
|
+
|
|
1644
|
+
/**
 * Build the model-input filter that is applied before every model call.
 *
 * The computer_call action/actions normalizer is ALWAYS included (the Azure
 * endpoint 400s without it). The provider-item-id strip is appended after it
 * only when `settings.openaiProviderItemIds` is `"strip"`.
 *
 * @returns The composed filter. The declared type still allows `undefined`,
 *   but this implementation always returns a filter.
 */
export function callModelInputFilterForSettings(settings: Settings): CallModelInputFilter | undefined {
  const stripItemIds = settings.openaiProviderItemIds === "strip";
  const chain: CallModelInputFilter[] = stripItemIds
    ? [normalizeComputerCallsFilter, stripProviderItemIdsFilter]
    : [normalizeComputerCallsFilter];
  return composeCallModelInputFilters(chain);
}
|
|
1657
|
+
|
|
1658
|
+
/**
 * Start a streamed run of `agent` on a run-scoped Runner built from `settings`.
 *
 * `input` may be a bare string, a `RunState`, or an already prepared input; the
 * first two are wrapped as `{ input }`. The sandbox is wired in one of two ways:
 *
 * - OWNED PATH (`overrides.ownedSandbox` set): the live, externally owned
 *   session is handed straight to the SDK. Platform setup (beforeAgentStart
 *   hooks and file-resource downloads) is executed here, directly against the
 *   pinned setup session, before the run starts — unless the agent's active
 *   backend is "selfhosted".
 * - LEGACY PATH: a client is taken from `overrides.sandboxClient` or created
 *   from settings, then decorated (manifest refresh on resume, file downloads,
 *   lifecycle hooks) so setup happens when the SDK creates/resumes the session.
 *
 * In both paths the settings-derived model-input filter runs first and
 * `overrides.callModelInputFilter` (if any) runs after it.
 *
 * @returns Whatever the run-scoped Runner's `run()` resolves to for a
 *   `stream: true` run.
 */
export async function runAgentStream(agent: Agent<any, any>, input: PreparedAgentInput | string | RunState<any, any>, settings: Settings, overrides: RunAgentStreamOptions = {}) {
  const prepared: PreparedAgentInput = typeof input === "string" || input instanceof RunState ? { input } : input;
  const environment = overrides.sandboxEnvironment ?? collectSandboxEnvironment(settings);

  // OWNED PATH (P1.2 ownership inversion): the per-turn resume path injected a
  // live, externally-owned box. The live session goes straight into
  // runOptions.sandbox so the SDK registers it NON-OWNED and never reaps it on a
  // normal finish (the keystone). Only the agent-dependent decorators
  // (file-downloads + lifecycle/repo-clone hooks) are re-applied around the
  // resumed client — the manifest-refresh-on-resume wrap is a no-op when a live
  // session is supplied (resume is not re-invoked). This branch is reached ONLY
  // when sandboxOwnershipEnabled gated the activity into resolving a box; with
  // the flag off the activity never sets `ownedSandbox` and this whole block is
  // skipped (byte-for-byte the legacy path).
  const owned = overrides.ownedSandbox;
  if (owned) {
    const { client: providerClient, session: liveSession, sessionState: resumedState } = owned;
    // Platform setup (hooks + file materialization) execs against the UN-PROXIED
    // established box when the caller pinned one — never through the routing
    // proxy, whose per-op pointer re-read could land these execs on a machine
    // swapped in mid-turn.
    const pinnedSession = (owned.setupSession ?? liveSession) as SandboxSessionLike;
    const runAs = sandboxRunAs(settings);
    const downloads = sandboxFileDownloadsForAgent(agent);
    // Fresh context object per consumer, mirroring the two literals it replaces.
    const downloadContext = () => ({
      ...(overrides.onRuntimeEvent ? { onRuntimeEvent: overrides.onRuntimeEvent } : {}),
      ...(runAs ? { runAs } : {}),
    });
    const baseClient = providerClient as SandboxClient;
    const resourceClient = downloads.length > 0
      ? withSandboxFileDownloads(baseClient, downloads, downloadContext())
      : baseClient;
    // TOKEN-BROKER (B1): the per-turn git token seed, forwarded OFF-MANIFEST so
    // the repository-clone hook seeds it to the box's token file before the clone.
    const tokenSeed = gitTokenSeedForAgent(agent);
    const hooks = [
      ...sandboxLifecycleHooksForIds(sandboxLifecycleHookIds(settings)),
      ...sandboxRepositoryCloneHooksForAgent(agent),
    ];
    const hookContext: SandboxLifecycleHookContext = {
      environment,
      ...(overrides.onRuntimeEvent ? { onRuntimeEvent: overrides.onRuntimeEvent } : {}),
      ...(runAs ? { runAs } : {}),
      ...(tokenSeed ? { gitTokenSeed: tokenSeed } : {}),
    };
    // OWNED-PATH HOOKS: the SDK NEVER calls client.create/resume when handed a
    // live provided session (SandboxRuntimeManager uses `sandboxConfig.session`
    // directly), so the withSandboxLifecycleHooks decoration below can never fire
    // on this branch — it only wraps create/resume. Run the beforeAgentStart hooks
    // directly against the provided box, once per turn, BEFORE the run starts:
    // this is what executes the repository-clone hook (which also seeds the B1
    // askpass + token file) and the azure-cli-login hook on lease-owned boxes.
    // Re-running on a warm box is safe by construction: clone skips when the
    // target is already materialized, the token seed OVERWRITES the file (the
    // desired per-turn refresh), and az login is idempotent. A turn resumed after
    // preemption re-enters here and re-seeds the freshly minted token — which is
    // exactly what a >1h-old warm box needs.
    // EXCEPT on a connected machine (effective backend "selfhosted"): the box is
    // the user's REAL computer — the platform must not run setup against it (the
    // clone hooks are already empty there; this keeps az login off it too).
    const onConnectedMachine = agentActiveSandboxBackend.get(agent) === "selfhosted";
    if (!onConnectedMachine) {
      await runBeforeAgentStartHooks(pinnedSession, hooks, hookContext);
      // FILE RESOURCES: withSandboxFileDownloads has the IDENTICAL provided-
      // session blind spot (it too wraps only create/resume), so signed-URL file
      // materialization must also run directly against the pinned box. The
      // download command is idempotent (skips an existing file) and atomic
      // (tmp + rename), so the per-turn re-run is safe; the turn re-signs URLs
      // each run, so a re-warmed box always gets fresh links.
      if (downloads.length > 0) {
        await materializeSandboxFileDownloads(pinnedSession, downloads, downloadContext());
      }
    }
    // Keep the decoration as a safety net for any session the SDK does
    // create/resume through the client during this run (it is inert for the
    // provided session).
    const decoratedClient = withSandboxLifecycleHooks(resourceClient, hooks, hookContext);
    const ownedFilterCandidates = [callModelInputFilterForSettings(settings), overrides.callModelInputFilter];
    const ownedFilter = composeCallModelInputFilters(
      ownedFilterCandidates.filter((candidate): candidate is CallModelInputFilter => Boolean(candidate)),
    );
    const ownedRunOptions: Parameters<typeof run>[2] = {
      stream: true,
      maxTurns: settings.agentMaxModelCallsPerTurn,
      callModelInputFilter: ownedFilter,
    };
    ownedRunOptions.sandbox = {
      client: decoratedClient,
      session: liveSession,
      ...(resumedState ? { sessionState: resumedState } : {}),
    } as SandboxRunConfig;
    return await runScopedRunner(settings).run(agent, prepared.input, ownedRunOptions);
  }

  // LEGACY PATH: the SDK creates/resumes the session through the decorated client.
  const baseClient = overrides.sandboxClient ?? createSandboxClient(settings, environment);
  const refreshedClient = baseClient
    ? withManifestRefreshOnResume(baseClient as SandboxClient, (agent as { defaultManifest?: Manifest }).defaultManifest)
    : undefined;
  const runAs = sandboxRunAs(settings);
  const downloads = sandboxFileDownloadsForAgent(agent);
  const resourceClient = refreshedClient && downloads.length > 0
    ? withSandboxFileDownloads(refreshedClient, downloads, {
        ...(overrides.onRuntimeEvent ? { onRuntimeEvent: overrides.onRuntimeEvent } : {}),
        ...(runAs ? { runAs } : {}),
      })
    : refreshedClient;
  // TOKEN-BROKER (B1): the per-turn git token seed, forwarded OFF-MANIFEST so the
  // repository-clone hook seeds it to the box's token file before the clone.
  const gitTokenSeed = gitTokenSeedForAgent(agent);
  const client = resourceClient
    ? withSandboxLifecycleHooks(
        resourceClient,
        [
          ...sandboxLifecycleHooksForIds(sandboxLifecycleHookIds(settings)),
          ...sandboxRepositoryCloneHooksForAgent(agent),
        ],
        {
          environment,
          ...(overrides.onRuntimeEvent ? { onRuntimeEvent: overrides.onRuntimeEvent } : {}),
          ...(runAs ? { runAs } : {}),
          ...(gitTokenSeed ? { gitTokenSeed } : {}),
        },
      )
    : undefined;
  // An explicit session state wins; otherwise try to restore one from the
  // serialized run state (only possible when a client exists).
  const sandboxSessionState = prepared.sandboxSessionState
    ?? (prepared.serializedRunStateForSandbox && client
      ? await restoredSandboxSessionState(await RunState.fromString(agent, prepared.serializedRunStateForSandbox), client)
      : undefined);
  // Settings-derived filters first (computer_call normalization, then the
  // provider-item-id strip when the policy selects it), then any per-turn filter
  // (genesis title directive). Composed left-to-right so the directive lands on
  // the already-filtered input. A callModelInputFilter only shapes the per-call
  // model input, never the persisted run-state history.
  const filterCandidates = [callModelInputFilterForSettings(settings), overrides.callModelInputFilter];
  const callModelInputFilter = composeCallModelInputFilters(
    filterCandidates.filter((candidate): candidate is CallModelInputFilter => Boolean(candidate)),
  );
  const runOptions: Parameters<typeof run>[2] = {
    stream: true,
    maxTurns: settings.agentMaxModelCallsPerTurn,
    // Applied to every model call (turn-start history replay AND mid-turn
    // follow-ups). With the id strip active, requests never depend on the
    // provider's server-side response store: a stored response can vanish
    // between two calls of the same turn, failing the run with 400 "Item with
    // id 'rs_…' not found"; with the ids gone the request is self-contained.
    callModelInputFilter,
  };
  void settings.disableOpenaiTracing;
  if (client) {
    runOptions.sandbox = {
      client,
      ...(sandboxSessionState ? { sessionState: sandboxSessionState } : {}),
    } as SandboxRunConfig;
  }
  return await runScopedRunner(settings).run(agent, prepared.input, runOptions);
}
|
|
1808
|
+
|
|
1809
|
+
/**
 * Create a per-run `Runner` whose `modelProvider` is built from THIS turn's
 * settings.
 *
 * Why not the standalone `run()`: it uses a process-global default Runner whose
 * modelProvider is the lazy global default (whatever the last `configureOpenAI`
 * / `setDefaultModelProvider` installed). The worker runs ~100 activities
 * concurrently in one process, so a concurrently-starting turn for a DIFFERENT
 * workspace can overwrite that global between this turn's `configure` and a
 * per-call `getModel()` during the stream — leaving the global router with no
 * codex provider and throwing CodexSubscriptionUnavailableError on a
 * `codex/<slug>` name re-resolution (the SandboxAgent/Modal path drops the
 * Model instance and re-resolves by NAME).
 *
 * Pinning a run-scoped Runner makes the mutable global irrelevant to
 * correctness: each concurrent turn resolves names against its OWN settings
 * (which carry the codex-subscription provider via withCodexProvider for an
 * active workspace, and the registry providers). Everything else is the SDK's
 * default Runner config. setDefaultModelProvider remains only as a boot-time
 * fallback.
 */
function runScopedRunner(settings: Settings): Runner {
  const modelProvider = new MultiProviderModelProvider(settings);
  return new Runner({ modelProvider });
}
|
|
1830
|
+
|
|
1831
|
+
export { MaxTurnsExceededError } from "@openai/agents";
|
|
1832
|
+
|
|
1833
|
+
/**
 * Recognize the agents SDK per-segment turn cap.
 *
 * The cap is a pacing valve, not a session failure: callers should end the
 * segment gracefully (idle) so an active goal's continuation loop -- or a
 * follow-up user message -- resumes the work.
 *
 * @param error Anything caught from a run.
 * @returns `null` when `error` is not a `MaxTurnsExceededError`. Otherwise an
 *   object whose `serializedRunState` is the serialized run state the SDK
 *   attached when the cap hit (so the resumed turn keeps full context), or
 *   `null` when no state was attached or serializing it threw.
 */
export function maxTurnsExceededRunState(error: unknown): { serializedRunState: string | null } | null {
  if (error instanceof MaxTurnsExceededError) {
    let serializedRunState: string | null = null;
    try {
      if (error.state) {
        serializedRunState = error.state.toString();
      }
    } catch {
      // Serialization is best-effort; fall back to "no state".
      serializedRunState = null;
    }
    return { serializedRunState };
  }
  return null;
}
|
|
1850
|
+
|
|
1851
|
+
/**
 * Extract the serialized run state attached to an agents SDK error, if any.
 *
 * Provider failures usually surface as raw API errors without state; callers
 * must treat a `null` here as "resume from the previous snapshot" rather than
 * as an error.
 *
 * @param error Anything caught from a run.
 * @returns The serialized state, or `null` when `error` is not an
 *   `AgentsError`, carries no state, or serializing the state threw.
 */
export function agentsErrorRunState(error: unknown): string | null {
  const attachedState = error instanceof AgentsError ? error.state : undefined;
  if (!attachedState) {
    return null;
  }
  try {
    return attachedState.toString();
  } catch {
    return null;
  }
}
|
|
1867
|
+
|
|
1868
|
+
/**
 * Decorate a sandbox client so that every resumed session is topped up with
 * the manifest entries it is missing relative to `targetManifest`.
 *
 * When there is no target manifest, or the client cannot resume, the client is
 * returned as-is. Otherwise a new client object is returned: `resume` applies
 * `applyMissingManifestEntries` to the resumed session before handing it back,
 * and every other method that the wrapped client defines is forwarded
 * unchanged (methods the client lacks stay absent on the wrapper).
 */
export function withManifestRefreshOnResume(client: SandboxClient, targetManifest: Manifest | undefined): SandboxClient {
  if (targetManifest && client.resume) {
    const resumeWithRefresh = async (state: SandboxSessionState) => {
      const resumed = await client.resume!(state);
      await applyMissingManifestEntries(resumed, targetManifest);
      return resumed;
    };
    return {
      backendId: client.backendId,
      ...(client.supportsDefaultOptions !== undefined ? { supportsDefaultOptions: client.supportsDefaultOptions } : {}),
      ...(client.create ? { create: async (...args: any[]) => await (client.create as any)(...args) } : {}),
      resume: resumeWithRefresh,
      ...(client.delete ? { delete: async (state: SandboxSessionState) => await client.delete!(state) } : {}),
      ...(client.serializeSessionState
        ? { serializeSessionState: async (state: SandboxSessionState, options) => await client.serializeSessionState!(state, options) }
        : {}),
      ...(client.canPersistOwnedSessionState
        ? { canPersistOwnedSessionState: async (state: SandboxSessionState) => await client.canPersistOwnedSessionState!(state) }
        : {}),
      ...(client.canReusePreservedOwnedSession
        ? { canReusePreservedOwnedSession: async (state: SandboxSessionState) => await client.canReusePreservedOwnedSession!(state) }
        : {}),
      ...(client.deserializeSessionState
        ? { deserializeSessionState: async (state: Record<string, unknown>) => await client.deserializeSessionState!(state) }
        : {}),
    };
  }
  return client;
}
|
|
1888
|
+
|
|
1889
|
+
/**
 * Bring a (resumed) sandbox session up to date with `targetManifest` by adding
 * only the entries the session's current manifest does not have yet.
 *
 * Behaviour, in order:
 * - A target without entries is a no-op (the environment is not refreshed
 *   either in that case).
 * - Existing entries are never replaced: an entry present on both sides must
 *   be identical (compared via `stableJson`).
 * - New entries are applied through `session.applyManifest` when available,
 *   otherwise one by one through `session.materializeEntry`. An environment
 *   change can only be applied through `applyManifest`.
 * - On success `session.state.manifest` is replaced with the merged manifest.
 *
 * @throws Error when the target has entries but the session has no manifest
 *   state, supports neither `applyManifest` nor `materializeEntry`, has a
 *   different manifest root, would need an existing entry replaced, or needs an
 *   environment refresh without `applyManifest` support.
 */
export async function applyMissingManifestEntries(session: SandboxSessionLike, targetManifest: Manifest): Promise<void> {
  const liveManifest = (session as { state?: { manifest?: Manifest | { root?: string; entries?: Record<string, any>; environment?: Record<string, any> } } }).state?.manifest;
  const currentManifest = liveManifest ? ensureManifest(liveManifest) : undefined;
  const target = ensureManifest(targetManifest);

  // Nothing requested -> nothing to do, whatever the session supports.
  if (Object.keys(target.entries).length === 0) {
    return;
  }
  if (!currentManifest) {
    throw new Error("Resumed sandbox session cannot apply new manifest entries because current manifest state is unavailable");
  }
  if (!session.applyManifest && !session.materializeEntry) {
    throw new Error("Resumed sandbox session cannot apply new manifest entries because it does not support applyManifest() or materializeEntry()");
  }
  if (currentManifest.root !== target.root) {
    throw new Error("Cannot apply per-turn resources to a sandbox with a different manifest root");
  }

  // Collect the entries that are new; identical duplicates are skipped and
  // conflicting ones abort.
  const knownEntries = currentManifest.entries as Record<string, unknown>;
  const additions: Record<string, any> = {};
  for (const [path, entry] of Object.entries(target.entries)) {
    const existing = knownEntries[path];
    if (existing !== undefined) {
      if (stableJson(existing) !== stableJson(entry)) {
        throw new Error(`Cannot replace existing sandbox manifest entry: ${path}`);
      }
      continue;
    }
    additions[path] = entry;
  }

  const environmentChanged = stableJson(currentManifest.environment) !== stableJson(target.environment);
  if (environmentChanged && !session.applyManifest) {
    throw new Error("Resumed sandbox session cannot refresh manifest environment because it does not support applyManifest()");
  }
  if (!environmentChanged && Object.keys(additions).length === 0) {
    return;
  }

  // Carry path grants through manifest rebuilds: since @openai/agents 0.11.0
  // they gate local source materialization, and run states saved before the
  // upgrade have manifests without grants.
  const extraPathGrants = mergePathGrants(currentManifest.extraPathGrants, target.extraPathGrants);
  const grantFields = extraPathGrants.length ? { extraPathGrants } : {};

  if (session.applyManifest) {
    const delta = new Manifest({
      root: currentManifest.root,
      entries: additions,
      environment: target.environment,
      ...grantFields,
    });
    await session.applyManifest(delta);
  } else {
    // Only reachable when the environment is unchanged (checked above).
    for (const [path, entry] of Object.entries(additions)) {
      await session.materializeEntry!({ path, entry });
    }
  }

  // Record the merged view so later refreshes see what is now materialized.
  (session as { state?: { manifest?: Manifest } }).state!.manifest = new Manifest({
    root: currentManifest.root,
    environment: environmentChanged ? target.environment : currentManifest.environment,
    entries: {
      ...currentManifest.entries,
      ...additions,
    },
    ...grantFields,
  });
}
|
|
1956
|
+
|
|
1957
|
+
/**
 * Merge two path-grant lists into one, de-duplicated by `path`.
 *
 * Grants from `target` override grants from `current` with the same path; the
 * result keeps first-seen path order (all of `current`, then any new `target`
 * paths). Missing lists are treated as empty.
 */
function mergePathGrants(
  current: Manifest["extraPathGrants"] | undefined,
  target: Manifest["extraPathGrants"] | undefined,
): Manifest["extraPathGrants"] {
  const byPath = new Map<string, Manifest["extraPathGrants"][number]>();
  const ordered = (current ?? []).concat(target ?? []);
  ordered.forEach((grant) => {
    byPath.set(grant.path, grant);
  });
  return Array.from(byPath.values());
}
|
|
1967
|
+
|
|
1968
|
+
/**
 * Decorate a sandbox client so that file resources are downloaded into every
 * session it creates or resumes.
 *
 * The downloads are normalized once; when nothing remains the client is
 * returned untouched. Otherwise `create` and `resume` (when the wrapped client
 * has them) materialize the downloads on the returned session, at most once
 * per session object, and all other defined methods are forwarded unchanged.
 *
 * @param context Optional runtime-event sink and `runAs` identity passed on to
 *   `materializeSandboxFileDownloads`.
 */
export function withSandboxFileDownloads(
  client: SandboxClient,
  downloads: SandboxFileDownload[],
  context: Pick<SandboxLifecycleHookContext, "onRuntimeEvent" | "runAs"> = {},
): SandboxClient {
  const pending = normalizeSandboxFileDownloads(downloads);
  if (pending.length === 0) {
    return client;
  }

  // Sessions that already received their downloads through this wrapper.
  const materialized = new WeakSet<object>();
  const ensureDownloads = async <T extends SandboxSessionLike>(session: T): Promise<T> => {
    if (typeof session !== "object" || session === null || materialized.has(session)) {
      return session;
    }
    await materializeSandboxFileDownloads(session, pending, context);
    // Marked only after success, so a failed attempt is retried next time.
    materialized.add(session);
    return session;
  };

  return {
    backendId: client.backendId,
    ...(client.supportsDefaultOptions !== undefined ? { supportsDefaultOptions: client.supportsDefaultOptions } : {}),
    ...(client.create
      ? { create: async (...args: any[]) => await ensureDownloads(await (client.create as any)(...args)) }
      : {}),
    ...(client.resume
      ? { resume: async (state: SandboxSessionState) => await ensureDownloads(await client.resume!(state)) }
      : {}),
    ...(client.delete ? { delete: async (state: SandboxSessionState) => await client.delete!(state) } : {}),
    ...(client.serializeSessionState
      ? { serializeSessionState: async (state: SandboxSessionState, options) => await client.serializeSessionState!(state, options) }
      : {}),
    ...(client.canPersistOwnedSessionState
      ? { canPersistOwnedSessionState: async (state: SandboxSessionState) => await client.canPersistOwnedSessionState!(state) }
      : {}),
    ...(client.canReusePreservedOwnedSession
      ? { canReusePreservedOwnedSession: async (state: SandboxSessionState) => await client.canReusePreservedOwnedSession!(state) }
      : {}),
    ...(client.deserializeSessionState
      ? { deserializeSessionState: async (state: Record<string, unknown>) => await client.deserializeSessionState!(state) }
      : {}),
  };
}
|
|
1997
|
+
|
|
1998
|
+
/**
 * Download each file resource into the sandbox by running a download command
 * inside the session, one download at a time.
 *
 * For every download a `sandbox.operation.started` runtime event is emitted,
 * followed by `sandbox.operation.completed` on success or
 * `sandbox.operation.failed` (with the error message) on failure; a failure is
 * rethrown and stops the remaining downloads. `session.exec` is preferred and
 * `session.execCommand` is the fallback.
 *
 * @param context Optional runtime-event sink and `runAs` identity for the
 *   command.
 * @throws Error when there is something to download but the session supports
 *   neither `exec` nor `execCommand`; otherwise whatever the command execution
 *   or `assertSandboxCommandSucceeded` throws.
 */
export async function materializeSandboxFileDownloads(
  session: SandboxSessionLike,
  downloads: SandboxFileDownload[],
  context: Pick<SandboxLifecycleHookContext, "onRuntimeEvent" | "runAs"> = {},
): Promise<void> {
  const pending = normalizeSandboxFileDownloads(downloads);
  if (pending.length === 0) {
    return;
  }
  if (!session.exec && !session.execCommand) {
    throw new Error("Sandbox file download materialization requires command execution support");
  }

  for (const download of pending) {
    const targetPath = sandboxDownloadTargetPath(download);
    // Shared payload for the started/completed/failed events of this download.
    const eventPayload = {
      name: "file-resource-download",
      fileId: download.fileId,
      path: targetPath,
      sizeBytes: download.sizeBytes ?? null,
      expiresAt: download.expiresAt ? new Date(download.expiresAt).toISOString() : null,
    };
    await context.onRuntimeEvent?.({ type: "sandbox.operation.started", payload: { ...eventPayload } });
    try {
      const request = {
        cmd: sandboxFileDownloadCommand(download, targetPath),
        workdir: "/workspace",
        ...(context.runAs ? { runAs: context.runAs } : {}),
        yieldTimeMs: SANDBOX_LIFECYCLE_COMMAND_TIMEOUT_MS,
        maxOutputTokens: 20_000,
      };
      // Called as methods so the session keeps its `this` binding.
      const result = session.exec
        ? await session.exec(request)
        : await session.execCommand!(request);
      assertSandboxCommandSucceeded(result, `Sandbox file resource download ${download.fileId}`);
      await context.onRuntimeEvent?.({ type: "sandbox.operation.completed", payload: { ...eventPayload } });
    } catch (error) {
      await context.onRuntimeEvent?.({
        type: "sandbox.operation.failed",
        payload: {
          ...eventPayload,
          error: error instanceof Error ? error.message : String(error),
        },
      });
      throw error;
    }
  }
}
|
|
2050
|
+
|
|
2051
|
+
/**
 * Look up the file downloads registered for `agent` in `agentFileDownloads`.
 *
 * @returns A fresh array copy of the registered downloads, or an empty array
 *   when `agent` is not an object or has nothing registered.
 */
export function sandboxFileDownloadsForAgent(agent: unknown): SandboxFileDownload[] {
  if (typeof agent !== "object" || agent === null) {
    return [];
  }
  const registered = agentFileDownloads.get(agent) ?? [];
  return [...registered];
}
|
|
2056
|
+
|
|
2057
|
+
/**
 * Return a real `Manifest` instance for `manifest`.
 *
 * A value that already is a `Manifest` instance exposing
 * `mountTargetsForMaterialization` is returned as-is. Anything else (a plain
 * manifest-shaped object, or an instance lacking that method) is rebuilt into
 * a new `Manifest`: `entries` and `environment` default to empty objects,
 * while `root` and `extraPathGrants` are only passed along when present and
 * non-empty.
 */
function ensureManifest(manifest: Manifest | { root?: string; entries?: Record<string, any>; environment?: Record<string, any>; extraPathGrants?: any[] }): Manifest {
  if (manifest instanceof Manifest && typeof manifest.mountTargetsForMaterialization === "function") {
    return manifest;
  }
  const { root, entries, environment, extraPathGrants } = manifest;
  return new Manifest({
    ...(root ? { root } : {}),
    entries: entries ?? {},
    environment: environment ?? {},
    ...(extraPathGrants?.length ? { extraPathGrants } : {}),
  });
}
|
|
2068
|
+
|
|
2069
|
+
/**
 * Build a `Uint8Array` from a list of values, or return `null` when any value
 * is not a number. An empty list yields an empty array.
 */
function imageBytesFromNumberList(values: readonly unknown[]): Uint8Array | null {
  return values.every((value) => typeof value === "number")
    ? Uint8Array.from(values as number[])
    : null;
}

/**
 * Coerce the various binary shapes a tool-output image `data` field can take
 * into a `Uint8Array`.
 *
 * Accepted shapes:
 * - a live `Uint8Array` (returned as the same instance);
 * - a plain `number[]`;
 * - the object-of-numbers (`{"0":137,"1":80,…}`) that a `Uint8Array` degrades
 *   into after a JSON round-trip — the exact 10x-bloat shape this normalizer
 *   exists to kill;
 * - the `{ type: "Buffer", data: number[] }` object that a Node `Buffer`
 *   degrades into after a JSON round-trip (`Buffer#toJSON`).
 *
 * An object-of-numbers is only accepted when its keys are exactly the
 * contiguous indices `"0".."n-1"`, so an unrelated numeric record such as
 * `{ width: 100, height: 50 }` is not mistaken for image bytes.
 *
 * @returns The bytes, or `null` when `data` is none of the shapes above.
 */
function toImageBytes(data: unknown): Uint8Array | null {
  if (data instanceof Uint8Array) {
    return data;
  }
  if (Array.isArray(data)) {
    return imageBytesFromNumberList(data);
  }
  if (!data || typeof data !== "object") {
    return null;
  }
  const record = data as Record<string, unknown>;
  // JSON round-trip of a Node Buffer.
  if (record.type === "Buffer" && Array.isArray(record.data)) {
    return imageBytesFromNumberList(record.data);
  }
  // JSON round-trip of a typed array: integer-like keys enumerate in ascending
  // order, so a genuine byte object has keys equal to their own position.
  const keys = Object.keys(record);
  if (keys.length === 0 || !keys.every((key, index) => key === String(index))) {
    return null;
  }
  return imageBytesFromNumberList(keys.map((key) => record[key]));
}
|
|
2088
|
+
|
|
2089
|
+
/**
 * Compact a structured image tool output into a single string.
 *
 * Two input shapes are recognized:
 * - the SDK's `{ type: 'image', image: { data, mediaType } }` shape (produced
 *   by the codex-path `computer_screenshot` function tool), which may also
 *   carry `image.url`;
 * - the already-normalized protocol item `{ type: 'input_image', image: '…' }`,
 *   whose `image` is already a `data:…` (or plain URL) string.
 *
 * Resolution order for the SDK shape: a non-empty `image.url` is returned
 * verbatim; string `data` is returned as-is when it already starts with
 * `data:`, otherwise it is treated as base64 and prefixed; any other `data` is
 * converted through `toImageBytes` and base64-encoded. The media type defaults
 * to `image/png`.
 *
 * @returns The `data:<mediaType>;base64,…` string (or URL), or `null` when
 *   `value` is not an image output or its data cannot be interpreted.
 */
function structuredImageToDataUrl(value: unknown): string | null {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }
  const { type, image } = value as { type?: unknown; image?: unknown };

  if (type === "input_image") {
    // Protocol item: `image` is already a `data:…` (or plain URL) string.
    const isUsableString = typeof image === "string" && image.length > 0;
    return isUsableString ? (image as string) : null;
  }
  if (type !== "image" || !image || typeof image !== "object") {
    return null;
  }

  const { data, mediaType: declaredMediaType, url } = image as { data?: unknown; mediaType?: unknown; url?: unknown };
  if (typeof url === "string" && url.length > 0) {
    return url;
  }
  const mediaType = typeof declaredMediaType === "string" && declaredMediaType.length > 0
    ? declaredMediaType
    : "image/png";
  const prefix = `data:${mediaType};base64,`;
  if (typeof data === "string") {
    return data.startsWith("data:") ? data : `${prefix}${data}`;
  }
  const bytes = toImageBytes(data);
  if (!bytes) {
    return null;
  }
  return `${prefix}${Buffer.from(bytes).toString("base64")}`;
}
|
|
2116
|
+
|
|
2117
|
+
/**
 * Shrink a tool-call output before it is stored on an `agent.toolCall.output`
 * SESSION EVENT, so the event never carries a raw binary payload.
 *
 * Background (from the original author's notes): the codex-path `computer_screenshot`
 * function tool returns `{type:'image', image:{data: Uint8Array, mediaType}}`. Stored
 * verbatim, the `Uint8Array` JSON-serializes as an object-of-numbers (~12.7MB per
 * screenshot in session_events, ~10x the base64 form). Converting it to a
 * `data:<mediaType>;base64,…` STRING matches what the HOSTED `computer_call` event
 * already carries, so both computer-use transports emit one representation. The
 * data-URL is kept whole (not truncated) because the web timeline renders the
 * screenshot straight from this payload (packages/react/src/timeline/tool-renderers.tsx:
 * ComputerCallRenderer and ViewImageRenderer).
 *
 * @param output Raw tool output: a single value or an array of content items.
 * @returns A data-URL string for a lone image (bare or wrapped in a one-element array),
 *   a new array with each image element compacted for other arrays, or `output`
 *   unchanged when it is neither an image nor an array (text strings, MCP
 *   `{isError,content}` objects, hosted computer_call data-URL strings).
 */
export function normalizeToolOutputForEvent(output: unknown): unknown {
  const direct = structuredImageToDataUrl(output);
  if (direct !== null) {
    return direct;
  }
  if (!Array.isArray(output)) {
    return output;
  }

  // Compact image elements in place; leave every other element as it was.
  const compacted = output.map((element) => {
    const asUrl = structuredImageToDataUrl(element);
    return asUrl ?? element;
  });

  // A single-string array unwraps to the bare string the timeline image renderers
  // expect; mixed or multi-element arrays keep their (now compact) array shape.
  const [only] = compacted;
  return compacted.length === 1 && typeof only === "string" ? only : compacted;
}
|
|
2148
|
+
|
|
2149
|
+
/**
 * Translate one SDK stream event into zero or more normalized runtime events.
 *
 * Mapping:
 *  - raw model `output_text_delta`                   -> `agent.message.delta`
 *  - Responses `response.reasoning_summary_text.delta` -> `agent.reasoning.delta`
 *  - `agent_updated_stream_event`                    -> `agent.updated`
 *  - run items `tool_call_item` / `tool_search_call_item`     -> `agent.toolCall.created`
 *  - run items `tool_call_output_item` / `tool_search_output_item` -> `agent.toolCall.output`
 *  - run item `message_output_item` with non-empty text -> `agent.message.completed`
 *
 * @param event The SDK stream event.
 * @returns The normalized events; empty when the event is not one of the above.
 */
export function normalizeSdkEvent(event: RunStreamEvent): NormalizedRuntimeEvent[] {
  if (event.type === "raw_model_stream_event") {
    const chunk = (event as any).data;
    if (chunk?.type === "output_text_delta" && typeof chunk.delta === "string") {
      return [{ type: "agent.message.delta", payload: { text: chunk.delta } }];
    }
  }

  if (isOpenAIResponsesRawModelStreamEvent(event)) {
    // Responses-API raw events never fall through to the run-item handling below.
    const wire = (event as any).data?.event;
    const isReasoningDelta = wire?.type === "response.reasoning_summary_text.delta" && typeof wire.delta === "string";
    return isReasoningDelta ? [{ type: "agent.reasoning.delta", payload: { text: wire.delta } }] : [];
  }

  if (event.type === "agent_updated_stream_event") {
    return [{ type: "agent.updated", payload: { agent: (event as any).agent?.name ?? null } }];
  }

  if (event.type !== "run_item_stream_event") {
    return [];
  }
  const item = (event as any).item;
  if (!item) {
    return [];
  }

  switch (item.type) {
    case "tool_call_item": {
      const raw = item.rawItem ?? {};
      return [{
        type: "agent.toolCall.created",
        payload: {
          id: raw.callId ?? raw.id ?? item.id ?? null,
          name: raw.name ?? raw.type ?? "tool",
          arguments: raw.arguments ?? raw.input ?? null,
          raw,
        },
      }];
    }

    case "tool_call_output_item":
      return [{
        type: "agent.toolCall.output",
        payload: {
          id: item.rawItem?.callId ?? item.id ?? null,
          // Structured/binary image outputs are compacted to a data-URL string so a
          // screenshot is not stored in session_events as an object-of-numbers.
          output: normalizeToolOutputForEvent(item.output),
        },
      }];

    case "tool_search_call_item": {
      // Progressive connector disclosure: the model's tool search is surfaced as a
      // regular tool-call event so the session stream shows the step. `arguments`
      // may be an object (the live wire shape) or a string.
      const raw = item.rawItem ?? {};
      return [{
        type: "agent.toolCall.created",
        payload: {
          id: raw.call_id ?? raw.callId ?? raw.id ?? item.id ?? null,
          name: "tool_search",
          arguments: raw.arguments ?? null,
          raw,
        },
      }];
    }

    case "tool_search_output_item": {
      const raw = item.rawItem ?? {};
      // Collect the non-empty string names of the tools the search disclosed.
      const disclosed: string[] = [];
      if (Array.isArray(raw.tools)) {
        for (const tool of raw.tools as Array<{ name?: unknown }>) {
          if (typeof tool?.name === "string" && tool.name) {
            disclosed.push(tool.name);
          }
        }
      }
      const text = disclosed.length > 0 ? `Disclosed tools: ${disclosed.join(", ")}` : "No matching tools found.";
      return [{
        type: "agent.toolCall.output",
        payload: {
          id: raw.call_id ?? raw.callId ?? item.id ?? null,
          output: { type: "text", text },
        },
      }];
    }

    case "message_output_item": {
      // Only a non-empty string text produces a completion event.
      const text = typeof item.text === "string" ? item.text : undefined;
      return text ? [{ type: "agent.message.completed", payload: { text } }] : [];
    }

    default:
      return [];
  }
}
|
|
2232
|
+
|
|
2233
|
+
/**
 * Extract token usage (and the response id, when present) from an SDK stream event.
 *
 * @param event The SDK stream event.
 * @returns `{ responseId?, usage }`, or null when the event carries no usable usage data.
 */
export function modelResponseUsageFromSdkEvent(event: RunStreamEvent): ModelResponseUsage | null {
  const response = modelResponseFromSdkEvent(event);
  const usage = usageFromResponse(response);
  if (!usage) {
    return null;
  }

  // The id may be exposed as `id` or `responseId`; `id` takes precedence.
  let responseId: string | undefined;
  if (typeof response?.id === "string") {
    responseId = response.id;
  } else if (typeof response?.responseId === "string") {
    responseId = response.responseId;
  }

  // An empty id is treated the same as a missing one.
  return responseId ? { responseId, usage } : { usage };
}
|
|
2249
|
+
|
|
2250
|
+
/**
 * Pull the completed model response object out of a stream event.
 *
 * Looks first at a raw model event whose data is `response_done`, then at an
 * OpenAI Responses raw event whose wire event is `response.completed`.
 *
 * @returns The `response` value of the matching event, or null when neither matches.
 */
function modelResponseFromSdkEvent(event: RunStreamEvent): any {
  const modelData = event.type === "raw_model_stream_event" ? (event as any).data : undefined;
  if (modelData?.type === "response_done") {
    return modelData.response;
  }

  const wireEvent = isOpenAIResponsesRawModelStreamEvent(event) ? (event as any).data?.event : undefined;
  if (wireEvent?.type === "response.completed") {
    return wireEvent.response;
  }

  return null;
}
|
|
2265
|
+
|
|
2266
|
+
/**
 * Build a normalized usage record from `response.usage`.
 *
 * Accepts both camelCase and snake_case counters and copies across only the
 * fields that are actually present.
 *
 * @returns The usage record, or null when `usage` is missing, not an object, or
 *   yields no recognised fields.
 */
function usageFromResponse(response: any): ModelResponseUsage["usage"] | null {
  const raw = response?.usage;
  if (!raw || typeof raw !== "object") {
    return null;
  }

  const inputPart = numberProp(raw, "inputTokens", "inputTokens", "input_tokens");
  const outputPart = numberProp(raw, "outputTokens", "outputTokens", "output_tokens");
  const totalPart = numberProp(raw, "totalTokens", "totalTokens", "total_tokens");
  const detailsPart = inputTokenDetailsProp(raw);

  const usage = { ...inputPart, ...outputPart, ...totalPart, ...detailsPart };
  if (Object.keys(usage).length === 0) {
    return null;
  }
  return usage;
}
|
|
2279
|
+
|
|
2280
|
+
/**
 * Read a numeric counter from `raw`, trying the camelCase key first and the
 * snake_case key when the first is null/undefined.
 *
 * @param raw Source usage object.
 * @param outputKey Key to use in the returned fragment.
 * @param camel camelCase source key.
 * @param snake snake_case source key.
 * @returns `{ [outputKey]: value }` for a finite number, otherwise an empty object.
 */
function numberProp(raw: Record<string, unknown>, outputKey: "inputTokens" | "outputTokens" | "totalTokens", camel: string, snake: string): Partial<ModelResponseUsage["usage"]> {
  const candidate = raw[camel] ?? raw[snake];
  if (typeof candidate !== "number" || !Number.isFinite(candidate)) {
    return {};
  }
  return { [outputKey]: candidate };
}
|
|
2284
|
+
|
|
2285
|
+
/**
 * Copy the input-token detail breakdown from `raw`, accepting either
 * `inputTokensDetails` or `input_tokens_details`.
 *
 * @returns `{ inputTokensDetails }` when the value is a non-null object (its
 *   contents are not validated), otherwise an empty object.
 */
function inputTokenDetailsProp(raw: Record<string, unknown>): Partial<ModelResponseUsage["usage"]> {
  const breakdown = raw.inputTokensDetails ?? raw.input_tokens_details;
  const usable = Boolean(breakdown) && typeof breakdown === "object";
  return usable
    ? { inputTokensDetails: breakdown as Record<string, number> | Array<Record<string, number>> }
    : {};
}
|
|
2292
|
+
|
|
2293
|
+
/**
 * Convert pending approval interruptions into serializable values.
 *
 * An item exposing `toJSON()` is serialized through it. Any other item becomes a
 * `{ id, name, arguments, raw }` summary, with `name` / `arguments` taken from the
 * item itself or, failing that, from its `rawItem`.
 *
 * @param interruptions Interruption items awaiting approval.
 * @returns One serialized entry per input item, in order.
 */
export function serializeApprovals(interruptions: unknown[]): unknown[] {
  const serializeOne = (item: any): unknown => {
    if (typeof item?.toJSON === "function") {
      return item.toJSON();
    }
    const name = item?.name ?? item?.rawItem?.name ?? "tool";
    const args = item?.arguments ?? item?.rawItem?.arguments ?? null;
    return { id: approvalIdentifier(item), name, arguments: args, raw: item };
  };
  return interruptions.map((item) => serializeOne(item));
}
|
|
2306
|
+
|
|
2307
|
+
/**
 * Assemble the sandbox manifest (rooted at `/workspace`) for a set of resources.
 *
 * Repository resources mount at `resource.mountPath` or `repos/<repo>`; they become
 * an empty directory when the repository is cloned inside the sandbox
 * (see repositoryUsesSandboxClone), otherwise a `gitRepo` entry. File resources
 * mount at `resource.mountPath` or `files/<fileId>`; they use a matching
 * pre-signed/inline download when one was supplied, otherwise an object-storage
 * mount of `files/<fileId>/original`. Other resource kinds add no entry.
 *
 * @param settings Runtime settings.
 * @param resources Resources to expose in the sandbox.
 * @param environment Manifest environment; defaults to collectSandboxEnvironment(settings).
 * @param fileResourceDownloads Download descriptors, matched to file resources by `fileId`.
 * @returns The manifest.
 * @throws When a repository URI cannot be parsed, a path is invalid, or a helper rejects its input.
 */
export function buildManifest(
  settings: Settings,
  resources: ResourceRef[],
  environment = collectSandboxEnvironment(settings),
  fileResourceDownloads: SandboxFileDownload[] = [],
): Manifest {
  const entries: Record<string, any> = {};

  // Validate and index the downloads up front so lookups below are by file id.
  const downloadsByFileId = new Map(
    normalizeSandboxFileDownloads(fileResourceDownloads).map((download) => [download.fileId, download]),
  );

  for (const resource of resources) {
    if (resource.kind === "repository") {
      const parsedUri = new URL(resource.uri);
      const host = parsedUri.hostname.toLowerCase();
      // "/owner/name.git/" -> "owner/name"
      const repo = parsedUri.pathname.replace(/^\/+|\/+$/g, "").replace(/\.git$/, "");
      const mountPath = normalizeManifestPath(resource.mountPath ?? `repos/${repo}`);

      if (repositoryUsesSandboxClone(settings, resource)) {
        // The clone hook fills this directory later; the manifest only reserves it.
        entries[mountPath] = dir();
      } else {
        const subpath = resource.subpath ? { subpath: normalizeManifestPath(resource.subpath) } : {};
        entries[mountPath] = gitRepo({ host, repo, ref: resource.ref, ...subpath });
      }
      continue;
    }

    if (resource.kind === "file") {
      const mountPath = normalizeManifestPath(resource.mountPath ?? `files/${resource.fileId}`);
      const download = downloadsByFileId.get(resource.fileId);
      if (download) {
        entries[mountPath] = sandboxDownloadDirectory(download, mountPath);
      } else {
        entries[mountPath] = objectStorageFileMount(settings, `files/${resource.fileId}/original`);
      }
    }
  }

  // No extraPathGrants here: remote sandbox clients (Modal) reject manifests
  // that carry them at create/apply time, which broke every Modal session.
  // The lazy bundled-skills source no longer needs a grant because
  // bundledSkillsDir() stages the skills inside the process working directory
  // whenever the packaged copy lives outside it.
  return new Manifest({ root: "/workspace", entries, environment });
}
|
|
2352
|
+
|
|
2353
|
+
/**
 * Build the manifest directory entry for a file resource backed by a download.
 *
 * With inline `content` the directory is pre-populated with the file; otherwise
 * an empty directory is returned.
 *
 * @param download Normalized download descriptor.
 * @param mountPath Mount path computed for the resource; must equal `download.mountPath`.
 * @throws When the mount paths disagree or the filename is unsafe.
 */
function sandboxDownloadDirectory(download: SandboxFileDownload, mountPath: string): any {
  const { fileId, filename, content } = download;
  if (download.mountPath !== mountPath) {
    throw new Error(`File download materialization path mismatch for ${fileId}: expected ${mountPath}, got ${download.mountPath}`);
  }
  assertSafeSandboxFilename(filename, fileId);

  if (!content) {
    return dir();
  }
  return dir({ children: { [filename]: file({ content }) } });
}
|
|
2367
|
+
|
|
2368
|
+
/**
 * Build a read-only object-storage mount entry for `prefix`.
 *
 * Descriptor-driven: a nativeBucketMount backend (modal) mounts via the
 * provider's own bucket-mount strategy and cannot mount Azure Blob entries,
 * so it needs pre-signed downloads instead. Reading the capability descriptor
 * (not a hard-coded backend name) keeps this honest as providers are added.
 *
 * @param settings Runtime settings selecting the sandbox and storage backends.
 * @param prefix Object key prefix to mount.
 * @throws For Azure Blob on a native-bucket-mount backend, for `aws-s3` / `gcs`
 *   (both require pre-signed download materialization), or when the storage
 *   configuration is incomplete.
 */
function objectStorageFileMount(settings: Settings, prefix: string): any {
  const nativeBucketMount = CAPABILITY_DESCRIPTORS[settings.sandboxBackend].nativeBucketMount;
  const rcloneFuse = () => inContainerMountStrategy({ pattern: { type: "rclone", mode: "fuse" } });
  const backend = settings.objectStorageBackend;

  if (backend === "azure-blob") {
    if (nativeBucketMount) {
      throw new Error("Modal sandbox Azure Blob file resources require pre-signed download materialization because the current OpenAI Agents SDK Modal client does not support Azure Blob mount entries.");
    }
    const { container, accountName, accountKey, endpointUrl } = azureBlobMountConfig(settings);
    return azureBlobMount({
      container,
      prefix,
      accountName,
      accountKey,
      endpointUrl,
      readOnly: true,
      mountStrategy: rcloneFuse(),
    });
  }

  if (backend === "aws-s3" || backend === "gcs") {
    throw new Error(`${settings.objectStorageBackend} file resources require pre-signed download materialization`);
  }

  // Every remaining backend is treated as S3-compatible.
  const s3 = s3CompatibleMountConfig(settings);
  return s3Mount({
    bucket: s3.bucket,
    prefix,
    endpointUrl: s3.endpointUrl,
    region: s3.region,
    s3Provider: s3.s3Provider,
    accessKeyId: s3.accessKeyId,
    secretAccessKey: s3.secretAccessKey,
    readOnly: true,
    mountStrategy: nativeBucketMount ? new ModalCloudBucketMountStrategy() : rcloneFuse(),
  });
}
|
|
2407
|
+
|
|
2408
|
+
/**
 * Resolve the S3-compatible mount configuration from settings.
 *
 * The sandbox-specific endpoint takes precedence over the general endpoint.
 *
 * @throws When the endpoint, access key id, or secret access key is missing.
 */
function s3CompatibleMountConfig(settings: Settings): {
  bucket: string;
  endpointUrl: string;
  region: string;
  s3Provider: string;
  accessKeyId: string;
  secretAccessKey: string;
} {
  const endpointUrl = settings.objectStorageSandboxEndpoint ?? settings.objectStorageEndpoint;
  const accessKeyId = settings.objectStorageAccessKeyId;
  const secretAccessKey = settings.objectStorageSecretAccessKey;

  const complete = Boolean(endpointUrl && accessKeyId && secretAccessKey);
  if (!complete || !endpointUrl || !accessKeyId || !secretAccessKey) {
    throw new Error("File resources require configured S3-compatible object storage");
  }

  return {
    bucket: settings.objectStorageBucket,
    endpointUrl,
    region: settings.objectStorageRegion,
    s3Provider: settings.objectStorageS3Provider,
    accessKeyId,
    secretAccessKey,
  };
}
|
|
2429
|
+
|
|
2430
|
+
/**
 * Resolve the Azure Blob mount configuration from settings.
 *
 * Explicit settings win; values parsed from the connection string
 * (`AccountName`, `AccountKey`, `BlobEndpoint`) fill the gaps. `endpointUrl` is
 * included only when azureBlobManifestEndpoint returns one.
 *
 * @throws When no account name or account key can be determined.
 */
function azureBlobMountConfig(settings: Settings): {
  container: string;
  accountName: string;
  accountKey: string;
  endpointUrl?: string;
} {
  const connectionString = settings.objectStorageAzureConnectionString;
  const parsed: Record<string, string> = connectionString ? parseAzureConnectionString(connectionString) : {};

  const accountName = settings.objectStorageAzureAccountName ?? parsed.AccountName;
  const accountKey = settings.objectStorageAzureAccountKey ?? parsed.AccountKey;
  if (!accountName || !accountKey) {
    throw new Error("File resources require Azure Blob account name and account key");
  }

  const endpointUrl = azureBlobManifestEndpoint(settings.objectStorageAzureEndpoint ?? parsed.BlobEndpoint, accountName);
  const base = { container: settings.objectStorageBucket, accountName, accountKey };
  return endpointUrl ? { ...base, endpointUrl } : base;
}
|
|
2452
|
+
|
|
2453
|
+
/**
 * Decide which blob endpoint, if any, should be written to the manifest.
 *
 * Trailing slashes are removed. An endpoint equal to the account's standard
 * `https://<accountName>.blob.core.windows.net` address is dropped, so only
 * non-default endpoints are returned.
 *
 * @param endpoint Configured endpoint, if any.
 * @param accountName Storage account name.
 * @returns The trimmed endpoint, or undefined when it is absent or the standard one.
 */
function azureBlobManifestEndpoint(endpoint: string | undefined, accountName: string): string | undefined {
  if (!endpoint) {
    return undefined;
  }
  const trimmed = endpoint.replace(/\/+$/, "");
  if (trimmed === `https://${accountName}.blob.core.windows.net`) {
    return undefined;
  }
  return trimmed;
}
|
|
2461
|
+
|
|
2462
|
+
/**
 * Parse a `Key=Value;Key=Value` connection string into a key/value map.
 *
 * Segments are trimmed and empty ones ignored. Each segment splits at its FIRST
 * `=`, so values may themselves contain `=` (e.g. base64 padding). A segment
 * without `=` maps to the empty string. Later duplicates overwrite earlier ones.
 */
function parseAzureConnectionString(value: string): Record<string, string> {
  const entries: Array<[string, string]> = [];
  for (const segment of value.split(";")) {
    const part = segment.trim();
    if (!part) {
      continue;
    }
    const separator = part.indexOf("=");
    if (separator === -1) {
      entries.push([part, ""]);
    } else {
      entries.push([part.slice(0, separator), part.slice(separator + 1)]);
    }
  }
  return Object.fromEntries(entries);
}
|
|
2471
|
+
|
|
2472
|
+
/**
 * Normalize a manifest-relative path by stripping leading and trailing slashes.
 *
 * @param path Candidate path.
 * @returns The trimmed path.
 * @throws When the trimmed path is empty or contains `..` anywhere.
 */
function normalizeManifestPath(path: string): string {
  const trimmed = path.replace(/^\/+|\/+$/g, "");
  const acceptable = trimmed.length > 0 && !trimmed.includes("..");
  if (acceptable) {
    return trimmed;
  }
  throw new Error(`Invalid sandbox resource path: ${path}`);
}
|
|
2479
|
+
|
|
2480
|
+
/**
 * Validate download descriptors and normalize their mount paths.
 *
 * @param downloads Raw download descriptors.
 * @returns Copies of the descriptors with `mountPath` normalized.
 * @throws When a mount path or filename is invalid, or a descriptor has neither
 *   inline content nor a non-blank URL.
 */
function normalizeSandboxFileDownloads(downloads: SandboxFileDownload[]): SandboxFileDownload[] {
  const normalizeOne = (download: SandboxFileDownload): SandboxFileDownload => {
    const mountPath = normalizeManifestPath(download.mountPath);
    assertSafeSandboxFilename(download.filename, download.fileId);

    const hasContent = Boolean(download.content);
    const hasUrl = Boolean(download.url?.trim());
    if (!hasContent && !hasUrl) {
      throw new Error(`File download materialization requires content or a URL for ${download.fileId}`);
    }
    return { ...download, mountPath };
  };
  return downloads.map((download) => normalizeOne(download));
}
|
|
2493
|
+
|
|
2494
|
+
/**
 * Reject filenames that could escape or alias their containing directory.
 *
 * A name is rejected when it is empty, is `.`, or contains `/`, `\` or `..`
 * (which also covers the bare `..` name).
 *
 * @param filename Name to check.
 * @param fileId File id, used only in the error message.
 * @throws When the filename is unsafe.
 */
function assertSafeSandboxFilename(filename: string, fileId: string): void {
  const forbiddenFragments = ["/", "\\", ".."];
  const unsafe = !filename
    || filename === "."
    || forbiddenFragments.some((fragment) => filename.includes(fragment));
  if (unsafe) {
    throw new Error(`Invalid sandbox file name for ${fileId}: ${filename}`);
  }
}
|
|
2499
|
+
|
|
2500
|
+
/**
 * Absolute in-sandbox path at which a download's file is placed:
 * `/workspace/<mountPath>/<filename>`, joined with POSIX path rules.
 */
function sandboxDownloadTargetPath(download: SandboxFileDownload): string {
  const { mountPath, filename } = download;
  return posixPath.join("/workspace", mountPath, filename);
}
|
|
2503
|
+
|
|
2504
|
+
/**
 * Build the shell script that materializes a download at `targetPath`.
 *
 * The script creates the target directory, and only when the target file does
 * not exist yet fetches the URL with curl into a temp file next to the target
 * and renames it into place. A trap removes the temp file if the script exits
 * before the rename. Finally it makes a best-effort attempt to drop write
 * permission on the target.
 *
 * @param download Download descriptor; `url` must be non-empty.
 * @param targetPath Absolute destination path inside the sandbox.
 * @returns The script text, one command per line.
 * @throws When `download.url` is empty.
 */
function sandboxFileDownloadCommand(download: SandboxFileDownload, targetPath: string): string {
  if (!download.url) {
    throw new Error(`File download materialization URL is empty for ${download.fileId}`);
  }
  const targetDir = posixPath.dirname(targetPath);
  const quotedTarget = shellQuote(targetPath);

  // The temp name carries the shell's PID so concurrent downloads of the same
  // target do not share a temp file. `$$` must sit OUTSIDE the single-quoted
  // prefix: inside single quotes the shell performs no expansion and the suffix
  // would be the two literal characters "$$".
  const tmpAssignment = " tmp=" + shellQuote(`${targetPath}.opengeni-download-`) + "\"$$\"";

  return [
    "set -euo pipefail",
    `mkdir -p -- ${shellQuote(targetDir)}`,
    `if [ ! -f ${quotedTarget} ]; then`,
    tmpAssignment,
    " cleanup() { rm -f -- \"$tmp\"; }",
    " trap cleanup EXIT",
    ` curl --fail --location --silent --show-error --retry 3 --retry-delay 1 --output "$tmp" ${shellQuote(download.url)}`,
    ` mv -- "$tmp" ${quotedTarget}`,
    // The temp file no longer exists after the rename, so the trap is cleared.
    " trap - EXIT",
    "fi",
    `chmod a-w -- ${quotedTarget} 2>/dev/null || true`,
  ].join("\n");
}
|
|
2524
|
+
|
|
2525
|
+
/**
 * Quote a string as a single shell word.
 *
 * The value is wrapped in single quotes; each embedded single quote is written
 * as `'\''` (close the quote, an escaped quote, reopen the quote).
 */
function shellQuote(value: string): string {
  const escaped = value.split("'").join("'\\''");
  return "'" + escaped + "'";
}
|
|
2528
|
+
|
|
2529
|
+
/**
 * Recover the sandbox session state stored on a RunState, if there is one.
 *
 * The entry for the current agent key is read from `_sandbox.sessionsByAgent`;
 * when that is absent, an entry is synthesized from the top-level `_sandbox`
 * fields provided both `currentAgentKey` and `sessionState` are set.
 *
 * @param state Run state that may carry `_sandbox` data.
 * @param client Sandbox client; when falsy nothing is restored.
 * @returns The deserialized session state, or undefined when there is no client or no entry.
 * @throws When the stored backend id differs from the client's backend id.
 */
async function restoredSandboxSessionState(state: RunState<any, any>, client: unknown): Promise<SandboxSessionState | undefined> {
  if (!client) {
    return undefined;
  }
  const sandboxClient = client as SandboxClient;
  const sandboxState = (state as any)._sandbox;

  let entry = sandboxState?.sessionsByAgent?.[sandboxState.currentAgentKey];
  if (entry === undefined || entry === null) {
    // Fallback: build the entry from the fields stored directly on `_sandbox`.
    const hasFlatState = sandboxState?.currentAgentKey && sandboxState?.sessionState;
    entry = hasFlatState
      ? {
          backendId: sandboxState.backendId,
          currentAgentKey: sandboxState.currentAgentKey,
          currentAgentName: sandboxState.currentAgentName,
          sessionState: sandboxState.sessionState,
        }
      : undefined;
  }
  if (!entry) {
    return undefined;
  }

  if (sandboxClient.backendId !== entry.backendId) {
    throw new Error("RunState sandbox backend does not match the configured sandbox client");
  }
  return await deserializeSandboxSessionStateEnvelope(sandboxClient, entry.sessionState);
}
|
|
2551
|
+
|
|
2552
|
+
// sandboxStateEntryFromRunState + restoredSandboxSessionStateFromEntry +
|
|
2553
|
+
// deserializeSandboxSessionStateEnvelope moved to the agent-loop-free leaf
|
|
2554
|
+
// ./sandbox; re-exported via `export * from "./sandbox"`. The private
|
|
2555
|
+
// restoredSandboxSessionState above (which takes an agent-loop RunState) calls
|
|
2556
|
+
// the moved deserializeSandboxSessionStateEnvelope, imported from ./sandbox.
|
|
2557
|
+
|
|
2558
|
+
/** Point in the sandbox lifecycle at which a hook runs; `beforeAgentStart` is the only phase defined. */
export type SandboxLifecycleHookPhase = "beforeAgentStart";
|
|
2559
|
+
|
|
2560
|
+
/** Per-run inputs passed to every sandbox lifecycle hook. */
export type SandboxLifecycleHookContext = {
  /** Environment variables available to the hook. */
  environment: Record<string, string>;
  /** Optional sink for runtime events emitted while a hook runs. */
  onRuntimeEvent?: (event: NormalizedRuntimeEvent) => Promise<void> | void;
  /** Optional user to run hook commands as. */
  runAs?: string;
  /**
   * TOKEN-BROKER (B1): the run-scoped GitHub token to seed into the box's token
   * FILE before the repository clone runs. Threaded OFF-MANIFEST: it rides ONLY
   * the clone exec's per-call env (OPENGENI_GIT_TOKEN_SEED), NEVER the box/agent
   * manifest env (validateNoEnvironmentDelta must never see a rotating value).
   */
  gitTokenSeed?: string;
};
|
|
2570
|
+
|
|
2571
|
+
/** A unit of sandbox preparation that runs at a given lifecycle phase. */
export type SandboxLifecycleHook = {
  /** Identifier of the hook. */
  id: string;
  /** Phase at which the hook runs. */
  phase: SandboxLifecycleHookPhase;
  /** Optional predicate; when present and it returns false the hook is skipped. */
  shouldRun?: (context: SandboxLifecycleHookContext) => boolean;
  /** Performs the hook's work against the given sandbox session. */
  run: (session: SandboxSessionLike, context: SandboxLifecycleHookContext) => Promise<void>;
};
|
|
2577
|
+
|
|
2578
|
+
/** Lifecycle hooks shipped with the runtime, keyed by hook id. */
const builtInSandboxLifecycleHooks: Record<string, SandboxLifecycleHook> = {
  "azure-cli-login": {
    id: "azure-cli-login",
    phase: "beforeAgentStart",
    // Runs only when hasAzureServicePrincipal accepts the hook environment.
    shouldRun: (context) => hasAzureServicePrincipal(context.environment),
    run: runAzureCliLoginHook,
  },
};
|
|
2586
|
+
|
|
2587
|
+
/**
 * Resolve built-in lifecycle hooks by id.
 *
 * Only the registry's OWN keys count as known ids. A plain index lookup would
 * also find names inherited from `Object.prototype` (`constructor`, `toString`,
 * ...), returning a function instead of a hook and skipping the error below.
 *
 * @param ids Hook ids to resolve.
 * @returns The matching hooks, in the order of `ids`.
 * @throws When an id does not name a built-in hook.
 */
export function sandboxLifecycleHooksForIds(ids: string[]): SandboxLifecycleHook[] {
  return ids.map((id) => {
    const known = Object.prototype.hasOwnProperty.call(builtInSandboxLifecycleHooks, id);
    const hook = known ? builtInSandboxLifecycleHooks[id] : undefined;
    if (!hook) {
      throw new Error(`Unknown sandbox lifecycle hook ${id}`);
    }
    return hook;
  });
}
|
|
2596
|
+
|
|
2597
|
+
/**
 * Select the hooks that should run before the agent starts: those in the
 * `beforeAgentStart` phase whose `shouldRun` predicate is absent or does not
 * return false for this context.
 */
function applicableBeforeAgentStartHooks(
  hooks: SandboxLifecycleHook[],
  context: SandboxLifecycleHookContext,
): SandboxLifecycleHook[] {
  return hooks.filter((hook) => {
    if (hook.phase !== "beforeAgentStart") {
      return false;
    }
    // A hook without a predicate always applies.
    return hook.shouldRun?.(context) ?? true;
  });
}
|
|
2603
|
+
|
|
2604
|
+
/**
 * Run the applicable beforeAgentStart hooks, one after another, against a box
 * that is already live.
 *
 * Why this exists (from the original author's notes): the create/resume
 * decoration in withSandboxLifecycleHooks cannot see the PROVIDED-session path.
 * When runStream hands the SDK a live `session` (runOptions.sandbox.session, the
 * lease-owned box resolved by the turn activity), SandboxRuntimeManager uses it
 * as-is and never calls client.create/resume, so a wrapper around those methods
 * never fires. Callers on that path invoke this before starting the run so the
 * box still gets its beforeAgentStart preparation (repository clone + B1
 * askpass/token-file seed, azure-cli-login).
 *
 * @param session The live sandbox session.
 * @param hooks Candidate hooks; filtered by phase and `shouldRun`.
 * @param context Context handed to each hook.
 * @returns Resolves once every applicable hook has finished; a failing hook
 *   rejects and the remaining hooks are not run.
 */
export async function runBeforeAgentStartHooks(
  session: SandboxSessionLike,
  hooks: SandboxLifecycleHook[],
  context: SandboxLifecycleHookContext,
): Promise<void> {
  const pending = applicableBeforeAgentStartHooks(hooks, context);
  for (const lifecycleHook of pending) {
    await lifecycleHook.run(session, context);
  }
}
|
|
2624
|
+
|
|
2625
|
+
/**
 * Decorate a sandbox client so that sessions it creates or resumes get their
 * beforeAgentStart hooks run before being handed back.
 *
 * Hooks run at most once per session object (tracked in a WeakSet). A session is
 * recorded only after every hook has completed, so a failed hook run is retried
 * if the same session object comes back. All other client methods are forwarded
 * unchanged, and only the methods the original client defines are present on the
 * wrapper.
 *
 * @param client Client to wrap.
 * @param hooks Candidate hooks; filtered by phase and `shouldRun` once, up front.
 * @param context Context handed to each hook.
 * @returns `client` itself when no hook applies, otherwise the wrapping client.
 */
export function withSandboxLifecycleHooks(
  client: SandboxClient,
  hooks: SandboxLifecycleHook[],
  context: SandboxLifecycleHookContext,
): SandboxClient {
  const beforeAgentStartHooks = applicableBeforeAgentStartHooks(hooks, context);
  if (beforeAgentStartHooks.length === 0) {
    return client;
  }

  const prepared = new WeakSet<object>();
  const prepareOnce = async <T extends SandboxSessionLike>(session: T): Promise<T> => {
    if (typeof session !== "object" || session === null || prepared.has(session)) {
      return session;
    }
    for (const lifecycleHook of beforeAgentStartHooks) {
      await lifecycleHook.run(session, context);
    }
    prepared.add(session);
    return session;
  };

  const wrapped: SandboxClient = {
    backendId: client.backendId,
    ...(client.supportsDefaultOptions !== undefined ? { supportsDefaultOptions: client.supportsDefaultOptions } : {}),
    // Session-producing methods: run the hooks on the returned session.
    ...(client.create ? { create: async (...args: any[]) => await prepareOnce(await (client.create as any)(...args)) } : {}),
    ...(client.resume ? { resume: async (state: SandboxSessionState) => await prepareOnce(await client.resume!(state)) } : {}),
    // Everything else is a plain pass-through.
    ...(client.delete ? { delete: async (state: SandboxSessionState) => await client.delete!(state) } : {}),
    ...(client.serializeSessionState ? { serializeSessionState: async (state: SandboxSessionState, options) => await client.serializeSessionState!(state, options) } : {}),
    ...(client.canPersistOwnedSessionState ? { canPersistOwnedSessionState: async (state: SandboxSessionState) => await client.canPersistOwnedSessionState!(state) } : {}),
    ...(client.canReusePreservedOwnedSession ? { canReusePreservedOwnedSession: async (state: SandboxSessionState) => await client.canReusePreservedOwnedSession!(state) } : {}),
    ...(client.deserializeSessionState ? { deserializeSessionState: async (state: Record<string, unknown>) => await client.deserializeSessionState!(state) } : {}),
  };
  return wrapped;
}
|
|
2657
|
+
|
|
2658
|
+
/** Repository-clone hooks registered for `agent`; an empty list when none are registered. */
function sandboxRepositoryCloneHooksForAgent(agent: Agent<any, any>): SandboxLifecycleHook[] {
  const registered = agentRepositoryCloneHooks.get(agent);
  return registered ?? [];
}
|
|
2661
|
+
|
|
2662
|
+
/**
 * TOKEN-BROKER (B1): look up the per-turn git token seed stashed for this agent.
 * It is undefined when no repo is attached or on the selfhosted path. The value
 * is read into the clone hook context at runStream so the token is seeded
 * off-manifest.
 */
function gitTokenSeedForAgent(agent: Agent<any, any>): string | undefined {
  const seed = agentGitTokenSeed.get(agent);
  return seed;
}
|
|
2668
|
+
|
|
2669
|
+
/**
 * Build the lifecycle hook that clones repositories inside the sandbox.
 *
 * @param settings Runtime settings.
 * @param resources All resources of the run; only repository resources for which
 *   repositoryUsesSandboxClone returns true are cloned.
 * @param activeSandboxBackend Backend the run actually executes on; defaults to
 *   `settings.sandboxBackend`.
 * @returns A single `repository-clone` hook, or an empty list when nothing needs cloning.
 */
function sandboxRepositoryCloneHooks(
  settings: Settings,
  resources: ResourceRef[],
  activeSandboxBackend: Settings["sandboxBackend"] = settings.sandboxBackend,
): SandboxLifecycleHook[] {
  const cloneTargets: Extract<ResourceRef, { kind: "repository" }>[] = [];
  for (const resource of resources) {
    if (resource.kind === "repository" && repositoryUsesSandboxClone(settings, resource, activeSandboxBackend)) {
      cloneTargets.push(resource);
    }
  }
  if (cloneTargets.length === 0) {
    return [];
  }

  const cloneHook: SandboxLifecycleHook = {
    id: "repository-clone",
    phase: "beforeAgentStart",
    run: async (session, context) => {
      await runRepositoryCloneHook(session, cloneTargets, context);
    },
  };
  return [cloneHook];
}
|
|
2688
|
+
|
|
2689
|
+
/**
 * Decide whether the platform seeds a repository resource by running `git clone`
 * inside the sandbox before the agent starts.
 *
 * SAFETY GATE (selfhosted / bring-your-own machine), per the original author's
 * notes: the clone hook writes into `posixPath.join("/workspace", mountPath)`,
 * which a selfhosted agent rewrites to a path under its REAL launch directory, so
 * a platform-initiated clone would land on the user's actual disk. A connected
 * machine already owns its filesystem and the platform must NEVER clone onto it.
 * The gate is therefore keyed off the EFFECTIVE/active backend rather than only
 * the session's HOME backend (`settings.sandboxBackend`): a session can live on
 * the cloud default while its active sandbox has been swapped to a connected
 * machine (active_sandbox_id pointing at a selfhosted lease), in which case the
 * agent executes on the user's machine even though the home backend is e.g.
 * "modal". Because `activeSandboxBackend` defaults to the home backend, a session
 * whose HOME backend is "selfhosted" is gated with no caller change and every
 * cloud path behaves as before.
 *
 * NOTE(review): the "modal" check below reads the HOME backend
 * (`settings.sandboxBackend`), not `activeSandboxBackend`. Confirm that is intended.
 *
 * @param settings Runtime settings (home backend).
 * @param resource The repository resource.
 * @param activeSandboxBackend Backend the run actually executes on.
 * @returns false on a selfhosted active backend; otherwise true when the home
 *   backend is "modal" or the resource has both a GitHub installation id and a
 *   GitHub repository id.
 */
export function repositoryUsesSandboxClone(
  settings: Settings,
  resource: Extract<ResourceRef, { kind: "repository" }>,
  activeSandboxBackend: Settings["sandboxBackend"] = settings.sandboxBackend,
): boolean {
  if (activeSandboxBackend === "selfhosted") {
    return false;
  }
  if (settings.sandboxBackend === "modal") {
    return true;
  }
  return Boolean(resource.githubInstallationId && resource.githubRepositoryId);
}
|
|
2717
|
+
|
|
2718
|
+
/**
 * Builds the POSIX shell script that materializes repository resources under
 * `/workspace`.
 *
 * The script is assembled from four fixed sections (shell preamble, token/askpass
 * seeding, git bootstrap, and the `clone_repository` function) followed by one
 * `clone_repository` invocation per resource.
 *
 * @param resources Repository resources to clone; each `uri` must parse with `new URL`.
 * @returns The newline-joined shell script.
 */
export function repositoryCloneCommand(resources: Extract<ResourceRef, { kind: "repository" }>[]): string {
  const shellPreamble: string[] = [
    "set -eu",
    "export HOME=\"${HOME:-/workspace}\"",
    "export GIT_TERMINAL_PROMPT=\"${GIT_TERMINAL_PROMPT:-0}\"",
  ];

  // TOKEN-BROKER (B1/B2). When the per-exec OPENGENI_GIT_TOKEN_SEED is present, this
  // section writes the run-scoped GitHub token to the stable token file and installs
  // the git-askpass helper BEFORE any clone runs. Provisioning the helper at runtime
  // (instead of relying on a script baked into the image at
  // /usr/local/bin/opengeni-git-askpass) removes the image-rebuild rollout gate: it
  // is correct on any box image, including already-warm boxes on their next turn.
  // The seed travels on the per-exec env only, never on the box/agent manifest
  // (validateNoEnvironmentDelta must not see a rotating value), so the section is a
  // no-op when the seed is absent (e.g. the selfhosted path, which uses its own git
  // credentials). The token file is $OPENGENI_GIT_TOKEN_FILE with a
  // $HOME/.opengeni/git-token fallback; $GIT_ASKPASS comes from the box manifest env
  // (sandboxEnvironmentForRun sets it to $HOME/.opengeni/askpass).
  //
  // The askpass body is intended to be byte-identical to docker/opengeni-git-askpass
  // and is written through a QUOTED heredoc (<<'ASKPASS_EOF') so nothing inside it
  // expands ($1, $HOME, ${OPENGENI_GIT_TOKEN_FILE:-...} and the literal \n in printf
  // land verbatim); chmod 0755 then makes it executable for git.
  //
  // ATOMIC REWRITE: the section re-runs at the start of every turn on a warm box
  // that other turn holders may be using, and a concurrent `git fetch` can invoke
  // the askpass (which cats the token file) at any moment. Both files are therefore
  // written to a pid-suffixed temp under umask 077 and renamed into place: rename is
  // atomic, readers keep the old inode, and the token is never world-readable.
  const tokenSeedSection: string[] = [
    "if [ -n \"${OPENGENI_GIT_TOKEN_SEED:-}\" ]; then",
    " seed_umask=\"$(umask)\"",
    " umask 077",
    " git_token_file=\"${OPENGENI_GIT_TOKEN_FILE:-$HOME/.opengeni/git-token}\"",
    " mkdir -p \"$(dirname \"$git_token_file\")\"",
    " printf '%s' \"$OPENGENI_GIT_TOKEN_SEED\" > \"$git_token_file.tmp.$$\"",
    " mv -f \"$git_token_file.tmp.$$\" \"$git_token_file\"",
    " git_askpass=\"${GIT_ASKPASS:-$HOME/.opengeni/askpass}\"",
    " mkdir -p \"$(dirname \"$git_askpass\")\"",
    " cat > \"$git_askpass.tmp.$$\" <<'ASKPASS_EOF'",
    "#!/usr/bin/env sh",
    "case \"$1\" in",
    " *Username*) printf '%s\\n' \"x-access-token\" ;;",
    " *Password*) cat \"${OPENGENI_GIT_TOKEN_FILE:-$HOME/.opengeni/git-token}\" 2>/dev/null || printf '\\n' ;;",
    " *) printf '\\n' ;;",
    "esac",
    "ASKPASS_EOF",
    " chmod 0755 \"$git_askpass.tmp.$$\"",
    " mv -f \"$git_askpass.tmp.$$\" \"$git_askpass\"",
    " umask \"$seed_umask\"",
    "fi",
  ];

  // Defines and immediately calls ensure_git: use git if present, otherwise try an
  // apt-get bootstrap, otherwise exit 127.
  const gitBootstrapSection: string[] = [
    "ensure_git() {",
    " if command -v git >/dev/null 2>&1; then",
    " return 0",
    " fi",
    " if command -v apt-get >/dev/null 2>&1; then",
    " export DEBIAN_FRONTEND=noninteractive",
    " apt-get update >/dev/null",
    " apt-get install -y --no-install-recommends ca-certificates git >/dev/null",
    " rm -rf /var/lib/apt/lists/*",
    " command -v git >/dev/null 2>&1 && return 0",
    " fi",
    " echo \"git is not installed in the sandbox and could not be bootstrapped\" >&2",
    " exit 127",
    "}",
    "ensure_git",
  ];

  const cloneFunctionSection: string[] = [
    "clone_repository() {",
    " target=\"$1\"",
    " uri=\"$2\"",
    " ref=\"$3\"",
    " subpath=\"$4\"",
    " if [ -e \"$target\" ] && { [ -f \"$target\" ] || [ -n \"$(find \"$target\" -mindepth 1 -maxdepth 1 -print -quit 2>/dev/null)\" ]; }; then",
    // The hook re-runs every turn on a long-lived box, so a non-empty target alone
    // does not prove a finished materialization: an interrupted clone (worker crash
    // or lifecycle timeout mid-mv/cp) leaves a partial tree that would pass this
    // check forever. A full-repo target is only skipped when it really is a git work
    // tree; a partial one is wiped and rebuilt (nothing legitimate writes under the
    // mount path before the repo exists). Subpath extracts are not git repos, so for
    // them the plain non-empty check is the strongest signal available.
    " if [ -n \"$subpath\" ] || git -C \"$target\" rev-parse --is-inside-work-tree >/dev/null 2>&1; then",
    " echo \"Repository resource already present at $target\"",
    " return 0",
    " fi",
    " echo \"Re-materializing partial repository resource at $target\" >&2",
    " find \"$target\" -mindepth 1 -maxdepth 1 -exec rm -rf {} +",
    " fi",
    " mkdir -p \"$(dirname \"$target\")\"",
    " tmp=\"${target}.tmp.$$\"",
    " rm -rf \"$tmp\"",
    // A failed fetch must not leave the pid-suffixed tmp clone next to the mount;
    // under `set -eu` the script would otherwise exit before any cleanup.
    " if ! { git init \"$tmp\" >/dev/null && git -C \"$tmp\" remote add origin \"$uri\" && git -C \"$tmp\" fetch --depth 1 --no-tags --filter=blob:none origin \"$ref\" && git -C \"$tmp\" checkout --detach FETCH_HEAD >/dev/null; }; then",
    " rm -rf \"$tmp\"",
    " echo \"Repository resource fetch failed for $target\" >&2",
    " exit 1",
    " fi",
    " if [ -n \"$subpath\" ]; then",
    " if [ ! -e \"$tmp/$subpath\" ]; then",
    " echo \"Repository subpath not found: $subpath\" >&2",
    " rm -rf \"$tmp\"",
    " exit 1",
    " fi",
    " if [ -d \"$tmp/$subpath\" ]; then",
    " mkdir -p \"$target\"",
    " cp -a \"$tmp/$subpath/.\" \"$target/\"",
    " else",
    " rmdir \"$target\" 2>/dev/null || true",
    " cp -a \"$tmp/$subpath\" \"$target\"",
    " fi",
    " rm -rf \"$tmp\"",
    " else",
    " rmdir \"$target\" 2>/dev/null || true",
    // Two concurrent turn holders can race this install. Without re-checking for
    // existence, the loser's plain `mv` would nest its tmp clone INSIDE the winner's
    // tree as <name>.tmp.<pid>. A valid work tree left by the winner is accepted as
    // is; a non-empty, non-repo survivor is a mount point the manifest re-filled, so
    // the clone is installed into it by content copy rather than rename.
    " if [ -e \"$target\" ]; then",
    " if git -C \"$target\" rev-parse --is-inside-work-tree >/dev/null 2>&1; then",
    " rm -rf \"$tmp\"",
    " echo \"Repository resource already present at $target\"",
    " return 0",
    " fi",
    " cp -a \"$tmp/.\" \"$target/\"",
    " rm -rf \"$tmp\"",
    " else",
    " mv \"$tmp\" \"$target\"",
    " fi",
    " git -C \"$target\" rev-parse --is-inside-work-tree >/dev/null",
    " fi",
    " if [ ! -e \"$target\" ]; then",
    " echo \"Repository resource was not materialized at $target\" >&2",
    " exit 1",
    " fi",
    " echo \"Repository resource ready at $target\"",
    "}",
  ];

  // One `clone_repository <target> <uri> <ref> <subpath>` line per resource. The
  // default mount path is repos/<owner>/<name>, derived from the URI path with
  // surrounding slashes and a trailing ".git" removed.
  const cloneInvocations = resources.map((resource) => {
    const repoPath = new URL(resource.uri).pathname.replace(/^\/+|\/+$/g, "").replace(/\.git$/, "");
    const mountPath = normalizeManifestPath(resource.mountPath ?? `repos/${repoPath}`);
    return [
      "clone_repository",
      shellQuote(posixPath.join("/workspace", mountPath)),
      shellQuote(resource.uri),
      shellQuote(resource.ref),
      shellQuote(resource.subpath ? normalizeManifestPath(resource.subpath) : ""),
    ].join(" ");
  });

  return [
    ...shellPreamble,
    ...tokenSeedSection,
    ...gitBootstrapSection,
    ...cloneFunctionSection,
    ...cloneInvocations,
  ].join("\n");
}
|
|
2869
|
+
|
|
2870
|
+
/**
 * Executes the repository clone script in the sandbox session and reports the
 * outcome through `context.onRuntimeEvent` (started, then completed or failed).
 *
 * @param session Sandbox session; `exec` is preferred, `execCommand` is the fallback.
 * @param resources Repository resources handed to `repositoryCloneCommand`.
 * @param context Lifecycle context (optional `gitTokenSeed`, `runAs`, event sink).
 * @throws When the session has no exec method, or the command result is rejected
 *   by `assertSandboxCommandSucceeded`. The error is rethrown after the failed event.
 */
export async function runRepositoryCloneHook(
  session: SandboxSessionLike,
  resources: Extract<ResourceRef, { kind: "repository" }>[],
  context: SandboxLifecycleHookContext = { environment: {} },
): Promise<void> {
  const payload = { name: "repository-clone", repositoryCount: resources.length };
  await context.onRuntimeEvent?.({ type: "sandbox.operation.started", payload });
  try {
    // TOKEN-BROKER (B1): the run-scoped GitHub token is threaded PER-EXEC. The SDK's
    // ExecCommandArgs has no `environment` field (exec inherits the box manifest
    // env), and the token MUST NOT go on the manifest (validateNoEnvironmentDelta
    // would see a rotating value). It is therefore inlined as an `export` prefix on
    // this exec's command text only; the clone script's gated seed block then writes
    // it to the token file before the fetch so GIT_ASKPASS can read it. Without a
    // seed (e.g. the selfhosted path) the clone script runs unprefixed.
    const command = context.gitTokenSeed
      ? `export OPENGENI_GIT_TOKEN_SEED=${shellQuote(context.gitTokenSeed)}\n${repositoryCloneCommand(resources)}`
      : repositoryCloneCommand(resources);

    // Both exec flavours receive the same arguments.
    const execArgs = {
      cmd: command,
      workdir: "/workspace",
      ...(context.runAs ? { runAs: context.runAs } : {}),
      yieldTimeMs: SANDBOX_LIFECYCLE_COMMAND_TIMEOUT_MS,
      maxOutputTokens: 20_000,
    };

    let result: unknown;
    if (session.exec) {
      result = await session.exec(execArgs);
    } else if (session.execCommand) {
      result = await session.execCommand(execArgs);
    } else {
      throw new Error("Sandbox session does not support command execution");
    }
    assertSandboxCommandSucceeded(result, "Repository clone hook");

    await context.onRuntimeEvent?.({ type: "sandbox.operation.completed", payload });
  } catch (error) {
    await context.onRuntimeEvent?.({
      type: "sandbox.operation.failed",
      payload: { ...payload, error: error instanceof Error ? error.message : String(error) },
    });
    throw error;
  }
}
|
|
2923
|
+
|
|
2924
|
+
/**
 * Builds the shell script that logs the Azure CLI in with a service principal.
 *
 * Credentials are read from the `AZURE_*` variables with `ARM_*` fallbacks. The
 * login only runs when client id, secret and tenant are all set and
 * `az account show` does not already succeed.
 *
 * @returns The newline-joined shell script.
 */
export function azureCliLoginCommand(): string {
  // NAME="${AZURE_NAME:-${ARM_NAME:-}}" for each credential component.
  const credentialAssignments = ["CLIENT_ID", "CLIENT_SECRET", "TENANT_ID", "SUBSCRIPTION_ID"].map(
    (name) => `${name}="\${AZURE_${name}:-\${ARM_${name}:-}}"`,
  );

  const credentialedLogin = [
    "if [ -n \"$CLIENT_ID\" ] && [ -n \"$CLIENT_SECRET\" ] && [ -n \"$TENANT_ID\" ]; then",
    " command -v az >/dev/null 2>&1 || { echo \"Azure CLI is not installed in the sandbox\" >&2; exit 127; }",
    " az account show --only-show-errors >/dev/null 2>&1 || az login --service-principal --username \"$CLIENT_ID\" --password \"$CLIENT_SECRET\" --tenant \"$TENANT_ID\" --allow-no-subscriptions --only-show-errors --output none",
    // Written as if/fi rather than `[ -n ] && az`: this is the last statement of the
    // credentialed branch, so for a service principal without a subscription (a
    // supported setup, since the login passes --allow-no-subscriptions) a bare
    // `[ -n ]` test would make the whole script exit 1 and fail the turn.
    " if [ -n \"$SUBSCRIPTION_ID\" ]; then az account set --subscription \"$SUBSCRIPTION_ID\" --only-show-errors; fi",
    "fi",
  ];

  const script = [
    "export HOME=\"${HOME:-/workspace}\"",
    "mkdir -p \"$HOME/.azure\"",
    ...credentialAssignments,
    ...credentialedLogin,
  ];
  return script.join("\n");
}
|
|
2943
|
+
|
|
2944
|
+
/**
 * Extracts a process exit code from a sandbox command result.
 *
 * String results are searched for `Process exited with code <n>`. Object results
 * are probed for the first numeric value among `exitCode`, `exit_code`, `code`
 * and `status`, in that order.
 *
 * @param result Raw value returned by the sandbox exec call.
 * @returns The exit code, or `null` when none can be determined.
 */
export function sandboxCommandExitCode(result: unknown): number | null {
  if (typeof result === "string") {
    const reported = /Process exited with code (-?\d+)/.exec(result);
    return reported === null ? null : Number(reported[1]);
  }
  if (typeof result !== "object" || result === null) {
    return null;
  }
  const fields = result as {
    exitCode?: unknown;
    exit_code?: unknown;
    code?: unknown;
    status?: unknown;
  };
  const firstNumeric = [fields.exitCode, fields.exit_code, fields.code, fields.status].find(
    (value): value is number => typeof value === "number",
  );
  return firstNumeric ?? null;
}
|
|
2965
|
+
|
|
2966
|
+
/**
 * Collects the textual output of a sandbox command result.
 *
 * String results are returned as-is. `sandboxCommandExitCode` and
 * `sandboxCommandStillRunning` both understand string-form results, so returning
 * `""` for them here would strip every diagnostic from the error raised by
 * `assertSandboxCommandSucceeded`. Object results contribute their non-empty
 * string `output`, `stderr` and `stdout` fields, in that order, joined by newlines.
 *
 * @param result Raw value returned by the sandbox exec call.
 * @returns The combined output, or `""` when there is none.
 */
export function sandboxCommandOutput(result: unknown): string {
  if (typeof result === "string") {
    return result;
  }
  if (!result || typeof result !== "object") {
    return "";
  }
  const candidate = result as {
    output?: unknown;
    stdout?: unknown;
    stderr?: unknown;
  };
  return [candidate.output, candidate.stderr, candidate.stdout]
    .filter((value): value is string => typeof value === "string" && value.length > 0)
    .join("\n");
}
|
|
2979
|
+
|
|
2980
|
+
/**
 * Throws unless the sandbox command result reports a finished process with exit
 * code 0.
 *
 * @param result Raw value returned by the sandbox exec call.
 * @param operation Human-readable operation name used in error messages.
 * @throws Error when the command is still running, exited non-zero, or reported
 *   no exit code. Captured output, when present, replaces the generic message
 *   for the latter two cases.
 */
function assertSandboxCommandSucceeded(result: unknown, operation: string): void {
  const output = sandboxCommandOutput(result);

  if (sandboxCommandStillRunning(result)) {
    const detail = output ? `:\n${output}` : "";
    throw new Error(`${operation} did not finish before the lifecycle command timeout${detail}`);
  }

  const exitCode = sandboxCommandExitCode(result);
  if (exitCode === 0) {
    return;
  }
  const fallbackMessage =
    exitCode === null
      ? `${operation} did not return a command exit code`
      : `${operation} failed with exit code ${exitCode}`;
  throw new Error(output || fallbackMessage);
}
|
|
2993
|
+
|
|
2994
|
+
/**
 * Reports whether a sandbox command result describes a process that has not
 * finished yet.
 *
 * String results match on `Process running with session ID <n>`. Object results
 * match when `sessionId` or `session_id` is a number.
 *
 * @param result Raw value returned by the sandbox exec call.
 * @returns `true` when the result carries a running-session marker.
 */
export function sandboxCommandStillRunning(result: unknown): boolean {
  switch (typeof result) {
    case "string":
      return /Process running with session ID \d+/u.test(result);
    case "object": {
      if (result === null) {
        return false;
      }
      const { sessionId, session_id } = result as { sessionId?: unknown; session_id?: unknown };
      return [sessionId, session_id].some((value) => typeof value === "number");
    }
    default:
      return false;
  }
}
|
|
3004
|
+
|
|
3005
|
+
/**
 * Tells whether the environment carries a complete Azure service principal:
 * client id, client secret and tenant id, each from its `AZURE_*` variable or
 * the `ARM_*` fallback.
 *
 * @param environment Environment variables to inspect.
 * @returns `true` when all three components are non-empty.
 */
function hasAzureServicePrincipal(environment: Record<string, string>): boolean {
  const requiredComponents = [
    ["AZURE_CLIENT_ID", "ARM_CLIENT_ID"],
    ["AZURE_CLIENT_SECRET", "ARM_CLIENT_SECRET"],
    ["AZURE_TENANT_ID", "ARM_TENANT_ID"],
  ] as const;
  return requiredComponents.every(([azureKey, armKey]) => Boolean(environment[azureKey] || environment[armKey]));
}
|
|
3011
|
+
|
|
3012
|
+
/**
 * Executes the Azure CLI login script in the sandbox session and reports the
 * outcome through `context.onRuntimeEvent` (started, then completed or failed).
 *
 * @param session Sandbox session; `exec` is preferred, `execCommand` is the fallback.
 * @param context Lifecycle context (optional `runAs`, event sink).
 * @throws When the session has no exec method, or the command result is rejected
 *   by `assertSandboxCommandSucceeded`. The error is rethrown after the failed event.
 */
export async function runAzureCliLoginHook(
  session: SandboxSessionLike,
  context: SandboxLifecycleHookContext = { environment: {} },
): Promise<void> {
  const payload = { name: "azure-cli-login", command: "az login --service-principal" };
  await context.onRuntimeEvent?.({ type: "sandbox.operation.started", payload });
  try {
    // Both exec flavours receive the same arguments.
    const execArgs = {
      cmd: azureCliLoginCommand(),
      workdir: "/workspace",
      ...(context.runAs ? { runAs: context.runAs } : {}),
      yieldTimeMs: SANDBOX_LIFECYCLE_COMMAND_TIMEOUT_MS,
      maxOutputTokens: 20_000,
    };

    let result: unknown;
    if (session.exec) {
      result = await session.exec(execArgs);
    } else if (session.execCommand) {
      result = await session.execCommand(execArgs);
    } else {
      throw new Error("Sandbox session does not support command execution");
    }
    assertSandboxCommandSucceeded(result, "Azure CLI login hook");

    await context.onRuntimeEvent?.({ type: "sandbox.operation.completed", payload });
  } catch (error) {
    await context.onRuntimeEvent?.({
      type: "sandbox.operation.failed",
      payload: { ...payload, error: error instanceof Error ? error.message : String(error) },
    });
    throw error;
  }
}
|
|
3052
|
+
|
|
3053
|
+
/**
 * Builds the Azure OpenAI deployment base URL,
 * `<endpoint>/openai/deployments/<deployment>`, with trailing slashes removed
 * from the endpoint.
 *
 * @param settings Settings providing `azureOpenaiEndpoint` and `azureOpenaiDeployment`.
 * @returns The deployment-scoped base URL.
 * @throws Error when the endpoint (after trimming) or the deployment is empty.
 */
function azureDeploymentBaseUrl(settings: Settings): string {
  const deployment = settings.azureOpenaiDeployment;
  const endpoint = settings.azureOpenaiEndpoint?.replace(/\/+$/, "");
  if (endpoint && deployment) {
    return `${endpoint}/openai/deployments/${deployment}`;
  }
  throw new Error("Azure OpenAI endpoint/deployment settings are incomplete");
}
|
|
3060
|
+
|
|
3061
|
+
/**
 * Computes the default query parameters for an Azure OpenAI client.
 *
 * @param settings Settings carrying the optional `azureOpenaiApiVersion`.
 * @param baseURL Base URL the client will use.
 * @returns `{ "api-version": ... }`, or `undefined` when no API version is
 *   configured or `baseURL` (trailing slashes ignored, case-insensitive) ends
 *   with `/openai/v1`.
 */
export function azureOpenAIDefaultQuery(
  settings: Pick<Settings, "azureOpenaiApiVersion">,
  baseURL: string,
): Record<string, string> | undefined {
  const apiVersion = settings.azureOpenaiApiVersion;
  if (!apiVersion) {
    return undefined;
  }
  const withoutTrailingSlashes = baseURL.replace(/\/+$/, "");
  const targetsV1Surface = withoutTrailingSlashes.toLowerCase().endsWith("/openai/v1");
  return targetsV1Surface ? undefined : { "api-version": apiVersion };
}
|
|
3072
|
+
|
|
3073
|
+
// Starting with @openai/agents 0.11.0, local sandbox sources (the lazy
// bundled-skills source included) have to live inside the SDK process working
// directory. Reading from anywhere else needs manifest.extraPathGrants, and
// remote sandbox clients such as Modal reject manifests that carry extra path
// grants. The packaged skills ship inside the runtime package, which in
// production sits outside the worker's cwd, so a copy is staged under the
// working directory once per process rather than granting the packaged path.
let stagedBundledSkillsDir: string | null = null;

/**
 * Resolves the directory the bundled skills should be read from.
 *
 * The packaged directory is looked up next to this module first, then under
 * `../src`. If it already lies within `process.cwd()` it is returned directly;
 * otherwise it is staged under `<cwd>/.opengeni/` and the staged path is cached
 * for later calls.
 */
function bundledSkillsDir(): string {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  const besideModule = join(moduleDir, "bundled_hashicorp_terraform_skills");
  const underSrc = join(moduleDir, "..", "src", "bundled_hashicorp_terraform_skills");

  // The first existing candidate wins; the beside-module path is the fallback.
  let packaged = besideModule;
  if (!existsSync(besideModule) && existsSync(underSrc)) {
    packaged = underSrc;
  }

  if (isPathWithin(process.cwd(), packaged)) {
    return packaged;
  }
  if (stagedBundledSkillsDir) {
    return stagedBundledSkillsDir;
  }
  const staged = stageBundledSkills(packaged, join(process.cwd(), ".opengeni", "bundled_hashicorp_terraform_skills"));
  stagedBundledSkillsDir = staged;
  return staged;
}
|
|
3096
|
+
|
|
3097
|
+
/**
 * Copies the packaged skills directory to `target` through a pid-suffixed
 * scratch directory that is renamed into place.
 *
 * @param packaged Source directory to copy.
 * @param target Destination directory; any existing content is replaced.
 * @returns `target`.
 * @throws Rethrows the rename error when the rename fails and `target` does not
 *   exist afterwards.
 */
function stageBundledSkills(packaged: string, target: string): string {
  const removeTree = (path: string): void => rmSync(path, { recursive: true, force: true });
  const scratch = `${target}.tmp-${process.pid}`;

  removeTree(scratch);
  mkdirSync(dirname(scratch), { recursive: true });
  cpSync(packaged, scratch, { recursive: true });
  removeTree(target);

  try {
    renameSync(scratch, target);
  } catch (error) {
    // A concurrent process may have staged the same content between the removal
    // above and this rename; drop our copy and accept theirs if it exists.
    removeTree(scratch);
    if (!existsSync(target)) {
      throw error;
    }
  }
  return target;
}
|
|
3114
|
+
|
|
3115
|
+
/**
 * Tells whether `candidate` is `root` itself or a path nested inside it.
 *
 * @param root Directory that should contain the candidate.
 * @param candidate Path to test.
 * @returns `true` when the path from `root` to `candidate` never climbs above `root`.
 */
function isPathWithin(root: string, candidate: string): boolean {
  const relativePath = relative(root, candidate);
  if (relativePath === "") {
    return true;
  }
  // An absolute result means no relative route exists (e.g. another Windows drive).
  if (isAbsolute(relativePath)) {
    return false;
  }
  // Only a leading ".." SEGMENT escapes the root. A plain prefix test would also
  // reject real descendants whose first segment merely starts with two dots, such
  // as "..cache/skills".
  const escapesRoot =
    relativePath === ".." || relativePath.startsWith("../") || relativePath.startsWith("..\\");
  return !escapesRoot;
}
|
|
3119
|
+
|
|
3120
|
+
/**
 * Produces the skill source handed to the SDK Skills capability.
 *
 * With no pack skills, the plain bundled local-dir source is returned unchanged,
 * which keeps the pre-pack behavior. With pack skills, the result is one
 * in-memory dir source that merges the bundled skill directories (as local_dir
 * entries the SDK materializes lazily) with pack skill directories built from
 * manifest-carried file content: one skill index, one `## Skills` instruction
 * section, lazy `load_skill` for all of them. A pack skill shadows a bundled
 * skill that has the same directory name.
 *
 * @param packSkills Pack skills to merge in; names must be single safe path
 *   segments and unique ignoring case.
 * @throws Error on an invalid or duplicate pack skill name, or when building a
 *   pack skill directory fails.
 */
export function lazySkillSourceWithPackSkills(packSkills: PackSkill[]): LocalDirLazySkillSource {
  const bundledRoot = bundledSkillsDir();
  const bundledSource = localDirLazySkillSource({ src: bundledRoot });
  if (packSkills.length === 0) {
    return bundledSource;
  }

  // Bundled directories go in first so a same-named pack skill overwrites them.
  const children: Record<string, Entry> = {};
  for (const dirName of bundledSkillDirNames(bundledRoot)) {
    children[dirName] = localDir({ src: join(bundledRoot, dirName) });
  }

  const packIndex: SkillIndexEntry[] = [];
  const packNames = new Set<string>();
  // Lower-cased names, used only to detect duplicates case-insensitively.
  const seenNameKeys = new Set<string>();
  for (const skill of packSkills) {
    const { name } = skill;
    assertSafePackSkillName(name);
    const nameKey = name.toLowerCase();
    if (seenNameKeys.has(nameKey)) {
      throw new Error(`Duplicate pack skill name: ${name}`);
    }
    seenNameKeys.add(nameKey);
    packNames.add(name);
    children[name] = packSkillDirEntry(skill);
    packIndex.push({ name, description: packSkillDescription(skill), path: name });
  }

  return {
    source: dir({ children }),
    getIndex: (manifest, skillsPath) => {
      const bundledIndex = bundledSource.getIndex?.(manifest, skillsPath) ?? [];
      // Bundled index entries shadowed by a pack skill are dropped.
      const unshadowed = bundledIndex.filter((entry) => !packNames.has(entry.path ?? entry.name));
      return [...unshadowed, ...packIndex];
    },
  };
}
|
|
3160
|
+
|
|
3161
|
+
/**
 * Lists the immediate subdirectories of `root` that contain a `SKILL.md` file.
 *
 * @param root Directory holding one subdirectory per bundled skill.
 * @returns The matching directory names, sorted.
 */
function bundledSkillDirNames(root: string): string[] {
  const names: string[] = [];
  for (const entry of readdirSync(root, { withFileTypes: true })) {
    if (entry.isDirectory() && existsSync(join(root, entry.name, "SKILL.md"))) {
      names.push(entry.name);
    }
  }
  names.sort();
  return names;
}
|
|
3167
|
+
|
|
3168
|
+
/** Mutable in-memory directory tree used while assembling a pack skill. */
interface PackSkillDirNode {
  /** Child directories keyed by directory name. */
  dirs: Map<string, PackSkillDirNode>;
  /** File contents keyed by file name. */
  files: Map<string, string>;
}
|
|
3172
|
+
|
|
3173
|
+
/**
 * Converts a pack skill's flat file list into a nested directory entry.
 *
 * @param skill Pack skill whose `files` carry `/`-separated relative paths.
 * @returns The directory entry built from the assembled tree.
 * @throws Error when a path is invalid, a name is used as both a file and a
 *   directory, a path is duplicated, or there is no top-level `SKILL.md`.
 */
function packSkillDirEntry(skill: PackSkill): Dir {
  const root: PackSkillDirNode = { dirs: new Map(), files: new Map() };

  for (const skillFile of skill.files) {
    const segments = packSkillPathSegments(skill.name, skillFile.path);
    const parentDirs = segments.slice(0, -1);
    const filename = segments[segments.length - 1]!;

    // Walk down to the file's parent directory, creating nodes on the way.
    let cursor = root;
    for (const dirName of parentDirs) {
      if (cursor.files.has(dirName)) {
        throw new Error(`Pack skill ${skill.name} uses ${dirName} as both a file and a directory`);
      }
      let child = cursor.dirs.get(dirName);
      if (child === undefined) {
        child = { dirs: new Map(), files: new Map() };
        cursor.dirs.set(dirName, child);
      }
      cursor = child;
    }

    const nameTaken = cursor.dirs.has(filename) || cursor.files.has(filename);
    if (nameTaken) {
      throw new Error(`Duplicate pack skill file path in ${skill.name}: ${skillFile.path}`);
    }
    cursor.files.set(filename, skillFile.content);
  }

  if (root.files.has("SKILL.md")) {
    return packSkillDirFromNode(root);
  }
  throw new Error(`Pack skill ${skill.name} is missing a top-level SKILL.md file`);
}
|
|
3200
|
+
|
|
3201
|
+
/**
 * Recursively converts an assembled directory node into a `dir` entry.
 * Subdirectories are added before files.
 *
 * @param node Tree node to convert.
 * @returns The directory entry for `node` and everything below it.
 */
function packSkillDirFromNode(node: PackSkillDirNode): Dir {
  const children: Record<string, Entry> = {};
  node.dirs.forEach((subdirectory, name) => {
    children[name] = packSkillDirFromNode(subdirectory);
  });
  node.files.forEach((content, name) => {
    children[name] = file({ content });
  });
  return dir({ children });
}
|
|
3211
|
+
|
|
3212
|
+
/**
 * Ensures a pack skill name is a single safe path segment.
 *
 * @param name Pack skill name to validate.
 * @throws Error when the name is not a valid path or spans several segments.
 */
function assertSafePackSkillName(name: string): void {
  const segments = packSkillPathSegments(name, name);
  if (segments.length === 1) {
    return;
  }
  throw new Error(`Invalid pack skill name: ${name}`);
}
|
|
3217
|
+
|
|
3218
|
+
/**
 * Splits a pack skill file path into its `/`-separated segments after
 * validating it.
 *
 * @param skillName Skill name, used only in the error message.
 * @param path Relative path to validate and split.
 * @returns The path segments.
 * @throws Error when the path starts with `/`, contains a backslash, or has an
 *   empty, `.` or `..` segment.
 */
function packSkillPathSegments(skillName: string, path: string): string[] {
  const segments = path.split("/");
  const forbiddenSegments = new Set(["", ".", ".."]);
  const isAbsolutePath = path.startsWith("/");
  const hasBackslash = path.includes("\\");
  const hasForbiddenSegment = segments.some((segment) => forbiddenSegments.has(segment));
  if (isAbsolutePath || hasBackslash || hasForbiddenSegment) {
    throw new Error(`Invalid pack skill file path for ${skillName}: ${path}`);
  }
  return segments;
}
|
|
3225
|
+
|
|
3226
|
+
/**
 * Picks the description shown for a pack skill: the trimmed explicit
 * `description` when non-empty, otherwise the frontmatter description of the
 * top-level `SKILL.md`, otherwise a fixed placeholder.
 *
 * @param skill Pack skill to describe.
 * @returns The description text.
 */
function packSkillDescription(skill: PackSkill): string {
  const declared = skill.description?.trim();
  if (declared) {
    return declared;
  }
  const skillDoc = skill.files.find((entry) => entry.path === "SKILL.md");
  const fromFrontmatter = skillFrontmatterDescription(skillDoc?.content ?? "");
  return fromFrontmatter ?? "No description provided.";
}
|
|
3234
|
+
|
|
3235
|
+
/**
 * Extracts the top-level `description` value from a markdown document's
 * frontmatter.
 *
 * This is a minimal line-based reader, not a YAML parser. It handles an inline
 * value (optionally quoted), or an indented continuation block introduced by an
 * empty value or by a YAML block scalar header (`|` or `>` with optional
 * chomping and indentation indicators, e.g. `>-`, `|+`, `|2-`). Block lines are
 * trimmed and joined with single spaces. Blank lines inside the block are
 * skipped instead of ending it.
 *
 * @param markdown Document text, expected to open with a `---` frontmatter fence.
 * @returns The description, or `null` when there is no closed frontmatter, no
 *   `description` key, or the description is empty.
 */
function skillFrontmatterDescription(markdown: string): string | null {
  const lines = markdown.split(/\r?\n/);
  if (lines[0]?.trim() !== "---") {
    return null;
  }
  const end = lines.findIndex((line, index) => index > 0 && line.trim() === "---");
  if (end === -1) {
    return null;
  }
  // "|" or ">" plus optional chomping (+/-) and indentation (1-9) indicators in
  // either order, optionally followed by a comment.
  const blockScalarHeader = /^[|>](?:[+-]?[1-9]?|[1-9][+-])(?:\s+#.*)?$/;
  const collected: string[] = [];
  let inDescription = false;
  for (const line of lines.slice(1, end)) {
    const match = line.match(/^description:\s*(.*)$/);
    if (match) {
      const inline = match[1]!.trim();
      if (inline && !blockScalarHeader.test(inline)) {
        return unquoteFrontmatterValue(inline);
      }
      inDescription = true;
      continue;
    }
    if (inDescription) {
      if (line.trim() === "") {
        // A blank line separates paragraphs inside a block; it does not end it.
        continue;
      }
      if (/^\s+\S/.test(line)) {
        collected.push(line.trim());
        continue;
      }
      // The first non-indented line is the next key: the block is over.
      break;
    }
  }
  const blockValue = collected.join(" ").trim();
  return blockValue ? blockValue : null;
}
|
|
3267
|
+
|
|
3268
|
+
/**
 * Strips one pair of matching surrounding quotes (single or double) from a
 * frontmatter value. Anything else is returned unchanged.
 *
 * @param value Raw inline value.
 * @returns The value without its enclosing quotes.
 */
function unquoteFrontmatterValue(value: string): string {
  const opening = value.charAt(0);
  const closing = value.charAt(value.length - 1);
  const isQuoteCharacter = opening === '"' || opening === "'";
  const isWrapped = value.length >= 2 && opening === closing && isQuoteCharacter;
  return isWrapped ? value.slice(1, -1) : value;
}
|
|
3274
|
+
|
|
3275
|
+
/**
 * Normalizes an unknown value to a trimmed, non-empty string.
 *
 * @param value Value to inspect.
 * @returns The trimmed string, or `undefined` for non-strings and blank strings.
 */
function stringValue(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed === "" ? undefined : trimmed;
}
|
|
3278
|
+
|
|
3279
|
+
/**
 * Type guard that distinguishes async iterables from sync ones by checking for
 * a callable `Symbol.asyncIterator` member.
 *
 * @param source Sync or async iterable.
 * @returns `true` when `source` implements the async iteration protocol.
 */
function isAsyncIterable<T>(source: Iterable<T> | AsyncIterable<T>): source is AsyncIterable<T> {
  const asyncIteratorFactory = (source as AsyncIterable<T>)[Symbol.asyncIterator];
  return typeof asyncIteratorFactory === "function";
}
|
|
3282
|
+
|
|
3283
|
+
/**
 * Serializes a value to JSON after recursively sorting object keys with
 * `sortJson`, so key insertion order does not affect the output.
 *
 * @param value Value to serialize.
 * @returns The JSON text of the key-sorted value.
 */
function stableJson(value: unknown): string {
  const keySorted = sortJson(value);
  return JSON.stringify(keySorted);
}
|
|
3286
|
+
|
|
3287
|
+
/**
 * Returns a copy of a JSON-like value whose object keys are sorted with
 * `localeCompare` at every depth. Array order is preserved; primitives and
 * `null` are returned as-is.
 *
 * @param value Value to normalize.
 * @returns The key-sorted copy.
 */
function sortJson(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map(sortJson);
  }
  if (value === null || typeof value !== "object") {
    return value;
  }
  const source = value as Record<string, unknown>;
  const orderedKeys = Object.keys(source).sort((a, b) => a.localeCompare(b));
  return Object.fromEntries(orderedKeys.map((key) => [key, sortJson(source[key])]));
}
|
|
3296
|
+
|
|
3297
|
+
/**
 * Derives a string identifier for an approval item from the first non-nullish
 * of `rawItem.callId`, `rawItem.id`, `id` and `name`, falling back to
 * `"approval"`.
 *
 * @param item Approval item of unknown shape.
 * @returns The identifier converted with `String`.
 */
function approvalIdentifier(item: any): string {
  const candidates = [item?.rawItem?.callId, item?.rawItem?.id, item?.id, item?.name];
  const chosen = candidates.find((candidate) => candidate !== undefined && candidate !== null);
  return String(chosen ?? "approval");
}
|