@opengeni/runtime 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65):
  1. package/dist/chunk-2PO56VAL.js +3478 -0
  2. package/dist/chunk-2PO56VAL.js.map +1 -0
  3. package/dist/index.d.ts +912 -0
  4. package/dist/index.js +3663 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/sandbox/index.d.ts +1738 -0
  7. package/dist/sandbox/index.js +187 -0
  8. package/dist/sandbox/index.js.map +1 -0
  9. package/package.json +49 -0
  10. package/src/bundled_hashicorp_terraform_skills/LICENSE +373 -0
  11. package/src/bundled_hashicorp_terraform_skills/README.md +18 -0
  12. package/src/bundled_hashicorp_terraform_skills/UPSTREAM_GIT_SHA +1 -0
  13. package/src/bundled_hashicorp_terraform_skills/azure-verified-modules/SKILL.md +613 -0
  14. package/src/bundled_hashicorp_terraform_skills/checkov/SKILL.md +43 -0
  15. package/src/bundled_hashicorp_terraform_skills/refactor-module/SKILL.md +538 -0
  16. package/src/bundled_hashicorp_terraform_skills/social-media-marketing/SKILL.md +35 -0
  17. package/src/bundled_hashicorp_terraform_skills/terraform-search-import/SKILL.md +372 -0
  18. package/src/bundled_hashicorp_terraform_skills/terraform-search-import/references/MANUAL-IMPORT.md +113 -0
  19. package/src/bundled_hashicorp_terraform_skills/terraform-search-import/scripts/list_resources.sh +38 -0
  20. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/SKILL.md +480 -0
  21. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/api-monitoring.md +543 -0
  22. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/component-blocks.md +476 -0
  23. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/deployment-blocks.md +391 -0
  24. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/examples.md +1529 -0
  25. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/linked-stacks.md +187 -0
  26. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/troubleshooting.md +671 -0
  27. package/src/bundled_hashicorp_terraform_skills/terraform-style-guide/SKILL.md +353 -0
  28. package/src/bundled_hashicorp_terraform_skills/terraform-test/SKILL.md +451 -0
  29. package/src/bundled_hashicorp_terraform_skills/terraform-test/references/CI_CD.md +80 -0
  30. package/src/bundled_hashicorp_terraform_skills/terraform-test/references/EXAMPLES.md +314 -0
  31. package/src/bundled_hashicorp_terraform_skills/terraform-test/references/MOCK_PROVIDERS.md +171 -0
  32. package/src/codex-tool-search.ts +267 -0
  33. package/src/context-compaction.ts +538 -0
  34. package/src/history-sanitizer.ts +719 -0
  35. package/src/index.ts +3299 -0
  36. package/src/sandbox/capabilities.ts +69 -0
  37. package/src/sandbox/channel-a.ts +1031 -0
  38. package/src/sandbox/display-stack.ts +231 -0
  39. package/src/sandbox/errors.ts +34 -0
  40. package/src/sandbox/index.ts +832 -0
  41. package/src/sandbox/providers/blaxel.ts +35 -0
  42. package/src/sandbox/providers/cloudflare.ts +24 -0
  43. package/src/sandbox/providers/daytona.ts +34 -0
  44. package/src/sandbox/providers/docker.ts +17 -0
  45. package/src/sandbox/providers/e2b.ts +36 -0
  46. package/src/sandbox/providers/index.ts +107 -0
  47. package/src/sandbox/providers/local.ts +13 -0
  48. package/src/sandbox/providers/modal.ts +55 -0
  49. package/src/sandbox/providers/none.ts +13 -0
  50. package/src/sandbox/providers/runloop.ts +32 -0
  51. package/src/sandbox/providers/selfhosted.ts +96 -0
  52. package/src/sandbox/providers/types.ts +38 -0
  53. package/src/sandbox/providers/vercel.ts +29 -0
  54. package/src/sandbox/recording.ts +286 -0
  55. package/src/sandbox/routing/backend-resolver.ts +189 -0
  56. package/src/sandbox/routing/routing-session.ts +455 -0
  57. package/src/sandbox/select.ts +371 -0
  58. package/src/sandbox/selfhosted/capabilities.ts +255 -0
  59. package/src/sandbox/selfhosted/control-rpc.ts +351 -0
  60. package/src/sandbox/selfhosted/session.ts +930 -0
  61. package/src/sandbox/selfhosted/testing.ts +230 -0
  62. package/src/sandbox/stream-port.ts +185 -0
  63. package/src/sandbox/stream-token.ts +90 -0
  64. package/src/sandbox/terminal-server.ts +203 -0
  65. package/src/sandbox-computer.ts +835 -0
package/src/index.ts ADDED
@@ -0,0 +1,3299 @@
1
+ import type { ConfiguredModel, ContextCompactionMode, ModelProviderApi, ResolvedModelProvider, Settings } from "@opengeni/config";
2
+ import { AGENT_INSTRUCTIONS_CORE_PLACEHOLDER, collectSandboxEnvironment, contextServerCompactThreshold, firstPartyMcpBaseUrl, parseExposedPorts, resolveContextCompactionMode, resolveModelProvider, sandboxLifecycleHookIds } from "@opengeni/config";
3
+ import { CAPABILITY_DESCRIPTORS, isClearedRunStateBlob, signDelegatedAccessToken, type Permission, type ReasoningEffort, type ResourceRef, type SessionEventType, type ToolRef } from "@opengeni/contracts";
4
+ import {
5
+ Agent,
6
+ AgentsError,
7
+ connectMcpServers,
8
+ OpenAIProvider,
9
+ setDefaultModelProvider,
10
+ MaxTurnsExceededError,
11
+ MCPServerStreamableHttp,
12
+ // Provider-bound Model instances. Both are re-exported from
13
+ // @openai/agents-openai via `export * from '@openai/agents-openai'` in
14
+ // @openai/agents' index (0.11.6), so the multi-provider routing imports them
15
+ // from the same entrypoint as the rest of the SDK rather than reaching into
16
+ // the openai subpackage. OpenAIChatCompletionsModel speaks /v1/chat/completions
17
+ // (the registry "chat" wire API, e.g. Fireworks); OpenAIResponsesModel speaks
18
+ // /v1/responses (the built-in OpenAI/Azure "responses" wire API). Both bind a
19
+ // model id to a specific OpenAI client, which is what routes a turn to its
20
+ // provider without touching the global default client.
21
+ OpenAIChatCompletionsModel,
22
+ OpenAIResponsesModel,
23
+ RunState,
24
+ isOpenAIResponsesRawModelStreamEvent,
25
+ run,
26
+ Runner,
27
+ setDefaultOpenAIClient,
28
+ setDefaultOpenAIKey,
29
+ setOpenAIResponsesTransport,
30
+ // Hosted web_search tool factory. Re-exported from @openai/agents-openai via
31
+ // `export * from '@openai/agents-openai'` in @openai/agents' index (0.11.6);
32
+ // it returns a { type: 'hosted_tool', providerData: { type: 'web_search' } }
33
+ // descriptor the OpenAI Responses model serializes into request.tools[].
34
+ webSearchTool,
35
+ // The SDK's V4A-diff applier — the apply_patch host the filesystem capability's
36
+ // editor uses. The agent-loop-free sandbox leaf cannot import it (it lives behind
37
+ // the `@openai/agents` root the leaf forbids), so the barrel imports it here and
38
+ // injects it into the selfhosted session's `createEditor` via setSelfhostedApplyDiff
39
+ // (below, right after the leaf re-export). This lets a selfhosted active backend
40
+ // apply file edits over its NATS fs ops using the SDK's exact diff semantics.
41
+ applyDiff,
42
+ type AgentInputItem,
43
+ type CallModelInputFilter,
44
+ type MCPServer,
45
+ type Model,
46
+ type ModelProvider,
47
+ type RunStreamEvent,
48
+ } from "@openai/agents";
49
+ import {
50
+ localDirLazySkillSource,
51
+ } from "@openai/agents/sandbox/local";
52
+ import {
53
+ Capabilities,
54
+ Manifest,
55
+ SandboxAgent,
56
+ StaticCompactionPolicy,
57
+ azureBlobMount,
58
+ compaction,
59
+ dir,
60
+ file,
61
+ filesystem,
62
+ gitRepo,
63
+ inContainerMountStrategy,
64
+ localDir,
65
+ s3Mount,
66
+ shell,
67
+ skills,
68
+ type Dir,
69
+ type Entry,
70
+ type LocalDirLazySkillSource,
71
+ type SandboxClient,
72
+ type SandboxSessionLike,
73
+ type SandboxSessionState,
74
+ type SandboxRunConfig,
75
+ type SkillIndexEntry,
76
+ } from "@openai/agents/sandbox";
77
+ import { ModalCloudBucketMountStrategy } from "@openai/agents-extensions/sandbox/modal";
78
+ import OpenAI from "openai";
79
+ import { CODEX_APPS_MCP_SERVER_ID, CODEX_MODEL_ID_PREFIX, CODEX_ORIGINATOR, codexAppsSanitizingFetch, codexRequestStorage, codexSubscriptionFetch } from "@opengeni/codex";
80
+ import { cpSync, existsSync, mkdirSync, readdirSync, renameSync, rmSync } from "node:fs";
81
+ import { dirname, isAbsolute, join, posix as posixPath, relative } from "node:path";
82
+ import { fileURLToPath } from "node:url";
83
+
84
+ import { computerCallNormalizingFetch, normalizeComputerCallActions, sanitizeHistoryItemsForModel } from "./history-sanitizer";
85
+ import { installCodexToolSearch } from "./codex-tool-search";
86
+ import { enforceInputBudget, estimateItemTokens } from "./context-compaction";
87
+ import {
88
+ createSandboxClient,
89
+ deserializeSandboxSessionStateEnvelope,
90
+ desktopCapableBackend,
91
+ restoredSandboxSessionStateFromEntry,
92
+ setSelfhostedApplyDiff,
93
+ } from "./sandbox";
94
+ import { computerUse } from "./sandbox-computer";
95
+
96
+ // P4.3 computer-use surface (the agent's :0 driver). Re-exported from the barrel
97
+ // so callers (the worker, live proofs) reach SandboxComputer/ComputerUseCapability
98
+ // alongside the rest of the runtime. NOT part of the agent-loop-free leaf (it
99
+ // imports computerTool from the @openai/agents root).
100
+ export {
101
+ SandboxComputer,
102
+ ComputerUseCapability,
103
+ computerUse,
104
+ ComputerUnavailableError,
105
+ ComputerReadOnlyError,
106
+ ComputerActionError,
107
+ type SandboxComputerOptions,
108
+ type ComputerUseArgs,
109
+ } from "./sandbox-computer";
110
+
111
+ // The agent-loop-free sandbox leaf (createSandboxClient + resume/recovery
112
+ // helpers + the config-owned env/port re-exports). Re-exported verbatim so the
113
+ // barrel surface is unchanged for apps/worker while @opengeni/runtime/sandbox
114
+ // stays importable by the API without the agent loop.
115
+ export * from "./sandbox";
116
+
117
+ // Inject the SDK's V4A `applyDiff` into the selfhosted session's apply_patch editor
118
+ // at module load. The leaf can't import `applyDiff` (agent-loop root), so the
119
+ // barrel — which already imports `@openai/agents` — wires it once. A selfhosted
120
+ // active backend can now apply file edits over its NATS fs ops with the SDK's exact
121
+ // diff semantics; without this, `createEditor()` throws a clear "not injected" error
122
+ // rather than mis-editing. Runs at import time, before any turn binds a capability.
123
+ setSelfhostedApplyDiff(applyDiff as unknown as (input: string, diff: string, mode?: "default" | "create") => string);
124
+
125
+ export { sanitizeHistoryItemsForModel, stripReasoningEncryptedContent, stripReasoningIdentityFromSerializedRunState, neutralizeToolSearchItemsInSerializedRunState } from "./history-sanitizer";
126
+ export type { HistoryItem } from "./history-sanitizer";
127
+
128
+ // The provider-bound Model classes used by buildModelInstance/resolveTurnModel.
129
+ // Re-exported so callers (and routing tests) can assert which wire API a
130
+ // resolved turn was bound to — OpenAIChatCompletionsModel for registry "chat"
131
+ // providers (Fireworks), OpenAIResponsesModel for the built-in "responses" path
132
+ // — without reaching into @openai/agents directly.
133
+ export { OpenAIChatCompletionsModel, OpenAIResponsesModel } from "@openai/agents";
134
+
135
+ export {
136
+ planCompaction,
137
+ enforceInputBudget,
138
+ buildSummaryItem,
139
+ buildCompactionMessages,
140
+ isCompactionSummary,
141
+ isUserMessage,
142
+ findKeepBoundary,
143
+ estimateTokens,
144
+ estimateItemTokens,
145
+ compactionSummaryText,
146
+ renderPrefixTranscript,
147
+ COMPACTION_SUMMARY_MARKER,
148
+ SUMMARY_PREFIX,
149
+ SUMMARY_INSTRUCTIONS,
150
+ } from "./context-compaction";
151
+ export type { CompactionItem, CompactionPlan, PlanCompactionInput } from "./context-compaction";
152
+
153
+ ensureReadableStreamFrom();
154
+
155
/**
 * Timeout budget in milliseconds (two minutes).
 * NOTE(review): judging by the name this bounds sandbox lifecycle commands; the
 * call sites are outside this part of the file — confirm.
 */
const SANDBOX_LIFECYCLE_COMMAND_TIMEOUT_MS = 2 * 60 * 1_000;
156
+
157
/** Pairs a session event type with an opaque payload. */
export type NormalizedRuntimeEvent = {
  /** Event discriminator; the `SessionEventType` imported from `@opengeni/contracts`. */
  type: SessionEventType;
  /** Event body. Typed `unknown`, so consumers must narrow it before use. */
  payload: unknown;
};
161
+
162
/** Token accounting for a single model response. Every counter is optional. */
export type ModelResponseUsage = {
  /** Identifier of the response the usage belongs to, when one is known. */
  responseId?: string;
  usage: {
    inputTokens?: number;
    outputTokens?: number;
    totalTokens?: number;
    /** Input-token breakdown: either one record or a list of records. */
    inputTokensDetails?: Record<string, number> | Record<string, number>[];
  };
};
171
+
172
/** Element type of the list that `MCPServer.listTools()` resolves to. */
type RuntimeMcpTool = Awaited<ReturnType<MCPServer["listTools"]>>[number];
173
+
174
/**
 * Installs a `ReadableStream.from` polyfill on the global `ReadableStream`
 * constructor when the host runtime does not provide one.
 *
 * Does nothing when `ReadableStream` itself is absent or when `from` is already
 * a function. The polyfill is defined as a configurable, writable,
 * non-enumerable static property.
 *
 * The produced stream pulls one item per `pull` call from the source's sync or
 * async iterator and closes when the iterator reports `done`. On cancellation
 * it calls the iterator's `return` method (when present) with the cancellation
 * reason, matching the Streams specification's `ReadableStream.from` algorithm.
 */
export function ensureReadableStreamFrom(): void {
  const ctor = globalThis.ReadableStream as (typeof ReadableStream & {
    from?: <T>(source: Iterable<T> | AsyncIterable<T>) => ReadableStream<T>;
  }) | undefined;
  if (!ctor || typeof ctor.from === "function") {
    return;
  }
  Object.defineProperty(ctor, "from", {
    configurable: true,
    writable: true,
    value<T>(source: Iterable<T> | AsyncIterable<T>): ReadableStream<T> {
      const iterator = isAsyncIterable(source)
        ? source[Symbol.asyncIterator]()
        : source[Symbol.iterator]();
      return new ReadableStream<T>({
        async pull(controller) {
          // `await` covers both iterator kinds: a sync result passes straight through.
          const next = await iterator.next();
          if (next.done) {
            controller.close();
          } else {
            controller.enqueue(next.value);
          }
        },
        async cancel(reason) {
          // Forward the reason so the source's cleanup can observe why it was stopped.
          await iterator.return?.(reason);
        },
      });
    },
  });
}
204
+
205
/**
 * Input for one agent segment, discriminated by `kind`:
 * - `"message"`: a text message, optionally accompanied by prior state.
 * - `"approval"`: an approve/reject decision tied to a serialized run state.
 */
export type AgentSegmentInput =
  | {
      kind: "message";
      text: string;
      serializedRunState?: string | null;
      // Items-mode conversation truth (issue #35): when provided, turn input is
      // built from these verbatim AgentInputItems and the stored sandbox
      // envelope — no RunState deserialization, no SDK-version coupling.
      historyItems?: AgentInputItem[] | null;
      sandboxEnvelope?: Record<string, unknown> | null;
    }
  | {
      kind: "approval";
      serializedRunState: string;
      approvalId: string;
      decision: "approve" | "reject";
      message?: string;
    };
217
+
218
/** Run input in the form accepted by `OpenGeniRuntime.runStream`. */
export type PreparedAgentInput = {
  /** Plain text, a list of input items, or a `RunState`. */
  input: string | AgentInputItem[] | RunState<any, any>;
  /** Sandbox session state accompanying the input, when present. */
  sandboxSessionState?: SandboxSessionState;
  /** Serialized run state accompanying the input, when present. */
  serializedRunStateForSandbox?: string;
};
223
+
224
/**
 * Describes one file associated with a sandbox mount path. The bytes may be
 * referenced by `url`, carried inline in `content`, or both may be absent as far
 * as the type is concerned.
 */
export type SandboxFileDownload = {
  fileId: string;
  /** Path associated with the file inside the sandbox. */
  mountPath: string;
  filename: string;
  /** Location the file can be fetched from, when not carried inline. */
  url?: string;
  /** Inline file bytes. */
  content?: Uint8Array;
  /** Expiry, either as a `Date` or as a string. */
  expiresAt?: Date | string;
  sizeBytes?: number;
};
233
+
234
/** The runtime surface: configuration, model routing, agent/tool/input preparation, and streaming runs. */
export type OpenGeniRuntime = {
  /** Applies the given settings to the runtime. */
  configure: (settings: Settings) => void;
  /**
   * Multi-provider per-turn model routing. Yields the resolved provider, its
   * (cached) client, the provider-bound Model instance, and the configured-model
   * shape; null when the turn's model is not in the registry, so the caller
   * falls back to the legacy global-client path (settings.openaiModel).
   */
  resolveTurnModel: (settings: Settings, modelId: string) => ReturnType<typeof resolveTurnModel>;
  /** Builds the agent for the given settings and resources. */
  buildAgent: (settings: Settings, resources: ResourceRef[], options?: BuildAgentOptions) => Agent<any, any>;
  /** Asynchronously prepares the tools for the given tool references. */
  prepareTools: (settings: Settings, tools: ToolRef[], options?: PrepareToolsOptions) => Promise<PreparedAgentTools>;
  /** Asynchronously turns a segment input into a `PreparedAgentInput`. */
  prepareInput: (agent: Agent<any, any>, input: AgentSegmentInput, options?: PrepareInputOptions) => Promise<PreparedAgentInput>;
  /** Runs the agent on a prepared input; resolves to whatever `runAgentStream` resolves to. */
  runStream: (
    agent: Agent<any, any>,
    input: PreparedAgentInput,
    settings: Settings,
    options?: RunAgentStreamOptions,
  ) => Promise<Awaited<ReturnType<typeof runAgentStream>>>;
  /** Maps a list of interruptions to a list of serialized approvals. */
  serializeApprovals: (interruptions: unknown[]) => unknown[];
};
247
+
248
/** Optional substitutions accepted by `createProductionAgentRuntime`. */
export type ProductionRuntimeOverrides = {
  /** Model forced onto every built agent; when set, registry routing is bypassed. */
  model?: Model;
  /** Sandbox client handed to every streamed run. Left untyped (`unknown`) here. */
  sandboxClient?: unknown;
};
252
+
253
/**
 * Assembles the production `OpenGeniRuntime` from this module's functions,
 * applying the optional overrides.
 *
 * - `overrides.model`: shadows registry routing entirely. `resolveTurnModel`
 *   then reports null ("no resolution") and `buildAgent` pins the override model,
 *   so the caller stays on the legacy global-client path (the scripted model used
 *   in worker tests is not in any provider's allow-list).
 * - `overrides.sandboxClient`: always written into the run options, after the
 *   caller's own options are spread — including when it is undefined.
 *
 * The overrides object is read at call time, not copied at construction.
 */
export function createProductionAgentRuntime(overrides: ProductionRuntimeOverrides = {}): OpenGeniRuntime {
  const routeTurnModel: OpenGeniRuntime["resolveTurnModel"] = (settings, modelId) => {
    if (overrides.model) {
      return null;
    }
    return resolveTurnModel(settings, modelId);
  };

  const buildAgent: OpenGeniRuntime["buildAgent"] = (settings, resources, options) => {
    const pinnedModel = overrides.model ? { model: overrides.model } : {};
    return buildOpenGeniAgent(settings, resources, { ...options, ...pinnedModel });
  };

  const runStream: OpenGeniRuntime["runStream"] = async (agent, input, settings, options) => {
    const runOptions = { ...options, sandboxClient: overrides.sandboxClient };
    return await runAgentStream(agent, input, settings, runOptions);
  };

  return {
    configure: configureOpenAI,
    resolveTurnModel: routeTurnModel,
    buildAgent,
    prepareTools: prepareAgentTools,
    prepareInput: prepareRunInput,
    runStream,
    serializeApprovals,
  };
}
274
+
275
/**
 * Build an OpenAI client from settings for the configured built-in provider, so
 * a direct API call (such as the compaction summarizer) uses the same
 * Azure/OpenAI auth and base URL as the rest of the runtime.
 *
 * Always returns a client:
 * - Azure (`settings.openaiProvider === "azure"`): base URL from
 *   `azureOpenaiBaseUrl`, else derived by `azureDeploymentBaseUrl`. The key is
 *   the API key, else the AD token, else the placeholder `"azure-ad-token"`.
 *   When only an AD token is configured it is also sent as a Bearer
 *   `Authorization` header.
 * - Otherwise: key from `settings.openaiApiKey`, else `OPENAI_API_KEY`; the base
 *   URL is set only when `settings.openaiBaseUrl` is configured.
 *
 * @param settings Runtime settings carrying provider, credentials and retry budget.
 * @returns A newly constructed client (no caching happens here).
 */
export function buildOpenAIClientFromSettings(settings: Settings): OpenAI {
  if (settings.openaiProvider !== "azure") {
    return new OpenAI({
      apiKey: settings.openaiApiKey ?? process.env.OPENAI_API_KEY,
      ...(settings.openaiBaseUrl ? { baseURL: settings.openaiBaseUrl } : {}),
      maxRetries: settings.openaiMaxRetries,
    });
  }

  const baseURL = settings.azureOpenaiBaseUrl ?? azureDeploymentBaseUrl(settings);
  const apiKey = settings.azureOpenaiApiKey ?? settings.azureOpenaiAdToken ?? "azure-ad-token";
  const authenticatesWithAdTokenOnly = settings.azureOpenaiAdToken && !settings.azureOpenaiApiKey;

  return new OpenAI({
    apiKey,
    baseURL,
    maxRetries: settings.openaiMaxRetries,
    defaultQuery: azureOpenAIDefaultQuery(settings, baseURL),
    defaultHeaders: authenticatesWithAdTokenOnly
      ? { Authorization: `Bearer ${settings.azureOpenaiAdToken}` }
      : undefined,
    // Rewrite every outbound /responses computer_call to the ACTIONS-ONLY shape
    // the GA Azure computer tool (gpt-5.5) accepts. This is the lowest reachable
    // seam — below the SDK responses converter, which always re-synthesizes BOTH
    // `action` and `actions` (rejected 400 "exactly one of action or actions").
    // See computerCallNormalizingFetch / rewriteComputerCallsToActionsOnly.
    fetch: computerCallNormalizingFetch(globalThis.fetch),
  });
}
308
+
309
/**
 * Process-wide cache of OpenAI clients, keyed by resolved provider id. Keeping
 * one client per provider lets concurrent multi-provider turns share a client
 * instead of constructing a new one per turn.
 */
const providerClientCache = new Map<string, OpenAI>();

/**
 * Returns the OpenAI client for a resolved provider, building it on first use
 * and serving the cached instance afterwards (the cache key is `provider.id`
 * only; `settings` is consulted on the first build alone).
 *
 * Three construction paths:
 * - Built-in openai/azure provider: delegates to `buildOpenAIClientFromSettings`.
 * - `codex-subscription` provider: a client whose fetch is wrapped by
 *   `codexSubscriptionFetch`.
 * - Any other (registry) provider: a plain client with the provider's key, base
 *   URL, default query and default headers, each set only when present.
 *
 * All paths use `settings.openaiMaxRetries` as the retry budget.
 */
export function buildProviderClient(provider: ResolvedModelProvider, settings: Settings): OpenAI {
  const existing = providerClientCache.get(provider.id);
  if (existing) {
    return existing;
  }

  let client: OpenAI;
  if (provider.builtin) {
    client = buildOpenAIClientFromSettings(settings);
  } else if (provider.kind === "codex-subscription") {
    // Codex subscription: the static apiKey is a placeholder — the real per-request
    // bearer + ChatGPT-Account-ID, the /responses->/codex/responses rewrite, and the
    // body normalization are all injected by codexSubscriptionFetch, which reads the
    // per-workspace token from codexRequestStorage (AsyncLocalStorage) at call time.
    // The provider id is constant ("codex-subscription"), so one cached client serves
    // every workspace without baking a token into it.
    client = new OpenAI({
      apiKey: provider.apiKey ?? "codex-subscription",
      ...(provider.baseUrl ? { baseURL: provider.baseUrl } : {}),
      maxRetries: settings.openaiMaxRetries,
      fetch: codexSubscriptionFetch(globalThis.fetch),
    });
  } else {
    // ResolvedModelProvider.apiKey is already the resolved key (configuredProviders
    // ran resolveProviderApiKey at config time, collapsing apiKey/apiKeyEnv), so it
    // is passed straight through here rather than re-resolved.
    client = new OpenAI({
      ...(provider.apiKey ? { apiKey: provider.apiKey } : {}),
      ...(provider.baseUrl ? { baseURL: provider.baseUrl } : {}),
      maxRetries: settings.openaiMaxRetries,
      ...(provider.defaultQuery ? { defaultQuery: provider.defaultQuery } : {}),
      ...(provider.defaultHeaders ? { defaultHeaders: provider.defaultHeaders } : {}),
    });
  }

  providerClientCache.set(provider.id, client);
  return client;
}
354
+
355
/**
 * Wraps a model id and a provider's OpenAI client in an `@openai/agents`
 * `Model`, picking the wire API from the provider's declared `api`:
 * `"chat"` yields an `OpenAIChatCompletionsModel` (/v1/chat/completions, e.g.
 * Fireworks); anything else yields an `OpenAIResponsesModel` (/v1/responses,
 * the built-in OpenAI/Azure path). Handing this instance to the agent routes the
 * turn to its provider without mutating the global default client.
 *
 * @param provider Resolved provider whose `api` selects the model class.
 * @param client   OpenAI client the model is bound to.
 * @param modelId  Model identifier passed to the model constructor.
 */
export function buildModelInstance(provider: ResolvedModelProvider, client: OpenAI, modelId: string): Model {
  if (provider.api === "chat") {
    return new OpenAIChatCompletionsModel(client, modelId);
  }
  return new OpenAIResponsesModel(client, modelId);
}
369
+
370
/**
 * Per-turn model routing. Looks `modelId` up via `resolveModelProvider` and, on
 * a hit, returns the serving provider, its (cached) OpenAI client, the
 * provider-bound `Model` instance, and the configured-model entry.
 *
 * @returns null when the model is not in the registry; the caller then falls
 *   back to the legacy global-client path (settings.openaiModel plus the default
 *   client set up by configureOpenAI).
 */
export function resolveTurnModel(
  settings: Settings,
  modelId: string,
): { provider: ResolvedModelProvider; client: OpenAI; model: Model; configured: ConfiguredModel } | null {
  const match = resolveModelProvider(settings, modelId);
  if (!match) {
    return null;
  }
  const { provider, model: configured } = match;
  const client = buildProviderClient(provider, settings);
  // Bind the registry's own id for the model, not the caller-supplied string.
  const model = buildModelInstance(provider, client, configured.id);
  return { provider, client, model, configured };
}
394
+
395
/**
 * `ModelProvider` that routes a model *name* to its provider-bound Model via
 * `resolveTurnModel`, and falls back to the SDK's default `OpenAIProvider` for a
 * name that is in no provider's allow-list.
 *
 * Why it exists: a Model *instance* set as `agent.model` only survives the
 * in-process (`sandboxBackend: "none"`) run. On the SandboxAgent/Modal path the
 * instance is dropped and the model name is re-resolved through the run's
 * `modelProvider` (or the global default). Without this router that
 * re-resolution reaches the default client (e.g. Azure) and a registry model
 * 404s ("deployment does not exist").
 *
 * It is installed both as the run-scoped `Runner.config.modelProvider` and as
 * the process default (see configureOpenAI). The run-scoped instance is the
 * load-bearing one: a `Runner` resolves names against its own modelProvider, so
 * each concurrent turn routes against its own settings. The process default
 * remains only as a boot-time fallback.
 */
export class MultiProviderModelProvider implements ModelProvider {
  /** Lazily created SDK default provider, used for names outside the registry. */
  private fallback: OpenAIProvider | undefined;

  constructor(private readonly settings: Settings) {}

  /**
   * Resolves `modelName` to a Model.
   *
   * @throws CodexSubscriptionUnavailableError for a `codex/<slug>` name that
   *   either does not resolve, or resolves to a provider whose kind is not
   *   `"codex-subscription"`.
   */
  async getModel(modelName?: string): Promise<Model> {
    if (modelName) {
      const isCodexId = modelName.startsWith(CODEX_MODEL_ID_PREFIX);
      const routed = resolveTurnModel(this.settings, modelName);
      // One guard, two cases — both would otherwise ship `codex/<slug>` to the
      // Azure/OpenAI client as a deployment name (a misleading
      // "DeploymentNotFound" 404):
      //  - unresolved: the per-workspace overlay injects the synthetic codex
      //    provider only for an active connected subscription, so none is
      //    connected here;
      //  - resolved to a non-codex provider: fail-loud floor (defense in depth)
      //    against a settings path that shadows a `codex/` id.
      if (isCodexId && routed?.provider.kind !== "codex-subscription") {
        throw new CodexSubscriptionUnavailableError(modelName);
      }
      if (routed) {
        return routed.model;
      }
    }
    // A non-codex model in no provider's allow-list (or no name at all) uses the
    // SDK default OpenAIProvider, i.e. the global default client/key that
    // configureOpenAI set up.
    this.fallback ??= new OpenAIProvider();
    return this.fallback.getModel(modelName);
  }
}
459
+
460
/**
 * Raised when a `codex/<slug>` turn reaches the model router but cannot be
 * served by the Codex subscription provider (per the original notes: the
 * workspace has no active Codex subscription connected, so the synthetic
 * provider was never injected). Thrown rather than letting the id reach the
 * built-in Azure/OpenAI client, which produced an opaque "DeploymentNotFound" 404.
 *
 * The message is user-actionable (connect/reconnect) and the error carries no
 * status or code fields — only `name` and `message`.
 */
export class CodexSubscriptionUnavailableError extends Error {
  /** @param modelName The requested model id, quoted verbatim in the message. */
  constructor(modelName: string) {
    super(
      [
        `Codex subscription model "${modelName}" is unavailable: no active Codex subscription is connected for this workspace. `,
        `Connect (or reconnect) your ChatGPT/Codex subscription in Settings, then retry.`,
      ].join(""),
    );
    this.name = "CodexSubscriptionUnavailableError";
  }
}
478
+
479
/**
 * Applies process-wide OpenAI SDK defaults from settings.
 *
 * Steps, in order:
 * 1. Sets the Responses transport from `settings.openaiResponsesTransport`.
 * 2. Azure: installs a client from `buildOpenAIClientFromSettings` as the default.
 *    Otherwise: sets the default key when `openaiApiKey` is configured, and
 *    installs a default client only when `openaiBaseUrl` is configured.
 * 3. Installs a `MultiProviderModelProvider` as the default model provider, so a
 *    model name re-resolved on the SandboxAgent/Modal path (where a Model
 *    instance does not survive) routes to its provider instead of the built-in
 *    client.
 */
export function configureOpenAI(settings: Settings): void {
  setOpenAIResponsesTransport(settings.openaiResponsesTransport);
  // Built before the default-client calls below so it captures the same settings.
  const registryRouter = new MultiProviderModelProvider(settings);

  if (settings.openaiProvider === "azure") {
    setDefaultOpenAIClient(buildOpenAIClientFromSettings(settings));
  } else {
    if (settings.openaiApiKey) {
      setDefaultOpenAIKey(settings.openaiApiKey);
    }
    if (settings.openaiBaseUrl) {
      setDefaultOpenAIClient(buildOpenAIClientFromSettings(settings));
    }
  }

  setDefaultModelProvider(registryRouter);
}
499
+
500
/**
 * Run the compaction summarizer as one plain, tool-less, non-streaming model
 * call. `system`/`user` come from buildCompactionMessages. The call deliberately
 * does NOT request reasoning encryption, tools, or server-side compaction; it
 * is a self-contained summarize.
 *
 * Provider-aware: the summary is meant to run on the SAME provider that serves
 * the turn (registry providers can't summarize through OpenAI/Azure, and vice
 * versa). `api: "chat"` providers (Fireworks) speak /v1/chat/completions, where
 * the summary is choices[0].message.content; `api: "responses"` (the default,
 * built-in OpenAI/Azure) speaks /v1/responses. When no client/api is supplied it
 * falls back to the built-in OpenAI/Azure Responses path. `store: false` is sent
 * only on the Responses path when the built-in provider is not Azure (Azure
 * rejects it).
 *
 * Never fatal: every failure — including a failure to construct the fallback
 * client, e.g. when no API key is configured — is logged and reported as null,
 * which the caller treats as "skip compaction this turn".
 *
 * @param settings Runtime settings; supply the default model, the token cap and
 *   the fallback client configuration.
 * @param messages System and user prompt text for the summarizer.
 * @param options  Optional client, wire API, output-token cap and model overrides.
 * @returns The trimmed summary text, or null when the call fails or yields no text.
 */
export async function summarizeForCompaction(
  settings: Settings,
  messages: { system: string; user: string },
  options: { client?: OpenAI; api?: ModelProviderApi; maxOutputTokens?: number; model?: string } = {},
): Promise<string | null> {
  const api = options.api ?? "responses";
  const model = options.model ?? settings.openaiModel;
  const maxTokens = options.maxOutputTokens ?? settings.contextSummaryMaxTokens;
  try {
    // Built inside the try: the OpenAI constructor throws when no API key is
    // available, and that must degrade to "skip compaction", not fail the turn.
    const client = options.client ?? buildOpenAIClientFromSettings(settings);
    if (api === "chat") {
      const completion = await client.chat.completions.create({
        model,
        max_tokens: maxTokens,
        messages: [
          { role: "system", content: messages.system },
          { role: "user", content: messages.user },
        ],
      } as any);
      const text = (completion as { choices?: Array<{ message?: { content?: unknown } }> }).choices?.[0]?.message?.content;
      const trimmed = typeof text === "string" ? text.trim() : "";
      return trimmed.length > 0 ? trimmed : null;
    }
    const response = await client.responses.create({
      model,
      // store:false is the OpenAI-platform-only storeless precondition; Azure
      // rejects it. The summarizer's resolved client is OpenAI/Azure on the
      // built-in path (api "responses"), so gate it on the built-in provider.
      ...(settings.openaiProvider === "azure" ? {} : { store: false }),
      max_output_tokens: maxTokens,
      input: [
        { role: "system", content: messages.system },
        { role: "user", content: messages.user },
      ],
    } as any);
    const trimmed = extractResponseOutputText(response).trim();
    return trimmed.length > 0 ? trimmed : null;
  } catch (error) {
    console.error("context compaction summarize failed (compaction skipped this turn)", error);
    return null;
  }
}
560
+
561
/**
 * Pull the assistant text out of a Responses API result, shape-tolerant.
 *
 * Only `role === "assistant"` message items contribute: a provider whose
 * Responses endpoint echoes the user input back as an output `message` item
 * (Fireworks' beta /v1/responses does exactly this — see
 * docs/model-providers.md) would otherwise corrupt the summary with the prompt
 * it was given.
 *
 * The top-level `output_text` shortcut is trusted only when `output` is absent
 * or contains no non-assistant message item. An aggregate built from every
 * message item would carry the echoed prompt, so once an echo is detected the
 * text is rebuilt from the assistant items alone. For responses that only
 * contain assistant messages the result is unchanged.
 *
 * @param response - A Responses API result of unknown shape.
 * @returns The concatenated assistant text, or "" when none can be found.
 */
export function extractResponseOutputText(response: unknown): string {
  if (!response || typeof response !== "object") {
    return "";
  }
  const { output_text: direct, output } = response as { output_text?: unknown; output?: unknown };
  if (!Array.isArray(output)) {
    // No item list to inspect: the convenience field is all there is.
    return typeof direct === "string" ? direct : "";
  }

  const parts: string[] = [];
  let sawNonAssistantMessage = false;
  for (const item of output) {
    if (!item || typeof item !== "object") {
      continue;
    }
    const { type, role, content } = item as { type?: unknown; role?: unknown; content?: unknown };
    if (type !== "message") {
      continue;
    }
    // Read assistant messages only; remember any input-echo (role "user"/"system").
    if (role !== "assistant") {
      sawNonAssistantMessage = true;
      continue;
    }
    if (!Array.isArray(content)) {
      continue;
    }
    for (const part of content) {
      if (part && typeof part === "object" && typeof (part as { text?: unknown }).text === "string") {
        parts.push((part as { text: string }).text);
      }
    }
  }

  // Without an echo the pre-aggregated field is safe and keeps the historical
  // precedence; with an echo only the role-filtered text can be trusted.
  if (!sawNonAssistantMessage && typeof direct === "string") {
    return direct;
  }
  return parts.join("");
}
605
+
606
// Per-call options for buildOpenGeniAgent. Every field is optional; the
// per-turn gating overrides default to today's settings-derived behaviour when
// omitted, so the legacy global-client callers (no model resolution) are
// byte-for-byte unchanged.
export type BuildAgentOptions = {
  // Model the agent runs on. Default: settings.openaiModel.
  model?: Model;
  // Reasoning effort placed in the model settings.
  // Default: settings.openaiReasoningEffort.
  reasoningEffort?: ReasoningEffort;
  // The resolved context-compaction path. Drives whether the sandbox
  // `compaction()` capability is attached AND whether `store: false` is set
  // (the OpenAI-platform-only storeless precondition). Registry providers
  // resolve to "client", so neither is applied to them.
  // Default: resolveContextCompactionMode(settings).
  compactionMode?: ContextCompactionMode;
  // Attach the hosted web_search tool. Only the providers that actually execute
  // it (built-in OpenAI/Azure; a registry model that opts in) should get it —
  // Fireworks accepts the param but no-ops it, which would hand the agent a
  // dead tool. Default: settings.webSearchEnabled.
  hostedWebSearch?: boolean;
  // Round-trip reasoning.encrypted_content via providerData.include. Only the
  // Responses API carries it; the chat wire API has no such field, so registry
  // "chat" providers turn it off.
  // Default: settings.openaiReasoningEncryptedContent.
  encryptedReasoning?: boolean;
  // The model's effective window, used to derive the server-path compaction
  // threshold. A registry model can declare its own (e.g. GLM 5.2's 1,048,576).
  // Default: settings.contextWindowTokens.
  contextWindowTokens?: number;
  // Whether the backend supports the Responses STRUCTURED/HOSTED sandbox-tool
  // transport — the hosted `apply_patch` tool type and structured `view_image`
  // output. The SDK's sandbox capabilities pick hosted-vs-function purely from
  // the bound model instance's constructor name (supportsApplyPatchTransport /
  // supportsStructuredToolOutputTransport). Our codex turns run the
  // OpenAIResponsesModel — which the SDK reads as hosted-capable — but route it
  // to the ChatGPT/Codex backend, which REJECTS the hosted `apply_patch` type
  // ("Unsupported tool type: apply_patch", verified live). Set false for that
  // backend so filesystem emits the function `apply_patch` + text `view_image`
  // variants it accepts. Default true (let the SDK decide from the model
  // instance) — non-codex paths are byte-for-byte unchanged.
  structuredToolTransport?: boolean;
  // The LIVE, by-reference connector-namespace Set from prepareAgentTools
  // (codexConnectorNamespaces): fills during each turn's codex_apps tools/list,
  // read per model call by the codex tool_search description so the model sees
  // the account's ACTUALLY-connected sources (codex-rs parity). Only meaningful
  // on the codex tool-search path.
  codexConnectorNamespaces?: ReadonlySet<string>;
  // Environment passed to buildManifest for the sandbox agent's default
  // manifest (the manifest env).
  sandboxEnvironment?: Record<string, string>;
  // The EFFECTIVE/active compute backend for this turn. `settings.sandboxBackend`
  // is the session's HOME backend (the default cloud group box it was created
  // with); when a session has swapped its active sandbox to a connected machine
  // (active_sandbox_id → a selfhosted lease, while the home backend stays the
  // cloud default), the worker passes that machine's backend here so
  // filesystem-touching lifecycle hooks key off where the agent ACTUALLY runs,
  // not where it was created. The one such hook today is the repository clone
  // (sandboxRepositoryCloneHooks): a bring-your-own machine owns its real disk,
  // so the platform must NEVER `git clone` onto it. Defaults to
  // settings.sandboxBackend, so the legacy cloud paths are byte-for-byte
  // unchanged and a session whose HOME backend is "selfhosted" is gated with no
  // caller change.
  activeSandboxBackend?: Settings["sandboxBackend"];
  // File downloads passed to buildManifest; the normalized entries that carry
  // no inline content are also stashed against the built agent.
  fileResourceDownloads?: SandboxFileDownload[];
  // MCP servers attached to the agent config when the list is non-empty.
  mcpServers?: MCPServer[];
  // Workspace-environment metadata rendered into the non-bypassable core of the
  // agent instructions.
  workspaceEnvironment?: WorkspaceEnvironmentContext;
  // TOKEN-BROKER (B1): the run-scoped GitHub App installation token, minted ONCE
  // per turn by the worker (sandboxEnvironmentForRun's `gitToken`). Threaded here
  // OFF-MANIFEST — it is NOT part of sandboxEnvironment (the manifest env), so the
  // token VALUE never triggers the SDK's provided-session env-delta guard even
  // though it rotates every turn. buildAgent stashes it alongside the agent's
  // repository-clone hooks; runStream forwards it into the clone hook context, which
  // seeds it to the box's token FILE before the clone runs. Omitted on the
  // selfhosted path (the machine uses its own git creds) — a NO-OP there.
  gitTokenSeed?: string;
  // Genesis turn only: append a one-shot instruction to the agent's system
  // prompt telling it to title the session via opengeni__set_session_title
  // before responding. Delivered through the instructions channel (where the
  // model actually obeys), appended AFTER the non-bypassable core so a
  // white-label persona template can't drop it.
  genesisTitleHint?: boolean;
  // Per-call agent persona override (the white-label surface). Resolved by the
  // caller as session > workspace > deployment default; when omitted the
  // runtime falls back to settings.agentInstructionsTemplate. The runtime
  // substitutes the non-bypassable CORE at AGENT_INSTRUCTIONS_CORE_PLACEHOLDER
  // (or appends it when the template omits the marker), so an override can
  // restyle the persona but never drop the goal-loop contract or environment
  // block.
  instructionsTemplate?: string;
  // Skills delivered by enabled capability packs. They join the bundled
  // skills in the sandbox skill index (mounted under .agents/) so
  // skills/<name> references resolve like any other indexed skill.
  packSkills?: PackSkill[];
};
697
+
698
// One file belonging to a pack-delivered skill: where it lives inside the
// skill directory and its text content.
export type PackSkillFile = {
  // Relative POSIX path inside the skill directory, e.g. "SKILL.md" or
  // "references/runbook.md".
  path: string;
  // The file's content as a string.
  content: string;
};
704
+
705
// A skill supplied through BuildAgentOptions.packSkills: a name, an optional
// description, and the files that make up the skill directory.
export type PackSkill = {
  // Skill name.
  // NOTE(review): presumably the <name> in skills/<name> references — confirm
  // against lazySkillSourceWithPackSkills.
  name: string;
  // Optional description; may be omitted or explicitly null.
  description?: string | null;
  // The skill's files, each addressed by a path relative to the skill directory.
  files: PackSkillFile[];
};
710
+
711
/**
 * Operator-facing metadata for the workspace environment attached to a run.
 * Surfaced verbatim in the agent instructions: the description is where
 * operators document how the exported credentials are meant to be used
 * (e.g. which variable holds a deploy key and how to clone with it), so an
 * agent must not have to rediscover that by enumerating `env` and guessing.
 * Only metadata belongs here — never variable values.
 */
export type WorkspaceEnvironmentContext = {
  // Environment name; quoted as-is in the first instruction line.
  name: string;
  // Operator notes. Trimmed before use; omitted from the instructions when
  // missing, null, or whitespace-only.
  description?: string | null;
  // Names (never values) of the exported variables. Empty names are ignored and
  // the rest are listed in sorted order.
  variableNames?: string[];
};
724
+
725
/**
 * Render the workspace-environment metadata as instruction lines.
 *
 * Always emits the "environment is attached" line. It is followed by the sorted
 * list of non-empty variable names when there are any, and by the operator's
 * trimmed notes when they are non-blank.
 *
 * @param environment - Metadata of the attached workspace environment.
 * @returns The instruction lines, in the order they should be presented.
 */
export function workspaceEnvironmentInstructions(environment: WorkspaceEnvironmentContext): string[] {
  // filter() yields a fresh array, so sorting it never touches the caller's list.
  const exported = (environment.variableNames ?? []).filter((name) => name.length > 0).sort();
  const notes = environment.description?.trim();

  return [
    `A workspace environment named "${environment.name}" is attached to this session; its variables are exported in the sandbox shell environment.`,
    ...(exported.length > 0 ? [`Exported environment variables: ${exported.join(", ")}.`] : []),
    ...(notes ? [`Environment notes from the operator: ${notes}`] : []),
  ];
}
739
+
740
/**
 * Build the non-bypassable CORE of the agent instructions as ordered lines.
 *
 * The first line is the goal-loop ownership statement: it names the
 * opengeni__goal_* tools and is what keeps a long-running session driving
 * itself. The dynamic workspace-environment lines follow when an environment
 * is attached. The caller joins the lines with the rest of the instructions by
 * " ", exactly as the historical preamble did.
 *
 * A white-labelled persona template must never be able to drop this slice:
 * composeAgentInstructions() substitutes it at the template's {{core}} marker
 * and appends it when the marker is absent.
 *
 * @param workspaceEnvironment - Metadata of the attached environment, if any.
 * @returns The core instruction lines, goal-loop line first.
 */
export function coreInstructions(workspaceEnvironment?: WorkspaceEnvironmentContext): string[] {
  const goalOwnership =
    "If the session has a goal, you own it: keep working until you call opengeni__goal_complete with concrete evidence or opengeni__goal_pause with a rationale; revise it with opengeni__goal_update; create one with opengeni__goal_set when given a long-running objective.";
  if (!workspaceEnvironment) {
    return [goalOwnership];
  }
  return [goalOwnership, ...workspaceEnvironmentInstructions(workspaceEnvironment)];
}
757
+
758
/**
 * Merge a (possibly white-labelled) persona template with the non-bypassable
 * CORE to produce the final agent instructions.
 *
 * Every occurrence of the {{core}} marker in the template is replaced by the
 * CORE. A template without the marker gets the CORE appended after it (the
 * non-bypassable fail-safe). Substitution and append both join by " ", so the
 * DEFAULT_AGENT_INSTRUCTIONS template with an empty environment reproduces the
 * historical preamble byte-for-byte.
 *
 * @param template - Persona template, with or without the core marker.
 * @param workspaceEnvironment - Metadata of the attached environment, if any.
 * @returns The composed instruction string.
 */
export function composeAgentInstructions(template: string, workspaceEnvironment?: WorkspaceEnvironmentContext): string {
  const core = coreInstructions(workspaceEnvironment).join(" ");

  if (!template.includes(AGENT_INSTRUCTIONS_CORE_PLACEHOLDER)) {
    // No marker: append the core rather than let the template drop it.
    if (!core) {
      return template;
    }
    return `${template} ${core}`;
  }

  // split/join replaces every marker literally, with no replacement-pattern
  // interpretation of the core text.
  const segments = template.split(AGENT_INSTRUCTIONS_CORE_PLACEHOLDER);
  return segments.join(core);
}
773
+
774
// Per-agent side tables, keyed weakly by the agent object so an entry never
// outlives its agent. They are written when buildOpenGeniAgent builds a
// sandbox agent.

// The agent's normalized file downloads that carry no inline content.
const agentFileDownloads = new WeakMap<object, SandboxFileDownload[]>();
// The repository-clone lifecycle hooks built for the agent by
// sandboxRepositoryCloneHooks.
const agentRepositoryCloneHooks = new WeakMap<object, SandboxLifecycleHook[]>();
// TOKEN-BROKER (B1): the per-turn git token seed, stashed alongside the agent's
// repository-clone hooks (a parallel map keyed by the agent). Kept OFF the
// manifest/defaultManifest so the rotating value never rides the SDK's provided-
// session env; runStream reads it to build the clone hook context. Absent when
// no repo is attached / on the selfhosted path.
const agentGitTokenSeed = new WeakMap<object, string>();
// The EFFECTIVE backend the turn resolved for this agent (undefined -> the home
// backend). Read by runStream's owned branch to keep platform box-setup hooks off
// connected machines (a user's real computer).
const agentActiveSandboxBackend = new WeakMap<object, Settings["sandboxBackend"]>();
786
+
787
/**
 * Build the agent for one turn: a plain `Agent` when the home sandbox backend
 * is "none", otherwise a `SandboxAgent` with manifest, capabilities and the
 * per-agent side tables (file downloads, clone hooks, active backend, git
 * token seed) populated.
 *
 * @param settings - Deployment settings; the source of every default.
 * @param resources - Resources attached to the run (manifest + clone hooks).
 * @param options - Per-turn overrides; see BuildAgentOptions.
 * @returns The constructed agent.
 */
export function buildOpenGeniAgent(settings: Settings, resources: ResourceRef[], options: BuildAgentOptions = {}): Agent<any, any> {
  // Per-turn gating: an explicit override wins, otherwise the settings-derived
  // value applies, so legacy global-client callers build the same agent as
  // before while the multi-provider worker passes the resolved provider's values.
  const compactionMode = options.compactionMode ?? resolveContextCompactionMode(settings);
  const hostedWebSearch = options.hostedWebSearch ?? settings.webSearchEnabled;
  const encryptedReasoning = options.encryptedReasoning ?? settings.openaiReasoningEncryptedContent;
  const contextWindowTokens = options.contextWindowTokens ?? settings.contextWindowTokens;

  // Hosted tools attached explicitly. web_search is on by default for the
  // built-in path; a registry model only gets it when it opts in, because a
  // provider that no-ops the param would hand the agent a dead tool. The SDK
  // concatenates this list with MCP-server tools and, on the SandboxAgent path,
  // with the sandbox capability tools, so nothing is overridden.
  const hostedTools = hostedWebSearch ? [webSearchTool()] : [];

  // White-label persona composition: per-call template (session > workspace,
  // resolved by the caller) falling back to the deployment default. The
  // non-bypassable CORE is substituted at the {{core}} marker or appended.
  const personaTemplate = options.instructionsTemplate ?? settings.agentInstructionsTemplate;
  const composedInstructions = composeAgentInstructions(personaTemplate, options.workspaceEnvironment);

  const baseConfig = {
    name: "OpenGeni Agent",
    model: options.model ?? settings.openaiModel,
    // The genesis title directive goes AFTER the composed instructions, so a
    // persona template cannot drop it.
    instructions: options.genesisTitleHint
      ? `${composedInstructions} ${GENESIS_TITLE_DIRECTIVE}`
      : composedInstructions,
    modelSettings: {
      reasoning: { effort: options.reasoningEffort ?? settings.openaiReasoningEffort, summary: "detailed" },
      // Server-side compaction requires store=false: the server emits an opaque
      // encrypted 'compaction' item that round-trips in the request instead of
      // being anchored to a stored response. Gated on the RESOLVED compaction
      // mode, so registry providers (which resolve to "client") never carry it.
      ...(compactionMode === "server" ? { store: false } : {}),
      // Round-trip the encrypted reasoning payload so chains of thought survive
      // without provider-side response storage. providerData.include replaces
      // any tool-derived include entries; OpenGeni's MCP/sandbox function tools
      // contribute none. Off for chat-wire providers, which have no such field.
      ...(encryptedReasoning
        ? { providerData: { include: ["reasoning.encrypted_content"] } }
        : {}),
    },
    // Shared by both the plain Agent and the SandboxAgent construction below.
    ...(hostedTools.length ? { tools: hostedTools } : {}),
    ...(options.mcpServers?.length ? { mcpServers: options.mcpServers } : {}),
  } as const;

  // No sandbox: a plain agent, optionally with codex tool search installed.
  if (settings.sandboxBackend === "none") {
    const plainAgent = new Agent(baseConfig);
    maybeInstallCodexToolSearch(plainAgent, settings, options);
    return plainAgent;
  }

  const runAs = sandboxRunAs(settings);
  const defaultManifest = buildManifest(settings, resources, options.sandboxEnvironment, options.fileResourceDownloads);
  const capabilities = buildAgentCapabilities(settings, options.packSkills ?? [], {
    compactionMode,
    contextWindowTokens,
    ...(options.structuredToolTransport !== undefined ? { structuredToolTransport: options.structuredToolTransport } : {}),
  });
  const sandboxAgent = new SandboxAgent({
    ...baseConfig,
    defaultManifest,
    ...(runAs ? { runAs } : {}),
    capabilities,
  });

  // Side tables read later by the run loop.
  const pendingDownloads = normalizeSandboxFileDownloads(options.fileResourceDownloads ?? []).filter(
    (download) => !download.content,
  );
  agentFileDownloads.set(sandboxAgent, pendingDownloads);
  agentRepositoryCloneHooks.set(sandboxAgent, sandboxRepositoryCloneHooks(settings, resources, options.activeSandboxBackend));

  // Record the EFFECTIVE backend so runStream's owned branch can skip the direct
  // beforeAgentStart hook run on a connected machine: that box is the user's
  // REAL computer and the platform must not run setup (az login) against it.
  if (options.activeSandboxBackend) {
    agentActiveSandboxBackend.set(sandboxAgent, options.activeSandboxBackend);
  }

  // TOKEN-BROKER (B1): keep the per-turn seed off-manifest so the token never
  // touches defaultManifest / sandboxEnvironment.
  if (options.gitTokenSeed) {
    agentGitTokenSeed.set(sandboxAgent, options.gitTokenSeed);
  }

  maybeInstallCodexToolSearch(sandboxAgent, settings, options);
  return sandboxAgent;
}
887
+
888
/**
 * Turn on Codex-CLI-style progressive connector disclosure for a codex turn
 * when the feature flag is set.
 *
 * The gate is `structuredToolTransport === false` — the same signal that
 * identifies a codex-subscription turn (the ChatGPT backend that rejects hosted
 * tools) — so no non-codex turn is ever touched. On a qualifying turn
 * {@link installCodexToolSearch} wraps `getAllTools` (clone-survivingly) to
 * defer codex_apps schemas and add the client tool_search tool, whose
 * description renders the live connector namespaces threaded from
 * prepareAgentTools.
 *
 * @param agent - The freshly built agent to instrument.
 * @param settings - Supplies the `codexToolSearchEnabled` flag.
 * @param options - Supplies the transport signal and the live namespace set.
 */
function maybeInstallCodexToolSearch(agent: Agent<any, any>, settings: Settings, options: BuildAgentOptions): void {
  const isCodexTurn = options.structuredToolTransport === false;
  if (!settings.codexToolSearchEnabled || !isCodexTurn) {
    return;
  }
  // Fall back to an empty set when the caller threads no live namespaces.
  const liveNamespaces = options.codexConnectorNamespaces ?? new Set<string>();
  installCodexToolSearch(agent as unknown as Parameters<typeof installCodexToolSearch>[0], liveNamespaces);
}
905
+
906
/**
 * Make a sandbox capability emit its FUNCTION-transport tool variants instead
 * of the hosted ones, by discarding the model instance that the SDK's transport
 * detection keys off.
 *
 * Why: codex routes the OpenAIResponsesModel to the ChatGPT backend, which
 * rejects the hosted `apply_patch` AND `computer_use_preview` tool types (see
 * {@link buildAgentCapabilities}). The SDK reads hosted-vs-function ONLY from
 * `_modelInstance` (set via `bindModel`). With `bindModel` overridden to drop
 * the instance, `supportsApplyPatchTransport` /
 * `supportsStructuredToolOutputTransport` return false and `tools()` emits the
 * function variants — `apply_patch` + text `view_image` for filesystem, and the
 * `computer_*` function tools + text `computer_screenshot` for computer-use.
 *
 * @param capability - The filesystem or computer-use capability to patch in place.
 */
function neutralizeStructuredToolTransport(capability: ReturnType<typeof filesystem> | ReturnType<typeof computerUse>): void {
  // The override works through `this`, never through a captured `capability`.
  // The SandboxAgent binds via
  // `cap.clone().bind(session).bindRunAs(runAs).bindModel(model, instance)` and
  // calls tools() on whatever the chain returns. Capability.clone() copies this
  // override onto the fresh per-run instance, so it must act on and RETURN the
  // clone. Returning the original would leave the session-bound clone out of
  // the chain, and tools() would throw "Filesystem capability is not bound to a
  // SandboxSession".
  function forceFunctionTransport(this: Record<string, unknown>): unknown {
    // No model instance → the transport probes report false.
    this._modelInstance = undefined;
    return this;
  }

  const patchable = capability as unknown as { bindModel: typeof forceFunctionTransport };
  patchable.bindModel = forceFunctionTransport;
}
936
+
937
/**
 * Assemble the SandboxAgent capability list with provider awareness.
 *
 * Compaction: the SDK's `Capabilities.default()` always includes
 * `compaction()`, whose sampling params send
 * `context_management:[{type:'compaction', …}]` to the Responses transport. The
 * OpenAI platform honors that (server-side compaction); AZURE rejects it with
 * `400 unsupported_parameter`. The base set is therefore rebuilt explicitly
 * (`filesystem()`, `shell()` — the same factories the SDK default uses) and
 * `compaction()` is added ONLY on the server path. It gets an explicit
 * `StaticCompactionPolicy(threshold)` because gpt-5.5 is absent from the SDK's
 * hardcoded context-window map and would otherwise hit the wrong 240k fallback;
 * the SDK has no window-registration API, so an explicit threshold is the only
 * fix.
 *
 * Transport: when the caller declares that the backend lacks the
 * structured/hosted tool transport (`structuredToolTransport === false`, the
 * codex path), the filesystem and computer-use capabilities are neutralized so
 * they emit function-tool variants. `shell()` and `compaction()` are left
 * untouched.
 *
 * The compaction mode and effective context window are passed in by the
 * multi-provider caller; both default to the settings-derived value, so callers
 * that do not route per model keep today's behaviour.
 *
 * @param settings - Deployment settings; the source of every default.
 * @param packSkills - Skills from enabled capability packs, added to the skill source.
 * @param options - Resolved compaction mode, context window and transport support.
 * @returns The ordered capability list for the SandboxAgent.
 */
export function buildAgentCapabilities(
  settings: Settings,
  packSkills: PackSkill[],
  options: { compactionMode?: ContextCompactionMode; contextWindowTokens?: number; structuredToolTransport?: boolean } = {},
): ReturnType<typeof Capabilities.default> {
  const mode = options.compactionMode ?? resolveContextCompactionMode(settings);
  const contextWindowTokens = options.contextWindowTokens ?? settings.contextWindowTokens;
  // True only when the caller explicitly ruled out the hosted transport.
  const functionTransportOnly = options.structuredToolTransport === false;

  // filesystem() picks hosted-vs-function tools from the bound model instance.
  // On the codex backend, which rejects the hosted `apply_patch` type, the
  // binding is neutralized so tools() falls back to the function `apply_patch`
  // + text `view_image` variants, whose function_call round-trip the SDK
  // handles natively.
  const filesystemCapability = filesystem();
  if (functionTransportOnly) {
    neutralizeStructuredToolTransport(filesystemCapability);
  }
  const caps: ReturnType<typeof Capabilities.default> = [filesystemCapability, shell()];

  if (mode === "server") {
    const threshold = contextServerCompactThreshold({ ...settings, contextWindowTokens });
    caps.push(compaction({ policy: new StaticCompactionPolicy(threshold) }));
  }
  caps.push(skills({ lazyFrom: lazySkillSourceWithPackSkills(packSkills) }));

  // P4.3 computer-use: offered only when computer-use is enabled, the desktop
  // tier is ON, and the backend is one whose image carries the X stack.
  // Headless/dev backends never get the tool, so a misconfigured non-desktop
  // box cannot register a tool that always fails.
  const desktopToolsAllowed =
    settings.computerUseEnabled
    && settings.sandboxDesktopEnabled
    && desktopCapableBackend(settings.sandboxBackend);
  if (!desktopToolsAllowed) {
    return caps;
  }

  // computer-use is transport-aware like filesystem: the hosted
  // `computer_use_preview` tool on the structured transport, FUNCTION
  // `computer_*` tools otherwise. The ChatGPT/Codex backend rejects hosted tool
  // types, so on that path the capability is neutralized rather than dropped.
  const computerCapability = computerUse({
    dimensions: [settings.streamResolutionWidth, settings.streamResolutionHeight],
    readOnly: settings.computerUseReadOnly,
    // On the codex path screenshots are returned as structured image results:
    // that backend accepts `input_image` content inside a
    // `function_call_output`, whereas a text data-URL would be unreadable.
    ...(functionTransportOnly ? { imageFunctionResults: true } : {}),
  });
  if (functionTransportOnly) {
    neutralizeStructuredToolTransport(computerCapability);
  }
  caps.push(computerCapability as unknown as ReturnType<typeof Capabilities.default>[number]);
  return caps;
}
1024
+
1025
/**
 * Resolve the user the sandbox agent should run as.
 *
 * Currently always returns `undefined` and ignores the settings argument, so
 * buildOpenGeniAgent never sets `runAs` on the SandboxAgent config.
 *
 * @param _settings - Unused.
 * @returns Always `undefined`.
 */
export function sandboxRunAs(_settings: Settings): string | undefined {
  return undefined;
}
1028
+
1029
// Result of prepareAgentTools: the MCP servers prepared for a turn, a cleanup
// handle, and the live codex connector-namespace set.
export type PreparedAgentTools = {
  // MCP servers prepared for the agent; empty when no tools were requested.
  mcpServers: MCPServer[];
  // Async cleanup handle; a no-op when no tools were requested.
  // NOTE(review): presumably closes the connected servers otherwise — confirm
  // against the rest of prepareAgentTools.
  close: () => Promise<void>;
  // P4 (Part B.1): the live, by-reference Set of ORIGINAL-dotted connector
  // namespaces the codex_apps transport saw across this turn's tools/list calls.
  // Accumulates as the agent lists tools during the run, so the worker reads it
  // AFTER the turn (in its finally) to cache the serving account's connector set.
  // Empty when this turn has no codex_apps server (or it never listed any
  // namespaced tool) — the worker only persists a non-empty set.
  codexConnectorNamespaces: Set<string>;
};
1040
+
1041
// Caller-supplied scope for prepareAgentTools. The whole object is forwarded to
// mcpServerRequestInit when each server's request options are built.
export type PrepareToolsOptions = {
  // NOTE(review): presumably the account the run belongs to — its use is not
  // visible here; confirm against mcpServerRequestInit.
  accountId?: string;
  // Workspace for the run; used when resolving the first-party MCP server URL.
  workspaceId?: string;
  // Worker-asserted session scope for first-party MCP calls; enables
  // session-scoped tools such as goal management on the API side.
  sessionId?: string;
  // NOTE(review): subject identity and label — presumably for the delegated
  // first-party token; confirm against mcpServerRequestInit.
  subjectId?: string;
  subjectLabel?: string;
  // Overrides the fixed first-party MCP permission set for this session's
  // delegated token (manager-style sessions). The caller is responsible for
  // having validated the set against the session creator's grant.
  firstPartyPermissions?: Permission[];
};
1054
+
1055
/**
 * Resolve the requested tool refs against `settings.mcpServers`, build one
 * registry-prefixed streamable-HTTP MCP server per ref, and connect them.
 *
 * Servers are connected in two groups:
 *  - REQUIRED (explicitly requested, not codex_apps): connected strictly, so a
 *    connect / tools-list failure fails the call.
 *  - BEST-EFFORT (codex_apps, or `ToolRef.optional === true`): a failing server
 *    is dropped for this turn instead of failing it.
 *
 * @param settings Runtime settings; supplies the MCP server registry.
 * @param tools    Tool refs to attach for this turn.
 * @param options  Per-run context (workspace id, delegated-token inputs, ...).
 * @returns The active servers, a `close` that tears down both groups, and the
 *          Set that the codex_apps sanitizing fetch fills with connector namespaces.
 * @throws Error when a tool ref names an id that is not in the registry.
 */
export async function prepareAgentTools(settings: Settings, tools: ToolRef[], options: PrepareToolsOptions = {}): Promise<PreparedAgentTools> {
  // P4 (Part B.1): one Set per prepareTools call, shared by reference into the
  // codex_apps sanitizing fetch so every tools/list this turn accumulates the
  // account's connector namespaces. Surfaced on PreparedAgentTools for the worker.
  const codexConnectorNamespaces = new Set<string>();
  if (tools.length === 0) {
    return { mcpServers: [], close: async () => {}, codexConnectorNamespaces };
  }
  const registry = new Map(settings.mcpServers.map((server) => [server.id, server]));
  const servers = await Promise.all(tools.map(async (tool) => {
    const config = registry.get(tool.id);
    if (!config) {
      throw new Error(`Unknown MCP server id: ${tool.id}`);
    }
    const url = firstPartyMcpServerUrlForRun(settings, config, options.workspaceId) ?? config.url;
    const server = new PrefixedMcpServer(new MCPServerStreamableHttp({
      url,
      name: config.name ?? config.id,
      cacheToolsList: config.cacheToolsList,
      // codex_apps returns connector tools with empty `outputSchema: {}` that the
      // MCP SDK's strict Tool schema rejects (fails the turn during tools/list);
      // sanitize the response on the wire before validation. The namespace Set
      // also captures each tool's original connector namespace (P4 Part B.1).
      ...(isCodexAppsMcpServer(config) ? { fetch: codexAppsSanitizingFetch(globalThis.fetch, codexConnectorNamespaces) } : {}),
      ...await mcpServerRequestInit(settings, config, options),
      ...(config.timeoutMs ? {
        timeout: config.timeoutMs,
        clientSessionTimeoutSeconds: Math.ceil(config.timeoutMs / 1000),
      } : {}),
    }), config.id, config.allowedTools);
    // A server is connected BEST-EFFORT (a connect / tools-list failure drops
    // it instead of failing the turn) in two cases:
    //  - codex_apps: connector availability is RUNTIME-DISCOVERED — the
    //    device-code login may lack the connector scopes, and the backend can
    //    reject the bearer at the initialize/tools-list handshake, so a 401/403
    //    (or a missing/failed token) drops the server.
    //  - an AUTO-ATTACHED workspace-default capability MCP (ToolRef.optional):
    //    the caller never explicitly requested it, so a broken/expired
    //    capability credential must SKIP the server with a warning, never kill
    //    the turn before the model runs. An EXPLICITLY-requested tool omits
    //    `optional` and stays strict (below), preserving the fail-loud contract.
    const optional = tool.optional === true;
    return { server, bestEffort: isCodexAppsMcpServer(config) || optional, optional };
  }));
  const requiredServers = servers.filter((entry) => !entry.bestEffort).map((entry) => entry.server);
  const bestEffortServers = servers.filter((entry) => entry.bestEffort).map((entry) => entry.server);
  // Names of the OPTIONAL capability servers (not codex_apps) so a drop is
  // surfaced as a warning; codex_apps keeps its historically-quiet drop (a
  // not-logged-in ChatGPT plan is a normal, non-noteworthy state).
  const optionalServerNames = new Set(
    servers.filter((entry) => entry.optional).map((entry) => entry.server.name),
  );
  const connectedRequired = await connectMcpServers(requiredServers, {
    connectInParallel: true,
    strict: true,
  });
  const connectedBestEffort = bestEffortServers.length
    ? await connectMcpServers(bestEffortServers, {
      connectInParallel: true,
      strict: false,
    })
    : null;
  if (connectedBestEffort) {
    for (const failed of connectedBestEffort.failed) {
      if (!optionalServerNames.has(failed.name)) {
        continue;
      }
      const error = connectedBestEffort.errors.get(failed);
      console.warn(
        `[mcp] optional capability server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
        error instanceof Error ? error.message : error,
      );
    }
  }
  return {
    mcpServers: [...connectedRequired.active, ...(connectedBestEffort?.active ?? [])],
    close: async () => {
      // try/finally: a rejection while closing the required group must not
      // leave the best-effort connections open. The required group's error
      // still propagates unless closing the best-effort group also throws.
      try {
        await connectedRequired.close();
      } finally {
        if (connectedBestEffort) {
          await connectedBestEffort.close();
        }
      }
    },
    codexConnectorNamespaces,
  };
}
1140
+
1141
/**
 * Pick the connect-time `requestInit` (auth headers) for one MCP server.
 *
 * Precedence: codex_apps -> first-party -> statically configured headers.
 * Returns `{}` when no headers apply.
 */
async function mcpServerRequestInit(settings: Settings, config: Settings["mcpServers"][number], options: PrepareToolsOptions): Promise<{ requestInit: { headers: Record<string, string> } } | {}> {
  // codex_apps goes FIRST so the static-headers path can never apply to it:
  // its refreshing ChatGPT/Codex bearer is resolved per-connect from the codex
  // ALS, never from a baked `config.headers` value.
  if (isCodexAppsMcpServer(config)) {
    return await codexAppsMcpRequestInit(settings);
  }
  if (isFirstPartyMcpServer(settings, config)) {
    return await firstPartyMcpRequestInit(settings, config, options);
  }
  // Third-party servers receive only their own configured credential headers
  // (e.g. workspace-enabled capability MCP credentials) — never OpenGeni's
  // access key or delegated tokens.
  const staticHeaders = config.headers;
  if (!staticHeaders || Object.keys(staticHeaders).length === 0) {
    return {};
  }
  return { requestInit: { headers: { ...staticHeaders } } };
}
1159
+
1160
/**
 * Build the auth headers for a first-party MCP server.
 *
 * Adds `x-opengeni-access-key` when auth is required and an access key is set,
 * and a one-hour delegated bearer token when a delegation secret plus account
 * and workspace ids are available. Returns `{}` for a non-first-party server
 * or when neither header applies.
 */
async function firstPartyMcpRequestInit(settings: Settings, config: Settings["mcpServers"][number], options: PrepareToolsOptions): Promise<{ requestInit: { headers: Record<string, string> } } | {}> {
  if (!isFirstPartyMcpServer(settings, config)) {
    return {};
  }
  const headers: Record<string, string> = {};
  if (settings.authRequired && settings.accessKey) {
    headers["x-opengeni-access-key"] = settings.accessKey;
  }
  if (settings.delegationSecret && options.accountId && options.workspaceId) {
    const delegatedToken = await signDelegatedAccessToken(settings.delegationSecret, {
      accountId: options.accountId,
      workspaceId: options.workspaceId,
      subjectId: options.subjectId ?? "worker:first-party-mcp",
      ...(options.subjectLabel ? { subjectLabel: options.subjectLabel } : {}),
      permissions: options.firstPartyPermissions ?? firstPartyMcpPermissions,
      ...(options.sessionId ? { sessionId: options.sessionId } : {}),
      // Expires one hour (in seconds) after signing.
      exp: Math.floor(Date.now() / 1000) + 60 * 60,
    });
    headers.authorization = `Bearer ${delegatedToken}`;
  }
  return Object.keys(headers).length === 0 ? {} : { requestInit: { headers } };
}
1188
+
1189
/**
 * Connect-time auth headers for the codex_apps connectors MCP.
 *
 * The bearer comes from codexRequestStorage — the SAME refreshing token source
 * the model fetch uses (proactive refresh + single-flight + db persist) — so it
 * is valid at connect time. When there is no store (a non-codex turn, or
 * prepareTools ran outside the ALS) or the token lookup fails (needs_relogin),
 * `{}` is returned so the best-effort connect drops the server rather than
 * crashing the turn.
 */
async function codexAppsMcpRequestInit(settings: Settings): Promise<{ requestInit: { headers: Record<string, string> } } | {}> {
  const ctx = codexRequestStorage.getStore();
  if (!ctx) {
    return {};
  }
  let token;
  try {
    token = await ctx.getToken();
  } catch {
    // Deliberate best-effort: a token failure means "no headers", not a crash.
    return {};
  }
  // The ChatGPT backend sits behind Cloudflare, which 403s requests bearing a
  // default runtime User-Agent (confirmed live: an HTML bot-block page, NOT an
  // auth failure). Send the codex client identity — the same originator/version/
  // User-Agent the model fetch uses — so the MCP connect handshake passes the edge.
  const headers: Record<string, string> = {
    authorization: `Bearer ${token.accessToken}`,
    originator: CODEX_ORIGINATOR,
    "user-agent": `${CODEX_ORIGINATOR}/${ctx.clientVersion}`,
    version: ctx.clientVersion,
    ...(token.chatgptAccountId ? { "chatgpt-account-id": token.chatgptAccountId } : {}),
    ...(settings.codexProductSku ? { "X-OpenAI-Product-Sku": settings.codexProductSku } : {}),
  };
  return { requestInit: { headers } };
}
1226
+
1227
// Default permission set signed into a worker's delegated first-party MCP
// token when the session does not supply its own list.
//
// POWERFUL BY DEFAULT: it carries every permission that unlocks a first-party
// tool — session orchestration (sessions:*), workspace environments
// (environments:*), and GitHub (github:use) — so agents are fully capable out
// of the box. A user DEMOTES a specific session by setting a narrower
// session.firstPartyMcpPermissions (the create-session permission picker),
// which the worker then uses instead.
//
// Account-level scopes (billing/account/members/api_keys/workspace:admin) are
// intentionally excluded: they gate no first-party tool and are not agent
// capabilities. (A finer-grained capability model comes later.)
const firstPartyMcpPermissions: Permission[] = [
  "workspace:read",
  "files:read",
  "documents:search",
  "scheduled_tasks:manage",
  "scheduled_tasks:run",
  "goals:manage",
  "sessions:read",
  "sessions:create",
  "sessions:control",
  "environments:use",
  "environments:manage",
  "github:use",
];
1251
+
1252
/**
 * True when `config` is the codex_apps connectors MCP (matched by registry id).
 *
 * codex_apps is third-party-by-trust (the external ChatGPT connectors backend)
 * but needs DYNAMIC auth, so it is its own category — deliberately NOT folded
 * into the first-party allowlist, which would wrongly sign an OpenGeni
 * delegated token to chatgpt.com.
 */
function isCodexAppsMcpServer(config: Settings["mcpServers"][number]): boolean {
  const { id } = config;
  return id === CODEX_APPS_MCP_SERVER_ID;
}
1259
+
1260
/**
 * Decide whether `config` is one of OpenGeni's own MCP servers.
 *
 * The id must be `opengeni`, `files` or `docs`. Beyond that, either the URL is
 * a `{workspaceId}` template, or its normalized form equals one of the
 * first-party URLs derived from settings. An unparseable URL is not first-party.
 */
function isFirstPartyMcpServer(settings: Settings, config: Settings["mcpServers"][number]): boolean {
  const firstPartyIds = ["opengeni", "files", "docs"];
  if (!firstPartyIds.includes(config.id)) {
    return false;
  }
  if (config.url.includes("{workspaceId}")) {
    return true;
  }
  const normalized = normalizeUrl(config.url);
  if (!normalized) {
    return false;
  }
  return firstPartyMcpUrls(settings).includes(normalized);
}
1273
+
1274
/**
 * Compute the workspace-scoped URL to use for a first-party MCP server on this run.
 *
 * Returns `null` (caller falls back to `config.url`) when there is no workspace
 * id, the id is not `opengeni`/`files`/`docs`, or the server is not first-party.
 * A `{workspaceId}` template in `config.url` is filled in directly; otherwise
 * the base comes from `settings.opengeniMcpUrl` (template or plain) or the
 * shared loopback default. The `docs` server gets `/docs` appended to the path.
 */
function firstPartyMcpServerUrlForRun(settings: Settings, config: Settings["mcpServers"][number], workspaceId: string | undefined): string | null {
  if (!workspaceId || !["opengeni", "files", "docs"].includes(config.id)) {
    return null;
  }
  if (config.url.includes("{workspaceId}")) {
    return config.url.replaceAll("{workspaceId}", workspaceId);
  }
  if (!isFirstPartyMcpServer(settings, config)) {
    return null;
  }
  let rawBase: string;
  if (settings.opengeniMcpUrl?.includes("{workspaceId}")) {
    rawBase = settings.opengeniMcpUrl.replaceAll("{workspaceId}", workspaceId);
  } else if (settings.opengeniMcpUrl) {
    rawBase = scopedMcpUrlFromConfiguredBase(settings.opengeniMcpUrl, workspaceId);
  } else {
    // unset → the shared loopback default (a `{workspaceId}` template owned by
    // @opengeni/config's firstPartyMcpBaseUrl), scoped to this run's workspace.
    rawBase = firstPartyMcpBaseUrl(settings).replaceAll("{workspaceId}", workspaceId);
  }
  const scoped = new URL(rawBase);
  if (config.id === "docs") {
    // Strip trailing slashes first so the result never contains `//docs`.
    scoped.pathname = `${scoped.pathname.replace(/\/+$/, "")}/docs`;
  }
  return scoped.toString();
}
1297
+
1298
/**
 * Rewrite a configured first-party MCP base URL to the workspace-scoped
 * endpoint `/v1/workspaces/<workspaceId>/mcp` on the same origin, discarding
 * any existing path, query string and fragment.
 *
 * The workspace id is percent-encoded so it always occupies exactly one path
 * segment: a `/` inside the id can no longer add extra segments. Ids made only
 * of unreserved characters (letters, digits, `-`, `_`, `.`, `~`) produce the
 * same URL as before.
 *
 * @param raw         Absolute base URL from settings.
 * @param workspaceId Workspace the run is scoped to.
 * @returns The serialized workspace-scoped URL.
 * @throws TypeError when `raw` is not a parseable absolute URL.
 */
function scopedMcpUrlFromConfiguredBase(raw: string, workspaceId: string): string {
  const url = new URL(raw);
  url.pathname = `/v1/workspaces/${encodeURIComponent(workspaceId)}/mcp`;
  url.search = "";
  url.hash = "";
  return url.toString();
}
1305
+
1306
/**
 * List the normalized URLs that count as first-party: the configured (or
 * default) MCP base and its `/docs` sibling. Empty when the base cannot be parsed.
 */
function firstPartyMcpUrls(settings: Settings): string[] {
  // Route the unset case through the shared loopback default so the literal
  // lives in exactly one place (@opengeni/config's firstPartyMcpBaseUrl).
  const base = normalizeUrl(settings.opengeniMcpUrl ?? firstPartyMcpBaseUrl(settings));
  if (!base) {
    return [];
  }
  const docsUrl = new URL(base);
  docsUrl.pathname = `${docsUrl.pathname.replace(/\/+$/, "")}/docs`;
  const urls: string[] = [];
  for (const candidate of [base, normalizeUrl(docsUrl.toString())]) {
    if (candidate) {
      urls.push(candidate);
    }
  }
  return urls;
}
1317
+
1318
/**
 * Canonicalize a URL for equality comparison: drop the fragment and any
 * trailing slashes from the path. Returns `null` when `raw` does not parse.
 */
function normalizeUrl(raw: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(raw);
  } catch {
    return null;
  }
  parsed.hash = "";
  parsed.pathname = parsed.pathname.replace(/\/+$/, "");
  return parsed.toString();
}
1328
+
1329
/**
 * Build the model-facing tool name `<registryId>__<toolName>`.
 */
export function prefixedMcpToolName(registryId: string, toolName: string): string {
  return [registryId, toolName].join("__");
}
1332
+
1333
/**
 * Decorator around an MCP server that exposes its tools under registry-prefixed
 * names (`<registryId>__<tool>`) and enforces an optional allow-list.
 *
 * `listTools` hides tools outside the allow-list and prefixes the rest;
 * `callTool` strips the prefix, re-checks the allow-list, and forwards to the
 * wrapped server. Everything else is delegated; resource methods throw when
 * the wrapped server does not implement them.
 */
class PrefixedMcpServer implements MCPServer {
  readonly cacheToolsList: boolean;
  readonly name: string;
  readonly prefix: string;
  private readonly allowedTools: Set<string> | undefined;

  constructor(private readonly inner: MCPServer, registryId: string, allowedTools?: string[]) {
    this.name = registryId;
    // Prefixing the empty tool name yields just the "<registryId>__" prefix.
    this.prefix = prefixedMcpToolName(registryId, "");
    this.cacheToolsList = inner.cacheToolsList;
    this.allowedTools = allowedTools ? new Set(allowedTools) : undefined;
  }

  connect(): Promise<void> {
    return this.inner.connect();
  }

  close(): Promise<void> {
    return this.inner.close();
  }

  /** Allowed tools of the wrapped server, renamed with this server's prefix. */
  async listTools(): Promise<RuntimeMcpTool[]> {
    const exposed: RuntimeMcpTool[] = [];
    for (const tool of await this.inner.listTools()) {
      if (this.isAllowed(tool.name)) {
        exposed.push({ ...tool, name: prefixedMcpToolName(this.name, tool.name) });
      }
    }
    return exposed;
  }

  /** Forward a prefixed tool call; rejects on a missing prefix or a disallowed tool. */
  async callTool(toolName: string, args: Record<string, unknown> | null, meta?: Record<string, unknown> | null): Promise<any> {
    const bareName = this.unprefixToolName(toolName);
    if (!this.isAllowed(bareName)) {
      throw new Error(`MCP tool ${bareName} is not allowed for server ${this.name}`);
    }
    return await this.inner.callTool(bareName, args, meta);
  }

  invalidateToolsCache(): Promise<void> {
    return this.inner.invalidateToolsCache();
  }

  async listResources(params?: Record<string, unknown>): Promise<any> {
    const target = this.inner as MCPServer & { listResources?: (params?: Record<string, unknown>) => Promise<any> };
    if (target.listResources) {
      return await target.listResources(params);
    }
    throw new Error(`MCP server ${this.name} does not support resources`);
  }

  async listResourceTemplates(params?: Record<string, unknown>): Promise<any> {
    const target = this.inner as MCPServer & { listResourceTemplates?: (params?: Record<string, unknown>) => Promise<any> };
    if (target.listResourceTemplates) {
      return await target.listResourceTemplates(params);
    }
    throw new Error(`MCP server ${this.name} does not support resource templates`);
  }

  async readResource(uri: string): Promise<any> {
    const target = this.inner as MCPServer & { readResource?: (uri: string) => Promise<any> };
    if (target.readResource) {
      return await target.readResource(uri);
    }
    throw new Error(`MCP server ${this.name} does not support resource reads`);
  }

  /** No allow-list means every tool is allowed. */
  private isAllowed(toolName: string): boolean {
    return this.allowedTools === undefined || this.allowedTools.has(toolName);
  }

  private unprefixToolName(toolName: string): string {
    if (toolName.startsWith(this.prefix)) {
      return toolName.slice(this.prefix.length);
    }
    throw new Error(`MCP tool ${toolName} is missing expected ${this.name} prefix`);
  }
}
1408
+
1409
+ // createSandboxClient (+ withDockerNetwork / connectDockerNetwork) moved to the
1410
+ // agent-loop-free leaf ./sandbox; re-exported via `export * from "./sandbox"`.
1411
+
1412
/** Options accepted by `prepareRunInput`. */
export type PrepareInputOptions = {
  /** Sandbox client used when restoring a saved sandbox session state. */
  sandboxClient?: unknown;
  /**
   * Usable input-token budget B (window - reserved output). When provided, the
   * assembled history goes through `enforceInputBudget`, so a single
   * over-budget input can never be sent — the last-resort backstop behind the
   * best-effort pre-turn compaction. Leaving it undefined disables the guard
   * (no behaviour change for callers that don't opt in).
   */
  inputBudgetTokens?: number;
};
1423
+
1424
/**
 * Read-path budget guard for an assembled model input.
 *
 * With a budget supplied, the oldest history is dropped at a clean turn
 * boundary until the request fits B. The cut is orphan-safe (only at
 * user-message boundaries). The trailing user message counts against the
 * budget but is never dropped. Without a usable budget the input is returned
 * as `history` followed by `trailing`, untouched.
 */
function guardAssembledInput(
  history: AgentInputItem[],
  trailing: AgentInputItem,
  inputBudgetTokens: number | undefined,
): AgentInputItem[] {
  // `!(x <= 0)` rather than `x > 0`: keeps the original treatment of NaN.
  if (typeof inputBudgetTokens === "number" && !(inputBudgetTokens <= 0)) {
    const reservedForTrailing = estimateItemTokens(trailing as unknown as Record<string, unknown>);
    const outcome = enforceInputBudget(
      history as unknown as Array<Record<string, unknown>>,
      inputBudgetTokens,
      reservedForTrailing,
    );
    if (outcome.trimmed) {
      console.warn(
        `read-path budget guard trimmed ${outcome.droppedCount} oldest history item(s) to fit input budget (${inputBudgetTokens} tokens); the over-budget input was NOT sent`,
      );
    }
    return [...(outcome.items as unknown as AgentInputItem[]), trailing];
  }
  return [...history, trailing];
}
1451
+
1452
/**
 * Turn a segment input into what the runner should be started with.
 *
 * Approval segments: the saved run state is restored, the matching
 * interruption is approved or rejected, and the state itself is the input.
 *
 * Message segments, in order of precedence:
 *  1. Items mode (`historyItems` non-empty): sanitized database history plus
 *     the new user message; the RunState blob is not touched.
 *  2. No saved state, or the cleared sentinel: the bare text (fresh context).
 *  3. Legacy run-state resume: sanitized `state.history` plus the new user
 *     message, with the blob passed along for the sandbox.
 *
 * @throws Error when an approval targets a cleared context or an unknown approval id.
 */
export async function prepareRunInput(agent: Agent<any, any>, input: AgentSegmentInput, options: PrepareInputOptions = {}): Promise<PreparedAgentInput> {
  if (input.kind !== "message") {
    // An approval can only be resumed against a real saved run state. If the
    // latest blob is the cleared sentinel the awaiting turn was wiped (the API
    // refuses clear in requires_action, so this is a defensive guard) — fail
    // with an honest message instead of the cryptic SDK "missing schema version".
    if (isClearedRunStateBlob(input.serializedRunState)) {
      throw new Error("Cannot resume an approval: the session context was cleared, so the awaiting run state no longer exists.");
    }
    const awaitingState = await RunState.fromString(agent, input.serializedRunState);
    const target = awaitingState
      .getInterruptions()
      .find((item: any) => approvalIdentifier(item) === input.approvalId);
    if (!target) {
      throw new Error(`Approval not found in saved run state: ${input.approvalId}`);
    }
    if (input.decision === "approve") {
      awaitingState.approve(target as any);
    } else {
      awaitingState.reject(target as any, input.message ? { message: input.message } : undefined);
    }
    return { input: awaitingState };
  }

  // The new user turn appended after whatever history is replayed.
  const trailingUserMessage = (): AgentInputItem => ({
    type: "message",
    role: "user",
    content: input.text,
  } as AgentInputItem);

  if (input.historyItems && input.historyItems.length > 0) {
    // Items mode: conversation truth comes from the database, the sandbox
    // recovery descriptor from its own store. The RunState blob is not
    // touched at all on this path.
    const sandboxSessionState = input.sandboxEnvelope
      ? await restoredSandboxSessionStateFromEntry(input.sandboxEnvelope, options.sandboxClient)
      : undefined;
    // Replayed history is reloaded verbatim from the database, so it can
    // contain a tool-call pairing the Responses API rejects (most destructively
    // an orphaned function_call_result with no matching function_call — which
    // 400s every turn and bricks the session until the row is hand-deleted).
    // Sanitizing the in-memory copy lets existing corruption self-heal and
    // makes a future write-path race non-fatal; stored rows are never touched.
    const replayable = sanitizeHistoryItemsForModel(
      input.historyItems as unknown as Array<Record<string, unknown>>,
    ) as unknown as AgentInputItem[];
    // Read-path budget guard: even after the orphan sanitizer, an assembled
    // input can exceed the model window (pre-turn compaction is best-effort
    // and can no-op). Trim the oldest history at a clean turn boundary so an
    // over-budget request is never sent. No-op when no budget is supplied.
    const guardedInput = guardAssembledInput(replayable, trailingUserMessage(), options.inputBudgetTokens);
    return {
      input: guardedInput,
      ...(sandboxSessionState ? { sandboxSessionState } : {}),
    };
  }

  // No prior state, or a cleared sentinel: start fresh. The clear sentinel
  // ({@link CLEARED_RUN_STATE_BLOB}) is not a real serialized run state — it
  // carries no $schemaVersion, so RunState.fromString would throw on it. In
  // run_state history mode this message path is the one that reads the blob
  // after a /clear, so recognizing the sentinel here is what keeps the next
  // turn working (a fresh, empty context) instead of bricking on deserialize.
  if (!input.serializedRunState || isClearedRunStateBlob(input.serializedRunState)) {
    return { input: input.text };
  }

  const resumedState = await RunState.fromString(agent, input.serializedRunState);
  const sandboxSessionState = await restoredSandboxSessionState(resumedState, options.sandboxClient);
  // state.history already runs the SDK's own orphan-tool-call pruning, but
  // applying the same sanitizer keeps the legacy run-state resume path under
  // one invariant with the items path and is defensive against a corrupt blob.
  const replayable = sanitizeHistoryItemsForModel(
    resumedState.history as unknown as Array<Record<string, unknown>>,
  ) as unknown as AgentInputItem[];
  // Same read-path budget guard as the items path: keep an over-budget resumed
  // history off the wire by trimming the oldest turns when a budget is supplied.
  const guardedInput = guardAssembledInput(replayable, trailingUserMessage(), options.inputBudgetTokens);
  return {
    input: guardedInput,
    ...(sandboxSessionState ? { sandboxSessionState } : {}),
    serializedRunStateForSandbox: input.serializedRunState,
  };
}
1542
+
1543
/** Per-call overrides accepted by `runAgentStream`. */
export type RunAgentStreamOptions = {
  /** Pre-built sandbox client to use in place of one created from settings. */
  sandboxClient?: unknown;
  /** Sandbox environment variables to use in place of those collected from settings. */
  sandboxEnvironment?: Record<string, string>;
  /** Sink for normalized runtime events. */
  onRuntimeEvent?: (event: NormalizedRuntimeEvent) => Promise<void> | void;
  // OWNERSHIP INVERSION (P1.2): an externally-owned, already-live sandbox
  // session resolved by the per-turn resume-by-id path. When present,
  // runAgentStream does NOT build (or resume, or discard) a client — it threads
  // these straight into runOptions.sandbox as a NON-OWNED session. The SDK
  // registers a provided session non-owned (manager.js) and NEVER reaps it on a
  // normal finish (proven by spikes/sdk-keystone) — that is the keystone: the
  // one box survives across turns. Mutually exclusive with the per-run
  // createSandboxClient path (the owned branch takes precedence when both set).
  // Agent-dependent decorators (file-downloads, lifecycle/repo-clone hooks) are
  // re-applied around the resumed client here; the live `session`/`sessionState`
  // carry the box, so no create()/resume() is re-invoked inside run().
  ownedSandbox?: {
    /** The raw provider client built by the per-turn resume path. */
    client: unknown;
    /** SandboxSessionLike — the live, NON-OWNED handle (never reaped). */
    session: unknown;
    /** SandboxSessionState the box was resumed from. */
    sessionState?: unknown;
    // The UN-PROXIED established box for platform setup (lifecycle hooks + file
    // resource materialization). `session` may be the mid-turn routing proxy whose
    // every exec re-reads the active pointer — platform-initiated setup must NOT
    // follow a swap onto a connected machine (the user's real computer), so it
    // runs against this pinned handle instead. Absent -> falls back to `session`.
    setupSession?: unknown;
  };
  // A per-turn model-input filter chained AFTER the provider-item-id strip.
  // Used by the genesis-title injection to prepend a hidden, NON-PERSISTED
  // directive: a callModelInputFilter mutates only `modelData.input` for each
  // model call and never touches `state.history`/`originalInput`, so the
  // reconcile dual-write never sees it.
  callModelInputFilter?: CallModelInputFilter;
};
1576
+
1577
/**
 * One-shot directive appended to the agent's system prompt on the genesis turn
 * (see buildOpenGeniAgent's genesisTitleHint).
 *
 * It travels through the authoritative instructions channel so the model
 * reliably obeys, and names the prefixed tool the agent actually sees
 * (opengeni__set_session_title). It is appended after the non-bypassable core
 * so a white-label persona can't drop it.
 */
export const GENESIS_TITLE_DIRECTIVE =
  "This is the first turn of a new session. Before responding to the user, call the opengeni__set_session_title tool with a concise 3-7 word title that summarizes what this session is about, then address the user's request normally.";
1584
+
1585
/**
 * callModelInputFilter that drops the provider-assigned item id (rs_/msg_/fc_…)
 * from every input item right before each model call.
 *
 * Responses-API requests that carry item ids are resolved against the
 * provider's stored responses, and that store is not durable enough to anchor
 * long runs on: a response that streamed successfully can be missing from the
 * store on the very next call, which then fails with 400 "Item with id ... not
 * found" (observed live on Azure OpenAI mid-turn). All item content — including
 * the encrypted reasoning payload carried in providerData when
 * `openaiReasoningEncryptedContent` is on — is sent inline, so the ids add
 * fragility without adding information.
 *
 * Pairing fields (`call_id`/`callId`) are separate properties and are left
 * alone. Items are shallow-cloned, never mutated; items without an `id` (and
 * non-object items) pass through by reference.
 */
export const stripProviderItemIdsFilter: CallModelInputFilter = ({ modelData }) => {
  const input = modelData.input.map((item) => {
    if (!item || typeof item !== "object" || !("id" in item)) {
      return item;
    }
    const clone = { ...(item as Record<string, unknown>) };
    delete clone.id;
    return clone as AgentInputItem;
  });
  return { ...modelData, input };
};
1608
+
1609
/**
 * callModelInputFilter that reduces every `computer_call` carrying BOTH
 * `action` and `actions` to EXACTLY ONE of them (keeps `actions`, drops `action`).
 *
 * The Azure computer-use endpoint rejects a request whose computer_call has
 * both with `400 Computer call input must include exactly one of `action` or
 * `actions``; and (live-proven against gpt-5.5's GA computer tool) it also
 * rejects the `action`-only form, accepting ONLY the batched plural `actions`.
 * The SDK 0.11.6 schema allows both, so a freshly-emitted screenshot call
 * carries the redundant pair.
 *
 * The filter runs before EVERY model call — the turn-start history replay AND
 * every mid-turn follow-up — so it also covers the just-emitted (non-replayed)
 * computer_call on the same turn, which the turn-start `prepareRunInput`
 * sanitizer never sees. Items are cloned, never mutated.
 */
export const normalizeComputerCallsFilter: CallModelInputFilter = ({ modelData }) => {
  const rawItems = modelData.input as unknown as Array<Record<string, unknown>>;
  const input = normalizeComputerCallActions(rawItems) as unknown as AgentInputItem[];
  return { ...modelData, input };
};
1629
+
1630
/**
 * Fold several callModelInputFilters into a single filter. They run strictly
 * left-to-right, each awaited before the next, and each receives the previous
 * filter's `modelData`. An empty list yields the input `modelData` unchanged.
 */
function composeCallModelInputFilters(filters: CallModelInputFilter[]): CallModelInputFilter {
  return async (args) =>
    filters.reduce(
      async (pending, filter) => filter({ ...args, modelData: await pending }),
      Promise.resolve(args.modelData),
    );
}
1643
+
1644
/**
 * Build the model-input filter applied before every model call.
 *
 * The computer_call action/actions normalizer is ALWAYS included (the Azure
 * endpoint 400s without it). The provider-item-id strip runs after it when
 * `settings.openaiProviderItemIds` is `"strip"`.
 */
export function callModelInputFilterForSettings(settings: Settings): CallModelInputFilter | undefined {
  const stripIds = settings.openaiProviderItemIds === "strip";
  return composeCallModelInputFilters(
    stripIds
      ? [normalizeComputerCallsFilter, stripProviderItemIdsFilter]
      : [normalizeComputerCallsFilter],
  );
}
1657
+
1658
+ export async function runAgentStream(agent: Agent<any, any>, input: PreparedAgentInput | string | RunState<any, any>, settings: Settings, overrides: RunAgentStreamOptions = {}) {
1659
+ const prepared: PreparedAgentInput = typeof input === "string" || input instanceof RunState ? { input } : input;
1660
+ const environment = overrides.sandboxEnvironment ?? collectSandboxEnvironment(settings);
1661
+
1662
+ // OWNED PATH (P1.2 ownership inversion): the per-turn resume path injected a
1663
+ // live, externally-owned box. We thread the live `session` straight into
1664
+ // runOptions.sandbox so the SDK registers it NON-OWNED and never reaps it on
1665
+ // a normal finish (the keystone). We re-apply ONLY the agent-dependent
1666
+ // decorators (file-downloads + lifecycle/repo-clone hooks) around the resumed
1667
+ // client — the manifest-refresh-on-resume wrap is a no-op when a live
1668
+ // `session` is supplied (resume is not re-invoked). This branch is reached
1669
+ // ONLY when sandboxOwnershipEnabled gated the activity into resolving a box;
1670
+ // with the flag off the activity never sets `ownedSandbox` and this whole
1671
+ // block is skipped (byte-for-byte the legacy path).
1672
+ if (overrides.ownedSandbox) {
1673
+ const { client: ownedClient, session, sessionState } = overrides.ownedSandbox;
1674
+ // Platform setup (hooks + file materialization) execs against the UN-PROXIED
1675
+ // established box when the caller pinned one — never through the routing proxy,
1676
+ // whose per-op pointer re-read could land these execs on a machine swapped in
1677
+ // mid-turn.
1678
+ const setupSession = (overrides.ownedSandbox.setupSession ?? session) as SandboxSessionLike;
1679
+ const runAs = sandboxRunAs(settings);
1680
+ const fileDownloads = sandboxFileDownloadsForAgent(agent);
1681
+ const resourceClient = fileDownloads.length > 0
1682
+ ? withSandboxFileDownloads(ownedClient as SandboxClient, fileDownloads, {
1683
+ ...(overrides.onRuntimeEvent ? { onRuntimeEvent: overrides.onRuntimeEvent } : {}),
1684
+ ...(runAs ? { runAs } : {}),
1685
+ })
1686
+ : (ownedClient as SandboxClient);
1687
+ // TOKEN-BROKER (B1): the per-turn git token seed, forwarded OFF-MANIFEST so the
1688
+ // repository-clone hook seeds it to the box's token file before the clone.
1689
+ const ownedGitTokenSeed = gitTokenSeedForAgent(agent);
1690
+ const ownedHooks = [
1691
+ ...sandboxLifecycleHooksForIds(sandboxLifecycleHookIds(settings)),
1692
+ ...sandboxRepositoryCloneHooksForAgent(agent),
1693
+ ];
1694
+ const ownedHookContext: SandboxLifecycleHookContext = {
1695
+ environment,
1696
+ ...(overrides.onRuntimeEvent ? { onRuntimeEvent: overrides.onRuntimeEvent } : {}),
1697
+ ...(runAs ? { runAs } : {}),
1698
+ ...(ownedGitTokenSeed ? { gitTokenSeed: ownedGitTokenSeed } : {}),
1699
+ };
1700
+ // OWNED-PATH HOOKS: the SDK NEVER calls client.create/resume when handed a live
1701
+ // provided session (SandboxRuntimeManager uses `sandboxConfig.session` directly),
1702
+ // so the withSandboxLifecycleHooks decoration below can never fire on this branch —
1703
+ // it only wraps create/resume. Run the beforeAgentStart hooks directly against the
1704
+ // provided box, once per turn, BEFORE the run starts: this is what executes the
1705
+ // repository-clone hook (which also seeds the B1 askpass + token file) and the
1706
+ // azure-cli-login hook on lease-owned boxes. Re-running on a warm box is safe by
1707
+ // construction: clone skips when the target is already materialized, the token
1708
+ // seed OVERWRITES the file (the desired per-turn refresh), and az login is
1709
+ // idempotent. A turn resumed after preemption re-enters here and re-seeds the
1710
+ // freshly minted token — which is exactly what a >1h-old warm box needs.
1711
+ // EXCEPT on a connected machine (effective backend "selfhosted"): the box is the
1712
+ // user's REAL computer — the platform must not run setup against it (the clone
1713
+ // hooks are already empty there; this keeps az login off it too).
1714
+ if (agentActiveSandboxBackend.get(agent) !== "selfhosted") {
1715
+ await runBeforeAgentStartHooks(setupSession, ownedHooks, ownedHookContext);
1716
+ // FILE RESOURCES: withSandboxFileDownloads below has the IDENTICAL provided-
1717
+ // session blind spot (it too wraps only create/resume), so signed-URL file
1718
+ // materialization must also run directly against the pinned box. The download
1719
+ // command is idempotent (skips an existing file) and atomic (tmp + rename),
1720
+ // so the per-turn re-run is safe; the turn re-signs URLs each run, so a
1721
+ // re-warmed box always gets fresh links.
1722
+ if (fileDownloads.length > 0) {
1723
+ await materializeSandboxFileDownloads(setupSession, fileDownloads, {
1724
+ ...(overrides.onRuntimeEvent ? { onRuntimeEvent: overrides.onRuntimeEvent } : {}),
1725
+ ...(runAs ? { runAs } : {}),
1726
+ });
1727
+ }
1728
+ }
1729
+ // Keep the decoration as a safety net for any session the SDK does create/resume
1730
+ // through the client during this run (it is inert for the provided session).
1731
+ const decoratedClient = withSandboxLifecycleHooks(resourceClient, ownedHooks, ownedHookContext);
1732
+ const ownedFilter = composeCallModelInputFilters(
1733
+ [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
1734
+ (f): f is CallModelInputFilter => Boolean(f),
1735
+ ),
1736
+ );
1737
+ const ownedRunOptions: Parameters<typeof run>[2] = {
1738
+ stream: true,
1739
+ maxTurns: settings.agentMaxModelCallsPerTurn,
1740
+ callModelInputFilter: ownedFilter,
1741
+ };
1742
+ ownedRunOptions.sandbox = {
1743
+ client: decoratedClient,
1744
+ session,
1745
+ ...(sessionState ? { sessionState } : {}),
1746
+ } as SandboxRunConfig;
1747
+ return await runScopedRunner(settings).run(agent, prepared.input, ownedRunOptions);
1748
+ }
1749
+
1750
+ const rawClient = overrides.sandboxClient ?? createSandboxClient(settings, environment);
1751
+ const refreshedClient = rawClient
1752
+ ? withManifestRefreshOnResume(rawClient as SandboxClient, (agent as { defaultManifest?: Manifest }).defaultManifest)
1753
+ : undefined;
1754
+ const runAs = sandboxRunAs(settings);
1755
+ const fileDownloads = sandboxFileDownloadsForAgent(agent);
1756
+ const resourceClient = refreshedClient && fileDownloads.length > 0
1757
+ ? withSandboxFileDownloads(refreshedClient, fileDownloads, {
1758
+ ...(overrides.onRuntimeEvent ? { onRuntimeEvent: overrides.onRuntimeEvent } : {}),
1759
+ ...(runAs ? { runAs } : {}),
1760
+ })
1761
+ : refreshedClient;
1762
+ // TOKEN-BROKER (B1): the per-turn git token seed, forwarded OFF-MANIFEST so the
1763
+ // repository-clone hook seeds it to the box's token file before the clone.
1764
+ const gitTokenSeed = gitTokenSeedForAgent(agent);
1765
+ const client = resourceClient
1766
+ ? withSandboxLifecycleHooks(resourceClient, [
1767
+ ...sandboxLifecycleHooksForIds(sandboxLifecycleHookIds(settings)),
1768
+ ...sandboxRepositoryCloneHooksForAgent(agent),
1769
+ ], {
1770
+ environment,
1771
+ ...(overrides.onRuntimeEvent ? { onRuntimeEvent: overrides.onRuntimeEvent } : {}),
1772
+ ...(runAs ? { runAs } : {}),
1773
+ ...(gitTokenSeed ? { gitTokenSeed } : {}),
1774
+ })
1775
+ : undefined;
1776
+ const sandboxSessionState = prepared.sandboxSessionState
1777
+ ?? (prepared.serializedRunStateForSandbox && client
1778
+ ? await restoredSandboxSessionState(await RunState.fromString(agent, prepared.serializedRunStateForSandbox), client)
1779
+ : undefined);
1780
+ // Strip provider item ids first, then apply any per-turn filter (genesis
1781
+ // title directive). Composed left-to-right so the directive lands on the
1782
+ // already-id-stripped input. A callModelInputFilter only shapes the per-call
1783
+ // model input, never the persisted run-state history.
1784
+ const callModelInputFilter = composeCallModelInputFilters(
1785
+ [callModelInputFilterForSettings(settings), overrides.callModelInputFilter].filter(
1786
+ (f): f is CallModelInputFilter => Boolean(f),
1787
+ ),
1788
+ );
1789
+ const runOptions: Parameters<typeof run>[2] = {
1790
+ stream: true,
1791
+ maxTurns: settings.agentMaxModelCallsPerTurn,
1792
+ // Strip provider-assigned item ids from every model call (turn-start
1793
+ // history replay AND mid-turn follow-ups) so requests never depend on the
1794
+ // provider's server-side response store. A stored response can vanish
1795
+ // between two calls of the same turn, failing the run with 400 "Item with
1796
+ // id 'rs_…' not found"; with the ids gone the request is self-contained.
1797
+ callModelInputFilter,
1798
+ };
1799
+ void settings.disableOpenaiTracing;
1800
+ if (client) {
1801
+ runOptions.sandbox = {
1802
+ client,
1803
+ ...(sandboxSessionState ? { sessionState: sandboxSessionState } : {}),
1804
+ } as SandboxRunConfig;
1805
+ }
1806
+ return await runScopedRunner(settings).run(agent, prepared.input, runOptions);
1807
+ }
1808
+
1809
/**
 * Builds a `Runner` scoped to a single run, with its `modelProvider` derived from
 * the settings of THIS turn.
 *
 * Why not the standalone `run()`: it goes through a process-global default Runner
 * whose modelProvider is the lazy global default (whatever the most recent
 * `configureOpenAI` / `setDefaultModelProvider` call installed). The worker executes
 * ~100 activities concurrently in one process, so a turn starting concurrently for a
 * DIFFERENT workspace can replace that global between this turn's `configure` and a
 * later per-call `getModel()` during the stream. The global router is then left
 * without a codex provider and a `codex/<slug>` name re-resolution throws
 * CodexSubscriptionUnavailableError (the SandboxAgent/Modal path drops the Model
 * instance and re-resolves by NAME).
 *
 * A run-scoped Runner removes the mutable global from the correctness path: every
 * concurrent turn resolves model names against its OWN settings (which carry the
 * codex-subscription provider via withCodexProvider for an active workspace, plus the
 * registry providers). Everything else is inherited from the SDK's default config,
 * exactly like the default runner. `setDefaultModelProvider` stays only as a
 * boot-time fallback.
 *
 * @param settings Settings of the turn the runner is created for.
 * @returns A new Runner bound to a provider built from `settings`.
 */
function runScopedRunner(settings: Settings): Runner {
  const modelProvider = new MultiProviderModelProvider(settings);
  return new Runner({ modelProvider });
}
1830
+
1831
+ export { MaxTurnsExceededError } from "@openai/agents";
1832
+
1833
/**
 * Recognizes the agents SDK per-segment turn cap (`MaxTurnsExceededError`).
 *
 * The cap is a pacing valve rather than a session failure: callers are expected to
 * end the segment gracefully (idle) so that an active goal's continuation loop, or a
 * follow-up user message, picks the work up again. If the SDK attached the run state
 * at the moment the cap was hit, its serialized form is returned so the resumed turn
 * keeps its full context.
 *
 * @param error Any thrown value.
 * @returns `null` when `error` is not a `MaxTurnsExceededError`; otherwise an object
 *   whose `serializedRunState` is the serialized state, or `null` when no state is
 *   attached or serializing it throws.
 */
export function maxTurnsExceededRunState(error: unknown): { serializedRunState: string | null } | null {
  if (error instanceof MaxTurnsExceededError) {
    let serializedRunState: string | null = null;
    try {
      if (error.state) {
        serializedRunState = error.state.toString();
      }
    } catch {
      // Serialization is best-effort: a broken state must not mask the cap itself.
      serializedRunState = null;
    }
    return { serializedRunState };
  }
  return null;
}
1850
+
1851
/**
 * Extracts the serialized run state carried by an agents SDK error, if there is one.
 *
 * Provider failures typically surface as raw API errors with no state attached, so a
 * `null` result must be read by callers as "resume from the previous snapshot", not
 * as an error condition.
 *
 * @param error Any thrown value.
 * @returns The serialized state, or `null` when `error` is not an `AgentsError`, has
 *   no state, or the state cannot be serialized.
 */
export function agentsErrorRunState(error: unknown): string | null {
  if (error instanceof AgentsError && error.state) {
    try {
      return error.state.toString();
    } catch {
      // An unserializable state is treated the same as a missing one.
      return null;
    }
  }
  return null;
}
1867
+
1868
/**
 * Decorates a sandbox client so that every RESUMED session is topped up with the
 * entries of `targetManifest` that it does not have yet (see
 * `applyMissingManifestEntries`).
 *
 * Only `resume` gains behavior. `create` and the remaining optional client methods
 * are forwarded unchanged, and only when the wrapped client defines them. When there
 * is no target manifest, or the client cannot resume, the client is returned as-is.
 *
 * @param client Sandbox client to decorate.
 * @param targetManifest Manifest the resumed session should be brought up to.
 * @returns The decorated client, or `client` itself when there is nothing to wrap.
 */
export function withManifestRefreshOnResume(client: SandboxClient, targetManifest: Manifest | undefined): SandboxClient {
  if (!targetManifest || !client.resume) {
    return client;
  }
  // Resume through the wrapped client first, then apply whatever the manifest is missing.
  const resumeAndRefresh = async (state: SandboxSessionState) => {
    const resumed = await client.resume!(state);
    await applyMissingManifestEntries(resumed, targetManifest);
    return resumed;
  };
  return {
    backendId: client.backendId,
    ...(client.supportsDefaultOptions !== undefined
      ? { supportsDefaultOptions: client.supportsDefaultOptions }
      : {}),
    ...(client.create
      ? { create: async (...args: any[]) => await (client.create as any)(...args) }
      : {}),
    resume: resumeAndRefresh,
    ...(client.delete
      ? { delete: async (state: SandboxSessionState) => await client.delete!(state) }
      : {}),
    ...(client.serializeSessionState
      ? { serializeSessionState: async (state: SandboxSessionState, options) => await client.serializeSessionState!(state, options) }
      : {}),
    ...(client.canPersistOwnedSessionState
      ? { canPersistOwnedSessionState: async (state: SandboxSessionState) => await client.canPersistOwnedSessionState!(state) }
      : {}),
    ...(client.canReusePreservedOwnedSession
      ? { canReusePreservedOwnedSession: async (state: SandboxSessionState) => await client.canReusePreservedOwnedSession!(state) }
      : {}),
    ...(client.deserializeSessionState
      ? { deserializeSessionState: async (state: Record<string, unknown>) => await client.deserializeSessionState!(state) }
      : {}),
  };
}
1888
+
1889
/**
 * Brings a (resumed) sandbox session up to `targetManifest` by materializing only the
 * entries the session's current manifest does not contain, and by refreshing the
 * manifest environment when it differs.
 *
 * Nothing happens when the target has no entries, or when there is neither a new
 * entry nor an environment change. After a successful apply the session's
 * `state.manifest` is replaced with the merged manifest.
 *
 * @param session Session whose `state.manifest` describes what is already in place.
 * @param targetManifest Manifest the session should satisfy.
 * @throws Error when the target has entries but the session exposes no current
 *   manifest, or supports neither `applyManifest()` nor `materializeEntry()`.
 * @throws Error when the manifest roots differ, when an existing entry would have to
 *   be replaced by a different one, or when the environment changed but the session
 *   has no `applyManifest()`.
 */
export async function applyMissingManifestEntries(session: SandboxSessionLike, targetManifest: Manifest): Promise<void> {
  const rawCurrent = (session as { state?: { manifest?: Manifest | { root?: string; entries?: Record<string, any>; environment?: Record<string, any> } } }).state?.manifest;
  const currentManifest = rawCurrent ? ensureManifest(rawCurrent) : undefined;
  const target = ensureManifest(targetManifest);

  // An entry-less target is always a no-op, whatever the session supports.
  if (Object.keys(target.entries).length === 0) {
    return;
  }
  if (!currentManifest) {
    throw new Error("Resumed sandbox session cannot apply new manifest entries because current manifest state is unavailable");
  }
  if (!session.applyManifest && !session.materializeEntry) {
    throw new Error("Resumed sandbox session cannot apply new manifest entries because it does not support applyManifest() or materializeEntry()");
  }
  if (currentManifest.root !== target.root) {
    throw new Error("Cannot apply per-turn resources to a sandbox with a different manifest root");
  }

  // Collect the entries that are new; an existing path must match the target exactly.
  const knownEntries = currentManifest.entries as Record<string, unknown>;
  const additions: Record<string, any> = {};
  for (const [path, entry] of Object.entries(target.entries)) {
    const existing = knownEntries[path];
    if (existing !== undefined) {
      if (stableJson(existing) !== stableJson(entry)) {
        throw new Error(`Cannot replace existing sandbox manifest entry: ${path}`);
      }
      continue;
    }
    additions[path] = entry;
  }

  const environmentChanged = stableJson(currentManifest.environment) !== stableJson(target.environment);
  if (environmentChanged && !session.applyManifest) {
    throw new Error("Resumed sandbox session cannot refresh manifest environment because it does not support applyManifest()");
  }
  const hasAdditions = Object.keys(additions).length > 0;
  if (!hasAdditions && !environmentChanged) {
    return;
  }

  // Carry path grants through manifest rebuilds: since @openai/agents 0.11.0
  // they gate local source materialization, and run states saved before the
  // upgrade have manifests without grants.
  const extraPathGrants = mergePathGrants(currentManifest.extraPathGrants, target.extraPathGrants);
  const grantOptions = extraPathGrants.length ? { extraPathGrants } : {};

  if (session.applyManifest) {
    const delta = new Manifest({
      root: currentManifest.root,
      entries: additions,
      environment: target.environment,
      ...grantOptions,
    });
    await session.applyManifest(delta);
  } else {
    // No bulk apply available: materialize the new entries one at a time, in order.
    for (const [path, entry] of Object.entries(additions)) {
      await session.materializeEntry!({ path, entry });
    }
  }

  // Record the merged result so the next resume sees these entries as present.
  (session as { state?: { manifest?: Manifest } }).state!.manifest = new Manifest({
    root: currentManifest.root,
    environment: environmentChanged ? target.environment : currentManifest.environment,
    entries: {
      ...currentManifest.entries,
      ...additions,
    },
    ...grantOptions,
  });
}
1956
+
1957
/**
 * Merges two path-grant lists into one, keyed by `grant.path`.
 *
 * Grants from `target` replace grants from `current` that share a path; a path keeps
 * the position at which it was first seen. Missing lists count as empty.
 *
 * @param current Grants of the manifest already in place.
 * @param target Grants of the manifest being applied.
 * @returns The de-duplicated grant list.
 */
function mergePathGrants(
  current: Manifest["extraPathGrants"] | undefined,
  target: Manifest["extraPathGrants"] | undefined,
): Manifest["extraPathGrants"] {
  const byPath = new Map<string, Manifest["extraPathGrants"][number]>();
  for (const grants of [current, target]) {
    for (const grant of grants ?? []) {
      byPath.set(grant.path, grant);
    }
  }
  return Array.from(byPath.values());
}
1967
+
1968
/**
 * Decorates a sandbox client so that each session it creates or resumes has the
 * given file downloads materialized before the session is returned.
 *
 * A session object is processed at most once per decorated client (tracked in a
 * WeakSet). A session whose materialization throws is not recorded, so it is retried
 * if the client hands the same object back again. Every other client method is
 * forwarded unchanged, and only when the wrapped client defines it.
 *
 * @param client Sandbox client to decorate.
 * @param downloads Files to place into each session.
 * @param context Optional event sink and run-as identity passed to materialization.
 * @returns The decorated client, or `client` itself when the normalized download
 *   list is empty.
 */
export function withSandboxFileDownloads(
  client: SandboxClient,
  downloads: SandboxFileDownload[],
  context: Pick<SandboxLifecycleHookContext, "onRuntimeEvent" | "runAs"> = {},
): SandboxClient {
  const pendingDownloads = normalizeSandboxFileDownloads(downloads);
  if (pendingDownloads.length === 0) {
    return client;
  }
  const materialized = new WeakSet<object>();
  const ensureDownloads = async <T extends SandboxSessionLike>(session: T): Promise<T> => {
    if (typeof session === "object" && session !== null && !materialized.has(session)) {
      await materializeSandboxFileDownloads(session, pendingDownloads, context);
      // Only mark the session after success so a failed attempt can be retried.
      materialized.add(session);
    }
    return session;
  };
  return {
    backendId: client.backendId,
    ...(client.supportsDefaultOptions !== undefined
      ? { supportsDefaultOptions: client.supportsDefaultOptions }
      : {}),
    ...(client.create
      ? { create: async (...args: any[]) => await ensureDownloads(await (client.create as any)(...args)) }
      : {}),
    ...(client.resume
      ? { resume: async (state: SandboxSessionState) => await ensureDownloads(await client.resume!(state)) }
      : {}),
    ...(client.delete
      ? { delete: async (state: SandboxSessionState) => await client.delete!(state) }
      : {}),
    ...(client.serializeSessionState
      ? { serializeSessionState: async (state: SandboxSessionState, options) => await client.serializeSessionState!(state, options) }
      : {}),
    ...(client.canPersistOwnedSessionState
      ? { canPersistOwnedSessionState: async (state: SandboxSessionState) => await client.canPersistOwnedSessionState!(state) }
      : {}),
    ...(client.canReusePreservedOwnedSession
      ? { canReusePreservedOwnedSession: async (state: SandboxSessionState) => await client.canReusePreservedOwnedSession!(state) }
      : {}),
    ...(client.deserializeSessionState
      ? { deserializeSessionState: async (state: Record<string, unknown>) => await client.deserializeSessionState!(state) }
      : {}),
  };
}
1997
+
1998
/**
 * Runs the download command for each file inside the sandbox session, one file at a
 * time, reporting progress through `context.onRuntimeEvent`.
 *
 * For every download a `sandbox.operation.started` event is emitted, followed by
 * either `sandbox.operation.completed` or `sandbox.operation.failed` (the latter
 * carrying the error message). The first failure is rethrown and stops the loop.
 *
 * @param session Session to execute in; `exec` is preferred over `execCommand`.
 * @param downloads Files to materialize; returns immediately when none remain after
 *   normalization.
 * @param context Optional event sink and run-as identity for the commands.
 * @throws Error when the session supports neither `exec` nor `execCommand`.
 * @throws Whatever the command execution or the success assertion throws.
 */
export async function materializeSandboxFileDownloads(
  session: SandboxSessionLike,
  downloads: SandboxFileDownload[],
  context: Pick<SandboxLifecycleHookContext, "onRuntimeEvent" | "runAs"> = {},
): Promise<void> {
  const pendingDownloads = normalizeSandboxFileDownloads(downloads);
  if (pendingDownloads.length === 0) {
    return;
  }
  if (!session.exec && !session.execCommand) {
    throw new Error("Sandbox file download materialization requires command execution support");
  }
  const operationName = "file-resource-download";
  for (const download of pendingDownloads) {
    const targetPath = sandboxDownloadTargetPath(download);
    const details = {
      fileId: download.fileId,
      path: targetPath,
      sizeBytes: download.sizeBytes ?? null,
      expiresAt: download.expiresAt ? new Date(download.expiresAt).toISOString() : null,
    };
    await context.onRuntimeEvent?.({ type: "sandbox.operation.started", payload: { name: operationName, ...details } });
    try {
      // Both execution entry points receive the same request.
      const request = {
        cmd: sandboxFileDownloadCommand(download, targetPath),
        workdir: "/workspace",
        ...(context.runAs ? { runAs: context.runAs } : {}),
        yieldTimeMs: SANDBOX_LIFECYCLE_COMMAND_TIMEOUT_MS,
        maxOutputTokens: 20_000,
      };
      const result = session.exec
        ? await session.exec(request)
        : await session.execCommand!(request);
      assertSandboxCommandSucceeded(result, `Sandbox file resource download ${download.fileId}`);
      await context.onRuntimeEvent?.({ type: "sandbox.operation.completed", payload: { name: operationName, ...details } });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      await context.onRuntimeEvent?.({
        type: "sandbox.operation.failed",
        payload: { name: operationName, ...details, error: message },
      });
      throw error;
    }
  }
}
2050
+
2051
/**
 * Returns a copy of the file downloads registered for `agent` in `agentFileDownloads`.
 *
 * @param agent Agent to look up; anything that is not a non-null object has no
 *   registered downloads.
 * @returns A fresh array (empty when nothing is registered).
 */
export function sandboxFileDownloadsForAgent(agent: unknown): SandboxFileDownload[] {
  if (typeof agent !== "object" || agent === null) {
    return [];
  }
  const registered = agentFileDownloads.get(agent);
  return [...(registered ?? [])];
}
2056
+
2057
/**
 * Returns a usable `Manifest` instance for a value that is either already one or a
 * plain manifest-shaped object.
 *
 * A value is reused only when it is a `Manifest` instance that also exposes a
 * `mountTargetsForMaterialization` function; anything else is rebuilt through the
 * `Manifest` constructor from its `root`, `entries`, `environment` and
 * `extraPathGrants` (missing entries/environment default to empty objects; an empty
 * root or grant list is left out).
 */
function ensureManifest(manifest: Manifest | { root?: string; entries?: Record<string, any>; environment?: Record<string, any>; extraPathGrants?: any[] }): Manifest {
  if (manifest instanceof Manifest && typeof manifest.mountTargetsForMaterialization === "function") {
    return manifest;
  }
  const rootOption = manifest.root ? { root: manifest.root } : {};
  const grantOption = manifest.extraPathGrants?.length ? { extraPathGrants: manifest.extraPathGrants } : {};
  return new Manifest({
    ...rootOption,
    entries: manifest.entries ?? {},
    environment: manifest.environment ?? {},
    ...grantOption,
  });
}
2068
+
2069
/** True for a value that can be stored in a `Uint8Array` without being altered. */
function isImageByte(value: unknown): value is number {
  return typeof value === "number" && Number.isInteger(value) && value >= 0 && value <= 255;
}

/**
 * Coerce the binary shapes a tool-output image `data` field can take into a
 * `Uint8Array`: a live `Uint8Array`, a plain `number[]`, or the object-of-numbers
 * (`{"0":137,"1":80,…}`) that a `Uint8Array` degrades into after a JSON round-trip —
 * the exact 10x-bloat shape this normalizer exists to kill.
 *
 * Only genuine byte sequences are accepted. Every element must be an integer in
 * 0..255, and an object must be keyed by exactly the indices 0..n-1. Anything else
 * (e.g. `{width: 10, height: 20}` or `[1, 300]`) yields `null` instead of being
 * silently wrapped modulo 256 into corrupt image bytes.
 *
 * @returns The bytes, or `null` when `data` is not a recognizable byte sequence.
 */
function toImageBytes(data: unknown): Uint8Array | null {
  if (data instanceof Uint8Array) {
    return data;
  }
  if (Array.isArray(data)) {
    return data.every(isImageByte) ? Uint8Array.from(data as number[]) : null;
  }
  if (data && typeof data === "object") {
    const record = data as Record<string, unknown>;
    const length = Object.keys(record).length;
    if (length === 0) {
      return null;
    }
    // With `length` own keys, finding a byte at every index 0..length-1 proves the
    // keys are exactly those indices (no gaps, no stray properties).
    const bytes = new Uint8Array(length);
    for (let index = 0; index < length; index += 1) {
      const value = Object.prototype.hasOwnProperty.call(record, index) ? record[String(index)] : undefined;
      if (!isImageByte(value)) {
        return null;
      }
      bytes[index] = value;
    }
    return bytes;
  }
  return null;
}

/**
 * Compact a structured image tool output into a `data:<mediaType>;base64,…` (or URL)
 * string. Two shapes are recognized:
 *  - the SDK's `{type:'image', image:{data, mediaType}}` (produced by the codex-path
 *    `computer_screenshot` function tool), where `image.url` wins when present,
 *    string `data` is treated as base64 unless it already starts with `data:`, and
 *    binary `data` is base64-encoded; the media type defaults to `image/png`;
 *  - the already-normalized protocol item `{type:'input_image', image:'data:…'}`,
 *    whose `image` string is returned as-is.
 *
 * @returns The string form, or `null` when `value` is not an image output.
 */
function structuredImageToDataUrl(value: unknown): string | null {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }
  const candidate = value as { type?: unknown; image?: unknown };
  if (candidate.type === "input_image") {
    // Protocol item: `image` is already a `data:…` (or plain URL) string.
    return typeof candidate.image === "string" && candidate.image.length > 0 ? candidate.image : null;
  }
  if (candidate.type !== "image" || !candidate.image || typeof candidate.image !== "object") {
    return null;
  }
  const image = candidate.image as { data?: unknown; mediaType?: unknown; url?: unknown };
  if (typeof image.url === "string" && image.url.length > 0) {
    return image.url;
  }
  const mediaType = typeof image.mediaType === "string" && image.mediaType.length > 0 ? image.mediaType : "image/png";
  const prefix = `data:${mediaType};base64,`;
  if (typeof image.data === "string") {
    return image.data.startsWith("data:") ? image.data : `${prefix}${image.data}`;
  }
  const bytes = toImageBytes(image.data);
  return bytes ? `${prefix}${Buffer.from(bytes).toString("base64")}` : null;
}

/**
 * Compact a tool-call output for the `agent.toolCall.output` SESSION EVENT so it
 * never carries a raw binary payload. The codex-path `computer_screenshot` function
 * tool returns a structured `{type:'image', image:{data: Uint8Array, mediaType}}`;
 * captured verbatim its `Uint8Array` JSON-serializes as an object-of-numbers (~12.7MB
 * per screenshot in session_events — ~10x the base64 form). This mirrors the desktop
 * screenshot to the SAME compact `data:<mediaType>;base64,…` STRING the HOSTED
 * `computer_call` event already carries (agents-core sets its output to that data-URL),
 * so both computer-use transports emit one representation. The full data-URL is kept
 * (not truncated) because the web timeline RENDERS the screenshot from this event
 * payload — packages/react/src/timeline/tool-renderers.tsx ComputerCallRenderer
 * (`out.startsWith("data:image")` → <ScreenshotFigure src={out}/>) and ViewImageRenderer.
 *
 * Non-image outputs (text strings, MCP `{isError,content}` objects, hosted computer_call
 * data-URL strings) pass through unchanged. In an array, each image item is replaced
 * by its string form; a one-element array is unwrapped to the bare string only when
 * that element actually was an image item, so a plain `["text"]` stays an array.
 *
 * @param output Raw tool output.
 * @returns The compacted output.
 */
export function normalizeToolOutputForEvent(output: unknown): unknown {
  const single = structuredImageToDataUrl(output);
  if (single !== null) {
    return single;
  }
  if (!Array.isArray(output)) {
    return output;
  }
  const converted = output.map((element) => structuredImageToDataUrl(element));
  // A lone image content item unwraps to the bare data-URL string the timeline
  // image renderers expect; a mixed/multi array keeps its (now-compact) shape.
  const only = converted.length === 1 ? converted[0] : null;
  if (typeof only === "string") {
    return only;
  }
  return output.map((element, index) => converted[index] ?? element);
}
2148
+
2149
/**
 * Translates one agents SDK stream event into zero or more normalized runtime events.
 *
 * Mapping:
 *  - raw model `output_text_delta`                      → `agent.message.delta`
 *  - OpenAI Responses `response.reasoning_summary_text.delta` → `agent.reasoning.delta`
 *    (any other OpenAI Responses raw event yields nothing)
 *  - `agent_updated_stream_event`                       → `agent.updated`
 *  - run item `tool_call_item` / `tool_search_call_item` → `agent.toolCall.created`
 *  - run item `tool_call_output_item` / `tool_search_output_item` → `agent.toolCall.output`
 *  - run item `message_output_item` with non-empty text → `agent.message.completed`
 * Everything else produces an empty array.
 *
 * @param event Stream event emitted by the SDK run.
 * @returns The normalized events (possibly empty).
 */
export function normalizeSdkEvent(event: RunStreamEvent): NormalizedRuntimeEvent[] {
  if (event.type === "raw_model_stream_event") {
    const data = (event as any).data;
    if (data?.type === "output_text_delta" && typeof data.delta === "string") {
      return [{ type: "agent.message.delta", payload: { text: data.delta } }];
    }
  }
  if (isOpenAIResponsesRawModelStreamEvent(event)) {
    const raw = (event as any).data?.event;
    const isReasoningDelta = raw?.type === "response.reasoning_summary_text.delta" && typeof raw.delta === "string";
    return isReasoningDelta ? [{ type: "agent.reasoning.delta", payload: { text: raw.delta } }] : [];
  }
  if (event.type === "agent_updated_stream_event") {
    return [{ type: "agent.updated", payload: { agent: (event as any).agent?.name ?? null } }];
  }
  if (event.type !== "run_item_stream_event") {
    return [];
  }
  const item = (event as any).item;
  if (!item) {
    return [];
  }
  switch (item.type) {
    case "tool_call_item": {
      const raw = item.rawItem ?? {};
      return [{
        type: "agent.toolCall.created",
        payload: {
          id: raw.callId ?? raw.id ?? item.id ?? null,
          name: raw.name ?? raw.type ?? "tool",
          arguments: raw.arguments ?? raw.input ?? null,
          raw,
        },
      }];
    }
    case "tool_call_output_item":
      return [{
        type: "agent.toolCall.output",
        payload: {
          id: item.rawItem?.callId ?? item.id ?? null,
          // Compact any structured/binary image output to a data-URL string so a
          // screenshot never bloats session_events ~10x as an object-of-numbers.
          output: normalizeToolOutputForEvent(item.output),
        },
      }];
    case "tool_search_call_item": {
      // Progressive connector disclosure: surface the model's tool search as a
      // regular tool-call event so the session stream shows the step (parity with
      // the Codex CLI, which renders its searches). Arguments may be an object
      // (the live wire shape) or a string.
      const raw = item.rawItem ?? {};
      return [{
        type: "agent.toolCall.created",
        payload: {
          id: raw.call_id ?? raw.callId ?? raw.id ?? item.id ?? null,
          name: "tool_search",
          arguments: raw.arguments ?? null,
          raw,
        },
      }];
    }
    case "tool_search_output_item": {
      const raw = item.rawItem ?? {};
      // Names of the tools the search disclosed; unnamed entries are dropped.
      const disclosed = Array.isArray(raw.tools)
        ? raw.tools.map((tool: { name?: unknown }) => (typeof tool?.name === "string" ? tool.name : "")).filter(Boolean)
        : [];
      const text = disclosed.length > 0 ? `Disclosed tools: ${disclosed.join(", ")}` : "No matching tools found.";
      return [{
        type: "agent.toolCall.output",
        payload: {
          id: raw.call_id ?? raw.callId ?? item.id ?? null,
          output: { type: "text", text },
        },
      }];
    }
    case "message_output_item": {
      const text = typeof item.text === "string" ? item.text : undefined;
      return text ? [{ type: "agent.message.completed", payload: { text } }] : [];
    }
    default:
      return [];
  }
}
2232
+
2233
/**
 * Extracts token usage (and the response id, when available) from a stream event that
 * carries a finished model response.
 *
 * @param event Stream event emitted by the SDK run.
 * @returns `null` when the event carries no response or the response has no usable
 *   usage figures; otherwise the usage plus `responseId` taken from the response's
 *   string `id`, falling back to its string `responseId`. An empty id is omitted.
 */
export function modelResponseUsageFromSdkEvent(event: RunStreamEvent): ModelResponseUsage | null {
  const response = modelResponseFromSdkEvent(event);
  const usage = usageFromResponse(response);
  if (!usage) {
    return null;
  }
  let responseId: string | undefined;
  if (typeof response?.id === "string") {
    responseId = response.id;
  } else if (typeof response?.responseId === "string") {
    responseId = response.responseId;
  }
  return responseId ? { responseId, usage } : { usage };
}
2249
+
2250
/**
 * Pulls the completed model response out of a stream event.
 *
 * Two carriers are recognized: a raw model stream event whose data is of type
 * `response_done`, and an OpenAI Responses raw event of type `response.completed`.
 *
 * @returns The response object found on the event, or `null` for any other event.
 */
function modelResponseFromSdkEvent(event: RunStreamEvent): any {
  if (event.type === "raw_model_stream_event") {
    const data = (event as any).data;
    if (data?.type === "response_done") {
      return data.response;
    }
  }
  if (!isOpenAIResponsesRawModelStreamEvent(event)) {
    return null;
  }
  const raw = (event as any).data?.event;
  return raw?.type === "response.completed" ? raw.response : null;
}
2265
+
2266
/**
 * Normalizes the `usage` object of a model response into camelCase usage fields.
 *
 * Token counts are accepted in camelCase or snake_case; input token details are
 * carried over when present.
 *
 * @returns The normalized usage, or `null` when the response has no usage object or
 *   none of the recognized fields could be read from it.
 */
function usageFromResponse(response: any): ModelResponseUsage["usage"] | null {
  const raw = response?.usage;
  if (!raw || typeof raw !== "object") {
    return null;
  }
  const inputTokens = numberProp(raw, "inputTokens", "inputTokens", "input_tokens");
  const outputTokens = numberProp(raw, "outputTokens", "outputTokens", "output_tokens");
  const totalTokens = numberProp(raw, "totalTokens", "totalTokens", "total_tokens");
  const usage = {
    ...inputTokens,
    ...outputTokens,
    ...totalTokens,
    ...inputTokenDetailsProp(raw),
  };
  if (Object.keys(usage).length === 0) {
    return null;
  }
  return usage;
}
2279
+
2280
/**
 * Reads one numeric usage field that may be spelled in camelCase or snake_case.
 *
 * The camelCase key wins unless its value is null/undefined.
 *
 * @param raw Usage object to read from.
 * @param outputKey Key to emit the value under.
 * @param camel camelCase spelling of the source key.
 * @param snake snake_case spelling of the source key.
 * @returns `{ [outputKey]: value }` for a finite number, otherwise `{}`.
 */
function numberProp(raw: Record<string, unknown>, outputKey: "inputTokens" | "outputTokens" | "totalTokens", camel: string, snake: string): Partial<ModelResponseUsage["usage"]> {
  const candidate = raw[camel] ?? raw[snake];
  if (typeof candidate !== "number" || !Number.isFinite(candidate)) {
    return {};
  }
  return { [outputKey]: candidate };
}
2284
+
2285
/**
 * Reads the input token details of a usage object, spelled either
 * `inputTokensDetails` or `input_tokens_details` (the camelCase key wins unless it
 * is null/undefined).
 *
 * @returns `{ inputTokensDetails }` when the value is an object (arrays included),
 *   otherwise `{}`.
 */
function inputTokenDetailsProp(raw: Record<string, unknown>): Partial<ModelResponseUsage["usage"]> {
  const details = raw.inputTokensDetails ?? raw.input_tokens_details;
  return details && typeof details === "object"
    ? { inputTokensDetails: details as Record<string, number> | Array<Record<string, number>> }
    : {};
}
2292
+
2293
/**
 * Converts pending approval interruptions into plain serializable values.
 *
 * An item that provides `toJSON()` is serialized through it. Any other item becomes
 * `{ id, name, arguments, raw }`, with `name` and `arguments` read from the item or
 * its `rawItem` (defaulting to `"tool"` and `null`) and `raw` holding the item itself.
 *
 * @param interruptions Interruption items reported by the run.
 * @returns One serialized value per interruption, in the same order.
 */
export function serializeApprovals(interruptions: unknown[]): unknown[] {
  return interruptions.map((item: any) =>
    typeof item?.toJSON === "function"
      ? item.toJSON()
      : {
          id: approvalIdentifier(item),
          name: item?.name ?? item?.rawItem?.name ?? "tool",
          arguments: item?.arguments ?? item?.rawItem?.arguments ?? null,
          raw: item,
        },
  );
}
2306
+
2307
/**
 * Builds the sandbox manifest (rooted at `/workspace`) for a set of resources.
 *
 * - `repository` resources mount at `resource.mountPath`, defaulting to
 *   `repos/<path of the repository URL without surrounding slashes or ".git">`. They
 *   become an empty directory when the repository is cloned inside the sandbox
 *   (`repositoryUsesSandboxClone`), otherwise a git-repo entry.
 * - `file` resources mount at `resource.mountPath`, defaulting to `files/<fileId>`.
 *   They become a download directory when a matching download is supplied, otherwise
 *   an object-storage mount of `files/<fileId>/original`.
 * - Other resource kinds add no entry.
 *
 * @param settings Runtime settings.
 * @param resources Resources to expose in the sandbox.
 * @param environment Manifest environment; defaults to the one collected from `settings`.
 * @param fileResourceDownloads Pre-signed downloads, matched to file resources by `fileId`.
 * @returns The assembled manifest.
 * @throws TypeError from `new URL()` when a repository `uri` is not a valid URL, plus
 *   whatever the path/mount helpers throw.
 */
export function buildManifest(
  settings: Settings,
  resources: ResourceRef[],
  environment = collectSandboxEnvironment(settings),
  fileResourceDownloads: SandboxFileDownload[] = [],
): Manifest {
  const entries: Record<string, any> = {};
  const downloadsByFileId = new Map(normalizeSandboxFileDownloads(fileResourceDownloads).map((download) => [download.fileId, download]));
  for (const resource of resources) {
    switch (resource.kind) {
      case "repository": {
        const url = new URL(resource.uri);
        const host = url.hostname.toLowerCase();
        const repo = url.pathname.replace(/^\/+|\/+$/g, "").replace(/\.git$/, "");
        const mountPath = normalizeManifestPath(resource.mountPath ?? `repos/${repo}`);
        // A sandbox-side clone only needs the target directory to exist.
        entries[mountPath] = repositoryUsesSandboxClone(settings, resource)
          ? dir()
          : gitRepo({
              host,
              repo,
              ref: resource.ref,
              ...(resource.subpath ? { subpath: normalizeManifestPath(resource.subpath) } : {}),
            });
        break;
      }
      case "file": {
        const mountPath = normalizeManifestPath(resource.mountPath ?? `files/${resource.fileId}`);
        const download = downloadsByFileId.get(resource.fileId);
        entries[mountPath] = download
          ? sandboxDownloadDirectory(download, mountPath)
          : objectStorageFileMount(settings, `files/${resource.fileId}/original`);
        break;
      }
      default:
        break;
    }
  }
  // No extraPathGrants here: remote sandbox clients (Modal) reject manifests
  // that carry them at create/apply time, which broke every Modal session.
  // The lazy bundled-skills source no longer needs a grant because
  // bundledSkillsDir() stages the skills inside the process working directory
  // whenever the packaged copy lives outside it.
  return new Manifest({
    root: "/workspace",
    entries,
    environment,
  });
}
2352
+
2353
/**
 * Builds the manifest directory entry that receives a downloaded file.
 *
 * When the download carries inline `content`, the directory is pre-populated with a
 * single file named `download.filename`; otherwise an empty directory is returned.
 *
 * @param download Download descriptor; its `mountPath` must equal `mountPath`.
 * @param mountPath Manifest path computed for the file resource.
 * @throws Error when the two mount paths disagree, plus whatever
 *   `assertSafeSandboxFilename` throws for the filename.
 */
function sandboxDownloadDirectory(download: SandboxFileDownload, mountPath: string): any {
  if (download.mountPath !== mountPath) {
    throw new Error(`File download materialization path mismatch for ${download.fileId}: expected ${mountPath}, got ${download.mountPath}`);
  }
  assertSafeSandboxFilename(download.filename, download.fileId);
  if (!download.content) {
    return dir();
  }
  const inlineFile = file({ content: download.content });
  return dir({ children: { [download.filename]: inlineFile } });
}
2367
+
2368
/**
 * Builds the read-only manifest mount entry that exposes an object-storage prefix
 * inside the sandbox.
 *
 * - `azure-blob`: an Azure Blob mount via rclone/FUSE; rejected on backends whose
 *   capability descriptor declares `nativeBucketMount`.
 * - `aws-s3` / `gcs`: always rejected (these need pre-signed downloads).
 * - anything else: an S3-compatible mount, using the Modal cloud-bucket strategy on a
 *   `nativeBucketMount` backend and rclone/FUSE otherwise.
 *
 * @param settings Runtime settings (sandbox backend and object-storage configuration).
 * @param prefix Object key prefix to mount.
 * @throws Error for the rejected combinations above, plus whatever the mount-config
 *   helpers throw for incomplete storage settings.
 */
function objectStorageFileMount(settings: Settings, prefix: string): any {
  // Descriptor-driven: a nativeBucketMount backend (modal) mounts via the
  // provider's own bucket-mount strategy and cannot mount Azure Blob entries —
  // it needs pre-signed downloads instead. Reading the descriptor (not a
  // hard-coded backend name) keeps this honest as providers are added.
  const nativeBucketMount = CAPABILITY_DESCRIPTORS[settings.sandboxBackend].nativeBucketMount;
  const rcloneFuseStrategy = () => inContainerMountStrategy({ pattern: { type: "rclone", mode: "fuse" } });

  switch (settings.objectStorageBackend) {
    case "azure-blob": {
      if (nativeBucketMount) {
        throw new Error("Modal sandbox Azure Blob file resources require pre-signed download materialization because the current OpenAI Agents SDK Modal client does not support Azure Blob mount entries.");
      }
      const azure = azureBlobMountConfig(settings);
      return azureBlobMount({
        container: azure.container,
        prefix,
        accountName: azure.accountName,
        accountKey: azure.accountKey,
        endpointUrl: azure.endpointUrl,
        readOnly: true,
        mountStrategy: rcloneFuseStrategy(),
      });
    }
    case "aws-s3":
    case "gcs":
      throw new Error(`${settings.objectStorageBackend} file resources require pre-signed download materialization`);
    default: {
      const s3 = s3CompatibleMountConfig(settings);
      return s3Mount({
        bucket: s3.bucket,
        prefix,
        endpointUrl: s3.endpointUrl,
        region: s3.region,
        s3Provider: s3.s3Provider,
        accessKeyId: s3.accessKeyId,
        secretAccessKey: s3.secretAccessKey,
        readOnly: true,
        mountStrategy: nativeBucketMount ? new ModalCloudBucketMountStrategy() : rcloneFuseStrategy(),
      });
    }
  }
}
2407
+
2408
/**
 * Collects the S3-compatible connection details needed for a sandbox mount.
 *
 * The sandbox-specific endpoint takes precedence over the general endpoint.
 *
 * @param settings - Runtime settings holding the object-storage configuration.
 * @returns Bucket, endpoint, region, provider and credentials for the mount.
 * @throws Error if the endpoint, access key id or secret access key is missing.
 */
function s3CompatibleMountConfig(settings: Settings): {
  bucket: string;
  endpointUrl: string;
  region: string;
  s3Provider: string;
  accessKeyId: string;
  secretAccessKey: string;
} {
  const accessKeyId = settings.objectStorageAccessKeyId;
  const secretAccessKey = settings.objectStorageSecretAccessKey;
  const endpointUrl = settings.objectStorageSandboxEndpoint ?? settings.objectStorageEndpoint;
  if (!endpointUrl || !accessKeyId || !secretAccessKey) {
    throw new Error("File resources require configured S3-compatible object storage");
  }
  return {
    bucket: settings.objectStorageBucket,
    endpointUrl,
    region: settings.objectStorageRegion,
    s3Provider: settings.objectStorageS3Provider,
    accessKeyId,
    secretAccessKey,
  };
}
2429
+
2430
/**
 * Resolves the Azure Blob account details needed for a sandbox mount.
 *
 * Explicit settings win over values parsed from the connection string.
 * `endpointUrl` is only present on the result when
 * `azureBlobManifestEndpoint` returns a non-empty value.
 *
 * @param settings - Runtime settings holding the Azure Blob configuration.
 * @returns Container, account name/key and, optionally, a custom endpoint.
 * @throws Error if no account name or account key can be resolved.
 */
function azureBlobMountConfig(settings: Settings): {
  container: string;
  accountName: string;
  accountKey: string;
  endpointUrl?: string;
} {
  const connectionString = settings.objectStorageAzureConnectionString;
  const fromConnectionString: Record<string, string> = connectionString
    ? parseAzureConnectionString(connectionString)
    : {};
  const accountName = settings.objectStorageAzureAccountName ?? fromConnectionString.AccountName;
  const accountKey = settings.objectStorageAzureAccountKey ?? fromConnectionString.AccountKey;
  if (!accountName || !accountKey) {
    throw new Error("File resources require Azure Blob account name and account key");
  }
  const resolved: { container: string; accountName: string; accountKey: string; endpointUrl?: string } = {
    container: settings.objectStorageBucket,
    accountName,
    accountKey,
  };
  const endpointUrl = azureBlobManifestEndpoint(
    settings.objectStorageAzureEndpoint ?? fromConnectionString.BlobEndpoint,
    accountName,
  );
  if (endpointUrl) {
    resolved.endpointUrl = endpointUrl;
  }
  return resolved;
}
2452
+
2453
/**
 * Normalizes an Azure Blob endpoint for use in a manifest mount entry.
 *
 * Trailing slashes are stripped. The standard public endpoint for the account
 * (`https://<account>.blob.core.windows.net`) is reported as `undefined`, as
 * is a missing endpoint.
 *
 * @param endpoint - Configured endpoint, if any.
 * @param accountName - Storage account name used to build the standard endpoint.
 * @returns The normalized custom endpoint, or `undefined`.
 */
function azureBlobManifestEndpoint(endpoint: string | undefined, accountName: string): string | undefined {
  if (!endpoint) {
    return undefined;
  }
  const trimmed = endpoint.replace(/\/+$/, "");
  if (trimmed === `https://${accountName}.blob.core.windows.net`) {
    return undefined;
  }
  return trimmed;
}
2461
+
2462
/**
 * Parses a `Key=Value;Key=Value` Azure connection string into a record.
 *
 * Segments are trimmed and empty segments are dropped. Each segment is split
 * on its first `=` only, so values may themselves contain `=`. A segment with
 * no `=` maps to an empty string.
 *
 * @param value - The raw connection string.
 * @returns A record of the parsed key/value pairs.
 */
function parseAzureConnectionString(value: string): Record<string, string> {
  const pairs: Array<[string, string]> = [];
  for (const rawSegment of value.split(";")) {
    const segment = rawSegment.trim();
    if (!segment) {
      continue;
    }
    const separator = segment.indexOf("=");
    if (separator === -1) {
      pairs.push([segment, ""]);
    } else {
      pairs.push([segment.slice(0, separator), segment.slice(separator + 1)]);
    }
  }
  return Object.fromEntries(pairs);
}
2471
+
2472
/**
 * Normalizes a sandbox resource path to a relative manifest path.
 *
 * Leading and trailing slashes are removed. The path is rejected when nothing
 * is left, or when it contains `..` anywhere (including inside a segment).
 *
 * @param path - The raw resource path.
 * @returns The path without leading or trailing slashes.
 * @throws Error if the normalized path is empty or contains `..`.
 */
function normalizeManifestPath(path: string): string {
  const trimmed = path.replace(/^\/+|\/+$/g, "");
  const isEmpty = trimmed.length === 0;
  if (isEmpty || trimmed.indexOf("..") !== -1) {
    throw new Error(`Invalid sandbox resource path: ${path}`);
  }
  return trimmed;
}
2479
+
2480
/**
 * Validates download descriptors and normalizes their mount paths.
 *
 * @param downloads - Download descriptors to validate.
 * @returns Copies of the descriptors with `mountPath` normalized.
 * @throws Error if a mount path or filename is invalid, or if a descriptor has
 *   neither inline content nor a non-blank URL.
 */
function normalizeSandboxFileDownloads(downloads: SandboxFileDownload[]): SandboxFileDownload[] {
  const normalized: SandboxFileDownload[] = [];
  for (const download of downloads) {
    const mountPath = normalizeManifestPath(download.mountPath);
    assertSafeSandboxFilename(download.filename, download.fileId);
    const hasSource = Boolean(download.content) || Boolean(download.url?.trim());
    if (!hasSource) {
      throw new Error(`File download materialization requires content or a URL for ${download.fileId}`);
    }
    normalized.push({ ...download, mountPath });
  }
  return normalized;
}
2493
+
2494
/**
 * Rejects filenames that are not a single, plain path component.
 *
 * A name is rejected when it is empty, equals `.`, or contains `/`, `\` or
 * `..` anywhere.
 *
 * @param filename - The candidate file name.
 * @param fileId - File identifier, used only in the error message.
 * @throws Error if the filename is rejected.
 */
function assertSafeSandboxFilename(filename: string, fileId: string): void {
  const forbiddenFragments = ["/", "\\", ".."];
  const unsafe =
    !filename ||
    filename === "." ||
    forbiddenFragments.some((fragment) => filename.includes(fragment));
  if (unsafe) {
    throw new Error(`Invalid sandbox file name for ${fileId}: ${filename}`);
  }
}
2499
+
2500
/**
 * Computes the absolute in-sandbox path a download is written to:
 * `/workspace/<mountPath>/<filename>`, joined with POSIX semantics.
 *
 * @param download - The download descriptor.
 * @returns The absolute target path inside the sandbox.
 */
function sandboxDownloadTargetPath(download: SandboxFileDownload): string {
  const { mountPath, filename } = download;
  return posixPath.join("/workspace", mountPath, filename);
}
2503
+
2504
/**
 * Builds the shell script that materializes one file download in the sandbox.
 *
 * The script creates the target directory, downloads to a temp file with curl
 * (only when the target does not exist yet), renames it into place, and then
 * tries to make the target read-only.
 *
 * @param download - The download descriptor; `url` must be non-empty.
 * @param targetPath - Absolute in-sandbox path of the file to create.
 * @returns The script text, one command per line.
 * @throws Error if `download.url` is empty.
 */
function sandboxFileDownloadCommand(download: SandboxFileDownload, targetPath: string): string {
  if (!download.url) {
    throw new Error(`File download materialization URL is empty for ${download.fileId}`);
  }
  const targetDir = posixPath.dirname(targetPath);
  const quotedTarget = shellQuote(targetPath);
  // The pid suffix must sit OUTSIDE the single-quoted part: inside single quotes
  // the shell keeps `$$` literal, so every process would share one temp file and
  // concurrent downloads of the same file could clobber each other.
  const tmpExpression = `${shellQuote(`${targetPath}.opengeni-download-`)}$$`;
  return [
    // NOTE(review): `pipefail` is not POSIX sh; this presumably runs under bash - confirm.
    "set -euo pipefail",
    `mkdir -p -- ${shellQuote(targetDir)}`,
    `if [ ! -f ${quotedTarget} ]; then`,
    ` tmp=${tmpExpression}`,
    " cleanup() { rm -f -- \"$tmp\"; }",
    " trap cleanup EXIT",
    ` curl --fail --location --silent --show-error --retry 3 --retry-delay 1 --output "$tmp" ${shellQuote(download.url)}`,
    ` mv -- "$tmp" ${quotedTarget}`,
    " trap - EXIT",
    "fi",
    `chmod a-w -- ${quotedTarget} 2>/dev/null || true`,
  ].join("\n");
}
2524
+
2525
/**
 * Wraps a value in single quotes for safe use as one shell word.
 *
 * Each embedded single quote is replaced with `'\''` (close the quote, emit an
 * escaped quote, reopen the quote).
 *
 * @param value - The raw string.
 * @returns The single-quoted shell word.
 */
function shellQuote(value: string): string {
  const escaped = value.split("'").join("'\\''");
  return "'" + escaped + "'";
}
2528
+
2529
/**
 * Recovers the sandbox session state recorded on a RunState, if any.
 *
 * The entry for the current agent key in `sessionsByAgent` is preferred. If
 * that is missing, an entry is assembled from the flat
 * `currentAgentKey`/`sessionState` fields when both are set.
 *
 * @param state - The run state whose private `_sandbox` snapshot is inspected.
 * @param client - The configured sandbox client, or a falsy value when there is none.
 * @returns The deserialized session state, or `undefined` when there is no
 *   client or no recorded entry.
 * @throws Error if the recorded backend id differs from the client's.
 */
async function restoredSandboxSessionState(state: RunState<any, any>, client: unknown): Promise<SandboxSessionState | undefined> {
  if (!client) {
    return undefined;
  }
  const sandboxClient = client as SandboxClient;
  const snapshot = (state as any)._sandbox;

  let entry = snapshot?.sessionsByAgent?.[snapshot.currentAgentKey];
  if (entry === undefined || entry === null) {
    const hasFlatEntry = snapshot?.currentAgentKey && snapshot?.sessionState;
    entry = hasFlatEntry
      ? {
          backendId: snapshot.backendId,
          currentAgentKey: snapshot.currentAgentKey,
          currentAgentName: snapshot.currentAgentName,
          sessionState: snapshot.sessionState,
        }
      : undefined;
  }
  if (!entry) {
    return undefined;
  }
  if (sandboxClient.backendId !== entry.backendId) {
    throw new Error("RunState sandbox backend does not match the configured sandbox client");
  }
  return await deserializeSandboxSessionStateEnvelope(sandboxClient, entry.sessionState);
}
2551
+
2552
+ // sandboxStateEntryFromRunState + restoredSandboxSessionStateFromEntry +
2553
+ // deserializeSandboxSessionStateEnvelope moved to the agent-loop-free leaf
2554
+ // ./sandbox; re-exported via `export * from "./sandbox"`. The private
2555
+ // restoredSandboxSessionState above (which takes an agent-loop RunState) calls
2556
+ // the moved deserializeSandboxSessionStateEnvelope, imported from ./sandbox.
2557
+
2558
/** Lifecycle phase a sandbox hook is registered for; "beforeAgentStart" is the only phase defined. */
export type SandboxLifecycleHookPhase = "beforeAgentStart";

/** Per-run inputs passed to every lifecycle hook's `shouldRun` and `run`. */
export type SandboxLifecycleHookContext = {
  // Environment variables for the run; the azure-cli-login hook's `shouldRun`
  // checks them for service-principal credentials.
  environment: Record<string, string>;
  // Optional sink for the sandbox.operation.started/completed/failed events the
  // built-in hooks emit.
  onRuntimeEvent?: (event: NormalizedRuntimeEvent) => Promise<void> | void;
  // When set, forwarded as `runAs` on the hook's exec call.
  runAs?: string;
  // TOKEN-BROKER (B1): the run-scoped GitHub token to seed into the box's token
  // FILE before the repository clone runs. Threaded OFF-MANIFEST — it rides ONLY
  // the clone exec's per-call env (OPENGENI_GIT_TOKEN_SEED), NEVER the box/agent
  // manifest env (validateNoEnvironmentDelta must never see a rotating value).
  gitTokenSeed?: string;
};

/** A unit of sandbox preparation that runs against a session in a given phase. */
export type SandboxLifecycleHook = {
  // Identifier of the hook; built-in hooks are registered under this id.
  id: string;
  phase: SandboxLifecycleHookPhase;
  // Optional gate; when absent the hook is treated as applicable.
  shouldRun?: (context: SandboxLifecycleHookContext) => boolean;
  run: (session: SandboxSessionLike, context: SandboxLifecycleHookContext) => Promise<void>;
};

// Registry of hooks that can be requested by id via sandboxLifecycleHooksForIds.
const builtInSandboxLifecycleHooks: Record<string, SandboxLifecycleHook> = {
  "azure-cli-login": {
    id: "azure-cli-login",
    phase: "beforeAgentStart",
    // Only runs when client id, client secret and tenant id are all present.
    shouldRun: ({ environment }) => hasAzureServicePrincipal(environment),
    run: runAzureCliLoginHook,
  },
};
2586
+
2587
/**
 * Resolves built-in lifecycle hooks by id, preserving the order of `ids`.
 *
 * @param ids - Hook ids to resolve.
 * @returns The matching built-in hooks, one per id.
 * @throws Error if any id is not a registered built-in hook.
 */
export function sandboxLifecycleHooksForIds(ids: string[]): SandboxLifecycleHook[] {
  return ids.map((id) => {
    // Own-property check: a bare index lookup would resolve ids such as
    // "constructor" or "toString" to inherited Object.prototype members and
    // hand back a non-hook instead of reporting the unknown id.
    const hook = Object.prototype.hasOwnProperty.call(builtInSandboxLifecycleHooks, id)
      ? builtInSandboxLifecycleHooks[id]
      : undefined;
    if (!hook) {
      throw new Error(`Unknown sandbox lifecycle hook ${id}`);
    }
    return hook;
  });
}
2596
+
2597
/**
 * Selects the hooks that should run in the beforeAgentStart phase.
 *
 * A hook qualifies when its phase is "beforeAgentStart" and its optional
 * `shouldRun` gate is absent or does not return `false`.
 *
 * @param hooks - Candidate hooks, in execution order.
 * @param context - Context passed to each hook's `shouldRun`.
 * @returns The qualifying hooks, in their original order.
 */
function applicableBeforeAgentStartHooks(
  hooks: SandboxLifecycleHook[],
  context: SandboxLifecycleHookContext,
): SandboxLifecycleHook[] {
  const applicable: SandboxLifecycleHook[] = [];
  for (const hook of hooks) {
    if (hook.phase !== "beforeAgentStart") {
      continue;
    }
    const verdict = hook.shouldRun?.(context) ?? true;
    if (verdict) {
      applicable.push(hook);
    }
  }
  return applicable;
}
2603
+
2604
/**
 * Runs the applicable beforeAgentStart hooks, in order, against a box that is
 * already live.
 *
 * This exists for the PROVIDED-session path. When runStream gives the SDK a
 * live `session` (runOptions.sandbox.session — the lease-owned box resolved by
 * the turn activity), SandboxRuntimeManager uses it as-is and never calls
 * client.create/resume, so the create/resume decoration installed by
 * withSandboxLifecycleHooks never fires. Callers on that path call this before
 * starting the run so the box still gets its beforeAgentStart preparation
 * (repository clone + B1 askpass/token-file seed, azure-cli-login).
 *
 * @param session - The live sandbox session to prepare.
 * @param hooks - Candidate hooks; only applicable beforeAgentStart hooks run.
 * @param context - Context passed to each hook.
 */
export async function runBeforeAgentStartHooks(
  session: SandboxSessionLike,
  hooks: SandboxLifecycleHook[],
  context: SandboxLifecycleHookContext,
): Promise<void> {
  const pending = applicableBeforeAgentStartHooks(hooks, context);
  // Sequential on purpose: each hook is awaited before the next one starts.
  for (const hook of pending) {
    await hook.run(session, context);
  }
}
2624
+
2625
/**
 * Decorates a sandbox client so sessions it creates or resumes get the
 * applicable beforeAgentStart hooks run on them first.
 *
 * When no hook applies, the original client is returned unchanged. Otherwise a
 * new client object is returned that forwards the members listed below. Only
 * members present on the original client are defined on the wrapper. Hooks run
 * at most once per session object; a session is marked as prepared only after
 * all hooks have completed.
 *
 * @param client - The sandbox client to decorate.
 * @param hooks - Candidate lifecycle hooks.
 * @param context - Context passed to each hook.
 * @returns The original client, or a wrapper around it.
 */
export function withSandboxLifecycleHooks(
  client: SandboxClient,
  hooks: SandboxLifecycleHook[],
  context: SandboxLifecycleHookContext,
): SandboxClient {
  const startHooks = applicableBeforeAgentStartHooks(hooks, context);
  if (startHooks.length === 0) {
    return client;
  }

  const prepared = new WeakSet<object>();
  const prepareSession = async <T extends SandboxSessionLike>(session: T): Promise<T> => {
    if (typeof session !== "object" || session === null || prepared.has(session)) {
      return session;
    }
    for (const hook of startHooks) {
      await hook.run(session, context);
    }
    prepared.add(session);
    return session;
  };

  const decorated: SandboxClient = {
    backendId: client.backendId,
    ...(client.supportsDefaultOptions !== undefined
      ? { supportsDefaultOptions: client.supportsDefaultOptions }
      : {}),
    ...(client.create
      ? { create: async (...args: any[]) => await prepareSession(await (client.create as any)(...args)) }
      : {}),
    ...(client.resume
      ? { resume: async (state: SandboxSessionState) => await prepareSession(await client.resume!(state)) }
      : {}),
    ...(client.delete
      ? { delete: async (state: SandboxSessionState) => await client.delete!(state) }
      : {}),
    ...(client.serializeSessionState
      ? { serializeSessionState: async (state: SandboxSessionState, options) => await client.serializeSessionState!(state, options) }
      : {}),
    ...(client.canPersistOwnedSessionState
      ? { canPersistOwnedSessionState: async (state: SandboxSessionState) => await client.canPersistOwnedSessionState!(state) }
      : {}),
    ...(client.canReusePreservedOwnedSession
      ? { canReusePreservedOwnedSession: async (state: SandboxSessionState) => await client.canReusePreservedOwnedSession!(state) }
      : {}),
    ...(client.deserializeSessionState
      ? { deserializeSessionState: async (state: Record<string, unknown>) => await client.deserializeSessionState!(state) }
      : {}),
  };
  return decorated;
}
2657
+
2658
/**
 * Looks up the repository-clone hooks registered for an agent.
 *
 * @param agent - The agent to look up.
 * @returns The registered hooks, or an empty array when none are registered.
 */
function sandboxRepositoryCloneHooksForAgent(agent: Agent<any, any>): SandboxLifecycleHook[] {
  const registered = agentRepositoryCloneHooks.get(agent);
  return registered ?? [];
}
2661
+
2662
// TOKEN-BROKER (B1): returns the per-turn git token seed stashed for this agent,
// or undefined when none is stashed (no repo attached / the selfhosted path).
// It is read into the clone hook context at runStream so the token is seeded
// off-manifest.
function gitTokenSeedForAgent(agent: Agent<any, any>): string | undefined {
  const seed = agentGitTokenSeed.get(agent);
  return seed;
}
2668
+
2669
/**
 * Builds the "repository-clone" lifecycle hook for the repository resources
 * that must be cloned inside the sandbox.
 *
 * @param settings - Runtime settings.
 * @param resources - All resources attached to the run.
 * @param activeSandboxBackend - Backend the agent actually executes on;
 *   defaults to the backend configured in `settings`.
 * @returns A single-hook array, or an empty array when no repository qualifies.
 */
function sandboxRepositoryCloneHooks(
  settings: Settings,
  resources: ResourceRef[],
  activeSandboxBackend: Settings["sandboxBackend"] = settings.sandboxBackend,
): SandboxLifecycleHook[] {
  const cloneTargets: Extract<ResourceRef, { kind: "repository" }>[] = [];
  for (const resource of resources) {
    if (resource.kind === "repository" && repositoryUsesSandboxClone(settings, resource, activeSandboxBackend)) {
      cloneTargets.push(resource);
    }
  }
  if (cloneTargets.length === 0) {
    return [];
  }
  const cloneHook: SandboxLifecycleHook = {
    id: "repository-clone",
    phase: "beforeAgentStart",
    run: async (session, context) => {
      await runRepositoryCloneHook(session, cloneTargets, context);
    },
  };
  return [cloneHook];
}
2688
+
2689
/**
 * Decides whether the platform seeds a repository resource by running
 * `git clone` inside the sandbox before the agent starts.
 *
 * SAFETY GATE (selfhosted / bring-your-own machine): the clone hook writes to
 * `posixPath.join("/workspace", mountPath)`, which a selfhosted agent rewrites
 * to a path under its REAL launch directory, so a platform-initiated clone
 * would land on the user's actual disk. A connected machine owns its own
 * filesystem and the platform must NEVER clone onto it. The gate therefore
 * looks at the EFFECTIVE/active backend rather than only the session's HOME
 * backend (`settings.sandboxBackend`): a session whose home backend is a cloud
 * one (e.g. "modal") can have its active sandbox swapped to a connected
 * machine (active_sandbox_id → a selfhosted lease), in which case the agent
 * really runs on the user's machine.
 *
 * `activeSandboxBackend` defaults to the home backend, so a session whose HOME
 * backend is "selfhosted" is gated without any caller change, and cloud paths
 * behave exactly as they did before the gate existed.
 *
 * @param settings - Runtime settings; `sandboxBackend` is the home backend.
 * @param resource - The repository resource under consideration.
 * @param activeSandboxBackend - Backend the agent actually executes on.
 * @returns `false` when the active backend is "selfhosted"; otherwise `true`
 *   when the home backend is "modal" or the resource has both a GitHub
 *   installation id and a GitHub repository id.
 */
export function repositoryUsesSandboxClone(
  settings: Settings,
  resource: Extract<ResourceRef, { kind: "repository" }>,
  activeSandboxBackend: Settings["sandboxBackend"] = settings.sandboxBackend,
): boolean {
  if (activeSandboxBackend === "selfhosted") {
    return false;
  }
  if (settings.sandboxBackend === "modal") {
    return true;
  }
  const githubBinding = resource.githubInstallationId && resource.githubRepositoryId;
  return Boolean(githubBinding);
}
2717
+
2718
/**
 * Builds the shell script that materializes repository resources inside the
 * sandbox.
 *
 * The script has three parts: (1) an optional token/askpass seed block that is
 * gated on OPENGENI_GIT_TOKEN_SEED being set in the exec environment, (2) an
 * `ensure_git` bootstrap that installs git via apt-get when it is missing, and
 * (3) a `clone_repository` shell function followed by one invocation per
 * resource with shell-quoted arguments (target path, uri, ref, subpath).
 *
 * @param resources - Repository resources to clone. Each target is
 *   `/workspace/<mountPath>`, where `mountPath` defaults to `repos/<repo path
 *   from the URI>`.
 * @returns The complete script, one command per line.
 * @throws TypeError from `new URL` if a resource URI is not a parseable URL;
 *   Error from `normalizeManifestPath` if a mount path or subpath is invalid.
 */
export function repositoryCloneCommand(resources: Extract<ResourceRef, { kind: "repository" }>[]): string {
  const commands = [
    "set -eu",
    "export HOME=\"${HOME:-/workspace}\"",
    "export GIT_TERMINAL_PROMPT=\"${GIT_TERMINAL_PROMPT:-0}\"",
    // TOKEN-BROKER (B1/B2): seed the run-scoped GitHub token into the STABLE token FILE
    // AND provision the git-askpass helper into the box AT SETUP (runtime) BEFORE any
    // clone runs, so GIT_ASKPASS points at a per-box, user-writable script that reads
    // that file for the fetch below. Provisioning the askpass here (rather than relying
    // on a baked image script at /usr/local/bin/opengeni-git-askpass) removes the
    // image-rebuild rollout gate: the askpass is correct on ANY box image, including
    // pre-existing warm boxes on their next turn's clone hook, and no product image has
    // to carry it. The seed rides the per-exec env (OPENGENI_GIT_TOKEN_SEED) — NEVER the
    // box/agent manifest (validateNoEnvironmentDelta must not see a rotating value), so
    // this whole block is a no-op when the seed is absent (e.g. the selfhosted path,
    // which uses its own git creds). The token file lives at $OPENGENI_GIT_TOKEN_FILE
    // (stable, from the shared base) with a $HOME/.opengeni/git-token fallback.
    // $GIT_ASKPASS is on the box manifest env (set by
    // sandboxEnvironmentForRun to $HOME/.opengeni/askpass), so it is available to this
    // exec; the askpass script we write is byte-identical to docker/opengeni-git-askpass
    // and is written via a QUOTED heredoc (<<'ASKPASS_EOF') so NOTHING inside it expands
    // ($1, $HOME, ${OPENGENI_GIT_TOKEN_FILE:-...}, and the literal \n in printf all land
    // verbatim), then chmod 0755 so git can exec it.
    //
    // ATOMIC REWRITE: this block now re-runs at the start of EVERY turn on a warm box
    // that other turn holders may be actively using — an in-flight `git fetch` from a
    // concurrent turn can invoke the askpass (which cats the token file) at any moment.
    // Both files are therefore written to a pid-suffixed temp under umask 077 and
    // renamed into place: rename is atomic, concurrent readers keep the old inode, and
    // the token is never observable world-readable (no post-hoc chmod window).
    "if [ -n \"${OPENGENI_GIT_TOKEN_SEED:-}\" ]; then",
    " seed_umask=\"$(umask)\"",
    " umask 077",
    " git_token_file=\"${OPENGENI_GIT_TOKEN_FILE:-$HOME/.opengeni/git-token}\"",
    " mkdir -p \"$(dirname \"$git_token_file\")\"",
    " printf '%s' \"$OPENGENI_GIT_TOKEN_SEED\" > \"$git_token_file.tmp.$$\"",
    " mv -f \"$git_token_file.tmp.$$\" \"$git_token_file\"",
    " git_askpass=\"${GIT_ASKPASS:-$HOME/.opengeni/askpass}\"",
    " mkdir -p \"$(dirname \"$git_askpass\")\"",
    " cat > \"$git_askpass.tmp.$$\" <<'ASKPASS_EOF'",
    "#!/usr/bin/env sh",
    "case \"$1\" in",
    " *Username*) printf '%s\\n' \"x-access-token\" ;;",
    " *Password*) cat \"${OPENGENI_GIT_TOKEN_FILE:-$HOME/.opengeni/git-token}\" 2>/dev/null || printf '\\n' ;;",
    " *) printf '\\n' ;;",
    "esac",
    "ASKPASS_EOF",
    " chmod 0755 \"$git_askpass.tmp.$$\"",
    " mv -f \"$git_askpass.tmp.$$\" \"$git_askpass\"",
    " umask \"$seed_umask\"",
    "fi",
    // Bootstrap git via apt-get when it is missing; exits 127 if that is not possible.
    "ensure_git() {",
    " if command -v git >/dev/null 2>&1; then",
    " return 0",
    " fi",
    " if command -v apt-get >/dev/null 2>&1; then",
    " export DEBIAN_FRONTEND=noninteractive",
    " apt-get update >/dev/null",
    " apt-get install -y --no-install-recommends ca-certificates git >/dev/null",
    " rm -rf /var/lib/apt/lists/*",
    " command -v git >/dev/null 2>&1 && return 0",
    " fi",
    " echo \"git is not installed in the sandbox and could not be bootstrapped\" >&2",
    " exit 127",
    "}",
    "ensure_git",
    // clone_repository <target> <uri> <ref> <subpath>; an empty subpath means "whole repo".
    "clone_repository() {",
    " target=\"$1\"",
    " uri=\"$2\"",
    " ref=\"$3\"",
    " subpath=\"$4\"",
    " if [ -e \"$target\" ] && { [ -f \"$target\" ] || [ -n \"$(find \"$target\" -mindepth 1 -maxdepth 1 -print -quit 2>/dev/null)\" ]; }; then",
    // This hook re-runs every turn on a long-lived box, so "non-empty" alone is not
    // proof of a completed materialization: an interrupted clone (worker crash /
    // lifecycle timeout mid-mv/cp) leaves a partial tree that would otherwise pass
    // this check forever. A full-repo target must actually BE a work tree to be
    // skipped; a partial one is wiped and rebuilt (nothing legitimate writes under
    // the mount path before the repo exists). Subpath extracts are not git repos —
    // for those the plain non-empty check stands (no stronger signal available).
    " if [ -n \"$subpath\" ] || git -C \"$target\" rev-parse --is-inside-work-tree >/dev/null 2>&1; then",
    " echo \"Repository resource already present at $target\"",
    " return 0",
    " fi",
    " echo \"Re-materializing partial repository resource at $target\" >&2",
    " find \"$target\" -mindepth 1 -maxdepth 1 -exec rm -rf {} +",
    " fi",
    " mkdir -p \"$(dirname \"$target\")\"",
    " tmp=\"${target}.tmp.$$\"",
    " rm -rf \"$tmp\"",
    // Fetch failures must not leak the pid-suffixed tmp clone beside the mount
    // (set -eu would exit before any cleanup).
    " if ! { git init \"$tmp\" >/dev/null && git -C \"$tmp\" remote add origin \"$uri\" && git -C \"$tmp\" fetch --depth 1 --no-tags --filter=blob:none origin \"$ref\" && git -C \"$tmp\" checkout --detach FETCH_HEAD >/dev/null; }; then",
    " rm -rf \"$tmp\"",
    " echo \"Repository resource fetch failed for $target\" >&2",
    " exit 1",
    " fi",
    " if [ -n \"$subpath\" ]; then",
    " if [ ! -e \"$tmp/$subpath\" ]; then",
    " echo \"Repository subpath not found: $subpath\" >&2",
    " rm -rf \"$tmp\"",
    " exit 1",
    " fi",
    " if [ -d \"$tmp/$subpath\" ]; then",
    " mkdir -p \"$target\"",
    " cp -a \"$tmp/$subpath/.\" \"$target/\"",
    " else",
    " rmdir \"$target\" 2>/dev/null || true",
    " cp -a \"$tmp/$subpath\" \"$target\"",
    " fi",
    " rm -rf \"$tmp\"",
    " else",
    " rmdir \"$target\" 2>/dev/null || true",
    // Two concurrent turn holders can race this install: without the existence
    // re-check the loser's un-flagged `mv` would nest its tmp clone INSIDE the
    // winner's tree as <name>.tmp.<pid>. If the winner produced a valid work tree,
    // accept it; a non-empty non-repo survivor here is a mount point the manifest
    // re-filled — install into it by content copy instead of rename.
    " if [ -e \"$target\" ]; then",
    " if git -C \"$target\" rev-parse --is-inside-work-tree >/dev/null 2>&1; then",
    " rm -rf \"$tmp\"",
    " echo \"Repository resource already present at $target\"",
    " return 0",
    " fi",
    " cp -a \"$tmp/.\" \"$target/\"",
    " rm -rf \"$tmp\"",
    " else",
    " mv \"$tmp\" \"$target\"",
    " fi",
    " git -C \"$target\" rev-parse --is-inside-work-tree >/dev/null",
    " fi",
    " if [ ! -e \"$target\" ]; then",
    " echo \"Repository resource was not materialized at $target\" >&2",
    " exit 1",
    " fi",
    " echo \"Repository resource ready at $target\"",
    "}",
  ];
  for (const resource of resources) {
    // The URI is parsed even when an explicit mountPath is given, so a URI that
    // `new URL` cannot parse throws here in both cases.
    const url = new URL(resource.uri);
    // Default mount path: the URI path without surrounding slashes or a ".git" suffix.
    const repo = url.pathname.replace(/^\/+|\/+$/g, "").replace(/\.git$/, "");
    const mountPath = normalizeManifestPath(resource.mountPath ?? `repos/${repo}`);
    // Every argument is single-quoted so the shell passes it to clone_repository verbatim.
    commands.push([
      "clone_repository",
      shellQuote(posixPath.join("/workspace", mountPath)),
      shellQuote(resource.uri),
      shellQuote(resource.ref),
      shellQuote(resource.subpath ? normalizeManifestPath(resource.subpath) : ""),
    ].join(" "));
  }
  return commands.join("\n");
}
2869
+
2870
/**
 * Runs the repository clone script inside a sandbox session and reports the
 * outcome through `context.onRuntimeEvent`.
 *
 * Emits `sandbox.operation.started`, then either `sandbox.operation.completed`
 * or `sandbox.operation.failed` (with the error message) before rethrowing.
 *
 * @param session - Sandbox session; must expose `exec` or `execCommand`.
 * @param resources - Repository resources to materialize.
 * @param context - Hook context (event sink, `runAs`, optional git token seed).
 * @throws Error if the session cannot execute commands, or if the command
 *   times out, fails, or reports no exit code.
 */
export async function runRepositoryCloneHook(
  session: SandboxSessionLike,
  resources: Extract<ResourceRef, { kind: "repository" }>[],
  context: SandboxLifecycleHookContext = { environment: {} },
): Promise<void> {
  const payload = { name: "repository-clone", repositoryCount: resources.length };
  await context.onRuntimeEvent?.({ type: "sandbox.operation.started", payload });
  try {
    // TOKEN-BROKER (B1): the run-scoped GitHub token is threaded PER-EXEC, never on
    // the manifest. The SDK's ExecCommandArgs has no `environment` field (exec
    // inherits the box's manifest env), and the manifest must not carry a rotating
    // value (validateNoEnvironmentDelta), so the seed is inlined as an `export`
    // prefix on THIS exec's command text only. The clone script's gated seed block
    // then writes it to the token FILE before the fetch, so GIT_ASKPASS reads it.
    // Without a seed (e.g. the selfhosted path) the script runs with no prefix.
    const tokenPrefix = context.gitTokenSeed
      ? `export OPENGENI_GIT_TOKEN_SEED=${shellQuote(context.gitTokenSeed)}\n`
      : "";
    const command = `${tokenPrefix}${repositoryCloneCommand(resources)}`;
    const execArgs = {
      cmd: command,
      workdir: "/workspace",
      ...(context.runAs ? { runAs: context.runAs } : {}),
      yieldTimeMs: SANDBOX_LIFECYCLE_COMMAND_TIMEOUT_MS,
      maxOutputTokens: 20_000,
    };

    let result: unknown;
    if (session.exec) {
      result = await session.exec(execArgs);
    } else if (session.execCommand) {
      result = await session.execCommand(execArgs);
    } else {
      throw new Error("Sandbox session does not support command execution");
    }
    assertSandboxCommandSucceeded(result, "Repository clone hook");
    await context.onRuntimeEvent?.({ type: "sandbox.operation.completed", payload });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    await context.onRuntimeEvent?.({
      type: "sandbox.operation.failed",
      payload: { ...payload, error: message },
    });
    throw error;
  }
}
2923
+
2924
/**
 * Builds the shell script that logs the Azure CLI in with a service principal.
 *
 * Credentials are read from AZURE_* variables with ARM_* fallbacks. The script
 * does nothing unless client id, client secret and tenant id are all set. It
 * exits 127 when `az` is missing, skips the login when `az account show`
 * already succeeds, and selects the subscription only when one is configured.
 *
 * @returns The script text, one command per line.
 */
export function azureCliLoginCommand(): string {
  return [
    "export HOME=\"${HOME:-/workspace}\"",
    "mkdir -p \"$HOME/.azure\"",
    "CLIENT_ID=\"${AZURE_CLIENT_ID:-${ARM_CLIENT_ID:-}}\"",
    "CLIENT_SECRET=\"${AZURE_CLIENT_SECRET:-${ARM_CLIENT_SECRET:-}}\"",
    "TENANT_ID=\"${AZURE_TENANT_ID:-${ARM_TENANT_ID:-}}\"",
    "SUBSCRIPTION_ID=\"${AZURE_SUBSCRIPTION_ID:-${ARM_SUBSCRIPTION_ID:-}}\"",
    "if [ -n \"$CLIENT_ID\" ] && [ -n \"$CLIENT_SECRET\" ] && [ -n \"$TENANT_ID\" ]; then",
    " command -v az >/dev/null 2>&1 || { echo \"Azure CLI is not installed in the sandbox\" >&2; exit 127; }",
    // `|| exit $?`: the script runs without `set -e`, so a failed login would
    // otherwise be masked by the exit status of the following `if ... fi` (0 when
    // no subscription is configured) and the hook would report success.
    " az account show --only-show-errors >/dev/null 2>&1 || az login --service-principal --username \"$CLIENT_ID\" --password \"$CLIENT_SECRET\" --tenant \"$TENANT_ID\" --allow-no-subscriptions --only-show-errors --output none || exit $?",
    // if/fi, NOT `[ -n ] && az`: this line ends the credentialed if-body, so with a
    // no-subscription SP (an explicitly supported config — the login above passes
    // --allow-no-subscriptions) the bare `[ -n ]` would exit the whole script 1 and
    // fail the turn.
    " if [ -n \"$SUBSCRIPTION_ID\" ]; then az account set --subscription \"$SUBSCRIPTION_ID\" --only-show-errors; fi",
    "fi",
  ].join("\n");
}
2943
+
2944
/**
 * Extracts a process exit code from a sandbox command result.
 *
 * String results are searched for "Process exited with code <n>". Object
 * results are checked for a numeric `exitCode`, `exit_code`, `code` or
 * `status`, in that order.
 *
 * @param result - The raw value returned by the sandbox exec call.
 * @returns The exit code, or `null` when none can be determined.
 */
export function sandboxCommandExitCode(result: unknown): number | null {
  if (typeof result === "string") {
    const captured = /Process exited with code (-?\d+)/.exec(result);
    return captured === null ? null : Number(captured[1]);
  }
  if (typeof result !== "object" || result === null) {
    return null;
  }
  const { exitCode, exit_code, code, status } = result as {
    exitCode?: unknown;
    exit_code?: unknown;
    code?: unknown;
    status?: unknown;
  };
  const firstNumeric = [exitCode, exit_code, code, status].find(
    (value): value is number => typeof value === "number",
  );
  return firstNumeric ?? null;
}
2965
+
2966
/**
 * Collects the textual output of a sandbox command result.
 *
 * A string result is returned as-is, matching `sandboxCommandExitCode` and
 * `sandboxCommandStillRunning`, which both accept string results; this keeps
 * the command's text available for error messages. For object results the
 * non-empty string values of `output`, `stderr` and `stdout` are joined with
 * newlines, in that order.
 *
 * @param result - The raw value returned by the sandbox exec call.
 * @returns The combined output, or an empty string when there is none.
 */
export function sandboxCommandOutput(result: unknown): string {
  if (typeof result === "string") {
    return result;
  }
  if (!result || typeof result !== "object") {
    return "";
  }
  const candidate = result as {
    output?: unknown;
    stdout?: unknown;
    stderr?: unknown;
  };
  return [candidate.output, candidate.stderr, candidate.stdout]
    .filter((value): value is string => typeof value === "string" && value.length > 0)
    .join("\n");
}
2979
+
2980
/**
 * Throws unless a sandbox command result represents a finished, successful run.
 *
 * @param result - The raw value returned by the sandbox exec call.
 * @param operation - Human-readable operation name used in error messages.
 * @throws Error if the command is still running, exited non-zero, or produced
 *   no exit code. The command output is used as the message when available.
 */
function assertSandboxCommandSucceeded(result: unknown, operation: string): void {
  const output = sandboxCommandOutput(result);
  if (sandboxCommandStillRunning(result)) {
    const detail = output ? `:\n${output}` : "";
    throw new Error(`${operation} did not finish before the lifecycle command timeout${detail}`);
  }
  const exitCode = sandboxCommandExitCode(result);
  if (exitCode === 0) {
    return;
  }
  const fallback = exitCode === null
    ? `${operation} did not return a command exit code`
    : `${operation} failed with exit code ${exitCode}`;
  throw new Error(output || fallback);
}
2993
+
2994
/**
 * Reports whether a sandbox command result describes a process that has not
 * finished yet.
 *
 * String results match on "Process running with session ID <n>"; object
 * results match when `sessionId` or `session_id` is a number.
 *
 * @param result - The raw value returned by the sandbox exec call.
 * @returns `true` when the result indicates a still-running process.
 */
export function sandboxCommandStillRunning(result: unknown): boolean {
  if (typeof result === "string") {
    return /Process running with session ID \d+/u.test(result);
  }
  if (typeof result !== "object" || result === null) {
    return false;
  }
  const { sessionId, session_id } = result as { sessionId?: unknown; session_id?: unknown };
  return [sessionId, session_id].some((value) => typeof value === "number");
}
3004
+
3005
/**
 * Checks whether the environment carries a complete Azure service principal.
 *
 * Client id, client secret and tenant id must each be non-empty under either
 * the AZURE_* name or its ARM_* fallback.
 *
 * @param environment - Environment variables for the run.
 * @returns `true` when all three credentials are present.
 */
function hasAzureServicePrincipal(environment: Record<string, string>): boolean {
  const credentialKeys: Array<[string, string]> = [
    ["AZURE_CLIENT_ID", "ARM_CLIENT_ID"],
    ["AZURE_CLIENT_SECRET", "ARM_CLIENT_SECRET"],
    ["AZURE_TENANT_ID", "ARM_TENANT_ID"],
  ];
  return credentialKeys.every(([azureKey, armKey]) => Boolean(environment[azureKey] || environment[armKey]));
}
3011
+
3012
/**
 * Runs the Azure CLI service-principal login script inside a sandbox session
 * and reports the outcome through `context.onRuntimeEvent`.
 *
 * Emits `sandbox.operation.started`, then either `sandbox.operation.completed`
 * or `sandbox.operation.failed` (with the error message) before rethrowing.
 *
 * @param session - Sandbox session; must expose `exec` or `execCommand`.
 * @param context - Hook context (event sink and optional `runAs`).
 * @throws Error if the session cannot execute commands, or if the command
 *   times out, fails, or reports no exit code.
 */
export async function runAzureCliLoginHook(
  session: SandboxSessionLike,
  context: SandboxLifecycleHookContext = { environment: {} },
): Promise<void> {
  const payload = { name: "azure-cli-login", command: "az login --service-principal" };
  await context.onRuntimeEvent?.({ type: "sandbox.operation.started", payload });
  try {
    // Both exec flavours take the same arguments, so build them once.
    const buildExecArgs = () => ({
      cmd: azureCliLoginCommand(),
      workdir: "/workspace",
      ...(context.runAs ? { runAs: context.runAs } : {}),
      yieldTimeMs: SANDBOX_LIFECYCLE_COMMAND_TIMEOUT_MS,
      maxOutputTokens: 20_000,
    });

    let result: unknown;
    if (session.exec) {
      result = await session.exec(buildExecArgs());
    } else if (session.execCommand) {
      result = await session.execCommand(buildExecArgs());
    } else {
      throw new Error("Sandbox session does not support command execution");
    }
    assertSandboxCommandSucceeded(result, "Azure CLI login hook");
    await context.onRuntimeEvent?.({ type: "sandbox.operation.completed", payload });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    await context.onRuntimeEvent?.({
      type: "sandbox.operation.failed",
      payload: { ...payload, error: message },
    });
    throw error;
  }
}
3052
+
3053
/**
 * Builds the Azure OpenAI deployment base URL:
 * `<endpoint>/openai/deployments/<deployment>`, with trailing slashes removed
 * from the endpoint.
 *
 * @param settings - Runtime settings holding the Azure OpenAI endpoint and deployment.
 * @returns The deployment base URL.
 * @throws Error if the endpoint or the deployment name is missing.
 */
function azureDeploymentBaseUrl(settings: Settings): string {
  const deployment = settings.azureOpenaiDeployment;
  const endpoint = settings.azureOpenaiEndpoint?.replace(/\/+$/, "");
  if (endpoint && deployment) {
    return `${endpoint}/openai/deployments/${deployment}`;
  }
  throw new Error("Azure OpenAI endpoint/deployment settings are incomplete");
}
3060
+
3061
/**
 * Computes the default query parameters for Azure OpenAI requests.
 *
 * @param settings Settings slice carrying the optional API version.
 * @param baseURL Base URL the client will call.
 * @returns `{ "api-version": <version> }` when an API version is configured
 *   and `baseURL` does not end with `/openai/v1` (compared case-insensitively,
 *   ignoring trailing slashes); otherwise `undefined`.
 */
export function azureOpenAIDefaultQuery(
  settings: Pick<Settings, "azureOpenaiApiVersion">,
  baseURL: string,
): Record<string, string> | undefined {
  const apiVersion = settings.azureOpenaiApiVersion;
  if (!apiVersion) {
    return undefined;
  }
  const endsWithV1Path = baseURL.replace(/\/+$/, "").toLowerCase().endsWith("/openai/v1");
  return endsWithV1Path ? undefined : { "api-version": apiVersion };
}
3072
+
3073
// As of @openai/agents 0.11.0, local sandbox sources (the lazy bundled-skills
// source included) have to stay inside the SDK process working directory.
// Reading anything outside it needs manifest.extraPathGrants, and remote
// sandbox clients such as Modal reject manifests that carry extra path grants.
// The packaged skills ship inside the runtime package, which in production is
// outside the worker's cwd, so a copy is staged under the working directory
// once per process rather than granting access to the packaged path.
let stagedBundledSkillsDir: string | null = null;

/**
 * Resolves the directory the bundled skills should be read from.
 *
 * The packaged directory is looked up next to this module first, then under
 * `../src`; if neither exists the path next to the module is used. When that
 * directory lies within the current working directory it is returned as-is.
 * Otherwise a copy is staged at
 * `<cwd>/.opengeni/bundled_hashicorp_terraform_skills`, and the staged path is
 * cached in `stagedBundledSkillsDir` for later calls.
 */
function bundledSkillsDir(): string {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  const besideModule = join(moduleDir, "bundled_hashicorp_terraform_skills");
  const inSourceTree = join(moduleDir, "..", "src", "bundled_hashicorp_terraform_skills");

  let packaged = besideModule;
  if (!existsSync(besideModule) && existsSync(inSourceTree)) {
    packaged = inSourceTree;
  }

  if (isPathWithin(process.cwd(), packaged)) {
    return packaged;
  }

  const cached = stagedBundledSkillsDir;
  if (cached) {
    return cached;
  }
  const staged = stageBundledSkills(
    packaged,
    join(process.cwd(), ".opengeni", "bundled_hashicorp_terraform_skills"),
  );
  stagedBundledSkillsDir = staged;
  return staged;
}
3096
+
3097
/**
 * Copies `packaged` to `target` by way of a per-process scratch directory.
 *
 * The content is first copied to `<target>.tmp-<pid>`, the existing `target`
 * is removed, and the scratch directory is renamed into place.
 *
 * @returns `target`.
 * @throws The rename error when the rename fails and `target` does not exist
 *   afterwards.
 */
function stageBundledSkills(packaged: string, target: string): string {
  const scratch = `${target}.tmp-${process.pid}`;
  const removeTree = (path: string): void => {
    rmSync(path, { recursive: true, force: true });
  };

  removeTree(scratch);
  mkdirSync(dirname(scratch), { recursive: true });
  cpSync(packaged, scratch, { recursive: true });
  removeTree(target);
  try {
    renameSync(scratch, target);
  } catch (error) {
    // A concurrent process staged the same content between the removal and
    // the rename above; keep its copy and discard ours.
    removeTree(scratch);
    if (!existsSync(target)) {
      throw error;
    }
  }
  return target;
}
3114
+
3115
/**
 * Reports whether `candidate` is `root` itself or lies beneath it.
 *
 * @param root Directory treated as the containing root.
 * @param candidate Path to classify.
 * @returns `true` when the path from `root` to `candidate` is empty or stays
 *   below `root`; `false` when it climbs out through a leading `..` segment
 *   or is absolute.
 */
function isPathWithin(root: string, candidate: string): boolean {
  const relativePath = relative(root, candidate);
  if (relativePath === "") {
    return true;
  }
  // Only a leading ".." *segment* leaves the root. A name that merely starts
  // with two dots (for example "..cache") is still inside it, so a plain
  // startsWith("..") check would be too broad.
  const climbsOut = relativePath === ".." || /^\.\.[\\/]/.test(relativePath);
  return !climbsOut && !isAbsolute(relativePath);
}
3119
+
3120
/**
 * Builds the skill source fed to the SDK Skills capability.
 *
 * With no pack skills this is the plain bundled local-dir source,
 * byte-for-byte the pre-pack behavior. With pack skills it becomes one
 * in-memory dir source that combines the bundled skill directories (as
 * local_dir entries the SDK materializes lazily) with pack skill directories
 * built from manifest-carried file content: one skill index, one `## Skills`
 * instruction section, lazy `load_skill` for all of them. A pack skill shadows
 * a bundled skill that has the same directory name.
 *
 * @throws If a pack skill name is rejected by `assertSafePackSkillName`, or if
 *   two pack skills share a name (compared case-insensitively).
 */
export function lazySkillSourceWithPackSkills(packSkills: PackSkill[]): LocalDirLazySkillSource {
  const bundledDir = bundledSkillsDir();
  const bundled = localDirLazySkillSource({ src: bundledDir });
  if (packSkills.length === 0) {
    return bundled;
  }

  const children: Record<string, Entry> = {};
  bundledSkillDirNames(bundledDir).forEach((dirName) => {
    children[dirName] = localDir({ src: join(bundledDir, dirName) });
  });

  const seenLowercased = new Set<string>();
  const shadowingNames = new Set<string>();
  const packIndex: SkillIndexEntry[] = [];
  for (const skill of packSkills) {
    const { name } = skill;
    assertSafePackSkillName(name);
    const key = name.toLowerCase();
    if (seenLowercased.has(key)) {
      throw new Error(`Duplicate pack skill name: ${name}`);
    }
    seenLowercased.add(key);
    shadowingNames.add(name);
    // Assigning after the bundled entries lets a pack skill replace a bundled
    // directory of the same name.
    children[name] = packSkillDirEntry(skill);
    packIndex.push({ name, description: packSkillDescription(skill), path: name });
  }

  return {
    source: dir({ children }),
    getIndex: (manifest, skillsPath) => {
      const bundledIndex = bundled.getIndex?.(manifest, skillsPath) ?? [];
      const unshadowed = bundledIndex.filter((entry) => !shadowingNames.has(entry.path ?? entry.name));
      return [...unshadowed, ...packIndex];
    },
  };
}
3160
+
3161
/**
 * Lists the immediate subdirectories of `root` that contain a `SKILL.md`
 * file, sorted with the default string ordering.
 */
function bundledSkillDirNames(root: string): string[] {
  const names: string[] = [];
  for (const entry of readdirSync(root, { withFileTypes: true })) {
    if (!entry.isDirectory()) {
      continue;
    }
    if (!existsSync(join(root, entry.name, "SKILL.md"))) {
      continue;
    }
    names.push(entry.name);
  }
  return names.sort();
}
3167
+
3168
/** Intermediate directory tree for a pack skill: subdirectories and file contents keyed by name. */
type PackSkillDirNode = {
  dirs: Map<string, PackSkillDirNode>;
  files: Map<string, string>;
};

/**
 * Builds the in-memory directory entry for one pack skill from its files.
 *
 * Each file path is validated and split by `packSkillPathSegments`, then
 * placed into a nested tree that is finally converted with
 * `packSkillDirFromNode`.
 *
 * @throws If a path segment is used as both a file and a directory, if a file
 *   path collides with an existing file or directory, or if there is no
 *   top-level `SKILL.md`.
 */
function packSkillDirEntry(skill: PackSkill): Dir {
  const emptyNode = (): PackSkillDirNode => ({ dirs: new Map(), files: new Map() });
  const root = emptyNode();

  for (const skillFile of skill.files) {
    const segments = packSkillPathSegments(skill.name, skillFile.path);
    const parentSegments = segments.slice(0, -1);
    const filename = segments[segments.length - 1]!;

    // Walk (and create on demand) the directory chain leading to the file.
    let cursor = root;
    for (const dirName of parentSegments) {
      if (cursor.files.has(dirName)) {
        throw new Error(`Pack skill ${skill.name} uses ${dirName} as both a file and a directory`);
      }
      let child = cursor.dirs.get(dirName);
      if (!child) {
        child = emptyNode();
        cursor.dirs.set(dirName, child);
      }
      cursor = child;
    }

    const nameTaken = cursor.dirs.has(filename) || cursor.files.has(filename);
    if (nameTaken) {
      throw new Error(`Duplicate pack skill file path in ${skill.name}: ${skillFile.path}`);
    }
    cursor.files.set(filename, skillFile.content);
  }

  if (!root.files.has("SKILL.md")) {
    throw new Error(`Pack skill ${skill.name} is missing a top-level SKILL.md file`);
  }
  return packSkillDirFromNode(root);
}
3200
+
3201
/**
 * Recursively converts a `PackSkillDirNode` tree into a `dir` entry:
 * subdirectories are converted first, then files are added as `file` entries
 * holding their content.
 */
function packSkillDirFromNode(node: PackSkillDirNode): Dir {
  const children: Record<string, Entry> = {};
  node.dirs.forEach((child, name) => {
    children[name] = packSkillDirFromNode(child);
  });
  node.files.forEach((content, name) => {
    children[name] = file({ content });
  });
  return dir({ children });
}
3211
+
3212
/**
 * Ensures a pack skill name is a single safe path segment.
 *
 * The name is validated with `packSkillPathSegments`, which throws its own
 * error for unsafe input; a name that splits into more than one segment is
 * rejected here.
 *
 * @throws If the name is not exactly one valid path segment.
 */
function assertSafePackSkillName(name: string): void {
  const segments = packSkillPathSegments(name, name);
  if (segments.length === 1) {
    return;
  }
  throw new Error(`Invalid pack skill name: ${name}`);
}
3217
+
3218
/**
 * Splits a pack skill file path on `/` after checking that it is safe.
 *
 * @param skillName Skill name, used only in the error message.
 * @param path Relative, forward-slash separated file path.
 * @returns The path segments.
 * @throws If the path starts with `/`, contains a backslash, or has an empty,
 *   `.` or `..` segment.
 */
function packSkillPathSegments(skillName: string, path: string): string[] {
  const segments = path.split("/");
  const isUnsafeSegment = (segment: string): boolean =>
    segment === "" || segment === "." || segment === "..";
  const rejected = path.startsWith("/") || path.includes("\\") || segments.some(isUnsafeSegment);
  if (rejected) {
    throw new Error(`Invalid pack skill file path for ${skillName}: ${path}`);
  }
  return segments;
}
3225
+
3226
/**
 * Picks the description for a pack skill.
 *
 * Order of preference: the skill's own non-blank `description` (trimmed), the
 * description found in the frontmatter of its `SKILL.md` file, then the fixed
 * text "No description provided.".
 */
function packSkillDescription(skill: PackSkill): string {
  const explicit = skill.description?.trim();
  if (explicit) {
    return explicit;
  }
  const skillMarkdown = skill.files.find(({ path }) => path === "SKILL.md");
  const fromFrontmatter = skillFrontmatterDescription(skillMarkdown?.content ?? "");
  return fromFrontmatter ?? "No description provided.";
}
3234
+
3235
/**
 * Extracts the `description` value from the frontmatter of a skill markdown
 * document.
 *
 * The frontmatter is the region between a first line of `---` and the next
 * `---` line. A `description:` key with an inline value returns that value
 * with surrounding quotes removed. A `description:` key that is empty or
 * carries a YAML block-scalar header (`|` or `>` with optional chomping and
 * indentation indicators, e.g. `>-`, `|+`, `|2-`) collects the indented lines
 * that follow and joins them with single spaces.
 *
 * @param markdown Full markdown text of the skill file.
 * @returns The description, or `null` when there is no closed frontmatter
 *   block or no non-empty description in it.
 */
function skillFrontmatterDescription(markdown: string): string | null {
  const lines = markdown.split(/\r?\n/);
  if (lines[0]?.trim() !== "---") {
    return null;
  }
  const end = lines.findIndex((line, index) => index > 0 && line.trim() === "---");
  if (end === -1) {
    return null;
  }

  // A block-scalar header is "|" or ">" followed by an optional indentation
  // digit and chomping sign in either order, then an optional trailing comment.
  // Matching the whole grammar keeps headers such as ">+" from being returned
  // as if they were the description text.
  const blockScalarHeader = /^[|>](?:[1-9][+-]?|[+-][1-9]?)?(?:\s+#.*)?$/;
  const collected: string[] = [];
  let inDescription = false;
  for (const line of lines.slice(1, end)) {
    const match = line.match(/^description:\s*(.*)$/);
    if (match) {
      const inline = match[1]!.trim();
      if (inline && !blockScalarHeader.test(inline)) {
        return unquoteFrontmatterValue(inline);
      }
      inDescription = true;
      continue;
    }
    if (inDescription) {
      // Indented, non-blank lines continue the block; anything else ends it.
      if (/^\s+\S/.test(line)) {
        collected.push(line.trim());
        continue;
      }
      break;
    }
  }
  const blockValue = collected.join(" ").trim();
  return blockValue ? blockValue : null;
}
3267
+
3268
/**
 * Strips one pair of matching single or double quotes that wraps `value`.
 * Values that are shorter than two characters or not wrapped in matching
 * quotes are returned unchanged.
 */
function unquoteFrontmatterValue(value: string): string {
  if (value.length < 2) {
    return value;
  }
  const opening = value[0];
  const closing = value[value.length - 1];
  const opensWithQuote = opening === '"' || opening === "'";
  return opensWithQuote && opening === closing ? value.slice(1, -1) : value;
}
3274
+
3275
/**
 * Returns the trimmed text when `value` is a string with non-whitespace
 * content; returns `undefined` for anything else.
 */
function stringValue(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed === "" ? undefined : trimmed;
}
3278
+
3279
/**
 * Type guard that tells async iterables from sync ones by checking for a
 * callable `Symbol.asyncIterator` member.
 */
function isAsyncIterable<T>(source: Iterable<T> | AsyncIterable<T>): source is AsyncIterable<T> {
  const asyncIteratorMember: unknown = (source as AsyncIterable<T>)[Symbol.asyncIterator];
  return typeof asyncIteratorMember === "function";
}
3282
+
3283
/**
 * Serializes `value` with `JSON.stringify` after passing it through
 * `sortJson`.
 */
function stableJson(value: unknown): string {
  const sorted = sortJson(value);
  return JSON.stringify(sorted);
}
3286
+
3287
/**
 * Returns a copy of `value` in which the entries of every object are
 * re-inserted in sorted key order, recursing through arrays and nested
 * objects. Other values are returned as they are.
 *
 * Keys are compared by UTF-16 code units rather than with `localeCompare`.
 * Locale collation depends on the host locale/ICU data and treats some
 * distinct keys as equal (for example precomposed vs. decomposed accents),
 * which would let the original insertion order leak into the result.
 */
function sortJson(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map(sortJson);
  }
  if (value && typeof value === "object") {
    const ordered = Object.entries(value)
      .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
      .map(([key, nested]): [string, unknown] => [key, sortJson(nested)]);
    return Object.fromEntries(ordered);
  }
  return value;
}
3296
+
3297
/**
 * Derives a string identifier for an approval item.
 *
 * The first value that is neither `null` nor `undefined` is used, in this
 * order: `rawItem.callId`, `rawItem.id`, `id`, `name`, then the literal
 * "approval".
 */
function approvalIdentifier(item: any): string {
  const rawItem = item?.rawItem;
  const identifier = rawItem?.callId ?? rawItem?.id ?? item?.id ?? item?.name ?? "approval";
  return String(identifier);
}