@opengeni/runtime 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2PO56VAL.js +3478 -0
- package/dist/chunk-2PO56VAL.js.map +1 -0
- package/dist/index.d.ts +912 -0
- package/dist/index.js +3663 -0
- package/dist/index.js.map +1 -0
- package/dist/sandbox/index.d.ts +1738 -0
- package/dist/sandbox/index.js +187 -0
- package/dist/sandbox/index.js.map +1 -0
- package/package.json +49 -0
- package/src/bundled_hashicorp_terraform_skills/LICENSE +373 -0
- package/src/bundled_hashicorp_terraform_skills/README.md +18 -0
- package/src/bundled_hashicorp_terraform_skills/UPSTREAM_GIT_SHA +1 -0
- package/src/bundled_hashicorp_terraform_skills/azure-verified-modules/SKILL.md +613 -0
- package/src/bundled_hashicorp_terraform_skills/checkov/SKILL.md +43 -0
- package/src/bundled_hashicorp_terraform_skills/refactor-module/SKILL.md +538 -0
- package/src/bundled_hashicorp_terraform_skills/social-media-marketing/SKILL.md +35 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-search-import/SKILL.md +372 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-search-import/references/MANUAL-IMPORT.md +113 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-search-import/scripts/list_resources.sh +38 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/SKILL.md +480 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/api-monitoring.md +543 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/component-blocks.md +476 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/deployment-blocks.md +391 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/examples.md +1529 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/linked-stacks.md +187 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/troubleshooting.md +671 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-style-guide/SKILL.md +353 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/SKILL.md +451 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/references/CI_CD.md +80 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/references/EXAMPLES.md +314 -0
- package/src/bundled_hashicorp_terraform_skills/terraform-test/references/MOCK_PROVIDERS.md +171 -0
- package/src/codex-tool-search.ts +267 -0
- package/src/context-compaction.ts +538 -0
- package/src/history-sanitizer.ts +719 -0
- package/src/index.ts +3299 -0
- package/src/sandbox/capabilities.ts +69 -0
- package/src/sandbox/channel-a.ts +1031 -0
- package/src/sandbox/display-stack.ts +231 -0
- package/src/sandbox/errors.ts +34 -0
- package/src/sandbox/index.ts +832 -0
- package/src/sandbox/providers/blaxel.ts +35 -0
- package/src/sandbox/providers/cloudflare.ts +24 -0
- package/src/sandbox/providers/daytona.ts +34 -0
- package/src/sandbox/providers/docker.ts +17 -0
- package/src/sandbox/providers/e2b.ts +36 -0
- package/src/sandbox/providers/index.ts +107 -0
- package/src/sandbox/providers/local.ts +13 -0
- package/src/sandbox/providers/modal.ts +55 -0
- package/src/sandbox/providers/none.ts +13 -0
- package/src/sandbox/providers/runloop.ts +32 -0
- package/src/sandbox/providers/selfhosted.ts +96 -0
- package/src/sandbox/providers/types.ts +38 -0
- package/src/sandbox/providers/vercel.ts +29 -0
- package/src/sandbox/recording.ts +286 -0
- package/src/sandbox/routing/backend-resolver.ts +189 -0
- package/src/sandbox/routing/routing-session.ts +455 -0
- package/src/sandbox/select.ts +371 -0
- package/src/sandbox/selfhosted/capabilities.ts +255 -0
- package/src/sandbox/selfhosted/control-rpc.ts +351 -0
- package/src/sandbox/selfhosted/session.ts +930 -0
- package/src/sandbox/selfhosted/testing.ts +230 -0
- package/src/sandbox/stream-port.ts +185 -0
- package/src/sandbox/stream-token.ts +90 -0
- package/src/sandbox/terminal-server.ts +203 -0
- package/src/sandbox-computer.ts +835 -0
|
@@ -0,0 +1,832 @@
|
|
|
1
|
+
// @opengeni/runtime/sandbox — the agent-loop-free sandbox leaf.
|
|
2
|
+
//
|
|
3
|
+
// This module is the load-bearing pre-req for the API-direct control plane
|
|
4
|
+
// (docs/design/sandbox-surfacing). It exposes the sandbox client factory plus
|
|
5
|
+
// the resume / recovery-envelope helpers that the API needs to touch a box by
|
|
6
|
+
// id (resume-by-id, file/exec/port ops) WITHOUT importing the @openai/agents
|
|
7
|
+
// agent-loop graph.
|
|
8
|
+
//
|
|
9
|
+
// IMPORT DISCIPLINE (enforced by packages/runtime/test/sandbox-leaf-no-agent-loop.test.ts):
|
|
10
|
+
// - ALLOWED: the per-provider sandbox SDK build imports
|
|
11
|
+
// `@openai/agents/sandbox`, `@openai/agents/sandbox/local`,
|
|
12
|
+
// `@openai/agents-extensions/sandbox/modal`
|
|
13
|
+
// and the workspace `@opengeni/config` / `@opengeni/contracts` packages.
|
|
14
|
+
// - FORBIDDEN: the agent-loop entrypoints — the bare `@openai/agents`,
|
|
15
|
+
// `@openai/agents-extensions`, or `@openai/agents-core` roots, and the
|
|
16
|
+
// loop symbols (`Agent`, `run`, `Runner`, `RunState`).
|
|
17
|
+
// The barrel `packages/runtime/src/index.ts` re-exports everything here via
|
|
18
|
+
// `export * from "./sandbox"`, so existing consumers (apps/worker) are
|
|
19
|
+
// unchanged.
|
|
20
|
+
|
|
21
|
+
import type { Settings } from "@opengeni/config";
|
|
22
|
+
import { collectSandboxEnvironment, parseExposedPorts } from "@opengeni/config";
|
|
23
|
+
import { DESKTOP_STREAM_PORT, TERMINAL_STREAM_PORT, type SandboxBackend } from "@opengeni/contracts";
|
|
24
|
+
import type {
|
|
25
|
+
SandboxClient,
|
|
26
|
+
SandboxSessionLike,
|
|
27
|
+
SandboxSessionState,
|
|
28
|
+
} from "@openai/agents/sandbox";
|
|
29
|
+
import { PROVIDER_REGISTRY } from "./providers";
|
|
30
|
+
import { SandboxConfigError } from "./errors";
|
|
31
|
+
import { isSelfhostedProviderNotFoundError } from "./selfhosted/session";
|
|
32
|
+
|
|
33
|
+
// Re-export the config-owned environment/port helpers from the leaf so the
|
|
34
|
+
// API-direct control plane can pull its full sandbox-construction surface from
|
|
35
|
+
// a single agent-loop-free entrypoint. They physically live in @opengeni/config
|
|
36
|
+
// (moving them into runtime would create a config→runtime cycle — ledger CR8).
|
|
37
|
+
export { collectSandboxEnvironment, parseExposedPorts } from "@opengeni/config";
|
|
38
|
+
|
|
39
|
+
// The provider registry surface — the descriptor table self-test, the per-
|
|
40
|
+
// provider registrations, selection + capability negotiation, and the typed
|
|
41
|
+
// construction errors. All agent-loop-free, so the API-direct control plane
|
|
42
|
+
// imports them from this one leaf.
|
|
43
|
+
export {
|
|
44
|
+
CAPABILITY_DESCRIPTORS,
|
|
45
|
+
DESKTOP_STREAM_PORT,
|
|
46
|
+
assertDescriptorRegistryInvariants,
|
|
47
|
+
type CapabilityDescriptor,
|
|
48
|
+
} from "./capabilities";
|
|
49
|
+
export { SandboxConfigError, SandboxProviderUnavailableError } from "./errors";
|
|
50
|
+
export {
|
|
51
|
+
PROVIDER_REGISTRY,
|
|
52
|
+
assertProviderRegistryInvariants,
|
|
53
|
+
type ProviderRegistration,
|
|
54
|
+
type ProviderConstructionContext,
|
|
55
|
+
} from "./providers";
|
|
56
|
+
export {
|
|
57
|
+
selectBackend,
|
|
58
|
+
backendSupportsOs,
|
|
59
|
+
desktopCapableBackend,
|
|
60
|
+
negotiateCapabilities,
|
|
61
|
+
type NegotiationContext,
|
|
62
|
+
} from "./select";
|
|
63
|
+
|
|
64
|
+
// Scoped data-plane stream-token mint/verify (P3.1). Agent-loop-free; the API
|
|
65
|
+
// pulls these from this leaf to authorize the desktop pixel plane.
|
|
66
|
+
export {
|
|
67
|
+
STREAM_TOKEN_DEFAULT_TTL_SECONDS,
|
|
68
|
+
mintStreamToken,
|
|
69
|
+
verifyStreamToken,
|
|
70
|
+
StreamTokenPayload,
|
|
71
|
+
type MintStreamTokenInput,
|
|
72
|
+
type StreamTokenPayloadType,
|
|
73
|
+
} from "./stream-token";
|
|
74
|
+
|
|
75
|
+
// The Channel-B desktop display-stack launcher (P4.1). Exec-launched,
|
|
76
|
+
// flock-idempotent; the worker (per-turn) and the API (per viewer op) both drive
|
|
77
|
+
// it from this leaf to bring up Xvfb -> XFCE -> x11vnc -viewonly -> websockify:6080.
|
|
78
|
+
export {
|
|
79
|
+
STREAM_PORT,
|
|
80
|
+
DISPLAY_STACK_TIMEOUT_MS,
|
|
81
|
+
DEFAULT_DESKTOP_GEOMETRY,
|
|
82
|
+
DisplayStackError,
|
|
83
|
+
DisplayStackUnsupportedError,
|
|
84
|
+
buildDisplayStackScript,
|
|
85
|
+
ensureDisplayStack,
|
|
86
|
+
tearDownDisplayStack,
|
|
87
|
+
type DesktopGeometry,
|
|
88
|
+
type EnsureDisplayStackOptions,
|
|
89
|
+
type EnsureDisplayStackResult,
|
|
90
|
+
} from "./display-stack";
|
|
91
|
+
|
|
92
|
+
// The Channel-B REAL PTY terminal-server launcher (P5.t). Exec-launched,
|
|
93
|
+
// flock-idempotent twin of ensureDisplayStack; brings up ttyd PTY-over-websocket
|
|
94
|
+
// (bash -l per ws client) on 7681 over the SAME tunnel the desktop noVNC uses.
|
|
95
|
+
export {
|
|
96
|
+
TERMINAL_STREAM_PORT,
|
|
97
|
+
TERMINAL_SERVER_TIMEOUT_MS,
|
|
98
|
+
TerminalServerError,
|
|
99
|
+
TerminalServerUnsupportedError,
|
|
100
|
+
buildTerminalServerScript,
|
|
101
|
+
ensureTerminalServer,
|
|
102
|
+
tearDownTerminalServer,
|
|
103
|
+
type EnsureTerminalServerOptions,
|
|
104
|
+
type EnsureTerminalServerResult,
|
|
105
|
+
} from "./terminal-server";
|
|
106
|
+
|
|
107
|
+
// The Channel-B pixel DATA PLANE (P4.2). Resolves the provider's scoped tunnel
|
|
108
|
+
// for port 6080 (client → provider-tunnel direct), assembles the WS URL, and
|
|
109
|
+
// mints the scoped stream token. Called API-direct on the resumed handle.
|
|
110
|
+
export {
|
|
111
|
+
exposeStreamPort,
|
|
112
|
+
buildStreamUrl,
|
|
113
|
+
StreamPortUnavailableError,
|
|
114
|
+
type ExposedPortEndpoint,
|
|
115
|
+
type ExposeStreamPortInput,
|
|
116
|
+
type ExposeStreamPortResult,
|
|
117
|
+
} from "./stream-port";
|
|
118
|
+
|
|
119
|
+
// P4.3 recording loop — plain functions over a live session handle (no agent
|
|
120
|
+
// loop); finalize reads bytes + PUTs to storage in the holding process (F10).
|
|
121
|
+
export {
|
|
122
|
+
startRecording,
|
|
123
|
+
stopRecording,
|
|
124
|
+
readRecordingBytes,
|
|
125
|
+
deleteRecordingArtifacts,
|
|
126
|
+
recordingStorageKey,
|
|
127
|
+
contentTypeForCodec,
|
|
128
|
+
extForCodec,
|
|
129
|
+
RecordingUnavailableError,
|
|
130
|
+
RecordingError,
|
|
131
|
+
type RecordingCodec,
|
|
132
|
+
type RecordingContentType,
|
|
133
|
+
type StartRecordingInput,
|
|
134
|
+
type RecordingProcess,
|
|
135
|
+
type FinalizeRecordingResult,
|
|
136
|
+
} from "./recording";
|
|
137
|
+
|
|
138
|
+
// P4.4 Channel-A structured services — the provider-agnostic SandboxChannelAService
|
|
139
|
+
// (FileSystem + Git + Terminal) over a live, resumed-by-id session handle. The
|
|
140
|
+
// API constructs one per request around the box it just resumed; no ownership.
|
|
141
|
+
// Agent-loop-free, so the API-direct control plane imports it from this leaf.
|
|
142
|
+
export {
|
|
143
|
+
SandboxChannelAService,
|
|
144
|
+
ChannelAValidationError,
|
|
145
|
+
ChannelAConflictError,
|
|
146
|
+
ChannelANotFoundError,
|
|
147
|
+
ChannelAUnsupportedError,
|
|
148
|
+
stripExecBanner,
|
|
149
|
+
parseExecBannerSessionId,
|
|
150
|
+
isWorkspaceEscapeError,
|
|
151
|
+
isExecSessionLostBanner,
|
|
152
|
+
assertSafeRelPath,
|
|
153
|
+
parsePorcelainV2,
|
|
154
|
+
parseNumstatZ,
|
|
155
|
+
parseUnifiedPatch,
|
|
156
|
+
type ChannelASession,
|
|
157
|
+
type ChannelAExecArgs,
|
|
158
|
+
type ChannelAExecResult,
|
|
159
|
+
type ChannelAEmitter,
|
|
160
|
+
type SandboxChannelAServiceOptions,
|
|
161
|
+
type NumstatEntry,
|
|
162
|
+
} from "./channel-a";
|
|
163
|
+
|
|
164
|
+
// The selfhosted (bring-your-own-compute) control surface (M3). The NATS-backed
|
|
165
|
+
// `SelfhostedSession` presents the SAME structural exec/fs/git surface as Modal
|
|
166
|
+
// over a `ControlRpc` seam (request/reply on `agent.<ws>.<id>.rpc`, encoded via
|
|
167
|
+
// `@opengeni/agent-proto`). agent-offline is NEVER a NotFound — the lease never
|
|
168
|
+
// cold-creates a rival for a user's real machine. The real NATS transport +
|
|
169
|
+
// Accounts land in M4 behind the SAME `ControlRpc`.
|
|
170
|
+
export {
|
|
171
|
+
type ControlRpc,
|
|
172
|
+
NatsControlRpc,
|
|
173
|
+
SelfhostedControlError,
|
|
174
|
+
agentErrorToControlError,
|
|
175
|
+
subjectFor,
|
|
176
|
+
offlineControlResponse,
|
|
177
|
+
timeoutControlResponse,
|
|
178
|
+
offlineAgentError,
|
|
179
|
+
timeoutAgentError,
|
|
180
|
+
type NatsRequestConnection,
|
|
181
|
+
type SelfhostedUnavailableReason,
|
|
182
|
+
} from "./selfhosted/control-rpc";
|
|
183
|
+
export {
|
|
184
|
+
SelfhostedSession,
|
|
185
|
+
SelfhostedSandboxClient,
|
|
186
|
+
buildSelfhostedBackendSession,
|
|
187
|
+
isSelfhostedProviderNotFoundError,
|
|
188
|
+
setSelfhostedApplyDiff,
|
|
189
|
+
SELFHOSTED_DEFAULT_TIMEOUT_MS,
|
|
190
|
+
SELFHOSTED_RELAY_STREAM_PATH,
|
|
191
|
+
type SelfhostedSessionState,
|
|
192
|
+
type SelfhostedSessionDeps,
|
|
193
|
+
type SelfhostedSessionBuild,
|
|
194
|
+
type SelfhostedRelayConfig,
|
|
195
|
+
type SelfhostedExecArgs,
|
|
196
|
+
type SelfhostedExecResult,
|
|
197
|
+
type SelfhostedApplyDiff,
|
|
198
|
+
type SelfhostedEditor,
|
|
199
|
+
type SelfhostedImageOutput,
|
|
200
|
+
} from "./selfhosted/session";
|
|
201
|
+
export {
|
|
202
|
+
negotiateSelfhostedCapabilities,
|
|
203
|
+
selfhostedLiveness,
|
|
204
|
+
SELFHOSTED_RECONNECT_WINDOW_MS,
|
|
205
|
+
type SelfhostedNegotiationInput,
|
|
206
|
+
type SelfhostedLivenessState,
|
|
207
|
+
type SelfhostedEnrollment,
|
|
208
|
+
} from "./selfhosted/capabilities";
|
|
209
|
+
export { MockAgentResponder, type MockAgentResponderOptions, type MockExecHandler } from "./selfhosted/testing";
|
|
210
|
+
|
|
211
|
+
// The hot-swap routing proxy (M7): ONE stable session-shaped object the SDK binds
|
|
212
|
+
// to, which re-reads the per-session active pointer per op and dispatches to the
|
|
213
|
+
// currently-active backend (Modal or selfhosted) — flippable mid-turn, single
|
|
214
|
+
// active at a time, fence-retrying on a swap race.
|
|
215
|
+
export {
|
|
216
|
+
RoutingSandboxSession,
|
|
217
|
+
RoutingUnsupportedError,
|
|
218
|
+
type ActivePointer,
|
|
219
|
+
type RoutableBackendSession,
|
|
220
|
+
type ResolvedActiveBackend,
|
|
221
|
+
type RoutingSandboxSessionDeps,
|
|
222
|
+
type RoutingTransitionEvent,
|
|
223
|
+
} from "./routing/routing-session";
|
|
224
|
+
export {
|
|
225
|
+
makeActiveBackendResolver,
|
|
226
|
+
ActiveBackendUnresolvableError,
|
|
227
|
+
type ActiveBackendResolverDeps,
|
|
228
|
+
type RoutableSandbox,
|
|
229
|
+
} from "./routing/backend-resolver";
|
|
230
|
+
|
|
231
|
+
/**
 * Build the raw provider SandboxClient for the backend named by
 * `settings.sandboxBackend`.
 *
 * This is a thin convenience wrapper: it forwards to
 * createSandboxClientForBackend with the configured backend, so credential
 * validation, the stream-port merge and the docker network decoration all
 * happen there.
 *
 * @param settings    Resolved runtime settings; `sandboxBackend` picks the provider.
 * @param environment Sandbox environment; defaults to `collectSandboxEnvironment(settings)`.
 * @returns Whatever createSandboxClientForBackend returns for that backend
 *          (`undefined` for the "none" backend).
 */
export function createSandboxClient(settings: Settings, environment = collectSandboxEnvironment(settings)): unknown {
  const configuredBackend = settings.sandboxBackend as SandboxBackend;
  return createSandboxClientForBackend(configuredBackend, settings, environment);
}
|
|
245
|
+
|
|
246
|
+
/**
 * Build the raw provider SandboxClient for an EXPLICITLY named backend,
 * regardless of what `settings.sandboxBackend` says. A box that was created on
 * one backend has to be reattached through a client for that same backend, so
 * resume-by-id callers pass the backend recorded with the lease/envelope. When
 * `backend` equals `settings.sandboxBackend` the result matches
 * createSandboxClient.
 *
 * Steps, in order:
 *   1. Look the backend up in PROVIDER_REGISTRY (unknown backend → SandboxConfigError).
 *   2. Return `undefined` for the "none" backend.
 *   3. Run the registration's `validateCredentials(settings)` so a bad
 *      configuration fails before anything is built.
 *   4. Parse `settings.dockerExposedPorts` and, for a desktop-capable backend
 *      with desktop enabled that cannot expose ports on demand, append the
 *      desktop stream port and the terminal stream port when missing.
 *   5. Call the registration's `build(...)`; only the docker client is further
 *      wrapped with the docker network decoration.
 *
 * @param backend     Backend whose registration should build the client.
 * @param settings    Resolved runtime settings.
 * @param environment Sandbox environment; defaults to `collectSandboxEnvironment(settings)`.
 * @returns The provider client, or `undefined` for "none".
 * @throws SandboxConfigError when `backend` has no registry entry.
 */
export function createSandboxClientForBackend(
  backend: SandboxBackend,
  settings: Settings,
  environment = collectSandboxEnvironment(settings),
): unknown {
  const registration = PROVIDER_REGISTRY[backend];
  if (!registration) {
    throw new SandboxConfigError(backend, `Unknown sandbox backend "${backend}"`);
  }
  if (registration.backend === "none") {
    return undefined;
  }

  // Fail fast (typed) on missing/invalid credentials before building anything.
  registration.validateCredentials(settings);

  const exposedPorts = parseExposedPorts(settings.dockerExposedPorts);

  // Stream-port merge: a desktop-capable backend that must pre-declare its
  // ports (no on-demand exposure) has to carry both the desktop stream port and
  // the terminal stream port at construction time, otherwise resolving those
  // ports later on a fresh box cannot succeed. Backends with on-demand port
  // exposure are skipped. Both ports share one eligibility condition.
  const { descriptor } = registration;
  const mustPredeclareStreamPorts =
    descriptor.capabilities.DesktopStream.available
    && settings.sandboxDesktopEnabled
    && !descriptor.portExposure.supportsOnDemandPorts;
  if (mustPredeclareStreamPorts) {
    for (const streamPort of [DESKTOP_STREAM_PORT, TERMINAL_STREAM_PORT]) {
      if (!exposedPorts.includes(streamPort)) {
        exposedPorts.push(streamPort);
      }
    }
  }

  const raw = registration.build({ settings, environment, exposedPorts });

  // The network decoration is docker-specific; every other backend is returned untouched.
  if (registration.backend !== "docker") {
    return raw;
  }
  return withDockerNetwork(raw as SandboxClient, settings.dockerNetwork);
}
|
|
303
|
+
|
|
304
|
+
/**
 * Decorate a docker SandboxClient so every session it creates or resumes is
 * also attached to the given docker network.
 *
 * A missing or whitespace-only `network` means there is nothing to attach, and
 * the original client is handed back unchanged. Otherwise a new client object
 * is returned that mirrors only the members the wrapped client actually has;
 * `create` and `resume` additionally connect the session's container (read
 * from `session.state.containerId`) to the network before returning it.
 *
 * @param client  The docker client to wrap.
 * @param network Docker network name, possibly undefined/blank.
 * @returns `client` itself when no network is configured, else the wrapper.
 */
function withDockerNetwork(client: SandboxClient, network: string | undefined): SandboxClient {
  const networkName = network?.trim();
  if (!networkName) {
    return client;
  }

  // Connects the session's container to the network when the session exposes a
  // non-empty string container id; the session is always passed through as-is.
  const attachToNetwork = async <T extends SandboxSessionLike>(session: T): Promise<T> => {
    const sessionState = (session as { state?: { containerId?: unknown } }).state;
    const containerId = sessionState?.containerId;
    if (typeof containerId !== "string" || containerId.length === 0) {
      return session;
    }
    await connectDockerNetwork(networkName, containerId);
    return session;
  };

  // Each optional member is only present on the wrapper when the wrapped client
  // defines it. Calls stay in `client.method(...)` form so `this` is preserved.
  return {
    backendId: client.backendId,
    ...(client.supportsDefaultOptions !== undefined
      ? { supportsDefaultOptions: client.supportsDefaultOptions }
      : {}),
    ...(client.create
      ? { create: async (...createArgs: any[]) => await attachToNetwork(await (client.create as any)(...createArgs)) }
      : {}),
    ...(client.resume
      ? { resume: async (resumeState: SandboxSessionState) => await attachToNetwork(await client.resume!(resumeState)) }
      : {}),
    ...(client.delete
      ? { delete: async (deleteState: SandboxSessionState) => await client.delete!(deleteState) }
      : {}),
    ...(client.serializeSessionState
      ? { serializeSessionState: async (liveState: SandboxSessionState, options) => await client.serializeSessionState!(liveState, options) }
      : {}),
    ...(client.canPersistOwnedSessionState
      ? { canPersistOwnedSessionState: async (ownedState: SandboxSessionState) => await client.canPersistOwnedSessionState!(ownedState) }
      : {}),
    ...(client.canReusePreservedOwnedSession
      ? { canReusePreservedOwnedSession: async (preservedState: SandboxSessionState) => await client.canReusePreservedOwnedSession!(preservedState) }
      : {}),
    ...(client.deserializeSessionState
      ? { deserializeSessionState: async (storedState: Record<string, unknown>) => await client.deserializeSessionState!(storedState) }
      : {}),
  };
}
|
|
328
|
+
|
|
329
|
+
/**
 * Attach a container to a docker network by running
 * `docker network connect <network> <containerId>` through `Bun.spawnSync`.
 *
 * A zero exit code is success. A non-zero exit whose stderr contains
 * "already exists" is also treated as success, so re-attaching an already
 * connected container is not an error. Any other failure is raised.
 *
 * @param network     Docker network name.
 * @param containerId Container to attach.
 * @throws Error carrying the trimmed stderr when the connect genuinely fails.
 */
async function connectDockerNetwork(network: string, containerId: string): Promise<void> {
  const command = ["docker", "network", "connect", network, containerId];
  const outcome = Bun.spawnSync(command, { stdout: "pipe", stderr: "pipe" });
  if (outcome.exitCode === 0) {
    return;
  }

  const errorOutput = new TextDecoder().decode(outcome.stderr);
  const alreadyAttached = errorOutput.includes("already exists");
  if (!alreadyAttached) {
    throw new Error(`Failed to connect Docker sandbox container to network ${network}: ${errorOutput.trim()}`);
  }
}
|
|
343
|
+
|
|
344
|
+
/**
 * Pull the sandbox recovery entry out of a run state as a plain record, so it
 * can be stored separately from the RunState blob (issue #35). This is the one
 * place that reads the underscore-internal `_sandbox` field.
 *
 * Lookup order:
 *   1. `_sandbox.sessionsByAgent[_sandbox.currentAgentKey]`, when present.
 *   2. Otherwise, when `_sandbox` carries both `currentAgentKey` and
 *      `sessionState` directly, an entry assembled from its `backendId`,
 *      `currentAgentKey`, `currentAgentName` and `sessionState`.
 *
 * @param state A run state (or anything else; non-matching input yields null).
 * @returns The entry, or `null` when there is no `_sandbox`, no entry can be
 *          found, or the entry has no truthy `sessionState`.
 */
export function sandboxStateEntryFromRunState(state: unknown): Record<string, unknown> | null {
  const sandbox = (state as any)?._sandbox;
  if (!sandbox) {
    return null;
  }

  let candidate = sandbox.sessionsByAgent?.[sandbox.currentAgentKey];
  if (candidate === undefined || candidate === null) {
    // No per-agent entry: fall back to the fields stored directly on `_sandbox`.
    const hasInlineSession = sandbox.currentAgentKey && sandbox.sessionState;
    candidate = hasInlineSession
      ? {
          backendId: sandbox.backendId,
          currentAgentKey: sandbox.currentAgentKey,
          currentAgentName: sandbox.currentAgentName,
          sessionState: sandbox.sessionState,
        }
      : null;
  }

  // An entry without session state cannot be used for recovery.
  return candidate?.sessionState ? (candidate as Record<string, unknown>) : null;
}
|
|
368
|
+
|
|
369
|
+
/**
 * Rebuild the live sandbox session state from a stored entry (the shape
 * sandboxStateEntryFromRunState produces) rather than from a RunState blob.
 *
 * @param entry  Stored recovery entry; must be an object with a `sessionState` key.
 * @param client Sandbox client for the backend the entry was stored for.
 * @returns The deserialized session state, or `undefined` when there is no
 *          client or the entry is unusable.
 * @throws Error when the entry names a `backendId` different from the client's.
 */
export async function restoredSandboxSessionStateFromEntry(entry: Record<string, unknown>, client: unknown): Promise<SandboxSessionState | undefined> {
  if (!client) {
    return undefined;
  }
  if (!entry || typeof entry !== "object" || !("sessionState" in entry)) {
    return undefined;
  }

  const sandboxClient = client as SandboxClient;
  // Entries without a backendId are accepted as-is; a recorded backend must match the client.
  const backendMismatch = Boolean(entry.backendId) && sandboxClient.backendId !== entry.backendId;
  if (backendMismatch) {
    throw new Error("Stored sandbox envelope backend does not match the configured sandbox client");
  }

  return await deserializeSandboxSessionStateEnvelope(sandboxClient, entry.sessionState);
}
|
|
383
|
+
|
|
384
|
+
/**
 * Read the persisted /workspace snapshot archive off a lease envelope's
 * `sessionState` (sandbox-file-persistence). The archive is stored base64-
 * encoded at `sessionState.workspaceArchive`; per the persistence design it is
 * either a Modal native snapshot-ref or a tar archive, i.e. the bytes
 * `session.persistWorkspace()` returned, and cold-restore replays it through
 * `session.hydrateWorkspace(archive)` on the freshly created box.
 *
 * It is deliberately read SEPARATELY from deserializeSandboxSessionStateEnvelope:
 * the archive does not ride serializeSessionState, and the SDK's
 * deserializeSessionState must not receive it (it is an opaque runtime-level
 * field, not provider state).
 *
 * @param sessionState The envelope's `sessionState` value (any shape).
 * @returns The decoded archive bytes as a fresh `Uint8Array`, or `undefined`
 *          when `sessionState` is not an object, carries no non-empty string
 *          archive, or the string decodes to zero bytes.
 */
export function readWorkspaceArchiveFromEnvelopeSessionState(sessionState: unknown): Uint8Array | undefined {
  if (!sessionState || typeof sessionState !== "object") {
    return undefined;
  }
  const b64 = (sessionState as { workspaceArchive?: unknown }).workspaceArchive;
  if (typeof b64 !== "string" || b64.length === 0) {
    return undefined;
  }
  try {
    const decoded = Buffer.from(b64, "base64");
    // Buffer.from does NOT throw on malformed base64; it silently drops what it
    // cannot decode. A string with no decodable content therefore comes back as
    // an empty buffer, which is "no archive", not an archive to hydrate from.
    if (decoded.length === 0) {
      return undefined;
    }
    // Copy into a plain Uint8Array so callers never observe a Buffer subclass.
    return new Uint8Array(decoded);
  } catch {
    // Only reachable on allocation-level failures; treat as "no archive".
    return undefined;
  }
}
|
|
413
|
+
|
|
414
|
+
// The native snapshot-ref prefixes the @openai/agents-extensions modal client
// encodes (snapshots.mjs `NATIVE_SNAPSHOT_PREFIXES`). The ref is
// `<PREFIX>\n{"snapshot_id":"...",...}`. We re-implement the decode here because
// `@openai/agents-extensions/sandbox/shared` is NOT an exported subpath (the
// package `exports` map only exposes `./sandbox/<provider>`), so decodeNativeSnapshotRef
// is unreachable — same reasoning as isProviderSandboxNotFoundError below.
const MODAL_SNAPSHOT_REF_PREFIXES = [
  "MODAL_SANDBOX_FS_SNAPSHOT_V1\n",
  "MODAL_SANDBOX_DIR_SNAPSHOT_V1\n",
];

// How many leading bytes are enough to recognise any prefix: the longest prefix
// (pure ASCII, so one byte per character) plus 3 bytes for a UTF-8 byte-order
// mark, which TextDecoder strips before the text starts.
const MODAL_SNAPSHOT_REF_SNIFF_BYTES =
  Math.max(...MODAL_SNAPSHOT_REF_PREFIXES.map((prefix) => prefix.length)) + 3;

/**
 * Decode the Modal snapshot id out of a persisted archive ref. Used only for
 * keep-latest-per-lease snapshot GC.
 *
 * Only a bounded head of the archive is decoded to look for a known prefix; the
 * full buffer is turned into text only when a prefix matches. A tar payload is
 * therefore rejected without decoding the whole archive into a string.
 *
 * @param archive Raw archive bytes (a native snapshot ref or a tar payload).
 * @returns The non-empty `snapshot_id` string from the ref's JSON payload, or
 *          `undefined` when the bytes do not start with a known prefix, the
 *          payload is not parseable JSON, or it has no non-empty string id.
 */
export function decodeModalSnapshotId(archive: Uint8Array): string | undefined {
  let text: string;
  try {
    const head = new TextDecoder().decode(archive.subarray(0, MODAL_SNAPSHOT_REF_SNIFF_BYTES));
    if (!MODAL_SNAPSHOT_REF_PREFIXES.some((prefix) => head.startsWith(prefix))) {
      return undefined;
    }
    // It is a native ref, which is a short text document; decode it whole.
    text = new TextDecoder().decode(archive);
  } catch {
    return undefined;
  }

  const matchedPrefix = MODAL_SNAPSHOT_REF_PREFIXES.find((prefix) => text.startsWith(prefix));
  if (matchedPrefix === undefined) {
    return undefined;
  }
  try {
    const payload = JSON.parse(text.slice(matchedPrefix.length)) as { snapshot_id?: unknown } | null;
    const snapshotId = payload?.snapshot_id;
    return typeof snapshotId === "string" && snapshotId.length > 0 ? snapshotId : undefined;
  } catch {
    // Prefix matched but the payload is not valid JSON.
    return undefined;
  }
}
|
|
450
|
+
|
|
451
|
+
/**
 * Best-effort GC of a SUPERSEDED Modal filesystem/directory snapshot
 * (sandbox-file-persistence). Per the persistence notes, restoring a snapshot
 * terminates the previous sandbox but leaves the prior snapshot image behind,
 * so images would pile up across warm/cold cycles; the reaper keeps only the
 * latest per lease by handing the PRIOR archive here whenever it writes a new
 * one.
 *
 * The prior archive is base64-decoded, its snapshot id is extracted with
 * decodeModalSnapshotId, and the image is removed through the live session's
 * Modal client (`session.modal.images.delete(id)`). A failing delete never
 * surfaces: a leaked snapshot is a cost issue, not a correctness one.
 *
 * @param session            Live session; expected to expose `modal.images.delete`.
 * @param priorArchiveBase64 Base64 of the archive being superseded, if any.
 * @returns The deleted snapshot id, or `undefined` when nothing was deleted
 *          (no prior archive, a tar archive with no snapshot id, no delete API
 *          on the session, or the delete call failed).
 */
export async function deletePriorPersistedSnapshot(session: unknown, priorArchiveBase64: string | null | undefined): Promise<string | undefined> {
  if (!priorArchiveBase64) {
    return undefined;
  }

  let archiveBytes: Uint8Array;
  try {
    archiveBytes = Uint8Array.from(Buffer.from(priorArchiveBase64, "base64"));
  } catch {
    return undefined;
  }

  // Tar archives carry no provider snapshot id, so there is nothing to delete.
  const staleSnapshotId = decodeModalSnapshotId(archiveBytes);
  if (!staleSnapshotId) {
    return undefined;
  }

  const images = (session as { modal?: { images?: { delete?: (id: string) => Promise<unknown> } } }).modal?.images;
  const deleteImage = images?.delete;
  if (typeof deleteImage !== "function") {
    return undefined;
  }

  try {
    // Invoke with `images` as the receiver, exactly as a method call would.
    await deleteImage.call(images, staleSnapshotId);
    return staleSnapshotId;
  } catch {
    return undefined;
  }
}
|
|
489
|
+
|
|
490
|
+
/**
 * Turn a stored session-state envelope back into a live SandboxSessionState by
 * flattening it into the record shape `client.deserializeSessionState` takes.
 *
 * The flattened record is assembled in this order: every key of
 * `providerState`, then `manifest`, then `snapshot`, `snapshotFingerprint` and
 * `snapshotFingerprintVersion` (each only when not `undefined`), then
 * `workspaceReady`, then a structured clone of `exposedPorts` (only when
 * truthy). Envelope-level fields therefore override same-named provider keys.
 *
 * @param client   Sandbox client that will deserialize the state.
 * @param envelope Stored envelope (any shape).
 * @returns The deserialized state, or `undefined` when `envelope` is not an object.
 * @throws Error when the client does not implement `deserializeSessionState`.
 */
export async function deserializeSandboxSessionStateEnvelope(client: SandboxClient, envelope: unknown): Promise<SandboxSessionState | undefined> {
  if (!envelope || typeof envelope !== "object") {
    return undefined;
  }
  if (!client.deserializeSessionState) {
    throw new Error("Sandbox client must implement deserializeSessionState() to resume RunState sandbox state");
  }

  const stored = envelope as {
    providerState?: Record<string, unknown>;
    manifest?: unknown;
    snapshot?: unknown;
    snapshotFingerprint?: unknown;
    snapshotFingerprintVersion?: unknown;
    workspaceReady?: unknown;
    exposedPorts?: unknown;
  };

  const flattened: Record<string, unknown> = {
    ...(stored.providerState ?? {}),
    manifest: stored.manifest,
  };
  if (stored.snapshot !== undefined) {
    flattened.snapshot = stored.snapshot;
  }
  if (stored.snapshotFingerprint !== undefined) {
    flattened.snapshotFingerprint = stored.snapshotFingerprint;
  }
  if (stored.snapshotFingerprintVersion !== undefined) {
    flattened.snapshotFingerprintVersion = stored.snapshotFingerprintVersion;
  }
  flattened.workspaceReady = stored.workspaceReady;
  if (stored.exposedPorts) {
    // Cloned so the deserialized state never aliases the stored envelope.
    flattened.exposedPorts = structuredClone(stored.exposedPorts);
  }

  return await client.deserializeSessionState(flattened);
}
|
|
516
|
+
|
|
517
|
+
// ============================================================================
|
|
518
|
+
// The ONE resume / recovery primitive (P1.2).
|
|
519
|
+
//
|
|
520
|
+
// establishSandboxSessionFromEnvelope is the single re-establish-from-envelope
|
|
521
|
+
// path the stateless model leans on: a turn (or any API-direct op) resolves the
|
|
522
|
+
// group lease, hands us the recovery envelope, and gets back a LIVE non-owned
|
|
523
|
+
// session. On a warm box this is a no-lock warm reattach by id (Modal fromId,
|
|
524
|
+
// e2b reconnect — R4-safe, a stray second handle never spawns a second box).
|
|
525
|
+
// When the provider reports the box genuinely gone (NotFound) we cold-restore
|
|
526
|
+
// from the snapshot via create(). NEVER create() on any OTHER resume error
|
|
527
|
+
// (only on NotFound) — a resume-conflict means the box is alive and the caller
|
|
528
|
+
// must back off, not spawn a rival.
|
|
529
|
+
// ============================================================================
|
|
530
|
+
|
|
531
|
+
/**
 * A live, externally-owned sandbox session that was re-established from the
 * group lease envelope.
 *
 * The caller injects `{client, session, sessionState}` into the run as
 * NON-OWNED (or drives `session.exec` / `readFile` / `resolveExposedPort`
 * directly) and simply drops the handle when finished: the lease owns the box,
 * this handle does not.
 */
export type EstablishedSandboxSession = {
  /** Provider client paired with the session; typed loosely on purpose. */
  client: unknown;
  /** The live session handle. */
  session: unknown;
  /** Session state that accompanies `session`. */
  sessionState: unknown;
  /** Identifier of the sandbox instance (presumably the provider's box id — confirm at the producer). */
  instanceId: string;
  /** Identifier of the backend the session lives on. */
  backendId: string;
};
|
|
542
|
+
|
|
543
|
+
// Minimal structural view of a provider SandboxClient: just enough surface to
// resume a box by id and to cold-restore one. Deliberately narrower than the
// full agent-loop SandboxClient so this leaf module stays agent-loop-free.
// Every capability except `backendId` is optional; callers must feature-check
// before invoking.
type ResumeCapableClient = {
  backendId: string;
  deserializeSessionState?: (serialized: Record<string, unknown>) => Promise<unknown>;
  resume?: (sessionState: unknown, resumeOptions?: unknown) => Promise<unknown>;
  create?: (manifest?: unknown, createOptions?: unknown) => Promise<unknown>;
};
|
|
552
|
+
|
|
553
|
+
/**
 * Per-provider NotFound discriminator. The @openai/agents-extensions
 * `isProviderSandboxNotFoundError` / `assertResumeRecreateAllowed` helpers live
 * under `@openai/agents-extensions/sandbox/shared`, which is NOT an exported
 * subpath (the package `exports` map only exposes `./sandbox/<provider>`), so we
 * re-implement the discrimination here by inspecting the thrown error shape.
 *
 * "Box no longer running" (the box was reaped / idled out / 24h-ceiling) is the
 * ONLY error that licenses a cold-restore via create(). Every other resume
 * failure (transient provider error, auth, network) must propagate so the caller
 * backs off — never spawns a rival box. We err on the side of NOT recreating:
 * an unrecognized error is treated as "not NotFound" (propagate), because a
 * false-positive recreate is the dangerous direction (double-spawn).
 *
 * @param backendId - Backend the failing client belongs to. `"selfhosted"` is
 *   delegated wholesale to `isSelfhostedProviderNotFoundError`.
 * @param error - Whatever the provider's resume() threw (Error, string, or a
 *   plain object carrying `status` / `statusCode` / `name` / `code` / `message`).
 * @returns `true` only when the error reads as "the box is gone"; `false` for
 *   falsy errors, DNS `ENOTFOUND` transport failures, resume-conflicts
 *   ("already running" / "still running" / "already exists") and anything
 *   unrecognized.
 */
export function isProviderSandboxNotFoundError(backendId: string, error: unknown): boolean {
  // selfhosted: agent-offline is NEVER a provider NotFound (the user's machine is
  // not recreatable — a false NotFound would cold-create a RIVAL box). Delegate
  // to the selfhosted discriminator and short-circuit so no goneMarker string
  // match below can ever flip a selfhosted agent-offline error to true.
  if (backendId === "selfhosted") {
    return isSelfhostedProviderNotFoundError(error);
  }
  if (!error) {
    return false;
  }

  const shaped = error as {
    status?: unknown;
    statusCode?: unknown;
    name?: unknown;
    code?: unknown;
    message?: unknown;
  };

  // An explicit HTTP 404 from the provider is the clearest "gone" signal.
  const status = shaped.status ?? shaped.statusCode;
  if (status === 404) {
    return true;
  }

  const name = typeof shaped.name === "string" ? shaped.name : "";
  const code = typeof shaped.code === "string" ? shaped.code : "";
  const message = error instanceof Error
    ? error.message
    : typeof error === "string"
      ? error
      : String(shaped.message ?? "");
  const rawText = `${name} ${code} ${message}`;

  // Node reports a failed DNS lookup as code/message "ENOTFOUND" (e.g.
  // "getaddrinfo ENOTFOUND api.example.com"). That is a transport failure, not
  // a missing box, yet it would substring-match the "notfound" marker below.
  // Match the exact upper-case token so provider names such as
  // "ResourceNotFound" are unaffected.
  if (/\bENOTFOUND\b/.test(rawText)) {
    return false;
  }

  const haystack = rawText.toLowerCase();

  // A "running"/"already exists" resume-conflict is explicitly NOT NotFound — the
  // box is alive; recreating would double-spawn.
  const aliveMarkers = ["already running", "still running", "already exists"];
  if (aliveMarkers.some((marker) => haystack.includes(marker))) {
    return false;
  }

  // Provider-agnostic "gone" markers (Modal: "sandbox … not found" / terminated;
  // e2b/daytona/runloop: "not found" / "no longer running" / "terminated" /
  // "does not exist"). Kept broad-but-conservative: it matches box-gone phrasing
  // and is not meant to match generic 5xx/transport errors.
  const goneMarkers = [
    "not found",
    "no longer running",
    "no longer exists",
    "does not exist",
    "doesn't exist",
    "has been terminated",
    "was terminated",
    "is terminated",
    "sandbox terminated",
    "notfound",
    "sandbox_not_found",
    "box no longer running",
  ];
  return goneMarkers.some((marker) => haystack.includes(marker));
}
|
|
612
|
+
|
|
613
|
+
/**
 * Extract the provider-side box identity from a live session's `state`.
 *
 * Keys are probed in priority order — `sandboxId`, `instanceId`, `id`,
 * `hostId`, `containerId` — and the first one holding a NON-EMPTY STRING wins.
 * Unusable values (empty string, numbers, objects) are skipped rather than
 * masking a valid id stored under a later key.
 *
 * @param session - Provider session object; a nullish session or one without
 *   a `state` yields the empty string.
 * @returns The instance id, or `""` when none is present.
 */
function readInstanceId(session: unknown): string {
  const state = (session as { state?: Record<string, unknown> } | null | undefined)?.state ?? {};
  const candidateKeys = ["sandboxId", "instanceId", "id", "hostId", "containerId"] as const;
  for (const key of candidateKeys) {
    const candidate = state[key];
    if (typeof candidate === "string" && candidate.length > 0) {
      return candidate;
    }
  }
  return "";
}
|
|
618
|
+
|
|
619
|
+
/**
 * Resume the one box by id from its recovery envelope, or cold-restore from the
 * snapshot when the provider reports it gone. The envelope is the lease's
 * box-identity descriptor (the same per-turn `_sandbox` envelope upserted by the
 * turn activity). A null envelope means a cold session that was never warmed →
 * create() directly.
 *
 *  - `opts.backendOverride ?? envelope.backendId ?? settings.sandboxBackend`
 *    selects the backend; the client is built for THAT backend (resume-by-id is
 *    fenced to the original provider).
 *  - warm reattach: deserialize the envelope sessionState → client.resume(state)
 *    (no lock; R4-safe). On a provider NotFound, cold-restore via create().
 *  - cold restore / cold session: client.create() — the ONLY create() site.
 *
 * @param settings - Runtime settings; supplies the default backend and, when
 *   `opts.environment` is absent, the sandbox environment.
 * @param envelope - Lease recovery envelope (`{ backendId?, sessionState? }`),
 *   or `null` for a never-warmed session.
 * @param opts - `backendOverride` forces a backend; `environment` is used both
 *   to build the client and as the created box's manifest environment.
 *   `sessionId` is accepted but not read by this function.
 * @returns The live session together with its client, state, instance id and
 *   backend id.
 * @throws SandboxConfigError when no client can be built for the backend, the
 *   client lacks create(), or the envelope state fails to deserialize.
 * @throws The original resume() error when it is not a provider NotFound, and
 *   the original hydrateWorkspace() error after the placeholder box has been
 *   cleaned up on a best-effort basis.
 */
export async function establishSandboxSessionFromEnvelope(
  settings: Settings,
  envelope: Record<string, unknown> | null,
  opts: { sessionId: string; backendOverride?: SandboxBackend; environment?: Record<string, string> },
): Promise<EstablishedSandboxSession> {
  const envelopeBackend = typeof envelope?.backendId === "string" ? (envelope.backendId as SandboxBackend) : undefined;
  const backend = (opts.backendOverride ?? envelopeBackend ?? (settings.sandboxBackend as SandboxBackend));
  const environment = opts.environment ?? collectSandboxEnvironment(settings);
  const client = createSandboxClientForBackend(backend, settings, environment) as ResumeCapableClient | undefined;
  if (!client) {
    throw new SandboxConfigError(backend, `Cannot establish a sandbox session for backend "${backend}" (no client; sandboxBackend=none?)`);
  }
  if (!client.create) {
    throw new SandboxConfigError(backend, `Sandbox backend "${backend}" does not support create()`);
  }

  // The manifest the box is CREATED with. Its `environment` must equal the
  // environment the agent declares for this run (buildManifest's `environment`),
  // because the SDK injects this box NON-OWNED and then applies the agent's
  // manifest as a provided-session delta — `applyManifestToProvidedSession`
  // throws on ANY environment delta (validateNoEnvironmentDelta). The client's
  // constructor `env` materializes the RUNTIME env but does NOT populate
  // `manifest.environment` (a bare create() yields `new Manifest()` with an empty
  // environment), so the manifest env must be set here explicitly. `root` is left
  // to default to "/workspace" to match buildManifest's declared root (the
  // root-delta guard). The caller threads `opts.environment` = the SAME object
  // passed to runtime.buildAgent, so current==target and the delta is empty.
  const createManifest = { environment };

  // The serialized provider state the box was last persisted as. The envelope
  // shape is the per-turn `_sandbox` entry; its `sessionState` is the provider
  // payload deserializeSandboxSessionStateEnvelope re-hydrates.
  const envelopeSessionState = envelope && typeof envelope === "object" ? (envelope as { sessionState?: unknown }).sessionState : undefined;

  // The persisted /workspace snapshot the reaper folded onto the lease envelope
  // (sandbox-file-persistence). Present on a re-warm whose box was drain-persisted:
  //   - WARM reattach NotFound path (box gone, full envelope still has sandboxId);
  //   - COLD lease re-warm (confirmDrainCold preserved a MINIMAL archive-only
  //     envelope `{ sessionState: { workspaceArchive } }` — NO sandboxId, so the
  //     warm-reattach branch must NOT try resume()-by-id; it cold-creates+hydrates).
  const workspaceArchive = readWorkspaceArchiveFromEnvelopeSessionState(envelopeSessionState);

  // create() a FRESH box, THEN replay the persisted /workspace snapshot via
  // session.hydrateWorkspace(archive) when one rode the envelope. hydrateWorkspace
  // decodes the snapshot-ref and swaps the box for one booted from the snapshot
  // image (restoreSnapshotFilesystem); no archive -> a clean empty box. This is the
  // SOLE archive-replay seam, shared by the NotFound warm-reattach path AND the
  // cold-restore branch (b) below.
  const coldRestore = async (resumeFallbackState?: unknown): Promise<EstablishedSandboxSession> => {
    const restored = await client.create!({ manifest: createManifest });
    if (workspaceArchive) {
      const hydrate = (restored as { hydrateWorkspace?: (data: Uint8Array) => Promise<void> }).hydrateWorkspace;
      if (typeof hydrate === "function") {
        try {
          // hydrateWorkspace may internally REPLACE the underlying box
          // (restoreSnapshotFilesystem creates a replacement sandbox and terminates
          // the placeholder), so the instanceId must be re-read AFTER.
          await hydrate.call(restored, workspaceArchive);
        } catch (hydrateError) {
          // sandbox-file-persistence: if hydrateWorkspace throws (snapshot GC'd,
          // provider timeout, corrupt archive), the placeholder box created above is
          // live but unhydrated — it would leak up to the full idle/hard lifetime
          // (3600s) if we just re-throw. Best-effort delete/terminate it BEFORE
          // re-throwing so no box leaks. The original error semantics are preserved
          // (the re-throw propagates to the caller). This mirrors the reaper's
          // discipline: NEVER leave an orphaned box running.
          const orphanState = (restored as { state?: unknown }).state;
          const clientWithDelete = client as { delete?: (state: unknown) => Promise<unknown> };
          if (typeof clientWithDelete.delete === "function" && orphanState !== undefined) {
            try { await clientWithDelete.delete(orphanState); } catch { /* best-effort; re-throw the hydrate error below */ }
          } else {
            // No delete() — try a session-level close/terminate as a fallback.
            // The method MUST be invoked with the session as its receiver: a
            // detached call (`(a ?? b)?.()`) runs with `this === undefined`, so a
            // class-based terminate()/close() would throw inside the best-effort
            // catch and silently leave the box running.
            const sess = restored as { close?: () => Promise<unknown>; terminate?: () => Promise<unknown> };
            const stop = sess.terminate ?? sess.close;
            try { await stop?.call(restored); } catch { /* best-effort */ }
          }
          throw hydrateError;
        }
      }
    }
    const restoredState = (restored as { state?: unknown }).state;
    return { client, session: restored, sessionState: restoredState ?? resumeFallbackState, instanceId: readInstanceId(restored), backendId: client.backendId };
  };

  // Does the envelope carry a RESUMABLE box id (warm reattach), or only a
  // restorable archive (cold lease)? A Modal envelope with no providerState.sandboxId
  // (the minimal archive-only envelope confirmDrainCold preserves) is NOT resumable —
  // client.resume() would throw "requires a persisted sandboxId", which is NOT a
  // NotFound, so it would propagate instead of cold-restoring. Gate the resume
  // branch on a present sandbox identity so an archive-only envelope falls straight
  // through to the cold-restore+hydrate path (b).
  const envelopeProviderState = envelopeSessionState && typeof envelopeSessionState === "object"
    ? (envelopeSessionState as { providerState?: Record<string, unknown> }).providerState
    : undefined;
  const hasResumableInstance = Boolean(
    envelopeProviderState
      && typeof envelopeProviderState === "object"
      && (envelopeProviderState.sandboxId
        || envelopeProviderState.instanceId
        || envelopeProviderState.id
        || envelopeProviderState.containerId),
  );

  // (a) WARM REATTACH BY ID — only when the envelope carries a resumable box id.
  if (hasResumableInstance && envelopeSessionState && client.resume && client.deserializeSessionState) {
    let resumedState: unknown;
    try {
      resumedState = await deserializeSandboxSessionStateEnvelope(client as unknown as SandboxClient, envelopeSessionState);
    } catch (error) {
      throw new SandboxConfigError(backend, `Failed to deserialize sandbox resume envelope for backend "${backend}": ${error instanceof Error ? error.message : String(error)}`);
    }
    if (resumedState !== undefined) {
      try {
        const session = await client.resume(resumedState);
        return { client, session, sessionState: resumedState, instanceId: readInstanceId(session), backendId: client.backendId };
      } catch (error) {
        // ONLY a provider NotFound (box gone) licenses a cold-restore. Anything
        // else (transient/auth/network/resume-conflict) propagates: the caller
        // backs off and re-fences — NEVER spawns a rival box.
        if (!isProviderSandboxNotFoundError(client.backendId, error)) {
          throw error;
        }
        // COLD-RESTORE: the box is genuinely gone. Modal does NOT restore via
        // create({ snapshot }) — passing `snapshot` to ModalSandboxClient.create()
        // THROWS (assertCoreSnapshotUnsupported). Modal's real persistence is an
        // OPAQUE ARCHIVE captured by session.persistWorkspace() at reaper-drain
        // time and folded onto the lease envelope (sandbox-file-persistence). The
        // shared coldRestore() seam creates a fresh box and replays that archive.
        return await coldRestore(resumedState);
      }
    }
  }

  // (b) COLD SESSION / COLD LEASE — no resumable box id. create() a fresh box, and
  // if the envelope carries a persisted /workspace snapshot (the archive-only
  // envelope confirmDrainCold preserves across draining->cold), replay it so
  // /workspace survives the box churn (sandbox-file-persistence). No archive -> a
  // clean empty box (a never-warmed session).
  return await coldRestore();
}
|
|
772
|
+
|
|
773
|
+
// Minimal structural view of a provider client able to turn a LIVE session
// state back into its persistable envelope form — the inverse direction of
// deserializeSessionState. Kept narrow so this leaf module stays
// agent-loop-free; `serializeSessionState` is optional and must be
// feature-checked before use.
type SerializeCapableClient = {
  backendId: string;
  serializeSessionState?: (sessionState: unknown, serializeOptions?: unknown) => Promise<Record<string, unknown>>;
};
|
|
780
|
+
|
|
781
|
+
/**
 * Turn a freshly-established (or resumed) sandbox session into the persistable
 * `resume_state` envelope stored on the lease — the same
 * `{ backendId, sessionState }` shape that
 * `establishSandboxSessionFromEnvelope` reads back to RESUME BY ID.
 *
 * The API-direct control plane (viewer attach / Channel-A) MUST persist this
 * onto the lease at warm-commit time. Otherwise a later op, which reads the
 * lease's resume_state, has nothing to resume from and COLD-CREATES A RIVAL
 * BOX — the box-churn the prove-it surfaced (fs.write then fs.read 404'd on a
 * different box; N Channel-A ops leaked N boxes).
 *
 * @param established - The session handle to fold into an envelope.
 * @returns The envelope, or `null` when the client cannot serialize, there is
 *   no session state, or serialization throws. In the `null` case the caller
 *   stores null and the box rides the provider idle-timeout — no rival spawn,
 *   just no warm-reattach.
 */
export async function serializeEstablishedSandboxEnvelope(
  established: EstablishedSandboxSession,
): Promise<Record<string, unknown> | null> {
  const serializer = established.client as SerializeCapableClient | undefined;
  const serialize = serializer?.serializeSessionState;
  if (typeof serialize !== "function") {
    return null;
  }

  const liveState = established.sessionState;
  if (liveState == null) {
    return null;
  }

  try {
    // serializeSessionState yields the PERSISTABLE FLAT provider state — for
    // Modal `{ sandboxId, appName, imageTag, manifest(serialized),
    // configuredExposedPorts, ... }` (sandboxId preserved via `...state`).
    const providerState = (await serialize.call(serializer, liveState)) as Record<string, unknown>;

    // deserializeSandboxSessionStateEnvelope wants the lease-envelope shape
    // `{ providerState, manifest, snapshot?, exposedPorts?, workspaceReady }`
    // and rehydrates `{ ...providerState, manifest, snapshot?, exposedPorts?,
    // workspaceReady }`. The flat state therefore has to be nested under
    // `providerState` with manifest/ports lifted alongside it; otherwise
    // sandboxId is lost on the round-trip and resume() throws "requires a
    // persisted sandboxId". The lifted fields are left inside providerState as
    // well (harmless: deserialize spreads providerState first, then overlays
    // manifest/ports).
    const sessionState: Record<string, unknown> = { providerState };

    const liftedManifest = providerState.manifest;
    const liftedPorts = providerState.configuredExposedPorts ?? providerState.exposedPorts;
    if (liftedManifest !== undefined) {
      sessionState.manifest = liftedManifest;
    }
    if (liftedPorts !== undefined) {
      sessionState.exposedPorts = liftedPorts;
    }
    sessionState.workspaceReady = true;

    return { backendId: established.backendId, sessionState };
  } catch {
    // A serialize failure must NOT fail the attach/op; we only lose
    // warm-reattach for this box (it stays resumable-by-instance only via the
    // next cold path).
    return null;
  }
}
|