@opengeni/runtime 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ export { collectSandboxEnvironment, parseExposedPorts } from '@opengeni/config';
3
3
  import { SandboxBackend, CapabilityDescriptor, SandboxOs, SessionCapabilities, StreamTokenPayload, SessionEventType, SessionStructuredCapabilities, FsListRequest, FsListResponse, FsReadRequest, FsReadResponse, FsWriteRequest, FsWriteResponse, FsDeleteRequest, FsDeleteResponse, FsMoveRequest, FsMoveResponse, FsMkdirRequest, FsMkdirResponse, GitStatusRequest, GitStatusResponse, GitDiffRequest, GitDiffResponse, GitLogRequest, GitLogResponse, GitShowRequest, GitShowResponse, TerminalExecRequest, TerminalExecResponse, PtyOpenRequest, PtyOpenResponse, PtyWriteRequest, PtyResizeRequest, PtyCloseRequest, GitChangedPayload, GitDiffHunk, GitFileStatusCode, CapabilityUnavailableReason } from '@opengeni/contracts';
4
4
  export { CAPABILITY_DESCRIPTORS, CapabilityDescriptor, DESKTOP_STREAM_PORT, StreamTokenPayload, StreamTokenPayload as StreamTokenPayloadType, TERMINAL_STREAM_PORT } from '@opengeni/contracts';
5
5
  import { Manifest, SandboxClient, SandboxSessionState } from '@openai/agents/sandbox';
6
+ import * as modal from 'modal';
6
7
  import { ControlRequest, ControlResponse, ErrorCode, AgentError, DesktopInputRequest, ExecRequest, ExecResponse } from '@opengeni/agent-proto';
7
8
 
8
9
  /**
@@ -61,6 +62,38 @@ declare const PROVIDER_REGISTRY: Record<SandboxBackend, ProviderRegistration>;
61
62
  */
62
63
  declare function assertProviderRegistryInvariants(): void;
63
64
 
65
+ type ModalSandboxAttribution = {
66
+ leaseId: string;
67
+ workspaceId: string;
68
+ sandboxGroupId: string;
69
+ };
70
+ type LiveModalSandboxLeaseAttribution = ModalSandboxAttribution & {
71
+ instanceId: string | null;
72
+ liveness?: string;
73
+ };
74
+ type ModalOrphanSweepTermination = {
75
+ sandboxId: string;
76
+ reason: "stale_attribution" | "unattributed";
77
+ tags: Record<string, string>;
78
+ };
79
+ type ModalOrphanSweepResult = {
80
+ examined: number;
81
+ terminated: ModalOrphanSweepTermination[];
82
+ skipped: number;
83
+ };
84
+ declare function modalSandboxAttributionEnvironment(input: ModalSandboxAttribution): Record<string, string>;
85
+ declare function modalSandboxAttributionTags(input: ModalSandboxAttribution): Record<string, string>;
86
+ type ModalModule = typeof modal;
87
+ type ModalClientLike = InstanceType<ModalModule["ModalClient"]>;
88
+ declare function tagModalSandbox(settings: Settings, sandboxId: string, attribution: ModalSandboxAttribution): Promise<boolean>;
89
+ declare function terminateModalSandboxById(settings: Settings, sandboxId: string): Promise<boolean>;
90
+ declare function sweepModalOrphanSandboxes(settings: Settings, liveLeases: LiveModalSandboxLeaseAttribution[], options?: {
91
+ now?: Date;
92
+ maxTerminations?: number;
93
+ unattributedGraceMs?: number;
94
+ client?: ModalClientLike;
95
+ }): Promise<ModalOrphanSweepResult>;
96
+
64
97
  interface NegotiationContext {
65
98
  sessionId: string;
66
99
  backend: SandboxBackend;
@@ -1051,11 +1084,18 @@ declare class SelfhostedSession {
1051
1084
  /** Computer-use VIEW op: capture a single PNG screenshot of the machine's desktop
1052
1085
  * plus its geometry (via ScreenCaptureKit / x11). NOT consent-gated (a view op —
1053
1086
  * the view/control decoupling), so it works with a display but no screen-control
1054
- * consent. Returns the raw encoded bytes + width/height. */
1087
+ * consent. Returns the raw encoded bytes + the ENCODED width/height, plus the
1088
+ * NATIVE (pre-downscale) geometry: when the agent had to downscale the PNG to fit
1089
+ * the transport's max payload, `nativeWidth`/`nativeHeight` carry the original
1090
+ * capture size so the computer-use layer can scale model clicks (in encoded-pixel
1091
+ * space) back to native pixels. An older agent leaves them 0 → read as "same as
1092
+ * width/height" (no downscale). */
1055
1093
  screenshot(): Promise<{
1056
1094
  png: Uint8Array;
1057
1095
  width: number;
1058
1096
  height: number;
1097
+ nativeWidth: number;
1098
+ nativeHeight: number;
1059
1099
  }>;
1060
1100
  /** A cheap liveness probe — request a Ping on the subject; returns true iff a
1061
1101
  * responder answered (no AgentError). Used by `negotiateSelfhostedCapabilities`.
@@ -1375,6 +1415,8 @@ interface RoutableBackendSession {
1375
1415
  png: Uint8Array;
1376
1416
  width: number;
1377
1417
  height: number;
1418
+ nativeWidth: number;
1419
+ nativeHeight: number;
1378
1420
  }>;
1379
1421
  }
1380
1422
  /** The resolved active backend for an epoch: the live session + the sandbox id it
@@ -1457,6 +1499,8 @@ declare class RoutingSandboxSession implements RoutableBackendSession {
1457
1499
  png: Uint8Array;
1458
1500
  width: number;
1459
1501
  height: number;
1502
+ nativeWidth: number;
1503
+ nativeHeight: number;
1460
1504
  }>;
1461
1505
  constructor(deps: RoutingSandboxSessionDeps);
1462
1506
  /**
@@ -1689,6 +1733,7 @@ type EstablishedSandboxSession = {
1689
1733
  instanceId: string;
1690
1734
  backendId: string;
1691
1735
  };
1736
+ type SandboxCreatedCallback = (established: EstablishedSandboxSession) => Promise<void>;
1692
1737
  /**
1693
1738
  * Per-provider NotFound discriminator. The @openai/agents-extensions
1694
1739
  * `isProviderSandboxNotFoundError` / `assertResumeRecreateAllowed` helpers live
@@ -1722,6 +1767,7 @@ declare function establishSandboxSessionFromEnvelope(settings: Settings, envelop
1722
1767
  sessionId: string;
1723
1768
  backendOverride?: SandboxBackend;
1724
1769
  environment?: Record<string, string>;
1770
+ onSandboxCreated?: SandboxCreatedCallback;
1725
1771
  }): Promise<EstablishedSandboxSession>;
1726
1772
  /**
1727
1773
  * Fold a freshly-established (or resumed) sandbox session into the persistable
@@ -1737,4 +1783,4 @@ declare function establishSandboxSessionFromEnvelope(settings: Settings, envelop
1737
1783
  */
1738
1784
  declare function serializeEstablishedSandboxEnvelope(established: EstablishedSandboxSession): Promise<Record<string, unknown> | null>;
1739
1785
 
1740
- export { type ActiveBackendResolverDeps, ActiveBackendUnresolvableError, type ActivePointer, ChannelAConflictError, type ChannelAEmitter, type ChannelAExecArgs, type ChannelAExecResult, ChannelANotFoundError, type ChannelASession, ChannelAUnsupportedError, ChannelAValidationError, type ControlRpc, DEFAULT_DESKTOP_GEOMETRY, DISPLAY_STACK_TIMEOUT_MS, type DesktopGeometry, DisplayStackError, DisplayStackUnsupportedError, type EnsureDisplayStackOptions, type EnsureDisplayStackResult, type EnsureTerminalServerOptions, type EnsureTerminalServerResult, type EstablishedSandboxSession, type ExposeStreamPortInput, type ExposeStreamPortResult, type ExposedPortEndpoint, type FinalizeRecordingResult, type MintStreamTokenInput, MockAgentResponder, type MockAgentResponderOptions, type MockExecHandler, NatsControlRpc, type NatsRequestConnection, type NegotiationContext, type NumstatEntry, PROVIDER_REGISTRY, type ProviderConstructionContext, type ProviderRegistration, type RecordingCodec, type RecordingContentType, RecordingError, type RecordingProcess, RecordingUnavailableError, type ResolvedActiveBackend, type RoutableBackendSession, type RoutableSandbox, RoutingSandboxSession, type RoutingSandboxSessionDeps, type RoutingTransitionEvent, RoutingUnsupportedError, SELFHOSTED_DEFAULT_TIMEOUT_MS, SELFHOSTED_RECONNECT_WINDOW_MS, SELFHOSTED_RELAY_STREAM_PATH, STREAM_PORT, STREAM_TOKEN_DEFAULT_TTL_SECONDS, SandboxChannelAService, type SandboxChannelAServiceOptions, SandboxConfigError, SandboxProviderUnavailableError, type SelfhostedApplyDiff, SelfhostedControlError, type SelfhostedEditor, type SelfhostedEnrollment, type SelfhostedExecArgs, type SelfhostedExecResult, type SelfhostedImageOutput, type SelfhostedLivenessState, type SelfhostedNegotiationInput, type SelfhostedRelayConfig, SelfhostedSandboxClient, SelfhostedSession, type SelfhostedSessionBuild, type SelfhostedSessionDeps, type SelfhostedSessionState, type SelfhostedUnavailableReason, type StartRecordingInput, 
StreamPortUnavailableError, TERMINAL_SERVER_TIMEOUT_MS, TerminalServerError, TerminalServerUnsupportedError, agentErrorToControlError, assertDescriptorRegistryInvariants, assertProviderRegistryInvariants, assertSafeRelPath, backendSupportsOs, buildDisplayStackScript, buildSelfhostedBackendSession, buildStreamUrl, buildTerminalServerScript, contentTypeForCodec, createSandboxClient, createSandboxClientForBackend, decodeModalSnapshotId, deletePriorPersistedSnapshot, deleteRecordingArtifacts, deserializeSandboxSessionStateEnvelope, desktopCapableBackend, ensureDisplayStack, ensureTerminalServer, establishSandboxSessionFromEnvelope, exposeStreamPort, extForCodec, isExecSessionLostBanner, isProviderSandboxNotFoundError, isSelfhostedProviderNotFoundError, isWorkspaceEscapeError, makeActiveBackendResolver, mintStreamToken, negotiateCapabilities, negotiateSelfhostedCapabilities, offlineAgentError, offlineControlResponse, parseExecBannerSessionId, parseNumstatZ, parsePorcelainV2, parseUnifiedPatch, readRecordingBytes, readWorkspaceArchiveFromEnvelopeSessionState, recordingStorageKey, restoredSandboxSessionStateFromEntry, sandboxStateEntryFromRunState, selectBackend, selfhostedLiveness, serializeEstablishedSandboxEnvelope, setSelfhostedApplyDiff, startRecording, stopRecording, stripExecBanner, subjectFor, tearDownDisplayStack, tearDownTerminalServer, timeoutAgentError, timeoutControlResponse, verifyStreamToken };
1786
+ export { type ActiveBackendResolverDeps, ActiveBackendUnresolvableError, type ActivePointer, ChannelAConflictError, type ChannelAEmitter, type ChannelAExecArgs, type ChannelAExecResult, ChannelANotFoundError, type ChannelASession, ChannelAUnsupportedError, ChannelAValidationError, type ControlRpc, DEFAULT_DESKTOP_GEOMETRY, DISPLAY_STACK_TIMEOUT_MS, type DesktopGeometry, DisplayStackError, DisplayStackUnsupportedError, type EnsureDisplayStackOptions, type EnsureDisplayStackResult, type EnsureTerminalServerOptions, type EnsureTerminalServerResult, type EstablishedSandboxSession, type ExposeStreamPortInput, type ExposeStreamPortResult, type ExposedPortEndpoint, type FinalizeRecordingResult, type LiveModalSandboxLeaseAttribution, type MintStreamTokenInput, MockAgentResponder, type MockAgentResponderOptions, type MockExecHandler, type ModalOrphanSweepResult, type ModalSandboxAttribution, NatsControlRpc, type NatsRequestConnection, type NegotiationContext, type NumstatEntry, PROVIDER_REGISTRY, type ProviderConstructionContext, type ProviderRegistration, type RecordingCodec, type RecordingContentType, RecordingError, type RecordingProcess, RecordingUnavailableError, type ResolvedActiveBackend, type RoutableBackendSession, type RoutableSandbox, RoutingSandboxSession, type RoutingSandboxSessionDeps, type RoutingTransitionEvent, RoutingUnsupportedError, SELFHOSTED_DEFAULT_TIMEOUT_MS, SELFHOSTED_RECONNECT_WINDOW_MS, SELFHOSTED_RELAY_STREAM_PATH, STREAM_PORT, STREAM_TOKEN_DEFAULT_TTL_SECONDS, SandboxChannelAService, type SandboxChannelAServiceOptions, SandboxConfigError, type SandboxCreatedCallback, SandboxProviderUnavailableError, type SelfhostedApplyDiff, SelfhostedControlError, type SelfhostedEditor, type SelfhostedEnrollment, type SelfhostedExecArgs, type SelfhostedExecResult, type SelfhostedImageOutput, type SelfhostedLivenessState, type SelfhostedNegotiationInput, type SelfhostedRelayConfig, SelfhostedSandboxClient, SelfhostedSession, type SelfhostedSessionBuild, type 
SelfhostedSessionDeps, type SelfhostedSessionState, type SelfhostedUnavailableReason, type StartRecordingInput, StreamPortUnavailableError, TERMINAL_SERVER_TIMEOUT_MS, TerminalServerError, TerminalServerUnsupportedError, agentErrorToControlError, assertDescriptorRegistryInvariants, assertProviderRegistryInvariants, assertSafeRelPath, backendSupportsOs, buildDisplayStackScript, buildSelfhostedBackendSession, buildStreamUrl, buildTerminalServerScript, contentTypeForCodec, createSandboxClient, createSandboxClientForBackend, decodeModalSnapshotId, deletePriorPersistedSnapshot, deleteRecordingArtifacts, deserializeSandboxSessionStateEnvelope, desktopCapableBackend, ensureDisplayStack, ensureTerminalServer, establishSandboxSessionFromEnvelope, exposeStreamPort, extForCodec, isExecSessionLostBanner, isProviderSandboxNotFoundError, isSelfhostedProviderNotFoundError, isWorkspaceEscapeError, makeActiveBackendResolver, mintStreamToken, modalSandboxAttributionEnvironment, modalSandboxAttributionTags, negotiateCapabilities, negotiateSelfhostedCapabilities, offlineAgentError, offlineControlResponse, parseExecBannerSessionId, parseNumstatZ, parsePorcelainV2, parseUnifiedPatch, readRecordingBytes, readWorkspaceArchiveFromEnvelopeSessionState, recordingStorageKey, restoredSandboxSessionStateFromEntry, sandboxStateEntryFromRunState, selectBackend, selfhostedLiveness, serializeEstablishedSandboxEnvelope, setSelfhostedApplyDiff, startRecording, stopRecording, stripExecBanner, subjectFor, sweepModalOrphanSandboxes, tagModalSandbox, tearDownDisplayStack, tearDownTerminalServer, terminateModalSandboxById, timeoutAgentError, timeoutControlResponse, verifyStreamToken };
@@ -63,6 +63,8 @@ import {
63
63
  isWorkspaceEscapeError,
64
64
  makeActiveBackendResolver,
65
65
  mintStreamToken,
66
+ modalSandboxAttributionEnvironment,
67
+ modalSandboxAttributionTags,
66
68
  negotiateCapabilities,
67
69
  negotiateSelfhostedCapabilities,
68
70
  offlineAgentError,
@@ -85,12 +87,15 @@ import {
85
87
  stopRecording,
86
88
  stripExecBanner,
87
89
  subjectFor,
90
+ sweepModalOrphanSandboxes,
91
+ tagModalSandbox,
88
92
  tearDownDisplayStack,
89
93
  tearDownTerminalServer,
94
+ terminateModalSandboxById,
90
95
  timeoutAgentError,
91
96
  timeoutControlResponse,
92
97
  verifyStreamToken
93
- } from "../chunk-KNW7AMQB.js";
98
+ } from "../chunk-D5KU3QUC.js";
94
99
  export {
95
100
  ActiveBackendUnresolvableError,
96
101
  CAPABILITY_DESCRIPTORS,
@@ -156,6 +161,8 @@ export {
156
161
  isWorkspaceEscapeError,
157
162
  makeActiveBackendResolver,
158
163
  mintStreamToken,
164
+ modalSandboxAttributionEnvironment,
165
+ modalSandboxAttributionTags,
159
166
  negotiateCapabilities,
160
167
  negotiateSelfhostedCapabilities,
161
168
  offlineAgentError,
@@ -178,8 +185,11 @@ export {
178
185
  stopRecording,
179
186
  stripExecBanner,
180
187
  subjectFor,
188
+ sweepModalOrphanSandboxes,
189
+ tagModalSandbox,
181
190
  tearDownDisplayStack,
182
191
  tearDownTerminalServer,
192
+ terminateModalSandboxById,
183
193
  timeoutAgentError,
184
194
  timeoutControlResponse,
185
195
  verifyStreamToken
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opengeni/runtime",
3
- "version": "0.2.3",
3
+ "version": "0.3.0",
4
4
  "type": "module",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
@@ -29,8 +29,8 @@
29
29
  },
30
30
  "dependencies": {
31
31
  "@opengeni/agent-proto": "^0.2.1",
32
- "@opengeni/config": "^0.2.3",
33
- "@opengeni/contracts": "^0.5.0",
32
+ "@opengeni/config": "^0.2.4",
33
+ "@opengeni/contracts": "^0.6.0",
34
34
  "@openai/agents": "^0.11.6",
35
35
  "@openai/agents-extensions": "^0.11.6",
36
36
  "modal": "^0.7.4",
package/src/index.ts CHANGED
@@ -441,7 +441,7 @@ export class MultiProviderModelProvider implements ModelProvider {
441
441
 
442
442
  async getModel(modelName?: string): Promise<Model> {
443
443
  if (modelName) {
444
- const resolved = resolveTurnModel(this.settings, modelName);
444
+ const resolved = resolveTurnModel(settingsForRunScopedModelResolution(this.settings, modelName), modelName);
445
445
  if (resolved) {
446
446
  // Fail-loud floor (defense in depth): a `codex/<slug>` id must only ever
447
447
  // resolve through the synthetic codex-subscription provider (which installs
@@ -479,6 +479,27 @@ export class MultiProviderModelProvider implements ModelProvider {
479
479
  }
480
480
  }
481
481
 
482
/**
 * Chooses the Settings view the run-scoped model router resolves `modelName` against.
 *
 * The worker sets `openaiModel` to the turn's model. Namespaced registry ids are
 * already filtered away from the built-in entry, but a unique bare registry id
 * would be claimed by the built-in provider only because of that per-turn
 * override. When the requested name equals `openaiModel` yet is absent from the
 * built-in allow-list, resolution therefore runs against a copy whose
 * `openaiModel` is the allow-list head. Genuine built-in models (present in the
 * allow-list) and every other case get `settings` back unchanged (same reference).
 *
 * @param settings  Settings carrying `openaiModel` and the comma-separated `openaiAllowedModels`.
 * @param modelName The model id being resolved for this turn.
 * @returns `settings` itself, or a shallow copy with `openaiModel` replaced.
 */
function settingsForRunScopedModelResolution(settings: Settings, modelName: string): Settings {
  // Only the per-turn override (requested name === openaiModel) can mis-route.
  if (modelName !== settings.openaiModel) {
    return settings;
  }
  const allowList = splitOpenaiAllowedModels(settings.openaiAllowedModels);
  const [allowListHead] = allowList;
  // Empty allow-list: nothing to fall back to. Listed name: a real built-in model.
  if (allowListHead === undefined || allowList.includes(modelName)) {
    return settings;
  }
  // The name is not a built-in, so the first allow-listed id is the fallback.
  return { ...settings, openaiModel: allowListHead };
}
498
+
499
/**
 * Parses a comma-separated model allow-list into its individual ids.
 * Surrounding whitespace is stripped from each entry and empty entries are dropped;
 * the original order is kept.
 *
 * @param value Comma-separated list of model ids.
 * @returns The non-empty, trimmed ids in input order.
 */
function splitOpenaiAllowedModels(value: string): string[] {
  const ids: string[] = [];
  for (const piece of value.split(",")) {
    const id = piece.trim();
    if (id) {
      ids.push(id);
    }
  }
  return ids;
}
502
+
482
503
  /**
483
504
  * A `codex/<slug>` turn reached the model router but the workspace has no active
484
505
  * Codex subscription connected (the worker overlay never injected the synthetic
@@ -711,6 +732,14 @@ export type BuildAgentOptions = {
711
732
  // restyle the persona but never drop the goal-loop contract or environment
712
733
  // block.
713
734
  instructionsTemplate?: string;
735
+ // Per-SESSION persona/system instructions (the per-agent-type prompt lever an
736
+ // embedding host supplies at session create). Composed AFTER the workspace
737
+ // instructionsTemplate + the non-bypassable CORE, so it refines the workspace
738
+ // persona for this one session without dropping the goal-loop/environment
739
+ // contract. Rides the SAME instructions channel (system-level) — NEVER a user/
740
+ // timeline message. Omitted ⇒ the composed instructions are byte-identical to
741
+ // a workspace-only persona.
742
+ sessionInstructions?: string;
714
743
  // Skills delivered by enabled capability packs. They join the bundled
715
744
  // skills in the sandbox skill index (mounted under .agents/) so
716
745
  // skills/<name> references resolve like any other indexed skill.
@@ -793,6 +822,27 @@ export function composeAgentInstructions(template: string, workspaceEnvironment?
793
822
  return core ? `${template} ${core}` : template;
794
823
  }
795
824
 
825
/**
 * Adds the per-session persona instructions to the end of the already-composed
 * (workspace + CORE) instructions, separated by a single space (the same separator
 * the rest of the persona composition uses). The session slice goes LAST because
 * it is a session-specific refinement of the workspace persona.
 *
 * @param composed            The composed workspace + CORE instructions.
 * @param sessionInstructions Optional per-session persona text; trimmed before use.
 * @returns `composed` unchanged when the session text is absent or blank,
 *          otherwise `composed`, a space, and the trimmed session text.
 */
export function appendSessionInstructions(composed: string, sessionInstructions?: string): string {
  const persona = (sessionInstructions ?? "").trim();
  if (persona.length === 0) {
    return composed;
  }
  return [composed, persona].join(" ");
}
836
+
837
/**
 * Adds the one-shot genesis title directive to the end of the instructions,
 * separated by a single space. It is appended LAST so that neither a white-label
 * persona template nor a per-session instruction can drop it.
 *
 * @param instructions     The instructions composed so far.
 * @param genesisTitleHint Truthy on the genesis turn only.
 * @returns `instructions` unchanged when the hint is absent or false, otherwise
 *          `instructions` followed by the directive.
 */
export function appendGenesisTitleDirective(instructions: string, genesisTitleHint?: boolean): string {
  if (!genesisTitleHint) {
    return instructions;
  }
  return `${instructions} ${GENESIS_TITLE_DIRECTIVE}`;
}
845
+
796
846
  const agentFileDownloads = new WeakMap<object, SandboxFileDownload[]>();
797
847
  const agentRepositoryCloneHooks = new WeakMap<object, SandboxLifecycleHook[]>();
798
848
  // TOKEN-BROKER (B1): the per-turn git token seed, stashed alongside the agent's
@@ -837,9 +887,21 @@ export function buildOpenGeniAgent(settings: Settings, resources: ResourceRef[],
837
887
  // ownership + workspace-environment block) at the {{core}} marker, or
838
888
  // appends it when the template omits the marker. With the default template
839
889
  // and no environment this is byte-identical to the historical preamble.
840
- instructions: options.genesisTitleHint
841
- ? `${composeAgentInstructions(options.instructionsTemplate ?? settings.agentInstructionsTemplate, options.workspaceEnvironment)} ${GENESIS_TITLE_DIRECTIVE}`
842
- : composeAgentInstructions(options.instructionsTemplate ?? settings.agentInstructionsTemplate, options.workspaceEnvironment),
890
+ // Persona composition order (all one system-level instructions string):
891
+ // 1. workspace instructionsTemplate (or deployment default) with the
892
+ // non-bypassable CORE substituted at {{core}} — composeAgentInstructions,
893
+ // 2. + the per-session persona instructions (session-specific, LAST so it
894
+ // refines the workspace persona),
895
+ // 3. + the one-shot genesis title directive (genesis turn only).
896
+ // With no session instructions and no genesis hint this is byte-identical to
897
+ // the historical composed instructions.
898
+ instructions: appendGenesisTitleDirective(
899
+ appendSessionInstructions(
900
+ composeAgentInstructions(options.instructionsTemplate ?? settings.agentInstructionsTemplate, options.workspaceEnvironment),
901
+ options.sessionInstructions,
902
+ ),
903
+ options.genesisTitleHint,
904
+ ),
843
905
  modelSettings: {
844
906
  reasoning: { effort: options.reasoningEffort ?? settings.openaiReasoningEffort, summary: "detailed" },
845
907
  // Server-side compaction (OpenAI platform) requires store=false: the
@@ -32,10 +32,34 @@ export const STREAM_PORT = DESKTOP_STREAM_PORT;
32
32
  export const DISPLAY_STACK_TIMEOUT_MS = 90_000;
33
33
 
34
34
  // PAINTABLE-FRAME gate: poll scrot up to this many times, this many seconds apart,
35
- // waiting for a non-empty frame before declaring the stack "up" (~30s worst case).
35
+ // waiting for an actually-PAINTED frame before declaring the stack "up" (~30s worst case).
36
36
  const PAINT_PROBE_ATTEMPTS = 150;
37
37
  const PAINT_PROBE_INTERVAL_S = 0.2;
38
38
 
39
+ // The paint FLOOR (bytes): a scrot at/above this size is a real painted desktop; below
40
+ // it, the root is still unpainted and the frame would read as "blank" to the model.
41
+ //
42
+ // WHY A SIZE FLOOR, NOT NON-EMPTINESS (the bug this fixes): the old gate only checked
43
+ // `[ -s frame.png ]` (non-empty). But an UNPAINTED root is never zero-byte — a fresh
44
+ // Xvfb draws either the `-retro` weave stipple or (with `-retro` dropped) solid black,
45
+ // and scrot happily encodes that as a small-but-non-empty PNG. So the old gate passed
46
+ // the instant the VNC ports bound — MEASURED at ~1.4s (fast runc host) to several
47
+ // seconds (cold gVisor) BEFORE xfdesktop finishes its first wallpaper paint — handing
48
+ // the model the pre-paint frame. That pre-paint frame is exactly the "blank/black"
49
+ // screenshot that 400s the model and blanks the human viewer.
50
+ //
51
+ // The sizes are unambiguous and were measured on the canonical desktop image (1280x800)
52
+ // under runc — both the current staging image and a fresh local build:
53
+ // painted XFCE desktop (blue-gradient wallpaper + panel + icons): ~210-222 KB
54
+ // `-retro` stipple root (unpainted, current image): ~17 KB
55
+ // solid-black root (unpainted, after we drop `-retro`): ~13.5 KB
56
+ // 60 KB sits ~3.5x above every unpainted state and ~3.5x below a real paint — a wide,
57
+ // unambiguous margin. It holds against BOTH the currently-deployed `-retro` image and
58
+ // the `-retro`-dropped image this change ships, so the runtime gate is correct before
59
+ // AND after the image rebuild lands. (Assumes the default ~1280x800 geometry; a larger
60
+ // framebuffer only scales the painted frame further above the floor.)
61
+ const PAINT_MIN_BYTES = 60_000;
62
+
39
63
  /** Desktop geometry for the framebuffer. v1 has no live RANDR: a resolution
40
64
  * change is a full down -> up restart (a separate op). */
41
65
  export type DesktopGeometry = {
@@ -145,18 +169,25 @@ export function buildDisplayStackScript(options: EnsureDisplayStackOptions = {})
145
169
  // PAINTABLE-FRAME GATE (the completion criterion): the up-script's readiness gates
146
170
  // only assert that Xvfb answers xdpyinfo and that x11vnc:5900 + websockify:PORT are
147
171
  // LISTENING — NOT that the display actually PAINTS. On a stone-cold gVisor box (the
148
- // machine→sandbox swap-recovery turn always hits one), Xvfb can answer and the VNC
149
- // ports can bind seconds BEFORE the root window / XFCE compositor is drawable, so a
150
- // scrot right after the `OPENGENI_DESKTOP_UP` marker yields a ZERO-BYTE frame which
151
- // is exactly the empty screenshot that 400s the model and blanks the human viewer.
172
+ // machine→sandbox swap-recovery turn always hits one), Xvfb answers and the VNC ports
173
+ // bind ~1.4s (fast host) to several seconds BEFORE xfdesktop finishes its first
174
+ // wallpaper paint. In that window a scrot yields a small UNPAINTED frame (the -retro
175
+ // stipple or a solid-black root) — never zero-byte — which is exactly the "blank/black"
176
+ // screenshot that 400s the model and blanks the human viewer. (VERIFIED locally: the
177
+ // real xfdesktop backdrop window maps at full 1280x800 the whole time; the render is
178
+ // never structurally broken — it is purely this pre-paint capture race.)
179
+ //
152
180
  // We therefore chain a real scrot probe as the completion gate: after the up-script
153
- // reports success, poll scrot until it produces a NON-EMPTY frame (bounded ~30s), and
154
- // only THEN let the command exit 0. If it never paints we exit 14 so the caller sees a
155
- // typed DisplayStackError("paint") an HONEST failure the worker can degrade + log,
156
- // rather than a false "up" that hands the model an empty image. `-ac` on Xvfb disables
181
+ // reports success, poll scrot until it produces an actually-PAINTED frame — a PNG at or
182
+ // above PAINT_MIN_BYTES, not merely NON-EMPTY (the old `[ -s ]` check passed on the
183
+ // ~17 KB pre-paint stipple immediately; that WAS the bug) — bounded ~30s, and only THEN
184
+ // let the command exit 0. If it never paints we exit 14 so the caller sees a typed
185
+ // DisplayStackError("paint") — an HONEST failure the worker can degrade + log, rather
186
+ // than a false "up" that hands the model an unpainted image. `-ac` on Xvfb disables
157
187
  // access control so this root-side scrot reaches :0. Runs on a pre-check hit too (cheap
158
188
  // — an already-up display paints on the first probe). Lives in the runtime-built script
159
- // (not the baked image up-script) so it ships with the worker/api, no image rebuild.
189
+ // (not the baked image up-script) so it ships with the worker/api, no image rebuild —
190
+ // and its size floor holds against the currently-deployed image too.
160
191
  const bringUp =
161
192
  `if nc -z 127.0.0.1 ${port} >/dev/null 2>&1 && nc -z 127.0.0.1 5900 >/dev/null 2>&1; then ` +
162
193
  `echo "OPENGENI_DESKTOP_UP port=${port} geometry=${geometry.width}x${geometry.height} dpi=${geometry.dpi} (precheck)"; ` +
@@ -168,11 +199,15 @@ export function buildDisplayStackScript(options: EnsureDisplayStackOptions = {})
168
199
  const paintProbe =
169
200
  `p=/tmp/opengeni-desktop/paint-probe.png; ` +
170
201
  `for i in $(seq 1 ${PAINT_PROBE_ATTEMPTS}); do ` +
171
- `if DISPLAY=:0 scrot -o "$p" >/dev/null 2>&1 && [ -s "$p" ]; then rm -f "$p"; break; fi; ` +
202
+ // Capture, then measure the PNG byte-size. `wc -c < "$p"` yields a bare integer; a
203
+ // failed scrot leaves sz=0. A frame at/above PAINT_MIN_BYTES is a real painted desktop.
204
+ `if DISPLAY=:0 scrot -o "$p" >/dev/null 2>&1; then sz=$(wc -c < "$p" 2>/dev/null || echo 0); else sz=0; fi; ` +
172
205
  `rm -f "$p"; ` +
206
+ `if [ "$sz" -ge ${PAINT_MIN_BYTES} ]; then break; fi; ` +
173
207
  // NOTE: NOT_PAINTING goes to STDOUT (not stderr): Modal is execCommand-only, so the
174
208
  // caller infers the outcome by string-matching the output — stdout is always captured.
175
- `if [ "$i" = "${PAINT_PROBE_ATTEMPTS}" ]; then echo "OPENGENI_DESKTOP_NOT_PAINTING scrot empty after warmup"; exit 14; fi; ` +
209
+ // ($sz is bare shell here — no ${} braces so JS leaves it for bash to expand.)
210
+ `if [ "$i" = "${PAINT_PROBE_ATTEMPTS}" ]; then echo "OPENGENI_DESKTOP_NOT_PAINTING scrot below ${PAINT_MIN_BYTES}B after warmup (last=$sz)"; exit 14; fi; ` +
176
211
  `sleep ${PAINT_PROBE_INTERVAL_S}; ` +
177
212
  `done`;
178
213
  return `mkdir -p /tmp/opengeni-desktop; { ${bringUp} ; } && { ${paintProbe} ; }`;
@@ -53,6 +53,16 @@ export {
53
53
  type ProviderRegistration,
54
54
  type ProviderConstructionContext,
55
55
  } from "./providers";
56
+ export {
57
+ modalSandboxAttributionEnvironment,
58
+ modalSandboxAttributionTags,
59
+ sweepModalOrphanSandboxes,
60
+ tagModalSandbox,
61
+ terminateModalSandboxById,
62
+ type LiveModalSandboxLeaseAttribution,
63
+ type ModalOrphanSweepResult,
64
+ type ModalSandboxAttribution,
65
+ } from "./providers/modal";
56
66
  export {
57
67
  selectBackend,
58
68
  backendSupportsOs,
@@ -540,6 +550,8 @@ export type EstablishedSandboxSession = {
540
550
  backendId: string;
541
551
  };
542
552
 
553
+ export type SandboxCreatedCallback = (established: EstablishedSandboxSession) => Promise<void>;
554
+
543
555
  // The structural slice we need from a provider SandboxClient to resume by id and
544
556
  // cold-restore. Narrowed (not the full agent-loop SandboxClient) so the leaf
545
557
  // stays agent-loop-free.
@@ -616,6 +628,16 @@ function readInstanceId(session: unknown): string {
616
628
  return typeof candidate === "string" && candidate.length > 0 ? candidate : "";
617
629
  }
618
630
 
631
/**
 * Best-effort teardown of a sandbox that was just created, so a failure later in
 * establishment does not leave the box running. Never throws: every teardown error
 * is swallowed so the caller can re-throw its own original error.
 *
 * Strategy:
 *  1. If the client exposes `delete()` and a session state is available, delete by
 *     state and stop there (even when the delete itself fails).
 *  2. Otherwise call the first available session-level method, in the order
 *     `terminate`, `kill`, `close`.
 *
 * @param client       Provider client; probed for an optional `delete(state)` method.
 * @param session      The created session object; probed for `terminate`/`kill`/`close`.
 * @param sessionState The session's state as handed to `client.delete`; `undefined`
 *                     forces the session-level fallback.
 */
async function terminateCreatedSandbox(client: ResumeCapableClient, session: unknown, sessionState: unknown): Promise<void> {
  const clientWithDelete = client as { delete?: (state: unknown) => Promise<unknown> };
  if (typeof clientWithDelete.delete === "function" && sessionState !== undefined) {
    try {
      await clientWithDelete.delete(sessionState);
    } catch {
      /* best-effort */
    }
    return;
  }
  try {
    const sess = session as { close?: () => Promise<unknown>; terminate?: () => Promise<unknown>; kill?: () => Promise<unknown> };
    const stop = sess.terminate ?? sess.kill ?? sess.close;
    // Invoke with `sess` as the receiver. Calling the picked method detached
    // (`(a ?? b)?.()`) runs it with `this === undefined`, so a class-based session
    // throws inside the method, the error is swallowed below, and the sandbox
    // is never actually terminated.
    await stop?.call(sess);
  } catch {
    /* best-effort */
  }
}
640
+
619
641
  /**
620
642
  * Resume the one box by id from its recovery envelope, or cold-restore from the
621
643
  * snapshot when the provider reports it gone. The envelope is the lease's
@@ -633,7 +655,12 @@ function readInstanceId(session: unknown): string {
633
655
  export async function establishSandboxSessionFromEnvelope(
634
656
  settings: Settings,
635
657
  envelope: Record<string, unknown> | null,
636
- opts: { sessionId: string; backendOverride?: SandboxBackend; environment?: Record<string, string> },
658
+ opts: {
659
+ sessionId: string;
660
+ backendOverride?: SandboxBackend;
661
+ environment?: Record<string, string>;
662
+ onSandboxCreated?: SandboxCreatedCallback;
663
+ },
637
664
  ): Promise<EstablishedSandboxSession> {
638
665
  const envelopeBackend = typeof envelope?.backendId === "string" ? (envelope.backendId as SandboxBackend) : undefined;
639
666
  const backend = (opts.backendOverride ?? envelopeBackend ?? (settings.sandboxBackend as SandboxBackend));
@@ -680,6 +707,22 @@ export async function establishSandboxSessionFromEnvelope(
680
707
  // cold-restore branch (b) below.
681
708
  const coldRestore = async (resumeFallbackState?: unknown): Promise<EstablishedSandboxSession> => {
682
709
  const restored = await client.create!({ manifest: createManifest });
710
+ let restoredState = (restored as { state?: unknown }).state;
711
+ let established: EstablishedSandboxSession = {
712
+ client,
713
+ session: restored,
714
+ sessionState: restoredState ?? resumeFallbackState,
715
+ instanceId: readInstanceId(restored),
716
+ backendId: client.backendId,
717
+ };
718
+ if (opts.onSandboxCreated) {
719
+ try {
720
+ await opts.onSandboxCreated(established);
721
+ } catch (createCallbackError) {
722
+ await terminateCreatedSandbox(client, restored, restoredState);
723
+ throw createCallbackError;
724
+ }
725
+ }
683
726
  if (workspaceArchive) {
684
727
  const hydrate = (restored as { hydrateWorkspace?: (data: Uint8Array) => Promise<void> }).hydrateWorkspace;
685
728
  if (typeof hydrate === "function") {
@@ -696,21 +739,38 @@ export async function establishSandboxSessionFromEnvelope(
696
739
  // re-throwing so no box leaks. The original error semantics are preserved
697
740
  // (the re-throw propagates to the caller). This mirrors the reaper's
698
741
  // discipline: NEVER leave an orphaned box running.
699
- const restoredState = (restored as { state?: unknown }).state;
700
- const clientWithDelete = client as { delete?: (state: unknown) => Promise<unknown> };
701
- if (typeof clientWithDelete.delete === "function" && restoredState !== undefined) {
702
- try { await clientWithDelete.delete(restoredState); } catch { /* best-effort; re-throw the hydrate error below */ }
703
- } else {
704
- // No delete() — try a session-level close/terminate as a fallback.
705
- const sess = restored as { close?: () => Promise<unknown>; terminate?: () => Promise<unknown> };
706
- try { await (sess.terminate ?? sess.close)?.(); } catch { /* best-effort */ }
707
- }
742
+ await terminateCreatedSandbox(client, restored, restoredState);
708
743
  throw hydrateError;
709
744
  }
745
+ const hydratedState = (restored as { state?: unknown }).state;
746
+ const hydratedInstanceId = readInstanceId(restored);
747
+ if (hydratedInstanceId && hydratedInstanceId !== established.instanceId) {
748
+ established = {
749
+ client,
750
+ session: restored,
751
+ sessionState: hydratedState ?? resumeFallbackState,
752
+ instanceId: hydratedInstanceId,
753
+ backendId: client.backendId,
754
+ };
755
+ if (opts.onSandboxCreated) {
756
+ try {
757
+ await opts.onSandboxCreated(established);
758
+ } catch (createCallbackError) {
759
+ await terminateCreatedSandbox(client, restored, hydratedState);
760
+ throw createCallbackError;
761
+ }
762
+ }
763
+ }
710
764
  }
711
765
  }
712
- const restoredState = (restored as { state?: unknown }).state;
713
- return { client, session: restored, sessionState: restoredState ?? resumeFallbackState, instanceId: readInstanceId(restored), backendId: client.backendId };
766
+ restoredState = (restored as { state?: unknown }).state;
767
+ return {
768
+ client,
769
+ session: restored,
770
+ sessionState: restoredState ?? resumeFallbackState,
771
+ instanceId: readInstanceId(restored),
772
+ backendId: client.backendId,
773
+ };
714
774
  };
715
775
 
716
776
  // Does the envelope carry a RESUMABLE box id (warm reattach), or only a