@opengeni/runtime 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-KNW7AMQB.js → chunk-D5KU3QUC.js} +231 -21
- package/dist/chunk-D5KU3QUC.js.map +1 -0
- package/dist/index.d.ts +18 -2
- package/dist/index.js +82 -6
- package/dist/index.js.map +1 -1
- package/dist/sandbox/index.d.ts +48 -2
- package/dist/sandbox/index.js +11 -1
- package/package.json +3 -3
- package/src/index.ts +66 -4
- package/src/sandbox/display-stack.ts +47 -12
- package/src/sandbox/index.ts +72 -12
- package/src/sandbox/providers/modal.ts +225 -0
- package/src/sandbox/routing/routing-session.ts +2 -2
- package/src/sandbox/selfhosted/session.ts +21 -5
- package/src/sandbox-computer.ts +52 -17
- package/dist/chunk-KNW7AMQB.js.map +0 -1
package/dist/sandbox/index.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ export { collectSandboxEnvironment, parseExposedPorts } from '@opengeni/config';
|
|
|
3
3
|
import { SandboxBackend, CapabilityDescriptor, SandboxOs, SessionCapabilities, StreamTokenPayload, SessionEventType, SessionStructuredCapabilities, FsListRequest, FsListResponse, FsReadRequest, FsReadResponse, FsWriteRequest, FsWriteResponse, FsDeleteRequest, FsDeleteResponse, FsMoveRequest, FsMoveResponse, FsMkdirRequest, FsMkdirResponse, GitStatusRequest, GitStatusResponse, GitDiffRequest, GitDiffResponse, GitLogRequest, GitLogResponse, GitShowRequest, GitShowResponse, TerminalExecRequest, TerminalExecResponse, PtyOpenRequest, PtyOpenResponse, PtyWriteRequest, PtyResizeRequest, PtyCloseRequest, GitChangedPayload, GitDiffHunk, GitFileStatusCode, CapabilityUnavailableReason } from '@opengeni/contracts';
|
|
4
4
|
export { CAPABILITY_DESCRIPTORS, CapabilityDescriptor, DESKTOP_STREAM_PORT, StreamTokenPayload, StreamTokenPayload as StreamTokenPayloadType, TERMINAL_STREAM_PORT } from '@opengeni/contracts';
|
|
5
5
|
import { Manifest, SandboxClient, SandboxSessionState } from '@openai/agents/sandbox';
|
|
6
|
+
import * as modal from 'modal';
|
|
6
7
|
import { ControlRequest, ControlResponse, ErrorCode, AgentError, DesktopInputRequest, ExecRequest, ExecResponse } from '@opengeni/agent-proto';
|
|
7
8
|
|
|
8
9
|
/**
|
|
@@ -61,6 +62,38 @@ declare const PROVIDER_REGISTRY: Record<SandboxBackend, ProviderRegistration>;
|
|
|
61
62
|
*/
|
|
62
63
|
declare function assertProviderRegistryInvariants(): void;
|
|
63
64
|
|
|
65
|
+
type ModalSandboxAttribution = {
|
|
66
|
+
leaseId: string;
|
|
67
|
+
workspaceId: string;
|
|
68
|
+
sandboxGroupId: string;
|
|
69
|
+
};
|
|
70
|
+
type LiveModalSandboxLeaseAttribution = ModalSandboxAttribution & {
|
|
71
|
+
instanceId: string | null;
|
|
72
|
+
liveness?: string;
|
|
73
|
+
};
|
|
74
|
+
type ModalOrphanSweepTermination = {
|
|
75
|
+
sandboxId: string;
|
|
76
|
+
reason: "stale_attribution" | "unattributed";
|
|
77
|
+
tags: Record<string, string>;
|
|
78
|
+
};
|
|
79
|
+
type ModalOrphanSweepResult = {
|
|
80
|
+
examined: number;
|
|
81
|
+
terminated: ModalOrphanSweepTermination[];
|
|
82
|
+
skipped: number;
|
|
83
|
+
};
|
|
84
|
+
declare function modalSandboxAttributionEnvironment(input: ModalSandboxAttribution): Record<string, string>;
|
|
85
|
+
declare function modalSandboxAttributionTags(input: ModalSandboxAttribution): Record<string, string>;
|
|
86
|
+
type ModalModule = typeof modal;
|
|
87
|
+
type ModalClientLike = InstanceType<ModalModule["ModalClient"]>;
|
|
88
|
+
declare function tagModalSandbox(settings: Settings, sandboxId: string, attribution: ModalSandboxAttribution): Promise<boolean>;
|
|
89
|
+
declare function terminateModalSandboxById(settings: Settings, sandboxId: string): Promise<boolean>;
|
|
90
|
+
declare function sweepModalOrphanSandboxes(settings: Settings, liveLeases: LiveModalSandboxLeaseAttribution[], options?: {
|
|
91
|
+
now?: Date;
|
|
92
|
+
maxTerminations?: number;
|
|
93
|
+
unattributedGraceMs?: number;
|
|
94
|
+
client?: ModalClientLike;
|
|
95
|
+
}): Promise<ModalOrphanSweepResult>;
|
|
96
|
+
|
|
64
97
|
interface NegotiationContext {
|
|
65
98
|
sessionId: string;
|
|
66
99
|
backend: SandboxBackend;
|
|
@@ -1051,11 +1084,18 @@ declare class SelfhostedSession {
|
|
|
1051
1084
|
/** Computer-use VIEW op: capture a single PNG screenshot of the machine's desktop
|
|
1052
1085
|
* plus its geometry (via ScreenCaptureKit / x11). NOT consent-gated (a view op —
|
|
1053
1086
|
* the view/control decoupling), so it works with a display but no screen-control
|
|
1054
|
-
* consent. Returns the raw encoded bytes + width/height
|
|
1087
|
+
* consent. Returns the raw encoded bytes + the ENCODED width/height, plus the
|
|
1088
|
+
* NATIVE (pre-downscale) geometry: when the agent had to downscale the PNG to fit
|
|
1089
|
+
* the transport's max payload, `nativeWidth`/`nativeHeight` carry the original
|
|
1090
|
+
* capture size so the computer-use layer can scale model clicks (in encoded-pixel
|
|
1091
|
+
* space) back to native pixels. An older agent leaves them 0 → read as "same as
|
|
1092
|
+
* width/height" (no downscale). */
|
|
1055
1093
|
screenshot(): Promise<{
|
|
1056
1094
|
png: Uint8Array;
|
|
1057
1095
|
width: number;
|
|
1058
1096
|
height: number;
|
|
1097
|
+
nativeWidth: number;
|
|
1098
|
+
nativeHeight: number;
|
|
1059
1099
|
}>;
|
|
1060
1100
|
/** A cheap liveness probe — request a Ping on the subject; returns true iff a
|
|
1061
1101
|
* responder answered (no AgentError). Used by `negotiateSelfhostedCapabilities`.
|
|
@@ -1375,6 +1415,8 @@ interface RoutableBackendSession {
|
|
|
1375
1415
|
png: Uint8Array;
|
|
1376
1416
|
width: number;
|
|
1377
1417
|
height: number;
|
|
1418
|
+
nativeWidth: number;
|
|
1419
|
+
nativeHeight: number;
|
|
1378
1420
|
}>;
|
|
1379
1421
|
}
|
|
1380
1422
|
/** The resolved active backend for an epoch: the live session + the sandbox id it
|
|
@@ -1457,6 +1499,8 @@ declare class RoutingSandboxSession implements RoutableBackendSession {
|
|
|
1457
1499
|
png: Uint8Array;
|
|
1458
1500
|
width: number;
|
|
1459
1501
|
height: number;
|
|
1502
|
+
nativeWidth: number;
|
|
1503
|
+
nativeHeight: number;
|
|
1460
1504
|
}>;
|
|
1461
1505
|
constructor(deps: RoutingSandboxSessionDeps);
|
|
1462
1506
|
/**
|
|
@@ -1689,6 +1733,7 @@ type EstablishedSandboxSession = {
|
|
|
1689
1733
|
instanceId: string;
|
|
1690
1734
|
backendId: string;
|
|
1691
1735
|
};
|
|
1736
|
+
type SandboxCreatedCallback = (established: EstablishedSandboxSession) => Promise<void>;
|
|
1692
1737
|
/**
|
|
1693
1738
|
* Per-provider NotFound discriminator. The @openai/agents-extensions
|
|
1694
1739
|
* `isProviderSandboxNotFoundError` / `assertResumeRecreateAllowed` helpers live
|
|
@@ -1722,6 +1767,7 @@ declare function establishSandboxSessionFromEnvelope(settings: Settings, envelop
|
|
|
1722
1767
|
sessionId: string;
|
|
1723
1768
|
backendOverride?: SandboxBackend;
|
|
1724
1769
|
environment?: Record<string, string>;
|
|
1770
|
+
onSandboxCreated?: SandboxCreatedCallback;
|
|
1725
1771
|
}): Promise<EstablishedSandboxSession>;
|
|
1726
1772
|
/**
|
|
1727
1773
|
* Fold a freshly-established (or resumed) sandbox session into the persistable
|
|
@@ -1737,4 +1783,4 @@ declare function establishSandboxSessionFromEnvelope(settings: Settings, envelop
|
|
|
1737
1783
|
*/
|
|
1738
1784
|
declare function serializeEstablishedSandboxEnvelope(established: EstablishedSandboxSession): Promise<Record<string, unknown> | null>;
|
|
1739
1785
|
|
|
1740
|
-
export { type ActiveBackendResolverDeps, ActiveBackendUnresolvableError, type ActivePointer, ChannelAConflictError, type ChannelAEmitter, type ChannelAExecArgs, type ChannelAExecResult, ChannelANotFoundError, type ChannelASession, ChannelAUnsupportedError, ChannelAValidationError, type ControlRpc, DEFAULT_DESKTOP_GEOMETRY, DISPLAY_STACK_TIMEOUT_MS, type DesktopGeometry, DisplayStackError, DisplayStackUnsupportedError, type EnsureDisplayStackOptions, type EnsureDisplayStackResult, type EnsureTerminalServerOptions, type EnsureTerminalServerResult, type EstablishedSandboxSession, type ExposeStreamPortInput, type ExposeStreamPortResult, type ExposedPortEndpoint, type FinalizeRecordingResult, type MintStreamTokenInput, MockAgentResponder, type MockAgentResponderOptions, type MockExecHandler, NatsControlRpc, type NatsRequestConnection, type NegotiationContext, type NumstatEntry, PROVIDER_REGISTRY, type ProviderConstructionContext, type ProviderRegistration, type RecordingCodec, type RecordingContentType, RecordingError, type RecordingProcess, RecordingUnavailableError, type ResolvedActiveBackend, type RoutableBackendSession, type RoutableSandbox, RoutingSandboxSession, type RoutingSandboxSessionDeps, type RoutingTransitionEvent, RoutingUnsupportedError, SELFHOSTED_DEFAULT_TIMEOUT_MS, SELFHOSTED_RECONNECT_WINDOW_MS, SELFHOSTED_RELAY_STREAM_PATH, STREAM_PORT, STREAM_TOKEN_DEFAULT_TTL_SECONDS, SandboxChannelAService, type SandboxChannelAServiceOptions, SandboxConfigError, SandboxProviderUnavailableError, type SelfhostedApplyDiff, SelfhostedControlError, type SelfhostedEditor, type SelfhostedEnrollment, type SelfhostedExecArgs, type SelfhostedExecResult, type SelfhostedImageOutput, type SelfhostedLivenessState, type SelfhostedNegotiationInput, type SelfhostedRelayConfig, SelfhostedSandboxClient, SelfhostedSession, type SelfhostedSessionBuild, type SelfhostedSessionDeps, type SelfhostedSessionState, type SelfhostedUnavailableReason, type StartRecordingInput, 
StreamPortUnavailableError, TERMINAL_SERVER_TIMEOUT_MS, TerminalServerError, TerminalServerUnsupportedError, agentErrorToControlError, assertDescriptorRegistryInvariants, assertProviderRegistryInvariants, assertSafeRelPath, backendSupportsOs, buildDisplayStackScript, buildSelfhostedBackendSession, buildStreamUrl, buildTerminalServerScript, contentTypeForCodec, createSandboxClient, createSandboxClientForBackend, decodeModalSnapshotId, deletePriorPersistedSnapshot, deleteRecordingArtifacts, deserializeSandboxSessionStateEnvelope, desktopCapableBackend, ensureDisplayStack, ensureTerminalServer, establishSandboxSessionFromEnvelope, exposeStreamPort, extForCodec, isExecSessionLostBanner, isProviderSandboxNotFoundError, isSelfhostedProviderNotFoundError, isWorkspaceEscapeError, makeActiveBackendResolver, mintStreamToken, negotiateCapabilities, negotiateSelfhostedCapabilities, offlineAgentError, offlineControlResponse, parseExecBannerSessionId, parseNumstatZ, parsePorcelainV2, parseUnifiedPatch, readRecordingBytes, readWorkspaceArchiveFromEnvelopeSessionState, recordingStorageKey, restoredSandboxSessionStateFromEntry, sandboxStateEntryFromRunState, selectBackend, selfhostedLiveness, serializeEstablishedSandboxEnvelope, setSelfhostedApplyDiff, startRecording, stopRecording, stripExecBanner, subjectFor, tearDownDisplayStack, tearDownTerminalServer, timeoutAgentError, timeoutControlResponse, verifyStreamToken };
|
|
1786
|
+
export { type ActiveBackendResolverDeps, ActiveBackendUnresolvableError, type ActivePointer, ChannelAConflictError, type ChannelAEmitter, type ChannelAExecArgs, type ChannelAExecResult, ChannelANotFoundError, type ChannelASession, ChannelAUnsupportedError, ChannelAValidationError, type ControlRpc, DEFAULT_DESKTOP_GEOMETRY, DISPLAY_STACK_TIMEOUT_MS, type DesktopGeometry, DisplayStackError, DisplayStackUnsupportedError, type EnsureDisplayStackOptions, type EnsureDisplayStackResult, type EnsureTerminalServerOptions, type EnsureTerminalServerResult, type EstablishedSandboxSession, type ExposeStreamPortInput, type ExposeStreamPortResult, type ExposedPortEndpoint, type FinalizeRecordingResult, type LiveModalSandboxLeaseAttribution, type MintStreamTokenInput, MockAgentResponder, type MockAgentResponderOptions, type MockExecHandler, type ModalOrphanSweepResult, type ModalSandboxAttribution, NatsControlRpc, type NatsRequestConnection, type NegotiationContext, type NumstatEntry, PROVIDER_REGISTRY, type ProviderConstructionContext, type ProviderRegistration, type RecordingCodec, type RecordingContentType, RecordingError, type RecordingProcess, RecordingUnavailableError, type ResolvedActiveBackend, type RoutableBackendSession, type RoutableSandbox, RoutingSandboxSession, type RoutingSandboxSessionDeps, type RoutingTransitionEvent, RoutingUnsupportedError, SELFHOSTED_DEFAULT_TIMEOUT_MS, SELFHOSTED_RECONNECT_WINDOW_MS, SELFHOSTED_RELAY_STREAM_PATH, STREAM_PORT, STREAM_TOKEN_DEFAULT_TTL_SECONDS, SandboxChannelAService, type SandboxChannelAServiceOptions, SandboxConfigError, type SandboxCreatedCallback, SandboxProviderUnavailableError, type SelfhostedApplyDiff, SelfhostedControlError, type SelfhostedEditor, type SelfhostedEnrollment, type SelfhostedExecArgs, type SelfhostedExecResult, type SelfhostedImageOutput, type SelfhostedLivenessState, type SelfhostedNegotiationInput, type SelfhostedRelayConfig, SelfhostedSandboxClient, SelfhostedSession, type SelfhostedSessionBuild, type 
SelfhostedSessionDeps, type SelfhostedSessionState, type SelfhostedUnavailableReason, type StartRecordingInput, StreamPortUnavailableError, TERMINAL_SERVER_TIMEOUT_MS, TerminalServerError, TerminalServerUnsupportedError, agentErrorToControlError, assertDescriptorRegistryInvariants, assertProviderRegistryInvariants, assertSafeRelPath, backendSupportsOs, buildDisplayStackScript, buildSelfhostedBackendSession, buildStreamUrl, buildTerminalServerScript, contentTypeForCodec, createSandboxClient, createSandboxClientForBackend, decodeModalSnapshotId, deletePriorPersistedSnapshot, deleteRecordingArtifacts, deserializeSandboxSessionStateEnvelope, desktopCapableBackend, ensureDisplayStack, ensureTerminalServer, establishSandboxSessionFromEnvelope, exposeStreamPort, extForCodec, isExecSessionLostBanner, isProviderSandboxNotFoundError, isSelfhostedProviderNotFoundError, isWorkspaceEscapeError, makeActiveBackendResolver, mintStreamToken, modalSandboxAttributionEnvironment, modalSandboxAttributionTags, negotiateCapabilities, negotiateSelfhostedCapabilities, offlineAgentError, offlineControlResponse, parseExecBannerSessionId, parseNumstatZ, parsePorcelainV2, parseUnifiedPatch, readRecordingBytes, readWorkspaceArchiveFromEnvelopeSessionState, recordingStorageKey, restoredSandboxSessionStateFromEntry, sandboxStateEntryFromRunState, selectBackend, selfhostedLiveness, serializeEstablishedSandboxEnvelope, setSelfhostedApplyDiff, startRecording, stopRecording, stripExecBanner, subjectFor, sweepModalOrphanSandboxes, tagModalSandbox, tearDownDisplayStack, tearDownTerminalServer, terminateModalSandboxById, timeoutAgentError, timeoutControlResponse, verifyStreamToken };
|
package/dist/sandbox/index.js
CHANGED
|
@@ -63,6 +63,8 @@ import {
|
|
|
63
63
|
isWorkspaceEscapeError,
|
|
64
64
|
makeActiveBackendResolver,
|
|
65
65
|
mintStreamToken,
|
|
66
|
+
modalSandboxAttributionEnvironment,
|
|
67
|
+
modalSandboxAttributionTags,
|
|
66
68
|
negotiateCapabilities,
|
|
67
69
|
negotiateSelfhostedCapabilities,
|
|
68
70
|
offlineAgentError,
|
|
@@ -85,12 +87,15 @@ import {
|
|
|
85
87
|
stopRecording,
|
|
86
88
|
stripExecBanner,
|
|
87
89
|
subjectFor,
|
|
90
|
+
sweepModalOrphanSandboxes,
|
|
91
|
+
tagModalSandbox,
|
|
88
92
|
tearDownDisplayStack,
|
|
89
93
|
tearDownTerminalServer,
|
|
94
|
+
terminateModalSandboxById,
|
|
90
95
|
timeoutAgentError,
|
|
91
96
|
timeoutControlResponse,
|
|
92
97
|
verifyStreamToken
|
|
93
|
-
} from "../chunk-KNW7AMQB.js";
|
|
98
|
+
} from "../chunk-D5KU3QUC.js";
|
|
94
99
|
export {
|
|
95
100
|
ActiveBackendUnresolvableError,
|
|
96
101
|
CAPABILITY_DESCRIPTORS,
|
|
@@ -156,6 +161,8 @@ export {
|
|
|
156
161
|
isWorkspaceEscapeError,
|
|
157
162
|
makeActiveBackendResolver,
|
|
158
163
|
mintStreamToken,
|
|
164
|
+
modalSandboxAttributionEnvironment,
|
|
165
|
+
modalSandboxAttributionTags,
|
|
159
166
|
negotiateCapabilities,
|
|
160
167
|
negotiateSelfhostedCapabilities,
|
|
161
168
|
offlineAgentError,
|
|
@@ -178,8 +185,11 @@ export {
|
|
|
178
185
|
stopRecording,
|
|
179
186
|
stripExecBanner,
|
|
180
187
|
subjectFor,
|
|
188
|
+
sweepModalOrphanSandboxes,
|
|
189
|
+
tagModalSandbox,
|
|
181
190
|
tearDownDisplayStack,
|
|
182
191
|
tearDownTerminalServer,
|
|
192
|
+
terminateModalSandboxById,
|
|
183
193
|
timeoutAgentError,
|
|
184
194
|
timeoutControlResponse,
|
|
185
195
|
verifyStreamToken
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@opengeni/runtime",
|
|
3
|
-
"version": "0.2.3",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -29,8 +29,8 @@
|
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
31
|
"@opengeni/agent-proto": "^0.2.1",
|
|
32
|
-
"@opengeni/config": "^0.2.
|
|
33
|
-
"@opengeni/contracts": "^0.
|
|
32
|
+
"@opengeni/config": "^0.2.4",
|
|
33
|
+
"@opengeni/contracts": "^0.6.0",
|
|
34
34
|
"@openai/agents": "^0.11.6",
|
|
35
35
|
"@openai/agents-extensions": "^0.11.6",
|
|
36
36
|
"modal": "^0.7.4",
|
package/src/index.ts
CHANGED
|
@@ -441,7 +441,7 @@ export class MultiProviderModelProvider implements ModelProvider {
|
|
|
441
441
|
|
|
442
442
|
async getModel(modelName?: string): Promise<Model> {
|
|
443
443
|
if (modelName) {
|
|
444
|
-
const resolved = resolveTurnModel(this.settings, modelName);
|
|
444
|
+
const resolved = resolveTurnModel(settingsForRunScopedModelResolution(this.settings, modelName), modelName);
|
|
445
445
|
if (resolved) {
|
|
446
446
|
// Fail-loud floor (defense in depth): a `codex/<slug>` id must only ever
|
|
447
447
|
// resolve through the synthetic codex-subscription provider (which installs
|
|
@@ -479,6 +479,27 @@ export class MultiProviderModelProvider implements ModelProvider {
|
|
|
479
479
|
}
|
|
480
480
|
}
|
|
481
481
|
|
|
482
|
+
/**
 * Picks the settings object the run-scoped model router should resolve against.
 *
 * The worker sets `runSettings.openaiModel` to the turn's model. For namespaced
 * registry ids `configuredModels` filters the built-in entry out, but a unique
 * bare registry id would otherwise be claimed by the built-in only because of
 * that per-turn override. When the requested model equals `openaiModel` yet is
 * NOT in the built-in allow-list, resolve against the allow-list head instead;
 * real built-in models (those present in the allow-list) keep the settings
 * untouched.
 *
 * @param settings  The run-scoped settings.
 * @param modelName The model id requested for this turn.
 * @returns `settings` itself, or a shallow copy whose `openaiModel` is the first
 *          allow-listed built-in model.
 */
function settingsForRunScopedModelResolution(settings: Settings, modelName: string): Settings {
  // Only the per-turn override case (requested model === openaiModel) is ambiguous.
  if (settings.openaiModel !== modelName) {
    return settings;
  }
  const allowList = splitOpenaiAllowedModels(settings.openaiAllowedModels);
  // A genuine built-in model stays resolvable through the unmodified settings.
  if (allowList.includes(modelName)) {
    return settings;
  }
  // The requested id is not allow-listed, so the head (if any) is the fallback.
  const [allowListHead] = allowList;
  if (allowListHead === undefined) {
    return settings;
  }
  return { ...settings, openaiModel: allowListHead };
}
|
|
498
|
+
|
|
499
|
+
/**
 * Splits a comma-separated model allow-list into its ids.
 *
 * Each entry is trimmed; entries that are empty after trimming are dropped.
 *
 * @param value Comma-separated list of model ids.
 * @returns The trimmed, non-empty ids in their original order.
 */
function splitOpenaiAllowedModels(value: string): string[] {
  const ids: string[] = [];
  for (const rawEntry of value.split(",")) {
    const id = rawEntry.trim();
    if (id) {
      ids.push(id);
    }
  }
  return ids;
}
|
|
502
|
+
|
|
482
503
|
/**
|
|
483
504
|
* A `codex/<slug>` turn reached the model router but the workspace has no active
|
|
484
505
|
* Codex subscription connected (the worker overlay never injected the synthetic
|
|
@@ -711,6 +732,14 @@ export type BuildAgentOptions = {
|
|
|
711
732
|
// restyle the persona but never drop the goal-loop contract or environment
|
|
712
733
|
// block.
|
|
713
734
|
instructionsTemplate?: string;
|
|
735
|
+
// Per-SESSION persona/system instructions (the per-agent-type prompt lever an
|
|
736
|
+
// embedding host supplies at session create). Composed AFTER the workspace
|
|
737
|
+
// instructionsTemplate + the non-bypassable CORE, so it refines the workspace
|
|
738
|
+
// persona for this one session without dropping the goal-loop/environment
|
|
739
|
+
// contract. Rides the SAME instructions channel (system-level) — NEVER a user/
|
|
740
|
+
// timeline message. Omitted ⇒ the composed instructions are byte-identical to
|
|
741
|
+
// a workspace-only persona.
|
|
742
|
+
sessionInstructions?: string;
|
|
714
743
|
// Skills delivered by enabled capability packs. They join the bundled
|
|
715
744
|
// skills in the sandbox skill index (mounted under .agents/) so
|
|
716
745
|
// skills/<name> references resolve like any other indexed skill.
|
|
@@ -793,6 +822,27 @@ export function composeAgentInstructions(template: string, workspaceEnvironment?
|
|
|
793
822
|
return core ? `${template} ${core}` : template;
|
|
794
823
|
}
|
|
795
824
|
|
|
825
|
+
/**
 * Adds the per-session persona instructions to the end of the already-composed
 * (workspace + CORE) instructions, separated by a single " ".
 *
 * The session slice is deliberately placed LAST so it refines the workspace
 * persona for this one session. When `sessionInstructions` is absent or
 * whitespace-only the composed string is returned unchanged.
 *
 * @param composed            The composed workspace + CORE instructions.
 * @param sessionInstructions Optional per-session instructions; trimmed before use.
 * @returns The composed instructions, with the trimmed session slice appended if any.
 */
export function appendSessionInstructions(composed: string, sessionInstructions?: string): string {
  const sessionSlice = (sessionInstructions ?? "").trim();
  if (sessionSlice.length === 0) {
    return composed;
  }
  return [composed, sessionSlice].join(" ");
}
|
|
836
|
+
|
|
837
|
+
/**
 * Adds the one-shot genesis title directive to the end of the instructions,
 * separated by a single " ".
 *
 * It is applied last so that neither a white-label persona template nor a
 * per-session instruction can drop it. When `genesisTitleHint` is falsy the
 * instructions are returned unchanged.
 *
 * @param instructions     The instructions composed so far.
 * @param genesisTitleHint Whether this is the genesis turn that needs the directive.
 * @returns The instructions, with `GENESIS_TITLE_DIRECTIVE` appended when hinted.
 */
export function appendGenesisTitleDirective(instructions: string, genesisTitleHint?: boolean): string {
  if (!genesisTitleHint) {
    return instructions;
  }
  return instructions + " " + GENESIS_TITLE_DIRECTIVE;
}
|
|
845
|
+
|
|
796
846
|
const agentFileDownloads = new WeakMap<object, SandboxFileDownload[]>();
|
|
797
847
|
const agentRepositoryCloneHooks = new WeakMap<object, SandboxLifecycleHook[]>();
|
|
798
848
|
// TOKEN-BROKER (B1): the per-turn git token seed, stashed alongside the agent's
|
|
@@ -837,9 +887,21 @@ export function buildOpenGeniAgent(settings: Settings, resources: ResourceRef[],
|
|
|
837
887
|
// ownership + workspace-environment block) at the {{core}} marker, or
|
|
838
888
|
// appends it when the template omits the marker. With the default template
|
|
839
889
|
// and no environment this is byte-identical to the historical preamble.
|
|
840
|
-
instructions:
|
|
841
|
-
|
|
842
|
-
|
|
890
|
+
// Persona composition order (all one system-level instructions string):
|
|
891
|
+
// 1. workspace instructionsTemplate (or deployment default) with the
|
|
892
|
+
// non-bypassable CORE substituted at {{core}} — composeAgentInstructions,
|
|
893
|
+
// 2. + the per-session persona instructions (session-specific, LAST so it
|
|
894
|
+
// refines the workspace persona),
|
|
895
|
+
// 3. + the one-shot genesis title directive (genesis turn only).
|
|
896
|
+
// With no session instructions and no genesis hint this is byte-identical to
|
|
897
|
+
// the historical composed instructions.
|
|
898
|
+
instructions: appendGenesisTitleDirective(
|
|
899
|
+
appendSessionInstructions(
|
|
900
|
+
composeAgentInstructions(options.instructionsTemplate ?? settings.agentInstructionsTemplate, options.workspaceEnvironment),
|
|
901
|
+
options.sessionInstructions,
|
|
902
|
+
),
|
|
903
|
+
options.genesisTitleHint,
|
|
904
|
+
),
|
|
843
905
|
modelSettings: {
|
|
844
906
|
reasoning: { effort: options.reasoningEffort ?? settings.openaiReasoningEffort, summary: "detailed" },
|
|
845
907
|
// Server-side compaction (OpenAI platform) requires store=false: the
|
|
package/src/sandbox/display-stack.ts
CHANGED
|
@@ -32,10 +32,34 @@ export const STREAM_PORT = DESKTOP_STREAM_PORT;
|
|
|
32
32
|
export const DISPLAY_STACK_TIMEOUT_MS = 90_000;
|
|
33
33
|
|
|
34
34
|
// PAINTABLE-FRAME gate: poll scrot up to this many times, this many seconds apart,
|
|
35
|
-
// waiting for
|
|
35
|
+
// waiting for an actually-PAINTED frame before declaring the stack "up" (~30s worst case).
|
|
36
36
|
const PAINT_PROBE_ATTEMPTS = 150;
|
|
37
37
|
const PAINT_PROBE_INTERVAL_S = 0.2;
|
|
38
38
|
|
|
39
|
+
// The paint FLOOR (bytes): a scrot at/above this size is a real painted desktop; below
|
|
40
|
+
// it, the root is still unpainted and the frame would read as "blank" to the model.
|
|
41
|
+
//
|
|
42
|
+
// WHY A SIZE FLOOR, NOT NON-EMPTINESS (the bug this fixes): the old gate only checked
|
|
43
|
+
// `[ -s frame.png ]` (non-empty). But an UNPAINTED root is never zero-byte — a fresh
|
|
44
|
+
// Xvfb draws either the `-retro` weave stipple or (with `-retro` dropped) solid black,
|
|
45
|
+
// and scrot happily encodes that as a small-but-non-empty PNG. So the old gate passed
|
|
46
|
+
// the instant the VNC ports bound — MEASURED at ~1.4s (fast runc host) to several
|
|
47
|
+
// seconds (cold gVisor) BEFORE xfdesktop finishes its first wallpaper paint — handing
|
|
48
|
+
// the model the pre-paint frame. That pre-paint frame is exactly the "blank/black"
|
|
49
|
+
// screenshot that 400s the model and blanks the human viewer.
|
|
50
|
+
//
|
|
51
|
+
// The sizes are unambiguous and were measured on the canonical desktop image (1280x800)
|
|
52
|
+
// under runc — both the current staging image and a fresh local build:
|
|
53
|
+
// painted XFCE desktop (blue-gradient wallpaper + panel + icons): ~210-222 KB
|
|
54
|
+
// `-retro` stipple root (unpainted, current image): ~17 KB
|
|
55
|
+
// solid-black root (unpainted, after we drop `-retro`): ~13.5 KB
|
|
56
|
+
// 60 KB sits ~3.5x above every unpainted state and ~3.5x below a real paint — a wide,
|
|
57
|
+
// unambiguous margin. It holds against BOTH the currently-deployed `-retro` image and
|
|
58
|
+
// the `-retro`-dropped image this change ships, so the runtime gate is correct before
|
|
59
|
+
// AND after the image rebuild lands. (Assumes the default ~1280x800 geometry; a larger
|
|
60
|
+
// framebuffer only scales the painted frame further above the floor.)
|
|
61
|
+
const PAINT_MIN_BYTES = 60_000;
|
|
62
|
+
|
|
39
63
|
/** Desktop geometry for the framebuffer. v1 has no live RANDR: a resolution
|
|
40
64
|
* change is a full down -> up restart (a separate op). */
|
|
41
65
|
export type DesktopGeometry = {
|
|
@@ -145,18 +169,25 @@ export function buildDisplayStackScript(options: EnsureDisplayStackOptions = {})
|
|
|
145
169
|
// PAINTABLE-FRAME GATE (the completion criterion): the up-script's readiness gates
|
|
146
170
|
// only assert that Xvfb answers xdpyinfo and that x11vnc:5900 + websockify:PORT are
|
|
147
171
|
// LISTENING — NOT that the display actually PAINTS. On a stone-cold gVisor box (the
|
|
148
|
-
// machine→sandbox swap-recovery turn always hits one), Xvfb
|
|
149
|
-
//
|
|
150
|
-
//
|
|
151
|
-
//
|
|
172
|
+
// machine→sandbox swap-recovery turn always hits one), Xvfb answers and the VNC ports
|
|
173
|
+
// bind ~1.4s (fast host) to several seconds BEFORE xfdesktop finishes its first
|
|
174
|
+
// wallpaper paint. In that window a scrot yields a small UNPAINTED frame (the -retro
|
|
175
|
+
// stipple or a solid-black root) — never zero-byte — which is exactly the "blank/black"
|
|
176
|
+
// screenshot that 400s the model and blanks the human viewer. (VERIFIED locally: the
|
|
177
|
+
// real xfdesktop backdrop window maps at full 1280x800 the whole time; the render is
|
|
178
|
+
// never structurally broken — it is purely this pre-paint capture race.)
|
|
179
|
+
//
|
|
152
180
|
// We therefore chain a real scrot probe as the completion gate: after the up-script
|
|
153
|
-
// reports success, poll scrot until it produces
|
|
154
|
-
//
|
|
155
|
-
//
|
|
156
|
-
//
|
|
181
|
+
// reports success, poll scrot until it produces an actually-PAINTED frame — a PNG at or
|
|
182
|
+
// above PAINT_MIN_BYTES, not merely NON-EMPTY (the old `[ -s ]` check passed on the
|
|
183
|
+
// ~17 KB pre-paint stipple immediately; that WAS the bug) — bounded ~30s, and only THEN
|
|
184
|
+
// let the command exit 0. If it never paints we exit 14 so the caller sees a typed
|
|
185
|
+
// DisplayStackError("paint") — an HONEST failure the worker can degrade + log, rather
|
|
186
|
+
// than a false "up" that hands the model an unpainted image. `-ac` on Xvfb disables
|
|
157
187
|
// access control so this root-side scrot reaches :0. Runs on a pre-check hit too (cheap
|
|
158
188
|
// — an already-up display paints on the first probe). Lives in the runtime-built script
|
|
159
|
-
// (not the baked image up-script) so it ships with the worker/api, no image rebuild
|
|
189
|
+
// (not the baked image up-script) so it ships with the worker/api, no image rebuild —
|
|
190
|
+
// and its size floor holds against the currently-deployed image too.
|
|
160
191
|
const bringUp =
|
|
161
192
|
`if nc -z 127.0.0.1 ${port} >/dev/null 2>&1 && nc -z 127.0.0.1 5900 >/dev/null 2>&1; then ` +
|
|
162
193
|
`echo "OPENGENI_DESKTOP_UP port=${port} geometry=${geometry.width}x${geometry.height} dpi=${geometry.dpi} (precheck)"; ` +
|
|
@@ -168,11 +199,15 @@ export function buildDisplayStackScript(options: EnsureDisplayStackOptions = {})
|
|
|
168
199
|
const paintProbe =
|
|
169
200
|
`p=/tmp/opengeni-desktop/paint-probe.png; ` +
|
|
170
201
|
`for i in $(seq 1 ${PAINT_PROBE_ATTEMPTS}); do ` +
|
|
171
|
-
|
|
202
|
+
// Capture, then measure the PNG byte-size. `wc -c < "$p"` yields a bare integer; a
|
|
203
|
+
// failed scrot leaves sz=0. A frame at/above PAINT_MIN_BYTES is a real painted desktop.
|
|
204
|
+
`if DISPLAY=:0 scrot -o "$p" >/dev/null 2>&1; then sz=$(wc -c < "$p" 2>/dev/null || echo 0); else sz=0; fi; ` +
|
|
172
205
|
`rm -f "$p"; ` +
|
|
206
|
+
`if [ "$sz" -ge ${PAINT_MIN_BYTES} ]; then break; fi; ` +
|
|
173
207
|
// NOTE: NOT_PAINTING goes to STDOUT (not stderr): Modal is execCommand-only, so the
|
|
174
208
|
// caller infers the outcome by string-matching the output — stdout is always captured.
|
|
175
|
-
|
|
209
|
+
// ($sz is bare shell here — no ${} braces — so JS leaves it for bash to expand.)
|
|
210
|
+
`if [ "$i" = "${PAINT_PROBE_ATTEMPTS}" ]; then echo "OPENGENI_DESKTOP_NOT_PAINTING scrot below ${PAINT_MIN_BYTES}B after warmup (last=$sz)"; exit 14; fi; ` +
|
|
176
211
|
`sleep ${PAINT_PROBE_INTERVAL_S}; ` +
|
|
177
212
|
`done`;
|
|
178
213
|
return `mkdir -p /tmp/opengeni-desktop; { ${bringUp} ; } && { ${paintProbe} ; }`;
|
package/src/sandbox/index.ts
CHANGED
|
@@ -53,6 +53,16 @@ export {
|
|
|
53
53
|
type ProviderRegistration,
|
|
54
54
|
type ProviderConstructionContext,
|
|
55
55
|
} from "./providers";
|
|
56
|
+
export {
|
|
57
|
+
modalSandboxAttributionEnvironment,
|
|
58
|
+
modalSandboxAttributionTags,
|
|
59
|
+
sweepModalOrphanSandboxes,
|
|
60
|
+
tagModalSandbox,
|
|
61
|
+
terminateModalSandboxById,
|
|
62
|
+
type LiveModalSandboxLeaseAttribution,
|
|
63
|
+
type ModalOrphanSweepResult,
|
|
64
|
+
type ModalSandboxAttribution,
|
|
65
|
+
} from "./providers/modal";
|
|
56
66
|
export {
|
|
57
67
|
selectBackend,
|
|
58
68
|
backendSupportsOs,
|
|
@@ -540,6 +550,8 @@ export type EstablishedSandboxSession = {
|
|
|
540
550
|
backendId: string;
|
|
541
551
|
};
|
|
542
552
|
|
|
553
|
+
export type SandboxCreatedCallback = (established: EstablishedSandboxSession) => Promise<void>;
|
|
554
|
+
|
|
543
555
|
// The structural slice we need from a provider SandboxClient to resume by id and
|
|
544
556
|
// cold-restore. Narrowed (not the full agent-loop SandboxClient) so the leaf
|
|
545
557
|
// stays agent-loop-free.
|
|
@@ -616,6 +628,16 @@ function readInstanceId(session: unknown): string {
|
|
|
616
628
|
return typeof candidate === "string" && candidate.length > 0 ? candidate : "";
|
|
617
629
|
}
|
|
618
630
|
|
|
631
|
+
/**
 * Best-effort teardown of a sandbox that was just created.
 *
 * Prefers the client's `delete(sessionState)` when the client exposes one and a
 * session state is available; otherwise falls back to the first lifecycle
 * method the session object offers, in the order `terminate`, `kill`, `close`.
 * Every failure is swallowed: this is cleanup and must never mask the error
 * that triggered it.
 *
 * @param client       The provider client that created the sandbox.
 * @param session      The created session object (may expose terminate/kill/close).
 * @param sessionState The provider session state handed to `client.delete`, if any.
 */
async function terminateCreatedSandbox(client: ResumeCapableClient, session: unknown, sessionState: unknown): Promise<void> {
  const clientWithDelete = client as { delete?: (state: unknown) => Promise<unknown> };
  if (typeof clientWithDelete.delete === "function" && sessionState !== undefined) {
    try {
      await clientWithDelete.delete(sessionState);
    } catch {
      /* best-effort */
    }
    return;
  }

  const sess = session as
    | { close?: () => Promise<unknown>; terminate?: () => Promise<unknown>; kill?: () => Promise<unknown> }
    | null
    | undefined;
  try {
    // Call the method ON the session so `this` stays bound. Selecting the
    // function first and calling the bare reference runs it with
    // `this === undefined`; a class-based session then throws a TypeError that
    // the catch below swallows, leaving the sandbox running.
    if (typeof sess?.terminate === "function") {
      await sess.terminate();
    } else if (typeof sess?.kill === "function") {
      await sess.kill();
    } else if (typeof sess?.close === "function") {
      await sess.close();
    }
  } catch {
    /* best-effort */
  }
}
|
|
640
|
+
|
|
619
641
|
/**
|
|
620
642
|
* Resume the one box by id from its recovery envelope, or cold-restore from the
|
|
621
643
|
* snapshot when the provider reports it gone. The envelope is the lease's
|
|
@@ -633,7 +655,12 @@ function readInstanceId(session: unknown): string {
|
|
|
633
655
|
export async function establishSandboxSessionFromEnvelope(
|
|
634
656
|
settings: Settings,
|
|
635
657
|
envelope: Record<string, unknown> | null,
|
|
636
|
-
opts: {
|
|
658
|
+
opts: {
|
|
659
|
+
sessionId: string;
|
|
660
|
+
backendOverride?: SandboxBackend;
|
|
661
|
+
environment?: Record<string, string>;
|
|
662
|
+
onSandboxCreated?: SandboxCreatedCallback;
|
|
663
|
+
},
|
|
637
664
|
): Promise<EstablishedSandboxSession> {
|
|
638
665
|
const envelopeBackend = typeof envelope?.backendId === "string" ? (envelope.backendId as SandboxBackend) : undefined;
|
|
639
666
|
const backend = (opts.backendOverride ?? envelopeBackend ?? (settings.sandboxBackend as SandboxBackend));
|
|
@@ -680,6 +707,22 @@ export async function establishSandboxSessionFromEnvelope(
|
|
|
680
707
|
// cold-restore branch (b) below.
|
|
681
708
|
const coldRestore = async (resumeFallbackState?: unknown): Promise<EstablishedSandboxSession> => {
|
|
682
709
|
const restored = await client.create!({ manifest: createManifest });
|
|
710
|
+
let restoredState = (restored as { state?: unknown }).state;
|
|
711
|
+
let established: EstablishedSandboxSession = {
|
|
712
|
+
client,
|
|
713
|
+
session: restored,
|
|
714
|
+
sessionState: restoredState ?? resumeFallbackState,
|
|
715
|
+
instanceId: readInstanceId(restored),
|
|
716
|
+
backendId: client.backendId,
|
|
717
|
+
};
|
|
718
|
+
if (opts.onSandboxCreated) {
|
|
719
|
+
try {
|
|
720
|
+
await opts.onSandboxCreated(established);
|
|
721
|
+
} catch (createCallbackError) {
|
|
722
|
+
await terminateCreatedSandbox(client, restored, restoredState);
|
|
723
|
+
throw createCallbackError;
|
|
724
|
+
}
|
|
725
|
+
}
|
|
683
726
|
if (workspaceArchive) {
|
|
684
727
|
const hydrate = (restored as { hydrateWorkspace?: (data: Uint8Array) => Promise<void> }).hydrateWorkspace;
|
|
685
728
|
if (typeof hydrate === "function") {
|
|
@@ -696,21 +739,38 @@ export async function establishSandboxSessionFromEnvelope(
|
|
|
696
739
|
// re-throwing so no box leaks. The original error semantics are preserved
|
|
697
740
|
// (the re-throw propagates to the caller). This mirrors the reaper's
|
|
698
741
|
// discipline: NEVER leave an orphaned box running.
|
|
699
|
-
|
|
700
|
-
const clientWithDelete = client as { delete?: (state: unknown) => Promise<unknown> };
|
|
701
|
-
if (typeof clientWithDelete.delete === "function" && restoredState !== undefined) {
|
|
702
|
-
try { await clientWithDelete.delete(restoredState); } catch { /* best-effort; re-throw the hydrate error below */ }
|
|
703
|
-
} else {
|
|
704
|
-
// No delete() — try a session-level close/terminate as a fallback.
|
|
705
|
-
const sess = restored as { close?: () => Promise<unknown>; terminate?: () => Promise<unknown> };
|
|
706
|
-
try { await (sess.terminate ?? sess.close)?.(); } catch { /* best-effort */ }
|
|
707
|
-
}
|
|
742
|
+
await terminateCreatedSandbox(client, restored, restoredState);
|
|
708
743
|
throw hydrateError;
|
|
709
744
|
}
|
|
745
|
+
const hydratedState = (restored as { state?: unknown }).state;
|
|
746
|
+
const hydratedInstanceId = readInstanceId(restored);
|
|
747
|
+
if (hydratedInstanceId && hydratedInstanceId !== established.instanceId) {
|
|
748
|
+
established = {
|
|
749
|
+
client,
|
|
750
|
+
session: restored,
|
|
751
|
+
sessionState: hydratedState ?? resumeFallbackState,
|
|
752
|
+
instanceId: hydratedInstanceId,
|
|
753
|
+
backendId: client.backendId,
|
|
754
|
+
};
|
|
755
|
+
if (opts.onSandboxCreated) {
|
|
756
|
+
try {
|
|
757
|
+
await opts.onSandboxCreated(established);
|
|
758
|
+
} catch (createCallbackError) {
|
|
759
|
+
await terminateCreatedSandbox(client, restored, hydratedState);
|
|
760
|
+
throw createCallbackError;
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
}
|
|
710
764
|
}
|
|
711
765
|
}
|
|
712
|
-
|
|
713
|
-
return {
|
|
766
|
+
restoredState = (restored as { state?: unknown }).state;
|
|
767
|
+
return {
|
|
768
|
+
client,
|
|
769
|
+
session: restored,
|
|
770
|
+
sessionState: restoredState ?? resumeFallbackState,
|
|
771
|
+
instanceId: readInstanceId(restored),
|
|
772
|
+
backendId: client.backendId,
|
|
773
|
+
};
|
|
714
774
|
};
|
|
715
775
|
|
|
716
776
|
// Does the envelope carry a RESUMABLE box id (warm reattach), or only a
|