@opengeni/runtime 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-D5KU3QUC.js → chunk-HGQ252FL.js} +22 -3
- package/dist/chunk-HGQ252FL.js.map +1 -0
- package/dist/index-CSGkld-v.d.ts +1801 -0
- package/dist/index.d.ts +7 -3
- package/dist/index.js +160 -37
- package/dist/index.js.map +1 -1
- package/dist/sandbox/index.d.ts +4 -1784
- package/dist/sandbox/index.js +1 -1
- package/package.json +3 -3
- package/src/history-sanitizer.ts +35 -38
- package/src/index.ts +67 -6
- package/src/metrics.ts +5 -0
- package/src/sandbox/display-stack.ts +25 -4
- package/src/sandbox/index.ts +28 -1
- package/src/sandbox-computer.ts +167 -36
- package/src/screenshot-error-card.ts +25 -0
- package/dist/chunk-D5KU3QUC.js.map +0 -1
package/dist/sandbox/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@opengeni/runtime",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -29,8 +29,8 @@
|
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
31
|
"@opengeni/agent-proto": "^0.2.1",
|
|
32
|
-
"@opengeni/config": "^0.2.
|
|
33
|
-
"@opengeni/contracts": "^0.
|
|
32
|
+
"@opengeni/config": "^0.2.5",
|
|
33
|
+
"@opengeni/contracts": "^0.7.0",
|
|
34
34
|
"@openai/agents": "^0.11.6",
|
|
35
35
|
"@openai/agents-extensions": "^0.11.6",
|
|
36
36
|
"modal": "^0.7.4",
|
package/src/history-sanitizer.ts
CHANGED
|
@@ -27,6 +27,8 @@
|
|
|
27
27
|
* filtered, keeping the persisted audit trail intact.
|
|
28
28
|
*/
|
|
29
29
|
|
|
30
|
+
import { SCREENSHOT_FAILURE_CARD_IMAGE_URL } from "./screenshot-error-card";
|
|
31
|
+
|
|
30
32
|
/** A history item is any JSON object; we only inspect a few discriminator fields. */
|
|
31
33
|
export type HistoryItem = Record<string, unknown>;
|
|
32
34
|
|
|
@@ -594,42 +596,35 @@ export function rewriteComputerCallsToActionsOnly(body: unknown): boolean {
|
|
|
594
596
|
}
|
|
595
597
|
|
|
596
598
|
/**
|
|
597
|
-
*
|
|
598
|
-
*
|
|
599
|
-
* `
|
|
600
|
-
*
|
|
601
|
-
*
|
|
602
|
-
*
|
|
603
|
-
*
|
|
604
|
-
*
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
*
|
|
611
|
-
*
|
|
612
|
-
*
|
|
613
|
-
*
|
|
614
|
-
*
|
|
615
|
-
*
|
|
616
|
-
*
|
|
617
|
-
*
|
|
618
|
-
*
|
|
619
|
-
*
|
|
620
|
-
*
|
|
621
|
-
*
|
|
622
|
-
*
|
|
623
|
-
*
|
|
624
|
-
*
|
|
625
|
-
*
|
|
626
|
-
*
|
|
627
|
-
* Our screenshot() fail-loud guard (which throws on empty frames) only runs when
|
|
628
|
-
* the SDK calls screenshot() on a SUCCESS path — not on this action-error catch
|
|
629
|
-
* path that sets output='' directly. This wire-level rewrite is the only seam that
|
|
630
|
-
* catches both paths regardless of how the empty image_url was produced. It runs
|
|
631
|
-
* in the same `computerCallNormalizingFetch` wrapper, so a single parse/rewrite
|
|
632
|
-
* pass covers both the action/actions-only rewrite and this placeholder injection.
|
|
599
|
+
* Backstop for the empty `computer_call_output` image_url: walk the `input` array of
|
|
600
|
+
* a serialized Responses request body and replace any `computer_call_output` item
|
|
601
|
+
* whose `output.image_url` is empty/missing with a LEGIBLE "screen capture failed"
|
|
602
|
+
* error card ({@link SCREENSHOT_FAILURE_CARD_IMAGE_URL}).
|
|
603
|
+
*
|
|
604
|
+
* WHY A CARD, NOT A BLANK. An empty `image_url` reaches this seam ONLY when the
|
|
605
|
+
* computer op genuinely FAILED to produce a screen: agents-core's `toolExecution.mjs`
|
|
606
|
+
* catch sets `output = ''` when the action OR the follow-up `computer.screenshot()`
|
|
607
|
+
* throws, building `{type:"computer_call_output",output:{type:"computer_screenshot",
|
|
608
|
+
* image_url:""}}`. Azure then rejects the whole request with
|
|
609
|
+
* `400 Invalid 'input[N].output.image_url'`. The previous fix substituted a 1×1
|
|
610
|
+
* TRANSPARENT placeholder to dodge the 400 — but that reaches the model as a
|
|
611
|
+
* plausible BLANK DESKTOP it confidently reports ("the screen appears blank/empty"),
|
|
612
|
+
* turning a hard capture FAILURE into a silent, wrong observation. That is the worst
|
|
613
|
+
* failure mode for computer use, and it is exactly what the 0.1.3 TCC-denied incident
|
|
614
|
+
* produced. Substituting a legible error card instead makes the failure REACH THE
|
|
615
|
+
* MODEL as an error (the only channel the hosted `computer_use_preview` protocol has
|
|
616
|
+
* is the image), so the model stops and tells the user rather than hallucinating.
|
|
617
|
+
*
|
|
618
|
+
* WHY THIS IS SAFE (empty = failure, never an intentional blank). Post-af289e3 the
|
|
619
|
+
* intentional-blank cases carry a NON-empty data URI already: agents-core's
|
|
620
|
+
* tool-approval-rejection screenshot is its own non-empty 1×1 placeholder, and the
|
|
621
|
+
* SandboxComputer action-timeout now warn+returns to a REAL screenshot rather than an
|
|
622
|
+
* empty output. So an EMPTY image_url at this seam is unambiguously a capture/interact
|
|
623
|
+
* FAILURE — the error card is the correct substitution for every empty case, and this
|
|
624
|
+
* function never touches a non-empty (real screenshot OR intentional blank) output.
|
|
625
|
+
*
|
|
626
|
+
* The failure REASON (permission denied / null image / timeout / display down) is not
|
|
627
|
+
* on the card; it is logged worker-side by `NativeDesktopComputer.screenshot()`.
|
|
633
628
|
*
|
|
634
629
|
* Mutates `body` in place (the caller has already JSON.parsed a private copy).
|
|
635
630
|
* Returns `true` iff at least one image_url was replaced.
|
|
@@ -657,9 +652,11 @@ export function rewriteEmptyComputerCallOutputImageUrls(body: unknown): boolean
|
|
|
657
652
|
}
|
|
658
653
|
const out = output as Record<string, unknown>;
|
|
659
654
|
const imageUrl = out.image_url;
|
|
660
|
-
// Replace the image_url when it is not a non-empty string (covers: "", null,
|
|
655
|
+
// Replace the image_url when it is not a non-empty string (covers: "", null,
|
|
656
|
+
// undefined, missing) — an empty output is always a genuine capture failure, so
|
|
657
|
+
// it becomes the legible error card, never a silent blank.
|
|
661
658
|
if (typeof imageUrl !== "string" || imageUrl.length === 0) {
|
|
662
|
-
out.image_url =
|
|
659
|
+
out.image_url = SCREENSHOT_FAILURE_CARD_IMAGE_URL;
|
|
663
660
|
changed = true;
|
|
664
661
|
}
|
|
665
662
|
}
|
package/src/index.ts
CHANGED
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
setDefaultOpenAIClient,
|
|
28
28
|
setDefaultOpenAIKey,
|
|
29
29
|
setOpenAIResponsesTransport,
|
|
30
|
+
setTracingDisabled,
|
|
30
31
|
// Hosted web_search tool factory. Re-exported from @openai/agents-openai via
|
|
31
32
|
// `export * from '@openai/agents-openai'` in @openai/agents' index (0.11.6);
|
|
32
33
|
// it returns a { type: 'hosted_tool', providerData: { type: 'web_search' } }
|
|
@@ -101,6 +102,9 @@ import {
|
|
|
101
102
|
setSelfhostedApplyDiff,
|
|
102
103
|
} from "./sandbox";
|
|
103
104
|
import { computerUse, type ComputerToolMode } from "./sandbox-computer";
|
|
105
|
+
import type { RuntimeMetricsHooks } from "./metrics";
|
|
106
|
+
|
|
107
|
+
export type { RuntimeMetricsHooks } from "./metrics";
|
|
104
108
|
|
|
105
109
|
// P4.3 computer-use surface (the agent's :0 driver). Re-exported from the barrel
|
|
106
110
|
// so callers (the worker, live proofs) reach SandboxComputer/ComputerUseCapability
|
|
@@ -253,6 +257,12 @@ export type SandboxFileDownload = {
|
|
|
253
257
|
sizeBytes?: number;
|
|
254
258
|
};
|
|
255
259
|
|
|
260
|
+
let runtimeMetricsHooks: RuntimeMetricsHooks | null = null;
|
|
261
|
+
|
|
262
|
+
export function configureRuntimeMetricsHooks(hooks: RuntimeMetricsHooks | null | undefined): void {
|
|
263
|
+
runtimeMetricsHooks = hooks ?? null;
|
|
264
|
+
}
|
|
265
|
+
|
|
256
266
|
export type OpenGeniRuntime = {
|
|
257
267
|
configure: (settings: Settings) => void;
|
|
258
268
|
// Multi-provider per-turn model routing. Returns the resolved provider, its
|
|
@@ -270,11 +280,15 @@ export type OpenGeniRuntime = {
|
|
|
270
280
|
export type ProductionRuntimeOverrides = {
|
|
271
281
|
model?: Model;
|
|
272
282
|
sandboxClient?: unknown;
|
|
283
|
+
metrics?: RuntimeMetricsHooks;
|
|
273
284
|
};
|
|
274
285
|
|
|
275
286
|
export function createProductionAgentRuntime(overrides: ProductionRuntimeOverrides = {}): OpenGeniRuntime {
|
|
276
287
|
return {
|
|
277
|
-
configure:
|
|
288
|
+
configure: (settings) => {
|
|
289
|
+
configureRuntimeMetricsHooks(overrides.metrics);
|
|
290
|
+
configureOpenAI(settings);
|
|
291
|
+
},
|
|
278
292
|
// A test/override model shadows the registry routing entirely (the scripted
|
|
279
293
|
// model used in worker tests is not in any provider's allow-list), so when
|
|
280
294
|
// one is supplied resolveTurnModel reports "no resolution" and the caller
|
|
@@ -301,7 +315,7 @@ export function createProductionAgentRuntime(overrides: ProductionRuntimeOverrid
|
|
|
301
315
|
* the OpenAI-platform path has only a key (the SDK default client is used via
|
|
302
316
|
* setDefaultOpenAIKey there); the caller then constructs a key-only client.
|
|
303
317
|
*/
|
|
304
|
-
export function buildOpenAIClientFromSettings(settings: Settings): OpenAI {
|
|
318
|
+
export function buildOpenAIClientFromSettings(settings: Settings, providerId: string = settings.openaiProvider): OpenAI {
|
|
305
319
|
if (settings.openaiProvider === "azure") {
|
|
306
320
|
const baseURL = settings.azureOpenaiBaseUrl ?? azureDeploymentBaseUrl(settings);
|
|
307
321
|
const apiKey = settings.azureOpenaiApiKey ?? settings.azureOpenaiAdToken ?? "azure-ad-token";
|
|
@@ -318,13 +332,14 @@ export function buildOpenAIClientFromSettings(settings: Settings): OpenAI {
|
|
|
318
332
|
// seam — below the SDK responses converter, which always re-synthesizes BOTH
|
|
319
333
|
// `action` and `actions` (rejected 400 "exactly one of action or actions").
|
|
320
334
|
// See computerCallNormalizingFetch / rewriteComputerCallsToActionsOnly.
|
|
321
|
-
fetch: computerCallNormalizingFetch(globalThis.fetch),
|
|
335
|
+
fetch: computerCallNormalizingFetch(instrumentedModelFetch(providerId, globalThis.fetch)),
|
|
322
336
|
});
|
|
323
337
|
}
|
|
324
338
|
return new OpenAI({
|
|
325
339
|
apiKey: settings.openaiApiKey ?? process.env.OPENAI_API_KEY,
|
|
326
340
|
...(settings.openaiBaseUrl ? { baseURL: settings.openaiBaseUrl } : {}),
|
|
327
341
|
maxRetries: settings.openaiMaxRetries,
|
|
342
|
+
fetch: instrumentedModelFetch(providerId, globalThis.fetch),
|
|
328
343
|
});
|
|
329
344
|
}
|
|
330
345
|
|
|
@@ -346,7 +361,7 @@ export function buildProviderClient(provider: ResolvedModelProvider, settings: S
|
|
|
346
361
|
return cached;
|
|
347
362
|
}
|
|
348
363
|
const client = provider.builtin
|
|
349
|
-
? buildOpenAIClientFromSettings(settings)
|
|
364
|
+
? buildOpenAIClientFromSettings(settings, provider.id)
|
|
350
365
|
: provider.kind === "codex-subscription"
|
|
351
366
|
// Codex subscription: the static apiKey is a placeholder — the real per-request
|
|
352
367
|
// bearer + ChatGPT-Account-ID, the /responses->/codex/responses rewrite, and the
|
|
@@ -358,7 +373,7 @@ export function buildProviderClient(provider: ResolvedModelProvider, settings: S
|
|
|
358
373
|
apiKey: provider.apiKey ?? "codex-subscription",
|
|
359
374
|
...(provider.baseUrl ? { baseURL: provider.baseUrl } : {}),
|
|
360
375
|
maxRetries: settings.openaiMaxRetries,
|
|
361
|
-
fetch: codexSubscriptionFetch(globalThis.fetch),
|
|
376
|
+
fetch: codexSubscriptionFetch(instrumentedModelFetch(provider.id, globalThis.fetch)),
|
|
362
377
|
})
|
|
363
378
|
// ResolvedModelProvider.apiKey is already the resolved key (configuredProviders
|
|
364
379
|
// ran resolveProviderApiKey at config time, collapsing apiKey/apiKeyEnv), so it
|
|
@@ -369,6 +384,7 @@ export function buildProviderClient(provider: ResolvedModelProvider, settings: S
|
|
|
369
384
|
maxRetries: settings.openaiMaxRetries,
|
|
370
385
|
...(provider.defaultQuery ? { defaultQuery: provider.defaultQuery } : {}),
|
|
371
386
|
...(provider.defaultHeaders ? { defaultHeaders: provider.defaultHeaders } : {}),
|
|
387
|
+
fetch: instrumentedModelFetch(provider.id, globalThis.fetch),
|
|
372
388
|
});
|
|
373
389
|
providerClientCache.set(provider.id, client);
|
|
374
390
|
return client;
|
|
@@ -521,6 +537,7 @@ export class CodexSubscriptionUnavailableError extends Error {
|
|
|
521
537
|
|
|
522
538
|
export function configureOpenAI(settings: Settings): void {
|
|
523
539
|
setOpenAIResponsesTransport(settings.openaiResponsesTransport);
|
|
540
|
+
setTracingDisabled(settings.disableOpenaiTracing || !settings.observabilityOtlpEndpoint);
|
|
524
541
|
// Install the registry-aware router as the process default model provider so a
|
|
525
542
|
// model name re-resolved on the SandboxAgent/Modal path (where a Model instance
|
|
526
543
|
// does not survive) routes to its provider instead of the built-in client.
|
|
@@ -540,6 +557,51 @@ export function configureOpenAI(settings: Settings): void {
|
|
|
540
557
|
setDefaultModelProvider(router);
|
|
541
558
|
}
|
|
542
559
|
|
|
560
|
+
function instrumentedModelFetch(provider: string, inner: typeof fetch): typeof fetch {
|
|
561
|
+
return (async (input: Parameters<typeof fetch>[0], init?: Parameters<typeof fetch>[1]) => {
|
|
562
|
+
if (!isModelCallFetch(input)) {
|
|
563
|
+
return await inner(input, init);
|
|
564
|
+
}
|
|
565
|
+
const started = performance.now();
|
|
566
|
+
try {
|
|
567
|
+
const response = await inner(input, init);
|
|
568
|
+
recordModelCallMetric(provider, response.ok ? "completed" : "failed", started);
|
|
569
|
+
return response;
|
|
570
|
+
} catch (error) {
|
|
571
|
+
recordModelCallMetric(provider, "failed", started);
|
|
572
|
+
throw error;
|
|
573
|
+
}
|
|
574
|
+
}) as typeof fetch;
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
function isModelCallFetch(input: Parameters<typeof fetch>[0]): boolean {
|
|
578
|
+
const rawUrl = typeof input === "string"
|
|
579
|
+
? input
|
|
580
|
+
: input instanceof URL
|
|
581
|
+
? input.toString()
|
|
582
|
+
: (input as { url?: unknown }).url;
|
|
583
|
+
if (typeof rawUrl !== "string" || rawUrl.length === 0) {
|
|
584
|
+
return false;
|
|
585
|
+
}
|
|
586
|
+
try {
|
|
587
|
+
const pathname = new URL(rawUrl, "http://opengeni.local").pathname;
|
|
588
|
+
return pathname.endsWith("/responses")
|
|
589
|
+
|| pathname.endsWith("/chat/completions")
|
|
590
|
+
|| pathname.endsWith("/codex/responses");
|
|
591
|
+
} catch {
|
|
592
|
+
return /\/(?:codex\/)?responses(?:\?|$)|\/chat\/completions(?:\?|$)/.test(rawUrl);
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
function recordModelCallMetric(provider: string, outcome: "completed" | "failed", started: number): void {
|
|
597
|
+
const durationSeconds = Math.max(0, (performance.now() - started) / 1000);
|
|
598
|
+
try {
|
|
599
|
+
runtimeMetricsHooks?.onModelCall?.({ provider, outcome, durationSeconds });
|
|
600
|
+
} catch {
|
|
601
|
+
// Metrics emission must never affect a model call.
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
|
|
543
605
|
/**
|
|
544
606
|
* Run the compaction summarizer as one plain, tool-less, non-streaming model
|
|
545
607
|
* call against the resolved provider. `input` is the active history plus
|
|
@@ -1982,7 +2044,6 @@ export async function runAgentStream(agent: Agent<any, any>, input: PreparedAgen
|
|
|
1982
2044
|
// every mid-turn follow-up.
|
|
1983
2045
|
callModelInputFilter,
|
|
1984
2046
|
};
|
|
1985
|
-
void settings.disableOpenaiTracing;
|
|
1986
2047
|
if (client) {
|
|
1987
2048
|
runOptions.sandbox = {
|
|
1988
2049
|
client,
|
package/src/metrics.ts
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export type RuntimeMetricsHooks = {
|
|
2
|
+
onModelCall?: (input: { provider: string; outcome: "completed" | "failed"; durationSeconds: number }) => void;
|
|
3
|
+
onSandboxCreate?: (input: { backend: string; outcome: "completed" | "failed"; durationSeconds: number }) => void;
|
|
4
|
+
onSandboxWarmingTimeout?: (input: { backend: string }) => void;
|
|
5
|
+
};
|
package/src/sandbox/display-stack.ts
CHANGED
|
@@ -60,6 +60,22 @@ const PAINT_PROBE_INTERVAL_S = 0.2;
|
|
|
60
60
|
// framebuffer only scales the painted frame further above the floor.)
|
|
61
61
|
const PAINT_MIN_BYTES = 60_000;
|
|
62
62
|
|
|
63
|
+
// SETTLE gate (the gVisor staged-paint fix): crossing the 60 KB floor is necessary but
|
|
64
|
+
// NOT sufficient. On a fast runc host the paint is atomic (black 13.5 KB -> full 209 KB
|
|
65
|
+
// in one step, panel + icons included). On a STONE-COLD gVisor Modal box it is STAGED:
|
|
66
|
+
// the wallpaper gradient paints and crosses 60 KB a beat BEFORE xfdesktop draws the
|
|
67
|
+
// panel / launcher icons / logo. A screenshot in that window shows a bare teal wallpaper
|
|
68
|
+
// with no panel — which the model correctly reports as "graphical, but the desktop
|
|
69
|
+
// hasn't fully loaded" (VERIFIED live on staging: a cold-box turn's first agent
|
|
70
|
+
// screenshot caught exactly this). So the gate additionally waits for the frame to
|
|
71
|
+
// SETTLE: two consecutive probes both above the floor whose byte-sizes agree within
|
|
72
|
+
// PAINT_SETTLE_DELTA_BYTES. A still-painting desktop grows between probes; a fully
|
|
73
|
+
// rendered, static one is byte-stable (scrot -o omits the cursor, and the clock is
|
|
74
|
+
// minute-precision, so consecutive captures of a settled desktop are near-identical).
|
|
75
|
+
// This makes ensureDisplayStack block until the FULL desktop is up, so the turn's first
|
|
76
|
+
// screenshot — which runs AFTER this gate — sees the panel, not a bare wallpaper.
|
|
77
|
+
const PAINT_SETTLE_DELTA_BYTES = 2_000;
|
|
78
|
+
|
|
63
79
|
/** Desktop geometry for the framebuffer. v1 has no live RANDR: a resolution
|
|
64
80
|
* change is a full down -> up restart (a separate op). */
|
|
65
81
|
export type DesktopGeometry = {
|
|
@@ -197,17 +213,22 @@ export function buildDisplayStackScript(options: EnsureDisplayStackOptions = {})
|
|
|
197
213
|
`env ${env} opengeni-desktop-up; ` +
|
|
198
214
|
`fi`;
|
|
199
215
|
const paintProbe =
|
|
200
|
-
`p=/tmp/opengeni-desktop/paint-probe.png; ` +
|
|
216
|
+
`p=/tmp/opengeni-desktop/paint-probe.png; prev=0; ` +
|
|
201
217
|
`for i in $(seq 1 ${PAINT_PROBE_ATTEMPTS}); do ` +
|
|
202
218
|
// Capture, then measure the PNG byte-size. `wc -c < "$p"` yields a bare integer; a
|
|
203
219
|
// failed scrot leaves sz=0. A frame at/above PAINT_MIN_BYTES is a real painted desktop.
|
|
204
220
|
`if DISPLAY=:0 scrot -o "$p" >/dev/null 2>&1; then sz=$(wc -c < "$p" 2>/dev/null || echo 0); else sz=0; fi; ` +
|
|
205
221
|
`rm -f "$p"; ` +
|
|
206
|
-
|
|
222
|
+
// SETTLE: accept only when THIS probe AND the PREVIOUS one are both above the floor
|
|
223
|
+
// and their sizes agree within PAINT_SETTLE_DELTA_BYTES — i.e., the paint has stopped
|
|
224
|
+
// growing (the full desktop, panel + icons included, is up), not merely crossed the
|
|
225
|
+
// floor mid-paint on a staged gVisor boot. ($sz/$prev/$d are bare shell — no ${}
|
|
226
|
+
// braces — so JS leaves them for bash; ${PAINT_*} ARE JS constants and interpolate.)
|
|
227
|
+
`if [ "$sz" -ge ${PAINT_MIN_BYTES} ] && [ "$prev" -ge ${PAINT_MIN_BYTES} ]; then d=$((sz-prev)); [ "$d" -lt 0 ] && d=$((0-d)); [ "$d" -le ${PAINT_SETTLE_DELTA_BYTES} ] && break; fi; ` +
|
|
228
|
+
`prev=$sz; ` +
|
|
207
229
|
// NOTE: NOT_PAINTING goes to STDOUT (not stderr): Modal is execCommand-only, so the
|
|
208
230
|
// caller infers the outcome by string-matching the output — stdout is always captured.
|
|
209
|
-
|
|
210
|
-
`if [ "$i" = "${PAINT_PROBE_ATTEMPTS}" ]; then echo "OPENGENI_DESKTOP_NOT_PAINTING scrot below ${PAINT_MIN_BYTES}B after warmup (last=$sz)"; exit 14; fi; ` +
|
|
231
|
+
`if [ "$i" = "${PAINT_PROBE_ATTEMPTS}" ]; then echo "OPENGENI_DESKTOP_NOT_PAINTING scrot below ${PAINT_MIN_BYTES}B or unsettled after warmup (last=$sz)"; exit 14; fi; ` +
|
|
211
232
|
`sleep ${PAINT_PROBE_INTERVAL_S}; ` +
|
|
212
233
|
`done`;
|
|
213
234
|
return `mkdir -p /tmp/opengeni-desktop; { ${bringUp} ; } && { ${paintProbe} ; }`;
|
package/src/sandbox/index.ts
CHANGED
|
@@ -29,6 +29,7 @@ import type {
|
|
|
29
29
|
import { PROVIDER_REGISTRY } from "./providers";
|
|
30
30
|
import { SandboxConfigError } from "./errors";
|
|
31
31
|
import { isSelfhostedProviderNotFoundError } from "./selfhosted/session";
|
|
32
|
+
import type { RuntimeMetricsHooks } from "../metrics";
|
|
32
33
|
|
|
33
34
|
// Re-export the config-owned environment/port helpers from the leaf so the
|
|
34
35
|
// API-direct control plane can pull its full sandbox-construction surface from
|
|
@@ -660,6 +661,7 @@ export async function establishSandboxSessionFromEnvelope(
|
|
|
660
661
|
backendOverride?: SandboxBackend;
|
|
661
662
|
environment?: Record<string, string>;
|
|
662
663
|
onSandboxCreated?: SandboxCreatedCallback;
|
|
664
|
+
metrics?: RuntimeMetricsHooks;
|
|
663
665
|
},
|
|
664
666
|
): Promise<EstablishedSandboxSession> {
|
|
665
667
|
const envelopeBackend = typeof envelope?.backendId === "string" ? (envelope.backendId as SandboxBackend) : undefined;
|
|
@@ -706,7 +708,15 @@ export async function establishSandboxSessionFromEnvelope(
|
|
|
706
708
|
// SOLE archive-replay seam, shared by the NotFound warm-reattach path AND the
|
|
707
709
|
// cold-restore branch (b) below.
|
|
708
710
|
const coldRestore = async (resumeFallbackState?: unknown): Promise<EstablishedSandboxSession> => {
|
|
709
|
-
const
|
|
711
|
+
const createStarted = Date.now();
|
|
712
|
+
let restored: Awaited<ReturnType<NonNullable<typeof client.create>>>;
|
|
713
|
+
try {
|
|
714
|
+
restored = await client.create!({ manifest: createManifest });
|
|
715
|
+
recordSandboxCreateMetric(opts.metrics, client.backendId, "completed", createStarted);
|
|
716
|
+
} catch (error) {
|
|
717
|
+
recordSandboxCreateMetric(opts.metrics, client.backendId, "failed", createStarted);
|
|
718
|
+
throw error;
|
|
719
|
+
}
|
|
710
720
|
let restoredState = (restored as { state?: unknown }).state;
|
|
711
721
|
let established: EstablishedSandboxSession = {
|
|
712
722
|
client,
|
|
@@ -830,6 +840,23 @@ export async function establishSandboxSessionFromEnvelope(
|
|
|
830
840
|
return await coldRestore();
|
|
831
841
|
}
|
|
832
842
|
|
|
843
|
+
function recordSandboxCreateMetric(
|
|
844
|
+
metrics: RuntimeMetricsHooks | undefined,
|
|
845
|
+
backend: string,
|
|
846
|
+
outcome: "completed" | "failed",
|
|
847
|
+
startedMs: number,
|
|
848
|
+
): void {
|
|
849
|
+
try {
|
|
850
|
+
metrics?.onSandboxCreate?.({
|
|
851
|
+
backend,
|
|
852
|
+
outcome,
|
|
853
|
+
durationSeconds: Math.max(0, (Date.now() - startedMs) / 1000),
|
|
854
|
+
});
|
|
855
|
+
} catch {
|
|
856
|
+
// Metrics emission must not affect sandbox lifecycle.
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
|
|
833
860
|
// A client that can SERIALIZE a live session state back to the persistable
|
|
834
861
|
// envelope form (the inverse of deserializeSessionState). Narrowed so the leaf
|
|
835
862
|
// stays agent-loop-free.
|