@skillcap/gdh 0.13.3 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/INSTALL-BUNDLE.json +1 -1
- package/README.md +4 -4
- package/RELEASE-SPAN-UPDATE-CONTRACTS.json +79 -0
- package/node_modules/@gdh/adapters/package.json +8 -8
- package/node_modules/@gdh/authoring/package.json +2 -2
- package/node_modules/@gdh/cli/package.json +10 -10
- package/node_modules/@gdh/core/dist/index.d.ts +37 -2
- package/node_modules/@gdh/core/dist/index.d.ts.map +1 -1
- package/node_modules/@gdh/core/dist/index.js +2 -2
- package/node_modules/@gdh/core/dist/index.js.map +1 -1
- package/node_modules/@gdh/core/package.json +1 -1
- package/node_modules/@gdh/docs/package.json +2 -2
- package/node_modules/@gdh/mcp/package.json +8 -8
- package/node_modules/@gdh/observability/dist/runtime-bundles.d.ts.map +1 -1
- package/node_modules/@gdh/observability/dist/runtime-bundles.js +28 -2
- package/node_modules/@gdh/observability/dist/runtime-bundles.js.map +1 -1
- package/node_modules/@gdh/observability/package.json +2 -2
- package/node_modules/@gdh/runtime/dist/bridge-surface.js +173 -0
- package/node_modules/@gdh/runtime/dist/bridge-surface.js.map +1 -1
- package/node_modules/@gdh/runtime/dist/index.d.ts.map +1 -1
- package/node_modules/@gdh/runtime/dist/index.js +387 -17
- package/node_modules/@gdh/runtime/dist/index.js.map +1 -1
- package/node_modules/@gdh/runtime/package.json +2 -2
- package/node_modules/@gdh/scan/package.json +3 -3
- package/node_modules/@gdh/verify/dist/scenarios.d.ts +3 -1
- package/node_modules/@gdh/verify/dist/scenarios.d.ts.map +1 -1
- package/node_modules/@gdh/verify/dist/scenarios.js +425 -36
- package/node_modules/@gdh/verify/dist/scenarios.js.map +1 -1
- package/node_modules/@gdh/verify/package.json +7 -7
- package/package.json +11 -11
|
@@ -1,11 +1,21 @@
|
|
|
1
1
|
import fs from "node:fs/promises";
|
|
2
2
|
import path from "node:path";
|
|
3
|
-
import { GDH_RUNTIME_RUN_BUNDLE_VERSION, GDH_SCENARIO_SCHEMA_VERSION, } from "@gdh/core";
|
|
3
|
+
import { GDH_RUNTIME_RECIPE_RUN_VERSION, GDH_RUNTIME_RUN_BUNDLE_VERSION, GDH_SCENARIO_SCHEMA_VERSION, presentPublicRuntimeTerms, } from "@gdh/core";
|
|
4
4
|
import { computeRuntimeInputSignature, inspectRuntimeRunBundle, writeRuntimeRunBundle, } from "@gdh/observability";
|
|
5
|
-
import { inspectRuntimeRecipeRunState, runRuntimeRecipe, } from "@gdh/runtime";
|
|
5
|
+
import { checkRuntimeRecipe, createRuntimeBridgeManager, inspectRuntimeRecipeRunState, runRuntimeRecipe, } from "@gdh/runtime";
|
|
6
6
|
import { parse } from "yaml";
|
|
7
7
|
const PRIMARY_VERIFICATION_SCENARIO_DIRECTORY = ".gdh/verification-scenarios";
|
|
8
8
|
const LEGACY_SCENARIO_DIRECTORY = [".gdh", "scenarios"].join("/");
|
|
9
|
+
export const SUPPORTED_SCENARIO_RUNTIME_ASSERTION_WAITERS = [
|
|
10
|
+
"state.node_property.await",
|
|
11
|
+
"state.node_presence.await",
|
|
12
|
+
"state.signal.await",
|
|
13
|
+
];
|
|
14
|
+
const RENDERED_PROVIDER_RUNTIME_INCOMPATIBILITY_SIGNALS = [
|
|
15
|
+
"GLIBCXX_3.4.32",
|
|
16
|
+
"GLIBC_2.38",
|
|
17
|
+
"GDExtension",
|
|
18
|
+
];
|
|
9
19
|
export async function listRuntimeScenarios(input) {
|
|
10
20
|
const scenarios = await readScenarioDefinitions(input.targetPath);
|
|
11
21
|
const entries = scenarios.map((scenario) => ({
|
|
@@ -81,32 +91,21 @@ export async function runRuntimeVerificationScenario(input) {
|
|
|
81
91
|
});
|
|
82
92
|
for (let attemptNumber = 1; attemptNumber <= maxAttempts; attemptNumber += 1) {
|
|
83
93
|
const attemptPath = path.join(bundlePath, "attempts", `attempt-${attemptNumber}`);
|
|
84
|
-
const
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
provider: input.provider,
|
|
89
|
-
parameters: input.parameters,
|
|
90
|
-
enabledFeatures: input.enabledFeatures,
|
|
91
|
-
disabledFeatures: input.disabledFeatures,
|
|
92
|
-
environment: input.environment,
|
|
93
|
-
workspaceMode: input.workspaceMode,
|
|
94
|
-
screenshotCapture: input.screenshotCapture === "rendered" ||
|
|
95
|
-
scenario.artifactPolicy.screenshots === "rendered" ||
|
|
96
|
-
input.provider === "docker"
|
|
97
|
-
? "rendered"
|
|
98
|
-
: "never",
|
|
99
|
-
maxRuntimeSeconds: scenario.executionPolicy.maxRuntimeSeconds,
|
|
100
|
-
artifactDirectory: attemptPath,
|
|
94
|
+
const execution = await executeScenarioAttempt({
|
|
95
|
+
input,
|
|
96
|
+
scenario,
|
|
97
|
+
attemptPath,
|
|
101
98
|
});
|
|
102
|
-
const inspection =
|
|
99
|
+
const { recipeRun, inspection, runtimeAssertions } = execution;
|
|
103
100
|
const assertions = evaluateAssertions(scenario.assertions, inspection);
|
|
104
101
|
const feedback = deriveFeedback({
|
|
105
102
|
scenario,
|
|
106
103
|
recipeId: input.recipeId,
|
|
104
|
+
provider: input.provider,
|
|
107
105
|
recipeRun,
|
|
108
106
|
assertions,
|
|
109
107
|
inspection,
|
|
108
|
+
runtimeAssertions,
|
|
110
109
|
});
|
|
111
110
|
const attempt = buildAttemptResult({
|
|
112
111
|
attemptNumber,
|
|
@@ -114,6 +113,7 @@ export async function runRuntimeVerificationScenario(input) {
|
|
|
114
113
|
recipeRun,
|
|
115
114
|
inspection,
|
|
116
115
|
assertions,
|
|
116
|
+
runtimeAssertions,
|
|
117
117
|
feedback,
|
|
118
118
|
});
|
|
119
119
|
attempts.push(attempt);
|
|
@@ -196,6 +196,293 @@ async function persistProvisionalRunBundle(input) {
|
|
|
196
196
|
export async function inspectRuntimeVerificationBundleState(input) {
|
|
197
197
|
return inspectRuntimeRunBundle(input.targetPath, { bundleId: input.bundleId ?? null });
|
|
198
198
|
}
|
|
199
|
+
async function executeScenarioAttempt(input) {
|
|
200
|
+
if (input.scenario.runtimeAssertions.length === 0) {
|
|
201
|
+
return runPlainScenarioAttempt(input);
|
|
202
|
+
}
|
|
203
|
+
return runBridgeBackedScenarioAttempt(input);
|
|
204
|
+
}
|
|
205
|
+
async function runPlainScenarioAttempt(input) {
|
|
206
|
+
const recipeRun = await runRuntimeRecipe(buildScenarioAttemptRunInput(input.input, input.scenario, input.attemptPath));
|
|
207
|
+
return {
|
|
208
|
+
recipeRun,
|
|
209
|
+
inspection: await inspectRuntimeRecipeRunState(recipeRun),
|
|
210
|
+
runtimeAssertions: [],
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
async function runBridgeBackedScenarioAttempt(input) {
|
|
214
|
+
const runInput = buildScenarioAttemptRunInput(input.input, input.scenario, input.attemptPath);
|
|
215
|
+
const liveCheck = await checkRuntimeRecipe(runInput);
|
|
216
|
+
if (liveCheck.state !== "runnable" || liveCheck.recipe === null || liveCheck.launchPreview === null) {
|
|
217
|
+
return runPlainScenarioAttempt(input);
|
|
218
|
+
}
|
|
219
|
+
const bridgeManager = createRuntimeBridgeManager();
|
|
220
|
+
const startResult = await bridgeManager.startSession(runInput);
|
|
221
|
+
if (startResult.state !== "ready" || startResult.session === null) {
|
|
222
|
+
const recipeRun = buildBridgeStartFailureRecipeRun({
|
|
223
|
+
input: input.input,
|
|
224
|
+
scenario: input.scenario,
|
|
225
|
+
liveCheck,
|
|
226
|
+
startResult,
|
|
227
|
+
});
|
|
228
|
+
return {
|
|
229
|
+
recipeRun,
|
|
230
|
+
inspection: await inspectRuntimeRecipeRunState(recipeRun),
|
|
231
|
+
runtimeAssertions: [],
|
|
232
|
+
};
|
|
233
|
+
}
|
|
234
|
+
let stopResult = null;
|
|
235
|
+
try {
|
|
236
|
+
const runtimeAssertions = await runRuntimeAssertionsOverBridge({
|
|
237
|
+
bridgeManager,
|
|
238
|
+
sessionId: startResult.session.sessionId,
|
|
239
|
+
definitions: input.scenario.runtimeAssertions,
|
|
240
|
+
});
|
|
241
|
+
stopResult = await bridgeManager.stopSession(startResult.session.sessionId);
|
|
242
|
+
if (stopResult.state !== "stopped" || stopResult.session === null) {
|
|
243
|
+
const recipeRun = buildBridgeStopFailureRecipeRun({
|
|
244
|
+
input: input.input,
|
|
245
|
+
scenario: input.scenario,
|
|
246
|
+
liveCheck,
|
|
247
|
+
startResult,
|
|
248
|
+
stopResult,
|
|
249
|
+
});
|
|
250
|
+
return {
|
|
251
|
+
recipeRun,
|
|
252
|
+
inspection: await inspectRuntimeRecipeRunState(recipeRun),
|
|
253
|
+
runtimeAssertions,
|
|
254
|
+
};
|
|
255
|
+
}
|
|
256
|
+
const recipeRun = await buildBridgeBackedRecipeRun({
|
|
257
|
+
input: input.input,
|
|
258
|
+
scenario: input.scenario,
|
|
259
|
+
liveCheck,
|
|
260
|
+
startResult,
|
|
261
|
+
stopResult,
|
|
262
|
+
});
|
|
263
|
+
return {
|
|
264
|
+
recipeRun,
|
|
265
|
+
inspection: await inspectRuntimeRecipeRunState(recipeRun),
|
|
266
|
+
runtimeAssertions,
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
finally {
|
|
270
|
+
if (stopResult === null) {
|
|
271
|
+
await bridgeManager
|
|
272
|
+
.stopSession(startResult.session.sessionId)
|
|
273
|
+
.catch(async () => bridgeManager.stopAll().catch(() => { }));
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
function buildScenarioAttemptRunInput(input, scenario, attemptPath) {
|
|
278
|
+
const screenshotPolicy = scenario.artifactPolicy.screenshots;
|
|
279
|
+
const shouldCaptureRenderedScreenshot = input.screenshotCapture === "rendered" ||
|
|
280
|
+
screenshotPolicy === "rendered" ||
|
|
281
|
+
input.provider === "docker";
|
|
282
|
+
return {
|
|
283
|
+
targetPath: input.targetPath,
|
|
284
|
+
projectConfig: input.projectConfig,
|
|
285
|
+
recipeId: input.recipeId,
|
|
286
|
+
provider: input.provider,
|
|
287
|
+
parameters: input.parameters,
|
|
288
|
+
enabledFeatures: input.enabledFeatures,
|
|
289
|
+
disabledFeatures: input.disabledFeatures,
|
|
290
|
+
environment: input.environment,
|
|
291
|
+
workspaceMode: input.workspaceMode,
|
|
292
|
+
screenshotCapture: shouldCaptureRenderedScreenshot ? "rendered" : "never",
|
|
293
|
+
screenshotPolicy,
|
|
294
|
+
maxRuntimeSeconds: scenario.executionPolicy.maxRuntimeSeconds,
|
|
295
|
+
artifactDirectory: attemptPath,
|
|
296
|
+
};
|
|
297
|
+
}
|
|
298
|
+
async function runRuntimeAssertionsOverBridge(input) {
|
|
299
|
+
const results = [];
|
|
300
|
+
for (const definition of input.definitions) {
|
|
301
|
+
const invocation = await input.bridgeManager.invokeEntry(input.sessionId, definition.waiter, definition.input);
|
|
302
|
+
const result = buildRuntimeAssertionResult(definition, invocation);
|
|
303
|
+
results.push(result);
|
|
304
|
+
if (definition.required && result.status !== "passed") {
|
|
305
|
+
break;
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
return results;
|
|
309
|
+
}
|
|
310
|
+
function buildRuntimeAssertionResult(definition, invocation) {
|
|
311
|
+
const reasons = [...invocation.reasons];
|
|
312
|
+
if ((invocation.state === "ok" || invocation.state === "unavailable") &&
|
|
313
|
+
invocation.waiterEvidence === null) {
|
|
314
|
+
reasons.push("waiter_evidence_missing");
|
|
315
|
+
}
|
|
316
|
+
const status = invocation.state === "ok" && invocation.waiterEvidence?.outcome === "satisfied"
|
|
317
|
+
? "passed"
|
|
318
|
+
: invocation.state === "unavailable" && invocation.waiterEvidence?.outcome === "timed_out"
|
|
319
|
+
? "blocked"
|
|
320
|
+
: invocation.state === "unavailable"
|
|
321
|
+
? "blocked"
|
|
322
|
+
: "failed";
|
|
323
|
+
return {
|
|
324
|
+
id: definition.id,
|
|
325
|
+
summary: definition.summary,
|
|
326
|
+
required: definition.required,
|
|
327
|
+
waiter: definition.waiter,
|
|
328
|
+
input: definition.input,
|
|
329
|
+
status,
|
|
330
|
+
state: invocation.state,
|
|
331
|
+
reasons: dedupe(reasons),
|
|
332
|
+
result: invocation.result,
|
|
333
|
+
waiterEvidence: invocation.waiterEvidence,
|
|
334
|
+
transcriptPath: invocation.transcriptPath,
|
|
335
|
+
};
|
|
336
|
+
}
|
|
337
|
+
function buildBridgeStartFailureRecipeRun(input) {
|
|
338
|
+
return {
|
|
339
|
+
version: GDH_RUNTIME_RECIPE_RUN_VERSION,
|
|
340
|
+
targetPath: input.input.targetPath,
|
|
341
|
+
recipeId: input.input.recipeId,
|
|
342
|
+
state: input.startResult.state === "blocked" ? "blocked" : "failed",
|
|
343
|
+
summary: input.startResult.summary,
|
|
344
|
+
reasons: input.startResult.reasons,
|
|
345
|
+
recipe: input.liveCheck.recipe,
|
|
346
|
+
check: input.liveCheck,
|
|
347
|
+
session: null,
|
|
348
|
+
startedAt: null,
|
|
349
|
+
finishedAt: new Date().toISOString(),
|
|
350
|
+
exitCode: null,
|
|
351
|
+
launchCommand: input.startResult.launchCommand ?? input.liveCheck.launchPreview?.command ?? null,
|
|
352
|
+
screenshot: buildBridgeBackedScreenshotResult(input.scenario.artifactPolicy.screenshots),
|
|
353
|
+
artifacts: [],
|
|
354
|
+
statusPromotion: "none",
|
|
355
|
+
};
|
|
356
|
+
}
|
|
357
|
+
function buildBridgeStopFailureRecipeRun(input) {
|
|
358
|
+
const artifactDirectory = input.startResult.session?.artifactDirectory ?? null;
|
|
359
|
+
const transcriptPath = input.stopResult.transcriptPath ?? null;
|
|
360
|
+
return {
|
|
361
|
+
version: GDH_RUNTIME_RECIPE_RUN_VERSION,
|
|
362
|
+
targetPath: input.input.targetPath,
|
|
363
|
+
recipeId: input.input.recipeId,
|
|
364
|
+
state: "failed",
|
|
365
|
+
summary: input.stopResult.summary,
|
|
366
|
+
reasons: input.stopResult.reasons.length > 0 ? input.stopResult.reasons : ["bridge_session_stop_failed"],
|
|
367
|
+
recipe: input.liveCheck.recipe,
|
|
368
|
+
check: input.liveCheck,
|
|
369
|
+
session: null,
|
|
370
|
+
startedAt: input.startResult.session?.startedAt ?? null,
|
|
371
|
+
finishedAt: new Date().toISOString(),
|
|
372
|
+
exitCode: input.stopResult.exitCode,
|
|
373
|
+
launchCommand: input.startResult.launchCommand ?? input.liveCheck.launchPreview?.command ?? null,
|
|
374
|
+
screenshot: buildBridgeBackedScreenshotResult(input.scenario.artifactPolicy.screenshots),
|
|
375
|
+
artifacts: buildBridgeBackedArtifacts({
|
|
376
|
+
artifactDirectory,
|
|
377
|
+
transcriptPath,
|
|
378
|
+
reportPath: null,
|
|
379
|
+
}),
|
|
380
|
+
statusPromotion: "none",
|
|
381
|
+
};
|
|
382
|
+
}
|
|
383
|
+
async function buildBridgeBackedRecipeRun(input) {
|
|
384
|
+
const artifactDirectory = input.stopResult.session?.artifactDirectory ?? input.startResult.session?.artifactDirectory ?? null;
|
|
385
|
+
const reportPath = artifactDirectory === null ? null : path.join(artifactDirectory, "report.json");
|
|
386
|
+
const recipeRun = {
|
|
387
|
+
version: GDH_RUNTIME_RECIPE_RUN_VERSION,
|
|
388
|
+
targetPath: input.input.targetPath,
|
|
389
|
+
recipeId: input.input.recipeId,
|
|
390
|
+
state: "passed",
|
|
391
|
+
summary: `Runtime recipe "${input.input.recipeId}" ran successfully through a bridge-backed verification session.`,
|
|
392
|
+
reasons: [],
|
|
393
|
+
recipe: input.liveCheck.recipe,
|
|
394
|
+
check: input.liveCheck,
|
|
395
|
+
session: input.stopResult.session === null
|
|
396
|
+
? null
|
|
397
|
+
: {
|
|
398
|
+
launchMode: "gdh_launch",
|
|
399
|
+
workspaceMode: input.stopResult.session.workspaceMode,
|
|
400
|
+
targetPath: input.input.targetPath,
|
|
401
|
+
workingCopyPath: input.stopResult.session.workingCopyPath,
|
|
402
|
+
},
|
|
403
|
+
startedAt: input.startResult.session?.startedAt ?? null,
|
|
404
|
+
finishedAt: input.stopResult.session?.finishedAt ?? new Date().toISOString(),
|
|
405
|
+
exitCode: input.stopResult.exitCode,
|
|
406
|
+
launchCommand: input.startResult.launchCommand ?? input.liveCheck.launchPreview?.command ?? null,
|
|
407
|
+
screenshot: buildBridgeBackedScreenshotResult(input.scenario.artifactPolicy.screenshots),
|
|
408
|
+
artifacts: buildBridgeBackedArtifacts({
|
|
409
|
+
artifactDirectory,
|
|
410
|
+
transcriptPath: input.stopResult.transcriptPath,
|
|
411
|
+
reportPath,
|
|
412
|
+
}),
|
|
413
|
+
statusPromotion: input.liveCheck.recipe?.status === "draft" ? "candidate_observed" : "none",
|
|
414
|
+
};
|
|
415
|
+
if (reportPath !== null) {
|
|
416
|
+
await fs.mkdir(path.dirname(reportPath), { recursive: true });
|
|
417
|
+
await fs.writeFile(reportPath, `${JSON.stringify(presentPublicRuntimeTerms(recipeRun), null, 2)}\n`, "utf8");
|
|
418
|
+
}
|
|
419
|
+
return recipeRun;
|
|
420
|
+
}
|
|
421
|
+
function buildBridgeBackedArtifacts(input) {
|
|
422
|
+
if (input.artifactDirectory === null) {
|
|
423
|
+
return input.transcriptPath === null
|
|
424
|
+
? []
|
|
425
|
+
: [
|
|
426
|
+
{
|
|
427
|
+
id: "bridge-transcript",
|
|
428
|
+
path: input.transcriptPath,
|
|
429
|
+
description: "Persisted bridge-session transcript captured during runtime-assertion execution.",
|
|
430
|
+
},
|
|
431
|
+
];
|
|
432
|
+
}
|
|
433
|
+
const artifacts = [
|
|
434
|
+
{
|
|
435
|
+
id: "launch",
|
|
436
|
+
path: path.join(input.artifactDirectory, "launch.json"),
|
|
437
|
+
description: "Assembled launch preview used for the bridge-backed runtime session.",
|
|
438
|
+
},
|
|
439
|
+
{
|
|
440
|
+
id: "stdout",
|
|
441
|
+
path: path.join(input.artifactDirectory, "stdout.log"),
|
|
442
|
+
description: "Captured standard output from the bridge-backed runtime session.",
|
|
443
|
+
},
|
|
444
|
+
{
|
|
445
|
+
id: "stderr",
|
|
446
|
+
path: path.join(input.artifactDirectory, "stderr.log"),
|
|
447
|
+
description: "Captured standard error from the bridge-backed runtime session.",
|
|
448
|
+
},
|
|
449
|
+
];
|
|
450
|
+
if (input.transcriptPath !== null) {
|
|
451
|
+
artifacts.push({
|
|
452
|
+
id: "bridge-transcript",
|
|
453
|
+
path: input.transcriptPath,
|
|
454
|
+
description: "Persisted bridge-session transcript captured during runtime-assertion execution.",
|
|
455
|
+
});
|
|
456
|
+
}
|
|
457
|
+
if (input.reportPath !== null) {
|
|
458
|
+
artifacts.push({
|
|
459
|
+
id: "report",
|
|
460
|
+
path: input.reportPath,
|
|
461
|
+
description: "Structured runtime recipe report for this bridge-backed execution.",
|
|
462
|
+
});
|
|
463
|
+
}
|
|
464
|
+
return artifacts;
|
|
465
|
+
}
|
|
466
|
+
function buildBridgeBackedScreenshotResult(screenshots) {
|
|
467
|
+
if (screenshots === "rendered") {
|
|
468
|
+
return {
|
|
469
|
+
requested: true,
|
|
470
|
+
state: "unavailable",
|
|
471
|
+
summary: "Bridge-backed runtime assertions do not capture screenshots yet; waiter evidence remains the durable state-first proof.",
|
|
472
|
+
reason: "bridge_backed_runtime_assertions_do_not_capture_screenshots",
|
|
473
|
+
imagePath: null,
|
|
474
|
+
metadataPath: null,
|
|
475
|
+
};
|
|
476
|
+
}
|
|
477
|
+
return {
|
|
478
|
+
requested: false,
|
|
479
|
+
state: "omitted",
|
|
480
|
+
summary: "Screenshot capture was not requested for this scenario.",
|
|
481
|
+
reason: null,
|
|
482
|
+
imagePath: null,
|
|
483
|
+
metadataPath: null,
|
|
484
|
+
};
|
|
485
|
+
}
|
|
199
486
|
async function readScenarioDefinitions(targetPath) {
|
|
200
487
|
const scenariosDirectory = await resolveScenarioDirectory(targetPath, PRIMARY_VERIFICATION_SCENARIO_DIRECTORY, LEGACY_SCENARIO_DIRECTORY);
|
|
201
488
|
const entries = await fs.readdir(scenariosDirectory, { withFileTypes: true }).catch(() => []);
|
|
@@ -214,7 +501,7 @@ async function loadScenarioDefinition(targetPath, scenarioId) {
|
|
|
214
501
|
const scenarios = await readScenarioDefinitions(targetPath);
|
|
215
502
|
return scenarios.find((entry) => entry.id === scenarioId) ?? null;
|
|
216
503
|
}
|
|
217
|
-
function parseScenarioDefinition(content) {
|
|
504
|
+
export function parseScenarioDefinition(content) {
|
|
218
505
|
const parsed = toRecord(parse(content));
|
|
219
506
|
const parsedVersion = readNumber(parsed["version"]);
|
|
220
507
|
const screenshotPolicy = readScenarioScreenshotPolicy(toRecord(parsed["artifact_policy"])["screenshots"]);
|
|
@@ -238,6 +525,7 @@ function parseScenarioDefinition(content) {
|
|
|
238
525
|
expected: coerceJsonValue(entry["expected"]),
|
|
239
526
|
required: readBoolean(entry["required"], true),
|
|
240
527
|
})),
|
|
528
|
+
runtimeAssertions: parseScenarioRuntimeAssertions(parsed["runtime_assertions"]),
|
|
241
529
|
artifactPolicy: {
|
|
242
530
|
captureState: readBoolean(toRecord(parsed["artifact_policy"])["capture_state"], true),
|
|
243
531
|
screenshots: screenshotPolicy,
|
|
@@ -251,6 +539,33 @@ function parseScenarioDefinition(content) {
|
|
|
251
539
|
doneRelevance: readStringArray(parsed["done_relevance"]),
|
|
252
540
|
};
|
|
253
541
|
}
|
|
542
|
+
function parseScenarioRuntimeAssertions(value) {
|
|
543
|
+
return readObjectArray(value).map((entry, index) => {
|
|
544
|
+
const id = readString(entry["id"]);
|
|
545
|
+
const summary = readString(entry["summary"]);
|
|
546
|
+
const waiter = readString(entry["waiter"]);
|
|
547
|
+
const input = entry["input"];
|
|
548
|
+
if (id.length === 0) {
|
|
549
|
+
throw new Error(`Verification scenario runtime_assertions[${index}].id must be a non-empty string.`);
|
|
550
|
+
}
|
|
551
|
+
if (summary.length === 0) {
|
|
552
|
+
throw new Error(`Verification scenario runtime_assertions[${index}].summary must be a non-empty string.`);
|
|
553
|
+
}
|
|
554
|
+
if (!SUPPORTED_SCENARIO_RUNTIME_ASSERTION_WAITERS.includes(waiter)) {
|
|
555
|
+
throw new Error(`Verification scenario runtime_assertions[${index}].waiter must be one of ${SUPPORTED_SCENARIO_RUNTIME_ASSERTION_WAITERS.join(", ")}.`);
|
|
556
|
+
}
|
|
557
|
+
if (!isJsonRecord(input)) {
|
|
558
|
+
throw new Error(`Verification scenario runtime_assertions[${index}].input must be a JSON object.`);
|
|
559
|
+
}
|
|
560
|
+
return {
|
|
561
|
+
id,
|
|
562
|
+
summary,
|
|
563
|
+
waiter: waiter,
|
|
564
|
+
input: coerceJsonRecord(input),
|
|
565
|
+
required: readBoolean(entry["required"], true),
|
|
566
|
+
};
|
|
567
|
+
});
|
|
568
|
+
}
|
|
254
569
|
async function resolveScenarioDirectory(targetPath, primaryRelativePath, legacyRelativePath) {
|
|
255
570
|
const primaryDirectory = path.join(targetPath, primaryRelativePath);
|
|
256
571
|
if (await directoryExists(primaryDirectory)) {
|
|
@@ -323,7 +638,7 @@ function evaluateAssertions(assertions, inspection) {
|
|
|
323
638
|
});
|
|
324
639
|
}
|
|
325
640
|
function buildAttemptResult(input) {
|
|
326
|
-
const outcome = classifyAttemptOutcome(input.recipeRun, input.assertions);
|
|
641
|
+
const outcome = classifyAttemptOutcome(input.recipeRun, input.assertions, input.runtimeAssertions);
|
|
327
642
|
const startedAt = input.recipeRun.startedAt ?? input.recipeRun.finishedAt;
|
|
328
643
|
return {
|
|
329
644
|
attemptNumber: input.attemptNumber,
|
|
@@ -339,6 +654,11 @@ function buildAttemptResult(input) {
|
|
|
339
654
|
: `Verification scenario "${input.scenario.id}" failed on attempt ${input.attemptNumber}.`,
|
|
340
655
|
reasons: dedupe([
|
|
341
656
|
...input.recipeRun.reasons,
|
|
657
|
+
...input.runtimeAssertions
|
|
658
|
+
.filter((entry) => entry.status !== "passed")
|
|
659
|
+
.flatMap((entry) => entry.reasons.length > 0
|
|
660
|
+
? entry.reasons.map((reason) => `${entry.id}:${reason}`)
|
|
661
|
+
: [`${entry.id}:${entry.status}`]),
|
|
342
662
|
...input.assertions
|
|
343
663
|
.filter((entry) => entry.status !== "passed")
|
|
344
664
|
.map((entry) => `${entry.id}:${entry.reason ?? entry.status}`),
|
|
@@ -346,16 +666,23 @@ function buildAttemptResult(input) {
|
|
|
346
666
|
recipeRun: input.recipeRun,
|
|
347
667
|
inspection: input.inspection,
|
|
348
668
|
assertions: input.assertions,
|
|
669
|
+
runtimeAssertions: input.runtimeAssertions,
|
|
349
670
|
feedback: input.feedback,
|
|
350
671
|
};
|
|
351
672
|
}
|
|
352
|
-
function classifyAttemptOutcome(recipeRun, assertions) {
|
|
673
|
+
function classifyAttemptOutcome(recipeRun, assertions, runtimeAssertions) {
|
|
353
674
|
if (recipeRun.state === "blocked") {
|
|
354
675
|
return "blocked";
|
|
355
676
|
}
|
|
356
677
|
if (recipeRun.state === "failed") {
|
|
357
678
|
return "failed";
|
|
358
679
|
}
|
|
680
|
+
if (runtimeAssertions.some((entry) => entry.required && entry.status === "failed")) {
|
|
681
|
+
return "failed";
|
|
682
|
+
}
|
|
683
|
+
if (runtimeAssertions.some((entry) => entry.required && entry.status === "blocked")) {
|
|
684
|
+
return "blocked";
|
|
685
|
+
}
|
|
359
686
|
if (assertions.some((entry) => entry.required && entry.status === "blocked")) {
|
|
360
687
|
return "blocked";
|
|
361
688
|
}
|
|
@@ -412,11 +739,27 @@ function buildBundleArtifacts(bundlePath, attempts, feedback) {
|
|
|
412
739
|
path: artifact.path,
|
|
413
740
|
description: `Attempt ${attempt.attemptNumber}: ${artifact.description}`,
|
|
414
741
|
})));
|
|
742
|
+
const knownTranscriptPaths = new Set(attempt.recipeRun.artifacts
|
|
743
|
+
.filter((artifact) => artifact.id === "bridge-transcript")
|
|
744
|
+
.map((artifact) => artifact.path));
|
|
745
|
+
for (const runtimeAssertion of attempt.runtimeAssertions) {
|
|
746
|
+
if (runtimeAssertion.transcriptPath === null ||
|
|
747
|
+
knownTranscriptPaths.has(runtimeAssertion.transcriptPath)) {
|
|
748
|
+
continue;
|
|
749
|
+
}
|
|
750
|
+
artifacts.push({
|
|
751
|
+
id: `attempt-${attempt.attemptNumber}-${runtimeAssertion.id}-transcript`,
|
|
752
|
+
path: runtimeAssertion.transcriptPath,
|
|
753
|
+
description: `Attempt ${attempt.attemptNumber}: bridge transcript referenced by runtime assertion "${runtimeAssertion.id}".`,
|
|
754
|
+
});
|
|
755
|
+
knownTranscriptPaths.add(runtimeAssertion.transcriptPath);
|
|
756
|
+
}
|
|
415
757
|
}
|
|
416
758
|
return artifacts;
|
|
417
759
|
}
|
|
418
760
|
function deriveFeedback(input) {
|
|
419
761
|
const feedback = [];
|
|
762
|
+
const renderedScreenshotVerification = isRenderedScreenshotVerification(input.scenario);
|
|
420
763
|
if (input.scenario.recipeId !== input.recipeId) {
|
|
421
764
|
feedback.push({
|
|
422
765
|
id: "scenario-recipe-mismatch",
|
|
@@ -450,19 +793,24 @@ function deriveFeedback(input) {
|
|
|
450
793
|
id: "rendered-screenshot-unavailable",
|
|
451
794
|
attribution: "recipe_issue",
|
|
452
795
|
code: "rendered_screenshot_unavailable",
|
|
453
|
-
summary: "
|
|
454
|
-
details:
|
|
455
|
-
input.
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
796
|
+
summary: "Rendered screenshot capture was requested, but this verification run did not produce usable screenshot evidence.",
|
|
797
|
+
details: [
|
|
798
|
+
`provider:${input.provider}`,
|
|
799
|
+
`screenshot_state:${input.recipeRun.screenshot.state}`,
|
|
800
|
+
...(input.recipeRun.screenshot.reason === null
|
|
801
|
+
? []
|
|
802
|
+
: [`screenshot_reason:${input.recipeRun.screenshot.reason}`]),
|
|
803
|
+
],
|
|
804
|
+
});
|
|
805
|
+
}
|
|
806
|
+
const renderedProviderRuntimeSignals = findRenderedProviderRuntimeIncompatibilitySignals(input.inspection);
|
|
807
|
+
if (renderedScreenshotVerification && renderedProviderRuntimeSignals.length > 0) {
|
|
808
|
+
feedback.push({
|
|
809
|
+
id: "rendered-provider-runtime-incompatible",
|
|
810
|
+
attribution: "recipe_issue",
|
|
811
|
+
code: "rendered_provider_runtime_incompatible",
|
|
812
|
+
summary: "Rendered verification stderr shows provider/runtime incompatibility signals that can block screenshot proof.",
|
|
813
|
+
details: renderedProviderRuntimeSignals,
|
|
466
814
|
});
|
|
467
815
|
}
|
|
468
816
|
const blockedAssertions = input.assertions.filter((entry) => entry.status === "blocked");
|
|
@@ -485,6 +833,26 @@ function deriveFeedback(input) {
|
|
|
485
833
|
details: failedAssertions.map((entry) => entry.id),
|
|
486
834
|
});
|
|
487
835
|
}
|
|
836
|
+
const blockedRuntimeAssertions = input.runtimeAssertions.filter((entry) => entry.required && entry.status === "blocked");
|
|
837
|
+
if (blockedRuntimeAssertions.length > 0) {
|
|
838
|
+
feedback.push({
|
|
839
|
+
id: "runtime-assertions-blocked",
|
|
840
|
+
attribution: "scenario_issue",
|
|
841
|
+
code: "runtime_assertion_blocked",
|
|
842
|
+
summary: "One or more required runtime assertions timed out or were unavailable during scenario execution.",
|
|
843
|
+
details: blockedRuntimeAssertions.map((entry) => `${entry.id}:${entry.reasons.join("|") || entry.status}`),
|
|
844
|
+
});
|
|
845
|
+
}
|
|
846
|
+
const failedRuntimeAssertions = input.runtimeAssertions.filter((entry) => entry.required && entry.status === "failed");
|
|
847
|
+
if (failedRuntimeAssertions.length > 0) {
|
|
848
|
+
feedback.push({
|
|
849
|
+
id: "runtime-assertions-failed",
|
|
850
|
+
attribution: "gdh_gap",
|
|
851
|
+
code: "runtime_assertion_failed",
|
|
852
|
+
summary: "One or more required runtime assertions failed before GDH could record satisfied waiter evidence.",
|
|
853
|
+
details: failedRuntimeAssertions.map((entry) => `${entry.id}:${entry.reasons.join("|") || entry.status}`),
|
|
854
|
+
});
|
|
855
|
+
}
|
|
488
856
|
const assertedSources = new Set(input.scenario.assertions.map((entry) => entry.source));
|
|
489
857
|
const missingSources = input.inspection.filter((entry) => entry.state !== "available" && assertedSources.has(entry.id));
|
|
490
858
|
if (missingSources.length > 0) {
|
|
@@ -498,6 +866,18 @@ function deriveFeedback(input) {
|
|
|
498
866
|
}
|
|
499
867
|
return dedupeFeedback(feedback);
|
|
500
868
|
}
|
|
869
|
+
function isRenderedScreenshotVerification(scenario) {
|
|
870
|
+
return (scenario.artifactPolicy.screenshots === "rendered" &&
|
|
871
|
+
scenario.runtimeAssertions.length === 0);
|
|
872
|
+
}
|
|
873
|
+
function findRenderedProviderRuntimeIncompatibilitySignals(inspection) {
|
|
874
|
+
const stderrText = inspection.find((entry) => entry.id === "stderr_text");
|
|
875
|
+
const stderrValue = stderrText?.value;
|
|
876
|
+
if (stderrText?.state !== "available" || typeof stderrValue !== "string") {
|
|
877
|
+
return [];
|
|
878
|
+
}
|
|
879
|
+
return RENDERED_PROVIDER_RUNTIME_INCOMPATIBILITY_SIGNALS.filter((signal) => stderrValue.includes(signal));
|
|
880
|
+
}
|
|
501
881
|
function classifyReasons(reasons) {
|
|
502
882
|
if (reasons.some((reason) => reason.includes("godot_editor_not_configured") ||
|
|
503
883
|
reason.includes("missing_environment") ||
|
|
@@ -546,6 +926,7 @@ async function buildMissingScenarioBundle(input) {
|
|
|
546
926
|
setupSteps: [],
|
|
547
927
|
actions: [],
|
|
548
928
|
assertions: [],
|
|
929
|
+
runtimeAssertions: [],
|
|
549
930
|
artifactPolicy: {
|
|
550
931
|
captureState: true,
|
|
551
932
|
screenshots: "never",
|
|
@@ -685,6 +1066,14 @@ function readStringArray(value) {
|
|
|
685
1066
|
function readObjectArray(value) {
|
|
686
1067
|
return Array.isArray(value) ? value.map((entry) => toRecord(entry)) : [];
|
|
687
1068
|
}
|
|
1069
|
+
function isJsonRecord(value) {
|
|
1070
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
1071
|
+
}
|
|
1072
|
+
function coerceJsonRecord(value) {
|
|
1073
|
+
return isJsonRecord(value)
|
|
1074
|
+
? coerceJsonValue(value)
|
|
1075
|
+
: {};
|
|
1076
|
+
}
|
|
688
1077
|
function coerceJsonValue(value) {
|
|
689
1078
|
if (value === null ||
|
|
690
1079
|
typeof value === "string" ||
|