@riddledc/riddle-proof 0.8.28 → 0.8.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/codex-exec-agent.cjs +8 -5
- package/dist/adapters/codex-exec-agent.js +1 -1
- package/dist/adapters/codex.cjs +8 -5
- package/dist/adapters/codex.js +1 -1
- package/dist/adapters/local-agent.cjs +8 -5
- package/dist/adapters/local-agent.js +1 -1
- package/dist/advanced/engine-harness.cjs +56 -1
- package/dist/advanced/engine-harness.js +1 -1
- package/dist/advanced/index.cjs +56 -1
- package/dist/advanced/index.js +2 -2
- package/dist/advanced/proof-run-engine.cjs +56 -1
- package/dist/advanced/proof-run-engine.js +1 -1
- package/dist/{chunk-YC77HZVF.js → chunk-32RE64IO.js} +56 -1
- package/dist/{chunk-4PPJKW3Z.js → chunk-73EBR3YL.js} +8 -5
- package/dist/{chunk-27BG64ZG.js → chunk-XJA2GDVN.js} +2 -2
- package/dist/cli/index.js +3 -3
- package/dist/cli.cjs +64 -6
- package/dist/cli.js +3 -3
- package/dist/codex-exec-agent.cjs +8 -5
- package/dist/codex-exec-agent.js +1 -1
- package/dist/engine-harness.cjs +56 -1
- package/dist/engine-harness.js +1 -1
- package/dist/index.cjs +64 -6
- package/dist/index.js +2 -2
- package/dist/local-agent.cjs +8 -5
- package/dist/local-agent.js +1 -1
- package/dist/proof-run-engine.cjs +56 -1
- package/dist/proof-run-engine.js +1 -1
- package/package.json +1 -1
- package/runtime/lib/author.py +11 -3
- package/runtime/lib/setup.py +72 -1
- package/runtime/lib/verify.py +13 -1
- package/runtime/tests/recon_verify_smoke.py +31 -3
- /package/dist/{chunk-AM3K5FPW.js → chunk-UWO4YR7I.js} +0 -0
package/dist/cli.cjs
CHANGED
|
@@ -2684,6 +2684,60 @@ ${implementRes.stderr || ""}`;
|
|
|
2684
2684
|
verifyRes = runOne("verify");
|
|
2685
2685
|
executed.push(executedStep(verifyRes));
|
|
2686
2686
|
if (!verifyRes.ok || verifyRes.haltedForApproval) {
|
|
2687
|
+
const failedVerifyState = readState(config.statePath);
|
|
2688
|
+
const failedVerifyStatus = failedVerifyState?.verify_status || "";
|
|
2689
|
+
if (!verifyRes.haltedForApproval && (failedVerifyStatus === "capture_incomplete" || failedVerifyStatus === "capture_error")) {
|
|
2690
|
+
const verifyDecisionRequest2 = failedVerifyState?.verify_decision_request || null;
|
|
2691
|
+
const captureQuality = verifyDecisionRequest2?.capture_quality || {};
|
|
2692
|
+
const conclusiveVerifyBlockers2 = proofAssessmentHardBlockersForState({
|
|
2693
|
+
...failedVerifyState,
|
|
2694
|
+
structured_interaction_capture_failure_summary: stringValue(verifyDecisionRequest2?.structured_interaction_capture_failure_summary) || stringValue(failedVerifyState?.structured_interaction_capture_failure_summary) || void 0,
|
|
2695
|
+
structured_interaction_failure_summary: stringValue(verifyDecisionRequest2?.structured_interaction_failure_summary) || stringValue(failedVerifyState?.structured_interaction_failure_summary) || void 0
|
|
2696
|
+
});
|
|
2697
|
+
const structuredInteractionFailureSummary2 = stringValue(verifyDecisionRequest2?.structured_interaction_capture_failure_summary) || stringValue(verifyDecisionRequest2?.structured_interaction_failure_summary) || stringValue(failedVerifyState?.structured_interaction_capture_failure_summary) || stringValue(failedVerifyState?.structured_interaction_failure_summary) || stringValue(conclusiveVerifyBlockers2[0]);
|
|
2698
|
+
const captureTerminalBlocker = failedVerifyStatus === "capture_error" || conclusiveVerifyBlockers2.length > 0 || Boolean(structuredInteractionFailureSummary2) || captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
2699
|
+
const terminalCaptureQualitySummary = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true ? stringValue(captureQuality?.summary) : "";
|
|
2700
|
+
const checkpointName = captureTerminalBlocker ? "verify_capture_blocked" : "verify_capture_retry";
|
|
2701
|
+
const summary = terminalCaptureQualitySummary || structuredInteractionFailureSummary2 || stringValue(verifyDecisionRequest2?.summary) || stringValue(failedVerifyState?.verify_summary) || stringValue(failedVerifyState?.proof_summary) || stringValue(verifyRes.error) || "Verify capture failed before the evidence could be judged.";
|
|
2702
|
+
const failedVerifyDetails = {
|
|
2703
|
+
executed,
|
|
2704
|
+
verifyStatus: failedVerifyStatus,
|
|
2705
|
+
verifySummary: failedVerifyState?.verify_summary || failedVerifyState?.proof_summary || null,
|
|
2706
|
+
afterCdn: failedVerifyState?.after_cdn || null,
|
|
2707
|
+
mergeRecommendation: failedVerifyState?.merge_recommendation || null,
|
|
2708
|
+
verifyDecisionRequest: verifyDecisionRequest2,
|
|
2709
|
+
conclusiveVerifyBlockers: conclusiveVerifyBlockers2,
|
|
2710
|
+
verifyError: verifyRes.error || null,
|
|
2711
|
+
verifyStdout: verifyRes.stdout || "",
|
|
2712
|
+
verifyStderr: verifyRes.stderr || ""
|
|
2713
|
+
};
|
|
2714
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2715
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2716
|
+
checkpoint: checkpointName,
|
|
2717
|
+
error: verifyRes.error || null,
|
|
2718
|
+
details: failedVerifyDetails
|
|
2719
|
+
});
|
|
2720
|
+
return checkpoint(
|
|
2721
|
+
"verify",
|
|
2722
|
+
checkpointName,
|
|
2723
|
+
summary,
|
|
2724
|
+
{
|
|
2725
|
+
ok: true,
|
|
2726
|
+
nextActions: captureTerminalBlocker ? ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"] : ["inspect_after_capture", "continue_internal_loop_with_checkpoint", "return_to_recon_if_baseline_is_wrong"],
|
|
2727
|
+
advanceOptions: needsImplementation ? ["author", "implement", "ship", "verify", "recon"] : ["author", "verify", "recon"],
|
|
2728
|
+
recommendedAdvanceStage: captureTerminalBlocker ? null : verifyDecisionRequest2?.recommended_stage || verifyDecisionRequest2?.continue_with_stage || "author",
|
|
2729
|
+
continueWithStage: captureTerminalBlocker ? null : verifyDecisionRequest2?.continue_with_stage || verifyDecisionRequest2?.recommended_stage || "author",
|
|
2730
|
+
blocking: captureTerminalBlocker,
|
|
2731
|
+
details: failedVerifyDetails,
|
|
2732
|
+
verifyStatus: failedVerifyStatus,
|
|
2733
|
+
verifySummary: failedVerifyDetails.verifySummary,
|
|
2734
|
+
afterCdn: failedVerifyState?.after_cdn || null,
|
|
2735
|
+
mergeRecommendation: failedVerifyState?.merge_recommendation || null,
|
|
2736
|
+
verifyDecisionRequest: verifyDecisionRequest2,
|
|
2737
|
+
executed
|
|
2738
|
+
}
|
|
2739
|
+
);
|
|
2740
|
+
}
|
|
2687
2741
|
return failedRun("verify", verifyRes.haltedForApproval ? "verify halted for approval" : "verify failed", verifyRes, {
|
|
2688
2742
|
checkpoint: "verify_failed",
|
|
2689
2743
|
details: { executed },
|
|
@@ -2737,8 +2791,9 @@ ${implementRes.stderr || ""}`;
|
|
|
2737
2791
|
});
|
|
2738
2792
|
state = readState(config.statePath);
|
|
2739
2793
|
}
|
|
2794
|
+
const terminalCaptureQualitySummary = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true ? stringValue(captureQuality?.summary) : "";
|
|
2740
2795
|
const checkpointName = captureTerminalBlocker ? "verify_capture_blocked" : "verify_capture_retry";
|
|
2741
|
-
const summary = structuredInteractionFailureSummary || stringValue(proofAssessment.summary) || "Verify ran, but the proof packet still needs internal capture-plan work before it should ship.";
|
|
2796
|
+
const summary = terminalCaptureQualitySummary || structuredInteractionFailureSummary || stringValue(proofAssessment.summary) || "Verify ran, but the proof packet still needs internal capture-plan work before it should ship.";
|
|
2742
2797
|
recordAttempt("verify", "checkpoint", summary, {
|
|
2743
2798
|
autoApproved: verifyRes.autoApproved || false,
|
|
2744
2799
|
checkpoint: checkpointName,
|
|
@@ -6238,7 +6293,7 @@ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
|
|
|
6238
6293
|
var REFINED_INPUTS_SCHEMA = {
|
|
6239
6294
|
type: "object",
|
|
6240
6295
|
additionalProperties: false,
|
|
6241
|
-
required: ["server_path", "wait_for_selector", "reference"],
|
|
6296
|
+
required: ["server_path", "wait_for_selector", "reference", "expected_start_path", "expected_terminal_path"],
|
|
6242
6297
|
properties: {
|
|
6243
6298
|
server_path: { type: ["string", "null"] },
|
|
6244
6299
|
wait_for_selector: { type: ["string", "null"] },
|
|
@@ -6248,8 +6303,9 @@ var REFINED_INPUTS_SCHEMA = {
|
|
|
6248
6303
|
}
|
|
6249
6304
|
};
|
|
6250
6305
|
var INTERACTION_CONTRACT_SCHEMA = {
|
|
6251
|
-
type: "object",
|
|
6252
|
-
additionalProperties:
|
|
6306
|
+
type: ["object", "null"],
|
|
6307
|
+
additionalProperties: false,
|
|
6308
|
+
required: ["start_path", "expected_terminal_path", "expected_url", "action", "assertions"],
|
|
6253
6309
|
properties: {
|
|
6254
6310
|
start_path: { type: ["string", "null"] },
|
|
6255
6311
|
expected_terminal_path: { type: ["string", "null"] },
|
|
@@ -6316,6 +6372,8 @@ var AUTHOR_SCHEMA = {
|
|
|
6316
6372
|
"capture_script",
|
|
6317
6373
|
"baseline_understanding_used",
|
|
6318
6374
|
"refined_inputs",
|
|
6375
|
+
"expected_terminal_path",
|
|
6376
|
+
"interaction_contract",
|
|
6319
6377
|
"rationale",
|
|
6320
6378
|
"confidence",
|
|
6321
6379
|
"summary"
|
|
@@ -6935,8 +6993,8 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
6935
6993
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
6936
6994
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
6937
6995
|
"For interaction proof, author the browser action explicitly in capture_script; a wait-only script is invalid. Return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text.",
|
|
6938
|
-
"For route-changing interaction proof, set refined_inputs.expected_start_path and refined_inputs.expected_terminal_path, and include interaction_contract with start_path, expected_terminal_path, action, and assertions. Keep refined_inputs.server_path on the start route; do not replace it with the terminal route.",
|
|
6939
|
-
"If the original request or success_criteria names an expected terminal URL/path, preserve it exactly in refined_inputs.expected_terminal_path and in interaction_contract.expected_terminal_path, including query and hash.",
|
|
6996
|
+
"For route-changing interaction proof, set refined_inputs.expected_start_path and refined_inputs.expected_terminal_path, and include interaction_contract with start_path, expected_terminal_path, expected_url when known, action, and assertions. Keep refined_inputs.server_path on the start route; do not replace it with the terminal route.",
|
|
6997
|
+
"If the original request or success_criteria names an expected terminal URL/path, preserve it exactly in refined_inputs.expected_terminal_path and in interaction_contract.expected_terminal_path, including query and hash. For non-interaction proof, set expected_terminal_path and interaction_contract to null.",
|
|
6940
6998
|
"Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
|
6941
6999
|
"For structured proof, collect meaningful measurements inside page.evaluate, assign them to an evidence variable, and return that object from capture_script. Screenshots are optional supporting context for data/audio/log/metric/custom modes.",
|
|
6942
7000
|
"Do not assign globalThis.__riddleProofEvidence, window.__riddleProofEvidence, or self.__riddleProofEvidence in the worker context. Avoid global evidence assignment unless it is inside page.evaluate for compatibility with older packets.",
|
package/dist/cli.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import "./chunk-27BG64ZG.js";
|
|
2
|
+
import "./chunk-XJA2GDVN.js";
|
|
3
3
|
import "./chunk-PEWAIEER.js";
|
|
4
4
|
import "./chunk-TWTEUS7R.js";
|
|
5
|
-
import "./chunk-AM3K5FPW.js";
|
|
5
|
+
import "./chunk-UWO4YR7I.js";
|
|
6
6
|
import "./chunk-ZQWVXQKJ.js";
|
|
7
7
|
import "./chunk-RDPG554T.js";
|
|
8
8
|
import "./chunk-K6HZUSHH.js";
|
|
9
9
|
import "./chunk-OILKSY5J.js";
|
|
10
10
|
import "./chunk-JFQXAJH2.js";
|
|
11
|
-
import "./chunk-4PPJKW3Z.js";
|
|
11
|
+
import "./chunk-73EBR3YL.js";
|
|
12
12
|
import "./chunk-VY4Y5U57.js";
|
|
13
13
|
import "./chunk-MLKGABMK.js";
|
|
@@ -51,7 +51,7 @@ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
|
|
|
51
51
|
var REFINED_INPUTS_SCHEMA = {
|
|
52
52
|
type: "object",
|
|
53
53
|
additionalProperties: false,
|
|
54
|
-
required: ["server_path", "wait_for_selector", "reference"],
|
|
54
|
+
required: ["server_path", "wait_for_selector", "reference", "expected_start_path", "expected_terminal_path"],
|
|
55
55
|
properties: {
|
|
56
56
|
server_path: { type: ["string", "null"] },
|
|
57
57
|
wait_for_selector: { type: ["string", "null"] },
|
|
@@ -61,8 +61,9 @@ var REFINED_INPUTS_SCHEMA = {
|
|
|
61
61
|
}
|
|
62
62
|
};
|
|
63
63
|
var INTERACTION_CONTRACT_SCHEMA = {
|
|
64
|
-
type: "object",
|
|
65
|
-
additionalProperties:
|
|
64
|
+
type: ["object", "null"],
|
|
65
|
+
additionalProperties: false,
|
|
66
|
+
required: ["start_path", "expected_terminal_path", "expected_url", "action", "assertions"],
|
|
66
67
|
properties: {
|
|
67
68
|
start_path: { type: ["string", "null"] },
|
|
68
69
|
expected_terminal_path: { type: ["string", "null"] },
|
|
@@ -129,6 +130,8 @@ var AUTHOR_SCHEMA = {
|
|
|
129
130
|
"capture_script",
|
|
130
131
|
"baseline_understanding_used",
|
|
131
132
|
"refined_inputs",
|
|
133
|
+
"expected_terminal_path",
|
|
134
|
+
"interaction_contract",
|
|
132
135
|
"rationale",
|
|
133
136
|
"confidence",
|
|
134
137
|
"summary"
|
|
@@ -748,8 +751,8 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
748
751
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
749
752
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
750
753
|
"For interaction proof, author the browser action explicitly in capture_script; a wait-only script is invalid. Return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text.",
|
|
751
|
-
"For route-changing interaction proof, set refined_inputs.expected_start_path and refined_inputs.expected_terminal_path, and include interaction_contract with start_path, expected_terminal_path, action, and assertions. Keep refined_inputs.server_path on the start route; do not replace it with the terminal route.",
|
|
752
|
-
"If the original request or success_criteria names an expected terminal URL/path, preserve it exactly in refined_inputs.expected_terminal_path and in interaction_contract.expected_terminal_path, including query and hash.",
|
|
754
|
+
"For route-changing interaction proof, set refined_inputs.expected_start_path and refined_inputs.expected_terminal_path, and include interaction_contract with start_path, expected_terminal_path, expected_url when known, action, and assertions. Keep refined_inputs.server_path on the start route; do not replace it with the terminal route.",
|
|
755
|
+
"If the original request or success_criteria names an expected terminal URL/path, preserve it exactly in refined_inputs.expected_terminal_path and in interaction_contract.expected_terminal_path, including query and hash. For non-interaction proof, set expected_terminal_path and interaction_contract to null.",
|
|
753
756
|
"Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
|
754
757
|
"For structured proof, collect meaningful measurements inside page.evaluate, assign them to an evidence variable, and return that object from capture_script. Screenshots are optional supporting context for data/audio/log/metric/custom modes.",
|
|
755
758
|
"Do not assign globalThis.__riddleProofEvidence, window.__riddleProofEvidence, or self.__riddleProofEvidence in the worker context. Avoid global evidence assignment unless it is inside page.evaluate for compatibility with older packets.",
|
package/dist/codex-exec-agent.js
CHANGED
package/dist/engine-harness.cjs
CHANGED
|
@@ -2684,6 +2684,60 @@ ${implementRes.stderr || ""}`;
|
|
|
2684
2684
|
verifyRes = runOne("verify");
|
|
2685
2685
|
executed.push(executedStep(verifyRes));
|
|
2686
2686
|
if (!verifyRes.ok || verifyRes.haltedForApproval) {
|
|
2687
|
+
const failedVerifyState = readState(config.statePath);
|
|
2688
|
+
const failedVerifyStatus = failedVerifyState?.verify_status || "";
|
|
2689
|
+
if (!verifyRes.haltedForApproval && (failedVerifyStatus === "capture_incomplete" || failedVerifyStatus === "capture_error")) {
|
|
2690
|
+
const verifyDecisionRequest2 = failedVerifyState?.verify_decision_request || null;
|
|
2691
|
+
const captureQuality = verifyDecisionRequest2?.capture_quality || {};
|
|
2692
|
+
const conclusiveVerifyBlockers2 = proofAssessmentHardBlockersForState({
|
|
2693
|
+
...failedVerifyState,
|
|
2694
|
+
structured_interaction_capture_failure_summary: stringValue(verifyDecisionRequest2?.structured_interaction_capture_failure_summary) || stringValue(failedVerifyState?.structured_interaction_capture_failure_summary) || void 0,
|
|
2695
|
+
structured_interaction_failure_summary: stringValue(verifyDecisionRequest2?.structured_interaction_failure_summary) || stringValue(failedVerifyState?.structured_interaction_failure_summary) || void 0
|
|
2696
|
+
});
|
|
2697
|
+
const structuredInteractionFailureSummary2 = stringValue(verifyDecisionRequest2?.structured_interaction_capture_failure_summary) || stringValue(verifyDecisionRequest2?.structured_interaction_failure_summary) || stringValue(failedVerifyState?.structured_interaction_capture_failure_summary) || stringValue(failedVerifyState?.structured_interaction_failure_summary) || stringValue(conclusiveVerifyBlockers2[0]);
|
|
2698
|
+
const captureTerminalBlocker = failedVerifyStatus === "capture_error" || conclusiveVerifyBlockers2.length > 0 || Boolean(structuredInteractionFailureSummary2) || captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
2699
|
+
const terminalCaptureQualitySummary = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true ? stringValue(captureQuality?.summary) : "";
|
|
2700
|
+
const checkpointName = captureTerminalBlocker ? "verify_capture_blocked" : "verify_capture_retry";
|
|
2701
|
+
const summary = terminalCaptureQualitySummary || structuredInteractionFailureSummary2 || stringValue(verifyDecisionRequest2?.summary) || stringValue(failedVerifyState?.verify_summary) || stringValue(failedVerifyState?.proof_summary) || stringValue(verifyRes.error) || "Verify capture failed before the evidence could be judged.";
|
|
2702
|
+
const failedVerifyDetails = {
|
|
2703
|
+
executed,
|
|
2704
|
+
verifyStatus: failedVerifyStatus,
|
|
2705
|
+
verifySummary: failedVerifyState?.verify_summary || failedVerifyState?.proof_summary || null,
|
|
2706
|
+
afterCdn: failedVerifyState?.after_cdn || null,
|
|
2707
|
+
mergeRecommendation: failedVerifyState?.merge_recommendation || null,
|
|
2708
|
+
verifyDecisionRequest: verifyDecisionRequest2,
|
|
2709
|
+
conclusiveVerifyBlockers: conclusiveVerifyBlockers2,
|
|
2710
|
+
verifyError: verifyRes.error || null,
|
|
2711
|
+
verifyStdout: verifyRes.stdout || "",
|
|
2712
|
+
verifyStderr: verifyRes.stderr || ""
|
|
2713
|
+
};
|
|
2714
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2715
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2716
|
+
checkpoint: checkpointName,
|
|
2717
|
+
error: verifyRes.error || null,
|
|
2718
|
+
details: failedVerifyDetails
|
|
2719
|
+
});
|
|
2720
|
+
return checkpoint(
|
|
2721
|
+
"verify",
|
|
2722
|
+
checkpointName,
|
|
2723
|
+
summary,
|
|
2724
|
+
{
|
|
2725
|
+
ok: true,
|
|
2726
|
+
nextActions: captureTerminalBlocker ? ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"] : ["inspect_after_capture", "continue_internal_loop_with_checkpoint", "return_to_recon_if_baseline_is_wrong"],
|
|
2727
|
+
advanceOptions: needsImplementation ? ["author", "implement", "ship", "verify", "recon"] : ["author", "verify", "recon"],
|
|
2728
|
+
recommendedAdvanceStage: captureTerminalBlocker ? null : verifyDecisionRequest2?.recommended_stage || verifyDecisionRequest2?.continue_with_stage || "author",
|
|
2729
|
+
continueWithStage: captureTerminalBlocker ? null : verifyDecisionRequest2?.continue_with_stage || verifyDecisionRequest2?.recommended_stage || "author",
|
|
2730
|
+
blocking: captureTerminalBlocker,
|
|
2731
|
+
details: failedVerifyDetails,
|
|
2732
|
+
verifyStatus: failedVerifyStatus,
|
|
2733
|
+
verifySummary: failedVerifyDetails.verifySummary,
|
|
2734
|
+
afterCdn: failedVerifyState?.after_cdn || null,
|
|
2735
|
+
mergeRecommendation: failedVerifyState?.merge_recommendation || null,
|
|
2736
|
+
verifyDecisionRequest: verifyDecisionRequest2,
|
|
2737
|
+
executed
|
|
2738
|
+
}
|
|
2739
|
+
);
|
|
2740
|
+
}
|
|
2687
2741
|
return failedRun("verify", verifyRes.haltedForApproval ? "verify halted for approval" : "verify failed", verifyRes, {
|
|
2688
2742
|
checkpoint: "verify_failed",
|
|
2689
2743
|
details: { executed },
|
|
@@ -2737,8 +2791,9 @@ ${implementRes.stderr || ""}`;
|
|
|
2737
2791
|
});
|
|
2738
2792
|
state = readState(config.statePath);
|
|
2739
2793
|
}
|
|
2794
|
+
const terminalCaptureQualitySummary = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true ? stringValue(captureQuality?.summary) : "";
|
|
2740
2795
|
const checkpointName = captureTerminalBlocker ? "verify_capture_blocked" : "verify_capture_retry";
|
|
2741
|
-
const summary = structuredInteractionFailureSummary || stringValue(proofAssessment.summary) || "Verify ran, but the proof packet still needs internal capture-plan work before it should ship.";
|
|
2796
|
+
const summary = terminalCaptureQualitySummary || structuredInteractionFailureSummary || stringValue(proofAssessment.summary) || "Verify ran, but the proof packet still needs internal capture-plan work before it should ship.";
|
|
2742
2797
|
recordAttempt("verify", "checkpoint", summary, {
|
|
2743
2798
|
autoApproved: verifyRes.autoApproved || false,
|
|
2744
2799
|
checkpoint: checkpointName,
|
package/dist/engine-harness.js
CHANGED
package/dist/index.cjs
CHANGED
|
@@ -2684,6 +2684,60 @@ ${implementRes.stderr || ""}`;
|
|
|
2684
2684
|
verifyRes = runOne("verify");
|
|
2685
2685
|
executed.push(executedStep(verifyRes));
|
|
2686
2686
|
if (!verifyRes.ok || verifyRes.haltedForApproval) {
|
|
2687
|
+
const failedVerifyState = readState(config.statePath);
|
|
2688
|
+
const failedVerifyStatus = failedVerifyState?.verify_status || "";
|
|
2689
|
+
if (!verifyRes.haltedForApproval && (failedVerifyStatus === "capture_incomplete" || failedVerifyStatus === "capture_error")) {
|
|
2690
|
+
const verifyDecisionRequest2 = failedVerifyState?.verify_decision_request || null;
|
|
2691
|
+
const captureQuality = verifyDecisionRequest2?.capture_quality || {};
|
|
2692
|
+
const conclusiveVerifyBlockers2 = proofAssessmentHardBlockersForState({
|
|
2693
|
+
...failedVerifyState,
|
|
2694
|
+
structured_interaction_capture_failure_summary: stringValue(verifyDecisionRequest2?.structured_interaction_capture_failure_summary) || stringValue(failedVerifyState?.structured_interaction_capture_failure_summary) || void 0,
|
|
2695
|
+
structured_interaction_failure_summary: stringValue(verifyDecisionRequest2?.structured_interaction_failure_summary) || stringValue(failedVerifyState?.structured_interaction_failure_summary) || void 0
|
|
2696
|
+
});
|
|
2697
|
+
const structuredInteractionFailureSummary2 = stringValue(verifyDecisionRequest2?.structured_interaction_capture_failure_summary) || stringValue(verifyDecisionRequest2?.structured_interaction_failure_summary) || stringValue(failedVerifyState?.structured_interaction_capture_failure_summary) || stringValue(failedVerifyState?.structured_interaction_failure_summary) || stringValue(conclusiveVerifyBlockers2[0]);
|
|
2698
|
+
const captureTerminalBlocker = failedVerifyStatus === "capture_error" || conclusiveVerifyBlockers2.length > 0 || Boolean(structuredInteractionFailureSummary2) || captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
2699
|
+
const terminalCaptureQualitySummary = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true ? stringValue(captureQuality?.summary) : "";
|
|
2700
|
+
const checkpointName = captureTerminalBlocker ? "verify_capture_blocked" : "verify_capture_retry";
|
|
2701
|
+
const summary = terminalCaptureQualitySummary || structuredInteractionFailureSummary2 || stringValue(verifyDecisionRequest2?.summary) || stringValue(failedVerifyState?.verify_summary) || stringValue(failedVerifyState?.proof_summary) || stringValue(verifyRes.error) || "Verify capture failed before the evidence could be judged.";
|
|
2702
|
+
const failedVerifyDetails = {
|
|
2703
|
+
executed,
|
|
2704
|
+
verifyStatus: failedVerifyStatus,
|
|
2705
|
+
verifySummary: failedVerifyState?.verify_summary || failedVerifyState?.proof_summary || null,
|
|
2706
|
+
afterCdn: failedVerifyState?.after_cdn || null,
|
|
2707
|
+
mergeRecommendation: failedVerifyState?.merge_recommendation || null,
|
|
2708
|
+
verifyDecisionRequest: verifyDecisionRequest2,
|
|
2709
|
+
conclusiveVerifyBlockers: conclusiveVerifyBlockers2,
|
|
2710
|
+
verifyError: verifyRes.error || null,
|
|
2711
|
+
verifyStdout: verifyRes.stdout || "",
|
|
2712
|
+
verifyStderr: verifyRes.stderr || ""
|
|
2713
|
+
};
|
|
2714
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2715
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2716
|
+
checkpoint: checkpointName,
|
|
2717
|
+
error: verifyRes.error || null,
|
|
2718
|
+
details: failedVerifyDetails
|
|
2719
|
+
});
|
|
2720
|
+
return checkpoint(
|
|
2721
|
+
"verify",
|
|
2722
|
+
checkpointName,
|
|
2723
|
+
summary,
|
|
2724
|
+
{
|
|
2725
|
+
ok: true,
|
|
2726
|
+
nextActions: captureTerminalBlocker ? ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"] : ["inspect_after_capture", "continue_internal_loop_with_checkpoint", "return_to_recon_if_baseline_is_wrong"],
|
|
2727
|
+
advanceOptions: needsImplementation ? ["author", "implement", "ship", "verify", "recon"] : ["author", "verify", "recon"],
|
|
2728
|
+
recommendedAdvanceStage: captureTerminalBlocker ? null : verifyDecisionRequest2?.recommended_stage || verifyDecisionRequest2?.continue_with_stage || "author",
|
|
2729
|
+
continueWithStage: captureTerminalBlocker ? null : verifyDecisionRequest2?.continue_with_stage || verifyDecisionRequest2?.recommended_stage || "author",
|
|
2730
|
+
blocking: captureTerminalBlocker,
|
|
2731
|
+
details: failedVerifyDetails,
|
|
2732
|
+
verifyStatus: failedVerifyStatus,
|
|
2733
|
+
verifySummary: failedVerifyDetails.verifySummary,
|
|
2734
|
+
afterCdn: failedVerifyState?.after_cdn || null,
|
|
2735
|
+
mergeRecommendation: failedVerifyState?.merge_recommendation || null,
|
|
2736
|
+
verifyDecisionRequest: verifyDecisionRequest2,
|
|
2737
|
+
executed
|
|
2738
|
+
}
|
|
2739
|
+
);
|
|
2740
|
+
}
|
|
2687
2741
|
return failedRun("verify", verifyRes.haltedForApproval ? "verify halted for approval" : "verify failed", verifyRes, {
|
|
2688
2742
|
checkpoint: "verify_failed",
|
|
2689
2743
|
details: { executed },
|
|
@@ -2737,8 +2791,9 @@ ${implementRes.stderr || ""}`;
|
|
|
2737
2791
|
});
|
|
2738
2792
|
state = readState(config.statePath);
|
|
2739
2793
|
}
|
|
2794
|
+
const terminalCaptureQualitySummary = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true ? stringValue(captureQuality?.summary) : "";
|
|
2740
2795
|
const checkpointName = captureTerminalBlocker ? "verify_capture_blocked" : "verify_capture_retry";
|
|
2741
|
-
const summary = structuredInteractionFailureSummary || stringValue(proofAssessment.summary) || "Verify ran, but the proof packet still needs internal capture-plan work before it should ship.";
|
|
2796
|
+
const summary = terminalCaptureQualitySummary || structuredInteractionFailureSummary || stringValue(proofAssessment.summary) || "Verify ran, but the proof packet still needs internal capture-plan work before it should ship.";
|
|
2742
2797
|
recordAttempt("verify", "checkpoint", summary, {
|
|
2743
2798
|
autoApproved: verifyRes.autoApproved || false,
|
|
2744
2799
|
checkpoint: checkpointName,
|
|
@@ -6900,7 +6955,7 @@ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
|
|
|
6900
6955
|
var REFINED_INPUTS_SCHEMA = {
|
|
6901
6956
|
type: "object",
|
|
6902
6957
|
additionalProperties: false,
|
|
6903
|
-
required: ["server_path", "wait_for_selector", "reference"],
|
|
6958
|
+
required: ["server_path", "wait_for_selector", "reference", "expected_start_path", "expected_terminal_path"],
|
|
6904
6959
|
properties: {
|
|
6905
6960
|
server_path: { type: ["string", "null"] },
|
|
6906
6961
|
wait_for_selector: { type: ["string", "null"] },
|
|
@@ -6910,8 +6965,9 @@ var REFINED_INPUTS_SCHEMA = {
|
|
|
6910
6965
|
}
|
|
6911
6966
|
};
|
|
6912
6967
|
var INTERACTION_CONTRACT_SCHEMA = {
|
|
6913
|
-
type: "object",
|
|
6914
|
-
additionalProperties:
|
|
6968
|
+
type: ["object", "null"],
|
|
6969
|
+
additionalProperties: false,
|
|
6970
|
+
required: ["start_path", "expected_terminal_path", "expected_url", "action", "assertions"],
|
|
6915
6971
|
properties: {
|
|
6916
6972
|
start_path: { type: ["string", "null"] },
|
|
6917
6973
|
expected_terminal_path: { type: ["string", "null"] },
|
|
@@ -6978,6 +7034,8 @@ var AUTHOR_SCHEMA = {
|
|
|
6978
7034
|
"capture_script",
|
|
6979
7035
|
"baseline_understanding_used",
|
|
6980
7036
|
"refined_inputs",
|
|
7037
|
+
"expected_terminal_path",
|
|
7038
|
+
"interaction_contract",
|
|
6981
7039
|
"rationale",
|
|
6982
7040
|
"confidence",
|
|
6983
7041
|
"summary"
|
|
@@ -7597,8 +7655,8 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
7597
7655
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
7598
7656
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
7599
7657
|
"For interaction proof, author the browser action explicitly in capture_script; a wait-only script is invalid. Return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text.",
|
|
7600
|
-
"For route-changing interaction proof, set refined_inputs.expected_start_path and refined_inputs.expected_terminal_path, and include interaction_contract with start_path, expected_terminal_path, action, and assertions. Keep refined_inputs.server_path on the start route; do not replace it with the terminal route.",
|
|
7601
|
-
"If the original request or success_criteria names an expected terminal URL/path, preserve it exactly in refined_inputs.expected_terminal_path and in interaction_contract.expected_terminal_path, including query and hash.",
|
|
7658
|
+
"For route-changing interaction proof, set refined_inputs.expected_start_path and refined_inputs.expected_terminal_path, and include interaction_contract with start_path, expected_terminal_path, expected_url when known, action, and assertions. Keep refined_inputs.server_path on the start route; do not replace it with the terminal route.",
|
|
7659
|
+
"If the original request or success_criteria names an expected terminal URL/path, preserve it exactly in refined_inputs.expected_terminal_path and in interaction_contract.expected_terminal_path, including query and hash. For non-interaction proof, set expected_terminal_path and interaction_contract to null.",
|
|
7602
7660
|
"Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
|
7603
7661
|
"For structured proof, collect meaningful measurements inside page.evaluate, assign them to an evidence variable, and return that object from capture_script. Screenshots are optional supporting context for data/audio/log/metric/custom modes.",
|
|
7604
7662
|
"Do not assign globalThis.__riddleProofEvidence, window.__riddleProofEvidence, or self.__riddleProofEvidence in the worker context. Avoid global evidence assignment unless it is inside page.evaluate for compatibility with older packets.",
|
package/dist/index.js
CHANGED
|
@@ -95,7 +95,7 @@ import {
|
|
|
95
95
|
createDisabledRiddleProofAgentAdapter,
|
|
96
96
|
readRiddleProofRunStatus,
|
|
97
97
|
runRiddleProofEngineHarness
|
|
98
|
-
} from "./chunk-AM3K5FPW.js";
|
|
98
|
+
} from "./chunk-UWO4YR7I.js";
|
|
99
99
|
import {
|
|
100
100
|
RIDDLE_PROOF_RUN_STATE_VERSION,
|
|
101
101
|
appendRunEvent,
|
|
@@ -134,7 +134,7 @@ import {
|
|
|
134
134
|
createCodexExecAgentAdapter,
|
|
135
135
|
createCodexExecJsonRunner,
|
|
136
136
|
runCodexExecAgentDoctor
|
|
137
|
-
} from "./chunk-4PPJKW3Z.js";
|
|
137
|
+
} from "./chunk-73EBR3YL.js";
|
|
138
138
|
import {
|
|
139
139
|
applyTerminalMetadata,
|
|
140
140
|
compactRecord,
|
package/dist/local-agent.cjs
CHANGED
|
@@ -53,7 +53,7 @@ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
|
|
|
53
53
|
var REFINED_INPUTS_SCHEMA = {
|
|
54
54
|
type: "object",
|
|
55
55
|
additionalProperties: false,
|
|
56
|
-
required: ["server_path", "wait_for_selector", "reference"],
|
|
56
|
+
required: ["server_path", "wait_for_selector", "reference", "expected_start_path", "expected_terminal_path"],
|
|
57
57
|
properties: {
|
|
58
58
|
server_path: { type: ["string", "null"] },
|
|
59
59
|
wait_for_selector: { type: ["string", "null"] },
|
|
@@ -63,8 +63,9 @@ var REFINED_INPUTS_SCHEMA = {
|
|
|
63
63
|
}
|
|
64
64
|
};
|
|
65
65
|
var INTERACTION_CONTRACT_SCHEMA = {
|
|
66
|
-
type: "object",
|
|
67
|
-
additionalProperties: true,
|
|
66
|
+
type: ["object", "null"],
|
|
67
|
+
additionalProperties: false,
|
|
68
|
+
required: ["start_path", "expected_terminal_path", "expected_url", "action", "assertions"],
|
|
68
69
|
properties: {
|
|
69
70
|
start_path: { type: ["string", "null"] },
|
|
70
71
|
expected_terminal_path: { type: ["string", "null"] },
|
|
@@ -131,6 +132,8 @@ var AUTHOR_SCHEMA = {
|
|
|
131
132
|
"capture_script",
|
|
132
133
|
"baseline_understanding_used",
|
|
133
134
|
"refined_inputs",
|
|
135
|
+
"expected_terminal_path",
|
|
136
|
+
"interaction_contract",
|
|
134
137
|
"rationale",
|
|
135
138
|
"confidence",
|
|
136
139
|
"summary"
|
|
@@ -750,8 +753,8 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
750
753
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
751
754
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
752
755
|
"For interaction proof, author the browser action explicitly in capture_script; a wait-only script is invalid. Return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text.",
|
|
753
|
-
"For route-changing interaction proof, set refined_inputs.expected_start_path and refined_inputs.expected_terminal_path, and include interaction_contract with start_path, expected_terminal_path, action, and assertions. Keep refined_inputs.server_path on the start route; do not replace it with the terminal route.",
|
|
754
|
-
"If the original request or success_criteria names an expected terminal URL/path, preserve it exactly in refined_inputs.expected_terminal_path and in interaction_contract.expected_terminal_path, including query and hash.",
|
|
756
|
+
"For route-changing interaction proof, set refined_inputs.expected_start_path and refined_inputs.expected_terminal_path, and include interaction_contract with start_path, expected_terminal_path, expected_url when known, action, and assertions. Keep refined_inputs.server_path on the start route; do not replace it with the terminal route.",
|
|
757
|
+
"If the original request or success_criteria names an expected terminal URL/path, preserve it exactly in refined_inputs.expected_terminal_path and in interaction_contract.expected_terminal_path, including query and hash. For non-interaction proof, set expected_terminal_path and interaction_contract to null.",
|
|
755
758
|
"Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
|
756
759
|
"For structured proof, collect meaningful measurements inside page.evaluate, assign them to an evidence variable, and return that object from capture_script. Screenshots are optional supporting context for data/audio/log/metric/custom modes.",
|
|
757
760
|
"Do not assign globalThis.__riddleProofEvidence, window.__riddleProofEvidence, or self.__riddleProofEvidence in the worker context. Avoid global evidence assignment unless it is inside page.evaluate for compatibility with older packets.",
|
package/dist/local-agent.js
CHANGED
|
@@ -2682,6 +2682,60 @@ ${implementRes.stderr || ""}`;
|
|
|
2682
2682
|
verifyRes = runOne("verify");
|
|
2683
2683
|
executed.push(executedStep(verifyRes));
|
|
2684
2684
|
if (!verifyRes.ok || verifyRes.haltedForApproval) {
|
|
2685
|
+
const failedVerifyState = readState(config.statePath);
|
|
2686
|
+
const failedVerifyStatus = failedVerifyState?.verify_status || "";
|
|
2687
|
+
if (!verifyRes.haltedForApproval && (failedVerifyStatus === "capture_incomplete" || failedVerifyStatus === "capture_error")) {
|
|
2688
|
+
const verifyDecisionRequest2 = failedVerifyState?.verify_decision_request || null;
|
|
2689
|
+
const captureQuality = verifyDecisionRequest2?.capture_quality || {};
|
|
2690
|
+
const conclusiveVerifyBlockers2 = proofAssessmentHardBlockersForState({
|
|
2691
|
+
...failedVerifyState,
|
|
2692
|
+
structured_interaction_capture_failure_summary: stringValue(verifyDecisionRequest2?.structured_interaction_capture_failure_summary) || stringValue(failedVerifyState?.structured_interaction_capture_failure_summary) || void 0,
|
|
2693
|
+
structured_interaction_failure_summary: stringValue(verifyDecisionRequest2?.structured_interaction_failure_summary) || stringValue(failedVerifyState?.structured_interaction_failure_summary) || void 0
|
|
2694
|
+
});
|
|
2695
|
+
const structuredInteractionFailureSummary2 = stringValue(verifyDecisionRequest2?.structured_interaction_capture_failure_summary) || stringValue(verifyDecisionRequest2?.structured_interaction_failure_summary) || stringValue(failedVerifyState?.structured_interaction_capture_failure_summary) || stringValue(failedVerifyState?.structured_interaction_failure_summary) || stringValue(conclusiveVerifyBlockers2[0]);
|
|
2696
|
+
const captureTerminalBlocker = failedVerifyStatus === "capture_error" || conclusiveVerifyBlockers2.length > 0 || Boolean(structuredInteractionFailureSummary2) || captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
2697
|
+
const terminalCaptureQualitySummary = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true ? stringValue(captureQuality?.summary) : "";
|
|
2698
|
+
const checkpointName = captureTerminalBlocker ? "verify_capture_blocked" : "verify_capture_retry";
|
|
2699
|
+
const summary = terminalCaptureQualitySummary || structuredInteractionFailureSummary2 || stringValue(verifyDecisionRequest2?.summary) || stringValue(failedVerifyState?.verify_summary) || stringValue(failedVerifyState?.proof_summary) || stringValue(verifyRes.error) || "Verify capture failed before the evidence could be judged.";
|
|
2700
|
+
const failedVerifyDetails = {
|
|
2701
|
+
executed,
|
|
2702
|
+
verifyStatus: failedVerifyStatus,
|
|
2703
|
+
verifySummary: failedVerifyState?.verify_summary || failedVerifyState?.proof_summary || null,
|
|
2704
|
+
afterCdn: failedVerifyState?.after_cdn || null,
|
|
2705
|
+
mergeRecommendation: failedVerifyState?.merge_recommendation || null,
|
|
2706
|
+
verifyDecisionRequest: verifyDecisionRequest2,
|
|
2707
|
+
conclusiveVerifyBlockers: conclusiveVerifyBlockers2,
|
|
2708
|
+
verifyError: verifyRes.error || null,
|
|
2709
|
+
verifyStdout: verifyRes.stdout || "",
|
|
2710
|
+
verifyStderr: verifyRes.stderr || ""
|
|
2711
|
+
};
|
|
2712
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2713
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2714
|
+
checkpoint: checkpointName,
|
|
2715
|
+
error: verifyRes.error || null,
|
|
2716
|
+
details: failedVerifyDetails
|
|
2717
|
+
});
|
|
2718
|
+
return checkpoint(
|
|
2719
|
+
"verify",
|
|
2720
|
+
checkpointName,
|
|
2721
|
+
summary,
|
|
2722
|
+
{
|
|
2723
|
+
ok: true,
|
|
2724
|
+
nextActions: captureTerminalBlocker ? ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"] : ["inspect_after_capture", "continue_internal_loop_with_checkpoint", "return_to_recon_if_baseline_is_wrong"],
|
|
2725
|
+
advanceOptions: needsImplementation ? ["author", "implement", "ship", "verify", "recon"] : ["author", "verify", "recon"],
|
|
2726
|
+
recommendedAdvanceStage: captureTerminalBlocker ? null : verifyDecisionRequest2?.recommended_stage || verifyDecisionRequest2?.continue_with_stage || "author",
|
|
2727
|
+
continueWithStage: captureTerminalBlocker ? null : verifyDecisionRequest2?.continue_with_stage || verifyDecisionRequest2?.recommended_stage || "author",
|
|
2728
|
+
blocking: captureTerminalBlocker,
|
|
2729
|
+
details: failedVerifyDetails,
|
|
2730
|
+
verifyStatus: failedVerifyStatus,
|
|
2731
|
+
verifySummary: failedVerifyDetails.verifySummary,
|
|
2732
|
+
afterCdn: failedVerifyState?.after_cdn || null,
|
|
2733
|
+
mergeRecommendation: failedVerifyState?.merge_recommendation || null,
|
|
2734
|
+
verifyDecisionRequest: verifyDecisionRequest2,
|
|
2735
|
+
executed
|
|
2736
|
+
}
|
|
2737
|
+
);
|
|
2738
|
+
}
|
|
2685
2739
|
return failedRun("verify", verifyRes.haltedForApproval ? "verify halted for approval" : "verify failed", verifyRes, {
|
|
2686
2740
|
checkpoint: "verify_failed",
|
|
2687
2741
|
details: { executed },
|
|
@@ -2735,8 +2789,9 @@ ${implementRes.stderr || ""}`;
|
|
|
2735
2789
|
});
|
|
2736
2790
|
state = readState(config.statePath);
|
|
2737
2791
|
}
|
|
2792
|
+
const terminalCaptureQualitySummary = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true ? stringValue(captureQuality?.summary) : "";
|
|
2738
2793
|
const checkpointName = captureTerminalBlocker ? "verify_capture_blocked" : "verify_capture_retry";
|
|
2739
|
-
const summary = structuredInteractionFailureSummary || stringValue(proofAssessment.summary) || "Verify ran, but the proof packet still needs internal capture-plan work before it should ship.";
|
|
2794
|
+
const summary = terminalCaptureQualitySummary || structuredInteractionFailureSummary || stringValue(proofAssessment.summary) || "Verify ran, but the proof packet still needs internal capture-plan work before it should ship.";
|
|
2740
2795
|
recordAttempt("verify", "checkpoint", summary, {
|
|
2741
2796
|
autoApproved: verifyRes.autoApproved || false,
|
|
2742
2797
|
checkpoint: checkpointName,
|
package/dist/proof-run-engine.js
CHANGED
package/package.json
CHANGED
package/runtime/lib/author.py
CHANGED
|
@@ -89,10 +89,12 @@ def recon_baseline_understanding(state):
|
|
|
89
89
|
return understanding if isinstance(understanding, dict) else {}
|
|
90
90
|
|
|
91
91
|
|
|
92
|
-
def authored_capture_script(existing_script, wait_for_selector=''):
|
|
92
|
+
def authored_capture_script(state, existing_script, wait_for_selector=''):
|
|
93
93
|
script = (existing_script or '').strip()
|
|
94
94
|
if script:
|
|
95
95
|
return script
|
|
96
|
+
if is_interaction_mode(state):
|
|
97
|
+
return ''
|
|
96
98
|
steps = ['await page.waitForTimeout(1500);']
|
|
97
99
|
selector = (wait_for_selector or '').strip()
|
|
98
100
|
if selector:
|
|
@@ -152,8 +154,10 @@ def author_request_payload(state, reference, baselines, current_plan, hypothesis
|
|
|
152
154
|
'observations': item.get('observations'),
|
|
153
155
|
})
|
|
154
156
|
|
|
155
|
-
fallback_capture_script = authored_capture_script(state.get('capture_script'), fallback_selector)
|
|
157
|
+
fallback_capture_script = authored_capture_script(state, state.get('capture_script'), fallback_selector)
|
|
156
158
|
fallback_proof_plan = authored_proof_plan(state, reference, fallback_path, baselines, fallback_selector)
|
|
159
|
+
expected_start_path = state.get('expected_start_path') or fallback_path or '/'
|
|
160
|
+
expected_terminal_path = state.get('expected_terminal_path') or state.get('requested_expected_terminal_path') or ''
|
|
157
161
|
|
|
158
162
|
return {
|
|
159
163
|
'status': 'needs_supervisor_judgment',
|
|
@@ -174,6 +178,8 @@ def author_request_payload(state, reference, baselines, current_plan, hypothesis
|
|
|
174
178
|
'wait_for_selector': fallback_selector,
|
|
175
179
|
'capture_script': fallback_capture_script,
|
|
176
180
|
'proof_plan': fallback_proof_plan,
|
|
181
|
+
'expected_start_path': expected_start_path,
|
|
182
|
+
'expected_terminal_path': expected_terminal_path,
|
|
177
183
|
},
|
|
178
184
|
'interaction_contract': optional_record(state.get('interaction_contract')),
|
|
179
185
|
'proof_contract': optional_record(state.get('proof_contract')),
|
|
@@ -210,6 +216,7 @@ def author_request_payload(state, reference, baselines, current_plan, hypothesis
|
|
|
210
216
|
'server_path': 'string',
|
|
211
217
|
'wait_for_selector': 'string',
|
|
212
218
|
'reference': 'string',
|
|
219
|
+
'expected_start_path': 'string',
|
|
213
220
|
'expected_terminal_path': 'string',
|
|
214
221
|
},
|
|
215
222
|
'interaction_contract': {
|
|
@@ -245,7 +252,7 @@ default_path = normalize_path(first_non_empty(before_path, prod_path, current_pa
|
|
|
245
252
|
|
|
246
253
|
default_selector = first_non_empty((s.get('wait_for_selector') or '').strip(), (current_plan.get('wait_for_selector') or '').strip())
|
|
247
254
|
default_proof_plan = authored_proof_plan(s, reference, default_path, baselines, default_selector)
|
|
248
|
-
default_capture_script = authored_capture_script(s.get('capture_script'), default_selector)
|
|
255
|
+
default_capture_script = authored_capture_script(s, s.get('capture_script'), default_selector)
|
|
249
256
|
|
|
250
257
|
supervisor_packet = s.get('supervisor_author_packet') or {}
|
|
251
258
|
if not isinstance(supervisor_packet, dict):
|
|
@@ -333,6 +340,7 @@ authored_packet = {
|
|
|
333
340
|
'server_path': refined_path,
|
|
334
341
|
'wait_for_selector': refined_selector,
|
|
335
342
|
'reference': refined_reference,
|
|
343
|
+
'expected_start_path': s.get('expected_start_path') or '',
|
|
336
344
|
'expected_terminal_path': expected_terminal_path,
|
|
337
345
|
},
|
|
338
346
|
'interaction_contract': provided_payload['interaction_contract'],
|