@riddledc/riddle-proof 0.8.18 → 0.8.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/advanced/engine-harness.cjs +42 -2
- package/dist/advanced/engine-harness.js +2 -2
- package/dist/advanced/index.cjs +42 -2
- package/dist/advanced/index.js +4 -4
- package/dist/advanced/proof-run-core.cjs +11 -2
- package/dist/advanced/proof-run-core.js +1 -1
- package/dist/advanced/proof-run-engine.cjs +42 -2
- package/dist/advanced/proof-run-engine.js +2 -2
- package/dist/advanced/runner.js +2 -2
- package/dist/{chunk-WJZYRUNV.js → chunk-5NEO7WDZ.js} +32 -1
- package/dist/{chunk-7GZY5PLT.js → chunk-FU73I4V3.js} +11 -2
- package/dist/{chunk-E7UTJ7KB.js → chunk-K3TPF55N.js} +1 -1
- package/dist/{chunk-NGX4SUQN.js → chunk-P2RN2NYR.js} +1 -1
- package/dist/{chunk-ZOZLORGR.js → chunk-RWF763A4.js} +1 -1
- package/dist/cli/index.js +3 -3
- package/dist/cli.cjs +42 -2
- package/dist/cli.js +3 -3
- package/dist/engine-harness.cjs +42 -2
- package/dist/engine-harness.js +2 -2
- package/dist/index.cjs +42 -2
- package/dist/index.js +3 -3
- package/dist/proof-run-core.cjs +11 -2
- package/dist/proof-run-core.js +1 -1
- package/dist/proof-run-engine.cjs +42 -2
- package/dist/proof-run-engine.js +2 -2
- package/dist/runner.js +2 -2
- package/examples/regression-packs/oc-flow-regression.json +1 -0
- package/package.json +1 -1
- package/runtime/lib/ship.py +1 -1
- package/runtime/lib/verify.py +25 -2
- package/runtime/tests/recon_verify_smoke.py +97 -0
- package/runtime/tests/trust_boundary_regression.py +6 -0
|
@@ -512,6 +512,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
512
512
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
513
513
|
return labels;
|
|
514
514
|
}
|
|
515
|
+
function stateHasAfterEvidence(state = {}) {
|
|
516
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
517
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
518
|
+
const after = objectValue(bundle.after);
|
|
519
|
+
const observation = objectValue(after.observation);
|
|
520
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
521
|
+
return Boolean(
|
|
522
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
523
|
+
);
|
|
524
|
+
}
|
|
515
525
|
function validateShipGate(state = {}) {
|
|
516
526
|
const reference = normalizedReference(state);
|
|
517
527
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -541,7 +551,7 @@ function validateShipGate(state = {}) {
|
|
|
541
551
|
reasons.push("prod_cdn is required before ship");
|
|
542
552
|
}
|
|
543
553
|
}
|
|
544
|
-
if (!
|
|
554
|
+
if (!stateHasAfterEvidence(state)) {
|
|
545
555
|
reasons.push("after_cdn is required before ship");
|
|
546
556
|
}
|
|
547
557
|
if (verifyStatus !== "evidence_captured") {
|
|
@@ -945,7 +955,6 @@ var init_proof_run_core = __esm({
|
|
|
945
955
|
VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
946
956
|
"visual",
|
|
947
957
|
"render",
|
|
948
|
-
"interaction",
|
|
949
958
|
"ui",
|
|
950
959
|
"layout",
|
|
951
960
|
"screenshot",
|
|
@@ -2614,6 +2623,7 @@ ${implementRes.stderr || ""}`;
|
|
|
2614
2623
|
verifyContinueWithStage,
|
|
2615
2624
|
convergenceSignals
|
|
2616
2625
|
};
|
|
2626
|
+
const structuredInteractionFailureSummary = stringValue(verifyDecisionRequest?.structured_interaction_failure_summary);
|
|
2617
2627
|
if (verifyStatus !== "evidence_captured") {
|
|
2618
2628
|
const captureQuality = verifyDecisionRequest?.capture_quality || {};
|
|
2619
2629
|
const captureTerminalBlocker = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
@@ -2654,6 +2664,36 @@ ${implementRes.stderr || ""}`;
|
|
|
2654
2664
|
}
|
|
2655
2665
|
);
|
|
2656
2666
|
}
|
|
2667
|
+
if (structuredInteractionFailureSummary) {
|
|
2668
|
+
const summary = structuredInteractionFailureSummary;
|
|
2669
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2670
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2671
|
+
checkpoint: "verify_capture_blocked",
|
|
2672
|
+
details: verifyDetails
|
|
2673
|
+
});
|
|
2674
|
+
return checkpoint(
|
|
2675
|
+
"verify",
|
|
2676
|
+
"verify_capture_blocked",
|
|
2677
|
+
summary,
|
|
2678
|
+
{
|
|
2679
|
+
ok: true,
|
|
2680
|
+
nextActions: ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"],
|
|
2681
|
+
advanceOptions: verifyLoopAdvanceOptions,
|
|
2682
|
+
recommendedAdvanceStage: null,
|
|
2683
|
+
continueWithStage: null,
|
|
2684
|
+
blocking: true,
|
|
2685
|
+
details: verifyDetails,
|
|
2686
|
+
verifyStatus,
|
|
2687
|
+
verifySummary,
|
|
2688
|
+
afterCdn: state?.after_cdn || null,
|
|
2689
|
+
mergeRecommendation: state?.merge_recommendation || null,
|
|
2690
|
+
verifyDecisionRequest,
|
|
2691
|
+
proofAssessment: proofAssessment.raw,
|
|
2692
|
+
proofAssessmentRequest: state?.proof_assessment_request || null,
|
|
2693
|
+
executed
|
|
2694
|
+
}
|
|
2695
|
+
);
|
|
2696
|
+
}
|
|
2657
2697
|
if (!hasSupervisorProofAssessment(state)) {
|
|
2658
2698
|
const summary = "Verify captured usable evidence. The supervising agent should now assess whether the proof supports ship or more internal iteration, then resume the workflow with proof_assessment_json.";
|
|
2659
2699
|
recordAttempt("verify", "checkpoint", summary, {
|
|
@@ -2,10 +2,10 @@ import {
|
|
|
2
2
|
createDisabledRiddleProofAgentAdapter,
|
|
3
3
|
readRiddleProofRunStatus,
|
|
4
4
|
runRiddleProofEngineHarness
|
|
5
|
-
} from "../chunk-ZOZLORGR.js";
|
|
5
|
+
} from "../chunk-RWF763A4.js";
|
|
6
6
|
import "../chunk-YZUVEJ5B.js";
|
|
7
7
|
import "../chunk-FMOYUYH2.js";
|
|
8
|
-
import "../chunk-7GZY5PLT.js";
|
|
8
|
+
import "../chunk-FU73I4V3.js";
|
|
9
9
|
import "../chunk-4FOHZ7JG.js";
|
|
10
10
|
import "../chunk-VY4Y5U57.js";
|
|
11
11
|
import "../chunk-MLKGABMK.js";
|
package/dist/advanced/index.cjs
CHANGED
|
@@ -543,6 +543,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
543
543
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
544
544
|
return labels;
|
|
545
545
|
}
|
|
546
|
+
function stateHasAfterEvidence(state = {}) {
|
|
547
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
548
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
549
|
+
const after = objectValue(bundle.after);
|
|
550
|
+
const observation = objectValue(after.observation);
|
|
551
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
552
|
+
return Boolean(
|
|
553
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
554
|
+
);
|
|
555
|
+
}
|
|
546
556
|
function validateShipGate(state = {}) {
|
|
547
557
|
const reference = normalizedReference(state);
|
|
548
558
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -572,7 +582,7 @@ function validateShipGate(state = {}) {
|
|
|
572
582
|
reasons.push("prod_cdn is required before ship");
|
|
573
583
|
}
|
|
574
584
|
}
|
|
575
|
-
if (!
|
|
585
|
+
if (!stateHasAfterEvidence(state)) {
|
|
576
586
|
reasons.push("after_cdn is required before ship");
|
|
577
587
|
}
|
|
578
588
|
if (verifyStatus !== "evidence_captured") {
|
|
@@ -976,7 +986,6 @@ var init_proof_run_core = __esm({
|
|
|
976
986
|
VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
977
987
|
"visual",
|
|
978
988
|
"render",
|
|
979
|
-
"interaction",
|
|
980
989
|
"ui",
|
|
981
990
|
"layout",
|
|
982
991
|
"screenshot",
|
|
@@ -2645,6 +2654,7 @@ ${implementRes.stderr || ""}`;
|
|
|
2645
2654
|
verifyContinueWithStage,
|
|
2646
2655
|
convergenceSignals
|
|
2647
2656
|
};
|
|
2657
|
+
const structuredInteractionFailureSummary = stringValue(verifyDecisionRequest?.structured_interaction_failure_summary);
|
|
2648
2658
|
if (verifyStatus !== "evidence_captured") {
|
|
2649
2659
|
const captureQuality = verifyDecisionRequest?.capture_quality || {};
|
|
2650
2660
|
const captureTerminalBlocker = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
@@ -2685,6 +2695,36 @@ ${implementRes.stderr || ""}`;
|
|
|
2685
2695
|
}
|
|
2686
2696
|
);
|
|
2687
2697
|
}
|
|
2698
|
+
if (structuredInteractionFailureSummary) {
|
|
2699
|
+
const summary = structuredInteractionFailureSummary;
|
|
2700
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2701
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2702
|
+
checkpoint: "verify_capture_blocked",
|
|
2703
|
+
details: verifyDetails
|
|
2704
|
+
});
|
|
2705
|
+
return checkpoint(
|
|
2706
|
+
"verify",
|
|
2707
|
+
"verify_capture_blocked",
|
|
2708
|
+
summary,
|
|
2709
|
+
{
|
|
2710
|
+
ok: true,
|
|
2711
|
+
nextActions: ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"],
|
|
2712
|
+
advanceOptions: verifyLoopAdvanceOptions,
|
|
2713
|
+
recommendedAdvanceStage: null,
|
|
2714
|
+
continueWithStage: null,
|
|
2715
|
+
blocking: true,
|
|
2716
|
+
details: verifyDetails,
|
|
2717
|
+
verifyStatus,
|
|
2718
|
+
verifySummary,
|
|
2719
|
+
afterCdn: state?.after_cdn || null,
|
|
2720
|
+
mergeRecommendation: state?.merge_recommendation || null,
|
|
2721
|
+
verifyDecisionRequest,
|
|
2722
|
+
proofAssessment: proofAssessment.raw,
|
|
2723
|
+
proofAssessmentRequest: state?.proof_assessment_request || null,
|
|
2724
|
+
executed
|
|
2725
|
+
}
|
|
2726
|
+
);
|
|
2727
|
+
}
|
|
2688
2728
|
if (!hasSupervisorProofAssessment(state)) {
|
|
2689
2729
|
const summary = "Verify captured usable evidence. The supervising agent should now assess whether the proof supports ship or more internal iteration, then resume the workflow with proof_assessment_json.";
|
|
2690
2730
|
recordAttempt("verify", "checkpoint", summary, {
|
package/dist/advanced/index.js
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import {
|
|
2
2
|
proof_run_engine_exports
|
|
3
|
-
} from "../chunk-WJZYRUNV.js";
|
|
3
|
+
} from "../chunk-5NEO7WDZ.js";
|
|
4
4
|
import {
|
|
5
5
|
runner_exports
|
|
6
|
-
} from "../chunk-NGX4SUQN.js";
|
|
6
|
+
} from "../chunk-P2RN2NYR.js";
|
|
7
7
|
import {
|
|
8
8
|
engine_harness_exports
|
|
9
|
-
} from "../chunk-ZOZLORGR.js";
|
|
9
|
+
} from "../chunk-RWF763A4.js";
|
|
10
10
|
import "../chunk-YZUVEJ5B.js";
|
|
11
11
|
import "../chunk-FMOYUYH2.js";
|
|
12
12
|
import {
|
|
13
13
|
proof_run_core_exports
|
|
14
|
-
} from "../chunk-7GZY5PLT.js";
|
|
14
|
+
} from "../chunk-FU73I4V3.js";
|
|
15
15
|
import "../chunk-4FOHZ7JG.js";
|
|
16
16
|
import "../chunk-VY4Y5U57.js";
|
|
17
17
|
import "../chunk-MLKGABMK.js";
|
|
@@ -474,7 +474,6 @@ function normalizedProofAssessment(state = {}) {
|
|
|
474
474
|
var VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
475
475
|
"visual",
|
|
476
476
|
"render",
|
|
477
|
-
"interaction",
|
|
478
477
|
"ui",
|
|
479
478
|
"layout",
|
|
480
479
|
"screenshot",
|
|
@@ -569,6 +568,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
569
568
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
570
569
|
return labels;
|
|
571
570
|
}
|
|
571
|
+
function stateHasAfterEvidence(state = {}) {
|
|
572
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
573
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
574
|
+
const after = objectValue(bundle.after);
|
|
575
|
+
const observation = objectValue(after.observation);
|
|
576
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
577
|
+
return Boolean(
|
|
578
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
579
|
+
);
|
|
580
|
+
}
|
|
572
581
|
function validateShipGate(state = {}) {
|
|
573
582
|
const reference = normalizedReference(state);
|
|
574
583
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -598,7 +607,7 @@ function validateShipGate(state = {}) {
|
|
|
598
607
|
reasons.push("prod_cdn is required before ship");
|
|
599
608
|
}
|
|
600
609
|
}
|
|
601
|
-
if (!
|
|
610
|
+
if (!stateHasAfterEvidence(state)) {
|
|
602
611
|
reasons.push("after_cdn is required before ship");
|
|
603
612
|
}
|
|
604
613
|
if (verifyStatus !== "evidence_captured") {
|
|
@@ -453,7 +453,6 @@ function normalizedProofAssessment(state = {}) {
|
|
|
453
453
|
var VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
454
454
|
"visual",
|
|
455
455
|
"render",
|
|
456
|
-
"interaction",
|
|
457
456
|
"ui",
|
|
458
457
|
"layout",
|
|
459
458
|
"screenshot",
|
|
@@ -548,6 +547,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
548
547
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
549
548
|
return labels;
|
|
550
549
|
}
|
|
550
|
+
function stateHasAfterEvidence(state = {}) {
|
|
551
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
552
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
553
|
+
const after = objectValue(bundle.after);
|
|
554
|
+
const observation = objectValue(after.observation);
|
|
555
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
556
|
+
return Boolean(
|
|
557
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
558
|
+
);
|
|
559
|
+
}
|
|
551
560
|
function validateShipGate(state = {}) {
|
|
552
561
|
const reference = normalizedReference(state);
|
|
553
562
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -577,7 +586,7 @@ function validateShipGate(state = {}) {
|
|
|
577
586
|
reasons.push("prod_cdn is required before ship");
|
|
578
587
|
}
|
|
579
588
|
}
|
|
580
|
-
if (!
|
|
589
|
+
if (!stateHasAfterEvidence(state)) {
|
|
581
590
|
reasons.push("after_cdn is required before ship");
|
|
582
591
|
}
|
|
583
592
|
if (verifyStatus !== "evidence_captured") {
|
|
@@ -2614,6 +2623,7 @@ ${implementRes.stderr || ""}`;
|
|
|
2614
2623
|
verifyContinueWithStage,
|
|
2615
2624
|
convergenceSignals
|
|
2616
2625
|
};
|
|
2626
|
+
const structuredInteractionFailureSummary = stringValue(verifyDecisionRequest?.structured_interaction_failure_summary);
|
|
2617
2627
|
if (verifyStatus !== "evidence_captured") {
|
|
2618
2628
|
const captureQuality = verifyDecisionRequest?.capture_quality || {};
|
|
2619
2629
|
const captureTerminalBlocker = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
@@ -2654,6 +2664,36 @@ ${implementRes.stderr || ""}`;
|
|
|
2654
2664
|
}
|
|
2655
2665
|
);
|
|
2656
2666
|
}
|
|
2667
|
+
if (structuredInteractionFailureSummary) {
|
|
2668
|
+
const summary = structuredInteractionFailureSummary;
|
|
2669
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2670
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2671
|
+
checkpoint: "verify_capture_blocked",
|
|
2672
|
+
details: verifyDetails
|
|
2673
|
+
});
|
|
2674
|
+
return checkpoint(
|
|
2675
|
+
"verify",
|
|
2676
|
+
"verify_capture_blocked",
|
|
2677
|
+
summary,
|
|
2678
|
+
{
|
|
2679
|
+
ok: true,
|
|
2680
|
+
nextActions: ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"],
|
|
2681
|
+
advanceOptions: verifyLoopAdvanceOptions,
|
|
2682
|
+
recommendedAdvanceStage: null,
|
|
2683
|
+
continueWithStage: null,
|
|
2684
|
+
blocking: true,
|
|
2685
|
+
details: verifyDetails,
|
|
2686
|
+
verifyStatus,
|
|
2687
|
+
verifySummary,
|
|
2688
|
+
afterCdn: state?.after_cdn || null,
|
|
2689
|
+
mergeRecommendation: state?.merge_recommendation || null,
|
|
2690
|
+
verifyDecisionRequest,
|
|
2691
|
+
proofAssessment: proofAssessment.raw,
|
|
2692
|
+
proofAssessmentRequest: state?.proof_assessment_request || null,
|
|
2693
|
+
executed
|
|
2694
|
+
}
|
|
2695
|
+
);
|
|
2696
|
+
}
|
|
2657
2697
|
if (!hasSupervisorProofAssessment(state)) {
|
|
2658
2698
|
const summary = "Verify captured usable evidence. The supervising agent should now assess whether the proof supports ship or more internal iteration, then resume the workflow with proof_assessment_json.";
|
|
2659
2699
|
recordAttempt("verify", "checkpoint", summary, {
|
package/dist/advanced/runner.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runRiddleProof
|
|
3
|
-
} from "../chunk-NGX4SUQN.js";
|
|
3
|
+
} from "../chunk-P2RN2NYR.js";
|
|
4
4
|
import "../chunk-YZUVEJ5B.js";
|
|
5
5
|
import "../chunk-FMOYUYH2.js";
|
|
6
|
-
import "../chunk-7GZY5PLT.js";
|
|
6
|
+
import "../chunk-FU73I4V3.js";
|
|
7
7
|
import "../chunk-4FOHZ7JG.js";
|
|
8
8
|
import "../chunk-VY4Y5U57.js";
|
|
9
9
|
import "../chunk-MLKGABMK.js";
|
|
@@ -15,7 +15,7 @@ import {
|
|
|
15
15
|
validateShipGate,
|
|
16
16
|
workflowFile,
|
|
17
17
|
writeState
|
|
18
|
-
} from "./chunk-7GZY5PLT.js";
|
|
18
|
+
} from "./chunk-FU73I4V3.js";
|
|
19
19
|
import {
|
|
20
20
|
__export
|
|
21
21
|
} from "./chunk-MLKGABMK.js";
|
|
@@ -1511,6 +1511,7 @@ ${implementRes.stderr || ""}`;
|
|
|
1511
1511
|
verifyContinueWithStage,
|
|
1512
1512
|
convergenceSignals
|
|
1513
1513
|
};
|
|
1514
|
+
const structuredInteractionFailureSummary = stringValue(verifyDecisionRequest?.structured_interaction_failure_summary);
|
|
1514
1515
|
if (verifyStatus !== "evidence_captured") {
|
|
1515
1516
|
const captureQuality = verifyDecisionRequest?.capture_quality || {};
|
|
1516
1517
|
const captureTerminalBlocker = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
@@ -1551,6 +1552,36 @@ ${implementRes.stderr || ""}`;
|
|
|
1551
1552
|
}
|
|
1552
1553
|
);
|
|
1553
1554
|
}
|
|
1555
|
+
if (structuredInteractionFailureSummary) {
|
|
1556
|
+
const summary = structuredInteractionFailureSummary;
|
|
1557
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
1558
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
1559
|
+
checkpoint: "verify_capture_blocked",
|
|
1560
|
+
details: verifyDetails
|
|
1561
|
+
});
|
|
1562
|
+
return checkpoint(
|
|
1563
|
+
"verify",
|
|
1564
|
+
"verify_capture_blocked",
|
|
1565
|
+
summary,
|
|
1566
|
+
{
|
|
1567
|
+
ok: true,
|
|
1568
|
+
nextActions: ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"],
|
|
1569
|
+
advanceOptions: verifyLoopAdvanceOptions,
|
|
1570
|
+
recommendedAdvanceStage: null,
|
|
1571
|
+
continueWithStage: null,
|
|
1572
|
+
blocking: true,
|
|
1573
|
+
details: verifyDetails,
|
|
1574
|
+
verifyStatus,
|
|
1575
|
+
verifySummary,
|
|
1576
|
+
afterCdn: state?.after_cdn || null,
|
|
1577
|
+
mergeRecommendation: state?.merge_recommendation || null,
|
|
1578
|
+
verifyDecisionRequest,
|
|
1579
|
+
proofAssessment: proofAssessment.raw,
|
|
1580
|
+
proofAssessmentRequest: state?.proof_assessment_request || null,
|
|
1581
|
+
executed
|
|
1582
|
+
}
|
|
1583
|
+
);
|
|
1584
|
+
}
|
|
1554
1585
|
if (!hasSupervisorProofAssessment(state)) {
|
|
1555
1586
|
const summary = "Verify captured usable evidence. The supervising agent should now assess whether the proof supports ship or more internal iteration, then resume the workflow with proof_assessment_json.";
|
|
1556
1587
|
recordAttempt("verify", "checkpoint", summary, {
|
|
@@ -445,7 +445,6 @@ function normalizedProofAssessment(state = {}) {
|
|
|
445
445
|
var VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
446
446
|
"visual",
|
|
447
447
|
"render",
|
|
448
|
-
"interaction",
|
|
449
448
|
"ui",
|
|
450
449
|
"layout",
|
|
451
450
|
"screenshot",
|
|
@@ -540,6 +539,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
540
539
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
541
540
|
return labels;
|
|
542
541
|
}
|
|
542
|
+
function stateHasAfterEvidence(state = {}) {
|
|
543
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
544
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
545
|
+
const after = objectValue(bundle.after);
|
|
546
|
+
const observation = objectValue(after.observation);
|
|
547
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
548
|
+
return Boolean(
|
|
549
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
550
|
+
);
|
|
551
|
+
}
|
|
543
552
|
function validateShipGate(state = {}) {
|
|
544
553
|
const reference = normalizedReference(state);
|
|
545
554
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -569,7 +578,7 @@ function validateShipGate(state = {}) {
|
|
|
569
578
|
reasons.push("prod_cdn is required before ship");
|
|
570
579
|
}
|
|
571
580
|
}
|
|
572
|
-
if (!
|
|
581
|
+
if (!stateHasAfterEvidence(state)) {
|
|
573
582
|
reasons.push("after_cdn is required before ship");
|
|
574
583
|
}
|
|
575
584
|
if (verifyStatus !== "evidence_captured") {
|
package/dist/cli/index.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import "../chunk-E7UTJ7KB.js";
|
|
1
|
+
import "../chunk-K3TPF55N.js";
|
|
2
2
|
import "../chunk-PEWAIEER.js";
|
|
3
3
|
import "../chunk-TWTEUS7R.js";
|
|
4
|
-
import "../chunk-ZOZLORGR.js";
|
|
4
|
+
import "../chunk-RWF763A4.js";
|
|
5
5
|
import "../chunk-YZUVEJ5B.js";
|
|
6
6
|
import "../chunk-FMOYUYH2.js";
|
|
7
|
-
import "../chunk-7GZY5PLT.js";
|
|
7
|
+
import "../chunk-FU73I4V3.js";
|
|
8
8
|
import "../chunk-4FOHZ7JG.js";
|
|
9
9
|
import "../chunk-JFQXAJH2.js";
|
|
10
10
|
import "../chunk-EEIYUZXE.js";
|
package/dist/cli.cjs
CHANGED
|
@@ -512,6 +512,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
512
512
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
513
513
|
return labels;
|
|
514
514
|
}
|
|
515
|
+
function stateHasAfterEvidence(state = {}) {
|
|
516
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
517
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
518
|
+
const after = objectValue(bundle.after);
|
|
519
|
+
const observation = objectValue(after.observation);
|
|
520
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
521
|
+
return Boolean(
|
|
522
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
523
|
+
);
|
|
524
|
+
}
|
|
515
525
|
function validateShipGate(state = {}) {
|
|
516
526
|
const reference = normalizedReference(state);
|
|
517
527
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -541,7 +551,7 @@ function validateShipGate(state = {}) {
|
|
|
541
551
|
reasons.push("prod_cdn is required before ship");
|
|
542
552
|
}
|
|
543
553
|
}
|
|
544
|
-
if (!
|
|
554
|
+
if (!stateHasAfterEvidence(state)) {
|
|
545
555
|
reasons.push("after_cdn is required before ship");
|
|
546
556
|
}
|
|
547
557
|
if (verifyStatus !== "evidence_captured") {
|
|
@@ -945,7 +955,6 @@ var init_proof_run_core = __esm({
|
|
|
945
955
|
VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
946
956
|
"visual",
|
|
947
957
|
"render",
|
|
948
|
-
"interaction",
|
|
949
958
|
"ui",
|
|
950
959
|
"layout",
|
|
951
960
|
"screenshot",
|
|
@@ -2614,6 +2623,7 @@ ${implementRes.stderr || ""}`;
|
|
|
2614
2623
|
verifyContinueWithStage,
|
|
2615
2624
|
convergenceSignals
|
|
2616
2625
|
};
|
|
2626
|
+
const structuredInteractionFailureSummary = stringValue(verifyDecisionRequest?.structured_interaction_failure_summary);
|
|
2617
2627
|
if (verifyStatus !== "evidence_captured") {
|
|
2618
2628
|
const captureQuality = verifyDecisionRequest?.capture_quality || {};
|
|
2619
2629
|
const captureTerminalBlocker = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
@@ -2654,6 +2664,36 @@ ${implementRes.stderr || ""}`;
|
|
|
2654
2664
|
}
|
|
2655
2665
|
);
|
|
2656
2666
|
}
|
|
2667
|
+
if (structuredInteractionFailureSummary) {
|
|
2668
|
+
const summary = structuredInteractionFailureSummary;
|
|
2669
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2670
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2671
|
+
checkpoint: "verify_capture_blocked",
|
|
2672
|
+
details: verifyDetails
|
|
2673
|
+
});
|
|
2674
|
+
return checkpoint(
|
|
2675
|
+
"verify",
|
|
2676
|
+
"verify_capture_blocked",
|
|
2677
|
+
summary,
|
|
2678
|
+
{
|
|
2679
|
+
ok: true,
|
|
2680
|
+
nextActions: ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"],
|
|
2681
|
+
advanceOptions: verifyLoopAdvanceOptions,
|
|
2682
|
+
recommendedAdvanceStage: null,
|
|
2683
|
+
continueWithStage: null,
|
|
2684
|
+
blocking: true,
|
|
2685
|
+
details: verifyDetails,
|
|
2686
|
+
verifyStatus,
|
|
2687
|
+
verifySummary,
|
|
2688
|
+
afterCdn: state?.after_cdn || null,
|
|
2689
|
+
mergeRecommendation: state?.merge_recommendation || null,
|
|
2690
|
+
verifyDecisionRequest,
|
|
2691
|
+
proofAssessment: proofAssessment.raw,
|
|
2692
|
+
proofAssessmentRequest: state?.proof_assessment_request || null,
|
|
2693
|
+
executed
|
|
2694
|
+
}
|
|
2695
|
+
);
|
|
2696
|
+
}
|
|
2657
2697
|
if (!hasSupervisorProofAssessment(state)) {
|
|
2658
2698
|
const summary = "Verify captured usable evidence. The supervising agent should now assess whether the proof supports ship or more internal iteration, then resume the workflow with proof_assessment_json.";
|
|
2659
2699
|
recordAttempt("verify", "checkpoint", summary, {
|
package/dist/cli.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import "./chunk-E7UTJ7KB.js";
|
|
2
|
+
import "./chunk-K3TPF55N.js";
|
|
3
3
|
import "./chunk-PEWAIEER.js";
|
|
4
4
|
import "./chunk-TWTEUS7R.js";
|
|
5
|
-
import "./chunk-ZOZLORGR.js";
|
|
5
|
+
import "./chunk-RWF763A4.js";
|
|
6
6
|
import "./chunk-YZUVEJ5B.js";
|
|
7
7
|
import "./chunk-FMOYUYH2.js";
|
|
8
|
-
import "./chunk-7GZY5PLT.js";
|
|
8
|
+
import "./chunk-FU73I4V3.js";
|
|
9
9
|
import "./chunk-4FOHZ7JG.js";
|
|
10
10
|
import "./chunk-JFQXAJH2.js";
|
|
11
11
|
import "./chunk-EEIYUZXE.js";
|
package/dist/engine-harness.cjs
CHANGED
|
@@ -512,6 +512,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
512
512
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
513
513
|
return labels;
|
|
514
514
|
}
|
|
515
|
+
function stateHasAfterEvidence(state = {}) {
|
|
516
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
517
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
518
|
+
const after = objectValue(bundle.after);
|
|
519
|
+
const observation = objectValue(after.observation);
|
|
520
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
521
|
+
return Boolean(
|
|
522
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
523
|
+
);
|
|
524
|
+
}
|
|
515
525
|
function validateShipGate(state = {}) {
|
|
516
526
|
const reference = normalizedReference(state);
|
|
517
527
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -541,7 +551,7 @@ function validateShipGate(state = {}) {
|
|
|
541
551
|
reasons.push("prod_cdn is required before ship");
|
|
542
552
|
}
|
|
543
553
|
}
|
|
544
|
-
if (!
|
|
554
|
+
if (!stateHasAfterEvidence(state)) {
|
|
545
555
|
reasons.push("after_cdn is required before ship");
|
|
546
556
|
}
|
|
547
557
|
if (verifyStatus !== "evidence_captured") {
|
|
@@ -945,7 +955,6 @@ var init_proof_run_core = __esm({
|
|
|
945
955
|
VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
946
956
|
"visual",
|
|
947
957
|
"render",
|
|
948
|
-
"interaction",
|
|
949
958
|
"ui",
|
|
950
959
|
"layout",
|
|
951
960
|
"screenshot",
|
|
@@ -2614,6 +2623,7 @@ ${implementRes.stderr || ""}`;
|
|
|
2614
2623
|
verifyContinueWithStage,
|
|
2615
2624
|
convergenceSignals
|
|
2616
2625
|
};
|
|
2626
|
+
const structuredInteractionFailureSummary = stringValue(verifyDecisionRequest?.structured_interaction_failure_summary);
|
|
2617
2627
|
if (verifyStatus !== "evidence_captured") {
|
|
2618
2628
|
const captureQuality = verifyDecisionRequest?.capture_quality || {};
|
|
2619
2629
|
const captureTerminalBlocker = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
@@ -2654,6 +2664,36 @@ ${implementRes.stderr || ""}`;
|
|
|
2654
2664
|
}
|
|
2655
2665
|
);
|
|
2656
2666
|
}
|
|
2667
|
+
if (structuredInteractionFailureSummary) {
|
|
2668
|
+
const summary = structuredInteractionFailureSummary;
|
|
2669
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2670
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2671
|
+
checkpoint: "verify_capture_blocked",
|
|
2672
|
+
details: verifyDetails
|
|
2673
|
+
});
|
|
2674
|
+
return checkpoint(
|
|
2675
|
+
"verify",
|
|
2676
|
+
"verify_capture_blocked",
|
|
2677
|
+
summary,
|
|
2678
|
+
{
|
|
2679
|
+
ok: true,
|
|
2680
|
+
nextActions: ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"],
|
|
2681
|
+
advanceOptions: verifyLoopAdvanceOptions,
|
|
2682
|
+
recommendedAdvanceStage: null,
|
|
2683
|
+
continueWithStage: null,
|
|
2684
|
+
blocking: true,
|
|
2685
|
+
details: verifyDetails,
|
|
2686
|
+
verifyStatus,
|
|
2687
|
+
verifySummary,
|
|
2688
|
+
afterCdn: state?.after_cdn || null,
|
|
2689
|
+
mergeRecommendation: state?.merge_recommendation || null,
|
|
2690
|
+
verifyDecisionRequest,
|
|
2691
|
+
proofAssessment: proofAssessment.raw,
|
|
2692
|
+
proofAssessmentRequest: state?.proof_assessment_request || null,
|
|
2693
|
+
executed
|
|
2694
|
+
}
|
|
2695
|
+
);
|
|
2696
|
+
}
|
|
2657
2697
|
if (!hasSupervisorProofAssessment(state)) {
|
|
2658
2698
|
const summary = "Verify captured usable evidence. The supervising agent should now assess whether the proof supports ship or more internal iteration, then resume the workflow with proof_assessment_json.";
|
|
2659
2699
|
recordAttempt("verify", "checkpoint", summary, {
|
package/dist/engine-harness.js
CHANGED
|
@@ -2,10 +2,10 @@ import {
|
|
|
2
2
|
createDisabledRiddleProofAgentAdapter,
|
|
3
3
|
readRiddleProofRunStatus,
|
|
4
4
|
runRiddleProofEngineHarness
|
|
5
|
-
} from "./chunk-ZOZLORGR.js";
|
|
5
|
+
} from "./chunk-RWF763A4.js";
|
|
6
6
|
import "./chunk-YZUVEJ5B.js";
|
|
7
7
|
import "./chunk-FMOYUYH2.js";
|
|
8
|
-
import "./chunk-7GZY5PLT.js";
|
|
8
|
+
import "./chunk-FU73I4V3.js";
|
|
9
9
|
import "./chunk-4FOHZ7JG.js";
|
|
10
10
|
import "./chunk-VY4Y5U57.js";
|
|
11
11
|
import "./chunk-MLKGABMK.js";
|
package/dist/index.cjs
CHANGED
|
@@ -512,6 +512,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
512
512
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
513
513
|
return labels;
|
|
514
514
|
}
|
|
515
|
+
function stateHasAfterEvidence(state = {}) {
|
|
516
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
517
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
518
|
+
const after = objectValue(bundle.after);
|
|
519
|
+
const observation = objectValue(after.observation);
|
|
520
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
521
|
+
return Boolean(
|
|
522
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
523
|
+
);
|
|
524
|
+
}
|
|
515
525
|
function validateShipGate(state = {}) {
|
|
516
526
|
const reference = normalizedReference(state);
|
|
517
527
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -541,7 +551,7 @@ function validateShipGate(state = {}) {
|
|
|
541
551
|
reasons.push("prod_cdn is required before ship");
|
|
542
552
|
}
|
|
543
553
|
}
|
|
544
|
-
if (!
|
|
554
|
+
if (!stateHasAfterEvidence(state)) {
|
|
545
555
|
reasons.push("after_cdn is required before ship");
|
|
546
556
|
}
|
|
547
557
|
if (verifyStatus !== "evidence_captured") {
|
|
@@ -945,7 +955,6 @@ var init_proof_run_core = __esm({
|
|
|
945
955
|
VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
946
956
|
"visual",
|
|
947
957
|
"render",
|
|
948
|
-
"interaction",
|
|
949
958
|
"ui",
|
|
950
959
|
"layout",
|
|
951
960
|
"screenshot",
|
|
@@ -2614,6 +2623,7 @@ ${implementRes.stderr || ""}`;
|
|
|
2614
2623
|
verifyContinueWithStage,
|
|
2615
2624
|
convergenceSignals
|
|
2616
2625
|
};
|
|
2626
|
+
const structuredInteractionFailureSummary = stringValue(verifyDecisionRequest?.structured_interaction_failure_summary);
|
|
2617
2627
|
if (verifyStatus !== "evidence_captured") {
|
|
2618
2628
|
const captureQuality = verifyDecisionRequest?.capture_quality || {};
|
|
2619
2629
|
const captureTerminalBlocker = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
@@ -2654,6 +2664,36 @@ ${implementRes.stderr || ""}`;
|
|
|
2654
2664
|
}
|
|
2655
2665
|
);
|
|
2656
2666
|
}
|
|
2667
|
+
if (structuredInteractionFailureSummary) {
|
|
2668
|
+
const summary = structuredInteractionFailureSummary;
|
|
2669
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2670
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2671
|
+
checkpoint: "verify_capture_blocked",
|
|
2672
|
+
details: verifyDetails
|
|
2673
|
+
});
|
|
2674
|
+
return checkpoint(
|
|
2675
|
+
"verify",
|
|
2676
|
+
"verify_capture_blocked",
|
|
2677
|
+
summary,
|
|
2678
|
+
{
|
|
2679
|
+
ok: true,
|
|
2680
|
+
nextActions: ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"],
|
|
2681
|
+
advanceOptions: verifyLoopAdvanceOptions,
|
|
2682
|
+
recommendedAdvanceStage: null,
|
|
2683
|
+
continueWithStage: null,
|
|
2684
|
+
blocking: true,
|
|
2685
|
+
details: verifyDetails,
|
|
2686
|
+
verifyStatus,
|
|
2687
|
+
verifySummary,
|
|
2688
|
+
afterCdn: state?.after_cdn || null,
|
|
2689
|
+
mergeRecommendation: state?.merge_recommendation || null,
|
|
2690
|
+
verifyDecisionRequest,
|
|
2691
|
+
proofAssessment: proofAssessment.raw,
|
|
2692
|
+
proofAssessmentRequest: state?.proof_assessment_request || null,
|
|
2693
|
+
executed
|
|
2694
|
+
}
|
|
2695
|
+
);
|
|
2696
|
+
}
|
|
2657
2697
|
if (!hasSupervisorProofAssessment(state)) {
|
|
2658
2698
|
const summary = "Verify captured usable evidence. The supervising agent should now assess whether the proof supports ship or more internal iteration, then resume the workflow with proof_assessment_json.";
|
|
2659
2699
|
recordAttempt("verify", "checkpoint", summary, {
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runRiddleProof
|
|
3
|
-
} from "./chunk-NGX4SUQN.js";
|
|
3
|
+
} from "./chunk-P2RN2NYR.js";
|
|
4
4
|
import "./chunk-6F4PWJZI.js";
|
|
5
5
|
import {
|
|
6
6
|
RIDDLE_PROOF_PLAYABILITY_ASSESSMENT_VERSION,
|
|
@@ -95,7 +95,7 @@ import {
|
|
|
95
95
|
createDisabledRiddleProofAgentAdapter,
|
|
96
96
|
readRiddleProofRunStatus,
|
|
97
97
|
runRiddleProofEngineHarness
|
|
98
|
-
} from "./chunk-ZOZLORGR.js";
|
|
98
|
+
} from "./chunk-RWF763A4.js";
|
|
99
99
|
import {
|
|
100
100
|
RIDDLE_PROOF_RUN_STATE_VERSION,
|
|
101
101
|
appendRunEvent,
|
|
@@ -112,7 +112,7 @@ import {
|
|
|
112
112
|
RIDDLE_PROOF_RUN_CARD_VERSION,
|
|
113
113
|
createRiddleProofRunCard
|
|
114
114
|
} from "./chunk-FMOYUYH2.js";
|
|
115
|
-
import "./chunk-7GZY5PLT.js";
|
|
115
|
+
import "./chunk-FU73I4V3.js";
|
|
116
116
|
import {
|
|
117
117
|
RIDDLE_PROOF_CHECKPOINT_PACKET_VERSION,
|
|
118
118
|
RIDDLE_PROOF_CHECKPOINT_RESPONSE_VERSION,
|
package/dist/proof-run-core.cjs
CHANGED
|
@@ -472,7 +472,6 @@ function normalizedProofAssessment(state = {}) {
|
|
|
472
472
|
var VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
473
473
|
"visual",
|
|
474
474
|
"render",
|
|
475
|
-
"interaction",
|
|
476
475
|
"ui",
|
|
477
476
|
"layout",
|
|
478
477
|
"screenshot",
|
|
@@ -567,6 +566,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
567
566
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
568
567
|
return labels;
|
|
569
568
|
}
|
|
569
|
+
function stateHasAfterEvidence(state = {}) {
|
|
570
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
571
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
572
|
+
const after = objectValue(bundle.after);
|
|
573
|
+
const observation = objectValue(after.observation);
|
|
574
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
575
|
+
return Boolean(
|
|
576
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
577
|
+
);
|
|
578
|
+
}
|
|
570
579
|
function validateShipGate(state = {}) {
|
|
571
580
|
const reference = normalizedReference(state);
|
|
572
581
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -596,7 +605,7 @@ function validateShipGate(state = {}) {
|
|
|
596
605
|
reasons.push("prod_cdn is required before ship");
|
|
597
606
|
}
|
|
598
607
|
}
|
|
599
|
-
if (!
|
|
608
|
+
if (!stateHasAfterEvidence(state)) {
|
|
600
609
|
reasons.push("after_cdn is required before ship");
|
|
601
610
|
}
|
|
602
611
|
if (verifyStatus !== "evidence_captured") {
|
package/dist/proof-run-core.js
CHANGED
|
@@ -451,7 +451,6 @@ function normalizedProofAssessment(state = {}) {
|
|
|
451
451
|
var VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
452
452
|
"visual",
|
|
453
453
|
"render",
|
|
454
|
-
"interaction",
|
|
455
454
|
"ui",
|
|
456
455
|
"layout",
|
|
457
456
|
"screenshot",
|
|
@@ -546,6 +545,16 @@ function requiredBaselineLabelsForState(state = {}) {
|
|
|
546
545
|
if (reference === "prod" || reference === "both") labels.push("prod");
|
|
547
546
|
return labels;
|
|
548
547
|
}
|
|
548
|
+
function stateHasAfterEvidence(state = {}) {
|
|
549
|
+
if (String(state?.after_cdn || "").trim()) return true;
|
|
550
|
+
const bundle = objectValue(state?.evidence_bundle);
|
|
551
|
+
const after = objectValue(bundle.after);
|
|
552
|
+
const observation = objectValue(after.observation);
|
|
553
|
+
const supporting = objectValue(after.supporting_artifacts);
|
|
554
|
+
return Boolean(
|
|
555
|
+
observation.valid === true && (supporting.has_structured_payload === true || supporting.proof_evidence_present === true || observation.telemetry_ready === true || Object.keys(objectValue(bundle.proof_evidence)).length > 0 || Object.keys(objectValue(after.proof_evidence)).length > 0)
|
|
556
|
+
);
|
|
557
|
+
}
|
|
549
558
|
function validateShipGate(state = {}) {
|
|
550
559
|
const reference = normalizedReference(state);
|
|
551
560
|
const prodUrl = String(state?.prod_url || "").trim();
|
|
@@ -575,7 +584,7 @@ function validateShipGate(state = {}) {
|
|
|
575
584
|
reasons.push("prod_cdn is required before ship");
|
|
576
585
|
}
|
|
577
586
|
}
|
|
578
|
-
if (!
|
|
587
|
+
if (!stateHasAfterEvidence(state)) {
|
|
579
588
|
reasons.push("after_cdn is required before ship");
|
|
580
589
|
}
|
|
581
590
|
if (verifyStatus !== "evidence_captured") {
|
|
@@ -2612,6 +2621,7 @@ ${implementRes.stderr || ""}`;
|
|
|
2612
2621
|
verifyContinueWithStage,
|
|
2613
2622
|
convergenceSignals
|
|
2614
2623
|
};
|
|
2624
|
+
const structuredInteractionFailureSummary = stringValue(verifyDecisionRequest?.structured_interaction_failure_summary);
|
|
2615
2625
|
if (verifyStatus !== "evidence_captured") {
|
|
2616
2626
|
const captureQuality = verifyDecisionRequest?.capture_quality || {};
|
|
2617
2627
|
const captureTerminalBlocker = captureQuality?.terminal_blocker === true || captureQuality?.blocking === true;
|
|
@@ -2652,6 +2662,36 @@ ${implementRes.stderr || ""}`;
|
|
|
2652
2662
|
}
|
|
2653
2663
|
);
|
|
2654
2664
|
}
|
|
2665
|
+
if (structuredInteractionFailureSummary) {
|
|
2666
|
+
const summary = structuredInteractionFailureSummary;
|
|
2667
|
+
recordAttempt("verify", "checkpoint", summary, {
|
|
2668
|
+
autoApproved: verifyRes.autoApproved || false,
|
|
2669
|
+
checkpoint: "verify_capture_blocked",
|
|
2670
|
+
details: verifyDetails
|
|
2671
|
+
});
|
|
2672
|
+
return checkpoint(
|
|
2673
|
+
"verify",
|
|
2674
|
+
"verify_capture_blocked",
|
|
2675
|
+
summary,
|
|
2676
|
+
{
|
|
2677
|
+
ok: true,
|
|
2678
|
+
nextActions: ["inspect_after_capture", "report_specific_browser_evidence_blocker", "start_a_new_run_after_the_product_or_script_is_fixed"],
|
|
2679
|
+
advanceOptions: verifyLoopAdvanceOptions,
|
|
2680
|
+
recommendedAdvanceStage: null,
|
|
2681
|
+
continueWithStage: null,
|
|
2682
|
+
blocking: true,
|
|
2683
|
+
details: verifyDetails,
|
|
2684
|
+
verifyStatus,
|
|
2685
|
+
verifySummary,
|
|
2686
|
+
afterCdn: state?.after_cdn || null,
|
|
2687
|
+
mergeRecommendation: state?.merge_recommendation || null,
|
|
2688
|
+
verifyDecisionRequest,
|
|
2689
|
+
proofAssessment: proofAssessment.raw,
|
|
2690
|
+
proofAssessmentRequest: state?.proof_assessment_request || null,
|
|
2691
|
+
executed
|
|
2692
|
+
}
|
|
2693
|
+
);
|
|
2694
|
+
}
|
|
2655
2695
|
if (!hasSupervisorProofAssessment(state)) {
|
|
2656
2696
|
const summary = "Verify captured usable evidence. The supervising agent should now assess whether the proof supports ship or more internal iteration, then resume the workflow with proof_assessment_json.";
|
|
2657
2697
|
recordAttempt("verify", "checkpoint", summary, {
|
package/dist/proof-run-engine.js
CHANGED
package/dist/runner.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runRiddleProof
|
|
3
|
-
} from "./chunk-NGX4SUQN.js";
|
|
3
|
+
} from "./chunk-P2RN2NYR.js";
|
|
4
4
|
import "./chunk-YZUVEJ5B.js";
|
|
5
5
|
import "./chunk-FMOYUYH2.js";
|
|
6
|
-
import "./chunk-7GZY5PLT.js";
|
|
6
|
+
import "./chunk-FU73I4V3.js";
|
|
7
7
|
import "./chunk-4FOHZ7JG.js";
|
|
8
8
|
import "./chunk-VY4Y5U57.js";
|
|
9
9
|
import "./chunk-MLKGABMK.js";
|
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
"missing-selector-timeout-specific-blocker",
|
|
36
36
|
"thrown-error-preserves-structured-evidence",
|
|
37
37
|
"interaction-thrown-error-specific-blocker",
|
|
38
|
+
"interaction-iife-structured-proof-without-screenshot-pass",
|
|
38
39
|
"proof-evidence-absent-specific-blocker",
|
|
39
40
|
"no-diff-prod-audit-default-capture-pass"
|
|
40
41
|
]
|
package/package.json
CHANGED
package/runtime/lib/ship.py
CHANGED
|
@@ -10,7 +10,7 @@ from util import load_state, save_state, invoke, git
|
|
|
10
10
|
DISCORD_API = 'https://discord.com/api/v10'
|
|
11
11
|
SHIP_NOISE_PATHS = ('.codex', '.oc-smoke')
|
|
12
12
|
VISUAL_FIRST_MODES = {
|
|
13
|
-
'visual', 'render', 'interaction', 'ui', 'layout', 'screenshot',
|
|
13
|
+
'visual', 'render', 'ui', 'layout', 'screenshot',
|
|
14
14
|
'canvas', 'animation',
|
|
15
15
|
}
|
|
16
16
|
|
package/runtime/lib/verify.py
CHANGED
|
@@ -52,7 +52,7 @@ STRUCTURED_FIRST_MODES = {
|
|
|
52
52
|
'telemetry', 'text', 'api',
|
|
53
53
|
}
|
|
54
54
|
VISUAL_FIRST_MODES = {
|
|
55
|
-
'visual', 'render', 'interaction', 'ui', 'layout', 'screenshot',
|
|
55
|
+
'visual', 'render', 'ui', 'layout', 'screenshot',
|
|
56
56
|
'canvas', 'animation',
|
|
57
57
|
}
|
|
58
58
|
INTERACTION_MODES = {'interaction', 'interactive', 'user_flow', 'user-flow', 'workflow'}
|
|
@@ -235,6 +235,20 @@ def auto_screenshot_for_mode(verification_mode):
|
|
|
235
235
|
return normalized_verification_mode(verification_mode) not in STRUCTURED_FIRST_MODES
|
|
236
236
|
|
|
237
237
|
|
|
238
|
+
def capture_script_iife_expression(script):
|
|
239
|
+
text = (script or '').strip()
|
|
240
|
+
while text.endswith(';'):
|
|
241
|
+
text = text[:-1].rstrip()
|
|
242
|
+
if not text:
|
|
243
|
+
return ''
|
|
244
|
+
compact = re.sub(r'\s+', ' ', text)
|
|
245
|
+
if re.match(r'^\(\s*async\s*(function\b|\([^)]*\)\s*=>)', compact) and re.search(r'\)\s*\(\s*\)$', compact):
|
|
246
|
+
return text
|
|
247
|
+
if re.match(r'^\(\s*(function\b|\([^)]*\)\s*=>)', compact) and re.search(r'\)\s*\(\s*\)$', compact):
|
|
248
|
+
return text
|
|
249
|
+
return ''
|
|
250
|
+
|
|
251
|
+
|
|
238
252
|
def record_verify_phase(phase, status='running', summary=''):
|
|
239
253
|
global s
|
|
240
254
|
ts = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
|
|
@@ -368,10 +382,19 @@ def abort_capture_failure(state, results, expected_path, message, raw_payload):
|
|
|
368
382
|
def build_probe_capture_script(base_script='', verification_mode='proof', proof_session_seed=None, viewport_matrix=None):
|
|
369
383
|
pieces = []
|
|
370
384
|
script = (base_script or '').strip()
|
|
385
|
+
iife_expression = capture_script_iife_expression(script)
|
|
371
386
|
pieces.extend(viewport_matrix_setup_js(viewport_matrix))
|
|
372
387
|
pieces.append('let __riddleProofCaptureScriptError = null;')
|
|
373
388
|
pieces.append('let __riddleProofCaptureScriptResult = null;')
|
|
374
|
-
if script:
|
|
389
|
+
if iife_expression:
|
|
390
|
+
pieces.extend([
|
|
391
|
+
'try {',
|
|
392
|
+
'__riddleProofCaptureScriptResult = await (' + iife_expression + ');',
|
|
393
|
+
'} catch (err) {',
|
|
394
|
+
' __riddleProofCaptureScriptError = err;',
|
|
395
|
+
'}',
|
|
396
|
+
])
|
|
397
|
+
elif script:
|
|
375
398
|
pieces.extend([
|
|
376
399
|
'try {',
|
|
377
400
|
'__riddleProofCaptureScriptResult = await (async () => {',
|
|
@@ -444,6 +444,45 @@ class FakeRiddle:
|
|
|
444
444
|
'totalPixels': 972000,
|
|
445
445
|
},
|
|
446
446
|
}
|
|
447
|
+
if 'clickedProofNavigationOcLiveShapeNoScreenshot' in script:
|
|
448
|
+
assert '__riddleProofCaptureScriptResult = await ((async () =>' in script
|
|
449
|
+
page_state = {
|
|
450
|
+
'bodyTextLength': 4113,
|
|
451
|
+
'visibleTextSample': 'RIDDLE PROOF Turn a URL into evidence an agent can cite.',
|
|
452
|
+
'interactiveElements': 6,
|
|
453
|
+
'visibleInteractiveElements': 6,
|
|
454
|
+
'pathname': '/proof/',
|
|
455
|
+
'href': 'https://riddledc.com/proof/',
|
|
456
|
+
'title': 'Riddle Proof',
|
|
457
|
+
'buttons': ['Proof'],
|
|
458
|
+
'headings': ['Riddle Proof'],
|
|
459
|
+
'links': [],
|
|
460
|
+
'canvasCount': 0,
|
|
461
|
+
'largeVisibleElements': [{'tag': 'h1', 'text': 'Riddle Proof'}],
|
|
462
|
+
}
|
|
463
|
+
proof_evidence = {
|
|
464
|
+
'version': 'riddle-proof.interaction.v1',
|
|
465
|
+
'expectedUrl': 'https://riddledc.com/proof/',
|
|
466
|
+
'routeExpectationSource': 'capture_script.expectedUrl',
|
|
467
|
+
'start': {'href': 'https://riddledc.com/', 'pathname': '/'},
|
|
468
|
+
'action': {'type': 'click', 'target': 'visible Proof navigation link', 'clicked': True},
|
|
469
|
+
'terminal': {'href': 'https://riddledc.com/proof/', 'pathname': '/proof/'},
|
|
470
|
+
'assertions': [
|
|
471
|
+
{'name': 'route expectation source is capture_script.expectedUrl', 'pass': True},
|
|
472
|
+
{'name': 'terminal URL matched expected proof route', 'pass': True},
|
|
473
|
+
{'name': 'Proof page content visible', 'pass': True},
|
|
474
|
+
],
|
|
475
|
+
'success': True,
|
|
476
|
+
}
|
|
477
|
+
return {
|
|
478
|
+
'ok': True,
|
|
479
|
+
'outputs': [{'name': 'proof.json', 'url': 'https://cdn.example.com/proof.json'}],
|
|
480
|
+
'result': {'pageState': page_state, 'proofEvidence': proof_evidence},
|
|
481
|
+
'console': [
|
|
482
|
+
'RIDDLE_PROOF_STATE:' + json.dumps(page_state),
|
|
483
|
+
'RIDDLE_PROOF_EVIDENCE:' + json.dumps(proof_evidence),
|
|
484
|
+
],
|
|
485
|
+
}
|
|
447
486
|
if 'clickedProofNavigationOcLiveShape' in script:
|
|
448
487
|
page_state = {
|
|
449
488
|
'bodyTextLength': 4113,
|
|
@@ -2791,6 +2830,63 @@ def run_verify_interaction_terminal_route_from_proof_evidence():
|
|
|
2791
2830
|
shutil.rmtree(tempdir, ignore_errors=True)
|
|
2792
2831
|
|
|
2793
2832
|
|
|
2833
|
+
def run_verify_interaction_iife_structured_evidence_without_screenshot():
|
|
2834
|
+
tempdir = Path(tempfile.mkdtemp(prefix='riddle-proof-interaction-iife-no-shot-'))
|
|
2835
|
+
state_path = tempdir / 'state.json'
|
|
2836
|
+
try:
|
|
2837
|
+
state = base_state(tempdir, reference='before')
|
|
2838
|
+
state.update({
|
|
2839
|
+
'recon_status': 'ready_for_proof_plan',
|
|
2840
|
+
'author_status': 'ready',
|
|
2841
|
+
'proof_plan_status': 'ready',
|
|
2842
|
+
'implementation_status': 'changes_detected',
|
|
2843
|
+
'verification_mode': 'interaction',
|
|
2844
|
+
'server_path': '/',
|
|
2845
|
+
'before_cdn': 'https://cdn.example.com/before-home.png',
|
|
2846
|
+
'proof_plan': 'Start at /, click Proof, and verify the terminal /proof/ route.',
|
|
2847
|
+
'capture_script': (
|
|
2848
|
+
"(async () => { "
|
|
2849
|
+
"const evidence = await clickedProofNavigationOcLiveShapeNoScreenshot(); "
|
|
2850
|
+
"await saveScreenshot('after-proof'); "
|
|
2851
|
+
"return evidence; "
|
|
2852
|
+
"})();"
|
|
2853
|
+
),
|
|
2854
|
+
'recon_results': {
|
|
2855
|
+
'baselines': {'before': {'path': '/', 'url': 'https://cdn.example.com/before-home.png'}},
|
|
2856
|
+
},
|
|
2857
|
+
})
|
|
2858
|
+
write_state(state_path, state)
|
|
2859
|
+
os.environ['RIDDLE_PROOF_STATE_FILE'] = str(state_path)
|
|
2860
|
+
|
|
2861
|
+
fake = FakeRiddle()
|
|
2862
|
+
load_util_with_fake(fake)
|
|
2863
|
+
load_module('verify_interaction_iife_no_screenshot', VERIFY_PATH)
|
|
2864
|
+
after_verify = json.loads(state_path.read_text())
|
|
2865
|
+
|
|
2866
|
+
assert after_verify['verify_status'] == 'evidence_captured'
|
|
2867
|
+
assert after_verify['after_cdn'] == ''
|
|
2868
|
+
assert after_verify['verify_results']['after']['observation']['valid'] is True
|
|
2869
|
+
assert after_verify['verify_results']['after']['observation']['details']['screenshot_required'] is False
|
|
2870
|
+
assert after_verify['evidence_bundle']['artifact_contract']['required']['screenshot'] is False
|
|
2871
|
+
assert 'screenshot' not in after_verify['evidence_bundle']['artifact_usage']['missing_required_signals']
|
|
2872
|
+
assert after_verify['route_expectation']['expected_path'] == '/proof'
|
|
2873
|
+
route = after_verify['proof_assessment_request']['semantic_context']['route']
|
|
2874
|
+
assert route['expected_after_path'] == '/proof'
|
|
2875
|
+
assert route['after_observed_path'] == '/proof'
|
|
2876
|
+
assert 'wrong route' not in after_verify['verify_results']['after']['observation']['reason']
|
|
2877
|
+
supporting = after_verify['verify_results']['after']['supporting_artifacts']
|
|
2878
|
+
assert supporting['proof_evidence_present'] is True
|
|
2879
|
+
assert supporting['has_structured_payload'] is True
|
|
2880
|
+
return {
|
|
2881
|
+
'ok': True,
|
|
2882
|
+
'expected_path': after_verify['route_expectation']['expected_path'],
|
|
2883
|
+
'after_cdn': after_verify['after_cdn'],
|
|
2884
|
+
'screenshot_required': after_verify['verify_results']['after']['observation']['details']['screenshot_required'],
|
|
2885
|
+
}
|
|
2886
|
+
finally:
|
|
2887
|
+
shutil.rmtree(tempdir, ignore_errors=True)
|
|
2888
|
+
|
|
2889
|
+
|
|
2794
2890
|
def run_verify_interaction_proof_evidence_overrides_stale_expected_path():
|
|
2795
2891
|
tempdir = Path(tempfile.mkdtemp(prefix='riddle-proof-interaction-stale-route-'))
|
|
2796
2892
|
state_path = tempdir / 'state.json'
|
|
@@ -3765,6 +3861,7 @@ if __name__ == '__main__':
|
|
|
3765
3861
|
'verify_capture_retry': run_verify_capture_retry(),
|
|
3766
3862
|
'remote_audit_verify_uses_default_capture_script': run_remote_audit_verify_uses_default_capture_script(),
|
|
3767
3863
|
'verify_interaction_terminal_route_from_proof_evidence': run_verify_interaction_terminal_route_from_proof_evidence(),
|
|
3864
|
+
'verify_interaction_iife_structured_evidence_without_screenshot': run_verify_interaction_iife_structured_evidence_without_screenshot(),
|
|
3768
3865
|
'verify_interaction_proof_evidence_overrides_stale_expected_path': run_verify_interaction_proof_evidence_overrides_stale_expected_path(),
|
|
3769
3866
|
'verify_interaction_proof_plan_placeholder_uses_live_evidence': run_verify_interaction_proof_plan_placeholder_uses_live_evidence(),
|
|
3770
3867
|
'verify_interaction_reverse_terminal_route_from_proof_evidence': run_verify_interaction_reverse_terminal_route_from_proof_evidence(),
|
|
@@ -103,6 +103,12 @@ CASES = [
|
|
|
103
103
|
'function': 'run_verify_structured_evidence_without_screenshot',
|
|
104
104
|
'expected_terminal': 'pass',
|
|
105
105
|
},
|
|
106
|
+
{
|
|
107
|
+
'name': 'interaction-iife-structured-proof-without-screenshot-pass',
|
|
108
|
+
'covers': ['route-changing interactions', 'proof-evidence-present', 'proof-evidence-present/absent cases'],
|
|
109
|
+
'function': 'run_verify_interaction_iife_structured_evidence_without_screenshot',
|
|
110
|
+
'expected_terminal': 'pass',
|
|
111
|
+
},
|
|
106
112
|
{
|
|
107
113
|
'name': 'proof-evidence-absent-specific-blocker',
|
|
108
114
|
'covers': ['proof-evidence-absent'],
|