@riddledc/riddle-proof 0.8.15 → 0.8.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -0
- package/dist/advanced/engine-harness.cjs +1 -1
- package/dist/advanced/engine-harness.js +1 -1
- package/dist/advanced/index.cjs +1 -1
- package/dist/advanced/index.js +1 -1
- package/dist/{chunk-KTIDPXE2.js → chunk-E7UTJ7KB.js} +1 -1
- package/dist/{chunk-RW4OUHN4.js → chunk-ZOZLORGR.js} +1 -1
- package/dist/cli/index.js +2 -2
- package/dist/cli.cjs +1 -1
- package/dist/cli.js +2 -2
- package/dist/engine-harness.cjs +1 -1
- package/dist/engine-harness.js +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/examples/regression-packs/oc-flow-regression.json +318 -0
- package/package.json +2 -2
- package/runtime/lib/verify.py +18 -0
- package/runtime/tests/recon_verify_smoke.py +114 -0
- package/runtime/tests/trust_boundary_regression.py +6 -0
package/README.md
CHANGED
|
@@ -208,6 +208,27 @@ viewport and returns an `environment_blocked` result without starting partial
|
|
|
208
208
|
jobs when the account balance cannot cover the intended sweep. Use
|
|
209
209
|
`--balance-preflight=false` to bypass this check.
|
|
210
210
|
|
|
211
|
+
## Regression Packs
|
|
212
|
+
|
|
213
|
+
Reusable regression packs live in `examples/regression-packs`. They are
|
|
214
|
+
host-readable manifests for recurring trust-boundary checks that should produce
|
|
215
|
+
the same terminal outcomes across generic runners and wrappers.
|
|
216
|
+
|
|
217
|
+
The first pack is
|
|
218
|
+
`examples/regression-packs/oc-flow-regression.json`. It names the live OpenClaw
|
|
219
|
+
regression cases for route-changing interactions, query/hash preservation,
|
|
220
|
+
negative dropped query/hash evidence, no-diff production audits, selector and
|
|
221
|
+
thrown-error blockers, and stale checkpoint responses after terminal status.
|
|
222
|
+
The same pack also points at the local generic core suite:
|
|
223
|
+
|
|
224
|
+
```sh
|
|
225
|
+
python3 packages/riddle-proof/runtime/tests/trust_boundary_regression.py
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Before counting live wrapper runs, use the pack's runtime gate: verify
|
|
229
|
+
`riddle_proof_status` reports the loaded `@riddledc/openclaw-riddle-proof` and
|
|
230
|
+
`@riddledc/riddle-proof` versions. Disk package versions alone are not enough.
|
|
231
|
+
|
|
211
232
|
Use `--viewport-name <name>` to run only one named viewport from a
|
|
212
233
|
multi-viewport profile while preserving viewport-scoped setup actions and
|
|
213
234
|
checks:
|
|
@@ -5221,7 +5221,7 @@ function checkpointResponseContinuation(state, value) {
|
|
|
5221
5221
|
};
|
|
5222
5222
|
}
|
|
5223
5223
|
function finalizedCheckpointResponseWithoutPacketResult(state, value) {
|
|
5224
|
-
if (!value || state.checkpoint_packet || !
|
|
5224
|
+
if (!value || state.checkpoint_packet || !isProtectedFinalStatus(state.status)) return null;
|
|
5225
5225
|
const response = normalizeCheckpointResponse(value);
|
|
5226
5226
|
if (!response) return null;
|
|
5227
5227
|
if (isDuplicateCheckpointResponse(state, response)) return null;
|
|
@@ -2,7 +2,7 @@ import {
|
|
|
2
2
|
createDisabledRiddleProofAgentAdapter,
|
|
3
3
|
readRiddleProofRunStatus,
|
|
4
4
|
runRiddleProofEngineHarness
|
|
5
|
-
} from "../chunk-RW4OUHN4.js";
|
|
5
|
+
} from "../chunk-ZOZLORGR.js";
|
|
6
6
|
import "../chunk-YZUVEJ5B.js";
|
|
7
7
|
import "../chunk-FMOYUYH2.js";
|
|
8
8
|
import "../chunk-7GZY5PLT.js";
|
package/dist/advanced/index.cjs
CHANGED
|
@@ -5758,7 +5758,7 @@ function checkpointResponseContinuation(state, value) {
|
|
|
5758
5758
|
};
|
|
5759
5759
|
}
|
|
5760
5760
|
function finalizedCheckpointResponseWithoutPacketResult(state, value) {
|
|
5761
|
-
if (!value || state.checkpoint_packet || !
|
|
5761
|
+
if (!value || state.checkpoint_packet || !isProtectedFinalStatus(state.status)) return null;
|
|
5762
5762
|
const response = normalizeCheckpointResponse(value);
|
|
5763
5763
|
if (!response) return null;
|
|
5764
5764
|
if (isDuplicateCheckpointResponse(state, response)) return null;
|
package/dist/advanced/index.js
CHANGED
|
@@ -862,7 +862,7 @@ function checkpointResponseContinuation(state, value) {
|
|
|
862
862
|
};
|
|
863
863
|
}
|
|
864
864
|
function finalizedCheckpointResponseWithoutPacketResult(state, value) {
|
|
865
|
-
if (!value || state.checkpoint_packet || !
|
|
865
|
+
if (!value || state.checkpoint_packet || !isProtectedFinalStatus(state.status)) return null;
|
|
866
866
|
const response = normalizeCheckpointResponse(value);
|
|
867
867
|
if (!response) return null;
|
|
868
868
|
if (isDuplicateCheckpointResponse(state, response)) return null;
|
package/dist/cli/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import "../chunk-KTIDPXE2.js";
|
|
1
|
+
import "../chunk-E7UTJ7KB.js";
|
|
2
2
|
import "../chunk-PEWAIEER.js";
|
|
3
3
|
import "../chunk-TWTEUS7R.js";
|
|
4
|
-
import "../chunk-RW4OUHN4.js";
|
|
4
|
+
import "../chunk-ZOZLORGR.js";
|
|
5
5
|
import "../chunk-YZUVEJ5B.js";
|
|
6
6
|
import "../chunk-FMOYUYH2.js";
|
|
7
7
|
import "../chunk-7GZY5PLT.js";
|
package/dist/cli.cjs
CHANGED
|
@@ -5290,7 +5290,7 @@ function checkpointResponseContinuation(state, value) {
|
|
|
5290
5290
|
};
|
|
5291
5291
|
}
|
|
5292
5292
|
function finalizedCheckpointResponseWithoutPacketResult(state, value) {
|
|
5293
|
-
if (!value || state.checkpoint_packet || !
|
|
5293
|
+
if (!value || state.checkpoint_packet || !isProtectedFinalStatus(state.status)) return null;
|
|
5294
5294
|
const response = normalizeCheckpointResponse(value);
|
|
5295
5295
|
if (!response) return null;
|
|
5296
5296
|
if (isDuplicateCheckpointResponse(state, response)) return null;
|
package/dist/cli.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import "./chunk-KTIDPXE2.js";
|
|
2
|
+
import "./chunk-E7UTJ7KB.js";
|
|
3
3
|
import "./chunk-PEWAIEER.js";
|
|
4
4
|
import "./chunk-TWTEUS7R.js";
|
|
5
|
-
import "./chunk-RW4OUHN4.js";
|
|
5
|
+
import "./chunk-ZOZLORGR.js";
|
|
6
6
|
import "./chunk-YZUVEJ5B.js";
|
|
7
7
|
import "./chunk-FMOYUYH2.js";
|
|
8
8
|
import "./chunk-7GZY5PLT.js";
|
package/dist/engine-harness.cjs
CHANGED
|
@@ -5219,7 +5219,7 @@ function checkpointResponseContinuation(state, value) {
|
|
|
5219
5219
|
};
|
|
5220
5220
|
}
|
|
5221
5221
|
function finalizedCheckpointResponseWithoutPacketResult(state, value) {
|
|
5222
|
-
if (!value || state.checkpoint_packet || !
|
|
5222
|
+
if (!value || state.checkpoint_packet || !isProtectedFinalStatus(state.status)) return null;
|
|
5223
5223
|
const response = normalizeCheckpointResponse(value);
|
|
5224
5224
|
if (!response) return null;
|
|
5225
5225
|
if (isDuplicateCheckpointResponse(state, response)) return null;
|
package/dist/engine-harness.js
CHANGED
package/dist/index.cjs
CHANGED
|
@@ -5953,7 +5953,7 @@ function checkpointResponseContinuation(state, value) {
|
|
|
5953
5953
|
};
|
|
5954
5954
|
}
|
|
5955
5955
|
function finalizedCheckpointResponseWithoutPacketResult(state, value) {
|
|
5956
|
-
if (!value || state.checkpoint_packet || !
|
|
5956
|
+
if (!value || state.checkpoint_packet || !isProtectedFinalStatus(state.status)) return null;
|
|
5957
5957
|
const response = normalizeCheckpointResponse(value);
|
|
5958
5958
|
if (!response) return null;
|
|
5959
5959
|
if (isDuplicateCheckpointResponse(state, response)) return null;
|
package/dist/index.js
CHANGED
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "riddle-proof.regression-pack.v1",
|
|
3
|
+
"pack_id": "riddle-proof-oc-flow-2026-06",
|
|
4
|
+
"public_name": "Riddle Proof OC Flow Regression Pack",
|
|
5
|
+
"description": "Reusable regression pack for the trust boundary between browser evidence, verifier judgment, retry policy, wrapper checkpoint handling, and terminal status.",
|
|
6
|
+
"minimum_versions": {
|
|
7
|
+
"@riddledc/openclaw-riddle-proof": "0.4.144",
|
|
8
|
+
"@riddledc/riddle-proof": "0.8.16"
|
|
9
|
+
},
|
|
10
|
+
"runtime_gate": {
|
|
11
|
+
"tool": "riddle_proof_status",
|
|
12
|
+
"require_loaded_metadata": true,
|
|
13
|
+
"fresh_runtime_rule": "Count only runs whose riddle_proof_status package_metadata reports the minimum_versions or newer. If disk package versions differ from loaded metadata, restart the gateway and discard stale runs."
|
|
14
|
+
},
|
|
15
|
+
"forbidden_terminal_markers": [
|
|
16
|
+
"codex_invalid_json",
|
|
17
|
+
"codex_no_final_response",
|
|
18
|
+
"codex_timeout",
|
|
19
|
+
"max_iterations_reached",
|
|
20
|
+
"stage_iteration_limit_reached",
|
|
21
|
+
"verify_capture_retry",
|
|
22
|
+
"checkpoint_response_without_packet"
|
|
23
|
+
],
|
|
24
|
+
"local_core_suite": {
|
|
25
|
+
"command": "python3 packages/riddle-proof/runtime/tests/trust_boundary_regression.py",
|
|
26
|
+
"required_cases": [
|
|
27
|
+
"route-change-forward-pass",
|
|
28
|
+
"route-change-retry-state-drift-ignored",
|
|
29
|
+
"route-change-reverse-pass",
|
|
30
|
+
"route-change-reverse-nested-terminal-url-pass",
|
|
31
|
+
"query-hash-trailing-slash-pass",
|
|
32
|
+
"query-hash-dropped-structured-negative-blocker",
|
|
33
|
+
"same-page-hash-pass",
|
|
34
|
+
"missing-selector-timeout-specific-blocker",
|
|
35
|
+
"thrown-error-preserves-structured-evidence",
|
|
36
|
+
"interaction-thrown-error-specific-blocker",
|
|
37
|
+
"proof-evidence-absent-specific-blocker",
|
|
38
|
+
"no-diff-prod-audit-default-capture-pass"
|
|
39
|
+
]
|
|
40
|
+
},
|
|
41
|
+
"openclaw_live_suite": {
|
|
42
|
+
"target": {
|
|
43
|
+
"repo": "davisdiehl/riddle-site",
|
|
44
|
+
"prod_url": "https://riddledc.com/",
|
|
45
|
+
"ship_mode": "none",
|
|
46
|
+
"implementation_mode": "none",
|
|
47
|
+
"require_diff": false,
|
|
48
|
+
"allow_code_changes": false
|
|
49
|
+
},
|
|
50
|
+
"result_log_fields": [
|
|
51
|
+
"run_id",
|
|
52
|
+
"loaded_metadata",
|
|
53
|
+
"state_path",
|
|
54
|
+
"terminal_status",
|
|
55
|
+
"last_checkpoint",
|
|
56
|
+
"route_expectation_source",
|
|
57
|
+
"proof_evidence_present",
|
|
58
|
+
"proof_json_urls",
|
|
59
|
+
"screenshots",
|
|
60
|
+
"forbidden_terminal_markers_seen"
|
|
61
|
+
],
|
|
62
|
+
"cases": [
|
|
63
|
+
{
|
|
64
|
+
"id": "home-to-proof-route-change-pass",
|
|
65
|
+
"tool": "riddle_proof_change",
|
|
66
|
+
"intent": "Start on the production home page, click the visible Proof navigation link, and prove the terminal route is /proof/.",
|
|
67
|
+
"params": {
|
|
68
|
+
"verification_mode": "interaction",
|
|
69
|
+
"server_path": "/",
|
|
70
|
+
"wait_for_selector": "main#main-content",
|
|
71
|
+
"capture_script_contract": [
|
|
72
|
+
"goto https://riddledc.com/",
|
|
73
|
+
"wait for main#main-content",
|
|
74
|
+
"click a visible Proof link whose href includes /proof",
|
|
75
|
+
"wait for terminal URL https://riddledc.com/proof/",
|
|
76
|
+
"return riddle-proof.interaction.v1 evidence with start, action, terminal, assertions, and routeExpectationSource=capture_script.expectedUrl",
|
|
77
|
+
"save screenshot after-proof"
|
|
78
|
+
]
|
|
79
|
+
},
|
|
80
|
+
"expect": {
|
|
81
|
+
"terminal_status": "ready_to_ship",
|
|
82
|
+
"terminal_path": "/proof/",
|
|
83
|
+
"proof_evidence_present": true,
|
|
84
|
+
"route_expectation_source": "capture_script.expectedUrl",
|
|
85
|
+
"forbidden_terminal_markers": []
|
|
86
|
+
}
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"id": "proof-to-home-route-change-pass",
|
|
90
|
+
"tool": "riddle_proof_change",
|
|
91
|
+
"intent": "Start on /proof/, click the visible Riddle/Home root navigation link, and prove the terminal route is /.",
|
|
92
|
+
"params": {
|
|
93
|
+
"verification_mode": "interaction",
|
|
94
|
+
"prod_url": "https://riddledc.com/proof/",
|
|
95
|
+
"server_path": "/proof/",
|
|
96
|
+
"wait_for_selector": "main#main-content",
|
|
97
|
+
"capture_script_contract": [
|
|
98
|
+
"goto https://riddledc.com/proof/",
|
|
99
|
+
"wait for main#main-content",
|
|
100
|
+
"click the visible Riddle/Home nav link whose href is / or resolves to the site root",
|
|
101
|
+
"wait for terminal URL https://riddledc.com/",
|
|
102
|
+
"return riddle-proof.interaction.v1 evidence with nested terminal.expectedUrl and terminal.observedUrl",
|
|
103
|
+
"save screenshot after-home"
|
|
104
|
+
]
|
|
105
|
+
},
|
|
106
|
+
"expect": {
|
|
107
|
+
"terminal_status": "ready_to_ship",
|
|
108
|
+
"terminal_path": "/",
|
|
109
|
+
"proof_evidence_present": true,
|
|
110
|
+
"route_expectation_source": "capture_script.expectedUrl",
|
|
111
|
+
"forbidden_terminal_markers": []
|
|
112
|
+
}
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
"id": "pricing-query-hash-positive-pass",
|
|
116
|
+
"tool": "riddle_proof_change",
|
|
117
|
+
"intent": "Click into Pricing with a terminal query/hash expectation and prove slash, query, and hash are preserved.",
|
|
118
|
+
"params": {
|
|
119
|
+
"verification_mode": "interaction",
|
|
120
|
+
"server_path": "/",
|
|
121
|
+
"wait_for_selector": "main#main-content",
|
|
122
|
+
"expected_terminal_url": "https://riddledc.com/pricing/?rp_probe=1#pricing-probe",
|
|
123
|
+
"capture_script_contract": [
|
|
124
|
+
"start from https://riddledc.com/",
|
|
125
|
+
"navigate or click to the pricing route with ?rp_probe=1#pricing-probe",
|
|
126
|
+
"wait for https://riddledc.com/pricing/?rp_probe=1#pricing-probe",
|
|
127
|
+
"return structured evidence proving pathname /pricing/, search ?rp_probe=1, and hash #pricing-probe",
|
|
128
|
+
"save screenshot after-proof"
|
|
129
|
+
]
|
|
130
|
+
},
|
|
131
|
+
"expect": {
|
|
132
|
+
"terminal_status": "ready_to_ship",
|
|
133
|
+
"terminal_url": "https://riddledc.com/pricing/?rp_probe=1#pricing-probe",
|
|
134
|
+
"proof_evidence_present": true,
|
|
135
|
+
"query_preserved": true,
|
|
136
|
+
"hash_preserved": true,
|
|
137
|
+
"forbidden_terminal_markers": []
|
|
138
|
+
}
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"id": "pricing-query-hash-dropped-blocker",
|
|
142
|
+
"tool": "riddle_proof_change",
|
|
143
|
+
"intent": "Force a negative control where expected query/hash are dropped at terminal; this must block specifically as invalid browser evidence.",
|
|
144
|
+
"params": {
|
|
145
|
+
"verification_mode": "interaction",
|
|
146
|
+
"server_path": "/",
|
|
147
|
+
"wait_for_selector": "main#main-content",
|
|
148
|
+
"expected_terminal_url": "https://riddledc.com/pricing/?rp_probe=1#pricing-probe",
|
|
149
|
+
"forced_observed_terminal_url": "https://riddledc.com/pricing/",
|
|
150
|
+
"capture_script_contract": [
|
|
151
|
+
"record expected terminal URL with ?rp_probe=1#pricing-probe",
|
|
152
|
+
"intentionally observe or force terminal evidence for https://riddledc.com/pricing/",
|
|
153
|
+
"return riddle-proof.interaction.v1 evidence with expected and observed terminal URLs",
|
|
154
|
+
"save screenshot terminal-pricing-negative-control"
|
|
155
|
+
]
|
|
156
|
+
},
|
|
157
|
+
"expect": {
|
|
158
|
+
"terminal_status": "blocked",
|
|
159
|
+
"last_checkpoint": "verify_capture_blocked",
|
|
160
|
+
"blocker_code": "verify_capture_blocked",
|
|
161
|
+
"capture_decision": "failed_interaction_capture",
|
|
162
|
+
"observed_terminal_url": "https://riddledc.com/pricing/",
|
|
163
|
+
"forbidden_terminal_markers": [
|
|
164
|
+
"ready_to_ship",
|
|
165
|
+
"codex_invalid_json",
|
|
166
|
+
"codex_timeout",
|
|
167
|
+
"max_iterations_reached",
|
|
168
|
+
"checkpoint_response_without_packet"
|
|
169
|
+
]
|
|
170
|
+
}
|
|
171
|
+
},
|
|
172
|
+
{
|
|
173
|
+
"id": "no-diff-prod-audit-pass",
|
|
174
|
+
"tool": "riddle_proof_change",
|
|
175
|
+
"intent": "Audit the live production home page without implementation or PR handoff, and prove the current target evidence directly.",
|
|
176
|
+
"params": {
|
|
177
|
+
"verification_mode": "interaction",
|
|
178
|
+
"implementation_mode": "none",
|
|
179
|
+
"require_diff": false,
|
|
180
|
+
"allow_code_changes": false,
|
|
181
|
+
"ship_mode": "none",
|
|
182
|
+
"server_path": "/",
|
|
183
|
+
"wait_for_selector": "main#main-content",
|
|
184
|
+
"capture_script_contract": [
|
|
185
|
+
"use the current production target only",
|
|
186
|
+
"do not request implementation or git diff",
|
|
187
|
+
"prove main#main-content and expected home-page content are visible",
|
|
188
|
+
"return structured evidence and save screenshot after-proof"
|
|
189
|
+
]
|
|
190
|
+
},
|
|
191
|
+
"expect": {
|
|
192
|
+
"terminal_status": "ready_to_ship",
|
|
193
|
+
"ship_handoff": "none",
|
|
194
|
+
"implementation_attempted": false,
|
|
195
|
+
"proof_evidence_present": true,
|
|
196
|
+
"forbidden_terminal_markers": []
|
|
197
|
+
}
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
"id": "missing-selector-timeout-blocker",
|
|
201
|
+
"tool": "riddle_proof_change",
|
|
202
|
+
"intent": "Use a missing selector in capture and require a specific Playwright selector timeout in terminal evidence.",
|
|
203
|
+
"params": {
|
|
204
|
+
"verification_mode": "interaction",
|
|
205
|
+
"server_path": "/",
|
|
206
|
+
"wait_for_selector": "main#main-content",
|
|
207
|
+
"capture_script_contract": [
|
|
208
|
+
"start from https://riddledc.com/",
|
|
209
|
+
"try to click or scroll a selector that does not exist",
|
|
210
|
+
"use a short selector timeout",
|
|
211
|
+
"surface the exact Playwright timeout message in structured failure evidence"
|
|
212
|
+
]
|
|
213
|
+
},
|
|
214
|
+
"expect": {
|
|
215
|
+
"terminal_status": "blocked",
|
|
216
|
+
"failure_kind": "specific_blocker",
|
|
217
|
+
"message_contains": "Timeout",
|
|
218
|
+
"forbidden_terminal_markers": [
|
|
219
|
+
"codex_invalid_json",
|
|
220
|
+
"codex_timeout",
|
|
221
|
+
"max_iterations_reached"
|
|
222
|
+
]
|
|
223
|
+
}
|
|
224
|
+
},
|
|
225
|
+
{
|
|
226
|
+
"id": "thrown-error-specific-blocker",
|
|
227
|
+
"tool": "riddle_proof_change",
|
|
228
|
+
"intent": "Throw an intentional marker from capture and require the exact marker to survive in structured failure evidence.",
|
|
229
|
+
"params": {
|
|
230
|
+
"verification_mode": "interaction",
|
|
231
|
+
"server_path": "/",
|
|
232
|
+
"wait_for_selector": "main#main-content",
|
|
233
|
+
"capture_script_contract": [
|
|
234
|
+
"throw intentional-riddle-proof-regression-thrown-error from the capture script",
|
|
235
|
+
"preserve the exact marker in terminal evidence",
|
|
236
|
+
"do not convert the failure to generic Codex JSON/lifecycle failure"
|
|
237
|
+
]
|
|
238
|
+
},
|
|
239
|
+
"expect": {
|
|
240
|
+
"terminal_status": "blocked",
|
|
241
|
+
"failure_kind": "specific_blocker",
|
|
242
|
+
"message_contains": "intentional-riddle-proof-regression-thrown-error",
|
|
243
|
+
"forbidden_terminal_markers": [
|
|
244
|
+
"codex_invalid_json",
|
|
245
|
+
"codex_timeout",
|
|
246
|
+
"max_iterations_reached"
|
|
247
|
+
]
|
|
248
|
+
}
|
|
249
|
+
},
|
|
250
|
+
{
|
|
251
|
+
"id": "late-stale-checkpoint-ignored",
|
|
252
|
+
"tool": "riddle_proof_change + riddle_proof_review",
|
|
253
|
+
"intent": "Complete a terminal ready_to_ship proof, then submit a stale manual author checkpoint response after terminal status and preserve ready_to_ship.",
|
|
254
|
+
"steps": [
|
|
255
|
+
{
|
|
256
|
+
"tool": "riddle_proof_change",
|
|
257
|
+
"params": {
|
|
258
|
+
"verification_mode": "interaction",
|
|
259
|
+
"server_path": "/",
|
|
260
|
+
"wait_for_selector": "main#main-content",
|
|
261
|
+
"checkpoint_mode": "terminal_only",
|
|
262
|
+
"report_mode": "terminal_only",
|
|
263
|
+
"capture_script_contract": [
|
|
264
|
+
"complete the Home -> Proof route proof",
|
|
265
|
+
"terminalize as ready_to_ship before any manual checkpoint injection",
|
|
266
|
+
"record state_path and run_id from riddle_proof_status"
|
|
267
|
+
]
|
|
268
|
+
},
|
|
269
|
+
"expect": {
|
|
270
|
+
"terminal_status": "ready_to_ship",
|
|
271
|
+
"proof_evidence_present": true
|
|
272
|
+
}
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
"tool": "riddle_proof_review",
|
|
276
|
+
"after": "terminal ready_to_ship",
|
|
277
|
+
"params_template": {
|
|
278
|
+
"state_path": "${state_path}",
|
|
279
|
+
"decision": "continue_checkpoint",
|
|
280
|
+
"summary": "Submit stale checkpoint response after terminal ready_to_ship.",
|
|
281
|
+
"checkpoint_response": {
|
|
282
|
+
"version": "riddle-proof.checkpoint_response.v1",
|
|
283
|
+
"run_id": "${run_id}",
|
|
284
|
+
"checkpoint": "author_supervisor_judgment",
|
|
285
|
+
"decision": "author_packet",
|
|
286
|
+
"summary": "Late stale author packet after terminal ready_to_ship.",
|
|
287
|
+
"payload": {
|
|
288
|
+
"proof_plan": "STALE PACKET: do not resume. This packet is intentionally late and should not alter a terminal run.",
|
|
289
|
+
"capture_script": "return { passed: true, staleManualCheckpointProbe: true };"
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
},
|
|
293
|
+
"expect": {
|
|
294
|
+
"terminal_status": "ready_to_ship",
|
|
295
|
+
"ignored_checkpoint_response": true,
|
|
296
|
+
"background_resume_started": false,
|
|
297
|
+
"forbidden_terminal_markers": [
|
|
298
|
+
"checkpoint_response_without_packet",
|
|
299
|
+
"max_iterations_reached",
|
|
300
|
+
"codex_invalid_json"
|
|
301
|
+
]
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
],
|
|
305
|
+
"expect": {
|
|
306
|
+
"terminal_status": "ready_to_ship",
|
|
307
|
+
"ignored_checkpoint_response": true,
|
|
308
|
+
"background_resume_started": false,
|
|
309
|
+
"forbidden_terminal_markers": [
|
|
310
|
+
"checkpoint_response_without_packet",
|
|
311
|
+
"max_iterations_reached",
|
|
312
|
+
"codex_invalid_json"
|
|
313
|
+
]
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
]
|
|
317
|
+
}
|
|
318
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@riddledc/riddle-proof",
|
|
3
|
-
"version": "0.8.15",
|
|
3
|
+
"version": "0.8.17",
|
|
4
4
|
"description": "Reusable Riddle Proof contracts and helpers for evidence-backed agent changes.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "RiddleDC",
|
|
@@ -227,6 +227,6 @@
|
|
|
227
227
|
"build": "tsup src/index.ts src/types.ts src/result.ts src/state.ts src/checkpoint.ts src/run-card.ts src/runner.ts src/engine-harness.ts src/codex-exec-agent.ts src/local-agent.ts src/cli.ts src/cli/index.ts src/diagnostics.ts src/proof-session.ts src/playability.ts src/basic-gameplay.ts src/profile.ts src/profile/index.ts src/openclaw.ts src/proof-run-core.ts src/proof-run-engine.ts src/riddle-client.ts src/runtime/riddle-client.ts src/spec/index.ts src/spec/types.ts src/spec/result.ts src/spec/state.ts src/spec/checkpoint.ts src/spec/run-card.ts src/runtime/index.ts src/app-contract/index.ts src/advanced/index.ts src/advanced/runner.ts src/advanced/engine-harness.ts src/advanced/proof-run-core.ts src/advanced/proof-run-engine.ts src/adapters/openclaw.ts src/adapters/local-agent.ts src/adapters/codex-exec-agent.ts src/adapters/codex.ts --format cjs,esm --dts --out-dir dist --clean",
|
|
228
228
|
"clean": "rm -rf dist",
|
|
229
229
|
"lint": "echo 'lint: (not configured)'",
|
|
230
|
-
"test": "npm run build && node test.js && node proof-run.test.js && node trust-boundary.test.js && python3 runtime/tests/trust_boundary_regression.py"
|
|
230
|
+
"test": "npm run build && node test.js && node proof-run.test.js && node trust-boundary.test.js && node regression-packs.test.js && python3 runtime/tests/trust_boundary_regression.py"
|
|
231
231
|
}
|
|
232
232
|
}
|
package/runtime/lib/verify.py
CHANGED
|
@@ -2109,6 +2109,8 @@ FULL_LOCATION_PATH_KEYS = (
|
|
|
2109
2109
|
'afterUrl', 'after_url',
|
|
2110
2110
|
'finalUrl', 'final_url',
|
|
2111
2111
|
'currentUrl', 'current_url',
|
|
2112
|
+
'observedUrl', 'observed_url',
|
|
2113
|
+
'actualUrl', 'actual_url',
|
|
2112
2114
|
'pathWithSearchAndHash', 'path_with_search_and_hash',
|
|
2113
2115
|
'fullPath', 'full_path',
|
|
2114
2116
|
)
|
|
@@ -2116,6 +2118,8 @@ PARTIAL_LOCATION_PATH_KEYS = (
|
|
|
2116
2118
|
'route',
|
|
2117
2119
|
'path',
|
|
2118
2120
|
'pathname',
|
|
2121
|
+
'observedPath', 'observed_path',
|
|
2122
|
+
'actualPath', 'actual_path',
|
|
2119
2123
|
'normalizedPath', 'normalized_path',
|
|
2120
2124
|
'rawPath', 'raw_path',
|
|
2121
2125
|
)
|
|
@@ -2246,6 +2250,20 @@ def expected_terminal_path_from_record(record, depth=0):
|
|
|
2246
2250
|
candidate = expected_terminal_path_from_record(item, depth + 1)
|
|
2247
2251
|
if candidate:
|
|
2248
2252
|
return candidate
|
|
2253
|
+
for key in AFTER_STATE_KEYS:
|
|
2254
|
+
value = record.get(key)
|
|
2255
|
+
if isinstance(value, dict):
|
|
2256
|
+
candidate = (
|
|
2257
|
+
record_path_candidate_for_keys(value, EXPECTED_TERMINAL_PATH_KEYS)
|
|
2258
|
+
or expected_terminal_path_from_record(value, depth + 1)
|
|
2259
|
+
)
|
|
2260
|
+
if candidate:
|
|
2261
|
+
return candidate
|
|
2262
|
+
elif isinstance(value, list):
|
|
2263
|
+
for item in value:
|
|
2264
|
+
candidate = expected_terminal_path_from_record(item, depth + 1)
|
|
2265
|
+
if candidate:
|
|
2266
|
+
return candidate
|
|
2249
2267
|
for key in EVIDENCE_CONTAINER_KEYS:
|
|
2250
2268
|
value = record.get(key)
|
|
2251
2269
|
if isinstance(value, dict):
|
|
@@ -488,6 +488,69 @@ class FakeRiddle:
|
|
|
488
488
|
'totalPixels': 972000,
|
|
489
489
|
},
|
|
490
490
|
}
|
|
491
|
+
if 'clickedHomeNavigationOcTerminalShape' in script:
|
|
492
|
+
page_state = {
|
|
493
|
+
'bodyTextLength': 180,
|
|
494
|
+
'visibleTextSample': 'Riddle Proof homepage hero Start Free',
|
|
495
|
+
'interactiveElements': 4,
|
|
496
|
+
'visibleInteractiveElements': 4,
|
|
497
|
+
'pathname': '/',
|
|
498
|
+
'href': 'https://riddledc.com/',
|
|
499
|
+
'title': 'Riddle',
|
|
500
|
+
'buttons': ['Start Free'],
|
|
501
|
+
'headings': ['Riddle Proof'],
|
|
502
|
+
'links': [],
|
|
503
|
+
'canvasCount': 0,
|
|
504
|
+
'largeVisibleElements': [{'tag': 'h1', 'text': 'Riddle Proof'}],
|
|
505
|
+
}
|
|
506
|
+
proof_evidence = {
|
|
507
|
+
'version': 'riddle-proof.interaction.v1',
|
|
508
|
+
'start': {
|
|
509
|
+
'expectedUrl': 'https://riddledc.com/proof/',
|
|
510
|
+
'expectedPath': '/proof/',
|
|
511
|
+
'observedUrl': 'https://riddledc.com/proof/',
|
|
512
|
+
'observedPath': '/proof/',
|
|
513
|
+
},
|
|
514
|
+
'action': {
|
|
515
|
+
'type': 'click',
|
|
516
|
+
'target': 'visible Riddle/Home nav link to root',
|
|
517
|
+
'chosenText': 'Riddle',
|
|
518
|
+
'chosenHref': '/',
|
|
519
|
+
'clicked': True,
|
|
520
|
+
},
|
|
521
|
+
'terminal': {
|
|
522
|
+
'expectedUrl': 'https://riddledc.com/',
|
|
523
|
+
'expectedPath': '/',
|
|
524
|
+
'routeExpectationSource': 'capture_script.expectedUrl',
|
|
525
|
+
'observedUrl': 'https://riddledc.com/',
|
|
526
|
+
'observedPath': '/',
|
|
527
|
+
'pageReady': True,
|
|
528
|
+
},
|
|
529
|
+
'assertions': {
|
|
530
|
+
'startedOnProofRoute': True,
|
|
531
|
+
'clickedRootNavLink': True,
|
|
532
|
+
'terminalUrlMatchedExpected': True,
|
|
533
|
+
'terminalRouteMatchedRoot': True,
|
|
534
|
+
'terminalMainVisible': True,
|
|
535
|
+
'routeExpectationSourceMatched': True,
|
|
536
|
+
},
|
|
537
|
+
'errors': [],
|
|
538
|
+
}
|
|
539
|
+
return {
|
|
540
|
+
'ok': True,
|
|
541
|
+
'screenshots': [{'url': 'https://cdn.example.com/home-after.png'}],
|
|
542
|
+
'outputs': [{'name': 'after-home.png', 'url': 'https://cdn.example.com/home-after.png'}],
|
|
543
|
+
'result': {'pageState': page_state, 'proofEvidence': proof_evidence},
|
|
544
|
+
'console': [
|
|
545
|
+
'RIDDLE_PROOF_STATE:' + json.dumps(page_state),
|
|
546
|
+
'RIDDLE_PROOF_EVIDENCE:' + json.dumps(proof_evidence),
|
|
547
|
+
],
|
|
548
|
+
'visual_diff': {
|
|
549
|
+
'diffPercentage': 1.2,
|
|
550
|
+
'differentPixels': 12000,
|
|
551
|
+
'totalPixels': 972000,
|
|
552
|
+
},
|
|
553
|
+
}
|
|
491
554
|
if 'clickedHomeNavigation' in script:
|
|
492
555
|
page_state = {
|
|
493
556
|
'bodyTextLength': 180,
|
|
@@ -2768,6 +2831,56 @@ def run_verify_interaction_reverse_terminal_route_from_proof_evidence():
|
|
|
2768
2831
|
shutil.rmtree(tempdir, ignore_errors=True)
|
|
2769
2832
|
|
|
2770
2833
|
|
|
2834
|
+
def run_verify_interaction_reverse_terminal_expected_url_from_nested_terminal_evidence():
|
|
2835
|
+
tempdir = Path(tempfile.mkdtemp(prefix='riddle-proof-interaction-reverse-oc-shape-'))
|
|
2836
|
+
state_path = tempdir / 'state.json'
|
|
2837
|
+
try:
|
|
2838
|
+
state = base_state(tempdir, reference='prod')
|
|
2839
|
+
state.update({
|
|
2840
|
+
'recon_status': 'ready_for_proof_plan',
|
|
2841
|
+
'author_status': 'ready',
|
|
2842
|
+
'proof_plan_status': 'ready',
|
|
2843
|
+
'implementation_status': 'not_required',
|
|
2844
|
+
'verification_mode': 'interaction',
|
|
2845
|
+
'implementation_mode': 'none',
|
|
2846
|
+
'require_diff': False,
|
|
2847
|
+
'allow_code_changes': False,
|
|
2848
|
+
'server_path': '/proof/',
|
|
2849
|
+
'prod_url': 'https://riddledc.com/proof/',
|
|
2850
|
+
'prod_cdn': 'https://cdn.example.com/prod-proof.png',
|
|
2851
|
+
'proof_plan': 'Start on the proof page, click the visible Riddle/Home root nav link, and trust the structured evidence for the terminal route.',
|
|
2852
|
+
'capture_script': "clickedHomeNavigationOcTerminalShape(); await saveScreenshot('after-home');",
|
|
2853
|
+
'recon_results': {
|
|
2854
|
+
'baselines': {'prod': {'path': '/proof/', 'url': 'https://cdn.example.com/prod-proof.png'}},
|
|
2855
|
+
},
|
|
2856
|
+
})
|
|
2857
|
+
write_state(state_path, state)
|
|
2858
|
+
os.environ['RIDDLE_PROOF_STATE_FILE'] = str(state_path)
|
|
2859
|
+
|
|
2860
|
+
fake = FakeRiddle()
|
|
2861
|
+
load_util_with_fake(fake)
|
|
2862
|
+
load_module('verify_interaction_reverse_terminal_nested_expected_url', VERIFY_PATH)
|
|
2863
|
+
after_verify = json.loads(state_path.read_text())
|
|
2864
|
+
|
|
2865
|
+
assert after_verify['verify_status'] == 'evidence_captured'
|
|
2866
|
+
assert after_verify['route_expectation']['source'] == 'proof_evidence_contract'
|
|
2867
|
+
assert after_verify['route_expectation']['start_path'] == '/proof'
|
|
2868
|
+
assert after_verify['route_expectation']['expected_path'] == '/'
|
|
2869
|
+
route = after_verify['proof_assessment_request']['semantic_context']['route']
|
|
2870
|
+
assert route['expected_start_path'] == '/proof'
|
|
2871
|
+
assert route['expected_after_path'] == '/'
|
|
2872
|
+
assert route['after_observed_path'] == '/'
|
|
2873
|
+
assert 'wrong route' not in after_verify['verify_results']['after']['observation']['reason']
|
|
2874
|
+
return {
|
|
2875
|
+
'ok': True,
|
|
2876
|
+
'expected_path': after_verify['route_expectation']['expected_path'],
|
|
2877
|
+
'after_observed_path': route['after_observed_path'],
|
|
2878
|
+
'source': after_verify['route_expectation']['source'],
|
|
2879
|
+
}
|
|
2880
|
+
finally:
|
|
2881
|
+
shutil.rmtree(tempdir, ignore_errors=True)
|
|
2882
|
+
|
|
2883
|
+
|
|
2771
2884
|
def run_verify_interaction_prose_route_noise_uses_proof_evidence():
|
|
2772
2885
|
tempdir = Path(tempfile.mkdtemp(prefix='riddle-proof-interaction-prose-noise-'))
|
|
2773
2886
|
state_path = tempdir / 'state.json'
|
|
@@ -3553,6 +3666,7 @@ if __name__ == '__main__':
|
|
|
3553
3666
|
'verify_interaction_terminal_route_from_proof_evidence': run_verify_interaction_terminal_route_from_proof_evidence(),
|
|
3554
3667
|
'verify_interaction_proof_evidence_overrides_stale_expected_path': run_verify_interaction_proof_evidence_overrides_stale_expected_path(),
|
|
3555
3668
|
'verify_interaction_reverse_terminal_route_from_proof_evidence': run_verify_interaction_reverse_terminal_route_from_proof_evidence(),
|
|
3669
|
+
'verify_interaction_reverse_terminal_expected_url_from_nested_terminal_evidence': run_verify_interaction_reverse_terminal_expected_url_from_nested_terminal_evidence(),
|
|
3556
3670
|
'verify_interaction_prose_route_noise_uses_proof_evidence': run_verify_interaction_prose_route_noise_uses_proof_evidence(),
|
|
3557
3671
|
'verify_interaction_hash_terminal_route_from_proof_evidence': run_verify_interaction_hash_terminal_route_from_proof_evidence(),
|
|
3558
3672
|
'verify_interaction_authored_query_hash_mismatch_blocks_with_evidence': run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence(),
|
|
@@ -37,6 +37,12 @@ CASES = [
|
|
|
37
37
|
'function': 'run_verify_interaction_reverse_terminal_route_from_proof_evidence',
|
|
38
38
|
'expected_terminal': 'pass',
|
|
39
39
|
},
|
|
40
|
+
{
|
|
41
|
+
'name': 'route-change-reverse-nested-terminal-url-pass',
|
|
42
|
+
'covers': ['route-changing interactions', 'proof-evidence-present'],
|
|
43
|
+
'function': 'run_verify_interaction_reverse_terminal_expected_url_from_nested_terminal_evidence',
|
|
44
|
+
'expected_terminal': 'pass',
|
|
45
|
+
},
|
|
40
46
|
{
|
|
41
47
|
'name': 'route-prose-noise-ignored',
|
|
42
48
|
'covers': ['route-changing interactions', 'proof-evidence-present'],
|