@riddledc/riddle-proof 0.8.16 → 0.8.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -208,6 +208,27 @@ viewport and returns an `environment_blocked` result without starting partial
|
|
|
208
208
|
jobs when the account balance cannot cover the intended sweep. Use
|
|
209
209
|
`--balance-preflight=false` to bypass this check.
|
|
210
210
|
|
|
211
|
+
## Regression Packs
|
|
212
|
+
|
|
213
|
+
Reusable regression packs live in `examples/regression-packs`. They are
|
|
214
|
+
host-readable manifests for recurring trust-boundary checks that should produce
|
|
215
|
+
the same terminal outcomes across generic runners and wrappers.
|
|
216
|
+
|
|
217
|
+
The first pack is
|
|
218
|
+
`examples/regression-packs/oc-flow-regression.json`. It names the live OpenClaw
|
|
219
|
+
regression cases for route-changing interactions, query/hash preservation,
|
|
220
|
+
negative dropped query/hash evidence, no-diff production audits, selector and
|
|
221
|
+
thrown-error blockers, and stale checkpoint responses after terminal status.
|
|
222
|
+
The same pack also points at the local generic core suite:
|
|
223
|
+
|
|
224
|
+
```sh
|
|
225
|
+
python3 packages/riddle-proof/runtime/tests/trust_boundary_regression.py
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Before counting live wrapper runs, use the pack's runtime gate: verify
|
|
229
|
+
`riddle_proof_status` reports the loaded `@riddledc/openclaw-riddle-proof` and
|
|
230
|
+
`@riddledc/riddle-proof` versions. Disk package versions alone are not enough.
|
|
231
|
+
|
|
211
232
|
Use `--viewport-name <name>` to run only one named viewport from a
|
|
212
233
|
multi-viewport profile while preserving viewport-scoped setup actions and
|
|
213
234
|
checks:
|
|
package/examples/regression-packs/oc-flow-regression.json
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "riddle-proof.regression-pack.v1",
|
|
3
|
+
"pack_id": "riddle-proof-oc-flow-2026-06",
|
|
4
|
+
"public_name": "Riddle Proof OC Flow Regression Pack",
|
|
5
|
+
"description": "Reusable regression pack for the trust boundary between browser evidence, verifier judgment, retry policy, wrapper checkpoint handling, and terminal status.",
|
|
6
|
+
"minimum_versions": {
|
|
7
|
+
"@riddledc/openclaw-riddle-proof": "0.4.144",
|
|
8
|
+
"@riddledc/riddle-proof": "0.8.16"
|
|
9
|
+
},
|
|
10
|
+
"runtime_gate": {
|
|
11
|
+
"tool": "riddle_proof_status",
|
|
12
|
+
"require_loaded_metadata": true,
|
|
13
|
+
"fresh_runtime_rule": "Count only runs whose riddle_proof_status package_metadata reports the minimum_versions or newer. If disk package versions differ from loaded metadata, restart the gateway and discard stale runs."
|
|
14
|
+
},
|
|
15
|
+
"forbidden_terminal_markers": [
|
|
16
|
+
"codex_invalid_json",
|
|
17
|
+
"codex_no_final_response",
|
|
18
|
+
"codex_timeout",
|
|
19
|
+
"max_iterations_reached",
|
|
20
|
+
"stage_iteration_limit_reached",
|
|
21
|
+
"verify_capture_retry",
|
|
22
|
+
"checkpoint_response_without_packet"
|
|
23
|
+
],
|
|
24
|
+
"local_core_suite": {
|
|
25
|
+
"command": "python3 packages/riddle-proof/runtime/tests/trust_boundary_regression.py",
|
|
26
|
+
"required_cases": [
|
|
27
|
+
"route-change-forward-pass",
|
|
28
|
+
"route-change-retry-state-drift-ignored",
|
|
29
|
+
"route-change-reverse-pass",
|
|
30
|
+
"route-change-reverse-nested-terminal-url-pass",
|
|
31
|
+
"query-hash-trailing-slash-pass",
|
|
32
|
+
"query-hash-dropped-structured-negative-blocker",
|
|
33
|
+
"same-page-hash-pass",
|
|
34
|
+
"missing-selector-timeout-specific-blocker",
|
|
35
|
+
"thrown-error-preserves-structured-evidence",
|
|
36
|
+
"interaction-thrown-error-specific-blocker",
|
|
37
|
+
"proof-evidence-absent-specific-blocker",
|
|
38
|
+
"no-diff-prod-audit-default-capture-pass"
|
|
39
|
+
]
|
|
40
|
+
},
|
|
41
|
+
"openclaw_live_suite": {
|
|
42
|
+
"target": {
|
|
43
|
+
"repo": "davisdiehl/riddle-site",
|
|
44
|
+
"prod_url": "https://riddledc.com/",
|
|
45
|
+
"ship_mode": "none",
|
|
46
|
+
"implementation_mode": "none",
|
|
47
|
+
"require_diff": false,
|
|
48
|
+
"allow_code_changes": false
|
|
49
|
+
},
|
|
50
|
+
"result_log_fields": [
|
|
51
|
+
"run_id",
|
|
52
|
+
"loaded_metadata",
|
|
53
|
+
"state_path",
|
|
54
|
+
"terminal_status",
|
|
55
|
+
"last_checkpoint",
|
|
56
|
+
"route_expectation_source",
|
|
57
|
+
"proof_evidence_present",
|
|
58
|
+
"proof_json_urls",
|
|
59
|
+
"screenshots",
|
|
60
|
+
"forbidden_terminal_markers_seen"
|
|
61
|
+
],
|
|
62
|
+
"cases": [
|
|
63
|
+
{
|
|
64
|
+
"id": "home-to-proof-route-change-pass",
|
|
65
|
+
"tool": "riddle_proof_change",
|
|
66
|
+
"intent": "Start on the production home page, click the visible Proof navigation link, and prove the terminal route is /proof/.",
|
|
67
|
+
"params": {
|
|
68
|
+
"verification_mode": "interaction",
|
|
69
|
+
"server_path": "/",
|
|
70
|
+
"wait_for_selector": "main#main-content",
|
|
71
|
+
"capture_script_contract": [
|
|
72
|
+
"goto https://riddledc.com/",
|
|
73
|
+
"wait for main#main-content",
|
|
74
|
+
"click a visible Proof link whose href includes /proof",
|
|
75
|
+
"wait for terminal URL https://riddledc.com/proof/",
|
|
76
|
+
"return riddle-proof.interaction.v1 evidence with start, action, terminal, assertions, and routeExpectationSource=capture_script.expectedUrl",
|
|
77
|
+
"save screenshot after-proof"
|
|
78
|
+
]
|
|
79
|
+
},
|
|
80
|
+
"expect": {
|
|
81
|
+
"terminal_status": "ready_to_ship",
|
|
82
|
+
"terminal_path": "/proof/",
|
|
83
|
+
"proof_evidence_present": true,
|
|
84
|
+
"route_expectation_source": "capture_script.expectedUrl",
|
|
85
|
+
"forbidden_terminal_markers": []
|
|
86
|
+
}
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"id": "proof-to-home-route-change-pass",
|
|
90
|
+
"tool": "riddle_proof_change",
|
|
91
|
+
"intent": "Start on /proof/, click the visible Riddle/Home root navigation link, and prove the terminal route is /.",
|
|
92
|
+
"params": {
|
|
93
|
+
"verification_mode": "interaction",
|
|
94
|
+
"prod_url": "https://riddledc.com/proof/",
|
|
95
|
+
"server_path": "/proof/",
|
|
96
|
+
"wait_for_selector": "main#main-content",
|
|
97
|
+
"capture_script_contract": [
|
|
98
|
+
"goto https://riddledc.com/proof/",
|
|
99
|
+
"wait for main#main-content",
|
|
100
|
+
"click the visible Riddle/Home nav link whose href is / or resolves to the site root",
|
|
101
|
+
"wait for terminal URL https://riddledc.com/",
|
|
102
|
+
"return riddle-proof.interaction.v1 evidence with nested terminal.expectedUrl and terminal.observedUrl",
|
|
103
|
+
"save screenshot after-home"
|
|
104
|
+
]
|
|
105
|
+
},
|
|
106
|
+
"expect": {
|
|
107
|
+
"terminal_status": "ready_to_ship",
|
|
108
|
+
"terminal_path": "/",
|
|
109
|
+
"proof_evidence_present": true,
|
|
110
|
+
"route_expectation_source": "capture_script.expectedUrl",
|
|
111
|
+
"forbidden_terminal_markers": []
|
|
112
|
+
}
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
"id": "pricing-query-hash-positive-pass",
|
|
116
|
+
"tool": "riddle_proof_change",
|
|
117
|
+
"intent": "Click into Pricing with a terminal query/hash expectation and prove slash, query, and hash are preserved.",
|
|
118
|
+
"params": {
|
|
119
|
+
"verification_mode": "interaction",
|
|
120
|
+
"server_path": "/",
|
|
121
|
+
"wait_for_selector": "main#main-content",
|
|
122
|
+
"expected_terminal_url": "https://riddledc.com/pricing/?rp_probe=1#pricing-probe",
|
|
123
|
+
"capture_script_contract": [
|
|
124
|
+
"start from https://riddledc.com/",
|
|
125
|
+
"navigate or click to the pricing route with ?rp_probe=1#pricing-probe",
|
|
126
|
+
"wait for https://riddledc.com/pricing/?rp_probe=1#pricing-probe",
|
|
127
|
+
"return structured evidence proving pathname /pricing/, search ?rp_probe=1, and hash #pricing-probe",
|
|
128
|
+
"save screenshot after-proof"
|
|
129
|
+
]
|
|
130
|
+
},
|
|
131
|
+
"expect": {
|
|
132
|
+
"terminal_status": "ready_to_ship",
|
|
133
|
+
"terminal_url": "https://riddledc.com/pricing/?rp_probe=1#pricing-probe",
|
|
134
|
+
"proof_evidence_present": true,
|
|
135
|
+
"query_preserved": true,
|
|
136
|
+
"hash_preserved": true,
|
|
137
|
+
"forbidden_terminal_markers": []
|
|
138
|
+
}
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"id": "pricing-query-hash-dropped-blocker",
|
|
142
|
+
"tool": "riddle_proof_change",
|
|
143
|
+
"intent": "Force a negative control where expected query/hash are dropped at terminal; this must block specifically as invalid browser evidence.",
|
|
144
|
+
"params": {
|
|
145
|
+
"verification_mode": "interaction",
|
|
146
|
+
"server_path": "/",
|
|
147
|
+
"wait_for_selector": "main#main-content",
|
|
148
|
+
"expected_terminal_url": "https://riddledc.com/pricing/?rp_probe=1#pricing-probe",
|
|
149
|
+
"forced_observed_terminal_url": "https://riddledc.com/pricing/",
|
|
150
|
+
"capture_script_contract": [
|
|
151
|
+
"record expected terminal URL with ?rp_probe=1#pricing-probe",
|
|
152
|
+
"intentionally observe or force terminal evidence for https://riddledc.com/pricing/",
|
|
153
|
+
"return riddle-proof.interaction.v1 evidence with expected and observed terminal URLs",
|
|
154
|
+
"save screenshot terminal-pricing-negative-control"
|
|
155
|
+
]
|
|
156
|
+
},
|
|
157
|
+
"expect": {
|
|
158
|
+
"terminal_status": "blocked",
|
|
159
|
+
"last_checkpoint": "verify_capture_blocked",
|
|
160
|
+
"blocker_code": "verify_capture_blocked",
|
|
161
|
+
"capture_decision": "failed_interaction_capture",
|
|
162
|
+
"observed_terminal_url": "https://riddledc.com/pricing/",
|
|
163
|
+
"forbidden_terminal_markers": [
|
|
164
|
+
"ready_to_ship",
|
|
165
|
+
"codex_invalid_json",
|
|
166
|
+
"codex_timeout",
|
|
167
|
+
"max_iterations_reached",
|
|
168
|
+
"checkpoint_response_without_packet"
|
|
169
|
+
]
|
|
170
|
+
}
|
|
171
|
+
},
|
|
172
|
+
{
|
|
173
|
+
"id": "no-diff-prod-audit-pass",
|
|
174
|
+
"tool": "riddle_proof_change",
|
|
175
|
+
"intent": "Audit the live production home page without implementation or PR handoff, and prove the current target evidence directly.",
|
|
176
|
+
"params": {
|
|
177
|
+
"verification_mode": "interaction",
|
|
178
|
+
"implementation_mode": "none",
|
|
179
|
+
"require_diff": false,
|
|
180
|
+
"allow_code_changes": false,
|
|
181
|
+
"ship_mode": "none",
|
|
182
|
+
"server_path": "/",
|
|
183
|
+
"wait_for_selector": "main#main-content",
|
|
184
|
+
"capture_script_contract": [
|
|
185
|
+
"use the current production target only",
|
|
186
|
+
"do not request implementation or git diff",
|
|
187
|
+
"prove main#main-content and expected home-page content are visible",
|
|
188
|
+
"return structured evidence and save screenshot after-proof"
|
|
189
|
+
]
|
|
190
|
+
},
|
|
191
|
+
"expect": {
|
|
192
|
+
"terminal_status": "ready_to_ship",
|
|
193
|
+
"ship_handoff": "none",
|
|
194
|
+
"implementation_attempted": false,
|
|
195
|
+
"proof_evidence_present": true,
|
|
196
|
+
"forbidden_terminal_markers": []
|
|
197
|
+
}
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
"id": "missing-selector-timeout-blocker",
|
|
201
|
+
"tool": "riddle_proof_change",
|
|
202
|
+
"intent": "Use a missing selector in capture and require a specific Playwright selector timeout in terminal evidence.",
|
|
203
|
+
"params": {
|
|
204
|
+
"verification_mode": "interaction",
|
|
205
|
+
"server_path": "/",
|
|
206
|
+
"wait_for_selector": "main#main-content",
|
|
207
|
+
"capture_script_contract": [
|
|
208
|
+
"start from https://riddledc.com/",
|
|
209
|
+
"try to click or scroll a selector that does not exist",
|
|
210
|
+
"use a short selector timeout",
|
|
211
|
+
"surface the exact Playwright timeout message in structured failure evidence"
|
|
212
|
+
]
|
|
213
|
+
},
|
|
214
|
+
"expect": {
|
|
215
|
+
"terminal_status": "blocked",
|
|
216
|
+
"failure_kind": "specific_blocker",
|
|
217
|
+
"message_contains": "Timeout",
|
|
218
|
+
"forbidden_terminal_markers": [
|
|
219
|
+
"codex_invalid_json",
|
|
220
|
+
"codex_timeout",
|
|
221
|
+
"max_iterations_reached"
|
|
222
|
+
]
|
|
223
|
+
}
|
|
224
|
+
},
|
|
225
|
+
{
|
|
226
|
+
"id": "thrown-error-specific-blocker",
|
|
227
|
+
"tool": "riddle_proof_change",
|
|
228
|
+
"intent": "Throw an intentional marker from capture and require the exact marker to survive in structured failure evidence.",
|
|
229
|
+
"params": {
|
|
230
|
+
"verification_mode": "interaction",
|
|
231
|
+
"server_path": "/",
|
|
232
|
+
"wait_for_selector": "main#main-content",
|
|
233
|
+
"capture_script_contract": [
|
|
234
|
+
"throw intentional-riddle-proof-regression-thrown-error from the capture script",
|
|
235
|
+
"preserve the exact marker in terminal evidence",
|
|
236
|
+
"do not convert the failure to generic Codex JSON/lifecycle failure"
|
|
237
|
+
]
|
|
238
|
+
},
|
|
239
|
+
"expect": {
|
|
240
|
+
"terminal_status": "blocked",
|
|
241
|
+
"failure_kind": "specific_blocker",
|
|
242
|
+
"message_contains": "intentional-riddle-proof-regression-thrown-error",
|
|
243
|
+
"forbidden_terminal_markers": [
|
|
244
|
+
"codex_invalid_json",
|
|
245
|
+
"codex_timeout",
|
|
246
|
+
"max_iterations_reached"
|
|
247
|
+
]
|
|
248
|
+
}
|
|
249
|
+
},
|
|
250
|
+
{
|
|
251
|
+
"id": "late-stale-checkpoint-ignored",
|
|
252
|
+
"tool": "riddle_proof_change + riddle_proof_review",
|
|
253
|
+
"intent": "Complete a terminal ready_to_ship proof, then submit a stale manual author checkpoint response after terminal status and preserve ready_to_ship.",
|
|
254
|
+
"steps": [
|
|
255
|
+
{
|
|
256
|
+
"tool": "riddle_proof_change",
|
|
257
|
+
"params": {
|
|
258
|
+
"verification_mode": "interaction",
|
|
259
|
+
"server_path": "/",
|
|
260
|
+
"wait_for_selector": "main#main-content",
|
|
261
|
+
"checkpoint_mode": "terminal_only",
|
|
262
|
+
"report_mode": "terminal_only",
|
|
263
|
+
"capture_script_contract": [
|
|
264
|
+
"complete the Home -> Proof route proof",
|
|
265
|
+
"terminalize as ready_to_ship before any manual checkpoint injection",
|
|
266
|
+
"record state_path and run_id from riddle_proof_status"
|
|
267
|
+
]
|
|
268
|
+
},
|
|
269
|
+
"expect": {
|
|
270
|
+
"terminal_status": "ready_to_ship",
|
|
271
|
+
"proof_evidence_present": true
|
|
272
|
+
}
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
"tool": "riddle_proof_review",
|
|
276
|
+
"after": "terminal ready_to_ship",
|
|
277
|
+
"params_template": {
|
|
278
|
+
"state_path": "${state_path}",
|
|
279
|
+
"decision": "continue_checkpoint",
|
|
280
|
+
"summary": "Submit stale checkpoint response after terminal ready_to_ship.",
|
|
281
|
+
"checkpoint_response": {
|
|
282
|
+
"version": "riddle-proof.checkpoint_response.v1",
|
|
283
|
+
"run_id": "${run_id}",
|
|
284
|
+
"checkpoint": "author_supervisor_judgment",
|
|
285
|
+
"decision": "author_packet",
|
|
286
|
+
"summary": "Late stale author packet after terminal ready_to_ship.",
|
|
287
|
+
"payload": {
|
|
288
|
+
"proof_plan": "STALE PACKET: do not resume. This packet is intentionally late and should not alter a terminal run.",
|
|
289
|
+
"capture_script": "return { passed: true, staleManualCheckpointProbe: true };"
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
},
|
|
293
|
+
"expect": {
|
|
294
|
+
"terminal_status": "ready_to_ship",
|
|
295
|
+
"ignored_checkpoint_response": true,
|
|
296
|
+
"background_resume_started": false,
|
|
297
|
+
"forbidden_terminal_markers": [
|
|
298
|
+
"checkpoint_response_without_packet",
|
|
299
|
+
"max_iterations_reached",
|
|
300
|
+
"codex_invalid_json"
|
|
301
|
+
]
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
],
|
|
305
|
+
"expect": {
|
|
306
|
+
"terminal_status": "ready_to_ship",
|
|
307
|
+
"ignored_checkpoint_response": true,
|
|
308
|
+
"background_resume_started": false,
|
|
309
|
+
"forbidden_terminal_markers": [
|
|
310
|
+
"checkpoint_response_without_packet",
|
|
311
|
+
"max_iterations_reached",
|
|
312
|
+
"codex_invalid_json"
|
|
313
|
+
]
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
]
|
|
317
|
+
}
|
|
318
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@riddledc/riddle-proof",
|
|
3
|
-
"version": "0.8.16",
|
|
3
|
+
"version": "0.8.17",
|
|
4
4
|
"description": "Reusable Riddle Proof contracts and helpers for evidence-backed agent changes.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "RiddleDC",
|
|
@@ -227,6 +227,6 @@
|
|
|
227
227
|
"build": "tsup src/index.ts src/types.ts src/result.ts src/state.ts src/checkpoint.ts src/run-card.ts src/runner.ts src/engine-harness.ts src/codex-exec-agent.ts src/local-agent.ts src/cli.ts src/cli/index.ts src/diagnostics.ts src/proof-session.ts src/playability.ts src/basic-gameplay.ts src/profile.ts src/profile/index.ts src/openclaw.ts src/proof-run-core.ts src/proof-run-engine.ts src/riddle-client.ts src/runtime/riddle-client.ts src/spec/index.ts src/spec/types.ts src/spec/result.ts src/spec/state.ts src/spec/checkpoint.ts src/spec/run-card.ts src/runtime/index.ts src/app-contract/index.ts src/advanced/index.ts src/advanced/runner.ts src/advanced/engine-harness.ts src/advanced/proof-run-core.ts src/advanced/proof-run-engine.ts src/adapters/openclaw.ts src/adapters/local-agent.ts src/adapters/codex-exec-agent.ts src/adapters/codex.ts --format cjs,esm --dts --out-dir dist --clean",
|
|
228
228
|
"clean": "rm -rf dist",
|
|
229
229
|
"lint": "echo 'lint: (not configured)'",
|
|
230
|
-
"test": "npm run build && node test.js && node proof-run.test.js && node trust-boundary.test.js && python3 runtime/tests/trust_boundary_regression.py"
|
|
230
|
+
"test": "npm run build && node test.js && node proof-run.test.js && node trust-boundary.test.js && node regression-packs.test.js && python3 runtime/tests/trust_boundary_regression.py"
|
|
231
231
|
}
|
|
232
232
|
}
|