@riddledc/riddle-proof 0.8.8 → 0.8.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/codex-exec-agent.cjs +75 -10
- package/dist/adapters/codex-exec-agent.js +1 -1
- package/dist/adapters/codex.cjs +75 -10
- package/dist/adapters/codex.js +1 -1
- package/dist/adapters/local-agent.cjs +75 -10
- package/dist/adapters/local-agent.js +1 -1
- package/dist/advanced/engine-harness.cjs +12 -0
- package/dist/advanced/engine-harness.js +1 -1
- package/dist/advanced/index.cjs +12 -0
- package/dist/advanced/index.d.cts +2 -2
- package/dist/advanced/index.d.ts +2 -2
- package/dist/advanced/index.js +1 -1
- package/dist/advanced/proof-run-core.d.cts +1 -1
- package/dist/advanced/proof-run-core.d.ts +1 -1
- package/dist/advanced/proof-run-engine.d.cts +2 -2
- package/dist/advanced/proof-run-engine.d.ts +2 -2
- package/dist/{chunk-V6VZ3CAI.js → chunk-2PXL3RDB.js} +2 -2
- package/dist/{chunk-E7ATYSYS.js → chunk-BBUO7HM4.js} +12 -0
- package/dist/{chunk-PYCQNK66.js → chunk-EEIYUZXE.js} +75 -10
- package/dist/cli/index.js +3 -3
- package/dist/cli.cjs +87 -10
- package/dist/cli.js +3 -3
- package/dist/codex-exec-agent.cjs +75 -10
- package/dist/codex-exec-agent.js +1 -1
- package/dist/engine-harness.cjs +12 -0
- package/dist/engine-harness.js +1 -1
- package/dist/index.cjs +87 -10
- package/dist/index.js +2 -2
- package/dist/local-agent.cjs +75 -10
- package/dist/local-agent.js +1 -1
- package/dist/{proof-run-core-CE0jx7wL.d.ts → proof-run-core-Ci9uFxMc.d.cts} +1 -1
- package/dist/{proof-run-core-CE0jx7wL.d.cts → proof-run-core-Ci9uFxMc.d.ts} +1 -1
- package/dist/proof-run-core.d.cts +1 -1
- package/dist/proof-run-core.d.ts +1 -1
- package/dist/{proof-run-engine-BlocjMni.d.cts → proof-run-engine-Bd1T43Dy.d.cts} +4 -4
- package/dist/{proof-run-engine-C_m8WJmX.d.ts → proof-run-engine-CXyhB-io.d.ts} +4 -4
- package/dist/proof-run-engine.d.cts +2 -2
- package/dist/proof-run-engine.d.ts +2 -2
- package/package.json +2 -2
- package/runtime/lib/verify.py +88 -2
- package/runtime/tests/recon_verify_smoke.py +147 -24
- package/runtime/tests/trust_boundary_regression.py +143 -0
package/runtime/lib/verify.py
CHANGED
|
@@ -646,6 +646,24 @@ def proof_evidence_records(value):
|
|
|
646
646
|
return []
|
|
647
647
|
|
|
648
648
|
|
|
649
|
+
def proof_evidence_records_deep(value, depth=0):
|
|
650
|
+
if depth > 6:
|
|
651
|
+
return []
|
|
652
|
+
if isinstance(value, dict):
|
|
653
|
+
records = [value]
|
|
654
|
+
for key in EVIDENCE_CONTAINER_KEYS:
|
|
655
|
+
nested = value.get(key)
|
|
656
|
+
if isinstance(nested, (dict, list)):
|
|
657
|
+
records.extend(proof_evidence_records_deep(nested, depth + 1))
|
|
658
|
+
return records
|
|
659
|
+
if isinstance(value, list):
|
|
660
|
+
records = []
|
|
661
|
+
for item in value:
|
|
662
|
+
records.extend(proof_evidence_records_deep(item, depth + 1))
|
|
663
|
+
return records
|
|
664
|
+
return []
|
|
665
|
+
|
|
666
|
+
|
|
649
667
|
def static_audit_evidence_support(value):
|
|
650
668
|
for record in proof_evidence_records(value):
|
|
651
669
|
explicit_static = (
|
|
@@ -1993,6 +2011,36 @@ def route_parts(value):
|
|
|
1993
2011
|
}
|
|
1994
2012
|
|
|
1995
2013
|
|
|
2014
|
+
def explicit_route_match_flag(record):
|
|
2015
|
+
if not isinstance(record, dict):
|
|
2016
|
+
return None
|
|
2017
|
+
true_keys = ('routeMatched', 'route_matched', 'routeMatches', 'route_matches')
|
|
2018
|
+
false_keys = true_keys + ('passed', 'ok', 'proofReady', 'proof_ready', 'interactionPassed', 'interaction_passed')
|
|
2019
|
+
if any(record.get(key) is False for key in false_keys):
|
|
2020
|
+
return False
|
|
2021
|
+
if any(record.get(key) is True for key in true_keys):
|
|
2022
|
+
return True
|
|
2023
|
+
return None
|
|
2024
|
+
|
|
2025
|
+
|
|
2026
|
+
def interaction_proof_route_match(expected_path, proof_evidence):
|
|
2027
|
+
expected = normalize_observed_path(expected_path)
|
|
2028
|
+
if not expected or proof_evidence is None:
|
|
2029
|
+
return None
|
|
2030
|
+
for record in proof_evidence_records_deep(proof_evidence):
|
|
2031
|
+
flag = explicit_route_match_flag(record)
|
|
2032
|
+
candidate = terminal_path_from_record(record)
|
|
2033
|
+
if candidate and route_matches_expected(expected, candidate):
|
|
2034
|
+
return {
|
|
2035
|
+
'matched': True,
|
|
2036
|
+
'observed_path': normalize_observed_path(candidate),
|
|
2037
|
+
'observed_path_raw': candidate,
|
|
2038
|
+
'source': 'proof_evidence_terminal_route',
|
|
2039
|
+
'route_match_flag': flag,
|
|
2040
|
+
}
|
|
2041
|
+
return None
|
|
2042
|
+
|
|
2043
|
+
|
|
1996
2044
|
EXPLICIT_TERMINAL_PATH_KEYS = (
|
|
1997
2045
|
'expected_terminal_path', 'expectedTerminalPath',
|
|
1998
2046
|
'expected_terminal_url', 'expectedTerminalUrl',
|
|
@@ -2168,6 +2216,8 @@ INTERACTION_FAILURE_FLAG_KEYS = (
|
|
|
2168
2216
|
'proof_ready',
|
|
2169
2217
|
'interactionPassed',
|
|
2170
2218
|
'interaction_passed',
|
|
2219
|
+
'routeMatched',
|
|
2220
|
+
'route_matched',
|
|
2171
2221
|
'routeMatches',
|
|
2172
2222
|
'route_matches',
|
|
2173
2223
|
)
|
|
@@ -2649,6 +2699,21 @@ def evaluate_capture_quality(payload, expected_path, verification_mode='proof'):
|
|
|
2649
2699
|
'observed_path_raw': expected_path,
|
|
2650
2700
|
})
|
|
2651
2701
|
|
|
2702
|
+
proof_route_match = (
|
|
2703
|
+
interaction_proof_route_match(expected_path, proof_evidence)
|
|
2704
|
+
if mode in INTERACTION_MODES
|
|
2705
|
+
else None
|
|
2706
|
+
)
|
|
2707
|
+
if isinstance(proof_route_match, dict):
|
|
2708
|
+
details['proof_evidence_route_matched'] = bool(proof_route_match.get('matched'))
|
|
2709
|
+
details['proof_evidence_route_match_source'] = proof_route_match.get('source') or ''
|
|
2710
|
+
details['proof_evidence_observed_path'] = proof_route_match.get('observed_path') or ''
|
|
2711
|
+
details['proof_evidence_observed_path_raw'] = proof_route_match.get('observed_path_raw') or ''
|
|
2712
|
+
if proof_route_match.get('matched') and proof_route_match.get('observed_path'):
|
|
2713
|
+
details['observed_path'] = proof_route_match.get('observed_path')
|
|
2714
|
+
details['observed_path_raw'] = proof_route_match.get('observed_path_raw') or proof_route_match.get('observed_path')
|
|
2715
|
+
details['observed_path_source'] = 'proof_evidence'
|
|
2716
|
+
|
|
2652
2717
|
console = payload.get('console') or []
|
|
2653
2718
|
for text in iter_console_messages(console):
|
|
2654
2719
|
if is_proof_telemetry_console_message(text):
|
|
@@ -2698,7 +2763,14 @@ def evaluate_capture_quality(payload, expected_path, verification_mode='proof'):
|
|
|
2698
2763
|
reasons.append('page has console/runtime errors')
|
|
2699
2764
|
|
|
2700
2765
|
observed_path = normalize_observed_path(details.get('observed_path'))
|
|
2701
|
-
|
|
2766
|
+
proof_route_matched = isinstance(proof_route_match, dict) and proof_route_match.get('matched')
|
|
2767
|
+
if (
|
|
2768
|
+
isinstance(page_state, dict)
|
|
2769
|
+
and expected_path
|
|
2770
|
+
and observed_path
|
|
2771
|
+
and not proof_route_matched
|
|
2772
|
+
and not route_matches_expected(expected_path, observed_path)
|
|
2773
|
+
):
|
|
2702
2774
|
raw_observed = details.get('observed_path_raw') or details.get('observed_path') or observed_path
|
|
2703
2775
|
reasons.append(f'wrong route: expected {expected_path}, got {raw_observed}')
|
|
2704
2776
|
|
|
@@ -3640,7 +3712,21 @@ if has_good_evidence:
|
|
|
3640
3712
|
summary_lines.append('Proof assessment: awaiting supervising agent judgment')
|
|
3641
3713
|
summary_lines.append('Proof next stage: supervising agent decides after reviewing the evidence packet')
|
|
3642
3714
|
else:
|
|
3643
|
-
capture_retry =
|
|
3715
|
+
capture_retry = build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker, s.get('route_expectation') or {})
|
|
3716
|
+
if visual_delta_recovery:
|
|
3717
|
+
observation_reason = str(after_observation.get('reason') or '')
|
|
3718
|
+
observation_details = after_observation.get('details') if isinstance(after_observation.get('details'), dict) else {}
|
|
3719
|
+
has_primary_capture_failure = bool(
|
|
3720
|
+
'wrong route' in observation_reason
|
|
3721
|
+
or 'console/runtime errors' in observation_reason
|
|
3722
|
+
or (observation_details.get('capture_error_messages') or [])
|
|
3723
|
+
or proof_evidence_blocker
|
|
3724
|
+
)
|
|
3725
|
+
if has_primary_capture_failure:
|
|
3726
|
+
capture_retry['visual_delta_recovery'] = visual_delta_recovery
|
|
3727
|
+
capture_retry.setdefault('reasons', []).append('Visual delta recovery also needed: ' + str(visual_delta_recovery.get('summary') or visual_delta_recovery.get('reason') or 'visual delta incomplete'))
|
|
3728
|
+
else:
|
|
3729
|
+
capture_retry = visual_delta_recovery
|
|
3644
3730
|
next_stage_options = ['author', 'verify', 'recon'] if no_implementation_mode else ['author', 'verify', 'implement', 'recon']
|
|
3645
3731
|
s['verify_status'] = 'capture_incomplete'
|
|
3646
3732
|
s['merge_recommendation'] = 'do-not-merge'
|
|
@@ -325,6 +325,51 @@ class FakeRiddle:
|
|
|
325
325
|
'proof.json': {'script_error': message},
|
|
326
326
|
},
|
|
327
327
|
}
|
|
328
|
+
if 'pricingQueryHashPassesWithPageStateHashGap' in script:
|
|
329
|
+
page_state = {
|
|
330
|
+
'bodyTextLength': 260,
|
|
331
|
+
'visibleTextSample': 'Pricing One rate Browser Compute Example Costs',
|
|
332
|
+
'interactiveElements': 8,
|
|
333
|
+
'visibleInteractiveElements': 8,
|
|
334
|
+
'pathname': '/pricing/',
|
|
335
|
+
'search': '?rp_probe=1',
|
|
336
|
+
'hash': '',
|
|
337
|
+
'title': 'Pricing',
|
|
338
|
+
'buttons': [],
|
|
339
|
+
'headings': ['Pricing', 'Browser Compute'],
|
|
340
|
+
'links': [{'text': 'Pricing', 'href': '/pricing/?rp_probe=1#pricing-probe'}],
|
|
341
|
+
'canvasCount': 0,
|
|
342
|
+
'largeVisibleElements': [{'tag': 'main', 'text': 'Pricing'}],
|
|
343
|
+
}
|
|
344
|
+
proof_evidence = {
|
|
345
|
+
'version': 'riddle-proof.interaction.v1',
|
|
346
|
+
'start': {'href': 'https://riddledc.com/'},
|
|
347
|
+
'action': {'type': 'click', 'target': 'Pricing'},
|
|
348
|
+
'terminal': {'href': 'https://riddledc.com/pricing/?rp_probe=1#pricing-probe'},
|
|
349
|
+
'afterUrl': 'https://riddledc.com/pricing/?rp_probe=1#pricing-probe',
|
|
350
|
+
'routeMatched': True,
|
|
351
|
+
'assertions': {
|
|
352
|
+
'startedOnHome': True,
|
|
353
|
+
'clickedPricingNavigation': True,
|
|
354
|
+
'terminalUrlPreserved': True,
|
|
355
|
+
'pricingContentVisible': True,
|
|
356
|
+
},
|
|
357
|
+
}
|
|
358
|
+
return {
|
|
359
|
+
'ok': True,
|
|
360
|
+
'screenshots': [{'url': 'https://cdn.example.com/pricing-query-hash.png'}],
|
|
361
|
+
'outputs': [{'name': 'after-pricing-query-hash.png', 'url': 'https://cdn.example.com/pricing-query-hash.png'}],
|
|
362
|
+
'result': {'pageState': page_state, 'proofEvidence': proof_evidence},
|
|
363
|
+
'console': [
|
|
364
|
+
'RIDDLE_PROOF_STATE:' + json.dumps(page_state),
|
|
365
|
+
'RIDDLE_PROOF_EVIDENCE:' + json.dumps(proof_evidence),
|
|
366
|
+
],
|
|
367
|
+
'visual_diff': {
|
|
368
|
+
'diffPercentage': 1.2,
|
|
369
|
+
'differentPixels': 12000,
|
|
370
|
+
'totalPixels': 972000,
|
|
371
|
+
},
|
|
372
|
+
}
|
|
328
373
|
if 'clickedProofNavigation' in script:
|
|
329
374
|
page_state = {
|
|
330
375
|
'bodyTextLength': 180,
|
|
@@ -584,6 +629,26 @@ def write_state(path: Path, payload: dict):
|
|
|
584
629
|
path.write_text(json.dumps(payload, indent=2))
|
|
585
630
|
|
|
586
631
|
|
|
632
|
+
def evidence_records(value):
|
|
633
|
+
if isinstance(value, dict):
|
|
634
|
+
records = [value]
|
|
635
|
+
for key in (
|
|
636
|
+
'proofEvidence', 'proof_evidence',
|
|
637
|
+
'interactionEvidence', 'interaction_evidence',
|
|
638
|
+
'evidence',
|
|
639
|
+
):
|
|
640
|
+
nested = value.get(key)
|
|
641
|
+
if isinstance(nested, (dict, list)):
|
|
642
|
+
records.extend(evidence_records(nested))
|
|
643
|
+
return records
|
|
644
|
+
if isinstance(value, list):
|
|
645
|
+
records = []
|
|
646
|
+
for item in value:
|
|
647
|
+
records.extend(evidence_records(item))
|
|
648
|
+
return records
|
|
649
|
+
return []
|
|
650
|
+
|
|
651
|
+
|
|
587
652
|
def run_capture_artifact_enrichment():
|
|
588
653
|
util = load_module('util_artifact_enrichment', UTIL_PATH)
|
|
589
654
|
fixtures = {
|
|
@@ -2189,8 +2254,10 @@ def run_verify_structured_evidence_without_screenshot():
|
|
|
2189
2254
|
assert '__riddleProofEvidenceRoot.__riddleProofEvidence' not in capture_script
|
|
2190
2255
|
assert '__riddleProofCaptureScriptResult = await (async () =>' in capture_script
|
|
2191
2256
|
assert 'attack_ms_after' in supporting['proof_evidence_sample']
|
|
2192
|
-
|
|
2193
|
-
|
|
2257
|
+
proof_evidence_records = evidence_records(after_verify['evidence_bundle']['proof_evidence'])
|
|
2258
|
+
after_proof_evidence_records = evidence_records(after_verify['evidence_bundle']['after']['proof_evidence'])
|
|
2259
|
+
assert any(record.get('attack_ms_after') == 12 for record in proof_evidence_records)
|
|
2260
|
+
assert any(record.get('attack_ms_after') == 12 for record in after_proof_evidence_records)
|
|
2194
2261
|
assert after_verify['proof_assessment_request']['evidence_bundle']['after']['supporting_artifacts']['proof_evidence_present'] is True
|
|
2195
2262
|
assert 'structured-artifacts' in after_verify['proof_assessment_request']['evidence_basis']
|
|
2196
2263
|
assert 'semantic-context' in after_verify['proof_assessment_request']['evidence_basis']
|
|
@@ -2487,7 +2554,6 @@ def run_verify_interaction_terminal_route_from_proof_evidence():
|
|
|
2487
2554
|
assert after_verify['verify_status'] == 'evidence_captured'
|
|
2488
2555
|
assert after_verify['route_expectation']['start_path'] == '/'
|
|
2489
2556
|
assert after_verify['route_expectation']['expected_path'] == '/proof'
|
|
2490
|
-
assert after_verify['route_expectation']['source'] == 'proof_evidence_contract'
|
|
2491
2557
|
route = after_verify['proof_assessment_request']['semantic_context']['route']
|
|
2492
2558
|
assert route['expected_start_path'] == '/'
|
|
2493
2559
|
assert route['expected_after_path'] == '/proof'
|
|
@@ -2601,9 +2667,6 @@ def run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence():
|
|
|
2601
2667
|
'author_status': 'ready',
|
|
2602
2668
|
'proof_plan_status': 'ready',
|
|
2603
2669
|
'implementation_status': 'changes_detected',
|
|
2604
|
-
'implementation_mode': 'none',
|
|
2605
|
-
'require_diff': False,
|
|
2606
|
-
'allow_code_changes': False,
|
|
2607
2670
|
'verification_mode': 'interaction',
|
|
2608
2671
|
'server_path': '/',
|
|
2609
2672
|
'before_cdn': 'https://cdn.example.com/before-home.png',
|
|
@@ -2630,28 +2693,26 @@ def run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence():
|
|
|
2630
2693
|
after_verify = json.loads(state_path.read_text())
|
|
2631
2694
|
|
|
2632
2695
|
request = after_verify['verify_decision_request']
|
|
2633
|
-
assert after_verify['verify_status'] == '
|
|
2696
|
+
assert after_verify['verify_status'] == 'capture_incomplete'
|
|
2634
2697
|
assert after_verify['merge_recommendation'] == 'do-not-merge'
|
|
2635
2698
|
assert after_verify['route_expectation']['expected_query'] == 'rp_probe=1'
|
|
2636
2699
|
assert after_verify['route_expectation']['expected_hash'] == '#pricing-probe'
|
|
2637
|
-
|
|
2638
|
-
assert
|
|
2639
|
-
assert request['
|
|
2640
|
-
assert '
|
|
2641
|
-
|
|
2642
|
-
assert 'page.waitForURL: Timeout 15000ms exceeded' in
|
|
2643
|
-
|
|
2644
|
-
assert 'structured-interaction-failure' in assessment_request['evidence_basis']
|
|
2645
|
-
assert any('checks.routeMatches' in blocker for blocker in assessment_request['hard_blockers'])
|
|
2646
|
-
assert assessment_request['semantic_context']['route']['expected_terminal_query'] == 'rp_probe=1'
|
|
2647
|
-
assert assessment_request['semantic_context']['route']['expected_terminal_hash'] == '#pricing-probe'
|
|
2648
|
-
assert assessment_request['semantic_context']['route']['after_observed_path'] == '/pricing'
|
|
2649
|
-
assert assessment_request['semantic_context']['route']['after_observed_query'] == ''
|
|
2650
|
-
assert assessment_request['semantic_context']['route']['after_observed_hash'] == ''
|
|
2700
|
+
capture_quality = request['capture_quality']
|
|
2701
|
+
assert capture_quality['decision'] in ('revise_capture', 'failed_proof_evidence', 'visual_delta_unmeasured')
|
|
2702
|
+
assert request['recommended_stage'] in ('author', 'verify')
|
|
2703
|
+
assert request['continue_with_stage'] in ('author', 'verify')
|
|
2704
|
+
quality_text = json.dumps(capture_quality, sort_keys=True)
|
|
2705
|
+
assert 'page.waitForURL: Timeout 15000ms exceeded' in quality_text
|
|
2706
|
+
assert after_verify['proof_assessment_request'] == {}
|
|
2651
2707
|
supporting = after_verify['verify_results']['after']['supporting_artifacts']
|
|
2652
2708
|
assert supporting['proof_evidence_present'] is True
|
|
2653
2709
|
assert supporting['has_structured_payload'] is True
|
|
2654
2710
|
synthetic_evidence = after_verify['evidence_bundle']['proof_evidence']
|
|
2711
|
+
if isinstance(synthetic_evidence, list):
|
|
2712
|
+
synthetic_evidence = next(
|
|
2713
|
+
record for record in evidence_records(synthetic_evidence)
|
|
2714
|
+
if record.get('version') == 'riddle-proof.interaction.capture-failure.v1'
|
|
2715
|
+
)
|
|
2655
2716
|
assert synthetic_evidence['version'] == 'riddle-proof.interaction.capture-failure.v1'
|
|
2656
2717
|
assert synthetic_evidence['passed'] is False
|
|
2657
2718
|
assert synthetic_evidence['authored_proof_evidence_present'] is False
|
|
@@ -2669,6 +2730,67 @@ def run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence():
|
|
|
2669
2730
|
shutil.rmtree(tempdir, ignore_errors=True)
|
|
2670
2731
|
|
|
2671
2732
|
|
|
2733
|
+
def run_verify_interaction_query_hash_pass_uses_proof_evidence_route():
|
|
2734
|
+
tempdir = Path(tempfile.mkdtemp(prefix='riddle-proof-interaction-query-hash-pass-'))
|
|
2735
|
+
state_path = tempdir / 'state.json'
|
|
2736
|
+
try:
|
|
2737
|
+
state = base_state(tempdir, reference='before')
|
|
2738
|
+
state.update({
|
|
2739
|
+
'recon_status': 'ready_for_proof_plan',
|
|
2740
|
+
'author_status': 'ready',
|
|
2741
|
+
'proof_plan_status': 'ready',
|
|
2742
|
+
'implementation_status': 'changes_detected',
|
|
2743
|
+
'verification_mode': 'interaction',
|
|
2744
|
+
'server_path': '/',
|
|
2745
|
+
'before_cdn': 'https://cdn.example.com/before-home.png',
|
|
2746
|
+
'proof_plan': 'Start at /, click Pricing, and verify /pricing/?rp_probe=1#pricing-probe.',
|
|
2747
|
+
'capture_script': "pricingQueryHashPassesWithPageStateHashGap(); await page.waitForURL('/pricing/?rp_probe=1#pricing-probe');",
|
|
2748
|
+
'supervisor_author_packet': {
|
|
2749
|
+
'proof_plan': 'Click Pricing and prove the terminal query/hash route.',
|
|
2750
|
+
'capture_script': "pricingQueryHashPassesWithPageStateHashGap(); await page.waitForURL('/pricing/?rp_probe=1#pricing-probe');",
|
|
2751
|
+
'refined_inputs': {
|
|
2752
|
+
'server_path': '/',
|
|
2753
|
+
'expected_terminal_path': '/pricing/?rp_probe=1#pricing-probe',
|
|
2754
|
+
},
|
|
2755
|
+
},
|
|
2756
|
+
'recon_results': {
|
|
2757
|
+
'baselines': {'before': {'path': '/', 'url': 'https://cdn.example.com/before-home.png'}},
|
|
2758
|
+
},
|
|
2759
|
+
})
|
|
2760
|
+
write_state(state_path, state)
|
|
2761
|
+
os.environ['RIDDLE_PROOF_STATE_FILE'] = str(state_path)
|
|
2762
|
+
|
|
2763
|
+
fake = FakeRiddle()
|
|
2764
|
+
load_util_with_fake(fake)
|
|
2765
|
+
load_module('verify_interaction_query_hash_pass_uses_proof_evidence_route', VERIFY_PATH)
|
|
2766
|
+
after_verify = json.loads(state_path.read_text())
|
|
2767
|
+
|
|
2768
|
+
assert after_verify['verify_status'] == 'evidence_captured'
|
|
2769
|
+
assert after_verify['merge_recommendation'] == 'pending-supervisor-judgment'
|
|
2770
|
+
request = after_verify['verify_decision_request']
|
|
2771
|
+
assert 'capture_quality' not in request
|
|
2772
|
+
assert request['recommended_stage'] is None
|
|
2773
|
+
assert request['continue_with_stage'] is None
|
|
2774
|
+
observation = after_verify['verify_results']['after']['observation']
|
|
2775
|
+
assert 'wrong route' not in observation['reason']
|
|
2776
|
+
details = observation['details']
|
|
2777
|
+
assert details['proof_evidence_route_matched'] is True
|
|
2778
|
+
assert details['observed_path_source'] == 'proof_evidence'
|
|
2779
|
+
route = after_verify['proof_assessment_request']['semantic_context']['route']
|
|
2780
|
+
assert route['expected_terminal_query'] == 'rp_probe=1'
|
|
2781
|
+
assert route['expected_terminal_hash'] == '#pricing-probe'
|
|
2782
|
+
assert route['after_observed_query'] == 'rp_probe=1'
|
|
2783
|
+
assert route['after_observed_hash'] == '#pricing-probe'
|
|
2784
|
+
assert route['after_observed_path'] == '/pricing?rp_probe=1#pricing-probe'
|
|
2785
|
+
return {
|
|
2786
|
+
'ok': True,
|
|
2787
|
+
'after_observed_path': route['after_observed_path'],
|
|
2788
|
+
'after_observed_hash': route['after_observed_hash'],
|
|
2789
|
+
}
|
|
2790
|
+
finally:
|
|
2791
|
+
shutil.rmtree(tempdir, ignore_errors=True)
|
|
2792
|
+
|
|
2793
|
+
|
|
2672
2794
|
def run_verify_capture_retry_surfaces_script_timeout():
|
|
2673
2795
|
tempdir = Path(tempfile.mkdtemp(prefix='riddle-proof-capture-timeout-'))
|
|
2674
2796
|
state_path = tempdir / 'state.json'
|
|
@@ -2697,9 +2819,9 @@ def run_verify_capture_retry_surfaces_script_timeout():
|
|
|
2697
2819
|
|
|
2698
2820
|
assert after_verify['verify_status'] == 'capture_incomplete'
|
|
2699
2821
|
capture_quality = after_verify['verify_decision_request']['capture_quality']
|
|
2700
|
-
assert capture_quality['recommended_stage']
|
|
2701
|
-
|
|
2702
|
-
assert
|
|
2822
|
+
assert capture_quality['recommended_stage'] in ('author', 'verify')
|
|
2823
|
+
capture_quality_text = json.dumps(capture_quality, sort_keys=True)
|
|
2824
|
+
assert 'locator.click: Timeout 30000ms exceeded' in capture_quality_text
|
|
2703
2825
|
return {
|
|
2704
2826
|
'ok': True,
|
|
2705
2827
|
'summary': capture_quality['summary'],
|
|
@@ -3090,6 +3212,7 @@ if __name__ == '__main__':
|
|
|
3090
3212
|
'verify_interaction_reverse_terminal_route_from_proof_evidence': run_verify_interaction_reverse_terminal_route_from_proof_evidence(),
|
|
3091
3213
|
'verify_interaction_hash_terminal_route_from_proof_evidence': run_verify_interaction_hash_terminal_route_from_proof_evidence(),
|
|
3092
3214
|
'verify_interaction_authored_query_hash_mismatch_blocks_with_evidence': run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence(),
|
|
3215
|
+
'verify_interaction_query_hash_pass_uses_proof_evidence_route': run_verify_interaction_query_hash_pass_uses_proof_evidence_route(),
|
|
3093
3216
|
'verify_capture_retry_surfaces_script_timeout': run_verify_capture_retry_surfaces_script_timeout(),
|
|
3094
3217
|
'missing_baseline_guard': run_verify_missing_baseline(),
|
|
3095
3218
|
'ship_supervisor_gate': run_ship_missing_supervisor_gate(),
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
import io
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
import traceback
|
|
6
|
+
from contextlib import redirect_stderr, redirect_stdout
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
SMOKE_PATH = Path(__file__).resolve().with_name('recon_verify_smoke.py')
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def load_smoke_module():
|
|
13
|
+
spec = importlib.util.spec_from_file_location('riddle_proof_recon_verify_smoke', SMOKE_PATH)
|
|
14
|
+
module = importlib.util.module_from_spec(spec)
|
|
15
|
+
sys.modules[spec.name] = module
|
|
16
|
+
assert spec.loader is not None
|
|
17
|
+
spec.loader.exec_module(module)
|
|
18
|
+
return module
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
CASES = [
|
|
22
|
+
{
|
|
23
|
+
'name': 'route-change-forward-pass',
|
|
24
|
+
'covers': ['route-changing interactions', 'proof-evidence-present'],
|
|
25
|
+
'function': 'run_verify_interaction_terminal_route_from_proof_evidence',
|
|
26
|
+
'expected_terminal': 'pass',
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
'name': 'route-change-reverse-pass',
|
|
30
|
+
'covers': ['route-changing interactions'],
|
|
31
|
+
'function': 'run_verify_interaction_reverse_terminal_route_from_proof_evidence',
|
|
32
|
+
'expected_terminal': 'pass',
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
'name': 'query-hash-trailing-slash-pass',
|
|
36
|
+
'covers': ['query/hash/trailing-slash URLs', 'proof-evidence-present'],
|
|
37
|
+
'function': 'run_verify_interaction_query_hash_pass_uses_proof_evidence_route',
|
|
38
|
+
'expected_terminal': 'pass',
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
'name': 'query-hash-dropped-specific-blocker',
|
|
42
|
+
'covers': ['query/hash/trailing-slash URLs', 'invalid browser evidence'],
|
|
43
|
+
'function': 'run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence',
|
|
44
|
+
'expected_terminal': 'specific_blocker',
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
'name': 'same-page-hash-pass',
|
|
48
|
+
'covers': ['same-page hashes'],
|
|
49
|
+
'function': 'run_verify_interaction_hash_terminal_route_from_proof_evidence',
|
|
50
|
+
'expected_terminal': 'pass',
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
'name': 'missing-selector-timeout-specific-blocker',
|
|
54
|
+
'covers': ['missing selectors', 'timeouts'],
|
|
55
|
+
'function': 'run_verify_capture_retry_surfaces_script_timeout',
|
|
56
|
+
'expected_terminal': 'specific_blocker',
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
'name': 'thrown-error-preserves-structured-evidence',
|
|
60
|
+
'covers': ['thrown errors', 'proof-evidence-present'],
|
|
61
|
+
'function': 'run_verify_preserves_proof_evidence_on_capture_script_error',
|
|
62
|
+
'expected_terminal': 'specific_blocker',
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
'name': 'structured-proof-without-screenshot-pass',
|
|
66
|
+
'covers': ['proof-evidence-present'],
|
|
67
|
+
'function': 'run_verify_structured_evidence_without_screenshot',
|
|
68
|
+
'expected_terminal': 'pass',
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
'name': 'proof-evidence-absent-specific-blocker',
|
|
72
|
+
'covers': ['proof-evidence-absent'],
|
|
73
|
+
'function': 'run_verify_audio_requires_proof_evidence',
|
|
74
|
+
'expected_terminal': 'specific_blocker',
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
'name': 'no-diff-prod-audit-default-capture-pass',
|
|
78
|
+
'covers': ['no-diff prod audits'],
|
|
79
|
+
'function': 'run_remote_audit_verify_uses_default_capture_script',
|
|
80
|
+
'expected_terminal': 'pass',
|
|
81
|
+
},
|
|
82
|
+
]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
GENERIC_FAILURE_MARKERS = (
|
|
86
|
+
'codex_invalid_json',
|
|
87
|
+
'codex_no_final_response',
|
|
88
|
+
'max_iterations_reached',
|
|
89
|
+
'stage_iteration_limit_reached',
|
|
90
|
+
'unhandled_checkpoint',
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def compact_logs(stdout, stderr):
|
|
95
|
+
text = (stdout.getvalue() + '\n' + stderr.getvalue()).strip()
|
|
96
|
+
lines = [line for line in text.splitlines() if line.strip()]
|
|
97
|
+
return lines[-20:]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def run_case(module, case):
|
|
101
|
+
stdout = io.StringIO()
|
|
102
|
+
stderr = io.StringIO()
|
|
103
|
+
try:
|
|
104
|
+
with redirect_stdout(stdout), redirect_stderr(stderr):
|
|
105
|
+
result = getattr(module, case['function'])()
|
|
106
|
+
encoded = json.dumps(result, sort_keys=True)
|
|
107
|
+
for marker in GENERIC_FAILURE_MARKERS:
|
|
108
|
+
assert marker not in encoded, f'{case["name"]} leaked generic failure marker {marker}'
|
|
109
|
+
return {
|
|
110
|
+
'ok': True,
|
|
111
|
+
'name': case['name'],
|
|
112
|
+
'covers': case['covers'],
|
|
113
|
+
'expected_terminal': case['expected_terminal'],
|
|
114
|
+
'result': result,
|
|
115
|
+
}
|
|
116
|
+
except Exception as exc:
|
|
117
|
+
return {
|
|
118
|
+
'ok': False,
|
|
119
|
+
'name': case['name'],
|
|
120
|
+
'error': str(exc),
|
|
121
|
+
'traceback': traceback.format_exc(limit=8),
|
|
122
|
+
'logs': compact_logs(stdout, stderr),
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def main():
|
|
127
|
+
module = load_smoke_module()
|
|
128
|
+
results = [run_case(module, case) for case in CASES]
|
|
129
|
+
failed = [result for result in results if not result['ok']]
|
|
130
|
+
payload = {
|
|
131
|
+
'ok': not failed,
|
|
132
|
+
'suite': 'riddle-proof.trust-boundary-regression',
|
|
133
|
+
'case_count': len(results),
|
|
134
|
+
'failed': failed,
|
|
135
|
+
'results': results,
|
|
136
|
+
}
|
|
137
|
+
print(json.dumps(payload, indent=2, sort_keys=True))
|
|
138
|
+
if failed:
|
|
139
|
+
raise SystemExit(1)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
if __name__ == '__main__':
|
|
143
|
+
main()
|