@riddledc/riddle-proof 0.8.28 → 0.8.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/codex-exec-agent.cjs +8 -5
- package/dist/adapters/codex-exec-agent.js +1 -1
- package/dist/adapters/codex.cjs +8 -5
- package/dist/adapters/codex.js +1 -1
- package/dist/adapters/local-agent.cjs +8 -5
- package/dist/adapters/local-agent.js +1 -1
- package/dist/advanced/engine-harness.cjs +56 -1
- package/dist/advanced/engine-harness.js +1 -1
- package/dist/advanced/index.cjs +56 -1
- package/dist/advanced/index.js +2 -2
- package/dist/advanced/proof-run-engine.cjs +56 -1
- package/dist/advanced/proof-run-engine.js +1 -1
- package/dist/{chunk-YC77HZVF.js → chunk-32RE64IO.js} +56 -1
- package/dist/{chunk-4PPJKW3Z.js → chunk-73EBR3YL.js} +8 -5
- package/dist/{chunk-27BG64ZG.js → chunk-XJA2GDVN.js} +2 -2
- package/dist/cli/index.js +3 -3
- package/dist/cli.cjs +64 -6
- package/dist/cli.js +3 -3
- package/dist/codex-exec-agent.cjs +8 -5
- package/dist/codex-exec-agent.js +1 -1
- package/dist/engine-harness.cjs +56 -1
- package/dist/engine-harness.js +1 -1
- package/dist/index.cjs +64 -6
- package/dist/index.js +2 -2
- package/dist/local-agent.cjs +8 -5
- package/dist/local-agent.js +1 -1
- package/dist/proof-run-engine.cjs +56 -1
- package/dist/proof-run-engine.js +1 -1
- package/package.json +1 -1
- package/runtime/lib/author.py +11 -3
- package/runtime/lib/setup.py +72 -1
- package/runtime/lib/verify.py +13 -1
- package/runtime/tests/recon_verify_smoke.py +31 -3
- /package/dist/{chunk-AM3K5FPW.js → chunk-UWO4YR7I.js} +0 -0
package/runtime/lib/setup.py
CHANGED
|
@@ -6,7 +6,7 @@ scratch storage by default:
|
|
|
6
6
|
/var/tmp/riddle-proof/.riddle-proof-worktrees/riddle-proof-<run_id>-after
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import json, subprocess as sp, os, sys, shutil, time, tempfile
|
|
9
|
+
import json, subprocess as sp, os, sys, shutil, time, tempfile, re
|
|
10
10
|
from urllib.parse import urlparse
|
|
11
11
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
12
12
|
from util import load_state, save_state, git, shell_quote
|
|
@@ -223,6 +223,76 @@ def interaction_verification_mode():
|
|
|
223
223
|
)
|
|
224
224
|
|
|
225
225
|
|
|
226
|
+
def normalize_route_path(value):
    """Reduce a URL or path-like value to a normalized route string.

    The result is ``path[?query][#fragment]`` where the path always starts
    with ``/`` and carries no trailing slash (the root stays ``/``). Any
    scheme and host present in ``value`` are discarded.

    Args:
        value: A URL, absolute path, relative path, or any falsy value.

    Returns:
        The normalized route, or ``''`` when ``value`` is empty/blank.
    """
    raw = str(value or '').strip()
    if not raw:
        return ''
    try:
        parsed = urlparse(raw)
    except ValueError:
        # urlparse rejects malformed input (e.g. an unbalanced IPv6
        # bracket); fall back to a plain split that keeps only the path.
        path = raw.split('#', 1)[0].split('?', 1)[0]
        if not path.startswith('/'):
            path = '/' + path
        return path.rstrip('/') or '/'
    path = parsed.path or '/'
    if not path.startswith('/'):
        path = '/' + path
    # An all-slash path such as '//' must collapse to the root rather than
    # to an empty string, matching the fallback branch above.
    path = path.rstrip('/') or '/'
    query = ('?' + parsed.query) if parsed.query else ''
    fragment = ('#' + parsed.fragment) if parsed.fragment else ''
    return path + query + fragment
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def trim_route_candidate(value):
    """Strip surrounding whitespace, then trailing sentence punctuation.

    Falsy input is treated as an empty string. Only the characters
    ``)``, ``,``, ``.``, ``;``, ``]`` and ``}`` are removed from the end.
    """
    candidate = str(value or '').strip()
    trailing_punctuation = '),.;]}'
    return candidate.rstrip(trailing_punctuation)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def expected_terminal_route_from_text(value):
    """Extract an expected terminal route mentioned in free-form text.

    The text is searched, case-insensitively, for phrases of the form
    "expected [terminal|final|after] url/route/path is|=|: <route>",
    "terminal|final|after url/route/path is|=|: <route>", and
    "ends/lands (etc.) at|on <route>", where <route> is either an
    http(s) URL or a token starting with ``/``. Phrase families are tried
    in that order; the first one yielding a non-empty normalized route wins.

    Returns:
        The normalized route, or ``''`` when the text is empty or no
        phrase matches.
    """
    haystack = str(value or '').strip()
    if not haystack:
        return ''
    route_pattern = r'(https?://[^\s"\'<>`]+|/[^\s"\'<>`]+)'
    lead_ins = (
        r'\bexpected\s+(?:terminal\s+|final\s+|after\s+)?(?:url|route|path)\s*(?:is|=|:)\s*',
        r'\b(?:terminal|final|after)\s+(?:url|route|path)\s*(?:is|=|:)\s*',
        r'\b(?:ends|end|ending|lands|land|landing)\s+(?:at|on)\s*',
    )
    for lead_in in lead_ins:
        found = re.search(lead_in + route_pattern, haystack, re.IGNORECASE)
        if found is None:
            continue
        # Drop trailing punctuation picked up from prose before normalizing.
        candidate = normalize_route_path(trim_route_candidate(found.group(1)))
        if candidate:
            return candidate
    return ''
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def requested_expected_terminal_route():
    """Return the first expected terminal route found in the request state.

    Reads the ``success_criteria``, ``change_request``, ``context`` and
    ``assertions_json`` entries of the module-level state ``s`` in that
    order and returns the first non-empty route extracted from them.
    """
    route = ''
    for state_key in ('success_criteria', 'change_request', 'context', 'assertions_json'):
        route = expected_terminal_route_from_text(s.get(state_key))
        if route:
            break
    return route
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def apply_interaction_route_contract(start_path):
    """Record the requested start/terminal routes on the module state ``s``.

    Does nothing unless interaction verification mode is active and the
    request text names an expected terminal route. Otherwise it always
    overwrites ``requested_expected_terminal_path`` and fills in
    ``expected_terminal_path``, ``expected_start_path`` and the
    ``interaction_contract`` entries only where no truthy value exists yet.

    Args:
        start_path: Route the interaction starts from; normalized, with
            ``/`` used when normalization yields an empty string.
    """
    if not interaction_verification_mode():
        return
    wanted_terminal = requested_expected_terminal_route()
    if not wanted_terminal:
        return
    start_route = normalize_route_path(start_path) or '/'

    s['requested_expected_terminal_path'] = wanted_terminal
    top_level_defaults = (
        ('expected_terminal_path', wanted_terminal),
        ('expected_start_path', start_route),
    )
    for state_key, fallback in top_level_defaults:
        s[state_key] = s.get(state_key) or fallback

    # Work on a copy so an existing contract dict is never mutated in place.
    existing = s.get('interaction_contract')
    merged = dict(existing) if isinstance(existing, dict) else {}
    merged['start_path'] = merged.get('start_path') or start_route
    merged['expected_terminal_path'] = merged.get('expected_terminal_path') or wanted_terminal
    s['interaction_contract'] = merged
|
|
294
|
+
|
|
295
|
+
|
|
226
296
|
def remote_audit_mode():
|
|
227
297
|
return bool(s.get('remote_audit')) or (
|
|
228
298
|
not repo
|
|
@@ -550,6 +620,7 @@ if remote_audit_mode():
|
|
|
550
620
|
s['allow_code_changes'] = False
|
|
551
621
|
s['server_path'] = s.get('server_path') or target_path
|
|
552
622
|
s['server_path_source'] = s.get('server_path_source') or 'prod_url'
|
|
623
|
+
apply_interaction_route_contract(s['server_path'])
|
|
553
624
|
s['recon_status'] = 'ready_for_proof_plan'
|
|
554
625
|
s['recon_summary'] = 'Remote audit/no-diff run uses prod_url as the current target and skips repo worktrees.'
|
|
555
626
|
s['recon_hypothesis'] = {
|
package/runtime/lib/verify.py
CHANGED
|
@@ -3222,7 +3222,7 @@ def evaluate_capture_quality(payload, expected_path, verification_mode='proof'):
|
|
|
3222
3222
|
}
|
|
3223
3223
|
|
|
3224
3224
|
|
|
3225
|
-
def build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker='', route_expectation=None):
|
|
3225
|
+
def build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker='', route_expectation=None, verification_mode=''):
|
|
3226
3226
|
reasons = []
|
|
3227
3227
|
if not required_baseline_present:
|
|
3228
3228
|
reasons.append('Recon baseline is missing, so verify should return to recon instead of guessing a new reference context.')
|
|
@@ -3361,6 +3361,17 @@ def build_capture_retry_decision(after_observation, required_baseline_present, p
|
|
|
3361
3361
|
elif error_messages:
|
|
3362
3362
|
reasons.append('Capture script error: ' + error_messages[0][:500])
|
|
3363
3363
|
summary = 'Verify capture script failed: ' + error_messages[0][:300]
|
|
3364
|
+
reasons.append('The capture script produced a concrete browser/runtime failure, so this run should block with that exact evidence instead of re-authoring in a loop.')
|
|
3365
|
+
return {
|
|
3366
|
+
'decision': 'failed_interaction_capture' if normalized_verification_mode(verification_mode) in INTERACTION_MODES else 'failed_capture',
|
|
3367
|
+
'summary': summary,
|
|
3368
|
+
'recommended_stage': None,
|
|
3369
|
+
'continue_with_stage': None,
|
|
3370
|
+
'blocking': True,
|
|
3371
|
+
'terminal_blocker': True,
|
|
3372
|
+
'reasons': reasons,
|
|
3373
|
+
'mismatch': None,
|
|
3374
|
+
}
|
|
3364
3375
|
else:
|
|
3365
3376
|
summary = 'Verify needs another internal capture iteration before the evidence can be judged.'
|
|
3366
3377
|
reasons.append('The capture plan itself needs revision, so author should tighten the proof script or framing inputs.')
|
|
@@ -4248,6 +4259,7 @@ else:
|
|
|
4248
4259
|
required_baseline_present,
|
|
4249
4260
|
proof_evidence_blocker or structured_interaction_capture_failure_summary,
|
|
4250
4261
|
s.get('route_expectation') or {},
|
|
4262
|
+
s.get('verification_mode') or '',
|
|
4251
4263
|
)
|
|
4252
4264
|
if visual_delta_recovery:
|
|
4253
4265
|
observation_reason = str(after_observation.get('reason') or '')
|
|
@@ -1700,13 +1700,33 @@ def run_remote_interaction_audit_setup_requires_authoring():
|
|
|
1700
1700
|
assert state['recon_status'] == 'ready_for_proof_plan'
|
|
1701
1701
|
assert state['author_status'] == 'needs_authoring'
|
|
1702
1702
|
assert state['proof_plan_status'] == 'needs_authoring'
|
|
1703
|
+
assert state['requested_expected_terminal_path'] == '/proof'
|
|
1704
|
+
assert state['expected_terminal_path'] == '/proof'
|
|
1705
|
+
assert state['expected_start_path'] == '/'
|
|
1706
|
+
assert state['interaction_contract']['start_path'] == '/'
|
|
1707
|
+
assert state['interaction_contract']['expected_terminal_path'] == '/proof'
|
|
1703
1708
|
assert state.get('capture_script', '') == ''
|
|
1704
1709
|
assert state.get('capture_script_source', '') == ''
|
|
1705
1710
|
assert 'requires an authored browser interaction capture' in state['author_summary']
|
|
1711
|
+
|
|
1712
|
+
with temporary_env(RIDDLE_PROOF_STATE_FILE=str(state_path)):
|
|
1713
|
+
sys.modules.pop('util', None)
|
|
1714
|
+
try:
|
|
1715
|
+
load_module('author_remote_interaction_audit_request', AUTHOR_PATH)
|
|
1716
|
+
except SystemExit as exc:
|
|
1717
|
+
assert exc.code in (0, None), exc
|
|
1718
|
+
after_author = json.loads(state_path.read_text())
|
|
1719
|
+
assert after_author['author_status'] == 'needs_supervisor_judgment'
|
|
1720
|
+
assert after_author['author_request']['fallback_defaults']['server_path'] == '/'
|
|
1721
|
+
assert after_author['author_request']['fallback_defaults']['expected_start_path'] == '/'
|
|
1722
|
+
assert after_author['author_request']['fallback_defaults']['expected_terminal_path'] == '/proof'
|
|
1723
|
+
assert after_author['author_request']['fallback_defaults']['capture_script'] == ''
|
|
1724
|
+
assert after_author['author_request']['interaction_contract']['expected_terminal_path'] == '/proof'
|
|
1706
1725
|
return {
|
|
1707
1726
|
'ok': True,
|
|
1708
|
-
'author_status':
|
|
1709
|
-
'
|
|
1727
|
+
'author_status': after_author['author_status'],
|
|
1728
|
+
'expected_terminal_path': after_author['expected_terminal_path'],
|
|
1729
|
+
'capture_script_source': after_author.get('capture_script_source', ''),
|
|
1710
1730
|
}
|
|
1711
1731
|
finally:
|
|
1712
1732
|
sys.modules.pop('util', None)
|
|
@@ -2062,6 +2082,8 @@ def run_recon_then_author_request():
|
|
|
2062
2082
|
assert after_author['author_runtime_model_hint'] == 'openai-codex/gpt-5.4'
|
|
2063
2083
|
assert after_author['author_request']['status'] == 'needs_supervisor_judgment'
|
|
2064
2084
|
assert after_author['author_request']['fallback_defaults']['server_path'] == '/pricing'
|
|
2085
|
+
assert after_author['author_request']['fallback_defaults']['expected_start_path'] == '/pricing'
|
|
2086
|
+
assert after_author['author_request']['fallback_defaults']['expected_terminal_path'] == ''
|
|
2065
2087
|
assert 'supervising agent owns proof authoring' in after_author['author_request']['instructions'][0].lower()
|
|
2066
2088
|
|
|
2067
2089
|
return {
|
|
@@ -2400,6 +2422,8 @@ def run_author_keeps_interaction_start_route():
|
|
|
2400
2422
|
assert after_author['expected_start_path'] == '/'
|
|
2401
2423
|
assert after_author['expected_terminal_path'] == '/proof/'
|
|
2402
2424
|
assert after_author['author_packet']['refined_inputs']['server_path'] == '/'
|
|
2425
|
+
assert after_author['author_packet']['refined_inputs']['expected_start_path'] == '/'
|
|
2426
|
+
assert after_author['author_packet']['refined_inputs']['expected_terminal_path'] == '/proof/'
|
|
2403
2427
|
assert after_author['author_warnings']
|
|
2404
2428
|
assert 'terminal interaction route' in after_author['author_warnings'][0]
|
|
2405
2429
|
return {
|
|
@@ -3685,11 +3709,15 @@ def run_verify_capture_retry_surfaces_script_timeout():
|
|
|
3685
3709
|
|
|
3686
3710
|
assert after_verify['verify_status'] == 'capture_incomplete'
|
|
3687
3711
|
capture_quality = after_verify['verify_decision_request']['capture_quality']
|
|
3688
|
-
assert capture_quality['recommended_stage']
|
|
3712
|
+
assert capture_quality['recommended_stage'] is None
|
|
3713
|
+
assert capture_quality['continue_with_stage'] is None
|
|
3714
|
+
assert capture_quality['blocking'] is True
|
|
3715
|
+
assert capture_quality['terminal_blocker'] is True
|
|
3689
3716
|
capture_quality_text = json.dumps(capture_quality, sort_keys=True)
|
|
3690
3717
|
assert 'locator.click: Timeout 30000ms exceeded' in capture_quality_text
|
|
3691
3718
|
return {
|
|
3692
3719
|
'ok': True,
|
|
3720
|
+
'decision': capture_quality['decision'],
|
|
3693
3721
|
'summary': capture_quality['summary'],
|
|
3694
3722
|
}
|
|
3695
3723
|
finally:
|
|
File without changes
|