@riddledc/riddle-proof 0.8.28 → 0.8.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ scratch storage by default:
6
6
  /var/tmp/riddle-proof/.riddle-proof-worktrees/riddle-proof-<run_id>-after
7
7
  """
8
8
 
9
- import json, subprocess as sp, os, sys, shutil, time, tempfile
9
+ import json, subprocess as sp, os, sys, shutil, time, tempfile, re
10
10
  from urllib.parse import urlparse
11
11
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
12
12
  from util import load_state, save_state, git, shell_quote
@@ -223,6 +223,76 @@ def interaction_verification_mode():
223
223
  )
224
224
 
225
225
 
226
def normalize_route_path(value):
    """Reduce a URL or route string to a normalized ``path[?query][#fragment]``.

    Args:
        value: A full http(s) URL, an absolute path, a bare relative path, or
            a falsy value. Non-string values are converted with ``str()``.

    Returns:
        ``''`` when ``value`` is falsy or blank. Otherwise the path component
        with a guaranteed leading ``/`` and no trailing ``/`` (the root is
        always ``/``), followed by ``?query`` and ``#fragment`` when present.
        If the value cannot be parsed as a URL, a best-effort path without
        query or fragment is returned instead.
    """
    raw = str(value or '').strip()
    if not raw:
        return ''
    try:
        parsed = urlparse(raw)
    except ValueError:
        # urlparse rejects malformed authorities (for example an unbalanced
        # IPv6 bracket). Fall back to plain string slicing so callers still
        # get a usable route instead of an exception.
        path = raw.split('#', 1)[0].split('?', 1)[0]
        if path.lower().startswith(('http://', 'https://')):
            # Drop the scheme and authority so the whole URL is not returned
            # as if it were a path.
            path = '/' + path.split('://', 1)[1].partition('/')[2]
        elif not path.startswith('/'):
            path = '/' + path
        return path.rstrip('/') or '/'
    path = parsed.path or '/'
    if not path.startswith('/'):
        path = '/' + path
    # A path made only of slashes ('//') must collapse to the root, not ''.
    path = path.rstrip('/') or '/'
    query = ('?' + parsed.query) if parsed.query else ''
    fragment = ('#' + parsed.fragment) if parsed.fragment else ''
    return path + query + fragment
245
+
246
+
247
def trim_route_candidate(value):
    """Strip surrounding whitespace and trailing sentence punctuation.

    Args:
        value: A raw route or URL candidate captured from prose; falsy values
            are treated as an empty string.

    Returns:
        The candidate without leading/trailing whitespace and without any
        trailing run of ``)``, ``,``, ``.``, ``;``, ``]`` or ``}``.
    """
    candidate = str(value or '').strip()
    # Strip punctuation and whitespace together so whitespace sitting between
    # the route and trailing punctuation ('/proof ).') does not survive.
    return candidate.rstrip('),.;]}' + ' \t\n\r\f\v')
249
+
250
+
251
def expected_terminal_route_from_text(value):
    """Find the expected terminal route stated in a piece of free-form text.

    The text is scanned, case-insensitively, for phrasings such as
    "expected final url: ...", "terminal route is ..." or "lands on ...",
    each followed by an http(s) URL or a path starting with ``/``. Phrasings
    are tried in the order listed below; the first one whose captured
    candidate normalizes to a non-empty route wins.

    Args:
        value: Text to scan; falsy values are treated as empty and other
            values are converted with ``str()``.

    Returns:
        The normalized route, or ``''`` when no phrasing yields one.
    """
    haystack = str(value or '').strip()
    if not haystack:
        return ''

    # Capture group 1: an absolute http(s) URL or a root-relative path.
    route_group = r'(https?://[^\s"\'<>`]+|/[^\s"\'<>`]+)'
    phrase_prefixes = (
        r'\bexpected\s+(?:terminal\s+|final\s+|after\s+)?(?:url|route|path)\s*(?:is|=|:)\s*',
        r'\b(?:terminal|final|after)\s+(?:url|route|path)\s*(?:is|=|:)\s*',
        r'\b(?:ends|end|ending|lands|land|landing)\s+(?:at|on)\s*',
    )

    for prefix in phrase_prefixes:
        found = re.search(prefix + route_group, haystack, re.IGNORECASE)
        if found is None:
            continue
        candidate = trim_route_candidate(found.group(1))
        route = normalize_route_path(candidate)
        if route:
            return route
    return ''
268
+
269
+
270
def requested_expected_terminal_route():
    """Return the first expected terminal route found in the run state.

    The state fields are consulted in priority order: ``success_criteria``,
    ``change_request``, ``context``, then ``assertions_json``. Later fields
    are not read once an earlier one yields a route.

    Returns:
        The extracted route, or ``''`` when none of the fields mention one.
    """
    state_keys = ('success_criteria', 'change_request', 'context', 'assertions_json')
    for key in state_keys:
        route = expected_terminal_route_from_text(s.get(key))
        if route:
            return route
    return ''
277
+
278
+
279
def apply_interaction_route_contract(start_path):
    """Record start and terminal route expectations on the run state.

    Does nothing unless ``interaction_verification_mode()`` is truthy and the
    request text names an expected terminal route. Otherwise it writes the
    requested terminal route, fills ``expected_terminal_path`` and
    ``expected_start_path`` when they are unset or falsy, and stores a copy
    of ``interaction_contract`` with ``start_path`` and
    ``expected_terminal_path`` filled in the same way.

    Args:
        start_path: Route the interaction starts from; it is normalized and
            defaults to ``/`` when normalization yields an empty string.
    """
    if not interaction_verification_mode():
        return
    terminal_path = requested_expected_terminal_route()
    if not terminal_path:
        return

    normalized_start = normalize_route_path(start_path) or '/'

    s['requested_expected_terminal_path'] = terminal_path
    # Existing truthy values win; only unset or falsy fields get defaults.
    state_defaults = (
        ('expected_terminal_path', terminal_path),
        ('expected_start_path', normalized_start),
    )
    for key, fallback in state_defaults:
        s[key] = s.get(key) or fallback

    # Work on a copy so the previously stored contract object is not mutated.
    existing = s.get('interaction_contract')
    contract = dict(existing) if isinstance(existing, dict) else {}
    contract_defaults = (
        ('start_path', normalized_start),
        ('expected_terminal_path', terminal_path),
    )
    for key, fallback in contract_defaults:
        contract[key] = contract.get(key) or fallback
    s['interaction_contract'] = contract
294
+
295
+
226
296
  def remote_audit_mode():
227
297
  return bool(s.get('remote_audit')) or (
228
298
  not repo
@@ -550,6 +620,7 @@ if remote_audit_mode():
550
620
  s['allow_code_changes'] = False
551
621
  s['server_path'] = s.get('server_path') or target_path
552
622
  s['server_path_source'] = s.get('server_path_source') or 'prod_url'
623
+ apply_interaction_route_contract(s['server_path'])
553
624
  s['recon_status'] = 'ready_for_proof_plan'
554
625
  s['recon_summary'] = 'Remote audit/no-diff run uses prod_url as the current target and skips repo worktrees.'
555
626
  s['recon_hypothesis'] = {
@@ -3222,7 +3222,7 @@ def evaluate_capture_quality(payload, expected_path, verification_mode='proof'):
3222
3222
  }
3223
3223
 
3224
3224
 
3225
- def build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker='', route_expectation=None):
3225
+ def build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker='', route_expectation=None, verification_mode=''):
3226
3226
  reasons = []
3227
3227
  if not required_baseline_present:
3228
3228
  reasons.append('Recon baseline is missing, so verify should return to recon instead of guessing a new reference context.')
@@ -3361,6 +3361,17 @@ def build_capture_retry_decision(after_observation, required_baseline_present, p
3361
3361
  elif error_messages:
3362
3362
  reasons.append('Capture script error: ' + error_messages[0][:500])
3363
3363
  summary = 'Verify capture script failed: ' + error_messages[0][:300]
3364
+ reasons.append('The capture script produced a concrete browser/runtime failure, so this run should block with that exact evidence instead of re-authoring in a loop.')
3365
+ return {
3366
+ 'decision': 'failed_interaction_capture' if normalized_verification_mode(verification_mode) in INTERACTION_MODES else 'failed_capture',
3367
+ 'summary': summary,
3368
+ 'recommended_stage': None,
3369
+ 'continue_with_stage': None,
3370
+ 'blocking': True,
3371
+ 'terminal_blocker': True,
3372
+ 'reasons': reasons,
3373
+ 'mismatch': None,
3374
+ }
3364
3375
  else:
3365
3376
  summary = 'Verify needs another internal capture iteration before the evidence can be judged.'
3366
3377
  reasons.append('The capture plan itself needs revision, so author should tighten the proof script or framing inputs.')
@@ -4248,6 +4259,7 @@ else:
4248
4259
  required_baseline_present,
4249
4260
  proof_evidence_blocker or structured_interaction_capture_failure_summary,
4250
4261
  s.get('route_expectation') or {},
4262
+ s.get('verification_mode') or '',
4251
4263
  )
4252
4264
  if visual_delta_recovery:
4253
4265
  observation_reason = str(after_observation.get('reason') or '')
@@ -1700,13 +1700,33 @@ def run_remote_interaction_audit_setup_requires_authoring():
1700
1700
  assert state['recon_status'] == 'ready_for_proof_plan'
1701
1701
  assert state['author_status'] == 'needs_authoring'
1702
1702
  assert state['proof_plan_status'] == 'needs_authoring'
1703
+ assert state['requested_expected_terminal_path'] == '/proof'
1704
+ assert state['expected_terminal_path'] == '/proof'
1705
+ assert state['expected_start_path'] == '/'
1706
+ assert state['interaction_contract']['start_path'] == '/'
1707
+ assert state['interaction_contract']['expected_terminal_path'] == '/proof'
1703
1708
  assert state.get('capture_script', '') == ''
1704
1709
  assert state.get('capture_script_source', '') == ''
1705
1710
  assert 'requires an authored browser interaction capture' in state['author_summary']
1711
+
1712
+ with temporary_env(RIDDLE_PROOF_STATE_FILE=str(state_path)):
1713
+ sys.modules.pop('util', None)
1714
+ try:
1715
+ load_module('author_remote_interaction_audit_request', AUTHOR_PATH)
1716
+ except SystemExit as exc:
1717
+ assert exc.code in (0, None), exc
1718
+ after_author = json.loads(state_path.read_text())
1719
+ assert after_author['author_status'] == 'needs_supervisor_judgment'
1720
+ assert after_author['author_request']['fallback_defaults']['server_path'] == '/'
1721
+ assert after_author['author_request']['fallback_defaults']['expected_start_path'] == '/'
1722
+ assert after_author['author_request']['fallback_defaults']['expected_terminal_path'] == '/proof'
1723
+ assert after_author['author_request']['fallback_defaults']['capture_script'] == ''
1724
+ assert after_author['author_request']['interaction_contract']['expected_terminal_path'] == '/proof'
1706
1725
  return {
1707
1726
  'ok': True,
1708
- 'author_status': state['author_status'],
1709
- 'capture_script_source': state.get('capture_script_source', ''),
1727
+ 'author_status': after_author['author_status'],
1728
+ 'expected_terminal_path': after_author['expected_terminal_path'],
1729
+ 'capture_script_source': after_author.get('capture_script_source', ''),
1710
1730
  }
1711
1731
  finally:
1712
1732
  sys.modules.pop('util', None)
@@ -2062,6 +2082,8 @@ def run_recon_then_author_request():
2062
2082
  assert after_author['author_runtime_model_hint'] == 'openai-codex/gpt-5.4'
2063
2083
  assert after_author['author_request']['status'] == 'needs_supervisor_judgment'
2064
2084
  assert after_author['author_request']['fallback_defaults']['server_path'] == '/pricing'
2085
+ assert after_author['author_request']['fallback_defaults']['expected_start_path'] == '/pricing'
2086
+ assert after_author['author_request']['fallback_defaults']['expected_terminal_path'] == ''
2065
2087
  assert 'supervising agent owns proof authoring' in after_author['author_request']['instructions'][0].lower()
2066
2088
 
2067
2089
  return {
@@ -2400,6 +2422,8 @@ def run_author_keeps_interaction_start_route():
2400
2422
  assert after_author['expected_start_path'] == '/'
2401
2423
  assert after_author['expected_terminal_path'] == '/proof/'
2402
2424
  assert after_author['author_packet']['refined_inputs']['server_path'] == '/'
2425
+ assert after_author['author_packet']['refined_inputs']['expected_start_path'] == '/'
2426
+ assert after_author['author_packet']['refined_inputs']['expected_terminal_path'] == '/proof/'
2403
2427
  assert after_author['author_warnings']
2404
2428
  assert 'terminal interaction route' in after_author['author_warnings'][0]
2405
2429
  return {
@@ -3685,11 +3709,15 @@ def run_verify_capture_retry_surfaces_script_timeout():
3685
3709
 
3686
3710
  assert after_verify['verify_status'] == 'capture_incomplete'
3687
3711
  capture_quality = after_verify['verify_decision_request']['capture_quality']
3688
- assert capture_quality['recommended_stage'] in ('author', 'verify')
3712
+ assert capture_quality['recommended_stage'] is None
3713
+ assert capture_quality['continue_with_stage'] is None
3714
+ assert capture_quality['blocking'] is True
3715
+ assert capture_quality['terminal_blocker'] is True
3689
3716
  capture_quality_text = json.dumps(capture_quality, sort_keys=True)
3690
3717
  assert 'locator.click: Timeout 30000ms exceeded' in capture_quality_text
3691
3718
  return {
3692
3719
  'ok': True,
3720
+ 'decision': capture_quality['decision'],
3693
3721
  'summary': capture_quality['summary'],
3694
3722
  }
3695
3723
  finally:
File without changes