@riddledc/riddle-proof 0.8.29 → 0.8.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/advanced/engine-harness.cjs +188 -11
  2. package/dist/advanced/engine-harness.js +2 -2
  3. package/dist/advanced/index.cjs +188 -11
  4. package/dist/advanced/index.d.cts +2 -2
  5. package/dist/advanced/index.d.ts +2 -2
  6. package/dist/advanced/index.js +4 -4
  7. package/dist/advanced/proof-run-core.cjs +3 -1
  8. package/dist/advanced/proof-run-core.d.cts +1 -1
  9. package/dist/advanced/proof-run-core.d.ts +1 -1
  10. package/dist/advanced/proof-run-core.js +1 -1
  11. package/dist/advanced/proof-run-engine.cjs +136 -2
  12. package/dist/advanced/proof-run-engine.d.cts +2 -2
  13. package/dist/advanced/proof-run-engine.d.ts +2 -2
  14. package/dist/advanced/proof-run-engine.js +2 -2
  15. package/dist/advanced/runner.js +2 -2
  16. package/dist/{chunk-3OTO7IDH.js → chunk-C2NHHBFV.js} +1 -1
  17. package/dist/{chunk-YC77HZVF.js → chunk-IOI6QR3B.js} +134 -2
  18. package/dist/{chunk-FJPZZ4JO.js → chunk-U73JPBZW.js} +1 -1
  19. package/dist/{chunk-K6HZUSHH.js → chunk-X7SQTCIQ.js} +3 -1
  20. package/dist/{chunk-AM3K5FPW.js → chunk-ZREWMTFA.js} +53 -10
  21. package/dist/cli/index.js +3 -3
  22. package/dist/cli.cjs +188 -11
  23. package/dist/cli.js +3 -3
  24. package/dist/engine-harness.cjs +188 -11
  25. package/dist/engine-harness.js +2 -2
  26. package/dist/index.cjs +188 -11
  27. package/dist/index.js +3 -3
  28. package/dist/{proof-run-core-C8FDUhle.d.cts → proof-run-core-B1GeqkR8.d.cts} +2 -0
  29. package/dist/{proof-run-core-C8FDUhle.d.ts → proof-run-core-B1GeqkR8.d.ts} +2 -0
  30. package/dist/proof-run-core.cjs +3 -1
  31. package/dist/proof-run-core.d.cts +1 -1
  32. package/dist/proof-run-core.d.ts +1 -1
  33. package/dist/proof-run-core.js +1 -1
  34. package/dist/{proof-run-engine-D80hVFMf.d.cts → proof-run-engine-4dM37pEx.d.cts} +1 -1
  35. package/dist/{proof-run-engine-By7oLsF-.d.ts → proof-run-engine-BqaeqAze.d.ts} +1 -1
  36. package/dist/proof-run-engine.cjs +136 -2
  37. package/dist/proof-run-engine.d.cts +2 -2
  38. package/dist/proof-run-engine.d.ts +2 -2
  39. package/dist/proof-run-engine.js +2 -2
  40. package/dist/runner.js +2 -2
  41. package/lib/workspace-core.mjs +62 -7
  42. package/package.json +2 -2
  43. package/runtime/lib/riddle_core_call.mjs +662 -40
  44. package/runtime/lib/util.py +117 -40
  45. package/runtime/lib/verify.py +17 -4
  46. package/runtime/tests/recon_verify_smoke.py +137 -1
@@ -621,37 +621,65 @@ def nested_non_riddle_enabled():
621
621
  def invoke_riddle_core(tool, args, timeout=180):
622
622
  """Call Riddle's shared core package directly, without nested OpenClaw tool invocation."""
623
623
  script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'riddle_core_call.mjs')
624
+ result_file = tempfile.NamedTemporaryFile(prefix='riddle-proof-direct-', suffix='.json', delete=False).name
625
+ stderr_file = tempfile.NamedTemporaryFile(prefix='riddle-proof-direct-', suffix='.stderr', delete=False).name
626
+ env = dict(os.environ)
627
+ env['RIDDLE_PROOF_DIRECT_RESULT_FILE'] = result_file
628
+ stderr_text = ''
624
629
  try:
625
- r = sp.run(
626
- ['node', script, tool, json.dumps(args)],
627
- capture_output=True, text=True, timeout=timeout
628
- )
630
+ with open(stderr_file, 'w', encoding='utf-8') as stderr_handle:
631
+ r = sp.run(
632
+ ['node', script, tool, json.dumps(args)],
633
+ stdout=sp.DEVNULL, stderr=stderr_handle, text=True, timeout=timeout, env=env
634
+ )
629
635
  except sp.TimeoutExpired as e:
630
636
  print('direct_riddle(' + tool + ') TIMED OUT after ' + str(timeout) + 's')
631
- if e.stdout:
632
- print(' stdout: ' + e.stdout[:500])
633
- if e.stderr:
634
- print(' stderr: ' + e.stderr[:500])
637
+ try:
638
+ with open(stderr_file, 'r', encoding='utf-8') as f:
639
+ stderr_text = f.read()[:500]
640
+ except Exception:
641
+ stderr_text = ''
642
+ if stderr_text:
643
+ print(' stderr: ' + stderr_text)
635
644
  return {
636
645
  'ok': False,
637
646
  'timeout': True,
638
647
  'error': f'direct_riddle({tool}) timed out after {timeout}s',
639
- 'stdout': (e.stdout or '')[:500],
640
- 'stderr': (e.stderr or '')[:500],
648
+ 'stderr': stderr_text[:500],
641
649
  }
650
+ finally:
651
+ if 'r' not in locals():
652
+ for temp_path in (result_file, stderr_file):
653
+ try:
654
+ os.unlink(temp_path)
655
+ except Exception:
656
+ pass
657
+ try:
658
+ with open(stderr_file, 'r', encoding='utf-8') as f:
659
+ stderr_text = f.read()
660
+ except Exception:
661
+ stderr_text = ''
642
662
 
643
663
  if r.returncode != 0:
644
664
  print('direct_riddle(' + tool + ') FAILED rc=' + str(r.returncode))
645
- print(' stdout: ' + r.stdout[:500])
646
- print(' stderr: ' + r.stderr[:500])
665
+ print(' stderr: ' + stderr_text[:500])
647
666
 
648
667
  try:
649
- return json.loads(r.stdout)
650
- except:
668
+ with open(result_file, 'r', encoding='utf-8') as f:
669
+ return json.loads(f.read())
670
+ except Exception:
651
671
  print('direct_riddle(' + tool + ') JSON parse failed')
652
- print(' stdout: ' + r.stdout[:500])
653
- print(' stderr: ' + r.stderr[:500])
654
- return {'ok': False, 'error': r.stdout[:300], 'stderr': r.stderr[:300]}
672
+ print(' stderr: ' + stderr_text[:500])
673
+ return {'ok': False, 'error': 'direct_riddle result file missing or invalid', 'stderr': stderr_text[:300]}
674
+ finally:
675
+ try:
676
+ os.unlink(result_file)
677
+ except Exception:
678
+ pass
679
+ try:
680
+ os.unlink(stderr_file)
681
+ except Exception:
682
+ pass
655
683
 
656
684
 
657
685
  def invoke(tool, args, timeout=180):
@@ -771,7 +799,12 @@ def invoke_retry(tool, args, retries=3, timeout=180):
771
799
  result = invoke(tool, args, timeout=timeout)
772
800
  last_result = result
773
801
  # Check for success indicators
774
- if result.get('ok') or result.get('outputs') or result.get('screenshots'):
802
+ if result.get('ok'):
803
+ return result
804
+ if tool == 'riddle_script' and (result.get('error') or result.get('script_error')):
805
+ print('invoke_retry(riddle_script) stopping early for deterministic script error')
806
+ return result
807
+ if result.get('outputs') or result.get('screenshots'):
775
808
  return result
776
809
  print(f'invoke_retry({tool}) attempt {attempt}/{retries} failed: {str(result.get("error", "no output"))[:200]}')
777
810
  if tool == 'riddle_script' and non_retryable_riddle_script_error(result):
@@ -911,6 +944,39 @@ def summarize_capture_artifact_item(item):
911
944
  return {key: value for key, value in summary.items() if value not in (None, '')}
912
945
 
913
946
 
947
+ def capture_screenshot_url(payload, label=''):
948
+ if not isinstance(payload, dict):
949
+ return ''
950
+ enriched = enrich_capture_payload(payload)
951
+ candidates = []
952
+ for key in ('screenshots', 'outputs', 'artifacts'):
953
+ values = enriched.get(key) or []
954
+ if isinstance(values, list):
955
+ candidates.extend([item for item in values if isinstance(item, dict)])
956
+
957
+ requested = (label or '').strip()
958
+ expected_names = set()
959
+ if requested:
960
+ expected_names.update({
961
+ requested,
962
+ requested + '.png',
963
+ requested + '.jpg',
964
+ requested + '.jpeg',
965
+ requested + '.webp',
966
+ })
967
+ for item in candidates:
968
+ name = str(item.get('name') or '')
969
+ url = str(item.get('url') or '')
970
+ if url and expected_names and name in expected_names:
971
+ return url
972
+ for item in candidates:
973
+ url = str(item.get('url') or '')
974
+ name = str(item.get('name') or '')
975
+ if url and (not name or re.search(r'\.(png|jpe?g|webp|gif)$', name, re.I)):
976
+ return url
977
+ return ''
978
+
979
+
914
980
  def git(cmd, cwd):
915
981
  """Run a shell command in a repo directory."""
916
982
  return sp.run(cmd, shell=True, cwd=cwd, capture_output=True, text=True)
@@ -1152,7 +1218,7 @@ def build_capture_script(url, capture_script, label, wait_for_selector='', viewp
1152
1218
  effective_viewport_matrix = None if script_handles_viewport_matrix else viewport_matrix
1153
1219
  pieces = [
1154
1220
  *viewport_matrix_setup_js(effective_viewport_matrix),
1155
- 'await page.goto(' + json.dumps(url) + ');',
1221
+ 'await page.goto(' + json.dumps(url) + ', { waitUntil: "domcontentloaded", timeout: 30000 });',
1156
1222
  ]
1157
1223
  selector = (wait_for_selector or '').strip()
1158
1224
  if selector:
@@ -1179,33 +1245,44 @@ def capture_static_preview(state, project_dir, label, capture_script, timeout=30
1179
1245
  'raw': {'ok': False, 'error': 'No static build output found. Tried configured build_output, dist, build, out.'},
1180
1246
  }
1181
1247
 
1182
- preview = invoke_retry('riddle_preview', {'directory': build_dir, 'label': label}, retries=3, timeout=timeout)
1183
- if not preview.get('ok'):
1184
- return {
1185
- 'ok': False,
1186
- 'preview_id': preview.get('id', ''),
1187
- 'preview_url': preview.get('preview_url') or preview.get('previewUrl') or '',
1188
- 'url': '',
1189
- 'raw': preview,
1190
- }
1191
- preview_url = preview.get('preview_url') or preview.get('previewUrl') or ''
1192
- preview_id = preview.get('id', '')
1193
- capture_url = join_url_path(preview_url, target_path or state.get('server_path', ''))
1194
-
1195
- script = build_capture_script(capture_url, capture_script, label, state.get('wait_for_selector', ''), state.get('viewport_matrix'))
1196
- args = {'script': script, 'timeout_sec': 60}
1248
+ static_server_command = (
1249
+ (state.get('static_server_command') or '').strip()
1250
+ or os.environ.get('RIDDLE_PROOF_STATIC_SERVER_COMMAND', '').strip()
1251
+ or 'python3 -m http.server "$PORT" --bind 127.0.0.1'
1252
+ )
1253
+ target = target_path or state.get('server_path', '') or '/'
1254
+ args = {
1255
+ 'directory': build_dir,
1256
+ 'command': static_server_command,
1257
+ 'port': int(state.get('server_port') or 3000),
1258
+ 'wait_until': 'domcontentloaded',
1259
+ 'readiness_timeout': 60,
1260
+ 'timeout': max(60, min(int(timeout or 300), 300)),
1261
+ 'path': target,
1262
+ 'readiness_path': '/',
1263
+ 'script': capture_script,
1264
+ }
1265
+ if state.get('wait_for_selector'):
1266
+ args['wait_for_selector'] = state.get('wait_for_selector')
1267
+ if state.get('color_scheme'):
1268
+ args['color_scheme'] = state.get('color_scheme')
1197
1269
  apply_auth_context(state, args)
1198
- shot = invoke_retry('riddle_script', args, retries=3, timeout=max(timeout, 120))
1270
+ shot = invoke_retry('riddle_server_preview', args, retries=2, timeout=max(timeout, 120))
1199
1271
  screenshots = shot.get('screenshots') or []
1200
- url = screenshots[0].get('url', '') if screenshots else ''
1272
+ url = screenshots[0].get('url', '') if screenshots else capture_screenshot_url(shot, label)
1201
1273
  return {
1202
1274
  'ok': bool(url),
1203
- 'preview_id': preview_id,
1204
- 'preview_url': preview_url,
1205
- 'capture_url': capture_url,
1275
+ 'preview_id': '',
1276
+ 'preview_url': shot.get('preview_url') or '',
1277
+ 'capture_url': shot.get('target_url') or target,
1206
1278
  'url': url,
1207
1279
  'raw': {
1208
- 'preview': preview,
1280
+ 'preview': {
1281
+ 'ok': shot.get('ok'),
1282
+ 'runner': shot.get('runner'),
1283
+ 'preview_url': shot.get('preview_url') or '',
1284
+ 'target_url': shot.get('target_url') or '',
1285
+ },
1209
1286
  'capture': shot,
1210
1287
  },
1211
1288
  }
@@ -310,12 +310,13 @@ def payload_has_capture_artifacts(payload):
310
310
  def capture_payload_error(payload):
311
311
  if not isinstance(payload, dict):
312
312
  return ''
313
- if payload.get('ok') is False and not payload_has_capture_artifacts(payload):
314
- for key in ('error', 'stderr', 'stdout'):
313
+ if payload.get('ok') is False:
314
+ for key in ('error', 'script_error', 'stderr', 'stdout'):
315
315
  value = payload.get(key)
316
316
  if value:
317
317
  return str(value).strip()
318
- return 'capture tool returned ok=false without artifacts'
318
+ if not payload_has_capture_artifacts(payload):
319
+ return 'capture tool returned ok=false without artifacts'
319
320
  return ''
320
321
 
321
322
 
@@ -3222,7 +3223,7 @@ def evaluate_capture_quality(payload, expected_path, verification_mode='proof'):
3222
3223
  }
3223
3224
 
3224
3225
 
3225
- def build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker='', route_expectation=None):
3226
+ def build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker='', route_expectation=None, verification_mode=''):
3226
3227
  reasons = []
3227
3228
  if not required_baseline_present:
3228
3229
  reasons.append('Recon baseline is missing, so verify should return to recon instead of guessing a new reference context.')
@@ -3361,6 +3362,17 @@ def build_capture_retry_decision(after_observation, required_baseline_present, p
3361
3362
  elif error_messages:
3362
3363
  reasons.append('Capture script error: ' + error_messages[0][:500])
3363
3364
  summary = 'Verify capture script failed: ' + error_messages[0][:300]
3365
+ reasons.append('The capture script produced a concrete browser/runtime failure, so this run should block with that exact evidence instead of re-authoring in a loop.')
3366
+ return {
3367
+ 'decision': 'failed_interaction_capture' if normalized_verification_mode(verification_mode) in INTERACTION_MODES else 'failed_capture',
3368
+ 'summary': summary,
3369
+ 'recommended_stage': None,
3370
+ 'continue_with_stage': None,
3371
+ 'blocking': True,
3372
+ 'terminal_blocker': True,
3373
+ 'reasons': reasons,
3374
+ 'mismatch': None,
3375
+ }
3364
3376
  else:
3365
3377
  summary = 'Verify needs another internal capture iteration before the evidence can be judged.'
3366
3378
  reasons.append('The capture plan itself needs revision, so author should tighten the proof script or framing inputs.')
@@ -4248,6 +4260,7 @@ else:
4248
4260
  required_baseline_present,
4249
4261
  proof_evidence_blocker or structured_interaction_capture_failure_summary,
4250
4262
  s.get('route_expectation') or {},
4263
+ s.get('verification_mode') or '',
4251
4264
  )
4252
4265
  if visual_delta_recovery:
4253
4266
  observation_reason = str(after_observation.get('reason') or '')
@@ -89,6 +89,138 @@ class FakeRiddle:
89
89
  'id': f'pv-{label}',
90
90
  'preview_url': f'https://preview.example.com/{label}/',
91
91
  }
92
+ if tool == 'riddle_server_preview':
93
+ script = args.get('script', '')
94
+ target_path = args.get('path') or '/'
95
+ path_only, _, query = str(target_path).partition('?')
96
+ search = '?' + query if query else ''
97
+ delegated_markers = [
98
+ 'after-proof',
99
+ 'audioNoProof',
100
+ 'audioFailedProof',
101
+ 'throwAfterProofEvidence',
102
+ 'attack_ms_after',
103
+ 'window.__riddleProofEvidence',
104
+ 'globalThis.__riddleProofEvidence',
105
+ 'clickedSkipHashNavigation',
106
+ 'pricingQueryHashDropsTerminal',
107
+ 'pricingQueryHashStructuredNegativeControl',
108
+ 'pricingQueryHashPassesWithPageStateHashGap',
109
+ 'clickedProofNavigation',
110
+ 'clickedHomeNavigation',
111
+ 'skipLinkTimeout',
112
+ 'interactionThrownAfterFailedEvidence',
113
+ 'interactionThrownError',
114
+ ]
115
+ if any(marker in script for marker in delegated_markers):
116
+ return self.invoke_retry('riddle_script', {'script': script}, retries=retries, timeout=timeout)
117
+ if path_only == '/wrong' or '/wrong' in script:
118
+ return {
119
+ 'ok': True,
120
+ 'runner': 'local-server-preview',
121
+ 'target_url': 'http://127.0.0.1:3000/wrong',
122
+ 'screenshots': [{'url': 'https://cdn.example.com/wrong.png'}],
123
+ 'outputs': [{'name': 'wrong.png', 'url': 'https://cdn.example.com/wrong.png'}],
124
+ 'console': ['RIDDLE_PROOF_STATE:{"bodyTextLength":5,"interactiveElements":0,"pathname":"/wrong","title":"Wrong"}'],
125
+ }
126
+ if '/games/drum-sequencer' in path_only:
127
+ page_state = {
128
+ 'bodyTextLength': 240,
129
+ 'visibleTextSample': 'Neon Step Sequencer Monkberry Moon Delight Mix Board Play All',
130
+ 'interactiveElements': 8,
131
+ 'visibleInteractiveElements': 8,
132
+ 'pathname': '/games/drum-sequencer',
133
+ 'search': search,
134
+ 'title': 'Neon Step Sequencer',
135
+ 'buttons': ['Play All', 'Shuffle'],
136
+ 'headings': ['Neon Step Sequencer'],
137
+ 'links': [],
138
+ 'canvasCount': 1,
139
+ 'largeVisibleElements': [{'tag': 'canvas', 'text': ''}],
140
+ }
141
+ return {
142
+ 'ok': True,
143
+ 'runner': 'local-server-preview',
144
+ 'target_url': 'http://127.0.0.1:3000' + str(target_path),
145
+ 'screenshots': [{'url': 'https://cdn.example.com/sequencer-before.png'}],
146
+ 'outputs': [{'name': 'before.png', 'url': 'https://cdn.example.com/sequencer-before.png'}],
147
+ 'console': state_console(page_state),
148
+ }
149
+ if '/games/tic-tac-toe' in path_only:
150
+ return {
151
+ 'ok': True,
152
+ 'runner': 'local-server-preview',
153
+ 'target_url': 'http://127.0.0.1:3000' + str(target_path),
154
+ 'screenshots': [{'url': 'https://cdn.example.com/tictactoe-before.png'}],
155
+ 'outputs': [{'name': 'before.png', 'url': 'https://cdn.example.com/tictactoe-before.png'}],
156
+ 'console': state_console({
157
+ 'bodyTextLength': 220,
158
+ 'visibleTextSample': 'LilArcade Tic Tac Toe Player X Reset Game',
159
+ 'interactiveElements': 5,
160
+ 'visibleInteractiveElements': 5,
161
+ 'pathname': '/games/tic-tac-toe',
162
+ 'title': 'TicTacToe',
163
+ 'buttons': ['Reset Game'],
164
+ 'headings': ['Tic Tac Toe'],
165
+ 'links': [],
166
+ 'canvasCount': 0,
167
+ 'largeVisibleElements': [{'tag': 'button', 'text': 'Reset Game'}],
168
+ }),
169
+ }
170
+ if 'after-proof' in script:
171
+ after_url = 'https://cdn.example.com/after-artifact' if 'noVisualDelta' in script else 'https://cdn.example.com/after.png'
172
+ outputs = [{'name': 'after.png', 'url': after_url}]
173
+ if 'proof-session' in script:
174
+ outputs.append({'name': 'proof-session.json', 'url': 'https://cdn.example.com/proof-session.json'})
175
+ payload = {
176
+ 'ok': True,
177
+ 'runner': 'local-server-preview',
178
+ 'target_url': 'http://127.0.0.1:3000' + str(target_path),
179
+ 'screenshots': [{'url': after_url}],
180
+ 'outputs': outputs,
181
+ 'console': state_console({
182
+ 'bodyTextLength': 180,
183
+ 'visibleTextSample': 'Pricing CTA Buy Now',
184
+ 'interactiveElements': 4,
185
+ 'visibleInteractiveElements': 4,
186
+ 'pathname': path_only or '/pricing',
187
+ 'search': search,
188
+ 'title': 'After',
189
+ 'buttons': ['Buy Now'],
190
+ 'headings': ['Pricing'],
191
+ 'links': [],
192
+ 'canvasCount': 0,
193
+ 'largeVisibleElements': [{'tag': 'button', 'text': 'Buy Now'}],
194
+ }),
195
+ }
196
+ if 'noVisualDelta' not in script:
197
+ payload['visual_diff'] = {
198
+ 'diffPercentage': 1.2,
199
+ 'differentPixels': 12000,
200
+ 'totalPixels': 972000,
201
+ }
202
+ return payload
203
+ return {
204
+ 'ok': True,
205
+ 'runner': 'local-server-preview',
206
+ 'target_url': 'http://127.0.0.1:3000' + str(target_path),
207
+ 'screenshots': [{'url': 'https://cdn.example.com/home-before.png'}],
208
+ 'outputs': [{'name': 'before.png', 'url': 'https://cdn.example.com/home-before.png'}],
209
+ 'console': state_console({
210
+ 'bodyTextLength': 180,
211
+ 'visibleTextSample': 'Riddle Proof homepage hero Start Free',
212
+ 'interactiveElements': 4,
213
+ 'visibleInteractiveElements': 4,
214
+ 'pathname': path_only or '/',
215
+ 'search': search,
216
+ 'title': 'Riddle',
217
+ 'buttons': ['Start Free'],
218
+ 'headings': ['Riddle Proof'],
219
+ 'links': [],
220
+ 'canvasCount': 0,
221
+ 'largeVisibleElements': [{'tag': 'button', 'text': 'Start Free'}],
222
+ }),
223
+ }
92
224
  if tool == 'riddle_script':
93
225
  script = args.get('script', '')
94
226
  if 'preview.example.com' in script and '/wrong' in script:
@@ -3709,11 +3841,15 @@ def run_verify_capture_retry_surfaces_script_timeout():
3709
3841
 
3710
3842
  assert after_verify['verify_status'] == 'capture_incomplete'
3711
3843
  capture_quality = after_verify['verify_decision_request']['capture_quality']
3712
- assert capture_quality['recommended_stage'] in ('author', 'verify')
3844
+ assert capture_quality['recommended_stage'] is None
3845
+ assert capture_quality['continue_with_stage'] is None
3846
+ assert capture_quality['blocking'] is True
3847
+ assert capture_quality['terminal_blocker'] is True
3713
3848
  capture_quality_text = json.dumps(capture_quality, sort_keys=True)
3714
3849
  assert 'locator.click: Timeout 30000ms exceeded' in capture_quality_text
3715
3850
  return {
3716
3851
  'ok': True,
3852
+ 'decision': capture_quality['decision'],
3717
3853
  'summary': capture_quality['summary'],
3718
3854
  }
3719
3855
  finally: