@riddledc/riddle-proof 0.8.6 → 0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/adapters/codex-exec-agent.cjs +30 -10
  2. package/dist/adapters/codex-exec-agent.js +1 -1
  3. package/dist/adapters/codex.cjs +30 -10
  4. package/dist/adapters/codex.js +1 -1
  5. package/dist/adapters/local-agent.cjs +30 -10
  6. package/dist/adapters/local-agent.js +1 -1
  7. package/dist/advanced/engine-harness.cjs +64 -7
  8. package/dist/advanced/engine-harness.js +2 -2
  9. package/dist/advanced/index.cjs +64 -7
  10. package/dist/advanced/index.d.cts +1 -1
  11. package/dist/advanced/index.d.ts +1 -1
  12. package/dist/advanced/index.js +4 -4
  13. package/dist/advanced/proof-run-core.cjs +63 -6
  14. package/dist/advanced/proof-run-core.js +1 -1
  15. package/dist/advanced/proof-run-engine.cjs +63 -6
  16. package/dist/advanced/proof-run-engine.d.cts +1 -1
  17. package/dist/advanced/proof-run-engine.d.ts +1 -1
  18. package/dist/advanced/proof-run-engine.js +2 -2
  19. package/dist/advanced/runner.js +2 -2
  20. package/dist/{chunk-GMZ57RRY.js → chunk-46DDSZJR.js} +1 -1
  21. package/dist/{chunk-RV6LK7HU.js → chunk-5N5QFI2S.js} +63 -6
  22. package/dist/{chunk-UIJ7X63P.js → chunk-5N6MQCLC.js} +1 -1
  23. package/dist/{chunk-BDFSMWTI.js → chunk-E7ATYSYS.js} +1 -1
  24. package/dist/{chunk-5MILMRQY.js → chunk-PYCQNK66.js} +30 -10
  25. package/dist/{chunk-NAFJ4KSF.js → chunk-V6VZ3CAI.js} +2 -2
  26. package/dist/cli/index.js +4 -4
  27. package/dist/cli.cjs +99 -22
  28. package/dist/cli.js +4 -4
  29. package/dist/codex-exec-agent.cjs +30 -10
  30. package/dist/codex-exec-agent.js +1 -1
  31. package/dist/engine-harness.cjs +64 -7
  32. package/dist/engine-harness.js +2 -2
  33. package/dist/index.cjs +99 -22
  34. package/dist/index.js +4 -4
  35. package/dist/local-agent.cjs +30 -10
  36. package/dist/local-agent.js +1 -1
  37. package/dist/proof-run-core.cjs +63 -6
  38. package/dist/proof-run-core.js +1 -1
  39. package/dist/{proof-run-engine-BO1h0Bmy.d.cts → proof-run-engine-BlocjMni.d.cts} +3 -3
  40. package/dist/{proof-run-engine-CIdpWNh6.d.ts → proof-run-engine-C_m8WJmX.d.ts} +3 -3
  41. package/dist/proof-run-engine.cjs +63 -6
  42. package/dist/proof-run-engine.d.cts +1 -1
  43. package/dist/proof-run-engine.d.ts +1 -1
  44. package/dist/proof-run-engine.js +2 -2
  45. package/dist/runner.js +2 -2
  46. package/package.json +1 -1
  47. package/runtime/lib/author.py +39 -1
  48. package/runtime/lib/verify.py +241 -6
  49. package/runtime/tests/recon_verify_smoke.py +89 -20
@@ -26,7 +26,7 @@ import {
26
26
  visualDeltaShipGateReason,
27
27
  workflowFile,
28
28
  writeState
29
- } from "./chunk-RV6LK7HU.js";
29
+ } from "./chunk-5N5QFI2S.js";
30
30
  import "./chunk-MLKGABMK.js";
31
31
  export {
32
32
  BUNDLED_RIDDLE_PROOF_DIR,
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
292
292
  blocking?: boolean;
293
293
  details?: Record<string, unknown>;
294
294
  ok: boolean;
295
- action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
295
+ action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
296
296
  state_path: string;
297
297
  stage: any;
298
298
  summary: string;
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
382
382
  continueWithStage?: WorkflowStage | null;
383
383
  blocking?: boolean;
384
384
  details?: Record<string, unknown>;
385
- action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
385
+ action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
386
386
  state_path: string;
387
387
  stage: any;
388
388
  checkpoint: string;
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
659
659
  error?: undefined;
660
660
  } | {
661
661
  ok: boolean;
662
- action: "setup" | "recon" | "author" | "implement" | "verify" | "ship";
662
+ action: "recon" | "author" | "ship" | "implement" | "verify" | "setup";
663
663
  state_path: string;
664
664
  stage: any;
665
665
  summary: string;
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
292
292
  blocking?: boolean;
293
293
  details?: Record<string, unknown>;
294
294
  ok: boolean;
295
- action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
295
+ action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
296
296
  state_path: string;
297
297
  stage: any;
298
298
  summary: string;
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
382
382
  continueWithStage?: WorkflowStage | null;
383
383
  blocking?: boolean;
384
384
  details?: Record<string, unknown>;
385
- action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
385
+ action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
386
386
  state_path: string;
387
387
  stage: any;
388
388
  checkpoint: string;
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
659
659
  error?: undefined;
660
660
  } | {
661
661
  ok: boolean;
662
- action: "setup" | "recon" | "author" | "implement" | "verify" | "ship";
662
+ action: "recon" | "author" | "ship" | "implement" | "verify" | "setup";
663
663
  state_path: string;
664
664
  stage: any;
665
665
  summary: string;
@@ -195,6 +195,56 @@ function writeState(statePath, state) {
195
195
  function normalizeOptionalString(value) {
196
196
  return typeof value === "string" ? value.trim() : void 0;
197
197
  }
198
+ var INTERACTION_VERIFICATION_MODES = /* @__PURE__ */ new Set(["interaction", "interactive", "user_flow", "user-flow", "workflow"]);
199
+ function normalizeRoutePath(value) {
200
+ const raw = typeof value === "string" ? value.trim() : "";
201
+ if (!raw) return "";
202
+ try {
203
+ const url = /^https?:\/\//i.test(raw) ? new URL(raw) : new URL(raw.startsWith("/") || raw.startsWith("?") || raw.startsWith("#") ? raw : `/${raw}`, "https://riddle-proof.local");
204
+ const pathname = url.pathname.replace(/\/+$/, "") || "/";
205
+ return `${pathname}${url.search}${url.hash}`;
206
+ } catch {
207
+ const hashSplit = raw.split("#");
208
+ const beforeHash = hashSplit.shift() || "";
209
+ const hash = hashSplit.length ? `#${hashSplit.join("#")}` : "";
210
+ const querySplit = beforeHash.split("?");
211
+ const rawPath = querySplit.shift() || "";
212
+ const query = querySplit.length ? `?${querySplit.join("?")}` : "";
213
+ const pathname = `/${rawPath}`.replace(/\/+/g, "/").replace(/\/+$/, "") || "/";
214
+ return `${pathname}${query}${hash}`;
215
+ }
216
+ }
217
+ function isInteractionVerificationMode(value) {
218
+ return INTERACTION_VERIFICATION_MODES.has(typeof value === "string" ? value.trim().toLowerCase() : "");
219
+ }
220
+ function stringRecordValue(record, key) {
221
+ if (!record || typeof record !== "object") return "";
222
+ const value = record[key];
223
+ return typeof value === "string" ? value.trim() : "";
224
+ }
225
+ function appendStateWarning(state, key, warning) {
226
+ const existing = Array.isArray(state[key]) ? state[key].filter((item) => typeof item === "string") : [];
227
+ if (!existing.includes(warning)) state[key] = [...existing, warning];
228
+ }
229
+ function interactionStartPathForAuthorPacket(state, parsed, refined) {
230
+ return normalizeRoutePath(
231
+ stringRecordValue(state, "expected_start_path") || stringRecordValue(refined, "expected_start_path") || stringRecordValue(parsed.interaction_contract, "start_path") || stringRecordValue(parsed.proof_contract, "start_path") || stringRecordValue(state, "server_path") || "/"
232
+ ) || "/";
233
+ }
234
+ function authorPacketServerPath(state, parsed, refined, serverPath, expectedTerminalPath) {
235
+ if (!isInteractionVerificationMode(state.verification_mode)) return serverPath;
236
+ const startPath = interactionStartPathForAuthorPacket(state, parsed, refined);
237
+ state.expected_start_path = startPath;
238
+ if (expectedTerminalPath && normalizeRoutePath(serverPath) === normalizeRoutePath(expectedTerminalPath) && normalizeRoutePath(serverPath) !== startPath) {
239
+ appendStateWarning(
240
+ state,
241
+ "author_warnings",
242
+ "Supervisor packet refined_inputs.server_path matched the terminal interaction route; kept the recon start route for capture."
243
+ );
244
+ return startPath;
245
+ }
246
+ return serverPath;
247
+ }
198
248
  function knownEnvironmentIssuesFromNotes(notes) {
199
249
  const text = notes.toLowerCase();
200
250
  const issues = [];
@@ -853,17 +903,24 @@ function mergeStateFromParams(statePath, params) {
853
903
  state.proof_contract = parsed.proof_contract;
854
904
  }
855
905
  const refined = parsed?.refined_inputs || {};
906
+ const expectedTerminalPath = normalizeOptionalString(
907
+ typeof refined?.expected_terminal_path === "string" ? refined.expected_terminal_path : typeof parsed?.expected_terminal_path === "string" ? parsed.expected_terminal_path : ""
908
+ ) || "";
856
909
  if (typeof refined?.server_path === "string") {
857
- state.server_path = normalizeOptionalString(refined.server_path) || "";
910
+ const refinedServerPath = normalizeOptionalString(refined.server_path) || "";
911
+ state.server_path = authorPacketServerPath(
912
+ state,
913
+ parsed,
914
+ refined,
915
+ refinedServerPath,
916
+ expectedTerminalPath
917
+ );
858
918
  state.server_path_source = "supervising_agent";
859
919
  }
860
920
  if (typeof refined?.wait_for_selector === "string") state.wait_for_selector = normalizeOptionalString(refined.wait_for_selector) || "";
861
921
  if (typeof refined?.reference === "string" && refined.reference.trim()) state.reference = refined.reference.trim();
862
- if (typeof refined?.expected_terminal_path === "string") {
863
- state.expected_terminal_path = normalizeOptionalString(refined.expected_terminal_path) || "";
864
- }
865
- if (typeof parsed?.expected_terminal_path === "string") {
866
- state.expected_terminal_path = normalizeOptionalString(parsed.expected_terminal_path) || "";
922
+ if (expectedTerminalPath) {
923
+ state.expected_terminal_path = expectedTerminalPath;
867
924
  }
868
925
  if (typeof parsed?.confidence === "string") state.supervisor_author_confidence = normalizeOptionalString(parsed.confidence) || null;
869
926
  if (parsed?.rationale !== void 0) state.supervisor_author_rationale = parsed.rationale;
@@ -1,2 +1,2 @@
1
1
  import './proof-run-core-CE0jx7wL.cjs';
2
- export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-BO1h0Bmy.cjs';
2
+ export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-BlocjMni.cjs';
@@ -1,2 +1,2 @@
1
1
  import './proof-run-core-CE0jx7wL.js';
2
- export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-CIdpWNh6.js';
2
+ export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-C_m8WJmX.js';
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  createRiddleProofEngine,
3
3
  executeWorkflow
4
- } from "./chunk-GMZ57RRY.js";
5
- import "./chunk-RV6LK7HU.js";
4
+ } from "./chunk-46DDSZJR.js";
5
+ import "./chunk-5N5QFI2S.js";
6
6
  import "./chunk-MLKGABMK.js";
7
7
  export {
8
8
  createRiddleProofEngine,
package/dist/runner.js CHANGED
@@ -1,9 +1,9 @@
1
1
  import {
2
2
  runRiddleProof
3
- } from "./chunk-UIJ7X63P.js";
3
+ } from "./chunk-5N6MQCLC.js";
4
4
  import "./chunk-YZUVEJ5B.js";
5
5
  import "./chunk-FMOYUYH2.js";
6
- import "./chunk-RV6LK7HU.js";
6
+ import "./chunk-5N5QFI2S.js";
7
7
  import "./chunk-4FOHZ7JG.js";
8
8
  import "./chunk-VY4Y5U57.js";
9
9
  import "./chunk-MLKGABMK.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@riddledc/riddle-proof",
3
- "version": "0.8.6",
3
+ "version": "0.8.8",
4
4
  "description": "Reusable Riddle Proof contracts and helpers for evidence-backed agent changes.",
5
5
  "license": "MIT",
6
6
  "author": "RiddleDC",
@@ -9,6 +9,7 @@ Instead it does two things:
9
9
  import json
10
10
  import os
11
11
  import sys
12
+ from urllib.parse import urlparse
12
13
 
13
14
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
14
15
  from util import load_state, save_state
@@ -34,6 +35,31 @@ def normalize_path(value):
34
35
  return path
35
36
 
36
37
 
38
+ def normalize_route_path(value):
39
+ raw = (value or '').strip()
40
+ if not raw:
41
+ return ''
42
+ parsed = urlparse(raw)
43
+ path = parsed.path or raw
44
+ query = parsed.query or ''
45
+ fragment = parsed.fragment or ''
46
+ if '?' in path:
47
+ path, query_tail = path.split('?', 1)
48
+ query = query or query_tail.split('#', 1)[0]
49
+ if '#' in path:
50
+ path, fragment_tail = path.split('#', 1)
51
+ fragment = fragment or fragment_tail
52
+ if not path.startswith('/'):
53
+ path = '/' + path.lstrip('/')
54
+ path = path.rstrip('/') or '/'
55
+ return path + (('?' + query) if query else '') + (('#' + fragment) if fragment else '')
56
+
57
+
58
+ def is_interaction_mode(state):
59
+ mode = (state.get('verification_mode') or '').strip().lower()
60
+ return mode in ('interaction', 'interactive', 'user_flow', 'user-flow', 'workflow')
61
+
62
+
37
63
  def first_non_empty(*values):
38
64
  for value in values:
39
65
  if isinstance(value, str) and value.strip():
@@ -284,6 +310,17 @@ expected_terminal_path = normalize_path(first_non_empty(
284
310
  supervisor_packet.get('expected_after_path'),
285
311
  s.get('expected_terminal_path'),
286
312
  ))
313
+ author_warnings = []
314
+ if is_interaction_mode(s):
315
+ interaction_start_path = normalize_route_path(first_non_empty(s.get('expected_start_path'), default_path, s.get('server_path'), '/')) or '/'
316
+ refined_route = normalize_route_path(refined_path)
317
+ terminal_route = normalize_route_path(expected_terminal_path)
318
+ if terminal_route and refined_route == terminal_route and refined_route != interaction_start_path:
319
+ refined_path = interaction_start_path
320
+ author_warnings.append(
321
+ 'Supervisor packet refined_inputs.server_path matched the terminal interaction route; kept the recon start route for capture.'
322
+ )
323
+ s['expected_start_path'] = interaction_start_path
287
324
  confidence = provided_payload['confidence'] if provided_payload['confidence'] in ('high', 'medium', 'low') else 'medium'
288
325
  rationale = sanitize_rationale(provided_payload['rationale'])
289
326
  summary = provided_payload['summary'] or 'Supervising agent supplied the proof packet from recon observations.'
@@ -301,6 +338,7 @@ authored_packet = {
301
338
  'interaction_contract': provided_payload['interaction_contract'],
302
339
  'proof_contract': provided_payload['proof_contract'],
303
340
  'rationale': rationale,
341
+ 'warnings': author_warnings,
304
342
  'confidence': confidence,
305
343
  'mode': 'supervising_agent',
306
344
  'model': ('supervising-agent:' + RUNTIME_MODEL_HINT) if RUNTIME_MODEL_HINT else 'supervising-agent',
@@ -328,7 +366,7 @@ s['author_mode'] = 'supervising_agent'
328
366
  s['author_model'] = authored_packet['model']
329
367
  s['author_confidence'] = confidence
330
368
  s['author_rationale'] = rationale
331
- s['author_warnings'] = []
369
+ s['author_warnings'] = author_warnings
332
370
  s['author_runtime_model_hint'] = RUNTIME_MODEL_HINT
333
371
  s['author_packet'] = authored_packet
334
372
  s['author_summary'] = summary
@@ -1995,16 +1995,22 @@ def route_parts(value):
1995
1995
 
1996
1996
  EXPLICIT_TERMINAL_PATH_KEYS = (
1997
1997
  'expected_terminal_path', 'expectedTerminalPath',
1998
+ 'expected_terminal_url', 'expectedTerminalUrl',
1998
1999
  'expected_terminal_route', 'expectedTerminalRoute',
1999
2000
  'terminal_path', 'terminalPath',
2001
+ 'terminal_url', 'terminalUrl',
2000
2002
  'terminal_route', 'terminalRoute',
2001
2003
  'expected_after_path', 'expectedAfterPath',
2004
+ 'expected_after_url', 'expectedAfterUrl',
2002
2005
  'expected_after_route', 'expectedAfterRoute',
2003
2006
  'after_path', 'afterPath',
2007
+ 'after_url', 'afterUrl',
2004
2008
  'after_route', 'afterRoute',
2005
2009
  'expected_final_path', 'expectedFinalPath',
2010
+ 'expected_final_url', 'expectedFinalUrl',
2006
2011
  'expected_final_route', 'expectedFinalRoute',
2007
2012
  'final_path', 'finalPath',
2013
+ 'final_url', 'finalUrl',
2008
2014
  'final_route', 'finalRoute',
2009
2015
  )
2010
2016
  LOCATION_PATH_KEYS = ('path', 'pathname', 'route', 'url', 'href')
@@ -2016,6 +2022,11 @@ AFTER_STATE_KEYS = (
2016
2022
  'final', 'final_state', 'finalState',
2017
2023
  'expected_final', 'expectedFinal',
2018
2024
  )
2025
+ EVIDENCE_CONTAINER_KEYS = (
2026
+ 'proofEvidence', 'proof_evidence',
2027
+ 'interactionEvidence', 'interaction_evidence',
2028
+ 'evidence',
2029
+ )
2019
2030
  CONTRACT_STATE_KEYS = (
2020
2031
  'interaction_contract', 'interactionContract',
2021
2032
  'proof_contract', 'proofContract',
@@ -2067,6 +2078,17 @@ def terminal_path_from_record(record, depth=0):
2067
2078
  candidate = terminal_path_from_record(item, depth + 1)
2068
2079
  if candidate:
2069
2080
  return candidate
2081
+ for key in EVIDENCE_CONTAINER_KEYS:
2082
+ value = record.get(key)
2083
+ if isinstance(value, dict):
2084
+ candidate = terminal_path_from_record(value, depth + 1)
2085
+ if candidate:
2086
+ return candidate
2087
+ elif isinstance(value, list):
2088
+ for item in value:
2089
+ candidate = terminal_path_from_record(item, depth + 1)
2090
+ if candidate:
2091
+ return candidate
2070
2092
  for key in CONTRACT_STATE_KEYS:
2071
2093
  value = record.get(key)
2072
2094
  if isinstance(value, dict):
@@ -2081,11 +2103,25 @@ def terminal_path_from_record(record, depth=0):
2081
2103
  return ''
2082
2104
 
2083
2105
 
2106
+ def text_path_candidate(value):
2107
+ if not isinstance(value, str):
2108
+ return ''
2109
+ raw = value.strip().rstrip('.,;:)]}')
2110
+ return path_candidate(raw)
2111
+
2112
+
2084
2113
  def terminal_path_from_text(value):
2085
2114
  if not isinstance(value, str):
2086
2115
  return ''
2087
2116
  for match in re.findall(r"""['"`](/[^'"`\s]+[?#][^'"`\s]*)['"`]""", value):
2088
- candidate = path_candidate(match)
2117
+ candidate = text_path_candidate(match)
2118
+ if candidate:
2119
+ return candidate
2120
+ context_pattern = re.compile(
2121
+ r"""(?is)\b(?:expected\s+(?:terminal|after|final)|terminal|after|final)\b[^/\r\n]{0,120}['"`]?(/[^'"`\s,;)]*)"""
2122
+ )
2123
+ for match in context_pattern.findall(value):
2124
+ candidate = text_path_candidate(match)
2089
2125
  if candidate:
2090
2126
  return candidate
2091
2127
  return ''
@@ -2122,6 +2158,170 @@ def interaction_assertions_pass(value):
2122
2158
  return False
2123
2159
 
2124
2160
 
2161
+ INTERACTION_ASSERTION_CONTAINER_KEYS = ('assertions', 'checks', 'predicates', 'expectations')
2162
+ INTERACTION_FAILURE_FLAG_KEYS = (
2163
+ 'passed',
2164
+ 'ok',
2165
+ 'valid',
2166
+ 'success',
2167
+ 'proofReady',
2168
+ 'proof_ready',
2169
+ 'interactionPassed',
2170
+ 'interaction_passed',
2171
+ 'routeMatches',
2172
+ 'route_matches',
2173
+ )
2174
+ INTERACTION_FAILURE_STATUS_VALUES = {'fail', 'failed', 'failure', 'error', 'errored', 'timeout', 'timed_out'}
2175
+ INTERACTION_ASSERTION_NAME_KEYS = ('name', 'id', 'key', 'label', 'assertion', 'check', 'field')
2176
+ INTERACTION_ROUTE_CONTEXT_KEYS = (
2177
+ 'expected',
2178
+ 'observed',
2179
+ 'actual',
2180
+ 'start',
2181
+ 'before',
2182
+ 'after',
2183
+ 'terminal',
2184
+ 'final',
2185
+ 'expected_after',
2186
+ 'expectedAfter',
2187
+ 'expected_terminal',
2188
+ 'expectedTerminal',
2189
+ 'expected_final',
2190
+ 'expectedFinal',
2191
+ )
2192
+
2193
+
2194
+ def failure_label(prefix, key):
2195
+ key = str(key or '').strip()
2196
+ prefix = str(prefix or '').strip()
2197
+ if prefix and key:
2198
+ return prefix + '.' + key
2199
+ return key or prefix or 'failed'
2200
+
2201
+
2202
+ def assertion_item_label(item, fallback):
2203
+ if isinstance(item, dict):
2204
+ for key in INTERACTION_ASSERTION_NAME_KEYS:
2205
+ value = str(item.get(key) or '').strip()
2206
+ if value:
2207
+ return value
2208
+ return fallback
2209
+
2210
+
2211
+ def collect_interaction_failed_assertions(value, prefix='', depth=0):
2212
+ if depth > 6:
2213
+ return []
2214
+ failures = []
2215
+ if isinstance(value, dict):
2216
+ for key in INTERACTION_FAILURE_FLAG_KEYS:
2217
+ if value.get(key) is False:
2218
+ failures.append(failure_label(prefix, key))
2219
+ status = str(value.get('status') or value.get('result') or '').strip().lower()
2220
+ if status in INTERACTION_FAILURE_STATUS_VALUES:
2221
+ failures.append(failure_label(prefix, assertion_item_label(value, 'status')))
2222
+ for key in INTERACTION_ASSERTION_CONTAINER_KEYS:
2223
+ checks = value.get(key)
2224
+ container_prefix = failure_label(prefix, key)
2225
+ if isinstance(checks, dict):
2226
+ for check_key, check_value in checks.items():
2227
+ if check_value is False:
2228
+ failures.append(failure_label(container_prefix, check_key))
2229
+ elif isinstance(check_value, dict):
2230
+ nested = collect_interaction_failed_assertions(
2231
+ check_value,
2232
+ failure_label(container_prefix, check_key),
2233
+ depth + 1,
2234
+ )
2235
+ failures.extend(nested)
2236
+ elif isinstance(check_value, list):
2237
+ failures.extend(collect_interaction_failed_assertions(
2238
+ check_value,
2239
+ failure_label(container_prefix, check_key),
2240
+ depth + 1,
2241
+ ))
2242
+ elif isinstance(checks, list):
2243
+ for index, item in enumerate(checks):
2244
+ if item is False:
2245
+ failures.append(failure_label(container_prefix, str(index)))
2246
+ elif isinstance(item, dict):
2247
+ item_label = assertion_item_label(item, str(index))
2248
+ failures.extend(collect_interaction_failed_assertions(
2249
+ item,
2250
+ failure_label(container_prefix, item_label),
2251
+ depth + 1,
2252
+ ))
2253
+ for key in EVIDENCE_CONTAINER_KEYS:
2254
+ nested = value.get(key)
2255
+ if isinstance(nested, (dict, list)):
2256
+ failures.extend(collect_interaction_failed_assertions(nested, failure_label(prefix, key), depth + 1))
2257
+ elif isinstance(value, list):
2258
+ for index, item in enumerate(value):
2259
+ if item is False:
2260
+ failures.append(failure_label(prefix, str(index)))
2261
+ elif isinstance(item, (dict, list)):
2262
+ failures.extend(collect_interaction_failed_assertions(item, prefix, depth + 1))
2263
+ deduped = []
2264
+ seen = set()
2265
+ for failure in failures:
2266
+ failure = str(failure or '').strip()
2267
+ if not failure or failure in seen:
2268
+ continue
2269
+ seen.add(failure)
2270
+ deduped.append(failure)
2271
+ return deduped
2272
+
2273
+
2274
+ def interaction_route_context_present(value, depth=0):
2275
+ if depth > 6:
2276
+ return False
2277
+ if isinstance(value, dict):
2278
+ if terminal_path_from_record(value):
2279
+ return True
2280
+ for key in INTERACTION_ROUTE_CONTEXT_KEYS:
2281
+ nested = value.get(key)
2282
+ if isinstance(nested, dict):
2283
+ if record_path_candidate(nested, allow_location_keys=True):
2284
+ return True
2285
+ query = str(nested.get('query') or nested.get('search') or '').strip()
2286
+ hash_value = str(nested.get('hash') or nested.get('fragment') or '').strip()
2287
+ if query or hash_value:
2288
+ return True
2289
+ if interaction_route_context_present(nested, depth + 1):
2290
+ return True
2291
+ elif isinstance(nested, str) and path_candidate(nested):
2292
+ return True
2293
+ for key in EVIDENCE_CONTAINER_KEYS:
2294
+ nested = value.get(key)
2295
+ if isinstance(nested, (dict, list)) and interaction_route_context_present(nested, depth + 1):
2296
+ return True
2297
+ elif isinstance(value, list):
2298
+ return any(interaction_route_context_present(item, depth + 1) for item in value)
2299
+ return False
2300
+
2301
+
2302
+ def failed_interaction_evidence_summary(proof_evidence):
2303
+ failures = []
2304
+ for record in proof_evidence_records(proof_evidence):
2305
+ failures.extend(collect_interaction_failed_assertions(record))
2306
+ deduped = []
2307
+ seen = set()
2308
+ for failure in failures:
2309
+ if failure not in seen:
2310
+ seen.add(failure)
2311
+ deduped.append(failure)
2312
+ if not deduped or not interaction_route_context_present(proof_evidence):
2313
+ return ''
2314
+ summary = 'Structured interaction proof evidence captured failed assertion(s): ' + ', '.join(deduped[:8]) + '.'
2315
+ capture_errors = []
2316
+ for record in proof_evidence_records(proof_evidence):
2317
+ error = str(record.get('capture_error') or record.get('error') or '').strip()
2318
+ if error:
2319
+ capture_errors.append(error)
2320
+ if capture_errors:
2321
+ summary += ' Capture script error: ' + capture_errors[0][:300]
2322
+ return summary
2323
+
2324
+
2125
2325
  def interaction_terminal_path_from_evidence(proof_evidence):
2126
2326
  for record in proof_evidence_records(proof_evidence):
2127
2327
  candidate = terminal_path_from_record(record)
@@ -2867,6 +3067,9 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
2867
3067
  evidence_basis.append('structured-artifacts')
2868
3068
  if supporting.get('playability_ready'):
2869
3069
  evidence_basis.append('playability')
3070
+ interaction_failure_summary = str(state.get('structured_interaction_failure_summary') or '').strip()
3071
+ if interaction_failure_summary:
3072
+ evidence_basis.append('structured-interaction-failure')
2870
3073
  visual_delta = ((evidence_bundle or {}).get('after') or {}).get('visual_delta') or {}
2871
3074
  if visual_delta.get('status') == 'measured':
2872
3075
  evidence_basis.append('visual-delta')
@@ -2900,6 +3103,8 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
2900
3103
  evidence_bundle['artifact_usage'] = artifact_usage
2901
3104
  visual_delta_blocker = '' if audit_no_diff_mode(state) else visual_delta_blocker_for_mode(verification_mode, visual_delta)
2902
3105
  hard_blockers = [visual_delta_blocker] if visual_delta_blocker else []
3106
+ if interaction_failure_summary:
3107
+ hard_blockers.append(interaction_failure_summary)
2903
3108
  if verification_mode in PLAYABILITY_MODES and not supporting.get('playability_ready'):
2904
3109
  assessment = supporting.get('playability_assessment') or {}
2905
3110
  concerns = assessment.get('concerns') if isinstance(assessment, dict) else []
@@ -2925,6 +3130,10 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
2925
3130
  instructions.append(
2926
3131
  'For visual/UI polish, capture success is not proof. If visual_delta.status is unmeasured, missing, not_applicable, or measured with passed=false, choose needs_implementation or needs_richer_proof instead of ready_to_ship.'
2927
3132
  )
3133
+ if interaction_failure_summary:
3134
+ instructions.append(
3135
+ 'The structured interaction evidence contains failed assertions. Treat those failed assertions as a hard blocker for ready_to_ship; do not send this back to author unless the capture script itself is missing the needed evidence.'
3136
+ )
2928
3137
  instructions.extend([
2929
3138
  'For playable/gameplay proof, screenshots are supporting evidence only. Do not mark ready_to_ship unless playability_assessment.passed is true and the proof shows accepted input, state/time progression, and playfield/canvas pixel motion.',
2930
3139
  'For data/audio/log/metrics/custom modes, judge the structured evidence bundle and proof_evidence_sample directly; screenshots are optional supporting context.',
@@ -2947,6 +3156,7 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
2947
3156
  'viewport_matrix': viewport_matrix,
2948
3157
  'evidence_bundle': evidence_bundle or {},
2949
3158
  'evidence_basis': evidence_basis,
3159
+ 'structured_interaction_failure_summary': interaction_failure_summary,
2950
3160
  'artifact_contract': artifact_contract,
2951
3161
  'artifact_production': artifact_production,
2952
3162
  'artifact_usage': artifact_usage,
@@ -3348,6 +3558,14 @@ if proof_evidence_required_for_mode(s.get('verification_mode')):
3348
3558
  if proof_evidence_blocker:
3349
3559
  summary_lines.append('Structured proof evidence gate: ' + proof_evidence_blocker)
3350
3560
 
3561
+ structured_interaction_failure_summary = ''
3562
+ proof_evidence = evidence_bundle.get('proof_evidence')
3563
+ if verification_mode in INTERACTION_MODES and proof_evidence is not None:
3564
+ structured_interaction_failure_summary = failed_interaction_evidence_summary(proof_evidence)
3565
+ if structured_interaction_failure_summary:
3566
+ summary_lines.append('Structured interaction evidence gate: ' + structured_interaction_failure_summary)
3567
+ s['structured_interaction_failure_summary'] = structured_interaction_failure_summary
3568
+
3351
3569
  visual_delta_recovery = build_visual_delta_recovery_decision(
3352
3570
  s.get('verification_mode'),
3353
3571
  visual_delta,
@@ -3356,14 +3574,20 @@ visual_delta_recovery = build_visual_delta_recovery_decision(
3356
3574
  if visual_delta_recovery:
3357
3575
  summary_lines.append('Visual delta recovery: ' + visual_delta_recovery['summary'])
3358
3576
 
3577
+ has_judgable_failed_interaction_evidence = (
3578
+ bool(structured_interaction_failure_summary)
3579
+ and required_baseline_present
3580
+ and not proof_evidence_blocker
3581
+ and not visual_delta_recovery
3582
+ )
3359
3583
  has_good_evidence = (
3360
3584
  required_baseline_present
3361
- and after_observation.get('valid')
3585
+ and (after_observation.get('valid') or has_judgable_failed_interaction_evidence)
3362
3586
  and not proof_evidence_blocker
3363
3587
  and not visual_delta_recovery
3364
3588
  )
3365
3589
 
3366
- if has_good_evidence:
3590
+ if has_good_evidence and after_observation.get('valid'):
3367
3591
  s['capture_hint_saved'] = record_successful_capture_hint(
3368
3592
  s,
3369
3593
  server_path=s.get('expected_start_path') or expected_path or s.get('server_path') or '/',
@@ -3374,9 +3598,12 @@ if has_good_evidence:
3374
3598
  )
3375
3599
 
3376
3600
  if has_good_evidence:
3601
+ if has_judgable_failed_interaction_evidence and isinstance(evidence_bundle.get('proof_session'), dict):
3602
+ evidence_bundle['proof_session']['status'] = 'evidence_captured'
3603
+ s['proof_session'] = evidence_bundle.get('proof_session') or {}
3377
3604
  supervisor_request = build_supervisor_assessment_request(s, after_payload, after_observation, required_baseline_present, expected_path, evidence_bundle)
3378
3605
  s['verify_status'] = 'evidence_captured'
3379
- s['merge_recommendation'] = 'pending-supervisor-judgment'
3606
+ s['merge_recommendation'] = 'do-not-merge' if has_judgable_failed_interaction_evidence else 'pending-supervisor-judgment'
3380
3607
  s['proof_assessment'] = {}
3381
3608
  s['proof_assessment_source'] = None
3382
3609
  s['proof_assessment_request'] = supervisor_request
@@ -3386,11 +3613,16 @@ if has_good_evidence:
3386
3613
  fields_agent_may_update.append('implementation_notes')
3387
3614
  s['verify_decision_request'] = {
3388
3615
  'status': s['verify_status'],
3389
- 'summary': 'Verify captured usable evidence and is waiting for supervising-agent proof assessment.',
3616
+ 'summary': (
3617
+ 'Verify captured structured interaction evidence with failed assertions and is waiting for supervising-agent proof assessment.'
3618
+ if has_judgable_failed_interaction_evidence
3619
+ else 'Verify captured usable evidence and is waiting for supervising-agent proof assessment.'
3620
+ ),
3390
3621
  'expected_path': expected_path,
3391
3622
  'expected_start_path': s.get('expected_start_path') or expected_path,
3392
3623
  'route_expectation': s.get('route_expectation') or {},
3393
3624
  'latest_observation': after_observation,
3625
+ 'structured_interaction_failure_summary': structured_interaction_failure_summary,
3394
3626
  'next_stage_options': next_stage_options,
3395
3627
  'recommended_stage': None,
3396
3628
  'continue_with_stage': None,
@@ -3402,7 +3634,10 @@ if has_good_evidence:
3402
3634
  'Do not escalate to the human unless the supervising agent concludes the workflow is genuinely stuck or not converging.',
3403
3635
  ],
3404
3636
  }
3405
- summary_lines.append('Proof assessment: awaiting supervising agent judgment')
3637
+ if has_judgable_failed_interaction_evidence:
3638
+ summary_lines.append('Proof assessment: awaiting supervising agent judgment on failed interaction evidence')
3639
+ else:
3640
+ summary_lines.append('Proof assessment: awaiting supervising agent judgment')
3406
3641
  summary_lines.append('Proof next stage: supervising agent decides after reviewing the evidence packet')
3407
3642
  else:
3408
3643
  capture_retry = visual_delta_recovery or build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker, s.get('route_expectation') or {})