@riddledc/riddle-proof 0.8.6 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/codex-exec-agent.cjs +30 -10
- package/dist/adapters/codex-exec-agent.js +1 -1
- package/dist/adapters/codex.cjs +30 -10
- package/dist/adapters/codex.js +1 -1
- package/dist/adapters/local-agent.cjs +30 -10
- package/dist/adapters/local-agent.js +1 -1
- package/dist/advanced/engine-harness.cjs +64 -7
- package/dist/advanced/engine-harness.js +2 -2
- package/dist/advanced/index.cjs +64 -7
- package/dist/advanced/index.d.cts +1 -1
- package/dist/advanced/index.d.ts +1 -1
- package/dist/advanced/index.js +4 -4
- package/dist/advanced/proof-run-core.cjs +63 -6
- package/dist/advanced/proof-run-core.js +1 -1
- package/dist/advanced/proof-run-engine.cjs +63 -6
- package/dist/advanced/proof-run-engine.d.cts +1 -1
- package/dist/advanced/proof-run-engine.d.ts +1 -1
- package/dist/advanced/proof-run-engine.js +2 -2
- package/dist/advanced/runner.js +2 -2
- package/dist/{chunk-GMZ57RRY.js → chunk-46DDSZJR.js} +1 -1
- package/dist/{chunk-RV6LK7HU.js → chunk-5N5QFI2S.js} +63 -6
- package/dist/{chunk-UIJ7X63P.js → chunk-5N6MQCLC.js} +1 -1
- package/dist/{chunk-BDFSMWTI.js → chunk-E7ATYSYS.js} +1 -1
- package/dist/{chunk-5MILMRQY.js → chunk-PYCQNK66.js} +30 -10
- package/dist/{chunk-NAFJ4KSF.js → chunk-V6VZ3CAI.js} +2 -2
- package/dist/cli/index.js +4 -4
- package/dist/cli.cjs +99 -22
- package/dist/cli.js +4 -4
- package/dist/codex-exec-agent.cjs +30 -10
- package/dist/codex-exec-agent.js +1 -1
- package/dist/engine-harness.cjs +64 -7
- package/dist/engine-harness.js +2 -2
- package/dist/index.cjs +99 -22
- package/dist/index.js +4 -4
- package/dist/local-agent.cjs +30 -10
- package/dist/local-agent.js +1 -1
- package/dist/proof-run-core.cjs +63 -6
- package/dist/proof-run-core.js +1 -1
- package/dist/{proof-run-engine-BO1h0Bmy.d.cts → proof-run-engine-BlocjMni.d.cts} +3 -3
- package/dist/{proof-run-engine-CIdpWNh6.d.ts → proof-run-engine-C_m8WJmX.d.ts} +3 -3
- package/dist/proof-run-engine.cjs +63 -6
- package/dist/proof-run-engine.d.cts +1 -1
- package/dist/proof-run-engine.d.ts +1 -1
- package/dist/proof-run-engine.js +2 -2
- package/dist/runner.js +2 -2
- package/package.json +1 -1
- package/runtime/lib/author.py +39 -1
- package/runtime/lib/verify.py +241 -6
- package/runtime/tests/recon_verify_smoke.py +89 -20
package/dist/proof-run-core.js
CHANGED
|
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
292
292
|
blocking?: boolean;
|
|
293
293
|
details?: Record<string, unknown>;
|
|
294
294
|
ok: boolean;
|
|
295
|
-
action: "
|
|
295
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
|
|
296
296
|
state_path: string;
|
|
297
297
|
stage: any;
|
|
298
298
|
summary: string;
|
|
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
382
382
|
continueWithStage?: WorkflowStage | null;
|
|
383
383
|
blocking?: boolean;
|
|
384
384
|
details?: Record<string, unknown>;
|
|
385
|
-
action: "
|
|
385
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
|
|
386
386
|
state_path: string;
|
|
387
387
|
stage: any;
|
|
388
388
|
checkpoint: string;
|
|
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
659
659
|
error?: undefined;
|
|
660
660
|
} | {
|
|
661
661
|
ok: boolean;
|
|
662
|
-
action: "
|
|
662
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup";
|
|
663
663
|
state_path: string;
|
|
664
664
|
stage: any;
|
|
665
665
|
summary: string;
|
|
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
292
292
|
blocking?: boolean;
|
|
293
293
|
details?: Record<string, unknown>;
|
|
294
294
|
ok: boolean;
|
|
295
|
-
action: "
|
|
295
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
|
|
296
296
|
state_path: string;
|
|
297
297
|
stage: any;
|
|
298
298
|
summary: string;
|
|
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
382
382
|
continueWithStage?: WorkflowStage | null;
|
|
383
383
|
blocking?: boolean;
|
|
384
384
|
details?: Record<string, unknown>;
|
|
385
|
-
action: "
|
|
385
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
|
|
386
386
|
state_path: string;
|
|
387
387
|
stage: any;
|
|
388
388
|
checkpoint: string;
|
|
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
659
659
|
error?: undefined;
|
|
660
660
|
} | {
|
|
661
661
|
ok: boolean;
|
|
662
|
-
action: "
|
|
662
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup";
|
|
663
663
|
state_path: string;
|
|
664
664
|
stage: any;
|
|
665
665
|
summary: string;
|
|
@@ -195,6 +195,56 @@ function writeState(statePath, state) {
|
|
|
195
195
|
/**
 * Trim a string value.
 *
 * @param value Any value.
 * @returns The trimmed string, or `undefined` when `value` is not a string.
 */
function normalizeOptionalString(value) {
  if (typeof value !== "string") {
    return undefined;
  }
  return value.trim();
}
|
|
198
|
+
// Set of verification-mode names, stored lowercase, that are looked up with `.has()` by isInteractionVerificationMode.
var INTERACTION_VERIFICATION_MODES = /* @__PURE__ */ new Set(["interaction", "interactive", "user_flow", "user-flow", "workflow"]);
|
|
199
|
+
/**
 * Reduce a route or URL to `pathname + search + hash`.
 *
 * Absolute http(s) URLs are parsed as-is; anything else is resolved against a
 * placeholder origin (a leading "/" is added unless the input already starts
 * with "/", "?" or "#"). Trailing slashes are removed from the pathname, and an
 * empty pathname becomes "/". If URL parsing throws, the input is split by hand
 * on the first "#" and the first "?" instead.
 *
 * @param value Route path or URL; non-strings and blank strings yield "".
 * @returns The normalized route string, or "" when there is nothing to normalize.
 */
function normalizeRoutePath(value) {
  if (typeof value !== "string") return "";
  const raw = value.trim();
  if (raw === "") return "";
  try {
    let url;
    if (/^https?:\/\//i.test(raw)) {
      url = new URL(raw);
    } else {
      const isRelativeReference = raw.startsWith("/") || raw.startsWith("?") || raw.startsWith("#");
      url = new URL(isRelativeReference ? raw : `/${raw}`, "https://riddle-proof.local");
    }
    const trimmedPathname = url.pathname.replace(/\/+$/, "");
    return (trimmedPathname || "/") + url.search + url.hash;
  } catch {
    // Manual fallback: everything from the first "#" is the hash, and within
    // the remainder everything from the first "?" is the query.
    const hashAt = raw.indexOf("#");
    const beforeHash = hashAt === -1 ? raw : raw.slice(0, hashAt);
    const hash = hashAt === -1 ? "" : raw.slice(hashAt);
    const queryAt = beforeHash.indexOf("?");
    const rawPath = queryAt === -1 ? beforeHash : beforeHash.slice(0, queryAt);
    const query = queryAt === -1 ? "" : beforeHash.slice(queryAt);
    // Collapse repeated slashes, then drop trailing ones.
    const collapsed = `/${rawPath}`.replace(/\/+/g, "/").replace(/\/+$/, "");
    return (collapsed || "/") + query + hash;
  }
}
|
|
217
|
+
/**
 * Report whether a verification mode names an interaction flow.
 *
 * String input is trimmed and lowercased before the lookup; any non-string is
 * looked up as the empty string.
 *
 * @param value Verification mode value taken from workflow state.
 * @returns `true` when the normalized mode is in INTERACTION_VERIFICATION_MODES.
 */
function isInteractionVerificationMode(value) {
  let mode = "";
  if (typeof value === "string") {
    mode = value.trim().toLowerCase();
  }
  return INTERACTION_VERIFICATION_MODES.has(mode);
}
|
|
220
|
+
/**
 * Read `record[key]` as a trimmed string.
 *
 * @param record Candidate object; anything that is not a non-null object yields "".
 * @param key Property name to read.
 * @returns The trimmed string value, or "" when the property is not a string.
 */
function stringRecordValue(record, key) {
  const isObject = typeof record === "object" && record !== null;
  if (!isObject) {
    return "";
  }
  const found = record[key];
  if (typeof found !== "string") {
    return "";
  }
  return found.trim();
}
|
|
225
|
+
function appendStateWarning(state, key, warning) {
|
|
226
|
+
const existing = Array.isArray(state[key]) ? state[key].filter((item) => typeof item === "string") : [];
|
|
227
|
+
if (!existing.includes(warning)) state[key] = [...existing, warning];
|
|
228
|
+
}
|
|
229
|
+
/**
 * Pick the start route for an interaction proof and normalize it.
 *
 * The first non-empty string wins, in this order: state.expected_start_path,
 * refined.expected_start_path, parsed.interaction_contract.start_path,
 * parsed.proof_contract.start_path, state.server_path. If none is set, "/" is used.
 *
 * @param state Workflow state object.
 * @param parsed Parsed supervisor packet.
 * @param refined The packet's refined inputs.
 * @returns The normalized start route; "/" when normalization yields nothing.
 */
function interactionStartPathForAuthorPacket(state, parsed, refined) {
  // Thunks keep each lookup lazy, so later sources are only read when needed.
  const lookups = [
    () => stringRecordValue(state, "expected_start_path"),
    () => stringRecordValue(refined, "expected_start_path"),
    () => stringRecordValue(parsed.interaction_contract, "start_path"),
    () => stringRecordValue(parsed.proof_contract, "start_path"),
    () => stringRecordValue(state, "server_path")
  ];
  let chosen = "/";
  for (const lookup of lookups) {
    const candidate = lookup();
    if (candidate) {
      chosen = candidate;
      break;
    }
  }
  return normalizeRoutePath(chosen) || "/";
}
|
|
234
|
+
/**
 * Decide which server path to keep from a supervisor author packet.
 *
 * Outside interaction verification modes the supplied `serverPath` is returned
 * unchanged. In interaction modes the start route is resolved and stored on
 * `state.expected_start_path`. If `serverPath` normalizes to the same route as
 * `expectedTerminalPath` but differs from the start route, a warning is added
 * to `state.author_warnings` and the start route is returned instead.
 *
 * @param state Mutable workflow state.
 * @param parsed Parsed supervisor packet.
 * @param refined The packet's refined inputs.
 * @param serverPath Server path proposed by the packet.
 * @param expectedTerminalPath Expected terminal route, or "" when unknown.
 * @returns The server path to store on the state.
 */
function authorPacketServerPath(state, parsed, refined, serverPath, expectedTerminalPath) {
  if (isInteractionVerificationMode(state.verification_mode)) {
    const startPath = interactionStartPathForAuthorPacket(state, parsed, refined);
    state.expected_start_path = startPath;
    if (expectedTerminalPath) {
      const requestedRoute = normalizeRoutePath(serverPath);
      const pointsAtTerminal = requestedRoute === normalizeRoutePath(expectedTerminalPath);
      if (pointsAtTerminal && requestedRoute !== startPath) {
        appendStateWarning(
          state,
          "author_warnings",
          "Supervisor packet refined_inputs.server_path matched the terminal interaction route; kept the recon start route for capture."
        );
        return startPath;
      }
    }
  }
  return serverPath;
}
|
|
198
248
|
function knownEnvironmentIssuesFromNotes(notes) {
|
|
199
249
|
const text = notes.toLowerCase();
|
|
200
250
|
const issues = [];
|
|
@@ -853,17 +903,24 @@ function mergeStateFromParams(statePath, params) {
|
|
|
853
903
|
state.proof_contract = parsed.proof_contract;
|
|
854
904
|
}
|
|
855
905
|
const refined = parsed?.refined_inputs || {};
|
|
906
|
+
const expectedTerminalPath = normalizeOptionalString(
|
|
907
|
+
typeof refined?.expected_terminal_path === "string" ? refined.expected_terminal_path : typeof parsed?.expected_terminal_path === "string" ? parsed.expected_terminal_path : ""
|
|
908
|
+
) || "";
|
|
856
909
|
if (typeof refined?.server_path === "string") {
|
|
857
|
-
|
|
910
|
+
const refinedServerPath = normalizeOptionalString(refined.server_path) || "";
|
|
911
|
+
state.server_path = authorPacketServerPath(
|
|
912
|
+
state,
|
|
913
|
+
parsed,
|
|
914
|
+
refined,
|
|
915
|
+
refinedServerPath,
|
|
916
|
+
expectedTerminalPath
|
|
917
|
+
);
|
|
858
918
|
state.server_path_source = "supervising_agent";
|
|
859
919
|
}
|
|
860
920
|
if (typeof refined?.wait_for_selector === "string") state.wait_for_selector = normalizeOptionalString(refined.wait_for_selector) || "";
|
|
861
921
|
if (typeof refined?.reference === "string" && refined.reference.trim()) state.reference = refined.reference.trim();
|
|
862
|
-
if (
|
|
863
|
-
state.expected_terminal_path =
|
|
864
|
-
}
|
|
865
|
-
if (typeof parsed?.expected_terminal_path === "string") {
|
|
866
|
-
state.expected_terminal_path = normalizeOptionalString(parsed.expected_terminal_path) || "";
|
|
922
|
+
if (expectedTerminalPath) {
|
|
923
|
+
state.expected_terminal_path = expectedTerminalPath;
|
|
867
924
|
}
|
|
868
925
|
if (typeof parsed?.confidence === "string") state.supervisor_author_confidence = normalizeOptionalString(parsed.confidence) || null;
|
|
869
926
|
if (parsed?.rationale !== void 0) state.supervisor_author_rationale = parsed.rationale;
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import './proof-run-core-CE0jx7wL.cjs';
|
|
2
|
-
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-
|
|
2
|
+
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-BlocjMni.cjs';
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import './proof-run-core-CE0jx7wL.js';
|
|
2
|
-
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-
|
|
2
|
+
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-C_m8WJmX.js';
|
package/dist/proof-run-engine.js
CHANGED
package/dist/runner.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runRiddleProof
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-5N6MQCLC.js";
|
|
4
4
|
import "./chunk-YZUVEJ5B.js";
|
|
5
5
|
import "./chunk-FMOYUYH2.js";
|
|
6
|
-
import "./chunk-
|
|
6
|
+
import "./chunk-5N5QFI2S.js";
|
|
7
7
|
import "./chunk-4FOHZ7JG.js";
|
|
8
8
|
import "./chunk-VY4Y5U57.js";
|
|
9
9
|
import "./chunk-MLKGABMK.js";
|
package/package.json
CHANGED
package/runtime/lib/author.py
CHANGED
|
@@ -9,6 +9,7 @@ Instead it does two things:
|
|
|
9
9
|
import json
|
|
10
10
|
import os
|
|
11
11
|
import sys
|
|
12
|
+
from urllib.parse import urlparse
|
|
12
13
|
|
|
13
14
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
14
15
|
from util import load_state, save_state
|
|
@@ -34,6 +35,31 @@ def normalize_path(value):
|
|
|
34
35
|
return path
|
|
35
36
|
|
|
36
37
|
|
|
38
|
+
def normalize_route_path(value):
    """Reduce a route or URL to ``path[?query][#fragment]``.

    The path always starts with '/', has trailing slashes removed, and becomes
    '/' when empty. Non-string and blank inputs yield ''.

    When the input carries a network location (for example
    ``https://example.com``) an empty path is treated as the root route rather
    than falling back to the raw URL text.
    """
    if not isinstance(value, str):
        return ''
    raw = value.strip()
    if not raw:
        return ''
    parsed = urlparse(raw)
    if parsed.netloc:
        # Absolute or protocol-relative URL: an empty path means the root.
        path = parsed.path
    else:
        path = parsed.path or raw
    query = parsed.query or ''
    fragment = parsed.fragment or ''
    # The raw-string fallback can leave '?' or '#' in the path; peel them off
    # without overriding what urlparse already extracted.
    if '?' in path:
        path, query_tail = path.split('?', 1)
        query = query or query_tail.split('#', 1)[0]
    if '#' in path:
        path, fragment_tail = path.split('#', 1)
        fragment = fragment or fragment_tail
    if not path.startswith('/'):
        path = '/' + path.lstrip('/')
    path = path.rstrip('/') or '/'
    return path + (('?' + query) if query else '') + (('#' + fragment) if fragment else '')
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def is_interaction_mode(state):
    """Return True when state['verification_mode'] names an interaction flow.

    The mode is trimmed and lowercased before comparison; a missing or falsy
    mode is treated as the empty string.
    """
    configured = state.get('verification_mode') or ''
    normalized = configured.strip().lower()
    interaction_modes = ('interaction', 'interactive', 'user_flow', 'user-flow', 'workflow')
    return normalized in interaction_modes
|
|
61
|
+
|
|
62
|
+
|
|
37
63
|
def first_non_empty(*values):
|
|
38
64
|
for value in values:
|
|
39
65
|
if isinstance(value, str) and value.strip():
|
|
@@ -284,6 +310,17 @@ expected_terminal_path = normalize_path(first_non_empty(
|
|
|
284
310
|
supervisor_packet.get('expected_after_path'),
|
|
285
311
|
s.get('expected_terminal_path'),
|
|
286
312
|
))
|
|
313
|
+
author_warnings = []
|
|
314
|
+
if is_interaction_mode(s):
|
|
315
|
+
interaction_start_path = normalize_route_path(first_non_empty(s.get('expected_start_path'), default_path, s.get('server_path'), '/')) or '/'
|
|
316
|
+
refined_route = normalize_route_path(refined_path)
|
|
317
|
+
terminal_route = normalize_route_path(expected_terminal_path)
|
|
318
|
+
if terminal_route and refined_route == terminal_route and refined_route != interaction_start_path:
|
|
319
|
+
refined_path = interaction_start_path
|
|
320
|
+
author_warnings.append(
|
|
321
|
+
'Supervisor packet refined_inputs.server_path matched the terminal interaction route; kept the recon start route for capture.'
|
|
322
|
+
)
|
|
323
|
+
s['expected_start_path'] = interaction_start_path
|
|
287
324
|
confidence = provided_payload['confidence'] if provided_payload['confidence'] in ('high', 'medium', 'low') else 'medium'
|
|
288
325
|
rationale = sanitize_rationale(provided_payload['rationale'])
|
|
289
326
|
summary = provided_payload['summary'] or 'Supervising agent supplied the proof packet from recon observations.'
|
|
@@ -301,6 +338,7 @@ authored_packet = {
|
|
|
301
338
|
'interaction_contract': provided_payload['interaction_contract'],
|
|
302
339
|
'proof_contract': provided_payload['proof_contract'],
|
|
303
340
|
'rationale': rationale,
|
|
341
|
+
'warnings': author_warnings,
|
|
304
342
|
'confidence': confidence,
|
|
305
343
|
'mode': 'supervising_agent',
|
|
306
344
|
'model': ('supervising-agent:' + RUNTIME_MODEL_HINT) if RUNTIME_MODEL_HINT else 'supervising-agent',
|
|
@@ -328,7 +366,7 @@ s['author_mode'] = 'supervising_agent'
|
|
|
328
366
|
s['author_model'] = authored_packet['model']
|
|
329
367
|
s['author_confidence'] = confidence
|
|
330
368
|
s['author_rationale'] = rationale
|
|
331
|
-
s['author_warnings'] =
|
|
369
|
+
s['author_warnings'] = author_warnings
|
|
332
370
|
s['author_runtime_model_hint'] = RUNTIME_MODEL_HINT
|
|
333
371
|
s['author_packet'] = authored_packet
|
|
334
372
|
s['author_summary'] = summary
|
package/runtime/lib/verify.py
CHANGED
|
@@ -1995,16 +1995,22 @@ def route_parts(value):
|
|
|
1995
1995
|
|
|
1996
1996
|
EXPLICIT_TERMINAL_PATH_KEYS = (
|
|
1997
1997
|
'expected_terminal_path', 'expectedTerminalPath',
|
|
1998
|
+
'expected_terminal_url', 'expectedTerminalUrl',
|
|
1998
1999
|
'expected_terminal_route', 'expectedTerminalRoute',
|
|
1999
2000
|
'terminal_path', 'terminalPath',
|
|
2001
|
+
'terminal_url', 'terminalUrl',
|
|
2000
2002
|
'terminal_route', 'terminalRoute',
|
|
2001
2003
|
'expected_after_path', 'expectedAfterPath',
|
|
2004
|
+
'expected_after_url', 'expectedAfterUrl',
|
|
2002
2005
|
'expected_after_route', 'expectedAfterRoute',
|
|
2003
2006
|
'after_path', 'afterPath',
|
|
2007
|
+
'after_url', 'afterUrl',
|
|
2004
2008
|
'after_route', 'afterRoute',
|
|
2005
2009
|
'expected_final_path', 'expectedFinalPath',
|
|
2010
|
+
'expected_final_url', 'expectedFinalUrl',
|
|
2006
2011
|
'expected_final_route', 'expectedFinalRoute',
|
|
2007
2012
|
'final_path', 'finalPath',
|
|
2013
|
+
'final_url', 'finalUrl',
|
|
2008
2014
|
'final_route', 'finalRoute',
|
|
2009
2015
|
)
|
|
2010
2016
|
LOCATION_PATH_KEYS = ('path', 'pathname', 'route', 'url', 'href')
|
|
@@ -2016,6 +2022,11 @@ AFTER_STATE_KEYS = (
|
|
|
2016
2022
|
'final', 'final_state', 'finalState',
|
|
2017
2023
|
'expected_final', 'expectedFinal',
|
|
2018
2024
|
)
|
|
2025
|
+
# Record keys (camelCase and snake_case spellings) whose dict/list values are
# treated as nested evidence containers by the code that iterates this tuple.
EVIDENCE_CONTAINER_KEYS = (
    'proofEvidence', 'proof_evidence',
    'interactionEvidence', 'interaction_evidence',
    'evidence',
)
|
|
2019
2030
|
CONTRACT_STATE_KEYS = (
|
|
2020
2031
|
'interaction_contract', 'interactionContract',
|
|
2021
2032
|
'proof_contract', 'proofContract',
|
|
@@ -2067,6 +2078,17 @@ def terminal_path_from_record(record, depth=0):
|
|
|
2067
2078
|
candidate = terminal_path_from_record(item, depth + 1)
|
|
2068
2079
|
if candidate:
|
|
2069
2080
|
return candidate
|
|
2081
|
+
for key in EVIDENCE_CONTAINER_KEYS:
|
|
2082
|
+
value = record.get(key)
|
|
2083
|
+
if isinstance(value, dict):
|
|
2084
|
+
candidate = terminal_path_from_record(value, depth + 1)
|
|
2085
|
+
if candidate:
|
|
2086
|
+
return candidate
|
|
2087
|
+
elif isinstance(value, list):
|
|
2088
|
+
for item in value:
|
|
2089
|
+
candidate = terminal_path_from_record(item, depth + 1)
|
|
2090
|
+
if candidate:
|
|
2091
|
+
return candidate
|
|
2070
2092
|
for key in CONTRACT_STATE_KEYS:
|
|
2071
2093
|
value = record.get(key)
|
|
2072
2094
|
if isinstance(value, dict):
|
|
@@ -2081,11 +2103,25 @@ def terminal_path_from_record(record, depth=0):
|
|
|
2081
2103
|
return ''
|
|
2082
2104
|
|
|
2083
2105
|
|
|
2106
|
+
def text_path_candidate(value):
    """Clean a path-like snippet pulled from free text and validate it.

    Surrounding whitespace and any trailing '.,;:)]}' characters are removed
    before the text is handed to path_candidate. Non-string input yields ''.
    """
    if isinstance(value, str):
        trimmed = value.strip()
        without_trailing_punctuation = trimmed.rstrip('.,;:)]}')
        return path_candidate(without_trailing_punctuation)
    return ''
|
|
2111
|
+
|
|
2112
|
+
|
|
2084
2113
|
def terminal_path_from_text(value):
    """Find the first usable terminal route mentioned in free text.

    Two passes are made, in order: quoted routes that start with '/' and
    contain a '?' or '#', then routes that follow a "terminal" / "after" /
    "final" keyword (optionally prefixed by "expected") on the same line within
    120 characters. The first match accepted by text_path_candidate is
    returned; '' when nothing qualifies or the input is not a string.
    """
    if not isinstance(value, str):
        return ''
    quoted_route = r"""['"`](/[^'"`\s]+[?#][^'"`\s]*)['"`]"""
    contextual_route = r"""(?is)\b(?:expected\s+(?:terminal|after|final)|terminal|after|final)\b[^/\r\n]{0,120}['"`]?(/[^'"`\s,;)]*)"""
    for pattern in (quoted_route, contextual_route):
        for found in re.finditer(pattern, value):
            candidate = text_path_candidate(found.group(1))
            if candidate:
                return candidate
    return ''
|
|
@@ -2122,6 +2158,170 @@ def interaction_assertions_pass(value):
|
|
|
2122
2158
|
return False
|
|
2123
2159
|
|
|
2124
2160
|
|
|
2161
|
+
# Keys whose dict/list values hold individual assertion results.
INTERACTION_ASSERTION_CONTAINER_KEYS = ('assertions', 'checks', 'predicates', 'expectations')
# Boolean flag keys; a literal False under any of them is recorded as a failure.
INTERACTION_FAILURE_FLAG_KEYS = (
    'passed',
    'ok',
    'valid',
    'success',
    'proofReady',
    'proof_ready',
    'interactionPassed',
    'interaction_passed',
    'routeMatches',
    'route_matches',
)
# Lowercased 'status' / 'result' values that are recorded as a failure.
INTERACTION_FAILURE_STATUS_VALUES = {'fail', 'failed', 'failure', 'error', 'errored', 'timeout', 'timed_out'}
# Keys tried, in order, to find a human-readable name for an assertion item.
INTERACTION_ASSERTION_NAME_KEYS = ('name', 'id', 'key', 'label', 'assertion', 'check', 'field')
# Keys under which route context (a path, query or hash) is looked for.
INTERACTION_ROUTE_CONTEXT_KEYS = (
    'expected',
    'observed',
    'actual',
    'start',
    'before',
    'after',
    'terminal',
    'final',
    'expected_after',
    'expectedAfter',
    'expected_terminal',
    'expectedTerminal',
    'expected_final',
    'expectedFinal',
)
|
|
2192
|
+
|
|
2193
|
+
|
|
2194
|
+
def failure_label(prefix, key):
    """Build a dotted label from a prefix and a key.

    Both parts are stringified and stripped; falsy parts count as empty. The
    result is 'prefix.key' when both are present, whichever one is present
    otherwise, and 'failed' when neither is.
    """
    parts = [str(part or '').strip() for part in (prefix, key)]
    joined = '.'.join(part for part in parts if part)
    return joined or 'failed'
|
|
2200
|
+
|
|
2201
|
+
|
|
2202
|
+
def assertion_item_label(item, fallback):
    """Return a display name for an assertion item.

    For a dict, the first non-blank value found under
    INTERACTION_ASSERTION_NAME_KEYS (stringified and stripped) is used.
    Anything else, or a dict without such a value, yields ``fallback``.
    """
    if not isinstance(item, dict):
        return fallback
    labels = (str(item.get(name_key) or '').strip() for name_key in INTERACTION_ASSERTION_NAME_KEYS)
    return next((label for label in labels if label), fallback)
|
|
2209
|
+
|
|
2210
|
+
|
|
2211
|
+
def collect_interaction_failed_assertions(value, prefix='', depth=0):
    """Walk an evidence structure and return labels of failed assertions.

    ``value`` may be a dict or a list; anything else contributes nothing.
    ``prefix`` is the dotted label accumulated so far, and ``depth`` is the
    recursion level (the walk stops once it exceeds 6). The result is a list
    of unique, non-empty label strings in first-seen order.
    """
    if depth > 6:
        return []
    failures = []
    if isinstance(value, dict):
        # A literal False under a known flag key is a failure (missing/None is not).
        for key in INTERACTION_FAILURE_FLAG_KEYS:
            if value.get(key) is False:
                failures.append(failure_label(prefix, key))
        # A failing status/result string is labelled with the item's name if it has one.
        status = str(value.get('status') or value.get('result') or '').strip().lower()
        if status in INTERACTION_FAILURE_STATUS_VALUES:
            failures.append(failure_label(prefix, assertion_item_label(value, 'status')))
        # Assertion containers may be a mapping of name -> result or a list of results.
        for key in INTERACTION_ASSERTION_CONTAINER_KEYS:
            checks = value.get(key)
            container_prefix = failure_label(prefix, key)
            if isinstance(checks, dict):
                for check_key, check_value in checks.items():
                    if check_value is False:
                        failures.append(failure_label(container_prefix, check_key))
                    elif isinstance(check_value, dict):
                        nested = collect_interaction_failed_assertions(
                            check_value,
                            failure_label(container_prefix, check_key),
                            depth + 1,
                        )
                        failures.extend(nested)
                    elif isinstance(check_value, list):
                        failures.extend(collect_interaction_failed_assertions(
                            check_value,
                            failure_label(container_prefix, check_key),
                            depth + 1,
                        ))
            elif isinstance(checks, list):
                for index, item in enumerate(checks):
                    if item is False:
                        failures.append(failure_label(container_prefix, str(index)))
                    elif isinstance(item, dict):
                        # Prefer the item's own name over its list position.
                        item_label = assertion_item_label(item, str(index))
                        failures.extend(collect_interaction_failed_assertions(
                            item,
                            failure_label(container_prefix, item_label),
                            depth + 1,
                        ))
        # Descend into nested evidence containers.
        for key in EVIDENCE_CONTAINER_KEYS:
            nested = value.get(key)
            if isinstance(nested, (dict, list)):
                failures.extend(collect_interaction_failed_assertions(nested, failure_label(prefix, key), depth + 1))
    elif isinstance(value, list):
        for index, item in enumerate(value):
            if item is False:
                failures.append(failure_label(prefix, str(index)))
            elif isinstance(item, (dict, list)):
                # Nested list items keep the parent's prefix (no index is added).
                failures.extend(collect_interaction_failed_assertions(item, prefix, depth + 1))
    # Drop blanks and duplicates while keeping first-seen order.
    deduped = []
    seen = set()
    for failure in failures:
        failure = str(failure or '').strip()
        if not failure or failure in seen:
            continue
        seen.add(failure)
        deduped.append(failure)
    return deduped
|
|
2272
|
+
|
|
2273
|
+
|
|
2274
|
+
def interaction_route_context_present(value, depth=0):
    """Return True when an evidence structure carries any route information.

    ``value`` may be a dict or a list; ``depth`` is the recursion level (the
    search gives up once it exceeds 6). Other types yield False.
    """
    if depth > 6:
        return False
    if isinstance(value, dict):
        # A terminal path found on the record itself is enough.
        if terminal_path_from_record(value):
            return True
        for key in INTERACTION_ROUTE_CONTEXT_KEYS:
            nested = value.get(key)
            if isinstance(nested, dict):
                # NOTE(review): record_path_candidate is defined outside this view;
                # presumably it extracts a path from location-style keys - confirm.
                if record_path_candidate(nested, allow_location_keys=True):
                    return True
                # A bare query or hash also counts as route context.
                query = str(nested.get('query') or nested.get('search') or '').strip()
                hash_value = str(nested.get('hash') or nested.get('fragment') or '').strip()
                if query or hash_value:
                    return True
                if interaction_route_context_present(nested, depth + 1):
                    return True
            elif isinstance(nested, str) and path_candidate(nested):
                return True
        # Descend into nested evidence containers.
        for key in EVIDENCE_CONTAINER_KEYS:
            nested = value.get(key)
            if isinstance(nested, (dict, list)) and interaction_route_context_present(nested, depth + 1):
                return True
    elif isinstance(value, list):
        return any(interaction_route_context_present(item, depth + 1) for item in value)
    return False
|
|
2300
|
+
|
|
2301
|
+
|
|
2302
|
+
def failed_interaction_evidence_summary(proof_evidence):
    """Summarize failed assertions found in structured interaction evidence.

    Returns '' when no failed assertion is found or when the evidence carries
    no route context. Otherwise returns a sentence listing up to eight unique
    failure labels, followed by the first capture error (truncated to 300
    characters) when any record reports one.
    """
    collected = []
    for record in proof_evidence_records(proof_evidence):
        collected.extend(collect_interaction_failed_assertions(record))
    # dict.fromkeys keeps first-seen order while removing duplicates.
    unique_failures = list(dict.fromkeys(collected))
    if not unique_failures:
        return ''
    if not interaction_route_context_present(proof_evidence):
        return ''
    summary = 'Structured interaction proof evidence captured failed assertion(s): ' + ', '.join(unique_failures[:8]) + '.'
    reported_errors = [
        str(record.get('capture_error') or record.get('error') or '').strip()
        for record in proof_evidence_records(proof_evidence)
    ]
    capture_errors = [error for error in reported_errors if error]
    if capture_errors:
        summary += ' Capture script error: ' + capture_errors[0][:300]
    return summary
|
|
2323
|
+
|
|
2324
|
+
|
|
2125
2325
|
def interaction_terminal_path_from_evidence(proof_evidence):
|
|
2126
2326
|
for record in proof_evidence_records(proof_evidence):
|
|
2127
2327
|
candidate = terminal_path_from_record(record)
|
|
@@ -2867,6 +3067,9 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2867
3067
|
evidence_basis.append('structured-artifacts')
|
|
2868
3068
|
if supporting.get('playability_ready'):
|
|
2869
3069
|
evidence_basis.append('playability')
|
|
3070
|
+
interaction_failure_summary = str(state.get('structured_interaction_failure_summary') or '').strip()
|
|
3071
|
+
if interaction_failure_summary:
|
|
3072
|
+
evidence_basis.append('structured-interaction-failure')
|
|
2870
3073
|
visual_delta = ((evidence_bundle or {}).get('after') or {}).get('visual_delta') or {}
|
|
2871
3074
|
if visual_delta.get('status') == 'measured':
|
|
2872
3075
|
evidence_basis.append('visual-delta')
|
|
@@ -2900,6 +3103,8 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2900
3103
|
evidence_bundle['artifact_usage'] = artifact_usage
|
|
2901
3104
|
visual_delta_blocker = '' if audit_no_diff_mode(state) else visual_delta_blocker_for_mode(verification_mode, visual_delta)
|
|
2902
3105
|
hard_blockers = [visual_delta_blocker] if visual_delta_blocker else []
|
|
3106
|
+
if interaction_failure_summary:
|
|
3107
|
+
hard_blockers.append(interaction_failure_summary)
|
|
2903
3108
|
if verification_mode in PLAYABILITY_MODES and not supporting.get('playability_ready'):
|
|
2904
3109
|
assessment = supporting.get('playability_assessment') or {}
|
|
2905
3110
|
concerns = assessment.get('concerns') if isinstance(assessment, dict) else []
|
|
@@ -2925,6 +3130,10 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2925
3130
|
instructions.append(
|
|
2926
3131
|
'For visual/UI polish, capture success is not proof. If visual_delta.status is unmeasured, missing, not_applicable, or measured with passed=false, choose needs_implementation or needs_richer_proof instead of ready_to_ship.'
|
|
2927
3132
|
)
|
|
3133
|
+
if interaction_failure_summary:
|
|
3134
|
+
instructions.append(
|
|
3135
|
+
'The structured interaction evidence contains failed assertions. Treat those failed assertions as a hard blocker for ready_to_ship; do not send this back to author unless the capture script itself is missing the needed evidence.'
|
|
3136
|
+
)
|
|
2928
3137
|
instructions.extend([
|
|
2929
3138
|
'For playable/gameplay proof, screenshots are supporting evidence only. Do not mark ready_to_ship unless playability_assessment.passed is true and the proof shows accepted input, state/time progression, and playfield/canvas pixel motion.',
|
|
2930
3139
|
'For data/audio/log/metrics/custom modes, judge the structured evidence bundle and proof_evidence_sample directly; screenshots are optional supporting context.',
|
|
@@ -2947,6 +3156,7 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2947
3156
|
'viewport_matrix': viewport_matrix,
|
|
2948
3157
|
'evidence_bundle': evidence_bundle or {},
|
|
2949
3158
|
'evidence_basis': evidence_basis,
|
|
3159
|
+
'structured_interaction_failure_summary': interaction_failure_summary,
|
|
2950
3160
|
'artifact_contract': artifact_contract,
|
|
2951
3161
|
'artifact_production': artifact_production,
|
|
2952
3162
|
'artifact_usage': artifact_usage,
|
|
@@ -3348,6 +3558,14 @@ if proof_evidence_required_for_mode(s.get('verification_mode')):
|
|
|
3348
3558
|
if proof_evidence_blocker:
|
|
3349
3559
|
summary_lines.append('Structured proof evidence gate: ' + proof_evidence_blocker)
|
|
3350
3560
|
|
|
3561
|
+
structured_interaction_failure_summary = ''
|
|
3562
|
+
proof_evidence = evidence_bundle.get('proof_evidence')
|
|
3563
|
+
if verification_mode in INTERACTION_MODES and proof_evidence is not None:
|
|
3564
|
+
structured_interaction_failure_summary = failed_interaction_evidence_summary(proof_evidence)
|
|
3565
|
+
if structured_interaction_failure_summary:
|
|
3566
|
+
summary_lines.append('Structured interaction evidence gate: ' + structured_interaction_failure_summary)
|
|
3567
|
+
s['structured_interaction_failure_summary'] = structured_interaction_failure_summary
|
|
3568
|
+
|
|
3351
3569
|
visual_delta_recovery = build_visual_delta_recovery_decision(
|
|
3352
3570
|
s.get('verification_mode'),
|
|
3353
3571
|
visual_delta,
|
|
@@ -3356,14 +3574,20 @@ visual_delta_recovery = build_visual_delta_recovery_decision(
|
|
|
3356
3574
|
if visual_delta_recovery:
|
|
3357
3575
|
summary_lines.append('Visual delta recovery: ' + visual_delta_recovery['summary'])
|
|
3358
3576
|
|
|
3577
|
+
has_judgable_failed_interaction_evidence = (
|
|
3578
|
+
bool(structured_interaction_failure_summary)
|
|
3579
|
+
and required_baseline_present
|
|
3580
|
+
and not proof_evidence_blocker
|
|
3581
|
+
and not visual_delta_recovery
|
|
3582
|
+
)
|
|
3359
3583
|
has_good_evidence = (
|
|
3360
3584
|
required_baseline_present
|
|
3361
|
-
and after_observation.get('valid')
|
|
3585
|
+
and (after_observation.get('valid') or has_judgable_failed_interaction_evidence)
|
|
3362
3586
|
and not proof_evidence_blocker
|
|
3363
3587
|
and not visual_delta_recovery
|
|
3364
3588
|
)
|
|
3365
3589
|
|
|
3366
|
-
if has_good_evidence:
|
|
3590
|
+
if has_good_evidence and after_observation.get('valid'):
|
|
3367
3591
|
s['capture_hint_saved'] = record_successful_capture_hint(
|
|
3368
3592
|
s,
|
|
3369
3593
|
server_path=s.get('expected_start_path') or expected_path or s.get('server_path') or '/',
|
|
@@ -3374,9 +3598,12 @@ if has_good_evidence:
|
|
|
3374
3598
|
)
|
|
3375
3599
|
|
|
3376
3600
|
if has_good_evidence:
|
|
3601
|
+
if has_judgable_failed_interaction_evidence and isinstance(evidence_bundle.get('proof_session'), dict):
|
|
3602
|
+
evidence_bundle['proof_session']['status'] = 'evidence_captured'
|
|
3603
|
+
s['proof_session'] = evidence_bundle.get('proof_session') or {}
|
|
3377
3604
|
supervisor_request = build_supervisor_assessment_request(s, after_payload, after_observation, required_baseline_present, expected_path, evidence_bundle)
|
|
3378
3605
|
s['verify_status'] = 'evidence_captured'
|
|
3379
|
-
s['merge_recommendation'] = 'pending-supervisor-judgment'
|
|
3606
|
+
s['merge_recommendation'] = 'do-not-merge' if has_judgable_failed_interaction_evidence else 'pending-supervisor-judgment'
|
|
3380
3607
|
s['proof_assessment'] = {}
|
|
3381
3608
|
s['proof_assessment_source'] = None
|
|
3382
3609
|
s['proof_assessment_request'] = supervisor_request
|
|
@@ -3386,11 +3613,16 @@ if has_good_evidence:
|
|
|
3386
3613
|
fields_agent_may_update.append('implementation_notes')
|
|
3387
3614
|
s['verify_decision_request'] = {
|
|
3388
3615
|
'status': s['verify_status'],
|
|
3389
|
-
'summary':
|
|
3616
|
+
'summary': (
|
|
3617
|
+
'Verify captured structured interaction evidence with failed assertions and is waiting for supervising-agent proof assessment.'
|
|
3618
|
+
if has_judgable_failed_interaction_evidence
|
|
3619
|
+
else 'Verify captured usable evidence and is waiting for supervising-agent proof assessment.'
|
|
3620
|
+
),
|
|
3390
3621
|
'expected_path': expected_path,
|
|
3391
3622
|
'expected_start_path': s.get('expected_start_path') or expected_path,
|
|
3392
3623
|
'route_expectation': s.get('route_expectation') or {},
|
|
3393
3624
|
'latest_observation': after_observation,
|
|
3625
|
+
'structured_interaction_failure_summary': structured_interaction_failure_summary,
|
|
3394
3626
|
'next_stage_options': next_stage_options,
|
|
3395
3627
|
'recommended_stage': None,
|
|
3396
3628
|
'continue_with_stage': None,
|
|
@@ -3402,7 +3634,10 @@ if has_good_evidence:
|
|
|
3402
3634
|
'Do not escalate to the human unless the supervising agent concludes the workflow is genuinely stuck or not converging.',
|
|
3403
3635
|
],
|
|
3404
3636
|
}
|
|
3405
|
-
|
|
3637
|
+
if has_judgable_failed_interaction_evidence:
|
|
3638
|
+
summary_lines.append('Proof assessment: awaiting supervising agent judgment on failed interaction evidence')
|
|
3639
|
+
else:
|
|
3640
|
+
summary_lines.append('Proof assessment: awaiting supervising agent judgment')
|
|
3406
3641
|
summary_lines.append('Proof next stage: supervising agent decides after reviewing the evidence packet')
|
|
3407
3642
|
else:
|
|
3408
3643
|
capture_retry = visual_delta_recovery or build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker, s.get('route_expectation') or {})
|