@riddledc/riddle-proof 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/openclaw.js +4 -4
- package/dist/advanced/engine-harness.cjs +25 -4
- package/dist/advanced/engine-harness.js +5 -5
- package/dist/advanced/index.cjs +25 -4
- package/dist/advanced/index.d.cts +1 -1
- package/dist/advanced/index.d.ts +1 -1
- package/dist/advanced/index.js +7 -7
- package/dist/advanced/proof-run-core.cjs +12 -0
- package/dist/advanced/proof-run-core.js +1 -1
- package/dist/advanced/proof-run-engine.cjs +19 -0
- package/dist/advanced/proof-run-engine.d.cts +1 -1
- package/dist/advanced/proof-run-engine.d.ts +1 -1
- package/dist/advanced/proof-run-engine.js +2 -2
- package/dist/advanced/runner.js +5 -5
- package/dist/checkpoint.cjs +3 -1
- package/dist/checkpoint.js +1 -1
- package/dist/{chunk-VZD5LH7U.js → chunk-OIFHYMHP.js} +2 -2
- package/dist/{chunk-TJ63IE65.js → chunk-RV6LK7HU.js} +12 -0
- package/dist/{chunk-2CFVREFI.js → chunk-SKIAZTQ7.js} +3 -1
- package/dist/{chunk-BTN76IGW.js → chunk-SMBZT46I.js} +1 -1
- package/dist/{chunk-OHJQRDST.js → chunk-TNCDVE5O.js} +1 -1
- package/dist/{chunk-BHL4JSGM.js → chunk-TZ3YMCDM.js} +8 -1
- package/dist/{chunk-Y2KTBACQ.js → chunk-U4FUFBSH.js} +1 -1
- package/dist/{chunk-IWLQQ5S5.js → chunk-YB5ACBZE.js} +7 -7
- package/dist/{chunk-COERZX63.js → chunk-ZX45XGDJ.js} +2 -2
- package/dist/cli/index.js +6 -6
- package/dist/cli.cjs +25 -4
- package/dist/cli.js +6 -6
- package/dist/engine-harness.cjs +25 -4
- package/dist/engine-harness.js +5 -5
- package/dist/index.cjs +25 -4
- package/dist/index.js +6 -6
- package/dist/openclaw.js +4 -4
- package/dist/proof-run-core.cjs +12 -0
- package/dist/proof-run-core.js +1 -1
- package/dist/{proof-run-engine-CSSc0mNn.d.ts → proof-run-engine-BVkeO-Yo.d.ts} +3 -3
- package/dist/{proof-run-engine-HSRpUeBi.d.cts → proof-run-engine-CVnboNHj.d.cts} +3 -3
- package/dist/proof-run-engine.cjs +19 -0
- package/dist/proof-run-engine.d.cts +1 -1
- package/dist/proof-run-engine.d.ts +1 -1
- package/dist/proof-run-engine.js +2 -2
- package/dist/run-card.js +2 -2
- package/dist/runner.js +5 -5
- package/dist/spec/checkpoint.cjs +3 -1
- package/dist/spec/checkpoint.js +1 -1
- package/dist/spec/index.cjs +3 -1
- package/dist/spec/index.js +3 -3
- package/dist/spec/run-card.js +2 -2
- package/dist/spec/state.js +3 -3
- package/dist/state.js +3 -3
- package/dist/types.d.cts +1 -0
- package/dist/types.d.ts +1 -0
- package/package.json +1 -1
- package/runtime/lib/author.py +32 -0
- package/runtime/lib/setup.py +3 -0
- package/runtime/lib/verify.py +247 -8
- package/runtime/tests/recon_verify_smoke.py +306 -8
package/dist/types.d.cts
CHANGED
|
@@ -162,6 +162,7 @@ interface RiddleProofProofContract {
|
|
|
162
162
|
capture_script?: string;
|
|
163
163
|
artifact_contract?: Record<string, unknown>;
|
|
164
164
|
assertions?: unknown;
|
|
165
|
+
interaction_contract?: Record<string, unknown>;
|
|
165
166
|
baseline_understanding?: Record<string, unknown>;
|
|
166
167
|
route_assumptions?: Record<string, unknown>;
|
|
167
168
|
stop_condition?: string;
|
package/dist/types.d.ts
CHANGED
|
@@ -162,6 +162,7 @@ interface RiddleProofProofContract {
|
|
|
162
162
|
capture_script?: string;
|
|
163
163
|
artifact_contract?: Record<string, unknown>;
|
|
164
164
|
assertions?: unknown;
|
|
165
|
+
interaction_contract?: Record<string, unknown>;
|
|
165
166
|
baseline_understanding?: Record<string, unknown>;
|
|
166
167
|
route_assumptions?: Record<string, unknown>;
|
|
167
168
|
stop_condition?: string;
|
package/package.json
CHANGED
package/runtime/lib/author.py
CHANGED
|
@@ -53,6 +53,10 @@ def sanitize_rationale(value):
|
|
|
53
53
|
return out[:6]
|
|
54
54
|
|
|
55
55
|
|
|
56
|
+
def optional_record(value):
|
|
57
|
+
return value if isinstance(value, dict) else {}
|
|
58
|
+
|
|
59
|
+
|
|
56
60
|
def recon_baseline_understanding(state):
|
|
57
61
|
assessment = state.get('recon_assessment') or {}
|
|
58
62
|
understanding = assessment.get('baseline_understanding') or state.get('recon_baseline_understanding') or {}
|
|
@@ -145,12 +149,15 @@ def author_request_payload(state, reference, baselines, current_plan, hypothesis
|
|
|
145
149
|
'capture_script': fallback_capture_script,
|
|
146
150
|
'proof_plan': fallback_proof_plan,
|
|
147
151
|
},
|
|
152
|
+
'interaction_contract': optional_record(state.get('interaction_contract')),
|
|
153
|
+
'proof_contract': optional_record(state.get('proof_contract')),
|
|
148
154
|
'instructions': [
|
|
149
155
|
'The supervising agent owns proof authoring. Use the recon-confirmed route and baselines instead of inventing a new context.',
|
|
150
156
|
'Treat baseline_understanding as the required before-state review. The proof plan must name the observed before state, requested delta, and stop condition.',
|
|
151
157
|
'Return the authored packet via author_packet_json when possible. You may also set proof_plan, capture_script, server_path, and wait_for_selector directly.',
|
|
152
158
|
'Keep capture_script concise Playwright statements.',
|
|
153
159
|
'For visual/UI proof, include saveScreenshot(\'after-proof\') exactly once.',
|
|
160
|
+
'For interaction proof, preserve the interaction contract and name the expected terminal route/state separately from the initial route.',
|
|
154
161
|
'For playable/gameplay proof, start the experience, send keyboard or pointer input, sample state before/after, measure non-HUD playfield/canvas pixel deltas across time, and return a JSON-serializable evidence object with playability or playability_evidence version riddle-proof.playability.v1.',
|
|
155
162
|
'For data/audio/log/metric/custom proof, screenshots are optional; collect measurements inside page.evaluate, assign the result to an evidence variable, and return that evidence object from capture_script.',
|
|
156
163
|
'Do not assign globalThis.__riddleProofEvidence, window.__riddleProofEvidence, or self.__riddleProofEvidence in the worker context. Avoid global evidence assignment unless it is inside page.evaluate for compatibility with older packets.',
|
|
@@ -176,6 +183,13 @@ def author_request_payload(state, reference, baselines, current_plan, hypothesis
|
|
|
176
183
|
'server_path': 'string',
|
|
177
184
|
'wait_for_selector': 'string',
|
|
178
185
|
'reference': 'string',
|
|
186
|
+
'expected_terminal_path': 'string',
|
|
187
|
+
},
|
|
188
|
+
'interaction_contract': {
|
|
189
|
+
'start_path': 'string',
|
|
190
|
+
'expected_terminal_path': 'string',
|
|
191
|
+
'expected_terminal_state': 'object',
|
|
192
|
+
'actions': ['string'],
|
|
179
193
|
},
|
|
180
194
|
'rationale': ['string'],
|
|
181
195
|
'confidence': 'high | medium | low',
|
|
@@ -215,6 +229,8 @@ provided_payload = {
|
|
|
215
229
|
'capture_script': first_non_empty(supervisor_packet.get('capture_script'), s.get('capture_script')),
|
|
216
230
|
'baseline_understanding_used': supervisor_packet.get('baseline_understanding_used') or recon_baseline_understanding(s),
|
|
217
231
|
'refined_inputs': supervisor_packet.get('refined_inputs') or {},
|
|
232
|
+
'interaction_contract': optional_record(supervisor_packet.get('interaction_contract') or supervisor_packet.get('interactionContract') or s.get('interaction_contract')),
|
|
233
|
+
'proof_contract': optional_record(supervisor_packet.get('proof_contract') or supervisor_packet.get('proofContract') or s.get('proof_contract')),
|
|
218
234
|
'rationale': supervisor_packet.get('rationale', s.get('supervisor_author_rationale', [])),
|
|
219
235
|
'confidence': first_non_empty(supervisor_packet.get('confidence'), s.get('supervisor_author_confidence'), 'medium').lower(),
|
|
220
236
|
'summary': first_non_empty(supervisor_packet.get('summary'), s.get('supervisor_author_summary')),
|
|
@@ -260,6 +276,13 @@ refined = provided_payload['refined_inputs'] if isinstance(provided_payload['ref
|
|
|
260
276
|
refined_path = normalize_path(first_non_empty(refined.get('server_path'), s.get('server_path'), default_path)) or '/'
|
|
261
277
|
refined_selector = first_non_empty(refined.get('wait_for_selector'), s.get('wait_for_selector'), default_selector)
|
|
262
278
|
refined_reference = first_non_empty(refined.get('reference'), reference) or reference
|
|
279
|
+
expected_terminal_path = normalize_path(first_non_empty(
|
|
280
|
+
refined.get('expected_terminal_path'),
|
|
281
|
+
refined.get('expected_after_path'),
|
|
282
|
+
supervisor_packet.get('expected_terminal_path'),
|
|
283
|
+
supervisor_packet.get('expected_after_path'),
|
|
284
|
+
s.get('expected_terminal_path'),
|
|
285
|
+
))
|
|
263
286
|
confidence = provided_payload['confidence'] if provided_payload['confidence'] in ('high', 'medium', 'low') else 'medium'
|
|
264
287
|
rationale = sanitize_rationale(provided_payload['rationale'])
|
|
265
288
|
summary = provided_payload['summary'] or 'Supervising agent supplied the proof packet from recon observations.'
|
|
@@ -272,7 +295,10 @@ authored_packet = {
|
|
|
272
295
|
'server_path': refined_path,
|
|
273
296
|
'wait_for_selector': refined_selector,
|
|
274
297
|
'reference': refined_reference,
|
|
298
|
+
'expected_terminal_path': expected_terminal_path,
|
|
275
299
|
},
|
|
300
|
+
'interaction_contract': provided_payload['interaction_contract'],
|
|
301
|
+
'proof_contract': provided_payload['proof_contract'],
|
|
276
302
|
'rationale': rationale,
|
|
277
303
|
'confidence': confidence,
|
|
278
304
|
'mode': 'supervising_agent',
|
|
@@ -281,6 +307,12 @@ authored_packet = {
|
|
|
281
307
|
}
|
|
282
308
|
|
|
283
309
|
s['server_path'] = refined_path
|
|
310
|
+
if expected_terminal_path:
|
|
311
|
+
s['expected_terminal_path'] = expected_terminal_path
|
|
312
|
+
if authored_packet['interaction_contract']:
|
|
313
|
+
s['interaction_contract'] = authored_packet['interaction_contract']
|
|
314
|
+
if authored_packet['proof_contract']:
|
|
315
|
+
s['proof_contract'] = authored_packet['proof_contract']
|
|
284
316
|
if refined_selector:
|
|
285
317
|
s['wait_for_selector'] = refined_selector
|
|
286
318
|
elif s.get('wait_for_selector'):
|
package/runtime/lib/setup.py
CHANGED
|
@@ -563,6 +563,9 @@ if remote_audit_mode():
|
|
|
563
563
|
s['author_status'] = 'ready'
|
|
564
564
|
s['proof_plan_status'] = 'ready'
|
|
565
565
|
s['proof_plan'] = (s.get('proof_plan') or 'Audit the current prod_url target and capture current evidence without requiring a repo diff.').strip()
|
|
566
|
+
if not (s.get('capture_script') or '').strip():
|
|
567
|
+
s['capture_script'] = 'await page.waitForTimeout(1500);'
|
|
568
|
+
s['capture_script_source'] = 'default_remote_audit_current_target'
|
|
566
569
|
s['dependency_install'] = {
|
|
567
570
|
'shared': 'skipped:remote_audit',
|
|
568
571
|
'before': 'skipped:remote_audit',
|
package/runtime/lib/verify.py
CHANGED
|
@@ -55,6 +55,7 @@ VISUAL_FIRST_MODES = {
|
|
|
55
55
|
'visual', 'render', 'interaction', 'ui', 'layout', 'screenshot',
|
|
56
56
|
'canvas', 'animation',
|
|
57
57
|
}
|
|
58
|
+
INTERACTION_MODES = {'interaction', 'interactive', 'user_flow', 'user-flow', 'workflow'}
|
|
58
59
|
PLAYABILITY_MODES = {'playable', 'gameplay', 'game'}
|
|
59
60
|
PROOF_EVIDENCE_REQUIRED_MODES = {'audio'}
|
|
60
61
|
MIN_VISUAL_DELTA_PERCENT = 0.5
|
|
@@ -1891,6 +1892,179 @@ def route_matches_expected(expected_path, observed_path):
|
|
|
1891
1892
|
return True
|
|
1892
1893
|
|
|
1893
1894
|
|
|
1895
|
+
EXPLICIT_TERMINAL_PATH_KEYS = (
|
|
1896
|
+
'expected_terminal_path', 'expectedTerminalPath',
|
|
1897
|
+
'expected_terminal_route', 'expectedTerminalRoute',
|
|
1898
|
+
'terminal_path', 'terminalPath',
|
|
1899
|
+
'terminal_route', 'terminalRoute',
|
|
1900
|
+
'expected_after_path', 'expectedAfterPath',
|
|
1901
|
+
'expected_after_route', 'expectedAfterRoute',
|
|
1902
|
+
'after_path', 'afterPath',
|
|
1903
|
+
'after_route', 'afterRoute',
|
|
1904
|
+
'expected_final_path', 'expectedFinalPath',
|
|
1905
|
+
'expected_final_route', 'expectedFinalRoute',
|
|
1906
|
+
'final_path', 'finalPath',
|
|
1907
|
+
'final_route', 'finalRoute',
|
|
1908
|
+
)
|
|
1909
|
+
LOCATION_PATH_KEYS = ('path', 'pathname', 'route', 'url', 'href')
|
|
1910
|
+
AFTER_STATE_KEYS = (
|
|
1911
|
+
'after', 'after_state', 'afterState',
|
|
1912
|
+
'expected_after', 'expectedAfter',
|
|
1913
|
+
'terminal', 'terminal_state', 'terminalState',
|
|
1914
|
+
'expected_terminal', 'expectedTerminal',
|
|
1915
|
+
'final', 'final_state', 'finalState',
|
|
1916
|
+
'expected_final', 'expectedFinal',
|
|
1917
|
+
)
|
|
1918
|
+
CONTRACT_STATE_KEYS = (
|
|
1919
|
+
'interaction_contract', 'interactionContract',
|
|
1920
|
+
'proof_contract', 'proofContract',
|
|
1921
|
+
'contract',
|
|
1922
|
+
'route_assumptions', 'routeAssumptions',
|
|
1923
|
+
'refined_inputs', 'refinedInputs',
|
|
1924
|
+
'assertions', 'checks',
|
|
1925
|
+
)
|
|
1926
|
+
|
|
1927
|
+
|
|
1928
|
+
def path_candidate(value):
|
|
1929
|
+
if not isinstance(value, str):
|
|
1930
|
+
return ''
|
|
1931
|
+
raw = value.strip()
|
|
1932
|
+
if not raw:
|
|
1933
|
+
return ''
|
|
1934
|
+
if raw.startswith(('http://', 'https://', '/', '?')):
|
|
1935
|
+
return normalize_observed_path(raw)
|
|
1936
|
+
return ''
|
|
1937
|
+
|
|
1938
|
+
|
|
1939
|
+
def record_path_candidate(record, allow_location_keys=False):
|
|
1940
|
+
if not isinstance(record, dict):
|
|
1941
|
+
return ''
|
|
1942
|
+
keys = list(EXPLICIT_TERMINAL_PATH_KEYS)
|
|
1943
|
+
if allow_location_keys:
|
|
1944
|
+
keys.extend(LOCATION_PATH_KEYS)
|
|
1945
|
+
for key in keys:
|
|
1946
|
+
candidate = path_candidate(record.get(key))
|
|
1947
|
+
if candidate:
|
|
1948
|
+
return candidate
|
|
1949
|
+
return ''
|
|
1950
|
+
|
|
1951
|
+
|
|
1952
|
+
def terminal_path_from_record(record, depth=0):
|
|
1953
|
+
if not isinstance(record, dict) or depth > 4:
|
|
1954
|
+
return ''
|
|
1955
|
+
candidate = record_path_candidate(record)
|
|
1956
|
+
if candidate:
|
|
1957
|
+
return candidate
|
|
1958
|
+
for key in AFTER_STATE_KEYS:
|
|
1959
|
+
value = record.get(key)
|
|
1960
|
+
if isinstance(value, dict):
|
|
1961
|
+
candidate = record_path_candidate(value, allow_location_keys=True) or terminal_path_from_record(value, depth + 1)
|
|
1962
|
+
if candidate:
|
|
1963
|
+
return candidate
|
|
1964
|
+
elif isinstance(value, list):
|
|
1965
|
+
for item in value:
|
|
1966
|
+
candidate = terminal_path_from_record(item, depth + 1)
|
|
1967
|
+
if candidate:
|
|
1968
|
+
return candidate
|
|
1969
|
+
for key in CONTRACT_STATE_KEYS:
|
|
1970
|
+
value = record.get(key)
|
|
1971
|
+
if isinstance(value, dict):
|
|
1972
|
+
candidate = terminal_path_from_record(value, depth + 1)
|
|
1973
|
+
if candidate:
|
|
1974
|
+
return candidate
|
|
1975
|
+
elif isinstance(value, list):
|
|
1976
|
+
for item in value:
|
|
1977
|
+
candidate = terminal_path_from_record(item, depth + 1)
|
|
1978
|
+
if candidate:
|
|
1979
|
+
return candidate
|
|
1980
|
+
return ''
|
|
1981
|
+
|
|
1982
|
+
|
|
1983
|
+
def interaction_assertions_pass(value):
|
|
1984
|
+
for record in proof_evidence_records(value):
|
|
1985
|
+
if any(record.get(key) is False for key in (
|
|
1986
|
+
'passed', 'ok', 'proofReady', 'proof_ready', 'routeMatches', 'route_matches',
|
|
1987
|
+
)):
|
|
1988
|
+
return False
|
|
1989
|
+
if any(record.get(key) is True for key in (
|
|
1990
|
+
'passed', 'ok', 'proofReady', 'proof_ready', 'interactionPassed', 'interaction_passed',
|
|
1991
|
+
)):
|
|
1992
|
+
return True
|
|
1993
|
+
for key in ('assertions', 'checks', 'predicates', 'expectations'):
|
|
1994
|
+
checks = record.get(key)
|
|
1995
|
+
if isinstance(checks, dict):
|
|
1996
|
+
bools = [value for value in checks.values() if isinstance(value, bool)]
|
|
1997
|
+
if bools:
|
|
1998
|
+
return all(bools)
|
|
1999
|
+
elif isinstance(checks, list):
|
|
2000
|
+
bools = []
|
|
2001
|
+
for item in checks:
|
|
2002
|
+
if isinstance(item, bool):
|
|
2003
|
+
bools.append(item)
|
|
2004
|
+
elif isinstance(item, dict):
|
|
2005
|
+
for flag_key in ('passed', 'ok', 'valid'):
|
|
2006
|
+
if isinstance(item.get(flag_key), bool):
|
|
2007
|
+
bools.append(item.get(flag_key))
|
|
2008
|
+
break
|
|
2009
|
+
if bools:
|
|
2010
|
+
return all(bools)
|
|
2011
|
+
return False
|
|
2012
|
+
|
|
2013
|
+
|
|
2014
|
+
def interaction_terminal_path_from_evidence(proof_evidence):
|
|
2015
|
+
for record in proof_evidence_records(proof_evidence):
|
|
2016
|
+
candidate = terminal_path_from_record(record)
|
|
2017
|
+
if candidate:
|
|
2018
|
+
return candidate, 'proof_evidence_contract'
|
|
2019
|
+
if interaction_assertions_pass(proof_evidence):
|
|
2020
|
+
for record in proof_evidence_records(proof_evidence):
|
|
2021
|
+
for key in AFTER_STATE_KEYS:
|
|
2022
|
+
value = record.get(key)
|
|
2023
|
+
if isinstance(value, dict):
|
|
2024
|
+
candidate = record_path_candidate(value, allow_location_keys=True)
|
|
2025
|
+
if candidate:
|
|
2026
|
+
return candidate, 'proof_evidence_after_state'
|
|
2027
|
+
return '', ''
|
|
2028
|
+
|
|
2029
|
+
|
|
2030
|
+
def interaction_terminal_path_from_state(state):
|
|
2031
|
+
for key in (
|
|
2032
|
+
'interaction_contract',
|
|
2033
|
+
'proof_contract',
|
|
2034
|
+
'supervisor_author_packet',
|
|
2035
|
+
'author_packet',
|
|
2036
|
+
'author_request',
|
|
2037
|
+
'proof_plan_request',
|
|
2038
|
+
):
|
|
2039
|
+
candidate = terminal_path_from_record(state.get(key))
|
|
2040
|
+
if candidate:
|
|
2041
|
+
return candidate, key
|
|
2042
|
+
return '', ''
|
|
2043
|
+
|
|
2044
|
+
|
|
2045
|
+
def expected_path_for_verify(state, start_path, proof_evidence):
|
|
2046
|
+
mode = normalized_verification_mode(state.get('verification_mode'))
|
|
2047
|
+
normalized_start = normalize_observed_path(start_path) or '/'
|
|
2048
|
+
if mode not in INTERACTION_MODES:
|
|
2049
|
+
return normalized_start, {
|
|
2050
|
+
'mode': mode,
|
|
2051
|
+
'source': 'recon_start_path',
|
|
2052
|
+
'start_path': normalized_start,
|
|
2053
|
+
'expected_path': normalized_start,
|
|
2054
|
+
}
|
|
2055
|
+
candidate, source = interaction_terminal_path_from_state(state)
|
|
2056
|
+
if not candidate:
|
|
2057
|
+
candidate, source = interaction_terminal_path_from_evidence(proof_evidence)
|
|
2058
|
+
expected = candidate or normalized_start
|
|
2059
|
+
return expected, {
|
|
2060
|
+
'mode': mode,
|
|
2061
|
+
'source': source or 'recon_start_path',
|
|
2062
|
+
'start_path': normalized_start,
|
|
2063
|
+
'expected_path': expected,
|
|
2064
|
+
'terminal_path': expected if expected != normalized_start else '',
|
|
2065
|
+
}
|
|
2066
|
+
|
|
2067
|
+
|
|
1894
2068
|
def collect_supporting_artifacts(payload):
|
|
1895
2069
|
payload = enrich_capture_payload(payload)
|
|
1896
2070
|
outputs = payload.get('outputs') or []
|
|
@@ -2063,6 +2237,7 @@ def evaluate_capture_quality(payload, expected_path, verification_mode='proof'):
|
|
|
2063
2237
|
screenshot_required = screenshot_required_for_mode(mode)
|
|
2064
2238
|
details = {
|
|
2065
2239
|
'verification_mode': mode,
|
|
2240
|
+
'expected_path': normalize_observed_path(expected_path),
|
|
2066
2241
|
'capture_tool_error': capture_payload_error(payload),
|
|
2067
2242
|
'has_screenshot': False,
|
|
2068
2243
|
'screenshot_required': screenshot_required,
|
|
@@ -2138,7 +2313,12 @@ def evaluate_capture_quality(payload, expected_path, verification_mode='proof'):
|
|
|
2138
2313
|
for text in iter_console_messages(console):
|
|
2139
2314
|
if is_proof_telemetry_console_message(text):
|
|
2140
2315
|
continue
|
|
2141
|
-
if isinstance(text, str) and (
|
|
2316
|
+
if isinstance(text, str) and (
|
|
2317
|
+
'error' in text.lower()
|
|
2318
|
+
or 'failed' in text.lower()
|
|
2319
|
+
or 'timeout' in text.lower()
|
|
2320
|
+
or 'timed out' in text.lower()
|
|
2321
|
+
):
|
|
2142
2322
|
details['has_errors'] = True
|
|
2143
2323
|
if len(details['capture_error_messages']) < 3:
|
|
2144
2324
|
details['capture_error_messages'].append(text[:500])
|
|
@@ -2231,16 +2411,41 @@ def build_capture_retry_decision(after_observation, required_baseline_present, p
|
|
|
2231
2411
|
reason = after_observation.get('reason') or 'after capture is not usable yet'
|
|
2232
2412
|
reasons.append('The after evidence is not usable yet: ' + reason)
|
|
2233
2413
|
recommended_stage = 'recon' if 'wrong route' in reason else 'author'
|
|
2414
|
+
details = after_observation.get('details') if isinstance(after_observation.get('details'), dict) else {}
|
|
2415
|
+
error_messages = [
|
|
2416
|
+
str(item).strip()
|
|
2417
|
+
for item in (details.get('capture_error_messages') or [])
|
|
2418
|
+
if str(item).strip()
|
|
2419
|
+
]
|
|
2420
|
+
mismatch = None
|
|
2234
2421
|
if recommended_stage == 'recon':
|
|
2422
|
+
expected = details.get('expected_path') or ''
|
|
2423
|
+
observed = details.get('observed_path_raw') or details.get('observed_path') or ''
|
|
2424
|
+
if expected or observed:
|
|
2425
|
+
mismatch = {
|
|
2426
|
+
'field': 'route',
|
|
2427
|
+
'expected_path': expected,
|
|
2428
|
+
'observed_after_path': observed,
|
|
2429
|
+
}
|
|
2430
|
+
reasons.append('Route mismatch: expected after capture path ' + (expected or '(unknown)') + ', observed ' + (observed or '(unknown)') + '.')
|
|
2431
|
+
summary = 'Verify capture route mismatch: expected ' + (expected or '(unknown)') + ', got ' + (observed or '(unknown)') + '.'
|
|
2432
|
+
else:
|
|
2433
|
+
summary = 'Verify capture route mismatch needs recon to refresh the reference path.'
|
|
2235
2434
|
reasons.append('The capture appears to be on the wrong route or baseline context, so recon should refresh the reference path.')
|
|
2236
2435
|
else:
|
|
2436
|
+
if error_messages:
|
|
2437
|
+
reasons.append('Capture script error: ' + error_messages[0][:500])
|
|
2438
|
+
summary = 'Verify capture script failed: ' + error_messages[0][:300]
|
|
2439
|
+
else:
|
|
2440
|
+
summary = 'Verify needs another internal capture iteration before the evidence can be judged.'
|
|
2237
2441
|
reasons.append('The capture plan itself needs revision, so author should tighten the proof script or framing inputs.')
|
|
2238
2442
|
return {
|
|
2239
2443
|
'decision': 'revise_capture',
|
|
2240
|
-
'summary':
|
|
2444
|
+
'summary': summary,
|
|
2241
2445
|
'recommended_stage': recommended_stage,
|
|
2242
2446
|
'continue_with_stage': recommended_stage,
|
|
2243
2447
|
'reasons': reasons,
|
|
2448
|
+
'mismatch': mismatch,
|
|
2244
2449
|
}
|
|
2245
2450
|
|
|
2246
2451
|
|
|
@@ -2321,13 +2526,21 @@ def build_semantic_context(state, results, after_observation, expected_path):
|
|
|
2321
2526
|
before_semantic = semantic_observation('before', before.get('observation') or {})
|
|
2322
2527
|
prod_semantic = semantic_observation('prod', prod.get('observation') or {})
|
|
2323
2528
|
after_semantic = semantic_observation('after', after_observation)
|
|
2529
|
+
expected_start_path = state.get('expected_start_path') or expected_path
|
|
2530
|
+
route_expectation = state.get('route_expectation') if isinstance(state.get('route_expectation'), dict) else {}
|
|
2324
2531
|
return {
|
|
2325
2532
|
'expected_path': expected_path,
|
|
2533
|
+
'expected_start_path': expected_start_path,
|
|
2534
|
+
'route_expectation': route_expectation,
|
|
2326
2535
|
'reference': state.get('requested_reference') or state.get('reference', 'both'),
|
|
2327
2536
|
'requested_change': state.get('change_request', ''),
|
|
2328
2537
|
'success_criteria': (state.get('success_criteria') or '').strip(),
|
|
2329
2538
|
'route': {
|
|
2330
2539
|
'expected_path': expected_path,
|
|
2540
|
+
'expected_after_path': expected_path,
|
|
2541
|
+
'expected_start_path': expected_start_path,
|
|
2542
|
+
'expected_terminal_path': route_expectation.get('terminal_path') or '',
|
|
2543
|
+
'expectation_source': route_expectation.get('source') or '',
|
|
2331
2544
|
'before_observed_path': before_semantic.get('observed_path') or before.get('path') or '',
|
|
2332
2545
|
'prod_observed_path': prod_semantic.get('observed_path') or prod.get('path') or '',
|
|
2333
2546
|
'after_observed_path': after_semantic.get('observed_path') or '',
|
|
@@ -2405,6 +2618,8 @@ def build_evidence_bundle(state, results, after_payload, after_observation, requ
|
|
|
2405
2618
|
'verification_mode': normalized_verification_mode(state.get('verification_mode')),
|
|
2406
2619
|
'reference': state.get('requested_reference') or state.get('reference', 'both'),
|
|
2407
2620
|
'expected_path': expected_path,
|
|
2621
|
+
'expected_start_path': state.get('expected_start_path') or expected_path,
|
|
2622
|
+
'route_expectation': state.get('route_expectation') or {},
|
|
2408
2623
|
'required_baseline_present': required_baseline_present,
|
|
2409
2624
|
'baseline': results.get('baseline') or {},
|
|
2410
2625
|
'semantic_context': semantic_context,
|
|
@@ -2518,6 +2733,8 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2518
2733
|
'status': 'needs_supervising_agent_assessment',
|
|
2519
2734
|
'verification_mode': verification_mode,
|
|
2520
2735
|
'expected_path': expected_path,
|
|
2736
|
+
'expected_start_path': state.get('expected_start_path') or expected_path,
|
|
2737
|
+
'route_expectation': state.get('route_expectation') or {},
|
|
2521
2738
|
'required_baseline_present': required_baseline_present,
|
|
2522
2739
|
'after_observation': after_observation,
|
|
2523
2740
|
'supporting_artifacts': supporting,
|
|
@@ -2545,10 +2762,14 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2545
2762
|
|
|
2546
2763
|
s = load_state()
|
|
2547
2764
|
capture_script = (s.get('capture_script') or '').strip()
|
|
2548
|
-
|
|
2765
|
+
no_implementation_mode = audit_no_diff_mode(s)
|
|
2766
|
+
if not capture_script and no_implementation_mode:
|
|
2767
|
+
capture_script = 'await page.waitForTimeout(1500);'
|
|
2768
|
+
s['capture_script'] = capture_script
|
|
2769
|
+
s['capture_script_source'] = s.get('capture_script_source') or 'default_remote_audit_current_target'
|
|
2770
|
+
elif not capture_script:
|
|
2549
2771
|
raise SystemExit('capture_script not set in state. Recon should finish homework first, then verify should receive the real capture plan.')
|
|
2550
2772
|
|
|
2551
|
-
no_implementation_mode = audit_no_diff_mode(s)
|
|
2552
2773
|
if not implementation_ready_for_verify(s):
|
|
2553
2774
|
raise SystemExit('Implementation not recorded. Make the code changes and run riddle-proof-implement before verify.')
|
|
2554
2775
|
if no_implementation_mode and s.get('implementation_status') != 'not_required':
|
|
@@ -2568,15 +2789,17 @@ if not no_implementation_mode and (not after_dir or not os.path.exists(after_dir
|
|
|
2568
2789
|
|
|
2569
2790
|
build_cmd = s.get('build_command', 'npm run build')
|
|
2570
2791
|
recon_baselines = ((s.get('recon_results') or {}).get('baselines') or {})
|
|
2571
|
-
|
|
2792
|
+
capture_start_path = (
|
|
2572
2793
|
(recon_baselines.get('before') or {}).get('path')
|
|
2573
2794
|
or (recon_baselines.get('prod') or {}).get('path')
|
|
2574
2795
|
or ((s.get('recon_hypothesis') or {}).get('target_path'))
|
|
2575
2796
|
or s.get('server_path')
|
|
2576
2797
|
or '/'
|
|
2577
2798
|
)
|
|
2799
|
+
expected_path = capture_start_path
|
|
2800
|
+
s['expected_start_path'] = normalize_observed_path(capture_start_path) or '/'
|
|
2578
2801
|
verification_mode = normalized_verification_mode(s.get('verification_mode'))
|
|
2579
|
-
proof_session_seed = capture_proof_session_seed(s,
|
|
2802
|
+
proof_session_seed = capture_proof_session_seed(s, s['expected_start_path'])
|
|
2580
2803
|
probe_capture_script = build_probe_capture_script(capture_script, verification_mode, proof_session_seed, s.get('viewport_matrix'))
|
|
2581
2804
|
results = {
|
|
2582
2805
|
'baseline': {
|
|
@@ -2751,6 +2974,13 @@ else:
|
|
|
2751
2974
|
results['after'] = {'screenshots': [{'url': capture.get('url', '')}] if capture.get('url') else [], 'raw': capture.get('raw')}
|
|
2752
2975
|
s['after_cdn'] = capture.get('url', '')
|
|
2753
2976
|
|
|
2977
|
+
proof_evidence_for_route = extract_proof_evidence(after_payload)
|
|
2978
|
+
expected_path, route_expectation = expected_path_for_verify(s, s.get('expected_start_path') or capture_start_path, proof_evidence_for_route)
|
|
2979
|
+
s['route_expectation'] = route_expectation
|
|
2980
|
+
s['expected_path'] = expected_path
|
|
2981
|
+
if route_expectation.get('terminal_path'):
|
|
2982
|
+
s['expected_terminal_path'] = route_expectation.get('terminal_path')
|
|
2983
|
+
|
|
2754
2984
|
after_viewport_matrix = capture_viewport_matrix_status(s, after_payload, 'after-proof')
|
|
2755
2985
|
after_observation = evaluate_capture_quality(after_payload, expected_path, verification_mode)
|
|
2756
2986
|
details = after_observation.get('details') if isinstance(after_observation.get('details'), dict) else {}
|
|
@@ -2815,7 +3045,12 @@ if existing_prod:
|
|
|
2815
3045
|
summary_lines.append('After screenshot: ' + (s.get('after_cdn') or '(none)'))
|
|
2816
3046
|
if after_viewport_matrix.get('status') not in ('not_requested', ''):
|
|
2817
3047
|
summary_lines.append('Viewport matrix: ' + after_viewport_matrix.get('status', 'unknown') + ' (' + str(len(after_viewport_matrix.get('executed') or [])) + '/' + str(len(after_viewport_matrix.get('requested') or [])) + ' captured)')
|
|
2818
|
-
|
|
3048
|
+
expected_start_path = s.get('expected_start_path') or expected_path
|
|
3049
|
+
if expected_start_path and expected_start_path != expected_path:
|
|
3050
|
+
summary_lines.append('Expected start path from recon: ' + expected_start_path)
|
|
3051
|
+
summary_lines.append('Expected terminal proof path: ' + expected_path)
|
|
3052
|
+
else:
|
|
3053
|
+
summary_lines.append('Expected proof path from recon: ' + expected_path)
|
|
2819
3054
|
summary_lines.append('After observation: ' + after_observation['reason'])
|
|
2820
3055
|
supporting = results['after'].get('supporting_artifacts') or {}
|
|
2821
3056
|
if supporting.get('has_structured_payload'):
|
|
@@ -2901,7 +3136,7 @@ has_good_evidence = (
|
|
|
2901
3136
|
if has_good_evidence:
|
|
2902
3137
|
s['capture_hint_saved'] = record_successful_capture_hint(
|
|
2903
3138
|
s,
|
|
2904
|
-
server_path=expected_path or s.get('server_path') or '/',
|
|
3139
|
+
server_path=s.get('expected_start_path') or expected_path or s.get('server_path') or '/',
|
|
2905
3140
|
wait_for_selector=s.get('wait_for_selector') or '',
|
|
2906
3141
|
observed_path=observed_path,
|
|
2907
3142
|
source_stage='verify',
|
|
@@ -2923,6 +3158,8 @@ if has_good_evidence:
|
|
|
2923
3158
|
'status': s['verify_status'],
|
|
2924
3159
|
'summary': 'Verify captured usable evidence and is waiting for supervising-agent proof assessment.',
|
|
2925
3160
|
'expected_path': expected_path,
|
|
3161
|
+
'expected_start_path': s.get('expected_start_path') or expected_path,
|
|
3162
|
+
'route_expectation': s.get('route_expectation') or {},
|
|
2926
3163
|
'latest_observation': after_observation,
|
|
2927
3164
|
'next_stage_options': next_stage_options,
|
|
2928
3165
|
'recommended_stage': None,
|
|
@@ -2949,6 +3186,8 @@ else:
|
|
|
2949
3186
|
'status': s['verify_status'],
|
|
2950
3187
|
'summary': capture_retry['summary'],
|
|
2951
3188
|
'expected_path': expected_path,
|
|
3189
|
+
'expected_start_path': s.get('expected_start_path') or expected_path,
|
|
3190
|
+
'route_expectation': s.get('route_expectation') or {},
|
|
2952
3191
|
'latest_observation': after_observation,
|
|
2953
3192
|
'capture_quality': capture_retry,
|
|
2954
3193
|
'next_stage_options': next_stage_options,
|