@riddledc/riddle-proof 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/advanced/index.d.cts +1 -1
- package/dist/advanced/index.d.ts +1 -1
- package/dist/advanced/proof-run-engine.d.cts +1 -1
- package/dist/advanced/proof-run-engine.d.ts +1 -1
- package/dist/{proof-run-engine-B7DCPzpK.d.cts → proof-run-engine-BlocjMni.d.cts} +3 -3
- package/dist/{proof-run-engine-BomAcXhA.d.ts → proof-run-engine-C_m8WJmX.d.ts} +3 -3
- package/dist/proof-run-engine.d.cts +1 -1
- package/dist/proof-run-engine.d.ts +1 -1
- package/package.json +1 -1
- package/runtime/lib/verify.py +204 -5
- package/runtime/tests/recon_verify_smoke.py +19 -12
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export { b as runner } from '../runner-4LJ5z0D-.cjs';
|
|
2
2
|
export { l as engineHarness } from '../engine-harness-LBfqbFSe.cjs';
|
|
3
3
|
export { p as proofRunCore } from '../proof-run-core-CE0jx7wL.cjs';
|
|
4
|
-
export { p as proofRunEngine } from '../proof-run-engine-B7DCPzpK.cjs';
|
|
4
|
+
export { p as proofRunEngine } from '../proof-run-engine-BlocjMni.cjs';
|
|
5
5
|
import '../types.cjs';
|
package/dist/advanced/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export { b as runner } from '../runner-BdQpOkZD.js';
|
|
2
2
|
export { l as engineHarness } from '../engine-harness-CMACHP6A.js';
|
|
3
3
|
export { p as proofRunCore } from '../proof-run-core-CE0jx7wL.js';
|
|
4
|
-
export { p as proofRunEngine } from '../proof-run-engine-BomAcXhA.js';
|
|
4
|
+
export { p as proofRunEngine } from '../proof-run-engine-C_m8WJmX.js';
|
|
5
5
|
import '../types.js';
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-B7DCPzpK.cjs';
|
|
1
|
+
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-BlocjMni.cjs';
|
|
2
2
|
import '../proof-run-core-CE0jx7wL.cjs';
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-BomAcXhA.js';
|
|
1
|
+
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-C_m8WJmX.js';
|
|
2
2
|
import '../proof-run-core-CE0jx7wL.js';
|
|
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
292
292
|
blocking?: boolean;
|
|
293
293
|
details?: Record<string, unknown>;
|
|
294
294
|
ok: boolean;
|
|
295
|
-
action: "
|
|
295
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
|
|
296
296
|
state_path: string;
|
|
297
297
|
stage: any;
|
|
298
298
|
summary: string;
|
|
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
382
382
|
continueWithStage?: WorkflowStage | null;
|
|
383
383
|
blocking?: boolean;
|
|
384
384
|
details?: Record<string, unknown>;
|
|
385
|
-
action: "
|
|
385
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
|
|
386
386
|
state_path: string;
|
|
387
387
|
stage: any;
|
|
388
388
|
checkpoint: string;
|
|
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
659
659
|
error?: undefined;
|
|
660
660
|
} | {
|
|
661
661
|
ok: boolean;
|
|
662
|
-
action: "
|
|
662
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup";
|
|
663
663
|
state_path: string;
|
|
664
664
|
stage: any;
|
|
665
665
|
summary: string;
|
|
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
292
292
|
blocking?: boolean;
|
|
293
293
|
details?: Record<string, unknown>;
|
|
294
294
|
ok: boolean;
|
|
295
|
-
action: "
|
|
295
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
|
|
296
296
|
state_path: string;
|
|
297
297
|
stage: any;
|
|
298
298
|
summary: string;
|
|
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
382
382
|
continueWithStage?: WorkflowStage | null;
|
|
383
383
|
blocking?: boolean;
|
|
384
384
|
details?: Record<string, unknown>;
|
|
385
|
-
action: "
|
|
385
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup" | "run";
|
|
386
386
|
state_path: string;
|
|
387
387
|
stage: any;
|
|
388
388
|
checkpoint: string;
|
|
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
659
659
|
error?: undefined;
|
|
660
660
|
} | {
|
|
661
661
|
ok: boolean;
|
|
662
|
-
action: "
|
|
662
|
+
action: "recon" | "author" | "ship" | "implement" | "verify" | "setup";
|
|
663
663
|
state_path: string;
|
|
664
664
|
stage: any;
|
|
665
665
|
summary: string;
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import './proof-run-core-CE0jx7wL.cjs';
|
|
2
|
-
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-B7DCPzpK.cjs';
|
|
2
|
+
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-BlocjMni.cjs';
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import './proof-run-core-CE0jx7wL.js';
|
|
2
|
-
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-BomAcXhA.js';
|
|
2
|
+
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-C_m8WJmX.js';
|
package/package.json
CHANGED
package/runtime/lib/verify.py
CHANGED
|
@@ -2158,6 +2158,170 @@ def interaction_assertions_pass(value):
|
|
|
2158
2158
|
return False
|
|
2159
2159
|
|
|
2160
2160
|
|
|
2161
|
+
INTERACTION_ASSERTION_CONTAINER_KEYS = ('assertions', 'checks', 'predicates', 'expectations')
|
|
2162
|
+
INTERACTION_FAILURE_FLAG_KEYS = (
|
|
2163
|
+
'passed',
|
|
2164
|
+
'ok',
|
|
2165
|
+
'valid',
|
|
2166
|
+
'success',
|
|
2167
|
+
'proofReady',
|
|
2168
|
+
'proof_ready',
|
|
2169
|
+
'interactionPassed',
|
|
2170
|
+
'interaction_passed',
|
|
2171
|
+
'routeMatches',
|
|
2172
|
+
'route_matches',
|
|
2173
|
+
)
|
|
2174
|
+
INTERACTION_FAILURE_STATUS_VALUES = {'fail', 'failed', 'failure', 'error', 'errored', 'timeout', 'timed_out'}
|
|
2175
|
+
INTERACTION_ASSERTION_NAME_KEYS = ('name', 'id', 'key', 'label', 'assertion', 'check', 'field')
|
|
2176
|
+
INTERACTION_ROUTE_CONTEXT_KEYS = (
|
|
2177
|
+
'expected',
|
|
2178
|
+
'observed',
|
|
2179
|
+
'actual',
|
|
2180
|
+
'start',
|
|
2181
|
+
'before',
|
|
2182
|
+
'after',
|
|
2183
|
+
'terminal',
|
|
2184
|
+
'final',
|
|
2185
|
+
'expected_after',
|
|
2186
|
+
'expectedAfter',
|
|
2187
|
+
'expected_terminal',
|
|
2188
|
+
'expectedTerminal',
|
|
2189
|
+
'expected_final',
|
|
2190
|
+
'expectedFinal',
|
|
2191
|
+
)
|
|
2192
|
+
|
|
2193
|
+
|
|
2194
|
+
def failure_label(prefix, key):
|
|
2195
|
+
key = str(key or '').strip()
|
|
2196
|
+
prefix = str(prefix or '').strip()
|
|
2197
|
+
if prefix and key:
|
|
2198
|
+
return prefix + '.' + key
|
|
2199
|
+
return key or prefix or 'failed'
|
|
2200
|
+
|
|
2201
|
+
|
|
2202
|
+
def assertion_item_label(item, fallback):
|
|
2203
|
+
if isinstance(item, dict):
|
|
2204
|
+
for key in INTERACTION_ASSERTION_NAME_KEYS:
|
|
2205
|
+
value = str(item.get(key) or '').strip()
|
|
2206
|
+
if value:
|
|
2207
|
+
return value
|
|
2208
|
+
return fallback
|
|
2209
|
+
|
|
2210
|
+
|
|
2211
|
+
def collect_interaction_failed_assertions(value, prefix='', depth=0):
|
|
2212
|
+
if depth > 6:
|
|
2213
|
+
return []
|
|
2214
|
+
failures = []
|
|
2215
|
+
if isinstance(value, dict):
|
|
2216
|
+
for key in INTERACTION_FAILURE_FLAG_KEYS:
|
|
2217
|
+
if value.get(key) is False:
|
|
2218
|
+
failures.append(failure_label(prefix, key))
|
|
2219
|
+
status = str(value.get('status') or value.get('result') or '').strip().lower()
|
|
2220
|
+
if status in INTERACTION_FAILURE_STATUS_VALUES:
|
|
2221
|
+
failures.append(failure_label(prefix, assertion_item_label(value, 'status')))
|
|
2222
|
+
for key in INTERACTION_ASSERTION_CONTAINER_KEYS:
|
|
2223
|
+
checks = value.get(key)
|
|
2224
|
+
container_prefix = failure_label(prefix, key)
|
|
2225
|
+
if isinstance(checks, dict):
|
|
2226
|
+
for check_key, check_value in checks.items():
|
|
2227
|
+
if check_value is False:
|
|
2228
|
+
failures.append(failure_label(container_prefix, check_key))
|
|
2229
|
+
elif isinstance(check_value, dict):
|
|
2230
|
+
nested = collect_interaction_failed_assertions(
|
|
2231
|
+
check_value,
|
|
2232
|
+
failure_label(container_prefix, check_key),
|
|
2233
|
+
depth + 1,
|
|
2234
|
+
)
|
|
2235
|
+
failures.extend(nested)
|
|
2236
|
+
elif isinstance(check_value, list):
|
|
2237
|
+
failures.extend(collect_interaction_failed_assertions(
|
|
2238
|
+
check_value,
|
|
2239
|
+
failure_label(container_prefix, check_key),
|
|
2240
|
+
depth + 1,
|
|
2241
|
+
))
|
|
2242
|
+
elif isinstance(checks, list):
|
|
2243
|
+
for index, item in enumerate(checks):
|
|
2244
|
+
if item is False:
|
|
2245
|
+
failures.append(failure_label(container_prefix, str(index)))
|
|
2246
|
+
elif isinstance(item, dict):
|
|
2247
|
+
item_label = assertion_item_label(item, str(index))
|
|
2248
|
+
failures.extend(collect_interaction_failed_assertions(
|
|
2249
|
+
item,
|
|
2250
|
+
failure_label(container_prefix, item_label),
|
|
2251
|
+
depth + 1,
|
|
2252
|
+
))
|
|
2253
|
+
for key in EVIDENCE_CONTAINER_KEYS:
|
|
2254
|
+
nested = value.get(key)
|
|
2255
|
+
if isinstance(nested, (dict, list)):
|
|
2256
|
+
failures.extend(collect_interaction_failed_assertions(nested, failure_label(prefix, key), depth + 1))
|
|
2257
|
+
elif isinstance(value, list):
|
|
2258
|
+
for index, item in enumerate(value):
|
|
2259
|
+
if item is False:
|
|
2260
|
+
failures.append(failure_label(prefix, str(index)))
|
|
2261
|
+
elif isinstance(item, (dict, list)):
|
|
2262
|
+
failures.extend(collect_interaction_failed_assertions(item, prefix, depth + 1))
|
|
2263
|
+
deduped = []
|
|
2264
|
+
seen = set()
|
|
2265
|
+
for failure in failures:
|
|
2266
|
+
failure = str(failure or '').strip()
|
|
2267
|
+
if not failure or failure in seen:
|
|
2268
|
+
continue
|
|
2269
|
+
seen.add(failure)
|
|
2270
|
+
deduped.append(failure)
|
|
2271
|
+
return deduped
|
|
2272
|
+
|
|
2273
|
+
|
|
2274
|
+
def interaction_route_context_present(value, depth=0):
|
|
2275
|
+
if depth > 6:
|
|
2276
|
+
return False
|
|
2277
|
+
if isinstance(value, dict):
|
|
2278
|
+
if terminal_path_from_record(value):
|
|
2279
|
+
return True
|
|
2280
|
+
for key in INTERACTION_ROUTE_CONTEXT_KEYS:
|
|
2281
|
+
nested = value.get(key)
|
|
2282
|
+
if isinstance(nested, dict):
|
|
2283
|
+
if record_path_candidate(nested, allow_location_keys=True):
|
|
2284
|
+
return True
|
|
2285
|
+
query = str(nested.get('query') or nested.get('search') or '').strip()
|
|
2286
|
+
hash_value = str(nested.get('hash') or nested.get('fragment') or '').strip()
|
|
2287
|
+
if query or hash_value:
|
|
2288
|
+
return True
|
|
2289
|
+
if interaction_route_context_present(nested, depth + 1):
|
|
2290
|
+
return True
|
|
2291
|
+
elif isinstance(nested, str) and path_candidate(nested):
|
|
2292
|
+
return True
|
|
2293
|
+
for key in EVIDENCE_CONTAINER_KEYS:
|
|
2294
|
+
nested = value.get(key)
|
|
2295
|
+
if isinstance(nested, (dict, list)) and interaction_route_context_present(nested, depth + 1):
|
|
2296
|
+
return True
|
|
2297
|
+
elif isinstance(value, list):
|
|
2298
|
+
return any(interaction_route_context_present(item, depth + 1) for item in value)
|
|
2299
|
+
return False
|
|
2300
|
+
|
|
2301
|
+
|
|
2302
|
+
def failed_interaction_evidence_summary(proof_evidence):
|
|
2303
|
+
failures = []
|
|
2304
|
+
for record in proof_evidence_records(proof_evidence):
|
|
2305
|
+
failures.extend(collect_interaction_failed_assertions(record))
|
|
2306
|
+
deduped = []
|
|
2307
|
+
seen = set()
|
|
2308
|
+
for failure in failures:
|
|
2309
|
+
if failure not in seen:
|
|
2310
|
+
seen.add(failure)
|
|
2311
|
+
deduped.append(failure)
|
|
2312
|
+
if not deduped or not interaction_route_context_present(proof_evidence):
|
|
2313
|
+
return ''
|
|
2314
|
+
summary = 'Structured interaction proof evidence captured failed assertion(s): ' + ', '.join(deduped[:8]) + '.'
|
|
2315
|
+
capture_errors = []
|
|
2316
|
+
for record in proof_evidence_records(proof_evidence):
|
|
2317
|
+
error = str(record.get('capture_error') or record.get('error') or '').strip()
|
|
2318
|
+
if error:
|
|
2319
|
+
capture_errors.append(error)
|
|
2320
|
+
if capture_errors:
|
|
2321
|
+
summary += ' Capture script error: ' + capture_errors[0][:300]
|
|
2322
|
+
return summary
|
|
2323
|
+
|
|
2324
|
+
|
|
2161
2325
|
def interaction_terminal_path_from_evidence(proof_evidence):
|
|
2162
2326
|
for record in proof_evidence_records(proof_evidence):
|
|
2163
2327
|
candidate = terminal_path_from_record(record)
|
|
@@ -2903,6 +3067,9 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2903
3067
|
evidence_basis.append('structured-artifacts')
|
|
2904
3068
|
if supporting.get('playability_ready'):
|
|
2905
3069
|
evidence_basis.append('playability')
|
|
3070
|
+
interaction_failure_summary = str(state.get('structured_interaction_failure_summary') or '').strip()
|
|
3071
|
+
if interaction_failure_summary:
|
|
3072
|
+
evidence_basis.append('structured-interaction-failure')
|
|
2906
3073
|
visual_delta = ((evidence_bundle or {}).get('after') or {}).get('visual_delta') or {}
|
|
2907
3074
|
if visual_delta.get('status') == 'measured':
|
|
2908
3075
|
evidence_basis.append('visual-delta')
|
|
@@ -2936,6 +3103,8 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2936
3103
|
evidence_bundle['artifact_usage'] = artifact_usage
|
|
2937
3104
|
visual_delta_blocker = '' if audit_no_diff_mode(state) else visual_delta_blocker_for_mode(verification_mode, visual_delta)
|
|
2938
3105
|
hard_blockers = [visual_delta_blocker] if visual_delta_blocker else []
|
|
3106
|
+
if interaction_failure_summary:
|
|
3107
|
+
hard_blockers.append(interaction_failure_summary)
|
|
2939
3108
|
if verification_mode in PLAYABILITY_MODES and not supporting.get('playability_ready'):
|
|
2940
3109
|
assessment = supporting.get('playability_assessment') or {}
|
|
2941
3110
|
concerns = assessment.get('concerns') if isinstance(assessment, dict) else []
|
|
@@ -2961,6 +3130,10 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2961
3130
|
instructions.append(
|
|
2962
3131
|
'For visual/UI polish, capture success is not proof. If visual_delta.status is unmeasured, missing, not_applicable, or measured with passed=false, choose needs_implementation or needs_richer_proof instead of ready_to_ship.'
|
|
2963
3132
|
)
|
|
3133
|
+
if interaction_failure_summary:
|
|
3134
|
+
instructions.append(
|
|
3135
|
+
'The structured interaction evidence contains failed assertions. Treat those failed assertions as a hard blocker for ready_to_ship; do not send this back to author unless the capture script itself is missing the needed evidence.'
|
|
3136
|
+
)
|
|
2964
3137
|
instructions.extend([
|
|
2965
3138
|
'For playable/gameplay proof, screenshots are supporting evidence only. Do not mark ready_to_ship unless playability_assessment.passed is true and the proof shows accepted input, state/time progression, and playfield/canvas pixel motion.',
|
|
2966
3139
|
'For data/audio/log/metrics/custom modes, judge the structured evidence bundle and proof_evidence_sample directly; screenshots are optional supporting context.',
|
|
@@ -2983,6 +3156,7 @@ def build_supervisor_assessment_request(state, payload, after_observation, requi
|
|
|
2983
3156
|
'viewport_matrix': viewport_matrix,
|
|
2984
3157
|
'evidence_bundle': evidence_bundle or {},
|
|
2985
3158
|
'evidence_basis': evidence_basis,
|
|
3159
|
+
'structured_interaction_failure_summary': interaction_failure_summary,
|
|
2986
3160
|
'artifact_contract': artifact_contract,
|
|
2987
3161
|
'artifact_production': artifact_production,
|
|
2988
3162
|
'artifact_usage': artifact_usage,
|
|
@@ -3384,6 +3558,14 @@ if proof_evidence_required_for_mode(s.get('verification_mode')):
|
|
|
3384
3558
|
if proof_evidence_blocker:
|
|
3385
3559
|
summary_lines.append('Structured proof evidence gate: ' + proof_evidence_blocker)
|
|
3386
3560
|
|
|
3561
|
+
structured_interaction_failure_summary = ''
|
|
3562
|
+
proof_evidence = evidence_bundle.get('proof_evidence')
|
|
3563
|
+
if verification_mode in INTERACTION_MODES and proof_evidence is not None:
|
|
3564
|
+
structured_interaction_failure_summary = failed_interaction_evidence_summary(proof_evidence)
|
|
3565
|
+
if structured_interaction_failure_summary:
|
|
3566
|
+
summary_lines.append('Structured interaction evidence gate: ' + structured_interaction_failure_summary)
|
|
3567
|
+
s['structured_interaction_failure_summary'] = structured_interaction_failure_summary
|
|
3568
|
+
|
|
3387
3569
|
visual_delta_recovery = build_visual_delta_recovery_decision(
|
|
3388
3570
|
s.get('verification_mode'),
|
|
3389
3571
|
visual_delta,
|
|
@@ -3392,14 +3574,20 @@ visual_delta_recovery = build_visual_delta_recovery_decision(
|
|
|
3392
3574
|
if visual_delta_recovery:
|
|
3393
3575
|
summary_lines.append('Visual delta recovery: ' + visual_delta_recovery['summary'])
|
|
3394
3576
|
|
|
3577
|
+
has_judgable_failed_interaction_evidence = (
|
|
3578
|
+
bool(structured_interaction_failure_summary)
|
|
3579
|
+
and required_baseline_present
|
|
3580
|
+
and not proof_evidence_blocker
|
|
3581
|
+
and not visual_delta_recovery
|
|
3582
|
+
)
|
|
3395
3583
|
has_good_evidence = (
|
|
3396
3584
|
required_baseline_present
|
|
3397
|
-
and after_observation.get('valid')
|
|
3585
|
+
and (after_observation.get('valid') or has_judgable_failed_interaction_evidence)
|
|
3398
3586
|
and not proof_evidence_blocker
|
|
3399
3587
|
and not visual_delta_recovery
|
|
3400
3588
|
)
|
|
3401
3589
|
|
|
3402
|
-
if has_good_evidence:
|
|
3590
|
+
if has_good_evidence and after_observation.get('valid'):
|
|
3403
3591
|
s['capture_hint_saved'] = record_successful_capture_hint(
|
|
3404
3592
|
s,
|
|
3405
3593
|
server_path=s.get('expected_start_path') or expected_path or s.get('server_path') or '/',
|
|
@@ -3410,9 +3598,12 @@ if has_good_evidence:
|
|
|
3410
3598
|
)
|
|
3411
3599
|
|
|
3412
3600
|
if has_good_evidence:
|
|
3601
|
+
if has_judgable_failed_interaction_evidence and isinstance(evidence_bundle.get('proof_session'), dict):
|
|
3602
|
+
evidence_bundle['proof_session']['status'] = 'evidence_captured'
|
|
3603
|
+
s['proof_session'] = evidence_bundle.get('proof_session') or {}
|
|
3413
3604
|
supervisor_request = build_supervisor_assessment_request(s, after_payload, after_observation, required_baseline_present, expected_path, evidence_bundle)
|
|
3414
3605
|
s['verify_status'] = 'evidence_captured'
|
|
3415
|
-
s['merge_recommendation'] = 'pending-supervisor-judgment'
|
|
3606
|
+
s['merge_recommendation'] = 'do-not-merge' if has_judgable_failed_interaction_evidence else 'pending-supervisor-judgment'
|
|
3416
3607
|
s['proof_assessment'] = {}
|
|
3417
3608
|
s['proof_assessment_source'] = None
|
|
3418
3609
|
s['proof_assessment_request'] = supervisor_request
|
|
@@ -3422,11 +3613,16 @@ if has_good_evidence:
|
|
|
3422
3613
|
fields_agent_may_update.append('implementation_notes')
|
|
3423
3614
|
s['verify_decision_request'] = {
|
|
3424
3615
|
'status': s['verify_status'],
|
|
3425
|
-
'summary':
|
|
3616
|
+
'summary': (
|
|
3617
|
+
'Verify captured structured interaction evidence with failed assertions and is waiting for supervising-agent proof assessment.'
|
|
3618
|
+
if has_judgable_failed_interaction_evidence
|
|
3619
|
+
else 'Verify captured usable evidence and is waiting for supervising-agent proof assessment.'
|
|
3620
|
+
),
|
|
3426
3621
|
'expected_path': expected_path,
|
|
3427
3622
|
'expected_start_path': s.get('expected_start_path') or expected_path,
|
|
3428
3623
|
'route_expectation': s.get('route_expectation') or {},
|
|
3429
3624
|
'latest_observation': after_observation,
|
|
3625
|
+
'structured_interaction_failure_summary': structured_interaction_failure_summary,
|
|
3430
3626
|
'next_stage_options': next_stage_options,
|
|
3431
3627
|
'recommended_stage': None,
|
|
3432
3628
|
'continue_with_stage': None,
|
|
@@ -3438,7 +3634,10 @@ if has_good_evidence:
|
|
|
3438
3634
|
'Do not escalate to the human unless the supervising agent concludes the workflow is genuinely stuck or not converging.',
|
|
3439
3635
|
],
|
|
3440
3636
|
}
|
|
3441
|
-
|
|
3637
|
+
if has_judgable_failed_interaction_evidence:
|
|
3638
|
+
summary_lines.append('Proof assessment: awaiting supervising agent judgment on failed interaction evidence')
|
|
3639
|
+
else:
|
|
3640
|
+
summary_lines.append('Proof assessment: awaiting supervising agent judgment')
|
|
3442
3641
|
summary_lines.append('Proof next stage: supervising agent decides after reviewing the evidence packet')
|
|
3443
3642
|
else:
|
|
3444
3643
|
capture_retry = visual_delta_recovery or build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker, s.get('route_expectation') or {})
|
|
@@ -2591,7 +2591,7 @@ def run_verify_interaction_hash_terminal_route_from_proof_evidence():
|
|
|
2591
2591
|
shutil.rmtree(tempdir, ignore_errors=True)
|
|
2592
2592
|
|
|
2593
2593
|
|
|
2594
|
-
def run_verify_interaction_authored_query_hash_mismatch_returns_author():
|
|
2594
|
+
def run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence():
|
|
2595
2595
|
tempdir = Path(tempfile.mkdtemp(prefix='riddle-proof-interaction-query-hash-mismatch-'))
|
|
2596
2596
|
state_path = tempdir / 'state.json'
|
|
2597
2597
|
try:
|
|
@@ -2630,17 +2630,24 @@ def run_verify_interaction_authored_query_hash_mismatch_returns_author():
|
|
|
2630
2630
|
after_verify = json.loads(state_path.read_text())
|
|
2631
2631
|
|
|
2632
2632
|
request = after_verify['verify_decision_request']
|
|
2633
|
-
|
|
2634
|
-
assert after_verify['
|
|
2633
|
+
assert after_verify['verify_status'] == 'evidence_captured'
|
|
2634
|
+
assert after_verify['merge_recommendation'] == 'do-not-merge'
|
|
2635
2635
|
assert after_verify['route_expectation']['expected_query'] == 'rp_probe=1'
|
|
2636
2636
|
assert after_verify['route_expectation']['expected_hash'] == '#pricing-probe'
|
|
2637
|
-
assert
|
|
2638
|
-
assert request['
|
|
2639
|
-
assert
|
|
2640
|
-
assert
|
|
2641
|
-
assert
|
|
2642
|
-
assert 'page.waitForURL: Timeout 15000ms exceeded' in
|
|
2643
|
-
|
|
2637
|
+
assert 'capture_quality' not in request
|
|
2638
|
+
assert request['recommended_stage'] is None
|
|
2639
|
+
assert request['continue_with_stage'] is None
|
|
2640
|
+
assert 'failed assertions' in request['summary']
|
|
2641
|
+
assert 'checks.routeMatches' in request['structured_interaction_failure_summary']
|
|
2642
|
+
assert 'page.waitForURL: Timeout 15000ms exceeded' in request['structured_interaction_failure_summary']
|
|
2643
|
+
assessment_request = after_verify['proof_assessment_request']
|
|
2644
|
+
assert 'structured-interaction-failure' in assessment_request['evidence_basis']
|
|
2645
|
+
assert any('checks.routeMatches' in blocker for blocker in assessment_request['hard_blockers'])
|
|
2646
|
+
assert assessment_request['semantic_context']['route']['expected_terminal_query'] == 'rp_probe=1'
|
|
2647
|
+
assert assessment_request['semantic_context']['route']['expected_terminal_hash'] == '#pricing-probe'
|
|
2648
|
+
assert assessment_request['semantic_context']['route']['after_observed_path'] == '/pricing'
|
|
2649
|
+
assert assessment_request['semantic_context']['route']['after_observed_query'] == ''
|
|
2650
|
+
assert assessment_request['semantic_context']['route']['after_observed_hash'] == ''
|
|
2644
2651
|
supporting = after_verify['verify_results']['after']['supporting_artifacts']
|
|
2645
2652
|
assert supporting['proof_evidence_present'] is True
|
|
2646
2653
|
assert supporting['has_structured_payload'] is True
|
|
@@ -2655,7 +2662,7 @@ def run_verify_interaction_authored_query_hash_mismatch_returns_author():
|
|
|
2655
2662
|
assert 'page.waitForURL: Timeout 15000ms exceeded' in synthetic_evidence['capture_error']
|
|
2656
2663
|
return {
|
|
2657
2664
|
'ok': True,
|
|
2658
|
-
'summary':
|
|
2665
|
+
'summary': request['summary'],
|
|
2659
2666
|
'recommended_stage': request['recommended_stage'],
|
|
2660
2667
|
}
|
|
2661
2668
|
finally:
|
|
@@ -3082,7 +3089,7 @@ if __name__ == '__main__':
|
|
|
3082
3089
|
'verify_interaction_terminal_route_from_proof_evidence': run_verify_interaction_terminal_route_from_proof_evidence(),
|
|
3083
3090
|
'verify_interaction_reverse_terminal_route_from_proof_evidence': run_verify_interaction_reverse_terminal_route_from_proof_evidence(),
|
|
3084
3091
|
'verify_interaction_hash_terminal_route_from_proof_evidence': run_verify_interaction_hash_terminal_route_from_proof_evidence(),
|
|
3085
|
-
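'verify_interaction_authored_query_hash_mismatch_returns_author': run_verify_interaction_authored_query_hash_mismatch_returns_author(),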
|
|
3092
|
+
'verify_interaction_authored_query_hash_mismatch_blocks_with_evidence': run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence(),
|
|
3086
3093
|
'verify_capture_retry_surfaces_script_timeout': run_verify_capture_retry_surfaces_script_timeout(),
|
|
3087
3094
|
'missing_baseline_guard': run_verify_missing_baseline(),
|
|
3088
3095
|
'ship_supervisor_gate': run_ship_missing_supervisor_gate(),
|