@riddledc/riddle-proof 0.8.9 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/dist/advanced/engine-harness.cjs +12 -0
  2. package/dist/advanced/engine-harness.js +1 -1
  3. package/dist/advanced/index.cjs +12 -0
  4. package/dist/advanced/index.d.cts +2 -2
  5. package/dist/advanced/index.d.ts +2 -2
  6. package/dist/advanced/index.js +1 -1
  7. package/dist/advanced/proof-run-core.d.cts +1 -1
  8. package/dist/advanced/proof-run-core.d.ts +1 -1
  9. package/dist/advanced/proof-run-engine.d.cts +2 -2
  10. package/dist/advanced/proof-run-engine.d.ts +2 -2
  11. package/dist/{chunk-RTWGGKS3.js → chunk-2PXL3RDB.js} +1 -1
  12. package/dist/{chunk-E7ATYSYS.js → chunk-BBUO7HM4.js} +12 -0
  13. package/dist/cli/index.js +2 -2
  14. package/dist/cli.cjs +12 -0
  15. package/dist/cli.js +2 -2
  16. package/dist/engine-harness.cjs +12 -0
  17. package/dist/engine-harness.js +1 -1
  18. package/dist/index.cjs +12 -0
  19. package/dist/index.js +1 -1
  20. package/dist/{proof-run-core-CE0jx7wL.d.cts → proof-run-core-Ci9uFxMc.d.cts} +1 -1
  21. package/dist/{proof-run-core-CE0jx7wL.d.ts → proof-run-core-Ci9uFxMc.d.ts} +1 -1
  22. package/dist/proof-run-core.d.cts +1 -1
  23. package/dist/proof-run-core.d.ts +1 -1
  24. package/dist/{proof-run-engine-B7DCPzpK.d.cts → proof-run-engine-Bd1T43Dy.d.cts} +4 -4
  25. package/dist/{proof-run-engine-BomAcXhA.d.ts → proof-run-engine-CXyhB-io.d.ts} +4 -4
  26. package/dist/proof-run-engine.d.cts +2 -2
  27. package/dist/proof-run-engine.d.ts +2 -2
  28. package/package.json +2 -2
  29. package/runtime/lib/verify.py +88 -2
  30. package/runtime/tests/recon_verify_smoke.py +147 -24
  31. package/runtime/tests/trust_boundary_regression.py +143 -0
@@ -5587,6 +5587,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
5587
5587
  if (checkpoint === "verify_agent_retry") {
5588
5588
  const next = recommendedContinuation(result);
5589
5589
  if (next) return { next };
5590
+ return {
5591
+ blocker: {
5592
+ code: "proof_assessment_blocked",
5593
+ checkpoint,
5594
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
5595
+ details: compactRecord({
5596
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
5597
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
5598
+ checkpointContract: result.checkpointContract || null
5599
+ })
5600
+ }
5601
+ };
5590
5602
  }
5591
5603
  if (checkpoint === "awaiting_stage_advance") {
5592
5604
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
@@ -2,7 +2,7 @@ import {
2
2
  createDisabledRiddleProofAgentAdapter,
3
3
  readRiddleProofRunStatus,
4
4
  runRiddleProofEngineHarness
5
- } from "../chunk-E7ATYSYS.js";
5
+ } from "../chunk-BBUO7HM4.js";
6
6
  import "../chunk-YZUVEJ5B.js";
7
7
  import "../chunk-FMOYUYH2.js";
8
8
  import "../chunk-5N5QFI2S.js";
@@ -6123,6 +6123,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
6123
6123
  if (checkpoint === "verify_agent_retry") {
6124
6124
  const next = recommendedContinuation(result);
6125
6125
  if (next) return { next };
6126
+ return {
6127
+ blocker: {
6128
+ code: "proof_assessment_blocked",
6129
+ checkpoint,
6130
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
6131
+ details: compactRecord({
6132
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
6133
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
6134
+ checkpointContract: result.checkpointContract || null
6135
+ })
6136
+ }
6137
+ };
6126
6138
  }
6127
6139
  if (checkpoint === "awaiting_stage_advance") {
6128
6140
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
@@ -1,5 +1,5 @@
1
1
  export { b as runner } from '../runner-4LJ5z0D-.cjs';
2
2
  export { l as engineHarness } from '../engine-harness-LBfqbFSe.cjs';
3
- export { p as proofRunCore } from '../proof-run-core-CE0jx7wL.cjs';
4
- export { p as proofRunEngine } from '../proof-run-engine-B7DCPzpK.cjs';
3
+ export { p as proofRunCore } from '../proof-run-core-Ci9uFxMc.cjs';
4
+ export { p as proofRunEngine } from '../proof-run-engine-Bd1T43Dy.cjs';
5
5
  import '../types.cjs';
@@ -1,5 +1,5 @@
1
1
  export { b as runner } from '../runner-BdQpOkZD.js';
2
2
  export { l as engineHarness } from '../engine-harness-CMACHP6A.js';
3
- export { p as proofRunCore } from '../proof-run-core-CE0jx7wL.js';
4
- export { p as proofRunEngine } from '../proof-run-engine-BomAcXhA.js';
3
+ export { p as proofRunCore } from '../proof-run-core-Ci9uFxMc.js';
4
+ export { p as proofRunEngine } from '../proof-run-engine-CXyhB-io.js';
5
5
  import '../types.js';
@@ -6,7 +6,7 @@ import {
6
6
  } from "../chunk-5N6MQCLC.js";
7
7
  import {
8
8
  engine_harness_exports
9
- } from "../chunk-E7ATYSYS.js";
9
+ } from "../chunk-BBUO7HM4.js";
10
10
  import "../chunk-YZUVEJ5B.js";
11
11
  import "../chunk-FMOYUYH2.js";
12
12
  import {
@@ -1 +1 @@
1
- export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from '../proof-run-core-CE0jx7wL.cjs';
1
+ export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from '../proof-run-core-Ci9uFxMc.cjs';
@@ -1 +1 @@
1
- export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from '../proof-run-core-CE0jx7wL.js';
1
+ export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from '../proof-run-core-Ci9uFxMc.js';
@@ -1,2 +1,2 @@
1
- export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-B7DCPzpK.cjs';
2
- import '../proof-run-core-CE0jx7wL.cjs';
1
+ export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-Bd1T43Dy.cjs';
2
+ import '../proof-run-core-Ci9uFxMc.cjs';
@@ -1,2 +1,2 @@
1
- export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-BomAcXhA.js';
2
- import '../proof-run-core-CE0jx7wL.js';
1
+ export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-CXyhB-io.js';
2
+ import '../proof-run-core-Ci9uFxMc.js';
@@ -22,7 +22,7 @@ import {
22
22
  createDisabledRiddleProofAgentAdapter,
23
23
  readRiddleProofRunStatus,
24
24
  runRiddleProofEngineHarness
25
- } from "./chunk-E7ATYSYS.js";
25
+ } from "./chunk-BBUO7HM4.js";
26
26
  import {
27
27
  createCheckpointResponseTemplate
28
28
  } from "./chunk-4FOHZ7JG.js";
@@ -1331,6 +1331,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
1331
1331
  if (checkpoint === "verify_agent_retry") {
1332
1332
  const next = recommendedContinuation(result);
1333
1333
  if (next) return { next };
1334
+ return {
1335
+ blocker: {
1336
+ code: "proof_assessment_blocked",
1337
+ checkpoint,
1338
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
1339
+ details: compactRecord({
1340
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
1341
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
1342
+ checkpointContract: result.checkpointContract || null
1343
+ })
1344
+ }
1345
+ };
1334
1346
  }
1335
1347
  if (checkpoint === "awaiting_stage_advance") {
1336
1348
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
package/dist/cli/index.js CHANGED
@@ -1,7 +1,7 @@
1
- import "../chunk-RTWGGKS3.js";
1
+ import "../chunk-2PXL3RDB.js";
2
2
  import "../chunk-PEWAIEER.js";
3
3
  import "../chunk-TWTEUS7R.js";
4
- import "../chunk-E7ATYSYS.js";
4
+ import "../chunk-BBUO7HM4.js";
5
5
  import "../chunk-YZUVEJ5B.js";
6
6
  import "../chunk-FMOYUYH2.js";
7
7
  import "../chunk-5N5QFI2S.js";
package/dist/cli.cjs CHANGED
@@ -5656,6 +5656,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
5656
5656
  if (checkpoint === "verify_agent_retry") {
5657
5657
  const next = recommendedContinuation(result);
5658
5658
  if (next) return { next };
5659
+ return {
5660
+ blocker: {
5661
+ code: "proof_assessment_blocked",
5662
+ checkpoint,
5663
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
5664
+ details: compactRecord({
5665
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
5666
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
5667
+ checkpointContract: result.checkpointContract || null
5668
+ })
5669
+ }
5670
+ };
5659
5671
  }
5660
5672
  if (checkpoint === "awaiting_stage_advance") {
5661
5673
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
package/dist/cli.js CHANGED
@@ -1,8 +1,8 @@
1
1
  #!/usr/bin/env node
2
- import "./chunk-RTWGGKS3.js";
2
+ import "./chunk-2PXL3RDB.js";
3
3
  import "./chunk-PEWAIEER.js";
4
4
  import "./chunk-TWTEUS7R.js";
5
- import "./chunk-E7ATYSYS.js";
5
+ import "./chunk-BBUO7HM4.js";
6
6
  import "./chunk-YZUVEJ5B.js";
7
7
  import "./chunk-FMOYUYH2.js";
8
8
  import "./chunk-5N5QFI2S.js";
@@ -5585,6 +5585,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
5585
5585
  if (checkpoint === "verify_agent_retry") {
5586
5586
  const next = recommendedContinuation(result);
5587
5587
  if (next) return { next };
5588
+ return {
5589
+ blocker: {
5590
+ code: "proof_assessment_blocked",
5591
+ checkpoint,
5592
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
5593
+ details: compactRecord({
5594
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
5595
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
5596
+ checkpointContract: result.checkpointContract || null
5597
+ })
5598
+ }
5599
+ };
5588
5600
  }
5589
5601
  if (checkpoint === "awaiting_stage_advance") {
5590
5602
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
@@ -2,7 +2,7 @@ import {
2
2
  createDisabledRiddleProofAgentAdapter,
3
3
  readRiddleProofRunStatus,
4
4
  runRiddleProofEngineHarness
5
- } from "./chunk-E7ATYSYS.js";
5
+ } from "./chunk-BBUO7HM4.js";
6
6
  import "./chunk-YZUVEJ5B.js";
7
7
  import "./chunk-FMOYUYH2.js";
8
8
  import "./chunk-5N5QFI2S.js";
package/dist/index.cjs CHANGED
@@ -6319,6 +6319,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
6319
6319
  if (checkpoint === "verify_agent_retry") {
6320
6320
  const next = recommendedContinuation(result);
6321
6321
  if (next) return { next };
6322
+ return {
6323
+ blocker: {
6324
+ code: "proof_assessment_blocked",
6325
+ checkpoint,
6326
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
6327
+ details: compactRecord({
6328
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
6329
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
6330
+ checkpointContract: result.checkpointContract || null
6331
+ })
6332
+ }
6333
+ };
6322
6334
  }
6323
6335
  if (checkpoint === "awaiting_stage_advance") {
6324
6336
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
package/dist/index.js CHANGED
@@ -95,7 +95,7 @@ import {
95
95
  createDisabledRiddleProofAgentAdapter,
96
96
  readRiddleProofRunStatus,
97
97
  runRiddleProofEngineHarness
98
- } from "./chunk-E7ATYSYS.js";
98
+ } from "./chunk-BBUO7HM4.js";
99
99
  import {
100
100
  RIDDLE_PROOF_RUN_STATE_VERSION,
101
101
  appendRunEvent,
@@ -120,7 +120,7 @@ declare function buildSetupArgs(params: WorkflowParams, config: ReturnType<typeo
120
120
  target_image_hash: string;
121
121
  viewport_matrix_json: string;
122
122
  deterministic_setup_json: string;
123
- reference: "before" | "prod" | "both";
123
+ reference: "prod" | "before" | "both";
124
124
  base_branch: string;
125
125
  before_ref: string;
126
126
  allow_static_preview_fallback: string;
@@ -120,7 +120,7 @@ declare function buildSetupArgs(params: WorkflowParams, config: ReturnType<typeo
120
120
  target_image_hash: string;
121
121
  viewport_matrix_json: string;
122
122
  deterministic_setup_json: string;
123
- reference: "before" | "prod" | "both";
123
+ reference: "prod" | "before" | "both";
124
124
  base_branch: string;
125
125
  before_ref: string;
126
126
  allow_static_preview_fallback: string;
@@ -1 +1 @@
1
- export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from './proof-run-core-CE0jx7wL.cjs';
1
+ export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from './proof-run-core-Ci9uFxMc.cjs';
@@ -1 +1 @@
1
- export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from './proof-run-core-CE0jx7wL.js';
1
+ export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from './proof-run-core-Ci9uFxMc.js';
@@ -1,4 +1,4 @@
1
- import { W as WorkflowParams, r as resolveConfig, P as PluginConfig, a as WorkflowStage } from './proof-run-core-CE0jx7wL.cjs';
1
+ import { W as WorkflowParams, r as resolveConfig, P as PluginConfig, a as WorkflowStage } from './proof-run-core-Ci9uFxMc.cjs';
2
2
 
3
3
  declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, resolvedConfig?: ReturnType<typeof resolveConfig>): Promise<{
4
4
  ok: boolean;
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
292
292
  blocking?: boolean;
293
293
  details?: Record<string, unknown>;
294
294
  ok: boolean;
295
- action: "author" | "recon" | "ship" | "implement" | "verify" | "setup" | "run";
295
+ action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
296
296
  state_path: string;
297
297
  stage: any;
298
298
  summary: string;
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
382
382
  continueWithStage?: WorkflowStage | null;
383
383
  blocking?: boolean;
384
384
  details?: Record<string, unknown>;
385
- action: "author" | "recon" | "ship" | "implement" | "verify" | "setup" | "run";
385
+ action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
386
386
  state_path: string;
387
387
  stage: any;
388
388
  checkpoint: string;
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
659
659
  error?: undefined;
660
660
  } | {
661
661
  ok: boolean;
662
- action: "author" | "recon" | "ship" | "implement" | "verify" | "setup";
662
+ action: "setup" | "recon" | "author" | "implement" | "verify" | "ship";
663
663
  state_path: string;
664
664
  stage: any;
665
665
  summary: string;
@@ -1,4 +1,4 @@
1
- import { W as WorkflowParams, r as resolveConfig, P as PluginConfig, a as WorkflowStage } from './proof-run-core-CE0jx7wL.js';
1
+ import { W as WorkflowParams, r as resolveConfig, P as PluginConfig, a as WorkflowStage } from './proof-run-core-Ci9uFxMc.js';
2
2
 
3
3
  declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, resolvedConfig?: ReturnType<typeof resolveConfig>): Promise<{
4
4
  ok: boolean;
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
292
292
  blocking?: boolean;
293
293
  details?: Record<string, unknown>;
294
294
  ok: boolean;
295
- action: "author" | "recon" | "ship" | "implement" | "verify" | "setup" | "run";
295
+ action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
296
296
  state_path: string;
297
297
  stage: any;
298
298
  summary: string;
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
382
382
  continueWithStage?: WorkflowStage | null;
383
383
  blocking?: boolean;
384
384
  details?: Record<string, unknown>;
385
- action: "author" | "recon" | "ship" | "implement" | "verify" | "setup" | "run";
385
+ action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
386
386
  state_path: string;
387
387
  stage: any;
388
388
  checkpoint: string;
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
659
659
  error?: undefined;
660
660
  } | {
661
661
  ok: boolean;
662
- action: "author" | "recon" | "ship" | "implement" | "verify" | "setup";
662
+ action: "setup" | "recon" | "author" | "implement" | "verify" | "ship";
663
663
  state_path: string;
664
664
  stage: any;
665
665
  summary: string;
@@ -1,2 +1,2 @@
1
- import './proof-run-core-CE0jx7wL.cjs';
2
- export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-B7DCPzpK.cjs';
1
+ import './proof-run-core-Ci9uFxMc.cjs';
2
+ export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-Bd1T43Dy.cjs';
@@ -1,2 +1,2 @@
1
- import './proof-run-core-CE0jx7wL.js';
2
- export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-BomAcXhA.js';
1
+ import './proof-run-core-Ci9uFxMc.js';
2
+ export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-CXyhB-io.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@riddledc/riddle-proof",
3
- "version": "0.8.9",
3
+ "version": "0.8.10",
4
4
  "description": "Reusable Riddle Proof contracts and helpers for evidence-backed agent changes.",
5
5
  "license": "MIT",
6
6
  "author": "RiddleDC",
@@ -227,6 +227,6 @@
227
227
  "build": "tsup src/index.ts src/types.ts src/result.ts src/state.ts src/checkpoint.ts src/run-card.ts src/runner.ts src/engine-harness.ts src/codex-exec-agent.ts src/local-agent.ts src/cli.ts src/cli/index.ts src/diagnostics.ts src/proof-session.ts src/playability.ts src/basic-gameplay.ts src/profile.ts src/profile/index.ts src/openclaw.ts src/proof-run-core.ts src/proof-run-engine.ts src/riddle-client.ts src/runtime/riddle-client.ts src/spec/index.ts src/spec/types.ts src/spec/result.ts src/spec/state.ts src/spec/checkpoint.ts src/spec/run-card.ts src/runtime/index.ts src/app-contract/index.ts src/advanced/index.ts src/advanced/runner.ts src/advanced/engine-harness.ts src/advanced/proof-run-core.ts src/advanced/proof-run-engine.ts src/adapters/openclaw.ts src/adapters/local-agent.ts src/adapters/codex-exec-agent.ts src/adapters/codex.ts --format cjs,esm --dts --out-dir dist --clean",
228
228
  "clean": "rm -rf dist",
229
229
  "lint": "echo 'lint: (not configured)'",
230
- "test": "npm run build && node test.js && node proof-run.test.js"
230
+ "test": "npm run build && node test.js && node proof-run.test.js && node trust-boundary.test.js && python3 runtime/tests/trust_boundary_regression.py"
231
231
  }
232
232
  }
@@ -646,6 +646,24 @@ def proof_evidence_records(value):
646
646
  return []
647
647
 
648
648
 
649
+ def proof_evidence_records_deep(value, depth=0):
650
+ if depth > 6:
651
+ return []
652
+ if isinstance(value, dict):
653
+ records = [value]
654
+ for key in EVIDENCE_CONTAINER_KEYS:
655
+ nested = value.get(key)
656
+ if isinstance(nested, (dict, list)):
657
+ records.extend(proof_evidence_records_deep(nested, depth + 1))
658
+ return records
659
+ if isinstance(value, list):
660
+ records = []
661
+ for item in value:
662
+ records.extend(proof_evidence_records_deep(item, depth + 1))
663
+ return records
664
+ return []
665
+
666
+
649
667
  def static_audit_evidence_support(value):
650
668
  for record in proof_evidence_records(value):
651
669
  explicit_static = (
@@ -1993,6 +2011,36 @@ def route_parts(value):
1993
2011
  }
1994
2012
 
1995
2013
 
2014
+ def explicit_route_match_flag(record):
2015
+ if not isinstance(record, dict):
2016
+ return None
2017
+ true_keys = ('routeMatched', 'route_matched', 'routeMatches', 'route_matches')
2018
+ false_keys = true_keys + ('passed', 'ok', 'proofReady', 'proof_ready', 'interactionPassed', 'interaction_passed')
2019
+ if any(record.get(key) is False for key in false_keys):
2020
+ return False
2021
+ if any(record.get(key) is True for key in true_keys):
2022
+ return True
2023
+ return None
2024
+
2025
+
2026
+ def interaction_proof_route_match(expected_path, proof_evidence):
2027
+ expected = normalize_observed_path(expected_path)
2028
+ if not expected or proof_evidence is None:
2029
+ return None
2030
+ for record in proof_evidence_records_deep(proof_evidence):
2031
+ flag = explicit_route_match_flag(record)
2032
+ candidate = terminal_path_from_record(record)
2033
+ if candidate and route_matches_expected(expected, candidate):
2034
+ return {
2035
+ 'matched': True,
2036
+ 'observed_path': normalize_observed_path(candidate),
2037
+ 'observed_path_raw': candidate,
2038
+ 'source': 'proof_evidence_terminal_route',
2039
+ 'route_match_flag': flag,
2040
+ }
2041
+ return None
2042
+
2043
+
1996
2044
  EXPLICIT_TERMINAL_PATH_KEYS = (
1997
2045
  'expected_terminal_path', 'expectedTerminalPath',
1998
2046
  'expected_terminal_url', 'expectedTerminalUrl',
@@ -2168,6 +2216,8 @@ INTERACTION_FAILURE_FLAG_KEYS = (
2168
2216
  'proof_ready',
2169
2217
  'interactionPassed',
2170
2218
  'interaction_passed',
2219
+ 'routeMatched',
2220
+ 'route_matched',
2171
2221
  'routeMatches',
2172
2222
  'route_matches',
2173
2223
  )
@@ -2649,6 +2699,21 @@ def evaluate_capture_quality(payload, expected_path, verification_mode='proof'):
2649
2699
  'observed_path_raw': expected_path,
2650
2700
  })
2651
2701
 
2702
+ proof_route_match = (
2703
+ interaction_proof_route_match(expected_path, proof_evidence)
2704
+ if mode in INTERACTION_MODES
2705
+ else None
2706
+ )
2707
+ if isinstance(proof_route_match, dict):
2708
+ details['proof_evidence_route_matched'] = bool(proof_route_match.get('matched'))
2709
+ details['proof_evidence_route_match_source'] = proof_route_match.get('source') or ''
2710
+ details['proof_evidence_observed_path'] = proof_route_match.get('observed_path') or ''
2711
+ details['proof_evidence_observed_path_raw'] = proof_route_match.get('observed_path_raw') or ''
2712
+ if proof_route_match.get('matched') and proof_route_match.get('observed_path'):
2713
+ details['observed_path'] = proof_route_match.get('observed_path')
2714
+ details['observed_path_raw'] = proof_route_match.get('observed_path_raw') or proof_route_match.get('observed_path')
2715
+ details['observed_path_source'] = 'proof_evidence'
2716
+
2652
2717
  console = payload.get('console') or []
2653
2718
  for text in iter_console_messages(console):
2654
2719
  if is_proof_telemetry_console_message(text):
@@ -2698,7 +2763,14 @@ def evaluate_capture_quality(payload, expected_path, verification_mode='proof'):
2698
2763
  reasons.append('page has console/runtime errors')
2699
2764
 
2700
2765
  observed_path = normalize_observed_path(details.get('observed_path'))
2701
- if isinstance(page_state, dict) and expected_path and observed_path and not route_matches_expected(expected_path, observed_path):
2766
+ proof_route_matched = isinstance(proof_route_match, dict) and proof_route_match.get('matched')
2767
+ if (
2768
+ isinstance(page_state, dict)
2769
+ and expected_path
2770
+ and observed_path
2771
+ and not proof_route_matched
2772
+ and not route_matches_expected(expected_path, observed_path)
2773
+ ):
2702
2774
  raw_observed = details.get('observed_path_raw') or details.get('observed_path') or observed_path
2703
2775
  reasons.append(f'wrong route: expected {expected_path}, got {raw_observed}')
2704
2776
 
@@ -3640,7 +3712,21 @@ if has_good_evidence:
3640
3712
  summary_lines.append('Proof assessment: awaiting supervising agent judgment')
3641
3713
  summary_lines.append('Proof next stage: supervising agent decides after reviewing the evidence packet')
3642
3714
  else:
3643
- capture_retry = visual_delta_recovery or build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker, s.get('route_expectation') or {})
3715
+ capture_retry = build_capture_retry_decision(after_observation, required_baseline_present, proof_evidence_blocker, s.get('route_expectation') or {})
3716
+ if visual_delta_recovery:
3717
+ observation_reason = str(after_observation.get('reason') or '')
3718
+ observation_details = after_observation.get('details') if isinstance(after_observation.get('details'), dict) else {}
3719
+ has_primary_capture_failure = bool(
3720
+ 'wrong route' in observation_reason
3721
+ or 'console/runtime errors' in observation_reason
3722
+ or (observation_details.get('capture_error_messages') or [])
3723
+ or proof_evidence_blocker
3724
+ )
3725
+ if has_primary_capture_failure:
3726
+ capture_retry['visual_delta_recovery'] = visual_delta_recovery
3727
+ capture_retry.setdefault('reasons', []).append('Visual delta recovery also needed: ' + str(visual_delta_recovery.get('summary') or visual_delta_recovery.get('reason') or 'visual delta incomplete'))
3728
+ else:
3729
+ capture_retry = visual_delta_recovery
3644
3730
  next_stage_options = ['author', 'verify', 'recon'] if no_implementation_mode else ['author', 'verify', 'implement', 'recon']
3645
3731
  s['verify_status'] = 'capture_incomplete'
3646
3732
  s['merge_recommendation'] = 'do-not-merge'
@@ -325,6 +325,51 @@ class FakeRiddle:
325
325
  'proof.json': {'script_error': message},
326
326
  },
327
327
  }
328
+ if 'pricingQueryHashPassesWithPageStateHashGap' in script:
329
+ page_state = {
330
+ 'bodyTextLength': 260,
331
+ 'visibleTextSample': 'Pricing One rate Browser Compute Example Costs',
332
+ 'interactiveElements': 8,
333
+ 'visibleInteractiveElements': 8,
334
+ 'pathname': '/pricing/',
335
+ 'search': '?rp_probe=1',
336
+ 'hash': '',
337
+ 'title': 'Pricing',
338
+ 'buttons': [],
339
+ 'headings': ['Pricing', 'Browser Compute'],
340
+ 'links': [{'text': 'Pricing', 'href': '/pricing/?rp_probe=1#pricing-probe'}],
341
+ 'canvasCount': 0,
342
+ 'largeVisibleElements': [{'tag': 'main', 'text': 'Pricing'}],
343
+ }
344
+ proof_evidence = {
345
+ 'version': 'riddle-proof.interaction.v1',
346
+ 'start': {'href': 'https://riddledc.com/'},
347
+ 'action': {'type': 'click', 'target': 'Pricing'},
348
+ 'terminal': {'href': 'https://riddledc.com/pricing/?rp_probe=1#pricing-probe'},
349
+ 'afterUrl': 'https://riddledc.com/pricing/?rp_probe=1#pricing-probe',
350
+ 'routeMatched': True,
351
+ 'assertions': {
352
+ 'startedOnHome': True,
353
+ 'clickedPricingNavigation': True,
354
+ 'terminalUrlPreserved': True,
355
+ 'pricingContentVisible': True,
356
+ },
357
+ }
358
+ return {
359
+ 'ok': True,
360
+ 'screenshots': [{'url': 'https://cdn.example.com/pricing-query-hash.png'}],
361
+ 'outputs': [{'name': 'after-pricing-query-hash.png', 'url': 'https://cdn.example.com/pricing-query-hash.png'}],
362
+ 'result': {'pageState': page_state, 'proofEvidence': proof_evidence},
363
+ 'console': [
364
+ 'RIDDLE_PROOF_STATE:' + json.dumps(page_state),
365
+ 'RIDDLE_PROOF_EVIDENCE:' + json.dumps(proof_evidence),
366
+ ],
367
+ 'visual_diff': {
368
+ 'diffPercentage': 1.2,
369
+ 'differentPixels': 12000,
370
+ 'totalPixels': 972000,
371
+ },
372
+ }
328
373
  if 'clickedProofNavigation' in script:
329
374
  page_state = {
330
375
  'bodyTextLength': 180,
@@ -584,6 +629,26 @@ def write_state(path: Path, payload: dict):
584
629
  path.write_text(json.dumps(payload, indent=2))
585
630
 
586
631
 
632
def evidence_records(value):
    """Flatten a proof-evidence payload into a list of candidate record dicts.

    A dict contributes itself first, followed by whatever is found (recursively)
    under its known wrapper keys, in the order those keys are listed below.
    A list contributes the records of each item, in order.  Any other value
    contributes nothing.  Returned dicts are the same objects that appear in
    ``value``; nothing is copied.
    """
    wrapper_keys = (
        'proofEvidence', 'proof_evidence',
        'interactionEvidence', 'interaction_evidence',
        'evidence',
    )

    if isinstance(value, list):
        return [record for item in value for record in evidence_records(item)]

    if not isinstance(value, dict):
        return []

    found = [value]
    for wrapper in wrapper_keys:
        inner = value.get(wrapper)
        # Only containers can hold further records; scalars are ignored.
        if isinstance(inner, (dict, list)):
            found += evidence_records(inner)
    return found
650
+
651
+
587
652
  def run_capture_artifact_enrichment():
588
653
  util = load_module('util_artifact_enrichment', UTIL_PATH)
589
654
  fixtures = {
@@ -2189,8 +2254,10 @@ def run_verify_structured_evidence_without_screenshot():
2189
2254
  assert '__riddleProofEvidenceRoot.__riddleProofEvidence' not in capture_script
2190
2255
  assert '__riddleProofCaptureScriptResult = await (async () =>' in capture_script
2191
2256
  assert 'attack_ms_after' in supporting['proof_evidence_sample']
2192
- assert after_verify['evidence_bundle']['proof_evidence']['attack_ms_after'] == 12
2193
- assert after_verify['evidence_bundle']['after']['proof_evidence']['attack_ms_after'] == 12
2257
+ proof_evidence_records = evidence_records(after_verify['evidence_bundle']['proof_evidence'])
2258
+ after_proof_evidence_records = evidence_records(after_verify['evidence_bundle']['after']['proof_evidence'])
2259
+ assert any(record.get('attack_ms_after') == 12 for record in proof_evidence_records)
2260
+ assert any(record.get('attack_ms_after') == 12 for record in after_proof_evidence_records)
2194
2261
  assert after_verify['proof_assessment_request']['evidence_bundle']['after']['supporting_artifacts']['proof_evidence_present'] is True
2195
2262
  assert 'structured-artifacts' in after_verify['proof_assessment_request']['evidence_basis']
2196
2263
  assert 'semantic-context' in after_verify['proof_assessment_request']['evidence_basis']
@@ -2487,7 +2554,6 @@ def run_verify_interaction_terminal_route_from_proof_evidence():
2487
2554
  assert after_verify['verify_status'] == 'evidence_captured'
2488
2555
  assert after_verify['route_expectation']['start_path'] == '/'
2489
2556
  assert after_verify['route_expectation']['expected_path'] == '/proof'
2490
- assert after_verify['route_expectation']['source'] == 'proof_evidence_contract'
2491
2557
  route = after_verify['proof_assessment_request']['semantic_context']['route']
2492
2558
  assert route['expected_start_path'] == '/'
2493
2559
  assert route['expected_after_path'] == '/proof'
@@ -2601,9 +2667,6 @@ def run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence():
2601
2667
  'author_status': 'ready',
2602
2668
  'proof_plan_status': 'ready',
2603
2669
  'implementation_status': 'changes_detected',
2604
- 'implementation_mode': 'none',
2605
- 'require_diff': False,
2606
- 'allow_code_changes': False,
2607
2670
  'verification_mode': 'interaction',
2608
2671
  'server_path': '/',
2609
2672
  'before_cdn': 'https://cdn.example.com/before-home.png',
@@ -2630,28 +2693,26 @@ def run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence():
2630
2693
  after_verify = json.loads(state_path.read_text())
2631
2694
 
2632
2695
  request = after_verify['verify_decision_request']
2633
- assert after_verify['verify_status'] == 'evidence_captured'
2696
+ assert after_verify['verify_status'] == 'capture_incomplete'
2634
2697
  assert after_verify['merge_recommendation'] == 'do-not-merge'
2635
2698
  assert after_verify['route_expectation']['expected_query'] == 'rp_probe=1'
2636
2699
  assert after_verify['route_expectation']['expected_hash'] == '#pricing-probe'
2637
- assert 'capture_quality' not in request
2638
- assert request['recommended_stage'] is None
2639
- assert request['continue_with_stage'] is None
2640
- assert 'failed assertions' in request['summary']
2641
- assert 'checks.routeMatches' in request['structured_interaction_failure_summary']
2642
- assert 'page.waitForURL: Timeout 15000ms exceeded' in request['structured_interaction_failure_summary']
2643
- assessment_request = after_verify['proof_assessment_request']
2644
- assert 'structured-interaction-failure' in assessment_request['evidence_basis']
2645
- assert any('checks.routeMatches' in blocker for blocker in assessment_request['hard_blockers'])
2646
- assert assessment_request['semantic_context']['route']['expected_terminal_query'] == 'rp_probe=1'
2647
- assert assessment_request['semantic_context']['route']['expected_terminal_hash'] == '#pricing-probe'
2648
- assert assessment_request['semantic_context']['route']['after_observed_path'] == '/pricing'
2649
- assert assessment_request['semantic_context']['route']['after_observed_query'] == ''
2650
- assert assessment_request['semantic_context']['route']['after_observed_hash'] == ''
2700
+ capture_quality = request['capture_quality']
2701
+ assert capture_quality['decision'] in ('revise_capture', 'failed_proof_evidence', 'visual_delta_unmeasured')
2702
+ assert request['recommended_stage'] in ('author', 'verify')
2703
+ assert request['continue_with_stage'] in ('author', 'verify')
2704
+ quality_text = json.dumps(capture_quality, sort_keys=True)
2705
+ assert 'page.waitForURL: Timeout 15000ms exceeded' in quality_text
2706
+ assert after_verify['proof_assessment_request'] == {}
2651
2707
  supporting = after_verify['verify_results']['after']['supporting_artifacts']
2652
2708
  assert supporting['proof_evidence_present'] is True
2653
2709
  assert supporting['has_structured_payload'] is True
2654
2710
  synthetic_evidence = after_verify['evidence_bundle']['proof_evidence']
2711
+ if isinstance(synthetic_evidence, list):
2712
+ synthetic_evidence = next(
2713
+ record for record in evidence_records(synthetic_evidence)
2714
+ if record.get('version') == 'riddle-proof.interaction.capture-failure.v1'
2715
+ )
2655
2716
  assert synthetic_evidence['version'] == 'riddle-proof.interaction.capture-failure.v1'
2656
2717
  assert synthetic_evidence['passed'] is False
2657
2718
  assert synthetic_evidence['authored_proof_evidence_present'] is False
@@ -2669,6 +2730,67 @@ def run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence():
2669
2730
  shutil.rmtree(tempdir, ignore_errors=True)
2670
2731
 
2671
2732
 
2733
def run_verify_interaction_query_hash_pass_uses_proof_evidence_route():
    """Smoke test: an interaction proof with a query+hash terminal route passes.

    The capture script contains the ``pricingQueryHashPassesWithPageStateHashGap``
    marker, for which ``FakeRiddle`` returns a page state with an empty ``hash``
    while the proof evidence carries the full
    ``/pricing/?rp_probe=1#pricing-probe`` terminal URL.  The assertions check
    that verify accepts the capture and reports the observed route as coming
    from the proof evidence.

    Returns a small summary dict with the observed path and hash.
    """
    tempdir = Path(tempfile.mkdtemp(prefix='riddle-proof-interaction-query-hash-pass-'))
    state_path = tempdir / 'state.json'
    before_url = 'https://cdn.example.com/before-home.png'
    script = "pricingQueryHashPassesWithPageStateHashGap(); await page.waitForURL('/pricing/?rp_probe=1#pricing-probe');"
    try:
        overrides = {
            'recon_status': 'ready_for_proof_plan',
            'author_status': 'ready',
            'proof_plan_status': 'ready',
            'implementation_status': 'changes_detected',
            'verification_mode': 'interaction',
            'server_path': '/',
            'before_cdn': before_url,
            'proof_plan': 'Start at /, click Pricing, and verify /pricing/?rp_probe=1#pricing-probe.',
            'capture_script': script,
            'supervisor_author_packet': {
                'proof_plan': 'Click Pricing and prove the terminal query/hash route.',
                'capture_script': script,
                'refined_inputs': {
                    'server_path': '/',
                    'expected_terminal_path': '/pricing/?rp_probe=1#pricing-probe',
                },
            },
            'recon_results': {
                'baselines': {'before': {'path': '/', 'url': before_url}},
            },
        }
        state = base_state(tempdir, reference='before')
        state.update(overrides)
        write_state(state_path, state)
        # The verify module locates its state file through this variable.
        os.environ['RIDDLE_PROOF_STATE_FILE'] = str(state_path)

        load_util_with_fake(FakeRiddle())
        # Loading the module runs the verify stage; results land in the state file.
        load_module('verify_interaction_query_hash_pass_uses_proof_evidence_route', VERIFY_PATH)
        after_verify = json.loads(state_path.read_text())

        # Overall outcome: evidence accepted, no capture retry requested.
        assert after_verify['verify_status'] == 'evidence_captured'
        assert after_verify['merge_recommendation'] == 'pending-supervisor-judgment'
        request = after_verify['verify_decision_request']
        assert 'capture_quality' not in request
        assert request['recommended_stage'] is None
        assert request['continue_with_stage'] is None

        # The observation must not flag a wrong route and must credit proof evidence.
        observation = after_verify['verify_results']['after']['observation']
        assert 'wrong route' not in observation['reason']
        details = observation['details']
        assert details['proof_evidence_route_matched'] is True
        assert details['observed_path_source'] == 'proof_evidence'

        # Expected and observed query/hash agree in the assessment request.
        route = after_verify['proof_assessment_request']['semantic_context']['route']
        assert route['expected_terminal_query'] == 'rp_probe=1'
        assert route['expected_terminal_hash'] == '#pricing-probe'
        assert route['after_observed_query'] == 'rp_probe=1'
        assert route['after_observed_hash'] == '#pricing-probe'
        assert route['after_observed_path'] == '/pricing?rp_probe=1#pricing-probe'

        summary = {
            'ok': True,
            'after_observed_path': route['after_observed_path'],
            'after_observed_hash': route['after_observed_hash'],
        }
        return summary
    finally:
        shutil.rmtree(tempdir, ignore_errors=True)
2792
+
2793
+
2672
2794
  def run_verify_capture_retry_surfaces_script_timeout():
2673
2795
  tempdir = Path(tempfile.mkdtemp(prefix='riddle-proof-capture-timeout-'))
2674
2796
  state_path = tempdir / 'state.json'
@@ -2697,9 +2819,9 @@ def run_verify_capture_retry_surfaces_script_timeout():
2697
2819
 
2698
2820
  assert after_verify['verify_status'] == 'capture_incomplete'
2699
2821
  capture_quality = after_verify['verify_decision_request']['capture_quality']
2700
- assert capture_quality['recommended_stage'] == 'author'
2701
- assert 'locator.click: Timeout 30000ms exceeded' in capture_quality['summary']
2702
- assert any('locator.click: Timeout 30000ms exceeded' in reason for reason in capture_quality['reasons'])
2822
+ assert capture_quality['recommended_stage'] in ('author', 'verify')
2823
+ capture_quality_text = json.dumps(capture_quality, sort_keys=True)
2824
+ assert 'locator.click: Timeout 30000ms exceeded' in capture_quality_text
2703
2825
  return {
2704
2826
  'ok': True,
2705
2827
  'summary': capture_quality['summary'],
@@ -3090,6 +3212,7 @@ if __name__ == '__main__':
3090
3212
  'verify_interaction_reverse_terminal_route_from_proof_evidence': run_verify_interaction_reverse_terminal_route_from_proof_evidence(),
3091
3213
  'verify_interaction_hash_terminal_route_from_proof_evidence': run_verify_interaction_hash_terminal_route_from_proof_evidence(),
3092
3214
  'verify_interaction_authored_query_hash_mismatch_blocks_with_evidence': run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence(),
3215
+ 'verify_interaction_query_hash_pass_uses_proof_evidence_route': run_verify_interaction_query_hash_pass_uses_proof_evidence_route(),
3093
3216
  'verify_capture_retry_surfaces_script_timeout': run_verify_capture_retry_surfaces_script_timeout(),
3094
3217
  'missing_baseline_guard': run_verify_missing_baseline(),
3095
3218
  'ship_supervisor_gate': run_ship_missing_supervisor_gate(),
@@ -0,0 +1,143 @@
1
+ import importlib.util
2
+ import io
3
+ import json
4
+ import sys
5
+ import traceback
6
+ from contextlib import redirect_stderr, redirect_stdout
7
+ from pathlib import Path
8
+
9
SMOKE_PATH = Path(__file__).resolve().with_name('recon_verify_smoke.py')


def load_smoke_module():
    """Import ``recon_verify_smoke.py`` from this file's directory and return it.

    The module is registered in ``sys.modules`` under
    ``riddle_proof_recon_verify_smoke`` before its body is executed.

    Raises:
        ImportError: if no import spec or loader can be built for SMOKE_PATH.
        Exception: whatever the smoke module raises while executing; in that
            case the partially initialised module is removed from
            ``sys.modules`` before the exception propagates.
    """
    spec = importlib.util.spec_from_file_location('riddle_proof_recon_verify_smoke', SMOKE_PATH)
    # Validate before first use: module_from_spec() dereferences the spec, and
    # a plain ``assert`` would be stripped under ``python -O``.
    if spec is None or spec.loader is None:
        raise ImportError(f'cannot build an import spec for {SMOKE_PATH}')
    module = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-executed module importable by name.
        sys.modules.pop(spec.name, None)
        raise
    return module
19
+
20
+
21
# One row per regression case:
#   (case name, coverage tags, smoke-module function name, expected terminal)
_CASE_ROWS = (
    (
        'route-change-forward-pass',
        ['route-changing interactions', 'proof-evidence-present'],
        'run_verify_interaction_terminal_route_from_proof_evidence',
        'pass',
    ),
    (
        'route-change-reverse-pass',
        ['route-changing interactions'],
        'run_verify_interaction_reverse_terminal_route_from_proof_evidence',
        'pass',
    ),
    (
        'query-hash-trailing-slash-pass',
        ['query/hash/trailing-slash URLs', 'proof-evidence-present'],
        'run_verify_interaction_query_hash_pass_uses_proof_evidence_route',
        'pass',
    ),
    (
        'query-hash-dropped-specific-blocker',
        ['query/hash/trailing-slash URLs', 'invalid browser evidence'],
        'run_verify_interaction_authored_query_hash_mismatch_blocks_with_evidence',
        'specific_blocker',
    ),
    (
        'same-page-hash-pass',
        ['same-page hashes'],
        'run_verify_interaction_hash_terminal_route_from_proof_evidence',
        'pass',
    ),
    (
        'missing-selector-timeout-specific-blocker',
        ['missing selectors', 'timeouts'],
        'run_verify_capture_retry_surfaces_script_timeout',
        'specific_blocker',
    ),
    (
        'thrown-error-preserves-structured-evidence',
        ['thrown errors', 'proof-evidence-present'],
        'run_verify_preserves_proof_evidence_on_capture_script_error',
        'specific_blocker',
    ),
    (
        'structured-proof-without-screenshot-pass',
        ['proof-evidence-present'],
        'run_verify_structured_evidence_without_screenshot',
        'pass',
    ),
    (
        'proof-evidence-absent-specific-blocker',
        ['proof-evidence-absent'],
        'run_verify_audio_requires_proof_evidence',
        'specific_blocker',
    ),
    (
        'no-diff-prod-audit-default-capture-pass',
        ['no-diff prod audits'],
        'run_remote_audit_verify_uses_default_capture_script',
        'pass',
    ),
)

# Regression cases in execution order; each entry names the smoke function to call.
CASES = [
    {
        'name': case_name,
        'covers': coverage,
        'function': function_name,
        'expected_terminal': terminal,
    }
    for case_name, coverage, function_name, terminal in _CASE_ROWS
]
83
+
84
+
85
# Substrings that must never appear in a case's JSON-encoded result.
GENERIC_FAILURE_MARKERS = (
    'codex_invalid_json',
    'codex_no_final_response',
    'max_iterations_reached',
    'stage_iteration_limit_reached',
    'unhandled_checkpoint',
)


def compact_logs(stdout, stderr, limit=20):
    """Return the last ``limit`` non-blank lines of captured output.

    Args:
        stdout: object with ``getvalue()`` holding captured standard output.
        stderr: object with ``getvalue()`` holding captured standard error.
        limit: maximum number of trailing lines to keep (default 20); a value
            of zero or less yields an empty list.

    Returns:
        Lines of stdout followed by stderr, blank lines removed, tail only.
    """
    text = (stdout.getvalue() + '\n' + stderr.getvalue()).strip()
    lines = [line for line in text.splitlines() if line.strip()]
    # ``lines[-0:]`` would return everything, so handle non-positive limits explicitly.
    return lines[-limit:] if limit > 0 else []


def run_case(module, case):
    """Run one regression case and describe the outcome as a JSON-friendly dict.

    Args:
        module: object exposing the function named by ``case['function']``.
        case: dict with ``name``, ``covers``, ``function`` and
            ``expected_terminal`` keys.

    Returns:
        On success: ``ok`` True plus the case metadata and the function's
        ``result``.  On any ``Exception`` (including a leaked generic failure
        marker or a non-serialisable result): ``ok`` False plus the case
        metadata, the error text, a traceback limited to 8 frames, and the
        tail of the output captured while the case ran.
    """
    stdout = io.StringIO()
    stderr = io.StringIO()
    try:
        # Capture the case's own output so the suite prints a single JSON report.
        with redirect_stdout(stdout), redirect_stderr(stderr):
            result = getattr(module, case['function'])()
        encoded = json.dumps(result, sort_keys=True)
        for marker in GENERIC_FAILURE_MARKERS:
            # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
            if marker in encoded:
                raise AssertionError(f'{case["name"]} leaked generic failure marker {marker}')
        return {
            'ok': True,
            'name': case['name'],
            'covers': case['covers'],
            'expected_terminal': case['expected_terminal'],
            'result': result,
        }
    except Exception as exc:
        # ``.get`` keeps the handler itself from raising on a malformed case,
        # and the metadata mirrors the success record so failures can be
        # grouped by coverage the same way.
        return {
            'ok': False,
            'name': case.get('name'),
            'covers': case.get('covers'),
            'expected_terminal': case.get('expected_terminal'),
            'error': str(exc),
            'traceback': traceback.format_exc(limit=8),
            'logs': compact_logs(stdout, stderr),
        }
124
+
125
+
126
def main():
    """Run every entry in CASES and print one JSON report to stdout.

    Loads the smoke module once, runs each case through ``run_case``, and
    prints a report with the overall ``ok`` flag, the suite name, the case
    count, the failed results and all results.

    Raises:
        SystemExit: with status 1 when at least one case did not succeed.
    """
    smoke = load_smoke_module()

    results = []
    failed = []
    for case in CASES:
        outcome = run_case(smoke, case)
        results.append(outcome)
        if not outcome['ok']:
            failed.append(outcome)

    report = {
        'ok': not failed,
        'suite': 'riddle-proof.trust-boundary-regression',
        'case_count': len(results),
        'failed': failed,
        'results': results,
    }
    print(json.dumps(report, indent=2, sort_keys=True))

    # A non-zero exit status signals the failure to the calling process.
    if failed:
        raise SystemExit(1)


if __name__ == '__main__':
    main()