@riddledc/riddle-proof 0.8.8 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. package/dist/adapters/codex-exec-agent.cjs +75 -10
  2. package/dist/adapters/codex-exec-agent.js +1 -1
  3. package/dist/adapters/codex.cjs +75 -10
  4. package/dist/adapters/codex.js +1 -1
  5. package/dist/adapters/local-agent.cjs +75 -10
  6. package/dist/adapters/local-agent.js +1 -1
  7. package/dist/advanced/engine-harness.cjs +12 -0
  8. package/dist/advanced/engine-harness.js +1 -1
  9. package/dist/advanced/index.cjs +12 -0
  10. package/dist/advanced/index.d.cts +2 -2
  11. package/dist/advanced/index.d.ts +2 -2
  12. package/dist/advanced/index.js +1 -1
  13. package/dist/advanced/proof-run-core.d.cts +1 -1
  14. package/dist/advanced/proof-run-core.d.ts +1 -1
  15. package/dist/advanced/proof-run-engine.d.cts +2 -2
  16. package/dist/advanced/proof-run-engine.d.ts +2 -2
  17. package/dist/{chunk-V6VZ3CAI.js → chunk-2PXL3RDB.js} +2 -2
  18. package/dist/{chunk-E7ATYSYS.js → chunk-BBUO7HM4.js} +12 -0
  19. package/dist/{chunk-PYCQNK66.js → chunk-EEIYUZXE.js} +75 -10
  20. package/dist/cli/index.js +3 -3
  21. package/dist/cli.cjs +87 -10
  22. package/dist/cli.js +3 -3
  23. package/dist/codex-exec-agent.cjs +75 -10
  24. package/dist/codex-exec-agent.js +1 -1
  25. package/dist/engine-harness.cjs +12 -0
  26. package/dist/engine-harness.js +1 -1
  27. package/dist/index.cjs +87 -10
  28. package/dist/index.js +2 -2
  29. package/dist/local-agent.cjs +75 -10
  30. package/dist/local-agent.js +1 -1
  31. package/dist/{proof-run-core-CE0jx7wL.d.ts → proof-run-core-Ci9uFxMc.d.cts} +1 -1
  32. package/dist/{proof-run-core-CE0jx7wL.d.cts → proof-run-core-Ci9uFxMc.d.ts} +1 -1
  33. package/dist/proof-run-core.d.cts +1 -1
  34. package/dist/proof-run-core.d.ts +1 -1
  35. package/dist/{proof-run-engine-BlocjMni.d.cts → proof-run-engine-Bd1T43Dy.d.cts} +4 -4
  36. package/dist/{proof-run-engine-C_m8WJmX.d.ts → proof-run-engine-CXyhB-io.d.ts} +4 -4
  37. package/dist/proof-run-engine.d.cts +2 -2
  38. package/dist/proof-run-engine.d.ts +2 -2
  39. package/package.json +2 -2
  40. package/runtime/lib/verify.py +88 -2
  41. package/runtime/tests/recon_verify_smoke.py +147 -24
  42. package/runtime/tests/trust_boundary_regression.py +143 -0
@@ -48,6 +48,8 @@ function compactRecord(input) {
48
48
  }
49
49
 
50
50
  // src/codex-exec-agent.ts
51
+ var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
52
+ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
51
53
  var REFINED_INPUTS_SCHEMA = {
52
54
  type: "object",
53
55
  additionalProperties: false,
@@ -391,6 +393,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
391
393
  if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
392
394
  return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
393
395
  }
396
+ function resolveCodexTimeoutMs(config, request) {
397
+ if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
398
+ return Number(config.codexTimeoutMs);
399
+ }
400
+ return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
401
+ }
402
+ function isCodexLifecycleEvent(value) {
403
+ if (!value || typeof value !== "object" || Array.isArray(value)) return false;
404
+ const type = value.type;
405
+ return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
406
+ }
407
+ function analyzeCodexRunnerOutput(outputs) {
408
+ const eventTypes = /* @__PURE__ */ new Set();
409
+ let eventLineCount = 0;
410
+ let nonEventLineCount = 0;
411
+ const nonEventSamples = [];
412
+ for (const output of outputs) {
413
+ const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
414
+ for (const line of lines) {
415
+ try {
416
+ const parsed = JSON.parse(line);
417
+ if (isCodexLifecycleEvent(parsed)) {
418
+ eventLineCount += 1;
419
+ eventTypes.add(parsed.type);
420
+ continue;
421
+ }
422
+ } catch {
423
+ }
424
+ nonEventLineCount += 1;
425
+ if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
426
+ }
427
+ }
428
+ return {
429
+ eventLineCount,
430
+ eventTypes: Array.from(eventTypes),
431
+ nonEventLineCount,
432
+ nonEventSamples,
433
+ onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
434
+ };
435
+ }
394
436
  function isHarnessVerificationOnlyBlocker(blocker) {
395
437
  const text = blocker.toLowerCase();
396
438
  return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
@@ -414,21 +456,25 @@ function runnerMetrics(input) {
414
456
  exit_status: input.status ?? null,
415
457
  timed_out: input.timedOut || false,
416
458
  error_code: input.errorCode,
459
+ codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
460
+ codex_event_line_count: input.codexEventLineCount,
461
+ codex_non_event_line_count: input.codexNonEventLineCount,
417
462
  codex_command: input.config.codexCommand || "codex",
418
463
  codex_model: input.config.codexModel,
419
464
  codex_sandbox: input.config.codexSandbox || "workspace-write",
420
465
  codex_full_auto: input.config.codexFullAuto !== false,
421
- timeout_ms: Number(input.config.codexTimeoutMs || 6e5)
466
+ timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
422
467
  });
423
468
  }
424
469
  function createCodexExecJsonRunner(config = {}) {
425
470
  return (request) => {
426
471
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
427
472
  const startedMs = Date.now();
473
+ const timeoutMs = resolveCodexTimeoutMs(config, request);
428
474
  if (!request.workdir || !(0, import_node_fs.existsSync)(request.workdir)) {
429
475
  return {
430
476
  ok: false,
431
- metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
477
+ metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
432
478
  blocker: {
433
479
  code: "codex_workdir_missing",
434
480
  message: `Codex workdir does not exist for ${request.purpose}.`,
@@ -463,7 +509,7 @@ function createCodexExecJsonRunner(config = {}) {
463
509
  const proc = (0, import_node_child_process.spawnSync)(config.codexCommand || "codex", args, {
464
510
  input: request.prompt,
465
511
  encoding: "utf-8",
466
- timeout: Number(config.codexTimeoutMs || 6e5),
512
+ timeout: timeoutMs,
467
513
  maxBuffer: 10 * 1024 * 1024,
468
514
  env
469
515
  });
@@ -482,6 +528,7 @@ function createCodexExecJsonRunner(config = {}) {
482
528
  stderr: proc.stderr || "",
483
529
  status: proc.status,
484
530
  timedOut,
531
+ timeoutMs,
485
532
  errorCode: proc.error.code || "spawn_error"
486
533
  }),
487
534
  blocker: {
@@ -504,6 +551,7 @@ function createCodexExecJsonRunner(config = {}) {
504
551
  stdout: proc.stdout || "",
505
552
  stderr: proc.stderr || "",
506
553
  status: proc.status,
554
+ timeoutMs,
507
555
  errorCode: "nonzero_exit"
508
556
  }),
509
557
  blocker: {
@@ -516,12 +564,15 @@ function createCodexExecJsonRunner(config = {}) {
516
564
  const finalText = (0, import_node_fs.existsSync)(lastMessagePath) ? (0, import_node_fs.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
517
565
  const stdoutText = String(proc.stdout || "");
518
566
  const stderrText = String(proc.stderr || "");
519
- const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs([
567
+ const runnerOutputs = [
520
568
  { source: (0, import_node_fs.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
521
569
  { source: "stdout", text: stdoutText },
522
570
  { source: "stderr", text: stderrText }
523
- ], request.schema);
571
+ ];
572
+ const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
524
573
  if (!parsed) {
574
+ const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
575
+ const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
525
576
  return {
526
577
  ok: false,
527
578
  stdout: stdoutText,
@@ -535,12 +586,24 @@ function createCodexExecJsonRunner(config = {}) {
535
586
  stderr: stderrText,
536
587
  finalText,
537
588
  status: proc.status,
538
- errorCode: "invalid_json"
589
+ timeoutMs,
590
+ errorCode,
591
+ codexEventTypes: outputAnalysis.eventTypes,
592
+ codexEventLineCount: outputAnalysis.eventLineCount,
593
+ codexNonEventLineCount: outputAnalysis.nonEventLineCount
539
594
  }),
540
595
  blocker: {
541
- code: "codex_invalid_json",
542
- message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
543
- details: { finalText, stdout: stdoutText, stderr: stderrText }
596
+ code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
597
+ message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
598
+ details: {
599
+ finalText,
600
+ stdout: stdoutText,
601
+ stderr: stderrText,
602
+ event_types: outputAnalysis.eventTypes,
603
+ event_line_count: outputAnalysis.eventLineCount,
604
+ non_event_line_count: outputAnalysis.nonEventLineCount,
605
+ non_event_samples: outputAnalysis.nonEventSamples
606
+ }
544
607
  }
545
608
  };
546
609
  }
@@ -558,7 +621,8 @@ function createCodexExecJsonRunner(config = {}) {
558
621
  stderr: stderrText,
559
622
  finalText,
560
623
  parsedJsonSource,
561
- status: proc.status
624
+ status: proc.status,
625
+ timeoutMs
562
626
  })
563
627
  };
564
628
  } finally {
@@ -667,6 +731,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
667
731
  "Write a proof_plan and capture_script that will verify the exact user-facing change.",
668
732
  "Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
669
733
  "Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
734
+ "Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
670
735
  "Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
671
736
  "For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
672
737
  "For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
@@ -3,7 +3,7 @@ import {
3
3
  createCodexExecAgentAdapter,
4
4
  createCodexExecJsonRunner,
5
5
  runCodexExecAgentDoctor
6
- } from "../chunk-PYCQNK66.js";
6
+ } from "../chunk-EEIYUZXE.js";
7
7
  import "../chunk-VY4Y5U57.js";
8
8
  import "../chunk-MLKGABMK.js";
9
9
  export {
@@ -48,6 +48,8 @@ function compactRecord(input) {
48
48
  }
49
49
 
50
50
  // src/codex-exec-agent.ts
51
+ var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
52
+ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
51
53
  var REFINED_INPUTS_SCHEMA = {
52
54
  type: "object",
53
55
  additionalProperties: false,
@@ -391,6 +393,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
391
393
  if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
392
394
  return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
393
395
  }
396
+ function resolveCodexTimeoutMs(config, request) {
397
+ if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
398
+ return Number(config.codexTimeoutMs);
399
+ }
400
+ return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
401
+ }
402
+ function isCodexLifecycleEvent(value) {
403
+ if (!value || typeof value !== "object" || Array.isArray(value)) return false;
404
+ const type = value.type;
405
+ return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
406
+ }
407
+ function analyzeCodexRunnerOutput(outputs) {
408
+ const eventTypes = /* @__PURE__ */ new Set();
409
+ let eventLineCount = 0;
410
+ let nonEventLineCount = 0;
411
+ const nonEventSamples = [];
412
+ for (const output of outputs) {
413
+ const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
414
+ for (const line of lines) {
415
+ try {
416
+ const parsed = JSON.parse(line);
417
+ if (isCodexLifecycleEvent(parsed)) {
418
+ eventLineCount += 1;
419
+ eventTypes.add(parsed.type);
420
+ continue;
421
+ }
422
+ } catch {
423
+ }
424
+ nonEventLineCount += 1;
425
+ if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
426
+ }
427
+ }
428
+ return {
429
+ eventLineCount,
430
+ eventTypes: Array.from(eventTypes),
431
+ nonEventLineCount,
432
+ nonEventSamples,
433
+ onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
434
+ };
435
+ }
394
436
  function isHarnessVerificationOnlyBlocker(blocker) {
395
437
  const text = blocker.toLowerCase();
396
438
  return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
@@ -414,21 +456,25 @@ function runnerMetrics(input) {
414
456
  exit_status: input.status ?? null,
415
457
  timed_out: input.timedOut || false,
416
458
  error_code: input.errorCode,
459
+ codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
460
+ codex_event_line_count: input.codexEventLineCount,
461
+ codex_non_event_line_count: input.codexNonEventLineCount,
417
462
  codex_command: input.config.codexCommand || "codex",
418
463
  codex_model: input.config.codexModel,
419
464
  codex_sandbox: input.config.codexSandbox || "workspace-write",
420
465
  codex_full_auto: input.config.codexFullAuto !== false,
421
- timeout_ms: Number(input.config.codexTimeoutMs || 6e5)
466
+ timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
422
467
  });
423
468
  }
424
469
  function createCodexExecJsonRunner(config = {}) {
425
470
  return (request) => {
426
471
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
427
472
  const startedMs = Date.now();
473
+ const timeoutMs = resolveCodexTimeoutMs(config, request);
428
474
  if (!request.workdir || !(0, import_node_fs.existsSync)(request.workdir)) {
429
475
  return {
430
476
  ok: false,
431
- metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
477
+ metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
432
478
  blocker: {
433
479
  code: "codex_workdir_missing",
434
480
  message: `Codex workdir does not exist for ${request.purpose}.`,
@@ -463,7 +509,7 @@ function createCodexExecJsonRunner(config = {}) {
463
509
  const proc = (0, import_node_child_process.spawnSync)(config.codexCommand || "codex", args, {
464
510
  input: request.prompt,
465
511
  encoding: "utf-8",
466
- timeout: Number(config.codexTimeoutMs || 6e5),
512
+ timeout: timeoutMs,
467
513
  maxBuffer: 10 * 1024 * 1024,
468
514
  env
469
515
  });
@@ -482,6 +528,7 @@ function createCodexExecJsonRunner(config = {}) {
482
528
  stderr: proc.stderr || "",
483
529
  status: proc.status,
484
530
  timedOut,
531
+ timeoutMs,
485
532
  errorCode: proc.error.code || "spawn_error"
486
533
  }),
487
534
  blocker: {
@@ -504,6 +551,7 @@ function createCodexExecJsonRunner(config = {}) {
504
551
  stdout: proc.stdout || "",
505
552
  stderr: proc.stderr || "",
506
553
  status: proc.status,
554
+ timeoutMs,
507
555
  errorCode: "nonzero_exit"
508
556
  }),
509
557
  blocker: {
@@ -516,12 +564,15 @@ function createCodexExecJsonRunner(config = {}) {
516
564
  const finalText = (0, import_node_fs.existsSync)(lastMessagePath) ? (0, import_node_fs.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
517
565
  const stdoutText = String(proc.stdout || "");
518
566
  const stderrText = String(proc.stderr || "");
519
- const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs([
567
+ const runnerOutputs = [
520
568
  { source: (0, import_node_fs.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
521
569
  { source: "stdout", text: stdoutText },
522
570
  { source: "stderr", text: stderrText }
523
- ], request.schema);
571
+ ];
572
+ const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
524
573
  if (!parsed) {
574
+ const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
575
+ const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
525
576
  return {
526
577
  ok: false,
527
578
  stdout: stdoutText,
@@ -535,12 +586,24 @@ function createCodexExecJsonRunner(config = {}) {
535
586
  stderr: stderrText,
536
587
  finalText,
537
588
  status: proc.status,
538
- errorCode: "invalid_json"
589
+ timeoutMs,
590
+ errorCode,
591
+ codexEventTypes: outputAnalysis.eventTypes,
592
+ codexEventLineCount: outputAnalysis.eventLineCount,
593
+ codexNonEventLineCount: outputAnalysis.nonEventLineCount
539
594
  }),
540
595
  blocker: {
541
- code: "codex_invalid_json",
542
- message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
543
- details: { finalText, stdout: stdoutText, stderr: stderrText }
596
+ code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
597
+ message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
598
+ details: {
599
+ finalText,
600
+ stdout: stdoutText,
601
+ stderr: stderrText,
602
+ event_types: outputAnalysis.eventTypes,
603
+ event_line_count: outputAnalysis.eventLineCount,
604
+ non_event_line_count: outputAnalysis.nonEventLineCount,
605
+ non_event_samples: outputAnalysis.nonEventSamples
606
+ }
544
607
  }
545
608
  };
546
609
  }
@@ -558,7 +621,8 @@ function createCodexExecJsonRunner(config = {}) {
558
621
  stderr: stderrText,
559
622
  finalText,
560
623
  parsedJsonSource,
561
- status: proc.status
624
+ status: proc.status,
625
+ timeoutMs
562
626
  })
563
627
  };
564
628
  } finally {
@@ -667,6 +731,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
667
731
  "Write a proof_plan and capture_script that will verify the exact user-facing change.",
668
732
  "Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
669
733
  "Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
734
+ "Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
670
735
  "Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
671
736
  "For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
672
737
  "For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
@@ -3,7 +3,7 @@ import {
3
3
  createCodexExecAgentAdapter,
4
4
  createCodexExecJsonRunner,
5
5
  runCodexExecAgentDoctor
6
- } from "../chunk-PYCQNK66.js";
6
+ } from "../chunk-EEIYUZXE.js";
7
7
  import "../chunk-VY4Y5U57.js";
8
8
  import "../chunk-MLKGABMK.js";
9
9
  export {
@@ -48,6 +48,8 @@ function compactRecord(input) {
48
48
  }
49
49
 
50
50
  // src/codex-exec-agent.ts
51
+ var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
52
+ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
51
53
  var REFINED_INPUTS_SCHEMA = {
52
54
  type: "object",
53
55
  additionalProperties: false,
@@ -391,6 +393,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
391
393
  if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
392
394
  return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
393
395
  }
396
+ function resolveCodexTimeoutMs(config, request) {
397
+ if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
398
+ return Number(config.codexTimeoutMs);
399
+ }
400
+ return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
401
+ }
402
+ function isCodexLifecycleEvent(value) {
403
+ if (!value || typeof value !== "object" || Array.isArray(value)) return false;
404
+ const type = value.type;
405
+ return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
406
+ }
407
+ function analyzeCodexRunnerOutput(outputs) {
408
+ const eventTypes = /* @__PURE__ */ new Set();
409
+ let eventLineCount = 0;
410
+ let nonEventLineCount = 0;
411
+ const nonEventSamples = [];
412
+ for (const output of outputs) {
413
+ const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
414
+ for (const line of lines) {
415
+ try {
416
+ const parsed = JSON.parse(line);
417
+ if (isCodexLifecycleEvent(parsed)) {
418
+ eventLineCount += 1;
419
+ eventTypes.add(parsed.type);
420
+ continue;
421
+ }
422
+ } catch {
423
+ }
424
+ nonEventLineCount += 1;
425
+ if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
426
+ }
427
+ }
428
+ return {
429
+ eventLineCount,
430
+ eventTypes: Array.from(eventTypes),
431
+ nonEventLineCount,
432
+ nonEventSamples,
433
+ onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
434
+ };
435
+ }
394
436
  function isHarnessVerificationOnlyBlocker(blocker) {
395
437
  const text = blocker.toLowerCase();
396
438
  return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
@@ -414,21 +456,25 @@ function runnerMetrics(input) {
414
456
  exit_status: input.status ?? null,
415
457
  timed_out: input.timedOut || false,
416
458
  error_code: input.errorCode,
459
+ codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
460
+ codex_event_line_count: input.codexEventLineCount,
461
+ codex_non_event_line_count: input.codexNonEventLineCount,
417
462
  codex_command: input.config.codexCommand || "codex",
418
463
  codex_model: input.config.codexModel,
419
464
  codex_sandbox: input.config.codexSandbox || "workspace-write",
420
465
  codex_full_auto: input.config.codexFullAuto !== false,
421
- timeout_ms: Number(input.config.codexTimeoutMs || 6e5)
466
+ timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
422
467
  });
423
468
  }
424
469
  function createCodexExecJsonRunner(config = {}) {
425
470
  return (request) => {
426
471
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
427
472
  const startedMs = Date.now();
473
+ const timeoutMs = resolveCodexTimeoutMs(config, request);
428
474
  if (!request.workdir || !(0, import_node_fs.existsSync)(request.workdir)) {
429
475
  return {
430
476
  ok: false,
431
- metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
477
+ metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
432
478
  blocker: {
433
479
  code: "codex_workdir_missing",
434
480
  message: `Codex workdir does not exist for ${request.purpose}.`,
@@ -463,7 +509,7 @@ function createCodexExecJsonRunner(config = {}) {
463
509
  const proc = (0, import_node_child_process.spawnSync)(config.codexCommand || "codex", args, {
464
510
  input: request.prompt,
465
511
  encoding: "utf-8",
466
- timeout: Number(config.codexTimeoutMs || 6e5),
512
+ timeout: timeoutMs,
467
513
  maxBuffer: 10 * 1024 * 1024,
468
514
  env
469
515
  });
@@ -482,6 +528,7 @@ function createCodexExecJsonRunner(config = {}) {
482
528
  stderr: proc.stderr || "",
483
529
  status: proc.status,
484
530
  timedOut,
531
+ timeoutMs,
485
532
  errorCode: proc.error.code || "spawn_error"
486
533
  }),
487
534
  blocker: {
@@ -504,6 +551,7 @@ function createCodexExecJsonRunner(config = {}) {
504
551
  stdout: proc.stdout || "",
505
552
  stderr: proc.stderr || "",
506
553
  status: proc.status,
554
+ timeoutMs,
507
555
  errorCode: "nonzero_exit"
508
556
  }),
509
557
  blocker: {
@@ -516,12 +564,15 @@ function createCodexExecJsonRunner(config = {}) {
516
564
  const finalText = (0, import_node_fs.existsSync)(lastMessagePath) ? (0, import_node_fs.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
517
565
  const stdoutText = String(proc.stdout || "");
518
566
  const stderrText = String(proc.stderr || "");
519
- const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs([
567
+ const runnerOutputs = [
520
568
  { source: (0, import_node_fs.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
521
569
  { source: "stdout", text: stdoutText },
522
570
  { source: "stderr", text: stderrText }
523
- ], request.schema);
571
+ ];
572
+ const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
524
573
  if (!parsed) {
574
+ const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
575
+ const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
525
576
  return {
526
577
  ok: false,
527
578
  stdout: stdoutText,
@@ -535,12 +586,24 @@ function createCodexExecJsonRunner(config = {}) {
535
586
  stderr: stderrText,
536
587
  finalText,
537
588
  status: proc.status,
538
- errorCode: "invalid_json"
589
+ timeoutMs,
590
+ errorCode,
591
+ codexEventTypes: outputAnalysis.eventTypes,
592
+ codexEventLineCount: outputAnalysis.eventLineCount,
593
+ codexNonEventLineCount: outputAnalysis.nonEventLineCount
539
594
  }),
540
595
  blocker: {
541
- code: "codex_invalid_json",
542
- message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
543
- details: { finalText, stdout: stdoutText, stderr: stderrText }
596
+ code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
597
+ message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
598
+ details: {
599
+ finalText,
600
+ stdout: stdoutText,
601
+ stderr: stderrText,
602
+ event_types: outputAnalysis.eventTypes,
603
+ event_line_count: outputAnalysis.eventLineCount,
604
+ non_event_line_count: outputAnalysis.nonEventLineCount,
605
+ non_event_samples: outputAnalysis.nonEventSamples
606
+ }
544
607
  }
545
608
  };
546
609
  }
@@ -558,7 +621,8 @@ function createCodexExecJsonRunner(config = {}) {
558
621
  stderr: stderrText,
559
622
  finalText,
560
623
  parsedJsonSource,
561
- status: proc.status
624
+ status: proc.status,
625
+ timeoutMs
562
626
  })
563
627
  };
564
628
  } finally {
@@ -667,6 +731,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
667
731
  "Write a proof_plan and capture_script that will verify the exact user-facing change.",
668
732
  "Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
669
733
  "Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
734
+ "Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
670
735
  "Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
671
736
  "For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
672
737
  "For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
@@ -3,7 +3,7 @@ import {
3
3
  createCodexExecAgentAdapter,
4
4
  createCodexExecJsonRunner,
5
5
  runCodexExecAgentDoctor
6
- } from "../chunk-PYCQNK66.js";
6
+ } from "../chunk-EEIYUZXE.js";
7
7
  import "../chunk-VY4Y5U57.js";
8
8
  import "../chunk-MLKGABMK.js";
9
9
  export {
@@ -5587,6 +5587,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
5587
5587
  if (checkpoint === "verify_agent_retry") {
5588
5588
  const next = recommendedContinuation(result);
5589
5589
  if (next) return { next };
5590
+ return {
5591
+ blocker: {
5592
+ code: "proof_assessment_blocked",
5593
+ checkpoint,
5594
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
5595
+ details: compactRecord({
5596
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
5597
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
5598
+ checkpointContract: result.checkpointContract || null
5599
+ })
5600
+ }
5601
+ };
5590
5602
  }
5591
5603
  if (checkpoint === "awaiting_stage_advance") {
5592
5604
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
@@ -2,7 +2,7 @@ import {
2
2
  createDisabledRiddleProofAgentAdapter,
3
3
  readRiddleProofRunStatus,
4
4
  runRiddleProofEngineHarness
5
- } from "../chunk-E7ATYSYS.js";
5
+ } from "../chunk-BBUO7HM4.js";
6
6
  import "../chunk-YZUVEJ5B.js";
7
7
  import "../chunk-FMOYUYH2.js";
8
8
  import "../chunk-5N5QFI2S.js";
@@ -6123,6 +6123,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
6123
6123
  if (checkpoint === "verify_agent_retry") {
6124
6124
  const next = recommendedContinuation(result);
6125
6125
  if (next) return { next };
6126
+ return {
6127
+ blocker: {
6128
+ code: "proof_assessment_blocked",
6129
+ checkpoint,
6130
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
6131
+ details: compactRecord({
6132
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
6133
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
6134
+ checkpointContract: result.checkpointContract || null
6135
+ })
6136
+ }
6137
+ };
6126
6138
  }
6127
6139
  if (checkpoint === "awaiting_stage_advance") {
6128
6140
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
@@ -1,5 +1,5 @@
1
1
  export { b as runner } from '../runner-4LJ5z0D-.cjs';
2
2
  export { l as engineHarness } from '../engine-harness-LBfqbFSe.cjs';
3
- export { p as proofRunCore } from '../proof-run-core-CE0jx7wL.cjs';
4
- export { p as proofRunEngine } from '../proof-run-engine-BlocjMni.cjs';
3
+ export { p as proofRunCore } from '../proof-run-core-Ci9uFxMc.cjs';
4
+ export { p as proofRunEngine } from '../proof-run-engine-Bd1T43Dy.cjs';
5
5
  import '../types.cjs';
@@ -1,5 +1,5 @@
1
1
  export { b as runner } from '../runner-BdQpOkZD.js';
2
2
  export { l as engineHarness } from '../engine-harness-CMACHP6A.js';
3
- export { p as proofRunCore } from '../proof-run-core-CE0jx7wL.js';
4
- export { p as proofRunEngine } from '../proof-run-engine-C_m8WJmX.js';
3
+ export { p as proofRunCore } from '../proof-run-core-Ci9uFxMc.js';
4
+ export { p as proofRunEngine } from '../proof-run-engine-CXyhB-io.js';
5
5
  import '../types.js';
@@ -6,7 +6,7 @@ import {
6
6
  } from "../chunk-5N6MQCLC.js";
7
7
  import {
8
8
  engine_harness_exports
9
- } from "../chunk-E7ATYSYS.js";
9
+ } from "../chunk-BBUO7HM4.js";
10
10
  import "../chunk-YZUVEJ5B.js";
11
11
  import "../chunk-FMOYUYH2.js";
12
12
  import {
@@ -1 +1 @@
1
- export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from '../proof-run-core-CE0jx7wL.cjs';
1
+ export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from '../proof-run-core-Ci9uFxMc.cjs';
@@ -1 +1 @@
1
- export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from '../proof-run-core-CE0jx7wL.js';
1
+ export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from '../proof-run-core-Ci9uFxMc.js';
@@ -1,2 +1,2 @@
1
- export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-BlocjMni.cjs';
2
- import '../proof-run-core-CE0jx7wL.cjs';
1
+ export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-Bd1T43Dy.cjs';
2
+ import '../proof-run-core-Ci9uFxMc.cjs';
@@ -1,2 +1,2 @@
1
- export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-C_m8WJmX.js';
2
- import '../proof-run-core-CE0jx7wL.js';
1
+ export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from '../proof-run-engine-CXyhB-io.js';
2
+ import '../proof-run-core-Ci9uFxMc.js';