@riddledc/riddle-proof 0.8.8 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/adapters/codex-exec-agent.cjs +75 -10
  2. package/dist/adapters/codex-exec-agent.js +1 -1
  3. package/dist/adapters/codex.cjs +75 -10
  4. package/dist/adapters/codex.js +1 -1
  5. package/dist/adapters/local-agent.cjs +75 -10
  6. package/dist/adapters/local-agent.js +1 -1
  7. package/dist/advanced/engine-harness.cjs +12 -0
  8. package/dist/advanced/engine-harness.js +1 -1
  9. package/dist/advanced/index.cjs +12 -0
  10. package/dist/advanced/index.d.cts +2 -2
  11. package/dist/advanced/index.d.ts +2 -2
  12. package/dist/advanced/index.js +1 -1
  13. package/dist/advanced/proof-run-core.d.cts +1 -1
  14. package/dist/advanced/proof-run-core.d.ts +1 -1
  15. package/dist/advanced/proof-run-engine.d.cts +2 -2
  16. package/dist/advanced/proof-run-engine.d.ts +2 -2
  17. package/dist/{chunk-V6VZ3CAI.js → chunk-2PXL3RDB.js} +2 -2
  18. package/dist/{chunk-E7ATYSYS.js → chunk-BBUO7HM4.js} +12 -0
  19. package/dist/{chunk-PYCQNK66.js → chunk-EEIYUZXE.js} +75 -10
  20. package/dist/cli/index.js +3 -3
  21. package/dist/cli.cjs +87 -10
  22. package/dist/cli.js +3 -3
  23. package/dist/codex-exec-agent.cjs +75 -10
  24. package/dist/codex-exec-agent.js +1 -1
  25. package/dist/engine-harness.cjs +12 -0
  26. package/dist/engine-harness.js +1 -1
  27. package/dist/index.cjs +87 -10
  28. package/dist/index.js +2 -2
  29. package/dist/local-agent.cjs +75 -10
  30. package/dist/local-agent.js +1 -1
  31. package/dist/{proof-run-core-CE0jx7wL.d.ts → proof-run-core-Ci9uFxMc.d.cts} +1 -1
  32. package/dist/{proof-run-core-CE0jx7wL.d.cts → proof-run-core-Ci9uFxMc.d.ts} +1 -1
  33. package/dist/proof-run-core.d.cts +1 -1
  34. package/dist/proof-run-core.d.ts +1 -1
  35. package/dist/{proof-run-engine-BlocjMni.d.cts → proof-run-engine-Bd1T43Dy.d.cts} +4 -4
  36. package/dist/{proof-run-engine-C_m8WJmX.d.ts → proof-run-engine-CXyhB-io.d.ts} +4 -4
  37. package/dist/proof-run-engine.d.cts +2 -2
  38. package/dist/proof-run-engine.d.ts +2 -2
  39. package/package.json +2 -2
  40. package/runtime/lib/verify.py +88 -2
  41. package/runtime/tests/recon_verify_smoke.py +147 -24
  42. package/runtime/tests/trust_boundary_regression.py +143 -0
@@ -22,14 +22,14 @@ import {
22
22
  createDisabledRiddleProofAgentAdapter,
23
23
  readRiddleProofRunStatus,
24
24
  runRiddleProofEngineHarness
25
- } from "./chunk-E7ATYSYS.js";
25
+ } from "./chunk-BBUO7HM4.js";
26
26
  import {
27
27
  createCheckpointResponseTemplate
28
28
  } from "./chunk-4FOHZ7JG.js";
29
29
  import {
30
30
  createCodexExecAgentAdapter,
31
31
  runCodexExecAgentDoctor
32
- } from "./chunk-PYCQNK66.js";
32
+ } from "./chunk-EEIYUZXE.js";
33
33
 
34
34
  // src/cli.ts
35
35
  import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "fs";
@@ -1331,6 +1331,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
1331
1331
  if (checkpoint === "verify_agent_retry") {
1332
1332
  const next = recommendedContinuation(result);
1333
1333
  if (next) return { next };
1334
+ return {
1335
+ blocker: {
1336
+ code: "proof_assessment_blocked",
1337
+ checkpoint,
1338
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
1339
+ details: compactRecord({
1340
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
1341
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
1342
+ checkpointContract: result.checkpointContract || null
1343
+ })
1344
+ }
1345
+ };
1334
1346
  }
1335
1347
  if (checkpoint === "awaiting_stage_advance") {
1336
1348
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
@@ -7,6 +7,8 @@ import { execFileSync, spawnSync } from "child_process";
7
7
  import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs";
8
8
  import os from "os";
9
9
  import path from "path";
10
+ var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
11
+ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
10
12
  var REFINED_INPUTS_SCHEMA = {
11
13
  type: "object",
12
14
  additionalProperties: false,
@@ -350,6 +352,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
350
352
  if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
351
353
  return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
352
354
  }
355
+ function resolveCodexTimeoutMs(config, request) {
356
+ if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
357
+ return Number(config.codexTimeoutMs);
358
+ }
359
+ return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
360
+ }
361
+ function isCodexLifecycleEvent(value) {
362
+ if (!value || typeof value !== "object" || Array.isArray(value)) return false;
363
+ const type = value.type;
364
+ return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
365
+ }
366
+ function analyzeCodexRunnerOutput(outputs) {
367
+ const eventTypes = /* @__PURE__ */ new Set();
368
+ let eventLineCount = 0;
369
+ let nonEventLineCount = 0;
370
+ const nonEventSamples = [];
371
+ for (const output of outputs) {
372
+ const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
373
+ for (const line of lines) {
374
+ try {
375
+ const parsed = JSON.parse(line);
376
+ if (isCodexLifecycleEvent(parsed)) {
377
+ eventLineCount += 1;
378
+ eventTypes.add(parsed.type);
379
+ continue;
380
+ }
381
+ } catch {
382
+ }
383
+ nonEventLineCount += 1;
384
+ if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
385
+ }
386
+ }
387
+ return {
388
+ eventLineCount,
389
+ eventTypes: Array.from(eventTypes),
390
+ nonEventLineCount,
391
+ nonEventSamples,
392
+ onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
393
+ };
394
+ }
353
395
  function isHarnessVerificationOnlyBlocker(blocker) {
354
396
  const text = blocker.toLowerCase();
355
397
  return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
@@ -373,21 +415,25 @@ function runnerMetrics(input) {
373
415
  exit_status: input.status ?? null,
374
416
  timed_out: input.timedOut || false,
375
417
  error_code: input.errorCode,
418
+ codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
419
+ codex_event_line_count: input.codexEventLineCount,
420
+ codex_non_event_line_count: input.codexNonEventLineCount,
376
421
  codex_command: input.config.codexCommand || "codex",
377
422
  codex_model: input.config.codexModel,
378
423
  codex_sandbox: input.config.codexSandbox || "workspace-write",
379
424
  codex_full_auto: input.config.codexFullAuto !== false,
380
- timeout_ms: Number(input.config.codexTimeoutMs || 6e5)
425
+ timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
381
426
  });
382
427
  }
383
428
  function createCodexExecJsonRunner(config = {}) {
384
429
  return (request) => {
385
430
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
386
431
  const startedMs = Date.now();
432
+ const timeoutMs = resolveCodexTimeoutMs(config, request);
387
433
  if (!request.workdir || !existsSync(request.workdir)) {
388
434
  return {
389
435
  ok: false,
390
- metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
436
+ metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
391
437
  blocker: {
392
438
  code: "codex_workdir_missing",
393
439
  message: `Codex workdir does not exist for ${request.purpose}.`,
@@ -422,7 +468,7 @@ function createCodexExecJsonRunner(config = {}) {
422
468
  const proc = spawnSync(config.codexCommand || "codex", args, {
423
469
  input: request.prompt,
424
470
  encoding: "utf-8",
425
- timeout: Number(config.codexTimeoutMs || 6e5),
471
+ timeout: timeoutMs,
426
472
  maxBuffer: 10 * 1024 * 1024,
427
473
  env
428
474
  });
@@ -441,6 +487,7 @@ function createCodexExecJsonRunner(config = {}) {
441
487
  stderr: proc.stderr || "",
442
488
  status: proc.status,
443
489
  timedOut,
490
+ timeoutMs,
444
491
  errorCode: proc.error.code || "spawn_error"
445
492
  }),
446
493
  blocker: {
@@ -463,6 +510,7 @@ function createCodexExecJsonRunner(config = {}) {
463
510
  stdout: proc.stdout || "",
464
511
  stderr: proc.stderr || "",
465
512
  status: proc.status,
513
+ timeoutMs,
466
514
  errorCode: "nonzero_exit"
467
515
  }),
468
516
  blocker: {
@@ -475,12 +523,15 @@ function createCodexExecJsonRunner(config = {}) {
475
523
  const finalText = existsSync(lastMessagePath) ? readFileSync(lastMessagePath, "utf-8") : String(proc.stdout || "");
476
524
  const stdoutText = String(proc.stdout || "");
477
525
  const stderrText = String(proc.stderr || "");
478
- const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs([
526
+ const runnerOutputs = [
479
527
  { source: existsSync(lastMessagePath) ? "last_message" : "stdout", text: finalText },
480
528
  { source: "stdout", text: stdoutText },
481
529
  { source: "stderr", text: stderrText }
482
- ], request.schema);
530
+ ];
531
+ const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
483
532
  if (!parsed) {
533
+ const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
534
+ const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
484
535
  return {
485
536
  ok: false,
486
537
  stdout: stdoutText,
@@ -494,12 +545,24 @@ function createCodexExecJsonRunner(config = {}) {
494
545
  stderr: stderrText,
495
546
  finalText,
496
547
  status: proc.status,
497
- errorCode: "invalid_json"
548
+ timeoutMs,
549
+ errorCode,
550
+ codexEventTypes: outputAnalysis.eventTypes,
551
+ codexEventLineCount: outputAnalysis.eventLineCount,
552
+ codexNonEventLineCount: outputAnalysis.nonEventLineCount
498
553
  }),
499
554
  blocker: {
500
- code: "codex_invalid_json",
501
- message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
502
- details: { finalText, stdout: stdoutText, stderr: stderrText }
555
+ code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
556
+ message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
557
+ details: {
558
+ finalText,
559
+ stdout: stdoutText,
560
+ stderr: stderrText,
561
+ event_types: outputAnalysis.eventTypes,
562
+ event_line_count: outputAnalysis.eventLineCount,
563
+ non_event_line_count: outputAnalysis.nonEventLineCount,
564
+ non_event_samples: outputAnalysis.nonEventSamples
565
+ }
503
566
  }
504
567
  };
505
568
  }
@@ -517,7 +580,8 @@ function createCodexExecJsonRunner(config = {}) {
517
580
  stderr: stderrText,
518
581
  finalText,
519
582
  parsedJsonSource,
520
- status: proc.status
583
+ status: proc.status,
584
+ timeoutMs
521
585
  })
522
586
  };
523
587
  } finally {
@@ -626,6 +690,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
626
690
  "Write a proof_plan and capture_script that will verify the exact user-facing change.",
627
691
  "Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
628
692
  "Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
693
+ "Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
629
694
  "Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
630
695
  "For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
631
696
  "For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
package/dist/cli/index.js CHANGED
@@ -1,12 +1,12 @@
1
- import "../chunk-V6VZ3CAI.js";
1
+ import "../chunk-2PXL3RDB.js";
2
2
  import "../chunk-PEWAIEER.js";
3
3
  import "../chunk-TWTEUS7R.js";
4
- import "../chunk-E7ATYSYS.js";
4
+ import "../chunk-BBUO7HM4.js";
5
5
  import "../chunk-YZUVEJ5B.js";
6
6
  import "../chunk-FMOYUYH2.js";
7
7
  import "../chunk-5N5QFI2S.js";
8
8
  import "../chunk-4FOHZ7JG.js";
9
9
  import "../chunk-JFQXAJH2.js";
10
- import "../chunk-PYCQNK66.js";
10
+ import "../chunk-EEIYUZXE.js";
11
11
  import "../chunk-VY4Y5U57.js";
12
12
  import "../chunk-MLKGABMK.js";
package/dist/cli.cjs CHANGED
@@ -5656,6 +5656,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
5656
5656
  if (checkpoint === "verify_agent_retry") {
5657
5657
  const next = recommendedContinuation(result);
5658
5658
  if (next) return { next };
5659
+ return {
5660
+ blocker: {
5661
+ code: "proof_assessment_blocked",
5662
+ checkpoint,
5663
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
5664
+ details: compactRecord({
5665
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
5666
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
5667
+ checkpointContract: result.checkpointContract || null
5668
+ })
5669
+ }
5670
+ };
5659
5671
  }
5660
5672
  if (checkpoint === "awaiting_stage_advance") {
5661
5673
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
@@ -5868,6 +5880,8 @@ var import_node_child_process3 = require("child_process");
5868
5880
  var import_node_fs4 = require("fs");
5869
5881
  var import_node_os = __toESM(require("os"), 1);
5870
5882
  var import_node_path4 = __toESM(require("path"), 1);
5883
+ var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
5884
+ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
5871
5885
  var REFINED_INPUTS_SCHEMA = {
5872
5886
  type: "object",
5873
5887
  additionalProperties: false,
@@ -6211,6 +6225,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
6211
6225
  if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
6212
6226
  return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
6213
6227
  }
6228
+ function resolveCodexTimeoutMs(config, request) {
6229
+ if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
6230
+ return Number(config.codexTimeoutMs);
6231
+ }
6232
+ return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
6233
+ }
6234
+ function isCodexLifecycleEvent(value) {
6235
+ if (!value || typeof value !== "object" || Array.isArray(value)) return false;
6236
+ const type = value.type;
6237
+ return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
6238
+ }
6239
+ function analyzeCodexRunnerOutput(outputs) {
6240
+ const eventTypes = /* @__PURE__ */ new Set();
6241
+ let eventLineCount = 0;
6242
+ let nonEventLineCount = 0;
6243
+ const nonEventSamples = [];
6244
+ for (const output of outputs) {
6245
+ const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
6246
+ for (const line of lines) {
6247
+ try {
6248
+ const parsed = JSON.parse(line);
6249
+ if (isCodexLifecycleEvent(parsed)) {
6250
+ eventLineCount += 1;
6251
+ eventTypes.add(parsed.type);
6252
+ continue;
6253
+ }
6254
+ } catch {
6255
+ }
6256
+ nonEventLineCount += 1;
6257
+ if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
6258
+ }
6259
+ }
6260
+ return {
6261
+ eventLineCount,
6262
+ eventTypes: Array.from(eventTypes),
6263
+ nonEventLineCount,
6264
+ nonEventSamples,
6265
+ onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
6266
+ };
6267
+ }
6214
6268
  function isHarnessVerificationOnlyBlocker(blocker) {
6215
6269
  const text = blocker.toLowerCase();
6216
6270
  return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
@@ -6234,21 +6288,25 @@ function runnerMetrics(input) {
6234
6288
  exit_status: input.status ?? null,
6235
6289
  timed_out: input.timedOut || false,
6236
6290
  error_code: input.errorCode,
6291
+ codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
6292
+ codex_event_line_count: input.codexEventLineCount,
6293
+ codex_non_event_line_count: input.codexNonEventLineCount,
6237
6294
  codex_command: input.config.codexCommand || "codex",
6238
6295
  codex_model: input.config.codexModel,
6239
6296
  codex_sandbox: input.config.codexSandbox || "workspace-write",
6240
6297
  codex_full_auto: input.config.codexFullAuto !== false,
6241
- timeout_ms: Number(input.config.codexTimeoutMs || 6e5)
6298
+ timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
6242
6299
  });
6243
6300
  }
6244
6301
  function createCodexExecJsonRunner(config = {}) {
6245
6302
  return (request) => {
6246
6303
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
6247
6304
  const startedMs = Date.now();
6305
+ const timeoutMs = resolveCodexTimeoutMs(config, request);
6248
6306
  if (!request.workdir || !(0, import_node_fs4.existsSync)(request.workdir)) {
6249
6307
  return {
6250
6308
  ok: false,
6251
- metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
6309
+ metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
6252
6310
  blocker: {
6253
6311
  code: "codex_workdir_missing",
6254
6312
  message: `Codex workdir does not exist for ${request.purpose}.`,
@@ -6283,7 +6341,7 @@ function createCodexExecJsonRunner(config = {}) {
6283
6341
  const proc = (0, import_node_child_process3.spawnSync)(config.codexCommand || "codex", args, {
6284
6342
  input: request.prompt,
6285
6343
  encoding: "utf-8",
6286
- timeout: Number(config.codexTimeoutMs || 6e5),
6344
+ timeout: timeoutMs,
6287
6345
  maxBuffer: 10 * 1024 * 1024,
6288
6346
  env
6289
6347
  });
@@ -6302,6 +6360,7 @@ function createCodexExecJsonRunner(config = {}) {
6302
6360
  stderr: proc.stderr || "",
6303
6361
  status: proc.status,
6304
6362
  timedOut,
6363
+ timeoutMs,
6305
6364
  errorCode: proc.error.code || "spawn_error"
6306
6365
  }),
6307
6366
  blocker: {
@@ -6324,6 +6383,7 @@ function createCodexExecJsonRunner(config = {}) {
6324
6383
  stdout: proc.stdout || "",
6325
6384
  stderr: proc.stderr || "",
6326
6385
  status: proc.status,
6386
+ timeoutMs,
6327
6387
  errorCode: "nonzero_exit"
6328
6388
  }),
6329
6389
  blocker: {
@@ -6336,12 +6396,15 @@ function createCodexExecJsonRunner(config = {}) {
6336
6396
  const finalText = (0, import_node_fs4.existsSync)(lastMessagePath) ? (0, import_node_fs4.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
6337
6397
  const stdoutText = String(proc.stdout || "");
6338
6398
  const stderrText = String(proc.stderr || "");
6339
- const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs([
6399
+ const runnerOutputs = [
6340
6400
  { source: (0, import_node_fs4.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
6341
6401
  { source: "stdout", text: stdoutText },
6342
6402
  { source: "stderr", text: stderrText }
6343
- ], request.schema);
6403
+ ];
6404
+ const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
6344
6405
  if (!parsed) {
6406
+ const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
6407
+ const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
6345
6408
  return {
6346
6409
  ok: false,
6347
6410
  stdout: stdoutText,
@@ -6355,12 +6418,24 @@ function createCodexExecJsonRunner(config = {}) {
6355
6418
  stderr: stderrText,
6356
6419
  finalText,
6357
6420
  status: proc.status,
6358
- errorCode: "invalid_json"
6421
+ timeoutMs,
6422
+ errorCode,
6423
+ codexEventTypes: outputAnalysis.eventTypes,
6424
+ codexEventLineCount: outputAnalysis.eventLineCount,
6425
+ codexNonEventLineCount: outputAnalysis.nonEventLineCount
6359
6426
  }),
6360
6427
  blocker: {
6361
- code: "codex_invalid_json",
6362
- message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
6363
- details: { finalText, stdout: stdoutText, stderr: stderrText }
6428
+ code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
6429
+ message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
6430
+ details: {
6431
+ finalText,
6432
+ stdout: stdoutText,
6433
+ stderr: stderrText,
6434
+ event_types: outputAnalysis.eventTypes,
6435
+ event_line_count: outputAnalysis.eventLineCount,
6436
+ non_event_line_count: outputAnalysis.nonEventLineCount,
6437
+ non_event_samples: outputAnalysis.nonEventSamples
6438
+ }
6364
6439
  }
6365
6440
  };
6366
6441
  }
@@ -6378,7 +6453,8 @@ function createCodexExecJsonRunner(config = {}) {
6378
6453
  stderr: stderrText,
6379
6454
  finalText,
6380
6455
  parsedJsonSource,
6381
- status: proc.status
6456
+ status: proc.status,
6457
+ timeoutMs
6382
6458
  })
6383
6459
  };
6384
6460
  } finally {
@@ -6487,6 +6563,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
6487
6563
  "Write a proof_plan and capture_script that will verify the exact user-facing change.",
6488
6564
  "Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
6489
6565
  "Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
6566
+ "Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
6490
6567
  "Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
6491
6568
  "For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
6492
6569
  "For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
package/dist/cli.js CHANGED
@@ -1,13 +1,13 @@
1
1
  #!/usr/bin/env node
2
- import "./chunk-V6VZ3CAI.js";
2
+ import "./chunk-2PXL3RDB.js";
3
3
  import "./chunk-PEWAIEER.js";
4
4
  import "./chunk-TWTEUS7R.js";
5
- import "./chunk-E7ATYSYS.js";
5
+ import "./chunk-BBUO7HM4.js";
6
6
  import "./chunk-YZUVEJ5B.js";
7
7
  import "./chunk-FMOYUYH2.js";
8
8
  import "./chunk-5N5QFI2S.js";
9
9
  import "./chunk-4FOHZ7JG.js";
10
10
  import "./chunk-JFQXAJH2.js";
11
- import "./chunk-PYCQNK66.js";
11
+ import "./chunk-EEIYUZXE.js";
12
12
  import "./chunk-VY4Y5U57.js";
13
13
  import "./chunk-MLKGABMK.js";
@@ -46,6 +46,8 @@ function compactRecord(input) {
46
46
  }
47
47
 
48
48
  // src/codex-exec-agent.ts
49
+ var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
50
+ var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
49
51
  var REFINED_INPUTS_SCHEMA = {
50
52
  type: "object",
51
53
  additionalProperties: false,
@@ -389,6 +391,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
389
391
  if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
390
392
  return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
391
393
  }
394
+ function resolveCodexTimeoutMs(config, request) {
395
+ if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
396
+ return Number(config.codexTimeoutMs);
397
+ }
398
+ return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
399
+ }
400
+ function isCodexLifecycleEvent(value) {
401
+ if (!value || typeof value !== "object" || Array.isArray(value)) return false;
402
+ const type = value.type;
403
+ return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
404
+ }
405
+ function analyzeCodexRunnerOutput(outputs) {
406
+ const eventTypes = /* @__PURE__ */ new Set();
407
+ let eventLineCount = 0;
408
+ let nonEventLineCount = 0;
409
+ const nonEventSamples = [];
410
+ for (const output of outputs) {
411
+ const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
412
+ for (const line of lines) {
413
+ try {
414
+ const parsed = JSON.parse(line);
415
+ if (isCodexLifecycleEvent(parsed)) {
416
+ eventLineCount += 1;
417
+ eventTypes.add(parsed.type);
418
+ continue;
419
+ }
420
+ } catch {
421
+ }
422
+ nonEventLineCount += 1;
423
+ if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
424
+ }
425
+ }
426
+ return {
427
+ eventLineCount,
428
+ eventTypes: Array.from(eventTypes),
429
+ nonEventLineCount,
430
+ nonEventSamples,
431
+ onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
432
+ };
433
+ }
392
434
  function isHarnessVerificationOnlyBlocker(blocker) {
393
435
  const text = blocker.toLowerCase();
394
436
  return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
@@ -412,21 +454,25 @@ function runnerMetrics(input) {
412
454
  exit_status: input.status ?? null,
413
455
  timed_out: input.timedOut || false,
414
456
  error_code: input.errorCode,
457
+ codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
458
+ codex_event_line_count: input.codexEventLineCount,
459
+ codex_non_event_line_count: input.codexNonEventLineCount,
415
460
  codex_command: input.config.codexCommand || "codex",
416
461
  codex_model: input.config.codexModel,
417
462
  codex_sandbox: input.config.codexSandbox || "workspace-write",
418
463
  codex_full_auto: input.config.codexFullAuto !== false,
419
- timeout_ms: Number(input.config.codexTimeoutMs || 6e5)
464
+ timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
420
465
  });
421
466
  }
422
467
  function createCodexExecJsonRunner(config = {}) {
423
468
  return (request) => {
424
469
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
425
470
  const startedMs = Date.now();
471
+ const timeoutMs = resolveCodexTimeoutMs(config, request);
426
472
  if (!request.workdir || !(0, import_node_fs.existsSync)(request.workdir)) {
427
473
  return {
428
474
  ok: false,
429
- metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
475
+ metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
430
476
  blocker: {
431
477
  code: "codex_workdir_missing",
432
478
  message: `Codex workdir does not exist for ${request.purpose}.`,
@@ -461,7 +507,7 @@ function createCodexExecJsonRunner(config = {}) {
461
507
  const proc = (0, import_node_child_process.spawnSync)(config.codexCommand || "codex", args, {
462
508
  input: request.prompt,
463
509
  encoding: "utf-8",
464
- timeout: Number(config.codexTimeoutMs || 6e5),
510
+ timeout: timeoutMs,
465
511
  maxBuffer: 10 * 1024 * 1024,
466
512
  env
467
513
  });
@@ -480,6 +526,7 @@ function createCodexExecJsonRunner(config = {}) {
480
526
  stderr: proc.stderr || "",
481
527
  status: proc.status,
482
528
  timedOut,
529
+ timeoutMs,
483
530
  errorCode: proc.error.code || "spawn_error"
484
531
  }),
485
532
  blocker: {
@@ -502,6 +549,7 @@ function createCodexExecJsonRunner(config = {}) {
502
549
  stdout: proc.stdout || "",
503
550
  stderr: proc.stderr || "",
504
551
  status: proc.status,
552
+ timeoutMs,
505
553
  errorCode: "nonzero_exit"
506
554
  }),
507
555
  blocker: {
@@ -514,12 +562,15 @@ function createCodexExecJsonRunner(config = {}) {
514
562
  const finalText = (0, import_node_fs.existsSync)(lastMessagePath) ? (0, import_node_fs.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
515
563
  const stdoutText = String(proc.stdout || "");
516
564
  const stderrText = String(proc.stderr || "");
517
- const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs([
565
+ const runnerOutputs = [
518
566
  { source: (0, import_node_fs.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
519
567
  { source: "stdout", text: stdoutText },
520
568
  { source: "stderr", text: stderrText }
521
- ], request.schema);
569
+ ];
570
+ const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
522
571
  if (!parsed) {
572
+ const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
573
+ const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
523
574
  return {
524
575
  ok: false,
525
576
  stdout: stdoutText,
@@ -533,12 +584,24 @@ function createCodexExecJsonRunner(config = {}) {
533
584
  stderr: stderrText,
534
585
  finalText,
535
586
  status: proc.status,
536
- errorCode: "invalid_json"
587
+ timeoutMs,
588
+ errorCode,
589
+ codexEventTypes: outputAnalysis.eventTypes,
590
+ codexEventLineCount: outputAnalysis.eventLineCount,
591
+ codexNonEventLineCount: outputAnalysis.nonEventLineCount
537
592
  }),
538
593
  blocker: {
539
- code: "codex_invalid_json",
540
- message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
541
- details: { finalText, stdout: stdoutText, stderr: stderrText }
594
+ code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
595
+ message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
596
+ details: {
597
+ finalText,
598
+ stdout: stdoutText,
599
+ stderr: stderrText,
600
+ event_types: outputAnalysis.eventTypes,
601
+ event_line_count: outputAnalysis.eventLineCount,
602
+ non_event_line_count: outputAnalysis.nonEventLineCount,
603
+ non_event_samples: outputAnalysis.nonEventSamples
604
+ }
542
605
  }
543
606
  };
544
607
  }
@@ -556,7 +619,8 @@ function createCodexExecJsonRunner(config = {}) {
556
619
  stderr: stderrText,
557
620
  finalText,
558
621
  parsedJsonSource,
559
- status: proc.status
622
+ status: proc.status,
623
+ timeoutMs
560
624
  })
561
625
  };
562
626
  } finally {
@@ -665,6 +729,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
665
729
  "Write a proof_plan and capture_script that will verify the exact user-facing change.",
666
730
  "Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
667
731
  "Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
732
+ "Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
668
733
  "Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
669
734
  "For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
670
735
  "For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
@@ -2,7 +2,7 @@ import {
2
2
  createCodexExecAgentAdapter,
3
3
  createCodexExecJsonRunner,
4
4
  runCodexExecAgentDoctor
5
- } from "./chunk-PYCQNK66.js";
5
+ } from "./chunk-EEIYUZXE.js";
6
6
  import "./chunk-VY4Y5U57.js";
7
7
  import "./chunk-MLKGABMK.js";
8
8
  export {
@@ -5585,6 +5585,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
5585
5585
  if (checkpoint === "verify_agent_retry") {
5586
5586
  const next = recommendedContinuation(result);
5587
5587
  if (next) return { next };
5588
+ return {
5589
+ blocker: {
5590
+ code: "proof_assessment_blocked",
5591
+ checkpoint,
5592
+ message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
5593
+ details: compactRecord({
5594
+ proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
5595
+ verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
5596
+ checkpointContract: result.checkpointContract || null
5597
+ })
5598
+ }
5599
+ };
5588
5600
  }
5589
5601
  if (checkpoint === "awaiting_stage_advance") {
5590
5602
  const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
@@ -2,7 +2,7 @@ import {
2
2
  createDisabledRiddleProofAgentAdapter,
3
3
  readRiddleProofRunStatus,
4
4
  runRiddleProofEngineHarness
5
- } from "./chunk-E7ATYSYS.js";
5
+ } from "./chunk-BBUO7HM4.js";
6
6
  import "./chunk-YZUVEJ5B.js";
7
7
  import "./chunk-FMOYUYH2.js";
8
8
  import "./chunk-5N5QFI2S.js";