@riddledc/riddle-proof 0.8.7 → 0.8.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/codex-exec-agent.cjs +75 -10
- package/dist/adapters/codex-exec-agent.js +1 -1
- package/dist/adapters/codex.cjs +75 -10
- package/dist/adapters/codex.js +1 -1
- package/dist/adapters/local-agent.cjs +75 -10
- package/dist/adapters/local-agent.js +1 -1
- package/dist/{chunk-PYCQNK66.js → chunk-EEIYUZXE.js} +75 -10
- package/dist/{chunk-V6VZ3CAI.js → chunk-RTWGGKS3.js} +1 -1
- package/dist/cli/index.js +2 -2
- package/dist/cli.cjs +75 -10
- package/dist/cli.js +2 -2
- package/dist/codex-exec-agent.cjs +75 -10
- package/dist/codex-exec-agent.js +1 -1
- package/dist/index.cjs +75 -10
- package/dist/index.js +1 -1
- package/dist/local-agent.cjs +75 -10
- package/dist/local-agent.js +1 -1
- package/package.json +1 -1
- package/runtime/lib/verify.py +204 -5
- package/runtime/tests/recon_verify_smoke.py +19 -12
|
@@ -7,6 +7,8 @@ import { execFileSync, spawnSync } from "child_process";
|
|
|
7
7
|
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs";
|
|
8
8
|
import os from "os";
|
|
9
9
|
import path from "path";
|
|
10
|
+
var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
|
|
11
|
+
var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
|
|
10
12
|
var REFINED_INPUTS_SCHEMA = {
|
|
11
13
|
type: "object",
|
|
12
14
|
additionalProperties: false,
|
|
@@ -350,6 +352,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
|
|
|
350
352
|
if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
|
|
351
353
|
return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
|
|
352
354
|
}
|
|
355
|
+
function resolveCodexTimeoutMs(config, request) {
|
|
356
|
+
if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
|
|
357
|
+
return Number(config.codexTimeoutMs);
|
|
358
|
+
}
|
|
359
|
+
return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
|
|
360
|
+
}
|
|
361
|
+
function isCodexLifecycleEvent(value) {
|
|
362
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) return false;
|
|
363
|
+
const type = value.type;
|
|
364
|
+
return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
|
|
365
|
+
}
|
|
366
|
+
function analyzeCodexRunnerOutput(outputs) {
|
|
367
|
+
const eventTypes = /* @__PURE__ */ new Set();
|
|
368
|
+
let eventLineCount = 0;
|
|
369
|
+
let nonEventLineCount = 0;
|
|
370
|
+
const nonEventSamples = [];
|
|
371
|
+
for (const output of outputs) {
|
|
372
|
+
const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
|
|
373
|
+
for (const line of lines) {
|
|
374
|
+
try {
|
|
375
|
+
const parsed = JSON.parse(line);
|
|
376
|
+
if (isCodexLifecycleEvent(parsed)) {
|
|
377
|
+
eventLineCount += 1;
|
|
378
|
+
eventTypes.add(parsed.type);
|
|
379
|
+
continue;
|
|
380
|
+
}
|
|
381
|
+
} catch {
|
|
382
|
+
}
|
|
383
|
+
nonEventLineCount += 1;
|
|
384
|
+
if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
return {
|
|
388
|
+
eventLineCount,
|
|
389
|
+
eventTypes: Array.from(eventTypes),
|
|
390
|
+
nonEventLineCount,
|
|
391
|
+
nonEventSamples,
|
|
392
|
+
onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
|
|
393
|
+
};
|
|
394
|
+
}
|
|
353
395
|
function isHarnessVerificationOnlyBlocker(blocker) {
|
|
354
396
|
const text = blocker.toLowerCase();
|
|
355
397
|
return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
|
|
@@ -373,21 +415,25 @@ function runnerMetrics(input) {
|
|
|
373
415
|
exit_status: input.status ?? null,
|
|
374
416
|
timed_out: input.timedOut || false,
|
|
375
417
|
error_code: input.errorCode,
|
|
418
|
+
codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
|
|
419
|
+
codex_event_line_count: input.codexEventLineCount,
|
|
420
|
+
codex_non_event_line_count: input.codexNonEventLineCount,
|
|
376
421
|
codex_command: input.config.codexCommand || "codex",
|
|
377
422
|
codex_model: input.config.codexModel,
|
|
378
423
|
codex_sandbox: input.config.codexSandbox || "workspace-write",
|
|
379
424
|
codex_full_auto: input.config.codexFullAuto !== false,
|
|
380
|
-
timeout_ms:
|
|
425
|
+
timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
|
|
381
426
|
});
|
|
382
427
|
}
|
|
383
428
|
function createCodexExecJsonRunner(config = {}) {
|
|
384
429
|
return (request) => {
|
|
385
430
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
386
431
|
const startedMs = Date.now();
|
|
432
|
+
const timeoutMs = resolveCodexTimeoutMs(config, request);
|
|
387
433
|
if (!request.workdir || !existsSync(request.workdir)) {
|
|
388
434
|
return {
|
|
389
435
|
ok: false,
|
|
390
|
-
metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
|
|
436
|
+
metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
|
|
391
437
|
blocker: {
|
|
392
438
|
code: "codex_workdir_missing",
|
|
393
439
|
message: `Codex workdir does not exist for ${request.purpose}.`,
|
|
@@ -422,7 +468,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
422
468
|
const proc = spawnSync(config.codexCommand || "codex", args, {
|
|
423
469
|
input: request.prompt,
|
|
424
470
|
encoding: "utf-8",
|
|
425
|
-
timeout:
|
|
471
|
+
timeout: timeoutMs,
|
|
426
472
|
maxBuffer: 10 * 1024 * 1024,
|
|
427
473
|
env
|
|
428
474
|
});
|
|
@@ -441,6 +487,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
441
487
|
stderr: proc.stderr || "",
|
|
442
488
|
status: proc.status,
|
|
443
489
|
timedOut,
|
|
490
|
+
timeoutMs,
|
|
444
491
|
errorCode: proc.error.code || "spawn_error"
|
|
445
492
|
}),
|
|
446
493
|
blocker: {
|
|
@@ -463,6 +510,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
463
510
|
stdout: proc.stdout || "",
|
|
464
511
|
stderr: proc.stderr || "",
|
|
465
512
|
status: proc.status,
|
|
513
|
+
timeoutMs,
|
|
466
514
|
errorCode: "nonzero_exit"
|
|
467
515
|
}),
|
|
468
516
|
blocker: {
|
|
@@ -475,12 +523,15 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
475
523
|
const finalText = existsSync(lastMessagePath) ? readFileSync(lastMessagePath, "utf-8") : String(proc.stdout || "");
|
|
476
524
|
const stdoutText = String(proc.stdout || "");
|
|
477
525
|
const stderrText = String(proc.stderr || "");
|
|
478
|
-
const
|
|
526
|
+
const runnerOutputs = [
|
|
479
527
|
{ source: existsSync(lastMessagePath) ? "last_message" : "stdout", text: finalText },
|
|
480
528
|
{ source: "stdout", text: stdoutText },
|
|
481
529
|
{ source: "stderr", text: stderrText }
|
|
482
|
-
]
|
|
530
|
+
];
|
|
531
|
+
const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
|
|
483
532
|
if (!parsed) {
|
|
533
|
+
const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
|
|
534
|
+
const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
|
|
484
535
|
return {
|
|
485
536
|
ok: false,
|
|
486
537
|
stdout: stdoutText,
|
|
@@ -494,12 +545,24 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
494
545
|
stderr: stderrText,
|
|
495
546
|
finalText,
|
|
496
547
|
status: proc.status,
|
|
497
|
-
|
|
548
|
+
timeoutMs,
|
|
549
|
+
errorCode,
|
|
550
|
+
codexEventTypes: outputAnalysis.eventTypes,
|
|
551
|
+
codexEventLineCount: outputAnalysis.eventLineCount,
|
|
552
|
+
codexNonEventLineCount: outputAnalysis.nonEventLineCount
|
|
498
553
|
}),
|
|
499
554
|
blocker: {
|
|
500
|
-
code: "codex_invalid_json",
|
|
501
|
-
message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
502
|
-
details: {
|
|
555
|
+
code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
|
|
556
|
+
message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
557
|
+
details: {
|
|
558
|
+
finalText,
|
|
559
|
+
stdout: stdoutText,
|
|
560
|
+
stderr: stderrText,
|
|
561
|
+
event_types: outputAnalysis.eventTypes,
|
|
562
|
+
event_line_count: outputAnalysis.eventLineCount,
|
|
563
|
+
non_event_line_count: outputAnalysis.nonEventLineCount,
|
|
564
|
+
non_event_samples: outputAnalysis.nonEventSamples
|
|
565
|
+
}
|
|
503
566
|
}
|
|
504
567
|
};
|
|
505
568
|
}
|
|
@@ -517,7 +580,8 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
517
580
|
stderr: stderrText,
|
|
518
581
|
finalText,
|
|
519
582
|
parsedJsonSource,
|
|
520
|
-
status: proc.status
|
|
583
|
+
status: proc.status,
|
|
584
|
+
timeoutMs
|
|
521
585
|
})
|
|
522
586
|
};
|
|
523
587
|
} finally {
|
|
@@ -626,6 +690,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
626
690
|
"Write a proof_plan and capture_script that will verify the exact user-facing change.",
|
|
627
691
|
"Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
|
|
628
692
|
"Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
|
|
693
|
+
"Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
|
|
629
694
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
630
695
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
631
696
|
"For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
|
@@ -29,7 +29,7 @@ import {
|
|
|
29
29
|
import {
|
|
30
30
|
createCodexExecAgentAdapter,
|
|
31
31
|
runCodexExecAgentDoctor
|
|
32
|
-
} from "./chunk-
|
|
32
|
+
} from "./chunk-EEIYUZXE.js";
|
|
33
33
|
|
|
34
34
|
// src/cli.ts
|
|
35
35
|
import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "fs";
|
package/dist/cli/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import "../chunk-
|
|
1
|
+
import "../chunk-RTWGGKS3.js";
|
|
2
2
|
import "../chunk-PEWAIEER.js";
|
|
3
3
|
import "../chunk-TWTEUS7R.js";
|
|
4
4
|
import "../chunk-E7ATYSYS.js";
|
|
@@ -7,6 +7,6 @@ import "../chunk-FMOYUYH2.js";
|
|
|
7
7
|
import "../chunk-5N5QFI2S.js";
|
|
8
8
|
import "../chunk-4FOHZ7JG.js";
|
|
9
9
|
import "../chunk-JFQXAJH2.js";
|
|
10
|
-
import "../chunk-
|
|
10
|
+
import "../chunk-EEIYUZXE.js";
|
|
11
11
|
import "../chunk-VY4Y5U57.js";
|
|
12
12
|
import "../chunk-MLKGABMK.js";
|
package/dist/cli.cjs
CHANGED
|
@@ -5868,6 +5868,8 @@ var import_node_child_process3 = require("child_process");
|
|
|
5868
5868
|
var import_node_fs4 = require("fs");
|
|
5869
5869
|
var import_node_os = __toESM(require("os"), 1);
|
|
5870
5870
|
var import_node_path4 = __toESM(require("path"), 1);
|
|
5871
|
+
var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
|
|
5872
|
+
var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
|
|
5871
5873
|
var REFINED_INPUTS_SCHEMA = {
|
|
5872
5874
|
type: "object",
|
|
5873
5875
|
additionalProperties: false,
|
|
@@ -6211,6 +6213,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
|
|
|
6211
6213
|
if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
|
|
6212
6214
|
return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
|
|
6213
6215
|
}
|
|
6216
|
+
function resolveCodexTimeoutMs(config, request) {
|
|
6217
|
+
if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
|
|
6218
|
+
return Number(config.codexTimeoutMs);
|
|
6219
|
+
}
|
|
6220
|
+
return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
|
|
6221
|
+
}
|
|
6222
|
+
function isCodexLifecycleEvent(value) {
|
|
6223
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) return false;
|
|
6224
|
+
const type = value.type;
|
|
6225
|
+
return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
|
|
6226
|
+
}
|
|
6227
|
+
function analyzeCodexRunnerOutput(outputs) {
|
|
6228
|
+
const eventTypes = /* @__PURE__ */ new Set();
|
|
6229
|
+
let eventLineCount = 0;
|
|
6230
|
+
let nonEventLineCount = 0;
|
|
6231
|
+
const nonEventSamples = [];
|
|
6232
|
+
for (const output of outputs) {
|
|
6233
|
+
const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
|
|
6234
|
+
for (const line of lines) {
|
|
6235
|
+
try {
|
|
6236
|
+
const parsed = JSON.parse(line);
|
|
6237
|
+
if (isCodexLifecycleEvent(parsed)) {
|
|
6238
|
+
eventLineCount += 1;
|
|
6239
|
+
eventTypes.add(parsed.type);
|
|
6240
|
+
continue;
|
|
6241
|
+
}
|
|
6242
|
+
} catch {
|
|
6243
|
+
}
|
|
6244
|
+
nonEventLineCount += 1;
|
|
6245
|
+
if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
|
|
6246
|
+
}
|
|
6247
|
+
}
|
|
6248
|
+
return {
|
|
6249
|
+
eventLineCount,
|
|
6250
|
+
eventTypes: Array.from(eventTypes),
|
|
6251
|
+
nonEventLineCount,
|
|
6252
|
+
nonEventSamples,
|
|
6253
|
+
onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
|
|
6254
|
+
};
|
|
6255
|
+
}
|
|
6214
6256
|
function isHarnessVerificationOnlyBlocker(blocker) {
|
|
6215
6257
|
const text = blocker.toLowerCase();
|
|
6216
6258
|
return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
|
|
@@ -6234,21 +6276,25 @@ function runnerMetrics(input) {
|
|
|
6234
6276
|
exit_status: input.status ?? null,
|
|
6235
6277
|
timed_out: input.timedOut || false,
|
|
6236
6278
|
error_code: input.errorCode,
|
|
6279
|
+
codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
|
|
6280
|
+
codex_event_line_count: input.codexEventLineCount,
|
|
6281
|
+
codex_non_event_line_count: input.codexNonEventLineCount,
|
|
6237
6282
|
codex_command: input.config.codexCommand || "codex",
|
|
6238
6283
|
codex_model: input.config.codexModel,
|
|
6239
6284
|
codex_sandbox: input.config.codexSandbox || "workspace-write",
|
|
6240
6285
|
codex_full_auto: input.config.codexFullAuto !== false,
|
|
6241
|
-
timeout_ms:
|
|
6286
|
+
timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
|
|
6242
6287
|
});
|
|
6243
6288
|
}
|
|
6244
6289
|
function createCodexExecJsonRunner(config = {}) {
|
|
6245
6290
|
return (request) => {
|
|
6246
6291
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
6247
6292
|
const startedMs = Date.now();
|
|
6293
|
+
const timeoutMs = resolveCodexTimeoutMs(config, request);
|
|
6248
6294
|
if (!request.workdir || !(0, import_node_fs4.existsSync)(request.workdir)) {
|
|
6249
6295
|
return {
|
|
6250
6296
|
ok: false,
|
|
6251
|
-
metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
|
|
6297
|
+
metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
|
|
6252
6298
|
blocker: {
|
|
6253
6299
|
code: "codex_workdir_missing",
|
|
6254
6300
|
message: `Codex workdir does not exist for ${request.purpose}.`,
|
|
@@ -6283,7 +6329,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6283
6329
|
const proc = (0, import_node_child_process3.spawnSync)(config.codexCommand || "codex", args, {
|
|
6284
6330
|
input: request.prompt,
|
|
6285
6331
|
encoding: "utf-8",
|
|
6286
|
-
timeout:
|
|
6332
|
+
timeout: timeoutMs,
|
|
6287
6333
|
maxBuffer: 10 * 1024 * 1024,
|
|
6288
6334
|
env
|
|
6289
6335
|
});
|
|
@@ -6302,6 +6348,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6302
6348
|
stderr: proc.stderr || "",
|
|
6303
6349
|
status: proc.status,
|
|
6304
6350
|
timedOut,
|
|
6351
|
+
timeoutMs,
|
|
6305
6352
|
errorCode: proc.error.code || "spawn_error"
|
|
6306
6353
|
}),
|
|
6307
6354
|
blocker: {
|
|
@@ -6324,6 +6371,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6324
6371
|
stdout: proc.stdout || "",
|
|
6325
6372
|
stderr: proc.stderr || "",
|
|
6326
6373
|
status: proc.status,
|
|
6374
|
+
timeoutMs,
|
|
6327
6375
|
errorCode: "nonzero_exit"
|
|
6328
6376
|
}),
|
|
6329
6377
|
blocker: {
|
|
@@ -6336,12 +6384,15 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6336
6384
|
const finalText = (0, import_node_fs4.existsSync)(lastMessagePath) ? (0, import_node_fs4.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
|
|
6337
6385
|
const stdoutText = String(proc.stdout || "");
|
|
6338
6386
|
const stderrText = String(proc.stderr || "");
|
|
6339
|
-
const
|
|
6387
|
+
const runnerOutputs = [
|
|
6340
6388
|
{ source: (0, import_node_fs4.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
|
|
6341
6389
|
{ source: "stdout", text: stdoutText },
|
|
6342
6390
|
{ source: "stderr", text: stderrText }
|
|
6343
|
-
]
|
|
6391
|
+
];
|
|
6392
|
+
const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
|
|
6344
6393
|
if (!parsed) {
|
|
6394
|
+
const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
|
|
6395
|
+
const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
|
|
6345
6396
|
return {
|
|
6346
6397
|
ok: false,
|
|
6347
6398
|
stdout: stdoutText,
|
|
@@ -6355,12 +6406,24 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6355
6406
|
stderr: stderrText,
|
|
6356
6407
|
finalText,
|
|
6357
6408
|
status: proc.status,
|
|
6358
|
-
|
|
6409
|
+
timeoutMs,
|
|
6410
|
+
errorCode,
|
|
6411
|
+
codexEventTypes: outputAnalysis.eventTypes,
|
|
6412
|
+
codexEventLineCount: outputAnalysis.eventLineCount,
|
|
6413
|
+
codexNonEventLineCount: outputAnalysis.nonEventLineCount
|
|
6359
6414
|
}),
|
|
6360
6415
|
blocker: {
|
|
6361
|
-
code: "codex_invalid_json",
|
|
6362
|
-
message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
6363
|
-
details: {
|
|
6416
|
+
code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
|
|
6417
|
+
message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
6418
|
+
details: {
|
|
6419
|
+
finalText,
|
|
6420
|
+
stdout: stdoutText,
|
|
6421
|
+
stderr: stderrText,
|
|
6422
|
+
event_types: outputAnalysis.eventTypes,
|
|
6423
|
+
event_line_count: outputAnalysis.eventLineCount,
|
|
6424
|
+
non_event_line_count: outputAnalysis.nonEventLineCount,
|
|
6425
|
+
non_event_samples: outputAnalysis.nonEventSamples
|
|
6426
|
+
}
|
|
6364
6427
|
}
|
|
6365
6428
|
};
|
|
6366
6429
|
}
|
|
@@ -6378,7 +6441,8 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6378
6441
|
stderr: stderrText,
|
|
6379
6442
|
finalText,
|
|
6380
6443
|
parsedJsonSource,
|
|
6381
|
-
status: proc.status
|
|
6444
|
+
status: proc.status,
|
|
6445
|
+
timeoutMs
|
|
6382
6446
|
})
|
|
6383
6447
|
};
|
|
6384
6448
|
} finally {
|
|
@@ -6487,6 +6551,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
6487
6551
|
"Write a proof_plan and capture_script that will verify the exact user-facing change.",
|
|
6488
6552
|
"Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
|
|
6489
6553
|
"Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
|
|
6554
|
+
"Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
|
|
6490
6555
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
6491
6556
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
6492
6557
|
"For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
package/dist/cli.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import "./chunk-
|
|
2
|
+
import "./chunk-RTWGGKS3.js";
|
|
3
3
|
import "./chunk-PEWAIEER.js";
|
|
4
4
|
import "./chunk-TWTEUS7R.js";
|
|
5
5
|
import "./chunk-E7ATYSYS.js";
|
|
@@ -8,6 +8,6 @@ import "./chunk-FMOYUYH2.js";
|
|
|
8
8
|
import "./chunk-5N5QFI2S.js";
|
|
9
9
|
import "./chunk-4FOHZ7JG.js";
|
|
10
10
|
import "./chunk-JFQXAJH2.js";
|
|
11
|
-
import "./chunk-
|
|
11
|
+
import "./chunk-EEIYUZXE.js";
|
|
12
12
|
import "./chunk-VY4Y5U57.js";
|
|
13
13
|
import "./chunk-MLKGABMK.js";
|
|
@@ -46,6 +46,8 @@ function compactRecord(input) {
|
|
|
46
46
|
}
|
|
47
47
|
|
|
48
48
|
// src/codex-exec-agent.ts
|
|
49
|
+
var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
|
|
50
|
+
var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
|
|
49
51
|
var REFINED_INPUTS_SCHEMA = {
|
|
50
52
|
type: "object",
|
|
51
53
|
additionalProperties: false,
|
|
@@ -389,6 +391,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
|
|
|
389
391
|
if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
|
|
390
392
|
return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
|
|
391
393
|
}
|
|
394
|
+
function resolveCodexTimeoutMs(config, request) {
|
|
395
|
+
if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
|
|
396
|
+
return Number(config.codexTimeoutMs);
|
|
397
|
+
}
|
|
398
|
+
return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
|
|
399
|
+
}
|
|
400
|
+
function isCodexLifecycleEvent(value) {
|
|
401
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) return false;
|
|
402
|
+
const type = value.type;
|
|
403
|
+
return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
|
|
404
|
+
}
|
|
405
|
+
function analyzeCodexRunnerOutput(outputs) {
|
|
406
|
+
const eventTypes = /* @__PURE__ */ new Set();
|
|
407
|
+
let eventLineCount = 0;
|
|
408
|
+
let nonEventLineCount = 0;
|
|
409
|
+
const nonEventSamples = [];
|
|
410
|
+
for (const output of outputs) {
|
|
411
|
+
const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
|
|
412
|
+
for (const line of lines) {
|
|
413
|
+
try {
|
|
414
|
+
const parsed = JSON.parse(line);
|
|
415
|
+
if (isCodexLifecycleEvent(parsed)) {
|
|
416
|
+
eventLineCount += 1;
|
|
417
|
+
eventTypes.add(parsed.type);
|
|
418
|
+
continue;
|
|
419
|
+
}
|
|
420
|
+
} catch {
|
|
421
|
+
}
|
|
422
|
+
nonEventLineCount += 1;
|
|
423
|
+
if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
return {
|
|
427
|
+
eventLineCount,
|
|
428
|
+
eventTypes: Array.from(eventTypes),
|
|
429
|
+
nonEventLineCount,
|
|
430
|
+
nonEventSamples,
|
|
431
|
+
onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
|
|
432
|
+
};
|
|
433
|
+
}
|
|
392
434
|
function isHarnessVerificationOnlyBlocker(blocker) {
|
|
393
435
|
const text = blocker.toLowerCase();
|
|
394
436
|
return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
|
|
@@ -412,21 +454,25 @@ function runnerMetrics(input) {
|
|
|
412
454
|
exit_status: input.status ?? null,
|
|
413
455
|
timed_out: input.timedOut || false,
|
|
414
456
|
error_code: input.errorCode,
|
|
457
|
+
codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
|
|
458
|
+
codex_event_line_count: input.codexEventLineCount,
|
|
459
|
+
codex_non_event_line_count: input.codexNonEventLineCount,
|
|
415
460
|
codex_command: input.config.codexCommand || "codex",
|
|
416
461
|
codex_model: input.config.codexModel,
|
|
417
462
|
codex_sandbox: input.config.codexSandbox || "workspace-write",
|
|
418
463
|
codex_full_auto: input.config.codexFullAuto !== false,
|
|
419
|
-
timeout_ms:
|
|
464
|
+
timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
|
|
420
465
|
});
|
|
421
466
|
}
|
|
422
467
|
function createCodexExecJsonRunner(config = {}) {
|
|
423
468
|
return (request) => {
|
|
424
469
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
425
470
|
const startedMs = Date.now();
|
|
471
|
+
const timeoutMs = resolveCodexTimeoutMs(config, request);
|
|
426
472
|
if (!request.workdir || !(0, import_node_fs.existsSync)(request.workdir)) {
|
|
427
473
|
return {
|
|
428
474
|
ok: false,
|
|
429
|
-
metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
|
|
475
|
+
metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
|
|
430
476
|
blocker: {
|
|
431
477
|
code: "codex_workdir_missing",
|
|
432
478
|
message: `Codex workdir does not exist for ${request.purpose}.`,
|
|
@@ -461,7 +507,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
461
507
|
const proc = (0, import_node_child_process.spawnSync)(config.codexCommand || "codex", args, {
|
|
462
508
|
input: request.prompt,
|
|
463
509
|
encoding: "utf-8",
|
|
464
|
-
timeout:
|
|
510
|
+
timeout: timeoutMs,
|
|
465
511
|
maxBuffer: 10 * 1024 * 1024,
|
|
466
512
|
env
|
|
467
513
|
});
|
|
@@ -480,6 +526,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
480
526
|
stderr: proc.stderr || "",
|
|
481
527
|
status: proc.status,
|
|
482
528
|
timedOut,
|
|
529
|
+
timeoutMs,
|
|
483
530
|
errorCode: proc.error.code || "spawn_error"
|
|
484
531
|
}),
|
|
485
532
|
blocker: {
|
|
@@ -502,6 +549,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
502
549
|
stdout: proc.stdout || "",
|
|
503
550
|
stderr: proc.stderr || "",
|
|
504
551
|
status: proc.status,
|
|
552
|
+
timeoutMs,
|
|
505
553
|
errorCode: "nonzero_exit"
|
|
506
554
|
}),
|
|
507
555
|
blocker: {
|
|
@@ -514,12 +562,15 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
514
562
|
const finalText = (0, import_node_fs.existsSync)(lastMessagePath) ? (0, import_node_fs.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
|
|
515
563
|
const stdoutText = String(proc.stdout || "");
|
|
516
564
|
const stderrText = String(proc.stderr || "");
|
|
517
|
-
const
|
|
565
|
+
const runnerOutputs = [
|
|
518
566
|
{ source: (0, import_node_fs.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
|
|
519
567
|
{ source: "stdout", text: stdoutText },
|
|
520
568
|
{ source: "stderr", text: stderrText }
|
|
521
|
-
]
|
|
569
|
+
];
|
|
570
|
+
const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
|
|
522
571
|
if (!parsed) {
|
|
572
|
+
const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
|
|
573
|
+
const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
|
|
523
574
|
return {
|
|
524
575
|
ok: false,
|
|
525
576
|
stdout: stdoutText,
|
|
@@ -533,12 +584,24 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
533
584
|
stderr: stderrText,
|
|
534
585
|
finalText,
|
|
535
586
|
status: proc.status,
|
|
536
|
-
|
|
587
|
+
timeoutMs,
|
|
588
|
+
errorCode,
|
|
589
|
+
codexEventTypes: outputAnalysis.eventTypes,
|
|
590
|
+
codexEventLineCount: outputAnalysis.eventLineCount,
|
|
591
|
+
codexNonEventLineCount: outputAnalysis.nonEventLineCount
|
|
537
592
|
}),
|
|
538
593
|
blocker: {
|
|
539
|
-
code: "codex_invalid_json",
|
|
540
|
-
message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
541
|
-
details: {
|
|
594
|
+
code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
|
|
595
|
+
message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
596
|
+
details: {
|
|
597
|
+
finalText,
|
|
598
|
+
stdout: stdoutText,
|
|
599
|
+
stderr: stderrText,
|
|
600
|
+
event_types: outputAnalysis.eventTypes,
|
|
601
|
+
event_line_count: outputAnalysis.eventLineCount,
|
|
602
|
+
non_event_line_count: outputAnalysis.nonEventLineCount,
|
|
603
|
+
non_event_samples: outputAnalysis.nonEventSamples
|
|
604
|
+
}
|
|
542
605
|
}
|
|
543
606
|
};
|
|
544
607
|
}
|
|
@@ -556,7 +619,8 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
556
619
|
stderr: stderrText,
|
|
557
620
|
finalText,
|
|
558
621
|
parsedJsonSource,
|
|
559
|
-
status: proc.status
|
|
622
|
+
status: proc.status,
|
|
623
|
+
timeoutMs
|
|
560
624
|
})
|
|
561
625
|
};
|
|
562
626
|
} finally {
|
|
@@ -665,6 +729,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
665
729
|
"Write a proof_plan and capture_script that will verify the exact user-facing change.",
|
|
666
730
|
"Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
|
|
667
731
|
"Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
|
|
732
|
+
"Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
|
|
668
733
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
669
734
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
670
735
|
"For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
package/dist/codex-exec-agent.js
CHANGED