@riddledc/riddle-proof 0.8.8 → 0.8.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/codex-exec-agent.cjs +75 -10
- package/dist/adapters/codex-exec-agent.js +1 -1
- package/dist/adapters/codex.cjs +75 -10
- package/dist/adapters/codex.js +1 -1
- package/dist/adapters/local-agent.cjs +75 -10
- package/dist/adapters/local-agent.js +1 -1
- package/dist/advanced/engine-harness.cjs +12 -0
- package/dist/advanced/engine-harness.js +1 -1
- package/dist/advanced/index.cjs +12 -0
- package/dist/advanced/index.d.cts +2 -2
- package/dist/advanced/index.d.ts +2 -2
- package/dist/advanced/index.js +1 -1
- package/dist/advanced/proof-run-core.d.cts +1 -1
- package/dist/advanced/proof-run-core.d.ts +1 -1
- package/dist/advanced/proof-run-engine.d.cts +2 -2
- package/dist/advanced/proof-run-engine.d.ts +2 -2
- package/dist/{chunk-V6VZ3CAI.js → chunk-2PXL3RDB.js} +2 -2
- package/dist/{chunk-E7ATYSYS.js → chunk-BBUO7HM4.js} +12 -0
- package/dist/{chunk-PYCQNK66.js → chunk-EEIYUZXE.js} +75 -10
- package/dist/cli/index.js +3 -3
- package/dist/cli.cjs +87 -10
- package/dist/cli.js +3 -3
- package/dist/codex-exec-agent.cjs +75 -10
- package/dist/codex-exec-agent.js +1 -1
- package/dist/engine-harness.cjs +12 -0
- package/dist/engine-harness.js +1 -1
- package/dist/index.cjs +87 -10
- package/dist/index.js +2 -2
- package/dist/local-agent.cjs +75 -10
- package/dist/local-agent.js +1 -1
- package/dist/{proof-run-core-CE0jx7wL.d.ts → proof-run-core-Ci9uFxMc.d.cts} +1 -1
- package/dist/{proof-run-core-CE0jx7wL.d.cts → proof-run-core-Ci9uFxMc.d.ts} +1 -1
- package/dist/proof-run-core.d.cts +1 -1
- package/dist/proof-run-core.d.ts +1 -1
- package/dist/{proof-run-engine-BlocjMni.d.cts → proof-run-engine-Bd1T43Dy.d.cts} +4 -4
- package/dist/{proof-run-engine-C_m8WJmX.d.ts → proof-run-engine-CXyhB-io.d.ts} +4 -4
- package/dist/proof-run-engine.d.cts +2 -2
- package/dist/proof-run-engine.d.ts +2 -2
- package/package.json +2 -2
- package/runtime/lib/verify.py +88 -2
- package/runtime/tests/recon_verify_smoke.py +147 -24
- package/runtime/tests/trust_boundary_regression.py +143 -0
package/dist/index.cjs
CHANGED
|
@@ -6319,6 +6319,18 @@ async function routeCheckpoint(request, state, result, agent, input) {
|
|
|
6319
6319
|
if (checkpoint === "verify_agent_retry") {
|
|
6320
6320
|
const next = recommendedContinuation(result);
|
|
6321
6321
|
if (next) return { next };
|
|
6322
|
+
return {
|
|
6323
|
+
blocker: {
|
|
6324
|
+
code: "proof_assessment_blocked",
|
|
6325
|
+
checkpoint,
|
|
6326
|
+
message: result.summary || "The supervising proof assessment did not approve shipping and did not provide a safe retry continuation.",
|
|
6327
|
+
details: compactRecord({
|
|
6328
|
+
proofAssessment: result.proofAssessment || result.checkpointContract?.proof_assessment || recordValue(result.raw)?.proofAssessment || null,
|
|
6329
|
+
verifyDecisionRequest: result.verifyDecisionRequest || result.checkpointContract?.verify_decision_request || null,
|
|
6330
|
+
checkpointContract: result.checkpointContract || null
|
|
6331
|
+
})
|
|
6332
|
+
}
|
|
6333
|
+
};
|
|
6322
6334
|
}
|
|
6323
6335
|
if (checkpoint === "awaiting_stage_advance") {
|
|
6324
6336
|
const next = recommendedContinuation(result) || defaultAwaitingStageContinuation(result);
|
|
@@ -6531,6 +6543,8 @@ var import_node_child_process3 = require("child_process");
|
|
|
6531
6543
|
var import_node_fs4 = require("fs");
|
|
6532
6544
|
var import_node_os = __toESM(require("os"), 1);
|
|
6533
6545
|
var import_node_path4 = __toESM(require("path"), 1);
|
|
6546
|
+
var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
|
|
6547
|
+
var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
|
|
6534
6548
|
var REFINED_INPUTS_SCHEMA = {
|
|
6535
6549
|
type: "object",
|
|
6536
6550
|
additionalProperties: false,
|
|
@@ -6874,6 +6888,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
|
|
|
6874
6888
|
if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
|
|
6875
6889
|
return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
|
|
6876
6890
|
}
|
|
6891
|
+
function resolveCodexTimeoutMs(config, request) {
|
|
6892
|
+
if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
|
|
6893
|
+
return Number(config.codexTimeoutMs);
|
|
6894
|
+
}
|
|
6895
|
+
return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
|
|
6896
|
+
}
|
|
6897
|
+
function isCodexLifecycleEvent(value) {
|
|
6898
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) return false;
|
|
6899
|
+
const type = value.type;
|
|
6900
|
+
return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
|
|
6901
|
+
}
|
|
6902
|
+
function analyzeCodexRunnerOutput(outputs) {
|
|
6903
|
+
const eventTypes = /* @__PURE__ */ new Set();
|
|
6904
|
+
let eventLineCount = 0;
|
|
6905
|
+
let nonEventLineCount = 0;
|
|
6906
|
+
const nonEventSamples = [];
|
|
6907
|
+
for (const output of outputs) {
|
|
6908
|
+
const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
|
|
6909
|
+
for (const line of lines) {
|
|
6910
|
+
try {
|
|
6911
|
+
const parsed = JSON.parse(line);
|
|
6912
|
+
if (isCodexLifecycleEvent(parsed)) {
|
|
6913
|
+
eventLineCount += 1;
|
|
6914
|
+
eventTypes.add(parsed.type);
|
|
6915
|
+
continue;
|
|
6916
|
+
}
|
|
6917
|
+
} catch {
|
|
6918
|
+
}
|
|
6919
|
+
nonEventLineCount += 1;
|
|
6920
|
+
if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
|
|
6921
|
+
}
|
|
6922
|
+
}
|
|
6923
|
+
return {
|
|
6924
|
+
eventLineCount,
|
|
6925
|
+
eventTypes: Array.from(eventTypes),
|
|
6926
|
+
nonEventLineCount,
|
|
6927
|
+
nonEventSamples,
|
|
6928
|
+
onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
|
|
6929
|
+
};
|
|
6930
|
+
}
|
|
6877
6931
|
function isHarnessVerificationOnlyBlocker(blocker) {
|
|
6878
6932
|
const text = blocker.toLowerCase();
|
|
6879
6933
|
return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
|
|
@@ -6897,21 +6951,25 @@ function runnerMetrics(input) {
|
|
|
6897
6951
|
exit_status: input.status ?? null,
|
|
6898
6952
|
timed_out: input.timedOut || false,
|
|
6899
6953
|
error_code: input.errorCode,
|
|
6954
|
+
codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
|
|
6955
|
+
codex_event_line_count: input.codexEventLineCount,
|
|
6956
|
+
codex_non_event_line_count: input.codexNonEventLineCount,
|
|
6900
6957
|
codex_command: input.config.codexCommand || "codex",
|
|
6901
6958
|
codex_model: input.config.codexModel,
|
|
6902
6959
|
codex_sandbox: input.config.codexSandbox || "workspace-write",
|
|
6903
6960
|
codex_full_auto: input.config.codexFullAuto !== false,
|
|
6904
|
-
timeout_ms:
|
|
6961
|
+
timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
|
|
6905
6962
|
});
|
|
6906
6963
|
}
|
|
6907
6964
|
function createCodexExecJsonRunner(config = {}) {
|
|
6908
6965
|
return (request) => {
|
|
6909
6966
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
6910
6967
|
const startedMs = Date.now();
|
|
6968
|
+
const timeoutMs = resolveCodexTimeoutMs(config, request);
|
|
6911
6969
|
if (!request.workdir || !(0, import_node_fs4.existsSync)(request.workdir)) {
|
|
6912
6970
|
return {
|
|
6913
6971
|
ok: false,
|
|
6914
|
-
metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
|
|
6972
|
+
metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
|
|
6915
6973
|
blocker: {
|
|
6916
6974
|
code: "codex_workdir_missing",
|
|
6917
6975
|
message: `Codex workdir does not exist for ${request.purpose}.`,
|
|
@@ -6946,7 +7004,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6946
7004
|
const proc = (0, import_node_child_process3.spawnSync)(config.codexCommand || "codex", args, {
|
|
6947
7005
|
input: request.prompt,
|
|
6948
7006
|
encoding: "utf-8",
|
|
6949
|
-
timeout:
|
|
7007
|
+
timeout: timeoutMs,
|
|
6950
7008
|
maxBuffer: 10 * 1024 * 1024,
|
|
6951
7009
|
env
|
|
6952
7010
|
});
|
|
@@ -6965,6 +7023,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6965
7023
|
stderr: proc.stderr || "",
|
|
6966
7024
|
status: proc.status,
|
|
6967
7025
|
timedOut,
|
|
7026
|
+
timeoutMs,
|
|
6968
7027
|
errorCode: proc.error.code || "spawn_error"
|
|
6969
7028
|
}),
|
|
6970
7029
|
blocker: {
|
|
@@ -6987,6 +7046,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6987
7046
|
stdout: proc.stdout || "",
|
|
6988
7047
|
stderr: proc.stderr || "",
|
|
6989
7048
|
status: proc.status,
|
|
7049
|
+
timeoutMs,
|
|
6990
7050
|
errorCode: "nonzero_exit"
|
|
6991
7051
|
}),
|
|
6992
7052
|
blocker: {
|
|
@@ -6999,12 +7059,15 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6999
7059
|
const finalText = (0, import_node_fs4.existsSync)(lastMessagePath) ? (0, import_node_fs4.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
|
|
7000
7060
|
const stdoutText = String(proc.stdout || "");
|
|
7001
7061
|
const stderrText = String(proc.stderr || "");
|
|
7002
|
-
const
|
|
7062
|
+
const runnerOutputs = [
|
|
7003
7063
|
{ source: (0, import_node_fs4.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
|
|
7004
7064
|
{ source: "stdout", text: stdoutText },
|
|
7005
7065
|
{ source: "stderr", text: stderrText }
|
|
7006
|
-
]
|
|
7066
|
+
];
|
|
7067
|
+
const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
|
|
7007
7068
|
if (!parsed) {
|
|
7069
|
+
const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
|
|
7070
|
+
const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
|
|
7008
7071
|
return {
|
|
7009
7072
|
ok: false,
|
|
7010
7073
|
stdout: stdoutText,
|
|
@@ -7018,12 +7081,24 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
7018
7081
|
stderr: stderrText,
|
|
7019
7082
|
finalText,
|
|
7020
7083
|
status: proc.status,
|
|
7021
|
-
|
|
7084
|
+
timeoutMs,
|
|
7085
|
+
errorCode,
|
|
7086
|
+
codexEventTypes: outputAnalysis.eventTypes,
|
|
7087
|
+
codexEventLineCount: outputAnalysis.eventLineCount,
|
|
7088
|
+
codexNonEventLineCount: outputAnalysis.nonEventLineCount
|
|
7022
7089
|
}),
|
|
7023
7090
|
blocker: {
|
|
7024
|
-
code: "codex_invalid_json",
|
|
7025
|
-
message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
7026
|
-
details: {
|
|
7091
|
+
code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
|
|
7092
|
+
message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
7093
|
+
details: {
|
|
7094
|
+
finalText,
|
|
7095
|
+
stdout: stdoutText,
|
|
7096
|
+
stderr: stderrText,
|
|
7097
|
+
event_types: outputAnalysis.eventTypes,
|
|
7098
|
+
event_line_count: outputAnalysis.eventLineCount,
|
|
7099
|
+
non_event_line_count: outputAnalysis.nonEventLineCount,
|
|
7100
|
+
non_event_samples: outputAnalysis.nonEventSamples
|
|
7101
|
+
}
|
|
7027
7102
|
}
|
|
7028
7103
|
};
|
|
7029
7104
|
}
|
|
@@ -7041,7 +7116,8 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
7041
7116
|
stderr: stderrText,
|
|
7042
7117
|
finalText,
|
|
7043
7118
|
parsedJsonSource,
|
|
7044
|
-
status: proc.status
|
|
7119
|
+
status: proc.status,
|
|
7120
|
+
timeoutMs
|
|
7045
7121
|
})
|
|
7046
7122
|
};
|
|
7047
7123
|
} finally {
|
|
@@ -7150,6 +7226,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
7150
7226
|
"Write a proof_plan and capture_script that will verify the exact user-facing change.",
|
|
7151
7227
|
"Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
|
|
7152
7228
|
"Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
|
|
7229
|
+
"Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
|
|
7153
7230
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
7154
7231
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
7155
7232
|
"For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
package/dist/index.js
CHANGED
|
@@ -95,7 +95,7 @@ import {
|
|
|
95
95
|
createDisabledRiddleProofAgentAdapter,
|
|
96
96
|
readRiddleProofRunStatus,
|
|
97
97
|
runRiddleProofEngineHarness
|
|
98
|
-
} from "./chunk-E7ATYSYS.js";
|
|
98
|
+
} from "./chunk-BBUO7HM4.js";
|
|
99
99
|
import {
|
|
100
100
|
RIDDLE_PROOF_RUN_STATE_VERSION,
|
|
101
101
|
appendRunEvent,
|
|
@@ -134,7 +134,7 @@ import {
|
|
|
134
134
|
createCodexExecAgentAdapter,
|
|
135
135
|
createCodexExecJsonRunner,
|
|
136
136
|
runCodexExecAgentDoctor
|
|
137
|
-
} from "./chunk-PYCQNK66.js";
|
|
137
|
+
} from "./chunk-EEIYUZXE.js";
|
|
138
138
|
import {
|
|
139
139
|
applyTerminalMetadata,
|
|
140
140
|
compactRecord,
|
package/dist/local-agent.cjs
CHANGED
|
@@ -48,6 +48,8 @@ function compactRecord(input) {
|
|
|
48
48
|
}
|
|
49
49
|
|
|
50
50
|
// src/codex-exec-agent.ts
|
|
51
|
+
var DEFAULT_CODEX_TIMEOUT_MS = 6e5;
|
|
52
|
+
var DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS = 18e4;
|
|
51
53
|
var REFINED_INPUTS_SCHEMA = {
|
|
52
54
|
type: "object",
|
|
53
55
|
additionalProperties: false,
|
|
@@ -391,6 +393,46 @@ function parseJsonFromRunnerOutputs(outputs, schema) {
|
|
|
391
393
|
if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
|
|
392
394
|
return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
|
|
393
395
|
}
|
|
396
|
+
function resolveCodexTimeoutMs(config, request) {
|
|
397
|
+
if (typeof config.codexTimeoutMs === "number" && Number.isFinite(config.codexTimeoutMs) && config.codexTimeoutMs > 0) {
|
|
398
|
+
return Number(config.codexTimeoutMs);
|
|
399
|
+
}
|
|
400
|
+
return request.purpose === "proof packet authoring" ? DEFAULT_PROOF_PACKET_AUTHOR_TIMEOUT_MS : DEFAULT_CODEX_TIMEOUT_MS;
|
|
401
|
+
}
|
|
402
|
+
function isCodexLifecycleEvent(value) {
|
|
403
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) return false;
|
|
404
|
+
const type = value.type;
|
|
405
|
+
return typeof type === "string" && (type.startsWith("thread.") || type.startsWith("turn.") || type.startsWith("exec.") || type.startsWith("agent.") || type.startsWith("token.") || type.startsWith("reasoning.") || type.startsWith("error."));
|
|
406
|
+
}
|
|
407
|
+
function analyzeCodexRunnerOutput(outputs) {
|
|
408
|
+
const eventTypes = /* @__PURE__ */ new Set();
|
|
409
|
+
let eventLineCount = 0;
|
|
410
|
+
let nonEventLineCount = 0;
|
|
411
|
+
const nonEventSamples = [];
|
|
412
|
+
for (const output of outputs) {
|
|
413
|
+
const lines = output.text.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
|
|
414
|
+
for (const line of lines) {
|
|
415
|
+
try {
|
|
416
|
+
const parsed = JSON.parse(line);
|
|
417
|
+
if (isCodexLifecycleEvent(parsed)) {
|
|
418
|
+
eventLineCount += 1;
|
|
419
|
+
eventTypes.add(parsed.type);
|
|
420
|
+
continue;
|
|
421
|
+
}
|
|
422
|
+
} catch {
|
|
423
|
+
}
|
|
424
|
+
nonEventLineCount += 1;
|
|
425
|
+
if (nonEventSamples.length < 3) nonEventSamples.push(line.slice(0, 240));
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
return {
|
|
429
|
+
eventLineCount,
|
|
430
|
+
eventTypes: Array.from(eventTypes),
|
|
431
|
+
nonEventLineCount,
|
|
432
|
+
nonEventSamples,
|
|
433
|
+
onlyLifecycleEvents: eventLineCount > 0 && nonEventLineCount === 0
|
|
434
|
+
};
|
|
435
|
+
}
|
|
394
436
|
function isHarnessVerificationOnlyBlocker(blocker) {
|
|
395
437
|
const text = blocker.toLowerCase();
|
|
396
438
|
return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
|
|
@@ -414,21 +456,25 @@ function runnerMetrics(input) {
|
|
|
414
456
|
exit_status: input.status ?? null,
|
|
415
457
|
timed_out: input.timedOut || false,
|
|
416
458
|
error_code: input.errorCode,
|
|
459
|
+
codex_event_types: input.codexEventTypes && input.codexEventTypes.length ? input.codexEventTypes : void 0,
|
|
460
|
+
codex_event_line_count: input.codexEventLineCount,
|
|
461
|
+
codex_non_event_line_count: input.codexNonEventLineCount,
|
|
417
462
|
codex_command: input.config.codexCommand || "codex",
|
|
418
463
|
codex_model: input.config.codexModel,
|
|
419
464
|
codex_sandbox: input.config.codexSandbox || "workspace-write",
|
|
420
465
|
codex_full_auto: input.config.codexFullAuto !== false,
|
|
421
|
-
timeout_ms:
|
|
466
|
+
timeout_ms: input.timeoutMs ?? DEFAULT_CODEX_TIMEOUT_MS
|
|
422
467
|
});
|
|
423
468
|
}
|
|
424
469
|
function createCodexExecJsonRunner(config = {}) {
|
|
425
470
|
return (request) => {
|
|
426
471
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
427
472
|
const startedMs = Date.now();
|
|
473
|
+
const timeoutMs = resolveCodexTimeoutMs(config, request);
|
|
428
474
|
if (!request.workdir || !(0, import_node_fs.existsSync)(request.workdir)) {
|
|
429
475
|
return {
|
|
430
476
|
ok: false,
|
|
431
|
-
metrics: runnerMetrics({ request, config, startedAt, startedMs, errorCode: "workdir_missing" }),
|
|
477
|
+
metrics: runnerMetrics({ request, config, startedAt, startedMs, timeoutMs, errorCode: "workdir_missing" }),
|
|
432
478
|
blocker: {
|
|
433
479
|
code: "codex_workdir_missing",
|
|
434
480
|
message: `Codex workdir does not exist for ${request.purpose}.`,
|
|
@@ -463,7 +509,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
463
509
|
const proc = (0, import_node_child_process.spawnSync)(config.codexCommand || "codex", args, {
|
|
464
510
|
input: request.prompt,
|
|
465
511
|
encoding: "utf-8",
|
|
466
|
-
timeout:
|
|
512
|
+
timeout: timeoutMs,
|
|
467
513
|
maxBuffer: 10 * 1024 * 1024,
|
|
468
514
|
env
|
|
469
515
|
});
|
|
@@ -482,6 +528,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
482
528
|
stderr: proc.stderr || "",
|
|
483
529
|
status: proc.status,
|
|
484
530
|
timedOut,
|
|
531
|
+
timeoutMs,
|
|
485
532
|
errorCode: proc.error.code || "spawn_error"
|
|
486
533
|
}),
|
|
487
534
|
blocker: {
|
|
@@ -504,6 +551,7 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
504
551
|
stdout: proc.stdout || "",
|
|
505
552
|
stderr: proc.stderr || "",
|
|
506
553
|
status: proc.status,
|
|
554
|
+
timeoutMs,
|
|
507
555
|
errorCode: "nonzero_exit"
|
|
508
556
|
}),
|
|
509
557
|
blocker: {
|
|
@@ -516,12 +564,15 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
516
564
|
const finalText = (0, import_node_fs.existsSync)(lastMessagePath) ? (0, import_node_fs.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
|
|
517
565
|
const stdoutText = String(proc.stdout || "");
|
|
518
566
|
const stderrText = String(proc.stderr || "");
|
|
519
|
-
const
|
|
567
|
+
const runnerOutputs = [
|
|
520
568
|
{ source: (0, import_node_fs.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
|
|
521
569
|
{ source: "stdout", text: stdoutText },
|
|
522
570
|
{ source: "stderr", text: stderrText }
|
|
523
|
-
]
|
|
571
|
+
];
|
|
572
|
+
const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs(runnerOutputs, request.schema);
|
|
524
573
|
if (!parsed) {
|
|
574
|
+
const outputAnalysis = analyzeCodexRunnerOutput(runnerOutputs);
|
|
575
|
+
const errorCode = outputAnalysis.onlyLifecycleEvents ? "no_final_response" : "invalid_json";
|
|
525
576
|
return {
|
|
526
577
|
ok: false,
|
|
527
578
|
stdout: stdoutText,
|
|
@@ -535,12 +586,24 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
535
586
|
stderr: stderrText,
|
|
536
587
|
finalText,
|
|
537
588
|
status: proc.status,
|
|
538
|
-
|
|
589
|
+
timeoutMs,
|
|
590
|
+
errorCode,
|
|
591
|
+
codexEventTypes: outputAnalysis.eventTypes,
|
|
592
|
+
codexEventLineCount: outputAnalysis.eventLineCount,
|
|
593
|
+
codexNonEventLineCount: outputAnalysis.nonEventLineCount
|
|
539
594
|
}),
|
|
540
595
|
blocker: {
|
|
541
|
-
code: "codex_invalid_json",
|
|
542
|
-
message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
543
|
-
details: {
|
|
596
|
+
code: outputAnalysis.onlyLifecycleEvents ? "codex_no_final_response" : "codex_invalid_json",
|
|
597
|
+
message: outputAnalysis.onlyLifecycleEvents ? `Codex emitted lifecycle events during ${request.purpose}, but did not produce a final JSON response.` : `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
598
|
+
details: {
|
|
599
|
+
finalText,
|
|
600
|
+
stdout: stdoutText,
|
|
601
|
+
stderr: stderrText,
|
|
602
|
+
event_types: outputAnalysis.eventTypes,
|
|
603
|
+
event_line_count: outputAnalysis.eventLineCount,
|
|
604
|
+
non_event_line_count: outputAnalysis.nonEventLineCount,
|
|
605
|
+
non_event_samples: outputAnalysis.nonEventSamples
|
|
606
|
+
}
|
|
544
607
|
}
|
|
545
608
|
};
|
|
546
609
|
}
|
|
@@ -558,7 +621,8 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
558
621
|
stderr: stderrText,
|
|
559
622
|
finalText,
|
|
560
623
|
parsedJsonSource,
|
|
561
|
-
status: proc.status
|
|
624
|
+
status: proc.status,
|
|
625
|
+
timeoutMs
|
|
562
626
|
})
|
|
563
627
|
};
|
|
564
628
|
} finally {
|
|
@@ -667,6 +731,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
667
731
|
"Write a proof_plan and capture_script that will verify the exact user-facing change.",
|
|
668
732
|
"Use recon_assessment.baseline_understanding as the source of truth. Do not author a proof plan unless it names the observed before state and the requested delta from that state.",
|
|
669
733
|
"Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
|
|
734
|
+
"Do not leave this authoring stage pending for external investigation. Keep any repo inspection brief, do not modify files, and return the JSON proof packet from the available state.",
|
|
670
735
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
671
736
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
672
737
|
"For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
package/dist/local-agent.js
CHANGED
|
@@ -120,7 +120,7 @@ declare function buildSetupArgs(params: WorkflowParams, config: ReturnType<typeo
|
|
|
120
120
|
target_image_hash: string;
|
|
121
121
|
viewport_matrix_json: string;
|
|
122
122
|
deterministic_setup_json: string;
|
|
123
|
-
reference: "
|
|
123
|
+
reference: "prod" | "before" | "both";
|
|
124
124
|
base_branch: string;
|
|
125
125
|
before_ref: string;
|
|
126
126
|
allow_static_preview_fallback: string;
|
|
@@ -120,7 +120,7 @@ declare function buildSetupArgs(params: WorkflowParams, config: ReturnType<typeo
|
|
|
120
120
|
target_image_hash: string;
|
|
121
121
|
viewport_matrix_json: string;
|
|
122
122
|
deterministic_setup_json: string;
|
|
123
|
-
reference: "
|
|
123
|
+
reference: "prod" | "before" | "both";
|
|
124
124
|
base_branch: string;
|
|
125
125
|
before_ref: string;
|
|
126
126
|
allow_static_preview_fallback: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from './proof-run-core-CE0jx7wL.cjs';
|
|
1
|
+
export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from './proof-run-core-Ci9uFxMc.cjs';
|
package/dist/proof-run-core.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from './proof-run-core-CE0jx7wL.js';
|
|
1
|
+
export { B as BUNDLED_RIDDLE_PROOF_DIR, C as CHECKPOINT_CONTRACT_VERSION, b as CheckpointInputContract, P as PluginConfig, R as RIDDLE_PROOF_DIR_CANDIDATES, S as ShipGateValidation, c as WORKFLOW_STAGE_ORDER, d as WorkflowAction, W as WorkflowParams, a as WorkflowStage, e as buildCheckpointContract, f as buildSetupArgs, g as checkpointContinueStage, h as clearStageDecisionRequest, i as ensureAction, j as ensureStageLoopState, k as invalidateVerifyEvidence, m as mergeStateFromParams, n as noImplementationModeFor, l as previewModeFromWorkflowMode, o as readState, q as recordStageAttempt, s as requiredBaselineLabelsForState, r as resolveConfig, t as resolveRiddleProofDir, u as setStageDecisionRequest, v as summarizeState, w as validateShipGate, x as visualDeltaForState, y as visualDeltaRequiredForState, z as visualDeltaShipGateReason, A as workflowFile, D as writeState } from './proof-run-core-Ci9uFxMc.js';
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { W as WorkflowParams, r as resolveConfig, P as PluginConfig, a as WorkflowStage } from './proof-run-core-CE0jx7wL.cjs';
|
|
1
|
+
import { W as WorkflowParams, r as resolveConfig, P as PluginConfig, a as WorkflowStage } from './proof-run-core-Ci9uFxMc.cjs';
|
|
2
2
|
|
|
3
3
|
declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, resolvedConfig?: ReturnType<typeof resolveConfig>): Promise<{
|
|
4
4
|
ok: boolean;
|
|
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
292
292
|
blocking?: boolean;
|
|
293
293
|
details?: Record<string, unknown>;
|
|
294
294
|
ok: boolean;
|
|
295
|
-
action: "
|
|
295
|
+
action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
|
|
296
296
|
state_path: string;
|
|
297
297
|
stage: any;
|
|
298
298
|
summary: string;
|
|
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
382
382
|
continueWithStage?: WorkflowStage | null;
|
|
383
383
|
blocking?: boolean;
|
|
384
384
|
details?: Record<string, unknown>;
|
|
385
|
-
action: "
|
|
385
|
+
action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
|
|
386
386
|
state_path: string;
|
|
387
387
|
stage: any;
|
|
388
388
|
checkpoint: string;
|
|
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
659
659
|
error?: undefined;
|
|
660
660
|
} | {
|
|
661
661
|
ok: boolean;
|
|
662
|
-
action: "
|
|
662
|
+
action: "setup" | "recon" | "author" | "implement" | "verify" | "ship";
|
|
663
663
|
state_path: string;
|
|
664
664
|
stage: any;
|
|
665
665
|
summary: string;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { W as WorkflowParams, r as resolveConfig, P as PluginConfig, a as WorkflowStage } from './proof-run-core-CE0jx7wL.js';
|
|
1
|
+
import { W as WorkflowParams, r as resolveConfig, P as PluginConfig, a as WorkflowStage } from './proof-run-core-Ci9uFxMc.js';
|
|
2
2
|
|
|
3
3
|
declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, resolvedConfig?: ReturnType<typeof resolveConfig>): Promise<{
|
|
4
4
|
ok: boolean;
|
|
@@ -292,7 +292,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
292
292
|
blocking?: boolean;
|
|
293
293
|
details?: Record<string, unknown>;
|
|
294
294
|
ok: boolean;
|
|
295
|
-
action: "
|
|
295
|
+
action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
|
|
296
296
|
state_path: string;
|
|
297
297
|
stage: any;
|
|
298
298
|
summary: string;
|
|
@@ -382,7 +382,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
382
382
|
continueWithStage?: WorkflowStage | null;
|
|
383
383
|
blocking?: boolean;
|
|
384
384
|
details?: Record<string, unknown>;
|
|
385
|
-
action: "
|
|
385
|
+
action: "setup" | "recon" | "author" | "implement" | "verify" | "ship" | "run";
|
|
386
386
|
state_path: string;
|
|
387
387
|
stage: any;
|
|
388
388
|
checkpoint: string;
|
|
@@ -659,7 +659,7 @@ declare function executeWorkflow(params: WorkflowParams, pluginConfig: any, reso
|
|
|
659
659
|
error?: undefined;
|
|
660
660
|
} | {
|
|
661
661
|
ok: boolean;
|
|
662
|
-
action: "
|
|
662
|
+
action: "setup" | "recon" | "author" | "implement" | "verify" | "ship";
|
|
663
663
|
state_path: string;
|
|
664
664
|
stage: any;
|
|
665
665
|
summary: string;
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import './proof-run-core-CE0jx7wL.cjs';
|
|
2
|
-
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-BlocjMni.cjs';
|
|
1
|
+
import './proof-run-core-Ci9uFxMc.cjs';
|
|
2
|
+
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-Bd1T43Dy.cjs';
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import './proof-run-core-CE0jx7wL.js';
|
|
2
|
-
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-C_m8WJmX.js';
|
|
1
|
+
import './proof-run-core-Ci9uFxMc.js';
|
|
2
|
+
export { R as RiddleProofEngine, c as createRiddleProofEngine, e as executeWorkflow } from './proof-run-engine-CXyhB-io.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@riddledc/riddle-proof",
|
|
3
|
-
"version": "0.8.8",
|
|
3
|
+
"version": "0.8.10",
|
|
4
4
|
"description": "Reusable Riddle Proof contracts and helpers for evidence-backed agent changes.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "RiddleDC",
|
|
@@ -227,6 +227,6 @@
|
|
|
227
227
|
"build": "tsup src/index.ts src/types.ts src/result.ts src/state.ts src/checkpoint.ts src/run-card.ts src/runner.ts src/engine-harness.ts src/codex-exec-agent.ts src/local-agent.ts src/cli.ts src/cli/index.ts src/diagnostics.ts src/proof-session.ts src/playability.ts src/basic-gameplay.ts src/profile.ts src/profile/index.ts src/openclaw.ts src/proof-run-core.ts src/proof-run-engine.ts src/riddle-client.ts src/runtime/riddle-client.ts src/spec/index.ts src/spec/types.ts src/spec/result.ts src/spec/state.ts src/spec/checkpoint.ts src/spec/run-card.ts src/runtime/index.ts src/app-contract/index.ts src/advanced/index.ts src/advanced/runner.ts src/advanced/engine-harness.ts src/advanced/proof-run-core.ts src/advanced/proof-run-engine.ts src/adapters/openclaw.ts src/adapters/local-agent.ts src/adapters/codex-exec-agent.ts src/adapters/codex.ts --format cjs,esm --dts --out-dir dist --clean",
|
|
228
228
|
"clean": "rm -rf dist",
|
|
229
229
|
"lint": "echo 'lint: (not configured)'",
|
|
230
|
-
"test": "npm run build && node test.js && node proof-run.test.js"
|
|
230
|
+
"test": "npm run build && node test.js && node proof-run.test.js && node trust-boundary.test.js && python3 runtime/tests/trust_boundary_regression.py"
|
|
231
231
|
}
|
|
232
232
|
}
|