@riddledc/riddle-proof 0.8.5 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/codex-exec-agent.cjs +31 -10
- package/dist/adapters/codex-exec-agent.js +1 -1
- package/dist/adapters/codex.cjs +31 -10
- package/dist/adapters/codex.js +1 -1
- package/dist/adapters/local-agent.cjs +31 -10
- package/dist/adapters/local-agent.js +1 -1
- package/dist/advanced/engine-harness.cjs +64 -7
- package/dist/advanced/engine-harness.js +2 -2
- package/dist/advanced/index.cjs +64 -7
- package/dist/advanced/index.js +4 -4
- package/dist/advanced/proof-run-core.cjs +63 -6
- package/dist/advanced/proof-run-core.js +1 -1
- package/dist/advanced/proof-run-engine.cjs +63 -6
- package/dist/advanced/proof-run-engine.js +2 -2
- package/dist/advanced/runner.js +2 -2
- package/dist/{chunk-GMZ57RRY.js → chunk-46DDSZJR.js} +1 -1
- package/dist/{chunk-RV6LK7HU.js → chunk-5N5QFI2S.js} +63 -6
- package/dist/{chunk-UIJ7X63P.js → chunk-5N6MQCLC.js} +1 -1
- package/dist/{chunk-BDFSMWTI.js → chunk-E7ATYSYS.js} +1 -1
- package/dist/{chunk-7F5LNUGR.js → chunk-PYCQNK66.js} +31 -10
- package/dist/{chunk-OD5UNE57.js → chunk-V6VZ3CAI.js} +2 -2
- package/dist/cli/index.js +4 -4
- package/dist/cli.cjs +100 -22
- package/dist/cli.js +4 -4
- package/dist/codex-exec-agent.cjs +31 -10
- package/dist/codex-exec-agent.js +1 -1
- package/dist/engine-harness.cjs +64 -7
- package/dist/engine-harness.js +2 -2
- package/dist/index.cjs +100 -22
- package/dist/index.js +4 -4
- package/dist/local-agent.cjs +31 -10
- package/dist/local-agent.js +1 -1
- package/dist/proof-run-core.cjs +63 -6
- package/dist/proof-run-core.js +1 -1
- package/dist/proof-run-engine.cjs +63 -6
- package/dist/proof-run-engine.js +2 -2
- package/dist/runner.js +2 -2
- package/package.json +1 -1
- package/runtime/lib/author.py +40 -1
- package/runtime/lib/verify.py +123 -1
- package/runtime/tests/recon_verify_smoke.py +82 -8
|
@@ -377,6 +377,18 @@ function parseJsonObject(raw, schema) {
|
|
|
377
377
|
}
|
|
378
378
|
return null;
|
|
379
379
|
}
|
|
380
|
+
function parseJsonFromRunnerOutputs(outputs, schema) {
|
|
381
|
+
const seen = /* @__PURE__ */ new Set();
|
|
382
|
+
for (const output of outputs) {
|
|
383
|
+
if (!output.text.trim() || seen.has(output.text)) continue;
|
|
384
|
+
seen.add(output.text);
|
|
385
|
+
const parsed = parseJsonObject(output.text, schema);
|
|
386
|
+
if (parsed) return { parsed, source: output.source };
|
|
387
|
+
}
|
|
388
|
+
const combined = outputs.map((output) => output.text).filter((text) => text.trim()).join("\n");
|
|
389
|
+
if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
|
|
390
|
+
return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
|
|
391
|
+
}
|
|
380
392
|
function isHarnessVerificationOnlyBlocker(blocker) {
|
|
381
393
|
const text = blocker.toLowerCase();
|
|
382
394
|
return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
|
|
@@ -396,6 +408,7 @@ function runnerMetrics(input) {
|
|
|
396
408
|
stdout_chars: (input.stdout || "").length,
|
|
397
409
|
stderr_chars: (input.stderr || "").length,
|
|
398
410
|
final_message_chars: (input.finalText || "").length,
|
|
411
|
+
parsed_json_source: input.parsedJsonSource,
|
|
399
412
|
exit_status: input.status ?? null,
|
|
400
413
|
timed_out: input.timedOut || false,
|
|
401
414
|
error_code: input.errorCode,
|
|
@@ -499,19 +512,25 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
499
512
|
};
|
|
500
513
|
}
|
|
501
514
|
const finalText = (0, import_node_fs.existsSync)(lastMessagePath) ? (0, import_node_fs.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
|
|
502
|
-
const
|
|
515
|
+
const stdoutText = String(proc.stdout || "");
|
|
516
|
+
const stderrText = String(proc.stderr || "");
|
|
517
|
+
const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs([
|
|
518
|
+
{ source: (0, import_node_fs.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
|
|
519
|
+
{ source: "stdout", text: stdoutText },
|
|
520
|
+
{ source: "stderr", text: stderrText }
|
|
521
|
+
], request.schema);
|
|
503
522
|
if (!parsed) {
|
|
504
523
|
return {
|
|
505
524
|
ok: false,
|
|
506
|
-
stdout:
|
|
507
|
-
stderr:
|
|
525
|
+
stdout: stdoutText,
|
|
526
|
+
stderr: stderrText,
|
|
508
527
|
metrics: runnerMetrics({
|
|
509
528
|
request,
|
|
510
529
|
config,
|
|
511
530
|
startedAt,
|
|
512
531
|
startedMs,
|
|
513
|
-
stdout:
|
|
514
|
-
stderr:
|
|
532
|
+
stdout: stdoutText,
|
|
533
|
+
stderr: stderrText,
|
|
515
534
|
finalText,
|
|
516
535
|
status: proc.status,
|
|
517
536
|
errorCode: "invalid_json"
|
|
@@ -519,23 +538,24 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
519
538
|
blocker: {
|
|
520
539
|
code: "codex_invalid_json",
|
|
521
540
|
message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
522
|
-
details: { finalText, stdout:
|
|
541
|
+
details: { finalText, stdout: stdoutText, stderr: stderrText }
|
|
523
542
|
}
|
|
524
543
|
};
|
|
525
544
|
}
|
|
526
545
|
return {
|
|
527
546
|
ok: true,
|
|
528
547
|
json: parsed,
|
|
529
|
-
stdout:
|
|
530
|
-
stderr:
|
|
548
|
+
stdout: stdoutText,
|
|
549
|
+
stderr: stderrText,
|
|
531
550
|
metrics: runnerMetrics({
|
|
532
551
|
request,
|
|
533
552
|
config,
|
|
534
553
|
startedAt,
|
|
535
554
|
startedMs,
|
|
536
|
-
stdout:
|
|
537
|
-
stderr:
|
|
555
|
+
stdout: stdoutText,
|
|
556
|
+
stderr: stderrText,
|
|
538
557
|
finalText,
|
|
558
|
+
parsedJsonSource,
|
|
539
559
|
status: proc.status
|
|
540
560
|
})
|
|
541
561
|
};
|
|
@@ -647,6 +667,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
647
667
|
"Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
|
|
648
668
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
649
669
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
670
|
+
"For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
|
650
671
|
"For structured proof, collect meaningful measurements inside page.evaluate, assign them to an evidence variable, and return that object from capture_script. Screenshots are optional supporting context for data/audio/log/metric/custom modes.",
|
|
651
672
|
"Do not assign globalThis.__riddleProofEvidence, window.__riddleProofEvidence, or self.__riddleProofEvidence in the worker context. Avoid global evidence assignment unless it is inside page.evaluate for compatibility with older packets.",
|
|
652
673
|
"Do not call Playwright page.* APIs inside page.evaluate; page.evaluate runs in the browser page, while page.waitForFunction, page.waitForSelector, page.click, and saveScreenshot belong in the outer capture script.",
|
package/dist/codex-exec-agent.js
CHANGED
package/dist/engine-harness.cjs
CHANGED
|
@@ -172,6 +172,55 @@ function writeState(statePath, state) {
|
|
|
172
172
|
function normalizeOptionalString(value) {
|
|
173
173
|
return typeof value === "string" ? value.trim() : void 0;
|
|
174
174
|
}
|
|
175
|
+
function normalizeRoutePath(value) {
|
|
176
|
+
const raw = typeof value === "string" ? value.trim() : "";
|
|
177
|
+
if (!raw) return "";
|
|
178
|
+
try {
|
|
179
|
+
const url = /^https?:\/\//i.test(raw) ? new URL(raw) : new URL(raw.startsWith("/") || raw.startsWith("?") || raw.startsWith("#") ? raw : `/${raw}`, "https://riddle-proof.local");
|
|
180
|
+
const pathname = url.pathname.replace(/\/+$/, "") || "/";
|
|
181
|
+
return `${pathname}${url.search}${url.hash}`;
|
|
182
|
+
} catch {
|
|
183
|
+
const hashSplit = raw.split("#");
|
|
184
|
+
const beforeHash = hashSplit.shift() || "";
|
|
185
|
+
const hash = hashSplit.length ? `#${hashSplit.join("#")}` : "";
|
|
186
|
+
const querySplit = beforeHash.split("?");
|
|
187
|
+
const rawPath = querySplit.shift() || "";
|
|
188
|
+
const query = querySplit.length ? `?${querySplit.join("?")}` : "";
|
|
189
|
+
const pathname = `/${rawPath}`.replace(/\/+/g, "/").replace(/\/+$/, "") || "/";
|
|
190
|
+
return `${pathname}${query}${hash}`;
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
function isInteractionVerificationMode(value) {
|
|
194
|
+
return INTERACTION_VERIFICATION_MODES.has(typeof value === "string" ? value.trim().toLowerCase() : "");
|
|
195
|
+
}
|
|
196
|
+
function stringRecordValue(record, key) {
|
|
197
|
+
if (!record || typeof record !== "object") return "";
|
|
198
|
+
const value = record[key];
|
|
199
|
+
return typeof value === "string" ? value.trim() : "";
|
|
200
|
+
}
|
|
201
|
+
function appendStateWarning(state, key, warning) {
|
|
202
|
+
const existing = Array.isArray(state[key]) ? state[key].filter((item) => typeof item === "string") : [];
|
|
203
|
+
if (!existing.includes(warning)) state[key] = [...existing, warning];
|
|
204
|
+
}
|
|
205
|
+
function interactionStartPathForAuthorPacket(state, parsed, refined) {
|
|
206
|
+
return normalizeRoutePath(
|
|
207
|
+
stringRecordValue(state, "expected_start_path") || stringRecordValue(refined, "expected_start_path") || stringRecordValue(parsed.interaction_contract, "start_path") || stringRecordValue(parsed.proof_contract, "start_path") || stringRecordValue(state, "server_path") || "/"
|
|
208
|
+
) || "/";
|
|
209
|
+
}
|
|
210
|
+
function authorPacketServerPath(state, parsed, refined, serverPath, expectedTerminalPath) {
|
|
211
|
+
if (!isInteractionVerificationMode(state.verification_mode)) return serverPath;
|
|
212
|
+
const startPath = interactionStartPathForAuthorPacket(state, parsed, refined);
|
|
213
|
+
state.expected_start_path = startPath;
|
|
214
|
+
if (expectedTerminalPath && normalizeRoutePath(serverPath) === normalizeRoutePath(expectedTerminalPath) && normalizeRoutePath(serverPath) !== startPath) {
|
|
215
|
+
appendStateWarning(
|
|
216
|
+
state,
|
|
217
|
+
"author_warnings",
|
|
218
|
+
"Supervisor packet refined_inputs.server_path matched the terminal interaction route; kept the recon start route for capture."
|
|
219
|
+
);
|
|
220
|
+
return startPath;
|
|
221
|
+
}
|
|
222
|
+
return serverPath;
|
|
223
|
+
}
|
|
175
224
|
function knownEnvironmentIssuesFromNotes(notes) {
|
|
176
225
|
const text = notes.toLowerCase();
|
|
177
226
|
const issues = [];
|
|
@@ -652,17 +701,24 @@ function mergeStateFromParams(statePath, params) {
|
|
|
652
701
|
state.proof_contract = parsed.proof_contract;
|
|
653
702
|
}
|
|
654
703
|
const refined = parsed?.refined_inputs || {};
|
|
704
|
+
const expectedTerminalPath = normalizeOptionalString(
|
|
705
|
+
typeof refined?.expected_terminal_path === "string" ? refined.expected_terminal_path : typeof parsed?.expected_terminal_path === "string" ? parsed.expected_terminal_path : ""
|
|
706
|
+
) || "";
|
|
655
707
|
if (typeof refined?.server_path === "string") {
|
|
656
|
-
|
|
708
|
+
const refinedServerPath = normalizeOptionalString(refined.server_path) || "";
|
|
709
|
+
state.server_path = authorPacketServerPath(
|
|
710
|
+
state,
|
|
711
|
+
parsed,
|
|
712
|
+
refined,
|
|
713
|
+
refinedServerPath,
|
|
714
|
+
expectedTerminalPath
|
|
715
|
+
);
|
|
657
716
|
state.server_path_source = "supervising_agent";
|
|
658
717
|
}
|
|
659
718
|
if (typeof refined?.wait_for_selector === "string") state.wait_for_selector = normalizeOptionalString(refined.wait_for_selector) || "";
|
|
660
719
|
if (typeof refined?.reference === "string" && refined.reference.trim()) state.reference = refined.reference.trim();
|
|
661
|
-
if (
|
|
662
|
-
state.expected_terminal_path =
|
|
663
|
-
}
|
|
664
|
-
if (typeof parsed?.expected_terminal_path === "string") {
|
|
665
|
-
state.expected_terminal_path = normalizeOptionalString(parsed.expected_terminal_path) || "";
|
|
720
|
+
if (expectedTerminalPath) {
|
|
721
|
+
state.expected_terminal_path = expectedTerminalPath;
|
|
666
722
|
}
|
|
667
723
|
if (typeof parsed?.confidence === "string") state.supervisor_author_confidence = normalizeOptionalString(parsed.confidence) || null;
|
|
668
724
|
if (parsed?.rationale !== void 0) state.supervisor_author_rationale = parsed.rationale;
|
|
@@ -842,7 +898,7 @@ function summarizeState(state) {
|
|
|
842
898
|
state: selected
|
|
843
899
|
};
|
|
844
900
|
}
|
|
845
|
-
var import_node_fs, import_node_crypto2, import_node_path, import_node_url, import_meta, WORKFLOW_STAGE_ORDER, CHECKPOINT_CONTRACT_VERSION, BUNDLED_RIDDLE_PROOF_DIR, RIDDLE_PROOF_DIR_CANDIDATES, VISUAL_FIRST_MODES, CHECKPOINT_CONTRACT_SPECS;
|
|
901
|
+
var import_node_fs, import_node_crypto2, import_node_path, import_node_url, import_meta, WORKFLOW_STAGE_ORDER, CHECKPOINT_CONTRACT_VERSION, BUNDLED_RIDDLE_PROOF_DIR, RIDDLE_PROOF_DIR_CANDIDATES, INTERACTION_VERIFICATION_MODES, VISUAL_FIRST_MODES, CHECKPOINT_CONTRACT_SPECS;
|
|
846
902
|
var init_proof_run_core = __esm({
|
|
847
903
|
"src/proof-run-core.ts"() {
|
|
848
904
|
"use strict";
|
|
@@ -861,6 +917,7 @@ var init_proof_run_core = __esm({
|
|
|
861
917
|
RIDDLE_PROOF_DIR_CANDIDATES = [
|
|
862
918
|
BUNDLED_RIDDLE_PROOF_DIR
|
|
863
919
|
];
|
|
920
|
+
INTERACTION_VERIFICATION_MODES = /* @__PURE__ */ new Set(["interaction", "interactive", "user_flow", "user-flow", "workflow"]);
|
|
864
921
|
VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
865
922
|
"visual",
|
|
866
923
|
"render",
|
package/dist/engine-harness.js
CHANGED
|
@@ -2,10 +2,10 @@ import {
|
|
|
2
2
|
createDisabledRiddleProofAgentAdapter,
|
|
3
3
|
readRiddleProofRunStatus,
|
|
4
4
|
runRiddleProofEngineHarness
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-E7ATYSYS.js";
|
|
6
6
|
import "./chunk-YZUVEJ5B.js";
|
|
7
7
|
import "./chunk-FMOYUYH2.js";
|
|
8
|
-
import "./chunk-
|
|
8
|
+
import "./chunk-5N5QFI2S.js";
|
|
9
9
|
import "./chunk-4FOHZ7JG.js";
|
|
10
10
|
import "./chunk-VY4Y5U57.js";
|
|
11
11
|
import "./chunk-MLKGABMK.js";
|
package/dist/index.cjs
CHANGED
|
@@ -172,6 +172,55 @@ function writeState(statePath, state) {
|
|
|
172
172
|
function normalizeOptionalString(value) {
|
|
173
173
|
return typeof value === "string" ? value.trim() : void 0;
|
|
174
174
|
}
|
|
175
|
+
function normalizeRoutePath(value) {
|
|
176
|
+
const raw = typeof value === "string" ? value.trim() : "";
|
|
177
|
+
if (!raw) return "";
|
|
178
|
+
try {
|
|
179
|
+
const url = /^https?:\/\//i.test(raw) ? new URL(raw) : new URL(raw.startsWith("/") || raw.startsWith("?") || raw.startsWith("#") ? raw : `/${raw}`, "https://riddle-proof.local");
|
|
180
|
+
const pathname = url.pathname.replace(/\/+$/, "") || "/";
|
|
181
|
+
return `${pathname}${url.search}${url.hash}`;
|
|
182
|
+
} catch {
|
|
183
|
+
const hashSplit = raw.split("#");
|
|
184
|
+
const beforeHash = hashSplit.shift() || "";
|
|
185
|
+
const hash = hashSplit.length ? `#${hashSplit.join("#")}` : "";
|
|
186
|
+
const querySplit = beforeHash.split("?");
|
|
187
|
+
const rawPath = querySplit.shift() || "";
|
|
188
|
+
const query = querySplit.length ? `?${querySplit.join("?")}` : "";
|
|
189
|
+
const pathname = `/${rawPath}`.replace(/\/+/g, "/").replace(/\/+$/, "") || "/";
|
|
190
|
+
return `${pathname}${query}${hash}`;
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
function isInteractionVerificationMode(value) {
|
|
194
|
+
return INTERACTION_VERIFICATION_MODES.has(typeof value === "string" ? value.trim().toLowerCase() : "");
|
|
195
|
+
}
|
|
196
|
+
function stringRecordValue(record, key) {
|
|
197
|
+
if (!record || typeof record !== "object") return "";
|
|
198
|
+
const value = record[key];
|
|
199
|
+
return typeof value === "string" ? value.trim() : "";
|
|
200
|
+
}
|
|
201
|
+
function appendStateWarning(state, key, warning) {
|
|
202
|
+
const existing = Array.isArray(state[key]) ? state[key].filter((item) => typeof item === "string") : [];
|
|
203
|
+
if (!existing.includes(warning)) state[key] = [...existing, warning];
|
|
204
|
+
}
|
|
205
|
+
function interactionStartPathForAuthorPacket(state, parsed, refined) {
|
|
206
|
+
return normalizeRoutePath(
|
|
207
|
+
stringRecordValue(state, "expected_start_path") || stringRecordValue(refined, "expected_start_path") || stringRecordValue(parsed.interaction_contract, "start_path") || stringRecordValue(parsed.proof_contract, "start_path") || stringRecordValue(state, "server_path") || "/"
|
|
208
|
+
) || "/";
|
|
209
|
+
}
|
|
210
|
+
function authorPacketServerPath(state, parsed, refined, serverPath, expectedTerminalPath) {
|
|
211
|
+
if (!isInteractionVerificationMode(state.verification_mode)) return serverPath;
|
|
212
|
+
const startPath = interactionStartPathForAuthorPacket(state, parsed, refined);
|
|
213
|
+
state.expected_start_path = startPath;
|
|
214
|
+
if (expectedTerminalPath && normalizeRoutePath(serverPath) === normalizeRoutePath(expectedTerminalPath) && normalizeRoutePath(serverPath) !== startPath) {
|
|
215
|
+
appendStateWarning(
|
|
216
|
+
state,
|
|
217
|
+
"author_warnings",
|
|
218
|
+
"Supervisor packet refined_inputs.server_path matched the terminal interaction route; kept the recon start route for capture."
|
|
219
|
+
);
|
|
220
|
+
return startPath;
|
|
221
|
+
}
|
|
222
|
+
return serverPath;
|
|
223
|
+
}
|
|
175
224
|
function knownEnvironmentIssuesFromNotes(notes) {
|
|
176
225
|
const text = notes.toLowerCase();
|
|
177
226
|
const issues = [];
|
|
@@ -652,17 +701,24 @@ function mergeStateFromParams(statePath, params) {
|
|
|
652
701
|
state.proof_contract = parsed.proof_contract;
|
|
653
702
|
}
|
|
654
703
|
const refined = parsed?.refined_inputs || {};
|
|
704
|
+
const expectedTerminalPath = normalizeOptionalString(
|
|
705
|
+
typeof refined?.expected_terminal_path === "string" ? refined.expected_terminal_path : typeof parsed?.expected_terminal_path === "string" ? parsed.expected_terminal_path : ""
|
|
706
|
+
) || "";
|
|
655
707
|
if (typeof refined?.server_path === "string") {
|
|
656
|
-
|
|
708
|
+
const refinedServerPath = normalizeOptionalString(refined.server_path) || "";
|
|
709
|
+
state.server_path = authorPacketServerPath(
|
|
710
|
+
state,
|
|
711
|
+
parsed,
|
|
712
|
+
refined,
|
|
713
|
+
refinedServerPath,
|
|
714
|
+
expectedTerminalPath
|
|
715
|
+
);
|
|
657
716
|
state.server_path_source = "supervising_agent";
|
|
658
717
|
}
|
|
659
718
|
if (typeof refined?.wait_for_selector === "string") state.wait_for_selector = normalizeOptionalString(refined.wait_for_selector) || "";
|
|
660
719
|
if (typeof refined?.reference === "string" && refined.reference.trim()) state.reference = refined.reference.trim();
|
|
661
|
-
if (
|
|
662
|
-
state.expected_terminal_path =
|
|
663
|
-
}
|
|
664
|
-
if (typeof parsed?.expected_terminal_path === "string") {
|
|
665
|
-
state.expected_terminal_path = normalizeOptionalString(parsed.expected_terminal_path) || "";
|
|
720
|
+
if (expectedTerminalPath) {
|
|
721
|
+
state.expected_terminal_path = expectedTerminalPath;
|
|
666
722
|
}
|
|
667
723
|
if (typeof parsed?.confidence === "string") state.supervisor_author_confidence = normalizeOptionalString(parsed.confidence) || null;
|
|
668
724
|
if (parsed?.rationale !== void 0) state.supervisor_author_rationale = parsed.rationale;
|
|
@@ -842,7 +898,7 @@ function summarizeState(state) {
|
|
|
842
898
|
state: selected
|
|
843
899
|
};
|
|
844
900
|
}
|
|
845
|
-
var import_node_fs, import_node_crypto2, import_node_path, import_node_url, import_meta, WORKFLOW_STAGE_ORDER, CHECKPOINT_CONTRACT_VERSION, BUNDLED_RIDDLE_PROOF_DIR, RIDDLE_PROOF_DIR_CANDIDATES, VISUAL_FIRST_MODES, CHECKPOINT_CONTRACT_SPECS;
|
|
901
|
+
var import_node_fs, import_node_crypto2, import_node_path, import_node_url, import_meta, WORKFLOW_STAGE_ORDER, CHECKPOINT_CONTRACT_VERSION, BUNDLED_RIDDLE_PROOF_DIR, RIDDLE_PROOF_DIR_CANDIDATES, INTERACTION_VERIFICATION_MODES, VISUAL_FIRST_MODES, CHECKPOINT_CONTRACT_SPECS;
|
|
846
902
|
var init_proof_run_core = __esm({
|
|
847
903
|
"src/proof-run-core.ts"() {
|
|
848
904
|
"use strict";
|
|
@@ -861,6 +917,7 @@ var init_proof_run_core = __esm({
|
|
|
861
917
|
RIDDLE_PROOF_DIR_CANDIDATES = [
|
|
862
918
|
BUNDLED_RIDDLE_PROOF_DIR
|
|
863
919
|
];
|
|
920
|
+
INTERACTION_VERIFICATION_MODES = /* @__PURE__ */ new Set(["interaction", "interactive", "user_flow", "user-flow", "workflow"]);
|
|
864
921
|
VISUAL_FIRST_MODES = /* @__PURE__ */ new Set([
|
|
865
922
|
"visual",
|
|
866
923
|
"render",
|
|
@@ -6805,6 +6862,18 @@ function parseJsonObject(raw, schema) {
|
|
|
6805
6862
|
}
|
|
6806
6863
|
return null;
|
|
6807
6864
|
}
|
|
6865
|
+
function parseJsonFromRunnerOutputs(outputs, schema) {
|
|
6866
|
+
const seen = /* @__PURE__ */ new Set();
|
|
6867
|
+
for (const output of outputs) {
|
|
6868
|
+
if (!output.text.trim() || seen.has(output.text)) continue;
|
|
6869
|
+
seen.add(output.text);
|
|
6870
|
+
const parsed = parseJsonObject(output.text, schema);
|
|
6871
|
+
if (parsed) return { parsed, source: output.source };
|
|
6872
|
+
}
|
|
6873
|
+
const combined = outputs.map((output) => output.text).filter((text) => text.trim()).join("\n");
|
|
6874
|
+
if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
|
|
6875
|
+
return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
|
|
6876
|
+
}
|
|
6808
6877
|
function isHarnessVerificationOnlyBlocker(blocker) {
|
|
6809
6878
|
const text = blocker.toLowerCase();
|
|
6810
6879
|
return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
|
|
@@ -6824,6 +6893,7 @@ function runnerMetrics(input) {
|
|
|
6824
6893
|
stdout_chars: (input.stdout || "").length,
|
|
6825
6894
|
stderr_chars: (input.stderr || "").length,
|
|
6826
6895
|
final_message_chars: (input.finalText || "").length,
|
|
6896
|
+
parsed_json_source: input.parsedJsonSource,
|
|
6827
6897
|
exit_status: input.status ?? null,
|
|
6828
6898
|
timed_out: input.timedOut || false,
|
|
6829
6899
|
error_code: input.errorCode,
|
|
@@ -6927,19 +6997,25 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6927
6997
|
};
|
|
6928
6998
|
}
|
|
6929
6999
|
const finalText = (0, import_node_fs4.existsSync)(lastMessagePath) ? (0, import_node_fs4.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
|
|
6930
|
-
const
|
|
7000
|
+
const stdoutText = String(proc.stdout || "");
|
|
7001
|
+
const stderrText = String(proc.stderr || "");
|
|
7002
|
+
const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs([
|
|
7003
|
+
{ source: (0, import_node_fs4.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
|
|
7004
|
+
{ source: "stdout", text: stdoutText },
|
|
7005
|
+
{ source: "stderr", text: stderrText }
|
|
7006
|
+
], request.schema);
|
|
6931
7007
|
if (!parsed) {
|
|
6932
7008
|
return {
|
|
6933
7009
|
ok: false,
|
|
6934
|
-
stdout:
|
|
6935
|
-
stderr:
|
|
7010
|
+
stdout: stdoutText,
|
|
7011
|
+
stderr: stderrText,
|
|
6936
7012
|
metrics: runnerMetrics({
|
|
6937
7013
|
request,
|
|
6938
7014
|
config,
|
|
6939
7015
|
startedAt,
|
|
6940
7016
|
startedMs,
|
|
6941
|
-
stdout:
|
|
6942
|
-
stderr:
|
|
7017
|
+
stdout: stdoutText,
|
|
7018
|
+
stderr: stderrText,
|
|
6943
7019
|
finalText,
|
|
6944
7020
|
status: proc.status,
|
|
6945
7021
|
errorCode: "invalid_json"
|
|
@@ -6947,23 +7023,24 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
6947
7023
|
blocker: {
|
|
6948
7024
|
code: "codex_invalid_json",
|
|
6949
7025
|
message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
6950
|
-
details: { finalText, stdout:
|
|
7026
|
+
details: { finalText, stdout: stdoutText, stderr: stderrText }
|
|
6951
7027
|
}
|
|
6952
7028
|
};
|
|
6953
7029
|
}
|
|
6954
7030
|
return {
|
|
6955
7031
|
ok: true,
|
|
6956
7032
|
json: parsed,
|
|
6957
|
-
stdout:
|
|
6958
|
-
stderr:
|
|
7033
|
+
stdout: stdoutText,
|
|
7034
|
+
stderr: stderrText,
|
|
6959
7035
|
metrics: runnerMetrics({
|
|
6960
7036
|
request,
|
|
6961
7037
|
config,
|
|
6962
7038
|
startedAt,
|
|
6963
7039
|
startedMs,
|
|
6964
|
-
stdout:
|
|
6965
|
-
stderr:
|
|
7040
|
+
stdout: stdoutText,
|
|
7041
|
+
stderr: stderrText,
|
|
6966
7042
|
finalText,
|
|
7043
|
+
parsedJsonSource,
|
|
6967
7044
|
status: proc.status
|
|
6968
7045
|
})
|
|
6969
7046
|
};
|
|
@@ -7075,6 +7152,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
7075
7152
|
"Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
|
|
7076
7153
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
7077
7154
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
7155
|
+
"For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
|
7078
7156
|
"For structured proof, collect meaningful measurements inside page.evaluate, assign them to an evidence variable, and return that object from capture_script. Screenshots are optional supporting context for data/audio/log/metric/custom modes.",
|
|
7079
7157
|
"Do not assign globalThis.__riddleProofEvidence, window.__riddleProofEvidence, or self.__riddleProofEvidence in the worker context. Avoid global evidence assignment unless it is inside page.evaluate for compatibility with older packets.",
|
|
7080
7158
|
"Do not call Playwright page.* APIs inside page.evaluate; page.evaluate runs in the browser page, while page.waitForFunction, page.waitForSelector, page.click, and saveScreenshot belong in the outer capture script.",
|
|
@@ -10474,7 +10552,7 @@ function normalizeRouteInventoryPath(value, label) {
|
|
|
10474
10552
|
const path6 = stringValue5(value);
|
|
10475
10553
|
if (!path6) throw new Error(`${label} requires path.`);
|
|
10476
10554
|
if (!path6.startsWith("/")) throw new Error(`${label}.path must start with /.`);
|
|
10477
|
-
return
|
|
10555
|
+
return normalizeRoutePath2(path6);
|
|
10478
10556
|
}
|
|
10479
10557
|
function normalizeRouteInventoryRoute(input, index) {
|
|
10480
10558
|
if (typeof input === "string") return { path: normalizeRouteInventoryPath(input, `checks route_inventory expected_routes[${index}]`) };
|
|
@@ -11491,7 +11569,7 @@ function expectedFailedNetworkMockConsoleEventSummary(event, evidence) {
|
|
|
11491
11569
|
text: isRecord2(event) && typeof event.text === "string" ? event.text.slice(0, 300) : sample.slice(0, 300)
|
|
11492
11570
|
};
|
|
11493
11571
|
}
|
|
11494
|
-
function
|
|
11572
|
+
function normalizeRoutePath2(path6) {
|
|
11495
11573
|
const value = path6 || "/";
|
|
11496
11574
|
if (value === "/") return "/";
|
|
11497
11575
|
return value.replace(/\/+$/, "") || "/";
|
|
@@ -11531,10 +11609,10 @@ function mountedExpectedRoutePath(targetUrl, expected) {
|
|
|
11531
11609
|
return mountPrefix ? joinMountedRoutePath(mountPrefix, expected) : expected;
|
|
11532
11610
|
}
|
|
11533
11611
|
function routePathMatches(observed, expected, targetUrl) {
|
|
11534
|
-
const normalizedObserved =
|
|
11535
|
-
const normalizedExpected =
|
|
11612
|
+
const normalizedObserved = normalizeRoutePath2(observed);
|
|
11613
|
+
const normalizedExpected = normalizeRoutePath2(expected);
|
|
11536
11614
|
if (normalizedObserved === normalizedExpected) return true;
|
|
11537
|
-
return normalizedObserved ===
|
|
11615
|
+
return normalizedObserved === normalizeRoutePath2(mountedExpectedRoutePath(targetUrl, expected));
|
|
11538
11616
|
}
|
|
11539
11617
|
function successfulRoute(route, targetUrl) {
|
|
11540
11618
|
const matched = route.matched || routePathMatches(route.observed, route.expected_path, targetUrl);
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runRiddleProof
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-5N6MQCLC.js";
|
|
4
4
|
import "./chunk-6F4PWJZI.js";
|
|
5
5
|
import {
|
|
6
6
|
RIDDLE_PROOF_PLAYABILITY_ASSESSMENT_VERSION,
|
|
@@ -95,7 +95,7 @@ import {
|
|
|
95
95
|
createDisabledRiddleProofAgentAdapter,
|
|
96
96
|
readRiddleProofRunStatus,
|
|
97
97
|
runRiddleProofEngineHarness
|
|
98
|
-
} from "./chunk-
|
|
98
|
+
} from "./chunk-E7ATYSYS.js";
|
|
99
99
|
import {
|
|
100
100
|
RIDDLE_PROOF_RUN_STATE_VERSION,
|
|
101
101
|
appendRunEvent,
|
|
@@ -112,7 +112,7 @@ import {
|
|
|
112
112
|
RIDDLE_PROOF_RUN_CARD_VERSION,
|
|
113
113
|
createRiddleProofRunCard
|
|
114
114
|
} from "./chunk-FMOYUYH2.js";
|
|
115
|
-
import "./chunk-
|
|
115
|
+
import "./chunk-5N5QFI2S.js";
|
|
116
116
|
import {
|
|
117
117
|
RIDDLE_PROOF_CHECKPOINT_PACKET_VERSION,
|
|
118
118
|
RIDDLE_PROOF_CHECKPOINT_RESPONSE_VERSION,
|
|
@@ -134,7 +134,7 @@ import {
|
|
|
134
134
|
createCodexExecAgentAdapter,
|
|
135
135
|
createCodexExecJsonRunner,
|
|
136
136
|
runCodexExecAgentDoctor
|
|
137
|
-
} from "./chunk-
|
|
137
|
+
} from "./chunk-PYCQNK66.js";
|
|
138
138
|
import {
|
|
139
139
|
applyTerminalMetadata,
|
|
140
140
|
compactRecord,
|
package/dist/local-agent.cjs
CHANGED
|
@@ -379,6 +379,18 @@ function parseJsonObject(raw, schema) {
|
|
|
379
379
|
}
|
|
380
380
|
return null;
|
|
381
381
|
}
|
|
382
|
+
function parseJsonFromRunnerOutputs(outputs, schema) {
|
|
383
|
+
const seen = /* @__PURE__ */ new Set();
|
|
384
|
+
for (const output of outputs) {
|
|
385
|
+
if (!output.text.trim() || seen.has(output.text)) continue;
|
|
386
|
+
seen.add(output.text);
|
|
387
|
+
const parsed = parseJsonObject(output.text, schema);
|
|
388
|
+
if (parsed) return { parsed, source: output.source };
|
|
389
|
+
}
|
|
390
|
+
const combined = outputs.map((output) => output.text).filter((text) => text.trim()).join("\n");
|
|
391
|
+
if (!combined.trim() || seen.has(combined)) return { parsed: null, source: "" };
|
|
392
|
+
return { parsed: parseJsonObject(combined, schema), source: "combined_output" };
|
|
393
|
+
}
|
|
382
394
|
function isHarnessVerificationOnlyBlocker(blocker) {
|
|
383
395
|
const text = blocker.toLowerCase();
|
|
384
396
|
return (text.includes("erofs") || text.includes("read-only file system")) && text.includes("node_modules") && (text.includes(".vite-temp") || text.includes("vite.config"));
|
|
@@ -398,6 +410,7 @@ function runnerMetrics(input) {
|
|
|
398
410
|
stdout_chars: (input.stdout || "").length,
|
|
399
411
|
stderr_chars: (input.stderr || "").length,
|
|
400
412
|
final_message_chars: (input.finalText || "").length,
|
|
413
|
+
parsed_json_source: input.parsedJsonSource,
|
|
401
414
|
exit_status: input.status ?? null,
|
|
402
415
|
timed_out: input.timedOut || false,
|
|
403
416
|
error_code: input.errorCode,
|
|
@@ -501,19 +514,25 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
501
514
|
};
|
|
502
515
|
}
|
|
503
516
|
const finalText = (0, import_node_fs.existsSync)(lastMessagePath) ? (0, import_node_fs.readFileSync)(lastMessagePath, "utf-8") : String(proc.stdout || "");
|
|
504
|
-
const
|
|
517
|
+
const stdoutText = String(proc.stdout || "");
|
|
518
|
+
const stderrText = String(proc.stderr || "");
|
|
519
|
+
const { parsed, source: parsedJsonSource } = parseJsonFromRunnerOutputs([
|
|
520
|
+
{ source: (0, import_node_fs.existsSync)(lastMessagePath) ? "last_message" : "stdout", text: finalText },
|
|
521
|
+
{ source: "stdout", text: stdoutText },
|
|
522
|
+
{ source: "stderr", text: stderrText }
|
|
523
|
+
], request.schema);
|
|
505
524
|
if (!parsed) {
|
|
506
525
|
return {
|
|
507
526
|
ok: false,
|
|
508
|
-
stdout:
|
|
509
|
-
stderr:
|
|
527
|
+
stdout: stdoutText,
|
|
528
|
+
stderr: stderrText,
|
|
510
529
|
metrics: runnerMetrics({
|
|
511
530
|
request,
|
|
512
531
|
config,
|
|
513
532
|
startedAt,
|
|
514
533
|
startedMs,
|
|
515
|
-
stdout:
|
|
516
|
-
stderr:
|
|
534
|
+
stdout: stdoutText,
|
|
535
|
+
stderr: stderrText,
|
|
517
536
|
finalText,
|
|
518
537
|
status: proc.status,
|
|
519
538
|
errorCode: "invalid_json"
|
|
@@ -521,23 +540,24 @@ function createCodexExecJsonRunner(config = {}) {
|
|
|
521
540
|
blocker: {
|
|
522
541
|
code: "codex_invalid_json",
|
|
523
542
|
message: `Codex completed ${request.purpose}, but did not return valid JSON.`,
|
|
524
|
-
details: { finalText, stdout:
|
|
543
|
+
details: { finalText, stdout: stdoutText, stderr: stderrText }
|
|
525
544
|
}
|
|
526
545
|
};
|
|
527
546
|
}
|
|
528
547
|
return {
|
|
529
548
|
ok: true,
|
|
530
549
|
json: parsed,
|
|
531
|
-
stdout:
|
|
532
|
-
stderr:
|
|
550
|
+
stdout: stdoutText,
|
|
551
|
+
stderr: stderrText,
|
|
533
552
|
metrics: runnerMetrics({
|
|
534
553
|
request,
|
|
535
554
|
config,
|
|
536
555
|
startedAt,
|
|
537
556
|
startedMs,
|
|
538
|
-
stdout:
|
|
539
|
-
stderr:
|
|
557
|
+
stdout: stdoutText,
|
|
558
|
+
stderr: stderrText,
|
|
540
559
|
finalText,
|
|
560
|
+
parsedJsonSource,
|
|
541
561
|
status: proc.status
|
|
542
562
|
})
|
|
543
563
|
};
|
|
@@ -649,6 +669,7 @@ function createCodexExecAgentAdapter(config = {}, runner = createCodexExecJsonRu
|
|
|
649
669
|
"Use the recon-approved route and baseline context; make the plan name the concrete target, expected before state, expected after state, and stop condition.",
|
|
650
670
|
"Choose the evidence modality from verification_mode and success_criteria: screenshots for visual/UI proof, interactions plus screenshots for interaction proof, structured metrics/logs/JSON/audio analysis for non-visual proof.",
|
|
651
671
|
"For playable/gameplay proof, treat screenshots as supporting artifacts only: start the game, send keyboard or pointer input, measure state before/after, measure non-HUD canvas/playfield pixel deltas across time, and return playability evidence with version riddle-proof.playability.v1.",
|
|
672
|
+
"For interaction proof, return a structured evidence object with start route/state, terminal route/state, action, assertions, and matched UI text. Catch waitForURL or selector timeouts and record them as failed assertions instead of throwing before evidence is emitted.",
|
|
652
673
|
"For structured proof, collect meaningful measurements inside page.evaluate, assign them to an evidence variable, and return that object from capture_script. Screenshots are optional supporting context for data/audio/log/metric/custom modes.",
|
|
653
674
|
"Do not assign globalThis.__riddleProofEvidence, window.__riddleProofEvidence, or self.__riddleProofEvidence in the worker context. Avoid global evidence assignment unless it is inside page.evaluate for compatibility with older packets.",
|
|
654
675
|
"Do not call Playwright page.* APIs inside page.evaluate; page.evaluate runs in the browser page, while page.waitForFunction, page.waitForSelector, page.click, and saveScreenshot belong in the outer capture script.",
|
package/dist/local-agent.js
CHANGED