runcap 0.3.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +235 -20
- package/bin/runcap.mjs +171 -0
- package/examples/outcome-demo/agent-fixes.mjs +24 -0
- package/examples/outcome-demo/agent-spins.mjs +20 -0
- package/examples/outcome-demo/broken.mjs +5 -0
- package/examples/outcome-demo/verify.mjs +7 -0
- package/examples/runcap-adjudicate.yml +57 -0
- package/package.json +24 -12
- package/scripts/adjudicate-test.mjs +334 -0
- package/scripts/guard-test.mjs +76 -0
- package/scripts/make-demo-svg.mjs +20 -20
- package/scripts/mission-test.mjs +148 -0
- package/scripts/outcome-test.mjs +48 -0
- package/scripts/policy-test.mjs +121 -0
- package/scripts/render-media-screenshots.mjs +37 -0
- package/src/adjudicate.mjs +508 -0
- package/src/mission-control.mjs +441 -1
- package/src/policy.mjs +208 -0
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
// Tier 3: the independent CI adjudicator (`runcap ci --mode adjudicate`).
|
|
2
|
+
//
|
|
3
|
+
// `runcap ci` (Tier 2) recomputes the PASS/BLOCKED *logic* from the policy, but
|
|
4
|
+
// it grades the integrity *facts* that the agent wrote into receipt.json. A
|
|
5
|
+
// forged "VERIFIED_STRONG" receipt therefore grades PASS. This module closes
|
|
6
|
+
// that gap: it recomputes the facts itself, in CI, from the BASE commit of the
|
|
7
|
+
// pull request - the last state a human reviewer approved - and never trusts a
|
|
8
|
+
// number the agent stamped on its own receipt.
|
|
9
|
+
//
|
|
10
|
+
// What it trusts: the base/head SHAs from the trusted `pull_request` event, and
|
|
11
|
+
// the policy + verifier + lockfile read from the BASE commit.
|
|
12
|
+
// What it does NOT trust: receipt.json, gateway telemetry, artifact-reported
|
|
13
|
+
// SHAs, the head-side policy/verifier/workflow, or any agent
|
|
14
|
+
// environment value. Agent telemetry is carried as advisory
|
|
15
|
+
// only and can never move the verdict.
|
|
16
|
+
//
|
|
17
|
+
// Three verdicts:
|
|
18
|
+
// PASS - every changed path is an in-scope regular text
|
|
19
|
+
// edit, the task genuinely failed at base, and the
|
|
20
|
+
// change makes the base-pinned verifier pass in a
|
|
21
|
+
// clean base checkout.
|
|
22
|
+
// BLOCKED - any structurally unsafe change (delete/rename/
|
|
23
|
+
// symlink/submodule/mode/binary/LFS), an out-of-
|
|
24
|
+
// scope edit, a meaningless baseline, or a replay
|
|
25
|
+
// that does not reproduce the pass.
|
|
26
|
+
// HUMAN_APPROVAL_REQUIRED - the change touches the rules or the evidence
|
|
27
|
+
// themselves (policy, workflow, verifier, protected
|
|
28
|
+
// or dependency files). Runcap declines to issue an
|
|
29
|
+
// automated proof; a human CODEOWNER must decide.
|
|
30
|
+
//
|
|
31
|
+
// This module imports only node builtins + js-yaml + validatePolicy/policyMeta
|
|
32
|
+
// from policy.mjs (one direction, no cycle). It never imports mission-control.
|
|
33
|
+
|
|
34
|
+
import { spawn } from "node:child_process";
import { createHash } from "node:crypto";
import { mkdir, mkdtemp, writeFile, readFile, rm } from "node:fs/promises";
import { existsSync, readFileSync } from "node:fs";
import path from "node:path";
import os from "node:os";
import yaml from "js-yaml";
import { validatePolicy, policyMeta } from "./policy.mjs";
|
|
42
|
+
|
|
43
|
+
// Policy file names looked up under `.runcap/`, in lookup order. Frozen so the
// set of files treated as "the rules" cannot be altered at runtime.
const POLICY_FILENAMES = Object.freeze(["mission.yaml", "mission.yml", "mission.json"]);
|
|
44
|
+
|
|
45
|
+
// Paths that are the rules or the evidence themselves. An edit to any of these
|
|
46
|
+
// is never auto-approved: a human CODEOWNER must sign off, because changing the
|
|
47
|
+
// verifier, the policy, or the workflow changes what "passing" even means.
|
|
48
|
+
// Basenames of dependency manifests and lockfiles. Matched against the last
// path segment, so nested workspaces are covered too. `bun.lock` is Bun's
// text lockfile; `bun.lockb` is the older binary one. Frozen so the gate list
// cannot be mutated at runtime.
const DEPENDENCY_FILES = Object.freeze([
  "package.json", "package-lock.json", "npm-shrinkwrap.json",
  "yarn.lock", "pnpm-lock.yaml", "bun.lockb", "bun.lock"
]);
|
|
52
|
+
|
|
53
|
+
// The same protected globs the in-terminal guard uses (tests/config), so the
|
|
54
|
+
// adjudicator and the local guard agree on what counts as evidence.
|
|
55
|
+
// Path patterns treated as tests or build/test configuration. None of the
// expressions carries the `g`/`y` flag, so `.test()` is stateless and the
// patterns can be shared safely. Frozen so the list cannot be mutated.
const PROTECTED_GLOBS = Object.freeze([
  /(^|\/)[^/]*\.test\.[mc]?[jt]sx?$/,
  /(^|\/)[^/]*\.spec\.[mc]?[jt]sx?$/,
  /(^|\/)__tests__\//,
  /(^|\/)tests?\//,
  /(^|\/)package\.json$/,
  /(^|\/)tsconfig[^/]*\.json$/,
  /(^|\/)jest\.config\./,
  /(^|\/)vitest\.config\./
]);
|
|
65
|
+
|
|
66
|
+
const LFS_POINTER_SIGNATURE = "version https://git-lfs.github.com/spec";
|
|
67
|
+
|
|
68
|
+
// --- git plumbing (local, spawn-based; no influence from agent env) ---------
|
|
69
|
+
|
|
70
|
+
/**
 * Run `git` with the given arguments (no shell) and collect its output as text.
 *
 * Never rejects: a spawn failure or a non-zero exit is reported through the
 * `error` field so callers can branch on it.
 *
 * @param {string[]} args - Arguments passed to git verbatim.
 * @param {string} [cwd] - Working directory for the git process.
 * @returns {Promise<{ text: string, error: string | null }>} `error` is `null`
 *   only when git exited with code 0; otherwise it is a NON-EMPTY message, so
 *   both `error === null` and truthiness checks on `error` are reliable.
 */
function git(args, cwd) {
  return new Promise((resolve) => {
    const child = spawn("git", args, { cwd, shell: false });
    let stdout = "";
    let stderr = "";
    // Decode at the stream level so a multi-byte character split across two
    // chunks is not corrupted by per-chunk toString().
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", (c) => { stdout += c; });
    child.stderr?.on("data", (c) => { stderr += c; });
    child.on("error", (e) => resolve({ text: "", error: e.message || "failed to spawn git" }));
    child.on("close", (code, signal) => {
      if (code === 0) {
        resolve({ text: stdout, error: null });
        return;
      }
      // Some git commands fail without writing to stderr. An empty string is
      // falsy and would let `if (r.error)` callers mistake the failure for a
      // success, so always fall back to a descriptive message.
      const fallback = code === null
        ? `git terminated by signal ${signal}`
        : `git exited with code ${code}`;
      resolve({ text: stdout, error: stderr.trim() || fallback });
    });
  });
}
|
|
81
|
+
|
|
82
|
+
/**
 * Read the exact bytes of `relPath` as stored at `rev`, via `git show rev:path`.
 *
 * stdout is accumulated as raw Buffers and is never decoded or trimmed, so the
 * returned buffer is byte-identical to what git printed for that blob.
 *
 * @param {string} rev - Commit-ish to read from.
 * @param {string} relPath - Repository-relative path of the blob.
 * @param {string} [cwd] - Working directory for the git process.
 * @returns {Promise<{ ok: boolean, buffer: Buffer | null, error: string | null }>}
 *   Never rejects; on failure `ok` is false and `buffer` is null.
 */
function gitShowBytes(rev, relPath, cwd) {
  const objectSpec = `${rev}:${relPath}`;
  return new Promise((done) => {
    const proc = spawn("git", ["show", objectSpec], { cwd, shell: false });
    const pieces = [];
    let errText = "";
    const fail = (message) => done({ ok: false, buffer: null, error: message });

    proc.stdout.on("data", (piece) => { pieces.push(piece); });
    proc.stderr.on("data", (piece) => { errText += piece.toString(); });
    proc.on("error", (err) => fail(err.message));
    proc.on("close", (status) => {
      if (status !== 0) {
        fail(errText.trim());
        return;
      }
      done({ ok: true, buffer: Buffer.concat(pieces), error: null });
    });
  });
}
|
|
95
|
+
|
|
96
|
+
/**
 * Report whether `rev` resolves to a commit object in the repository at `cwd`.
 *
 * @param {string} rev - Commit-ish to probe (peeled with `^{commit}`).
 * @param {string} [cwd] - Repository working directory.
 * @returns {Promise<boolean>} true when `git cat-file -e` reported no error.
 */
async function revExists(rev, cwd) {
  const { error } = await git(["cat-file", "-e", `${rev}^{commit}`], cwd);
  return error === null;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Report whether `rev:relPath` names an existing object in the repository.
 *
 * @param {string} rev - Commit-ish to look in.
 * @param {string} relPath - Repository-relative path.
 * @param {string} [cwd] - Repository working directory.
 * @returns {Promise<boolean>} true when `git cat-file -e` reported no error.
 */
async function blobExists(rev, relPath, cwd) {
  const { error } = await git(["cat-file", "-e", `${rev}:${relPath}`], cwd);
  return error === null;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Run a command string through the platform shell (`cmd /c` on Windows,
 * `sh -c` elsewhere) in `cwd`, with `AIM_WRAPPED=1` added to the environment.
 *
 * @param {string} commandString - The full command line to execute.
 * @param {string} [cwd] - Directory the command runs in.
 * @returns {Promise<{ stdout: string, stderr: string, exitCode: number, durationMs: number }>}
 *   Never rejects. A spawn error is reported as exit code 127 with the error
 *   message appended to stderr; a missing exit code (signal) is reported as 1.
 */
function runShell(commandString, cwd) {
  const started = Date.now();
  const elapsed = () => Date.now() - started;
  const [program, flag] = process.platform === "win32" ? ["cmd", "/c"] : ["sh", "-c"];

  return new Promise((settle) => {
    const proc = spawn(program, [flag, commandString], {
      cwd,
      env: { ...process.env, AIM_WRAPPED: "1" },
      shell: false
    });
    let stdout = "";
    let stderr = "";
    proc.stdout?.on("data", (piece) => { stdout += piece.toString(); });
    proc.stderr?.on("data", (piece) => { stderr += piece.toString(); });
    proc.on("error", (err) => {
      settle({ stdout, stderr: `${stderr}\n${err.message}`, exitCode: 127, durationMs: elapsed() });
    });
    proc.on("close", (status) => {
      settle({ stdout, stderr, exitCode: status ?? 1, durationMs: elapsed() });
    });
  });
}
|
|
122
|
+
|
|
123
|
+
// --- SHA resolution (trusted PR event ONLY) ---------------------------------
|
|
124
|
+
|
|
125
|
+
/**
 * Resolve the base and head commits to adjudicate. SHAs are never read from a
 * receipt or an artifact; only the two sources below are consulted.
 *
 * Resolution order, exactly as implemented:
 *   1. When BOTH `baseFlag` and `headFlag` are truthy they are returned as-is
 *      with `shaSource: "explicit_flags"`; the event payload is not read.
 *   2. Otherwise the JSON file at $GITHUB_EVENT_PATH is parsed and
 *      `pull_request.base.sha` / `pull_request.head.sha` are used. The source
 *      is "github_pull_request_event" when $GITHUB_EVENT_NAME is
 *      "pull_request" or unset, and `untrusted_event:<name>` for any other
 *      event (e.g. pull_request_target, which runs with base-repo secrets).
 *   3. Otherwise (no file, unparsable JSON, missing SHAs) the result is
 *      unresolved with both SHAs null.
 *
 * NOTE(review): explicit flags take precedence even when a pull_request
 * payload is available, so a required PR job should not pass --base/--head.
 * Confirm this precedence is intended.
 *
 * @param {{ baseFlag?: string, headFlag?: string }} [flags]
 * @returns {{ baseSha: string | null, headSha: string | null, shaSource: string }}
 */
function resolveShas({ baseFlag, headFlag } = {}) {
  if (baseFlag && headFlag) {
    return { baseSha: baseFlag, headSha: headFlag, shaSource: "explicit_flags" };
  }

  const unresolved = () => ({ baseSha: null, headSha: null, shaSource: "unresolved" });
  const { GITHUB_EVENT_PATH: eventPath, GITHUB_EVENT_NAME: eventName } = process.env;
  if (!eventPath || !existsSync(eventPath)) return unresolved();

  let pullRequest;
  try {
    pullRequest = JSON.parse(readFileSync(eventPath, "utf8"))?.pull_request;
  } catch {
    // Unreadable or malformed payload: treated the same as no payload.
    return unresolved();
  }

  const baseSha = pullRequest?.base?.sha;
  const headSha = pullRequest?.head?.sha;
  if (!baseSha || !headSha) return unresolved();

  const isPullRequestEvent = eventName === "pull_request" || eventName === undefined;
  const shaSource = isPullRequestEvent ? "github_pull_request_event" : `untrusted_event:${eventName}`;
  return { baseSha, headSha, shaSource };
}
|
|
150
|
+
|
|
151
|
+
// --- policy loaded FROM THE BASE COMMIT -------------------------------------
|
|
152
|
+
|
|
153
|
+
/**
 * Load and parse the mission policy as it exists at the base commit, so a
 * head-side rewrite of the policy cannot influence the rules being applied.
 *
 * Candidates are `explicitPath` alone when given, otherwise each name in
 * POLICY_FILENAMES under `.runcap/`. The first candidate that exists and can
 * be read is parsed (JSON for `.json`, YAML otherwise) and decides the result;
 * a parse failure does not fall through to the next candidate.
 *
 * @param {string} baseSha - Base commit to read from.
 * @param {string} [explicitPath] - Repository-relative policy path override.
 * @param {string} [cwd] - Repository working directory.
 * @returns {Promise<{ result: { policy: object, raw: string, hash: string, source: string } } | { error: string }>}
 *   `hash` is the SHA-256 hex digest of the raw policy text.
 */
async function loadPolicyFromBase(baseSha, explicitPath, cwd) {
  const candidates = explicitPath
    ? [explicitPath]
    : POLICY_FILENAMES.map((name) => path.posix.join(".runcap", name));

  for (const rel of candidates) {
    const present = await blobExists(baseSha, rel, cwd);
    if (!present) continue;

    const shown = await gitShowBytes(baseSha, rel, cwd);
    if (!shown.ok) continue;

    const raw = shown.buffer.toString("utf8");
    const isJson = rel.endsWith(".json");
    let policy;
    try {
      policy = isJson ? JSON.parse(raw) : yaml.load(raw);
    } catch (e) {
      return { error: `policy at base:${rel} did not parse: ${e.message}` };
    }

    if (!policy || typeof policy !== "object") {
      return { error: `policy at base:${rel} is not an object.` };
    }

    const hash = createHash("sha256").update(raw).digest("hex");
    return { result: { policy, raw, hash, source: rel } };
  }

  return { error: "no policy (.runcap/mission.{yaml,yml,json}) found at the base commit." };
}
|
|
175
|
+
|
|
176
|
+
// --- diff classification ----------------------------------------------------
|
|
177
|
+
|
|
178
|
+
/**
 * Decide whether `relPath` is protected, either by a policy-supplied entry or
 * by one of the built-in PROTECTED_GLOBS patterns.
 *
 * A policy entry matches when it equals the path or is a directory prefix of
 * it (a trailing slash on the entry is optional).
 *
 * @param {string} relPath - Repository-relative path using `/` separators.
 * @param {string[]} extraProtected - Extra protected files/directories.
 * @returns {boolean}
 */
function isProtectedPath(relPath, extraProtected) {
  for (const entry of extraProtected) {
    if (relPath === entry) return true;
    const dirPrefix = entry.replace(/\/?$/, "/");
    if (relPath.startsWith(dirPrefix)) return true;
  }
  for (const pattern of PROTECTED_GLOBS) {
    if (pattern.test(relPath)) return true;
  }
  return false;
}
|
|
182
|
+
|
|
183
|
+
/**
 * Decide whether `relPath` falls inside the policy's allowed scope.
 *
 * A missing or empty `allowed` list means "no restriction" and always returns
 * true. Otherwise the path must equal an entry or live beneath it (a trailing
 * slash on the entry is optional).
 *
 * @param {string} relPath - Repository-relative path using `/` separators.
 * @param {string[] | undefined | null} allowed - Allowed files/directories.
 * @returns {boolean}
 */
function withinAllowed(relPath, allowed) {
  const unrestricted = !allowed || allowed.length === 0;
  if (unrestricted) return true;

  for (const root of allowed) {
    if (relPath === root) return true;
    if (relPath.startsWith(root.replace(/\/?$/, "/"))) return true;
  }
  return false;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Report whether `relPath` lives under `.github/workflows/`.
 *
 * @param {string} relPath - Repository-relative path using `/` separators.
 * @returns {boolean}
 */
function isWorkflowPath(relPath) {
  const workflowDir = ".github/workflows/";
  return relPath.slice(0, workflowDir.length) === workflowDir;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Report whether `relPath` is one of the default policy files, i.e.
 * `.runcap/<name>` for a name listed in POLICY_FILENAMES.
 *
 * @param {string} relPath - Repository-relative path using `/` separators.
 * @returns {boolean}
 */
function isPolicyPath(relPath) {
  for (const name of POLICY_FILENAMES) {
    if (path.posix.join(".runcap", name) === relPath) return true;
  }
  return false;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Report whether the final path segment of `relPath` is a dependency manifest
 * or lockfile listed in DEPENDENCY_FILES, at any directory depth.
 *
 * @param {string} relPath - Repository-relative path using `/` separators.
 * @returns {boolean}
 */
function isDependencyPath(relPath) {
  // lastIndexOf yields -1 when there is no slash, so the slice starts at 0.
  const fileName = relPath.slice(relPath.lastIndexOf("/") + 1);
  return DEPENDENCY_FILES.includes(fileName);
}
|
|
200
|
+
|
|
201
|
+
/**
 * Parse the output of `git diff --raw -z --find-renames base head`.
 *
 * With `-z` every field is NUL-terminated. Each record starts with a header
 * `:<oldmode> <newmode> <oldsha> <newsha> <status>` followed by one path, or
 * by two paths (source, destination) for rename (`R`) and copy (`C`) records.
 * Fields that do not start with `:` where a header is expected are skipped.
 *
 * @param {Buffer} buffer - Raw stdout of the git command.
 * @returns {Array<{ statusChar: string, statusField: string, oldMode: string,
 *   newMode: string, srcPath?: string, path: string }>} One entry per record;
 *   `srcPath` is present only for renames and copies.
 */
function parseRawDiff(buffer) {
  const fields = buffer.toString("utf8").split("\0");
  const entries = [];

  for (let cursor = 0; cursor < fields.length;) {
    const header = fields[cursor];
    cursor += 1;
    if (!header || !header.startsWith(":")) continue;

    const [oldMode, newMode, , , statusField = ""] = header.slice(1).split(/\s+/);
    const statusChar = statusField[0] ?? "";
    const hasTwoPaths = statusChar === "R" || statusChar === "C";

    if (hasTwoPaths) {
      const srcPath = fields[cursor];
      const dstPath = fields[cursor + 1];
      cursor += 2;
      entries.push({ statusChar, statusField, oldMode, newMode, srcPath, path: dstPath });
      continue;
    }

    const onlyPath = fields[cursor];
    cursor += 1;
    entries.push({ statusChar, statusField, oldMode, newMode, path: onlyPath });
  }

  return entries;
}
|
|
229
|
+
|
|
230
|
+
/**
 * Heuristic binary check: true when a NUL byte occurs within the first 8192
 * bytes of `buffer`. Bytes beyond that window are not inspected.
 *
 * @param {Buffer} buffer - File content.
 * @returns {boolean}
 */
function looksBinary(buffer) {
  const PROBE_BYTES = 8192;
  return buffer.subarray(0, PROBE_BYTES).indexOf(0) !== -1;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Report whether `buffer` decodes as well-formed UTF-8, using a TextDecoder in
 * fatal mode (which throws on any malformed sequence).
 *
 * @param {Buffer | Uint8Array} buffer - Bytes to validate.
 * @returns {boolean}
 */
function isValidUtf8(buffer) {
  const strictDecoder = new TextDecoder("utf-8", { fatal: true });
  let wellFormed = true;
  try {
    strictDecoder.decode(buffer);
  } catch {
    wellFormed = false;
  }
  return wellFormed;
}
|
|
244
|
+
|
|
245
|
+
/**
 * Classify one parsed diff entry as `candidate`, `blocked` or `human`.
 *
 * Checks run in this order and the first match wins:
 *   1. Structural rejects (delete, rename, copy, type change, symlink,
 *      submodule, mode change, any non-100644 mode, unknown status) -> blocked.
 *   2. Content rejects on the head blob (unreadable, binary, invalid UTF-8,
 *      Git LFS pointer) -> blocked.
 *   3. Sensitive paths (policy, workflow, verifier, dependency, protected)
 *      -> human.
 *   4. Outside the allowed scope -> blocked.
 *   5. Otherwise -> candidate, carrying the head blob bytes in `blob`.
 *
 * @param {object} entry - One element produced by parseRawDiff.
 * @param {object} ctx
 * @param {string} ctx.headSha - Commit the blob bytes are read from.
 * @param {string} [ctx.cwd] - Repository working directory.
 * @param {string[]} ctx.protectedPaths - Extra protected paths from the policy.
 * @param {string[]} ctx.allowed - Allowed scope from the policy.
 * @param {string[]} ctx.verifierPaths - Verifier files resolved at base.
 * @returns {Promise<{ path: string, class: string, detail: string, blob?: Buffer }>}
 */
async function classifyEntry(entry, { headSha, cwd, protectedPaths, allowed, verifierPaths }) {
  const { path: p, statusChar: s, statusField, oldMode, newMode, srcPath } = entry;
  const blocked = (detail) => ({ path: p, class: "blocked", detail });
  const human = (detail) => ({ path: p, class: "human", detail });

  // 1. Structural rejects keyed on the status letter...
  switch (s) {
    case "D": return blocked("file deleted (deletions are never auto-approved)");
    case "R": return blocked(`file renamed from ${srcPath} (renames are never auto-approved)`);
    case "C": return blocked(`file copied from ${srcPath} (copies are never auto-approved)`);
    case "T": return blocked("file type changed (type changes are never auto-approved)");
    default: break;
  }
  // ...then on the file mode.
  if (newMode === "120000") return blocked("symlink (symlinks are never auto-approved)");
  if (newMode === "160000") return blocked("submodule/gitlink (submodules are never auto-approved)");
  if (s === "M" && oldMode !== newMode) {
    return blocked(`mode change ${oldMode} -> ${newMode} (mode changes are never auto-approved)`);
  }
  if (newMode !== "100644") {
    return blocked(`non-regular file mode ${newMode} (only plain 100644 text files can be auto-applied)`);
  }
  if (s !== "A" && s !== "M") return blocked(`unsupported diff status ${statusField}`);

  // 2. Content checks on the HEAD blob (the bytes that would be applied).
  const got = await gitShowBytes(headSha, p, cwd);
  if (!got.ok) return blocked(`could not read head blob: ${got.error}`);
  if (looksBinary(got.buffer)) return blocked("binary content (only UTF-8 text files can be auto-applied)");
  if (!isValidUtf8(got.buffer)) return blocked("not valid UTF-8 (only UTF-8 text files can be auto-applied)");
  const headText = got.buffer.toString("utf8");
  if (headText.startsWith(LFS_POINTER_SIGNATURE)) {
    return blocked("Git LFS pointer (real content is not in the tree, cannot replay)");
  }

  // 3. Sensitive-path human gate, evaluated lazily in priority order.
  const humanGates = [
    [() => isPolicyPath(p), "edits the mission policy (the rules) - human CODEOWNER must approve"],
    [() => isWorkflowPath(p), "edits a GitHub workflow - human CODEOWNER must approve"],
    [() => verifierPaths.includes(p), "edits a verifier file (the evidence) - human CODEOWNER must approve"],
    [() => isDependencyPath(p), "edits a dependency manifest/lockfile - human CODEOWNER must approve"],
    [() => isProtectedPath(p, protectedPaths), "edits a protected/test/config path - human CODEOWNER must approve"]
  ];
  for (const [applies, detail] of humanGates) {
    if (applies()) return human(detail);
  }

  // 4. Scope.
  if (!withinAllowed(p, allowed)) return blocked("outside the policy's allowed scope");

  // 5. In-scope regular text edit.
  const detail = s === "A" ? "added in-scope text file" : "modified in-scope text file";
  return { path: p, class: "candidate", detail, blob: got.buffer };
}
|
|
282
|
+
|
|
283
|
+
/**
 * Extract the paths named by a verify command that exist at the base commit.
 *
 * The command is split on whitespace; each token has one leading and one
 * trailing quote character stripped, is ignored unless it contains `.` or `/`,
 * and has a leading `./` removed before being probed with blobExists().
 * Resolving at base means a head-side rename cannot hide the verifier.
 *
 * @param {string} verify - The policy's verification command.
 * @param {string} baseSha - Base commit to probe.
 * @param {string} [cwd] - Repository working directory.
 * @returns {Promise<string[]>} Unique existing paths, in first-seen order.
 */
async function verifierFilesAtBase(verify, baseSha, cwd) {
  const found = new Set();
  const words = String(verify).split(/\s+/).filter(Boolean);

  for (const word of words) {
    const unquoted = word.replace(/^["']|["']$/g, "");
    const looksLikePath = /[./]/.test(unquoted);
    if (!looksLikePath) continue;

    const rel = unquoted.replace(/^\.\//, "");
    const existsAtBase = await blobExists(baseSha, rel, cwd);
    if (existsAtBase) found.add(rel);
  }

  return [...found];
}
|
|
298
|
+
|
|
299
|
+
// --- the replay -------------------------------------------------------------
|
|
300
|
+
|
|
301
|
+
/**
 * Baseline + replay in a throwaway git worktree pinned at the base commit.
 *
 * Steps:
 *   0. If the base checkout has package.json and an npm lockfile, install with
 *      `npm ci --ignore-scripts` (no lifecycle scripts, no floating install).
 *   1. Run the verifier untouched: it must fail, or a later pass proves nothing.
 *   2. Write the candidate blobs from head into the worktree.
 *   3. Run the verifier again.
 *
 * The worktree and its temporary parent directory are removed on EVERY exit
 * path, including a failed `git worktree add`.
 *
 * @param {object} args
 * @param {string} args.baseSha - Commit the worktree is detached at.
 * @param {Array<{ path: string, blob: Buffer }>} args.candidates - Files to apply.
 * @param {string} args.verify - Shell command whose exit code is the verdict.
 * @param {string} [args.cwd] - Repository working directory.
 * @returns {Promise<{ baselineFailed: boolean | null, replayPassed: boolean | null,
 *   dependencyInstall: string, detail: string, ran: boolean }>}
 *   `baselineFailed`/`replayPassed` are null when the run never got that far.
 */
async function replay({ baseSha, candidates, verify, cwd }) {
  const tmpBase = await mkdtempWorktreeBase();
  const suffix = createHash("sha1").update(`${baseSha}${Date.now()}${Math.random()}`).digest("hex").slice(0, 8);
  const wt = path.join(tmpBase, `wt-${suffix}`);

  try {
    const add = await git(["worktree", "add", "--detach", wt, baseSha], cwd);
    // Compare against null rather than truthiness: an empty error string still
    // means git failed and must not be mistaken for success.
    if (add.error !== null) {
      return { baselineFailed: null, replayPassed: null, dependencyInstall: "skipped_no_manifest", detail: `worktree add failed: ${add.error}`, ran: false };
    }

    // Base-pinned dependency install (only when the base has a lockfile).
    let dependencyInstall = "skipped_no_manifest";
    const hasPkg = existsSync(path.join(wt, "package.json"));
    const hasLock = existsSync(path.join(wt, "package-lock.json")) || existsSync(path.join(wt, "npm-shrinkwrap.json"));
    if (hasPkg && hasLock) {
      const ci = await runShell("npm ci --ignore-scripts --no-audit --no-fund", wt);
      dependencyInstall = ci.exitCode === 0 ? "npm_ci_ignore_scripts" : "failed";
      if (ci.exitCode !== 0) {
        return { baselineFailed: null, replayPassed: null, dependencyInstall, detail: "npm ci (base-pinned, --ignore-scripts) failed", ran: true };
      }
    }

    // 1. Baseline: the task must genuinely fail at base.
    const baseline = await runShell(verify, wt);
    const baselineFailed = baseline.exitCode !== 0;

    // 2. Apply only the permitted candidate blobs from head. Sequential on
    //    purpose: later writes may depend on directories created by earlier ones.
    for (const c of candidates) {
      const dst = path.join(wt, c.path);
      await mkdir(path.dirname(dst), { recursive: true });
      await writeFile(dst, c.blob);
    }

    // 3. Replay the base-pinned verifier with the change applied.
    const after = await runShell(verify, wt);
    const replayPassed = after.exitCode === 0;

    return { baselineFailed, replayPassed, dependencyInstall, ran: true, detail: "baseline + replay completed in a clean base checkout" };
  } finally {
    // Best-effort cleanup. git() never rejects, so a worktree that was never
    // created (failed add) simply makes this a harmless no-op.
    await git(["worktree", "remove", "--force", wt], cwd);
    await rm(tmpBase, { recursive: true, force: true }).catch(() => {});
  }
}
|
|
346
|
+
|
|
347
|
+
/**
 * Create a fresh, uniquely named directory under the OS temp dir to hold the
 * replay worktree.
 *
 * Uses `fs.mkdtemp`, which appends random characters to the prefix and fails
 * rather than reusing an existing directory. A pid + timestamp name is
 * predictable and, with `mkdir({ recursive: true })`, silently reuses whatever
 * already sits at that path (two calls in the same millisecond collide).
 *
 * @returns {Promise<string>} Absolute path of the newly created directory.
 */
async function mkdtempWorktreeBase() {
  return mkdtemp(path.join(os.tmpdir(), "runcap-adj-"));
}
|
|
352
|
+
|
|
353
|
+
// --- agent telemetry: deliberately NOT read by the required gate ------------
|
|
354
|
+
|
|
355
|
+
// The agent's receipt is agent-controlled input. A forged "VERIFIED_STRONG"
|
|
356
|
+
// receipt is exactly the Tier 2 attack this gate exists to defeat, so the
|
|
357
|
+
// required job must never parse it: not to grade the verdict (it never did),
|
|
358
|
+
// and not even to display it, because reading attacker-controlled JSON in the
|
|
359
|
+
// mandatory check is needless attack surface (a malformed or enormous receipt
|
|
360
|
+
// could crash or stall the only gate guarding the merge). The verdict therefore
|
|
361
|
+
// reports a constant, telling a reviewer plainly that no receipt was consulted.
|
|
362
|
+
// A later, NON-required report layer may surface advisory telemetry; the gate
|
|
363
|
+
// that decides the merge does not.
|
|
364
|
+
const GATE_AGENT_TELEMETRY = Object.freeze({
|
|
365
|
+
present: false,
|
|
366
|
+
influence_on_verdict: "none",
|
|
367
|
+
truth: "agent_receipt_not_read_by_required_gate"
|
|
368
|
+
});
|
|
369
|
+
|
|
370
|
+
// --- the adjudicator --------------------------------------------------------
|
|
371
|
+
|
|
372
|
+
// Run `git diff --raw -z --find-renames base head` and return its raw bytes.
// Resolves with `{ buffer: null, error }` when git cannot be spawned or exits
// non-zero, so the caller never classifies a partial diff.
function rawDiffBetween(baseSha, headSha, cwd) {
  return new Promise((resolve) => {
    const child = spawn("git", ["diff", "--raw", "-z", "--find-renames", baseSha, headSha], { cwd, shell: false });
    const chunks = [];
    let stderr = "";
    child.stdout?.on("data", (c) => chunks.push(c));
    child.stderr?.on("data", (c) => { stderr += c.toString(); });
    child.on("error", (e) => resolve({ buffer: null, error: e.message || "failed to spawn git" }));
    child.on("close", (code) => resolve(code === 0
      ? { buffer: Buffer.concat(chunks), error: null }
      : { buffer: null, error: stderr.trim() || `git diff exited with code ${code}` }));
  });
}

/**
 * Independently adjudicate a pull request from its base commit.
 *
 * Pipeline:
 *   1. Resolve base/head SHAs (resolveShas) and check both commits exist.
 *   2. Load and validate the policy as stored at the BASE commit.
 *   3. Compute the base..head diff and classify every entry.
 *   4. Verdict precedence: any blocked entry -> BLOCKED; else any human-gated
 *      entry -> HUMAN_APPROVAL_REQUIRED; else no candidates -> BLOCKED.
 *   5. Replay the verifier at base with only the candidate blobs applied.
 *      PASS requires: install ok, baseline failed, replay passed.
 *
 * Fail-closed behaviour:
 *   - a `git diff` that errors or exits non-zero yields BLOCKED instead of
 *     classifying whatever partial output was produced;
 *   - a policy without a usable `verification.command` yields BLOCKED;
 *   - the policy file that was actually loaded is always treated as a
 *     protected path, so a policy given via `policyPath` outside `.runcap/`
 *     still requires human approval when the PR edits it.
 *
 * @param {object} [options]
 * @param {string} [options.cwd=process.cwd()] - Repository working directory.
 * @param {string} [options.baseFlag] - Explicit base commit.
 * @param {string} [options.headFlag] - Explicit head commit.
 * @param {string} [options.policyPath] - Repository-relative policy override.
 * @returns {Promise<object>} A `runcap.ci-verdict/v1` object whose `verdict`
 *   is "PASS", "BLOCKED" or "HUMAN_APPROVAL_REQUIRED".
 */
export async function adjudicate({ cwd = process.cwd(), baseFlag, headFlag, policyPath } = {}) {
  const hardening = { required_profile: "documented", runtime_attestation: "not_performed_in_pr_job" };
  const agentTelemetry = GATE_AGENT_TELEMETRY;

  const base = (verdict, reasons, extra = {}) => ({
    schema: "runcap.ci-verdict/v1",
    verdict,
    reasons,
    repository_hardening: hardening,
    agent_telemetry: agentTelemetry,
    truth: "recomputed_by_adjudicator_from_base_sha",
    ...extra
  });

  // 1. Resolve base/head.
  const { baseSha, headSha, shaSource } = resolveShas({ baseFlag, headFlag });
  if (!baseSha || !headSha) {
    return base("BLOCKED", ["Could not resolve base/head from the trusted pull_request event (and no explicit --base/--head). Refusing to adjudicate."], { sha_source: shaSource });
  }
  const shaContext = { base_sha: baseSha, head_sha: headSha, sha_source: shaSource };
  if (shaSource.startsWith("untrusted_event")) {
    return base("BLOCKED", [`Refusing to adjudicate an untrusted event (${shaSource}). Only the read-only pull_request event is adjudicated.`], shaContext);
  }
  if (!(await revExists(baseSha, cwd)) || !(await revExists(headSha, cwd))) {
    return base("BLOCKED", ["base or head commit is not present in the checkout (fetch depth too shallow?). Refusing to adjudicate."], shaContext);
  }

  // 2. Policy from the BASE commit (the approved rules), then validate it.
  const loaded = await loadPolicyFromBase(baseSha, policyPath, cwd);
  if (loaded.error) {
    return base("BLOCKED", [loaded.error], shaContext);
  }
  const policyResult = loaded.result;
  const { ok, errors } = validatePolicy(policyResult.policy);
  if (!ok) {
    return base("BLOCKED", errors.map((e) => `base policy invalid: ${e}`), { ...shaContext, policy: policyMeta(policyResult) });
  }
  const policyBlock = policyMeta(policyResult);
  const policyContext = { ...shaContext, policy: policyBlock };

  const verification = policyResult.policy.verification ?? {};
  const verify = verification.command;
  // Without a command there is nothing to replay; spawn would throw on a
  // non-string argument, so refuse explicitly instead of crashing the gate.
  if (typeof verify !== "string" || verify.trim() === "") {
    return base("BLOCKED", ["base policy has no verification.command to replay. Refusing to adjudicate."], policyContext);
  }
  // The loaded policy file is itself "the rules": gate edits to it even when it
  // lives outside `.runcap/` (explicit policyPath). A leading "./" is dropped
  // so the entry compares equal to the paths git reports in the diff.
  const policySource = String(policyResult.source).replace(/^\.\//, "");
  const declaredProtect = Array.isArray(verification.protect) ? verification.protect : [];
  const protectedPaths = [...declaredProtect, policySource];
  const allowed = Array.isArray(verification.allow) ? verification.allow : [];
  const verifierPaths = await verifierFilesAtBase(verify, baseSha, cwd);

  // 3. Compute the base..head diff ourselves and classify every entry.
  const diff = await rawDiffBetween(baseSha, headSha, cwd);
  if (diff.error !== null) {
    return base("BLOCKED", [`Could not compute the base..head diff: ${diff.error}. Refusing to adjudicate.`], policyContext);
  }
  const entries = parseRawDiff(diff.buffer);
  const classified = [];
  // Sequential on purpose: each classification may spawn git.
  for (const entry of entries) {
    classified.push(await classifyEntry(entry, { headSha, cwd, protectedPaths, allowed, verifierPaths }));
  }
  // Blob bytes stay internal; the published classification omits them.
  const publicClassification = classified.map(({ blob, ...rest }) => rest);
  const diffContext = { ...policyContext, diff_classification: publicClassification };

  const blocked = classified.filter((c) => c.class === "blocked");
  const human = classified.filter((c) => c.class === "human");
  const candidates = classified.filter((c) => c.class === "candidate");

  // 4. Verdict precedence.
  if (blocked.length) {
    return base("BLOCKED", blocked.map((b) => `${b.path}: ${b.detail}`), diffContext);
  }
  if (human.length) {
    return base("HUMAN_APPROVAL_REQUIRED",
      ["Runcap declined to issue an automated proof: the change touches the rules or the evidence themselves. A human CODEOWNER must approve.", ...human.map((h) => `${h.path}: ${h.detail}`)],
      diffContext);
  }
  if (candidates.length === 0) {
    return base("BLOCKED", ["No applicable code change to adjudicate (empty or non-content diff)."], diffContext);
  }

  // 5. Replay from the base commit with only the candidate blobs applied.
  const r = await replay({ baseSha, candidates, verify, cwd });
  const codeEvidence = {
    truth: "recomputed_by_adjudicator_from_base_sha",
    baseline_failed: r.baselineFailed,
    replay_passed: r.replayPassed,
    dependency_install: r.dependencyInstall,
    candidate_files: candidates.map((c) => c.path),
    detail: r.detail
  };
  const evidenceContext = { ...diffContext, code_evidence: codeEvidence };

  const reasons = [];
  if (r.dependencyInstall === "failed") reasons.push("Base-pinned `npm ci --ignore-scripts` failed: cannot establish a clean baseline.");
  if (r.baselineFailed === false) reasons.push("Baseline already green: the verifier passed at the base commit, so a post-change pass proves nothing.");
  if (r.replayPassed !== true) reasons.push("Replay did not pass: the change did not make the base-pinned verifier pass in a clean base checkout.");

  if (reasons.length) {
    return base("BLOCKED", reasons, evidenceContext);
  }

  return base("PASS",
    [`Verifier failed at base and passed after applying ${candidates.length} in-scope text change(s), recomputed in a clean base checkout.`],
    evidenceContext);
}
|
|
478
|
+
|
|
479
|
+
/**
 * Render a verdict object as an array of text lines for the PR step summary
 * and the terminal.
 *
 * The policy and replay lines are emitted only when `v.policy` /
 * `v.code_evidence` are present; the "Why" section only when `v.reasons` is a
 * non-empty array. `v.repository_hardening` is required.
 *
 * @param {object} v - A verdict object as returned by adjudicate().
 * @returns {string[]} One string per output line (no trailing newlines).
 */
export function formatAdjudication(v) {
  const { policy, code_evidence: ce, reasons } = v;

  const header = [
    `Runcap CI adjudication (independent replay from base)`,
    `====================================================`,
    `Verdict: ${v.verdict}`,
    `Base SHA: ${v.base_sha ?? "unresolved"} (source: ${v.sha_source ?? "unknown"})`,
    `Head SHA: ${v.head_sha ?? "unresolved"}`
  ];

  const policyLines = policy
    ? [`Policy: ${policy.mission?.name ?? "(unnamed)"} - hash ${policy.hash}`]
    : [];

  const replayLines = ce
    ? [`Replay: baseline_failed=${ce.baseline_failed} replay_passed=${ce.replay_passed} deps=${ce.dependency_install}`]
    : [];

  const fixedLines = [
    `Hardening: required_profile=${v.repository_hardening.required_profile}, runtime_attestation=${v.repository_hardening.runtime_attestation}`,
    `Agent receipt: not read by this required gate (verdict is recomputed from the base commit).`
  ];

  const hasReasons = Array.isArray(reasons) && reasons.length > 0;
  const whyLines = hasReasons
    ? [v.verdict === "PASS" ? `Why:` : `Why ${v.verdict}:`, ...reasons.map((reason) => ` - ${reason}`)]
    : [];

  return [...header, ...policyLines, ...replayLines, ...fixedLines, ...whyLines];
}
|
|
503
|
+
|
|
504
|
+
/**
 * Map a verdict to a process exit code.
 *
 * Only "BLOCKED" fails the job (1). Every other value, including "PASS" and
 * "HUMAN_APPROVAL_REQUIRED" (which hands the decision to a CODEOWNER rather
 * than failing), maps to 0.
 *
 * @param {string} verdict - Verdict string from adjudicate().
 * @returns {0 | 1}
 */
export function exitCodeFor(verdict) {
  if (verdict === "BLOCKED") {
    return 1;
  }
  return 0;
}
|