adversarial-review-gate 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +16 -0
- package/.claude-plugin/plugin.json +13 -0
- package/LICENSE +201 -0
- package/README.md +589 -0
- package/bin/adversarial-review.js +14 -0
- package/package.json +43 -0
- package/src/cli/check.js +74 -0
- package/src/cli/doctor.js +261 -0
- package/src/cli/fail-closed.js +74 -0
- package/src/cli/hook.js +267 -0
- package/src/cli/host-map.js +59 -0
- package/src/cli/install.js +503 -0
- package/src/cli/main.js +48 -0
- package/src/cli/run.js +178 -0
- package/src/core/classify.js +65 -0
- package/src/core/config.js +158 -0
- package/src/core/diff.js +443 -0
- package/src/core/gate.js +753 -0
- package/src/core/git.js +66 -0
- package/src/core/hash.js +27 -0
- package/src/core/load-config.js +133 -0
- package/src/core/paths.js +33 -0
- package/src/core/policy.js +77 -0
- package/src/core/process.js +158 -0
- package/src/core/secrets.js +46 -0
- package/src/core/state.js +107 -0
- package/src/core/transcript.js +381 -0
- package/src/core/verdict.js +67 -0
- package/src/hosts/claude-code.js +77 -0
- package/src/hosts/index.js +60 -0
- package/src/hosts/wrapper.js +37 -0
- package/src/integrations/claude-code/hooks.json +28 -0
- package/src/prompts/adversarial-review-orchestrator.md +219 -0
- package/src/prompts/external-brief.md +167 -0
- package/src/reviewers/codex.js +297 -0
- package/src/reviewers/custom.js +269 -0
- package/src/reviewers/index.js +121 -0
- package/src/reviewers/opencode.js +360 -0
package/src/core/gate.js
ADDED
|
@@ -0,0 +1,753 @@
|
|
|
1
|
+
// Central gate decision engine.
|
|
2
|
+
//
|
|
3
|
+
// `evaluateGate` ties together config/policy/classify/diff/transcript/verdict/
|
|
4
|
+
// hash to produce a single decision: allow, block, or advisory-allow. It is the
|
|
5
|
+
// most security-critical module in the package and MUST FAIL CLOSED in
|
|
6
|
+
// `enforced` and `strict-ci` modes: when anything is ambiguous or broken and
|
|
7
|
+
// there is evidence of a real change, the gate blocks rather than passing.
|
|
8
|
+
//
|
|
9
|
+
// IO is injected so the engine stays testable and pure-ish:
|
|
10
|
+
// - filesystem/git diff comes from `cwd` + `baseline` via buildReviewDiff;
|
|
11
|
+
// - session state comes from `stateDir` via state.js;
|
|
12
|
+
// - external review comes from an injected `reviewerRunner(job)` stub.
|
|
13
|
+
// The engine never spawns real reviewer tools itself.
|
|
14
|
+
|
|
15
|
+
import { buildReviewDiff } from "./diff.js";
|
|
16
|
+
import { classifyPath } from "./classify.js";
|
|
17
|
+
import { scanSecrets } from "./secrets.js";
|
|
18
|
+
import {
|
|
19
|
+
isStrict,
|
|
20
|
+
requiresReviewForCode,
|
|
21
|
+
reviewerErrorAction,
|
|
22
|
+
internalErrorAction,
|
|
23
|
+
blockCapAction,
|
|
24
|
+
skipAllowed,
|
|
25
|
+
} from "./policy.js";
|
|
26
|
+
import {
|
|
27
|
+
parseJsonl,
|
|
28
|
+
scanKeys,
|
|
29
|
+
collectReviewOutputs,
|
|
30
|
+
isSubagentTranscript,
|
|
31
|
+
lastUserText,
|
|
32
|
+
wantsSkip,
|
|
33
|
+
} from "./transcript.js";
|
|
34
|
+
import { parseVerdict } from "./verdict.js";
|
|
35
|
+
import { sha256, stableJson, reviewCacheKey } from "./hash.js";
|
|
36
|
+
import { readSessionState, writeSessionState } from "./state.js";
|
|
37
|
+
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
// Decision constructors (step 1)
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
/**
 * Build an "allow" decision object.
 *
 * The properties of `extra` (e.g. `reason`, `cached`) are spread after the
 * default `action`, so a key supplied in `extra` takes precedence.
 *
 * @param {object} [extra] - additional fields merged into the decision.
 * @returns {{action:"allow"}}
 */
export function allow(extra = {}) {
  const decision = { action: "allow", ...extra };
  return decision;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Build a "block" decision that carries a human-readable reason.
 *
 * The properties of `extra` are spread last, after `action` and `reason`.
 *
 * @param {string} reason - explanation surfaced with the block.
 * @param {object} [extra] - additional fields merged into the decision.
 * @returns {{action:"block",reason:string}}
 */
export function block(reason, extra = {}) {
  const decision = { action: "block", reason, ...extra };
  return decision;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Build an advisory decision: the action is "allow", and `message` is attached
 * as `systemMessage` so it can be surfaced alongside the allow.
 *
 * The properties of `extra` are spread last.
 *
 * @param {string} message - text stored under `systemMessage`.
 * @param {object} [extra] - additional fields merged into the decision.
 * @returns {{action:"allow",systemMessage:string}}
 */
export function advisory(message, extra = {}) {
  const decision = { action: "allow", systemMessage: message, ...extra };
  return decision;
}
|
|
70
|
+
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// Level classification (step 2)
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
// Numeric ordering of the review tiers, lowest to highest.
const LEVEL_RANK = { none: 0, single: 1, debate: 2 };

// Pick the stricter of two tiers: `next` replaces `current` only when its rank
// is strictly greater. If either tier is missing from LEVEL_RANK the comparison
// is false, so `current` is kept.
function escalate(current, next) {
  if (LEVEL_RANK[next] > LEVEL_RANK[current]) {
    return next;
  }
  return current;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Work out which review tier a set of changed files requires.
 *
 * How the tier is derived:
 *   - if no changed file is classified as reviewable, the result is "none"
 *     regardless of diff size;
 *   - when `requiresReviewForCode(config)` holds, the floor is "single";
 *   - size thresholds raise the tier: `bigDiffLines` (default 80) or
 *     `bigFileCount` (default 5) -> "single"; `debateDiffLines` (default 250)
 *     or `debateFileCount` (default 12) -> "debate";
 *   - a sensitive change raises the tier to "debate", unless
 *     `thresholds.debateOnSensitive` is exactly `false`, in which case it
 *     raises it to "single" only.
 * The highest tier requested by any rule wins.
 *
 * @param {object} args
 * @param {object} args.config - effective config; `config.thresholds` is optional.
 * @param {Array<{path:string,status?:string}>} args.changedFiles
 * @param {{lines:number,fileCount:number}} args.diffStats
 * @param {boolean} [args.sensitive] - precomputed sensitive flag; OR-ed with the
 *   per-file classification done here.
 * @returns {"none"|"single"|"debate"}
 */
export function classifyLevel({ config, changedFiles, diffStats, sensitive }) {
  const limits = config.thresholds || {};

  // Classify every changed path once, remembering whether anything was
  // reviewable and whether anything was sensitive.
  let sawReviewable = false;
  let sawSensitive = Boolean(sensitive);
  for (const file of changedFiles || []) {
    const verdict = classifyPath(file.path, config);
    if (verdict.reviewable) sawReviewable = true;
    if (verdict.sensitive) sawSensitive = true;
  }

  if (!sawReviewable) return "none";

  // Gather the tier each rule asks for; the strictest one is selected below.
  const requested = [];

  if (requiresReviewForCode(config)) {
    requested.push("single");
  }

  const lineCount = diffStats?.lines || 0;
  const touchedFiles = diffStats?.fileCount || 0;

  const isBig =
    lineCount >= (limits.bigDiffLines ?? 80) || touchedFiles >= (limits.bigFileCount ?? 5);
  if (isBig) {
    requested.push("single");
  }

  const isHuge =
    lineCount >= (limits.debateDiffLines ?? 250) ||
    touchedFiles >= (limits.debateFileCount ?? 12);
  if (isHuge) {
    requested.push("debate");
  }

  if (sawSensitive) {
    // With debateOnSensitive switched off a sensitive change still needs at
    // least a single review.
    requested.push(limits.debateOnSensitive !== false ? "debate" : "single");
  }

  return requested.reduce((tier, wanted) => escalate(tier, wanted), "none");
}
|
|
142
|
+
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
// Helpers
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
// Measure a change: the number of changed files plus an estimate of changed
// lines taken from the diff text.
//
// Inside a unified-diff hunk (after an `@@ -l,s +l,s @@` header, for as many
// old/new lines as the header announces) every `+` / `-` line is counted, even
// when its CONTENT begins with `++` or `--` (e.g. a removed SQL comment shows up
// as `--- comment`). Treating such lines as file headers would undercount the
// diff and could keep a large change below the review thresholds.
//
// Outside a hunk (file headers, or diff text that carries no hunk headers at
// all) the original heuristic applies: count `+` / `-` prefixed lines except
// those starting with `+++` or `---`.
//
// @param {Array|null|undefined} changedFiles - changed-file entries; only the
//   length is used.
// @param {string|null|undefined} diffText - diff text; nullish is treated as "".
// @returns {{lines:number,fileCount:number}}
function diffStatsFor(changedFiles, diffText) {
  const fileCount = (changedFiles || []).length;

  // Captures the old/new line counts; an omitted count means 1.
  const hunkHeader = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/;
  const sizeOf = (captured) => (captured === undefined ? 1 : Number(captured));

  let lines = 0;
  let oldLeft = 0; // old-side lines still expected in the current hunk
  let newLeft = 0; // new-side lines still expected in the current hunk

  for (const raw of String(diffText || "").split(/\r?\n/)) {
    const header = hunkHeader.exec(raw);
    if (header) {
      oldLeft = sizeOf(header[1]);
      newLeft = sizeOf(header[2]);
      continue;
    }

    if (oldLeft > 0 || newLeft > 0) {
      const marker = raw[0];
      if (marker === "+" && newLeft > 0) {
        lines += 1;
        newLeft -= 1;
        continue;
      }
      if (marker === "-" && oldLeft > 0) {
        lines += 1;
        oldLeft -= 1;
        continue;
      }
      if (marker === " " || raw === "") {
        // Context line (an empty line is a context line whose trailing space
        // was stripped): consumes one line on each side.
        oldLeft = Math.max(0, oldLeft - 1);
        newLeft = Math.max(0, newLeft - 1);
        continue;
      }
      if (marker === "\\") {
        // "\ No newline at end of file" marker: not a changed line.
        continue;
      }
      // Anything else means the hunk ended early (truncated or malformed
      // diff); drop back to the header heuristic for this line.
      oldLeft = 0;
      newLeft = 0;
    }

    const signed = raw.startsWith("+") || raw.startsWith("-");
    if (signed && !raw.startsWith("+++") && !raw.startsWith("---")) {
      lines += 1;
    }
  }

  return { lines, fileCount };
}
|
|
159
|
+
|
|
160
|
+
// Report whether any changed file is classified as reviewable by
// classifyPath. A missing/empty list yields false.
function hasReviewableFile(changedFiles, config) {
  for (const file of changedFiles || []) {
    if (classifyPath(file.path, config).reviewable) {
      return true;
    }
  }
  return false;
}
|
|
164
|
+
|
|
165
|
+
// Report whether the change consists solely of files that classifyPath marks
// as docsOnly. An empty or missing list is NOT docs-only (returns false).
function allDocsOnly(changedFiles, config) {
  let sawAny = false;
  for (const file of changedFiles || []) {
    if (!classifyPath(file.path, config).docsOnly) {
      return false;
    }
    sawAny = true;
  }
  return sawAny;
}
|
|
171
|
+
|
|
172
|
+
// List the paths of the changed files that classifyPath marks as reviewable,
// in their original order, with backslashes converted to forward slashes so
// they can be compared against reported coverage.
function reviewableChangedPaths(changedFiles, config) {
  const collected = [];
  for (const file of changedFiles || []) {
    if (classifyPath(file.path, config).reviewable) {
      collected.push(String(file.path).replace(/\\/g, "/"));
    }
  }
  return collected;
}
|
|
178
|
+
|
|
179
|
+
// Report whether any changed file is classified as sensitive by classifyPath.
// Only the `sensitive` flag is consulted here; reviewability is not checked.
function anySensitiveChange(changedFiles, config) {
  for (const file of changedFiles || []) {
    if (classifyPath(file.path, config).sensitive) {
      return true;
    }
  }
  return false;
}
|
|
183
|
+
|
|
184
|
+
// Detect the host's re-entrancy flag, accepting both the camelCase
// (`stopHookActive`) and snake_case (`stop_hook_active`) spellings.
function recursionActive(input) {
  if (input.stopHookActive) {
    return true;
  }
  return Boolean(input.stop_hook_active);
}
|
|
188
|
+
|
|
189
|
+
// Produce a one-line summary of secret-scan findings that is safe to put in a
// decision message. Only three things are read from each finding: its `type`
// (tallied), and - for "sensitive_path" findings - its `path`. No other field
// of a finding (such as a matched sample) is ever read, so raw secret material
// cannot leak through this summary.
//
// Shape: "<total> finding(s): <type> x<count>, ...[; sensitive path(s): <p>, ...]"
function summarizeSecretFindings(findings) {
  const tally = new Map();
  const sensitivePaths = [];

  for (const { type, path } of findings) {
    const seenSoFar = tally.has(type) ? tally.get(type) : 0;
    tally.set(type, seenSoFar + 1);
    // A sensitive-path finding names a file, not a secret value.
    if (type === "sensitive_path" && path) {
      sensitivePaths.push(String(path));
    }
  }

  const breakdown = Array.from(tally, ([type, n]) => `${type} x${n}`).join(", ");
  const head = `${findings.length} finding(s): ${breakdown}`;
  if (sensitivePaths.length === 0) {
    return head;
  }
  return `${head}; sensitive path(s): ${sensitivePaths.join(", ")}`;
}
|
|
210
|
+
|
|
211
|
+
// Compose the text of the "self-review required" block for a review level.
// The text tells the host to run the bundled self-review orchestrator (at
// DEBATE tier when level is "debate", otherwise as a single reviewer). It is a
// block message, never a pass.
//
// When `job` is given, the verdict contract is appended: the ids/hashes from
// the job, reviewer "self", the level, the required dimensions and the list of
// reviewable changed files the verdict's coverage has to include. Without a
// job only the base instruction is returned.
function selfReviewBlockReason(level, job) {
  const opening = "Stop hook feedback: this change has NOT passed an adversarial review. ";
  const closing = "Critical and Important findings must block completion.";

  let instruction;
  if (level === "debate") {
    instruction =
      "Run the bundled self-review orchestrator at DEBATE tier (panel + " +
      "cross-examination + adjudicator) before completing. ";
  } else {
    instruction =
      "Run the bundled self-review orchestrator (single adversarial reviewer) " +
      "before completing. ";
  }
  const base = opening + instruction + closing;

  if (!job) {
    return base;
  }

  // Spell out exactly what the orchestrator's final verdict block must carry.
  const dims = (job.requiredDimensions || []).join(", ");
  const files = (job.changedFiles || []).join(", ");
  const contract = [
    " The orchestrator's FINAL OUTPUT must be a verdict block with ",
    `job_id="${job.jobId}", `,
    `diff_hash="${job.diffHash}", `,
    `payload_hash="${job.payloadHash}", `,
    'reviewer="self", ',
    `level="${level}", `,
    `required dimensions [${dims}], `,
    "and coverage.files_examined covering every ",
    `reviewable changed file [${files}]. `,
    "A stale or non-matching verdict is rejected.",
  ].join("");

  return base + contract;
}
|
|
247
|
+
|
|
248
|
+
// Derive the review cache key for a job under a given config by handing
// reviewCacheKey a fixed set of inputs: the job's diff hash, a hash of the
// stable-serialized config, the job's payload hash (as promptHash), reviewer
// id, level, and the privacy mode. Optional job metadata (reviewerVersion,
// model, toolVersion) and a missing privacy mode fall back to "" so the same
// inputs always produce the same key.
function cacheKeyFor(job, config) {
  const { diffHash, payloadHash, reviewer, level } = job;
  const configHash = sha256(stableJson(config));

  const keyInputs = {
    diffHash,
    configHash,
    promptHash: payloadHash,
    reviewerId: reviewer,
    reviewerVersion: job.reviewerVersion || "",
    model: job.model || "",
    level,
    toolVersion: job.toolVersion || "",
    privacyMode: config.privacy?.externalReview || "",
  };
  return reviewCacheKey(keyInputs);
}
|
|
264
|
+
|
|
265
|
+
// ---------------------------------------------------------------------------
|
|
266
|
+
// Coverage enforcement (deferred check 2)
|
|
267
|
+
// ---------------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
// Check that a verdict's `coverage.files_examined` accounts for every required
// path. Examined paths are compared after converting backslashes to forward
// slashes; the required paths are compared as given.
//
// Returns:
//   - "empty_coverage" when paths are required but nothing (or a non-array)
//     was reported as examined;
//   - "missing_coverage:<path>" for the first required path not examined;
//   - null when coverage is acceptable.
function coverageFailure(verdict, reviewablePaths) {
  const reported = verdict.coverage?.files_examined;
  const examined = Array.isArray(reported) ? reported : [];

  if (reviewablePaths.length > 0 && examined.length === 0) {
    return "empty_coverage";
  }

  const toPosix = (value) => String(value).replace(/\\/g, "/");
  const examinedSet = new Set(examined.map((value) => toPosix(value)));

  for (const required of reviewablePaths) {
    const covered = examinedSet.has(required);
    if (!covered) {
      return `missing_coverage:${required}`;
    }
  }
  return null;
}
|
|
283
|
+
|
|
284
|
+
// Run the two deferred checks on a verdict, in order:
//   1. the verdict's `payload_hash` must be strictly equal to the job's
//      `payloadHash`, otherwise "payload_hash_mismatch" is returned;
//   2. the verdict's coverage is validated by coverageFailure, whose result
//      (null on success, or a failure reason) is returned as-is.
// A null result means both checks passed.
function deferredCheckFailure(verdict, job, reviewablePaths) {
  const payloadMatches = verdict.payload_hash === job.payloadHash;
  return payloadMatches ? coverageFailure(verdict, reviewablePaths) : "payload_hash_mismatch";
}
|
|
295
|
+
|
|
296
|
+
// ---------------------------------------------------------------------------
|
|
297
|
+
// Native self-review verification (verdict-based; replaces the old timestamp /
|
|
298
|
+
// sentinel "already_reviewed" branch)
|
|
299
|
+
// ---------------------------------------------------------------------------
|
|
300
|
+
|
|
301
|
+
// Decide whether the transcript already contains an acceptable native
// self-review for `selfJob`.
//
// The candidate outputs are whatever collectReviewOutputs returns for
// `entries` and `lastEditKey`. An output is accepted only when ALL hold:
//   - parseVerdict(output, selfJob) reports ok (matching of job id, diff hash,
//     reviewer and level is presumably enforced inside parseVerdict - confirm
//     in verdict.js);
//   - the parsed verdict's `verdict` field is exactly "pass";
//   - when `enforced` is truthy, deferredCheckFailure finds no problem
//     (payload hash match + coverage of `reviewablePaths`).
// The first accepted output short-circuits to true. Anything that fails to
// parse, is not a pass, or fails the deferred checks is skipped, so a bare
// sentinel string or a verdict for a different diff never satisfies the gate.
//
// Returns false immediately when `lastEditKey <= 0`.
function selfReviewSatisfied(entries, lastEditKey, selfJob, reviewablePaths, enforced) {
  if (lastEditKey <= 0) return false;

  // Acceptance rests entirely on the parsed verdict, never on raw substrings.
  const isAcceptedPass = (output) => {
    const parsed = parseVerdict(output, selfJob);
    if (!parsed.ok || parsed.verdict.verdict !== "pass") {
      return false;
    }
    if (!enforced) {
      return true;
    }
    return !deferredCheckFailure(parsed.verdict, selfJob, reviewablePaths);
  };

  for (const candidate of collectReviewOutputs(entries, lastEditKey)) {
    if (isAcceptedPass(candidate)) {
      return true;
    }
  }
  return false;
}
|
|
337
|
+
|
|
338
|
+
// ---------------------------------------------------------------------------
|
|
339
|
+
// Reviewer error -> decision mapping
|
|
340
|
+
// ---------------------------------------------------------------------------
|
|
341
|
+
|
|
342
|
+
// Turn an operational reviewer failure into a gate decision, based on what
// reviewerErrorAction(config) returns:
//   - "allow"       -> advisory allow mentioning the failure detail;
//   - "self-review" -> block carrying the self-review instruction for `level`
//                      (built without a job, so no verdict contract is
//                      embedded), flagged with `selfReview: true`;
//   - anything else -> plain block stating the review could not complete.
// Both block variants record `detail` under `reviewerError`.
function reviewerErrorDecision(config, level, detail) {
  switch (reviewerErrorAction(config)) {
    case "allow":
      return advisory(`Reviewer operational failure (${detail}); allowed per soft policy.`);

    case "self-review":
      return block(selfReviewBlockReason(level), { selfReview: true, reviewerError: detail });

    default:
      return block(
        `Adversarial review could not complete: reviewer operational failure (${detail}).`,
        { reviewerError: detail }
      );
  }
}
|
|
356
|
+
|
|
357
|
+
// ---------------------------------------------------------------------------
|
|
358
|
+
// Main entry point (step 3 + deferred checks)
|
|
359
|
+
// ---------------------------------------------------------------------------
|
|
360
|
+
|
|
361
|
+
/**
|
|
362
|
+
* Evaluate the gate and return an allow/block/advisory decision.
|
|
363
|
+
*
|
|
364
|
+
* @param {object} input
|
|
365
|
+
* @param {object} input.config - locked effective config (mergeConfig output).
|
|
366
|
+
* @param {string} input.cwd - workspace root for diffing.
|
|
367
|
+
* @param {object} input.baseline - baseline from captureBaseline (or persisted).
|
|
368
|
+
* @param {string} input.transcript - raw JSONL transcript text.
|
|
369
|
+
* @param {object} [input.host] - host descriptor; `{ reviewerMapping: "none"|<tool> }`.
|
|
370
|
+
* @param {Function} [input.reviewerRunner] - async (job) => ({ ok, verdict?|error?, raw? }).
|
|
371
|
+
* @param {number} [input.now] - injected clock (ms).
|
|
372
|
+
* @param {string} [input.sessionId] - session id for state keying.
|
|
373
|
+
* @param {string} [input.stateDir] - directory for per-session state.
|
|
374
|
+
* @param {boolean} [input.stopHookActive] - host recursion guard.
|
|
375
|
+
* @returns {Promise<object>} decision
|
|
376
|
+
*/
|
|
377
|
+
export async function evaluateGate(input) {
|
|
378
|
+
const {
|
|
379
|
+
config,
|
|
380
|
+
cwd,
|
|
381
|
+
baseline,
|
|
382
|
+
transcript,
|
|
383
|
+
host = {},
|
|
384
|
+
reviewerRunner,
|
|
385
|
+
sessionId = "default",
|
|
386
|
+
stateDir,
|
|
387
|
+
transcriptPath = "",
|
|
388
|
+
} = input;
|
|
389
|
+
|
|
390
|
+
// (1) Subagent transcripts never trigger the gate (avoid serializing pipelines).
|
|
391
|
+
if (isSubagentTranscript(transcriptPath, sessionId)) {
|
|
392
|
+
return allow({ reason: "subagent_transcript" });
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
// (2) Host recursion guard: a re-entrant stop hook must allow to avoid loops.
|
|
396
|
+
if (recursionActive(input)) {
|
|
397
|
+
return allow({ reason: "stop_hook_active" });
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
const entries = parseJsonl(transcript || "");
|
|
401
|
+
// Note: lastReviewKey/lastDebateKey (timestamp-based review detection) are no
|
|
402
|
+
// longer used for acceptance — native self-review is now verdict-based below.
|
|
403
|
+
const { lastEditKey, editedPaths } = scanKeys(entries);
|
|
404
|
+
|
|
405
|
+
// (3) Build review scope from the authoritative filesystem/git diff.
|
|
406
|
+
let diff;
|
|
407
|
+
try {
|
|
408
|
+
diff = await buildReviewDiff(cwd, baseline);
|
|
409
|
+
} catch {
|
|
410
|
+
diff = null;
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
const changedFiles = diff?.changedFiles || [];
|
|
414
|
+
const hasEditEvidence = lastEditKey > 0 || editedPaths.size > 0 || changedFiles.length > 0;
|
|
415
|
+
|
|
416
|
+
// No reviewable changed files AND no edit evidence -> nothing happened.
|
|
417
|
+
if (!hasReviewableFile(changedFiles, config) && !hasEditEvidence) {
|
|
418
|
+
return allow({ reason: "no_edits" });
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
// (4) Edit evidence exists but the diff is empty/unbuildable: never produce a
|
|
422
|
+
// vacuous external pass. Follow onInternalError (allow soft, block enforced).
|
|
423
|
+
const diffUnbuildable = !diff || (changedFiles.length === 0 && !diff.text);
|
|
424
|
+
if (hasEditEvidence && diffUnbuildable) {
|
|
425
|
+
const action = internalErrorAction(config, true);
|
|
426
|
+
if (action === "allow") {
|
|
427
|
+
return advisory("Edit evidence present but no reviewable diff could be built; allowed per soft policy.");
|
|
428
|
+
}
|
|
429
|
+
return block(
|
|
430
|
+
"Adversarial review could not complete: edit evidence exists but no reviewable diff could be built (fail-closed)."
|
|
431
|
+
);
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
// Docs-only changes are allowed (no reviewable/sensitive files).
|
|
435
|
+
if (allDocsOnly(changedFiles, config)) {
|
|
436
|
+
return allow({ reason: "docs_only" });
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
// (5) Determine required review level.
|
|
440
|
+
const diffStats = diffStatsFor(changedFiles, diff.text);
|
|
441
|
+
const sensitive = anySensitiveChange(changedFiles, config);
|
|
442
|
+
const level = classifyLevel({ config, changedFiles, diffStats, sensitive });
|
|
443
|
+
if (level === "none") {
|
|
444
|
+
return allow({ reason: "level_none" });
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
// (6) Skip handling: only when the latest GENUINE user message asks to skip
|
|
448
|
+
// AND skipAllowed (never in strict-ci). Otherwise IGNORE the skip entirely.
|
|
449
|
+
if (skipAllowed(config) && wantsSkip(lastUserText(entries))) {
|
|
450
|
+
return advisory("Review skipped at user request (allowSkip is enabled).", { skipped: true });
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
// Build the review scope/payload shared by both the native self-review check
|
|
454
|
+
// and the external-reviewer path. Computed BEFORE completed-review detection
|
|
455
|
+
// so the self-review verdict is verified against the CURRENT diff.
|
|
456
|
+
const reviewablePaths = reviewableChangedPaths(changedFiles, config);
|
|
457
|
+
const payloadHash = sha256(stableJson({ diff: diff.text, level, changedFiles }));
|
|
458
|
+
|
|
459
|
+
// Load session state for block-cap accounting, the pass cache, and the
|
|
460
|
+
// persisted self-review jobId.
|
|
461
|
+
const state = stateDir ? await readSessionState(stateDir, sessionId) : {};
|
|
462
|
+
const cache = state.cache || {};
|
|
463
|
+
const enforced = config.policy.mode === "enforced" || isStrict(config);
|
|
464
|
+
|
|
465
|
+
// (7) Native self-review detection (verdict-based). A timestamp or a forgeable
|
|
466
|
+
// sentinel is NOT sufficient: a completed review Task whose FINAL OUTPUT does
|
|
467
|
+
// not parse to a VALID verdict matching the CURRENT job is rejected. This
|
|
468
|
+
// closes both the freshness bypass (BUG A: a post-review non-Edit file change
|
|
469
|
+
// alters diffHash so a prior verdict no longer matches) and the forgery bypass
|
|
470
|
+
// (BUG B: a no-op Task with the sentinel token cannot produce a valid verdict).
|
|
471
|
+
const selfDimensions = config.reviewers?.self?.requiredDimensions || [
|
|
472
|
+
"Correctness",
|
|
473
|
+
"Security",
|
|
474
|
+
"Tests",
|
|
475
|
+
];
|
|
476
|
+
// Reuse the persisted jobId if the gate previously issued one for THIS diff;
|
|
477
|
+
// otherwise derive a deterministic id from the current diffHash so the
|
|
478
|
+
// orchestrator can reference it before any state is persisted.
|
|
479
|
+
const persistedSelfJobId =
|
|
480
|
+
state.selfReview && state.selfReview.diffHash === diff.diffHash
|
|
481
|
+
? state.selfReview.jobId
|
|
482
|
+
: null;
|
|
483
|
+
const selfJob = {
|
|
484
|
+
jobId: persistedSelfJobId || `ar-self-${diff.diffHash.slice(0, 16)}`,
|
|
485
|
+
diffHash: diff.diffHash,
|
|
486
|
+
payloadHash,
|
|
487
|
+
reviewer: "self",
|
|
488
|
+
level,
|
|
489
|
+
requiredDimensions: selfDimensions,
|
|
490
|
+
changedFiles: reviewablePaths,
|
|
491
|
+
sensitive,
|
|
492
|
+
};
|
|
493
|
+
|
|
494
|
+
if (selfReviewSatisfied(entries, lastEditKey, selfJob, reviewablePaths, enforced)) {
|
|
495
|
+
return allow({ reason: "already_reviewed", level });
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
// Emit the "self-review required" BLOCK for the current level. This is the
|
|
499
|
+
// shared local-review path used both when no external reviewer is configured
|
|
500
|
+
// AND when the privacy gate refuses to send code externally (deny / prompt /
|
|
501
|
+
// secret found with block-external). It persists { jobId, diffHash } so a later
|
|
502
|
+
// turn reuses the same jobId, and counts as a BLOCK, not a pass. `extra` lets
|
|
503
|
+
// callers annotate WHY self-review was forced (e.g. the privacy reason) without
|
|
504
|
+
// ever including raw secret material.
|
|
505
|
+
const emitSelfReviewBlock = async (extra = {}) => {
|
|
506
|
+
if (stateDir) {
|
|
507
|
+
await writeSessionState(stateDir, sessionId, {
|
|
508
|
+
...state,
|
|
509
|
+
selfReview: { jobId: selfJob.jobId, diffHash: selfJob.diffHash },
|
|
510
|
+
});
|
|
511
|
+
}
|
|
512
|
+
return await blockWithCap(
|
|
513
|
+
stateDir,
|
|
514
|
+
sessionId,
|
|
515
|
+
state,
|
|
516
|
+
config,
|
|
517
|
+
block(selfReviewBlockReason(level, selfJob), {
|
|
518
|
+
selfReview: true,
|
|
519
|
+
level,
|
|
520
|
+
jobId: selfJob.jobId,
|
|
521
|
+
diffHash: selfJob.diffHash,
|
|
522
|
+
payloadHash: selfJob.payloadHash,
|
|
523
|
+
requiredDimensions: selfJob.requiredDimensions,
|
|
524
|
+
...extra,
|
|
525
|
+
})
|
|
526
|
+
);
|
|
527
|
+
};
|
|
528
|
+
|
|
529
|
+
// (8) Reviewer routing.
|
|
530
|
+
const reviewerMapping = host.reviewerMapping || host.reviewer || "none";
|
|
531
|
+
const externalReview = reviewerMapping !== "none" && typeof reviewerRunner === "function";
|
|
532
|
+
|
|
533
|
+
if (!externalReview) {
|
|
534
|
+
// Self-review required: emit the orchestrator instruction with the verdict
|
|
535
|
+
// contract. Counts as a BLOCK, not a pass.
|
|
536
|
+
return await emitSelfReviewBlock();
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
// -------------------------------------------------------------------------
|
|
540
|
+
// (8a) PRIVACY GATE — enforced BEFORE any external-reviewer dispatch.
|
|
541
|
+
//
|
|
542
|
+
// The native self-review path above never sends code off-box, so it is
|
|
543
|
+
// unaffected. Reaching here means we are about to hand the diff to an external
|
|
544
|
+
// reviewer tool/provider. FAIL CLOSED: anything other than an explicit allow +
|
|
545
|
+
// a clean secret scan routes back to local self-review rather than leaking the
|
|
546
|
+
// change. Raw secret material is NEVER placed in any decision message.
|
|
547
|
+
// -------------------------------------------------------------------------
|
|
548
|
+
const privacy = config.privacy || {};
|
|
549
|
+
const externalReviewPolicy = privacy.externalReview || "allow";
|
|
550
|
+
const secretScanPolicy = privacy.secretScan || "block-external";
|
|
551
|
+
// Set only in soft mode when secretScan="warn" lets a flagged change proceed to
|
|
552
|
+
// external review; surfaced as a systemMessage on the eventual allow.
|
|
553
|
+
let secretWarning = null;
|
|
554
|
+
|
|
555
|
+
// externalReview policy. `deny` never sends code out; `prompt` cannot obtain
|
|
556
|
+
// consent in this non-interactive gate, so it ALSO fails closed to self-review
|
|
557
|
+
// (interactive consent is the installer's job). Only `allow` proceeds to the
|
|
558
|
+
// secret scan below.
|
|
559
|
+
if (externalReviewPolicy === "deny") {
|
|
560
|
+
return await emitSelfReviewBlock({
|
|
561
|
+
privacyBlocked: true,
|
|
562
|
+
privacyReason: "external_review_denied",
|
|
563
|
+
});
|
|
564
|
+
}
|
|
565
|
+
if (externalReviewPolicy === "prompt") {
|
|
566
|
+
return await emitSelfReviewBlock({
|
|
567
|
+
privacyBlocked: true,
|
|
568
|
+
privacyReason: "external_review_prompt_non_interactive",
|
|
569
|
+
});
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
// Secret scan on the EXACT payload about to be sent (diff text + reviewable
|
|
573
|
+
// changed-file paths). Only reached when externalReview === "allow".
|
|
574
|
+
const secretFindings = scanSecrets(diff.text, reviewablePaths);
|
|
575
|
+
if (secretFindings.length > 0) {
|
|
576
|
+
const findingSummary = summarizeSecretFindings(secretFindings);
|
|
577
|
+
if (secretScanPolicy === "block-all") {
|
|
578
|
+
// Operational block in ALL modes: the secret(s) must be removed before any
|
|
579
|
+
// review. The message names the finding type/path only — never the value.
|
|
580
|
+
return await blockWithCap(
|
|
581
|
+
stateDir,
|
|
582
|
+
sessionId,
|
|
583
|
+
state,
|
|
584
|
+
config,
|
|
585
|
+
block(
|
|
586
|
+
"Secret material detected in the change; remove the secret(s) before review can proceed " +
|
|
587
|
+
`(${findingSummary}).`,
|
|
588
|
+
{ secretBlocked: true, secretScan: "block-all", level }
|
|
589
|
+
)
|
|
590
|
+
);
|
|
591
|
+
}
|
|
592
|
+
if (secretScanPolicy === "warn") {
|
|
593
|
+
// `warn` is only valid in soft mode. In enforced/strict we MUST NOT send
|
|
594
|
+
// secrets externally, so treat any non-soft `warn` as block-external (fail
|
|
595
|
+
// closed). In soft, proceed to external review but attach a warning.
|
|
596
|
+
if (config.policy.mode === "soft") {
|
|
597
|
+
// Fall through to external review, carrying a warning to surface later.
|
|
598
|
+
secretWarning =
|
|
599
|
+
"Warning: possible secret material detected and sent to external review " +
|
|
600
|
+
`(${findingSummary}). Consider secretScan="block-external".`;
|
|
601
|
+
} else {
|
|
602
|
+
return await emitSelfReviewBlock({
|
|
603
|
+
privacyBlocked: true,
|
|
604
|
+
privacyReason: "secret_detected_block_external",
|
|
605
|
+
secretScan: "block-external",
|
|
606
|
+
});
|
|
607
|
+
}
|
|
608
|
+
} else {
|
|
609
|
+
// Default `block-external` (and any unrecognized value): do NOT send the
|
|
610
|
+
// change externally. Route to local self-review. The reason names that
|
|
611
|
+
// secrets were detected (type/path/count) but never the secret value.
|
|
612
|
+
return await emitSelfReviewBlock({
|
|
613
|
+
privacyBlocked: true,
|
|
614
|
+
privacyReason: "secret_detected_block_external",
|
|
615
|
+
secretScan: secretScanPolicy,
|
|
616
|
+
});
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
// Build the external review job. For diff-only payloads the payloadHash equals
|
|
621
|
+
// the diffHash; we compute it explicitly so external reviewers can confirm the
|
|
622
|
+
// exact bytes they reviewed.
|
|
623
|
+
const requiredDimensions = config.reviewers?.[reviewerMapping]?.requiredDimensions || [
|
|
624
|
+
"Correctness",
|
|
625
|
+
"Security",
|
|
626
|
+
"Tests",
|
|
627
|
+
];
|
|
628
|
+
const job = {
|
|
629
|
+
jobId: `ar-${diff.diffHash.slice(0, 16)}-${level}`,
|
|
630
|
+
diffHash: diff.diffHash,
|
|
631
|
+
payloadHash,
|
|
632
|
+
reviewer: reviewerMapping,
|
|
633
|
+
level,
|
|
634
|
+
requiredDimensions,
|
|
635
|
+
changedFiles: reviewablePaths,
|
|
636
|
+
sensitive,
|
|
637
|
+
// Carry the actual diff text so external reviewer adapters can deliver it to
|
|
638
|
+
// the reviewer process. Without this the adapters write an EMPTY diff file and
|
|
639
|
+
// reviewers produce a meaningless pass. Native self-review (selfJob) does NOT
|
|
640
|
+
// need this: it runs inside the host against the live repo.
|
|
641
|
+
diffText: diff.text,
|
|
642
|
+
};
|
|
643
|
+
|
|
644
|
+
// Cache hit: a prior identical review already passed.
|
|
645
|
+
const cacheKey = cacheKeyFor(job, config);
|
|
646
|
+
if (cache[cacheKey]) {
|
|
647
|
+
const extra = { reason: "cached_pass", cached: true, level };
|
|
648
|
+
if (secretWarning) extra.systemMessage = secretWarning;
|
|
649
|
+
return allow(extra);
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
// Run the (injected) external reviewer.
|
|
653
|
+
let result;
|
|
654
|
+
try {
|
|
655
|
+
result = await reviewerRunner(job);
|
|
656
|
+
} catch (err) {
|
|
657
|
+
return await blockWithCap(
|
|
658
|
+
stateDir,
|
|
659
|
+
sessionId,
|
|
660
|
+
state,
|
|
661
|
+
config,
|
|
662
|
+
reviewerErrorDecision(config, level, `runner_threw:${err?.message || "error"}`)
|
|
663
|
+
);
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
// Operational failure (ok:false, timeout, bad output) -> onReviewerError.
|
|
667
|
+
if (!result || result.ok !== true || !result.verdict) {
|
|
668
|
+
const detail = result?.error || "no_verdict";
|
|
669
|
+
return await blockWithCap(
|
|
670
|
+
stateDir,
|
|
671
|
+
sessionId,
|
|
672
|
+
state,
|
|
673
|
+
config,
|
|
674
|
+
reviewerErrorDecision(config, level, detail)
|
|
675
|
+
);
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
const verdict = result.verdict;
|
|
679
|
+
|
|
680
|
+
// A valid fail is NOT an operational failure: block with findings, do not
|
|
681
|
+
// fall back to self-review.
|
|
682
|
+
if (verdict.verdict === "fail") {
|
|
683
|
+
return await blockWithCap(
|
|
684
|
+
stateDir,
|
|
685
|
+
sessionId,
|
|
686
|
+
state,
|
|
687
|
+
config,
|
|
688
|
+
block("Adversarial review FAILED. Critical/Important findings must be resolved.", {
|
|
689
|
+
findings: verdict.findings || [],
|
|
690
|
+
level,
|
|
691
|
+
})
|
|
692
|
+
);
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
// Valid pass: enforce the DEFERRED CHECKS before allowing, in enforced/strict.
|
|
696
|
+
// (a) payload_hash must match the exact payload the gate built; (b) coverage
|
|
697
|
+
// must be non-empty and cover every reviewable changed file.
|
|
698
|
+
if (enforced) {
|
|
699
|
+
const deferredFail = deferredCheckFailure(verdict, job, reviewablePaths);
|
|
700
|
+
if (deferredFail) {
|
|
701
|
+
return await blockWithCap(
|
|
702
|
+
stateDir,
|
|
703
|
+
sessionId,
|
|
704
|
+
state,
|
|
705
|
+
config,
|
|
706
|
+
reviewerErrorDecision(config, level, deferredFail)
|
|
707
|
+
);
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
// Pass accepted: cache it (so a re-run of the identical review is instant).
|
|
712
|
+
if (stateDir) {
|
|
713
|
+
const nextCache = { ...cache, [cacheKey]: true };
|
|
714
|
+
await writeSessionState(stateDir, sessionId, { ...state, cache: nextCache });
|
|
715
|
+
}
|
|
716
|
+
const passExtra = { reason: "external_pass", level, cached: false };
|
|
717
|
+
if (secretWarning) passExtra.systemMessage = secretWarning;
|
|
718
|
+
return allow(passExtra);
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
// ---------------------------------------------------------------------------
|
|
722
|
+
// Block-cap accounting (step 9)
|
|
723
|
+
// ---------------------------------------------------------------------------
|
|
724
|
+
|
|
725
|
+
/**
 * Count one more block for this session and pick the decision to hand back.
 *
 * The incremented counter is written to session state whenever a state
 * directory is supplied. While the counter is not above the cap
 * (`config.runtime.blockCap`, falling back to 4 when unset) the supplied
 * decision is returned with `blockCount` attached. Once the counter is above
 * the cap, `blockCapAction(config)` chooses the outcome: `"allow"` yields an
 * advisory release, anything else returns the supplied decision annotated with
 * `blockCapReached: true`.
 *
 * NOTE(review): the advisory text attributes the release to "soft policy";
 * which modes actually yield `"allow"` is decided inside `blockCapAction` and
 * is not visible here — confirm there.
 *
 * @param {string} [stateDir] Session-state directory; a falsy value skips persistence.
 * @param {string} sessionId Session identifier forwarded to `writeSessionState`.
 * @param {object} state Current session state; `blockCount` is read from it.
 * @param {object} config Gate configuration (`runtime.blockCap` is read here).
 * @param {object} decision Decision to return when the cap does not release the gate.
 * @returns {Promise<object>} The decision with counter annotations, or the advisory release.
 */
async function blockWithCap(stateDir, sessionId, state, config, decision) {
  const limit = config.runtime?.blockCap ?? 4;
  const blockCount = (state.blockCount || 0) + 1;

  // Copy of the caller's decision carrying the counter plus any extra flags.
  const annotate = (extra) => ({ ...decision, blockCount, ...extra });

  // Persist the new counter before deciding, so it is recorded on every path.
  if (stateDir) {
    await writeSessionState(stateDir, sessionId, { ...state, blockCount });
  }

  const overCap = blockCount > limit;
  if (!overCap) {
    return annotate();
  }

  if (blockCapAction(config) !== "allow") {
    // Cap exceeded but not released: keep the decision and flag the cap state.
    return annotate({ blockCapReached: true });
  }

  return advisory(
    `Block cap (${limit}) exceeded; allowing per soft policy to avoid wedging the session.`,
    { blockCapReleased: true, blockCount }
  );
}
|