@kevinrabun/judges 3.113.0 → 3.115.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/agents/accessibility.judge.md +37 -0
- package/agents/agent-instructions.judge.md +37 -0
- package/agents/ai-code-safety.judge.md +48 -0
- package/agents/api-contract.judge.md +30 -0
- package/agents/api-design.judge.md +39 -0
- package/agents/authentication.judge.md +37 -0
- package/agents/backwards-compatibility.judge.md +37 -0
- package/agents/caching.judge.md +37 -0
- package/agents/ci-cd.judge.md +37 -0
- package/agents/cloud-readiness.judge.md +37 -0
- package/agents/code-structure.judge.md +48 -0
- package/agents/compliance.judge.md +40 -0
- package/agents/concurrency.judge.md +39 -0
- package/agents/configuration-management.judge.md +37 -0
- package/agents/cost-effectiveness.judge.md +40 -0
- package/agents/cybersecurity.judge.md +36 -0
- package/agents/data-security.judge.md +34 -0
- package/agents/data-sovereignty.judge.md +58 -0
- package/agents/database.judge.md +41 -0
- package/agents/dependency-health.judge.md +39 -0
- package/agents/documentation.judge.md +39 -0
- package/agents/error-handling.judge.md +37 -0
- package/agents/ethics-bias.judge.md +39 -0
- package/agents/false-positive-review.judge.md +73 -0
- package/agents/framework-safety.judge.md +40 -0
- package/agents/hallucination-detection.judge.md +33 -0
- package/agents/iac-security.judge.md +38 -0
- package/agents/intent-alignment.judge.md +31 -0
- package/agents/internationalization.judge.md +42 -0
- package/agents/logging-privacy.judge.md +37 -0
- package/agents/logic-review.judge.md +34 -0
- package/agents/maintainability.judge.md +37 -0
- package/agents/model-fingerprint.judge.md +31 -0
- package/agents/multi-turn-coherence.judge.md +29 -0
- package/agents/observability.judge.md +37 -0
- package/agents/over-engineering.judge.md +48 -0
- package/agents/performance.judge.md +44 -0
- package/agents/portability.judge.md +37 -0
- package/agents/rate-limiting.judge.md +37 -0
- package/agents/reliability.judge.md +39 -0
- package/agents/scalability.judge.md +41 -0
- package/agents/security.judge.md +31 -0
- package/agents/software-practices.judge.md +44 -0
- package/agents/testing.judge.md +39 -0
- package/agents/ux.judge.md +37 -0
- package/dist/api.d.ts +9 -1
- package/dist/api.js +9 -1
- package/dist/commands/fix.d.ts +10 -0
- package/dist/commands/fix.js +52 -0
- package/dist/commands/llm-benchmark.d.ts +13 -4
- package/dist/commands/llm-benchmark.js +39 -8
- package/dist/commands/review.d.ts +51 -1
- package/dist/commands/review.js +213 -7
- package/dist/evaluators/index.js +61 -35
- package/dist/github-app.d.ts +35 -0
- package/dist/github-app.js +125 -4
- package/dist/judges/index.d.ts +23 -61
- package/dist/judges/index.js +49 -63
- package/dist/patches/apply.d.ts +15 -0
- package/dist/patches/apply.js +37 -0
- package/dist/tools/prompts.d.ts +2 -2
- package/dist/tools/prompts.js +21 -10
- package/docs/skills.md +7 -0
- package/package.json +18 -3
- package/packages/judges-cli/README.md +24 -0
- package/packages/judges-cli/bin/judges.js +8 -0
- package/scripts/generate-agents-from-judges.ts +111 -0
- package/scripts/generate-skills-docs.ts +26 -0
- package/scripts/validate-agents.ts +104 -0
- package/server.json +2 -2
- package/skills/ai-code-review.skill.md +57 -0
- package/skills/release-gate.skill.md +27 -0
- package/skills/security-review.skill.md +32 -0
- package/src/agent-loader.ts +324 -0
- package/src/skill-loader.ts +199 -0
package/dist/commands/review.js
CHANGED
|
@@ -17,12 +17,51 @@ import { execFileSync } from "child_process";
|
|
|
17
17
|
import { readFileSync, writeFileSync, unlinkSync } from "fs";
|
|
18
18
|
import { tmpdir } from "os";
|
|
19
19
|
import { resolve, join, extname } from "path";
|
|
20
|
+
import { createHash } from "node:crypto";
|
|
20
21
|
import { evaluateDiff, evaluateWithTribunal } from "../evaluators/index.js";
|
|
21
22
|
import { evaluateProject } from "../evaluators/project.js";
|
|
23
|
+
// Test hook to override evaluateDiff in unit tests
|
|
24
|
+
let evaluateDiffImpl = evaluateDiff;
|
|
25
|
+
/**
 * Test-only hook: replaces the diff evaluator this module calls.
 *
 * @param fn Replacement evaluator. `null` or `undefined` restores the real
 *           `evaluateDiff`.
 */
export function __setEvaluateDiffImplForTest(fn) {
    const restoreDefault = fn === null || fn === undefined;
    evaluateDiffImpl = restoreDefault ? evaluateDiff : fn;
}
|
|
22
28
|
import { parseConfig, loadCascadingConfig } from "../config.js";
|
|
23
29
|
import { loadFeedbackStore, getFpRateByRule } from "./feedback.js";
|
|
24
30
|
import { JUDGES } from "../judges/index.js";
|
|
25
31
|
import { parseGitHubRepo, tryRunGit } from "../tools/command-safety.js";
|
|
32
|
+
import { extractValidatedLlmFindings } from "./llm-benchmark.js";
|
|
33
|
+
/**
 * Removes repeated inline comments from a list.
 *
 * Two comments are duplicates when they share the same path, line and body
 * fingerprint (see `hashBody`). The first occurrence wins and the original
 * order is preserved.
 *
 * @param comments Iterable of comments with `path`, `line` and `body`.
 * @returns A new array containing only the first comment for each key.
 */
export function dedupeComments(comments) {
    // A Map keeps insertion order, so its values are the first-seen comments in input order.
    const firstByKey = new Map();
    for (const comment of comments) {
        const dedupeKey = `${comment.path}:${comment.line}:${hashBody(comment.body)}`;
        if (!firstByKey.has(dedupeKey)) {
            firstByKey.set(dedupeKey, comment);
        }
    }
    return Array.from(firstByKey.values());
}
|
|
45
|
+
/**
 * Drops comments that are already present on the pull request, so that
 * re-running a review does not post the same inline comment twice.
 *
 * Existing review comments are fetched page by page. The GitHub REST API
 * returns only a limited page per request (100 is the maximum `per_page`),
 * so a single un-paginated request would miss older comments on busy PRs.
 *
 * This is best-effort: if the lookup fails for any reason the error is
 * logged and the input list is returned unchanged.
 *
 * @param repo     Repository in `owner/name` form.
 * @param pr       Pull request number.
 * @param token    Token forwarded to `apiRequest`.
 * @param comments Candidate comments (`path`, `line`, `body`).
 * @returns The candidates that are not already posted.
 */
export function filterAlreadyPostedComments(repo, pr, token, comments) {
    const PER_PAGE = 100;
    // Hard stop so a misbehaving endpoint (or a test double that always
    // returns a full page) cannot loop forever.
    const MAX_PAGES = 10;
    try {
        const existingKeys = new Set();
        for (let page = 1; page <= MAX_PAGES; page++) {
            const resp = apiRequest("GET", `/repos/${repo}/pulls/${pr}/comments?per_page=${PER_PAGE}&page=${page}`, token);
            const batch = resp.data ?? [];
            if (!Array.isArray(batch)) {
                // e.g. an error object such as `{ message: "Not Found" }`
                throw new Error("Unexpected response shape while listing PR review comments");
            }
            for (const existing of batch) {
                existingKeys.add(`${existing.path}:${existing.line}:${hashBody(existing.body ?? "")}`);
            }
            if (batch.length < PER_PAGE) {
                break; // short page → no further pages
            }
        }
        return comments.filter((c) => !existingKeys.has(`${c.path}:${c.line}:${hashBody(c.body)}`));
    }
    catch (err) {
        console.error("Failed to fetch existing comments, proceeding without dedupe", err);
        return comments;
    }
}
|
|
62
|
+
/**
 * Short, stable fingerprint of a comment body, used to build dedupe keys.
 *
 * SHA-1 is used purely as a content fingerprint here, not for security.
 * Only the first 8 hex characters are kept.
 *
 * @param body Comment text. `null`/`undefined` is treated as the empty
 *             string so a comment without a body cannot crash deduplication.
 * @returns 8-character lowercase hex string.
 */
function hashBody(body) {
    return createHash("sha1")
        .update(body ?? "")
        .digest("hex")
        .slice(0, 8);
}
|
|
26
65
|
// ─── Language Detection ─────────────────────────────────────────────────────
|
|
27
66
|
const EXT_TO_LANG = {
|
|
28
67
|
".ts": "typescript",
|
|
@@ -71,8 +110,8 @@ export function parsePatchToHunk(filePath, patch) {
|
|
|
71
110
|
const changedLineNumbers = [];
|
|
72
111
|
let newLineNum = 0;
|
|
73
112
|
for (const line of lines) {
|
|
74
|
-
// Hunk header: @@ -10,5 +20,8 @@
|
|
75
|
-
const hunkMatch = line.match(
|
|
113
|
+
// Hunk header: @@ -10,5 +20,8 @@ (some tools omit trailing space/@@)
|
|
114
|
+
const hunkMatch = line.match(/^@@\s*-\d+(?:,\d+)?\s+\+(\d+)(?:,\d+)?\s*@@?/);
|
|
76
115
|
if (hunkMatch) {
|
|
77
116
|
newLineNum = parseInt(hunkMatch[1], 10) - 1;
|
|
78
117
|
continue;
|
|
@@ -209,7 +248,14 @@ function ghCliRequest(method, endpoint, body) {
|
|
|
209
248
|
return { status: 0, data: null };
|
|
210
249
|
}
|
|
211
250
|
}
|
|
251
|
+
// Allow test injection of the GitHub API layer
|
|
252
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
253
|
+
let apiRequestImpl;
|
|
212
254
|
function apiRequest(method, endpoint, token, body) {
|
|
255
|
+
const impl = apiRequestImpl;
|
|
256
|
+
if (impl) {
|
|
257
|
+
return impl(method, endpoint, token, body);
|
|
258
|
+
}
|
|
213
259
|
if (ghCliAvailable()) {
|
|
214
260
|
return ghCliRequest(method, endpoint, body);
|
|
215
261
|
}
|
|
@@ -220,6 +266,102 @@ function apiRequest(method, endpoint, token, body) {
|
|
|
220
266
|
console.error("Either install the `gh` CLI and run `gh auth login`, or set GITHUB_TOKEN env var.");
|
|
221
267
|
process.exit(1);
|
|
222
268
|
}
|
|
269
|
+
/**
 * Test-only hook: installs a replacement for the GitHub API transport.
 *
 * `apiRequest` delegates to the installed function whenever it is truthy,
 * so passing `undefined` switches back to the real transport.
 *
 * @param fn Function called as `fn(method, endpoint, token, body)`.
 */
export function __setApiRequestImplForTest(fn) {
    apiRequestImpl = fn;
}
|
|
272
|
+
async function callOpenAiChat(prompt, opts) {
|
|
273
|
+
const baseUrl = opts.baseUrl || "https://api.openai.com/v1/chat/completions";
|
|
274
|
+
// Node 18+ has global fetch; avoid dynamic imports to keep tsc happy without node-fetch types
|
|
275
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
276
|
+
const fetchImpl = globalThis.fetch;
|
|
277
|
+
if (!fetchImpl)
|
|
278
|
+
throw new Error("fetch() not available. Run on Node 18+ or polyfill fetch.");
|
|
279
|
+
const res = await fetchImpl(baseUrl, {
|
|
280
|
+
method: "POST",
|
|
281
|
+
headers: {
|
|
282
|
+
Authorization: `Bearer ${opts.apiKey}`,
|
|
283
|
+
"Content-Type": "application/json",
|
|
284
|
+
},
|
|
285
|
+
body: JSON.stringify({
|
|
286
|
+
model: opts.model,
|
|
287
|
+
messages: [{ role: "user", content: prompt }],
|
|
288
|
+
max_tokens: opts.maxTokens ?? 800,
|
|
289
|
+
temperature: 0.2,
|
|
290
|
+
}),
|
|
291
|
+
});
|
|
292
|
+
if (!res.ok) {
|
|
293
|
+
const text = await res.text().catch(() => "");
|
|
294
|
+
throw new Error(`LLM request failed: ${res.status} ${res.statusText} ${text}`);
|
|
295
|
+
}
|
|
296
|
+
const json = (await res.json());
|
|
297
|
+
const content = json.choices?.[0]?.message?.content;
|
|
298
|
+
if (!content)
|
|
299
|
+
throw new Error("LLM response missing content");
|
|
300
|
+
return content;
|
|
301
|
+
}
|
|
302
|
+
// test hooks for dependency injection
|
|
303
|
+
let callOpenAiChatImpl = callOpenAiChat;
|
|
304
|
+
/**
 * Test-only hook: replaces the function used to call the LLM.
 *
 * Mirrors `__setEvaluateDiffImplForTest`: passing `null`/`undefined`
 * restores the real `callOpenAiChat` instead of leaving the implementation
 * unset, which would make the next LLM call throw.
 *
 * @param fn Replacement called as `fn(prompt, opts)`; resolves to the reply text.
 */
export function __setCallOpenAiChatImplForTest(fn) {
    callOpenAiChatImpl = fn ?? callOpenAiChat;
}
|
|
307
|
+
/**
 * Builds one review prompt covering every file of a pull request.
 *
 * Files without a patch are ignored, and so is any file whose patch alone is
 * larger than `maxBytes` (UTF-8). The limit is applied per file, not to the
 * combined prompt.
 *
 * @param prFiles  Iterable of `{ filename, patch? }` entries.
 * @param maxBytes Per-file patch size limit in bytes (default 40000).
 * @returns `{ prompt, contextSnippets }`, where `contextSnippets` holds the
 *          first five per-file sections.
 */
function buildLlmPromptForPr(prFiles, maxBytes = 40000) {
    const exceedsLimit = (patch) => Buffer.byteLength(patch, "utf-8") > maxBytes;
    const sections = [];
    for (const { filename, patch } of prFiles) {
        if (patch && !exceedsLimit(patch)) {
            sections.push(`--- FILE: ${filename} ---\n${patch}`);
        }
    }
    const instructions = "Review the following PR diff. Return issues with rule IDs, severity, and recommendations.";
    return {
        prompt: `${instructions}\n\n${sections.join("\n\n")}`,
        contextSnippets: sections.slice(0, 5),
    };
}
|
|
321
|
+
/**
 * Optional "Layer 2" review: sends the PR's changed code to an LLM and
 * returns a markdown summary of its answer.
 *
 * The step is best-effort. A missing `OPENAI_API_KEY` or a failed LLM
 * request is reported through `warnings` instead of throwing, so the
 * surrounding review can still complete.
 *
 * @param prFiles PR file entries (`filename`, optional `patch`).
 * @param args    Review arguments; reads `llmModel`, `llmBaseUrl` and
 *                `llmMaxTokens`. Model and base URL fall back to the
 *                `OPENAI_MODEL` / `OPENAI_BASE_URL` environment variables,
 *                and the model finally to `"gpt-4o"`.
 * @returns `{ summary?, warnings? }`. `summary` is absent when the review was
 *          skipped or failed.
 */
export async function runLlmDeepReview(prFiles, args) {
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) {
        return { warnings: ["OPENAI_API_KEY not set; skipping LLM deep review"] };
    }
    const model = args.llmModel || process.env.OPENAI_MODEL || "gpt-4o";
    const baseUrl = args.llmBaseUrl || process.env.OPENAI_BASE_URL;
    // Single dynamic import for both helpers from the benchmark module.
    const { constructTribunalPrompt, getValidRulePrefixes } = await import("./llm-benchmark.js");
    const { buildContextSnippets } = await import("../context/context-snippets.js");
    // Build code blob for tribunal prompt; collapse patches to new content
    const codeBlobs = [];
    const snippetsForRag = [];
    for (const pf of prFiles) {
        if (!pf.patch)
            continue;
        const hunk = parsePatchToHunk(pf.filename, pf.patch);
        codeBlobs.push(`// FILE: ${pf.filename}\n${hunk.newContent}`);
        snippetsForRag.push(hunk.newContent);
    }
    const codeJoined = codeBlobs.join("\n\n");
    // Build context snippets (RAG-lite) for prompt grounding
    const ragSnippets = await buildContextSnippets(snippetsForRag.join("\n\n"), {
        maxSnippets: 4,
        chunkSize: 1500,
    });
    const contextText = ragSnippets.map((s) => s.snippet);
    const tribunalPrompt = constructTribunalPrompt(codeJoined, "mixed", contextText);
    const { prompt: diffPrompt } = buildLlmPromptForPr(prFiles);
    const combinedPrompt = `${tribunalPrompt}\n\n---\n\nDiff summary for additional context:\n${diffPrompt}`;
    let content;
    try {
        content = await callOpenAiChatImpl(combinedPrompt, { apiKey, model, baseUrl, maxTokens: args.llmMaxTokens });
    }
    catch (err) {
        // The LLM layer is optional: report the failure rather than abort the review.
        const reason = err instanceof Error ? err.message : String(err);
        return { warnings: [`LLM deep review failed: ${reason}`] };
    }
    // Validate structured findings in the LLM output against the known rule prefixes
    const validation = extractValidatedLlmFindings(content, getValidRulePrefixes());
    const warnings = validation.errors?.length ? validation.errors : undefined;
    const summaryLines = [
        `### 🤖 LLM Deep Review Summary (model: ${model})`,
        "",
        validation.ruleIds.length ? `Detected rule IDs: ${validation.ruleIds.join(", ")}` : "No rule IDs detected.",
        "",
        content,
    ];
    return { summary: summaryLines.join("\n"), warnings };
}
|
|
223
365
|
// ─── Finding → Review Comment ───────────────────────────────────────────────
|
|
224
366
|
const SEVERITY_EMOJI = {
|
|
225
367
|
critical: "🔴",
|
|
@@ -291,7 +433,7 @@ function reviewPrFiles(files, minSeverity, maxComments, options, fpRates, fpThre
|
|
|
291
433
|
...options,
|
|
292
434
|
filePath: file.filename,
|
|
293
435
|
};
|
|
294
|
-
const verdict =
|
|
436
|
+
const verdict = evaluateDiffImpl(hunk.newContent, lang, hunk.changedLines, undefined, fileOpts);
|
|
295
437
|
for (const finding of verdict.findings) {
|
|
296
438
|
// Suppress findings from rules with high FP rates
|
|
297
439
|
if (fpRates && fpThreshold !== undefined) {
|
|
@@ -464,7 +606,7 @@ function _buildReviewSummary(result) {
|
|
|
464
606
|
}
|
|
465
607
|
function parseCommentMeta(comment) {
|
|
466
608
|
// Body format: `🔴 **CRITICAL** — Title here (\`RULE-001\`)`
|
|
467
|
-
const match = comment.body.match(/\*\*(\w+)\*\*\s
|
|
609
|
+
const match = comment.body.match(/\*\*(\w+)\*\*\s{0,5}\u2014([^(`]{1,500})\(`([^`]+)`\)/);
|
|
468
610
|
if (!match)
|
|
469
611
|
return undefined;
|
|
470
612
|
return {
|
|
@@ -554,6 +696,17 @@ export function buildPRReviewNarrative(result) {
|
|
|
554
696
|
}
|
|
555
697
|
lines.push("");
|
|
556
698
|
}
|
|
699
|
+
// ── Layer 2 (optional) ───────────────────────────────────────────
|
|
700
|
+
if (result.llmSummary) {
|
|
701
|
+
lines.push("### 🤖 Layer 2 — AI Deep Review (LLM)");
|
|
702
|
+
lines.push("");
|
|
703
|
+
lines.push(result.llmSummary);
|
|
704
|
+
lines.push("");
|
|
705
|
+
}
|
|
706
|
+
if (result.llmWarnings?.length) {
|
|
707
|
+
lines.push("> ⚠️ LLM warnings: " + result.llmWarnings.join("; "));
|
|
708
|
+
lines.push("");
|
|
709
|
+
}
|
|
557
710
|
// ── Cross-cutting themes ──────────────────────────────────────────
|
|
558
711
|
const byDomain = new Map();
|
|
559
712
|
for (const m of metas) {
|
|
@@ -664,6 +817,8 @@ export function parseReviewArgs(argv) {
|
|
|
664
817
|
minConfidence: 0.6,
|
|
665
818
|
calibrate: true,
|
|
666
819
|
crossFile: false,
|
|
820
|
+
llmDeepReview: false,
|
|
821
|
+
autopilot: false,
|
|
667
822
|
};
|
|
668
823
|
for (let i = 3; i < argv.length; i++) {
|
|
669
824
|
const arg = argv[i];
|
|
@@ -717,6 +872,22 @@ export function parseReviewArgs(argv) {
|
|
|
717
872
|
.map((s) => s.trim())
|
|
718
873
|
.filter(Boolean);
|
|
719
874
|
break;
|
|
875
|
+
case "--llm-deep-review":
|
|
876
|
+
args.llmDeepReview = true;
|
|
877
|
+
break;
|
|
878
|
+
case "--llm-model":
|
|
879
|
+
args.llmModel = argv[++i];
|
|
880
|
+
break;
|
|
881
|
+
case "--llm-base-url":
|
|
882
|
+
args.llmBaseUrl = argv[++i];
|
|
883
|
+
break;
|
|
884
|
+
case "--llm-max-tokens":
|
|
885
|
+
args.llmMaxTokens = parseInt(argv[++i], 10);
|
|
886
|
+
break;
|
|
887
|
+
case "--autopilot":
|
|
888
|
+
case "--gh-autopilot":
|
|
889
|
+
args.autopilot = true;
|
|
890
|
+
break;
|
|
720
891
|
default:
|
|
721
892
|
// Positional: treat as PR number if numeric
|
|
722
893
|
if (!arg.startsWith("-") && /^\d+$/.test(arg) && args.pr === 0) {
|
|
@@ -727,13 +898,17 @@ export function parseReviewArgs(argv) {
|
|
|
727
898
|
}
|
|
728
899
|
return args;
|
|
729
900
|
}
|
|
730
|
-
export function runReview(argv) {
|
|
901
|
+
export async function runReview(argv) {
|
|
731
902
|
const args = parseReviewArgs(argv);
|
|
732
903
|
// In JSON mode, redirect informational output to stderr so stdout is pure JSON
|
|
733
904
|
const _stdoutLog = console.log.bind(console);
|
|
734
905
|
if (args.format === "json") {
|
|
735
906
|
console.log = (...a) => console.error(...a);
|
|
736
907
|
}
|
|
908
|
+
if (args.autopilot) {
|
|
909
|
+
// Autopilot implies live mode
|
|
910
|
+
args.dryRun = false;
|
|
911
|
+
}
|
|
737
912
|
if (args.pr === 0) {
|
|
738
913
|
console.log(`
|
|
739
914
|
Judges Panel — Pull Request Review
|
|
@@ -760,6 +935,7 @@ OPTIONS:
|
|
|
760
935
|
--no-calibrate Disable feedback-driven confidence calibration (enabled by default)
|
|
761
936
|
--cross-file Enable cross-file architectural analysis (detects duplication, taint flows)
|
|
762
937
|
--judges <id,id,...> Only run these judges (comma-separated IDs, e.g. cybersecurity,authentication)
|
|
938
|
+
--autopilot Enable PR autopilot (fetch diff, post inline + summary). Implies live mode.
|
|
763
939
|
|
|
764
940
|
AUTHENTICATION:
|
|
765
941
|
Set GITHUB_TOKEN env var, or install the \`gh\` CLI and run \`gh auth login\`.
|
|
@@ -855,14 +1031,25 @@ AUTHENTICATION:
|
|
|
855
1031
|
console.log("");
|
|
856
1032
|
// Run analysis
|
|
857
1033
|
const result = reviewPrFiles(prFiles, args.minSeverity, args.maxComments, evalOptions, fpRates, fpThreshold, args.crossFile, args.minConfidence);
|
|
1034
|
+
// Deduplicate inline comments to avoid spam on reruns
|
|
1035
|
+
result.comments = dedupeComments(result.comments);
|
|
1036
|
+
// Optional Layer 2 (LLM) augmentation
|
|
1037
|
+
if (args.llmDeepReview) {
|
|
1038
|
+
const { summary, warnings } = await runLlmDeepReview(prFiles, args);
|
|
1039
|
+
if (summary)
|
|
1040
|
+
result.llmSummary = summary;
|
|
1041
|
+
if (warnings?.length)
|
|
1042
|
+
result.llmWarnings = warnings;
|
|
1043
|
+
}
|
|
858
1044
|
if (args.format === "json") {
|
|
859
1045
|
// Post review to GitHub before outputting JSON
|
|
860
1046
|
if (!args.dryRun && (result.comments.length > 0 || args.approve)) {
|
|
1047
|
+
const filteredComments = filterAlreadyPostedComments(repo, args.pr, args.token, result.comments);
|
|
861
1048
|
const reviewEvent = result.approved && args.approve ? "APPROVE" : result.approved ? "COMMENT" : "REQUEST_CHANGES";
|
|
862
1049
|
const reviewBody = {
|
|
863
1050
|
body: buildPRReviewNarrative(result),
|
|
864
1051
|
event: reviewEvent,
|
|
865
|
-
comments:
|
|
1052
|
+
comments: filteredComments,
|
|
866
1053
|
};
|
|
867
1054
|
const reviewResp = apiRequest("POST", `/repos/${repo}/pulls/${args.pr}/reviews`, args.token, reviewBody);
|
|
868
1055
|
if (reviewResp.status !== 200 && reviewResp.status !== 422) {
|
|
@@ -905,10 +1092,11 @@ AUTHENTICATION:
|
|
|
905
1092
|
// Post review to GitHub
|
|
906
1093
|
if (result.comments.length > 0 || args.approve) {
|
|
907
1094
|
const reviewEvent = result.approved && args.approve ? "APPROVE" : result.approved ? "COMMENT" : "REQUEST_CHANGES";
|
|
1095
|
+
const filteredComments = filterAlreadyPostedComments(repo, args.pr, args.token, result.comments);
|
|
908
1096
|
const reviewBody = {
|
|
909
1097
|
body: buildPRReviewNarrative(result),
|
|
910
1098
|
event: reviewEvent,
|
|
911
|
-
comments:
|
|
1099
|
+
comments: filteredComments,
|
|
912
1100
|
};
|
|
913
1101
|
const reviewResp = apiRequest("POST", `/repos/${repo}/pulls/${args.pr}/reviews`, args.token, reviewBody);
|
|
914
1102
|
if (reviewResp.status === 200 || reviewResp.status === 422) {
|
|
@@ -937,3 +1125,21 @@ AUTHENTICATION:
|
|
|
937
1125
|
console.log("");
|
|
938
1126
|
process.exit(result.approved ? 0 : 1);
|
|
939
1127
|
}
|
|
1128
|
+
/**
 * Programmatic autopilot entrypoint for GitHub App / automations.
 *
 * Assembles the equivalent of `judges review --pr <pr> --autopilot
 * [--repo <repo>]` and hands it to `runReview`.
 *
 * NOTE(review): `runReview` appears to finish with `process.exit(...)`;
 * confirm that is acceptable before calling this from a long-lived process.
 *
 * @param pr   Pull request number (stringified for the argument list).
 * @param repo Optional repository; `--repo` is only added when truthy.
 * @returns Whatever `runReview` returns for the assembled arguments.
 */
export function runReviewAutopilot(pr, repo) {
    const repoArgs = repo ? ["--repo", repo] : [];
    return runReview(["node", "judges", "review", "--pr", String(pr), "--autopilot", ...repoArgs]);
}
|
|
1137
|
+
// Test exports (non-public API)
|
|
1138
|
+
/**
 * Bundle of test-only helpers (not part of the public API).
 *
 * Collects the dependency-injection setters and `runLlmDeepReview`, plus
 * the real `evaluateDiff` under `__evaluateDiffForTest`.
 */
export const __test = {
    __setCallOpenAiChatImplForTest,
    __setApiRequestImplForTest,
    __setEvaluateDiffImplForTest,
    runLlmDeepReview,
    // the unpatched evaluator, exposed for patching in tests
    __evaluateDiffForTest: evaluateDiff,
};
|
package/dist/evaluators/index.js
CHANGED
|
@@ -218,7 +218,6 @@ function parseInlineSuppressions(code) {
|
|
|
218
218
|
// Active block suppressions: ruleId → { commentLine, reason }
|
|
219
219
|
const activeBlocks = new Map();
|
|
220
220
|
// Pattern: // judges-ignore[-next-line|-block] RULE-ID [, RULE-ID ...] [-- reason]
|
|
221
|
-
const suppressPattern = /(?:\/\/|#|\/\*)\s*judges-ignore(?:-(next-line|block))?\s+(.+)$/gi;
|
|
222
221
|
const endBlockPattern = /(?:\/\/|#|\/\*)\s*judges-end-block/i;
|
|
223
222
|
for (let i = 0; i < lines.length; i++) {
|
|
224
223
|
const line = lines[i];
|
|
@@ -233,42 +232,67 @@ function parseInlineSuppressions(code) {
|
|
|
233
232
|
arr.push({ ruleId, kind: "block", commentLine: meta.commentLine, reason: meta.reason });
|
|
234
233
|
lineSuppressed.set(lineNum, arr);
|
|
235
234
|
}
|
|
236
|
-
// Parse suppression directives
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
const targetLine = kind === "next-line" ? lineNum + 1 : lineNum;
|
|
247
|
-
for (const rawId of ruleIds) {
|
|
248
|
-
const ruleId = rawId === "*" ? "*" : rawId.toUpperCase();
|
|
249
|
-
if (kind === "block") {
|
|
250
|
-
// Start block suppression — applies to all subsequent lines until end-block
|
|
251
|
-
activeBlocks.set(ruleId, { commentLine: lineNum, reason });
|
|
235
|
+
// Parse suppression directives (string-based to avoid regex redos)
|
|
236
|
+
const ignoreIdx = line.indexOf("judges-ignore");
|
|
237
|
+
if (ignoreIdx >= 0) {
|
|
238
|
+
const before = line.substring(0, ignoreIdx).trimEnd();
|
|
239
|
+
if (before.endsWith("//") || before.endsWith("#") || before.endsWith("/*")) {
|
|
240
|
+
let rest = line.substring(ignoreIdx + "judges-ignore".length);
|
|
241
|
+
let modifier;
|
|
242
|
+
if (rest.toLowerCase().startsWith("-next-line")) {
|
|
243
|
+
modifier = "next-line";
|
|
244
|
+
rest = rest.substring("-next-line".length);
|
|
252
245
|
}
|
|
253
|
-
else {
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
246
|
+
else if (rest.toLowerCase().startsWith("-block")) {
|
|
247
|
+
modifier = "block";
|
|
248
|
+
rest = rest.substring("-block".length);
|
|
249
|
+
}
|
|
250
|
+
const trimmedRest = rest.trimStart();
|
|
251
|
+
if (trimmedRest.length < rest.length && trimmedRest.length > 0) {
|
|
252
|
+
let rawContent = trimmedRest;
|
|
253
|
+
if (rawContent.trimEnd().endsWith("*/")) {
|
|
254
|
+
rawContent = rawContent.replace("*/", "").trimEnd();
|
|
255
|
+
}
|
|
256
|
+
const dashSplit = rawContent.split(" -- ");
|
|
257
|
+
const ruleIds = dashSplit[0].split(/[, \t]+/).filter(Boolean);
|
|
258
|
+
const reason = dashSplit[1]?.trim() || undefined;
|
|
259
|
+
const kind = modifier === "next-line" ? "next-line" : modifier === "block" ? "block" : "line";
|
|
260
|
+
const targetLine = kind === "next-line" ? lineNum + 1 : lineNum;
|
|
261
|
+
for (const rawId of ruleIds) {
|
|
262
|
+
const ruleId = rawId === "*" ? "*" : rawId.toUpperCase();
|
|
263
|
+
if (kind === "block") {
|
|
264
|
+
// Start block suppression — applies to all subsequent lines until end-block
|
|
265
|
+
activeBlocks.set(ruleId, { commentLine: lineNum, reason });
|
|
266
|
+
}
|
|
267
|
+
else {
|
|
268
|
+
const arr = lineSuppressed.get(targetLine) ?? [];
|
|
269
|
+
arr.push({ ruleId, kind, commentLine: lineNum, reason });
|
|
270
|
+
lineSuppressed.set(targetLine, arr);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
257
273
|
}
|
|
258
274
|
}
|
|
259
275
|
}
|
|
260
276
|
// File-level suppression: // judges-file-ignore RULE-ID [-- reason]
|
|
261
|
-
const
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
277
|
+
const fileIgnoreIdx = line.indexOf("judges-file-ignore");
|
|
278
|
+
if (fileIgnoreIdx >= 0) {
|
|
279
|
+
const beforeFile = line.substring(0, fileIgnoreIdx).trimEnd();
|
|
280
|
+
if (beforeFile.endsWith("//") || beforeFile.endsWith("#") || beforeFile.endsWith("/*")) {
|
|
281
|
+
const fileRest = line.substring(fileIgnoreIdx + "judges-file-ignore".length);
|
|
282
|
+
const fileTrimmedRest = fileRest.trimStart();
|
|
283
|
+
if (fileTrimmedRest.length < fileRest.length && fileTrimmedRest.length > 0) {
|
|
284
|
+
let rawFileContent = fileTrimmedRest;
|
|
285
|
+
if (rawFileContent.trimEnd().endsWith("*/")) {
|
|
286
|
+
rawFileContent = rawFileContent.replace("*/", "").trimEnd();
|
|
287
|
+
}
|
|
288
|
+
const fileDashSplit = rawFileContent.split(" -- ");
|
|
289
|
+
const ruleIds = fileDashSplit[0].split(/[, \t]+/).filter(Boolean);
|
|
290
|
+
const reason = fileDashSplit[1]?.trim() || undefined;
|
|
291
|
+
for (const rawId of ruleIds) {
|
|
292
|
+
const ruleId = rawId === "*" ? "*" : rawId.toUpperCase();
|
|
293
|
+
globalSuppressed.push({ ruleId, kind: "file", commentLine: lineNum, reason });
|
|
294
|
+
}
|
|
295
|
+
}
|
|
272
296
|
}
|
|
273
297
|
}
|
|
274
298
|
}
|
|
@@ -722,9 +746,11 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
722
746
|
const modelFindings = calibrated.filter((f) => f.ruleId.startsWith("MFPR-"));
|
|
723
747
|
if (modelFindings.length > 0) {
|
|
724
748
|
// Extract detected model name from the finding title
|
|
725
|
-
const
|
|
726
|
-
|
|
727
|
-
|
|
749
|
+
const title = modelFindings[0].title;
|
|
750
|
+
const mIdx = title.indexOf("matches ");
|
|
751
|
+
const gIdx = mIdx >= 0 ? title.indexOf(" generation", mIdx + 8) : -1;
|
|
752
|
+
if (mIdx >= 0 && gIdx > mIdx) {
|
|
753
|
+
const detectedModel = title.substring(mIdx + 8, gIdx).trim();
|
|
728
754
|
const feedbackStore = loadFeedbackStore();
|
|
729
755
|
if (feedbackStore.entries.length > 0) {
|
|
730
756
|
const modelProfile = buildModelCalibrationProfile(feedbackStore, detectedModel);
|
package/dist/github-app.d.ts
CHANGED
|
@@ -19,7 +19,11 @@
|
|
|
19
19
|
* - JUDGES_PRIVATE_KEY — PEM private key (or path via JUDGES_PRIVATE_KEY_PATH)
|
|
20
20
|
* - JUDGES_WEBHOOK_SECRET — Webhook secret for signature verification
|
|
21
21
|
*/
|
|
22
|
+
import { evaluateWithTribunal } from "./evaluators/index.js";
|
|
23
|
+
import { evaluateProject } from "./evaluators/project.js";
|
|
22
24
|
import type { Severity } from "./types.js";
|
|
25
|
+
export declare let evaluateWithTribunalImpl: typeof evaluateWithTribunal;
|
|
26
|
+
export declare let evaluateProjectImpl: typeof evaluateProject;
|
|
23
27
|
export interface GitHubAppConfig {
|
|
24
28
|
/** GitHub App ID */
|
|
25
29
|
appId: string;
|
|
@@ -39,6 +43,8 @@ export interface GitHubAppConfig {
|
|
|
39
43
|
diffOnly?: boolean;
|
|
40
44
|
/** Path to .judgesrc.json config (optional) */
|
|
41
45
|
configPath?: string;
|
|
46
|
+
/** Enable Layer 2 (LLM) deep review augmentation */
|
|
47
|
+
llmDeepReview?: boolean;
|
|
42
48
|
}
|
|
43
49
|
interface WebhookPayload {
|
|
44
50
|
action: string;
|
|
@@ -98,8 +104,23 @@ interface WebhookResult {
|
|
|
98
104
|
export declare const EXT_TO_LANG: Record<string, string>;
|
|
99
105
|
export declare function detectLanguage(filePath: string): string | undefined;
|
|
100
106
|
export declare function generateJwt(appId: string, privateKey: string): string;
|
|
107
|
+
declare function ghApi(method: string, path: string, token: string, body?: unknown): Promise<{
|
|
108
|
+
status: number;
|
|
109
|
+
data: unknown;
|
|
110
|
+
}>;
|
|
111
|
+
export declare function __setGhApiImplForTest(fn: typeof ghApi | undefined): void;
|
|
112
|
+
interface LlmOptions {
|
|
113
|
+
apiKey: string;
|
|
114
|
+
model: string;
|
|
115
|
+
baseUrl?: string;
|
|
116
|
+
maxTokens?: number;
|
|
117
|
+
}
|
|
118
|
+
declare function callOpenAiChat(prompt: string, opts: LlmOptions): Promise<string>;
|
|
119
|
+
export declare function __setCallOpenAiChatImplForTest(fn: typeof callOpenAiChat): void;
|
|
120
|
+
declare function getInstallationToken(appId: string, privateKey: string, installationId: number): Promise<string>;
|
|
101
121
|
export declare function verifyWebhookSignature(payload: string, signature: string | undefined, secret: string): boolean;
|
|
102
122
|
export declare function parsePatchToHunk(filePath: string, patch: string): DiffHunk;
|
|
123
|
+
declare function reviewPullRequest(payload: WebhookPayload, token: string, config: GitHubAppConfig): Promise<WebhookResult>;
|
|
103
124
|
/**
|
|
104
125
|
* Handle an incoming GitHub webhook event.
|
|
105
126
|
* This is the primary entry point — can be used in serverless functions,
|
|
@@ -107,6 +128,20 @@ export declare function parsePatchToHunk(filePath: string, patch: string): DiffH
|
|
|
107
128
|
*/
|
|
108
129
|
export declare function handleWebhook(event: string, payload: string | WebhookPayload, signature: string | undefined, config: GitHubAppConfig): Promise<WebhookResult>;
|
|
109
130
|
export declare function loadAppConfig(): GitHubAppConfig;
|
|
131
|
+
export declare function __setEvaluateWithTribunalForTest(fn: typeof evaluateWithTribunal | undefined): void;
|
|
132
|
+
export declare function __setEvaluateProjectForTest(fn: typeof evaluateProject | undefined): void;
|
|
133
|
+
export declare function getEvaluateWithTribunalImpl(): typeof evaluateWithTribunal;
|
|
134
|
+
export declare function __getEvaluateWithTribunalImplForTest(): typeof evaluateWithTribunal;
|
|
135
|
+
export declare const __test: {
|
|
136
|
+
__setCallOpenAiChatImplForTest: typeof __setCallOpenAiChatImplForTest;
|
|
137
|
+
__getInstallationTokenForTest: (fn: typeof getInstallationToken) => void;
|
|
138
|
+
__setGhApiImplForTest: typeof __setGhApiImplForTest;
|
|
139
|
+
__setEvaluateWithTribunalForTest: typeof __setEvaluateWithTribunalForTest;
|
|
140
|
+
__setEvaluateProjectForTest: typeof __setEvaluateProjectForTest;
|
|
141
|
+
__getEvaluateWithTribunalImplForTest: typeof __getEvaluateWithTribunalImplForTest;
|
|
142
|
+
parsePatchToHunk: typeof parsePatchToHunk;
|
|
143
|
+
reviewPullRequest: typeof reviewPullRequest;
|
|
144
|
+
};
|
|
110
145
|
/**
|
|
111
146
|
* Start a standalone HTTP server that listens for GitHub webhooks.
|
|
112
147
|
* Usage: `judges app serve --port 3000`
|