@kevinrabun/judges 3.113.0 → 3.115.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +9 -0
  2. package/agents/accessibility.judge.md +37 -0
  3. package/agents/agent-instructions.judge.md +37 -0
  4. package/agents/ai-code-safety.judge.md +48 -0
  5. package/agents/api-contract.judge.md +30 -0
  6. package/agents/api-design.judge.md +39 -0
  7. package/agents/authentication.judge.md +37 -0
  8. package/agents/backwards-compatibility.judge.md +37 -0
  9. package/agents/caching.judge.md +37 -0
  10. package/agents/ci-cd.judge.md +37 -0
  11. package/agents/cloud-readiness.judge.md +37 -0
  12. package/agents/code-structure.judge.md +48 -0
  13. package/agents/compliance.judge.md +40 -0
  14. package/agents/concurrency.judge.md +39 -0
  15. package/agents/configuration-management.judge.md +37 -0
  16. package/agents/cost-effectiveness.judge.md +40 -0
  17. package/agents/cybersecurity.judge.md +36 -0
  18. package/agents/data-security.judge.md +34 -0
  19. package/agents/data-sovereignty.judge.md +58 -0
  20. package/agents/database.judge.md +41 -0
  21. package/agents/dependency-health.judge.md +39 -0
  22. package/agents/documentation.judge.md +39 -0
  23. package/agents/error-handling.judge.md +37 -0
  24. package/agents/ethics-bias.judge.md +39 -0
  25. package/agents/false-positive-review.judge.md +73 -0
  26. package/agents/framework-safety.judge.md +40 -0
  27. package/agents/hallucination-detection.judge.md +33 -0
  28. package/agents/iac-security.judge.md +38 -0
  29. package/agents/intent-alignment.judge.md +31 -0
  30. package/agents/internationalization.judge.md +42 -0
  31. package/agents/logging-privacy.judge.md +37 -0
  32. package/agents/logic-review.judge.md +34 -0
  33. package/agents/maintainability.judge.md +37 -0
  34. package/agents/model-fingerprint.judge.md +31 -0
  35. package/agents/multi-turn-coherence.judge.md +29 -0
  36. package/agents/observability.judge.md +37 -0
  37. package/agents/over-engineering.judge.md +48 -0
  38. package/agents/performance.judge.md +44 -0
  39. package/agents/portability.judge.md +37 -0
  40. package/agents/rate-limiting.judge.md +37 -0
  41. package/agents/reliability.judge.md +39 -0
  42. package/agents/scalability.judge.md +41 -0
  43. package/agents/security.judge.md +31 -0
  44. package/agents/software-practices.judge.md +44 -0
  45. package/agents/testing.judge.md +39 -0
  46. package/agents/ux.judge.md +37 -0
  47. package/dist/api.d.ts +9 -1
  48. package/dist/api.js +9 -1
  49. package/dist/commands/fix.d.ts +10 -0
  50. package/dist/commands/fix.js +52 -0
  51. package/dist/commands/llm-benchmark.d.ts +13 -4
  52. package/dist/commands/llm-benchmark.js +39 -8
  53. package/dist/commands/review.d.ts +51 -1
  54. package/dist/commands/review.js +213 -7
  55. package/dist/evaluators/index.js +61 -35
  56. package/dist/github-app.d.ts +35 -0
  57. package/dist/github-app.js +125 -4
  58. package/dist/judges/index.d.ts +23 -61
  59. package/dist/judges/index.js +49 -63
  60. package/dist/patches/apply.d.ts +15 -0
  61. package/dist/patches/apply.js +37 -0
  62. package/dist/tools/prompts.d.ts +2 -2
  63. package/dist/tools/prompts.js +21 -10
  64. package/docs/skills.md +7 -0
  65. package/package.json +18 -3
  66. package/packages/judges-cli/README.md +24 -0
  67. package/packages/judges-cli/bin/judges.js +8 -0
  68. package/scripts/generate-agents-from-judges.ts +111 -0
  69. package/scripts/generate-skills-docs.ts +26 -0
  70. package/scripts/validate-agents.ts +104 -0
  71. package/server.json +2 -2
  72. package/skills/ai-code-review.skill.md +57 -0
  73. package/skills/release-gate.skill.md +27 -0
  74. package/skills/security-review.skill.md +32 -0
  75. package/src/agent-loader.ts +324 -0
  76. package/src/skill-loader.ts +199 -0
@@ -17,12 +17,51 @@ import { execFileSync } from "child_process";
17
17
  import { readFileSync, writeFileSync, unlinkSync } from "fs";
18
18
  import { tmpdir } from "os";
19
19
  import { resolve, join, extname } from "path";
20
+ import { createHash } from "node:crypto";
20
21
  import { evaluateDiff, evaluateWithTribunal } from "../evaluators/index.js";
21
22
  import { evaluateProject } from "../evaluators/project.js";
23
// Unit-test seam: the per-file evaluation in reviewPrFiles goes through this
// binding, so a test can substitute a stub for the real evaluateDiff.
let evaluateDiffImpl = evaluateDiff;
/**
 * Swap the diff evaluator used during PR review (test-only hook).
 *
 * @param fn Replacement evaluator; passing null or undefined restores the
 *           real evaluateDiff.
 */
export function __setEvaluateDiffImplForTest(fn) {
    if (fn === null || fn === undefined) {
        evaluateDiffImpl = evaluateDiff;
        return;
    }
    evaluateDiffImpl = fn;
}
22
28
  import { parseConfig, loadCascadingConfig } from "../config.js";
23
29
  import { loadFeedbackStore, getFpRateByRule } from "./feedback.js";
24
30
  import { JUDGES } from "../judges/index.js";
25
31
  import { parseGitHubRepo, tryRunGit } from "../tools/command-safety.js";
32
+ import { extractValidatedLlmFindings } from "./llm-benchmark.js";
33
+ export function dedupeComments(comments) {
34
+ const seen = new Set();
35
+ const out = [];
36
+ for (const c of comments) {
37
+ const key = `${c.path}:${c.line}:${hashBody(c.body)}`;
38
+ if (seen.has(key))
39
+ continue;
40
+ seen.add(key);
41
+ out.push(c);
42
+ }
43
+ return out;
44
+ }
45
/**
 * Drop comments that already exist on the pull request, so that re-running a
 * review does not post the same inline comment twice.
 *
 * Existing review comments are fetched page by page. The GitHub REST API
 * returns only 30 items per request by default, so a single un-paginated call
 * misses everything past the first page on busy PRs and those comments would
 * be posted again.
 *
 * Best-effort: if the lookup fails, the error is logged and `comments` is
 * returned unfiltered.
 *
 * @param repo     Repository slug interpolated into `/repos/<repo>/pulls/...`.
 * @param pr       Pull request number.
 * @param token    Token forwarded to apiRequest.
 * @param comments Candidate comments with `path`, `line` and `body` fields.
 * @returns The candidates that are not already present on the PR.
 */
export function filterAlreadyPostedComments(repo, pr, token, comments) {
    const PER_PAGE = 100; // largest page size the endpoint accepts
    const MAX_PAGES = 20; // hard stop so a misbehaving backend cannot loop forever
    try {
        const existingKeys = new Set();
        for (let page = 1; page <= MAX_PAGES; page++) {
            const resp = apiRequest("GET", `/repos/${repo}/pulls/${pr}/comments?per_page=${PER_PAGE}&page=${page}`, token);
            const batch = resp.data ?? [];
            if (!Array.isArray(batch)) {
                // e.g. an error object; handled by the catch below like any other failure
                throw new Error("Unexpected response while listing PR review comments");
            }
            for (const c of batch) {
                existingKeys.add(`${c.path}:${c.line}:${hashBody(c.body ?? "")}`);
            }
            // A short page means there is nothing further to fetch.
            if (batch.length < PER_PAGE)
                break;
        }
        return comments.filter((c) => !existingKeys.has(`${c.path}:${c.line}:${hashBody(c.body)}`));
    }
    catch (err) {
        console.error("Failed to fetch existing comments, proceeding without dedupe", err);
        return comments;
    }
}
62
/**
 * Short fingerprint of a comment body, used when building dedupe keys.
 *
 * @param body Comment text to fingerprint.
 * @returns The first 8 hex characters of the SHA-1 digest of `body`.
 */
function hashBody(body) {
    const hasher = createHash("sha1");
    hasher.update(body);
    const hex = hasher.digest("hex");
    return hex.substring(0, 8);
}
26
65
  // ─── Language Detection ─────────────────────────────────────────────────────
27
66
  const EXT_TO_LANG = {
28
67
  ".ts": "typescript",
@@ -71,8 +110,8 @@ export function parsePatchToHunk(filePath, patch) {
71
110
  const changedLineNumbers = [];
72
111
  let newLineNum = 0;
73
112
  for (const line of lines) {
74
- // Hunk header: @@ -10,5 +20,8 @@
75
- const hunkMatch = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
113
+ // Hunk header: @@ -10,5 +20,8 @@ (some tools omit trailing space/@@)
114
+ const hunkMatch = line.match(/^@@\s*-\d+(?:,\d+)?\s+\+(\d+)(?:,\d+)?\s*@@?/);
76
115
  if (hunkMatch) {
77
116
  newLineNum = parseInt(hunkMatch[1], 10) - 1;
78
117
  continue;
@@ -209,7 +248,14 @@ function ghCliRequest(method, endpoint, body) {
209
248
  return { status: 0, data: null };
210
249
  }
211
250
  }
251
+ // Allow test injection of the GitHub API layer
252
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
253
+ let apiRequestImpl;
212
254
  function apiRequest(method, endpoint, token, body) {
255
+ const impl = apiRequestImpl;
256
+ if (impl) {
257
+ return impl(method, endpoint, token, body);
258
+ }
213
259
  if (ghCliAvailable()) {
214
260
  return ghCliRequest(method, endpoint, body);
215
261
  }
@@ -220,6 +266,102 @@ function apiRequest(method, endpoint, token, body) {
220
266
  console.error("Either install the `gh` CLI and run `gh auth login`, or set GITHUB_TOKEN env var.");
221
267
  process.exit(1);
222
268
  }
269
/**
 * Install or clear a replacement for the GitHub API layer (test-only hook).
 *
 * @param fn Function that apiRequest calls with (method, endpoint, token, body)
 *           in place of the real transport; a falsy value makes apiRequest
 *           fall through to its normal behavior again.
 */
export function __setApiRequestImplForTest(fn) {
    const replacement = fn;
    apiRequestImpl = replacement;
}
272
+ async function callOpenAiChat(prompt, opts) {
273
+ const baseUrl = opts.baseUrl || "https://api.openai.com/v1/chat/completions";
274
+ // Node 18+ has global fetch; avoid dynamic imports to keep tsc happy without node-fetch types
275
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
276
+ const fetchImpl = globalThis.fetch;
277
+ if (!fetchImpl)
278
+ throw new Error("fetch() not available. Run on Node 18+ or polyfill fetch.");
279
+ const res = await fetchImpl(baseUrl, {
280
+ method: "POST",
281
+ headers: {
282
+ Authorization: `Bearer ${opts.apiKey}`,
283
+ "Content-Type": "application/json",
284
+ },
285
+ body: JSON.stringify({
286
+ model: opts.model,
287
+ messages: [{ role: "user", content: prompt }],
288
+ max_tokens: opts.maxTokens ?? 800,
289
+ temperature: 0.2,
290
+ }),
291
+ });
292
+ if (!res.ok) {
293
+ const text = await res.text().catch(() => "");
294
+ throw new Error(`LLM request failed: ${res.status} ${res.statusText} ${text}`);
295
+ }
296
+ const json = (await res.json());
297
+ const content = json.choices?.[0]?.message?.content;
298
+ if (!content)
299
+ throw new Error("LLM response missing content");
300
+ return content;
301
+ }
302
// Test seam: runLlmDeepReview sends its prompt through this binding.
let callOpenAiChatImpl = callOpenAiChat;
/**
 * Replace the LLM transport used by runLlmDeepReview (test-only hook).
 *
 * @param fn Replacement taking (prompt, opts) and resolving to the response
 *           text. Passing null or undefined restores the real callOpenAiChat,
 *           so a test can reset the hook without leaving an uncallable
 *           binding behind.
 */
export function __setCallOpenAiChatImplForTest(fn) {
    callOpenAiChatImpl = fn ?? callOpenAiChat;
}
307
/**
 * Assemble one review prompt covering every file of the PR (tribunal mode).
 *
 * Files without a patch are ignored, as are files whose patch exceeds
 * `maxBytes` when encoded as UTF-8. The limit is applied to each patch
 * individually.
 *
 * @param prFiles  PR file entries with `filename` and optional `patch`.
 * @param maxBytes Per-patch size limit in bytes (default 40000).
 * @returns `prompt` with all retained patches, and `contextSnippets` holding
 *          at most the first five of them.
 */
function buildLlmPromptForPr(prFiles, maxBytes = 40000) {
    const retained = [];
    for (const file of prFiles) {
        const patch = file.patch;
        const fits = Boolean(patch) && Buffer.byteLength(patch, "utf-8") <= maxBytes;
        if (fits) {
            retained.push(`--- FILE: ${file.filename} ---\n${patch}`);
        }
    }
    const body = retained.join("\n\n");
    return {
        prompt: `Review the following PR diff. Return issues with rule IDs, severity, and recommendations.\n\n${body}`,
        contextSnippets: retained.slice(0, 5),
    };
}
321
/**
 * Optional "Layer 2" pass: ask an OpenAI-compatible model to review the PR
 * and return a markdown summary for the review narrative.
 *
 * The model is taken from `args.llmModel`, then OPENAI_MODEL, then "gpt-4o";
 * the base URL from `args.llmBaseUrl`, then OPENAI_BASE_URL.
 *
 * This layer is an augmentation, so it never rejects. A missing API key or
 * any failure while building the prompt or calling the model is reported
 * through `warnings`, so the caller can still post the rest of the review.
 *
 * @param prFiles PR file entries with `filename` and optional `patch`.
 * @param args    Parsed review arguments (`llmModel`, `llmBaseUrl`, `llmMaxTokens`).
 * @returns `{ summary, warnings }` on success, or `{ warnings }` when the
 *          pass was skipped or failed.
 */
export async function runLlmDeepReview(prFiles, args) {
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) {
        return { warnings: ["OPENAI_API_KEY not set; skipping LLM deep review"] };
    }
    const model = args.llmModel || process.env.OPENAI_MODEL || "gpt-4o";
    const baseUrl = args.llmBaseUrl || process.env.OPENAI_BASE_URL;
    try {
        const { constructTribunalPrompt, getValidRulePrefixes } = await import("./llm-benchmark.js");
        const { buildContextSnippets } = await import("../context/context-snippets.js");
        // Build code blob for tribunal prompt; collapse patches to new content
        const codeBlobs = [];
        const snippetsForRag = [];
        for (const pf of prFiles) {
            if (!pf.patch)
                continue;
            const hunk = parsePatchToHunk(pf.filename, pf.patch);
            codeBlobs.push(`// FILE: ${pf.filename}\n${hunk.newContent}`);
            snippetsForRag.push(hunk.newContent);
        }
        const codeJoined = codeBlobs.join("\n\n");
        // Build context snippets (RAG-lite) for prompt grounding
        const ragSnippets = await buildContextSnippets(snippetsForRag.join("\n\n"), {
            maxSnippets: 4,
            chunkSize: 1500,
        });
        const contextText = ragSnippets.map((s) => s.snippet);
        const tribunalPrompt = constructTribunalPrompt(codeJoined, "mixed", contextText);
        const { prompt: diffPrompt } = buildLlmPromptForPr(prFiles);
        const combinedPrompt = `${tribunalPrompt}\n\n---\n\nDiff summary for additional context:\n${diffPrompt}`;
        const content = await callOpenAiChatImpl(combinedPrompt, { apiKey, model, baseUrl, maxTokens: args.llmMaxTokens });
        // Validate structured findings in the LLM output against the known rule prefixes
        const validation = extractValidatedLlmFindings(content, getValidRulePrefixes());
        const warnings = validation.errors?.length ? validation.errors : undefined;
        const summaryLines = [
            `### 🤖 LLM Deep Review Summary (model: ${model})`,
            "",
            validation.ruleIds.length ? `Detected rule IDs: ${validation.ruleIds.join(", ")}` : "No rule IDs detected.",
            "",
            content,
        ];
        return { summary: summaryLines.join("\n"), warnings };
    }
    catch (err) {
        // Surface the failure as a warning, matching the missing-API-key path above.
        const reason = err instanceof Error ? err.message : String(err);
        return { warnings: [`LLM deep review failed: ${reason}`] };
    }
}
223
365
  // ─── Finding → Review Comment ───────────────────────────────────────────────
224
366
  const SEVERITY_EMOJI = {
225
367
  critical: "🔴",
@@ -291,7 +433,7 @@ function reviewPrFiles(files, minSeverity, maxComments, options, fpRates, fpThre
291
433
  ...options,
292
434
  filePath: file.filename,
293
435
  };
294
- const verdict = evaluateDiff(hunk.newContent, lang, hunk.changedLines, undefined, fileOpts);
436
+ const verdict = evaluateDiffImpl(hunk.newContent, lang, hunk.changedLines, undefined, fileOpts);
295
437
  for (const finding of verdict.findings) {
296
438
  // Suppress findings from rules with high FP rates
297
439
  if (fpRates && fpThreshold !== undefined) {
@@ -464,7 +606,7 @@ function _buildReviewSummary(result) {
464
606
  }
465
607
  function parseCommentMeta(comment) {
466
608
  // Body format: `🔴 **CRITICAL** — Title here (\`RULE-001\`)`
467
- const match = comment.body.match(/\*\*(\w+)\*\*\s*\u2014([^(`]+)\(`([^`]+)`\)/);
609
+ const match = comment.body.match(/\*\*(\w+)\*\*\s{0,5}\u2014([^(`]{1,500})\(`([^`]+)`\)/);
468
610
  if (!match)
469
611
  return undefined;
470
612
  return {
@@ -554,6 +696,17 @@ export function buildPRReviewNarrative(result) {
554
696
  }
555
697
  lines.push("");
556
698
  }
699
+ // ── Layer 2 (optional) ───────────────────────────────────────────
700
+ if (result.llmSummary) {
701
+ lines.push("### 🤖 Layer 2 — AI Deep Review (LLM)");
702
+ lines.push("");
703
+ lines.push(result.llmSummary);
704
+ lines.push("");
705
+ }
706
+ if (result.llmWarnings?.length) {
707
+ lines.push("> ⚠️ LLM warnings: " + result.llmWarnings.join("; "));
708
+ lines.push("");
709
+ }
557
710
  // ── Cross-cutting themes ──────────────────────────────────────────
558
711
  const byDomain = new Map();
559
712
  for (const m of metas) {
@@ -664,6 +817,8 @@ export function parseReviewArgs(argv) {
664
817
  minConfidence: 0.6,
665
818
  calibrate: true,
666
819
  crossFile: false,
820
+ llmDeepReview: false,
821
+ autopilot: false,
667
822
  };
668
823
  for (let i = 3; i < argv.length; i++) {
669
824
  const arg = argv[i];
@@ -717,6 +872,22 @@ export function parseReviewArgs(argv) {
717
872
  .map((s) => s.trim())
718
873
  .filter(Boolean);
719
874
  break;
875
+ case "--llm-deep-review":
876
+ args.llmDeepReview = true;
877
+ break;
878
+ case "--llm-model":
879
+ args.llmModel = argv[++i];
880
+ break;
881
+ case "--llm-base-url":
882
+ args.llmBaseUrl = argv[++i];
883
+ break;
884
+ case "--llm-max-tokens":
885
+ args.llmMaxTokens = parseInt(argv[++i], 10);
886
+ break;
887
+ case "--autopilot":
888
+ case "--gh-autopilot":
889
+ args.autopilot = true;
890
+ break;
720
891
  default:
721
892
  // Positional: treat as PR number if numeric
722
893
  if (!arg.startsWith("-") && /^\d+$/.test(arg) && args.pr === 0) {
@@ -727,13 +898,17 @@ export function parseReviewArgs(argv) {
727
898
  }
728
899
  return args;
729
900
  }
730
- export function runReview(argv) {
901
+ export async function runReview(argv) {
731
902
  const args = parseReviewArgs(argv);
732
903
  // In JSON mode, redirect informational output to stderr so stdout is pure JSON
733
904
  const _stdoutLog = console.log.bind(console);
734
905
  if (args.format === "json") {
735
906
  console.log = (...a) => console.error(...a);
736
907
  }
908
+ if (args.autopilot) {
909
+ // Autopilot implies live mode
910
+ args.dryRun = false;
911
+ }
737
912
  if (args.pr === 0) {
738
913
  console.log(`
739
914
  Judges Panel — Pull Request Review
@@ -760,6 +935,7 @@ OPTIONS:
760
935
  --no-calibrate Disable feedback-driven confidence calibration (enabled by default)
761
936
  --cross-file Enable cross-file architectural analysis (detects duplication, taint flows)
762
937
  --judges <id,id,...> Only run these judges (comma-separated IDs, e.g. cybersecurity,authentication)
938
+ --autopilot Enable PR autopilot (fetch diff, post inline + summary). Implies live mode.
763
939
 
764
940
  AUTHENTICATION:
765
941
  Set GITHUB_TOKEN env var, or install the \`gh\` CLI and run \`gh auth login\`.
@@ -855,14 +1031,25 @@ AUTHENTICATION:
855
1031
  console.log("");
856
1032
  // Run analysis
857
1033
  const result = reviewPrFiles(prFiles, args.minSeverity, args.maxComments, evalOptions, fpRates, fpThreshold, args.crossFile, args.minConfidence);
1034
+ // Deduplicate inline comments to avoid spam on reruns
1035
+ result.comments = dedupeComments(result.comments);
1036
+ // Optional Layer 2 (LLM) augmentation
1037
+ if (args.llmDeepReview) {
1038
+ const { summary, warnings } = await runLlmDeepReview(prFiles, args);
1039
+ if (summary)
1040
+ result.llmSummary = summary;
1041
+ if (warnings?.length)
1042
+ result.llmWarnings = warnings;
1043
+ }
858
1044
  if (args.format === "json") {
859
1045
  // Post review to GitHub before outputting JSON
860
1046
  if (!args.dryRun && (result.comments.length > 0 || args.approve)) {
1047
+ const filteredComments = filterAlreadyPostedComments(repo, args.pr, args.token, result.comments);
861
1048
  const reviewEvent = result.approved && args.approve ? "APPROVE" : result.approved ? "COMMENT" : "REQUEST_CHANGES";
862
1049
  const reviewBody = {
863
1050
  body: buildPRReviewNarrative(result),
864
1051
  event: reviewEvent,
865
- comments: result.comments,
1052
+ comments: filteredComments,
866
1053
  };
867
1054
  const reviewResp = apiRequest("POST", `/repos/${repo}/pulls/${args.pr}/reviews`, args.token, reviewBody);
868
1055
  if (reviewResp.status !== 200 && reviewResp.status !== 422) {
@@ -905,10 +1092,11 @@ AUTHENTICATION:
905
1092
  // Post review to GitHub
906
1093
  if (result.comments.length > 0 || args.approve) {
907
1094
  const reviewEvent = result.approved && args.approve ? "APPROVE" : result.approved ? "COMMENT" : "REQUEST_CHANGES";
1095
+ const filteredComments = filterAlreadyPostedComments(repo, args.pr, args.token, result.comments);
908
1096
  const reviewBody = {
909
1097
  body: buildPRReviewNarrative(result),
910
1098
  event: reviewEvent,
911
- comments: result.comments,
1099
+ comments: filteredComments,
912
1100
  };
913
1101
  const reviewResp = apiRequest("POST", `/repos/${repo}/pulls/${args.pr}/reviews`, args.token, reviewBody);
914
1102
  if (reviewResp.status === 200 || reviewResp.status === 422) {
@@ -937,3 +1125,21 @@ AUTHENTICATION:
937
1125
  console.log("");
938
1126
  process.exit(result.approved ? 0 : 1);
939
1127
  }
1128
/**
 * Run a review in autopilot mode without going through the CLI
 * (entry point for the GitHub App and other automations).
 *
 * Synthesizes the argv that `judges review --pr <pr> --autopilot` would
 * receive, appends `--repo <repo>` when a repository is supplied, and hands
 * the result to runReview.
 *
 * @param pr   Pull request number (stringified into argv).
 * @param repo Optional repository, passed through as `--repo`.
 * @returns Whatever runReview returns for the assembled arguments.
 */
export function runReviewAutopilot(pr, repo) {
    const repoArgs = repo ? ["--repo", repo] : [];
    return runReview(["node", "judges", "review", "--pr", String(pr), "--autopilot", ...repoArgs]);
}
1137
// Internal handles for the test suite; not part of the supported API surface.
export const __test = {
    __setCallOpenAiChatImplForTest: __setCallOpenAiChatImplForTest,
    __setApiRequestImplForTest: __setApiRequestImplForTest,
    __setEvaluateDiffImplForTest: __setEvaluateDiffImplForTest,
    runLlmDeepReview: runLlmDeepReview,
    // the unpatched evaluator, exposed so tests can wrap it
    __evaluateDiffForTest: evaluateDiff,
};
@@ -218,7 +218,6 @@ function parseInlineSuppressions(code) {
218
218
  // Active block suppressions: ruleId → { commentLine, reason }
219
219
  const activeBlocks = new Map();
220
220
  // Pattern: // judges-ignore[-next-line|-block] RULE-ID [, RULE-ID ...] [-- reason]
221
- const suppressPattern = /(?:\/\/|#|\/\*)\s*judges-ignore(?:-(next-line|block))?\s+(.+)$/gi;
222
221
  const endBlockPattern = /(?:\/\/|#|\/\*)\s*judges-end-block/i;
223
222
  for (let i = 0; i < lines.length; i++) {
224
223
  const line = lines[i];
@@ -233,42 +232,67 @@ function parseInlineSuppressions(code) {
233
232
  arr.push({ ruleId, kind: "block", commentLine: meta.commentLine, reason: meta.reason });
234
233
  lineSuppressed.set(lineNum, arr);
235
234
  }
236
- // Parse suppression directives
237
- let match;
238
- suppressPattern.lastIndex = 0;
239
- while ((match = suppressPattern.exec(line)) !== null) {
240
- const modifier = match[1]?.toLowerCase(); // "next-line", "block", or undefined
241
- const rawContent = match[2].replace(/\s*\*\/\s*$/, "");
242
- const dashSplit = rawContent.split(/\s+--\s+/);
243
- const ruleIds = dashSplit[0].split(/[,\s]+/).filter(Boolean);
244
- const reason = dashSplit[1]?.trim() || undefined;
245
- const kind = modifier === "next-line" ? "next-line" : modifier === "block" ? "block" : "line";
246
- const targetLine = kind === "next-line" ? lineNum + 1 : lineNum;
247
- for (const rawId of ruleIds) {
248
- const ruleId = rawId === "*" ? "*" : rawId.toUpperCase();
249
- if (kind === "block") {
250
- // Start block suppression — applies to all subsequent lines until end-block
251
- activeBlocks.set(ruleId, { commentLine: lineNum, reason });
235
+ // Parse suppression directives (string-based to avoid regex redos)
236
+ const ignoreIdx = line.indexOf("judges-ignore");
237
+ if (ignoreIdx >= 0) {
238
+ const before = line.substring(0, ignoreIdx).trimEnd();
239
+ if (before.endsWith("//") || before.endsWith("#") || before.endsWith("/*")) {
240
+ let rest = line.substring(ignoreIdx + "judges-ignore".length);
241
+ let modifier;
242
+ if (rest.toLowerCase().startsWith("-next-line")) {
243
+ modifier = "next-line";
244
+ rest = rest.substring("-next-line".length);
252
245
  }
253
- else {
254
- const arr = lineSuppressed.get(targetLine) ?? [];
255
- arr.push({ ruleId, kind, commentLine: lineNum, reason });
256
- lineSuppressed.set(targetLine, arr);
246
+ else if (rest.toLowerCase().startsWith("-block")) {
247
+ modifier = "block";
248
+ rest = rest.substring("-block".length);
249
+ }
250
+ const trimmedRest = rest.trimStart();
251
+ if (trimmedRest.length < rest.length && trimmedRest.length > 0) {
252
+ let rawContent = trimmedRest;
253
+ if (rawContent.trimEnd().endsWith("*/")) {
254
+ rawContent = rawContent.replace("*/", "").trimEnd();
255
+ }
256
+ const dashSplit = rawContent.split(" -- ");
257
+ const ruleIds = dashSplit[0].split(/[, \t]+/).filter(Boolean);
258
+ const reason = dashSplit[1]?.trim() || undefined;
259
+ const kind = modifier === "next-line" ? "next-line" : modifier === "block" ? "block" : "line";
260
+ const targetLine = kind === "next-line" ? lineNum + 1 : lineNum;
261
+ for (const rawId of ruleIds) {
262
+ const ruleId = rawId === "*" ? "*" : rawId.toUpperCase();
263
+ if (kind === "block") {
264
+ // Start block suppression — applies to all subsequent lines until end-block
265
+ activeBlocks.set(ruleId, { commentLine: lineNum, reason });
266
+ }
267
+ else {
268
+ const arr = lineSuppressed.get(targetLine) ?? [];
269
+ arr.push({ ruleId, kind, commentLine: lineNum, reason });
270
+ lineSuppressed.set(targetLine, arr);
271
+ }
272
+ }
257
273
  }
258
274
  }
259
275
  }
260
276
  // File-level suppression: // judges-file-ignore RULE-ID [-- reason]
261
- const filePattern = /(?:\/\/|#|\/\*)\s*judges-file-ignore\s+(.+)$/gi;
262
- let fileMatch;
263
- filePattern.lastIndex = 0;
264
- while ((fileMatch = filePattern.exec(line)) !== null) {
265
- const rawFileContent = fileMatch[1].replace(/\s*\*\/\s*$/, "");
266
- const fileDashSplit = rawFileContent.split(/\s+--\s+/);
267
- const ruleIds = fileDashSplit[0].split(/[,\s]+/).filter(Boolean);
268
- const reason = fileDashSplit[1]?.trim() || undefined;
269
- for (const rawId of ruleIds) {
270
- const ruleId = rawId === "*" ? "*" : rawId.toUpperCase();
271
- globalSuppressed.push({ ruleId, kind: "file", commentLine: lineNum, reason });
277
+ const fileIgnoreIdx = line.indexOf("judges-file-ignore");
278
+ if (fileIgnoreIdx >= 0) {
279
+ const beforeFile = line.substring(0, fileIgnoreIdx).trimEnd();
280
+ if (beforeFile.endsWith("//") || beforeFile.endsWith("#") || beforeFile.endsWith("/*")) {
281
+ const fileRest = line.substring(fileIgnoreIdx + "judges-file-ignore".length);
282
+ const fileTrimmedRest = fileRest.trimStart();
283
+ if (fileTrimmedRest.length < fileRest.length && fileTrimmedRest.length > 0) {
284
+ let rawFileContent = fileTrimmedRest;
285
+ if (rawFileContent.trimEnd().endsWith("*/")) {
286
+ rawFileContent = rawFileContent.replace("*/", "").trimEnd();
287
+ }
288
+ const fileDashSplit = rawFileContent.split(" -- ");
289
+ const ruleIds = fileDashSplit[0].split(/[, \t]+/).filter(Boolean);
290
+ const reason = fileDashSplit[1]?.trim() || undefined;
291
+ for (const rawId of ruleIds) {
292
+ const ruleId = rawId === "*" ? "*" : rawId.toUpperCase();
293
+ globalSuppressed.push({ ruleId, kind: "file", commentLine: lineNum, reason });
294
+ }
295
+ }
272
296
  }
273
297
  }
274
298
  }
@@ -722,9 +746,11 @@ export function evaluateWithTribunal(code, language, context, options) {
722
746
  const modelFindings = calibrated.filter((f) => f.ruleId.startsWith("MFPR-"));
723
747
  if (modelFindings.length > 0) {
724
748
  // Extract detected model name from the finding title
725
- const modelMatch = modelFindings[0].title.match(/matches\s+(.+?)\s+generation/);
726
- if (modelMatch) {
727
- const detectedModel = modelMatch[1];
749
+ const title = modelFindings[0].title;
750
+ const mIdx = title.indexOf("matches ");
751
+ const gIdx = mIdx >= 0 ? title.indexOf(" generation", mIdx + 8) : -1;
752
+ if (mIdx >= 0 && gIdx > mIdx) {
753
+ const detectedModel = title.substring(mIdx + 8, gIdx).trim();
728
754
  const feedbackStore = loadFeedbackStore();
729
755
  if (feedbackStore.entries.length > 0) {
730
756
  const modelProfile = buildModelCalibrationProfile(feedbackStore, detectedModel);
@@ -19,7 +19,11 @@
19
19
  * - JUDGES_PRIVATE_KEY — PEM private key (or path via JUDGES_PRIVATE_KEY_PATH)
20
20
  * - JUDGES_WEBHOOK_SECRET — Webhook secret for signature verification
21
21
  */
22
+ import { evaluateWithTribunal } from "./evaluators/index.js";
23
+ import { evaluateProject } from "./evaluators/project.js";
22
24
  import type { Severity } from "./types.js";
25
+ export declare let evaluateWithTribunalImpl: typeof evaluateWithTribunal;
26
+ export declare let evaluateProjectImpl: typeof evaluateProject;
23
27
  export interface GitHubAppConfig {
24
28
  /** GitHub App ID */
25
29
  appId: string;
@@ -39,6 +43,8 @@ export interface GitHubAppConfig {
39
43
  diffOnly?: boolean;
40
44
  /** Path to .judgesrc.json config (optional) */
41
45
  configPath?: string;
46
+ /** Enable Layer 2 (LLM) deep review augmentation */
47
+ llmDeepReview?: boolean;
42
48
  }
43
49
  interface WebhookPayload {
44
50
  action: string;
@@ -98,8 +104,23 @@ interface WebhookResult {
98
104
  export declare const EXT_TO_LANG: Record<string, string>;
99
105
  export declare function detectLanguage(filePath: string): string | undefined;
100
106
  export declare function generateJwt(appId: string, privateKey: string): string;
107
+ declare function ghApi(method: string, path: string, token: string, body?: unknown): Promise<{
108
+ status: number;
109
+ data: unknown;
110
+ }>;
111
+ export declare function __setGhApiImplForTest(fn: typeof ghApi | undefined): void;
112
+ interface LlmOptions {
113
+ apiKey: string;
114
+ model: string;
115
+ baseUrl?: string;
116
+ maxTokens?: number;
117
+ }
118
+ declare function callOpenAiChat(prompt: string, opts: LlmOptions): Promise<string>;
119
+ export declare function __setCallOpenAiChatImplForTest(fn: typeof callOpenAiChat): void;
120
+ declare function getInstallationToken(appId: string, privateKey: string, installationId: number): Promise<string>;
101
121
  export declare function verifyWebhookSignature(payload: string, signature: string | undefined, secret: string): boolean;
102
122
  export declare function parsePatchToHunk(filePath: string, patch: string): DiffHunk;
123
+ declare function reviewPullRequest(payload: WebhookPayload, token: string, config: GitHubAppConfig): Promise<WebhookResult>;
103
124
  /**
104
125
  * Handle an incoming GitHub webhook event.
105
126
  * This is the primary entry point — can be used in serverless functions,
@@ -107,6 +128,20 @@ export declare function parsePatchToHunk(filePath: string, patch: string): DiffH
107
128
  */
108
129
  export declare function handleWebhook(event: string, payload: string | WebhookPayload, signature: string | undefined, config: GitHubAppConfig): Promise<WebhookResult>;
109
130
  export declare function loadAppConfig(): GitHubAppConfig;
131
+ export declare function __setEvaluateWithTribunalForTest(fn: typeof evaluateWithTribunal | undefined): void;
132
+ export declare function __setEvaluateProjectForTest(fn: typeof evaluateProject | undefined): void;
133
+ export declare function getEvaluateWithTribunalImpl(): typeof evaluateWithTribunal;
134
+ export declare function __getEvaluateWithTribunalImplForTest(): typeof evaluateWithTribunal;
135
+ export declare const __test: {
136
+ __setCallOpenAiChatImplForTest: typeof __setCallOpenAiChatImplForTest;
137
+ __getInstallationTokenForTest: (fn: typeof getInstallationToken) => void;
138
+ __setGhApiImplForTest: typeof __setGhApiImplForTest;
139
+ __setEvaluateWithTribunalForTest: typeof __setEvaluateWithTribunalForTest;
140
+ __setEvaluateProjectForTest: typeof __setEvaluateProjectForTest;
141
+ __getEvaluateWithTribunalImplForTest: typeof __getEvaluateWithTribunalImplForTest;
142
+ parsePatchToHunk: typeof parsePatchToHunk;
143
+ reviewPullRequest: typeof reviewPullRequest;
144
+ };
110
145
  /**
111
146
  * Start a standalone HTTP server that listens for GitHub webhooks.
112
147
  * Usage: `judges app serve --port 3000`