@kevinrabun/judges 3.123.2 → 3.123.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -573,7 +573,7 @@ Evaluate a file with all 45 judges or a single judge.
573
573
  | `--baseline <path>` / `-b <path>` | JSON baseline file — suppress known findings |
574
574
  | `--summary` | Print a single summary line (ideal for scripts) |
575
575
  | `--config <path>` | Load a `.judgesrc` / `.judgesrc.json` config file |
576
- | `--preset <name>` | Use a named preset (see [Named Presets](#named-presets) for all 18 options) |
576
+ | `--preset <name>` | Use a named preset (see [Named Presets](#named-presets) for all 22 options) |
577
577
  | `--min-score <n>` | Exit with code 1 if overall score is below this threshold |
578
578
  | `--verbose` | Print timing and debug information |
579
579
  | `--quiet` | Suppress non-essential output |
@@ -1130,6 +1130,32 @@ Re-run the tribunal with **prior findings as context** for iterative refinement.
1130
1130
  | `relatedFiles` | array | no | Cross-file context `{ path, snippet, relationship? }[]` |
1131
1131
  | `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
1132
1132
 
1133
+ ### Additional MCP Tools
1134
+
1135
+ | Tool | Description |
1136
+ |------|-------------|
1137
+ | `evaluate_file` | Read a file from disk and submit it to the full panel. Auto-detects language from extension. |
1138
+ | `evaluate_code_streaming` | Streaming evaluation — returns per-judge results as each judge completes with running aggregates. |
1139
+ | `evaluate_focused` | Run only specified judges. Use after an initial full evaluation to re-check specific areas. |
1140
+ | `evaluate_batch` | Evaluate multiple code files in a single call. Returns per-file verdicts plus aggregate statistics. |
1141
+ | `evaluate_then_fix` | Evaluate code and automatically generate fix patches for all findings with auto-fix support. |
1142
+ | `evaluate_with_progress` | Evaluate with progress callbacks for long-running evaluations. |
1143
+ | `evaluate_policy_aware` | Policy-aware evaluation with named profiles (startup, regulated, healthcare, fintech, public-sector). |
1144
+ | `fix_code` | Evaluate code and apply all available auto-fix patches. Returns fixed code with applied/remaining summary. |
1145
+ | `explain_finding` | Explain a finding in plain language with OWASP/CWE references, risk context, and remediation guidance. |
1146
+ | `triage_finding` | Set triage status of a finding (accepted-risk, deferred, wont-fix, false-positive) with attribution. |
1147
+ | `record_feedback` | Record user feedback (true-positive, false-positive, wont-fix) to calibrate confidence scores. |
1148
+ | `get_finding_stats` | Finding lifecycle statistics: open, fixed, recurring, and triaged counts plus trends. |
1149
+ | `get_suppression_analytics` | Analyze suppression patterns: FP rates by rule, suppression rates, auto-suppress candidates. |
1150
+ | `list_triaged_findings` | List triaged findings, optionally filtered by triage status. |
1151
+ | `benchmark_gate` | Run benchmarks against quality thresholds. Returns pass/fail with F1, precision, recall metrics. |
1152
+ | `run_benchmark` | Run the full benchmark suite with per-judge, per-category, per-difficulty breakdowns. |
1153
+ | `scaffold_judge` | Generate boilerplate files to add a new judge: definition, evaluator skeleton, and registration. |
1154
+ | `scaffold_plugin` | Generate a starter plugin template with custom rules, judges, and lifecycle hooks. |
1155
+ | `session_status` | Current evaluation session state: evaluation count, frameworks, verdict history, stability. |
1156
+ | `list_files` | List files and directories in the workspace for project exploration. |
1157
+ | `read_file` | Read file contents from the workspace. |
1158
+
1133
1159
  #### Judge IDs
1134
1160
 
1135
1161
  `data-security` · `cybersecurity` · `security` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `api-contract` · `reliability` · `observability` · `performance` · `compliance` · `data-sovereignty` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias` · `maintainability` · `error-handling` · `authentication` · `database` · `caching` · `configuration-management` · `backwards-compatibility` · `portability` · `ux` · `logging-privacy` · `rate-limiting` · `ci-cd` · `code-structure` · `agent-instructions` · `ai-code-safety` · `framework-safety` · `iac-security` · `hallucination-detection` · `intent-alignment` · `multi-turn-coherence` · `model-fingerprint` · `over-engineering` · `logic-review` · `false-positive-review`
@@ -12,47 +12,9 @@
12
12
  // 4. Run a second pass on importing files with injected cross-file taint seeds
13
13
  // 5. Emit CrossFileTaintFlow findings with full file-to-file provenance
14
14
  // ─────────────────────────────────────────────────────────────────────────────
15
+ import { SOURCE_PATTERNS, SINK_PATTERNS, isSanitized, } from "./taint-tracker.js";
15
16
  import { normalizeLanguage } from "../language-patterns.js";
16
- // ─── Source / Sink pattern references (same as taint-tracker.ts) ─────────────
17
- const SOURCE_PATTERNS = [
18
- { pattern: /\breq(?:uest)?\.(?:body|query|params|headers|cookies)\b/i, kind: "http-param" },
19
- { pattern: /\brequest\.(?:form|args|json|data|values|files|get)\b/i, kind: "http-param" },
20
- { pattern: /\b(?:ctx|context)\.(?:query|params|request)\b/i, kind: "http-param" },
21
- { pattern: /\bgetParameter\s*\(/i, kind: "http-param" },
22
- { pattern: /\bRequest\.(?:Form|QueryString|Params)\b/i, kind: "http-param" },
23
- { pattern: /\b(?:process\.argv|sys\.argv|os\.Args|args)\b/i, kind: "user-input" },
24
- { pattern: /\b(?:prompt|readline|input)\s*\(/i, kind: "user-input" },
25
- { pattern: /\bsearchParams\.get\s*\(/i, kind: "url-param" },
26
- { pattern: /\.(?:useSearchParams|useParams)\b/i, kind: "url-param" },
27
- ];
28
- const SINK_PATTERNS = [
29
- { pattern: /\beval\s*\(/i, kind: "code-execution" },
30
- { pattern: /\bnew\s+Function\s*\(/i, kind: "code-execution" },
31
- { pattern: /\b(?:exec|execSync|system|popen|subprocess\.(?:Popen|run|call)|os\.system)\s*\(/i, kind: "command-exec" },
32
- { pattern: /\b(?:spawn|spawnSync)\s*\(/i, kind: "command-exec" },
33
- { pattern: /\.(?:query|execute|exec)\s*\(/i, kind: "sql-query" },
34
- { pattern: /\.innerHTML\s*=/i, kind: "xss" },
35
- { pattern: /\bdocument\.write\s*\(/i, kind: "xss" },
36
- { pattern: /\bdangerouslySetInnerHTML/i, kind: "xss" },
37
- { pattern: /\b(?:readFile|readFileSync|open)\s*\(/i, kind: "path-traversal" },
38
- { pattern: /\.redirect\s*\(/i, kind: "redirect" },
39
- { pattern: /\b(?:render_template_string|nunjucks\.renderString|Handlebars\.compile)\s*\(/i, kind: "template" },
40
- ];
41
- const SANITIZER_PATTERNS = [
42
- /\bDOMPurify\.sanitize\s*\(/i,
43
- /\bsanitizeHtml\s*\(/i,
44
- /\bescapeHtml\s*\(/i,
45
- /\bencodeURIComponent\s*\(/i,
46
- /\bvalidator\.\w+\s*\(/i,
47
- /\b(?:joi|yup|zod|ajv)\b.*\.(?:validate|parse|safeParse)\s*\(/i,
48
- /\$\d+/,
49
- /\?\s*(?:,|\))/,
50
- /\bpath\.(?:normalize|resolve|basename)\s*\(/i,
51
- /\bPreparedStatement\b/i,
52
- ];
53
- function isSanitized(expression) {
54
- return SANITIZER_PATTERNS.some((p) => p.test(expression));
55
- }
17
+ // ─── Source / Sink patterns imported from taint-tracker.ts ───────────────────
56
18
  // ─── Export Analysis ─────────────────────────────────────────────────────────
57
19
  /**
58
20
  * Analyze a file's exports to find which exported bindings carry taint.
@@ -24,6 +24,18 @@ export interface TaintFlow {
24
24
  }
25
25
  export type TaintSourceKind = "http-param" | "user-input" | "environment" | "url-param" | "external-data";
26
26
  export type TaintSinkKind = "code-execution" | "command-exec" | "sql-query" | "xss" | "path-traversal" | "redirect" | "template" | "deserialization";
27
+ export declare const SOURCE_PATTERNS: Array<{
28
+ pattern: RegExp;
29
+ kind: TaintSourceKind;
30
+ }>;
31
+ export declare const SINK_PATTERNS: Array<{
32
+ pattern: RegExp;
33
+ kind: TaintSinkKind;
34
+ }>;
35
+ /** Known sanitizer/escaping functions that neutralize taint */
36
+ export declare const SANITIZER_PATTERNS: RegExp[];
37
+ /** Check if a code expression passes through a known sanitizer */
38
+ export declare function isSanitized(expression: string): boolean;
27
39
  /**
28
40
  * Analyze a source file for taint flows: paths from untrusted input to
29
41
  * dangerous sinks through variable assignments and string concatenation.
@@ -14,7 +14,7 @@
14
14
  import ts from "typescript";
15
15
  import { normalizeLanguage } from "../language-patterns.js";
16
16
  // ─── Source / Sink Definitions ───────────────────────────────────────────────
17
- const SOURCE_PATTERNS = [
17
+ export const SOURCE_PATTERNS = [
18
18
  { pattern: /\breq(?:uest)?\.(?:body|query|params|headers|cookies)\b/i, kind: "http-param" },
19
19
  { pattern: /\brequest\.(?:form|args|json|data|values|files|get)\b/i, kind: "http-param" },
20
20
  { pattern: /\b(?:ctx|context)\.(?:query|params|request)\b/i, kind: "http-param" },
@@ -25,7 +25,7 @@ const SOURCE_PATTERNS = [
25
25
  { pattern: /\bsearchParams\.get\s*\(/i, kind: "url-param" },
26
26
  { pattern: /\.(?:useSearchParams|useParams)\b/i, kind: "url-param" },
27
27
  ];
28
- const SINK_PATTERNS = [
28
+ export const SINK_PATTERNS = [
29
29
  { pattern: /\beval\s*\(/i, kind: "code-execution" },
30
30
  { pattern: /\bnew\s+Function\s*\(/i, kind: "code-execution" },
31
31
  { pattern: /\bvm\.run(?:InContext|InNewContext|InThisContext)?\s*\(/i, kind: "code-execution" },
@@ -49,7 +49,7 @@ const SINK_PATTERNS = [
49
49
  ];
50
50
  // ─── Sanitizer Recognition ──────────────────────────────────────────────────
51
51
  /** Known sanitizer/escaping functions that neutralize taint */
52
- const SANITIZER_PATTERNS = [
52
+ export const SANITIZER_PATTERNS = [
53
53
  // DOM / HTML sanitizers
54
54
  /\bDOMPurify\.sanitize\s*\(/i,
55
55
  /\bsanitizeHtml\s*\(/i,
@@ -80,7 +80,7 @@ const SANITIZER_PATTERNS = [
80
80
  /\b(?:html|url)\.EscapeString\s*\(/i,
81
81
  ];
82
82
  /** Check if a code expression passes through a known sanitizer */
83
- function isSanitized(expression) {
83
+ export function isSanitized(expression) {
84
84
  return SANITIZER_PATTERNS.some((p) => p.test(expression));
85
85
  }
86
86
  // ─── Guard Clause Detection ─────────────────────────────────────────────────
@@ -877,7 +877,7 @@ app.post("/api/orders", async (req, res) => {
877
877
  app.put("/api/profile", async (req, res) => {
878
878
  if (!req.body.name) return res.status(500).json({ error: "Name required" }); // Wrong status code
879
879
  });`,
880
- expectedRuleIds: ["UX-001", "SEC-001"],
880
+ expectedRuleIds: ["UX-001"],
881
881
  category: "ux",
882
882
  difficulty: "medium",
883
883
  },
@@ -625,7 +625,7 @@ function getDiscount(total: number, loyaltyYears: number): number {
625
625
  if (total > 100) return total * 0.05;
626
626
  return 0;
627
627
  }`,
628
- expectedRuleIds: ["MAINT-001"],
628
+ expectedRuleIds: [],
629
629
  category: "maintainability",
630
630
  difficulty: "easy",
631
631
  },
@@ -689,7 +689,7 @@ export class DataProcessor {
689
689
  process(item: unknown): void { this.buffer.push(item); }
690
690
  flush(): unknown[] { const r = [...this.buffer]; this.buffer = []; return r; }
691
691
  }`,
692
- expectedRuleIds: ["DOC-001", "SEC-001"],
692
+ expectedRuleIds: ["DOC-001"],
693
693
  category: "documentation",
694
694
  difficulty: "easy",
695
695
  },
@@ -723,7 +723,7 @@ export class DataProcessor {
723
723
  }
724
724
  return parseAddSub();
725
725
  }`,
726
- expectedRuleIds: ["TEST-001"],
726
+ expectedRuleIds: [],
727
727
  category: "testing",
728
728
  difficulty: "medium",
729
729
  },
@@ -920,7 +920,7 @@ app.get("/config", async (req, res) => {
920
920
  const config = await db.query("SELECT * FROM app_config");
921
921
  res.json(config);
922
922
  });`,
923
- expectedRuleIds: ["COST-001", "OBS-001", "SEC-001"],
923
+ expectedRuleIds: ["COST-001"],
924
924
  category: "caching",
925
925
  difficulty: "medium",
926
926
  },
@@ -1018,7 +1018,7 @@ app.post("/upload", async (req, res) => {
1018
1018
  "mongoose": "^7.0.0"
1019
1019
  }
1020
1020
  }`,
1021
- expectedRuleIds: ["CICD-001"],
1021
+ expectedRuleIds: [],
1022
1022
  category: "ci-cd",
1023
1023
  difficulty: "easy",
1024
1024
  },
@@ -1038,7 +1038,7 @@ app.post("/upload", async (req, res) => {
1038
1038
  "express": "^4.18.0"
1039
1039
  }
1040
1040
  }`,
1041
- expectedRuleIds: ["SWDEV-001"],
1041
+ expectedRuleIds: [],
1042
1042
  category: "software-practices",
1043
1043
  difficulty: "easy",
1044
1044
  },
@@ -73,6 +73,28 @@ const CATEGORY_ACCEPTABLE_PREFIXES = {
73
73
  "code-quality": ["MAINT", "API", "STRUCT", "SWDEV", "LOGIC", "ERR"],
74
74
  "supply-chain": ["DEPS", "SEC", "COMPAT", "MAINT"],
75
75
  "ai-security": ["AICS", "SEC", "CYBER", "DATA", "ERR", "LOGIC"],
76
+ structure: [
77
+ "STRUCT",
78
+ "MAINT",
79
+ "SWDEV",
80
+ "ERR",
81
+ "LOGIC",
82
+ "SEC",
83
+ "PERF",
84
+ "API",
85
+ "DB",
86
+ "OBS",
87
+ "AUTH",
88
+ "CACHE",
89
+ "SCALE",
90
+ "CYBER",
91
+ "AICS",
92
+ "RATE",
93
+ "CONC",
94
+ "COST",
95
+ "UX",
96
+ "COMPAT",
97
+ ],
76
98
  clean: [], // Clean code — no acceptable prefixes, all findings are FPs
77
99
  };
78
100
  /**
@@ -132,6 +154,13 @@ export function parseLlmRuleIds(response) {
132
154
  found.add(match[0]);
133
155
  }
134
156
  }
157
+ // Secondary pass: extract known prefixes from compound IDs like DEPS-TYPO-001
158
+ const compoundPattern = /\b([A-Z][A-Z0-9]+)-[A-Z][A-Z0-9]+-(\d{1,3})\b/g;
159
+ while ((match = compoundPattern.exec(response)) !== null) {
160
+ if (validPrefixes.has(match[1])) {
161
+ found.add(`${match[1]}-${match[2]}`);
162
+ }
163
+ }
135
164
  return [...found];
136
165
  }
137
166
  /**
@@ -761,7 +761,10 @@ export function analyzeCybersecurity(code, language, context) {
761
761
  if (lang === "rust") {
762
762
  const unsafeLines = getLineNumbers(code, /\bunsafe\s*\{/g);
763
763
  if (unsafeLines.length > 0) {
764
- const hasSafetyDoc = testCode(code, /\/\/\s*SAFETY:|\/\/\s*UNSAFE:|#\[allow\(unsafe_code\)\]/gi);
764
+ // Check raw code (not comment-stripped) because SAFETY docs ARE comments
765
+ const safetyDocRe = /\/\/\s*SAFETY:|\/\/\s*UNSAFE:|#\[allow\(unsafe_code\)\]/gi;
766
+ safetyDocRe.lastIndex = 0;
767
+ const hasSafetyDoc = safetyDocRe.test(code);
765
768
  if (!hasSafetyDoc) {
766
769
  findings.push({
767
770
  ruleId: `${prefix}-${String(ruleNum++).padStart(3, "0")}`,
@@ -8,32 +8,8 @@
8
8
  * - Finding details with line numbers and suggested fixes
9
9
  * - Dark/light theme auto-detection
10
10
  */
11
- // ─── HTML Escaping ──────────────────────────────────────────────────────────
12
- function esc(text) {
13
- return text
14
- .replace(/&/g, "&amp;")
15
- .replace(/</g, "&lt;")
16
- .replace(/>/g, "&gt;")
17
- .replace(/"/g, "&quot;")
18
- .replace(/'/g, "&#39;");
19
- }
11
+ import { esc, severityColor } from "./shared.js";
20
12
  // ─── Verdict Colors ─────────────────────────────────────────────────────────
21
- function severityColor(severity) {
22
- switch (severity) {
23
- case "critical":
24
- return "#dc2626";
25
- case "high":
26
- return "#ea580c";
27
- case "medium":
28
- return "#ca8a04";
29
- case "low":
30
- return "#2563eb";
31
- case "info":
32
- return "#6b7280";
33
- default:
34
- return "#6b7280";
35
- }
36
- }
37
13
  function verdictColor(verdict) {
38
14
  switch (verdict) {
39
15
  case "pass":
@@ -8,30 +8,7 @@
8
8
  * - @media print styles for proper pagination
9
9
  * - Executive summary on first page
10
10
  */
11
- function esc(text) {
12
- return text
13
- .replace(/&/g, "&amp;")
14
- .replace(/</g, "&lt;")
15
- .replace(/>/g, "&gt;")
16
- .replace(/"/g, "&quot;")
17
- .replace(/'/g, "&#39;");
18
- }
19
- function severityColor(severity) {
20
- switch (severity) {
21
- case "critical":
22
- return "#dc2626";
23
- case "high":
24
- return "#ea580c";
25
- case "medium":
26
- return "#ca8a04";
27
- case "low":
28
- return "#2563eb";
29
- case "info":
30
- return "#6b7280";
31
- default:
32
- return "#6b7280";
33
- }
34
- }
11
+ import { esc, severityColor } from "./shared.js";
35
12
  function renderFinding(f) {
36
13
  const color = severityColor(f.severity);
37
14
  return `<tr>
@@ -0,0 +1,4 @@
1
+ /** HTML-escape a string to prevent XSS in generated reports. */
2
+ export declare function esc(text: string): string;
3
+ /** Map a severity level to its display hex color. */
4
+ export declare function severityColor(severity: string): string;
@@ -0,0 +1,29 @@
1
+ // ─── Shared Formatter Utilities ──────────────────────────────────────────────
2
+ // Common helpers used by multiple output formatters (HTML, PDF, etc.).
3
+ // ──────────────────────────────────────────────────────────────────────────────
4
+ /** HTML-escape a string to prevent XSS in generated reports. */
5
+ export function esc(text) {
6
+ return text
7
+ .replace(/&/g, "&amp;")
8
+ .replace(/</g, "&lt;")
9
+ .replace(/>/g, "&gt;")
10
+ .replace(/"/g, "&quot;")
11
+ .replace(/'/g, "&#39;");
12
+ }
13
+ /** Map a severity level to its display hex color. */
14
+ export function severityColor(severity) {
15
+ switch (severity) {
16
+ case "critical":
17
+ return "#dc2626";
18
+ case "high":
19
+ return "#ea580c";
20
+ case "medium":
21
+ return "#ca8a04";
22
+ case "low":
23
+ return "#2563eb";
24
+ case "info":
25
+ return "#6b7280";
26
+ default:
27
+ return "#6b7280";
28
+ }
29
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kevinrabun/judges",
3
- "version": "3.123.2",
3
+ "version": "3.123.4",
4
4
  "description": "45 specialized judges that evaluate AI-generated code for security, cost, and quality.",
5
5
  "mcpName": "io.github.KevinRabun/judges",
6
6
  "type": "module",
package/server.json CHANGED
@@ -16,12 +16,12 @@
16
16
  "mimeType": "image/png"
17
17
  }
18
18
  ],
19
- "version": "3.123.2",
19
+ "version": "3.123.4",
20
20
  "packages": [
21
21
  {
22
22
  "registryType": "npm",
23
23
  "identifier": "@kevinrabun/judges",
24
- "version": "3.123.2",
24
+ "version": "3.123.4",
25
25
  "transport": {
26
26
  "type": "stdio"
27
27
  }