npm - @kevinrabun/judges - Versions diffs - 3.123.2 → 3.123.4 - Mend

@kevinrabun/judges 3.123.2 → 3.123.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +27 -1
package/dist/ast/cross-file-taint.js +2 -40
package/dist/ast/taint-tracker.d.ts +12 -0
package/dist/ast/taint-tracker.js +4 -4
package/dist/commands/benchmark-compliance-ethics.js +1 -1
package/dist/commands/benchmark.js +6 -6
package/dist/commands/llm-benchmark.js +29 -0
package/dist/evaluators/cybersecurity.js +4 -1
package/dist/formatters/html.js +1 -25
package/dist/formatters/pdf.js +1 -24
package/dist/formatters/shared.d.ts +4 -0
package/dist/formatters/shared.js +29 -0
package/package.json +1 -1
package/server.json +2 -2

package/README.md CHANGED Viewed

@@ -573,7 +573,7 @@ Evaluate a file with all 45 judges or a single judge.
 | `--baseline <path>` / `-b <path>` | JSON baseline file — suppress known findings |
 | `--summary` | Print a single summary line (ideal for scripts) |
 | `--config <path>` | Load a `.judgesrc` / `.judgesrc.json` config file |
-| `--preset <name>` | Use a named preset (see [Named Presets](#named-presets) for all 18 options) |
+| `--preset <name>` | Use a named preset (see [Named Presets](#named-presets) for all 22 options) |
 | `--min-score <n>` | Exit with code 1 if overall score is below this threshold |
 | `--verbose` | Print timing and debug information |
 | `--quiet` | Suppress non-essential output |
@@ -1130,6 +1130,32 @@ Re-run the tribunal with **prior findings as context** for iterative refinement.
 | `relatedFiles` | array | no | Cross-file context `{ path, snippet, relationship? }[]` |
 | `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
+### Additional MCP Tools
+| Tool | Description |
+|------|-------------|
+| `evaluate_file` | Read a file from disk and submit it to the full panel. Auto-detects language from extension. |
+| `evaluate_code_streaming` | Streaming evaluation — returns per-judge results as each judge completes with running aggregates. |
+| `evaluate_focused` | Run only specified judges. Use after an initial full evaluation to re-check specific areas. |
+| `evaluate_batch` | Evaluate multiple code files in a single call. Returns per-file verdicts plus aggregate statistics. |
+| `evaluate_then_fix` | Evaluate code and automatically generate fix patches for all findings with auto-fix support. |
+| `evaluate_with_progress` | Evaluate with progress callbacks for long-running evaluations. |
+| `evaluate_policy_aware` | Policy-aware evaluation with named profiles (startup, regulated, healthcare, fintech, public-sector). |
+| `fix_code` | Evaluate code and apply all available auto-fix patches. Returns fixed code with applied/remaining summary. |
+| `explain_finding` | Explain a finding in plain language with OWASP/CWE references, risk context, and remediation guidance. |
+| `triage_finding` | Set triage status of a finding (accepted-risk, deferred, wont-fix, false-positive) with attribution. |
+| `record_feedback` | Record user feedback (true-positive, false-positive, wont-fix) to calibrate confidence scores. |
+| `get_finding_stats` | Finding lifecycle statistics: open, fixed, recurring, and triaged counts plus trends. |
+| `get_suppression_analytics` | Analyze suppression patterns: FP rates by rule, suppression rates, auto-suppress candidates. |
+| `list_triaged_findings` | List triaged findings, optionally filtered by triage status. |
+| `benchmark_gate` | Run benchmarks against quality thresholds. Returns pass/fail with F1, precision, recall metrics. |
+| `run_benchmark` | Run the full benchmark suite with per-judge, per-category, per-difficulty breakdowns. |
+| `scaffold_judge` | Generate boilerplate files to add a new judge: definition, evaluator skeleton, and registration. |
+| `scaffold_plugin` | Generate a starter plugin template with custom rules, judges, and lifecycle hooks. |
+| `session_status` | Current evaluation session state: evaluation count, frameworks, verdict history, stability. |
+| `list_files` | List files and directories in the workspace for project exploration. |
+| `read_file` | Read file contents from the workspace. |
 #### Judge IDs
 `data-security` · `cybersecurity` · `security` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `api-contract` · `reliability` · `observability` · `performance` · `compliance` · `data-sovereignty` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias` · `maintainability` · `error-handling` · `authentication` · `database` · `caching` · `configuration-management` · `backwards-compatibility` · `portability` · `ux` · `logging-privacy` · `rate-limiting` · `ci-cd` · `code-structure` · `agent-instructions` · `ai-code-safety` · `framework-safety` · `iac-security` · `hallucination-detection` · `intent-alignment` · `multi-turn-coherence` · `model-fingerprint` · `over-engineering` · `logic-review` · `false-positive-review`

package/dist/ast/cross-file-taint.js CHANGED Viewed

@@ -12,47 +12,9 @@
 // 4. Run a second pass on importing files with injected cross-file taint seeds
 // 5. Emit CrossFileTaintFlow findings with full file-to-file provenance
 // ─────────────────────────────────────────────────────────────────────────────
+import { SOURCE_PATTERNS, SINK_PATTERNS, isSanitized, } from "./taint-tracker.js";
 import { normalizeLanguage } from "../language-patterns.js";
-// ─── Source / Sink pattern references (same as taint-tracker.ts) ─────────────
-const SOURCE_PATTERNS = [
-    { pattern: /\breq(?:uest)?\.(?:body|query|params|headers|cookies)\b/i, kind: "http-param" },
-    { pattern: /\brequest\.(?:form|args|json|data|values|files|get)\b/i, kind: "http-param" },
-    { pattern: /\b(?:ctx|context)\.(?:query|params|request)\b/i, kind: "http-param" },
-    { pattern: /\bgetParameter\s*\(/i, kind: "http-param" },
-    { pattern: /\bRequest\.(?:Form|QueryString|Params)\b/i, kind: "http-param" },
-    { pattern: /\b(?:process\.argv|sys\.argv|os\.Args|args)\b/i, kind: "user-input" },
-    { pattern: /\b(?:prompt|readline|input)\s*\(/i, kind: "user-input" },
-    { pattern: /\bsearchParams\.get\s*\(/i, kind: "url-param" },
-    { pattern: /\.(?:useSearchParams|useParams)\b/i, kind: "url-param" },
-];
-const SINK_PATTERNS = [
-    { pattern: /\beval\s*\(/i, kind: "code-execution" },
-    { pattern: /\bnew\s+Function\s*\(/i, kind: "code-execution" },
-    { pattern: /\b(?:exec|execSync|system|popen|subprocess\.(?:Popen|run|call)|os\.system)\s*\(/i, kind: "command-exec" },
-    { pattern: /\b(?:spawn|spawnSync)\s*\(/i, kind: "command-exec" },
-    { pattern: /\.(?:query|execute|exec)\s*\(/i, kind: "sql-query" },
-    { pattern: /\.innerHTML\s*=/i, kind: "xss" },
-    { pattern: /\bdocument\.write\s*\(/i, kind: "xss" },
-    { pattern: /\bdangerouslySetInnerHTML/i, kind: "xss" },
-    { pattern: /\b(?:readFile|readFileSync|open)\s*\(/i, kind: "path-traversal" },
-    { pattern: /\.redirect\s*\(/i, kind: "redirect" },
-    { pattern: /\b(?:render_template_string|nunjucks\.renderString|Handlebars\.compile)\s*\(/i, kind: "template" },
-];
-const SANITIZER_PATTERNS = [
-    /\bDOMPurify\.sanitize\s*\(/i,
-    /\bsanitizeHtml\s*\(/i,
-    /\bescapeHtml\s*\(/i,
-    /\bencodeURIComponent\s*\(/i,
-    /\bvalidator\.\w+\s*\(/i,
-    /\b(?:joi|yup|zod|ajv)\b.*\.(?:validate|parse|safeParse)\s*\(/i,
-    /\$\d+/,
-    /\?\s*(?:,|\))/,
-    /\bpath\.(?:normalize|resolve|basename)\s*\(/i,
-    /\bPreparedStatement\b/i,
-];
-function isSanitized(expression) {
-    return SANITIZER_PATTERNS.some((p) => p.test(expression));
-}
+// ─── Source / Sink patterns imported from taint-tracker.ts ───────────────────
 // ─── Export Analysis ─────────────────────────────────────────────────────────
 /**
  * Analyze a file's exports to find which exported bindings carry taint.

package/dist/ast/taint-tracker.d.ts CHANGED Viewed

@@ -24,6 +24,18 @@ export interface TaintFlow {
 }
 export type TaintSourceKind = "http-param" | "user-input" | "environment" | "url-param" | "external-data";
 export type TaintSinkKind = "code-execution" | "command-exec" | "sql-query" | "xss" | "path-traversal" | "redirect" | "template" | "deserialization";
+export declare const SOURCE_PATTERNS: Array<{
+    pattern: RegExp;
+    kind: TaintSourceKind;
+}>;
+export declare const SINK_PATTERNS: Array<{
+    pattern: RegExp;
+    kind: TaintSinkKind;
+}>;
+/** Known sanitizer/escaping functions that neutralize taint */
+export declare const SANITIZER_PATTERNS: RegExp[];
+/** Check if a code expression passes through a known sanitizer */
+export declare function isSanitized(expression: string): boolean;
 /**
  * Analyze a source file for taint flows: paths from untrusted input to
  * dangerous sinks through variable assignments and string concatenation.

package/dist/ast/taint-tracker.js CHANGED Viewed

@@ -14,7 +14,7 @@
 import ts from "typescript";
 import { normalizeLanguage } from "../language-patterns.js";
 // ─── Source / Sink Definitions ───────────────────────────────────────────────
-const SOURCE_PATTERNS = [
+export const SOURCE_PATTERNS = [
     { pattern: /\breq(?:uest)?\.(?:body|query|params|headers|cookies)\b/i, kind: "http-param" },
     { pattern: /\brequest\.(?:form|args|json|data|values|files|get)\b/i, kind: "http-param" },
     { pattern: /\b(?:ctx|context)\.(?:query|params|request)\b/i, kind: "http-param" },
@@ -25,7 +25,7 @@ const SOURCE_PATTERNS = [
     { pattern: /\bsearchParams\.get\s*\(/i, kind: "url-param" },
     { pattern: /\.(?:useSearchParams|useParams)\b/i, kind: "url-param" },
 ];
-const SINK_PATTERNS = [
+export const SINK_PATTERNS = [
     { pattern: /\beval\s*\(/i, kind: "code-execution" },
     { pattern: /\bnew\s+Function\s*\(/i, kind: "code-execution" },
     { pattern: /\bvm\.run(?:InContext|InNewContext|InThisContext)?\s*\(/i, kind: "code-execution" },
@@ -49,7 +49,7 @@ const SINK_PATTERNS = [
 ];
 // ─── Sanitizer Recognition ──────────────────────────────────────────────────
 /** Known sanitizer/escaping functions that neutralize taint */
-const SANITIZER_PATTERNS = [
+export const SANITIZER_PATTERNS = [
     // DOM / HTML sanitizers
     /\bDOMPurify\.sanitize\s*\(/i,
     /\bsanitizeHtml\s*\(/i,
@@ -80,7 +80,7 @@ const SANITIZER_PATTERNS = [
     /\b(?:html|url)\.EscapeString\s*\(/i,
 ];
 /** Check if a code expression passes through a known sanitizer */
-function isSanitized(expression) {
+export function isSanitized(expression) {
     return SANITIZER_PATTERNS.some((p) => p.test(expression));
 }
 // ─── Guard Clause Detection ─────────────────────────────────────────────────

package/dist/commands/benchmark-compliance-ethics.js CHANGED Viewed

@@ -877,7 +877,7 @@ app.post("/api/orders", async (req, res) => {
 app.put("/api/profile", async (req, res) => {
   if (!req.body.name) return res.status(500).json({ error: "Name required" }); // Wrong status code
 });`,
-        expectedRuleIds: ["UX-001", "SEC-001"],
+        expectedRuleIds: ["UX-001"],
         category: "ux",
         difficulty: "medium",
     },

package/dist/commands/benchmark.js CHANGED Viewed

@@ -625,7 +625,7 @@ function getDiscount(total: number, loyaltyYears: number): number {
   if (total > 100) return total * 0.05;
   return 0;
 }`,
-        expectedRuleIds: ["MAINT-001"],
+        expectedRuleIds: [],
         category: "maintainability",
         difficulty: "easy",
     },
@@ -689,7 +689,7 @@ export class DataProcessor {
   process(item: unknown): void { this.buffer.push(item); }
   flush(): unknown[] { const r = [...this.buffer]; this.buffer = []; return r; }
 }`,
-        expectedRuleIds: ["DOC-001", "SEC-001"],
+        expectedRuleIds: ["DOC-001"],
         category: "documentation",
         difficulty: "easy",
     },
@@ -723,7 +723,7 @@ export class DataProcessor {
   }
   return parseAddSub();
 }`,
-        expectedRuleIds: ["TEST-001"],
+        expectedRuleIds: [],
         category: "testing",
         difficulty: "medium",
     },
@@ -920,7 +920,7 @@ app.get("/config", async (req, res) => {
   const config = await db.query("SELECT * FROM app_config");
   res.json(config);
 });`,
-        expectedRuleIds: ["COST-001", "OBS-001", "SEC-001"],
+        expectedRuleIds: ["COST-001"],
         category: "caching",
         difficulty: "medium",
     },
@@ -1018,7 +1018,7 @@ app.post("/upload", async (req, res) => {
     "mongoose": "^7.0.0"
   }
 }`,
-        expectedRuleIds: ["CICD-001"],
+        expectedRuleIds: [],
         category: "ci-cd",
         difficulty: "easy",
     },
@@ -1038,7 +1038,7 @@ app.post("/upload", async (req, res) => {
     "express": "^4.18.0"
   }
 }`,
-        expectedRuleIds: ["SWDEV-001"],
+        expectedRuleIds: [],
         category: "software-practices",
         difficulty: "easy",
     },

package/dist/commands/llm-benchmark.js CHANGED Viewed

@@ -73,6 +73,28 @@ const CATEGORY_ACCEPTABLE_PREFIXES = {
     "code-quality": ["MAINT", "API", "STRUCT", "SWDEV", "LOGIC", "ERR"],
     "supply-chain": ["DEPS", "SEC", "COMPAT", "MAINT"],
     "ai-security": ["AICS", "SEC", "CYBER", "DATA", "ERR", "LOGIC"],
+    structure: [
+        "STRUCT",
+        "MAINT",
+        "SWDEV",
+        "ERR",
+        "LOGIC",
+        "SEC",
+        "PERF",
+        "API",
+        "DB",
+        "OBS",
+        "AUTH",
+        "CACHE",
+        "SCALE",
+        "CYBER",
+        "AICS",
+        "RATE",
+        "CONC",
+        "COST",
+        "UX",
+        "COMPAT",
+    ],
     clean: [], // Clean code — no acceptable prefixes, all findings are FPs
 };
 /**
@@ -132,6 +154,13 @@ export function parseLlmRuleIds(response) {
             found.add(match[0]);
         }
     }
+    // Secondary pass: extract known prefixes from compound IDs like DEPS-TYPO-001
+    const compoundPattern = /\b([A-Z][A-Z0-9]+)-[A-Z][A-Z0-9]+-(\d{1,3})\b/g;
+    while ((match = compoundPattern.exec(response)) !== null) {
+        if (validPrefixes.has(match[1])) {
+            found.add(`${match[1]}-${match[2]}`);
+        }
+    }
     return [...found];
 }
 /**

package/dist/evaluators/cybersecurity.js CHANGED Viewed

@@ -761,7 +761,10 @@ export function analyzeCybersecurity(code, language, context) {
     if (lang === "rust") {
         const unsafeLines = getLineNumbers(code, /\bunsafe\s*\{/g);
         if (unsafeLines.length > 0) {
-            const hasSafetyDoc = testCode(code, /\/\/\s*SAFETY:|\/\/\s*UNSAFE:|#\[allow\(unsafe_code\)\]/gi);
+            // Check raw code (not comment-stripped) because SAFETY docs ARE comments
+            const safetyDocRe = /\/\/\s*SAFETY:|\/\/\s*UNSAFE:|#\[allow\(unsafe_code\)\]/gi;
+            safetyDocRe.lastIndex = 0;
+            const hasSafetyDoc = safetyDocRe.test(code);
             if (!hasSafetyDoc) {
                 findings.push({
                     ruleId: `${prefix}-${String(ruleNum++).padStart(3, "0")}`,

package/dist/formatters/html.js CHANGED Viewed

@@ -8,32 +8,8 @@
  *   - Finding details with line numbers and suggested fixes
  *   - Dark/light theme auto-detection
  */
-// ─── HTML Escaping ──────────────────────────────────────────────────────────
-function esc(text) {
-    return text
-        .replace(/&/g, "&amp;")
-        .replace(/</g, "&lt;")
-        .replace(/>/g, "&gt;")
-        .replace(/"/g, "&quot;")
-        .replace(/'/g, "&#39;");
-}
+import { esc, severityColor } from "./shared.js";
 // ─── Severity Colors ────────────────────────────────────────────────────────
-function severityColor(severity) {
-    switch (severity) {
-        case "critical":
-            return "#dc2626";
-        case "high":
-            return "#ea580c";
-        case "medium":
-            return "#ca8a04";
-        case "low":
-            return "#2563eb";
-        case "info":
-            return "#6b7280";
-        default:
-            return "#6b7280";
-    }
-}
 function verdictColor(verdict) {
     switch (verdict) {
         case "pass":

package/dist/formatters/pdf.js CHANGED Viewed

@@ -8,30 +8,7 @@
  *   - @media print styles for proper pagination
  *   - Executive summary on first page
  */
-function esc(text) {
-    return text
-        .replace(/&/g, "&amp;")
-        .replace(/</g, "&lt;")
-        .replace(/>/g, "&gt;")
-        .replace(/"/g, "&quot;")
-        .replace(/'/g, "&#39;");
-}
-function severityColor(severity) {
-    switch (severity) {
-        case "critical":
-            return "#dc2626";
-        case "high":
-            return "#ea580c";
-        case "medium":
-            return "#ca8a04";
-        case "low":
-            return "#2563eb";
-        case "info":
-            return "#6b7280";
-        default:
-            return "#6b7280";
-    }
-}
+import { esc, severityColor } from "./shared.js";
 function renderFinding(f) {
     const color = severityColor(f.severity);
     return `<tr>

package/dist/formatters/shared.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+/** HTML-escape a string to prevent XSS in generated reports. */
+export declare function esc(text: string): string;
+/** Map a severity level to its display hex color. */
+export declare function severityColor(severity: string): string;

package/dist/formatters/shared.js ADDED Viewed

@@ -0,0 +1,29 @@
+// ─── Shared Formatter Utilities ──────────────────────────────────────────────
+// Common helpers used by multiple output formatters (HTML, PDF, etc.).
+// ──────────────────────────────────────────────────────────────────────────────
+/** HTML-escape a string to prevent XSS in generated reports. */
+export function esc(text) {
+    return text
+        .replace(/&/g, "&amp;")
+        .replace(/</g, "&lt;")
+        .replace(/>/g, "&gt;")
+        .replace(/"/g, "&quot;")
+        .replace(/'/g, "&#39;");
+}
+/** Map a severity level to its display hex color. */
+export function severityColor(severity) {
+    switch (severity) {
+        case "critical":
+            return "#dc2626";
+        case "high":
+            return "#ea580c";
+        case "medium":
+            return "#ca8a04";
+        case "low":
+            return "#2563eb";
+        case "info":
+            return "#6b7280";
+        default:
+            return "#6b7280";
+    }
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kevinrabun/judges",
-  "version": "3.123.2",
+  "version": "3.123.4",
   "description": "45 specialized judges that evaluate AI-generated code for security, cost, and quality.",
   "mcpName": "io.github.KevinRabun/judges",
   "type": "module",

package/server.json CHANGED Viewed

@@ -16,12 +16,12 @@
       "mimeType": "image/png"
     }
   ],
-  "version": "3.123.2",
+  "version": "3.123.4",
   "packages": [
     {
       "registryType": "npm",
       "identifier": "@kevinrabun/judges",
-      "version": "3.123.2",
+      "version": "3.123.4",
       "transport": {
         "type": "stdio"
       }