npm - @kevinrabun/judges - Versions diffs - 3.125.0 → 3.126.1 - Mend

@kevinrabun/judges 3.125.0 → 3.126.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/api.d.ts +2 -1
package/dist/api.js +2 -0
package/dist/cli-formatters.js +38 -0
package/dist/cli.js +27 -1
package/dist/evaluators/index.js +163 -1
package/dist/evaluators/shared.js +33 -0
package/dist/regulatory-scope.d.ts +27 -0
package/dist/regulatory-scope.js +181 -0
package/dist/tools/prompts.d.ts +1 -1
package/dist/tools/prompts.js +3 -1
package/dist/types.d.ts +87 -0
package/judgesrc.schema.json +14 -0
package/package.json +2 -2
package/server.json +2 -2

package/dist/api.d.ts CHANGED Viewed

@@ -8,7 +8,7 @@
  * const result = evaluateCode("const x = eval(input);", "typescript");
  * ```
  */
-export type { Severity, Verdict, Finding, Patch, LangFamily, JudgesConfig, RuleOverride, ProjectFile, ProjectVerdict, DiffVerdict, DependencyEntry, DependencyVerdict, JudgeEvaluation, TribunalVerdict, JudgeDefinition, EvaluationContextV2, EvidenceBundleV2, SpecializedFindingV2, TribunalVerdictV2, MustFixGateOptions, MustFixGateResult, AppBuilderWorkflowResult, PlainLanguageFinding, WorkflowTask, PolicyProfile, SuppressionRecord, SuppressionResult, ExecutionTrace, RuleTrace, StreamingBatch, JudgeSelectionContext, JudgeSelectionResult, SessionContext, } from "./types.js";
+export type { Severity, Verdict, Finding, Patch, LangFamily, JudgesConfig, RuleOverride, ProjectFile, ProjectVerdict, DiffVerdict, DependencyEntry, DependencyVerdict, JudgeEvaluation, TribunalVerdict, JudgeDefinition, EvaluationContextV2, EvidenceBundleV2, SpecializedFindingV2, TribunalVerdictV2, MustFixGateOptions, MustFixGateResult, AppBuilderWorkflowResult, PlainLanguageFinding, WorkflowTask, PolicyProfile, SuppressionRecord, SuppressionResult, ExecutionTrace, RuleTrace, StreamingBatch, JudgeSelectionContext, JudgeSelectionResult, SessionContext, HumanFocusGuide, FocusItem, BlindSpot, } from "./types.js";
 export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
 export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadConfigFile, expandEnvPlaceholders, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, validateJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
 export { EXT_TO_LANG, SUPPORTED_EXTENSIONS, detectLanguageFromPath } from "./ext-to-lang.js";
@@ -47,6 +47,7 @@ export { runFeedbackLoop, formatFeedbackLoopReport } from "./feedback-loop.js";
 export type { FeedbackLoopResult, ConfidenceAdjustment, FeedbackLoopStats } from "./feedback-loop.js";
 export { registerPlugin, unregisterPlugin, getRegisteredPlugins, getCustomRules, getPluginJudges, evaluateCustomRules, runBeforeHooks, runAfterHooks, clearPlugins, } from "./plugins.js";
 export type { CustomRule, JudgesPlugin, PluginRegistration } from "./plugins.js";
+export { filterByRegulatoryScope, getSupportedFrameworks } from "./regulatory-scope.js";
 export { JudgeRegistry, defaultRegistry } from "./judge-registry.js";
 export { parseFrontmatter, validateFrontmatter, parseAgentFile, resolveEvaluator, agentToJudgeDefinition, loadAgentDirectory, loadAndRegisterAgents, } from "./agent-loader.js";
 export type { AgentFrontmatter, ParsedAgent } from "./agent-loader.js";

package/dist/api.js CHANGED Viewed

@@ -56,6 +56,8 @@ export { getAgentCard, createTask, getTask, completeTask, failTask, listTasks, p
 export { runFeedbackLoop, formatFeedbackLoopReport } from "./feedback-loop.js";
 // ─── Plugin API ──────────────────────────────────────────────────────────────
 export { registerPlugin, unregisterPlugin, getRegisteredPlugins, getCustomRules, getPluginJudges, evaluateCustomRules, runBeforeHooks, runAfterHooks, clearPlugins, } from "./plugins.js";
+// ─── Regulatory Scope ────────────────────────────────────────────────────────
+export { filterByRegulatoryScope, getSupportedFrameworks } from "./regulatory-scope.js";
 // ─── Judge Registry ──────────────────────────────────────────────────────────
 export { JudgeRegistry, defaultRegistry } from "./judge-registry.js";
 // ─── Agent Markdown Loader ───────────────────────────────────────────────────

package/dist/cli-formatters.js CHANGED Viewed

@@ -136,6 +136,44 @@ export function formatTextOutput(verdict) {
         }
         lines.push("");
     }
+    // Human Focus Guide
+    if (verdict.humanFocusGuide) {
+        const guide = verdict.humanFocusGuide;
+        lines.push("  👤 Human Reviewer Focus Guide");
+        lines.push("  " + "─".repeat(60));
+        lines.push(`  ${guide.summary}`);
+        lines.push("");
+        if (guide.trust.length > 0) {
+            lines.push("  ✅ TRUST (act on these directly):");
+            for (const item of guide.trust.slice(0, 10)) {
+                const lineRef = item.lineNumbers?.[0] ? ` L${item.lineNumbers[0]}` : "";
+                lines.push(`     [${item.severity.toUpperCase()}] ${item.ruleId}${lineRef}: ${item.title}`);
+                lines.push(`            ${item.reason}`);
+            }
+            if (guide.trust.length > 10)
+                lines.push(`     ... and ${guide.trust.length - 10} more`);
+            lines.push("");
+        }
+        if (guide.verify.length > 0) {
+            lines.push("  🔍 VERIFY (use your judgment):");
+            for (const item of guide.verify.slice(0, 10)) {
+                const lineRef = item.lineNumbers?.[0] ? ` L${item.lineNumbers[0]}` : "";
+                lines.push(`     [${item.severity.toUpperCase()}] ${item.ruleId}${lineRef}: ${item.title}`);
+                lines.push(`            ${item.reason}`);
+            }
+            if (guide.verify.length > 10)
+                lines.push(`     ... and ${guide.verify.length - 10} more`);
+            lines.push("");
+        }
+        if (guide.blindSpots.length > 0) {
+            lines.push("  🔦 BLIND SPOTS (automated analysis cannot evaluate):");
+            for (const spot of guide.blindSpots) {
+                lines.push(`     • ${spot.area}`);
+                lines.push(`       ${spot.guidance.slice(0, 120)}${spot.guidance.length > 120 ? "…" : ""}`);
+            }
+            lines.push("");
+        }
+    }
     // Exit guidance
     if (verdict.overallVerdict === "fail") {
         lines.push("  ⛔ FAIL — This code has issues that should be addressed before shipping.");

package/dist/cli.js CHANGED Viewed

@@ -43,6 +43,7 @@ import { formatComparisonReport, formatFullComparisonMatrix, TOOL_PROFILES } fro
 import { loadOverrideStore, applyOverrides } from "./commands/override.js";
 import { runGit } from "./tools/command-safety.js";
 import { detectLanguageFromPath, SUPPORTED_EXTENSIONS } from "./ext-to-lang.js";
+import { getSupportedFrameworks } from "./regulatory-scope.js";
 import { formatTribunalOutput, writeOutputIfSpecified, formatSingleJudgeTextOutput, } from "./cli-formatters.js";
 import { COMMAND_TABLE } from "./cli-dispatch.js";
 // ─── Language Detection ─────────────────────────────────────────────────────
@@ -226,6 +227,8 @@ function printHelp() {
      * over-promising features that aren't wired yet.
      */
     const coreCommands = [
+        ["judges list", "List all available judges"],
+        ["judges list --frameworks", "List supported regulatory frameworks"],
         ["judges eval [options] [file]", "Evaluate code with the full tribunal"],
         ["judges eval --judge <id> [file]", "Evaluate with a single judge"],
         ["judges init", "Interactive project setup wizard"],
@@ -485,6 +488,24 @@ function listJudges() {
     console.log(`  Total: ${judges.length} judges`);
     console.log("");
 }
+// ─── List Regulatory Frameworks ─────────────────────────────────────────────
+/**
+ * Print the supported regulatory frameworks (ID and description), followed by
+ * a total count and an example `.judgesrc` snippet, to stdout.
+ */
+function listFrameworks() {
+    // Assemble the full listing up front, then print it one line at a time.
+    const supported = getSupportedFrameworks();
+    const rows = supported.map((entry) => `  ${entry.id.padEnd(15)} ${entry.description}`);
+    const output = [
+        "",
+        "  Supported Regulatory Frameworks:",
+        "  " + "─".repeat(60),
+        "  Use these IDs in .judgesrc → regulatoryScope: [...]",
+        "",
+        ...rows,
+        "",
+        `  Total: ${supported.length} frameworks`,
+        "",
+        "  Example .judgesrc:",
+        '  { "regulatoryScope": ["GDPR", "PCI-DSS", "SOC2"] }',
+        "",
+    ];
+    for (const line of output) {
+        console.log(line);
+    }
+}
 // ─── Version ────────────────────────────────────────────────────────────────
 function getPackageVersion() {
     try {
@@ -681,7 +702,12 @@ export async function runCli(argv) {
     }
     // ─── List Command ────────────────────────────────────────────────────
     if (args.command === "list") {
-        listJudges();
+        if (argv.includes("--frameworks")) {
+            listFrameworks();
+        }
+        else {
+            listJudges();
+        }
         process.exit(0);
     }
     // ─── Eval Command ────────────────────────────────────────────────────

package/dist/evaluators/index.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { analyzeStructure } from "../ast/index.js";
 import { analyzeTaintFlows } from "../ast/index.js";
 import { LRUCache, contentHash } from "../cache.js";
 import { getSharedDiskCache } from "../disk-cache.js";
+import { filterByRegulatoryScope } from "../regulatory-scope.js";
 // ─── Shared Utilities ────────────────────────────────────────────────────────
 import { calculateScore, deriveVerdict, buildSummary, buildTribunalSummary, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, classifyFile, shouldRunAbsenceRules, applyConfig, applyFrameworkAwareness, } from "./shared.js";
 // ─── Extracted Modules ───────────────────────────────────────────────────────
@@ -414,6 +415,137 @@ function synthesizeReviewDecision(findings) {
         blockingIssues,
     };
 }
+// ─── Human Focus Guide ────────────────────────────────────────────────────────
+/**
+ * Synthesize a Human Focus Guide from tribunal findings.
+ *
+ * Categorizes findings into three buckets:
+ * - **Trust**: findings that are not absence-based and have confidence ≥ 0.8
+ * - **Verify**: absence-based findings (whatever their confidence) and findings with confidence < 0.8
+ * - **Blind spots**: areas automated analysis cannot evaluate (business logic, architecture, UX judgment)
+ *
+ * Also scans the code with regex heuristics for characteristics that suggest
+ * human attention is needed (dense branching, external calls, money, complex
+ * regexes, state machines, PII, non-trivial size).
+ *
+ * @param findings - Findings to categorize. A missing `confidence` is treated as 0.7.
+ * @param code - Source text to scan for blind-spot heuristics; may be empty.
+ * @param language - Not used by the current heuristics.
+ * @returns `{ trust, verify, blindSpots, summary }` where `trust`/`verify` hold
+ *   focus items (`ruleId`, `title`, `severity`, `confidence`, `lineNumbers`,
+ *   `reason`), `blindSpots` holds `{ area, guidance }` entries and `summary`
+ *   is a one-paragraph overview.
+ */
+function synthesizeHumanFocusGuide(findings, code, language) {
+    const trust = [];
+    const verify = [];
+    for (const f of findings) {
+        const conf = f.confidence ?? 0.7;
+        const percent = Math.round(conf * 100);
+        // Everything lands in "verify" unless it is high-confidence and evidence-based.
+        let bucket = verify;
+        let reason;
+        if (f.isAbsenceBased) {
+            // Absence-based findings need a human look regardless of confidence.
+            reason = "Absence-based — the detected issue may be handled in another file";
+        }
+        else if (conf >= 0.8) {
+            bucket = trust;
+            reason = f.provenance === "ast-confirmed" || f.provenance === "taint-flow"
+                ? "AST/taint-flow confirmed with high confidence"
+                : "High confidence with concrete evidence";
+        }
+        else if (conf >= 0.5) {
+            reason = `Moderate confidence (${percent}%) — verify manually`;
+        }
+        else {
+            reason = `Low confidence (${percent}%) — may be a false positive`;
+        }
+        bucket.push({
+            ruleId: f.ruleId,
+            title: f.title,
+            severity: f.severity,
+            confidence: conf,
+            lineNumbers: f.lineNumbers,
+            reason,
+        });
+    }
+    // ── Blind spots: areas automated analysis cannot evaluate ──
+    // The business-logic blind spot is always included.
+    const blindSpots = [
+        {
+            area: "Business Logic Correctness",
+            guidance: "Verify that the code implements the intended requirements correctly. Automated analysis checks for patterns and vulnerabilities but cannot validate business rules, domain constraints, or functional correctness.",
+        },
+    ];
+    // Code-characteristic-based blind spots
+    if (code) {
+        const lineCount = code.split("\n").length;
+        // Complex branching. The last alternative counts ternaries: a lone `?`
+        // (not `?.`, `??` or the `?:` of an optional member) followed by a `:`
+        // later in the same statement. The count is approximate by design.
+        const branchCount = (code.match(/\bif\b|\belse\b|\bswitch\b|\bcase\b|(?<!\?)\?(?![.?:])[^?:;\n]*:/g) || []).length;
+        if (branchCount > lineCount * 0.15 && branchCount > 10) {
+            blindSpots.push({
+                area: "Complex Control Flow",
+                guidance: `This code has dense branching logic (~${branchCount} branch points). Review edge cases, boundary conditions, and off-by-one errors that pattern matching cannot reliably detect.`,
+            });
+        }
+        // External API/service calls
+        if (/fetch\(|axios\.|http\.|https\.|\.request\(|urllib|requests\.|HttpClient|WebClient/i.test(code)) {
+            blindSpots.push({
+                area: "External Service Integration",
+                guidance: "This code calls external services. Verify timeout behavior, retry logic, circuit breaking, and graceful degradation when services are unavailable. Automated analysis can detect missing patterns but cannot validate the integration logic.",
+            });
+        }
+        // Financial/monetary operations
+        if (/price|amount|balance|payment|invoice|refund|discount|tax|currency|decimal|money/i.test(code)) {
+            blindSpots.push({
+                area: "Financial/Monetary Calculations",
+                guidance: "This code handles monetary values. Verify rounding behavior, currency precision, and that floating-point arithmetic is not used for financial calculations.",
+            });
+        }
+        // Complex regex (a /.../ span of 30+ characters on one line)
+        const complexRegex = (code.match(/\/[^/\n]{30,}\//g) || []).length;
+        if (complexRegex > 0) {
+            blindSpots.push({
+                area: "Complex Regular Expressions",
+                guidance: `Found ${complexRegex} complex regex pattern(s). Verify they match the intended inputs and don't have catastrophic backtracking on adversarial input.`,
+            });
+        }
+        // State machines / workflow
+        if (/state\s*[=:]\s*['"][^'"]+['"]|status\s*===?\s*['"]|transition|workflow|step.*next/i.test(code)) {
+            blindSpots.push({
+                area: "State Management / Workflow Logic",
+                guidance: "This code manages state transitions or workflow steps. Verify that all valid state transitions are handled and invalid transitions are rejected. Automated analysis cannot validate state machine correctness.",
+            });
+        }
+        // PII/sensitive data handling
+        if (/\b(email|ssn|social.security|phone.number|address|birth.date|passport|national.id|credit.card)\b/i.test(code)) {
+            blindSpots.push({
+                area: "PII / Sensitive Data Handling",
+                guidance: "This code handles personally identifiable information. Verify data minimization, consent tracking, retention policies, and that PII is not logged or transmitted unnecessarily.",
+            });
+        }
+        // Architecture blind spot (only for non-trivial code: more than 50 lines)
+        if (lineCount > 50) {
+            blindSpots.push({
+                area: "Architectural Fit",
+                guidance: "Verify this code fits the project's architectural patterns (service boundaries, dependency direction, naming conventions). Automated analysis evaluates code in isolation and cannot assess architectural context.",
+            });
+        }
+    }
+    // ── Build summary ──
+    const trustCount = trust.length;
+    const verifyCount = verify.length;
+    const blindCount = blindSpots.length;
+    const plural = (count) => (count > 1 ? "s" : "");
+    // blindSpots is never empty, so the "clean" message must key off the
+    // findings alone; otherwise it could never be shown.
+    if (trustCount + verifyCount === 0) {
+        return {
+            trust,
+            verify,
+            blindSpots,
+            summary: "No findings — code looks clean. Focus your review on business logic correctness and architectural fit.",
+        };
+    }
+    const parts = [];
+    if (trustCount > 0) {
+        parts.push(`${trustCount} high-confidence finding${plural(trustCount)} you can act on directly`);
+    }
+    if (verifyCount > 0) {
+        parts.push(`${verifyCount} finding${plural(verifyCount)} that need your judgment`);
+    }
+    parts.push(`${blindCount} area${plural(blindCount)} that automated analysis cannot evaluate`);
+    const summary = `Human reviewer: ${parts.join(", ")}. Focus your review time on the "Verify" and "Blind Spots" sections — the "Trust" findings have strong automated evidence.`;
+    return { trust, verify, blindSpots, summary };
+}
 /**
  * Cap the number of findings by priority-sorting and keeping only
  * the top N.  Ensures high-severity / high-confidence findings always survive.
@@ -571,6 +703,16 @@ export function evaluateWithTribunal(code, language, context, options) {
             }
         }
     }
+    // ── Regulatory scope filtering ──
+    // When regulatoryScope is set in config, suppress findings that cite ONLY
+    // out-of-scope regulatory frameworks.
+    let regulatorySuppressed = 0;
+    if (options?.config?.regulatoryScope && options.config.regulatoryScope.length > 0) {
+        const scopeResult = filterByRegulatoryScope(configFiltered, options.config.regulatoryScope);
+        configFiltered.length = 0;
+        configFiltered.push(...scopeResult.findings);
+        regulatorySuppressed = scopeResult.suppressed;
+    }
     // ── Feedback-driven confidence calibration & auto-tuning ──
     // When options.calibrate is set, load the feedback store and apply:
     // 1. Auto-suppression of rules with FP rate ≥ 80%
@@ -709,8 +851,27 @@ export function evaluateWithTribunal(code, language, context, options) {
             ...(owaspLlmTop10 ? { owaspLlmTop10 } : {}),
         };
     });
+    // ── Consensus-based suppression ──
+    // When consensusThreshold is set: if a supermajority of judges reported
+    // zero findings, suppress findings from the minority outlier judges.
+    // This catches cases where most judges agree code is clean but a few
+    // structurally over-flag (e.g. error-handling, testing).
+    let consensusSuppressed = 0;
+    let postConsensuFindings = allFindings;
+    const consensusThreshold = options?.config?.consensusThreshold;
+    if (consensusThreshold !== undefined && consensusThreshold > 0 && evaluations.length > 0) {
+        const zeroFindingJudges = evaluations.filter((e) => e.findings.length === 0).length;
+        const totalJudges = evaluations.length;
+        const cleanRatio = zeroFindingJudges / totalJudges;
+        if (cleanRatio >= consensusThreshold) {
+            // Majority says clean — suppress minority findings (keep critical severity)
+            const before = postConsensuFindings.length;
+            postConsensuFindings = postConsensuFindings.filter((f) => f.severity === "critical");
+            consensusSuppressed = before - postConsensuFindings.length;
+        }
+    }
     // ── Structured CWE/OWASP IDs and Learn More URLs ──
-    const enrichedFindings = enrichWithSecurityIds(allFindings);
+    const enrichedFindings = enrichWithSecurityIds(postConsensuFindings);
     const mustFixGate = evaluateMustFixGate(enrichedFindings, options?.mustFixGate);
     const criticalCount = enrichedFindings.filter((f) => f.severity === "critical").length;
     const highCount = enrichedFindings.filter((f) => f.severity === "high").length;
@@ -741,6 +902,7 @@ export function evaluateWithTribunal(code, language, context, options) {
             })),
         },
         reviewDecision: synthesizeReviewDecision(enrichedFindings),
+        humanFocusGuide: synthesizeHumanFocusGuide(enrichedFindings, code, language),
     };
     // ── Deep review prompt attachment (P0.1) ──
     // When deepReview is enabled, build and attach a structured LLM prompt

package/dist/evaluators/shared.js CHANGED Viewed

@@ -1036,6 +1036,39 @@ export function formatVerdictAsMarkdown(verdict) {
             md += `---\n\n`;
         }
     }
+    // Human Focus Guide
+    if (verdict.humanFocusGuide) {
+        const guide = verdict.humanFocusGuide;
+        md += `## 👤 Human Reviewer Focus Guide\n\n`;
+        md += `${guide.summary}\n\n`;
+        if (guide.trust.length > 0) {
+            md += `### ✅ Trust (act on these directly)\n\n`;
+            md += `| Severity | Rule | Finding | Reason |\n|---|---|---|---|\n`;
+            for (const item of guide.trust.slice(0, 15)) {
+                md += `| ${item.severity} | \`${item.ruleId}\` | ${item.title} | ${item.reason} |\n`;
+            }
+            if (guide.trust.length > 15)
+                md += `\n*...and ${guide.trust.length - 15} more*\n`;
+            md += `\n`;
+        }
+        if (guide.verify.length > 0) {
+            md += `### 🔍 Verify (use your judgment)\n\n`;
+            md += `| Severity | Rule | Finding | Reason |\n|---|---|---|---|\n`;
+            for (const item of guide.verify.slice(0, 15)) {
+                md += `| ${item.severity} | \`${item.ruleId}\` | ${item.title} | ${item.reason} |\n`;
+            }
+            if (guide.verify.length > 15)
+                md += `\n*...and ${guide.verify.length - 15} more*\n`;
+            md += `\n`;
+        }
+        if (guide.blindSpots.length > 0) {
+            md += `### 🔦 Blind Spots (automated analysis cannot evaluate)\n\n`;
+            for (const spot of guide.blindSpots) {
+                md += `- **${spot.area}** — ${spot.guidance}\n`;
+            }
+            md += `\n`;
+        }
+    }
     return md;
 }
 // ─── Shared Credential / Placeholder Detection ──────────────────────────────

package/dist/regulatory-scope.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * Regulatory Scope — Framework-aware finding filtering.
+ *
+ * When `regulatoryScope` is set in `.judgesrc`, findings whose `reference`
+ * or `description` text cites ONLY out-of-scope frameworks are suppressed.
+ * Findings that cite at least one in-scope framework (or have no regulatory
+ * reference) are kept.
+ */
+import type { Finding } from "./types.js";
+/**
+ * Look up supported framework IDs for listing/validation.
+ *
+ * @returns One `{ id, description }` entry per supported framework.
+ */
+export declare function getSupportedFrameworks(): Array<{
+    id: string;
+    description: string;
+}>;
+/**
+ * Filter findings based on `regulatoryScope`. Findings that cite ONLY
+ * out-of-scope frameworks are suppressed. Findings with no regulatory
+ * reference or with at least one in-scope framework are kept.
+ *
+ * Framework citations are detected in each finding's `reference` and
+ * `description` text. Scope IDs are matched case-insensitively, and an
+ * empty scope returns the findings unchanged.
+ *
+ * @param findings - All findings from the tribunal
+ * @param scope - Array of framework IDs (e.g. ["GDPR", "PCI-DSS"])
+ * @returns Object with kept findings and count of suppressed findings
+ */
+export declare function filterByRegulatoryScope(findings: Finding[], scope: string[]): {
+    findings: Finding[];
+    suppressed: number;
+};

package/dist/regulatory-scope.js ADDED Viewed

@@ -0,0 +1,181 @@
+/**
+ * Regulatory Scope — Framework-aware finding filtering.
+ *
+ * When `regulatoryScope` is set in `.judgesrc`, findings whose `reference`
+ * or `description` text cites ONLY out-of-scope frameworks are suppressed.
+ * Findings that cite at least one in-scope framework (or have no regulatory
+ * reference) are kept.
+ */
+/**
+ * Registry of the regulatory frameworks this module knows about.
+ *
+ * Each entry has:
+ * - `id`: canonical framework ID, returned by `getSupportedFrameworks()` and
+ *   compared (case-insensitively) against `regulatoryScope` entries.
+ * - `aliases`: lowercase phrases that `detectFrameworks()` searches for in a
+ *   finding's `reference` and `description` text.
+ * - `description`: human-readable framework name.
+ *
+ * NOTE(review): a few aliases are written as regex-style patterns (e.g.
+ * `children.*online privacy`) and some are very short (e.g. `phi`, `nist`);
+ * confirm the matcher in `detectFrameworks()` handles both as intended.
+ */
+const FRAMEWORKS = [
+    {
+        id: "GDPR",
+        aliases: [
+            "gdpr",
+            "general data protection",
+            "article 5",
+            "article 6",
+            "article 8",
+            "article 17",
+            "article 22",
+            "article 32",
+            "chapter v",
+            "data protection regulation",
+        ],
+        description: "EU General Data Protection Regulation",
+    },
+    {
+        id: "CCPA",
+        aliases: ["ccpa", "california consumer privacy", "cpra", "right to delete"],
+        description: "California Consumer Privacy Act",
+    },
+    {
+        id: "HIPAA",
+        aliases: [
+            "hipaa",
+            "health insurance portability",
+            "phi",
+            "protected health information",
+            "45 cfr",
+            "security rule",
+            "minimum necessary",
+        ],
+        description: "Health Insurance Portability and Accountability Act",
+    },
+    {
+        id: "PCI-DSS",
+        aliases: ["pci", "pci dss", "pci-dss", "payment card", "cardholder data", "requirement 3"],
+        description: "Payment Card Industry Data Security Standard",
+    },
+    {
+        id: "SOC2",
+        aliases: ["soc 2", "soc2", "trust service", "cc6", "cc7"],
+        description: "SOC 2 Trust Service Criteria",
+    },
+    {
+        id: "SOX",
+        aliases: ["sox", "sarbanes-oxley", "sarbanes oxley"],
+        description: "Sarbanes-Oxley Act",
+    },
+    {
+        id: "COPPA",
+        aliases: ["coppa", "children.*online privacy", "age appropriate design"],
+        description: "Children's Online Privacy Protection Act",
+    },
+    {
+        id: "FERPA",
+        aliases: ["ferpa", "family educational rights"],
+        description: "Family Educational Rights and Privacy Act",
+    },
+    {
+        id: "FedRAMP",
+        aliases: ["fedramp", "fed ramp", "federal risk"],
+        description: "Federal Risk and Authorization Management Program",
+    },
+    {
+        id: "NIST",
+        aliases: ["nist", "sp 800", "800-53", "800-63", "800-131", "800-122", "ssdf"],
+        description: "NIST Cybersecurity Framework & Special Publications",
+    },
+    {
+        id: "ISO27001",
+        aliases: ["iso 27001", "iso27001", "iso/iec 27001"],
+        description: "ISO/IEC 27001 Information Security Management",
+    },
+    {
+        id: "ePrivacy",
+        aliases: ["eprivacy", "e-privacy", "cookie.*directive", "eprivacy directive"],
+        description: "EU ePrivacy Directive",
+    },
+    {
+        id: "DORA",
+        aliases: ["dora", "digital operational resilience"],
+        description: "Digital Operational Resilience Act",
+    },
+    {
+        id: "NIS2",
+        aliases: ["nis2", "nis 2", "network.*information.*security"],
+        description: "Network and Information Security Directive 2",
+    },
+    {
+        id: "EU-AI-Act",
+        aliases: ["eu ai act", "ai act", "artificial intelligence act"],
+        description: "EU Artificial Intelligence Act",
+    },
+    {
+        id: "LGPD",
+        aliases: ["lgpd", "lei geral.*prote"],
+        description: "Brazil General Data Protection Law",
+    },
+    {
+        id: "PIPEDA",
+        aliases: ["pipeda", "personal information protection.*electronic"],
+        description: "Canada Personal Information Protection and Electronic Documents Act",
+    },
+];
+/**
+ * List every supported regulatory framework for display or validation.
+ *
+ * @returns A new array with one `{ id, description }` object per framework,
+ *   in registry order (aliases are not exposed).
+ */
+export function getSupportedFrameworks() {
+    const listing = [];
+    for (const { id, description } of FRAMEWORKS) {
+        listing.push({ id, description });
+    }
+    return listing;
+}
+// ─── Framework Detection in Finding References ──────────────────────────────
+/** Compiled alias patterns keyed by alias text, so each alias is compiled only once. */
+const aliasPatternCache = new Map();
+/**
+ * Compile a framework alias into a case-insensitive, word-bounded pattern.
+ *
+ * Aliases are treated as regular-expression source, which is what entries
+ * such as `children.*online privacy` require. The surrounding `\b` anchors
+ * stop short aliases from matching inside unrelated words (e.g. `nist` in
+ * "administrator" or `article 5` in "article 50").
+ */
+function aliasPattern(alias) {
+    let pattern = aliasPatternCache.get(alias);
+    if (!pattern) {
+        // No `g` flag: `.test()` must stay stateless across findings.
+        pattern = new RegExp(`\\b(?:${alias})\\b`, "i");
+        aliasPatternCache.set(alias, pattern);
+    }
+    return pattern;
+}
+/**
+ * Detect which regulatory frameworks a finding references.
+ * Checks the `reference` and `description` fields for framework aliases.
+ *
+ * @param finding - Finding whose `reference` / `description` text is scanned;
+ *   either field may be missing.
+ * @returns Set of canonical framework IDs cited by the finding (empty when
+ *   the finding has no text or cites no known framework).
+ */
+function detectFrameworks(finding) {
+    const detected = new Set();
+    const text = `${finding.reference ?? ""} ${finding.description ?? ""}`;
+    if (!text.trim())
+        return detected;
+    for (const fw of FRAMEWORKS) {
+        // One matching alias is enough to attribute the framework.
+        if (fw.aliases.some((alias) => aliasPattern(alias).test(text))) {
+            detected.add(fw.id);
+        }
+    }
+    return detected;
+}
+// ─── Regulatory Scope Filter ────────────────────────────────────────────────
+/**
+ * Apply `regulatoryScope` to a list of findings.
+ *
+ * A finding is dropped only when it cites at least one regulatory framework
+ * and none of the cited frameworks is in scope. Findings without any
+ * regulatory citation always pass through.
+ *
+ * @param findings - All findings from the tribunal
+ * @param scope - Array of framework IDs (e.g. ["GDPR", "PCI-DSS"]); IDs are
+ *   compared case-insensitively and IDs that match no known framework are ignored
+ * @returns Object with kept findings and count of suppressed findings
+ */
+export function filterByRegulatoryScope(findings, scope) {
+    if (!scope || scope.length === 0) {
+        return { findings, suppressed: 0 };
+    }
+    // Resolve the requested IDs to canonical framework IDs (e.g. "pci-dss" → "PCI-DSS").
+    const requested = new Set(scope.map((s) => s.toUpperCase()));
+    const inScopeIds = new Set(FRAMEWORKS.map((fw) => fw.id).filter((id) => requested.has(id.toUpperCase())));
+    const kept = findings.filter((finding) => {
+        const cited = detectFrameworks(finding);
+        // No regulatory reference — a general code quality finding, always kept.
+        if (cited.size === 0) {
+            return true;
+        }
+        for (const id of cited) {
+            if (inScopeIds.has(id)) {
+                return true;
+            }
+        }
+        return false;
+    });
+    return { findings: kept, suppressed: findings.length - kept.length };
+}

package/dist/tools/prompts.d.ts CHANGED Viewed

@@ -4,7 +4,7 @@ export declare const SHARED_ADVERSARIAL_MANDATE = "ADVERSARIAL MANDATE (applies
 /** Precision override — ensures evidence-based findings. */
 export declare const PRECISION_MANDATE = "PRECISION MANDATE (this section OVERRIDES the adversarial mandate whenever they conflict):\n- Every finding MUST cite specific code evidence: exact line numbers, API calls, variable names, or patterns. Findings without concrete evidence MUST be discarded \u2014 no exceptions.\n- Do NOT flag the absence of a feature or pattern unless you can identify the specific code location where it SHOULD have been implemented and explain WHY it is required for THIS code.\n- Speculative, hypothetical, or \"just in case\" findings erode developer trust. Only flag issues you are confident exist in the actual code.\n- Prefer fewer, high-confidence findings over many uncertain ones. Quality of findings matters more than quantity.\n- If the code is genuinely well-written with no real issues, reporting ZERO findings is the correct and expected behavior. Do not manufacture findings to avoid an empty report.\n- Clean, well-structured code exists. Acknowledge it by not forcing false issues.\n- RECOGNIZE SECURE PATTERNS: Code using established security libraries and patterns (e.g. helmet, bcrypt/argon2, parameterized queries, input validation, CSRF tokens, rate limiters, proper TLS) is correctly implementing security. Do NOT flag these as insufficient or suggest alternatives unless a concrete vulnerability exists.\n- SCOPE LIMITATION: Only evaluate code that is actually present. Do NOT flag missing features, tests, logging, documentation, error handling, or infrastructure that may exist in other files. Evaluate what IS provided, not what COULD be elsewhere.\n- CONFIDENCE THRESHOLD: Only report findings where you are highly confident (\u226580%) that a real, exploitable issue or concrete deficiency exists in the provided code. When in doubt, do NOT report.\n- FALSE POSITIVE COST: A false positive is MORE harmful than a missed finding. False positives erode developer trust and cause real issues to be ignored. 
When uncertain, silence is better than a questionable finding.\n\nCOMMON FALSE POSITIVE PATTERNS (do NOT report these):\n- ERR: Do not flag error handling as inadequate when try/catch blocks, validation, or error middleware are present. Missing error handling in a utility function that is clearly called within a guarded context is NOT a finding.\n- LOGIC: Do not flag logic issues for standard patterns (early returns, guard clauses, switch/case with default). Only flag logic errors when you can demonstrate a concrete input that produces an incorrect output.\n- MAINT: Do not flag maintainability concerns for code that follows the language's established idioms. Complexity or length alone is NOT a finding unless it introduces a concrete maintenance burden.\n- SEC: Do not flag security issues when established security libraries (helmet, cors, bcrypt, parameterized queries) are correctly used. \"Could be stronger\" is NOT a vulnerability.\n- STRUCT: Do not flag code structure preferences (file organization, naming conventions) unless they create a concrete deficiency like circular dependencies or unreachable code.";
 /** Clean code gate — explicit instructions when code quality is high. */
-export declare const CLEAN_CODE_GATE = "CLEAN CODE GATE (applies AFTER individual judge evaluation):\n- Before reporting findings, assess the OVERALL quality of the code. If the code follows established conventions, uses appropriate patterns, handles errors, and has no concrete vulnerabilities or deficiencies, the expected output is ZERO findings across ALL judges.\n- Do NOT report stylistic preferences, alternative approaches, or \"nice to have\" improvements as findings. These are opinions, not defects.\n- Do NOT report findings about missing functionality that is likely in other files (tests, configs, middleware, error handlers, logging setup).\n- Do NOT report theoretical risks that require assumptions about the runtime environment, deployment configuration, or code outside the provided snippet.\n- SELF-CHECK before finalizing: For each finding, ask \"Would a senior engineer reviewing this code in a PR agree this must be fixed before merging?\" If the answer is not a clear YES, discard the finding.\n- The goal is to match what a thoughtful, experienced human reviewer would flag \u2014 not to demonstrate comprehensive knowledge of every possible concern.";
+export declare const CLEAN_CODE_GATE = "CLEAN CODE GATE (applies AFTER individual judge evaluation):\n- Before reporting findings, assess the OVERALL quality of the code. If the code follows established conventions, uses appropriate patterns, handles errors, and has no concrete vulnerabilities or deficiencies, the expected output is ZERO findings across ALL judges.\n- Do NOT report stylistic preferences, alternative approaches, or \"nice to have\" improvements as findings. These are opinions, not defects.\n- Do NOT report findings about missing functionality that is likely in other files (tests, configs, middleware, error handlers, logging setup).\n- Do NOT report theoretical risks that require assumptions about the runtime environment, deployment configuration, or code outside the provided snippet.\n- SELF-CHECK before finalizing: For each finding, ask \"Would a senior engineer reviewing this code in a PR agree this must be fixed before merging?\" If the answer is not a clear YES, discard the finding.\n- The goal is to match what a thoughtful, experienced human reviewer would flag \u2014 not to demonstrate comprehensive knowledge of every possible concern.\n- SINGLE-FILE LIMITATION: You are reviewing a code snippet, not a complete project. Missing tests, missing docs, missing middleware, missing configs, missing CI/CD, missing logging setup \u2014 these are EXPECTED in a single-file review. Only flag what is WRONG in the code present, not what is ABSENT from the project.\n- FINAL GATE: If your evaluation produces findings for a code snippet that uses established libraries correctly, has proper error handling, follows language idioms, and contains no security vulnerabilities \u2014 your findings are almost certainly false positives. Discard them and report ZERO findings.";
 /**
  * Extract only the unique evaluation criteria from a judge's systemPrompt,
  * stripping the persona introduction line, the ADVERSARIAL MANDATE block,

package/dist/tools/prompts.js CHANGED Viewed

@@ -44,7 +44,9 @@ export const CLEAN_CODE_GATE = `CLEAN CODE GATE (applies AFTER individual judge
 - Do NOT report findings about missing functionality that is likely in other files (tests, configs, middleware, error handlers, logging setup).
 - Do NOT report theoretical risks that require assumptions about the runtime environment, deployment configuration, or code outside the provided snippet.
 - SELF-CHECK before finalizing: For each finding, ask "Would a senior engineer reviewing this code in a PR agree this must be fixed before merging?" If the answer is not a clear YES, discard the finding.
-- The goal is to match what a thoughtful, experienced human reviewer would flag — not to demonstrate comprehensive knowledge of every possible concern.`;
+- The goal is to match what a thoughtful, experienced human reviewer would flag — not to demonstrate comprehensive knowledge of every possible concern.
+- SINGLE-FILE LIMITATION: You are reviewing a code snippet, not a complete project. Missing tests, missing docs, missing middleware, missing configs, missing CI/CD, missing logging setup — these are EXPECTED in a single-file review. Only flag what is WRONG in the code present, not what is ABSENT from the project.
+- FINAL GATE: If your evaluation produces findings for a code snippet that uses established libraries correctly, has proper error handling, follows language idioms, and contains no security vulnerabilities — your findings are almost certainly false positives. Discard them and report ZERO findings.`;
 // ─── Criteria Extraction ─────────────────────────────────────────────────────
 /**
  * Extract only the unique evaluation criteria from a judge's systemPrompt,

package/dist/types.d.ts CHANGED Viewed

@@ -313,6 +313,45 @@ export interface JudgesConfig {
         url?: string;
         headers?: Record<string, string>;
     };
+    /**
+     * Regulatory frameworks in scope for this project. When set, findings that
+     * cite ONLY out-of-scope frameworks are suppressed, and in-scope findings
+     * are elevated to ensure visibility.
+     *
+     * If not set, all regulatory findings are reported (no filtering).
+     *
+     * Supported values: "GDPR", "CCPA", "HIPAA", "PCI-DSS", "SOC2", "SOX",
+     * "COPPA", "FERPA", "FedRAMP", "NIST", "ISO27001", "ePrivacy", "DORA",
+     * "NIS2", "EU-AI-Act", "LGPD", "PIPEDA"
+     *
+     * Example:
+     * ```json
+     * { "regulatoryScope": ["GDPR", "PCI-DSS", "SOC2"] }
+     * ```
+     */
+    regulatoryScope?: string[];
+    /**
+     * Consensus suppression threshold (0–1). When set, if at least this
+     * fraction of judges report zero findings for a file, findings from
+     * the remaining minority judges are suppressed as outliers.
+     *
+     * This reduces false positives from judges that are structurally prone
+     * to over-flagging clean code. A value of 0.7 means "if 70% of judges
+     * agree the code is clean, suppress the other 30%."
+     *
+     * Default: not set (no consensus suppression).
+     *
+     * Recommended values:
+     * - `0.7` — moderate: suppresses when most judges agree (good for CI)
+     * - `0.8` — conservative: only suppresses with strong consensus
+     * - `0.6` — aggressive: suppresses with slight majority
+     *
+     * Example:
+     * ```json
+     * { "consensusThreshold": 0.7 }
+     * ```
+     */
+    consensusThreshold?: number;
 }
 /**
  * A user-defined pattern-based rule for business logic validation.
@@ -613,6 +652,48 @@ export interface ReviewDecision {
     /** Top blocking issues (up to 3 critical/high findings) */
     blockingIssues: string[];
 }
+/**
+ * A finding categorized for the human focus guide.
+ *
+ * Items of this shape populate the `trust` and `verify` lists of
+ * `HumanFocusGuide`.
+ */
+export interface FocusItem {
+    /** Rule ID of the finding (e.g. "SEC-001") */
+    ruleId: string;
+    /** Short title of the finding */
+    title: string;
+    /** Severity level of the finding */
+    severity: Severity;
+    /** Confidence score in the range 0-1 */
+    confidence: number;
+    /** Line numbers, if available; omitted otherwise */
+    lineNumbers?: number[];
+    /** Why this item was placed in its bucket */
+    reason: string;
+}
+/**
+ * An area the automated analysis could not evaluate — requires human judgment.
+ *
+ * Entries of this shape populate the `blindSpots` list of `HumanFocusGuide`.
+ */
+export interface BlindSpot {
+    /** Category label for the area (e.g. "Business Logic", "Architectural Fit") */
+    area: string;
+    /** Description of what the human reviewer should look for in this area */
+    guidance: string;
+    /** Optional: specific lines or patterns that triggered this recommendation */
+    triggers?: string[];
+}
+/**
+ * Human Focus Guide — directs human reviewers to the areas where their
+ * attention adds the most value beyond what automated analysis provides.
+ *
+ * Findings are split into two lists (`trust` and `verify`), accompanied by a
+ * list of blind spots and a prose summary.
+ */
+export interface HumanFocusGuide {
+    /** High-confidence, evidence-backed findings the reviewer can trust */
+    trust: FocusItem[];
+    /** Lower-confidence or absence-based findings that need human verification */
+    verify: FocusItem[];
+    /** Areas the automated analysis cannot evaluate — human judgment required */
+    blindSpots: BlindSpot[];
+    /** One-paragraph summary addressed to the reviewer */
+    summary: string;
+}
 /**
  * The combined result from the full tribunal panel.
  */
@@ -651,6 +732,12 @@ export interface TribunalVerdict {
      * act as a primary code reviewer rather than just a warning list.
      */
     reviewDecision?: ReviewDecision;
+    /**
+     * Human Focus Guide — directs human reviewers to the areas where their
+     * attention adds the most value beyond what automated analysis provides.
+     * Categorizes findings into trust/verify/blind-spots buckets.
+     */
+    humanFocusGuide?: HumanFocusGuide;
     /**
      * AI model detection escalation. Present when the model-fingerprint judge
      * detects AI-generated code patterns (MFPR-* rules). Downstream consumers

package/judgesrc.schema.json CHANGED Viewed

@@ -88,6 +88,20 @@
       "type": "array",
       "items": { "type": "string" },
       "description": "Plugin module specifiers (npm packages or relative file paths) that export custom JudgeDefinition arrays. Each module must export { judges: JudgeDefinition[] } or a default export."
+    },
+    "regulatoryScope": {
+      "type": "array",
+      "items": {
+        "type": "string",
+        "enum": ["GDPR", "CCPA", "HIPAA", "PCI-DSS", "SOC2", "SOX", "COPPA", "FERPA", "FedRAMP", "NIST", "ISO27001", "ePrivacy", "DORA", "NIS2", "EU-AI-Act", "LGPD", "PIPEDA"]
+      },
+      "description": "Regulatory frameworks in scope for this project. Findings citing ONLY out-of-scope frameworks are suppressed. If omitted, all regulatory findings are reported."
+    },
+    "consensusThreshold": {
+      "type": "number",
+      "minimum": 0,
+      "maximum": 1,
+      "description": "Consensus suppression threshold (0-1). If this fraction of judges report zero findings, minority findings are suppressed. Recommended: 0.7 (moderate), 0.8 (conservative). If omitted, no consensus suppression is applied."
     }
   },
   "additionalProperties": false

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kevinrabun/judges",
-  "version": "3.125.0",
+  "version": "3.126.1",
   "description": "45 specialized judges that evaluate AI-generated code for security, cost, and quality.",
   "mcpName": "io.github.KevinRabun/judges",
   "type": "module",
@@ -145,7 +145,7 @@
     "zod": "^4.3.6"
   },
   "devDependencies": {
-    "@anthropic-ai/sdk": "^0.80.0",
+    "@anthropic-ai/sdk": "^0.81.0",
     "@eslint/js": "^10.0.1",
     "@types/node": "^25.3.0",
     "@typescript-eslint/eslint-plugin": "^8.56.1",

package/server.json CHANGED Viewed

@@ -16,12 +16,12 @@
       "mimeType": "image/png"
     }
   ],
-  "version": "3.125.0",
+  "version": "3.126.1",
   "packages": [
     {
       "registryType": "npm",
       "identifier": "@kevinrabun/judges",
-      "version": "3.125.0",
+      "version": "3.126.1",
       "transport": {
         "type": "stdio"
       }