npm - @kevinrabun/judges-cli - Versions diffs - 3.124.5 → 3.126.0 - Mend

@kevinrabun/judges-cli 3.124.5 → 3.126.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/agents/accessibility.judge.md +1 -1
package/agents/agent-instructions.judge.md +1 -1
package/agents/ai-code-safety.judge.md +10 -1
package/agents/api-design.judge.md +1 -1
package/agents/authentication.judge.md +1 -1
package/agents/backwards-compatibility.judge.md +1 -1
package/agents/caching.judge.md +1 -1
package/agents/ci-cd.judge.md +1 -1
package/agents/cloud-readiness.judge.md +1 -1
package/agents/code-structure.judge.md +1 -1
package/agents/compliance.judge.md +1 -1
package/agents/concurrency.judge.md +1 -1
package/agents/configuration-management.judge.md +1 -1
package/agents/cost-effectiveness.judge.md +9 -1
package/agents/cybersecurity.judge.md +1 -1
package/agents/data-security.judge.md +1 -1
package/agents/data-sovereignty.judge.md +1 -1
package/agents/database.judge.md +1 -1
package/agents/dependency-health.judge.md +1 -1
package/agents/documentation.judge.md +1 -1
package/agents/error-handling.judge.md +1 -1
package/agents/ethics-bias.judge.md +1 -1
package/agents/framework-safety.judge.md +9 -1
package/agents/hallucination-detection.judge.md +1 -1
package/agents/iac-security.judge.md +1 -1
package/agents/intent-alignment.judge.md +1 -1
package/agents/internationalization.judge.md +1 -1
package/agents/logging-privacy.judge.md +1 -1
package/agents/logic-review.judge.md +8 -0
package/agents/maintainability.judge.md +10 -1
package/agents/observability.judge.md +1 -1
package/agents/performance.judge.md +1 -1
package/agents/portability.judge.md +1 -1
package/agents/rate-limiting.judge.md +1 -1
package/agents/reliability.judge.md +1 -1
package/agents/scalability.judge.md +1 -1
package/agents/security.judge.md +1 -1
package/agents/software-practices.judge.md +1 -1
package/agents/testing.judge.md +1 -1
package/agents/ux.judge.md +1 -1
package/dist/api.d.ts +2 -1
package/dist/api.js +2 -0
package/dist/cli-formatters.js +38 -0
package/dist/cli.js +27 -1
package/dist/commands/llm-benchmark.js +18 -5
package/dist/evaluators/index.js +163 -1
package/dist/evaluators/shared.js +33 -0
package/dist/judges/accessibility.js +1 -1
package/dist/judges/agent-instructions.js +1 -1
package/dist/judges/ai-code-safety.js +10 -1
package/dist/judges/api-design.js +1 -1
package/dist/judges/authentication.js +1 -1
package/dist/judges/backwards-compatibility.js +1 -1
package/dist/judges/caching.js +1 -1
package/dist/judges/ci-cd.js +1 -1
package/dist/judges/cloud-readiness.js +1 -1
package/dist/judges/code-structure.js +1 -1
package/dist/judges/compliance.js +1 -1
package/dist/judges/concurrency.js +1 -1
package/dist/judges/configuration-management.js +1 -1
package/dist/judges/cost-effectiveness.js +9 -1
package/dist/judges/cybersecurity.js +1 -1
package/dist/judges/data-security.js +1 -1
package/dist/judges/data-sovereignty.js +1 -1
package/dist/judges/database.js +1 -1
package/dist/judges/dependency-health.js +1 -1
package/dist/judges/documentation.js +1 -1
package/dist/judges/error-handling.js +1 -1
package/dist/judges/ethics-bias.js +1 -1
package/dist/judges/framework-safety.js +9 -1
package/dist/judges/hallucination-detection.js +1 -1
package/dist/judges/iac-security.js +1 -1
package/dist/judges/intent-alignment.js +1 -1
package/dist/judges/internationalization.js +1 -1
package/dist/judges/logging-privacy.js +1 -1
package/dist/judges/logic-review.js +9 -1
package/dist/judges/maintainability.js +10 -1
package/dist/judges/observability.js +1 -1
package/dist/judges/performance.js +1 -1
package/dist/judges/portability.js +1 -1
package/dist/judges/rate-limiting.js +1 -1
package/dist/judges/reliability.js +1 -1
package/dist/judges/scalability.js +1 -1
package/dist/judges/security.js +1 -1
package/dist/judges/software-practices.js +1 -1
package/dist/judges/testing.js +1 -1
package/dist/judges/ux.js +1 -1
package/dist/regulatory-scope.d.ts +27 -0
package/dist/regulatory-scope.js +181 -0
package/dist/tools/prompts.d.ts +1 -1
package/dist/tools/prompts.js +3 -1
package/dist/types.d.ts +87 -0
package/package.json +1 -1

package/dist/judges/maintainability.js CHANGED Viewed

@@ -36,11 +36,20 @@ FALSE POSITIVE AVOIDANCE:
 - Do NOT flag configuration files, data files, or build scripts for code maintainability issues.
 - Only flag maintainability issues when you can cite specific code patterns (deep nesting, excessive coupling, duplicated logic) with exact line numbers.
+CLEAN CODE RECOGNITION (if ALL of the following are true, report ZERO findings):
+- Functions/methods have clear single responsibilities and reasonable length
+- Naming is consistent and self-documenting
+- No deep nesting (>3 levels) or excessive cyclomatic complexity
+- No copy-pasted logic blocks
+- No magic numbers in business logic (configuration constants are fine)
+- Standard library and framework patterns used idiomatically
+- Code reads top-to-bottom without requiring cross-referencing
 ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the code is unmaintainable and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the code is maintainable. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzeMaintainability,
 };
 defaultRegistry.register(maintainabilityJudge);

package/dist/judges/observability.js CHANGED Viewed

@@ -48,7 +48,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the code is unobservable and will be impossible to debug in production. Actively hunt for monitoring gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the code is observable. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzeObservability,
 };
 defaultRegistry.register(observabilityJudge);

package/dist/judges/performance.js CHANGED Viewed

@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the code has performance problems and actively hunt for bottlenecks. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the code is performant. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzePerformance,
 };
 defaultRegistry.register(performanceJudge);

package/dist/judges/portability.js CHANGED Viewed

@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the code is not portable and actively hunt for platform dependencies. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the code is portable. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzePortability,
 };
 defaultRegistry.register(portabilityJudge);

package/dist/judges/rate-limiting.js CHANGED Viewed

@@ -49,7 +49,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume rate limiting is absent or insufficient and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean rate limiting is adequate. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzeRateLimiting,
 };
 defaultRegistry.register(rateLimitingJudge);

package/dist/judges/reliability.js CHANGED Viewed

@@ -51,7 +51,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the code will fail in production and actively hunt for reliability gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the code is reliable. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzeReliability,
 };
 defaultRegistry.register(reliabilityJudge);

package/dist/judges/scalability.js CHANGED Viewed

@@ -46,7 +46,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the code will not scale and actively hunt for bottlenecks. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the code will scale. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzeScalability,
 };
 defaultRegistry.register(scalabilityJudge);

package/dist/judges/security.js CHANGED Viewed

@@ -58,7 +58,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the code has security vulnerabilities and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the code is secure. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzeSecurity,
 };
 defaultRegistry.register(securityJudge);

package/dist/judges/software-practices.js CHANGED Viewed

@@ -50,7 +50,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the code has engineering quality problems and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the code follows best practices. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzeSoftwarePractices,
 };
 defaultRegistry.register(softwarePracticesJudge);

package/dist/judges/testing.js CHANGED Viewed

@@ -48,7 +48,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the test coverage is insufficient and actively hunt for gaps. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the code is well-tested. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzeTesting,
 };
 defaultRegistry.register(testingJudge);

package/dist/judges/ux.js CHANGED Viewed

@@ -40,7 +40,7 @@ ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the user experience is poor and actively hunt for problems. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.
 - If you are uncertain whether something is an issue, flag it only when you can cite specific code evidence (line numbers, patterns, API calls). Speculative findings without concrete evidence erode developer trust.
-- Absence of findings does not mean the UX is good. It means your analysis reached its limits. State this explicitly.`,
+- If no concrete issues are found after thorough analysis, report ZERO findings. An empty findings list is the correct output for well-written code \u2014 do not manufacture findings to fill the report.`,
     analyze: analyzeUx,
 };
 defaultRegistry.register(uxJudge);

package/dist/regulatory-scope.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * Regulatory Scope — Framework-aware finding filtering.
+ *
+ * When `regulatoryScope` is set in `.judgesrc`, findings whose `reference`
+ * or `description` text cites ONLY out-of-scope frameworks are suppressed.
+ * Findings that cite at least one in-scope framework (or mention no
+ * recognized framework at all) are kept.
+ */
+import type { Finding } from "./types.js";
+/**
+ * Look up supported framework IDs for listing/validation.
+ *
+ * @returns One entry per supported framework, carrying its `id` and a
+ * human-readable `description`.
+ */
+export declare function getSupportedFrameworks(): Array<{
+    id: string;
+    description: string;
+}>;
+/**
+ * Filter findings based on `regulatoryScope`. Findings that cite ONLY
+ * out-of-scope frameworks are suppressed. Findings with no regulatory
+ * reference or with at least one in-scope framework are kept.
+ *
+ * Framework IDs in `scope` are compared case-insensitively against the
+ * supported framework list; IDs that are not in that list are ignored.
+ * An empty or missing `scope` disables filtering, so every finding is
+ * returned and `suppressed` is 0.
+ *
+ * @param findings - All findings from the tribunal
+ * @param scope - Array of framework IDs (e.g. ["GDPR", "PCI-DSS"])
+ * @returns Object with kept findings and count of suppressed findings
+ */
+export declare function filterByRegulatoryScope(findings: Finding[], scope: string[]): {
+    findings: Finding[];
+    suppressed: number;
+};

package/dist/regulatory-scope.js ADDED Viewed

@@ -0,0 +1,181 @@
+/**
+ * Regulatory Scope — Framework-aware finding filtering.
+ *
+ * When `regulatoryScope` is set in `.judgesrc`, findings whose `reference`
+ * or `description` text cites ONLY out-of-scope frameworks are suppressed.
+ * Findings that cite at least one in-scope framework (or mention no
+ * recognized framework at all) are kept.
+ */
+const FRAMEWORKS = [
+    {
+        id: "GDPR",
+        aliases: [
+            "gdpr",
+            "general data protection",
+            "article 5",
+            "article 6",
+            "article 8",
+            "article 17",
+            "article 22",
+            "article 32",
+            "chapter v",
+            "data protection regulation",
+        ],
+        description: "EU General Data Protection Regulation",
+    },
+    {
+        id: "CCPA",
+        aliases: ["ccpa", "california consumer privacy", "cpra", "right to delete"],
+        description: "California Consumer Privacy Act",
+    },
+    {
+        id: "HIPAA",
+        aliases: [
+            "hipaa",
+            "health insurance portability",
+            "phi",
+            "protected health information",
+            "45 cfr",
+            "security rule",
+            "minimum necessary",
+        ],
+        description: "Health Insurance Portability and Accountability Act",
+    },
+    {
+        id: "PCI-DSS",
+        aliases: ["pci", "pci dss", "pci-dss", "payment card", "cardholder data", "requirement 3"],
+        description: "Payment Card Industry Data Security Standard",
+    },
+    {
+        id: "SOC2",
+        aliases: ["soc 2", "soc2", "trust service", "cc6", "cc7"],
+        description: "SOC 2 Trust Service Criteria",
+    },
+    {
+        id: "SOX",
+        aliases: ["sox", "sarbanes-oxley", "sarbanes oxley"],
+        description: "Sarbanes-Oxley Act",
+    },
+    {
+        id: "COPPA",
+        aliases: ["coppa", "children.*online privacy", "age appropriate design"],
+        description: "Children's Online Privacy Protection Act",
+    },
+    {
+        id: "FERPA",
+        aliases: ["ferpa", "family educational rights"],
+        description: "Family Educational Rights and Privacy Act",
+    },
+    {
+        id: "FedRAMP",
+        aliases: ["fedramp", "fed ramp", "federal risk"],
+        description: "Federal Risk and Authorization Management Program",
+    },
+    {
+        id: "NIST",
+        aliases: ["nist", "sp 800", "800-53", "800-63", "800-131", "800-122", "ssdf"],
+        description: "NIST Cybersecurity Framework & Special Publications",
+    },
+    {
+        id: "ISO27001",
+        aliases: ["iso 27001", "iso27001", "iso/iec 27001"],
+        description: "ISO/IEC 27001 Information Security Management",
+    },
+    {
+        id: "ePrivacy",
+        aliases: ["eprivacy", "e-privacy", "cookie.*directive", "eprivacy directive"],
+        description: "EU ePrivacy Directive",
+    },
+    {
+        id: "DORA",
+        aliases: ["dora", "digital operational resilience"],
+        description: "Digital Operational Resilience Act",
+    },
+    {
+        id: "NIS2",
+        aliases: ["nis2", "nis 2", "network.*information.*security"],
+        description: "Network and Information Security Directive 2",
+    },
+    {
+        id: "EU-AI-Act",
+        aliases: ["eu ai act", "ai act", "artificial intelligence act"],
+        description: "EU Artificial Intelligence Act",
+    },
+    {
+        id: "LGPD",
+        aliases: ["lgpd", "lei geral.*prote"],
+        description: "Brazil General Data Protection Law",
+    },
+    {
+        id: "PIPEDA",
+        aliases: ["pipeda", "personal information protection.*electronic"],
+        description: "Canada Personal Information Protection and Electronic Documents Act",
+    },
+];
+/** Look up supported framework IDs for listing/validation. */
+export function getSupportedFrameworks() {
+    return FRAMEWORKS.map((f) => ({ id: f.id, description: f.description }));
+}
+// ─── Framework Detection in Finding References ──────────────────────────────
+/**
+ * Detect which regulatory frameworks a finding references.
+ * Checks the `reference` and `description` fields for framework aliases.
+ */
+function detectFrameworks(finding) {
+    const detected = new Set();
+    const text = `${finding.reference ?? ""} ${finding.description ?? ""}`.toLowerCase();
+    if (!text.trim())
+        return detected;
+    for (const fw of FRAMEWORKS) {
+        for (const alias of fw.aliases) {
+            if (text.includes(alias.toLowerCase())) {
+                detected.add(fw.id);
+                break;
+            }
+        }
+    }
+    return detected;
+}
+// ─── Regulatory Scope Filter ────────────────────────────────────────────────
+/**
+ * Filter findings based on `regulatoryScope`. Findings that cite ONLY
+ * out-of-scope frameworks are suppressed. Findings with no regulatory
+ * reference or with at least one in-scope framework are kept.
+ *
+ * @param findings - All findings from the tribunal
+ * @param scope - Array of framework IDs (e.g. ["GDPR", "PCI-DSS"])
+ * @returns Object with kept findings and count of suppressed findings
+ */
+export function filterByRegulatoryScope(findings, scope) {
+    if (!scope || scope.length === 0) {
+        return { findings, suppressed: 0 };
+    }
+    const scopeSet = new Set(scope.map((s) => s.toUpperCase()));
+    // Normalize framework IDs (e.g. "pci-dss" → "PCI-DSS")
+    const normalizedScope = new Set();
+    for (const id of scopeSet) {
+        const fw = FRAMEWORKS.find((f) => f.id.toUpperCase() === id);
+        if (fw)
+            normalizedScope.add(fw.id);
+    }
+    let suppressed = 0;
+    const kept = [];
+    for (const finding of findings) {
+        const cited = detectFrameworks(finding);
+        if (cited.size === 0) {
+            // No regulatory reference — keep (it's a general code quality finding)
+            kept.push(finding);
+        }
+        else {
+            // Has regulatory reference — keep only if at least one is in scope
+            const hasInScope = [...cited].some((id) => normalizedScope.has(id));
+            if (hasInScope) {
+                kept.push(finding);
+            }
+            else {
+                suppressed++;
+            }
+        }
+    }
+    return { findings: kept, suppressed };
+}

package/dist/tools/prompts.d.ts CHANGED Viewed

@@ -4,7 +4,7 @@ export declare const SHARED_ADVERSARIAL_MANDATE = "ADVERSARIAL MANDATE (applies
 /** Precision override — ensures evidence-based findings. */
 export declare const PRECISION_MANDATE = "PRECISION MANDATE (this section OVERRIDES the adversarial mandate whenever they conflict):\n- Every finding MUST cite specific code evidence: exact line numbers, API calls, variable names, or patterns. Findings without concrete evidence MUST be discarded \u2014 no exceptions.\n- Do NOT flag the absence of a feature or pattern unless you can identify the specific code location where it SHOULD have been implemented and explain WHY it is required for THIS code.\n- Speculative, hypothetical, or \"just in case\" findings erode developer trust. Only flag issues you are confident exist in the actual code.\n- Prefer fewer, high-confidence findings over many uncertain ones. Quality of findings matters more than quantity.\n- If the code is genuinely well-written with no real issues, reporting ZERO findings is the correct and expected behavior. Do not manufacture findings to avoid an empty report.\n- Clean, well-structured code exists. Acknowledge it by not forcing false issues.\n- RECOGNIZE SECURE PATTERNS: Code using established security libraries and patterns (e.g. helmet, bcrypt/argon2, parameterized queries, input validation, CSRF tokens, rate limiters, proper TLS) is correctly implementing security. Do NOT flag these as insufficient or suggest alternatives unless a concrete vulnerability exists.\n- SCOPE LIMITATION: Only evaluate code that is actually present. Do NOT flag missing features, tests, logging, documentation, error handling, or infrastructure that may exist in other files. Evaluate what IS provided, not what COULD be elsewhere.\n- CONFIDENCE THRESHOLD: Only report findings where you are highly confident (\u226580%) that a real, exploitable issue or concrete deficiency exists in the provided code. When in doubt, do NOT report.\n- FALSE POSITIVE COST: A false positive is MORE harmful than a missed finding. False positives erode developer trust and cause real issues to be ignored. 
When uncertain, silence is better than a questionable finding.\n\nCOMMON FALSE POSITIVE PATTERNS (do NOT report these):\n- ERR: Do not flag error handling as inadequate when try/catch blocks, validation, or error middleware are present. Missing error handling in a utility function that is clearly called within a guarded context is NOT a finding.\n- LOGIC: Do not flag logic issues for standard patterns (early returns, guard clauses, switch/case with default). Only flag logic errors when you can demonstrate a concrete input that produces an incorrect output.\n- MAINT: Do not flag maintainability concerns for code that follows the language's established idioms. Complexity or length alone is NOT a finding unless it introduces a concrete maintenance burden.\n- SEC: Do not flag security issues when established security libraries (helmet, cors, bcrypt, parameterized queries) are correctly used. \"Could be stronger\" is NOT a vulnerability.\n- STRUCT: Do not flag code structure preferences (file organization, naming conventions) unless they create a concrete deficiency like circular dependencies or unreachable code.";
 /** Clean code gate — explicit instructions when code quality is high. */
-export declare const CLEAN_CODE_GATE = "CLEAN CODE GATE (applies AFTER individual judge evaluation):\n- Before reporting findings, assess the OVERALL quality of the code. If the code follows established conventions, uses appropriate patterns, handles errors, and has no concrete vulnerabilities or deficiencies, the expected output is ZERO findings across ALL judges.\n- Do NOT report stylistic preferences, alternative approaches, or \"nice to have\" improvements as findings. These are opinions, not defects.\n- Do NOT report findings about missing functionality that is likely in other files (tests, configs, middleware, error handlers, logging setup).\n- Do NOT report theoretical risks that require assumptions about the runtime environment, deployment configuration, or code outside the provided snippet.\n- SELF-CHECK before finalizing: For each finding, ask \"Would a senior engineer reviewing this code in a PR agree this must be fixed before merging?\" If the answer is not a clear YES, discard the finding.\n- The goal is to match what a thoughtful, experienced human reviewer would flag \u2014 not to demonstrate comprehensive knowledge of every possible concern.";
+export declare const CLEAN_CODE_GATE = "CLEAN CODE GATE (applies AFTER individual judge evaluation):\n- Before reporting findings, assess the OVERALL quality of the code. If the code follows established conventions, uses appropriate patterns, handles errors, and has no concrete vulnerabilities or deficiencies, the expected output is ZERO findings across ALL judges.\n- Do NOT report stylistic preferences, alternative approaches, or \"nice to have\" improvements as findings. These are opinions, not defects.\n- Do NOT report findings about missing functionality that is likely in other files (tests, configs, middleware, error handlers, logging setup).\n- Do NOT report theoretical risks that require assumptions about the runtime environment, deployment configuration, or code outside the provided snippet.\n- SELF-CHECK before finalizing: For each finding, ask \"Would a senior engineer reviewing this code in a PR agree this must be fixed before merging?\" If the answer is not a clear YES, discard the finding.\n- The goal is to match what a thoughtful, experienced human reviewer would flag \u2014 not to demonstrate comprehensive knowledge of every possible concern.\n- SINGLE-FILE LIMITATION: You are reviewing a code snippet, not a complete project. Missing tests, missing docs, missing middleware, missing configs, missing CI/CD, missing logging setup \u2014 these are EXPECTED in a single-file review. Only flag what is WRONG in the code present, not what is ABSENT from the project.\n- FINAL GATE: If your evaluation produces findings for a code snippet that uses established libraries correctly, has proper error handling, follows language idioms, and contains no security vulnerabilities \u2014 your findings are almost certainly false positives. Discard them and report ZERO findings.";
 /**
  * Extract only the unique evaluation criteria from a judge's systemPrompt,
  * stripping the persona introduction line, the ADVERSARIAL MANDATE block,

package/dist/tools/prompts.js CHANGED Viewed

@@ -44,7 +44,9 @@ export const CLEAN_CODE_GATE = `CLEAN CODE GATE (applies AFTER individual judge
 - Do NOT report findings about missing functionality that is likely in other files (tests, configs, middleware, error handlers, logging setup).
 - Do NOT report theoretical risks that require assumptions about the runtime environment, deployment configuration, or code outside the provided snippet.
 - SELF-CHECK before finalizing: For each finding, ask "Would a senior engineer reviewing this code in a PR agree this must be fixed before merging?" If the answer is not a clear YES, discard the finding.
-- The goal is to match what a thoughtful, experienced human reviewer would flag — not to demonstrate comprehensive knowledge of every possible concern.`;
+- The goal is to match what a thoughtful, experienced human reviewer would flag — not to demonstrate comprehensive knowledge of every possible concern.
+- SINGLE-FILE LIMITATION: You are reviewing a code snippet, not a complete project. Missing tests, missing docs, missing middleware, missing configs, missing CI/CD, missing logging setup — these are EXPECTED in a single-file review. Only flag what is WRONG in the code present, not what is ABSENT from the project.
+- FINAL GATE: If your evaluation produces findings for a code snippet that uses established libraries correctly, has proper error handling, follows language idioms, and contains no security vulnerabilities — your findings are almost certainly false positives. Discard them and report ZERO findings.`;
 // ─── Criteria Extraction ─────────────────────────────────────────────────────
 /**
  * Extract only the unique evaluation criteria from a judge's systemPrompt,

package/dist/types.d.ts CHANGED Viewed

@@ -313,6 +313,45 @@ export interface JudgesConfig {
         url?: string;
         headers?: Record<string, string>;
     };
+    /**
+     * Regulatory frameworks in scope for this project. When set, findings that
+     * cite ONLY out-of-scope frameworks are suppressed, and in-scope findings
+     * are elevated to ensure visibility. As worded, a finding that cites at
+     * least one in-scope framework is not suppressed by this rule.
+     *
+     * If not set, all regulatory findings are reported (no filtering).
+     *
+     * Supported values: "GDPR", "CCPA", "HIPAA", "PCI-DSS", "SOC2", "SOX",
+     * "COPPA", "FERPA", "FedRAMP", "NIST", "ISO27001", "ePrivacy", "DORA",
+     * "NIS2", "EU-AI-Act", "LGPD", "PIPEDA"
+     *
+     * The property is typed as `string[]`, so the compiler does not check
+     * entries against the list above.
+     * NOTE(review): how unsupported or differently-cased values are treated
+     * is not visible in this declaration — confirm against the implementation.
+     *
+     * Example:
+     * ```json
+     * { "regulatoryScope": ["GDPR", "PCI-DSS", "SOC2"] }
+     * ```
+     */
+    regulatoryScope?: string[];
+    /**
+     * Consensus suppression threshold: a fraction in the range 0–1. When set,
+     * if at least this fraction of judges report zero findings for a file,
+     * findings from the remaining minority judges are suppressed as outliers.
+     *
+     * This reduces false positives from judges that are structurally prone
+     * to over-flagging clean code. A value of 0.7 means "if at least 70% of
+     * judges agree the code is clean, suppress the findings of the rest."
+     * Lower values are easier to satisfy and therefore suppress more often.
+     *
+     * Default: not set (no consensus suppression).
+     *
+     * Recommended values, from most to least suppression:
+     * - `0.6` — aggressive: suppresses with slight majority
+     * - `0.7` — moderate: suppresses when most judges agree (good for CI)
+     * - `0.8` — conservative: only suppresses with strong consensus
+     *
+     * NOTE(review): the type is plain `number`; how values outside 0–1 are
+     * handled is not visible in this declaration.
+     *
+     * Example:
+     * ```json
+     * { "consensusThreshold": 0.7 }
+     * ```
+     */
+    consensusThreshold?: number;
 }
 /**
  * A user-defined pattern-based rule for business logic validation.
@@ -613,6 +652,48 @@ export interface ReviewDecision {
     /** Top blocking issues (up to 3 critical/high findings) */
     blockingIssues: string[];
 }
+/**
+ * A finding categorized for the human focus guide. Instances populate the
+ * `trust` and `verify` lists of `HumanFocusGuide`; those lists are the
+ * "buckets" referred to by `reason` below.
+ */
+export interface FocusItem {
+    /** Rule ID of the finding (e.g. "SEC-001") */
+    ruleId: string;
+    /** Short title of the finding */
+    title: string;
+    /** Severity level of the finding */
+    severity: Severity;
+    /** Confidence score, documented as a value between 0 and 1 */
+    confidence: number;
+    /** Line numbers for the finding; omitted when not available */
+    lineNumbers?: number[];
+    /** Why this item was placed in its bucket (`trust` or `verify`) */
+    reason: string;
+}
+/**
+ * An area the automated analysis could not evaluate — requires human judgment.
+ * Entries of this type make up the `blindSpots` list of `HumanFocusGuide`.
+ */
+export interface BlindSpot {
+    /** Category label for the area (e.g. "Business Logic", "Architectural Fit") */
+    area: string;
+    /** Description of what the human reviewer should look for in this area */
+    guidance: string;
+    /** Optional: specific lines or patterns that triggered this recommendation; may be omitted */
+    triggers?: string[];
+}
+/**
+ * Human Focus Guide — directs human reviewers to the areas where their
+ * attention adds the most value beyond what automated analysis provides.
+ *
+ * Findings are split between two `FocusItem` lists (`trust` and `verify`);
+ * `blindSpots` holds `BlindSpot` entries rather than findings.
+ */
+export interface HumanFocusGuide {
+    /** Findings (as `FocusItem`s) that are high-confidence and evidence-backed, which the reviewer can trust */
+    trust: FocusItem[];
+    /** Findings (as `FocusItem`s) that are lower-confidence or absence-based and need human verification */
+    verify: FocusItem[];
+    /** Areas the automated analysis cannot evaluate — human judgment required */
+    blindSpots: BlindSpot[];
+    /** One-paragraph summary for the reviewer */
+    summary: string;
+}
 /**
  * The combined result from the full tribunal panel.
  */
@@ -651,6 +732,12 @@ export interface TribunalVerdict {
      * act as a primary code reviewer rather than just a warning list.
      */
     reviewDecision?: ReviewDecision;
+    /**
+     * Human Focus Guide — directs human reviewers to the areas where their
+     * attention adds the most value beyond what automated analysis provides.
+     * Findings are categorized into the `trust` and `verify` lists, while
+     * `blindSpots` lists areas the automated analysis could not evaluate.
+     * Optional: the property may be absent from a verdict.
+     */
+    humanFocusGuide?: HumanFocusGuide;
     /**
      * AI model detection escalation. Present when the model-fingerprint judge
      * detects AI-generated code patterns (MFPR-* rules). Downstream consumers

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kevinrabun/judges-cli",
-  "version": "3.124.5",
+  "version": "3.126.0",
   "description": "CLI wrapper for the Judges code review toolkit.",
   "type": "module",
   "main": "dist/cli.js",