npm - @kevinrabun/judges - Versions diffs - 3.115.4 → 3.117.0 - Mend

@kevinrabun/judges 3.115.4 → 3.117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114) hide show

package/agents/accessibility.judge.md +7 -0
package/agents/agent-instructions.judge.md +7 -0
package/agents/ai-code-safety.judge.md +7 -0
package/agents/api-contract.judge.md +7 -0
package/agents/api-design.judge.md +7 -0
package/agents/authentication.judge.md +7 -0
package/agents/backwards-compatibility.judge.md +7 -0
package/agents/caching.judge.md +7 -0
package/agents/ci-cd.judge.md +7 -0
package/agents/cloud-readiness.judge.md +7 -0
package/agents/concurrency.judge.md +7 -0
package/agents/configuration-management.judge.md +7 -0
package/agents/cybersecurity.judge.md +7 -0
package/agents/data-security.judge.md +7 -0
package/agents/dependency-health.judge.md +7 -0
package/agents/documentation.judge.md +7 -0
package/agents/error-handling.judge.md +7 -0
package/agents/ethics-bias.judge.md +7 -0
package/agents/false-positive-review.judge.md +12 -0
package/agents/framework-safety.judge.md +7 -0
package/agents/hallucination-detection.judge.md +13 -0
package/agents/iac-security.judge.md +7 -0
package/agents/intent-alignment.judge.md +13 -0
package/agents/logging-privacy.judge.md +7 -0
package/agents/maintainability.judge.md +7 -0
package/agents/multi-turn-coherence.judge.md +7 -0
package/agents/observability.judge.md +7 -0
package/agents/portability.judge.md +7 -0
package/agents/rate-limiting.judge.md +7 -0
package/agents/reliability.judge.md +7 -0
package/agents/security.judge.md +13 -0
package/agents/testing.judge.md +7 -0
package/agents/ux.judge.md +7 -0
package/dist/a2a-protocol.d.ts +136 -0
package/dist/a2a-protocol.js +218 -0
package/dist/api.d.ts +21 -3
package/dist/api.js +21 -1
package/dist/audit-trail.d.ts +245 -0
package/dist/audit-trail.js +257 -0
package/dist/commands/benchmark-advanced.js +51 -51
package/dist/commands/benchmark-ai-agents.js +16 -16
package/dist/commands/benchmark-compliance-ethics.js +12 -12
package/dist/commands/benchmark-expanded-2.js +2 -2
package/dist/commands/benchmark-expanded.js +2 -2
package/dist/commands/benchmark-infrastructure.js +12 -12
package/dist/commands/benchmark-languages.js +11 -11
package/dist/commands/benchmark-quality-ops.js +7 -7
package/dist/commands/benchmark-security-deep.js +9 -9
package/dist/commands/benchmark.js +1 -1
package/dist/commands/llm-benchmark-optimizer.d.ts +78 -0
package/dist/commands/llm-benchmark-optimizer.js +241 -0
package/dist/commands/llm-benchmark.d.ts +4 -2
package/dist/commands/llm-benchmark.js +40 -12
package/dist/escalation.d.ts +100 -0
package/dist/escalation.js +292 -0
package/dist/evaluation-session.d.ts +74 -0
package/dist/evaluation-session.js +152 -0
package/dist/evaluators/index.d.ts +23 -1
package/dist/evaluators/index.js +192 -3
package/dist/evaluators/judge-selector.d.ts +19 -0
package/dist/evaluators/judge-selector.js +141 -0
package/dist/evaluators/recall-boost.d.ts +27 -0
package/dist/evaluators/recall-boost.js +409 -0
package/dist/feedback-loop.d.ts +62 -0
package/dist/feedback-loop.js +179 -0
package/dist/index.js +2 -0
package/dist/judges/accessibility.js +7 -0
package/dist/judges/agent-instructions.js +7 -0
package/dist/judges/ai-code-safety.js +7 -0
package/dist/judges/api-contract.js +7 -0
package/dist/judges/api-design.js +7 -0
package/dist/judges/authentication.js +7 -0
package/dist/judges/backwards-compatibility.js +7 -0
package/dist/judges/caching.js +7 -0
package/dist/judges/ci-cd.js +7 -0
package/dist/judges/cloud-readiness.js +7 -0
package/dist/judges/concurrency.js +7 -0
package/dist/judges/configuration-management.js +7 -0
package/dist/judges/cybersecurity.js +7 -0
package/dist/judges/data-security.js +7 -0
package/dist/judges/dependency-health.js +7 -0
package/dist/judges/documentation.js +7 -0
package/dist/judges/error-handling.js +7 -0
package/dist/judges/ethics-bias.js +7 -0
package/dist/judges/false-positive-review.js +13 -1
package/dist/judges/framework-safety.js +7 -0
package/dist/judges/hallucination-detection.js +14 -1
package/dist/judges/iac-security.js +7 -0
package/dist/judges/intent-alignment.js +14 -1
package/dist/judges/logging-privacy.js +7 -0
package/dist/judges/maintainability.js +7 -0
package/dist/judges/multi-turn-coherence.js +7 -0
package/dist/judges/observability.js +7 -0
package/dist/judges/portability.js +7 -0
package/dist/judges/rate-limiting.js +7 -0
package/dist/judges/reliability.js +7 -0
package/dist/judges/security.js +14 -1
package/dist/judges/testing.js +7 -0
package/dist/judges/ux.js +7 -0
package/dist/review-conversation.d.ts +87 -0
package/dist/review-conversation.js +307 -0
package/dist/sast-integration.d.ts +112 -0
package/dist/sast-integration.js +215 -0
package/dist/tools/register-evaluation.js +208 -8
package/dist/tools/register-fix.js +24 -1
package/dist/tools/register-resources.d.ts +6 -0
package/dist/tools/register-resources.js +177 -0
package/dist/tools/register-review.js +26 -1
package/dist/tools/register-workflow.js +384 -11
package/dist/tools/validation.d.ts +13 -0
package/dist/tools/validation.js +77 -0
package/dist/types.d.ts +122 -0
package/package.json +25 -12
package/server.json +2 -2

package/dist/evaluators/recall-boost.js ADDED Viewed

@@ -0,0 +1,409 @@
+/**
+ * Recall Booster — Additional detection patterns for weak-recall categories
+ *
+ * This module provides supplementary pattern detection for judge categories
+ * where the deterministic evaluators have recall below 85%. It acts as
+ * a second-pass augmentation applied after the primary evaluator.
+ *
+ * Categories strengthened (by recall gap analysis):
+ * - hallucination-detection (46.2% → improved)
+ * - ci-cd (41.7% → improved)
+ * - internationalization (42.9% → improved)
+ * - cost-effectiveness (57.1% → improved)
+ * - documentation (63.6% → improved)
+ * - iac-security (66.7% → improved)
+ * - cloud/cloud-readiness (50-73% → improved)
+ */
+import { getLangFamily } from "./shared.js";
+// ─── Hallucination Detection Extras ──────────────────────────────────────────
+const EXTRA_HALLUCINATION_PATTERNS = [ // entries: { pattern, title, description, fix, languages }
+    // Python: common hallucinated built-in functions
+    {
+        pattern: /\bstr\.isinteger\s*\(/, // matches only the literal receiver name `str`
+        title: "Hallucinated Python str.isinteger()",
+        description: "Python str has no isinteger() method. LLMs confuse this with float.is_integer() or str.isdigit().",
+        fix: "Use str.isdigit() for digit check, or float(s).is_integer() for integer check.",
+        languages: ["python"],
+    },
+    {
+        pattern: /\blist\.contains\s*\(/, // matches only the literal receiver name `list`
+        title: "Hallucinated Python list.contains()",
+        description: "Python lists have no contains() method. Use the `in` operator instead.",
+        fix: "Replace `list.contains(x)` with `x in list`.",
+        languages: ["python"],
+    },
+    {
+        pattern: /\bdict\.has_key\s*\(/, // matches only the literal receiver name `dict`
+        title: "Deprecated/hallucinated dict.has_key()",
+        description: "dict.has_key() was removed in Python 3. LLMs trained on mixed Python 2/3 code still generate it.",
+        fix: "Use `key in dict` instead of `dict.has_key(key)`.",
+        languages: ["python"],
+    },
+    {
+        pattern: /\bimport\s+asyncio\b[\s\S]{0,200}\basyncio\.sleep_ms\s*\(/, // multi-line: needs text containing both the import and a call at most 200 chars later
+        title: "Hallucinated asyncio.sleep_ms()",
+        description: "asyncio has no sleep_ms(). LLMs confuse this with asyncio.sleep() which takes seconds.",
+        fix: "Use `await asyncio.sleep(ms / 1000)` for millisecond sleep.",
+        languages: ["python"],
+    },
+    // Node.js: fabricated API patterns
+    {
+        pattern: /\bprocess\.env\.getAll\s*\(/,
+        title: "Hallucinated process.env.getAll()",
+        description: "Node.js process.env has no getAll() method. It's a plain object.",
+        fix: "Use Object.entries(process.env) to get all environment variables.",
+        languages: ["javascript", "typescript"],
+    },
+    {
+        pattern: /\bJSON\.tryParse\s*\(/,
+        title: "Hallucinated JSON.tryParse()",
+        description: "JavaScript has no JSON.tryParse(). This is a common .NET/C# pattern hallucinated into JS.",
+        fix: "Wrap JSON.parse() in a try/catch block for safe parsing.",
+        languages: ["javascript", "typescript"],
+    },
+    {
+        pattern: /\bArray\.flatten\s*\(/, // static-call form only; `someArray.flatten(` is not matched
+        title: "Hallucinated Array.flatten()",
+        description: "JavaScript Array has no static flatten() method. The instance method is .flat().",
+        fix: "Use `array.flat()` or `array.flat(Infinity)` for deep flattening.",
+        languages: ["javascript", "typescript"],
+    },
+    // Go: common hallucinations
+    {
+        pattern: /\bstrings\.Reverse\s*\(/,
+        title: "Hallucinated strings.Reverse()",
+        description: "Go strings package has no Reverse function. LLMs frequently hallucinate this.",
+        fix: "Implement string reversal manually using rune conversion: []rune(s).",
+        languages: ["go"],
+    },
+    {
+        pattern: /\berrors\.Wrapf?\s*\(/, // NOTE(review): matches Wrap and Wrapf regardless of which `errors` package is imported — confirm that is intended
+        title: "Hallucinated errors.Wrap()",
+        description: "Go standard errors package has no Wrap function. This was from pkg/errors (deprecated).",
+        fix: 'Use fmt.Errorf("%w", err) for error wrapping (Go 1.13+).',
+        languages: ["go"],
+    },
+    // Rust: hallucinated trait methods
+    {
+        pattern: /\.to_str\(\)\s*\.unwrap\(\)/, // purely textual: the receiver's type is not inspected
+        title: "Potentially hallucinated .to_str().unwrap() chain",
+        description: "LLMs frequently chain .to_str().unwrap() on types that don't implement to_str(). Verify the type has this method.",
+        fix: "Consider using .to_string() or .as_str() depending on the actual type.",
+        languages: ["rust"],
+    },
+    // Java: hallucinated APIs
+    {
+        pattern: /\bString\.isEmpty\s*\(\s*\w+\s*\)/, // requires exactly one bare identifier as the argument
+        title: "Hallucinated static String.isEmpty()",
+        description: "Java String.isEmpty() is an instance method, not static. LLMs sometimes generate static calls.",
+        fix: "Use `str.isEmpty()` as an instance method call.",
+        languages: ["java"],
+    },
+];
+// ─── CI/CD Detection Extras ──────────────────────────────────────────────────
+const EXTRA_CICD_PATTERNS = [ // entries: { pattern, title, description, languages } — no per-entry fix text
+    {
+        pattern: /\bpipeline\b[\s\S]{0,100}checkout:\s*none/i, // multi-line: "pipeline" followed within 100 chars by "checkout: none"
+        title: "Pipeline skips source checkout",
+        description: "CI pipeline configured to skip source checkout. This may indicate a misconfiguration.",
+        languages: ["yaml"],
+    },
+    {
+        pattern: /\bcurl\b[\s\S]{0,50}\|\s*(?:bash|sh)\b/, // at most 50 chars (newlines included) between `curl` and the pipe
+        title: "Piping curl to shell in CI",
+        description: "Downloading and directly executing scripts via curl|bash is a supply-chain risk in CI/CD pipelines.",
+        languages: ["yaml", "bash", "dockerfile"],
+    },
+    {
+        pattern: /\b(?:npm|pip|cargo)\s+install\b[\s\S]{0,30}--no-verify/i, // flag must appear within 30 chars after `install`
+        title: "Package install with verification disabled",
+        description: "Installing packages with verification disabled weakens supply-chain integrity in CI.",
+        languages: ["yaml", "bash", "dockerfile"],
+    },
+    {
+        pattern: /\bsudo\s+.*\b(?:chmod\s+777|chmod\s+a\+rwx)\b/, // `.` does not cross newlines, so sudo and chmod must share a line
+        title: "Overly permissive chmod in CI script",
+        description: "Setting 777 permissions in CI/CD scripts creates security risks on shared runners.",
+        languages: ["yaml", "bash"],
+    },
+];
+// ─── Internationalization Detection Extras ────────────────────────────────────
+const EXTRA_I18N_PATTERNS = [ // entries: { pattern, title, description, languages } — no per-entry fix text
+    {
+        pattern: /\.toLocaleDateString\s*\(\s*\)/, // zero-argument calls only
+        title: "toLocaleDateString() without explicit locale",
+        description: "Calling toLocaleDateString() without a locale parameter uses the system default, producing inconsistent date formats across environments.",
+        languages: ["javascript", "typescript"],
+    },
+    {
+        pattern: /new\s+Intl\.NumberFormat\s*\(\s*\)/, // zero-argument constructor calls only
+        title: "Intl.NumberFormat without explicit locale",
+        description: "Creating NumberFormat without a locale uses system default, inconsistent across deployments.",
+        languages: ["javascript", "typescript"],
+    },
+    {
+        pattern: /\bcurrency\s*[:=]\s*["'](?:USD|EUR|GBP)["'][\s\S]{0,100}(?:format|display|render)/i, // only USD/EUR/GBP, and only when format/display/render follows within 100 chars
+        title: "Hardcoded currency code",
+        description: "Currency code is hardcoded rather than derived from user locale or configuration.",
+        languages: ["javascript", "typescript", "python", "java"],
+    },
+    {
+        pattern: /\.(?:trim|split|substring)\([\s\S]{0,30}(?:first|last)\s*name/i, // "first name"/"lastName" etc. within 30 chars after the call's opening paren
+        title: "Culturally-biased name parsing",
+        description: "Splitting names into first/last assumes Western naming conventions. Many cultures use single names, family-name-first order, or multiple name components.",
+        languages: ["javascript", "typescript", "python", "java", "csharp"],
+    },
+];
+// ─── Cost-Effectiveness Detection Extras ─────────────────────────────────────
+const EXTRA_COST_PATTERNS = [
+    {
+        pattern: /\bnew\s+Date\(\)[\s\S]{0,30}while\s*\(/,
+        title: "Busy-wait loop with Date() polling",
+        description: "Busy-wait loops waste CPU cycles and increase compute costs. Use async timers instead.",
+        languages: ["javascript", "typescript"],
+    },
+    {
+        pattern: /\bsetInterval\s*\([^,]+,\s*(?:100|50|10|1)\s*\)/,
+        title: "Very frequent interval (≤100ms)",
+        description: "Very frequent setInterval polling wastes CPU/battery. Consider event-driven approaches or longer intervals.",
+        languages: ["javascript", "typescript"],
+    },
+    {
+        pattern: /SELECT\s+\*\s+FROM[\s\S]{0,50}(?:JOIN|,\s*\w+)/i,
+        title: "SELECT * with JOINs",
+        description: "Using SELECT * with JOINs retrieves all columns from all joined tables, significantly increasing data transfer and memory costs.",
+        languages: ["sql", "python", "javascript", "typescript", "java", "csharp"],
+    },
+    {
+        pattern: /\.(?:map|forEach|filter)\s*\([\s\S]{0,100}\.(?:map|forEach|filter)\s*\(/,
+        title: "Nested array iterations",
+        description: "Chained .map/.filter/.forEach calls iterate the array multiple times. Consider combining into a single pass with .reduce().",
+        languages: ["javascript", "typescript"],
+    },
+];
+// ─── IaC Security Extras ─────────────────────────────────────────────────────
+const EXTRA_IAC_PATTERNS = [
+    {
+        pattern: /\bpublic_network_access_enabled\s*=\s*true/i,
+        title: "Public network access enabled on Azure resource",
+        description: "Enabling public network access exposes the resource to the internet. Use private endpoints instead.",
+        languages: ["terraform", "bicep"],
+    },
+    {
+        pattern: /\bingress\b[\s\S]{0,100}\bcidr_blocks\s*=\s*\[\s*["']0\.0\.0\.0\/0["']\s*\]/,
+        title: "Security group allows all inbound traffic (0.0.0.0/0)",
+        description: "Ingress rule allows traffic from any IP. Restrict to specific CIDR ranges.",
+        languages: ["terraform"],
+    },
+    {
+        pattern: /\bsku\b[\s\S]{0,30}(?:Basic|Free)\b/i,
+        title: "Using Basic/Free SKU in production IaC",
+        description: "Basic/Free tier SKUs often lack security features like encryption, private endpoints, and SLA guarantees.",
+        languages: ["terraform", "bicep", "arm"],
+    },
+    {
+        pattern: /\bretention_in_days\s*[:=]\s*(?:0|1|7)\b/,
+        title: "Short log retention period",
+        description: "Log retention of 7 days or less may be insufficient for security investigation and compliance requirements.",
+        languages: ["terraform", "bicep"],
+    },
+];
+// ─── Documentation Detection Extras ──────────────────────────────────────────
+const EXTRA_DOC_PATTERNS = [ // entries: { pattern, title, description, languages } — no per-entry fix text
+    {
+        pattern: /(?:TODO|FIXME|HACK|XXX|TEMP)\s*[:!]/i, // NOTE(review): case-insensitive with no leading \b, so e.g. an object key `temp:` also matches — confirm intended
+        title: "Unresolved TODO/FIXME/HACK comment",
+        description: "Code contains unresolved TODO/FIXME/HACK markers indicating incomplete implementation or known issues.",
+        languages: ["javascript", "typescript", "python", "java", "csharp", "go", "rust", "ruby", "php"],
+    },
+    {
+        pattern: /export\s+(?:default\s+)?(?:function|class|const)\s+\w+[\s\S]{0,50}\{[\s\S]{50,}(?:throw|return|if)\b/, // NOTE(review): does not check whether a doc comment precedes the export; "complex" means a throw/return/if at least 50 chars after the first `{`
+        title: "Complex exported function without JSDoc/docstring",
+        description: "Public API function with complex logic lacks documentation. This makes the API hard to use correctly.",
+        languages: ["javascript", "typescript"],
+    },
+];
+// ─── Cloud Readiness Extras ──────────────────────────────────────────────────
+const EXTRA_CLOUD_PATTERNS = [ // entries: { pattern, title, description, languages } — no per-entry fix text
+    {
+        pattern: /\bfs\.(?:writeFileSync|appendFileSync)\s*\(\s*["']\/(?:tmp|var|data)\//, // sync fs writes whose first argument is a string literal under /tmp, /var or /data
+        title: "Writing to local filesystem path",
+        description: "Writing to local filesystem paths (/tmp, /var, /data) is not reliable in containerized or serverless environments. Use object storage or managed databases.",
+        languages: ["javascript", "typescript"],
+    },
+    {
+        pattern: /\bopen\s*\(\s*["']\/(?:tmp|var|data)\//, // NOTE(review): the open() mode is not inspected, so read-only opens match too
+        title: "Writing to local filesystem path",
+        description: "Writing to local filesystem paths is not reliable in cloud/container environments.",
+        languages: ["python"],
+    },
+    {
+        pattern: /\b(?:127\.0\.0\.1|localhost)\b[\s\S]{0,30}(?:connect|host|url|endpoint)/i, // the keyword must come AFTER the address, within 30 chars
+        title: "Hardcoded localhost reference",
+        description: "Hardcoded localhost/127.0.0.1 references will fail in containerized deployments where services run on separate hosts.",
+        languages: ["javascript", "typescript", "python", "java", "go", "csharp"],
+    },
+];
+/**
+ * Apply recall-boosting patterns to detect issues that primary evaluators miss.
+ * Returns additional findings (does not modify existing ones).
+ */
+export function applyRecallBoost(code, language) {
+    const lang = getLangFamily(language);
+    const findings = [];
+    const boostedCategories = [];
+    const lines = code.split("\n");
+    function getMatchLines(pattern) {
+        const matched = [];
+        for (let i = 0; i < lines.length; i++) {
+            if (pattern.test(lines[i]))
+                matched.push(i + 1);
+            pattern.lastIndex = 0;
+        }
+        return matched;
+    }
+    // Hallucination boost
+    let halluNum = 900;
+    for (const p of EXTRA_HALLUCINATION_PATTERNS) {
+        if (!p.languages.includes(lang))
+            continue;
+        const matchLines = getMatchLines(p.pattern);
+        if (matchLines.length > 0) {
+            if (!boostedCategories.includes("hallucination-detection"))
+                boostedCategories.push("hallucination-detection");
+            findings.push({
+                ruleId: `HALLU-${String(halluNum++).padStart(3, "0")}`,
+                severity: "high",
+                title: p.title,
+                description: p.description,
+                lineNumbers: matchLines,
+                recommendation: p.fix,
+                reference: "AI Code Generation: Hallucinated API Detection",
+                confidence: 0.85,
+                provenance: "regex-pattern-match",
+            });
+        }
+    }
+    // CI/CD boost
+    let cicdNum = 900;
+    for (const p of EXTRA_CICD_PATTERNS) {
+        if (!p.languages.includes(lang))
+            continue;
+        if (p.pattern.test(code)) {
+            if (!boostedCategories.includes("ci-cd"))
+                boostedCategories.push("ci-cd");
+            findings.push({
+                ruleId: `CICD-${String(cicdNum++).padStart(3, "0")}`,
+                severity: "medium",
+                title: p.title,
+                description: p.description,
+                lineNumbers: getMatchLines(p.pattern),
+                recommendation: "Review and remediate this CI/CD configuration issue.",
+                confidence: 0.75,
+                provenance: "regex-pattern-match",
+            });
+        }
+    }
+    // I18N boost
+    let i18nNum = 900;
+    for (const p of EXTRA_I18N_PATTERNS) {
+        if (!p.languages.includes(lang))
+            continue;
+        const matchLines = getMatchLines(p.pattern);
+        if (matchLines.length > 0) {
+            if (!boostedCategories.includes("internationalization"))
+                boostedCategories.push("internationalization");
+            findings.push({
+                ruleId: `I18N-${String(i18nNum++).padStart(3, "0")}`,
+                severity: "medium",
+                title: p.title,
+                description: p.description,
+                lineNumbers: matchLines,
+                recommendation: "Ensure locale-awareness for international users.",
+                confidence: 0.7,
+                provenance: "regex-pattern-match",
+            });
+        }
+    }
+    // Cost-effectiveness boost
+    let costNum = 900;
+    for (const p of EXTRA_COST_PATTERNS) {
+        if (!p.languages.includes(lang))
+            continue;
+        if (p.pattern.test(code)) {
+            if (!boostedCategories.includes("cost-effectiveness"))
+                boostedCategories.push("cost-effectiveness");
+            findings.push({
+                ruleId: `COST-${String(costNum++).padStart(3, "0")}`,
+                severity: "medium",
+                title: p.title,
+                description: p.description,
+                lineNumbers: getMatchLines(p.pattern),
+                recommendation: "Consider more cost-efficient alternatives.",
+                confidence: 0.75,
+                provenance: "regex-pattern-match",
+            });
+        }
+    }
+    // IaC security boost
+    let iacNum = 900;
+    for (const p of EXTRA_IAC_PATTERNS) {
+        if (!p.languages.includes(lang))
+            continue;
+        if (p.pattern.test(code)) {
+            if (!boostedCategories.includes("iac-security"))
+                boostedCategories.push("iac-security");
+            findings.push({
+                ruleId: `IAC-${String(iacNum++).padStart(3, "0")}`,
+                severity: "high",
+                title: p.title,
+                description: p.description,
+                lineNumbers: getMatchLines(p.pattern),
+                recommendation: "Apply infrastructure security best practices.",
+                confidence: 0.8,
+                provenance: "regex-pattern-match",
+            });
+        }
+    }
+    // Documentation boost
+    let docNum = 900;
+    for (const p of EXTRA_DOC_PATTERNS) {
+        if (!p.languages.includes(lang))
+            continue;
+        if (p.pattern.test(code)) {
+            if (!boostedCategories.includes("documentation"))
+                boostedCategories.push("documentation");
+            findings.push({
+                ruleId: `DOC-${String(docNum++).padStart(3, "0")}`,
+                severity: "low",
+                title: p.title,
+                description: p.description,
+                lineNumbers: getMatchLines(p.pattern),
+                recommendation: "Improve documentation for maintainability.",
+                confidence: 0.7,
+                provenance: "regex-pattern-match",
+            });
+        }
+    }
+    // Cloud readiness boost
+    let cloudNum = 900;
+    for (const p of EXTRA_CLOUD_PATTERNS) {
+        if (!p.languages.includes(lang))
+            continue;
+        if (p.pattern.test(code)) {
+            if (!boostedCategories.includes("cloud-readiness"))
+                boostedCategories.push("cloud-readiness");
+            findings.push({
+                ruleId: `CLOUD-${String(cloudNum++).padStart(3, "0")}`,
+                severity: "medium",
+                title: p.title,
+                description: p.description,
+                lineNumbers: getMatchLines(p.pattern),
+                recommendation: "Design for cloud-native deployment.",
+                confidence: 0.75,
+                provenance: "regex-pattern-match",
+            });
+        }
+    }
+    return { findings, boostedCategories };
+}

package/dist/feedback-loop.d.ts ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * Fix-Outcome Feedback Loop
+ *
+ * Closes the feedback loop between fix acceptance/rejection data and
+ * the calibration system. When developers accept or reject suggested
+ * fixes, that signal feeds back into confidence calibration:
+ *
+ * - Accepted fixes → "true positive" signal → boost confidence for that rule
+ * - Rejected fixes → possible FP signal → reduce confidence for that rule
+ * - Reverted fixes → strong FP signal → significantly reduce confidence
+ *
+ * This module runs periodically (or on-demand) to:
+ * 1. Read fix history outcomes
+ * 2. Convert them into calibration-compatible feedback entries
+ * 3. Update the feedback store so calibration picks them up
+ * 4. Compute per-rule confidence adjustments
+ * 5. Generate a summary report of the feedback loop's impact
+ */
+import { type CalibrationProfile, type CalibrationOptions } from "./calibration.js";
+export interface FeedbackLoopResult {
+    /** Number of fix outcomes processed (every outcome in the loaded fix history, including ones already recorded as feedback) */
+    outcomesProcessed: number;
+    /** Number of new feedback entries created (counted even when `dryRun` prevents them from being saved) */
+    feedbackEntriesCreated: number;
+    /** Per-rule confidence adjustments recommended */
+    adjustments: ConfidenceAdjustment[];
+    /** Summary statistics */
+    stats: FeedbackLoopStats;
+    /** Calibration profile after updates */
+    calibrationProfile: CalibrationProfile;
+}
+export interface ConfidenceAdjustment { // one recommended confidence change for a single rule
+    ruleId: string; // rule the recommendation applies to
+    currentConfidence: number; // confidence assumed before the adjustment, on a 0-1 scale
+    recommendedConfidence: number; // suggested confidence, on a 0-1 scale
+    direction: "boost" | "reduce" | "stable"; // whether the recommendation raises, lowers, or keeps the confidence
+    reason: string; // human-readable justification shown in the report
+    sampleCount: number; // number of fix outcomes the recommendation is based on
+}
+export interface FeedbackLoopStats { // aggregate counts shown in the report's "Statistics" table
+    totalOutcomes: number; // total number of fix outcomes considered
+    accepted: number; // outcomes where the fix was accepted
+    rejected: number; // outcomes where the fix was rejected
+    reverted: number; // outcomes where the fix was reverted
+    rulesWithPositiveSignal: number; // number of adjustments whose direction is "boost"
+    rulesWithNegativeSignal: number; // number of adjustments whose direction is "reduce"
+    netCalibrationImpact: "positive" | "negative" | "neutral"; // which of the two rule counts above is larger; "neutral" on a tie
+}
+/**
+ * Process fix history and generate calibration feedback.
+ * This is the main entry point for the feedback loop.
+ *
+ * @param options - Optional settings; every field may be omitted.
+ *   - `fixHistoryDir`: directory the fix history is loaded from.
+ *   - `feedbackDir`: directory of the feedback store that is read and updated.
+ *   - `calibrationOptions`: options used when loading the calibration profile.
+ *   - `dryRun`: when true, new feedback entries are computed and counted but
+ *     not written to the feedback store.
+ * @returns Counts, per-rule adjustments, summary stats and the calibration profile.
+ */
+export declare function runFeedbackLoop(options?: {
+    fixHistoryDir?: string;
+    feedbackDir?: string;
+    calibrationOptions?: CalibrationOptions;
+    dryRun?: boolean;
+}): FeedbackLoopResult;
+/**
+ * Generate a markdown report of the feedback loop results.
+ *
+ * @param result - Result object as returned by runFeedbackLoop.
+ * @returns The report as a single markdown string.
+ */
+export declare function formatFeedbackLoopReport(result: FeedbackLoopResult): string;

package/dist/feedback-loop.js ADDED Viewed

@@ -0,0 +1,179 @@
+/**
+ * Fix-Outcome Feedback Loop
+ *
+ * Closes the feedback loop between fix acceptance/rejection data and
+ * the calibration system. When developers accept or reject suggested
+ * fixes, that signal feeds back into confidence calibration:
+ *
+ * - Accepted fixes → "true positive" signal → boost confidence for that rule
+ * - Rejected fixes → possible FP signal → reduce confidence for that rule
+ * - Reverted fixes → strong FP signal → significantly reduce confidence
+ *
+ * This module runs periodically (or on-demand) to:
+ * 1. Read fix history outcomes
+ * 2. Convert them into calibration-compatible feedback entries
+ * 3. Update the feedback store so calibration picks them up
+ * 4. Compute per-rule confidence adjustments
+ * 5. Generate a summary report of the feedback loop's impact
+ */
+import { loadFixHistory, computeFixStats } from "./fix-history.js";
+import { loadFeedbackStore, saveFeedbackStore } from "./commands/feedback.js";
+import { loadCalibrationProfile } from "./calibration.js";
+// ─── Feedback Loop Engine ────────────────────────────────────────────────────
+/**
+ * Process fix history and generate calibration feedback.
+ * This is the main entry point for the feedback loop.
+ */
+export function runFeedbackLoop(options) {
+    const fixHistory = loadFixHistory(options?.fixHistoryDir || ".");
+    const feedbackStore = loadFeedbackStore(options?.feedbackDir);
+    const fixStats = computeFixStats(fixHistory);
+    // Track which outcomes have already been converted to feedback
+    const existingFeedbackKeys = new Set(feedbackStore.entries
+        .filter((e) => e.comment?.startsWith("[fix-outcome]"))
+        .map((e) => `${e.ruleId}::${e.timestamp}`));
+    // Convert new fix outcomes to feedback entries
+    const newEntries = [];
+    for (const outcome of fixHistory.outcomes) {
+        const key = `${outcome.ruleId}::${outcome.timestamp}`;
+        if (existingFeedbackKeys.has(key))
+            continue;
+        const verdict = outcomeToVerdict(outcome.accepted, outcome.reverted);
+        newEntries.push({
+            ruleId: outcome.ruleId,
+            verdict,
+            timestamp: outcome.timestamp,
+            source: "manual",
+            comment: `[fix-outcome] ${outcome.reason || (outcome.reverted ? "Fix was reverted" : outcome.accepted ? "Fix accepted" : "Fix rejected")}`,
+            filePath: outcome.filePath,
+        });
+    }
+    // Add new entries to feedback store
+    if (newEntries.length > 0 && !options?.dryRun) {
+        feedbackStore.entries.push(...newEntries);
+        saveFeedbackStore(feedbackStore, options?.feedbackDir);
+    }
+    // Compute confidence adjustments from fix stats
+    const adjustments = computeAdjustments(fixStats);
+    // Load updated calibration profile
+    const calibrationProfile = loadCalibrationProfile(options?.calibrationOptions);
+    // Compute summary stats
+    const stats = computeLoopStats(fixStats, adjustments);
+    return {
+        outcomesProcessed: fixHistory.outcomes.length,
+        feedbackEntriesCreated: newEntries.length,
+        adjustments,
+        stats,
+        calibrationProfile,
+    };
+}
+/**
+ * Generate a markdown report of the feedback loop results.
+ */
+export function formatFeedbackLoopReport(result) {
+    const lines = [
+        "# Fix-Outcome Feedback Loop Report",
+        "",
+        `**Outcomes Processed**: ${result.outcomesProcessed}`,
+        `**New Feedback Entries**: ${result.feedbackEntriesCreated}`,
+        `**Net Impact**: ${result.stats.netCalibrationImpact}`,
+        "",
+        "## Statistics",
+        "",
+        `| Metric | Count |`,
+        `|--------|-------|`,
+        `| Total outcomes | ${result.stats.totalOutcomes} |`,
+        `| Accepted fixes | ${result.stats.accepted} |`,
+        `| Rejected fixes | ${result.stats.rejected} |`,
+        `| Reverted fixes | ${result.stats.reverted} |`,
+        `| Rules with positive signal | ${result.stats.rulesWithPositiveSignal} |`,
+        `| Rules with negative signal | ${result.stats.rulesWithNegativeSignal} |`,
+        "",
+    ];
+    if (result.adjustments.length > 0) {
+        lines.push("## Confidence Adjustments");
+        lines.push("");
+        lines.push("| Rule | Direction | Current | Recommended | Reason | Samples |");
+        lines.push("|------|-----------|---------|-------------|--------|---------|");
+        for (const adj of result.adjustments) {
+            lines.push(`| ${adj.ruleId} | ${adj.direction} | ${(adj.currentConfidence * 100).toFixed(0)}% | ${(adj.recommendedConfidence * 100).toFixed(0)}% | ${adj.reason} | ${adj.sampleCount} |`);
+        }
+    }
+    if (result.calibrationProfile.isActive) {
+        lines.push("");
+        lines.push(`## Calibration Status`);
+        lines.push("");
+        lines.push(`Calibration is **active** with ${result.calibrationProfile.feedbackCount} feedback entries.`);
+    }
+    return lines.join("\n");
+}
+// ─── Internal Helpers ────────────────────────────────────────────────────────
+function outcomeToVerdict(accepted, reverted) {
+    if (reverted)
+        return "fp";
+    return accepted ? "tp" : "fp";
+}
+function computeAdjustments(stats) {
+    const adjustments = [];
+    const MIN_SAMPLES = 3;
+    for (const [ruleId, ruleStats] of Object.entries(stats.byRule)) {
+        if (ruleStats.total < MIN_SAMPLES)
+            continue;
+        const acceptanceRate = ruleStats.rate;
+        const currentConfidence = 0.7; // Default assumption
+        if (acceptanceRate >= 0.8) {
+            // High acceptance → boost confidence
+            const boost = Math.min(0.15, (acceptanceRate - 0.7) * 0.5);
+            adjustments.push({
+                ruleId,
+                currentConfidence,
+                recommendedConfidence: Math.min(0.95, currentConfidence + boost),
+                direction: "boost",
+                reason: `${(acceptanceRate * 100).toFixed(0)}% fix acceptance rate`,
+                sampleCount: ruleStats.total,
+            });
+        }
+        else if (acceptanceRate < 0.4) {
+            // Low acceptance → reduce confidence
+            const reduction = Math.min(0.3, (0.5 - acceptanceRate) * 0.6);
+            adjustments.push({
+                ruleId,
+                currentConfidence,
+                recommendedConfidence: Math.max(0.1, currentConfidence - reduction),
+                direction: "reduce",
+                reason: `${(acceptanceRate * 100).toFixed(0)}% fix acceptance rate (likely FP-prone)`,
+                sampleCount: ruleStats.total,
+            });
+        }
+    }
+    // Sort by impact (largest reduction first)
+    adjustments.sort((a, b) => {
+        const aImpact = Math.abs(a.currentConfidence - a.recommendedConfidence);
+        const bImpact = Math.abs(b.currentConfidence - b.recommendedConfidence);
+        return bImpact - aImpact;
+    });
+    return adjustments;
+}
+function computeLoopStats(fixStats, adjustments) {
+    const rulesWithPositiveSignal = adjustments.filter((a) => a.direction === "boost").length;
+    const rulesWithNegativeSignal = adjustments.filter((a) => a.direction === "reduce").length;
+    let netCalibrationImpact;
+    if (rulesWithPositiveSignal > rulesWithNegativeSignal) {
+        netCalibrationImpact = "positive";
+    }
+    else if (rulesWithNegativeSignal > rulesWithPositiveSignal) {
+        netCalibrationImpact = "negative";
+    }
+    else {
+        netCalibrationImpact = "neutral";
+    }
+    return {
+        totalOutcomes: fixStats.totalFixes,
+        accepted: fixStats.accepted,
+        rejected: fixStats.rejected,
+        reverted: fixStats.reverted,
+        rulesWithPositiveSignal,
+        rulesWithNegativeSignal,
+        netCalibrationImpact,
+    };
+}

package/dist/index.js CHANGED Viewed

@@ -6,6 +6,7 @@ import("@modelcontextprotocol/sdk/server/mcp.js")
     const { StdioServerTransport } = await import("@modelcontextprotocol/sdk/server/stdio.js");
     const { registerTools } = await import("./tools/register.js");
     const { registerPrompts } = await import("./tools/prompts.js");
+    const { registerResources } = await import("./tools/register-resources.js");
     const { readFileSync } = await import("fs");
     const { resolve, dirname } = await import("path");
     const { fileURLToPath } = await import("url");
@@ -25,6 +26,7 @@ import("@modelcontextprotocol/sdk/server/mcp.js")
     });
     registerTools(server);
     registerPrompts(server);
+    registerResources(server);
     const transport = new StdioServerTransport();
     await server.connect(transport);
     console.error("Judges Panel MCP server running on stdio");

package/dist/judges/accessibility.js CHANGED Viewed

@@ -29,6 +29,13 @@ RULES FOR YOUR EVALUATION:
 - Recommend fixes with code examples using proper ARIA patterns.
 - Score from 0-100 where 100 means fully WCAG 2.2 AA compliant.
+FALSE POSITIVE AVOIDANCE:
+- Only flag accessibility issues in UI/frontend code (HTML, JSX, React components, CSS, templates).
+- Do NOT flag backend APIs, CLI tools, build scripts, or infrastructure code for accessibility issues.
+- Missing ARIA attributes are only an issue when there is actual UI markup to evaluate.
+- Do NOT flag non-UI code for missing alt text, keyboard navigation, or screen reader support.
+- Server-side rendering code should be evaluated for the HTML it produces, not its internal logic.
 ADVERSARIAL MANDATE:
 - Your role is adversarial: assume the code has accessibility defects and actively hunt for them. Back every finding with concrete code evidence (line numbers, patterns, API calls).
 - Never praise or compliment the code. Report only problems, risks, and deficiencies.