npm - @kevinrabun/judges - Versions diffs - 3.115.4 → 3.117.0 - Mend

@kevinrabun/judges 3.115.4 → 3.117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114) hide show

package/agents/accessibility.judge.md +7 -0
package/agents/agent-instructions.judge.md +7 -0
package/agents/ai-code-safety.judge.md +7 -0
package/agents/api-contract.judge.md +7 -0
package/agents/api-design.judge.md +7 -0
package/agents/authentication.judge.md +7 -0
package/agents/backwards-compatibility.judge.md +7 -0
package/agents/caching.judge.md +7 -0
package/agents/ci-cd.judge.md +7 -0
package/agents/cloud-readiness.judge.md +7 -0
package/agents/concurrency.judge.md +7 -0
package/agents/configuration-management.judge.md +7 -0
package/agents/cybersecurity.judge.md +7 -0
package/agents/data-security.judge.md +7 -0
package/agents/dependency-health.judge.md +7 -0
package/agents/documentation.judge.md +7 -0
package/agents/error-handling.judge.md +7 -0
package/agents/ethics-bias.judge.md +7 -0
package/agents/false-positive-review.judge.md +12 -0
package/agents/framework-safety.judge.md +7 -0
package/agents/hallucination-detection.judge.md +13 -0
package/agents/iac-security.judge.md +7 -0
package/agents/intent-alignment.judge.md +13 -0
package/agents/logging-privacy.judge.md +7 -0
package/agents/maintainability.judge.md +7 -0
package/agents/multi-turn-coherence.judge.md +7 -0
package/agents/observability.judge.md +7 -0
package/agents/portability.judge.md +7 -0
package/agents/rate-limiting.judge.md +7 -0
package/agents/reliability.judge.md +7 -0
package/agents/security.judge.md +13 -0
package/agents/testing.judge.md +7 -0
package/agents/ux.judge.md +7 -0
package/dist/a2a-protocol.d.ts +136 -0
package/dist/a2a-protocol.js +218 -0
package/dist/api.d.ts +21 -3
package/dist/api.js +21 -1
package/dist/audit-trail.d.ts +245 -0
package/dist/audit-trail.js +257 -0
package/dist/commands/benchmark-advanced.js +51 -51
package/dist/commands/benchmark-ai-agents.js +16 -16
package/dist/commands/benchmark-compliance-ethics.js +12 -12
package/dist/commands/benchmark-expanded-2.js +2 -2
package/dist/commands/benchmark-expanded.js +2 -2
package/dist/commands/benchmark-infrastructure.js +12 -12
package/dist/commands/benchmark-languages.js +11 -11
package/dist/commands/benchmark-quality-ops.js +7 -7
package/dist/commands/benchmark-security-deep.js +9 -9
package/dist/commands/benchmark.js +1 -1
package/dist/commands/llm-benchmark-optimizer.d.ts +78 -0
package/dist/commands/llm-benchmark-optimizer.js +241 -0
package/dist/commands/llm-benchmark.d.ts +4 -2
package/dist/commands/llm-benchmark.js +40 -12
package/dist/escalation.d.ts +100 -0
package/dist/escalation.js +292 -0
package/dist/evaluation-session.d.ts +74 -0
package/dist/evaluation-session.js +152 -0
package/dist/evaluators/index.d.ts +23 -1
package/dist/evaluators/index.js +192 -3
package/dist/evaluators/judge-selector.d.ts +19 -0
package/dist/evaluators/judge-selector.js +141 -0
package/dist/evaluators/recall-boost.d.ts +27 -0
package/dist/evaluators/recall-boost.js +409 -0
package/dist/feedback-loop.d.ts +62 -0
package/dist/feedback-loop.js +179 -0
package/dist/index.js +2 -0
package/dist/judges/accessibility.js +7 -0
package/dist/judges/agent-instructions.js +7 -0
package/dist/judges/ai-code-safety.js +7 -0
package/dist/judges/api-contract.js +7 -0
package/dist/judges/api-design.js +7 -0
package/dist/judges/authentication.js +7 -0
package/dist/judges/backwards-compatibility.js +7 -0
package/dist/judges/caching.js +7 -0
package/dist/judges/ci-cd.js +7 -0
package/dist/judges/cloud-readiness.js +7 -0
package/dist/judges/concurrency.js +7 -0
package/dist/judges/configuration-management.js +7 -0
package/dist/judges/cybersecurity.js +7 -0
package/dist/judges/data-security.js +7 -0
package/dist/judges/dependency-health.js +7 -0
package/dist/judges/documentation.js +7 -0
package/dist/judges/error-handling.js +7 -0
package/dist/judges/ethics-bias.js +7 -0
package/dist/judges/false-positive-review.js +13 -1
package/dist/judges/framework-safety.js +7 -0
package/dist/judges/hallucination-detection.js +14 -1
package/dist/judges/iac-security.js +7 -0
package/dist/judges/intent-alignment.js +14 -1
package/dist/judges/logging-privacy.js +7 -0
package/dist/judges/maintainability.js +7 -0
package/dist/judges/multi-turn-coherence.js +7 -0
package/dist/judges/observability.js +7 -0
package/dist/judges/portability.js +7 -0
package/dist/judges/rate-limiting.js +7 -0
package/dist/judges/reliability.js +7 -0
package/dist/judges/security.js +14 -1
package/dist/judges/testing.js +7 -0
package/dist/judges/ux.js +7 -0
package/dist/review-conversation.d.ts +87 -0
package/dist/review-conversation.js +307 -0
package/dist/sast-integration.d.ts +112 -0
package/dist/sast-integration.js +215 -0
package/dist/tools/register-evaluation.js +208 -8
package/dist/tools/register-fix.js +24 -1
package/dist/tools/register-resources.d.ts +6 -0
package/dist/tools/register-resources.js +177 -0
package/dist/tools/register-review.js +26 -1
package/dist/tools/register-workflow.js +384 -11
package/dist/tools/validation.d.ts +13 -0
package/dist/tools/validation.js +77 -0
package/dist/types.d.ts +122 -0
package/package.json +25 -12
package/server.json +2 -2

package/dist/tools/register-workflow.js CHANGED Viewed

@@ -4,10 +4,12 @@
 // ──────────────────────────────────────────────────────────────────────────────
 import { z } from "zod";
 import { JUDGES } from "../judges/index.js";
-import { evaluateProject, evaluateDiff, analyzeDependencies, runAppBuilderWorkflow } from "../evaluators/index.js";
+import { evaluateProject, evaluateDiff, analyzeDependencies, runAppBuilderWorkflow, evaluateWithTribunal, enrichWithPatches, formatVerdictAsMarkdown, } from "../evaluators/index.js";
 import { evaluateFilesBatch } from "../api.js";
+import { getGlobalSession } from "../evaluation-session.js";
 import { generatePublicRepoReport } from "../reports/public-repo-report.js";
 import { configSchema, toJudgesConfig } from "./schemas.js";
+import { validateCodeSize } from "./validation.js";
 import { benchmarkGate, formatBenchmarkReport, formatBenchmarkMarkdown, runBenchmarkSuite, } from "../commands/benchmark.js";
 /**
  * Register workflow-focused tools: evaluate_public_repo_report, evaluate_project,
@@ -22,6 +24,10 @@ export function registerWorkflowTools(server) {
     registerBenchmarkGate(server);
     registerBenchmarkDashboard(server);
     registerEvaluateBatch(server);
+    registerEvaluateThenFix(server);
+    registerEvaluateFocused(server);
+    registerSessionStatus(server);
+    registerRecordFeedback(server);
 }
 // ─── evaluate_public_repo_report ─────────────────────────────────────────────
 function registerPublicRepoReport(server) {
@@ -75,6 +81,10 @@ function registerPublicRepoReport(server) {
         keepClone: z.boolean().optional().describe("Keep cloned repository on disk for inspection"),
     }, async ({ repoUrl, branch, outputPath, maxFiles, maxFileBytes, maxFindingsInReport, credentialMode, includeAstFindings, minConfidence, enableMustFixGate, mustFixMinConfidence, mustFixDangerousRulePrefixes, keepClone, }) => {
         try {
+            await server.sendLoggingMessage({
+                level: "info",
+                data: `Cloning repository: ${repoUrl}${branch ? ` (branch: ${branch})` : ""}...`,
+            });
             const report = generatePublicRepoReport({
                 repoUrl,
                 branch,
@@ -112,12 +122,18 @@ function registerPublicRepoReport(server) {
             if (keepClone) {
                 summary += `- Clone path: ${report.clonePath}\n`;
             }
+            const structured = {
+                repoUrl,
+                overallVerdict: report.overallVerdict,
+                averageScore: report.averageScore,
+                analyzedFileCount: report.analyzedFileCount,
+                totalFindings: report.totalFindings,
+                outputPath: report.outputPath ?? null,
+            };
             return {
                 content: [
-                    {
-                        type: "text",
-                        text: `${summary}\n---\n\n${report.markdown}`,
-                    },
+                    { type: "text", text: `${summary}\n---\n\n${report.markdown}` },
+                    { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
                 ],
             };
         }
@@ -217,7 +233,37 @@ function registerAppBuilderFlow(server) {
                     md += `- **${task.priority} ${task.ruleId}** ${task.task}\n`;
                 }
             }
-            return { content: [{ type: "text", text: md }] };
+            const structured = {
+                mode: result.mode,
+                releaseDecision: result.releaseDecision,
+                score: result.score,
+                verdict: result.verdict,
+                criticalCount: result.criticalCount,
+                highCount: result.highCount,
+                mediumCount: result.mediumCount,
+                taskCount: result.tasks.length,
+                aiFixableCount: result.aiFixableNow.length,
+                findings: result.plainLanguageFindings.map((f) => ({
+                    ruleId: f.ruleId,
+                    severity: f.severity,
+                    title: f.title,
+                    whatIsWrong: f.whatIsWrong,
+                    nextAction: f.nextAction,
+                })),
+                tasks: result.tasks.map((t) => ({
+                    priority: t.priority,
+                    owner: t.owner,
+                    effort: t.effort,
+                    ruleId: t.ruleId,
+                    task: t.task,
+                })),
+            };
+            return {
+                content: [
+                    { type: "text", text: md },
+                    { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
+                ],
+            };
         }
         catch (error) {
             return {
@@ -253,6 +299,10 @@ function registerEvaluateProject(server) {
         config: configSchema,
     }, async ({ files, context, includeAstFindings, minConfidence, config }) => {
         try {
+            await server.sendLoggingMessage({
+                level: "info",
+                data: `Evaluating ${files.length} files with ${JUDGES.length} judges...`,
+            });
             const result = evaluateProject(files, context, {
                 includeAstFindings,
                 minConfidence,
@@ -282,7 +332,37 @@ function registerEvaluateProject(server) {
                     md += `- **[${f.severity.toUpperCase()}]** ${f.ruleId}: ${f.title}\n  ${f.description}\n`;
                 }
             }
-            return { content: [{ type: "text", text: md }] };
+            const structured = {
+                overallScore: result.overallScore,
+                overallVerdict: result.overallVerdict,
+                fileCount: result.fileResults.length,
+                criticalCount: result.criticalCount,
+                highCount: result.highCount,
+                fileResults: result.fileResults.map((fr) => ({
+                    path: fr.path,
+                    language: fr.language,
+                    score: fr.score,
+                    findingCount: fr.findings.length,
+                    findings: fr.findings.map((f) => ({
+                        ruleId: f.ruleId,
+                        severity: f.severity,
+                        title: f.title,
+                        line: f.lineNumbers?.[0],
+                    })),
+                })),
+                architecturalFindings: result.architecturalFindings.map((f) => ({
+                    ruleId: f.ruleId,
+                    severity: f.severity,
+                    title: f.title,
+                    description: f.description,
+                })),
+            };
+            return {
+                content: [
+                    { type: "text", text: md },
+                    { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
+                ],
+            };
         }
         catch (error) {
             return {
@@ -314,6 +394,10 @@ function registerEvaluateDiff(server) {
         config: configSchema,
     }, async ({ code, language, changedLines, context, includeAstFindings, minConfidence, config }) => {
         try {
+            const sizeError = validateCodeSize(code);
+            if (sizeError) {
+                return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
+            }
             const result = evaluateDiff(code, language, changedLines, context, {
                 includeAstFindings,
                 minConfidence,
@@ -334,7 +418,24 @@ function registerEvaluateDiff(server) {
                     md += `**Recommendation:** ${f.recommendation}\n\n`;
                 }
             }
-            return { content: [{ type: "text", text: md }] };
+            const structured = {
+                score: result.score,
+                verdict: result.verdict,
+                linesAnalyzed: result.linesAnalyzed,
+                findingCount: result.findings.length,
+                findings: result.findings.map((f) => ({
+                    ruleId: f.ruleId,
+                    severity: f.severity,
+                    title: f.title,
+                    lineNumbers: f.lineNumbers,
+                })),
+            };
+            return {
+                content: [
+                    { type: "text", text: md },
+                    { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
+                ],
+            };
         }
         catch (error) {
             return {
@@ -382,7 +483,24 @@ function registerAnalyzeDependencies(server) {
                     md += `**Development (${dev.length}):** ${dev.map((d) => `${d.name}@${d.version}`).join(", ")}\n\n`;
                 }
             }
-            return { content: [{ type: "text", text: md }] };
+            const structured = {
+                manifestType,
+                score: result.score,
+                verdict: result.verdict,
+                totalDependencies: result.totalDependencies,
+                findingCount: result.findings.length,
+                findings: result.findings.map((f) => ({
+                    ruleId: f.ruleId,
+                    severity: f.severity,
+                    title: f.title,
+                })),
+            };
+            return {
+                content: [
+                    { type: "text", text: md },
+                    { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
+                ],
+            };
         }
         catch (error) {
             return {
@@ -509,8 +627,13 @@ function registerEvaluateBatch(server) {
     }, async (params) => {
         const config = params.config ? toJudgesConfig(params.config) : undefined;
         const options = config ? { config } : undefined;
+        await server.sendLoggingMessage({ level: "info", data: `Batch evaluation: ${params.files.length} files...` });
         // Use bounded-concurrency parallel evaluation instead of sequential loop
-        const batchResults = await evaluateFilesBatch(params.files, 4, options);
+        const batchResults = await evaluateFilesBatch(params.files, 4, options, (completed, total) => {
+            server
+                .sendLoggingMessage({ level: "info", data: `Progress: ${completed}/${total} files evaluated` })
+                .catch(() => { });
+        });
         const results = batchResults.map((r) => {
             const criticals = r.verdict.findings.filter((f) => f.severity === "critical").length;
             return {
@@ -541,8 +664,258 @@ function registerEvaluateBatch(server) {
             results.map((r) => `| ${r.path} | ${r.score} | ${r.findingCount} | ${r.criticalCount} |`).join("\n") +
             "\n\n" +
             allFindings.join("\n\n");
+        const structured = {
+            fileCount: results.length,
+            averageScore: avgScore,
+            totalFindings,
+            totalCriticals,
+            files: results,
+        };
+        return {
+            content: [
+                { type: "text", text: summary },
+                { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
+            ],
+        };
+    });
+}
+// ─── evaluate_then_fix ───────────────────────────────────────────────────────
+/**
+ * Register the `evaluate_then_fix` tool.
+ *
+ * The handler rejects empty/oversized input via `validateCodeSize`, evaluates
+ * the code with `evaluateWithTribunal` (with `adaptiveSelection: true`), attaches
+ * patches through `enrichWithPatches`, and records the verdict on the global
+ * evaluation session. The response is a Markdown report followed by a fenced
+ * JSON summary; the Markdown shows at most 20 patches, the JSON at most 50.
+ *
+ * @param server - Server instance whose `tool()` method registers the handler.
+ */
+function registerEvaluateThenFix(server) {
+    // Prefix EVERY line of a snippet with a diff marker. Without this, only the
+    // first line of a multi-line patch is marked and the remaining lines render
+    // as unchanged context inside the ```diff fence. A single trailing newline
+    // is dropped so it does not produce an empty marked line.
+    const toDiffLines = (text, marker) => text
+        .replace(/\r?\n$/, "")
+        .split(/\r?\n/)
+        .map((line) => `${marker} ${line}`)
+        .join("\n");
+    server.tool("evaluate_then_fix", "Evaluate code and automatically generate fix patches for all findings that have auto-fix support. Returns the evaluation verdict alongside ready-to-apply patches. Use this for a single-step 'review + fix' workflow.", {
+        code: z.string().describe("The source code to evaluate and fix."),
+        language: z.string().describe("The programming language (e.g., 'typescript', 'python')."),
+        context: z.string().optional().describe("Optional context about the code."),
+        includeAstFindings: z.boolean().optional().describe("Include AST/code-structure findings (default: true)"),
+        minConfidence: z
+            .number()
+            .min(0)
+            .max(1)
+            .optional()
+            .describe("Minimum finding confidence to include (0-1, default: 0)"),
+        config: configSchema,
+    }, async ({ code, language, context, includeAstFindings, minConfidence, config }) => {
+        try {
+            // Reject empty or oversized input before doing any evaluation work.
+            const sizeError = validateCodeSize(code);
+            if (sizeError) {
+                return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
+            }
+            const session = getGlobalSession();
+            // Step 1: Evaluate
+            const verdict = evaluateWithTribunal(code, language, context, {
+                includeAstFindings,
+                minConfidence,
+                config: toJudgesConfig(config),
+                adaptiveSelection: true,
+            });
+            // Step 2: Generate fix patches for all findings
+            const patchedFindings = enrichWithPatches(verdict.findings, code);
+            // The session entry is labelled with the caller's context, or a
+            // synthetic `<inline:language>` label when no context was given.
+            session.recordEvaluation(context ?? `<inline:${language}>`, code, verdict);
+            const patchableFindings = patchedFindings.filter((f) => f.patch);
+            const patchCount = patchableFindings.length;
+            let md = `# Evaluate & Fix Results\n\n`;
+            md += `**Score:** ${verdict.overallScore}/100 | **Verdict:** ${verdict.overallVerdict.toUpperCase()}\n`;
+            md += `**Total Findings:** ${verdict.findings.length} | **Auto-fixable:** ${patchCount}\n\n`;
+            if (patchCount > 0) {
+                md += `## Auto-Fix Patches\n\n`;
+                md += `The following findings have auto-fix patches ready to apply:\n\n`;
+                // Only the first 20 patches are rendered in Markdown; the rest are summarized below.
+                for (const f of patchableFindings.slice(0, 20)) {
+                    md += `### ${f.ruleId}: ${f.title}\n`;
+                    md += `- **Severity:** ${f.severity} | **Lines:** ${f.lineNumbers?.join(", ") ?? "N/A"}\n`;
+                    md += `- **Fix:**\n\`\`\`diff\n`;
+                    if (f.patch?.oldText)
+                        md += `${toDiffLines(f.patch.oldText, "-")}\n`;
+                    if (f.patch?.newText)
+                        md += `${toDiffLines(f.patch.newText, "+")}\n`;
+                    md += `\`\`\`\n\n`;
+                }
+                if (patchableFindings.length > 20) {
+                    md += `> ... and ${patchableFindings.length - 20} more auto-fixable findings\n\n`;
+                }
+            }
+            md += formatVerdictAsMarkdown(verdict);
+            // Machine-readable summary; `autoFixable` is the full count while
+            // `patches` is capped at 50 entries.
+            const structuredData = {
+                score: verdict.overallScore,
+                verdict: verdict.overallVerdict,
+                totalFindings: verdict.findings.length,
+                autoFixable: patchCount,
+                patches: patchableFindings.slice(0, 50).map((f) => ({
+                    ruleId: f.ruleId,
+                    severity: f.severity,
+                    title: f.title,
+                    lineNumbers: f.lineNumbers,
+                    oldText: f.patch?.oldText,
+                    newText: f.patch?.newText,
+                })),
+            };
+            return {
+                content: [
+                    { type: "text", text: md },
+                    { type: "text", text: "```json\n" + JSON.stringify(structuredData, null, 2) + "\n```" },
+                ],
+            };
+        }
+        catch (error) {
+            // Report failures as a tool error payload rather than throwing.
+            return {
+                content: [
+                    {
+                        type: "text",
+                        text: error instanceof Error ? `Error: ${error.message}` : "Error: evaluate_then_fix failed",
+                    },
+                ],
+                isError: true,
+            };
+        }
+    });
+}
+// ─── evaluate_focused ────────────────────────────────────────────────────────
+/**
+ * Register the `evaluate_focused` tool.
+ *
+ * The handler runs `evaluateWithTribunal` with every judge in `JUDGES` disabled
+ * except those named in `judgeIds` (merged with any `disabledJudges` already in
+ * the caller's config). Requested IDs that match no entry in `JUDGES` are
+ * reported back as a warning and in `unknownJudgeIds`, because they would
+ * otherwise be ignored silently and a typo could look like a clean result.
+ *
+ * @param server - Server instance whose `tool()` method registers the handler.
+ */
+function registerEvaluateFocused(server) {
+    server.tool("evaluate_focused", "Run a focused evaluation using only the specified judges. Use this after an initial full evaluation to re-check specific areas — for example, re-run only 'cybersecurity' and 'authentication' judges after applying security fixes. Much faster than a full tribunal evaluation.", {
+        code: z.string().describe("The source code to evaluate."),
+        language: z.string().describe("The programming language (e.g., 'typescript', 'python')."),
+        judgeIds: z
+            .array(z.string())
+            .min(1)
+            .describe("Array of judge IDs to run (e.g., ['cybersecurity', 'authentication', 'data-sovereignty'])"),
+        context: z.string().optional().describe("Optional context about the code."),
+        includeAstFindings: z.boolean().optional().describe("Include AST/code-structure findings (default: true)"),
+        minConfidence: z
+            .number()
+            .min(0)
+            .max(1)
+            .optional()
+            .describe("Minimum finding confidence to include (0-1, default: 0)"),
+        config: configSchema,
+    }, async ({ code, language, judgeIds, context, includeAstFindings, minConfidence, config }) => {
+        try {
+            // Reject empty or oversized input before doing any evaluation work.
+            const sizeError = validateCodeSize(code);
+            if (sizeError) {
+                return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
+            }
+            const cfgObj = toJudgesConfig(config);
+            // Build a config that disables all judges EXCEPT the focused ones
+            const allJudgeIds = JUDGES.map((j) => j.id);
+            const focusedSet = new Set(judgeIds);
+            const disabledJudges = allJudgeIds.filter((id) => !focusedSet.has(id));
+            // IDs absent from JUDGES select nothing here; collect them so the
+            // caller is told instead of getting a silently narrower run.
+            const registeredIds = new Set(allJudgeIds);
+            const unknownJudgeIds = [...focusedSet].filter((id) => !registeredIds.has(id));
+            const mergedConfig = cfgObj
+                ? { ...cfgObj, disabledJudges: [...(cfgObj.disabledJudges ?? []), ...disabledJudges] }
+                : { disabledJudges };
+            const verdict = evaluateWithTribunal(code, language, context, {
+                includeAstFindings,
+                minConfidence,
+                config: mergedConfig,
+            });
+            let md = `# Focused Evaluation (${judgeIds.length} judges)\n\n`;
+            md += `**Judges:** ${judgeIds.join(", ")}\n`;
+            if (unknownJudgeIds.length > 0) {
+                md += `**Warning:** Unrecognized judge IDs (no matching entry in the judge registry): ${unknownJudgeIds.join(", ")}\n`;
+            }
+            md += `**Score:** ${verdict.overallScore}/100 | **Verdict:** ${verdict.overallVerdict.toUpperCase()}\n`;
+            md += `**Findings:** ${verdict.findings.length}\n\n`;
+            md += formatVerdictAsMarkdown(verdict);
+            const structuredData = {
+                focusedJudges: judgeIds,
+                unknownJudgeIds,
+                score: verdict.overallScore,
+                verdict: verdict.overallVerdict,
+                findingCount: verdict.findings.length,
+                findings: verdict.findings.map((f) => ({
+                    ruleId: f.ruleId,
+                    severity: f.severity,
+                    title: f.title,
+                    lineNumbers: f.lineNumbers,
+                    confidence: f.confidence,
+                })),
+            };
+            return {
+                content: [
+                    { type: "text", text: md },
+                    { type: "text", text: "```json\n" + JSON.stringify(structuredData, null, 2) + "\n```" },
+                ],
+            };
+        }
+        catch (error) {
+            // Report failures as a tool error payload rather than throwing.
+            return {
+                content: [
+                    {
+                        type: "text",
+                        text: error instanceof Error ? `Error: ${error.message}` : "Error: Focused evaluation failed",
+                    },
+                ],
+                isError: true,
+            };
+        }
+    });
+}
+// ─── session_status ──────────────────────────────────────────────────────────
+/**
+ * Register the `session_status` tool (takes no arguments).
+ *
+ * Reads the global evaluation session and returns a Markdown report plus a
+ * fenced JSON block with the same data: evaluation count, start time,
+ * frameworks, capabilities, per-file verdict history and the feedback tally.
+ *
+ * NOTE(review): unlike the evaluate_* handlers in this file, this handler has
+ * no try/catch, so an exception from the session accessors propagates.
+ *
+ * @param server - Server instance whose `tool()` method registers the handler.
+ */
+function registerSessionStatus(server) {
+    server.tool("session_status", "Get the current evaluation session status — how many evaluations have been run, detected frameworks, verdict history per file, and stability indicators. Useful for understanding what the tribunal has already reviewed.", {}, async () => {
+        const session = getGlobalSession();
+        const ctx = session.getContext();
+        // One summary row per entry in the verdict history; `latestScore`
+        // falls back to 0 when a history list is empty.
+        const filesEvaluated = [...ctx.verdictHistory.entries()].map(([file, history]) => ({
+            file,
+            evaluations: history.length,
+            latestScore: history[history.length - 1]?.score ?? 0,
+            stable: session.isVerdictStable(file),
+        }));
+        let md = `# Evaluation Session Status\n\n`;
+        md += `**Evaluations:** ${ctx.evaluationCount}\n`;
+        md += `**Started:** ${ctx.startedAt}\n`;
+        md += `**Detected Frameworks:** ${ctx.frameworks.length > 0 ? ctx.frameworks.join(", ") : "None yet"}\n`;
+        md += `**Capabilities:** ${ctx.capabilities.size > 0 ? [...ctx.capabilities].join(", ") : "None yet"}\n\n`;
+        if (filesEvaluated.length > 0) {
+            md += `## Files Evaluated\n\n`;
+            md += `| File | Evals | Latest Score | Stable |\n`;
+            md += `|------|-------|--------------|--------|\n`;
+            for (const f of filesEvaluated) {
+                md += `| ${f.file} | ${f.evaluations} | ${f.latestScore}/100 | ${f.stable ? "Yes" : "No"} |\n`;
+            }
+        }
+        // Materialize the tally entries once; they feed both the table and the JSON block.
+        const feedbackTally = [...session.getFeedbackTally().entries()];
+        if (feedbackTally.length > 0) {
+            md += `\n## Feedback Tally\n\n`;
+            md += `| Rule | TP | FP | Won't Fix |\n`;
+            md += `|------|----|----|----------|\n`;
+            for (const [rule, counts] of feedbackTally) {
+                md += `| ${rule} | ${counts.tp} | ${counts.fp} | ${counts.wontfix} |\n`;
+            }
+        }
         return {
-            content: [{ type: "text", text: summary }],
+            content: [
+                { type: "text", text: md },
+                {
+                    type: "text",
+                    text: "```json\n" +
+                        JSON.stringify({
+                            evaluationCount: ctx.evaluationCount,
+                            startedAt: ctx.startedAt,
+                            frameworks: ctx.frameworks,
+                            // Spread into an array so the collection serializes as a JSON list.
+                            capabilities: [...ctx.capabilities],
+                            filesEvaluated,
+                            feedbackTally: Object.fromEntries(feedbackTally),
+                        }, null, 2) +
+                        "\n```",
+                },
+            ],
+        };
+    });
+}
+// ─── record_feedback ─────────────────────────────────────────────────────────
+/**
+ * Register the `record_feedback` tool.
+ *
+ * Stores a tp / fp / wontfix verdict for a rule on the global evaluation
+ * session, then reports the value returned by `getConfidencePenalty` for that
+ * rule as a rounded percentage.
+ *
+ * NOTE(review): the accessor is named "penalty" while the message labels the
+ * value a "confidence multiplier" — confirm which meaning is intended.
+ * NOTE(review): this handler has no try/catch, unlike the evaluate_* handlers.
+ *
+ * @param server - Server instance whose `tool()` method registers the handler.
+ */
+function registerRecordFeedback(server) {
+    server.tool("record_feedback", "Record user feedback on a finding — mark it as a true positive (tp), false positive (fp), or won't fix (wontfix). This feedback calibrates confidence scores in subsequent evaluations during the current session, reducing noise from rules the user considers inaccurate.", {
+        ruleId: z.string().describe("The rule ID of the finding (e.g., 'SEC-001', 'AUTH-003')."),
+        verdict: z
+            .enum(["tp", "fp", "wontfix"])
+            .describe("The feedback verdict: tp (true positive), fp (false positive), wontfix (acknowledged but won't fix)."),
+    }, async ({ ruleId, verdict }) => {
+        const session = getGlobalSession();
+        session.recordFeedback(ruleId, verdict);
+        // Read the value back AFTER recording so the message reflects this feedback.
+        const penalty = session.getConfidencePenalty(ruleId);
+        const penaltyPct = Math.round(penalty * 100);
+        return {
+            content: [
+                {
+                    type: "text",
+                    text: `Feedback recorded: **${ruleId}** → **${verdict}**\n\n` +
+                        `Current confidence multiplier for ${ruleId}: **${penaltyPct}%**\n` +
+                        // The extra sentence is appended for false positives only.
+                        (verdict === "fp" ? `Future findings for this rule will have reduced confidence in this session.` : ``),
+                },
+            ],
         };
     });
 }

package/dist/tools/validation.d.ts ADDED Viewed

@@ -0,0 +1,13 @@
+/**
+ * Validate that code input is within acceptable size limits.
+ * Returns an error message string if validation fails, or `undefined` if valid.
+ *
+ * @param code - Source text to check.
+ * @param maxBytes - Optional upper bound on the input size, in bytes.
+ */
+export declare function validateCodeSize(code: string, maxBytes?: number): string | undefined;
+/** Recognized programming languages for validation warnings. */
+export declare const KNOWN_LANGUAGES: Set<string>;
+/**
+ * Normalize a language string.
+ * Always returns a string (never `undefined`); per the declared return type,
+ * recognition is not signalled here — check `KNOWN_LANGUAGES` separately.
+ */
+export declare function normalizeLanguage(lang: string): string;

package/dist/tools/validation.js ADDED Viewed

@@ -0,0 +1,77 @@
+// ─── Input Validation Helpers ────────────────────────────────────────────────
+// Shared validation for MCP tool inputs at system boundaries.
+// ──────────────────────────────────────────────────────────────────────────────
+/** Maximum code input size (1 MB). Prevents excessive memory/CPU usage. */
+const MAX_CODE_BYTES = 1_048_576;
+/**
+ * Validate that code input is within acceptable size limits.
+ *
+ * Size is measured as the UTF-8 encoded byte length, not the character count.
+ *
+ * @param code - Source text to check.
+ * @param maxBytes - Upper bound in bytes; defaults to `MAX_CODE_BYTES`.
+ * @returns An error message string if the input is empty or exceeds
+ *   `maxBytes`, or `undefined` if it is valid.
+ */
+export function validateCodeSize(code, maxBytes = MAX_CODE_BYTES) {
+    if (code.length === 0) {
+        return "Code input is empty.";
+    }
+    const byteLength = Buffer.byteLength(code, "utf-8");
+    if (byteLength <= maxBytes) {
+        return undefined;
+    }
+    // Round the actual size UP: rounding to nearest made an input only a few
+    // bytes over the default limit read "1024 KB. Maximum allowed: 1024 KB."
+    const actualKb = Math.ceil(byteLength / 1024);
+    return `Code input too large (${actualKb} KB). Maximum allowed: ${(maxBytes / 1024).toFixed(0)} KB.`;
+}
+/**
+ * Language identifiers treated as recognized when validating tool input.
+ * Entries are lowercase; insertion order is kept as originally published.
+ */
+export const KNOWN_LANGUAGES = new Set([
+    "typescript", "javascript", "python", "java", "csharp", "c", "cpp", "go",
+    "rust", "ruby", "php", "swift", "kotlin", "scala", "r", "powershell",
+    "bash", "shell", "sql", "html", "css", "scss", "bicep", "terraform",
+    "hcl", "yaml", "yml", "json", "xml", "toml", "dockerfile", "makefile",
+    "markdown", "plaintext", "objective-c", "dart", "lua", "perl", "elixir",
+    "erlang", "haskell", "fsharp", "vb", "assembly", "zig", "nim",
+    "cloudformation",
+]);
+/**
+ * Normalize a language string by lowercasing it and trimming surrounding whitespace.
+ * Always returns a string: the result is NOT checked against `KNOWN_LANGUAGES`,
+ * so unrecognized languages pass through unchanged apart from normalization.
+ */
+export function normalizeLanguage(lang) {
+    return lang.toLowerCase().trim();
+}