@kevinrabun/judges 3.115.4 → 3.117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114):
  1. package/agents/accessibility.judge.md +7 -0
  2. package/agents/agent-instructions.judge.md +7 -0
  3. package/agents/ai-code-safety.judge.md +7 -0
  4. package/agents/api-contract.judge.md +7 -0
  5. package/agents/api-design.judge.md +7 -0
  6. package/agents/authentication.judge.md +7 -0
  7. package/agents/backwards-compatibility.judge.md +7 -0
  8. package/agents/caching.judge.md +7 -0
  9. package/agents/ci-cd.judge.md +7 -0
  10. package/agents/cloud-readiness.judge.md +7 -0
  11. package/agents/concurrency.judge.md +7 -0
  12. package/agents/configuration-management.judge.md +7 -0
  13. package/agents/cybersecurity.judge.md +7 -0
  14. package/agents/data-security.judge.md +7 -0
  15. package/agents/dependency-health.judge.md +7 -0
  16. package/agents/documentation.judge.md +7 -0
  17. package/agents/error-handling.judge.md +7 -0
  18. package/agents/ethics-bias.judge.md +7 -0
  19. package/agents/false-positive-review.judge.md +12 -0
  20. package/agents/framework-safety.judge.md +7 -0
  21. package/agents/hallucination-detection.judge.md +13 -0
  22. package/agents/iac-security.judge.md +7 -0
  23. package/agents/intent-alignment.judge.md +13 -0
  24. package/agents/logging-privacy.judge.md +7 -0
  25. package/agents/maintainability.judge.md +7 -0
  26. package/agents/multi-turn-coherence.judge.md +7 -0
  27. package/agents/observability.judge.md +7 -0
  28. package/agents/portability.judge.md +7 -0
  29. package/agents/rate-limiting.judge.md +7 -0
  30. package/agents/reliability.judge.md +7 -0
  31. package/agents/security.judge.md +13 -0
  32. package/agents/testing.judge.md +7 -0
  33. package/agents/ux.judge.md +7 -0
  34. package/dist/a2a-protocol.d.ts +136 -0
  35. package/dist/a2a-protocol.js +218 -0
  36. package/dist/api.d.ts +21 -3
  37. package/dist/api.js +21 -1
  38. package/dist/audit-trail.d.ts +245 -0
  39. package/dist/audit-trail.js +257 -0
  40. package/dist/commands/benchmark-advanced.js +51 -51
  41. package/dist/commands/benchmark-ai-agents.js +16 -16
  42. package/dist/commands/benchmark-compliance-ethics.js +12 -12
  43. package/dist/commands/benchmark-expanded-2.js +2 -2
  44. package/dist/commands/benchmark-expanded.js +2 -2
  45. package/dist/commands/benchmark-infrastructure.js +12 -12
  46. package/dist/commands/benchmark-languages.js +11 -11
  47. package/dist/commands/benchmark-quality-ops.js +7 -7
  48. package/dist/commands/benchmark-security-deep.js +9 -9
  49. package/dist/commands/benchmark.js +1 -1
  50. package/dist/commands/llm-benchmark-optimizer.d.ts +78 -0
  51. package/dist/commands/llm-benchmark-optimizer.js +241 -0
  52. package/dist/commands/llm-benchmark.d.ts +4 -2
  53. package/dist/commands/llm-benchmark.js +40 -12
  54. package/dist/escalation.d.ts +100 -0
  55. package/dist/escalation.js +292 -0
  56. package/dist/evaluation-session.d.ts +74 -0
  57. package/dist/evaluation-session.js +152 -0
  58. package/dist/evaluators/index.d.ts +23 -1
  59. package/dist/evaluators/index.js +192 -3
  60. package/dist/evaluators/judge-selector.d.ts +19 -0
  61. package/dist/evaluators/judge-selector.js +141 -0
  62. package/dist/evaluators/recall-boost.d.ts +27 -0
  63. package/dist/evaluators/recall-boost.js +409 -0
  64. package/dist/feedback-loop.d.ts +62 -0
  65. package/dist/feedback-loop.js +179 -0
  66. package/dist/index.js +2 -0
  67. package/dist/judges/accessibility.js +7 -0
  68. package/dist/judges/agent-instructions.js +7 -0
  69. package/dist/judges/ai-code-safety.js +7 -0
  70. package/dist/judges/api-contract.js +7 -0
  71. package/dist/judges/api-design.js +7 -0
  72. package/dist/judges/authentication.js +7 -0
  73. package/dist/judges/backwards-compatibility.js +7 -0
  74. package/dist/judges/caching.js +7 -0
  75. package/dist/judges/ci-cd.js +7 -0
  76. package/dist/judges/cloud-readiness.js +7 -0
  77. package/dist/judges/concurrency.js +7 -0
  78. package/dist/judges/configuration-management.js +7 -0
  79. package/dist/judges/cybersecurity.js +7 -0
  80. package/dist/judges/data-security.js +7 -0
  81. package/dist/judges/dependency-health.js +7 -0
  82. package/dist/judges/documentation.js +7 -0
  83. package/dist/judges/error-handling.js +7 -0
  84. package/dist/judges/ethics-bias.js +7 -0
  85. package/dist/judges/false-positive-review.js +13 -1
  86. package/dist/judges/framework-safety.js +7 -0
  87. package/dist/judges/hallucination-detection.js +14 -1
  88. package/dist/judges/iac-security.js +7 -0
  89. package/dist/judges/intent-alignment.js +14 -1
  90. package/dist/judges/logging-privacy.js +7 -0
  91. package/dist/judges/maintainability.js +7 -0
  92. package/dist/judges/multi-turn-coherence.js +7 -0
  93. package/dist/judges/observability.js +7 -0
  94. package/dist/judges/portability.js +7 -0
  95. package/dist/judges/rate-limiting.js +7 -0
  96. package/dist/judges/reliability.js +7 -0
  97. package/dist/judges/security.js +14 -1
  98. package/dist/judges/testing.js +7 -0
  99. package/dist/judges/ux.js +7 -0
  100. package/dist/review-conversation.d.ts +87 -0
  101. package/dist/review-conversation.js +307 -0
  102. package/dist/sast-integration.d.ts +112 -0
  103. package/dist/sast-integration.js +215 -0
  104. package/dist/tools/register-evaluation.js +208 -8
  105. package/dist/tools/register-fix.js +24 -1
  106. package/dist/tools/register-resources.d.ts +6 -0
  107. package/dist/tools/register-resources.js +177 -0
  108. package/dist/tools/register-review.js +26 -1
  109. package/dist/tools/register-workflow.js +384 -11
  110. package/dist/tools/validation.d.ts +13 -0
  111. package/dist/tools/validation.js +77 -0
  112. package/dist/types.d.ts +122 -0
  113. package/package.json +25 -12
  114. package/server.json +2 -2
@@ -5,10 +5,12 @@ import { z } from "zod";
5
5
  import { readFileSync, existsSync } from "fs";
6
6
  import { extname } from "path";
7
7
  import { JUDGES, getJudge, getJudgeSummaries } from "../judges/index.js";
8
- import { evaluateWithJudge, evaluateWithTribunal, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, } from "../evaluators/index.js";
8
+ import { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, } from "../evaluators/index.js";
9
9
  import { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "../evaluators/v2.js";
10
10
  import { detectProjectContext } from "../evaluators/shared.js";
11
+ import { getGlobalSession } from "../evaluation-session.js";
11
12
  import { configSchema, toJudgesConfig } from "./schemas.js";
13
+ import { validateCodeSize } from "./validation.js";
12
14
  import { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection } from "./deep-review.js";
13
15
  /**
14
16
  * Register evaluation-focused tools: get_judges, evaluate_code,
@@ -20,6 +22,7 @@ export function registerEvaluationTools(server) {
20
22
  registerEvaluateSingleJudge(server);
21
23
  registerEvaluateV2(server);
22
24
  registerEvaluateFile(server);
25
+ registerEvaluateCodeStreaming(server);
23
26
  }
24
27
  // ─── get_judges ──────────────────────────────────────────────────────────────
25
28
  function registerGetJudges(server) {
@@ -34,6 +37,15 @@ function registerGetJudges(server) {
34
37
  type: "text",
35
38
  text: `# Judges Panel\n\n${text}`,
36
39
  },
40
+ {
41
+ type: "text",
42
+ text: "```json\n" +
43
+ JSON.stringify({
44
+ judgeCount: judges.length,
45
+ judges: judges.map((j) => ({ id: j.id, name: j.name, domain: j.domain })),
46
+ }, null, 2) +
47
+ "\n```",
48
+ },
37
49
  ],
38
50
  };
39
51
  });
@@ -70,20 +82,52 @@ function registerEvaluateCode(server) {
70
82
  config: configSchema,
71
83
  }, async ({ code, language, context, includeAstFindings, minConfidence, relatedFiles, config }) => {
72
84
  try {
85
+ const sizeError = validateCodeSize(code);
86
+ if (sizeError) {
87
+ return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
88
+ }
89
+ const session = getGlobalSession();
73
90
  const verdict = evaluateWithTribunal(code, language, context, {
74
91
  includeAstFindings,
75
92
  minConfidence,
76
93
  config: toJudgesConfig(config),
94
+ adaptiveSelection: true,
95
+ filePath: context,
77
96
  });
97
+ // Track evaluation in session
98
+ session.recordEvaluation(context ?? `<inline:${language}>`, code, verdict);
78
99
  const projectContext = detectProjectContext(code, language);
79
100
  const patternResults = formatVerdictAsMarkdown(verdict);
80
101
  const deepReview = buildTribunalDeepReviewSection(JUDGES, language, context, relatedFiles, projectContext);
102
+ // Structured JSON content block for programmatic consumption
103
+ const structuredData = {
104
+ score: verdict.overallScore,
105
+ verdict: verdict.overallVerdict,
106
+ findingCount: verdict.findings.length,
107
+ criticalCount: verdict.findings.filter((f) => f.severity === "critical").length,
108
+ highCount: verdict.findings.filter((f) => f.severity === "high").length,
109
+ judgesRun: verdict.evaluations.length,
110
+ findings: verdict.findings.map((f) => ({
111
+ ruleId: f.ruleId,
112
+ severity: f.severity,
113
+ title: f.title,
114
+ lineNumbers: f.lineNumbers,
115
+ confidence: f.confidence,
116
+ })),
117
+ sessionStats: {
118
+ evaluationCount: session.evaluationCount,
119
+ },
120
+ };
81
121
  return {
82
122
  content: [
83
123
  {
84
124
  type: "text",
85
125
  text: patternResults + deepReview,
86
126
  },
127
+ {
128
+ type: "text",
129
+ text: "```json\n" + JSON.stringify(structuredData, null, 2) + "\n```",
130
+ },
87
131
  ],
88
132
  };
89
133
  }
@@ -130,6 +174,10 @@ function registerEvaluateSingleJudge(server) {
130
174
  config: configSchema,
131
175
  }, async ({ code, language, judgeId, context, minConfidence, relatedFiles, config }) => {
132
176
  try {
177
+ const sizeError = validateCodeSize(code);
178
+ if (sizeError) {
179
+ return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
180
+ }
133
181
  const judge = getJudge(judgeId);
134
182
  if (!judge) {
135
183
  return {
@@ -149,12 +197,25 @@ function registerEvaluateSingleJudge(server) {
149
197
  const projectContext = detectProjectContext(code, language);
150
198
  const patternResults = formatEvaluationAsMarkdown(evaluation);
151
199
  const deepReview = buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext);
200
+ const structured = {
201
+ judgeId,
202
+ judgeName: judge.name,
203
+ domain: judge.domain,
204
+ score: evaluation.score,
205
+ verdict: evaluation.verdict,
206
+ findingCount: evaluation.findings.length,
207
+ findings: evaluation.findings.map((f) => ({
208
+ ruleId: f.ruleId,
209
+ severity: f.severity,
210
+ title: f.title,
211
+ lineNumbers: f.lineNumbers,
212
+ confidence: f.confidence,
213
+ })),
214
+ };
152
215
  return {
153
216
  content: [
154
- {
155
- type: "text",
156
- text: patternResults + deepReview,
157
- },
217
+ { type: "text", text: patternResults + deepReview },
218
+ { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
158
219
  ],
159
220
  };
160
221
  }
@@ -173,7 +234,7 @@ function registerEvaluateSingleJudge(server) {
173
234
  }
174
235
  // ─── evaluate_v2 ─────────────────────────────────────────────────────────────
175
236
  function registerEvaluateV2(server) {
176
- server.tool("evaluate_v2", "Run V2 context-aware tribunal evaluation with policy profiles, evidence calibration, specialty feedback, confidence scoring, and uncertainty reporting.", {
237
+ server.tool("evaluate_policy_aware", "Run policy-aware tribunal evaluation with named policy profiles (startup, regulated, healthcare, fintech, public-sector), evidence calibration from runtime metrics, specialty-per-judge feedback, confidence scoring, and uncertainty reporting. Use this when code must meet specific compliance or vertical requirements.", {
177
238
  code: z.string().optional().describe("Source code for single-file mode"),
178
239
  language: z.string().optional().describe("Language for single-file mode"),
179
240
  files: z
@@ -263,7 +324,7 @@ function registerEvaluateV2(server) {
263
324
  evaluationContext,
264
325
  evidence,
265
326
  });
266
- let md = `# V2 Tribunal Evaluation\n\n`;
327
+ let md = `# Policy-Aware Tribunal Evaluation\n\n`;
267
328
  md += `**Policy Profile:** ${result.policyProfile}\n`;
268
329
  md += `**Calibrated Verdict:** ${result.calibratedVerdict.toUpperCase()} (${result.calibratedScore}/100)\n`;
269
330
  md += `**Base Verdict:** ${result.baseVerdict.overallVerdict.toUpperCase()} (${result.baseVerdict.overallScore}/100)\n`;
@@ -310,7 +371,28 @@ function registerEvaluateV2(server) {
310
371
  md += `\n## Supported Policy Profiles\n\n`;
311
372
  md += supportedProfiles.map((profile) => `- ${profile}`).join("\n");
312
373
  md += "\n";
313
- return { content: [{ type: "text", text: md }] };
374
+ const structured = {
375
+ policyProfile: result.policyProfile,
376
+ calibratedScore: result.calibratedScore,
377
+ calibratedVerdict: result.calibratedVerdict,
378
+ baseScore: result.baseVerdict.overallScore,
379
+ baseVerdict: result.baseVerdict.overallVerdict,
380
+ confidence: result.confidence,
381
+ findingCount: result.findings.length,
382
+ findings: result.findings.map((f) => ({
383
+ ruleId: f.ruleId,
384
+ severity: f.severity,
385
+ title: f.title,
386
+ confidence: f.confidence,
387
+ })),
388
+ uncertainty: result.uncertainty,
389
+ };
390
+ return {
391
+ content: [
392
+ { type: "text", text: md },
393
+ { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
394
+ ],
395
+ };
314
396
  }
315
397
  catch (error) {
316
398
  return {
@@ -382,20 +464,60 @@ function registerEvaluateFile(server) {
382
464
  }
383
465
  const code = readFileSync(filePath, "utf-8");
384
466
  const detectedLang = language || detectLanguageFromPath(filePath);
467
+ const session = getGlobalSession();
468
+ // Skip re-evaluation if verdict is stable for this file
469
+ if (session.isVerdictStable(filePath)) {
470
+ const history = session.getVerdictHistory(filePath);
471
+ return {
472
+ content: [
473
+ {
474
+ type: "text",
475
+ text: `# Evaluation: ${filePath}\n\n` +
476
+ `> ⚡ **Verdict stable** — score has converged at **${history[0]?.score ?? 0}/100** ` +
477
+ `across last evaluations. Skipping redundant re-evaluation.\n\n` +
478
+ `Use \`evaluate_code\` with the code directly to force a fresh evaluation.`,
479
+ },
480
+ ],
481
+ };
482
+ }
385
483
  const verdict = evaluateWithTribunal(code, detectedLang, context, {
386
484
  includeAstFindings,
387
485
  minConfidence,
388
486
  config: toJudgesConfig(config),
487
+ adaptiveSelection: true,
488
+ filePath,
389
489
  });
490
+ session.recordEvaluation(filePath, code, verdict);
390
491
  const projectContext = detectProjectContext(code, detectedLang, filePath);
391
492
  const patternResults = formatVerdictAsMarkdown(verdict);
392
493
  const deepReview = buildTribunalDeepReviewSection(JUDGES, detectedLang, context, undefined, projectContext);
494
+ const structuredData = {
495
+ filePath,
496
+ language: detectedLang,
497
+ score: verdict.overallScore,
498
+ verdict: verdict.overallVerdict,
499
+ findingCount: verdict.findings.length,
500
+ criticalCount: verdict.findings.filter((f) => f.severity === "critical").length,
501
+ highCount: verdict.findings.filter((f) => f.severity === "high").length,
502
+ judgesRun: verdict.evaluations.length,
503
+ findings: verdict.findings.map((f) => ({
504
+ ruleId: f.ruleId,
505
+ severity: f.severity,
506
+ title: f.title,
507
+ lineNumbers: f.lineNumbers,
508
+ confidence: f.confidence,
509
+ })),
510
+ };
393
511
  return {
394
512
  content: [
395
513
  {
396
514
  type: "text",
397
515
  text: `# Evaluation: ${filePath}\n\n` + patternResults + deepReview,
398
516
  },
517
+ {
518
+ type: "text",
519
+ text: "```json\n" + JSON.stringify(structuredData, null, 2) + "\n```",
520
+ },
399
521
  ],
400
522
  };
401
523
  }
@@ -412,3 +534,81 @@ function registerEvaluateFile(server) {
412
534
  }
413
535
  });
414
536
  }
537
+ // ─── evaluate_code_streaming ─────────────────────────────────────────────────
538
+ function registerEvaluateCodeStreaming(server) {
539
+ server.tool("evaluate_code_streaming", `Submit code for streaming evaluation — returns per-judge results as each judge completes, with running aggregate scores. Ideal for long evaluations where you want progressive feedback. All ${JUDGES.length} judges run sequentially with per-judge results accumulated into a single structured response.`, {
540
+ code: z.string().describe("The source code to evaluate."),
541
+ language: z.string().describe("The programming language (e.g., 'typescript', 'python', 'javascript')."),
542
+ context: z.string().optional().describe("Optional context about the code."),
543
+ includeAstFindings: z.boolean().optional().describe("Include AST/code-structure findings (default: true)"),
544
+ minConfidence: z
545
+ .number()
546
+ .min(0)
547
+ .max(1)
548
+ .optional()
549
+ .describe("Minimum finding confidence to include (0-1, default: 0)"),
550
+ config: configSchema,
551
+ }, async ({ code, language, context, includeAstFindings, minConfidence, config }) => {
552
+ try {
553
+ const session = getGlobalSession();
554
+ const batches = [];
555
+ let finalBatch;
556
+ for await (const batch of evaluateWithTribunalStreaming(code, language, context, {
557
+ includeAstFindings,
558
+ minConfidence,
559
+ config: toJudgesConfig(config),
560
+ adaptiveSelection: true,
561
+ })) {
562
+ batches.push({
563
+ judgeId: batch.judgeId,
564
+ judgeName: batch.judgeName,
565
+ findingCount: batch.evaluation.findings.length,
566
+ durationMs: batch.evaluation.durationMs ?? 0,
567
+ runningScore: batch.aggregate.currentScore,
568
+ runningVerdict: batch.aggregate.currentVerdict,
569
+ });
570
+ finalBatch = batch;
571
+ }
572
+ // Build progressive markdown
573
+ let md = `# Streaming Evaluation Results\n\n`;
574
+ md += `**Final Score:** ${finalBatch?.aggregate.currentScore ?? 0}/100\n`;
575
+ md += `**Verdict:** ${(finalBatch?.aggregate.currentVerdict ?? "pass").toUpperCase()}\n`;
576
+ md += `**Judges Run:** ${finalBatch?.aggregate.completedJudges ?? 0}/${finalBatch?.aggregate.totalJudges ?? 0}\n`;
577
+ md += `**Total Findings:** ${finalBatch?.aggregate.findingsSoFar ?? 0}\n\n`;
578
+ md += `## Per-Judge Breakdown\n\n`;
579
+ md += `| Judge | Findings | Time (ms) | Running Score |\n`;
580
+ md += `|-------|----------|-----------|---------------|\n`;
581
+ for (const b of batches) {
582
+ md += `| ${b.judgeName} | ${b.findingCount} | ${b.durationMs} | ${b.runningScore}/100 |\n`;
583
+ }
584
+ const structuredData = {
585
+ score: finalBatch?.aggregate.currentScore ?? 0,
586
+ verdict: finalBatch?.aggregate.currentVerdict ?? "pass",
587
+ totalFindings: finalBatch?.aggregate.findingsSoFar ?? 0,
588
+ criticalFindings: finalBatch?.aggregate.criticalSoFar ?? 0,
589
+ highFindings: finalBatch?.aggregate.highSoFar ?? 0,
590
+ judgesRun: finalBatch?.aggregate.completedJudges ?? 0,
591
+ totalJudges: finalBatch?.aggregate.totalJudges ?? 0,
592
+ perJudge: batches,
593
+ sessionEvaluationCount: session.evaluationCount,
594
+ };
595
+ return {
596
+ content: [
597
+ { type: "text", text: md },
598
+ { type: "text", text: "```json\n" + JSON.stringify(structuredData, null, 2) + "\n```" },
599
+ ],
600
+ };
601
+ }
602
+ catch (error) {
603
+ return {
604
+ content: [
605
+ {
606
+ type: "text",
607
+ text: error instanceof Error ? `Error: ${error.message}` : "Error: Streaming evaluation failed",
608
+ },
609
+ ],
610
+ isError: true,
611
+ };
612
+ }
613
+ });
614
+ }
@@ -7,6 +7,7 @@ import { evaluateWithTribunal, evaluateWithJudge } from "../evaluators/index.js"
7
7
  import { getJudge, JUDGES } from "../judges/index.js";
8
8
  import { applyPatches } from "../commands/fix.js";
9
9
  import { configSchema, toJudgesConfig } from "./schemas.js";
10
+ import { validateCodeSize } from "./validation.js";
10
11
  /**
11
12
  * Register the fix_code tool for one-shot code evaluation + auto-fix.
12
13
  */
@@ -38,6 +39,10 @@ function registerFixCode(server) {
38
39
  config: configSchema,
39
40
  }, async ({ code, language, judgeId, context, minConfidence, config }) => {
40
41
  try {
42
+ const sizeError = validateCodeSize(code);
43
+ if (sizeError) {
44
+ return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
45
+ }
41
46
  const effectiveMinConfidence = minConfidence ?? 0.5;
42
47
  // ── Evaluate ────────────────────────────────────────────────
43
48
  let allFindings;
@@ -133,8 +138,26 @@ function registerFixCode(server) {
133
138
  text += `\n`;
134
139
  }
135
140
  text += `### Fixed Code\n\n\`\`\`${language}\n${fixedCode}\n\`\`\`\n`;
141
+ const structured = {
142
+ totalFindings: allFindings.length,
143
+ autoFixable: fixable.length,
144
+ applied,
145
+ skipped,
146
+ remaining: remaining.length,
147
+ patches: fixable.map((p) => ({
148
+ ruleId: p.ruleId,
149
+ severity: p.severity,
150
+ title: p.title,
151
+ line: p.patch.startLine,
152
+ oldText: p.patch.oldText,
153
+ newText: p.patch.newText,
154
+ })),
155
+ };
136
156
  return {
137
- content: [{ type: "text", text }],
157
+ content: [
158
+ { type: "text", text },
159
+ { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
160
+ ],
138
161
  };
139
162
  }
140
163
  catch (error) {
@@ -0,0 +1,6 @@
1
+ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ /**
3
+ * Register MCP resources: judges catalog, presets, session state,
4
+ * and parameterized templates for single-judge / single-preset lookups.
5
+ */
6
+ export declare function registerResources(server: McpServer): void;
@@ -0,0 +1,177 @@
1
+ // ─── MCP Resource Registration ───────────────────────────────────────────────
2
+ // Expose judges metadata, presets, and session state as MCP resources.
3
+ // Includes both static resources and parameterized resource templates for
4
+ // efficient single-item lookups (judges://judge/{id}, judges://preset/{key}).
5
+ // ──────────────────────────────────────────────────────────────────────────────
6
+ import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
7
+ import { getJudge, getJudgeSummaries, JUDGES } from "../judges/index.js";
8
+ import { getPreset, PRESETS } from "../presets.js";
9
+ import { getGlobalSession } from "../evaluation-session.js";
10
+ /**
11
+ * Register MCP resources: judges catalog, presets, session state,
12
+ * and parameterized templates for single-judge / single-preset lookups.
13
+ */
14
+ export function registerResources(server) {
15
+ registerJudgesCatalog(server);
16
+ registerPresetsResource(server);
17
+ registerSessionResource(server);
18
+ registerJudgeTemplate(server);
19
+ registerPresetTemplate(server);
20
+ }
21
+ // ─── judges://catalog ────────────────────────────────────────────────────────
22
+ function registerJudgesCatalog(server) {
23
+ server.resource("judges-catalog", "judges://catalog", { description: "Full catalog of all judges on the panel — IDs, names, domains, and descriptions." }, async (uri) => {
24
+ const judges = getJudgeSummaries();
25
+ const data = judges.map((j) => ({
26
+ id: j.id,
27
+ name: j.name,
28
+ domain: j.domain,
29
+ description: j.description,
30
+ }));
31
+ return {
32
+ contents: [
33
+ {
34
+ uri: uri.href,
35
+ mimeType: "application/json",
36
+ text: JSON.stringify(data, null, 2),
37
+ },
38
+ ],
39
+ };
40
+ });
41
+ }
42
+ // ─── judges://presets ────────────────────────────────────────────────────────
43
+ function registerPresetsResource(server) {
44
+ server.resource("presets", "judges://presets", { description: "Available evaluation presets with names, descriptions, and configuration overrides." }, async (uri) => {
45
+ const data = Object.entries(PRESETS).map(([key, preset]) => ({
46
+ key,
47
+ name: preset.name,
48
+ description: preset.description,
49
+ config: preset.config,
50
+ }));
51
+ return {
52
+ contents: [
53
+ {
54
+ uri: uri.href,
55
+ mimeType: "application/json",
56
+ text: JSON.stringify(data, null, 2),
57
+ },
58
+ ],
59
+ };
60
+ });
61
+ }
62
+ // ─── judges://session ────────────────────────────────────────────────────────
63
+ function registerSessionResource(server) {
64
+ server.resource("session", "judges://session", {
65
+ description: "Current evaluation session state — evaluation count, detected frameworks, verdict history, and stability indicators.",
66
+ }, async (uri) => {
67
+ const session = getGlobalSession();
68
+ const ctx = session.getContext();
69
+ const filesEvaluated = [...ctx.verdictHistory.entries()].map(([file, history]) => ({
70
+ file,
71
+ evaluations: history.length,
72
+ latestScore: history[history.length - 1]?.score ?? 0,
73
+ stable: session.isVerdictStable(file),
74
+ }));
75
+ const data = {
76
+ evaluationCount: ctx.evaluationCount,
77
+ startedAt: ctx.startedAt,
78
+ frameworks: ctx.frameworks,
79
+ capabilities: [...ctx.capabilities],
80
+ filesEvaluated,
81
+ };
82
+ return {
83
+ contents: [
84
+ {
85
+ uri: uri.href,
86
+ mimeType: "application/json",
87
+ text: JSON.stringify(data, null, 2),
88
+ },
89
+ ],
90
+ };
91
+ });
92
+ }
93
+ // ─── judges://judge/{id} (template) ─────────────────────────────────────────
94
+ function registerJudgeTemplate(server) {
95
+ const judgeIds = JUDGES.map((j) => j.id);
96
+ server.resource("judge-detail", new ResourceTemplate("judges://judge/{id}", {
97
+ list: async () => ({
98
+ resources: judgeIds.map((id) => ({
99
+ uri: `judges://judge/${id}`,
100
+ name: id,
101
+ })),
102
+ }),
103
+ complete: {
104
+ id: (value) => judgeIds.filter((id) => id.startsWith(value)),
105
+ },
106
+ }), { description: "Detailed info for a single judge — rules, domain, system prompt summary." }, async (uri, { id }) => {
107
+ const judgeId = Array.isArray(id) ? id[0] : id;
108
+ const judge = getJudge(judgeId);
109
+ if (!judge) {
110
+ return {
111
+ contents: [
112
+ {
113
+ uri: uri.href,
114
+ mimeType: "application/json",
115
+ text: JSON.stringify({ error: `Judge '${judgeId}' not found` }),
116
+ },
117
+ ],
118
+ };
119
+ }
120
+ const data = {
121
+ id: judge.id,
122
+ name: judge.name,
123
+ domain: judge.domain,
124
+ description: judge.description,
125
+ rulePrefix: judge.rulePrefix,
126
+ tableDescription: judge.tableDescription,
127
+ promptDescription: judge.promptDescription,
128
+ };
129
+ return {
130
+ contents: [
131
+ {
132
+ uri: uri.href,
133
+ mimeType: "application/json",
134
+ text: JSON.stringify(data, null, 2),
135
+ },
136
+ ],
137
+ };
138
+ });
139
+ }
140
+ // ─── judges://preset/{key} (template) ───────────────────────────────────────
141
+ function registerPresetTemplate(server) {
142
+ const presetKeys = Object.keys(PRESETS);
143
+ server.resource("preset-detail", new ResourceTemplate("judges://preset/{key}", {
144
+ list: async () => ({
145
+ resources: presetKeys.map((key) => ({
146
+ uri: `judges://preset/${key}`,
147
+ name: key,
148
+ })),
149
+ }),
150
+ complete: {
151
+ key: (value) => presetKeys.filter((k) => k.startsWith(value)),
152
+ },
153
+ }), { description: "Detailed configuration for a single evaluation preset." }, async (uri, { key }) => {
154
+ const presetKey = Array.isArray(key) ? key[0] : key;
155
+ const preset = getPreset(presetKey);
156
+ if (!preset) {
157
+ return {
158
+ contents: [
159
+ {
160
+ uri: uri.href,
161
+ mimeType: "application/json",
162
+ text: JSON.stringify({ error: `Preset '${presetKey}' not found` }),
163
+ },
164
+ ],
165
+ };
166
+ }
167
+ return {
168
+ contents: [
169
+ {
170
+ uri: uri.href,
171
+ mimeType: "application/json",
172
+ text: JSON.stringify({ key: presetKey, ...preset }, null, 2),
173
+ },
174
+ ],
175
+ };
176
+ });
177
+ }
@@ -115,8 +115,21 @@ function registerExplainFinding(server) {
115
115
  sections.push(`\n## Remediation\n${remediation}`);
116
116
  }
117
117
  sections.push(`\n## Next steps\n- Use \`triage_finding\` to accept, defer, or dismiss this finding\n- Use \`fix_code\` to auto-fix if a patch is available\n- Use \`evaluate_code\` to re-evaluate after fixing`);
118
+ const structured = {
119
+ ruleId,
120
+ prefix,
121
+ title: title ?? null,
122
+ severity: severity ?? null,
123
+ owasp: ctx?.owasp ?? null,
124
+ cwe: ctx?.cwe ?? null,
125
+ learn: ctx?.learn ?? null,
126
+ remediation: getRemediationGuidance(prefix) ?? null,
127
+ };
118
128
  return {
119
- content: [{ type: "text", text: sections.join("\n") }],
129
+ content: [
130
+ { type: "text", text: sections.join("\n") },
131
+ { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
132
+ ],
120
133
  };
121
134
  });
122
135
  }
@@ -173,6 +186,18 @@ function registerTriageFinding(server) {
173
186
  type: "text",
174
187
  text: `✓ Triaged finding \`${result.ruleId}\` in ${result.filePath} as **${status}**${reason ? `\n\nReason: ${reason}` : ""}${triagedBy ? `\nTriaged by: ${triagedBy}` : ""}`,
175
188
  },
189
+ {
190
+ type: "text",
191
+ text: "```json\n" +
192
+ JSON.stringify({
193
+ ruleId: result.ruleId,
194
+ filePath: result.filePath,
195
+ status,
196
+ reason: reason ?? null,
197
+ triagedBy: triagedBy ?? null,
198
+ }, null, 2) +
199
+ "\n```",
200
+ },
176
201
  ],
177
202
  };
178
203
  }