@kevinrabun/judges 3.115.3 → 3.116.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -6,6 +6,7 @@ import("@modelcontextprotocol/sdk/server/mcp.js")
6
6
  const { StdioServerTransport } = await import("@modelcontextprotocol/sdk/server/stdio.js");
7
7
  const { registerTools } = await import("./tools/register.js");
8
8
  const { registerPrompts } = await import("./tools/prompts.js");
9
+ const { registerResources } = await import("./tools/register-resources.js");
9
10
  const { readFileSync } = await import("fs");
10
11
  const { resolve, dirname } = await import("path");
11
12
  const { fileURLToPath } = await import("url");
@@ -25,6 +26,7 @@ import("@modelcontextprotocol/sdk/server/mcp.js")
25
26
  });
26
27
  registerTools(server);
27
28
  registerPrompts(server);
29
+ registerResources(server);
28
30
  const transport = new StdioServerTransport();
29
31
  await server.connect(transport);
30
32
  console.error("Judges Panel MCP server running on stdio");
@@ -1,17 +1,62 @@
1
1
  /**
2
- * Judge registry bootstrap (agent-native).
2
+ * Judge registry bootstrap.
3
3
  *
4
- * Judges are now sourced from `.judge.md` files in the `agents/` folder (legacy
5
- * `.agent.md` still supported). Each agent frontmatter references an evaluator
6
- * script (in `src/evaluators/`), and the agent loader registers them with the
7
- * unified `JudgeRegistry`.
4
+ * Judges are dual-registered:
5
+ * 1. Static side-effect imports below — each module calls
6
+ * `defaultRegistry.register()` at load time. These are inlined by
7
+ * esbuild and work in both ESM and CJS bundles.
8
+ * 2. Agent-native `.judge.md` files loaded at runtime from the `agents/`
9
+ * directory (when available). This enriches / overrides metadata.
8
10
  *
9
- * Legacy side-effect imports have been removed. If you need to add a judge, add
10
- * an agent file and (optionally) an evaluator script, then run:
11
- * - `npm run generate:agents` (to sync)
12
- * - `npm run validate:agents`
11
+ * The static imports guarantee that judges are always available, even in
12
+ * bundled environments (VS Code extension) where `agents/` is absent.
13
13
  */
14
14
  import type { JudgeDefinition } from "../types.js";
15
+ import "./accessibility.js";
16
+ import "./agent-instructions.js";
17
+ import "./ai-code-safety.js";
18
+ import "./api-contract.js";
19
+ import "./api-design.js";
20
+ import "./authentication.js";
21
+ import "./backwards-compatibility.js";
22
+ import "./caching.js";
23
+ import "./ci-cd.js";
24
+ import "./cloud-readiness.js";
25
+ import "./code-structure.js";
26
+ import "./compliance.js";
27
+ import "./concurrency.js";
28
+ import "./configuration-management.js";
29
+ import "./cost-effectiveness.js";
30
+ import "./cybersecurity.js";
31
+ import "./data-security.js";
32
+ import "./data-sovereignty.js";
33
+ import "./database.js";
34
+ import "./dependency-health.js";
35
+ import "./documentation.js";
36
+ import "./error-handling.js";
37
+ import "./ethics-bias.js";
38
+ import "./false-positive-review.js";
39
+ import "./framework-safety.js";
40
+ import "./hallucination-detection.js";
41
+ import "./iac-security.js";
42
+ import "./intent-alignment.js";
43
+ import "./internationalization.js";
44
+ import "./logging-privacy.js";
45
+ import "./logic-review.js";
46
+ import "./maintainability.js";
47
+ import "./model-fingerprint.js";
48
+ import "./multi-turn-coherence.js";
49
+ import "./observability.js";
50
+ import "./over-engineering.js";
51
+ import "./performance.js";
52
+ import "./portability.js";
53
+ import "./rate-limiting.js";
54
+ import "./reliability.js";
55
+ import "./scalability.js";
56
+ import "./security.js";
57
+ import "./software-practices.js";
58
+ import "./testing.js";
59
+ import "./ux.js";
15
60
  /**
16
61
  * Load judges (agent-native). Loads agents from the default `agents/` folder
17
62
  * and returns the current registry snapshot.
@@ -1,30 +1,85 @@
1
1
  /**
2
- * Judge registry bootstrap (agent-native).
2
+ * Judge registry bootstrap.
3
3
  *
4
- * Judges are now sourced from `.judge.md` files in the `agents/` folder (legacy
5
- * `.agent.md` still supported). Each agent frontmatter references an evaluator
6
- * script (in `src/evaluators/`), and the agent loader registers them with the
7
- * unified `JudgeRegistry`.
4
+ * Judges are dual-registered:
5
+ * 1. Static side-effect imports below — each module calls
6
+ * `defaultRegistry.register()` at load time. These are inlined by
7
+ * esbuild and work in both ESM and CJS bundles.
8
+ * 2. Agent-native `.judge.md` files loaded at runtime from the `agents/`
9
+ * directory (when available). This enriches / overrides metadata.
8
10
  *
9
- * Legacy side-effect imports have been removed. If you need to add a judge, add
10
- * an agent file and (optionally) an evaluator script, then run:
11
- * - `npm run generate:agents` (to sync)
12
- * - `npm run validate:agents`
11
+ * The static imports guarantee that judges are always available, even in
12
+ * bundled environments (VS Code extension) where `agents/` is absent.
13
13
  */
14
14
  import { defaultRegistry } from "../judge-registry.js";
15
15
  import { loadAndRegisterAgents } from "../agent-loader.js";
16
16
  import { resolve, dirname } from "node:path";
17
17
  import { fileURLToPath } from "node:url";
18
+ // ─── Static side-effect imports (self-registering) ──────────────────────────
19
+ import "./accessibility.js";
20
+ import "./agent-instructions.js";
21
+ import "./ai-code-safety.js";
22
+ import "./api-contract.js";
23
+ import "./api-design.js";
24
+ import "./authentication.js";
25
+ import "./backwards-compatibility.js";
26
+ import "./caching.js";
27
+ import "./ci-cd.js";
28
+ import "./cloud-readiness.js";
29
+ import "./code-structure.js";
30
+ import "./compliance.js";
31
+ import "./concurrency.js";
32
+ import "./configuration-management.js";
33
+ import "./cost-effectiveness.js";
34
+ import "./cybersecurity.js";
35
+ import "./data-security.js";
36
+ import "./data-sovereignty.js";
37
+ import "./database.js";
38
+ import "./dependency-health.js";
39
+ import "./documentation.js";
40
+ import "./error-handling.js";
41
+ import "./ethics-bias.js";
42
+ import "./false-positive-review.js";
43
+ import "./framework-safety.js";
44
+ import "./hallucination-detection.js";
45
+ import "./iac-security.js";
46
+ import "./intent-alignment.js";
47
+ import "./internationalization.js";
48
+ import "./logging-privacy.js";
49
+ import "./logic-review.js";
50
+ import "./maintainability.js";
51
+ import "./model-fingerprint.js";
52
+ import "./multi-turn-coherence.js";
53
+ import "./observability.js";
54
+ import "./over-engineering.js";
55
+ import "./performance.js";
56
+ import "./portability.js";
57
+ import "./rate-limiting.js";
58
+ import "./reliability.js";
59
+ import "./scalability.js";
60
+ import "./security.js";
61
+ import "./software-practices.js";
62
+ import "./testing.js";
63
+ import "./ux.js";
18
64
  // Support both ESM (import.meta.url) and CJS (esbuild bundle) environments.
19
65
  const _importMetaUrl = typeof import.meta?.url === "string" ? import.meta.url : undefined;
20
66
  const __filename = _importMetaUrl ? fileURLToPath(_importMetaUrl) : "";
21
- const __dirname = __filename ? dirname(__filename) : process.cwd();
67
+ const __dirname = __filename ? dirname(__filename) : "";
22
68
  let agentsLoaded = false;
23
69
  function loadDefaultAgents() {
24
70
  if (agentsLoaded)
25
71
  return;
26
- const agentsDir = resolve(__dirname, "..", "..", "agents");
27
- loadAndRegisterAgents(agentsDir, defaultRegistry);
72
+ // Static side-effect imports above already registered all built-in judges.
73
+ // In ESM mode, also load from agents/ directory for metadata enrichment.
74
+ if (__dirname) {
75
+ try {
76
+ const agentsDir = resolve(__dirname, "..", "..", "agents");
77
+ loadAndRegisterAgents(agentsDir, defaultRegistry);
78
+ }
79
+ catch {
80
+ // agents/ directory may not exist — built-in judges are already loaded
81
+ }
82
+ }
28
83
  agentsLoaded = true;
29
84
  }
30
85
  // ─── Optional Agent Loader Integration ──────────────────────────────────────
@@ -42,9 +97,12 @@ export async function loadJudges() {
42
97
  * agents can augment or replace built-in judges. If a judge is already
43
98
  * registered, it is skipped.
44
99
  */
45
export function loadAgentJudges(dir) {
    // Fall back to the default `agents/` folder next to the package when the
    // caller gives no directory; that fallback is "" when the module directory
    // is unknown (import.meta.url unavailable).
    let agentsDir = dir;
    if (agentsDir === undefined || agentsDir === null) {
        agentsDir = __dirname ? resolve(__dirname, "..", "..", "agents") : "";
    }
    if (!agentsDir) {
        // CJS bundle — no agents directory available
        return 0;
    }
    // Clear the flag while loading so a re-run can pick up new agents, then
    // mark the registry as loaded again once the loader has returned.
    agentsLoaded = false;
    const registered = loadAndRegisterAgents(agentsDir, defaultRegistry);
    agentsLoaded = true;
    return registered;
}
@@ -5,10 +5,12 @@ import { z } from "zod";
5
5
  import { readFileSync, existsSync } from "fs";
6
6
  import { extname } from "path";
7
7
  import { JUDGES, getJudge, getJudgeSummaries } from "../judges/index.js";
8
- import { evaluateWithJudge, evaluateWithTribunal, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, } from "../evaluators/index.js";
8
+ import { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, } from "../evaluators/index.js";
9
9
  import { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "../evaluators/v2.js";
10
10
  import { detectProjectContext } from "../evaluators/shared.js";
11
+ import { getGlobalSession } from "../evaluation-session.js";
11
12
  import { configSchema, toJudgesConfig } from "./schemas.js";
13
+ import { validateCodeSize } from "./validation.js";
12
14
  import { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection } from "./deep-review.js";
13
15
  /**
14
16
  * Register evaluation-focused tools: get_judges, evaluate_code,
@@ -20,6 +22,7 @@ export function registerEvaluationTools(server) {
20
22
  registerEvaluateSingleJudge(server);
21
23
  registerEvaluateV2(server);
22
24
  registerEvaluateFile(server);
25
+ registerEvaluateCodeStreaming(server);
23
26
  }
24
27
  // ─── get_judges ──────────────────────────────────────────────────────────────
25
28
  function registerGetJudges(server) {
@@ -34,6 +37,15 @@ function registerGetJudges(server) {
34
37
  type: "text",
35
38
  text: `# Judges Panel\n\n${text}`,
36
39
  },
40
+ {
41
+ type: "text",
42
+ text: "```json\n" +
43
+ JSON.stringify({
44
+ judgeCount: judges.length,
45
+ judges: judges.map((j) => ({ id: j.id, name: j.name, domain: j.domain })),
46
+ }, null, 2) +
47
+ "\n```",
48
+ },
37
49
  ],
38
50
  };
39
51
  });
@@ -70,20 +82,52 @@ function registerEvaluateCode(server) {
70
82
  config: configSchema,
71
83
  }, async ({ code, language, context, includeAstFindings, minConfidence, relatedFiles, config }) => {
72
84
  try {
85
+ const sizeError = validateCodeSize(code);
86
+ if (sizeError) {
87
+ return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
88
+ }
89
+ const session = getGlobalSession();
73
90
  const verdict = evaluateWithTribunal(code, language, context, {
74
91
  includeAstFindings,
75
92
  minConfidence,
76
93
  config: toJudgesConfig(config),
94
+ adaptiveSelection: true,
95
+ filePath: context,
77
96
  });
97
+ // Track evaluation in session
98
+ session.recordEvaluation(context ?? `<inline:${language}>`, code, verdict);
78
99
  const projectContext = detectProjectContext(code, language);
79
100
  const patternResults = formatVerdictAsMarkdown(verdict);
80
101
  const deepReview = buildTribunalDeepReviewSection(JUDGES, language, context, relatedFiles, projectContext);
102
+ // Structured JSON content block for programmatic consumption
103
+ const structuredData = {
104
+ score: verdict.overallScore,
105
+ verdict: verdict.overallVerdict,
106
+ findingCount: verdict.findings.length,
107
+ criticalCount: verdict.findings.filter((f) => f.severity === "critical").length,
108
+ highCount: verdict.findings.filter((f) => f.severity === "high").length,
109
+ judgesRun: verdict.evaluations.length,
110
+ findings: verdict.findings.map((f) => ({
111
+ ruleId: f.ruleId,
112
+ severity: f.severity,
113
+ title: f.title,
114
+ lineNumbers: f.lineNumbers,
115
+ confidence: f.confidence,
116
+ })),
117
+ sessionStats: {
118
+ evaluationCount: session.evaluationCount,
119
+ },
120
+ };
81
121
  return {
82
122
  content: [
83
123
  {
84
124
  type: "text",
85
125
  text: patternResults + deepReview,
86
126
  },
127
+ {
128
+ type: "text",
129
+ text: "```json\n" + JSON.stringify(structuredData, null, 2) + "\n```",
130
+ },
87
131
  ],
88
132
  };
89
133
  }
@@ -130,6 +174,10 @@ function registerEvaluateSingleJudge(server) {
130
174
  config: configSchema,
131
175
  }, async ({ code, language, judgeId, context, minConfidence, relatedFiles, config }) => {
132
176
  try {
177
+ const sizeError = validateCodeSize(code);
178
+ if (sizeError) {
179
+ return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
180
+ }
133
181
  const judge = getJudge(judgeId);
134
182
  if (!judge) {
135
183
  return {
@@ -149,12 +197,25 @@ function registerEvaluateSingleJudge(server) {
149
197
  const projectContext = detectProjectContext(code, language);
150
198
  const patternResults = formatEvaluationAsMarkdown(evaluation);
151
199
  const deepReview = buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext);
200
+ const structured = {
201
+ judgeId,
202
+ judgeName: judge.name,
203
+ domain: judge.domain,
204
+ score: evaluation.score,
205
+ verdict: evaluation.verdict,
206
+ findingCount: evaluation.findings.length,
207
+ findings: evaluation.findings.map((f) => ({
208
+ ruleId: f.ruleId,
209
+ severity: f.severity,
210
+ title: f.title,
211
+ lineNumbers: f.lineNumbers,
212
+ confidence: f.confidence,
213
+ })),
214
+ };
152
215
  return {
153
216
  content: [
154
- {
155
- type: "text",
156
- text: patternResults + deepReview,
157
- },
217
+ { type: "text", text: patternResults + deepReview },
218
+ { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
158
219
  ],
159
220
  };
160
221
  }
@@ -173,7 +234,7 @@ function registerEvaluateSingleJudge(server) {
173
234
  }
174
235
  // ─── evaluate_v2 ─────────────────────────────────────────────────────────────
175
236
  function registerEvaluateV2(server) {
176
- server.tool("evaluate_v2", "Run V2 context-aware tribunal evaluation with policy profiles, evidence calibration, specialty feedback, confidence scoring, and uncertainty reporting.", {
237
+ server.tool("evaluate_policy_aware", "Run policy-aware tribunal evaluation with named policy profiles (startup, regulated, healthcare, fintech, public-sector), evidence calibration from runtime metrics, specialty-per-judge feedback, confidence scoring, and uncertainty reporting. Use this when code must meet specific compliance or vertical requirements.", {
177
238
  code: z.string().optional().describe("Source code for single-file mode"),
178
239
  language: z.string().optional().describe("Language for single-file mode"),
179
240
  files: z
@@ -263,7 +324,7 @@ function registerEvaluateV2(server) {
263
324
  evaluationContext,
264
325
  evidence,
265
326
  });
266
- let md = `# V2 Tribunal Evaluation\n\n`;
327
+ let md = `# Policy-Aware Tribunal Evaluation\n\n`;
267
328
  md += `**Policy Profile:** ${result.policyProfile}\n`;
268
329
  md += `**Calibrated Verdict:** ${result.calibratedVerdict.toUpperCase()} (${result.calibratedScore}/100)\n`;
269
330
  md += `**Base Verdict:** ${result.baseVerdict.overallVerdict.toUpperCase()} (${result.baseVerdict.overallScore}/100)\n`;
@@ -310,7 +371,28 @@ function registerEvaluateV2(server) {
310
371
  md += `\n## Supported Policy Profiles\n\n`;
311
372
  md += supportedProfiles.map((profile) => `- ${profile}`).join("\n");
312
373
  md += "\n";
313
- return { content: [{ type: "text", text: md }] };
374
+ const structured = {
375
+ policyProfile: result.policyProfile,
376
+ calibratedScore: result.calibratedScore,
377
+ calibratedVerdict: result.calibratedVerdict,
378
+ baseScore: result.baseVerdict.overallScore,
379
+ baseVerdict: result.baseVerdict.overallVerdict,
380
+ confidence: result.confidence,
381
+ findingCount: result.findings.length,
382
+ findings: result.findings.map((f) => ({
383
+ ruleId: f.ruleId,
384
+ severity: f.severity,
385
+ title: f.title,
386
+ confidence: f.confidence,
387
+ })),
388
+ uncertainty: result.uncertainty,
389
+ };
390
+ return {
391
+ content: [
392
+ { type: "text", text: md },
393
+ { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
394
+ ],
395
+ };
314
396
  }
315
397
  catch (error) {
316
398
  return {
@@ -382,20 +464,60 @@ function registerEvaluateFile(server) {
382
464
  }
383
465
  const code = readFileSync(filePath, "utf-8");
384
466
  const detectedLang = language || detectLanguageFromPath(filePath);
467
+ const session = getGlobalSession();
468
+ // Skip re-evaluation if verdict is stable for this file
469
+ if (session.isVerdictStable(filePath)) {
470
+ const history = session.getVerdictHistory(filePath);
471
+ return {
472
+ content: [
473
+ {
474
+ type: "text",
475
+ text: `# Evaluation: ${filePath}\n\n` +
476
+ `> ⚡ **Verdict stable** — score has converged at **${history[0]?.score ?? 0}/100** ` +
477
+ `across last evaluations. Skipping redundant re-evaluation.\n\n` +
478
+ `Use \`evaluate_code\` with the code directly to force a fresh evaluation.`,
479
+ },
480
+ ],
481
+ };
482
+ }
385
483
  const verdict = evaluateWithTribunal(code, detectedLang, context, {
386
484
  includeAstFindings,
387
485
  minConfidence,
388
486
  config: toJudgesConfig(config),
487
+ adaptiveSelection: true,
488
+ filePath,
389
489
  });
490
+ session.recordEvaluation(filePath, code, verdict);
390
491
  const projectContext = detectProjectContext(code, detectedLang, filePath);
391
492
  const patternResults = formatVerdictAsMarkdown(verdict);
392
493
  const deepReview = buildTribunalDeepReviewSection(JUDGES, detectedLang, context, undefined, projectContext);
494
+ const structuredData = {
495
+ filePath,
496
+ language: detectedLang,
497
+ score: verdict.overallScore,
498
+ verdict: verdict.overallVerdict,
499
+ findingCount: verdict.findings.length,
500
+ criticalCount: verdict.findings.filter((f) => f.severity === "critical").length,
501
+ highCount: verdict.findings.filter((f) => f.severity === "high").length,
502
+ judgesRun: verdict.evaluations.length,
503
+ findings: verdict.findings.map((f) => ({
504
+ ruleId: f.ruleId,
505
+ severity: f.severity,
506
+ title: f.title,
507
+ lineNumbers: f.lineNumbers,
508
+ confidence: f.confidence,
509
+ })),
510
+ };
393
511
  return {
394
512
  content: [
395
513
  {
396
514
  type: "text",
397
515
  text: `# Evaluation: ${filePath}\n\n` + patternResults + deepReview,
398
516
  },
517
+ {
518
+ type: "text",
519
+ text: "```json\n" + JSON.stringify(structuredData, null, 2) + "\n```",
520
+ },
399
521
  ],
400
522
  };
401
523
  }
@@ -412,3 +534,81 @@ function registerEvaluateFile(server) {
412
534
  }
413
535
  });
414
536
  }
537
+ // ─── evaluate_code_streaming ─────────────────────────────────────────────────
538
/**
 * Register the `evaluate_code_streaming` tool.
 *
 * The handler drains the async iterator returned by
 * `evaluateWithTribunalStreaming`, keeps one summary row per yielded batch,
 * and returns a single response made of a markdown report followed by a
 * fenced JSON block carrying the same data for programmatic consumers.
 *
 * Oversized input is rejected up front with `validateCodeSize`, the same
 * guard the other inline-code tools (`evaluate_code`, `fix_code`) apply.
 *
 * @param server - MCP server instance the tool is registered on.
 */
function registerEvaluateCodeStreaming(server) {
    server.tool("evaluate_code_streaming", `Submit code for streaming evaluation — returns per-judge results as each judge completes, with running aggregate scores. Ideal for long evaluations where you want progressive feedback. All ${JUDGES.length} judges run sequentially with per-judge results accumulated into a single structured response.`, {
        code: z.string().describe("The source code to evaluate."),
        language: z.string().describe("The programming language (e.g., 'typescript', 'python', 'javascript')."),
        context: z.string().optional().describe("Optional context about the code."),
        includeAstFindings: z.boolean().optional().describe("Include AST/code-structure findings (default: true)"),
        minConfidence: z
            .number()
            .min(0)
            .max(1)
            .optional()
            .describe("Minimum finding confidence to include (0-1, default: 0)"),
        config: configSchema,
    }, async ({ code, language, context, includeAstFindings, minConfidence, config }) => {
        try {
            // Reject oversized payloads before any judge runs.
            const sizeError = validateCodeSize(code);
            if (sizeError) {
                return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
            }
            const session = getGlobalSession();
            const batches = [];
            let finalBatch;
            for await (const batch of evaluateWithTribunalStreaming(code, language, context, {
                includeAstFindings,
                minConfidence,
                config: toJudgesConfig(config),
                adaptiveSelection: true,
            })) {
                batches.push({
                    judgeId: batch.judgeId,
                    judgeName: batch.judgeName,
                    findingCount: batch.evaluation.findings.length,
                    durationMs: batch.evaluation.durationMs ?? 0,
                    runningScore: batch.aggregate.currentScore,
                    runningVerdict: batch.aggregate.currentVerdict,
                });
                finalBatch = batch;
            }
            // The aggregate of the last batch holds the final totals; it is
            // undefined when the iterator yielded nothing, in which case every
            // figure falls back to its zero/"pass" default.
            const aggregate = finalBatch?.aggregate;
            const score = aggregate?.currentScore ?? 0;
            const verdict = aggregate?.currentVerdict ?? "pass";
            const completedJudges = aggregate?.completedJudges ?? 0;
            const totalJudges = aggregate?.totalJudges ?? 0;
            const totalFindings = aggregate?.findingsSoFar ?? 0;
            // Build progressive markdown
            let md = `# Streaming Evaluation Results\n\n`;
            md += `**Final Score:** ${score}/100\n`;
            md += `**Verdict:** ${verdict.toUpperCase()}\n`;
            md += `**Judges Run:** ${completedJudges}/${totalJudges}\n`;
            md += `**Total Findings:** ${totalFindings}\n\n`;
            md += `## Per-Judge Breakdown\n\n`;
            md += `| Judge | Findings | Time (ms) | Running Score |\n`;
            md += `|-------|----------|-----------|---------------|\n`;
            for (const b of batches) {
                md += `| ${b.judgeName} | ${b.findingCount} | ${b.durationMs} | ${b.runningScore}/100 |\n`;
            }
            const structuredData = {
                score,
                verdict,
                totalFindings,
                criticalFindings: aggregate?.criticalSoFar ?? 0,
                highFindings: aggregate?.highSoFar ?? 0,
                judgesRun: completedJudges,
                totalJudges,
                perJudge: batches,
                sessionEvaluationCount: session.evaluationCount,
            };
            return {
                content: [
                    { type: "text", text: md },
                    { type: "text", text: "```json\n" + JSON.stringify(structuredData, null, 2) + "\n```" },
                ],
            };
        }
        catch (error) {
            return {
                content: [
                    {
                        type: "text",
                        text: error instanceof Error ? `Error: ${error.message}` : "Error: Streaming evaluation failed",
                    },
                ],
                isError: true,
            };
        }
    });
}
@@ -7,6 +7,7 @@ import { evaluateWithTribunal, evaluateWithJudge } from "../evaluators/index.js"
7
7
  import { getJudge, JUDGES } from "../judges/index.js";
8
8
  import { applyPatches } from "../commands/fix.js";
9
9
  import { configSchema, toJudgesConfig } from "./schemas.js";
10
+ import { validateCodeSize } from "./validation.js";
10
11
  /**
11
12
  * Register the fix_code tool for one-shot code evaluation + auto-fix.
12
13
  */
@@ -38,6 +39,10 @@ function registerFixCode(server) {
38
39
  config: configSchema,
39
40
  }, async ({ code, language, judgeId, context, minConfidence, config }) => {
40
41
  try {
42
+ const sizeError = validateCodeSize(code);
43
+ if (sizeError) {
44
+ return { content: [{ type: "text", text: `Error: ${sizeError}` }], isError: true };
45
+ }
41
46
  const effectiveMinConfidence = minConfidence ?? 0.5;
42
47
  // ── Evaluate ────────────────────────────────────────────────
43
48
  let allFindings;
@@ -133,8 +138,26 @@ function registerFixCode(server) {
133
138
  text += `\n`;
134
139
  }
135
140
  text += `### Fixed Code\n\n\`\`\`${language}\n${fixedCode}\n\`\`\`\n`;
141
+ const structured = {
142
+ totalFindings: allFindings.length,
143
+ autoFixable: fixable.length,
144
+ applied,
145
+ skipped,
146
+ remaining: remaining.length,
147
+ patches: fixable.map((p) => ({
148
+ ruleId: p.ruleId,
149
+ severity: p.severity,
150
+ title: p.title,
151
+ line: p.patch.startLine,
152
+ oldText: p.patch.oldText,
153
+ newText: p.patch.newText,
154
+ })),
155
+ };
136
156
  return {
137
- content: [{ type: "text", text }],
157
+ content: [
158
+ { type: "text", text },
159
+ { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
160
+ ],
138
161
  };
139
162
  }
140
163
  catch (error) {
@@ -0,0 +1,6 @@
1
+ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ /**
3
+ * Register MCP resources: judges catalog, presets, session state,
4
+ * and parameterized templates for single-judge / single-preset lookups.
+ *
+ * @param server - The MCP server instance the resources are registered on.
5
+ */
6
+ export declare function registerResources(server: McpServer): void;