@kevinrabun/judges 3.117.8 → 3.119.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Cross-File Import Resolution
3
+ *
4
+ * Automatically resolves imports from a file's AST and builds
5
+ * related-file context for deeper cross-file analysis. This bridges
6
+ * the gap between single-file deterministic analysis and project-wide
7
+ * vulnerability detection.
8
+ *
9
+ * Provides:
10
+ * - `resolveImports()` — resolves import paths to file content
11
+ * - `buildRelatedFilesContext()` — builds RelatedFileSnippet[] from imports
12
+ */
13
+ import type { RelatedFileSnippet } from "./tools/deep-review.js";
14
+ export interface ResolvedImport {
15
+ /** The import specifier exactly as written in the source (e.g., "./utils", "express") */
16
+ specifier: string;
17
+ /** Absolute path of the file the specifier resolved to; omitted when no matching file was found */
18
+ resolvedPath?: string;
19
+ /** Whether this is a local import, i.e. the specifier starts with "./", "../" or "/" */
20
+ isLocal: boolean;
21
+ /** File text, cut to a snippet when long; omitted when the file was not found, could not be read, is empty, or exceeds the size limit */
22
+ content?: string;
23
+ }
24
+ export interface ImportResolutionResult {
25
+ /** Every local import that was examined, including ones whose file was not found or not readable (those entries lack `resolvedPath` and/or `content`) */
26
+ resolved: ResolvedImport[];
27
+ /** Specifiers that are not local paths (e.g. package names); local imports that fail to resolve are reported in `resolved`, not here */
28
+ external: string[];
29
+ /** Snippets for the local imports whose content was loaded, ready for deep-review context */
30
+ relatedFiles: RelatedFileSnippet[];
31
+ }
32
+ /**
33
+ * Resolve imports from a source file and return related file context.
34
+ *
35
+ * Uses the AST parser (supplemented by regex extraction) to collect import
36
+ * specifiers, resolves local imports to actual files, reads their content,
37
+ * and returns structured context suitable for deep-review cross-file analysis.
38
+ *
39
+ * @param code - Source code of the file being analyzed
40
+ * @param language - Programming language
41
+ * @param filePath - Absolute path to the source file (needed for relative import resolution)
42
+ * @param maxImports - Maximum number of local imports to load content for (default: 20); processing stops once this many snippets were collected
+ * @returns Local imports, non-local specifiers, and the snippets gathered for the local imports that could be read
43
+ */
44
+ export declare function resolveImports(code: string, language: string, filePath: string, maxImports?: number): ImportResolutionResult;
45
+ /**
46
+ * Build related files context from a file's imports.
47
+ *
48
+ * Convenience wrapper that returns just the RelatedFileSnippet[] array,
49
+ * ready to be passed to deep-review or MCP tool context.
+ * Takes the same arguments as `resolveImports()` and returns only its `relatedFiles` result.
50
+ */
51
+ export declare function buildRelatedFilesContext(code: string, language: string, filePath: string, maxImports?: number): RelatedFileSnippet[];
@@ -0,0 +1,213 @@
1
+ /**
2
+ * Cross-File Import Resolution
3
+ *
4
+ * Automatically resolves imports from a file's AST and builds
5
+ * related-file context for deeper cross-file analysis. This bridges
6
+ * the gap between single-file deterministic analysis and project-wide
7
+ * vulnerability detection.
8
+ *
9
+ * Provides:
10
+ * - `resolveImports()` — resolves import paths to file content
11
+ * - `buildRelatedFilesContext()` — builds RelatedFileSnippet[] from imports
12
+ */
13
import { readFileSync, existsSync, statSync } from "fs";
import { resolve, dirname, join } from "path";
import { analyzeStructure } from "./ast/index.js";
16
+ // ─── Constants ──────────────────────────────────────────────────────────────
17
+ /** Maximum size of a related file to include as context; compared against the decoded text's string length, not its raw byte count */
18
+ const MAX_RELATED_FILE_SIZE = 50_000;
19
+ /** Maximum snippet length (in characters) per related file; longer content is cut and marked as truncated */
20
+ const MAX_SNIPPET_LENGTH = 3_000;
21
+ /** Default cap on how many imports are loaded with content per analyzed file */
22
+ const MAX_IMPORTS_TO_RESOLVE = 20;
23
+ /** Extensions appended, in this order, when the import path does not match a file as written */
24
+ const RESOLVE_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".py", ".rs", ".go", ".java", ".cs"];
25
+ /** Index file names tried, in this order, when the import path is treated as a directory */
26
+ const INDEX_FILES = ["index.ts", "index.tsx", "index.js", "index.jsx", "index.mjs"];
27
+ // ─── Import Resolution ─────────────────────────────────────────────────────
28
/**
 * Tell whether an import specifier points into the file system
 * (current directory, parent directory, or a root-anchored path)
 * rather than naming a package.
 */
function isLocalImport(specifier) {
    const localPrefixes = ["./", "../", "/"];
    return localPrefixes.some((prefix) => specifier.startsWith(prefix));
}
34
/**
 * Map a local import specifier onto a file that exists on disk.
 *
 * Candidates are probed in this order:
 *   1. the path with a trailing ".js" removed, if it is an existing non-directory;
 *   2. that path plus each entry of RESOLVE_EXTENSIONS;
 *   3. that path treated as a directory containing one of INDEX_FILES;
 *   4. the specifier exactly as written (only when step 1 stripped a ".js").
 *
 * @param specifier - Local import specifier as written in the source
 * @param fromDir - Directory the specifier is relative to
 * @returns The first matching path, or `undefined` when nothing matches
 */
function resolveLocalImport(specifier, fromDir) {
    const isExistingFile = (candidate) => existsSync(candidate) && !isDirectory(candidate);
    // ESM-style TypeScript imports name the emitted ".js" file; drop the
    // suffix so the source extensions can be probed instead.
    const strippedBase = resolve(fromDir, specifier.replace(/\.js$/, ""));
    if (isExistingFile(strippedBase)) {
        return strippedBase;
    }
    const withKnownExtension = RESOLVE_EXTENSIONS
        .map((ext) => strippedBase + ext)
        .find((candidate) => existsSync(candidate));
    if (withKnownExtension !== undefined) {
        return withKnownExtension;
    }
    const indexInsideDirectory = INDEX_FILES
        .map((indexName) => join(strippedBase, indexName))
        .find((candidate) => existsSync(candidate));
    if (indexInsideDirectory !== undefined) {
        return indexInsideDirectory;
    }
    // Last resort: the literal specifier, which differs from the stripped
    // base only when a ".js" suffix was removed above.
    const literalPath = resolve(fromDir, specifier);
    if (literalPath !== strippedBase && isExistingFile(literalPath)) {
        return literalPath;
    }
    return undefined;
}
66
/**
 * Report whether `filePath` exists and is a directory.
 *
 * Uses the statically imported `statSync` instead of a runtime
 * `require("fs")`: this module is an ES module, where `require` is not
 * defined, so the lookup threw and the catch below turned every answer
 * into `false` — directories were then mistaken for resolvable files.
 *
 * @param filePath - Path to inspect
 * @returns `true` only for an existing directory; `false` for regular
 *          files, missing paths, or any failure while reading file status
 */
function isDirectory(filePath) {
    try {
        return statSync(filePath).isDirectory();
    }
    catch {
        // Best effort: a path that cannot be stat'ed is treated as "not a directory".
        return false;
    }
}
75
/**
 * Load a file as UTF-8 text and shrink it to a snippet for cross-file context.
 *
 * @param filePath - Path of the file to read
 * @returns The full text when it fits within MAX_SNIPPET_LENGTH, a cut-off
 *          copy with a truncation marker when longer, or `undefined` when the
 *          file cannot be read or its text exceeds MAX_RELATED_FILE_SIZE
 */
function readSnippet(filePath) {
    let text;
    try {
        text = readFileSync(filePath, "utf-8");
    }
    catch {
        // Unreadable files simply contribute no context.
        return undefined;
    }
    if (text.length > MAX_RELATED_FILE_SIZE) {
        return undefined; // Too large to be useful as context
    }
    return text.length > MAX_SNIPPET_LENGTH
        ? text.slice(0, MAX_SNIPPET_LENGTH) + "\n// ... truncated"
        : text;
}
93
/**
 * Collect a file's imports and gather snippets of the local files they point to.
 *
 * Specifiers come from the AST structure analysis merged with a regex
 * scan (duplicates removed). Non-local specifiers are listed as external.
 * Local ones are resolved against the directory of `filePath`; each is
 * recorded in `resolved`, and those whose content could be read also
 * yield a related-file snippet.
 *
 * @param code - Source code of the file being analyzed
 * @param language - Programming language
 * @param filePath - Absolute path to the source file (needed for relative import resolution)
 * @param maxImports - Stop once this many snippets were collected (default: 20)
 * @returns `{ resolved, external, relatedFiles }`
 */
export function resolveImports(code, language, filePath, maxImports = MAX_IMPORTS_TO_RESOLVE) {
    const fromDir = dirname(filePath);
    // AST first, then the regex fallback for import forms the grammar may miss.
    const astImports = analyzeStructure(code, language).imports ?? [];
    const fallbackImports = extractImportsViaRegex(code, language);
    const uniqueSpecifiers = Array.from(new Set([...astImports, ...fallbackImports]));
    const resolved = [];
    const external = [];
    const relatedFiles = [];
    for (const specifier of uniqueSpecifiers) {
        // One snippet is added per fully loaded import, so the snippet
        // count doubles as the "imports resolved so far" counter.
        if (relatedFiles.length >= maxImports) {
            break;
        }
        if (!isLocalImport(specifier)) {
            external.push(specifier);
            continue;
        }
        const resolvedPath = resolveLocalImport(specifier, fromDir);
        const snippet = resolvedPath ? readSnippet(resolvedPath) : undefined;
        if (!resolvedPath) {
            resolved.push({ specifier, isLocal: true });
        }
        else if (!snippet) {
            resolved.push({ specifier, resolvedPath, isLocal: true });
        }
        else {
            resolved.push({ specifier, resolvedPath, isLocal: true, content: snippet });
            relatedFiles.push({ path: specifier, snippet, relationship: "imported by target" });
        }
    }
    return { resolved, external, relatedFiles };
}
149
/**
 * Shortcut for callers that only need the related-file snippets.
 *
 * Runs `resolveImports()` with the same arguments and hands back its
 * `relatedFiles` array, ready for deep-review or MCP tool context.
 */
export function buildRelatedFilesContext(code, language, filePath, maxImports = MAX_IMPORTS_TO_RESOLVE) {
    const { relatedFiles } = resolveImports(code, language, filePath, maxImports);
    return relatedFiles;
}
158
+ // ─── Regex-based Import Extraction (fallback) ───────────────────────────────
159
/**
 * Convert a Python relative module reference into a path-style specifier.
 *
 * One leading dot means the current package directory and each further
 * dot climbs one level; dotted segments after that become path segments:
 * ".utils" -> "./utils", "..pkg.mod" -> "../pkg/mod".
 * A reference made only of dots ("from . import x") names no file and is
 * returned unchanged.
 */
function pythonRelativeModuleToPath(moduleRef) {
    const leadingDots = /^\.+/.exec(moduleRef)?.[0].length ?? 0;
    const segments = moduleRef.slice(leadingDots).split(".").filter(Boolean);
    if (leadingDots === 0 || segments.length === 0) {
        return moduleRef;
    }
    const prefix = leadingDots === 1 ? "./" : "../".repeat(leadingDots - 1);
    return prefix + segments.join("/");
}
/**
 * Extract import specifiers with line-oriented regular expressions.
 * This supplements the AST parser for cases where the grammar doesn't
 * capture all import forms.
 *
 * Recognized forms (at most one specifier per line):
 * - TypeScript/JavaScript: `... from "x"`, `import "x"`, `require("x")`
 * - Python: `from .relative.module import ...`, emitted as a "./"-style path
 *   so it is treated as a local import (absolute imports are ignored)
 * - Go: `import "a/b"`, `import alias "a/b"` and bare `"a/b"` lines inside
 *   an import block; paths without "/" are ignored
 * - Rust: `use crate::module`, emitted as "./module"
 *   (NOTE(review): this assumes the analyzed file sits next to the crate root)
 *
 * @param code - Source text to scan
 * @param language - Language name; unknown languages yield an empty list
 * @returns Specifiers in order of appearance; duplicates are not removed
 */
function extractImportsViaRegex(code, language) {
    // Patterns are created once per call rather than once per line.
    const jsPatterns = [
        /from\s+["']([^"']+)["']/, // import ... from "specifier"
        /^import\s+["']([^"']+)["']/, // import "specifier" (side-effect only)
        /require\s*\(\s*["']([^"']+)["']\s*\)/, // require("specifier")
    ];
    const pythonRelativeFrom = /^from\s+(\.[\w.]*)\s+import/;
    const goImportLine = /^(?:import\s+(?:[\w.]+\s+)?)?"([^"]+)"/;
    const rustCrateUse = /^use\s+crate::(\w+)/;
    const imports = [];
    for (const rawLine of code.split("\n")) {
        const line = rawLine.trim();
        switch (language) {
            case "typescript":
            case "javascript": {
                // First matching form wins, mirroring the from/import/require priority.
                for (const pattern of jsPatterns) {
                    const match = pattern.exec(line);
                    if (match) {
                        imports.push(match[1]);
                        break;
                    }
                }
                break;
            }
            case "python": {
                const match = pythonRelativeFrom.exec(line);
                if (match) {
                    imports.push(pythonRelativeModuleToPath(match[1]));
                }
                break;
            }
            case "go": {
                const match = goImportLine.exec(line);
                // Require a "/" so single-word standard-library packages are skipped.
                if (match && match[1].includes("/")) {
                    imports.push(match[1]);
                }
                break;
            }
            case "rust": {
                const match = rustCrateUse.exec(line);
                if (match) {
                    imports.push(`./${match[1]}`);
                }
                break;
            }
            default:
                break;
        }
    }
    return imports;
}
@@ -9,6 +9,8 @@ export declare function isContentPolicyRefusal(responseText: string): boolean;
9
9
  export declare const DEEP_REVIEW_PROMPT_INTRO: string;
10
10
  /** Content-policy-safe Assistant identity message. */
11
11
  export declare const DEEP_REVIEW_IDENTITY: string;
12
+ /** Default max chars for LLM-facing prompt content (~25K tokens). */
13
+ export declare const DEFAULT_MAX_PROMPT_CHARS = 100000;
12
14
  export interface RelatedFileSnippet {
13
15
  /** Relative file path */
14
16
  path: string;
@@ -17,11 +19,20 @@ export interface RelatedFileSnippet {
17
19
  /** Why this file is relevant (e.g. "imported by target", "shared type") */
18
20
  relationship?: string;
19
21
  }
22
+ /**
23
+ * Format related files into a prompt section that gives the LLM cross-file
24
+ * visibility for deeper analysis.
25
+ *
26
+ * @param relatedFiles — array of related file snippets
27
+ * @param maxFiles — max files to include (default: 10). Set to 0 for unlimited. Files beyond the cap are dropped and a note with the omitted count is appended.
28
+ * @param snippetBudget — per-snippet char cap (default: 3000). Set to 0 for unlimited. Longer snippets are cut and marked as truncated.
+ * @returns Markdown for the section, or an empty string when `relatedFiles` is empty
29
+ */
30
+ export declare function formatRelatedFilesSection(relatedFiles: RelatedFileSnippet[], maxFiles?: number, snippetBudget?: number): string;
20
31
  /**
21
32
  * Format detected project context into a prompt section so the LLM
22
33
  * understands the runtime environment, framework, and architectural role.
23
34
  */
24
35
  export declare function formatProjectContextSection(projectContext: ProjectContext): string;
25
- export declare function buildSingleJudgeDeepReviewSection(judge: JudgeDefinition, language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext): string;
26
- export declare function buildTribunalDeepReviewSection(judges: JudgeDefinition[], language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext): string;
36
+ export declare function buildSingleJudgeDeepReviewSection(judge: JudgeDefinition, language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext, maxPromptChars?: number): string;
37
+ export declare function buildTribunalDeepReviewSection(judges: JudgeDefinition[], language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext, maxPromptChars?: number): string;
27
38
  export declare function buildSimplifiedDeepReviewSection(language: string, context?: string): string;
@@ -54,27 +54,45 @@ export const DEEP_REVIEW_IDENTITY = `I am the Judges Panel — a professional, c
54
54
  `My role is to provide evidence-based, constructive recommendations that ` +
55
55
  `strengthen code quality, security defenses, and engineering standards. ` +
56
56
  `This is NOT a penetration test or adversarial security assessment.`;
57
+ // ─── Token Budget Constants ──────────────────────────────────────────────────
58
+ /** Default max chars for LLM-facing prompt content (~25K tokens). */
59
+ export const DEFAULT_MAX_PROMPT_CHARS = 100_000;
60
+ /** Per-snippet char cap for related files. */
61
+ const MAX_SNIPPET_CHARS = 3_000;
62
+ /** Max related files to include by default. */
63
+ const MAX_RELATED_FILES = 10;
57
64
  /**
58
65
  * Format related files into a prompt section that gives the LLM cross-file
59
66
  * visibility for deeper analysis.
67
+ *
68
+ * @param relatedFiles — array of related file snippets
69
+ * @param maxFiles — max files to include (default: 10). Set to 0 for unlimited.
70
+ * @param snippetBudget — per-snippet char cap (default: 3000). Set to 0 for unlimited.
60
71
  */
61
export function formatRelatedFilesSection(relatedFiles, maxFiles = MAX_RELATED_FILES, snippetBudget = MAX_SNIPPET_CHARS) {
    if (relatedFiles.length === 0) {
        return "";
    }
    // A cap of 0 (or less) means "no limit" for both files and snippet size.
    const included = maxFiles > 0 ? relatedFiles.slice(0, maxFiles) : relatedFiles;
    const omittedCount = relatedFiles.length - included.length;
    const charLimit = snippetBudget > 0 ? snippetBudget : Infinity;
    const parts = [
        `### Related Files\n\n`,
        `> The following files are related to the code under review. Use them to `,
        `understand cross-file data flow, shared types, imports, and call sites. `,
        `These provide context only — focus your findings on the primary code above.\n\n`,
    ];
    for (const file of included) {
        // Collapsible entry: path (plus optional relationship) as the summary line.
        let entry = `<details>\n<summary><code>${file.path}</code>`;
        if (file.relationship) {
            entry += ` — ${file.relationship}`;
        }
        entry += `</summary>\n\n`;
        // Keep each snippet within the per-file budget to prevent prompt explosion.
        const body = file.snippet.length > charLimit
            ? file.snippet.slice(0, charLimit) + "\n// ... truncated"
            : file.snippet;
        entry += `\`\`\`\n${body}\n\`\`\`\n`;
        entry += `</details>\n\n`;
        parts.push(entry);
    }
    if (omittedCount > 0) {
        parts.push(`> *${omittedCount} additional related file(s) omitted to stay within token budget.*\n\n`);
    }
    return parts.join("");
}
80
98
  // ─── Project Context Section ─────────────────────────────────────────────────
@@ -111,7 +129,13 @@ export function formatProjectContextSection(projectContext) {
111
129
  return md;
112
130
  }
113
131
  // ─── Single-Judge Deep Review ────────────────────────────────────────────────
114
- export function buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext) {
132
+ export function buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext, maxPromptChars) {
133
+ const budget = maxPromptChars ?? DEFAULT_MAX_PROMPT_CHARS;
134
+ const unlimited = budget === 0;
135
+ const maxRelatedFiles = unlimited ? 0 : MAX_RELATED_FILES;
136
+ const snippetCap = unlimited ? 0 : MAX_SNIPPET_CHARS;
137
+ const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
138
+ const truncatedContext = context && context.length > contextCap ? context.slice(0, contextCap) + "\n… (context truncated)" : context;
115
139
  let md = `\n\n---\n\n`;
116
140
  md += `## 🔍 Deep Contextual Review Required\n\n`;
117
141
  md += DEFENSIVE_PREAMBLE;
@@ -120,14 +144,14 @@ export function buildSingleJudgeDeepReviewSection(judge, language, context, rela
120
144
  md += `business logic validity, cross-module data flow, or context-dependent implications.\n>\n`;
121
145
  md += `> **You MUST now perform a thorough contextual review** of the ${language} code using the expert criteria below. `;
122
146
  md += `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified evaluation.\n\n`;
123
- if (context) {
124
- md += `**Context provided:** ${context}\n\n`;
147
+ if (truncatedContext) {
148
+ md += `**Context provided:** ${truncatedContext}\n\n`;
125
149
  }
126
150
  if (projectContext) {
127
151
  md += formatProjectContextSection(projectContext);
128
152
  }
129
153
  if (relatedFiles && relatedFiles.length > 0) {
130
- md += formatRelatedFilesSection(relatedFiles);
154
+ md += formatRelatedFilesSection(relatedFiles, maxRelatedFiles, snippetCap);
131
155
  }
132
156
  md += `### ${judge.name} — ${judge.domain}\n\n`;
133
157
  md += `${judge.description}\n\n`;
@@ -160,7 +184,23 @@ export function buildSingleJudgeDeepReviewSection(judge, language, context, rela
160
184
  return md;
161
185
  }
162
186
  // ─── Tribunal Deep Review (full) ─────────────────────────────────────────────
163
- export function buildTribunalDeepReviewSection(judges, language, context, relatedFiles, projectContext) {
187
+ export function buildTribunalDeepReviewSection(judges, language, context, relatedFiles, projectContext, maxPromptChars) {
188
+ const budget = maxPromptChars ?? DEFAULT_MAX_PROMPT_CHARS;
189
+ const unlimited = budget === 0;
190
+ // When budget is tight, use simplified mode (category-based instead of per-judge)
191
+ // The full per-judge section is ~15-18K chars for 45 judges. Simplified is ~2K.
192
+ // Use simplified when remaining budget for judge criteria would be < 5K.
193
+ const estimatedJudgeCriteriaChars = judges.length * 350;
194
+ const estimatedOverhead = 4000; // preamble, instructions, format section
195
+ const estimatedRelatedChars = (relatedFiles?.length ?? 0) * MAX_SNIPPET_CHARS;
196
+ const budgetForJudges = unlimited ? Infinity : budget - estimatedOverhead - estimatedRelatedChars;
197
+ const useSimplified = !unlimited && budgetForJudges < estimatedJudgeCriteriaChars;
198
+ // Determine related files caps
199
+ const maxRelatedFiles = unlimited ? 0 : MAX_RELATED_FILES;
200
+ const snippetCap = unlimited ? 0 : MAX_SNIPPET_CHARS;
201
+ // Truncate context string if it would blow the budget
202
+ const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
203
+ const truncatedContext = context && context.length > contextCap ? context.slice(0, contextCap) + "\n… (context truncated)" : context;
164
204
  let md = `\n\n---\n\n`;
165
205
  md += `## 🔍 Deep Contextual Review Required\n\n`;
166
206
  md += DEFENSIVE_PREAMBLE;
@@ -170,21 +210,41 @@ export function buildTribunalDeepReviewSection(judges, language, context, relate
170
210
  md += `> **You MUST now perform a thorough contextual review** of the ${language} code from the perspective of ALL ${judges.length} judges below. `;
171
211
  md += `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified tribunal verdict.\n\n`;
172
212
  md += `> **This is a professional code quality tool.** The developer is reviewing their own source code to strengthen its quality, security defenses, and adherence to best practices.\n\n`;
173
- if (context) {
174
- md += `**Context provided:** ${context}\n\n`;
213
+ if (truncatedContext) {
214
+ md += `**Context provided:** ${truncatedContext}\n\n`;
175
215
  }
176
216
  if (projectContext) {
177
217
  md += formatProjectContextSection(projectContext);
178
218
  }
179
219
  if (relatedFiles && relatedFiles.length > 0) {
180
- md += formatRelatedFilesSection(relatedFiles);
220
+ md += formatRelatedFilesSection(relatedFiles, maxRelatedFiles, snippetCap);
181
221
  }
182
- for (const judge of judges) {
183
- md += `### ${judge.name} ${judge.domain}\n\n`;
184
- md += `${judge.description}\n\n`;
185
- md += `**Rule prefix:** \`${judge.rulePrefix}-\` · **Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance — they are opaque platform constants. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
222
+ if (useSimplified) {
223
+ // Compact category-based criteria instead of per-judge listing
224
+ md += `### Quality Dimensions (${judges.length} judges)\n\n`;
225
+ md += `> Using compact criteria mode to stay within token budget.\n\n`;
226
+ // Group judges by domain
227
+ const domainGroups = new Map();
228
+ for (const judge of judges) {
229
+ const domain = judge.domain ?? "general";
230
+ if (!domainGroups.has(domain))
231
+ domainGroups.set(domain, []);
232
+ domainGroups.get(domain).push(`\`${judge.rulePrefix}\` ${judge.name}`);
233
+ }
234
+ for (const [domain, names] of domainGroups) {
235
+ md += `**${domain}:** ${names.join(", ")}\n\n`;
236
+ }
237
+ md += `**Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
186
238
  md += `---\n\n`;
187
239
  }
240
+ else {
241
+ for (const judge of judges) {
242
+ md += `### ${judge.name} — ${judge.domain}\n\n`;
243
+ md += `${judge.description}\n\n`;
244
+ md += `**Rule prefix:** \`${judge.rulePrefix}-\` · **Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance — they are opaque platform constants. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
245
+ md += `---\n\n`;
246
+ }
247
+ }
188
248
  md += `### False Positive Review\n\n`;
189
249
  md += `Before adding new findings, **review each pattern-based finding above for false positives.** `;
190
250
  md += `Static pattern matching can flag code that is actually correct — for example:\n`;
@@ -4,6 +4,7 @@
4
4
  // ──────────────────────────────────────────────────────────────────────────────
5
5
  import { z } from "zod";
6
6
  import { loadFindingStore, triageFinding, getTriagedFindings, formatTriageSummary, getFindingStats, getSuppressionAnalytics, formatSuppressionAnalytics, } from "../finding-lifecycle.js";
7
+ import { evaluateWithTribunal } from "../evaluators/index.js";
7
8
  // ─── Rule-prefix learning context (shared with CLI --explain) ────────────────
8
9
  const RULE_PREFIX_CONTEXT = {
9
10
  SEC: {
@@ -78,6 +79,7 @@ export function registerReviewTools(server) {
78
79
  registerGetFindingStats(server);
79
80
  registerGetSuppressionAnalytics(server);
80
81
  registerListTriagedFindings(server);
82
+ registerReEvaluateWithContext(server);
81
83
  }
82
84
  // ─── explain_finding ─────────────────────────────────────────────────────────
83
85
  function registerExplainFinding(server) {
@@ -394,3 +396,169 @@ function registerListTriagedFindings(server) {
394
396
  }
395
397
  });
396
398
  }
399
+ // ─── re_evaluate_with_context ────────────────────────────────────────────────
400
/**
 * Register the `re_evaluate_with_context` tool on `server`.
 *
 * The handler:
 *   1. folds the developer's explanation, disputed/accepted rule IDs and
 *      focus areas into one context string;
 *   2. caps code, context and related files according to `maxPromptChars`
 *      (0 disables the caps);
 *   3. calls `evaluateWithTribunal` with auto-tune, calibration and the
 *      confidence threshold;
 *   4. halves the confidence of findings whose rule ID is disputed (floor
 *      0.1), tags them "supplementary", and re-applies the threshold;
 *   5. returns a markdown summary, a JSON block, and the deep-review prompt
 *      when the verdict carries one.
 *
 * The re-filter in step 4 uses the same effective threshold that is passed
 * to the evaluator (explicit value, else 0.5). Previously it ran only when
 * a threshold was supplied explicitly, so under the default a disputed
 * finding could be reported below the threshold that had been applied.
 *
 * Failures are reported as an `isError` text result instead of being thrown.
 *
 * @param server - Server object exposing `tool(name, description, schema, handler)`
 */
function registerReEvaluateWithContext(server) {
    server.tool("re_evaluate_with_context", "Re-evaluate code with developer-provided context from a multi-turn conversation. Accepts disputed findings, accepted findings, and additional context to adjust the evaluation. This is the agentic feedback loop — the developer explains their intent and the tribunal re-evaluates with that context, applying auto-tune and confidence filtering.", {
        code: z.string().describe("The source code to re-evaluate"),
        language: z.string().describe("Programming language (e.g., typescript, python, go)"),
        disputedRuleIds: z
            .array(z.string())
            .optional()
            .describe("Rule IDs the developer disputes as false positives (e.g., ['SEC-001', 'PERF-003'])"),
        acceptedRuleIds: z
            .array(z.string())
            .optional()
            .describe("Rule IDs the developer accepts (these will not be filtered)"),
        developerContext: z
            .string()
            .optional()
            .describe("Free-form explanation from the developer about their intent, design decisions, or why certain findings are incorrect"),
        focusAreas: z
            .array(z.string())
            .optional()
            .describe("Specific areas to focus the re-evaluation on (e.g., ['security', 'performance'])"),
        confidenceFilter: z
            .number()
            .min(0)
            .max(1)
            .optional()
            .describe("Minimum confidence threshold — findings below this are dropped (default: 0.5)"),
        filePath: z.string().optional().describe("File path for context-aware evaluation"),
        deepReview: z
            .boolean()
            .optional()
            .describe("Whether to include the LLM deep-review prompt section in the result"),
        relatedFiles: z
            .array(z.object({
            path: z.string().describe("Path of the related file"),
            snippet: z.string().describe("Relevant code snippet from the related file"),
            relationship: z.string().optional().describe("Relationship to the main file (e.g., 'imports', 'tests')"),
        }))
            .optional()
            .describe("Cross-file context for more accurate evaluation"),
        maxPromptChars: z
            .number()
            .min(0)
            .optional()
            .describe("Maximum character budget for LLM prompts. Controls truncation of source code, related files, and context strings in deep-review prompts. Set to 0 to disable all truncation. Default: 100000."),
    }, async ({ code, language, disputedRuleIds, acceptedRuleIds, developerContext, focusAreas, confidenceFilter, filePath, deepReview, relatedFiles, maxPromptChars, }) => {
        try {
            // Build context string from developer inputs
            const contextParts = [];
            if (developerContext) {
                contextParts.push(`Developer context: ${developerContext}`);
            }
            if (disputedRuleIds && disputedRuleIds.length > 0) {
                contextParts.push(`Disputed findings: ${disputedRuleIds.join(", ")}`);
            }
            if (acceptedRuleIds && acceptedRuleIds.length > 0) {
                contextParts.push(`Accepted findings: ${acceptedRuleIds.join(", ")}`);
            }
            if (focusAreas && focusAreas.length > 0) {
                contextParts.push(`Focus areas: ${focusAreas.join(", ")}`);
            }
            const fullContext = contextParts.join("\n");
            // Apply token budget caps to inputs (a budget of 0 means "no caps")
            const budget = maxPromptChars ?? 100_000;
            const unlimited = budget === 0;
            const codeCap = unlimited ? Infinity : budget;
            const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
            const cappedCode = code.length > codeCap ? code.slice(0, codeCap) : code;
            const cappedContext = fullContext.length > contextCap ? fullContext.slice(0, contextCap) + "\n… (context truncated)" : fullContext;
            const cappedRelatedFiles = !unlimited && relatedFiles && relatedFiles.length > 10 ? relatedFiles.slice(0, 10) : relatedFiles;
            // Single source of truth for the threshold: used by the evaluator
            // and by the re-filter after disputed findings are downgraded.
            const confidenceThreshold = confidenceFilter ?? 0.5;
            const evalOptions = {
                autoTune: true,
                deepReview: deepReview ?? false,
                confidenceFilter: confidenceThreshold,
                filePath,
                relatedFiles: cappedRelatedFiles,
                calibrate: true,
                maxPromptChars: maxPromptChars,
            };
            const verdict = evaluateWithTribunal(cappedCode, language, cappedContext || undefined, evalOptions);
            // Post-process: downgrade disputed findings, then re-apply the threshold
            const hasDisputes = Boolean(disputedRuleIds && disputedRuleIds.length > 0);
            const disputedSet = new Set(disputedRuleIds ?? []);
            let findings = verdict.findings;
            if (hasDisputes) {
                findings = findings
                    .map((f) => disputedSet.has(f.ruleId)
                    ? {
                        ...f,
                        confidence: Math.max(0.1, (f.confidence ?? 0.5) * 0.5),
                        confidenceTier: "supplementary",
                    }
                    : f)
                    .filter((f) => (f.confidence ?? 0.5) >= confidenceThreshold);
            }
            // Disputed rule IDs with no surviving finding count as resolved.
            const remainingRuleIds = new Set(findings.map((f) => f.ruleId));
            const disputedResolved = (disputedRuleIds ?? []).filter((id) => !remainingRuleIds.has(id));
            const sections = [];
            sections.push(`# Re-Evaluation Results\n`);
            sections.push(`**Verdict:** ${verdict.overallVerdict} · **Score:** ${verdict.overallScore}/100`);
            sections.push(`**Findings:** ${findings.length} (after context adjustment)`);
            if (verdict.autoTuneApplied) {
                sections.push(`**Auto-tune:** ${verdict.autoTuneApplied.suppressed} suppressed, ${verdict.autoTuneApplied.downgraded} downgraded`);
            }
            if (verdict.confidenceFilterApplied) {
                sections.push(`**Confidence filter:** ${verdict.confidenceFilterApplied.filteredOut} findings below ${Math.round(verdict.confidenceFilterApplied.threshold * 100)}% filtered out`);
            }
            if (findings.length > 0) {
                sections.push(`\n## Findings\n`);
                for (const f of findings) {
                    const conf = f.confidence !== undefined && f.confidence !== null ? ` (${Math.round(f.confidence * 100)}%)` : "";
                    const tier = f.confidenceTier ? ` [${f.confidenceTier}]` : "";
                    sections.push(`- **${f.ruleId}** ${f.severity}${conf}${tier}: ${f.title}`);
                }
            }
            if (hasDisputes) {
                const stillPresent = findings.filter((f) => disputedSet.has(f.ruleId));
                if (disputedResolved.length > 0) {
                    sections.push(`\n## Disputed findings resolved\n`);
                    sections.push(`The following disputed findings were dropped: ${disputedResolved.join(", ")}`);
                }
                if (stillPresent.length > 0) {
                    sections.push(`\n## Disputed findings retained\n`);
                    sections.push(`The following remain with reduced confidence: ${stillPresent.map((f) => `${f.ruleId} (${Math.round((f.confidence ?? 0) * 100)}%)`).join(", ")}`);
                }
            }
            // Machine-readable mirror of the summary above
            const structured = {
                overallVerdict: verdict.overallVerdict,
                overallScore: verdict.overallScore,
                findingCount: findings.length,
                autoTuneApplied: verdict.autoTuneApplied ?? null,
                confidenceFilterApplied: verdict.confidenceFilterApplied ?? null,
                disputedResolved,
                findings: findings.map((f) => ({
                    ruleId: f.ruleId,
                    severity: f.severity,
                    confidence: f.confidence,
                    confidenceTier: f.confidenceTier,
                    title: f.title,
                })),
            };
            const contentBlocks = [
                { type: "text", text: sections.join("\n") },
                { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
            ];
            if (verdict.deepReviewPrompt) {
                contentBlocks.push({ type: "text", text: verdict.deepReviewPrompt });
            }
            return { content: contentBlocks };
        }
        catch (error) {
            return {
                content: [
                    {
                        type: "text",
                        text: error instanceof Error ? `Error: ${error.message}` : "Error: Re-evaluation failed",
                    },
                ],
                isError: true,
            };
        }
    });
}