@kevinrabun/judges 3.117.8 → 3.119.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -0
- package/dist/api.d.ts +7 -1
- package/dist/api.js +7 -1
- package/dist/commands/watch.d.ts +6 -1
- package/dist/commands/watch.js +4 -4
- package/dist/evaluators/index.d.ts +46 -0
- package/dist/evaluators/index.js +65 -1
- package/dist/git-diff.d.ts +62 -0
- package/dist/git-diff.js +282 -0
- package/dist/import-resolver.d.ts +51 -0
- package/dist/import-resolver.js +213 -0
- package/dist/tools/deep-review.d.ts +13 -2
- package/dist/tools/deep-review.js +76 -16
- package/dist/tools/register-review.js +168 -0
- package/dist/tools/register-workflow.js +116 -0
- package/dist/types.d.ts +22 -0
- package/package.json +1 -1
- package/server.json +2 -2
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-File Import Resolution
|
|
3
|
+
*
|
|
4
|
+
* Automatically resolves imports from a file's AST and builds
|
|
5
|
+
* related-file context for deeper cross-file analysis. This bridges
|
|
6
|
+
* the gap between single-file deterministic analysis and project-wide
|
|
7
|
+
* vulnerability detection.
|
|
8
|
+
*
|
|
9
|
+
* Provides:
|
|
10
|
+
* - `resolveImports()` — resolves import paths to file content
|
|
11
|
+
* - `buildRelatedFilesContext()` — builds RelatedFileSnippet[] from imports
|
|
12
|
+
*/
|
|
13
|
+
import type { RelatedFileSnippet } from "./tools/deep-review.js";
|
|
14
|
+
export interface ResolvedImport {
|
|
15
|
+
/** The import specifier as written in code (e.g., "./utils", "express") */
|
|
16
|
+
specifier: string;
|
|
17
|
+
/** Resolved absolute file path (undefined if external/unresolvable) */
|
|
18
|
+
resolvedPath?: string;
|
|
19
|
+
/** Whether this is a local (relative) import */
|
|
20
|
+
isLocal: boolean;
|
|
21
|
+
/** File content (truncated) if resolved */
|
|
22
|
+
content?: string;
|
|
23
|
+
}
|
|
24
|
+
export interface ImportResolutionResult {
|
|
25
|
+
/** Local (relative/absolute) imports in encounter order; entries that could not be resolved or read omit `resolvedPath` / `content` */
|
|
26
|
+
resolved: ResolvedImport[];
|
|
27
|
+
/** Non-local (bare/package) import specifiers; these are never resolved to files */
|
|
28
|
+
external: string[];
|
|
29
|
+
/** Related file snippets ready for deep-review context */
|
|
30
|
+
relatedFiles: RelatedFileSnippet[];
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Resolve imports from a source file and return related file context.
|
|
34
|
+
*
|
|
35
|
+
* Uses the AST parser to extract import specifiers, resolves local imports
|
|
36
|
+
* to actual files, reads their content, and returns structured context
|
|
37
|
+
* suitable for deep-review cross-file analysis.
|
|
38
|
+
*
|
|
39
|
+
* @param code - Source code of the file being analyzed
|
|
40
|
+
* @param language - Programming language
|
|
41
|
+
* @param filePath - Absolute path to the source file (needed for relative import resolution)
|
|
42
|
+
* @param maxImports - Maximum number of imports to resolve (default: 20)
|
|
43
|
+
*/
|
|
44
|
+
export declare function resolveImports(code: string, language: string, filePath: string, maxImports?: number): ImportResolutionResult;
|
|
45
|
+
/**
|
|
46
|
+
* Build related files context from a file's imports.
|
|
47
|
+
*
|
|
48
|
+
* Convenience wrapper that returns just the RelatedFileSnippet[] array,
|
|
49
|
+
* ready to be passed to deep-review or MCP tool context.
|
|
50
|
+
*/
|
|
51
|
+
export declare function buildRelatedFilesContext(code: string, language: string, filePath: string, maxImports?: number): RelatedFileSnippet[];
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-File Import Resolution
|
|
3
|
+
*
|
|
4
|
+
* Automatically resolves imports from a file's AST and builds
|
|
5
|
+
* related-file context for deeper cross-file analysis. This bridges
|
|
6
|
+
* the gap between single-file deterministic analysis and project-wide
|
|
7
|
+
* vulnerability detection.
|
|
8
|
+
*
|
|
9
|
+
* Provides:
|
|
10
|
+
* - `resolveImports()` — resolves import paths to file content
|
|
11
|
+
* - `buildRelatedFilesContext()` — builds RelatedFileSnippet[] from imports
|
|
12
|
+
*/
|
|
13
|
+
import { readFileSync, existsSync, statSync } from "fs";
|
|
14
|
+
import { resolve, dirname, join } from "path";
|
|
15
|
+
import { analyzeStructure } from "./ast/index.js";
|
|
16
|
+
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
17
|
+
/** Maximum file size to include as related context (measured as the decoded text's string length, not raw bytes) */
|
|
18
|
+
const MAX_RELATED_FILE_SIZE = 50_000;
|
|
19
|
+
/** Maximum snippet length per related file */
|
|
20
|
+
const MAX_SNIPPET_LENGTH = 3_000;
|
|
21
|
+
/** Maximum number of imports to resolve */
|
|
22
|
+
const MAX_IMPORTS_TO_RESOLVE = 20;
|
|
23
|
+
/** Extensions to try when resolving imports without extensions */
|
|
24
|
+
const RESOLVE_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".py", ".rs", ".go", ".java", ".cs"];
|
|
25
|
+
/** Extensions to try for index files */
|
|
26
|
+
const INDEX_FILES = ["index.ts", "index.tsx", "index.js", "index.jsx", "index.mjs"];
|
|
27
|
+
// ─── Import Resolution ─────────────────────────────────────────────────────
|
|
28
|
+
/**
 * Check if an import specifier is a local (file-system) import rather than
 * a bare package name.
 *
 * A specifier is local when it is exactly "." or "..", or when it starts
 * with "./", "../" or "/".
 *
 * @param specifier - Import specifier exactly as written in the source
 * @returns true for relative/absolute path specifiers, false otherwise
 */
function isLocalImport(specifier) {
    // `import x from "."` / `from ".."` address the current/parent directory
    // and are relative imports even though they carry no trailing slash.
    if (specifier === "." || specifier === "..") {
        return true;
    }
    return specifier.startsWith("./") || specifier.startsWith("../") || specifier.startsWith("/");
}
|
|
34
|
+
/**
 * Map a local import specifier onto an existing file, or return undefined.
 *
 * Candidates are probed in this order, first hit wins:
 *   1. the specifier with a trailing ".js" removed, as an exact non-directory path
 *   2. that path plus each entry of RESOLVE_EXTENSIONS
 *   3. that path treated as a directory containing one of INDEX_FILES
 *   4. the untouched specifier as an exact non-directory path
 *
 * @param specifier - Local import specifier as written in the source
 * @param fromDir - Directory the specifier is resolved against
 * @returns The first matching path, or undefined when nothing matches
 */
function resolveLocalImport(specifier, fromDir) {
    // ESM TypeScript sources commonly import "./x.js" while the file on disk is "./x.ts".
    const withoutJsSuffix = specifier.replace(/\.js$/, "");
    const target = resolve(fromDir, withoutJsSuffix);
    if (existsSync(target) && !isDirectory(target)) {
        return target;
    }
    const viaExtension = RESOLVE_EXTENSIONS
        .map((extension) => target + extension)
        .find((candidate) => existsSync(candidate));
    if (viaExtension !== undefined) {
        return viaExtension;
    }
    const viaIndexFile = INDEX_FILES
        .map((fileName) => join(target, fileName))
        .find((candidate) => existsSync(candidate));
    if (viaIndexFile !== undefined) {
        return viaIndexFile;
    }
    // Last resort: the literal specifier, only when stripping ".js" actually changed the path.
    const literal = resolve(fromDir, specifier);
    const literalIsFile = literal !== target && existsSync(literal) && !isDirectory(literal);
    return literalIsFile ? literal : undefined;
}
|
|
66
|
+
/**
 * Report whether a path exists and is a directory.
 *
 * Uses the statically imported `statSync`. This file is an ES module, where
 * `require` is not defined, so a `require("fs")` call here would throw a
 * ReferenceError that the catch below would swallow, making every path look
 * like a non-directory.
 *
 * @param filePath - Path to inspect
 * @returns true when the path is a directory; false when it is not, or when
 *          it cannot be stat'ed (missing, permission denied, ...)
 */
function isDirectory(filePath) {
    try {
        return statSync(filePath).isDirectory();
    }
    catch {
        // Best effort: an unreadable or missing path is simply "not a directory".
        return false;
    }
}
|
|
75
|
+
/**
 * Load a file as UTF-8 text and shrink it to a snippet for cross-file context.
 *
 * @param filePath - Path of the file to read
 * @returns The whole text when it is at most MAX_SNIPPET_LENGTH long; the
 *          first MAX_SNIPPET_LENGTH characters plus a "// ... truncated"
 *          marker when longer; undefined when the text exceeds
 *          MAX_RELATED_FILE_SIZE or the file cannot be read.
 */
function readSnippet(filePath) {
    try {
        const text = readFileSync(filePath, "utf-8");
        const size = text.length;
        if (size > MAX_RELATED_FILE_SIZE) {
            // Oversized files are skipped entirely rather than truncated.
            return undefined;
        }
        return size > MAX_SNIPPET_LENGTH
            ? text.slice(0, MAX_SNIPPET_LENGTH) + "\n// ... truncated"
            : text;
    }
    catch {
        // Unreadable file (missing, directory, permissions): no snippet.
        return undefined;
    }
}
|
|
93
|
+
/**
 * Collect a file's imports and turn the local ones into related-file context.
 *
 * Import specifiers come from `analyzeStructure` merged (de-duplicated, order
 * preserved) with the regex fallback. Non-local specifiers go to `external`.
 * Every local specifier gets an entry in `resolved`; only those that were
 * both resolved and readable also produce a `relatedFiles` snippet.
 *
 * NOTE(review): absolute specifiers ("/...") are treated as local and read
 * from disk as written; confirm callers only pass trusted source code.
 *
 * @param code - Source code of the file being analyzed
 * @param language - Programming language
 * @param filePath - Path to the source file; its directory anchors relative imports
 * @param maxImports - Stop once this many imports yielded a snippet (default: 20)
 * @returns `{ resolved, external, relatedFiles }`
 */
export function resolveImports(code, language, filePath, maxImports = MAX_IMPORTS_TO_RESOLVE) {
    const resolved = [];
    const external = [];
    const relatedFiles = [];
    const baseDir = dirname(filePath);
    // AST-derived specifiers first, then anything only the regex pass found.
    const astSpecifiers = analyzeStructure(code, language).imports ?? [];
    const regexSpecifiers = extractImportsViaRegex(code, language);
    const uniqueSpecifiers = new Set([...astSpecifiers, ...regexSpecifiers]);
    let snippetCount = 0;
    for (const specifier of uniqueSpecifiers) {
        // The cap counts only imports that produced a snippet.
        if (snippetCount >= maxImports) {
            break;
        }
        if (!isLocalImport(specifier)) {
            external.push(specifier);
            continue;
        }
        const resolvedPath = resolveLocalImport(specifier, baseDir);
        if (!resolvedPath) {
            resolved.push({ specifier, isLocal: true });
            continue;
        }
        const snippet = readSnippet(resolvedPath);
        if (!snippet) {
            resolved.push({ specifier, resolvedPath, isLocal: true });
            continue;
        }
        resolved.push({ specifier, resolvedPath, isLocal: true, content: snippet });
        relatedFiles.push({ path: specifier, snippet, relationship: "imported by target" });
        snippetCount += 1;
    }
    return { resolved, external, relatedFiles };
}
|
|
149
|
+
/**
 * Shortcut for callers that only need the related-file snippets.
 *
 * Runs `resolveImports` with the same arguments and hands back just its
 * `relatedFiles` array.
 *
 * @param code - Source code of the file being analyzed
 * @param language - Programming language
 * @param filePath - Path to the source file
 * @param maxImports - Forwarded to `resolveImports` (default: 20)
 * @returns The `relatedFiles` array produced by `resolveImports`
 */
export function buildRelatedFilesContext(code, language, filePath, maxImports = MAX_IMPORTS_TO_RESOLVE) {
    const { relatedFiles } = resolveImports(code, language, filePath, maxImports);
    return relatedFiles;
}
|
|
158
|
+
// ─── Regex-based Import Extraction (fallback) ───────────────────────────────
|
|
159
|
+
/**
 * Convert a Python relative module reference (".utils", "..pkg.mod", ".")
 * into the path form used for local specifiers ("./utils", "../pkg/mod", "./").
 *
 * One leading dot means the current package directory; each additional dot
 * climbs one directory. Remaining dotted segments become path segments.
 */
function pythonRelativeModuleToPath(moduleRef) {
    const dotCount = /^\.+/.exec(moduleRef)[0].length;
    const prefix = dotCount === 1 ? "./" : "../".repeat(dotCount - 1);
    const tail = moduleRef.slice(dotCount).split(".").filter(Boolean).join("/");
    return prefix + tail;
}
/**
 * Extract import specifiers using regex patterns for common languages.
 * This supplements the AST parser for cases where the grammar doesn't
 * capture all import forms. The scan is line based and records at most one
 * specifier per line.
 *
 * Recognized forms:
 * - typescript/javascript: `... from "x"`, `import "x"`, `require("x")`
 * - python: relative `from .mod import ...`, emitted in path form
 *   ("./mod", "../pkg/mod") so it is recognized as a local specifier
 * - go: `import "a/b"`, `import alias "a/b"`, and bare `"a/b"` lines inside
 *   an import block; only paths containing "/" are recorded
 * - rust: `use crate::module...`, emitted as "./module"
 *
 * @param code - Source text to scan
 * @param language - Language identifier selecting which patterns apply
 * @returns Specifiers in order of appearance (may contain duplicates)
 */
function extractImportsViaRegex(code, language) {
    const imports = [];
    const isJsLike = ["typescript", "javascript"].includes(language);
    for (const line of code.split("\n")) {
        const trimmed = line.trim();
        if (isJsLike) {
            const fromMatch = /from\s+["']([^"']+)["']/.exec(trimmed);
            if (fromMatch) {
                imports.push(fromMatch[1]);
                continue;
            }
            // Side-effect import: import "specifier"
            const importMatch = /^import\s+["']([^"']+)["']/.exec(trimmed);
            if (importMatch) {
                imports.push(importMatch[1]);
                continue;
            }
            const requireMatch = /require\s*\(\s*["']([^"']+)["']\s*\)/.exec(trimmed);
            if (requireMatch) {
                imports.push(requireMatch[1]);
                continue;
            }
        }
        if (language === "python") {
            // Only relative imports (leading dot) can refer to sibling files.
            const fromImport = /^from\s+(\.[\w.]*)\s+import/.exec(trimmed);
            if (fromImport) {
                // Dotted form (".utils") would not pass the "./" / "../" local check,
                // so translate it to a path-style specifier.
                imports.push(pythonRelativeModuleToPath(fromImport[1]));
                continue;
            }
        }
        if (language === "go") {
            // Matches `import "p"`, `import alias "p"` and a bare `"p"` line in an import block.
            const goImport = /^(?:import\s+(?:[\w.]+\s+)?)?"([^"]+)"/.exec(trimmed);
            if (goImport && goImport[1].includes("/")) {
                imports.push(goImport[1]);
            }
        }
        if (language === "rust") {
            const useMatch = /^use\s+crate::(\w+)/.exec(trimmed);
            if (useMatch) {
                imports.push(`./${useMatch[1]}`);
            }
        }
    }
    return imports;
}
|
|
@@ -9,6 +9,8 @@ export declare function isContentPolicyRefusal(responseText: string): boolean;
|
|
|
9
9
|
export declare const DEEP_REVIEW_PROMPT_INTRO: string;
|
|
10
10
|
/** Content-policy-safe Assistant identity message. */
|
|
11
11
|
export declare const DEEP_REVIEW_IDENTITY: string;
|
|
12
|
+
/** Default max chars for LLM-facing prompt content (~25K tokens). */
|
|
13
|
+
export declare const DEFAULT_MAX_PROMPT_CHARS = 100000;
|
|
12
14
|
export interface RelatedFileSnippet {
|
|
13
15
|
/** Relative file path */
|
|
14
16
|
path: string;
|
|
@@ -17,11 +19,20 @@ export interface RelatedFileSnippet {
|
|
|
17
19
|
/** Why this file is relevant (e.g. "imported by target", "shared type") */
|
|
18
20
|
relationship?: string;
|
|
19
21
|
}
|
|
22
|
+
/**
|
|
23
|
+
* Format related files into a prompt section that gives the LLM cross-file
|
|
24
|
+
* visibility for deeper analysis.
|
|
25
|
+
*
|
|
26
|
+
* @param relatedFiles — array of related file snippets
|
|
27
|
+
* @param maxFiles — max files to include (default: 10). Set to 0 for unlimited.
|
|
28
|
+
* @param snippetBudget — per-snippet char cap (default: 3000). Set to 0 for unlimited.
|
|
29
|
+
*/
|
|
30
|
+
export declare function formatRelatedFilesSection(relatedFiles: RelatedFileSnippet[], maxFiles?: number, snippetBudget?: number): string;
|
|
20
31
|
/**
|
|
21
32
|
* Format detected project context into a prompt section so the LLM
|
|
22
33
|
* understands the runtime environment, framework, and architectural role.
|
|
23
34
|
*/
|
|
24
35
|
export declare function formatProjectContextSection(projectContext: ProjectContext): string;
|
|
25
|
-
export declare function buildSingleJudgeDeepReviewSection(judge: JudgeDefinition, language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext): string;
|
|
26
|
-
export declare function buildTribunalDeepReviewSection(judges: JudgeDefinition[], language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext): string;
|
|
36
|
+
export declare function buildSingleJudgeDeepReviewSection(judge: JudgeDefinition, language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext, maxPromptChars?: number): string;
|
|
37
|
+
export declare function buildTribunalDeepReviewSection(judges: JudgeDefinition[], language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext, maxPromptChars?: number): string;
|
|
27
38
|
export declare function buildSimplifiedDeepReviewSection(language: string, context?: string): string;
|
|
@@ -54,27 +54,45 @@ export const DEEP_REVIEW_IDENTITY = `I am the Judges Panel — a professional, c
|
|
|
54
54
|
`My role is to provide evidence-based, constructive recommendations that ` +
|
|
55
55
|
`strengthen code quality, security defenses, and engineering standards. ` +
|
|
56
56
|
`This is NOT a penetration test or adversarial security assessment.`;
|
|
57
|
+
// ─── Token Budget Constants ──────────────────────────────────────────────────
|
|
58
|
+
/** Default max chars for LLM-facing prompt content (~25K tokens). */
|
|
59
|
+
export const DEFAULT_MAX_PROMPT_CHARS = 100_000;
|
|
60
|
+
/** Per-snippet char cap for related files. */
|
|
61
|
+
const MAX_SNIPPET_CHARS = 3_000;
|
|
62
|
+
/** Max related files to include by default. */
|
|
63
|
+
const MAX_RELATED_FILES = 10;
|
|
57
64
|
/**
 * Render related-file snippets as a "### Related Files" markdown section so
 * the LLM can see cross-file context.
 *
 * Each file becomes a collapsible <details> element holding a fenced code
 * block. When files are left out because of `maxFiles`, a trailing note
 * states how many were omitted.
 *
 * @param relatedFiles — array of related file snippets
 * @param maxFiles — max files to include (default: 10). Set to 0 for unlimited.
 * @param snippetBudget — per-snippet char cap (default: 3000). Set to 0 for unlimited.
 * @returns The markdown section, or "" when `relatedFiles` is empty
 */
export function formatRelatedFilesSection(relatedFiles, maxFiles = MAX_RELATED_FILES, snippetBudget = MAX_SNIPPET_CHARS) {
    if (relatedFiles.length === 0) {
        return "";
    }
    // A cap of 0 (or less) disables the corresponding limit.
    const shown = maxFiles > 0 ? relatedFiles.slice(0, maxFiles) : relatedFiles;
    const omittedCount = relatedFiles.length - shown.length;
    const charLimit = snippetBudget > 0 ? snippetBudget : Infinity;
    const parts = [
        `### Related Files\n\n`,
        `> The following files are related to the code under review. Use them to `,
        `understand cross-file data flow, shared types, imports, and call sites. `,
        `These provide context only — focus your findings on the primary code above.\n\n`,
    ];
    for (const file of shown) {
        const relationshipLabel = file.relationship ? ` — ${file.relationship}` : "";
        // Limit snippet size to prevent prompt explosion.
        const body = file.snippet.length > charLimit
            ? file.snippet.slice(0, charLimit) + "\n// ... truncated"
            : file.snippet;
        parts.push(`<details>\n<summary><code>${file.path}</code>${relationshipLabel}</summary>\n\n`, `\`\`\`\n${body}\n\`\`\`\n`, `</details>\n\n`);
    }
    if (omittedCount > 0) {
        parts.push(`> *${omittedCount} additional related file(s) omitted to stay within token budget.*\n\n`);
    }
    return parts.join("");
}
|
|
80
98
|
// ─── Project Context Section ─────────────────────────────────────────────────
|
|
@@ -111,7 +129,13 @@ export function formatProjectContextSection(projectContext) {
|
|
|
111
129
|
return md;
|
|
112
130
|
}
|
|
113
131
|
// ─── Single-Judge Deep Review ────────────────────────────────────────────────
|
|
114
|
-
export function buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext) {
|
|
132
|
+
export function buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext, maxPromptChars) {
|
|
133
|
+
const budget = maxPromptChars ?? DEFAULT_MAX_PROMPT_CHARS;
|
|
134
|
+
const unlimited = budget === 0;
|
|
135
|
+
const maxRelatedFiles = unlimited ? 0 : MAX_RELATED_FILES;
|
|
136
|
+
const snippetCap = unlimited ? 0 : MAX_SNIPPET_CHARS;
|
|
137
|
+
const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
|
|
138
|
+
const truncatedContext = context && context.length > contextCap ? context.slice(0, contextCap) + "\n… (context truncated)" : context;
|
|
115
139
|
let md = `\n\n---\n\n`;
|
|
116
140
|
md += `## 🔍 Deep Contextual Review Required\n\n`;
|
|
117
141
|
md += DEFENSIVE_PREAMBLE;
|
|
@@ -120,14 +144,14 @@ export function buildSingleJudgeDeepReviewSection(judge, language, context, rela
|
|
|
120
144
|
md += `business logic validity, cross-module data flow, or context-dependent implications.\n>\n`;
|
|
121
145
|
md += `> **You MUST now perform a thorough contextual review** of the ${language} code using the expert criteria below. `;
|
|
122
146
|
md += `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified evaluation.\n\n`;
|
|
123
|
-
if (
|
|
124
|
-
md += `**Context provided:** ${
|
|
147
|
+
if (truncatedContext) {
|
|
148
|
+
md += `**Context provided:** ${truncatedContext}\n\n`;
|
|
125
149
|
}
|
|
126
150
|
if (projectContext) {
|
|
127
151
|
md += formatProjectContextSection(projectContext);
|
|
128
152
|
}
|
|
129
153
|
if (relatedFiles && relatedFiles.length > 0) {
|
|
130
|
-
md += formatRelatedFilesSection(relatedFiles);
|
|
154
|
+
md += formatRelatedFilesSection(relatedFiles, maxRelatedFiles, snippetCap);
|
|
131
155
|
}
|
|
132
156
|
md += `### ${judge.name} — ${judge.domain}\n\n`;
|
|
133
157
|
md += `${judge.description}\n\n`;
|
|
@@ -160,7 +184,23 @@ export function buildSingleJudgeDeepReviewSection(judge, language, context, rela
|
|
|
160
184
|
return md;
|
|
161
185
|
}
|
|
162
186
|
// ─── Tribunal Deep Review (full) ─────────────────────────────────────────────
|
|
163
|
-
export function buildTribunalDeepReviewSection(judges, language, context, relatedFiles, projectContext) {
|
|
187
|
+
export function buildTribunalDeepReviewSection(judges, language, context, relatedFiles, projectContext, maxPromptChars) {
|
|
188
|
+
const budget = maxPromptChars ?? DEFAULT_MAX_PROMPT_CHARS;
|
|
189
|
+
const unlimited = budget === 0;
|
|
190
|
+
// When budget is tight, use simplified mode (category-based instead of per-judge)
|
|
191
|
+
// The full per-judge section is ~15-18K chars for 45 judges. Simplified is ~2K.
|
|
192
|
+
// Use simplified when remaining budget for judge criteria would be < 5K.
|
|
193
|
+
const estimatedJudgeCriteriaChars = judges.length * 350;
|
|
194
|
+
const estimatedOverhead = 4000; // preamble, instructions, format section
|
|
195
|
+
const estimatedRelatedChars = (relatedFiles?.length ?? 0) * MAX_SNIPPET_CHARS;
|
|
196
|
+
const budgetForJudges = unlimited ? Infinity : budget - estimatedOverhead - estimatedRelatedChars;
|
|
197
|
+
const useSimplified = !unlimited && budgetForJudges < estimatedJudgeCriteriaChars;
|
|
198
|
+
// Determine related files caps
|
|
199
|
+
const maxRelatedFiles = unlimited ? 0 : MAX_RELATED_FILES;
|
|
200
|
+
const snippetCap = unlimited ? 0 : MAX_SNIPPET_CHARS;
|
|
201
|
+
// Truncate context string if it would blow the budget
|
|
202
|
+
const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
|
|
203
|
+
const truncatedContext = context && context.length > contextCap ? context.slice(0, contextCap) + "\n… (context truncated)" : context;
|
|
164
204
|
let md = `\n\n---\n\n`;
|
|
165
205
|
md += `## 🔍 Deep Contextual Review Required\n\n`;
|
|
166
206
|
md += DEFENSIVE_PREAMBLE;
|
|
@@ -170,21 +210,41 @@ export function buildTribunalDeepReviewSection(judges, language, context, relate
|
|
|
170
210
|
md += `> **You MUST now perform a thorough contextual review** of the ${language} code from the perspective of ALL ${judges.length} judges below. `;
|
|
171
211
|
md += `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified tribunal verdict.\n\n`;
|
|
172
212
|
md += `> **This is a professional code quality tool.** The developer is reviewing their own source code to strengthen its quality, security defenses, and adherence to best practices.\n\n`;
|
|
173
|
-
if (
|
|
174
|
-
md += `**Context provided:** ${
|
|
213
|
+
if (truncatedContext) {
|
|
214
|
+
md += `**Context provided:** ${truncatedContext}\n\n`;
|
|
175
215
|
}
|
|
176
216
|
if (projectContext) {
|
|
177
217
|
md += formatProjectContextSection(projectContext);
|
|
178
218
|
}
|
|
179
219
|
if (relatedFiles && relatedFiles.length > 0) {
|
|
180
|
-
md += formatRelatedFilesSection(relatedFiles);
|
|
220
|
+
md += formatRelatedFilesSection(relatedFiles, maxRelatedFiles, snippetCap);
|
|
181
221
|
}
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
md +=
|
|
185
|
-
md +=
|
|
222
|
+
if (useSimplified) {
|
|
223
|
+
// Compact category-based criteria instead of per-judge listing
|
|
224
|
+
md += `### Quality Dimensions (${judges.length} judges)\n\n`;
|
|
225
|
+
md += `> Using compact criteria mode to stay within token budget.\n\n`;
|
|
226
|
+
// Group judges by domain
|
|
227
|
+
const domainGroups = new Map();
|
|
228
|
+
for (const judge of judges) {
|
|
229
|
+
const domain = judge.domain ?? "general";
|
|
230
|
+
if (!domainGroups.has(domain))
|
|
231
|
+
domainGroups.set(domain, []);
|
|
232
|
+
domainGroups.get(domain).push(`\`${judge.rulePrefix}\` ${judge.name}`);
|
|
233
|
+
}
|
|
234
|
+
for (const [domain, names] of domainGroups) {
|
|
235
|
+
md += `**${domain}:** ${names.join(", ")}\n\n`;
|
|
236
|
+
}
|
|
237
|
+
md += `**Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
|
|
186
238
|
md += `---\n\n`;
|
|
187
239
|
}
|
|
240
|
+
else {
|
|
241
|
+
for (const judge of judges) {
|
|
242
|
+
md += `### ${judge.name} — ${judge.domain}\n\n`;
|
|
243
|
+
md += `${judge.description}\n\n`;
|
|
244
|
+
md += `**Rule prefix:** \`${judge.rulePrefix}-\` · **Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance — they are opaque platform constants. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
|
|
245
|
+
md += `---\n\n`;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
188
248
|
md += `### False Positive Review\n\n`;
|
|
189
249
|
md += `Before adding new findings, **review each pattern-based finding above for false positives.** `;
|
|
190
250
|
md += `Static pattern matching can flag code that is actually correct — for example:\n`;
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
// ──────────────────────────────────────────────────────────────────────────────
|
|
5
5
|
import { z } from "zod";
|
|
6
6
|
import { loadFindingStore, triageFinding, getTriagedFindings, formatTriageSummary, getFindingStats, getSuppressionAnalytics, formatSuppressionAnalytics, } from "../finding-lifecycle.js";
|
|
7
|
+
import { evaluateWithTribunal } from "../evaluators/index.js";
|
|
7
8
|
// ─── Rule-prefix learning context (shared with CLI --explain) ────────────────
|
|
8
9
|
const RULE_PREFIX_CONTEXT = {
|
|
9
10
|
SEC: {
|
|
@@ -78,6 +79,7 @@ export function registerReviewTools(server) {
|
|
|
78
79
|
registerGetFindingStats(server);
|
|
79
80
|
registerGetSuppressionAnalytics(server);
|
|
80
81
|
registerListTriagedFindings(server);
|
|
82
|
+
registerReEvaluateWithContext(server);
|
|
81
83
|
}
|
|
82
84
|
// ─── explain_finding ─────────────────────────────────────────────────────────
|
|
83
85
|
function registerExplainFinding(server) {
|
|
@@ -394,3 +396,169 @@ function registerListTriagedFindings(server) {
|
|
|
394
396
|
}
|
|
395
397
|
});
|
|
396
398
|
}
|
|
399
|
+
// ─── re_evaluate_with_context ────────────────────────────────────────────────
|
|
400
|
+
/**
 * Register the `re_evaluate_with_context` tool on the given server.
 *
 * The handler:
 *   1. folds developer context, disputed/accepted rule IDs and focus areas
 *      into one context string,
 *   2. caps code, context and related files according to `maxPromptChars`
 *      (0 disables all caps),
 *   3. runs `evaluateWithTribunal` with auto-tune, calibration and the
 *      effective confidence threshold,
 *   4. halves the confidence of disputed findings (floor 0.1), marks them
 *      "supplementary", and re-applies the effective threshold,
 *   5. returns a markdown summary, a JSON block, and, when present, the
 *      deep-review prompt.
 *
 * Any thrown error is reported as an `isError` text result rather than
 * propagated.
 *
 * @param server - Server object exposing `tool(name, description, schema, handler)`
 */
function registerReEvaluateWithContext(server) {
    server.tool("re_evaluate_with_context", "Re-evaluate code with developer-provided context from a multi-turn conversation. Accepts disputed findings, accepted findings, and additional context to adjust the evaluation. This is the agentic feedback loop — the developer explains their intent and the tribunal re-evaluates with that context, applying auto-tune and confidence filtering.", {
        code: z.string().describe("The source code to re-evaluate"),
        language: z.string().describe("Programming language (e.g., typescript, python, go)"),
        disputedRuleIds: z
            .array(z.string())
            .optional()
            .describe("Rule IDs the developer disputes as false positives (e.g., ['SEC-001', 'PERF-003'])"),
        acceptedRuleIds: z
            .array(z.string())
            .optional()
            .describe("Rule IDs the developer accepts (these will not be filtered)"),
        developerContext: z
            .string()
            .optional()
            .describe("Free-form explanation from the developer about their intent, design decisions, or why certain findings are incorrect"),
        focusAreas: z
            .array(z.string())
            .optional()
            .describe("Specific areas to focus the re-evaluation on (e.g., ['security', 'performance'])"),
        confidenceFilter: z
            .number()
            .min(0)
            .max(1)
            .optional()
            .describe("Minimum confidence threshold — findings below this are dropped (default: 0.5)"),
        filePath: z.string().optional().describe("File path for context-aware evaluation"),
        deepReview: z
            .boolean()
            .optional()
            .describe("Whether to include the LLM deep-review prompt section in the result"),
        relatedFiles: z
            .array(z.object({
            path: z.string().describe("Path of the related file"),
            snippet: z.string().describe("Relevant code snippet from the related file"),
            relationship: z.string().optional().describe("Relationship to the main file (e.g., 'imports', 'tests')"),
        }))
            .optional()
            .describe("Cross-file context for more accurate evaluation"),
        maxPromptChars: z
            .number()
            .min(0)
            .optional()
            .describe("Maximum character budget for LLM prompts. Controls truncation of source code, related files, and context strings in deep-review prompts. Set to 0 to disable all truncation. Default: 100000."),
    }, async ({ code, language, disputedRuleIds, acceptedRuleIds, developerContext, focusAreas, confidenceFilter, filePath, deepReview, relatedFiles, maxPromptChars, }) => {
        try {
            // Build context string from developer inputs
            const contextParts = [];
            if (developerContext) {
                contextParts.push(`Developer context: ${developerContext}`);
            }
            if (disputedRuleIds && disputedRuleIds.length > 0) {
                contextParts.push(`Disputed findings: ${disputedRuleIds.join(", ")}`);
            }
            if (acceptedRuleIds && acceptedRuleIds.length > 0) {
                contextParts.push(`Accepted findings: ${acceptedRuleIds.join(", ")}`);
            }
            if (focusAreas && focusAreas.length > 0) {
                contextParts.push(`Focus areas: ${focusAreas.join(", ")}`);
            }
            const fullContext = contextParts.join("\n");
            // Apply token budget caps to inputs (a budget of 0 means "no caps")
            const budget = maxPromptChars ?? 100_000;
            const unlimited = budget === 0;
            const codeCap = unlimited ? Infinity : budget;
            const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
            const cappedCode = code.length > codeCap ? code.slice(0, codeCap) : code;
            const cappedContext = fullContext.length > contextCap ? fullContext.slice(0, contextCap) + "\n… (context truncated)" : fullContext;
            const cappedRelatedFiles = !unlimited && relatedFiles && relatedFiles.length > 10 ? relatedFiles.slice(0, 10) : relatedFiles;
            // One effective threshold for both the evaluation and the re-filter below,
            // so the documented default (0.5) applies even when the caller omits it.
            const threshold = confidenceFilter ?? 0.5;
            const evalOptions = {
                autoTune: true,
                deepReview: deepReview ?? false,
                confidenceFilter: threshold,
                filePath,
                relatedFiles: cappedRelatedFiles,
                calibrate: true,
                maxPromptChars: maxPromptChars,
            };
            const verdict = evaluateWithTribunal(cappedCode, language, cappedContext || undefined, evalOptions);
            // Post-process: mark disputed findings
            let findings = verdict.findings;
            const disputedSet = new Set(disputedRuleIds ?? []);
            const hasDisputes = disputedSet.size > 0;
            if (hasDisputes) {
                findings = findings
                    .map((f) => disputedSet.has(f.ruleId)
                    ? {
                        ...f,
                        // Halve confidence, but never below 0.1.
                        confidence: Math.max(0.1, (f.confidence ?? 0.5) * 0.5),
                        confidenceTier: "supplementary",
                    }
                    : f)
                    // Re-filter after confidence adjustment using the effective threshold.
                    .filter((f) => (f.confidence ?? 0.5) >= threshold);
            }
            // Disputed rule IDs that no longer appear in the final findings.
            const disputedResolved = disputedRuleIds?.filter((id) => !findings.some((f) => f.ruleId === id)) ?? [];
            const sections = [];
            sections.push(`# Re-Evaluation Results\n`);
            sections.push(`**Verdict:** ${verdict.overallVerdict} · **Score:** ${verdict.overallScore}/100`);
            sections.push(`**Findings:** ${findings.length} (after context adjustment)`);
            if (verdict.autoTuneApplied) {
                sections.push(`**Auto-tune:** ${verdict.autoTuneApplied.suppressed} suppressed, ${verdict.autoTuneApplied.downgraded} downgraded`);
            }
            if (verdict.confidenceFilterApplied) {
                sections.push(`**Confidence filter:** ${verdict.confidenceFilterApplied.filteredOut} findings below ${Math.round(verdict.confidenceFilterApplied.threshold * 100)}% filtered out`);
            }
            if (findings.length > 0) {
                sections.push(`\n## Findings\n`);
                for (const f of findings) {
                    const conf = f.confidence !== undefined && f.confidence !== null ? ` (${Math.round(f.confidence * 100)}%)` : "";
                    const tier = f.confidenceTier ? ` [${f.confidenceTier}]` : "";
                    sections.push(`- **${f.ruleId}** ${f.severity}${conf}${tier}: ${f.title}`);
                }
            }
            if (hasDisputes) {
                const stillPresent = findings.filter((f) => disputedSet.has(f.ruleId));
                if (disputedResolved.length > 0) {
                    sections.push(`\n## Disputed findings resolved\n`);
                    sections.push(`The following disputed findings were dropped: ${disputedResolved.join(", ")}`);
                }
                if (stillPresent.length > 0) {
                    sections.push(`\n## Disputed findings retained\n`);
                    sections.push(`The following remain with reduced confidence: ${stillPresent.map((f) => `${f.ruleId} (${Math.round((f.confidence ?? 0) * 100)}%)`).join(", ")}`);
                }
            }
            const structured = {
                overallVerdict: verdict.overallVerdict,
                overallScore: verdict.overallScore,
                findingCount: findings.length,
                autoTuneApplied: verdict.autoTuneApplied ?? null,
                confidenceFilterApplied: verdict.confidenceFilterApplied ?? null,
                disputedResolved,
                findings: findings.map((f) => ({
                    ruleId: f.ruleId,
                    severity: f.severity,
                    confidence: f.confidence,
                    confidenceTier: f.confidenceTier,
                    title: f.title,
                })),
            };
            const contentBlocks = [
                { type: "text", text: sections.join("\n") },
                { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
            ];
            if (verdict.deepReviewPrompt) {
                contentBlocks.push({ type: "text", text: verdict.deepReviewPrompt });
            }
            return { content: contentBlocks };
        }
        catch (error) {
            return {
                content: [
                    {
                        type: "text",
                        text: error instanceof Error ? `Error: ${error.message}` : "Error: Re-evaluation failed",
                    },
                ],
                isError: true,
            };
        }
    });
}
|