@kevinrabun/judges 3.118.0 → 3.119.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -0
- package/dist/api.d.ts +1 -1
- package/dist/api.js +1 -1
- package/dist/evaluators/index.d.ts +12 -0
- package/dist/evaluators/index.js +1 -1
- package/dist/tools/deep-review.d.ts +13 -2
- package/dist/tools/deep-review.js +76 -16
- package/dist/tools/register-review.js +17 -3
- package/dist/tools/register-workflow.js +7 -1
- package/package.json +1 -1
- package/server.json +2 -2
package/README.md
CHANGED
|
@@ -1098,6 +1098,36 @@ Analyze a dependency manifest file for supply-chain risks, version pinning issue
|
|
|
1098
1098
|
| `manifestType` | string | yes | File type: `package.json`, `requirements.txt`, etc. |
|
|
1099
1099
|
| `context` | string | no | Optional context |
|
|
1100
1100
|
|
|
1101
|
+
### `evaluate_git_diff`
|
|
1102
|
+
Evaluate only **changed lines** from a git diff. Provide either `repoPath` for a live git diff or `diffText` for a pre-computed unified diff.
|
|
1103
|
+
|
|
1104
|
+
| Parameter | Type | Required | Description |
|
|
1105
|
+
|-----------|------|----------|-------------|
|
|
1106
|
+
| `repoPath` | string | conditional | Absolute path to the git repository |
|
|
1107
|
+
| `base` | string | no | Git ref to diff against (default: `HEAD~1`) |
|
|
1108
|
+
| `diffText` | string | conditional | Pre-computed unified diff text |
|
|
1109
|
+
| `confidenceFilter` | number | no | Minimum confidence threshold for findings (0–1) |
|
|
1110
|
+
| `autoTune` | boolean | no | Apply feedback-driven auto-tuning (default: false) |
|
|
1111
|
+
| `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
|
|
1112
|
+
| `config` | object | no | Inline configuration |
|
|
1113
|
+
|
|
1114
|
+
### `re_evaluate_with_context`
|
|
1115
|
+
Re-run the tribunal with **prior findings as context** for iterative refinement. Supports dispute resolution, developer context injection, and focus-area filtering.
|
|
1116
|
+
|
|
1117
|
+
| Parameter | Type | Required | Description |
|
|
1118
|
+
|-----------|------|----------|-------------|
|
|
1119
|
+
| `code` | string | yes | Source code to re-evaluate |
|
|
1120
|
+
| `language` | string | yes | Programming language |
|
|
1121
|
+
| `disputedRuleIds` | string[] | no | Rule IDs the developer disputes as false positives |
|
|
1122
|
+
| `acceptedRuleIds` | string[] | no | Rule IDs the developer accepts |
|
|
1123
|
+
| `developerContext` | string | no | Free-form explanation of developer intent |
|
|
1124
|
+
| `focusAreas` | string[] | no | Specific areas to focus on (e.g., `["security"]`) |
|
|
1125
|
+
| `confidenceFilter` | number | no | Minimum confidence threshold (default: 0.5) |
|
|
1126
|
+
| `filePath` | string | no | File path for context-aware evaluation |
|
|
1127
|
+
| `deepReview` | boolean | no | Include LLM deep-review prompt section |
|
|
1128
|
+
| `relatedFiles` | array | no | Cross-file context `{ path, snippet, relationship? }[]` |
|
|
1129
|
+
| `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
|
|
1130
|
+
|
|
1101
1131
|
#### Judge IDs
|
|
1102
1132
|
|
|
1103
1133
|
`data-security` · `cybersecurity` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `reliability` · `observability` · `performance` · `compliance` · `data-sovereignty` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias` · `maintainability` · `error-handling` · `authentication` · `database` · `caching` · `configuration-management` · `backwards-compatibility` · `portability` · `ux` · `logging-privacy` · `rate-limiting` · `ci-cd` · `code-structure` · `agent-instructions` · `ai-code-safety` · `framework-safety` · `iac-security` · `false-positive-review`
|
package/dist/api.d.ts
CHANGED
|
@@ -20,7 +20,7 @@ export { getPreset, composePresets, listPresets, PRESETS } from "./presets.js";
|
|
|
20
20
|
export type { Preset } from "./presets.js";
|
|
21
21
|
export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
|
|
22
22
|
export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
|
|
23
|
-
export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, } from "./tools/deep-review.js";
|
|
23
|
+
export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, formatRelatedFilesSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, DEFAULT_MAX_PROMPT_CHARS, } from "./tools/deep-review.js";
|
|
24
24
|
export type { RelatedFileSnippet } from "./tools/deep-review.js";
|
|
25
25
|
export { getCondensedCriteria } from "./tools/prompts.js";
|
|
26
26
|
export { parseDismissedFindings, recordL2Feedback, loadFeedbackStore, saveFeedbackStore, addFeedback, computeFeedbackStats, getFpRateByRule, mergeFeedbackStores, computeTeamFeedbackStats, formatTeamStatsOutput, } from "./commands/feedback.js";
|
package/dist/api.js
CHANGED
|
@@ -27,7 +27,7 @@ export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from ".
|
|
|
27
27
|
// ─── Cross-File Taint Analysis ───────────────────────────────────────────────
|
|
28
28
|
export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
|
|
29
29
|
// ─── Deep Review Prompts ─────────────────────────────────────────────────────
|
|
30
|
-
export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, } from "./tools/deep-review.js";
|
|
30
|
+
export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, formatRelatedFilesSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, DEFAULT_MAX_PROMPT_CHARS, } from "./tools/deep-review.js";
|
|
31
31
|
// ─── Prompt Utilities ────────────────────────────────────────────────────────
|
|
32
32
|
export { getCondensedCriteria } from "./tools/prompts.js";
|
|
33
33
|
// ─── Feedback & Calibration ─────────────────────────────────────────────────
|
package/dist/evaluators/index.d.ts
CHANGED
|
@@ -93,6 +93,18 @@ export interface EvaluationOptions {
|
|
|
93
93
|
* Value range: 0-1 (e.g., 0.6 means only findings with >= 60% confidence appear).
|
|
94
94
|
*/
|
|
95
95
|
confidenceFilter?: number;
|
|
96
|
+
/**
|
|
97
|
+
* Maximum character budget for LLM-facing prompt content.
|
|
98
|
+
* Controls truncation of:
|
|
99
|
+
* - Source code in deep-review prompts (truncated with summary when exceeded)
|
|
100
|
+
* - Related file snippets (array trimmed to fit budget)
|
|
101
|
+
* - Developer context strings (truncated)
|
|
102
|
+
*
|
|
103
|
+
* Defaults to 100_000 (~25K tokens). Set to 0 to disable all truncation
|
|
104
|
+
* (use with caution — large files can produce prompts that exceed model
|
|
105
|
+
* context windows and waste tokens).
|
|
106
|
+
*/
|
|
107
|
+
maxPromptChars?: number;
|
|
96
108
|
/** @internal — pre-computed AST structure for the file (set by evaluateWithTribunal) */
|
|
97
109
|
_astCache?: CodeStructure;
|
|
98
110
|
/** @internal — pre-computed taint flows for the file (set by evaluateWithTribunal) */
|
package/dist/evaluators/index.js
CHANGED
|
@@ -927,7 +927,7 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
927
927
|
try {
|
|
928
928
|
const projectCtx = detectProjectContext(code, language, enrichedOptions.filePath);
|
|
929
929
|
const relatedSnippets = enrichedOptions.relatedFiles ?? [];
|
|
930
|
-
result.deepReviewPrompt = buildTribunalDeepReviewSection(judges, language, context, relatedSnippets.map((r) => ({ path: r.path, snippet: r.snippet, relationship: r.relationship })), projectCtx);
|
|
930
|
+
result.deepReviewPrompt = buildTribunalDeepReviewSection(judges, language, context, relatedSnippets.map((r) => ({ path: r.path, snippet: r.snippet, relationship: r.relationship })), projectCtx, enrichedOptions.maxPromptChars);
|
|
931
931
|
}
|
|
932
932
|
catch {
|
|
933
933
|
// Deep review prompt generation failure is non-fatal
|
package/dist/tools/deep-review.d.ts
CHANGED
|
@@ -9,6 +9,8 @@ export declare function isContentPolicyRefusal(responseText: string): boolean;
|
|
|
9
9
|
export declare const DEEP_REVIEW_PROMPT_INTRO: string;
|
|
10
10
|
/** Content-policy-safe Assistant identity message. */
|
|
11
11
|
export declare const DEEP_REVIEW_IDENTITY: string;
|
|
12
|
+
/** Default max chars for LLM-facing prompt content (~25K tokens). */
|
|
13
|
+
export declare const DEFAULT_MAX_PROMPT_CHARS = 100000;
|
|
12
14
|
export interface RelatedFileSnippet {
|
|
13
15
|
/** Relative file path */
|
|
14
16
|
path: string;
|
|
@@ -17,11 +19,20 @@ export interface RelatedFileSnippet {
|
|
|
17
19
|
/** Why this file is relevant (e.g. "imported by target", "shared type") */
|
|
18
20
|
relationship?: string;
|
|
19
21
|
}
|
|
22
|
+
/**
|
|
23
|
+
* Format related files into a prompt section that gives the LLM cross-file
|
|
24
|
+
* visibility for deeper analysis.
|
|
25
|
+
*
|
|
26
|
+
* @param relatedFiles — array of related file snippets
|
|
27
|
+
* @param maxFiles — max files to include (default: 10). Set to 0 for unlimited.
|
|
28
|
+
* @param snippetBudget — per-snippet char cap (default: 3000). Set to 0 for unlimited.
|
|
29
|
+
*/
|
|
30
|
+
export declare function formatRelatedFilesSection(relatedFiles: RelatedFileSnippet[], maxFiles?: number, snippetBudget?: number): string;
|
|
20
31
|
/**
|
|
21
32
|
* Format detected project context into a prompt section so the LLM
|
|
22
33
|
* understands the runtime environment, framework, and architectural role.
|
|
23
34
|
*/
|
|
24
35
|
export declare function formatProjectContextSection(projectContext: ProjectContext): string;
|
|
25
|
-
export declare function buildSingleJudgeDeepReviewSection(judge: JudgeDefinition, language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext): string;
|
|
26
|
-
export declare function buildTribunalDeepReviewSection(judges: JudgeDefinition[], language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext): string;
|
|
36
|
+
export declare function buildSingleJudgeDeepReviewSection(judge: JudgeDefinition, language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext, maxPromptChars?: number): string;
|
|
37
|
+
export declare function buildTribunalDeepReviewSection(judges: JudgeDefinition[], language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext, maxPromptChars?: number): string;
|
|
27
38
|
export declare function buildSimplifiedDeepReviewSection(language: string, context?: string): string;
|
package/dist/tools/deep-review.js
CHANGED
|
@@ -54,27 +54,45 @@ export const DEEP_REVIEW_IDENTITY = `I am the Judges Panel — a professional, c
|
|
|
54
54
|
`My role is to provide evidence-based, constructive recommendations that ` +
|
|
55
55
|
`strengthen code quality, security defenses, and engineering standards. ` +
|
|
56
56
|
`This is NOT a penetration test or adversarial security assessment.`;
|
|
57
|
+
// ─── Token Budget Constants ──────────────────────────────────────────────────
|
|
58
|
+
/** Default max chars for LLM-facing prompt content (~25K tokens). */
|
|
59
|
+
export const DEFAULT_MAX_PROMPT_CHARS = 100_000;
|
|
60
|
+
/** Per-snippet char cap for related files. */
|
|
61
|
+
const MAX_SNIPPET_CHARS = 3_000;
|
|
62
|
+
/** Max related files to include by default. */
|
|
63
|
+
const MAX_RELATED_FILES = 10;
|
|
57
64
|
/**
|
|
58
65
|
* Format related files into a prompt section that gives the LLM cross-file
|
|
59
66
|
* visibility for deeper analysis.
|
|
67
|
+
*
|
|
68
|
+
* @param relatedFiles — array of related file snippets
|
|
69
|
+
* @param maxFiles — max files to include (default: 10). Set to 0 for unlimited.
|
|
70
|
+
* @param snippetBudget — per-snippet char cap (default: 3000). Set to 0 for unlimited.
|
|
60
71
|
*/
|
|
61
|
-
function formatRelatedFilesSection(relatedFiles) {
|
|
72
|
+
export function formatRelatedFilesSection(relatedFiles, maxFiles = MAX_RELATED_FILES, snippetBudget = MAX_SNIPPET_CHARS) {
|
|
62
73
|
if (relatedFiles.length === 0)
|
|
63
74
|
return "";
|
|
75
|
+
// Apply file count cap (0 = unlimited)
|
|
76
|
+
const files = maxFiles > 0 ? relatedFiles.slice(0, maxFiles) : relatedFiles;
|
|
77
|
+
const skipped = relatedFiles.length - files.length;
|
|
64
78
|
let md = `### Related Files\n\n`;
|
|
65
79
|
md += `> The following files are related to the code under review. Use them to `;
|
|
66
80
|
md += `understand cross-file data flow, shared types, imports, and call sites. `;
|
|
67
81
|
md += `These provide context only — focus your findings on the primary code above.\n\n`;
|
|
68
|
-
for (const f of relatedFiles) {
|
|
82
|
+
for (const f of files) {
|
|
69
83
|
md += `<details>\n<summary><code>${f.path}</code>`;
|
|
70
84
|
if (f.relationship)
|
|
71
85
|
md += ` — ${f.relationship}`;
|
|
72
86
|
md += `</summary>\n\n`;
|
|
73
|
-
// Limit snippet size to prevent prompt explosion
|
|
74
|
-
const
|
|
87
|
+
// Limit snippet size to prevent prompt explosion (0 = unlimited)
|
|
88
|
+
const cap = snippetBudget > 0 ? snippetBudget : Infinity;
|
|
89
|
+
const truncated = f.snippet.length > cap ? f.snippet.slice(0, cap) + "\n// ... truncated" : f.snippet;
|
|
75
90
|
md += `\`\`\`\n${truncated}\n\`\`\`\n`;
|
|
76
91
|
md += `</details>\n\n`;
|
|
77
92
|
}
|
|
93
|
+
if (skipped > 0) {
|
|
94
|
+
md += `> *${skipped} additional related file(s) omitted to stay within token budget.*\n\n`;
|
|
95
|
+
}
|
|
78
96
|
return md;
|
|
79
97
|
}
|
|
80
98
|
// ─── Project Context Section ─────────────────────────────────────────────────
|
|
@@ -111,7 +129,13 @@ export function formatProjectContextSection(projectContext) {
|
|
|
111
129
|
return md;
|
|
112
130
|
}
|
|
113
131
|
// ─── Single-Judge Deep Review ────────────────────────────────────────────────
|
|
114
|
-
export function buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext) {
|
|
132
|
+
export function buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext, maxPromptChars) {
|
|
133
|
+
const budget = maxPromptChars ?? DEFAULT_MAX_PROMPT_CHARS;
|
|
134
|
+
const unlimited = budget === 0;
|
|
135
|
+
const maxRelatedFiles = unlimited ? 0 : MAX_RELATED_FILES;
|
|
136
|
+
const snippetCap = unlimited ? 0 : MAX_SNIPPET_CHARS;
|
|
137
|
+
const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
|
|
138
|
+
const truncatedContext = context && context.length > contextCap ? context.slice(0, contextCap) + "\n… (context truncated)" : context;
|
|
115
139
|
let md = `\n\n---\n\n`;
|
|
116
140
|
md += `## 🔍 Deep Contextual Review Required\n\n`;
|
|
117
141
|
md += DEFENSIVE_PREAMBLE;
|
|
@@ -120,14 +144,14 @@ export function buildSingleJudgeDeepReviewSection(judge, language, context, rela
|
|
|
120
144
|
md += `business logic validity, cross-module data flow, or context-dependent implications.\n>\n`;
|
|
121
145
|
md += `> **You MUST now perform a thorough contextual review** of the ${language} code using the expert criteria below. `;
|
|
122
146
|
md += `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified evaluation.\n\n`;
|
|
123
|
-
if (context) {
|
|
124
|
-
md += `**Context provided:** ${context}\n\n`;
|
|
147
|
+
if (truncatedContext) {
|
|
148
|
+
md += `**Context provided:** ${truncatedContext}\n\n`;
|
|
125
149
|
}
|
|
126
150
|
if (projectContext) {
|
|
127
151
|
md += formatProjectContextSection(projectContext);
|
|
128
152
|
}
|
|
129
153
|
if (relatedFiles && relatedFiles.length > 0) {
|
|
130
|
-
md += formatRelatedFilesSection(relatedFiles);
|
|
154
|
+
md += formatRelatedFilesSection(relatedFiles, maxRelatedFiles, snippetCap);
|
|
131
155
|
}
|
|
132
156
|
md += `### ${judge.name} — ${judge.domain}\n\n`;
|
|
133
157
|
md += `${judge.description}\n\n`;
|
|
@@ -160,7 +184,23 @@ export function buildSingleJudgeDeepReviewSection(judge, language, context, rela
|
|
|
160
184
|
return md;
|
|
161
185
|
}
|
|
162
186
|
// ─── Tribunal Deep Review (full) ─────────────────────────────────────────────
|
|
163
|
-
export function buildTribunalDeepReviewSection(judges, language, context, relatedFiles, projectContext) {
|
|
187
|
+
export function buildTribunalDeepReviewSection(judges, language, context, relatedFiles, projectContext, maxPromptChars) {
|
|
188
|
+
const budget = maxPromptChars ?? DEFAULT_MAX_PROMPT_CHARS;
|
|
189
|
+
const unlimited = budget === 0;
|
|
190
|
+
// When budget is tight, use simplified mode (category-based instead of per-judge)
|
|
191
|
+
// The full per-judge section is ~15-18K chars for 45 judges. Simplified is ~2K.
|
|
192
|
+
// Use simplified when remaining budget for judge criteria would be < 5K.
|
|
193
|
+
const estimatedJudgeCriteriaChars = judges.length * 350;
|
|
194
|
+
const estimatedOverhead = 4000; // preamble, instructions, format section
|
|
195
|
+
const estimatedRelatedChars = (relatedFiles?.length ?? 0) * MAX_SNIPPET_CHARS;
|
|
196
|
+
const budgetForJudges = unlimited ? Infinity : budget - estimatedOverhead - estimatedRelatedChars;
|
|
197
|
+
const useSimplified = !unlimited && budgetForJudges < estimatedJudgeCriteriaChars;
|
|
198
|
+
// Determine related files caps
|
|
199
|
+
const maxRelatedFiles = unlimited ? 0 : MAX_RELATED_FILES;
|
|
200
|
+
const snippetCap = unlimited ? 0 : MAX_SNIPPET_CHARS;
|
|
201
|
+
// Truncate context string if it would blow the budget
|
|
202
|
+
const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
|
|
203
|
+
const truncatedContext = context && context.length > contextCap ? context.slice(0, contextCap) + "\n… (context truncated)" : context;
|
|
164
204
|
let md = `\n\n---\n\n`;
|
|
165
205
|
md += `## 🔍 Deep Contextual Review Required\n\n`;
|
|
166
206
|
md += DEFENSIVE_PREAMBLE;
|
|
@@ -170,21 +210,41 @@ export function buildTribunalDeepReviewSection(judges, language, context, relate
|
|
|
170
210
|
md += `> **You MUST now perform a thorough contextual review** of the ${language} code from the perspective of ALL ${judges.length} judges below. `;
|
|
171
211
|
md += `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified tribunal verdict.\n\n`;
|
|
172
212
|
md += `> **This is a professional code quality tool.** The developer is reviewing their own source code to strengthen its quality, security defenses, and adherence to best practices.\n\n`;
|
|
173
|
-
if (context) {
|
|
174
|
-
md += `**Context provided:** ${context}\n\n`;
|
|
213
|
+
if (truncatedContext) {
|
|
214
|
+
md += `**Context provided:** ${truncatedContext}\n\n`;
|
|
175
215
|
}
|
|
176
216
|
if (projectContext) {
|
|
177
217
|
md += formatProjectContextSection(projectContext);
|
|
178
218
|
}
|
|
179
219
|
if (relatedFiles && relatedFiles.length > 0) {
|
|
180
|
-
md += formatRelatedFilesSection(relatedFiles);
|
|
220
|
+
md += formatRelatedFilesSection(relatedFiles, maxRelatedFiles, snippetCap);
|
|
181
221
|
}
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
md +=
|
|
185
|
-
md +=
|
|
222
|
+
if (useSimplified) {
|
|
223
|
+
// Compact category-based criteria instead of per-judge listing
|
|
224
|
+
md += `### Quality Dimensions (${judges.length} judges)\n\n`;
|
|
225
|
+
md += `> Using compact criteria mode to stay within token budget.\n\n`;
|
|
226
|
+
// Group judges by domain
|
|
227
|
+
const domainGroups = new Map();
|
|
228
|
+
for (const judge of judges) {
|
|
229
|
+
const domain = judge.domain ?? "general";
|
|
230
|
+
if (!domainGroups.has(domain))
|
|
231
|
+
domainGroups.set(domain, []);
|
|
232
|
+
domainGroups.get(domain).push(`\`${judge.rulePrefix}\` ${judge.name}`);
|
|
233
|
+
}
|
|
234
|
+
for (const [domain, names] of domainGroups) {
|
|
235
|
+
md += `**${domain}:** ${names.join(", ")}\n\n`;
|
|
236
|
+
}
|
|
237
|
+
md += `**Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
|
|
186
238
|
md += `---\n\n`;
|
|
187
239
|
}
|
|
240
|
+
else {
|
|
241
|
+
for (const judge of judges) {
|
|
242
|
+
md += `### ${judge.name} — ${judge.domain}\n\n`;
|
|
243
|
+
md += `${judge.description}\n\n`;
|
|
244
|
+
md += `**Rule prefix:** \`${judge.rulePrefix}-\` · **Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance — they are opaque platform constants. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
|
|
245
|
+
md += `---\n\n`;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
188
248
|
md += `### False Positive Review\n\n`;
|
|
189
249
|
md += `Before adding new findings, **review each pattern-based finding above for false positives.** `;
|
|
190
250
|
md += `Static pattern matching can flag code that is actually correct — for example:\n`;
|
package/dist/tools/register-review.js
CHANGED
|
@@ -436,7 +436,12 @@ function registerReEvaluateWithContext(server) {
|
|
|
436
436
|
}))
|
|
437
437
|
.optional()
|
|
438
438
|
.describe("Cross-file context for more accurate evaluation"),
|
|
439
|
-
|
|
439
|
+
maxPromptChars: z
|
|
440
|
+
.number()
|
|
441
|
+
.min(0)
|
|
442
|
+
.optional()
|
|
443
|
+
.describe("Maximum character budget for LLM prompts. Controls truncation of source code, related files, and context strings in deep-review prompts. Set to 0 to disable all truncation. Default: 100000."),
|
|
444
|
+
}, async ({ code, language, disputedRuleIds, acceptedRuleIds, developerContext, focusAreas, confidenceFilter, filePath, deepReview, relatedFiles, maxPromptChars, }) => {
|
|
440
445
|
try {
|
|
441
446
|
// Build context string from developer inputs
|
|
442
447
|
const contextParts = [];
|
|
@@ -453,15 +458,24 @@ function registerReEvaluateWithContext(server) {
|
|
|
453
458
|
contextParts.push(`Focus areas: ${focusAreas.join(", ")}`);
|
|
454
459
|
}
|
|
455
460
|
const fullContext = contextParts.join("\n");
|
|
461
|
+
// Apply token budget caps to inputs
|
|
462
|
+
const budget = maxPromptChars ?? 100_000;
|
|
463
|
+
const unlimited = budget === 0;
|
|
464
|
+
const codeCap = unlimited ? Infinity : budget;
|
|
465
|
+
const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
|
|
466
|
+
const cappedCode = code.length > codeCap ? code.slice(0, codeCap) : code;
|
|
467
|
+
const cappedContext = fullContext.length > contextCap ? fullContext.slice(0, contextCap) + "\n… (context truncated)" : fullContext;
|
|
468
|
+
const cappedRelatedFiles = !unlimited && relatedFiles && relatedFiles.length > 10 ? relatedFiles.slice(0, 10) : relatedFiles;
|
|
456
469
|
const evalOptions = {
|
|
457
470
|
autoTune: true,
|
|
458
471
|
deepReview: deepReview ?? false,
|
|
459
472
|
confidenceFilter: confidenceFilter ?? 0.5,
|
|
460
473
|
filePath,
|
|
461
|
-
relatedFiles,
|
|
474
|
+
relatedFiles: cappedRelatedFiles,
|
|
462
475
|
calibrate: true,
|
|
476
|
+
maxPromptChars: maxPromptChars,
|
|
463
477
|
};
|
|
464
|
-
const verdict = evaluateWithTribunal(
|
|
478
|
+
const verdict = evaluateWithTribunal(cappedCode, language, cappedContext || undefined, evalOptions);
|
|
465
479
|
// Post-process: mark disputed findings
|
|
466
480
|
let findings = verdict.findings;
|
|
467
481
|
if (disputedRuleIds && disputedRuleIds.length > 0) {
|
package/dist/tools/register-workflow.js
CHANGED
|
@@ -946,12 +946,18 @@ function registerEvaluateGitDiff(server) {
|
|
|
946
946
|
.boolean()
|
|
947
947
|
.optional()
|
|
948
948
|
.describe("Apply feedback-driven auto-tuning to reduce false positives (default: false)"),
|
|
949
|
+
maxPromptChars: z
|
|
950
|
+
.number()
|
|
951
|
+
.min(0)
|
|
952
|
+
.optional()
|
|
953
|
+
.describe("Maximum character budget for LLM prompts. Controls truncation of deep-review prompts. Set to 0 to disable all truncation. Default: 100000."),
|
|
949
954
|
config: configSchema,
|
|
950
|
-
}, async ({ repoPath, base, diffText, confidenceFilter, autoTune, config }) => {
|
|
955
|
+
}, async ({ repoPath, base, diffText, confidenceFilter, autoTune, maxPromptChars, config }) => {
|
|
951
956
|
try {
|
|
952
957
|
const evalOptions = {
|
|
953
958
|
confidenceFilter,
|
|
954
959
|
autoTune,
|
|
960
|
+
maxPromptChars,
|
|
955
961
|
config: toJudgesConfig(config),
|
|
956
962
|
};
|
|
957
963
|
let result;
|
package/package.json
CHANGED
package/server.json
CHANGED
|
@@ -7,12 +7,12 @@
|
|
|
7
7
|
"url": "https://github.com/kevinrabun/judges",
|
|
8
8
|
"source": "github"
|
|
9
9
|
},
|
|
10
|
-
"version": "3.118.0",
|
|
10
|
+
"version": "3.119.0",
|
|
11
11
|
"packages": [
|
|
12
12
|
{
|
|
13
13
|
"registryType": "npm",
|
|
14
14
|
"identifier": "@kevinrabun/judges",
|
|
15
|
-
"version": "3.118.0",
|
|
15
|
+
"version": "3.119.0",
|
|
16
16
|
"transport": {
|
|
17
17
|
"type": "stdio"
|
|
18
18
|
}
|