npm - @kevinrabun/judges - Versions diffs - 3.118.0 → 3.119.0 - Mend

@kevinrabun/judges 3.118.0 → 3.119.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/README.md +30 -0
package/dist/api.d.ts +1 -1
package/dist/api.js +1 -1
package/dist/evaluators/index.d.ts +12 -0
package/dist/evaluators/index.js +1 -1
package/dist/tools/deep-review.d.ts +13 -2
package/dist/tools/deep-review.js +76 -16
package/dist/tools/register-review.js +17 -3
package/dist/tools/register-workflow.js +7 -1
package/package.json +1 -1
package/server.json +2 -2

package/README.md CHANGED

@@ -1098,6 +1098,36 @@ Analyze a dependency manifest file for supply-chain risks, version pinning issue
 | `manifestType` | string | yes | File type: `package.json`, `requirements.txt`, etc. |
 | `context` | string | no | Optional context |
+### `evaluate_git_diff`
+Evaluate only **changed lines** from a git diff. Provide either `repoPath` for a live git diff or `diffText` for a pre-computed unified diff.
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `repoPath` | string | conditional | Absolute path to the git repository |
+| `base` | string | no | Git ref to diff against (default: `HEAD~1`) |
+| `diffText` | string | conditional | Pre-computed unified diff text |
+| `confidenceFilter` | number | no | Minimum confidence threshold for findings (0–1) |
+| `autoTune` | boolean | no | Apply feedback-driven auto-tuning (default: false) |
+| `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
+| `config` | object | no | Inline configuration |
+### `re_evaluate_with_context`
+Re-run the tribunal with **prior findings as context** for iterative refinement. Supports dispute resolution, developer context injection, and focus-area filtering.
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `code` | string | yes | Source code to re-evaluate |
+| `language` | string | yes | Programming language |
+| `disputedRuleIds` | string[] | no | Rule IDs the developer disputes as false positives |
+| `acceptedRuleIds` | string[] | no | Rule IDs the developer accepts |
+| `developerContext` | string | no | Free-form explanation of developer intent |
+| `focusAreas` | string[] | no | Specific areas to focus on (e.g., `["security"]`) |
+| `confidenceFilter` | number | no | Minimum confidence threshold (default: 0.5) |
+| `filePath` | string | no | File path for context-aware evaluation |
+| `deepReview` | boolean | no | Include LLM deep-review prompt section |
+| `relatedFiles` | array | no | Cross-file context `{ path, snippet, relationship? }[]` |
+| `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
 #### Judge IDs
 `data-security` · `cybersecurity` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `reliability` · `observability` · `performance` · `compliance` · `data-sovereignty` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias` · `maintainability` · `error-handling` · `authentication` · `database` · `caching` · `configuration-management` · `backwards-compatibility` · `portability` · `ux` · `logging-privacy` · `rate-limiting` · `ci-cd` · `code-structure` · `agent-instructions` · `ai-code-safety` · `framework-safety` · `iac-security` · `false-positive-review`

package/dist/api.d.ts CHANGED

@@ -20,7 +20,7 @@ export { getPreset, composePresets, listPresets, PRESETS } from "./presets.js";
 export type { Preset } from "./presets.js";
 export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
 export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
-export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, } from "./tools/deep-review.js";
+export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, formatRelatedFilesSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, DEFAULT_MAX_PROMPT_CHARS, } from "./tools/deep-review.js";
 export type { RelatedFileSnippet } from "./tools/deep-review.js";
 export { getCondensedCriteria } from "./tools/prompts.js";
 export { parseDismissedFindings, recordL2Feedback, loadFeedbackStore, saveFeedbackStore, addFeedback, computeFeedbackStats, getFpRateByRule, mergeFeedbackStores, computeTeamFeedbackStats, formatTeamStatsOutput, } from "./commands/feedback.js";

package/dist/api.js CHANGED

@@ -27,7 +27,7 @@ export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from ".
 // ─── Cross-File Taint Analysis ───────────────────────────────────────────────
 export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
 // ─── Deep Review Prompts ─────────────────────────────────────────────────────
-export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, } from "./tools/deep-review.js";
+export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, formatRelatedFilesSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, DEFAULT_MAX_PROMPT_CHARS, } from "./tools/deep-review.js";
 // ─── Prompt Utilities ────────────────────────────────────────────────────────
 export { getCondensedCriteria } from "./tools/prompts.js";
 // ─── Feedback & Calibration ─────────────────────────────────────────────────

package/dist/evaluators/index.d.ts CHANGED

@@ -93,6 +93,18 @@ export interface EvaluationOptions {
      * Value range: 0-1 (e.g., 0.6 means only findings with >= 60% confidence appear).
      */
     confidenceFilter?: number;
+    /**
+     * Maximum character budget for LLM-facing prompt content.
+     * Controls truncation of:
+     * - Source code in deep-review prompts (truncated with summary when exceeded)
+     * - Related file snippets (array trimmed to fit budget)
+     * - Developer context strings (truncated)
+     *
+     * Defaults to 100_000 (~25K tokens). Set to 0 to disable all truncation
+     * (use with caution — large files can produce prompts that exceed model
+     * context windows and waste tokens).
+     */
+    maxPromptChars?: number;
     /** @internal — pre-computed AST structure for the file (set by evaluateWithTribunal) */
     _astCache?: CodeStructure;
     /** @internal — pre-computed taint flows for the file (set by evaluateWithTribunal) */

package/dist/evaluators/index.js CHANGED

@@ -927,7 +927,7 @@ export function evaluateWithTribunal(code, language, context, options) {
         try {
             const projectCtx = detectProjectContext(code, language, enrichedOptions.filePath);
             const relatedSnippets = enrichedOptions.relatedFiles ?? [];
-            result.deepReviewPrompt = buildTribunalDeepReviewSection(judges, language, context, relatedSnippets.map((r) => ({ path: r.path, snippet: r.snippet, relationship: r.relationship })), projectCtx);
+            result.deepReviewPrompt = buildTribunalDeepReviewSection(judges, language, context, relatedSnippets.map((r) => ({ path: r.path, snippet: r.snippet, relationship: r.relationship })), projectCtx, enrichedOptions.maxPromptChars);
         }
         catch {
             // Deep review prompt generation failure is non-fatal

package/dist/tools/deep-review.d.ts CHANGED

@@ -9,6 +9,8 @@ export declare function isContentPolicyRefusal(responseText: string): boolean;
 export declare const DEEP_REVIEW_PROMPT_INTRO: string;
 /** Content-policy-safe Assistant identity message. */
 export declare const DEEP_REVIEW_IDENTITY: string;
+/** Default max chars for LLM-facing prompt content (~25K tokens). */
+export declare const DEFAULT_MAX_PROMPT_CHARS = 100000;
 export interface RelatedFileSnippet {
     /** Relative file path */
     path: string;
@@ -17,11 +19,20 @@ export interface RelatedFileSnippet {
     /** Why this file is relevant (e.g. "imported by target", "shared type") */
     relationship?: string;
 }
+/**
+ * Format related files into a prompt section that gives the LLM cross-file
+ * visibility for deeper analysis.
+ *
+ * @param relatedFiles — array of related file snippets
+ * @param maxFiles — max files to include (default: 10). Set to 0 for unlimited.
+ * @param snippetBudget — per-snippet char cap (default: 3000). Set to 0 for unlimited.
+ */
+export declare function formatRelatedFilesSection(relatedFiles: RelatedFileSnippet[], maxFiles?: number, snippetBudget?: number): string;
 /**
  * Format detected project context into a prompt section so the LLM
  * understands the runtime environment, framework, and architectural role.
  */
 export declare function formatProjectContextSection(projectContext: ProjectContext): string;
-export declare function buildSingleJudgeDeepReviewSection(judge: JudgeDefinition, language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext): string;
-export declare function buildTribunalDeepReviewSection(judges: JudgeDefinition[], language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext): string;
+export declare function buildSingleJudgeDeepReviewSection(judge: JudgeDefinition, language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext, maxPromptChars?: number): string;
+export declare function buildTribunalDeepReviewSection(judges: JudgeDefinition[], language: string, context?: string, relatedFiles?: RelatedFileSnippet[], projectContext?: ProjectContext, maxPromptChars?: number): string;
 export declare function buildSimplifiedDeepReviewSection(language: string, context?: string): string;

package/dist/tools/deep-review.js CHANGED

@@ -54,27 +54,45 @@ export const DEEP_REVIEW_IDENTITY = `I am the Judges Panel — a professional, c
     `My role is to provide evidence-based, constructive recommendations that ` +
     `strengthen code quality, security defenses, and engineering standards. ` +
     `This is NOT a penetration test or adversarial security assessment.`;
+// ─── Token Budget Constants ──────────────────────────────────────────────────
+/** Default max chars for LLM-facing prompt content (~25K tokens). */
+export const DEFAULT_MAX_PROMPT_CHARS = 100_000;
+/** Per-snippet char cap for related files. */
+const MAX_SNIPPET_CHARS = 3_000;
+/** Max related files to include by default. */
+const MAX_RELATED_FILES = 10;
 /**
  * Format related files into a prompt section that gives the LLM cross-file
  * visibility for deeper analysis.
+ *
+ * @param relatedFiles — array of related file snippets
+ * @param maxFiles — max files to include (default: 10). Set to 0 for unlimited.
+ * @param snippetBudget — per-snippet char cap (default: 3000). Set to 0 for unlimited.
  */
-function formatRelatedFilesSection(relatedFiles) {
+export function formatRelatedFilesSection(relatedFiles, maxFiles = MAX_RELATED_FILES, snippetBudget = MAX_SNIPPET_CHARS) {
     if (relatedFiles.length === 0)
         return "";
+    // Apply file count cap (0 = unlimited)
+    const files = maxFiles > 0 ? relatedFiles.slice(0, maxFiles) : relatedFiles;
+    const skipped = relatedFiles.length - files.length;
     let md = `### Related Files\n\n`;
     md += `> The following files are related to the code under review. Use them to `;
     md += `understand cross-file data flow, shared types, imports, and call sites. `;
     md += `These provide context only — focus your findings on the primary code above.\n\n`;
-    for (const f of relatedFiles) {
+    for (const f of files) {
         md += `<details>\n<summary><code>${f.path}</code>`;
         if (f.relationship)
             md += ` — ${f.relationship}`;
         md += `</summary>\n\n`;
-        // Limit snippet size to prevent prompt explosion
-        const truncated = f.snippet.length > 3000 ? f.snippet.slice(0, 3000) + "\n// ... truncated" : f.snippet;
+        // Limit snippet size to prevent prompt explosion (0 = unlimited)
+        const cap = snippetBudget > 0 ? snippetBudget : Infinity;
+        const truncated = f.snippet.length > cap ? f.snippet.slice(0, cap) + "\n// ... truncated" : f.snippet;
         md += `\`\`\`\n${truncated}\n\`\`\`\n`;
         md += `</details>\n\n`;
     }
+    if (skipped > 0) {
+        md += `> *${skipped} additional related file(s) omitted to stay within token budget.*\n\n`;
+    }
     return md;
 }
 // ─── Project Context Section ─────────────────────────────────────────────────
@@ -111,7 +129,13 @@ export function formatProjectContextSection(projectContext) {
     return md;
 }
 // ─── Single-Judge Deep Review ────────────────────────────────────────────────
-export function buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext) {
+export function buildSingleJudgeDeepReviewSection(judge, language, context, relatedFiles, projectContext, maxPromptChars) {
+    const budget = maxPromptChars ?? DEFAULT_MAX_PROMPT_CHARS;
+    const unlimited = budget === 0;
+    const maxRelatedFiles = unlimited ? 0 : MAX_RELATED_FILES;
+    const snippetCap = unlimited ? 0 : MAX_SNIPPET_CHARS;
+    const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
+    const truncatedContext = context && context.length > contextCap ? context.slice(0, contextCap) + "\n… (context truncated)" : context;
     let md = `\n\n---\n\n`;
     md += `## 🔍 Deep Contextual Review Required\n\n`;
     md += DEFENSIVE_PREAMBLE;
@@ -120,14 +144,14 @@ export function buildSingleJudgeDeepReviewSection(judge, language, context, rela
     md += `business logic validity, cross-module data flow, or context-dependent implications.\n>\n`;
     md += `> **You MUST now perform a thorough contextual review** of the ${language} code using the expert criteria below. `;
     md += `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified evaluation.\n\n`;
-    if (context) {
-        md += `**Context provided:** ${context}\n\n`;
+    if (truncatedContext) {
+        md += `**Context provided:** ${truncatedContext}\n\n`;
     }
     if (projectContext) {
         md += formatProjectContextSection(projectContext);
     }
     if (relatedFiles && relatedFiles.length > 0) {
-        md += formatRelatedFilesSection(relatedFiles);
+        md += formatRelatedFilesSection(relatedFiles, maxRelatedFiles, snippetCap);
     }
     md += `### ${judge.name} — ${judge.domain}\n\n`;
     md += `${judge.description}\n\n`;
@@ -160,7 +184,23 @@ export function buildSingleJudgeDeepReviewSection(judge, language, context, rela
     return md;
 }
 // ─── Tribunal Deep Review (full) ─────────────────────────────────────────────
-export function buildTribunalDeepReviewSection(judges, language, context, relatedFiles, projectContext) {
+export function buildTribunalDeepReviewSection(judges, language, context, relatedFiles, projectContext, maxPromptChars) {
+    const budget = maxPromptChars ?? DEFAULT_MAX_PROMPT_CHARS;
+    const unlimited = budget === 0;
+    // When budget is tight, use simplified mode (category-based instead of per-judge)
+    // The full per-judge section is ~15-18K chars for 45 judges. Simplified is ~2K.
+    // Use simplified when remaining budget for judge criteria would be < 5K.
+    const estimatedJudgeCriteriaChars = judges.length * 350;
+    const estimatedOverhead = 4000; // preamble, instructions, format section
+    const estimatedRelatedChars = (relatedFiles?.length ?? 0) * MAX_SNIPPET_CHARS;
+    const budgetForJudges = unlimited ? Infinity : budget - estimatedOverhead - estimatedRelatedChars;
+    const useSimplified = !unlimited && budgetForJudges < estimatedJudgeCriteriaChars;
+    // Determine related files caps
+    const maxRelatedFiles = unlimited ? 0 : MAX_RELATED_FILES;
+    const snippetCap = unlimited ? 0 : MAX_SNIPPET_CHARS;
+    // Truncate context string if it would blow the budget
+    const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
+    const truncatedContext = context && context.length > contextCap ? context.slice(0, contextCap) + "\n… (context truncated)" : context;
     let md = `\n\n---\n\n`;
     md += `## 🔍 Deep Contextual Review Required\n\n`;
     md += DEFENSIVE_PREAMBLE;
@@ -170,21 +210,41 @@ export function buildTribunalDeepReviewSection(judges, language, context, relate
     md += `> **You MUST now perform a thorough contextual review** of the ${language} code from the perspective of ALL ${judges.length} judges below. `;
     md += `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified tribunal verdict.\n\n`;
     md += `> **This is a professional code quality tool.** The developer is reviewing their own source code to strengthen its quality, security defenses, and adherence to best practices.\n\n`;
-    if (context) {
-        md += `**Context provided:** ${context}\n\n`;
+    if (truncatedContext) {
+        md += `**Context provided:** ${truncatedContext}\n\n`;
     }
     if (projectContext) {
         md += formatProjectContextSection(projectContext);
     }
     if (relatedFiles && relatedFiles.length > 0) {
-        md += formatRelatedFilesSection(relatedFiles);
+        md += formatRelatedFilesSection(relatedFiles, maxRelatedFiles, snippetCap);
     }
-    for (const judge of judges) {
-        md += `### ${judge.name} — ${judge.domain}\n\n`;
-        md += `${judge.description}\n\n`;
-        md += `**Rule prefix:** \`${judge.rulePrefix}-\` · **Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance — they are opaque platform constants. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
+    if (useSimplified) {
+        // Compact category-based criteria instead of per-judge listing
+        md += `### Quality Dimensions (${judges.length} judges)\n\n`;
+        md += `> Using compact criteria mode to stay within token budget.\n\n`;
+        // Group judges by domain
+        const domainGroups = new Map();
+        for (const judge of judges) {
+            const domain = judge.domain ?? "general";
+            if (!domainGroups.has(domain))
+                domainGroups.set(domain, []);
+            domainGroups.get(domain).push(`\`${judge.rulePrefix}\` ${judge.name}`);
+        }
+        for (const [domain, names] of domainGroups) {
+            md += `**${domain}:** ${names.join(", ")}\n\n`;
+        }
+        md += `**Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
         md += `---\n\n`;
     }
+    else {
+        for (const judge of judges) {
+            md += `### ${judge.name} — ${judge.domain}\n\n`;
+            md += `${judge.description}\n\n`;
+            md += `**Rule prefix:** \`${judge.rulePrefix}-\` · **Precision Mandate:** Every finding MUST cite specific code evidence. Do NOT flag absent features speculatively. Do NOT validate Azure resource identifiers for strict UUID/GUID hex compliance — they are opaque platform constants. Prefer fewer, high-confidence findings over many uncertain ones.\n\n`;
+            md += `---\n\n`;
+        }
+    }
     md += `### False Positive Review\n\n`;
     md += `Before adding new findings, **review each pattern-based finding above for false positives.** `;
     md += `Static pattern matching can flag code that is actually correct — for example:\n`;

package/dist/tools/register-review.js CHANGED

@@ -436,7 +436,12 @@ function registerReEvaluateWithContext(server) {
         }))
             .optional()
             .describe("Cross-file context for more accurate evaluation"),
-    }, async ({ code, language, disputedRuleIds, acceptedRuleIds, developerContext, focusAreas, confidenceFilter, filePath, deepReview, relatedFiles, }) => {
+        maxPromptChars: z
+            .number()
+            .min(0)
+            .optional()
+            .describe("Maximum character budget for LLM prompts. Controls truncation of source code, related files, and context strings in deep-review prompts. Set to 0 to disable all truncation. Default: 100000."),
+    }, async ({ code, language, disputedRuleIds, acceptedRuleIds, developerContext, focusAreas, confidenceFilter, filePath, deepReview, relatedFiles, maxPromptChars, }) => {
         try {
             // Build context string from developer inputs
             const contextParts = [];
@@ -453,15 +458,24 @@ function registerReEvaluateWithContext(server) {
                 contextParts.push(`Focus areas: ${focusAreas.join(", ")}`);
             }
             const fullContext = contextParts.join("\n");
+            // Apply token budget caps to inputs
+            const budget = maxPromptChars ?? 100_000;
+            const unlimited = budget === 0;
+            const codeCap = unlimited ? Infinity : budget;
+            const contextCap = unlimited ? Infinity : Math.max(2000, Math.floor(budget * 0.1));
+            const cappedCode = code.length > codeCap ? code.slice(0, codeCap) : code;
+            const cappedContext = fullContext.length > contextCap ? fullContext.slice(0, contextCap) + "\n… (context truncated)" : fullContext;
+            const cappedRelatedFiles = !unlimited && relatedFiles && relatedFiles.length > 10 ? relatedFiles.slice(0, 10) : relatedFiles;
             const evalOptions = {
                 autoTune: true,
                 deepReview: deepReview ?? false,
                 confidenceFilter: confidenceFilter ?? 0.5,
                 filePath,
-                relatedFiles,
+                relatedFiles: cappedRelatedFiles,
                 calibrate: true,
+                maxPromptChars: maxPromptChars,
             };
-            const verdict = evaluateWithTribunal(code, language, fullContext || undefined, evalOptions);
+            const verdict = evaluateWithTribunal(cappedCode, language, cappedContext || undefined, evalOptions);
             // Post-process: mark disputed findings
             let findings = verdict.findings;
             if (disputedRuleIds && disputedRuleIds.length > 0) {

package/dist/tools/register-workflow.js CHANGED

@@ -946,12 +946,18 @@ function registerEvaluateGitDiff(server) {
             .boolean()
             .optional()
             .describe("Apply feedback-driven auto-tuning to reduce false positives (default: false)"),
+        maxPromptChars: z
+            .number()
+            .min(0)
+            .optional()
+            .describe("Maximum character budget for LLM prompts. Controls truncation of deep-review prompts. Set to 0 to disable all truncation. Default: 100000."),
         config: configSchema,
-    }, async ({ repoPath, base, diffText, confidenceFilter, autoTune, config }) => {
+    }, async ({ repoPath, base, diffText, confidenceFilter, autoTune, maxPromptChars, config }) => {
         try {
             const evalOptions = {
                 confidenceFilter,
                 autoTune,
+                maxPromptChars,
                 config: toJudgesConfig(config),
             };
             let result;

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@kevinrabun/judges",
-  "version": "3.118.0",
+  "version": "3.119.0",
   "description": "45 specialized judges that evaluate AI-generated code for security, cost, and quality.",
   "mcpName": "io.github.KevinRabun/judges",
   "type": "module",

package/server.json CHANGED

@@ -7,12 +7,12 @@
     "url": "https://github.com/kevinrabun/judges",
     "source": "github"
   },
-  "version": "3.118.0",
+  "version": "3.119.0",
   "packages": [
     {
       "registryType": "npm",
       "identifier": "@kevinrabun/judges",
-      "version": "3.118.0",
+      "version": "3.119.0",
       "transport": {
         "type": "stdio"
       }