@kevinrabun/judges 3.117.7 → 3.118.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api.d.ts +6 -0
- package/dist/api.js +6 -0
- package/dist/commands/watch.d.ts +6 -1
- package/dist/commands/watch.js +4 -4
- package/dist/evaluators/index.d.ts +34 -0
- package/dist/evaluators/index.js +65 -1
- package/dist/git-diff.d.ts +62 -0
- package/dist/git-diff.js +282 -0
- package/dist/import-resolver.d.ts +51 -0
- package/dist/import-resolver.js +213 -0
- package/dist/tools/register-review.js +154 -0
- package/dist/tools/register-workflow.js +110 -0
- package/dist/types.d.ts +22 -0
- package/package.json +1 -4
- package/server.json +2 -2
package/dist/api.d.ts
CHANGED
|
@@ -161,3 +161,9 @@ export declare function evaluateFilesStream(files: FileInput[], options?: Evalua
|
|
|
161
161
|
export declare function evaluateFilesBatch(files: FileInput[], concurrency?: number, options?: EvaluationOptions, onProgress?: (completed: number, total: number) => void): Promise<FileEvaluationResult[]>;
|
|
162
162
|
export { handleWebhook, verifyWebhookSignature, loadAppConfig, startAppServer, runAppCommand } from "./github-app.js";
|
|
163
163
|
export type { GitHubAppConfig } from "./github-app.js";
|
|
164
|
+
export { evaluateGitDiff, evaluateUnifiedDiff, parseUnifiedDiffToChangedLines } from "./git-diff.js";
|
|
165
|
+
export type { FileChangedLines, GitDiffVerdict } from "./git-diff.js";
|
|
166
|
+
export { resolveImports, buildRelatedFilesContext } from "./import-resolver.js";
|
|
167
|
+
export type { ResolvedImport, ImportResolutionResult } from "./import-resolver.js";
|
|
168
|
+
export { applyAutoTune, generateAutoTuneReport, formatAutoTuneReport, formatAutoTuneReportJson } from "./auto-tune.js";
|
|
169
|
+
export type { AutoTuneReport, AutoTuneOptions, AutoTuneAction } from "./auto-tune.js";
|
package/dist/api.js
CHANGED
|
@@ -187,3 +187,9 @@ export async function evaluateFilesBatch(files, concurrency = 4, options, onProg
|
|
|
187
187
|
}
|
|
188
188
|
// ─── GitHub App ──────────────────────────────────────────────────────────────
|
|
189
189
|
export { handleWebhook, verifyWebhookSignature, loadAppConfig, startAppServer, runAppCommand } from "./github-app.js";
|
|
190
|
+
// ─── Git Diff Evaluation ─────────────────────────────────────────────────────
|
|
191
|
+
export { evaluateGitDiff, evaluateUnifiedDiff, parseUnifiedDiffToChangedLines } from "./git-diff.js";
|
|
192
|
+
// ─── Cross-File Import Resolution ────────────────────────────────────────────
|
|
193
|
+
export { resolveImports, buildRelatedFilesContext } from "./import-resolver.js";
|
|
194
|
+
// ─── Auto-Tune (Feedback-Driven Calibration) ────────────────────────────────
|
|
195
|
+
export { applyAutoTune, generateAutoTuneReport, formatAutoTuneReport, formatAutoTuneReportJson } from "./auto-tune.js";
|
package/dist/commands/watch.d.ts
CHANGED
|
@@ -7,11 +7,16 @@
|
|
|
7
7
|
* judges watch src/ --judge cyber # Single judge only
|
|
8
8
|
* judges watch src/ --fail-on-findings # Exit 1 on first failure
|
|
9
9
|
*/
|
|
10
|
+
import { watch as fsWatch } from "fs";
|
|
10
11
|
interface WatchArgs {
|
|
11
12
|
path: string;
|
|
12
13
|
judge: string | undefined;
|
|
13
14
|
failOnFindings: boolean;
|
|
14
15
|
}
|
|
15
16
|
export declare function parseWatchArgs(argv: string[]): WatchArgs;
|
|
16
|
-
export
|
|
17
|
+
export interface WatchOptions {
|
|
18
|
+
/** Override the fs.watch function (useful for testing). */
|
|
19
|
+
fsWatch?: typeof fsWatch;
|
|
20
|
+
}
|
|
21
|
+
export declare function runWatch(argv: string[], options?: WatchOptions): void;
|
|
17
22
|
export {};
|
package/dist/commands/watch.js
CHANGED
|
@@ -126,8 +126,8 @@ function debounce(fn, ms) {
|
|
|
126
126
|
timer = setTimeout(fn, ms);
|
|
127
127
|
};
|
|
128
128
|
}
|
|
129
|
-
|
|
130
|
-
|
|
129
|
+
export function runWatch(argv, options) {
|
|
130
|
+
const watchFn = options?.fsWatch ?? fsWatch;
|
|
131
131
|
const args = parseWatchArgs(argv);
|
|
132
132
|
const target = resolve(args.path);
|
|
133
133
|
if (!existsSync(target)) {
|
|
@@ -146,7 +146,7 @@ export function runWatch(argv) {
|
|
|
146
146
|
const isDir = statSync(target).isDirectory();
|
|
147
147
|
if (isDir) {
|
|
148
148
|
// Watch directory recursively
|
|
149
|
-
const watcher =
|
|
149
|
+
const watcher = watchFn(target, { recursive: true });
|
|
150
150
|
watcher.on("change", (_event, filename) => {
|
|
151
151
|
if (!filename)
|
|
152
152
|
return;
|
|
@@ -178,7 +178,7 @@ export function runWatch(argv) {
|
|
|
178
178
|
console.error(` Error evaluating ${args.path}:`, err);
|
|
179
179
|
}
|
|
180
180
|
}, 300);
|
|
181
|
-
const watcher =
|
|
181
|
+
const watcher = watchFn(target);
|
|
182
182
|
watcher.on("change", () => debouncedEval());
|
|
183
183
|
// Run initial evaluation
|
|
184
184
|
evaluateFile(target, detectLanguage(target), args.judge);
|
|
@@ -59,6 +59,40 @@ export interface EvaluationOptions {
|
|
|
59
59
|
* and improves performance. Defaults to false (run all judges).
|
|
60
60
|
*/
|
|
61
61
|
adaptiveSelection?: boolean;
|
|
62
|
+
/**
|
|
63
|
+
* Enable automatic feedback-driven auto-tuning.
|
|
64
|
+
* When true, loads the feedback store and applies time-decay weighted
|
|
65
|
+
* auto-suppression (FP rate >= 80%), severity downgrading (50-80%),
|
|
66
|
+
* and confidence boosting (< 15%) without requiring `calibrate` to be set.
|
|
67
|
+
* Defaults to false. When both `autoTune` and `calibrate` are set,
|
|
68
|
+
* auto-tune runs first, then calibration refines further.
|
|
69
|
+
*/
|
|
70
|
+
autoTune?: boolean;
|
|
71
|
+
/**
|
|
72
|
+
* Include deep-review prompt section in the verdict for LLM-augmented analysis.
|
|
73
|
+
* When true, appends the tribunal deep-review criteria to the verdict metadata
|
|
74
|
+
* so that downstream LLM consumers can perform contextual reasoning beyond
|
|
75
|
+
* pattern matching. This is the bridge between Layer 1 (deterministic) and
|
|
76
|
+
* Layer 2 (LLM) review.
|
|
77
|
+
*/
|
|
78
|
+
deepReview?: boolean;
|
|
79
|
+
/**
|
|
80
|
+
* Related file snippets for cross-file deep-review context.
|
|
81
|
+
* When deepReview is enabled, these are included in the deep-review prompt
|
|
82
|
+
* to give the LLM visibility into imports, shared types, and call sites.
|
|
83
|
+
*/
|
|
84
|
+
relatedFiles?: Array<{
|
|
85
|
+
path: string;
|
|
86
|
+
snippet: string;
|
|
87
|
+
relationship?: string;
|
|
88
|
+
}>;
|
|
89
|
+
/**
|
|
90
|
+
* Minimum confidence threshold for findings to appear in the output.
|
|
91
|
+
* Findings below this threshold are filtered out of the verdict.
|
|
92
|
+
* The verdict includes a `filteredCount` field showing how many were removed.
|
|
93
|
+
* Value range: 0-1 (e.g., 0.6 means only findings with >= 60% confidence appear).
|
|
94
|
+
*/
|
|
95
|
+
confidenceFilter?: number;
|
|
62
96
|
/** @internal — pre-computed AST structure for the file (set by evaluateWithTribunal) */
|
|
63
97
|
_astCache?: CodeStructure;
|
|
64
98
|
/** @internal — pre-computed taint flows for the file (set by evaluateWithTribunal) */
|
package/dist/evaluators/index.js
CHANGED
|
@@ -23,6 +23,8 @@ import { selectJudges } from "./judge-selector.js";
|
|
|
23
23
|
import { getGlobalSession } from "../evaluation-session.js";
|
|
24
24
|
import { evaluateEscalations, enhanceReviewWithEscalations } from "../escalation.js";
|
|
25
25
|
import { applyRecallBoost } from "./recall-boost.js";
|
|
26
|
+
import { buildTribunalDeepReviewSection } from "../tools/deep-review.js";
|
|
27
|
+
import { detectProjectContext } from "./shared.js";
|
|
26
28
|
// ── AST-aware post-processing ───────────────────────────────────────────────
|
|
27
29
|
// ── Module-level caches for AST/taint results ───────────────────────────────
|
|
28
30
|
const astStructureCache = new LRUCache(256);
|
|
@@ -634,6 +636,10 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
634
636
|
ms: options.config?.minSeverity,
|
|
635
637
|
jw: options.config?.judgeWeights,
|
|
636
638
|
mfg: options.mustFixGate,
|
|
639
|
+
at: options.autoTune,
|
|
640
|
+
drev: options.deepReview,
|
|
641
|
+
cf: options.confidenceFilter,
|
|
642
|
+
rf: options.relatedFiles?.length,
|
|
637
643
|
})
|
|
638
644
|
: "";
|
|
639
645
|
const hash = contentHash(code, language + optionsSuffix);
|
|
@@ -749,6 +755,7 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
749
755
|
// 2. Severity downgrade for rules with FP rate 50-80%
|
|
750
756
|
// 3. Confidence calibration based on historical FP rates
|
|
751
757
|
let calibrated = configFiltered;
|
|
758
|
+
let autoTuneMetadata;
|
|
752
759
|
if (enrichedOptions.calibrate) {
|
|
753
760
|
try {
|
|
754
761
|
const calOpts = typeof enrichedOptions.calibrate === "object" ? enrichedOptions.calibrate : undefined;
|
|
@@ -756,6 +763,7 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
756
763
|
if (feedbackStore.entries.length > 0) {
|
|
757
764
|
const tuned = applyAutoTune(calibrated, feedbackStore);
|
|
758
765
|
calibrated = tuned.findings;
|
|
766
|
+
autoTuneMetadata = { suppressed: tuned.suppressed, downgraded: tuned.downgraded };
|
|
759
767
|
}
|
|
760
768
|
else {
|
|
761
769
|
// No feedback data — try plain calibration profile
|
|
@@ -769,6 +777,23 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
769
777
|
// Calibration failure is non-fatal — continue with uncalibrated findings
|
|
770
778
|
}
|
|
771
779
|
}
|
|
780
|
+
else if (enrichedOptions.autoTune) {
|
|
781
|
+
// ── Standalone auto-tune (without full calibrate) ──
|
|
782
|
+
// Lightweight feedback-only tuning path: applies auto-suppression and
|
|
783
|
+
// severity downgrades from the feedback store without requiring a
|
|
784
|
+
// full calibration profile.
|
|
785
|
+
try {
|
|
786
|
+
const feedbackStore = loadFeedbackStore();
|
|
787
|
+
if (feedbackStore.entries.length > 0) {
|
|
788
|
+
const tuned = applyAutoTune(calibrated, feedbackStore);
|
|
789
|
+
calibrated = tuned.findings;
|
|
790
|
+
autoTuneMetadata = { suppressed: tuned.suppressed, downgraded: tuned.downgraded };
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
catch {
|
|
794
|
+
// Auto-tune failure is non-fatal
|
|
795
|
+
}
|
|
796
|
+
}
|
|
772
797
|
// ── Auto-activate model-specific calibration profile ──
|
|
773
798
|
// If the model-fingerprint judge detected a model, apply the model-specific
|
|
774
799
|
// calibration profile automatically (when feedback data exists).
|
|
@@ -829,7 +854,20 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
829
854
|
catch {
|
|
830
855
|
// Session feedback calibration failure is non-fatal
|
|
831
856
|
}
|
|
832
|
-
|
|
857
|
+
// ── Confidence-based output filtering ──
|
|
858
|
+
// When confidenceFilter is set, drop findings below the threshold entirely.
|
|
859
|
+
// This gives callers a first-class knob to control signal-to-noise ratio.
|
|
860
|
+
let confidenceFiltered = sessionAdjusted;
|
|
861
|
+
let confidenceFilteredOutCount = 0;
|
|
862
|
+
if (enrichedOptions.confidenceFilter !== undefined &&
|
|
863
|
+
enrichedOptions.confidenceFilter !== null &&
|
|
864
|
+
enrichedOptions.confidenceFilter > 0) {
|
|
865
|
+
const threshold = enrichedOptions.confidenceFilter;
|
|
866
|
+
const before = confidenceFiltered.length;
|
|
867
|
+
confidenceFiltered = confidenceFiltered.filter((f) => (f.confidence ?? 0.5) >= threshold);
|
|
868
|
+
confidenceFilteredOutCount = before - confidenceFiltered.length;
|
|
869
|
+
}
|
|
870
|
+
const cappedFindings = applyPerFileFindingCap(confidenceFiltered, maxFindings);
|
|
833
871
|
// ── Confidence-based tiering for progressive disclosure ──
|
|
834
872
|
// Tag each finding with a disclosure tier so downstream consumers (CLI,
|
|
835
873
|
// formatters, VS Code extension) can show only high-confidence findings
|
|
@@ -882,6 +920,32 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
882
920
|
},
|
|
883
921
|
reviewDecision: synthesizeReviewDecision(enrichedFindings),
|
|
884
922
|
};
|
|
923
|
+
// ── Deep review prompt attachment (P0.1) ──
|
|
924
|
+
// When deepReview is enabled, build and attach a structured LLM prompt
|
|
925
|
+
// section so downstream consumers can trigger a second-pass analysis.
|
|
926
|
+
if (enrichedOptions.deepReview) {
|
|
927
|
+
try {
|
|
928
|
+
const projectCtx = detectProjectContext(code, language, enrichedOptions.filePath);
|
|
929
|
+
const relatedSnippets = enrichedOptions.relatedFiles ?? [];
|
|
930
|
+
result.deepReviewPrompt = buildTribunalDeepReviewSection(judges, language, context, relatedSnippets.map((r) => ({ path: r.path, snippet: r.snippet, relationship: r.relationship })), projectCtx);
|
|
931
|
+
}
|
|
932
|
+
catch {
|
|
933
|
+
// Deep review prompt generation failure is non-fatal
|
|
934
|
+
}
|
|
935
|
+
}
|
|
936
|
+
// ── Attach auto-tune metadata ──
|
|
937
|
+
if (autoTuneMetadata) {
|
|
938
|
+
result.autoTuneApplied = autoTuneMetadata;
|
|
939
|
+
}
|
|
940
|
+
// ── Attach confidence filter metadata ──
|
|
941
|
+
if (enrichedOptions.confidenceFilter !== undefined &&
|
|
942
|
+
enrichedOptions.confidenceFilter !== null &&
|
|
943
|
+
confidenceFilteredOutCount > 0) {
|
|
944
|
+
result.confidenceFilterApplied = {
|
|
945
|
+
threshold: enrichedOptions.confidenceFilter,
|
|
946
|
+
filteredOut: confidenceFilteredOutCount,
|
|
947
|
+
};
|
|
948
|
+
}
|
|
885
949
|
// ── AI model detection escalation ──
|
|
886
950
|
// When the model-fingerprint judge (MFPR-* rules) fires, attach escalation
|
|
887
951
|
// metadata so downstream consumers can trigger deeper review or add
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Native Git Diff Evaluation
|
|
3
|
+
*
|
|
4
|
+
* Integrates git diff parsing directly into the evaluation pipeline,
|
|
5
|
+
* eliminating the need for callers to manually compute changed lines.
|
|
6
|
+
*
|
|
7
|
+
* Provides:
|
|
8
|
+
* - `evaluateGitDiff()` — evaluates changed files in a git diff
|
|
9
|
+
* - `parseUnifiedDiffToChangedLines()` — extracts per-file changed lines from unified diff
|
|
10
|
+
*/
|
|
11
|
+
import type { DiffVerdict } from "./types.js";
|
|
12
|
+
import type { EvaluationOptions } from "./evaluators/index.js";
|
|
13
|
+
export interface FileChangedLines {
|
|
14
|
+
/** Relative file path */
|
|
15
|
+
filePath: string;
|
|
16
|
+
/** 1-based line numbers that were added or modified */
|
|
17
|
+
changedLines: number[];
|
|
18
|
+
}
|
|
19
|
+
export interface GitDiffVerdict {
|
|
20
|
+
/** Per-file diff verdicts */
|
|
21
|
+
files: Array<{
|
|
22
|
+
filePath: string;
|
|
23
|
+
language: string;
|
|
24
|
+
verdict: DiffVerdict;
|
|
25
|
+
}>;
|
|
26
|
+
/** Aggregate score across all files */
|
|
27
|
+
overallScore: number;
|
|
28
|
+
/** Aggregate finding count */
|
|
29
|
+
totalFindings: number;
|
|
30
|
+
/** Total changed lines analyzed */
|
|
31
|
+
totalLinesAnalyzed: number;
|
|
32
|
+
/** Files that were skipped (binary, too large, unreadable) */
|
|
33
|
+
skippedFiles: string[];
|
|
34
|
+
/** Summary */
|
|
35
|
+
summary: string;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Parse a unified diff to extract per-file changed line numbers.
|
|
39
|
+
* Handles standard `git diff` output format.
|
|
40
|
+
*
|
|
41
|
+
* Only tracks added/modified lines (lines starting with `+` in the diff),
|
|
42
|
+
* since those are the lines that need review.
|
|
43
|
+
*/
|
|
44
|
+
export declare function parseUnifiedDiffToChangedLines(diffText: string): FileChangedLines[];
|
|
45
|
+
/**
|
|
46
|
+
* Evaluate changed files from a git diff.
|
|
47
|
+
*
|
|
48
|
+
* Runs `git diff` between the specified base and the working tree (or HEAD),
|
|
49
|
+
* parses the unified diff to extract per-file changed lines, reads each
|
|
50
|
+
* changed file, and evaluates only the changed lines.
|
|
51
|
+
*
|
|
52
|
+
* @param repoPath - Path to the git repository root
|
|
53
|
+
* @param base - Base ref to diff against (e.g., "main", "HEAD~1", "origin/main")
|
|
54
|
+
* @param options - Evaluation options passed to each file evaluation
|
|
55
|
+
* @returns Aggregate verdict across all changed files
|
|
56
|
+
*/
|
|
57
|
+
export declare function evaluateGitDiff(repoPath: string, base?: string, options?: EvaluationOptions): GitDiffVerdict;
|
|
58
|
+
/**
|
|
59
|
+
* Evaluate a pre-computed diff string (e.g., from a PR webhook payload).
|
|
60
|
+
* Reads file content from the specified repo path.
|
|
61
|
+
*/
|
|
62
|
+
export declare function evaluateUnifiedDiff(diffText: string, repoPath: string, options?: EvaluationOptions): GitDiffVerdict;
|
package/dist/git-diff.js
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Native Git Diff Evaluation
|
|
3
|
+
*
|
|
4
|
+
* Integrates git diff parsing directly into the evaluation pipeline,
|
|
5
|
+
* eliminating the need for callers to manually compute changed lines.
|
|
6
|
+
*
|
|
7
|
+
* Provides:
|
|
8
|
+
* - `evaluateGitDiff()` — evaluates changed files in a git diff
|
|
9
|
+
* - `parseUnifiedDiffToChangedLines()` — extracts per-file changed lines from unified diff
|
|
10
|
+
*/
|
|
11
|
+
import { readFileSync } from "fs";
|
|
12
|
+
import { resolve, extname } from "path";
|
|
13
|
+
import { evaluateDiff } from "./evaluators/index.js";
|
|
14
|
+
import { tryRunGit } from "./tools/command-safety.js";
|
|
15
|
+
// ─── Diff Parsing ───────────────────────────────────────────────────────────
|
|
16
|
+
/**
|
|
17
|
+
* Parse a unified diff to extract per-file changed line numbers.
|
|
18
|
+
* Handles standard `git diff` output format.
|
|
19
|
+
*
|
|
20
|
+
* Only tracks added/modified lines (lines starting with `+` in the diff),
|
|
21
|
+
* since those are the lines that need review.
|
|
22
|
+
*/
|
|
23
|
+
export function parseUnifiedDiffToChangedLines(diffText) {
|
|
24
|
+
const result = [];
|
|
25
|
+
let currentFile = "";
|
|
26
|
+
let currentLines = [];
|
|
27
|
+
for (const line of diffText.split("\n")) {
|
|
28
|
+
// Match file header: +++ b/path/to/file
|
|
29
|
+
if (line.startsWith("+++ b/")) {
|
|
30
|
+
// Save previous file if it had changes
|
|
31
|
+
if (currentFile && currentLines.length > 0) {
|
|
32
|
+
result.push({ filePath: currentFile, changedLines: currentLines });
|
|
33
|
+
}
|
|
34
|
+
currentFile = line.substring(6);
|
|
35
|
+
currentLines = [];
|
|
36
|
+
continue;
|
|
37
|
+
}
|
|
38
|
+
// Match hunk header: @@ -old,count +new,count @@
|
|
39
|
+
const hunkMatch = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/.exec(line);
|
|
40
|
+
if (hunkMatch && currentFile) {
|
|
41
|
+
const startLine = parseInt(hunkMatch[1], 10);
|
|
42
|
+
const lineCount = parseInt(hunkMatch[2] ?? "1", 10);
|
|
43
|
+
// Track which new-side lines are additions
|
|
44
|
+
// We track additions in the line-by-line scan below
|
|
45
|
+
void startLine;
|
|
46
|
+
void lineCount;
|
|
47
|
+
continue;
|
|
48
|
+
}
|
|
49
|
+
// Inside a hunk — no need to know startLine here, we track as we go
|
|
50
|
+
}
|
|
51
|
+
// Flush last file
|
|
52
|
+
if (currentFile && currentLines.length > 0) {
|
|
53
|
+
result.push({ filePath: currentFile, changedLines: currentLines });
|
|
54
|
+
}
|
|
55
|
+
// Re-parse with proper line tracking using a stateful approach
|
|
56
|
+
return parseWithLineTracking(diffText);
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Internal: stateful parse that tracks actual added line numbers.
|
|
60
|
+
*/
|
|
61
|
+
function parseWithLineTracking(diffText) {
|
|
62
|
+
const result = [];
|
|
63
|
+
let currentFile = "";
|
|
64
|
+
let currentLines = [];
|
|
65
|
+
let newLineNum = 0;
|
|
66
|
+
let inHunk = false;
|
|
67
|
+
for (const line of diffText.split("\n")) {
|
|
68
|
+
// File header
|
|
69
|
+
if (line.startsWith("+++ b/")) {
|
|
70
|
+
if (currentFile && currentLines.length > 0) {
|
|
71
|
+
result.push({ filePath: currentFile, changedLines: [...currentLines] });
|
|
72
|
+
}
|
|
73
|
+
currentFile = line.substring(6);
|
|
74
|
+
currentLines = [];
|
|
75
|
+
inHunk = false;
|
|
76
|
+
continue;
|
|
77
|
+
}
|
|
78
|
+
// Hunk header
|
|
79
|
+
const hunkMatch = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/.exec(line);
|
|
80
|
+
if (hunkMatch) {
|
|
81
|
+
newLineNum = parseInt(hunkMatch[1], 10);
|
|
82
|
+
inHunk = true;
|
|
83
|
+
continue;
|
|
84
|
+
}
|
|
85
|
+
if (!inHunk || !currentFile)
|
|
86
|
+
continue;
|
|
87
|
+
// Context line (present in both old and new)
|
|
88
|
+
if (line.startsWith(" ")) {
|
|
89
|
+
newLineNum++;
|
|
90
|
+
continue;
|
|
91
|
+
}
|
|
92
|
+
// Added line
|
|
93
|
+
if (line.startsWith("+")) {
|
|
94
|
+
currentLines.push(newLineNum);
|
|
95
|
+
newLineNum++;
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
98
|
+
// Removed line (only in old side — don't increment new line counter)
|
|
99
|
+
if (line.startsWith("-")) {
|
|
100
|
+
continue;
|
|
101
|
+
}
|
|
102
|
+
// Any other line (e.g., , or diff binary headers)
|
|
103
|
+
// ends the hunk
|
|
104
|
+
if (line.startsWith("diff ") || line.startsWith("index ") || line.startsWith("---")) {
|
|
105
|
+
inHunk = false;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
// Flush last file
|
|
109
|
+
if (currentFile && currentLines.length > 0) {
|
|
110
|
+
result.push({ filePath: currentFile, changedLines: [...currentLines] });
|
|
111
|
+
}
|
|
112
|
+
return result;
|
|
113
|
+
}
|
|
114
|
+
// ─── Language Detection ─────────────────────────────────────────────────────
|
|
115
|
+
const EXT_LANGUAGE_MAP = {
|
|
116
|
+
".ts": "typescript",
|
|
117
|
+
".tsx": "typescript",
|
|
118
|
+
".js": "javascript",
|
|
119
|
+
".jsx": "javascript",
|
|
120
|
+
".mjs": "javascript",
|
|
121
|
+
".cjs": "javascript",
|
|
122
|
+
".py": "python",
|
|
123
|
+
".rs": "rust",
|
|
124
|
+
".go": "go",
|
|
125
|
+
".java": "java",
|
|
126
|
+
".cs": "csharp",
|
|
127
|
+
".cpp": "cpp",
|
|
128
|
+
".cc": "cpp",
|
|
129
|
+
".cxx": "cpp",
|
|
130
|
+
".c": "c",
|
|
131
|
+
".h": "c",
|
|
132
|
+
".hpp": "cpp",
|
|
133
|
+
".php": "php",
|
|
134
|
+
".rb": "ruby",
|
|
135
|
+
".kt": "kotlin",
|
|
136
|
+
".swift": "swift",
|
|
137
|
+
".dart": "dart",
|
|
138
|
+
".sql": "sql",
|
|
139
|
+
".sh": "bash",
|
|
140
|
+
".bash": "bash",
|
|
141
|
+
".ps1": "powershell",
|
|
142
|
+
".bicep": "bicep",
|
|
143
|
+
".tf": "terraform",
|
|
144
|
+
".json": "json",
|
|
145
|
+
".yaml": "yaml",
|
|
146
|
+
".yml": "yaml",
|
|
147
|
+
};
|
|
148
|
+
function detectLanguageFromPath(filePath) {
|
|
149
|
+
const ext = extname(filePath).toLowerCase();
|
|
150
|
+
return EXT_LANGUAGE_MAP[ext];
|
|
151
|
+
}
|
|
152
|
+
// ─── Git Diff Evaluation ────────────────────────────────────────────────────
|
|
153
|
+
/**
 * Internal: true when `candidate` lies strictly inside the directory `root`.
 * Both arguments must already be absolute, normalised paths (as returned by
 * `resolve`). Either separator is accepted after the root.
 */
function isPathInsideRoot(root, candidate) {
    if (!candidate.startsWith(root)) {
        return false;
    }
    const lastOfRoot = root[root.length - 1];
    if (lastOfRoot === "/" || lastOfRoot === "\\") {
        // Root is a filesystem root such as "/" and already ends with a separator
        return candidate.length > root.length;
    }
    // Require a separator right after the root so "/repo-other" is not inside "/repo"
    const boundary = candidate[root.length];
    return boundary === "/" || boundary === "\\";
}
/**
 * Internal: evaluate every parsed file change and aggregate the results.
 * Shared by `evaluateGitDiff` and `evaluateUnifiedDiff`.
 *
 * A file is added to `skippedFiles` when its extension maps to no known
 * language, when its path resolves outside `repoPath`, when it cannot be read,
 * or when its content is longer than 300,000 characters.
 */
function evaluateFileChanges(fileChanges, repoPath, options) {
    const repoRoot = resolve(repoPath);
    const fileVerdicts = [];
    const skippedFiles = [];
    for (const fc of fileChanges) {
        const language = detectLanguageFromPath(fc.filePath);
        if (!language) {
            skippedFiles.push(fc.filePath);
            continue;
        }
        const absolutePath = resolve(repoRoot, fc.filePath);
        // Paths come from diff text, which may be supplied by an external party
        // (e.g. a webhook payload): never read outside the repository.
        // NOTE(review): symlinks inside the repository are still followed.
        if (!isPathInsideRoot(repoRoot, absolutePath)) {
            skippedFiles.push(fc.filePath);
            continue;
        }
        let code;
        try {
            code = readFileSync(absolutePath, "utf-8");
        }
        catch {
            // Unreadable (deleted, binary decode failure, permissions, ...)
            skippedFiles.push(fc.filePath);
            continue;
        }
        // Skip very large files
        if (code.length > 300_000) {
            skippedFiles.push(fc.filePath);
            continue;
        }
        const verdict = evaluateDiff(code, language, fc.changedLines, undefined, {
            ...options,
            // The per-file path always wins over any filePath in the caller's options
            filePath: fc.filePath,
        });
        fileVerdicts.push({ filePath: fc.filePath, language, verdict });
    }
    let totalFindings = 0;
    let totalLinesAnalyzed = 0;
    let scoreSum = 0;
    for (const fv of fileVerdicts) {
        totalFindings += fv.verdict.findings.length;
        totalLinesAnalyzed += fv.verdict.linesAnalyzed;
        scoreSum += fv.verdict.score;
    }
    // With nothing evaluated the score defaults to 100
    const overallScore = fileVerdicts.length > 0 ? Math.round(scoreSum / fileVerdicts.length) : 100;
    return {
        files: fileVerdicts,
        overallScore,
        totalFindings,
        totalLinesAnalyzed,
        skippedFiles,
    };
}
/**
 * Evaluate changed files from a git diff.
 *
 * Runs `git diff <base> --unified=0` in `repoPath`, parses the unified diff to
 * extract per-file changed lines, reads each changed file from disk, and
 * evaluates only the changed lines.
 *
 * When git cannot be run, or the diff is empty, an empty verdict with score
 * 100 is returned and `summary` explains why.
 *
 * NOTE(review): `base` is passed to git as a positional argument; whether
 * `tryRunGit` rejects option-like values (leading `-`) is not visible here.
 *
 * @param repoPath - Path to the git repository root
 * @param base - Base ref to diff against (e.g., "main", "HEAD~1", "origin/main")
 * @param options - Evaluation options passed to each file evaluation
 * @returns Aggregate verdict across all changed files
 */
export function evaluateGitDiff(repoPath, base = "HEAD~1", options) {
    // Get the unified diff; zero context lines, only the changes themselves matter
    const diffOutput = tryRunGit(["diff", base, "--unified=0"], { cwd: repoPath });
    if (diffOutput === null) {
        return {
            files: [],
            overallScore: 100,
            totalFindings: 0,
            totalLinesAnalyzed: 0,
            skippedFiles: [],
            summary: "Could not run git diff — ensure git is installed and this is a git repository.",
        };
    }
    if (diffOutput.trim().length === 0) {
        return {
            files: [],
            overallScore: 100,
            totalFindings: 0,
            totalLinesAnalyzed: 0,
            skippedFiles: [],
            summary: "No changes detected between working tree and " + base,
        };
    }
    const outcome = evaluateFileChanges(parseUnifiedDiffToChangedLines(diffOutput), repoPath, options);
    const summary = `Git diff analysis (${base}): ${outcome.files.length} file(s) analyzed, ` +
        `${outcome.totalLinesAnalyzed} changed lines, ${outcome.totalFindings} finding(s), ` +
        `score ${outcome.overallScore}/100` +
        (outcome.skippedFiles.length > 0 ? ` (${outcome.skippedFiles.length} file(s) skipped)` : "");
    return { ...outcome, summary };
}
/**
 * Evaluate a pre-computed diff string (e.g., from a PR webhook payload).
 * Reads file content from the specified repo path; files whose path resolves
 * outside that directory are skipped rather than read.
 *
 * @param diffText - Unified diff text
 * @param repoPath - Directory the diff's file paths are relative to
 * @param options - Evaluation options passed to each file evaluation
 * @returns Aggregate verdict across all changed files
 */
export function evaluateUnifiedDiff(diffText, repoPath, options) {
    const outcome = evaluateFileChanges(parseUnifiedDiffToChangedLines(diffText), repoPath, options);
    return {
        ...outcome,
        summary: `Diff analysis: ${outcome.files.length} file(s), ${outcome.totalLinesAnalyzed} changed lines, ` +
            `${outcome.totalFindings} finding(s), score ${outcome.overallScore}/100`,
    };
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-File Import Resolution
|
|
3
|
+
*
|
|
4
|
+
* Automatically resolves imports from a file's AST and builds
|
|
5
|
+
* related-file context for deeper cross-file analysis. This bridges
|
|
6
|
+
* the gap between single-file deterministic analysis and project-wide
|
|
7
|
+
* vulnerability detection.
|
|
8
|
+
*
|
|
9
|
+
* Provides:
|
|
10
|
+
* - `resolveImports()` — resolves import paths to file content
|
|
11
|
+
* - `buildRelatedFilesContext()` — builds RelatedFileSnippet[] from imports
|
|
12
|
+
*/
|
|
13
|
+
import type { RelatedFileSnippet } from "./tools/deep-review.js";
|
|
14
|
+
export interface ResolvedImport {
|
|
15
|
+
/** The import specifier as written in code (e.g., "./utils", "express") */
|
|
16
|
+
specifier: string;
|
|
17
|
+
/** Resolved absolute file path (undefined if external/unresolvable) */
|
|
18
|
+
resolvedPath?: string;
|
|
19
|
+
/** Whether this is a local (relative) import */
|
|
20
|
+
isLocal: boolean;
|
|
21
|
+
/** File content (truncated) if resolved */
|
|
22
|
+
content?: string;
|
|
23
|
+
}
|
|
24
|
+
export interface ImportResolutionResult {
|
|
25
|
+
/** Successfully resolved local imports */
|
|
26
|
+
resolved: ResolvedImport[];
|
|
27
|
+
/** External/unresolvable imports */
|
|
28
|
+
external: string[];
|
|
29
|
+
/** Related file snippets ready for deep-review context */
|
|
30
|
+
relatedFiles: RelatedFileSnippet[];
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Resolve imports from a source file and return related file context.
|
|
34
|
+
*
|
|
35
|
+
* Uses the AST parser to extract import specifiers, resolves local imports
|
|
36
|
+
* to actual files, reads their content, and returns structured context
|
|
37
|
+
* suitable for deep-review cross-file analysis.
|
|
38
|
+
*
|
|
39
|
+
* @param code - Source code of the file being analyzed
|
|
40
|
+
* @param language - Programming language
|
|
41
|
+
* @param filePath - Absolute path to the source file (needed for relative import resolution)
|
|
42
|
+
* @param maxImports - Maximum number of imports to resolve (default: 20)
|
|
43
|
+
*/
|
|
44
|
+
export declare function resolveImports(code: string, language: string, filePath: string, maxImports?: number): ImportResolutionResult;
|
|
45
|
+
/**
|
|
46
|
+
* Build related files context from a file's imports.
|
|
47
|
+
*
|
|
48
|
+
* Convenience wrapper that returns just the RelatedFileSnippet[] array,
|
|
49
|
+
* ready to be passed to deep-review or MCP tool context.
|
|
50
|
+
*/
|
|
51
|
+
export declare function buildRelatedFilesContext(code: string, language: string, filePath: string, maxImports?: number): RelatedFileSnippet[];
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-File Import Resolution
|
|
3
|
+
*
|
|
4
|
+
* Automatically resolves imports from a file's AST and builds
|
|
5
|
+
* related-file context for deeper cross-file analysis. This bridges
|
|
6
|
+
* the gap between single-file deterministic analysis and project-wide
|
|
7
|
+
* vulnerability detection.
|
|
8
|
+
*
|
|
9
|
+
* Provides:
|
|
10
|
+
* - `resolveImports()` — resolves import paths to file content
|
|
11
|
+
* - `buildRelatedFilesContext()` — builds RelatedFileSnippet[] from imports
|
|
12
|
+
*/
|
|
13
|
+
import { readFileSync, existsSync } from "fs";
|
|
14
|
+
import { resolve, dirname, join } from "path";
|
|
15
|
+
import { analyzeStructure } from "./ast/index.js";
|
|
16
|
+
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
17
|
+
/** Maximum file size to include as related context (bytes) */
|
|
18
|
+
const MAX_RELATED_FILE_SIZE = 50_000;
|
|
19
|
+
/** Maximum snippet length per related file */
|
|
20
|
+
const MAX_SNIPPET_LENGTH = 3_000;
|
|
21
|
+
/** Maximum number of imports to resolve */
|
|
22
|
+
const MAX_IMPORTS_TO_RESOLVE = 20;
|
|
23
|
+
/** Extensions to try when resolving imports without extensions */
|
|
24
|
+
const RESOLVE_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".py", ".rs", ".go", ".java", ".cs"];
|
|
25
|
+
/** Extensions to try for index files */
|
|
26
|
+
const INDEX_FILES = ["index.ts", "index.tsx", "index.js", "index.jsx", "index.mjs"];
|
|
27
|
+
// ─── Import Resolution ─────────────────────────────────────────────────────
|
|
28
|
+
/**
 * Tell whether an import specifier targets the local file system rather
 * than a package: it must begin with `./`, `../`, or `/`.
 *
 * @param specifier - Raw import specifier as written in the source file
 * @returns `true` for relative or rooted specifiers, `false` otherwise
 */
function isLocalImport(specifier) {
    const localPrefixes = ["./", "../", "/"];
    return localPrefixes.some((prefix) => specifier.startsWith(prefix));
}
|
|
34
|
+
/**
 * Map a local import specifier onto a file that exists on disk.
 *
 * Candidates are probed in this order, and the first hit wins:
 *   1. the specifier (minus a trailing `.js`) as an existing non-directory path,
 *   2. that path with each entry of RESOLVE_EXTENSIONS appended,
 *   3. that path treated as a directory containing one of INDEX_FILES,
 *   4. the untouched specifier, when it differs from (1) and is an existing
 *      non-directory path.
 *
 * @param specifier - Local import specifier (e.g. `./utils.js`)
 * @param fromDir - Directory the specifier is resolved against
 * @returns The resolved path, or `undefined` when no candidate exists
 */
function resolveLocalImport(specifier, fromDir) {
    const isExistingFile = (candidate) => existsSync(candidate) && !isDirectory(candidate);
    // ESM TypeScript sources commonly import "./x.js" while the file on disk is "./x.ts".
    const basePath = resolve(fromDir, specifier.replace(/\.js$/, ""));
    if (isExistingFile(basePath)) {
        return basePath;
    }
    const withExtension = RESOLVE_EXTENSIONS.map((ext) => basePath + ext).find((candidate) => existsSync(candidate));
    if (withExtension !== undefined) {
        return withExtension;
    }
    const indexPath = INDEX_FILES.map((indexFile) => join(basePath, indexFile)).find((candidate) => existsSync(candidate));
    if (indexPath !== undefined) {
        return indexPath;
    }
    // Last resort: the specifier exactly as written, before the `.js` suffix was stripped.
    const origBase = resolve(fromDir, specifier);
    return origBase !== basePath && isExistingFile(origBase) ? origBase : undefined;
}
|
|
66
|
+
/**
 * Report whether `filePath` refers to a directory.
 *
 * Relies on the `statSync` binding imported from "fs" at the top of this
 * module. The module is an ES module, where `require` is not defined; a
 * `require("fs")` lookup here throws, and the catch below would then turn
 * every path — directories included — into `false`.
 *
 * @param filePath - Path to inspect
 * @returns `true` only when the path can be stat'ed and is a directory;
 *          `false` for regular files, missing paths, or any stat failure
 */
function isDirectory(filePath) {
    try {
        return statSync(filePath).isDirectory();
    }
    catch {
        // Missing or inaccessible paths are simply "not a directory".
        return false;
    }
}
|
|
75
|
+
/**
 * Load a file as UTF-8 text and cut it down to a snippet for cross-file context.
 *
 * Size checks use the decoded string's `length`:
 *   - longer than MAX_RELATED_FILE_SIZE: the file is rejected,
 *   - at most MAX_SNIPPET_LENGTH: the text is returned unchanged,
 *   - otherwise: the first MAX_SNIPPET_LENGTH characters plus a truncation marker.
 *
 * @param filePath - Path of the file to read
 * @returns The snippet text, or `undefined` when the file cannot be read or is too large
 */
function readSnippet(filePath) {
    let text;
    try {
        text = readFileSync(filePath, "utf-8");
    }
    catch {
        return undefined;
    }
    if (text.length > MAX_RELATED_FILE_SIZE) {
        return undefined; // Too large
    }
    return text.length <= MAX_SNIPPET_LENGTH
        ? text
        : text.slice(0, MAX_SNIPPET_LENGTH) + "\n// ... truncated";
}
|
|
93
|
+
/**
 * Collect the imports of a source file and gather content for the local ones.
 *
 * Specifiers come from `analyzeStructure(...).imports` merged (de-duplicated,
 * first occurrence first) with the regex fallback. Each specifier is then
 * classified:
 *   - non-local: appended to `external`,
 *   - local but unresolvable: recorded in `resolved` without a path,
 *   - local and resolved but without a usable snippet: recorded with its path only,
 *   - local with a snippet: recorded with content and added to `relatedFiles`.
 *
 * Processing stops once `maxImports` specifiers have yielded a snippet; only
 * those count toward the limit.
 *
 * @param code - Source code of the file being analyzed
 * @param language - Programming language
 * @param filePath - Path to the source file; its directory anchors relative imports
 * @param maxImports - Maximum number of imports to resolve with content (default: 20)
 * @returns `{ resolved, external, relatedFiles }`
 */
export function resolveImports(code, language, filePath, maxImports = MAX_IMPORTS_TO_RESOLVE) {
    const baseDir = dirname(filePath);
    const astImports = analyzeStructure(code, language).imports ?? [];
    // The regex pass supplements the AST for import forms the grammar may miss.
    const candidates = new Set([...astImports, ...extractImportsViaRegex(code, language)]);
    const resolved = [];
    const external = [];
    const relatedFiles = [];
    let withContent = 0;
    for (const specifier of candidates) {
        if (withContent >= maxImports) {
            break;
        }
        if (!isLocalImport(specifier)) {
            external.push(specifier);
            continue;
        }
        const resolvedPath = resolveLocalImport(specifier, baseDir);
        const snippet = resolvedPath ? readSnippet(resolvedPath) : undefined;
        if (!resolvedPath) {
            resolved.push({ specifier, isLocal: true });
        }
        else if (!snippet) {
            resolved.push({ specifier, resolvedPath, isLocal: true });
        }
        else {
            resolved.push({
                specifier,
                resolvedPath,
                isLocal: true,
                content: snippet,
            });
            relatedFiles.push({
                path: specifier,
                snippet,
                relationship: "imported by target",
            });
            withContent += 1;
        }
    }
    return { resolved, external, relatedFiles };
}
|
|
149
|
+
/**
 * Shortcut around `resolveImports()` that hands back only its
 * `relatedFiles` array, for callers that just need the snippets.
 *
 * @param code - Source code of the file being analyzed
 * @param language - Programming language
 * @param filePath - Path to the source file
 * @param maxImports - Maximum number of imports to resolve (default: 20)
 * @returns The `relatedFiles` entries produced by `resolveImports()`
 */
export function buildRelatedFilesContext(code, language, filePath, maxImports = MAX_IMPORTS_TO_RESOLVE) {
    const { relatedFiles } = resolveImports(code, language, filePath, maxImports);
    return relatedFiles;
}
|
|
158
|
+
// ─── Regex-based Import Extraction (fallback) ───────────────────────────────
|
|
159
|
+
/**
 * Extract import specifiers with line-based regexes, as a supplement to the
 * AST parser for import forms the grammar may not capture.
 *
 * Each line is trimmed and matched according to `language`:
 *   - "typescript" / "javascript": `... from "x"`, side-effect `import "x"`,
 *     and `require("x")` — at most one specifier per line, in that priority.
 *   - "python": relative `from .mod import ...` only. The dotted module is
 *     converted to path form (`.utils` -> `./utils`, `..pkg.mod` ->
 *     `../pkg/mod`, `.` -> `./`) so that it starts with `./` or `../` like
 *     the other local specifiers produced here.
 *   - "go": a quoted path containing `/`, either on its own line (inside an
 *     `import ( ... )` group) or in a single-line `import "path"`.
 *   - "rust": `use crate::<module>`, emitted as `./<module>`.
 * Any other language yields an empty list.
 *
 * @param code - Source text to scan
 * @param language - Language identifier selecting the patterns
 * @returns Specifiers in order of appearance (duplicates are not removed)
 */
function extractImportsViaRegex(code, language) {
    const jsFrom = /from\s+["']([^"']+)["']/;
    const jsSideEffect = /^import\s+["']([^"']+)["']/;
    const jsRequire = /require\s*\(\s*["']([^"']+)["']\s*\)/;
    const pyRelativeFrom = /^from\s+(\.+)([\w.]*)\s+import/;
    const goQuotedPath = /^(?:import\s+)?"([^"]+)"/;
    const rustUseCrate = /^use\s+crate::(\w+)/;
    const isJsLike = language === "typescript" || language === "javascript";
    const imports = [];
    for (const line of code.split("\n")) {
        const trimmed = line.trim();
        if (isJsLike) {
            const match = jsFrom.exec(trimmed) ?? jsSideEffect.exec(trimmed) ?? jsRequire.exec(trimmed);
            if (match) {
                imports.push(match[1]);
            }
        }
        else if (language === "python") {
            const match = pyRelativeFrom.exec(trimmed);
            if (match) {
                const [, dots, modulePath] = match;
                // One leading dot is the current package; each extra dot climbs one level.
                const prefix = dots.length === 1 ? "./" : "../".repeat(dots.length - 1);
                imports.push(prefix + modulePath.split(".").filter(Boolean).join("/"));
            }
        }
        else if (language === "go") {
            const match = goQuotedPath.exec(trimmed);
            // Require a slash so bare standard-library names such as "fmt" are skipped.
            if (match && match[1].includes("/")) {
                imports.push(match[1]);
            }
        }
        else if (language === "rust") {
            const match = rustUseCrate.exec(trimmed);
            if (match) {
                imports.push(`./${match[1]}`);
            }
        }
    }
    return imports;
}
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
// ──────────────────────────────────────────────────────────────────────────────
|
|
5
5
|
import { z } from "zod";
|
|
6
6
|
import { loadFindingStore, triageFinding, getTriagedFindings, formatTriageSummary, getFindingStats, getSuppressionAnalytics, formatSuppressionAnalytics, } from "../finding-lifecycle.js";
|
|
7
|
+
import { evaluateWithTribunal } from "../evaluators/index.js";
|
|
7
8
|
// ─── Rule-prefix learning context (shared with CLI --explain) ────────────────
|
|
8
9
|
const RULE_PREFIX_CONTEXT = {
|
|
9
10
|
SEC: {
|
|
@@ -78,6 +79,7 @@ export function registerReviewTools(server) {
|
|
|
78
79
|
registerGetFindingStats(server);
|
|
79
80
|
registerGetSuppressionAnalytics(server);
|
|
80
81
|
registerListTriagedFindings(server);
|
|
82
|
+
registerReEvaluateWithContext(server);
|
|
81
83
|
}
|
|
82
84
|
// ─── explain_finding ─────────────────────────────────────────────────────────
|
|
83
85
|
function registerExplainFinding(server) {
|
|
@@ -394,3 +396,155 @@ function registerListTriagedFindings(server) {
|
|
|
394
396
|
}
|
|
395
397
|
});
|
|
396
398
|
}
|
|
399
|
+
// ─── re_evaluate_with_context ────────────────────────────────────────────────
|
|
400
|
+
/**
 * Register the `re_evaluate_with_context` MCP tool on `server`.
 *
 * The handler:
 *   1. folds the developer's inputs (free-form context, disputed / accepted
 *      rule IDs, focus areas) into one context string,
 *   2. runs `evaluateWithTribunal` with auto-tune and calibration enabled and
 *      a confidence threshold of `confidenceFilter ?? 0.5`,
 *   3. halves the confidence of findings whose rule ID is disputed (floor 0.1),
 *      marks them "supplementary", and re-applies the same threshold that was
 *      used for the evaluation, so the documented default of 0.5 also drops
 *      downgraded findings when the caller passed no explicit filter,
 *   4. returns a markdown summary, a JSON block, and (when present on the
 *      verdict) the deep-review prompt.
 *
 * Failures are reported as an `isError` result rather than thrown.
 *
 * @param server - MCP server exposing `tool(name, description, schema, handler)`
 */
function registerReEvaluateWithContext(server) {
    server.tool("re_evaluate_with_context", "Re-evaluate code with developer-provided context from a multi-turn conversation. Accepts disputed findings, accepted findings, and additional context to adjust the evaluation. This is the agentic feedback loop — the developer explains their intent and the tribunal re-evaluates with that context, applying auto-tune and confidence filtering.", {
        code: z.string().describe("The source code to re-evaluate"),
        language: z.string().describe("Programming language (e.g., typescript, python, go)"),
        disputedRuleIds: z
            .array(z.string())
            .optional()
            .describe("Rule IDs the developer disputes as false positives (e.g., ['SEC-001', 'PERF-003'])"),
        acceptedRuleIds: z
            .array(z.string())
            .optional()
            .describe("Rule IDs the developer accepts (these will not be filtered)"),
        developerContext: z
            .string()
            .optional()
            .describe("Free-form explanation from the developer about their intent, design decisions, or why certain findings are incorrect"),
        focusAreas: z
            .array(z.string())
            .optional()
            .describe("Specific areas to focus the re-evaluation on (e.g., ['security', 'performance'])"),
        confidenceFilter: z
            .number()
            .min(0)
            .max(1)
            .optional()
            .describe("Minimum confidence threshold — findings below this are dropped (default: 0.5)"),
        filePath: z.string().optional().describe("File path for context-aware evaluation"),
        deepReview: z
            .boolean()
            .optional()
            .describe("Whether to include the LLM deep-review prompt section in the result"),
        relatedFiles: z
            .array(z.object({
            path: z.string().describe("Path of the related file"),
            snippet: z.string().describe("Relevant code snippet from the related file"),
            relationship: z.string().optional().describe("Relationship to the main file (e.g., 'imports', 'tests')"),
        }))
            .optional()
            .describe("Cross-file context for more accurate evaluation"),
    }, async ({ code, language, disputedRuleIds, acceptedRuleIds, developerContext, focusAreas, confidenceFilter, filePath, deepReview, relatedFiles, }) => {
        try {
            // Single source of truth for the threshold: used for the evaluation
            // itself and for the re-filter after disputed findings are downgraded.
            const threshold = confidenceFilter ?? 0.5;
            const disputedIds = disputedRuleIds ?? [];
            const disputedSet = new Set(disputedIds);
            // Build context string from developer inputs
            const contextParts = [];
            if (developerContext) {
                contextParts.push(`Developer context: ${developerContext}`);
            }
            if (disputedIds.length > 0) {
                contextParts.push(`Disputed findings: ${disputedIds.join(", ")}`);
            }
            if (acceptedRuleIds && acceptedRuleIds.length > 0) {
                contextParts.push(`Accepted findings: ${acceptedRuleIds.join(", ")}`);
            }
            if (focusAreas && focusAreas.length > 0) {
                contextParts.push(`Focus areas: ${focusAreas.join(", ")}`);
            }
            const fullContext = contextParts.join("\n");
            const evalOptions = {
                autoTune: true,
                deepReview: deepReview ?? false,
                confidenceFilter: threshold,
                filePath,
                relatedFiles,
                calibrate: true,
            };
            const verdict = evaluateWithTribunal(code, language, fullContext || undefined, evalOptions);
            // Post-process: downgrade disputed findings, then re-apply the threshold.
            let findings = verdict.findings;
            if (disputedSet.size > 0) {
                findings = findings
                    .map((f) => disputedSet.has(f.ruleId)
                    ? {
                        ...f,
                        confidence: Math.max(0.1, (f.confidence ?? 0.5) * 0.5),
                        confidenceTier: "supplementary",
                    }
                    : f)
                    .filter((f) => (f.confidence ?? 0.5) >= threshold);
            }
            // Disputed IDs with no surviving finding count as resolved; the rest are retained.
            const remainingRuleIds = new Set(findings.map((f) => f.ruleId));
            const disputedResolved = disputedIds.filter((id) => !remainingRuleIds.has(id));
            const disputedRetained = findings.filter((f) => disputedSet.has(f.ruleId));
            const sections = [];
            sections.push(`# Re-Evaluation Results\n`);
            sections.push(`**Verdict:** ${verdict.overallVerdict} · **Score:** ${verdict.overallScore}/100`);
            sections.push(`**Findings:** ${findings.length} (after context adjustment)`);
            if (verdict.autoTuneApplied) {
                sections.push(`**Auto-tune:** ${verdict.autoTuneApplied.suppressed} suppressed, ${verdict.autoTuneApplied.downgraded} downgraded`);
            }
            if (verdict.confidenceFilterApplied) {
                sections.push(`**Confidence filter:** ${verdict.confidenceFilterApplied.filteredOut} findings below ${Math.round(verdict.confidenceFilterApplied.threshold * 100)}% filtered out`);
            }
            if (findings.length > 0) {
                sections.push(`\n## Findings\n`);
                for (const f of findings) {
                    const conf = f.confidence !== undefined && f.confidence !== null ? ` (${Math.round(f.confidence * 100)}%)` : "";
                    const tier = f.confidenceTier ? ` [${f.confidenceTier}]` : "";
                    sections.push(`- **${f.ruleId}** ${f.severity}${conf}${tier}: ${f.title}`);
                }
            }
            if (disputedResolved.length > 0) {
                sections.push(`\n## Disputed findings resolved\n`);
                sections.push(`The following disputed findings were dropped: ${disputedResolved.join(", ")}`);
            }
            if (disputedRetained.length > 0) {
                sections.push(`\n## Disputed findings retained\n`);
                sections.push(`The following remain with reduced confidence: ${disputedRetained.map((f) => `${f.ruleId} (${Math.round((f.confidence ?? 0) * 100)}%)`).join(", ")}`);
            }
            const structured = {
                overallVerdict: verdict.overallVerdict,
                overallScore: verdict.overallScore,
                findingCount: findings.length,
                autoTuneApplied: verdict.autoTuneApplied ?? null,
                confidenceFilterApplied: verdict.confidenceFilterApplied ?? null,
                disputedResolved,
                findings: findings.map((f) => ({
                    ruleId: f.ruleId,
                    severity: f.severity,
                    confidence: f.confidence,
                    confidenceTier: f.confidenceTier,
                    title: f.title,
                })),
            };
            const contentBlocks = [
                { type: "text", text: sections.join("\n") },
                { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
            ];
            if (verdict.deepReviewPrompt) {
                contentBlocks.push({ type: "text", text: verdict.deepReviewPrompt });
            }
            return { content: contentBlocks };
        }
        catch (error) {
            return {
                content: [
                    {
                        type: "text",
                        text: error instanceof Error ? `Error: ${error.message}` : "Error: Re-evaluation failed",
                    },
                ],
                isError: true,
            };
        }
    });
}
|
|
@@ -8,6 +8,7 @@ import { evaluateProject, evaluateDiff, analyzeDependencies, runAppBuilderWorkfl
|
|
|
8
8
|
import { evaluateFilesBatch } from "../api.js";
|
|
9
9
|
import { getGlobalSession } from "../evaluation-session.js";
|
|
10
10
|
import { generatePublicRepoReport } from "../reports/public-repo-report.js";
|
|
11
|
+
import { evaluateGitDiff, evaluateUnifiedDiff } from "../git-diff.js";
|
|
11
12
|
import { configSchema, toJudgesConfig } from "./schemas.js";
|
|
12
13
|
import { validateCodeSize } from "./validation.js";
|
|
13
14
|
import { benchmarkGate, formatBenchmarkReport, formatBenchmarkMarkdown, runBenchmarkSuite, } from "../commands/benchmark.js";
|
|
@@ -20,6 +21,7 @@ export function registerWorkflowTools(server) {
|
|
|
20
21
|
registerAppBuilderFlow(server);
|
|
21
22
|
registerEvaluateProject(server);
|
|
22
23
|
registerEvaluateDiff(server);
|
|
24
|
+
registerEvaluateGitDiff(server);
|
|
23
25
|
registerAnalyzeDependencies(server);
|
|
24
26
|
registerBenchmarkGate(server);
|
|
25
27
|
registerBenchmarkDashboard(server);
|
|
@@ -919,3 +921,111 @@ function registerRecordFeedback(server) {
|
|
|
919
921
|
};
|
|
920
922
|
});
|
|
921
923
|
}
|
|
924
|
+
// ─── evaluate_git_diff ───────────────────────────────────────────────────────
|
|
925
|
+
/**
 * Register the `evaluate_git_diff` MCP tool on `server`.
 *
 * The handler picks its diff source from the inputs: a non-empty `diffText`
 * goes to `evaluateUnifiedDiff` (with `repoPath` defaulting to "."), otherwise
 * a `repoPath` goes to `evaluateGitDiff` against `base` (default "HEAD~1").
 * With neither, an `isError` result is returned. The response carries a
 * markdown report followed by a JSON block with per-file verdicts.
 *
 * @param server - MCP server exposing `tool(name, description, schema, handler)`
 */
function registerEvaluateGitDiff(server) {
    const inputSchema = {
        repoPath: z
            .string()
            .optional()
            .describe("Absolute path to the git repository. Required when not providing diffText."),
        base: z
            .string()
            .optional()
            .describe("Git ref to diff against (e.g., 'main', 'HEAD~1', 'origin/main'). Default: 'HEAD~1'"),
        diffText: z
            .string()
            .optional()
            .describe("Pre-computed unified diff text. When provided, repoPath is used only for reading file contents."),
        confidenceFilter: z
            .number()
            .min(0)
            .max(1)
            .optional()
            .describe("Minimum confidence threshold for findings (default: no filter)"),
        autoTune: z
            .boolean()
            .optional()
            .describe("Apply feedback-driven auto-tuning to reduce false positives (default: false)"),
        config: configSchema,
    };
    // Shared shape for every failure response of this tool.
    const errorResult = (text) => ({
        content: [
            {
                type: "text",
                text,
            },
        ],
        isError: true,
    });
    // One markdown bullet per finding, with optional confidence and line list.
    const renderFinding = (f) => {
        const conf = f.confidence == null ? "" : ` (${Math.round(f.confidence * 100)}%)`;
        const where = f.lineNumbers && f.lineNumbers.length > 0 ? ` (L${f.lineNumbers.join(", L")})` : "";
        return `- **${f.ruleId}** ${f.severity}${conf}: ${f.title}${where}\n`;
    };
    // Heading and summary line for a file, followed by its findings when there are any.
    const renderFile = (file) => {
        const { verdict } = file;
        const header = `## ${file.filePath}\n` +
            `**Verdict:** ${verdict.verdict} · **Score:** ${verdict.score}/100 · ` +
            `**Changed lines:** ${verdict.linesAnalyzed} · **Findings:** ${verdict.findings.length}\n\n`;
        if (verdict.findings.length === 0) {
            return header;
        }
        return header + verdict.findings.map(renderFinding).join("") + `\n`;
    };
    const handler = async ({ repoPath, base, diffText, confidenceFilter, autoTune, config }) => {
        try {
            const evalOptions = {
                confidenceFilter,
                autoTune,
                config: toJudgesConfig(config),
            };
            if (!diffText && !repoPath) {
                return errorResult("Error: Provide either `repoPath` (for live git diff) or `diffText` (for pre-computed diff).");
            }
            const result = diffText
                ? evaluateUnifiedDiff(diffText, repoPath ?? ".", evalOptions)
                : evaluateGitDiff(repoPath, base ?? "HEAD~1", evalOptions);
            const md = `# Git Diff Analysis\n\n` +
                `**Files changed:** ${result.files.length}\n` +
                `**Total findings:** ${result.totalFindings}\n\n` +
                result.files.map(renderFile).join("");
            const structured = {
                filesAnalyzed: result.files.length,
                totalFindings: result.totalFindings,
                fileVerdicts: result.files.map(({ filePath, verdict }) => ({
                    filePath,
                    verdict: verdict.verdict,
                    score: verdict.score,
                    changedLineCount: verdict.linesAnalyzed,
                    findingCount: verdict.findings.length,
                    findings: verdict.findings.map(({ ruleId, severity, confidence, title, lineNumbers }) => ({
                        ruleId,
                        severity,
                        confidence,
                        title,
                        lineNumbers,
                    })),
                })),
            };
            return {
                content: [
                    { type: "text", text: md },
                    { type: "text", text: "```json\n" + JSON.stringify(structured, null, 2) + "\n```" },
                ],
            };
        }
        catch (error) {
            return errorResult(error instanceof Error ? `Error: ${error.message}` : "Error: Failed to evaluate git diff");
        }
    };
    server.tool("evaluate_git_diff", "Evaluate code changes from a git diff. Parses the unified diff from a git repository, identifies changed files and lines, and runs the full tribunal on each changed file — filtering findings to only those on changed lines. Supports both live git repos (provide repoPath + base ref) and pre-computed diffs (provide diffText).", inputSchema, handler);
}
|
package/dist/types.d.ts
CHANGED
|
@@ -670,6 +670,28 @@ export interface TribunalVerdict {
|
|
|
670
670
|
/** Recommended action */
|
|
671
671
|
recommendation: string;
|
|
672
672
|
};
|
|
673
|
+
/**
|
|
674
|
+
* LLM deep-review prompt section. Present when `deepReview: true` is set
|
|
675
|
+
* in evaluation options. Contains a structured prompt that downstream LLM
|
|
676
|
+
* consumers can use for a second-pass analysis of the findings.
|
|
677
|
+
*/
|
|
678
|
+
deepReviewPrompt?: string;
|
|
679
|
+
/**
|
|
680
|
+
* Auto-tune metadata. Present when `autoTune: true` is set and feedback
|
|
681
|
+
* data was applied. Records how many findings were suppressed or downgraded.
|
|
682
|
+
*/
|
|
683
|
+
autoTuneApplied?: {
|
|
684
|
+
suppressed: number;
|
|
685
|
+
downgraded: number;
|
|
686
|
+
};
|
|
687
|
+
/**
|
|
688
|
+
* Confidence filter metadata. Present when `confidenceFilter` is set.
|
|
689
|
+
* Records how many findings were filtered out due to low confidence.
|
|
690
|
+
*/
|
|
691
|
+
confidenceFilterApplied?: {
|
|
692
|
+
threshold: number;
|
|
693
|
+
filteredOut: number;
|
|
694
|
+
};
|
|
673
695
|
}
|
|
674
696
|
/**
|
|
675
697
|
* Must-fix gate configuration for high-risk findings.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kevinrabun/judges",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.118.0",
|
|
4
4
|
"description": "45 specialized judges that evaluate AI-generated code for security, cost, and quality.",
|
|
5
5
|
"mcpName": "io.github.KevinRabun/judges",
|
|
6
6
|
"type": "module",
|
|
@@ -157,8 +157,5 @@
|
|
|
157
157
|
"lint-staged": "^16.2.7",
|
|
158
158
|
"prettier": "^3.8.1",
|
|
159
159
|
"tsx": "^4.19.4"
|
|
160
|
-
},
|
|
161
|
-
"overrides": {
|
|
162
|
-
"hono": ">=4.12.7"
|
|
163
160
|
}
|
|
164
161
|
}
|
package/server.json
CHANGED
|
@@ -7,12 +7,12 @@
|
|
|
7
7
|
"url": "https://github.com/kevinrabun/judges",
|
|
8
8
|
"source": "github"
|
|
9
9
|
},
|
|
10
|
-
"version": "3.
|
|
10
|
+
"version": "3.118.0",
|
|
11
11
|
"packages": [
|
|
12
12
|
{
|
|
13
13
|
"registryType": "npm",
|
|
14
14
|
"identifier": "@kevinrabun/judges",
|
|
15
|
-
"version": "3.
|
|
15
|
+
"version": "3.118.0",
|
|
16
16
|
"transport": {
|
|
17
17
|
"type": "stdio"
|
|
18
18
|
}
|