@kevinrabun/judges 3.117.8 → 3.119.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1098,6 +1098,36 @@ Analyze a dependency manifest file for supply-chain risks, version pinning issue
1098
1098
  | `manifestType` | string | yes | File type: `package.json`, `requirements.txt`, etc. |
1099
1099
  | `context` | string | no | Optional context |
1100
1100
 
1101
+ ### `evaluate_git_diff`
1102
+ Evaluate only **changed lines** from a git diff. Provide either `repoPath` for a live git diff or `diffText` for a pre-computed unified diff.
1103
+
1104
+ | Parameter | Type | Required | Description |
1105
+ |-----------|------|----------|-------------|
1106
+ | `repoPath` | string | conditional | Absolute path to the git repository |
1107
+ | `base` | string | no | Git ref to diff against (default: `HEAD~1`) |
1108
+ | `diffText` | string | conditional | Pre-computed unified diff text |
1109
+ | `confidenceFilter` | number | no | Minimum confidence threshold for findings (0–1) |
1110
+ | `autoTune` | boolean | no | Apply feedback-driven auto-tuning (default: false) |
1111
+ | `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
1112
+ | `config` | object | no | Inline configuration |
1113
+
1114
+ ### `re_evaluate_with_context`
1115
+ Re-run the tribunal with **prior findings as context** for iterative refinement. Supports dispute resolution, developer context injection, and focus-area filtering.
1116
+
1117
+ | Parameter | Type | Required | Description |
1118
+ |-----------|------|----------|-------------|
1119
+ | `code` | string | yes | Source code to re-evaluate |
1120
+ | `language` | string | yes | Programming language |
1121
+ | `disputedRuleIds` | string[] | no | Rule IDs the developer disputes as false positives |
1122
+ | `acceptedRuleIds` | string[] | no | Rule IDs the developer accepts |
1123
+ | `developerContext` | string | no | Free-form explanation of developer intent |
1124
+ | `focusAreas` | string[] | no | Specific areas to focus on (e.g., `["security"]`) |
1125
+ | `confidenceFilter` | number | no | Minimum confidence threshold (default: 0.5) |
1126
+ | `filePath` | string | no | File path for context-aware evaluation |
1127
+ | `deepReview` | boolean | no | Include LLM deep-review prompt section |
1128
+ | `relatedFiles` | array | no | Cross-file context `{ path, snippet, relationship? }[]` |
1129
+ | `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
1130
+
1101
1131
  #### Judge IDs
1102
1132
 
1103
1133
  `data-security` · `cybersecurity` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `reliability` · `observability` · `performance` · `compliance` · `data-sovereignty` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias` · `maintainability` · `error-handling` · `authentication` · `database` · `caching` · `configuration-management` · `backwards-compatibility` · `portability` · `ux` · `logging-privacy` · `rate-limiting` · `ci-cd` · `code-structure` · `agent-instructions` · `ai-code-safety` · `framework-safety` · `iac-security` · `false-positive-review`
package/dist/api.d.ts CHANGED
@@ -20,7 +20,7 @@ export { getPreset, composePresets, listPresets, PRESETS } from "./presets.js";
20
20
  export type { Preset } from "./presets.js";
21
21
  export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
22
22
  export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
23
- export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, } from "./tools/deep-review.js";
23
+ export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, formatRelatedFilesSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, DEFAULT_MAX_PROMPT_CHARS, } from "./tools/deep-review.js";
24
24
  export type { RelatedFileSnippet } from "./tools/deep-review.js";
25
25
  export { getCondensedCriteria } from "./tools/prompts.js";
26
26
  export { parseDismissedFindings, recordL2Feedback, loadFeedbackStore, saveFeedbackStore, addFeedback, computeFeedbackStats, getFpRateByRule, mergeFeedbackStores, computeTeamFeedbackStats, formatTeamStatsOutput, } from "./commands/feedback.js";
@@ -161,3 +161,9 @@ export declare function evaluateFilesStream(files: FileInput[], options?: Evalua
161
161
  export declare function evaluateFilesBatch(files: FileInput[], concurrency?: number, options?: EvaluationOptions, onProgress?: (completed: number, total: number) => void): Promise<FileEvaluationResult[]>;
162
162
  export { handleWebhook, verifyWebhookSignature, loadAppConfig, startAppServer, runAppCommand } from "./github-app.js";
163
163
  export type { GitHubAppConfig } from "./github-app.js";
164
+ export { evaluateGitDiff, evaluateUnifiedDiff, parseUnifiedDiffToChangedLines } from "./git-diff.js";
165
+ export type { FileChangedLines, GitDiffVerdict } from "./git-diff.js";
166
+ export { resolveImports, buildRelatedFilesContext } from "./import-resolver.js";
167
+ export type { ResolvedImport, ImportResolutionResult } from "./import-resolver.js";
168
+ export { applyAutoTune, generateAutoTuneReport, formatAutoTuneReport, formatAutoTuneReportJson } from "./auto-tune.js";
169
+ export type { AutoTuneReport, AutoTuneOptions, AutoTuneAction } from "./auto-tune.js";
package/dist/api.js CHANGED
@@ -27,7 +27,7 @@ export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from ".
27
27
  // ─── Cross-File Taint Analysis ───────────────────────────────────────────────
28
28
  export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
29
29
  // ─── Deep Review Prompts ─────────────────────────────────────────────────────
30
- export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, } from "./tools/deep-review.js";
30
+ export { buildSingleJudgeDeepReviewSection, buildTribunalDeepReviewSection, buildSimplifiedDeepReviewSection, formatRelatedFilesSection, isContentPolicyRefusal, DEEP_REVIEW_PROMPT_INTRO, DEEP_REVIEW_IDENTITY, DEFAULT_MAX_PROMPT_CHARS, } from "./tools/deep-review.js";
31
31
  // ─── Prompt Utilities ────────────────────────────────────────────────────────
32
32
  export { getCondensedCriteria } from "./tools/prompts.js";
33
33
  // ─── Feedback & Calibration ─────────────────────────────────────────────────
@@ -187,3 +187,9 @@ export async function evaluateFilesBatch(files, concurrency = 4, options, onProg
187
187
  }
188
188
  // ─── GitHub App ──────────────────────────────────────────────────────────────
189
189
  export { handleWebhook, verifyWebhookSignature, loadAppConfig, startAppServer, runAppCommand } from "./github-app.js";
190
+ // ─── Git Diff Evaluation ─────────────────────────────────────────────────────
191
+ export { evaluateGitDiff, evaluateUnifiedDiff, parseUnifiedDiffToChangedLines } from "./git-diff.js";
192
+ // ─── Cross-File Import Resolution ────────────────────────────────────────────
193
+ export { resolveImports, buildRelatedFilesContext } from "./import-resolver.js";
194
+ // ─── Auto-Tune (Feedback-Driven Calibration) ────────────────────────────────
195
+ export { applyAutoTune, generateAutoTuneReport, formatAutoTuneReport, formatAutoTuneReportJson } from "./auto-tune.js";
@@ -7,11 +7,16 @@
7
7
  * judges watch src/ --judge cyber # Single judge only
8
8
  * judges watch src/ --fail-on-findings # Exit 1 on first failure
9
9
  */
10
+ import { watch as fsWatch } from "fs";
10
11
  interface WatchArgs {
11
12
  path: string;
12
13
  judge: string | undefined;
13
14
  failOnFindings: boolean;
14
15
  }
15
16
  export declare function parseWatchArgs(argv: string[]): WatchArgs;
16
- export declare function runWatch(argv: string[]): void;
17
+ export interface WatchOptions {
18
+ /** Override the fs.watch function (useful for testing). */
19
+ fsWatch?: typeof fsWatch;
20
+ }
21
+ export declare function runWatch(argv: string[], options?: WatchOptions): void;
17
22
  export {};
@@ -126,8 +126,8 @@ function debounce(fn, ms) {
126
126
  timer = setTimeout(fn, ms);
127
127
  };
128
128
  }
129
- // ─── Main Watch Command ────────────────────────────────────────────────────
130
- export function runWatch(argv) {
129
+ export function runWatch(argv, options) {
130
+ const watchFn = options?.fsWatch ?? fsWatch;
131
131
  const args = parseWatchArgs(argv);
132
132
  const target = resolve(args.path);
133
133
  if (!existsSync(target)) {
@@ -146,7 +146,7 @@ export function runWatch(argv) {
146
146
  const isDir = statSync(target).isDirectory();
147
147
  if (isDir) {
148
148
  // Watch directory recursively
149
- const watcher = fsWatch(target, { recursive: true });
149
+ const watcher = watchFn(target, { recursive: true });
150
150
  watcher.on("change", (_event, filename) => {
151
151
  if (!filename)
152
152
  return;
@@ -178,7 +178,7 @@ export function runWatch(argv) {
178
178
  console.error(` Error evaluating ${args.path}:`, err);
179
179
  }
180
180
  }, 300);
181
- const watcher = fsWatch(target);
181
+ const watcher = watchFn(target);
182
182
  watcher.on("change", () => debouncedEval());
183
183
  // Run initial evaluation
184
184
  evaluateFile(target, detectLanguage(target), args.judge);
@@ -59,6 +59,52 @@ export interface EvaluationOptions {
59
59
  * and improves performance. Defaults to false (run all judges).
60
60
  */
61
61
  adaptiveSelection?: boolean;
62
+ /**
63
+ * Enable automatic feedback-driven auto-tuning.
64
+ * When true, loads the feedback store and applies time-decay weighted
65
+ * auto-suppression (FP rate >= 80%), severity downgrading (50-80%),
66
+ * and confidence boosting (< 15%) without requiring `calibrate` to be set.
67
+ * Defaults to false. When both `autoTune` and `calibrate` are set,
68
+ * auto-tune runs first, then calibration refines further.
69
+ */
70
+ autoTune?: boolean;
71
+ /**
72
+ * Include deep-review prompt section in the verdict for LLM-augmented analysis.
73
+ * When true, appends the tribunal deep-review criteria to the verdict metadata
74
+ * so that downstream LLM consumers can perform contextual reasoning beyond
75
+ * pattern matching. This is the bridge between Layer 1 (deterministic) and
76
+ * Layer 2 (LLM) review.
77
+ */
78
+ deepReview?: boolean;
79
+ /**
80
+ * Related file snippets for cross-file deep-review context.
81
+ * When deepReview is enabled, these are included in the deep-review prompt
82
+ * to give the LLM visibility into imports, shared types, and call sites.
83
+ */
84
+ relatedFiles?: Array<{
85
+ path: string;
86
+ snippet: string;
87
+ relationship?: string;
88
+ }>;
89
+ /**
90
+ * Minimum confidence threshold for findings to appear in the output.
91
+ * Findings below this threshold are filtered out of the verdict.
92
+ * The verdict includes a `filteredCount` field showing how many were removed.
93
+ * Value range: 0-1 (e.g., 0.6 means only findings with >= 60% confidence appear).
94
+ */
95
+ confidenceFilter?: number;
96
+ /**
97
+ * Maximum character budget for LLM-facing prompt content.
98
+ * Controls truncation of:
99
+ * - Source code in deep-review prompts (truncated with summary when exceeded)
100
+ * - Related file snippets (array trimmed to fit budget)
101
+ * - Developer context strings (truncated)
102
+ *
103
+ * Defaults to 100_000 (~25K tokens). Set to 0 to disable all truncation
104
+ * (use with caution — large files can produce prompts that exceed model
105
+ * context windows and waste tokens).
106
+ */
107
+ maxPromptChars?: number;
62
108
  /** @internal — pre-computed AST structure for the file (set by evaluateWithTribunal) */
63
109
  _astCache?: CodeStructure;
64
110
  /** @internal — pre-computed taint flows for the file (set by evaluateWithTribunal) */
@@ -23,6 +23,8 @@ import { selectJudges } from "./judge-selector.js";
23
23
  import { getGlobalSession } from "../evaluation-session.js";
24
24
  import { evaluateEscalations, enhanceReviewWithEscalations } from "../escalation.js";
25
25
  import { applyRecallBoost } from "./recall-boost.js";
26
+ import { buildTribunalDeepReviewSection } from "../tools/deep-review.js";
27
+ import { detectProjectContext } from "./shared.js";
26
28
  // ── AST-aware post-processing ───────────────────────────────────────────────
27
29
  // ── Module-level caches for AST/taint results ───────────────────────────────
28
30
  const astStructureCache = new LRUCache(256);
@@ -634,6 +636,10 @@ export function evaluateWithTribunal(code, language, context, options) {
634
636
  ms: options.config?.minSeverity,
635
637
  jw: options.config?.judgeWeights,
636
638
  mfg: options.mustFixGate,
639
+ at: options.autoTune,
640
+ drev: options.deepReview,
641
+ cf: options.confidenceFilter,
642
+ rf: options.relatedFiles?.length,
637
643
  })
638
644
  : "";
639
645
  const hash = contentHash(code, language + optionsSuffix);
@@ -749,6 +755,7 @@ export function evaluateWithTribunal(code, language, context, options) {
749
755
  // 2. Severity downgrade for rules with FP rate 50-80%
750
756
  // 3. Confidence calibration based on historical FP rates
751
757
  let calibrated = configFiltered;
758
+ let autoTuneMetadata;
752
759
  if (enrichedOptions.calibrate) {
753
760
  try {
754
761
  const calOpts = typeof enrichedOptions.calibrate === "object" ? enrichedOptions.calibrate : undefined;
@@ -756,6 +763,7 @@ export function evaluateWithTribunal(code, language, context, options) {
756
763
  if (feedbackStore.entries.length > 0) {
757
764
  const tuned = applyAutoTune(calibrated, feedbackStore);
758
765
  calibrated = tuned.findings;
766
+ autoTuneMetadata = { suppressed: tuned.suppressed, downgraded: tuned.downgraded };
759
767
  }
760
768
  else {
761
769
  // No feedback data — try plain calibration profile
@@ -769,6 +777,23 @@ export function evaluateWithTribunal(code, language, context, options) {
769
777
  // Calibration failure is non-fatal — continue with uncalibrated findings
770
778
  }
771
779
  }
780
+ else if (enrichedOptions.autoTune) {
781
+ // ── Standalone auto-tune (without full calibrate) ──
782
+ // Lightweight feedback-only tuning path: applies auto-suppression and
783
+ // severity downgrades from the feedback store without requiring a
784
+ // full calibration profile.
785
+ try {
786
+ const feedbackStore = loadFeedbackStore();
787
+ if (feedbackStore.entries.length > 0) {
788
+ const tuned = applyAutoTune(calibrated, feedbackStore);
789
+ calibrated = tuned.findings;
790
+ autoTuneMetadata = { suppressed: tuned.suppressed, downgraded: tuned.downgraded };
791
+ }
792
+ }
793
+ catch {
794
+ // Auto-tune failure is non-fatal
795
+ }
796
+ }
772
797
  // ── Auto-activate model-specific calibration profile ──
773
798
  // If the model-fingerprint judge detected a model, apply the model-specific
774
799
  // calibration profile automatically (when feedback data exists).
@@ -829,7 +854,20 @@ export function evaluateWithTribunal(code, language, context, options) {
829
854
  catch {
830
855
  // Session feedback calibration failure is non-fatal
831
856
  }
832
- const cappedFindings = applyPerFileFindingCap(sessionAdjusted, maxFindings);
857
+ // ── Confidence-based output filtering ──
858
+ // When confidenceFilter is set, drop findings below the threshold entirely.
859
+ // This gives callers a first-class knob to control signal-to-noise ratio.
860
+ let confidenceFiltered = sessionAdjusted;
861
+ let confidenceFilteredOutCount = 0;
862
+ if (enrichedOptions.confidenceFilter !== undefined &&
863
+ enrichedOptions.confidenceFilter !== null &&
864
+ enrichedOptions.confidenceFilter > 0) {
865
+ const threshold = enrichedOptions.confidenceFilter;
866
+ const before = confidenceFiltered.length;
867
+ confidenceFiltered = confidenceFiltered.filter((f) => (f.confidence ?? 0.5) >= threshold);
868
+ confidenceFilteredOutCount = before - confidenceFiltered.length;
869
+ }
870
+ const cappedFindings = applyPerFileFindingCap(confidenceFiltered, maxFindings);
833
871
  // ── Confidence-based tiering for progressive disclosure ──
834
872
  // Tag each finding with a disclosure tier so downstream consumers (CLI,
835
873
  // formatters, VS Code extension) can show only high-confidence findings
@@ -882,6 +920,32 @@ export function evaluateWithTribunal(code, language, context, options) {
882
920
  },
883
921
  reviewDecision: synthesizeReviewDecision(enrichedFindings),
884
922
  };
923
+ // ── Deep review prompt attachment (P0.1) ──
924
+ // When deepReview is enabled, build and attach a structured LLM prompt
925
+ // section so downstream consumers can trigger a second-pass analysis.
926
+ if (enrichedOptions.deepReview) {
927
+ try {
928
+ const projectCtx = detectProjectContext(code, language, enrichedOptions.filePath);
929
+ const relatedSnippets = enrichedOptions.relatedFiles ?? [];
930
+ result.deepReviewPrompt = buildTribunalDeepReviewSection(judges, language, context, relatedSnippets.map((r) => ({ path: r.path, snippet: r.snippet, relationship: r.relationship })), projectCtx, enrichedOptions.maxPromptChars);
931
+ }
932
+ catch {
933
+ // Deep review prompt generation failure is non-fatal
934
+ }
935
+ }
936
+ // ── Attach auto-tune metadata ──
937
+ if (autoTuneMetadata) {
938
+ result.autoTuneApplied = autoTuneMetadata;
939
+ }
940
+ // ── Attach confidence filter metadata ──
941
+ if (enrichedOptions.confidenceFilter !== undefined &&
942
+ enrichedOptions.confidenceFilter !== null &&
943
+ confidenceFilteredOutCount > 0) {
944
+ result.confidenceFilterApplied = {
945
+ threshold: enrichedOptions.confidenceFilter,
946
+ filteredOut: confidenceFilteredOutCount,
947
+ };
948
+ }
885
949
  // ── AI model detection escalation ──
886
950
  // When the model-fingerprint judge (MFPR-* rules) fires, attach escalation
887
951
  // metadata so downstream consumers can trigger deeper review or add
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Native Git Diff Evaluation
3
+ *
4
+ * Integrates git diff parsing directly into the evaluation pipeline,
5
+ * eliminating the need for callers to manually compute changed lines.
6
+ *
7
+ * Provides:
8
+ * - `evaluateGitDiff()` — evaluates changed files in a git diff
9
+ * - `parseUnifiedDiffToChangedLines()` — extracts per-file changed lines from unified diff
10
+ */
11
+ import type { DiffVerdict } from "./types.js";
12
+ import type { EvaluationOptions } from "./evaluators/index.js";
13
+ export interface FileChangedLines {
14
+ /** Relative file path */
15
+ filePath: string;
16
+ /** 1-based line numbers that were added or modified */
17
+ changedLines: number[];
18
+ }
19
+ export interface GitDiffVerdict {
20
+ /** Per-file diff verdicts */
21
+ files: Array<{
22
+ filePath: string;
23
+ language: string;
24
+ verdict: DiffVerdict;
25
+ }>;
26
+ /** Aggregate score across all files */
27
+ overallScore: number;
28
+ /** Aggregate finding count */
29
+ totalFindings: number;
30
+ /** Total changed lines analyzed */
31
+ totalLinesAnalyzed: number;
32
+ /** Files that were skipped (binary, too large, unreadable) */
33
+ skippedFiles: string[];
34
+ /** Summary */
35
+ summary: string;
36
+ }
37
+ /**
38
+ * Parse a unified diff to extract per-file changed line numbers.
39
+ * Handles standard `git diff` output format.
40
+ *
41
+ * Only tracks added/modified lines (lines starting with `+` in the diff),
42
+ * since those are the lines that need review.
43
+ */
44
+ export declare function parseUnifiedDiffToChangedLines(diffText: string): FileChangedLines[];
45
+ /**
46
+ * Evaluate changed files from a git diff.
47
+ *
48
+ * Runs `git diff` between the specified base and the working tree (or HEAD),
49
+ * parses the unified diff to extract per-file changed lines, reads each
50
+ * changed file, and evaluates only the changed lines.
51
+ *
52
+ * @param repoPath - Path to the git repository root
53
+ * @param base - Base ref to diff against (e.g., "main", "HEAD~1", "origin/main")
54
+ * @param options - Evaluation options passed to each file evaluation
55
+ * @returns Aggregate verdict across all changed files
56
+ */
57
+ export declare function evaluateGitDiff(repoPath: string, base?: string, options?: EvaluationOptions): GitDiffVerdict;
58
+ /**
59
+ * Evaluate a pre-computed diff string (e.g., from a PR webhook payload).
60
+ * Reads file content from the specified repo path.
61
+ */
62
+ export declare function evaluateUnifiedDiff(diffText: string, repoPath: string, options?: EvaluationOptions): GitDiffVerdict;
@@ -0,0 +1,282 @@
1
+ /**
2
+ * Native Git Diff Evaluation
3
+ *
4
+ * Integrates git diff parsing directly into the evaluation pipeline,
5
+ * eliminating the need for callers to manually compute changed lines.
6
+ *
7
+ * Provides:
8
+ * - `evaluateGitDiff()` — evaluates changed files in a git diff
9
+ * - `parseUnifiedDiffToChangedLines()` — extracts per-file changed lines from unified diff
10
+ */
11
+ import { readFileSync } from "fs";
12
+ import { resolve, extname } from "path";
13
+ import { evaluateDiff } from "./evaluators/index.js";
14
+ import { tryRunGit } from "./tools/command-safety.js";
15
+ // ─── Diff Parsing ───────────────────────────────────────────────────────────
16
+ /**
17
+ * Parse a unified diff to extract per-file changed line numbers.
18
+ * Handles standard `git diff` output format.
19
+ *
20
+ * Only tracks added/modified lines (lines starting with `+` in the diff),
21
+ * since those are the lines that need review.
22
+ */
23
+ export function parseUnifiedDiffToChangedLines(diffText) {
24
+ const result = [];
25
+ let currentFile = "";
26
+ let currentLines = [];
27
+ for (const line of diffText.split("\n")) {
28
+ // Match file header: +++ b/path/to/file
29
+ if (line.startsWith("+++ b/")) {
30
+ // Save previous file if it had changes
31
+ if (currentFile && currentLines.length > 0) {
32
+ result.push({ filePath: currentFile, changedLines: currentLines });
33
+ }
34
+ currentFile = line.substring(6);
35
+ currentLines = [];
36
+ continue;
37
+ }
38
+ // Match hunk header: @@ -old,count +new,count @@
39
+ const hunkMatch = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/.exec(line);
40
+ if (hunkMatch && currentFile) {
41
+ const startLine = parseInt(hunkMatch[1], 10);
42
+ const lineCount = parseInt(hunkMatch[2] ?? "1", 10);
43
+ // Track which new-side lines are additions
44
+ // We track additions in the line-by-line scan below
45
+ void startLine;
46
+ void lineCount;
47
+ continue;
48
+ }
49
+ // Inside a hunk — no need to know startLine here, we track as we go
50
+ }
51
+ // Flush last file
52
+ if (currentFile && currentLines.length > 0) {
53
+ result.push({ filePath: currentFile, changedLines: currentLines });
54
+ }
55
+ // Re-parse with proper line tracking using a stateful approach
56
+ return parseWithLineTracking(diffText);
57
+ }
58
+ /**
59
+ * Internal: stateful parse that tracks actual added line numbers.
60
+ */
61
+ function parseWithLineTracking(diffText) {
62
+ const result = [];
63
+ let currentFile = "";
64
+ let currentLines = [];
65
+ let newLineNum = 0;
66
+ let inHunk = false;
67
+ for (const line of diffText.split("\n")) {
68
+ // File header
69
+ if (line.startsWith("+++ b/")) {
70
+ if (currentFile && currentLines.length > 0) {
71
+ result.push({ filePath: currentFile, changedLines: [...currentLines] });
72
+ }
73
+ currentFile = line.substring(6);
74
+ currentLines = [];
75
+ inHunk = false;
76
+ continue;
77
+ }
78
+ // Hunk header
79
+ const hunkMatch = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/.exec(line);
80
+ if (hunkMatch) {
81
+ newLineNum = parseInt(hunkMatch[1], 10);
82
+ inHunk = true;
83
+ continue;
84
+ }
85
+ if (!inHunk || !currentFile)
86
+ continue;
87
+ // Context line (present in both old and new)
88
+ if (line.startsWith(" ")) {
89
+ newLineNum++;
90
+ continue;
91
+ }
92
+ // Added line
93
+ if (line.startsWith("+")) {
94
+ currentLines.push(newLineNum);
95
+ newLineNum++;
96
+ continue;
97
+ }
98
+ // Removed line (only in old side — don't increment new line counter)
99
+ if (line.startsWith("-")) {
100
+ continue;
101
+ }
102
+ // Any other line (e.g. "\ No newline" markers or diff binary headers)
103
+ // ends the hunk
104
+ if (line.startsWith("diff ") || line.startsWith("index ") || line.startsWith("---")) {
105
+ inHunk = false;
106
+ }
107
+ }
108
+ // Flush last file
109
+ if (currentFile && currentLines.length > 0) {
110
+ result.push({ filePath: currentFile, changedLines: [...currentLines] });
111
+ }
112
+ return result;
113
+ }
114
+ // ─── Language Detection ─────────────────────────────────────────────────────
115
+ const EXT_LANGUAGE_MAP = {
116
+ ".ts": "typescript",
117
+ ".tsx": "typescript",
118
+ ".js": "javascript",
119
+ ".jsx": "javascript",
120
+ ".mjs": "javascript",
121
+ ".cjs": "javascript",
122
+ ".py": "python",
123
+ ".rs": "rust",
124
+ ".go": "go",
125
+ ".java": "java",
126
+ ".cs": "csharp",
127
+ ".cpp": "cpp",
128
+ ".cc": "cpp",
129
+ ".cxx": "cpp",
130
+ ".c": "c",
131
+ ".h": "c",
132
+ ".hpp": "cpp",
133
+ ".php": "php",
134
+ ".rb": "ruby",
135
+ ".kt": "kotlin",
136
+ ".swift": "swift",
137
+ ".dart": "dart",
138
+ ".sql": "sql",
139
+ ".sh": "bash",
140
+ ".bash": "bash",
141
+ ".ps1": "powershell",
142
+ ".bicep": "bicep",
143
+ ".tf": "terraform",
144
+ ".json": "json",
145
+ ".yaml": "yaml",
146
+ ".yml": "yaml",
147
+ };
148
+ function detectLanguageFromPath(filePath) {
149
+ const ext = extname(filePath).toLowerCase();
150
+ return EXT_LANGUAGE_MAP[ext];
151
+ }
152
+ // ─── Git Diff Evaluation ────────────────────────────────────────────────────
153
+ /**
154
+ * Evaluate changed files from a git diff.
155
+ *
156
+ * Runs `git diff` between the specified base and the working tree (or HEAD),
157
+ * parses the unified diff to extract per-file changed lines, reads each
158
+ * changed file, and evaluates only the changed lines.
159
+ *
160
+ * @param repoPath - Path to the git repository root
161
+ * @param base - Base ref to diff against (e.g., "main", "HEAD~1", "origin/main")
162
+ * @param options - Evaluation options passed to each file evaluation
163
+ * @returns Aggregate verdict across all changed files
164
+ */
165
+ export function evaluateGitDiff(repoPath, base = "HEAD~1", options) {
166
+ // Get the unified diff
167
+ const diffOutput = tryRunGit(["diff", base, "--unified=0"], { cwd: repoPath });
168
+ if (diffOutput === null) {
169
+ return {
170
+ files: [],
171
+ overallScore: 100,
172
+ totalFindings: 0,
173
+ totalLinesAnalyzed: 0,
174
+ skippedFiles: [],
175
+ summary: "Could not run git diff — ensure git is installed and this is a git repository.",
176
+ };
177
+ }
178
+ if (diffOutput.trim().length === 0) {
179
+ return {
180
+ files: [],
181
+ overallScore: 100,
182
+ totalFindings: 0,
183
+ totalLinesAnalyzed: 0,
184
+ skippedFiles: [],
185
+ summary: "No changes detected between working tree and " + base,
186
+ };
187
+ }
188
+ const fileChanges = parseUnifiedDiffToChangedLines(diffOutput);
189
+ const fileVerdicts = [];
190
+ const skippedFiles = [];
191
+ for (const fc of fileChanges) {
192
+ const language = detectLanguageFromPath(fc.filePath);
193
+ if (!language) {
194
+ skippedFiles.push(fc.filePath);
195
+ continue;
196
+ }
197
+ const absolutePath = resolve(repoPath, fc.filePath);
198
+ let code;
199
+ try {
200
+ code = readFileSync(absolutePath, "utf-8");
201
+ }
202
+ catch {
203
+ skippedFiles.push(fc.filePath);
204
+ continue;
205
+ }
206
+ // Skip very large files
207
+ if (code.length > 300_000) {
208
+ skippedFiles.push(fc.filePath);
209
+ continue;
210
+ }
211
+ const verdict = evaluateDiff(code, language, fc.changedLines, undefined, {
212
+ ...options,
213
+ filePath: fc.filePath,
214
+ });
215
+ fileVerdicts.push({ filePath: fc.filePath, language, verdict });
216
+ }
217
+ const totalFindings = fileVerdicts.reduce((sum, fv) => sum + fv.verdict.findings.length, 0);
218
+ const totalLinesAnalyzed = fileVerdicts.reduce((sum, fv) => sum + fv.verdict.linesAnalyzed, 0);
219
+ const overallScore = fileVerdicts.length > 0
220
+ ? Math.round(fileVerdicts.reduce((sum, fv) => sum + fv.verdict.score, 0) / fileVerdicts.length)
221
+ : 100;
222
+ const summary = `Git diff analysis (${base}): ${fileVerdicts.length} file(s) analyzed, ` +
223
+ `${totalLinesAnalyzed} changed lines, ${totalFindings} finding(s), ` +
224
+ `score ${overallScore}/100` +
225
+ (skippedFiles.length > 0 ? ` (${skippedFiles.length} file(s) skipped)` : "");
226
+ return {
227
+ files: fileVerdicts,
228
+ overallScore,
229
+ totalFindings,
230
+ totalLinesAnalyzed,
231
+ skippedFiles,
232
+ summary,
233
+ };
234
+ }
235
+ /**
236
+ * Evaluate a pre-computed diff string (e.g., from a PR webhook payload).
237
+ * Reads file content from the specified repo path.
238
+ */
239
+ export function evaluateUnifiedDiff(diffText, repoPath, options) {
240
+ const fileChanges = parseUnifiedDiffToChangedLines(diffText);
241
+ const fileVerdicts = [];
242
+ const skippedFiles = [];
243
+ for (const fc of fileChanges) {
244
+ const language = detectLanguageFromPath(fc.filePath);
245
+ if (!language) {
246
+ skippedFiles.push(fc.filePath);
247
+ continue;
248
+ }
249
+ const absolutePath = resolve(repoPath, fc.filePath);
250
+ let code;
251
+ try {
252
+ code = readFileSync(absolutePath, "utf-8");
253
+ }
254
+ catch {
255
+ skippedFiles.push(fc.filePath);
256
+ continue;
257
+ }
258
+ if (code.length > 300_000) {
259
+ skippedFiles.push(fc.filePath);
260
+ continue;
261
+ }
262
+ const verdict = evaluateDiff(code, language, fc.changedLines, undefined, {
263
+ ...options,
264
+ filePath: fc.filePath,
265
+ });
266
+ fileVerdicts.push({ filePath: fc.filePath, language, verdict });
267
+ }
268
+ const totalFindings = fileVerdicts.reduce((sum, fv) => sum + fv.verdict.findings.length, 0);
269
+ const totalLinesAnalyzed = fileVerdicts.reduce((sum, fv) => sum + fv.verdict.linesAnalyzed, 0);
270
+ const overallScore = fileVerdicts.length > 0
271
+ ? Math.round(fileVerdicts.reduce((sum, fv) => sum + fv.verdict.score, 0) / fileVerdicts.length)
272
+ : 100;
273
+ return {
274
+ files: fileVerdicts,
275
+ overallScore,
276
+ totalFindings,
277
+ totalLinesAnalyzed,
278
+ skippedFiles,
279
+ summary: `Diff analysis: ${fileVerdicts.length} file(s), ${totalLinesAnalyzed} changed lines, ` +
280
+ `${totalFindings} finding(s), score ${overallScore}/100`,
281
+ };
282
+ }