agent-gauntlet 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/gates/review.ts +59 -8
- package/src/utils/diff-parser.ts +86 -0
package/package.json
CHANGED
package/src/gates/review.ts
CHANGED
|
@@ -3,7 +3,9 @@ import { promisify } from 'node:util';
|
|
|
3
3
|
import { ReviewGateConfig, ReviewPromptFrontmatter } from '../config/types.js';
|
|
4
4
|
import { GateResult } from './result.js';
|
|
5
5
|
import { CLIAdapter, getAdapter } from '../cli-adapters/index.js';
|
|
6
|
-
import {
|
|
6
|
+
import { Logger } from '../output/logger.js';
|
|
7
|
+
import { parseDiff, isValidViolationLocation, type DiffFileRange } from '../utils/diff-parser.js';
|
|
8
|
+
import { type PreviousViolation } from '../utils/log-parser.js';
|
|
7
9
|
|
|
8
10
|
const execAsync = promisify(exec);
|
|
9
11
|
|
|
@@ -13,9 +15,17 @@ const JSON_SYSTEM_INSTRUCTION = `
|
|
|
13
15
|
You are in a read-only mode. You may read files in the repository to gather context.
|
|
14
16
|
Do NOT attempt to modify files or run shell commands that change system state.
|
|
15
17
|
Do NOT access files outside the repository root.
|
|
18
|
+
Do NOT access the .git/ directory or read git history/commit information.
|
|
16
19
|
Use your available file-reading and search tools to find information.
|
|
17
20
|
If the diff is insufficient or ambiguous, use your tools to read the full file content or related files.
|
|
18
21
|
|
|
22
|
+
CRITICAL SCOPE RESTRICTIONS:
|
|
23
|
+
- ONLY review the code changes shown in the diff below
|
|
24
|
+
- DO NOT review commit history or existing code outside the diff
|
|
25
|
+
- All violations MUST reference file paths and line numbers that appear IN THE DIFF
|
|
26
|
+
- The "file" field must match a file from the diff
|
|
27
|
+
- The "line" field must be within a changed region (lines starting with + in the diff)
|
|
28
|
+
|
|
19
29
|
IMPORTANT: You must output ONLY a valid JSON object. Do not output any markdown text, explanations, or code blocks outside of the JSON.
|
|
20
30
|
Each violation MUST include a "priority" field with one of: "critical", "high", "medium", "low".
|
|
21
31
|
|
|
@@ -246,7 +256,11 @@ export class ReviewGateExecutor {
|
|
|
246
256
|
|
|
247
257
|
await adapterLogger(`\n--- Review Output (${adapter.name}) ---\n${output}\n`);
|
|
248
258
|
|
|
249
|
-
const evaluation = this.evaluateOutput(output);
|
|
259
|
+
const evaluation = this.evaluateOutput(output, diff);
|
|
260
|
+
|
|
261
|
+
if (evaluation.filteredCount && evaluation.filteredCount > 0) {
|
|
262
|
+
await adapterLogger(`Note: ${evaluation.filteredCount} out-of-scope violations filtered\n`);
|
|
263
|
+
}
|
|
250
264
|
|
|
251
265
|
// Log formatted summary
|
|
252
266
|
if (evaluation.json) {
|
|
@@ -408,14 +422,21 @@ export class ReviewGateExecutor {
|
|
|
408
422
|
return lines.join('\n');
|
|
409
423
|
}
|
|
410
424
|
|
|
411
|
-
public evaluateOutput(output: string): {
|
|
425
|
+
public evaluateOutput(output: string, diff?: string): {
|
|
426
|
+
status: 'pass' | 'fail' | 'error';
|
|
427
|
+
message: string;
|
|
428
|
+
json?: any;
|
|
429
|
+
filteredCount?: number;
|
|
430
|
+
} {
|
|
431
|
+
const diffRanges = diff ? parseDiff(diff) : undefined;
|
|
432
|
+
|
|
412
433
|
try {
|
|
413
434
|
// 1. Try to extract from markdown code block first (most reliable)
|
|
414
435
|
const jsonBlockMatch = output.match(/```json\s*([\s\S]*?)\s*```/);
|
|
415
436
|
if (jsonBlockMatch) {
|
|
416
437
|
try {
|
|
417
438
|
const json = JSON.parse(jsonBlockMatch[1]);
|
|
418
|
-
return this.validateAndReturn(json);
|
|
439
|
+
return this.validateAndReturn(json, diffRanges);
|
|
419
440
|
} catch {
|
|
420
441
|
// If code block parse fails, fall back to other methods
|
|
421
442
|
}
|
|
@@ -433,7 +454,7 @@ export class ReviewGateExecutor {
|
|
|
433
454
|
const json = JSON.parse(candidate);
|
|
434
455
|
// If we successfully parsed an object with 'status', it's likely our result
|
|
435
456
|
if (json.status) {
|
|
436
|
-
return this.validateAndReturn(json);
|
|
457
|
+
return this.validateAndReturn(json, diffRanges);
|
|
437
458
|
}
|
|
438
459
|
} catch {
|
|
439
460
|
// Not valid JSON, keep searching backwards
|
|
@@ -448,7 +469,7 @@ export class ReviewGateExecutor {
|
|
|
448
469
|
try {
|
|
449
470
|
const candidate = output.substring(firstStart, end + 1);
|
|
450
471
|
const json = JSON.parse(candidate);
|
|
451
|
-
return this.validateAndReturn(json);
|
|
472
|
+
return this.validateAndReturn(json, diffRanges);
|
|
452
473
|
} catch {
|
|
453
474
|
// Ignore
|
|
454
475
|
}
|
|
@@ -461,7 +482,10 @@ export class ReviewGateExecutor {
|
|
|
461
482
|
}
|
|
462
483
|
}
|
|
463
484
|
|
|
464
|
-
private validateAndReturn(
|
|
485
|
+
private validateAndReturn(
|
|
486
|
+
json: any,
|
|
487
|
+
diffRanges?: Map<string, DiffFileRange>
|
|
488
|
+
): { status: 'pass' | 'fail' | 'error'; message: string; json?: any; filteredCount?: number } {
|
|
465
489
|
// Validate Schema
|
|
466
490
|
if (!json.status || (json.status !== 'pass' && json.status !== 'fail')) {
|
|
467
491
|
return { status: 'error', message: 'Invalid JSON: missing or invalid "status" field', json };
|
|
@@ -472,6 +496,33 @@ export class ReviewGateExecutor {
|
|
|
472
496
|
}
|
|
473
497
|
|
|
474
498
|
// json.status === 'fail'
|
|
499
|
+
let filteredCount = 0;
|
|
500
|
+
|
|
501
|
+
if (Array.isArray(json.violations) && diffRanges?.size) {
|
|
502
|
+
const originalCount = json.violations.length;
|
|
503
|
+
|
|
504
|
+
json.violations = json.violations.filter((v: any) => {
|
|
505
|
+
const isValid = isValidViolationLocation(v.file, v.line, diffRanges);
|
|
506
|
+
if (!isValid) {
|
|
507
|
+
// Can't easily access logger here, but could return warning info
|
|
508
|
+
// console.warn(`[WARNING] Filtered violation: ${v.file}:${v.line ?? '?'} (not in diff)`);
|
|
509
|
+
}
|
|
510
|
+
return isValid;
|
|
511
|
+
});
|
|
512
|
+
|
|
513
|
+
filteredCount = originalCount - json.violations.length;
|
|
514
|
+
|
|
515
|
+
// If all filtered out, change to pass
|
|
516
|
+
if (json.violations.length === 0) {
|
|
517
|
+
return {
|
|
518
|
+
status: 'pass',
|
|
519
|
+
message: `Passed (${filteredCount} out-of-scope violations filtered)`,
|
|
520
|
+
json: { status: 'pass' },
|
|
521
|
+
filteredCount
|
|
522
|
+
};
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
|
|
475
526
|
const violationCount = Array.isArray(json.violations) ? json.violations.length : 'some';
|
|
476
527
|
|
|
477
528
|
// Construct a summary message
|
|
@@ -481,7 +532,7 @@ export class ReviewGateExecutor {
|
|
|
481
532
|
msg += `. Example: ${first.issue} in ${first.file}`;
|
|
482
533
|
}
|
|
483
534
|
|
|
484
|
-
return { status: 'fail', message: msg, json };
|
|
535
|
+
return { status: 'fail', message: msg, json, filteredCount };
|
|
485
536
|
}
|
|
486
537
|
|
|
487
538
|
private parseLines(stdout: string): string[] {
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/** Set of 1-based line numbers, in the new version of a file, that a diff added. */
export type DiffFileRange = Set<number>;

/** Matches a unified-diff hunk header and captures the first line number of the new side. */
const HUNK_HEADER = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/;

/**
 * Extracts the target (new-side) path from a `diff --git <old> <new>` header line.
 *
 * The header separates the two paths with a single space, so a path that itself
 * contains spaces cannot be recovered by naive tokenising. Resolution order:
 *  1. `a/<path> b/<path>` with identical paths (the non-rename case): split at the
 *     exact middle, which is unambiguous even when the path contains spaces.
 *  2. Otherwise take everything after the last ` b/` (covers renames).
 *  3. Otherwise fall back to the fourth space-separated token, stripping a leading
 *     `b/` if present.
 *
 * NOTE(review): paths that git emits in quoted form are returned still quoted —
 * confirm whether callers need them unquoted.
 *
 * @param header A line starting with `diff --git`.
 * @returns The target path, or `null` when the header carries no usable path.
 */
function extractTargetPath(header: string): string | null {
  const rest = header.slice('diff --git '.length);

  // Case 1: both halves name the same file, so the separator sits exactly in the middle.
  if (rest.length % 2 === 1) {
    const mid = (rest.length - 1) / 2;
    if (rest.charAt(mid) === ' ') {
      const left = rest.slice(0, mid);
      const right = rest.slice(mid + 1);
      if (left.startsWith('a/') && right.startsWith('b/') && left.slice(2) === right.slice(2)) {
        return right.slice(2);
      }
    }
  }

  // Case 2: rename/copy — the target is whatever follows the last " b/".
  const marker = rest.lastIndexOf(' b/');
  if (marker !== -1) {
    return rest.slice(marker + 3);
  }

  // Case 3: no recognisable prefixes; use the fourth token.
  const target = header.split(' ')[3];
  if (target === undefined) {
    return null;
  }
  return target.startsWith('b/') ? target.substring(2) : target;
}

/**
 * Parses a git unified diff into a map from target file path to the set of line
 * numbers (in the new file) that the diff added.
 *
 * Only lines prefixed with `+` inside a hunk are recorded. Context lines advance
 * the new-side line counter without being recorded, and removed lines do not
 * advance it. Every file introduced by a `diff --git` header gets an entry, even
 * when it has no added lines, except files under `.git/`, which are skipped.
 *
 * @param diff Raw diff text; both LF and CRLF line endings are accepted.
 * @returns Map keyed by target path; the value is the set of added line numbers.
 */
export function parseDiff(diff: string): Map<string, DiffFileRange> {
  const fileRanges = new Map<string, DiffFileRange>();

  let currentRanges: DiffFileRange | null = null;
  let currentLineNumber = 0;
  // True only between a valid hunk header and the next file header. Outside a hunk
  // every line is metadata (index, ---, +++, mode, rename ...) and must be ignored;
  // inside a hunk a leading "+" always means "added line", even if the content
  // itself starts with "++".
  let inHunk = false;

  for (const line of diff.split(/\r?\n/)) {
    // A hunk body line always starts with ' ', '+', '-' or '\', so a line starting
    // with "diff --" is always a file boundary. Reset state first so nothing leaks
    // from the previous file if this header cannot be parsed.
    if (line.startsWith('diff --')) {
      currentRanges = null;
      inHunk = false;

      if (!line.startsWith('diff --git')) continue;

      const targetPath = extractTargetPath(line);
      // Skip unparsable headers and anything under .git/
      if (targetPath === null || targetPath.startsWith('.git/')) continue;

      currentRanges = new Set<number>();
      fileRanges.set(targetPath, currentRanges);
      continue;
    }

    // Not inside a tracked file (e.g. preamble text or a skipped .git/ file).
    if (!currentRanges) continue;

    // Hunk header: @@ -old,count +new,count @@
    if (line.startsWith('@@')) {
      const start = HUNK_HEADER.exec(line)?.[1];
      // An unparsable header leaves us without a trustworthy line number, so the
      // lines that follow are ignored rather than recorded at stale positions.
      inHunk = start !== undefined;
      if (start !== undefined) {
        currentLineNumber = parseInt(start, 10);
      }
      continue;
    }

    if (!inHunk) continue;

    if (line.startsWith('+')) {
      // Added line: record it and advance the new-side counter.
      currentRanges.add(currentLineNumber);
      currentLineNumber++;
    } else if (line.startsWith(' ') || line === '') {
      // Context line. An empty string is treated as an empty context line whose
      // single leading space was stripped by some tool.
      currentLineNumber++;
    }
    // Removed lines ('-') and "\ No newline at end of file" markers do not exist
    // on the new side, so the counter stays put.
  }

  return fileRanges;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Checks whether a reported violation points at a line that the diff added.
 *
 * The visible caller passes `file` and `line` straight from untyped, parsed JSON,
 * so the values are normalised before lookup instead of trusting the declared types:
 *  - a line given as a string of digits (e.g. `"42"`) is treated as that number;
 *  - surrounding whitespace and leading `./` segments on the path are ignored
 *    when the exact path is not found.
 *
 * @param file Path of the file the violation refers to.
 * @param line Line number in the new version of the file.
 * @param diffRanges Map of file path to added line numbers. When it is
 *   `undefined` (no diff scoping) every location is accepted.
 * @returns `true` if the location is inside an added line of the diff, or if no
 *   diff ranges were supplied; `false` otherwise, including when `line` is
 *   missing or the file is not part of the diff.
 */
export function isValidViolationLocation(
  file: string,
  line: number | string | null | undefined,
  diffRanges: Map<string, DiffFileRange> | undefined
): boolean {
  // No diff ranges provided (e.g. full file review): nothing to validate against.
  if (!diffRanges) return true;

  // Values come from parsed JSON, so guard against a missing or non-string path.
  if (typeof file !== 'string') return false;

  // Accept numeric strings; anything that is not an integer cannot match a line.
  let lineNumber: number | string | null | undefined = line;
  if (typeof lineNumber === 'string' && /^\d+$/.test(lineNumber.trim())) {
    lineNumber = Number(lineNumber.trim());
  }
  if (typeof lineNumber !== 'number' || !Number.isInteger(lineNumber)) return false;

  // Prefer the exact path; fall back to the path without whitespace and "./" prefixes.
  const normalizedFile = file.trim().replace(/^(?:\.\/)+/, '');
  const validLines = diffRanges.get(file) ?? diffRanges.get(normalizedFile);

  // A file that is not in the diff has no valid lines.
  return validLines?.has(lineNumber) ?? false;
}
|