agent-gauntlet 0.1.7 → 0.1.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-gauntlet",
3
- "version": "0.1.7",
3
+ "version": "0.1.8",
4
4
  "description": "A CLI tool for testing AI coding agents",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Paul Caplan",
@@ -3,7 +3,9 @@ import { promisify } from 'node:util';
3
3
  import { ReviewGateConfig, ReviewPromptFrontmatter } from '../config/types.js';
4
4
  import { GateResult } from './result.js';
5
5
  import { CLIAdapter, getAdapter } from '../cli-adapters/index.js';
6
- import { PreviousViolation } from '../utils/log-parser.js';
6
+ import { Logger } from '../output/logger.js';
7
+ import { parseDiff, isValidViolationLocation, type DiffFileRange } from '../utils/diff-parser.js';
8
+ import { type PreviousViolation } from '../utils/log-parser.js';
7
9
 
8
10
  const execAsync = promisify(exec);
9
11
 
@@ -13,9 +15,17 @@ const JSON_SYSTEM_INSTRUCTION = `
13
15
  You are in a read-only mode. You may read files in the repository to gather context.
14
16
  Do NOT attempt to modify files or run shell commands that change system state.
15
17
  Do NOT access files outside the repository root.
18
+ Do NOT access the .git/ directory or read git history/commit information.
16
19
  Use your available file-reading and search tools to find information.
17
20
  If the diff is insufficient or ambiguous, use your tools to read the full file content or related files.
18
21
 
22
+ CRITICAL SCOPE RESTRICTIONS:
23
+ - ONLY review the code changes shown in the diff below
24
+ - DO NOT review commit history or existing code outside the diff
25
+ - All violations MUST reference file paths and line numbers that appear IN THE DIFF
26
+ - The "file" field must match a file from the diff
27
+ - The "line" field must be within a changed region (lines starting with + in the diff)
28
+
19
29
  IMPORTANT: You must output ONLY a valid JSON object. Do not output any markdown text, explanations, or code blocks outside of the JSON.
20
30
  Each violation MUST include a "priority" field with one of: "critical", "high", "medium", "low".
21
31
 
@@ -246,7 +256,11 @@ export class ReviewGateExecutor {
246
256
 
247
257
  await adapterLogger(`\n--- Review Output (${adapter.name}) ---\n${output}\n`);
248
258
 
249
- const evaluation = this.evaluateOutput(output);
259
+ const evaluation = this.evaluateOutput(output, diff);
260
+
261
+ if (evaluation.filteredCount && evaluation.filteredCount > 0) {
262
+ await adapterLogger(`Note: ${evaluation.filteredCount} out-of-scope violations filtered\n`);
263
+ }
250
264
 
251
265
  // Log formatted summary
252
266
  if (evaluation.json) {
@@ -408,14 +422,21 @@ export class ReviewGateExecutor {
408
422
  return lines.join('\n');
409
423
  }
410
424
 
411
- public evaluateOutput(output: string): { status: 'pass' | 'fail' | 'error'; message: string; json?: any } {
425
+ public evaluateOutput(output: string, diff?: string): {
426
+ status: 'pass' | 'fail' | 'error';
427
+ message: string;
428
+ json?: any;
429
+ filteredCount?: number;
430
+ } {
431
+ const diffRanges = diff ? parseDiff(diff) : undefined;
432
+
412
433
  try {
413
434
  // 1. Try to extract from markdown code block first (most reliable)
414
435
  const jsonBlockMatch = output.match(/```json\s*([\s\S]*?)\s*```/);
415
436
  if (jsonBlockMatch) {
416
437
  try {
417
438
  const json = JSON.parse(jsonBlockMatch[1]);
418
- return this.validateAndReturn(json);
439
+ return this.validateAndReturn(json, diffRanges);
419
440
  } catch {
420
441
  // If code block parse fails, fall back to other methods
421
442
  }
@@ -433,7 +454,7 @@ export class ReviewGateExecutor {
433
454
  const json = JSON.parse(candidate);
434
455
  // If we successfully parsed an object with 'status', it's likely our result
435
456
  if (json.status) {
436
- return this.validateAndReturn(json);
457
+ return this.validateAndReturn(json, diffRanges);
437
458
  }
438
459
  } catch {
439
460
  // Not valid JSON, keep searching backwards
@@ -448,7 +469,7 @@ export class ReviewGateExecutor {
448
469
  try {
449
470
  const candidate = output.substring(firstStart, end + 1);
450
471
  const json = JSON.parse(candidate);
451
- return this.validateAndReturn(json);
472
+ return this.validateAndReturn(json, diffRanges);
452
473
  } catch {
453
474
  // Ignore
454
475
  }
@@ -461,7 +482,10 @@ export class ReviewGateExecutor {
461
482
  }
462
483
  }
463
484
 
464
- private validateAndReturn(json: any): { status: 'pass' | 'fail' | 'error'; message: string; json?: any } {
485
+ private validateAndReturn(
486
+ json: any,
487
+ diffRanges?: Map<string, DiffFileRange>
488
+ ): { status: 'pass' | 'fail' | 'error'; message: string; json?: any; filteredCount?: number } {
465
489
  // Validate Schema
466
490
  if (!json.status || (json.status !== 'pass' && json.status !== 'fail')) {
467
491
  return { status: 'error', message: 'Invalid JSON: missing or invalid "status" field', json };
@@ -472,6 +496,33 @@ export class ReviewGateExecutor {
472
496
  }
473
497
 
474
498
  // json.status === 'fail'
499
+ let filteredCount = 0;
500
+
501
+ if (Array.isArray(json.violations) && diffRanges?.size) {
502
+ const originalCount = json.violations.length;
503
+
504
+ json.violations = json.violations.filter((v: any) => {
505
+ const isValid = isValidViolationLocation(v.file, v.line, diffRanges);
506
+ if (!isValid) {
507
+ // Can't easily access logger here, but could return warning info
508
+ // console.warn(`[WARNING] Filtered violation: ${v.file}:${v.line ?? '?'} (not in diff)`);
509
+ }
510
+ return isValid;
511
+ });
512
+
513
+ filteredCount = originalCount - json.violations.length;
514
+
515
+ // If all filtered out, change to pass
516
+ if (json.violations.length === 0) {
517
+ return {
518
+ status: 'pass',
519
+ message: `Passed (${filteredCount} out-of-scope violations filtered)`,
520
+ json: { status: 'pass' },
521
+ filteredCount
522
+ };
523
+ }
524
+ }
525
+
475
526
  const violationCount = Array.isArray(json.violations) ? json.violations.length : 'some';
476
527
 
477
528
  // Construct a summary message
@@ -481,7 +532,7 @@ export class ReviewGateExecutor {
481
532
  msg += `. Example: ${first.issue} in ${first.file}`;
482
533
  }
483
534
 
484
- return { status: 'fail', message: msg, json };
535
+ return { status: 'fail', message: msg, json, filteredCount };
485
536
  }
486
537
 
487
538
  private parseLines(stdout: string): string[] {
@@ -0,0 +1,86 @@
1
/** Set of 1-based line numbers (in the post-change file) that a diff added. */
export type DiffFileRange = Set<number>;

/** Matches a unified-diff hunk header and captures the starting line of the new file. */
const HUNK_HEADER_PATTERN = /@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/;

/**
 * Extracts the target (post-change) path from a `diff --git` header line.
 *
 * Paths may contain spaces, so the header cannot simply be split on ' '.
 * Returns null when no target path can be determined.
 */
function extractTargetPath(headerLine: string): string | null {
  const rest = headerLine.slice('diff --git'.length).trimStart();

  // Common case (no rename): both halves name the same path, so the separator
  // is the space exactly in the middle: "a/<path> b/<path>".
  const middle = (rest.length - 1) / 2;
  if (Number.isInteger(middle) && rest.charAt(middle) === ' ') {
    const left = rest.slice(0, middle);
    const right = rest.slice(middle + 1);
    if (left.startsWith('a/') && right.startsWith('b/') && left.slice(2) === right.slice(2)) {
      return right.slice(2);
    }
    if (left === right) {
      // Prefix-less headers (e.g. diff.noprefix): "<path> <path>".
      return right;
    }
  }

  // Rename/copy: take everything after the last " b/" separator.
  const separator = rest.lastIndexOf(' b/');
  if (separator !== -1) {
    return rest.slice(separator + 3);
  }

  // Fallback: fourth space-separated token of the header.
  const target = headerLine.split(' ')[3];
  return target === undefined ? null : target;
}

/**
 * Parses a git unified diff string into a map of filenames to sets of valid line numbers.
 * Valid line numbers are the new-file line numbers of lines the diff adds
 * (lines starting with `+` inside a hunk).
 *
 * - Files under `.git/` are ignored.
 * - Files that appear in the diff without added lines map to an empty set.
 * - A `diff --git` header whose target path cannot be determined disables
 *   tracking until the next header, so its hunks are never attributed to the
 *   previous file.
 *
 * @param diff Output of `git diff` (LF or CRLF line endings).
 * @returns Map from target file path to the set of added line numbers.
 */
export function parseDiff(diff: string): Map<string, DiffFileRange> {
  const fileRanges = new Map<string, DiffFileRange>();

  let currentRanges: DiffFileRange | null = null;
  let currentLineNumber = 0;
  // True only between a hunk header and the next file header. The `---`/`+++`
  // file headers live outside hunks, so inside a hunk every leading `+` is an
  // added line, even when the added content itself starts with `++`.
  let inHunk = false;

  for (const line of diff.split(/\r?\n/)) {
    // File header: diff --git a/path/to/file b/path/to/file
    if (line.startsWith('diff --git')) {
      inHunk = false;
      const targetPath = extractTargetPath(line);

      if (!targetPath || targetPath.startsWith('.git/')) {
        currentRanges = null;
        continue;
      }

      currentRanges = new Set<number>();
      fileRanges.set(targetPath, currentRanges);
      continue;
    }

    // Skip everything belonging to an ignored or unparseable file.
    if (currentRanges === null) continue;

    // Hunk header: @@ -old,count +new,count @@
    if (line.startsWith('@@')) {
      const newStart = HUNK_HEADER_PATTERN.exec(line)?.[1];
      if (newStart !== undefined) {
        currentLineNumber = parseInt(newStart, 10);
        inHunk = true;
      }
      continue;
    }

    // Extended headers (index, ---, +++, rename from/to, ...) precede the first hunk.
    if (!inHunk) continue;

    if (line.startsWith('+')) {
      // Added line: record it and advance the new-file counter.
      currentRanges.add(currentLineNumber);
      currentLineNumber++;
    } else if (line.startsWith(' ')) {
      // Context line: present in the new file, but not a change.
      currentLineNumber++;
    }
    // Removed lines (-) and "\ No newline at end of file" do not advance the counter.
  }

  return fileRanges;
}
64
+
65
/**
 * Checks whether a reported violation points at a line that the diff added.
 *
 * The arguments typically come straight from model-produced JSON, so they are
 * normalised before lookup: a line given as a numeric string (e.g. "42") is
 * treated as that number, and a leading "./" on the file path is ignored.
 * Without this, a legitimate violation would be discarded as out of scope.
 *
 * @param file Path of the file the violation refers to, relative to the repo root.
 * @param line 1-based line number in the post-change file.
 * @param diffRanges Map from file path to added line numbers, or undefined
 *   when the review is not diff-scoped.
 * @returns true when no diff ranges are provided, or when `line` is one of the
 *   added lines recorded for `file`; false otherwise (including a missing or
 *   non-numeric line, or a file that is not part of the diff).
 */
export function isValidViolationLocation(
  file: string,
  line: number | string | null | undefined,
  diffRanges: ReadonlyMap<string, ReadonlySet<number>> | undefined
): boolean {
  // If no diff ranges provided (e.g. full file review), assume valid
  if (!diffRanges) return true;

  // Defensive: the value may not actually be a string at runtime.
  if (typeof file !== 'string') return false;

  // Line is required for diff-scoped reviews; accept "42" as well as 42.
  const lineNumber = typeof line === 'string' && /^\s*\d+\s*$/.test(line) ? Number(line) : line;
  if (typeof lineNumber !== 'number' || !Number.isInteger(lineNumber)) return false;

  // Try the path exactly as reported first, then without any leading "./".
  const validLines = diffRanges.get(file) ?? diffRanges.get(file.replace(/^(?:\.\/)+/, ''));

  // File not in diff -> not a valid location.
  return validLines?.has(lineNumber) ?? false;
}