agent-gauntlet 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +1 -1
  2. package/package.json +4 -2
  3. package/src/cli-adapters/claude.ts +139 -108
  4. package/src/cli-adapters/codex.ts +141 -117
  5. package/src/cli-adapters/cursor.ts +152 -0
  6. package/src/cli-adapters/gemini.ts +171 -139
  7. package/src/cli-adapters/github-copilot.ts +153 -0
  8. package/src/cli-adapters/index.ts +77 -48
  9. package/src/commands/check.test.ts +24 -20
  10. package/src/commands/check.ts +65 -59
  11. package/src/commands/detect.test.ts +38 -32
  12. package/src/commands/detect.ts +74 -61
  13. package/src/commands/health.test.ts +67 -53
  14. package/src/commands/health.ts +167 -145
  15. package/src/commands/help.test.ts +37 -37
  16. package/src/commands/help.ts +30 -22
  17. package/src/commands/index.ts +9 -9
  18. package/src/commands/init.test.ts +118 -107
  19. package/src/commands/init.ts +514 -417
  20. package/src/commands/list.test.ts +87 -70
  21. package/src/commands/list.ts +28 -24
  22. package/src/commands/rerun.ts +142 -119
  23. package/src/commands/review.test.ts +26 -20
  24. package/src/commands/review.ts +65 -59
  25. package/src/commands/run.test.ts +22 -20
  26. package/src/commands/run.ts +64 -58
  27. package/src/commands/shared.ts +44 -35
  28. package/src/config/loader.test.ts +112 -90
  29. package/src/config/loader.ts +132 -123
  30. package/src/config/schema.ts +49 -47
  31. package/src/config/types.ts +15 -13
  32. package/src/config/validator.ts +521 -454
  33. package/src/core/change-detector.ts +122 -104
  34. package/src/core/entry-point.test.ts +60 -62
  35. package/src/core/entry-point.ts +76 -67
  36. package/src/core/job.ts +69 -59
  37. package/src/core/runner.ts +261 -230
  38. package/src/gates/check.ts +78 -69
  39. package/src/gates/result.ts +7 -7
  40. package/src/gates/review.test.ts +174 -138
  41. package/src/gates/review.ts +716 -561
  42. package/src/index.ts +16 -15
  43. package/src/output/console.ts +253 -214
  44. package/src/output/logger.ts +64 -52
  45. package/src/templates/run_gauntlet.template.md +18 -0
  46. package/src/utils/diff-parser.ts +64 -62
  47. package/src/utils/log-parser.ts +227 -206
  48. package/src/utils/sanitizer.ts +1 -1
@@ -1,68 +1,80 @@
1
- import fs from 'node:fs/promises';
2
- import path from 'node:path';
3
- import { sanitizeJobId } from '../utils/sanitizer.js';
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { sanitizeJobId } from "../utils/sanitizer.js";
4
4
 
5
5
  function formatTimestamp(): string {
6
- return new Date().toISOString();
6
+ return new Date().toISOString();
7
7
  }
8
8
 
9
9
  export class Logger {
10
- private initializedFiles: Set<string> = new Set();
10
+ private initializedFiles: Set<string> = new Set();
11
11
 
12
- constructor(private logDir: string) {}
12
+ constructor(private logDir: string) {}
13
13
 
14
- async init() {
15
- await fs.mkdir(this.logDir, { recursive: true });
16
- }
14
+ async init() {
15
+ await fs.mkdir(this.logDir, { recursive: true });
16
+ }
17
17
 
18
- async close() {
19
- // No-op - using append mode
20
- }
18
+ async close() {
19
+ // No-op - using append mode
20
+ }
21
21
 
22
- getLogPath(jobId: string, adapterName?: string): string {
23
- const safeName = sanitizeJobId(jobId);
24
- if (adapterName) {
25
- return path.join(this.logDir, `${safeName}_${adapterName}.log`);
26
- }
27
- return path.join(this.logDir, `${safeName}.log`);
28
- }
22
+ getLogPath(jobId: string, adapterName?: string): string {
23
+ const safeName = sanitizeJobId(jobId);
24
+ if (adapterName) {
25
+ return path.join(this.logDir, `${safeName}_${adapterName}.log`);
26
+ }
27
+ return path.join(this.logDir, `${safeName}.log`);
28
+ }
29
29
 
30
- private async initFile(logPath: string): Promise<void> {
31
- if (!this.initializedFiles.has(logPath)) {
32
- await fs.writeFile(logPath, '');
33
- this.initializedFiles.add(logPath);
34
- }
35
- }
30
+ private async initFile(logPath: string): Promise<void> {
31
+ if (!this.initializedFiles.has(logPath)) {
32
+ await fs.writeFile(logPath, "");
33
+ this.initializedFiles.add(logPath);
34
+ }
35
+ }
36
36
 
37
- async createJobLogger(jobId: string): Promise<(text: string) => Promise<void>> {
38
- const logPath = this.getLogPath(jobId);
39
- await this.initFile(logPath);
37
+ async createJobLogger(
38
+ jobId: string,
39
+ ): Promise<(text: string) => Promise<void>> {
40
+ const logPath = this.getLogPath(jobId);
41
+ await this.initFile(logPath);
40
42
 
41
- return async (text: string) => {
42
- const timestamp = formatTimestamp();
43
- const lines = text.split('\n');
44
- if (lines.length > 0) {
45
- lines[0] = `[${timestamp}] ${lines[0]}`;
46
- }
47
- await fs.appendFile(logPath, lines.join('\n') + (text.endsWith('\n') ? '' : '\n'));
48
- };
49
- }
43
+ return async (text: string) => {
44
+ const timestamp = formatTimestamp();
45
+ const lines = text.split("\n");
46
+ if (lines.length > 0) {
47
+ lines[0] = `[${timestamp}] ${lines[0]}`;
48
+ }
49
+ await fs.appendFile(
50
+ logPath,
51
+ lines.join("\n") + (text.endsWith("\n") ? "" : "\n"),
52
+ );
53
+ };
54
+ }
50
55
 
51
- createLoggerFactory(jobId: string): (adapterName?: string) => Promise<{ logger: (text: string) => Promise<void>; logPath: string }> {
52
- return async (adapterName?: string) => {
53
- const logPath = this.getLogPath(jobId, adapterName);
54
- await this.initFile(logPath);
56
+ createLoggerFactory(
57
+ jobId: string,
58
+ ): (
59
+ adapterName?: string,
60
+ ) => Promise<{ logger: (text: string) => Promise<void>; logPath: string }> {
61
+ return async (adapterName?: string) => {
62
+ const logPath = this.getLogPath(jobId, adapterName);
63
+ await this.initFile(logPath);
55
64
 
56
- const logger = async (text: string) => {
57
- const timestamp = formatTimestamp();
58
- const lines = text.split('\n');
59
- if (lines.length > 0) {
60
- lines[0] = `[${timestamp}] ${lines[0]}`;
61
- }
62
- await fs.appendFile(logPath, lines.join('\n') + (text.endsWith('\n') ? '' : '\n'));
63
- };
65
+ const logger = async (text: string) => {
66
+ const timestamp = formatTimestamp();
67
+ const lines = text.split("\n");
68
+ if (lines.length > 0) {
69
+ lines[0] = `[${timestamp}] ${lines[0]}`;
70
+ }
71
+ await fs.appendFile(
72
+ logPath,
73
+ lines.join("\n") + (text.endsWith("\n") ? "" : "\n"),
74
+ );
75
+ };
64
76
 
65
- return { logger, logPath };
66
- };
67
- }
77
+ return { logger, logPath };
78
+ };
79
+ }
68
80
  }
@@ -0,0 +1,18 @@
1
+ ---
2
+ description: Run the full verification gauntlet
3
+ allowed-tools: Bash
4
+ ---
5
+ # /gauntlet
6
+ Execute the autonomous verification suite.
7
+
8
+ 1. Run `agent-gauntlet run`.
9
+ 2. If it fails, read the log files in `.gauntlet_logs/` to understand exactly what went wrong.
10
+ 3. Fix any code or logic errors found by the tools or AI reviewers, prioritizing higher-priority violations (critical > high > medium > low).
11
+ 4. If you disagree with AI reviewer feedback, briefly explain your reasoning in the code comments rather than ignoring it silently.
12
+ 5. Do NOT commit your changes yet—keep them uncommitted so the rerun command can review them.
13
+ 6. Run `agent-gauntlet rerun` to verify your fixes. The rerun command reviews only uncommitted changes and uses previous failures as context.
14
+ 7. Repeat steps 2-6 until one of the following termination conditions is met:
15
+ - All gates pass
16
+ - You disagree with remaining failures (ask the human how to proceed)
17
+ - Still failing after 3 rerun attempts
18
+ 8. Once all gates pass, do NOT commit or push your changes—await the human's review and explicit instruction to commit.
@@ -5,82 +5,84 @@ export type DiffFileRange = Set<number>;
5
5
  * Valid line numbers are those that appear in the diff as added or modified lines.
6
6
  */
7
7
  export function parseDiff(diff: string): Map<string, DiffFileRange> {
8
- const fileRanges = new Map<string, DiffFileRange>();
9
- const lines = diff.split('\n');
8
+ const fileRanges = new Map<string, DiffFileRange>();
9
+ const lines = diff.split("\n");
10
10
 
11
- let currentFile: string | null = null;
12
- let currentRanges: DiffFileRange | null = null;
13
- let currentLineNumber = 0;
11
+ let currentFile: string | null = null;
12
+ let currentRanges: DiffFileRange | null = null;
13
+ let currentLineNumber = 0;
14
14
 
15
- for (const line of lines) {
16
- // Parse file header: diff --git a/path/to/file b/path/to/file
17
- if (line.startsWith('diff --git')) {
18
- const parts = line.split(' ');
19
- if (parts.length >= 4) {
20
- // Extract filename from b/path/to/file (target file)
21
- const targetPath = parts[3];
22
- // Remove 'b/' prefix
23
- currentFile = targetPath.startsWith('b/') ? targetPath.substring(2) : targetPath;
24
-
25
- // Skip .git/ paths
26
- if (currentFile.startsWith('.git/')) {
27
- currentFile = null;
28
- currentRanges = null;
29
- continue;
30
- }
15
+ for (const line of lines) {
16
+ // Parse file header: diff --git a/path/to/file b/path/to/file
17
+ if (line.startsWith("diff --git")) {
18
+ const parts = line.split(" ");
19
+ if (parts.length >= 4) {
20
+ // Extract filename from b/path/to/file (target file)
21
+ const targetPath = parts[3];
22
+ // Remove 'b/' prefix
23
+ currentFile = targetPath.startsWith("b/")
24
+ ? targetPath.substring(2)
25
+ : targetPath;
31
26
 
32
- currentRanges = new Set<number>();
33
- fileRanges.set(currentFile, currentRanges);
34
- }
35
- continue;
36
- }
27
+ // Skip .git/ paths
28
+ if (currentFile.startsWith(".git/")) {
29
+ currentFile = null;
30
+ currentRanges = null;
31
+ continue;
32
+ }
37
33
 
38
- // Skip if we're ignoring this file (e.g. .git/)
39
- if (!currentFile || !currentRanges) continue;
34
+ currentRanges = new Set<number>();
35
+ fileRanges.set(currentFile, currentRanges);
36
+ }
37
+ continue;
38
+ }
40
39
 
41
- // Parse hunk header: @@ -old,count +new,count @@
42
- if (line.startsWith('@@')) {
43
- const match = line.match(/@@ \-\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
44
- if (match && match[1]) {
45
- currentLineNumber = parseInt(match[1], 10);
46
- }
47
- continue;
48
- }
40
+ // Skip if we're ignoring this file (e.g. .git/)
41
+ if (!currentFile || !currentRanges) continue;
49
42
 
50
- // Track added lines
51
- if (line.startsWith('+') && !line.startsWith('+++')) {
52
- currentRanges.add(currentLineNumber);
53
- currentLineNumber++;
54
- }
55
- // Track context lines (unchanged) to keep line count correct
56
- else if (line.startsWith(' ')) {
57
- currentLineNumber++;
58
- }
59
- // Removed lines (-) do not increment the new line counter
60
- }
43
+ // Parse hunk header: @@ -old,count +new,count @@
44
+ if (line.startsWith("@@")) {
45
+ const match = line.match(/@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
46
+ if (match?.[1]) {
47
+ currentLineNumber = parseInt(match[1], 10);
48
+ }
49
+ continue;
50
+ }
61
51
 
62
- return fileRanges;
52
+ // Track added lines
53
+ if (line.startsWith("+") && !line.startsWith("+++")) {
54
+ currentRanges.add(currentLineNumber);
55
+ currentLineNumber++;
56
+ }
57
+ // Track context lines (unchanged) to keep line count correct
58
+ else if (line.startsWith(" ")) {
59
+ currentLineNumber++;
60
+ }
61
+ // Removed lines (-) do not increment the new line counter
62
+ }
63
+
64
+ return fileRanges;
63
65
  }
64
66
 
65
67
  /**
66
68
  * Checks if a violation is valid based on the parsed diff ranges.
67
69
  */
68
70
  export function isValidViolationLocation(
69
- file: string,
70
- line: number | undefined,
71
- diffRanges: Map<string, DiffFileRange> | undefined
71
+ file: string,
72
+ line: number | undefined,
73
+ diffRanges: Map<string, DiffFileRange> | undefined,
72
74
  ): boolean {
73
- // If no diff ranges provided (e.g. full file review), assume valid
74
- if (!diffRanges) return true;
75
-
76
- // Line is required for diff-scoped reviews
77
- if (line === undefined) return false;
75
+ // If no diff ranges provided (e.g. full file review), assume valid
76
+ if (!diffRanges) return true;
77
+
78
+ // Line is required for diff-scoped reviews
79
+ if (line === undefined) return false;
78
80
 
79
- const validLines = diffRanges.get(file);
80
- if (!validLines) {
81
- // File not in diff
82
- return false;
83
- }
81
+ const validLines = diffRanges.get(file);
82
+ if (!validLines) {
83
+ // File not in diff
84
+ return false;
85
+ }
84
86
 
85
- return validLines.has(line);
87
+ return validLines.has(line);
86
88
  }