agent-gauntlet 0.1.9 → 0.1.11

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +1 -1
  2. package/package.json +4 -2
  3. package/src/cli-adapters/claude.ts +139 -108
  4. package/src/cli-adapters/codex.ts +141 -117
  5. package/src/cli-adapters/cursor.ts +152 -0
  6. package/src/cli-adapters/gemini.ts +171 -139
  7. package/src/cli-adapters/github-copilot.ts +153 -0
  8. package/src/cli-adapters/index.ts +77 -48
  9. package/src/commands/check.test.ts +24 -20
  10. package/src/commands/check.ts +65 -59
  11. package/src/commands/detect.test.ts +38 -32
  12. package/src/commands/detect.ts +74 -61
  13. package/src/commands/health.test.ts +67 -53
  14. package/src/commands/health.ts +167 -145
  15. package/src/commands/help.test.ts +37 -37
  16. package/src/commands/help.ts +30 -22
  17. package/src/commands/index.ts +9 -9
  18. package/src/commands/init.test.ts +118 -107
  19. package/src/commands/init.ts +515 -417
  20. package/src/commands/list.test.ts +87 -70
  21. package/src/commands/list.ts +28 -24
  22. package/src/commands/rerun.ts +142 -119
  23. package/src/commands/review.test.ts +26 -20
  24. package/src/commands/review.ts +65 -59
  25. package/src/commands/run.test.ts +22 -20
  26. package/src/commands/run.ts +64 -58
  27. package/src/commands/shared.ts +44 -35
  28. package/src/config/loader.test.ts +112 -90
  29. package/src/config/loader.ts +132 -123
  30. package/src/config/schema.ts +49 -47
  31. package/src/config/types.ts +15 -13
  32. package/src/config/validator.ts +521 -454
  33. package/src/core/change-detector.ts +122 -104
  34. package/src/core/entry-point.test.ts +60 -62
  35. package/src/core/entry-point.ts +76 -67
  36. package/src/core/job.ts +69 -59
  37. package/src/core/runner.ts +261 -221
  38. package/src/gates/check.ts +78 -69
  39. package/src/gates/result.ts +7 -6
  40. package/src/gates/review.test.ts +188 -0
  41. package/src/gates/review.ts +717 -506
  42. package/src/index.ts +16 -15
  43. package/src/output/console.ts +253 -198
  44. package/src/output/logger.ts +65 -51
  45. package/src/templates/run_gauntlet.template.md +18 -0
  46. package/src/utils/diff-parser.ts +64 -62
  47. package/src/utils/log-parser.ts +227 -206
  48. package/src/utils/sanitizer.ts +1 -1
package/src/commands/list.test.ts
@@ -1,31 +1,41 @@
1
- import { describe, it, expect, beforeEach, afterEach, beforeAll, afterAll } from 'bun:test';
2
- import { Command } from 'commander';
3
- import { registerListCommand } from './list.js';
4
- import fs from 'node:fs/promises';
5
- import path from 'node:path';
1
+ import {
2
+ afterAll,
3
+ afterEach,
4
+ beforeAll,
5
+ beforeEach,
6
+ describe,
7
+ expect,
8
+ it,
9
+ } from "bun:test";
10
+ import fs from "node:fs/promises";
11
+ import path from "node:path";
12
+ import { Command } from "commander";
13
+ import { registerListCommand } from "./list.js";
6
14
 
7
- const TEST_DIR = path.join(process.cwd(), 'test-list-' + Date.now());
8
- const GAUNTLET_DIR = path.join(TEST_DIR, '.gauntlet');
9
- const CHECKS_DIR = path.join(GAUNTLET_DIR, 'checks');
10
- const REVIEWS_DIR = path.join(GAUNTLET_DIR, 'reviews');
15
+ const TEST_DIR = path.join(process.cwd(), `test-list-${Date.now()}`);
16
+ const GAUNTLET_DIR = path.join(TEST_DIR, ".gauntlet");
17
+ const CHECKS_DIR = path.join(GAUNTLET_DIR, "checks");
18
+ const REVIEWS_DIR = path.join(GAUNTLET_DIR, "reviews");
11
19
 
12
- describe('List Command', () => {
13
- let program: Command;
14
- const originalConsoleLog = console.log;
15
- const originalConsoleError = console.error;
16
- const originalCwd = process.cwd();
17
- let logs: string[];
18
- let errors: string[];
20
+ describe("List Command", () => {
21
+ let program: Command;
22
+ const originalConsoleLog = console.log;
23
+ const originalConsoleError = console.error;
24
+ const originalCwd = process.cwd();
25
+ let logs: string[];
26
+ let errors: string[];
19
27
 
20
- beforeAll(async () => {
21
- // Setup test directory structure
22
- await fs.mkdir(TEST_DIR, { recursive: true });
23
- await fs.mkdir(GAUNTLET_DIR, { recursive: true });
24
- await fs.mkdir(CHECKS_DIR, { recursive: true });
25
- await fs.mkdir(REVIEWS_DIR, { recursive: true });
28
+ beforeAll(async () => {
29
+ // Setup test directory structure
30
+ await fs.mkdir(TEST_DIR, { recursive: true });
31
+ await fs.mkdir(GAUNTLET_DIR, { recursive: true });
32
+ await fs.mkdir(CHECKS_DIR, { recursive: true });
33
+ await fs.mkdir(REVIEWS_DIR, { recursive: true });
26
34
 
27
- // Write config.yml
28
- await fs.writeFile(path.join(GAUNTLET_DIR, 'config.yml'), `
35
+ // Write config.yml
36
+ await fs.writeFile(
37
+ path.join(GAUNTLET_DIR, "config.yml"),
38
+ `
29
39
  base_branch: origin/main
30
40
  log_dir: .gauntlet_logs
31
41
  cli:
@@ -38,67 +48,74 @@ entry_points:
38
48
  - lint
39
49
  reviews:
40
50
  - security
41
- `);
51
+ `,
52
+ );
42
53
 
43
- // Write check definition
44
- await fs.writeFile(path.join(CHECKS_DIR, 'lint.yml'), `
54
+ // Write check definition
55
+ await fs.writeFile(
56
+ path.join(CHECKS_DIR, "lint.yml"),
57
+ `
45
58
  name: lint
46
59
  command: npm run lint
47
60
  working_directory: .
48
- `);
61
+ `,
62
+ );
49
63
 
50
- // Write review definition
51
- await fs.writeFile(path.join(REVIEWS_DIR, 'security.md'), `---
64
+ // Write review definition
65
+ await fs.writeFile(
66
+ path.join(REVIEWS_DIR, "security.md"),
67
+ `---
52
68
  cli_preference:
53
69
  - gemini
54
70
  ---
55
71
 
56
72
  # Security Review
57
73
  Review for security.
58
- `);
59
- });
74
+ `,
75
+ );
76
+ });
60
77
 
61
- afterAll(async () => {
62
- await fs.rm(TEST_DIR, { recursive: true, force: true });
63
- });
78
+ afterAll(async () => {
79
+ await fs.rm(TEST_DIR, { recursive: true, force: true });
80
+ });
64
81
 
65
- beforeEach(() => {
66
- program = new Command();
67
- registerListCommand(program);
68
- logs = [];
69
- errors = [];
70
- console.log = (...args: any[]) => {
71
- logs.push(args.join(' '));
72
- };
73
- console.error = (...args: any[]) => {
74
- errors.push(args.join(' '));
75
- };
76
- process.chdir(TEST_DIR);
77
- });
82
+ beforeEach(() => {
83
+ program = new Command();
84
+ registerListCommand(program);
85
+ logs = [];
86
+ errors = [];
87
+ console.log = (...args: unknown[]) => {
88
+ logs.push(args.join(" "));
89
+ };
90
+ console.error = (...args: unknown[]) => {
91
+ errors.push(args.join(" "));
92
+ };
93
+ process.chdir(TEST_DIR);
94
+ });
78
95
 
79
- afterEach(() => {
80
- console.log = originalConsoleLog;
81
- console.error = originalConsoleError;
82
- process.chdir(originalCwd);
83
- });
96
+ afterEach(() => {
97
+ console.log = originalConsoleLog;
98
+ console.error = originalConsoleError;
99
+ process.chdir(originalCwd);
100
+ });
84
101
 
85
- it('should register the list command', () => {
86
- const listCmd = program.commands.find(cmd => cmd.name() === 'list');
87
- expect(listCmd).toBeDefined();
88
- expect(listCmd?.description()).toBe('List configured gates');
89
- });
102
+ it("should register the list command", () => {
103
+ const listCmd = program.commands.find((cmd) => cmd.name() === "list");
104
+ expect(listCmd).toBeDefined();
105
+ expect(listCmd?.description()).toBe("List configured gates");
106
+ });
90
107
 
91
- it('should list check gates, review gates, and entry points', async () => {
92
- const listCmd = program.commands.find(cmd => cmd.name() === 'list');
93
- await listCmd?.parseAsync(['list']);
108
+ it("should list check gates, review gates, and entry points", async () => {
109
+ const listCmd = program.commands.find((cmd) => cmd.name() === "list");
110
+ await listCmd?.parseAsync(["list"]);
94
111
 
95
- const output = logs.join('\n');
96
- expect(output).toContain('Check Gates:');
97
- expect(output).toContain('lint');
98
- expect(output).toContain('Review Gates:');
99
- expect(output).toContain('security');
100
- expect(output).toContain('gemini');
101
- expect(output).toContain('Entry Points:');
102
- expect(output).toContain('src/');
103
- });
112
+ const output = logs.join("\n");
113
+ expect(output).toContain("Check Gates:");
114
+ expect(output).toContain("lint");
115
+ expect(output).toContain("Review Gates:");
116
+ expect(output).toContain("security");
117
+ expect(output).toContain("gemini");
118
+ expect(output).toContain("Entry Points:");
119
+ expect(output).toContain("src/");
120
+ });
104
121
  });
package/src/commands/list.ts
@@ -1,29 +1,33 @@
1
- import type { Command } from 'commander';
2
- import chalk from 'chalk';
3
- import { loadConfig } from '../config/loader.js';
1
+ import chalk from "chalk";
2
+ import type { Command } from "commander";
3
+ import { loadConfig } from "../config/loader.js";
4
4
 
5
5
  export function registerListCommand(program: Command): void {
6
- program
7
- .command('list')
8
- .description('List configured gates')
9
- .action(async () => {
10
- try {
11
- const config = await loadConfig();
12
- console.log(chalk.bold('Check Gates:'));
13
- Object.values(config.checks).forEach(c => console.log(` - ${c.name}`));
14
-
15
- console.log(chalk.bold('\nReview Gates:'));
16
- Object.values(config.reviews).forEach(r => console.log(` - ${r.name} (Tools: ${r.cli_preference?.join(', ')})`));
6
+ program
7
+ .command("list")
8
+ .description("List configured gates")
9
+ .action(async () => {
10
+ try {
11
+ const config = await loadConfig();
12
+ console.log(chalk.bold("Check Gates:"));
13
+ Object.values(config.checks).forEach((c) => {
14
+ console.log(` - ${c.name}`);
15
+ });
17
16
 
18
- console.log(chalk.bold('\nEntry Points:'));
19
- config.project.entry_points.forEach(ep => {
20
- console.log(` - ${ep.path}`);
21
- if (ep.checks) console.log(` Checks: ${ep.checks.join(', ')}`);
22
- if (ep.reviews) console.log(` Reviews: ${ep.reviews.join(', ')}`);
23
- });
17
+ console.log(chalk.bold("\nReview Gates:"));
18
+ Object.values(config.reviews).forEach((r) => {
19
+ console.log(` - ${r.name} (Tools: ${r.cli_preference?.join(", ")})`);
20
+ });
24
21
 
25
- } catch (error: any) {
26
- console.error(chalk.red('Error:'), error.message);
27
- }
28
- });
22
+ console.log(chalk.bold("\nEntry Points:"));
23
+ config.project.entry_points.forEach((ep) => {
24
+ console.log(` - ${ep.path}`);
25
+ if (ep.checks) console.log(` Checks: ${ep.checks.join(", ")}`);
26
+ if (ep.reviews) console.log(` Reviews: ${ep.reviews.join(", ")}`);
27
+ });
28
+ } catch (error: unknown) {
29
+ const err = error as { message?: string };
30
+ console.error(chalk.red("Error:"), err.message);
31
+ }
32
+ });
29
33
  }
package/src/commands/rerun.ts
@@ -1,122 +1,145 @@
1
- import type { Command } from 'commander';
2
- import chalk from 'chalk';
3
- import { loadConfig } from '../config/loader.js';
4
- import { ChangeDetector } from '../core/change-detector.js';
5
- import { EntryPointExpander } from '../core/entry-point.js';
6
- import { JobGenerator } from '../core/job.js';
7
- import { Runner } from '../core/runner.js';
8
- import { Logger } from '../output/logger.js';
9
- import { ConsoleReporter } from '../output/console.js';
10
- import { findPreviousFailures, type GateFailures, type PreviousViolation } from '../utils/log-parser.js';
11
- import { rotateLogs } from './shared.js';
1
+ import chalk from "chalk";
2
+ import type { Command } from "commander";
3
+ import { loadConfig } from "../config/loader.js";
4
+ import { ChangeDetector } from "../core/change-detector.js";
5
+ import { EntryPointExpander } from "../core/entry-point.js";
6
+ import { JobGenerator } from "../core/job.js";
7
+ import { Runner } from "../core/runner.js";
8
+ import { ConsoleReporter } from "../output/console.js";
9
+ import { Logger } from "../output/logger.js";
10
+ import {
11
+ findPreviousFailures,
12
+ type PreviousViolation,
13
+ } from "../utils/log-parser.js";
14
+ import { rotateLogs } from "./shared.js";
12
15
 
13
16
  export function registerRerunCommand(program: Command): void {
14
- program
15
- .command('rerun')
16
- .description('Rerun gates (checks & reviews) with previous failures as context (defaults to uncommitted changes)')
17
- .option('-g, --gate <name>', 'Run specific gate only')
18
- .option('-c, --commit <sha>', 'Use diff for a specific commit (overrides default uncommitted mode)')
19
- .action(async (options) => {
20
- try {
21
- const config = await loadConfig();
22
-
23
- // Parse previous failures from log files (only for review gates)
24
- console.log(chalk.dim('Analyzing previous runs...'));
25
-
26
- // findPreviousFailures handles errors internally and returns empty array on failure
27
- const previousFailures = await findPreviousFailures(
28
- config.project.log_dir,
29
- options.gate
30
- );
31
-
32
- // Create a map: jobId -> (adapterName -> violations)
33
- const failuresMap = new Map<string, Map<string, PreviousViolation[]>>();
34
- for (const gateFailure of previousFailures) {
35
- const adapterMap = new Map<string, PreviousViolation[]>();
36
- for (const adapterFailure of gateFailure.adapterFailures) {
37
- adapterMap.set(adapterFailure.adapterName, adapterFailure.violations);
38
- }
39
- failuresMap.set(gateFailure.jobId, adapterMap);
40
- }
41
-
42
- if (previousFailures.length > 0) {
43
- const totalViolations = previousFailures.reduce(
44
- (sum, gf) => sum + gf.adapterFailures.reduce(
45
- (s, af) => s + af.violations.length, 0
46
- ), 0
47
- );
48
- console.log(chalk.yellow(
49
- `Found ${previousFailures.length} gate(s) with ${totalViolations} previous violation(s)`
50
- ));
51
- } else {
52
- console.log(chalk.dim('No previous failures found. Running as normal...'));
53
- }
54
-
55
- // Rotate logs before starting the new run
56
- await rotateLogs(config.project.log_dir);
57
-
58
- // Detect changes (default to uncommitted unless --commit is specified)
59
- // Note: Rerun defaults to uncommitted changes for faster iteration loops,
60
- // unlike 'run' which defaults to base_branch comparison.
61
- const changeOptions = {
62
- commit: options.commit,
63
- uncommitted: !options.commit // Default to uncommitted unless commit is specified
64
- };
65
-
66
- const changeDetector = new ChangeDetector(
67
- config.project.base_branch,
68
- changeOptions
69
- );
70
- const expander = new EntryPointExpander();
71
- const jobGen = new JobGenerator(config);
72
-
73
- const modeDesc = options.commit
74
- ? `commit ${options.commit}`
75
- : 'uncommitted changes';
76
- console.log(chalk.dim(`Detecting changes (${modeDesc})...`));
77
-
78
- const changes = await changeDetector.getChangedFiles();
79
-
80
- if (changes.length === 0) {
81
- console.log(chalk.green('No changes detected.'));
82
- process.exit(0);
83
- }
84
-
85
- console.log(chalk.dim(`Found ${changes.length} changed files.`));
86
-
87
- const entryPoints = await expander.expand(config.project.entry_points, changes);
88
- let jobs = jobGen.generateJobs(entryPoints);
89
-
90
- if (options.gate) {
91
- jobs = jobs.filter(j => j.name === options.gate);
92
- }
93
-
94
- if (jobs.length === 0) {
95
- console.log(chalk.yellow('No applicable gates for these changes.'));
96
- process.exit(0);
97
- }
98
-
99
- console.log(chalk.dim(`Running ${jobs.length} gates...`));
100
- if (previousFailures.length > 0) {
101
- console.log(chalk.dim('Previous failures will be injected as context for matching reviewers.'));
102
- }
103
-
104
- const logger = new Logger(config.project.log_dir);
105
- const reporter = new ConsoleReporter();
106
- const runner = new Runner(
107
- config,
108
- logger,
109
- reporter,
110
- failuresMap, // Pass previous failures map
111
- changeOptions // Pass change detection options
112
- );
113
-
114
- const success = await runner.run(jobs);
115
- process.exit(success ? 0 : 1);
116
-
117
- } catch (error: any) {
118
- console.error(chalk.red('Error:'), error.message);
119
- process.exit(1);
120
- }
121
- });
17
+ program
18
+ .command("rerun")
19
+ .description(
20
+ "Rerun gates (checks & reviews) with previous failures as context (defaults to uncommitted changes)",
21
+ )
22
+ .option("-g, --gate <name>", "Run specific gate only")
23
+ .option(
24
+ "-c, --commit <sha>",
25
+ "Use diff for a specific commit (overrides default uncommitted mode)",
26
+ )
27
+ .action(async (options) => {
28
+ try {
29
+ const config = await loadConfig();
30
+
31
+ // Parse previous failures from log files (only for review gates)
32
+ console.log(chalk.dim("Analyzing previous runs..."));
33
+
34
+ // findPreviousFailures handles errors internally and returns empty array on failure
35
+ const previousFailures = await findPreviousFailures(
36
+ config.project.log_dir,
37
+ options.gate,
38
+ );
39
+
40
+ // Create a map: jobId -> (adapterName -> violations)
41
+ const failuresMap = new Map<string, Map<string, PreviousViolation[]>>();
42
+ for (const gateFailure of previousFailures) {
43
+ const adapterMap = new Map<string, PreviousViolation[]>();
44
+ for (const adapterFailure of gateFailure.adapterFailures) {
45
+ adapterMap.set(
46
+ adapterFailure.adapterName,
47
+ adapterFailure.violations,
48
+ );
49
+ }
50
+ failuresMap.set(gateFailure.jobId, adapterMap);
51
+ }
52
+
53
+ if (previousFailures.length > 0) {
54
+ const totalViolations = previousFailures.reduce(
55
+ (sum, gf) =>
56
+ sum +
57
+ gf.adapterFailures.reduce((s, af) => s + af.violations.length, 0),
58
+ 0,
59
+ );
60
+ console.log(
61
+ chalk.yellow(
62
+ `Found ${previousFailures.length} gate(s) with ${totalViolations} previous violation(s)`,
63
+ ),
64
+ );
65
+ } else {
66
+ console.log(
67
+ chalk.dim("No previous failures found. Running as normal..."),
68
+ );
69
+ }
70
+
71
+ // Rotate logs before starting the new run
72
+ await rotateLogs(config.project.log_dir);
73
+
74
+ // Detect changes (default to uncommitted unless --commit is specified)
75
+ // Note: Rerun defaults to uncommitted changes for faster iteration loops,
76
+ // unlike 'run' which defaults to base_branch comparison.
77
+ const changeOptions = {
78
+ commit: options.commit,
79
+ uncommitted: !options.commit, // Default to uncommitted unless commit is specified
80
+ };
81
+
82
+ const changeDetector = new ChangeDetector(
83
+ config.project.base_branch,
84
+ changeOptions,
85
+ );
86
+ const expander = new EntryPointExpander();
87
+ const jobGen = new JobGenerator(config);
88
+
89
+ const modeDesc = options.commit
90
+ ? `commit ${options.commit}`
91
+ : "uncommitted changes";
92
+ console.log(chalk.dim(`Detecting changes (${modeDesc})...`));
93
+
94
+ const changes = await changeDetector.getChangedFiles();
95
+
96
+ if (changes.length === 0) {
97
+ console.log(chalk.green("No changes detected."));
98
+ process.exit(0);
99
+ }
100
+
101
+ console.log(chalk.dim(`Found ${changes.length} changed files.`));
102
+
103
+ const entryPoints = await expander.expand(
104
+ config.project.entry_points,
105
+ changes,
106
+ );
107
+ let jobs = jobGen.generateJobs(entryPoints);
108
+
109
+ if (options.gate) {
110
+ jobs = jobs.filter((j) => j.name === options.gate);
111
+ }
112
+
113
+ if (jobs.length === 0) {
114
+ console.log(chalk.yellow("No applicable gates for these changes."));
115
+ process.exit(0);
116
+ }
117
+
118
+ console.log(chalk.dim(`Running ${jobs.length} gates...`));
119
+ if (previousFailures.length > 0) {
120
+ console.log(
121
+ chalk.dim(
122
+ "Previous failures will be injected as context for matching reviewers.",
123
+ ),
124
+ );
125
+ }
126
+
127
+ const logger = new Logger(config.project.log_dir);
128
+ const reporter = new ConsoleReporter();
129
+ const runner = new Runner(
130
+ config,
131
+ logger,
132
+ reporter,
133
+ failuresMap, // Pass previous failures map
134
+ changeOptions, // Pass change detection options
135
+ );
136
+
137
+ const success = await runner.run(jobs);
138
+ process.exit(success ? 0 : 1);
139
+ } catch (error: unknown) {
140
+ const err = error as { message?: string };
141
+ console.error(chalk.red("Error:"), err.message);
142
+ process.exit(1);
143
+ }
144
+ });
122
145
  }
package/src/commands/review.test.ts
@@ -1,25 +1,31 @@
1
- import { describe, it, expect, beforeEach } from 'bun:test';
2
- import { Command } from 'commander';
3
- import { registerReviewCommand } from './review.js';
1
+ import { beforeEach, describe, expect, it } from "bun:test";
2
+ import { Command } from "commander";
3
+ import { registerReviewCommand } from "./review.js";
4
4
 
5
- describe('Review Command', () => {
6
- let program: Command;
5
+ describe("Review Command", () => {
6
+ let program: Command;
7
7
 
8
- beforeEach(() => {
9
- program = new Command();
10
- registerReviewCommand(program);
11
- });
8
+ beforeEach(() => {
9
+ program = new Command();
10
+ registerReviewCommand(program);
11
+ });
12
12
 
13
- it('should register the review command', () => {
14
- const reviewCmd = program.commands.find(cmd => cmd.name() === 'review');
15
- expect(reviewCmd).toBeDefined();
16
- expect(reviewCmd?.description()).toBe('Run only applicable reviews for detected changes');
17
- });
13
+ it("should register the review command", () => {
14
+ const reviewCmd = program.commands.find((cmd) => cmd.name() === "review");
15
+ expect(reviewCmd).toBeDefined();
16
+ expect(reviewCmd?.description()).toBe(
17
+ "Run only applicable reviews for detected changes",
18
+ );
19
+ });
18
20
 
19
- it('should have correct options', () => {
20
- const reviewCmd = program.commands.find(cmd => cmd.name() === 'review');
21
- expect(reviewCmd?.options.some(opt => opt.long === '--gate')).toBe(true);
22
- expect(reviewCmd?.options.some(opt => opt.long === '--commit')).toBe(true);
23
- expect(reviewCmd?.options.some(opt => opt.long === '--uncommitted')).toBe(true);
24
- });
21
+ it("should have correct options", () => {
22
+ const reviewCmd = program.commands.find((cmd) => cmd.name() === "review");
23
+ expect(reviewCmd?.options.some((opt) => opt.long === "--gate")).toBe(true);
24
+ expect(reviewCmd?.options.some((opt) => opt.long === "--commit")).toBe(
25
+ true,
26
+ );
27
+ expect(reviewCmd?.options.some((opt) => opt.long === "--uncommitted")).toBe(
28
+ true,
29
+ );
30
+ });
25
31
  });