agent-gauntlet 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.md +55 -87
  2. package/package.json +4 -2
  3. package/src/bun-plugins.d.ts +4 -0
  4. package/src/cli-adapters/claude.ts +139 -108
  5. package/src/cli-adapters/codex.ts +141 -117
  6. package/src/cli-adapters/cursor.ts +152 -0
  7. package/src/cli-adapters/gemini.ts +171 -139
  8. package/src/cli-adapters/github-copilot.ts +153 -0
  9. package/src/cli-adapters/index.ts +77 -48
  10. package/src/commands/check.test.ts +24 -20
  11. package/src/commands/check.ts +86 -59
  12. package/src/commands/ci/index.ts +15 -0
  13. package/src/commands/ci/init.ts +96 -0
  14. package/src/commands/ci/list-jobs.ts +78 -0
  15. package/src/commands/detect.test.ts +38 -32
  16. package/src/commands/detect.ts +89 -61
  17. package/src/commands/health.test.ts +67 -53
  18. package/src/commands/health.ts +167 -145
  19. package/src/commands/help.test.ts +37 -37
  20. package/src/commands/help.ts +31 -22
  21. package/src/commands/index.ts +10 -9
  22. package/src/commands/init.test.ts +120 -107
  23. package/src/commands/init.ts +514 -417
  24. package/src/commands/list.test.ts +87 -70
  25. package/src/commands/list.ts +28 -24
  26. package/src/commands/rerun.ts +157 -119
  27. package/src/commands/review.test.ts +26 -20
  28. package/src/commands/review.ts +86 -59
  29. package/src/commands/run.test.ts +22 -20
  30. package/src/commands/run.ts +85 -58
  31. package/src/commands/shared.ts +44 -35
  32. package/src/config/ci-loader.ts +33 -0
  33. package/src/config/ci-schema.ts +52 -0
  34. package/src/config/loader.test.ts +112 -90
  35. package/src/config/loader.ts +132 -123
  36. package/src/config/schema.ts +48 -47
  37. package/src/config/types.ts +28 -13
  38. package/src/config/validator.ts +521 -454
  39. package/src/core/change-detector.ts +122 -104
  40. package/src/core/entry-point.test.ts +60 -62
  41. package/src/core/entry-point.ts +120 -74
  42. package/src/core/job.ts +69 -59
  43. package/src/core/runner.ts +264 -230
  44. package/src/gates/check.ts +78 -69
  45. package/src/gates/result.ts +7 -7
  46. package/src/gates/review.test.ts +277 -138
  47. package/src/gates/review.ts +724 -561
  48. package/src/index.ts +18 -15
  49. package/src/output/console.ts +253 -214
  50. package/src/output/logger.ts +66 -52
  51. package/src/templates/run_gauntlet.template.md +18 -0
  52. package/src/templates/workflow.yml +77 -0
  53. package/src/utils/diff-parser.ts +64 -62
  54. package/src/utils/log-parser.ts +227 -206
  55. package/src/utils/sanitizer.ts +1 -1
@@ -1,72 +1,99 @@
1
- import type { Command } from 'commander';
2
- import chalk from 'chalk';
3
- import { loadConfig } from '../config/loader.js';
4
- import { ChangeDetector } from '../core/change-detector.js';
5
- import { EntryPointExpander } from '../core/entry-point.js';
6
- import { JobGenerator } from '../core/job.js';
7
- import { Runner } from '../core/runner.js';
8
- import { Logger } from '../output/logger.js';
9
- import { ConsoleReporter } from '../output/console.js';
10
- import { rotateLogs } from './shared.js';
1
+ import chalk from "chalk";
2
+ import type { Command } from "commander";
3
+ import { loadConfig } from "../config/loader.js";
4
+ import { ChangeDetector } from "../core/change-detector.js";
5
+ import { EntryPointExpander } from "../core/entry-point.js";
6
+ import { JobGenerator } from "../core/job.js";
7
+ import { Runner } from "../core/runner.js";
8
+ import { ConsoleReporter } from "../output/console.js";
9
+ import { Logger } from "../output/logger.js";
10
+ import { rotateLogs } from "./shared.js";
11
11
 
12
12
  export function registerReviewCommand(program: Command): void {
13
- program
14
- .command('review')
15
- .description('Run only applicable reviews for detected changes')
16
- .option('-g, --gate <name>', 'Run specific review gate only')
17
- .option('-c, --commit <sha>', 'Use diff for a specific commit')
18
- .option('-u, --uncommitted', 'Use diff for current uncommitted changes (staged and unstaged)')
19
- .action(async (options) => {
20
- try {
21
- const config = await loadConfig();
13
+ program
14
+ .command("review")
15
+ .description("Run only applicable reviews for detected changes")
16
+ .option(
17
+ "-b, --base-branch <branch>",
18
+ "Override base branch for change detection",
19
+ )
20
+ .option("-g, --gate <name>", "Run specific review gate only")
21
+ .option("-c, --commit <sha>", "Use diff for a specific commit")
22
+ .option(
23
+ "-u, --uncommitted",
24
+ "Use diff for current uncommitted changes (staged and unstaged)",
25
+ )
26
+ .action(async (options) => {
27
+ try {
28
+ const config = await loadConfig();
22
29
 
23
- // Rotate logs before starting
24
- await rotateLogs(config.project.log_dir);
30
+ // Rotate logs before starting
31
+ await rotateLogs(config.project.log_dir);
25
32
 
26
- const changeDetector = new ChangeDetector(config.project.base_branch, {
27
- commit: options.commit,
28
- uncommitted: options.uncommitted
29
- });
30
- const expander = new EntryPointExpander();
31
- const jobGen = new JobGenerator(config);
32
-
33
- console.log(chalk.dim('Detecting changes...'));
34
- const changes = await changeDetector.getChangedFiles();
35
-
36
- if (changes.length === 0) {
37
- console.log(chalk.green('No changes detected.'));
38
- process.exit(0);
39
- }
40
-
41
- console.log(chalk.dim(`Found ${changes.length} changed files.`));
33
+ // Determine effective base branch
34
+ // Priority: CLI override > CI env var > config
35
+ const effectiveBaseBranch =
36
+ options.baseBranch ||
37
+ (process.env.GITHUB_BASE_REF &&
38
+ (process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true")
39
+ ? process.env.GITHUB_BASE_REF
40
+ : null) ||
41
+ config.project.base_branch;
42
42
 
43
- const entryPoints = await expander.expand(config.project.entry_points, changes);
44
- let jobs = jobGen.generateJobs(entryPoints);
43
+ const changeDetector = new ChangeDetector(effectiveBaseBranch, {
44
+ commit: options.commit,
45
+ uncommitted: options.uncommitted,
46
+ });
47
+ const expander = new EntryPointExpander();
48
+ const jobGen = new JobGenerator(config);
45
49
 
46
- // Filter to only reviews
47
- jobs = jobs.filter(j => j.type === 'review');
50
+ console.log(chalk.dim("Detecting changes..."));
51
+ const changes = await changeDetector.getChangedFiles();
48
52
 
49
- if (options.gate) {
50
- jobs = jobs.filter(j => j.name === options.gate);
51
- }
53
+ if (changes.length === 0) {
54
+ console.log(chalk.green("No changes detected."));
55
+ process.exit(0);
56
+ }
52
57
 
53
- if (jobs.length === 0) {
54
- console.log(chalk.yellow('No applicable reviews for these changes.'));
55
- process.exit(0);
56
- }
58
+ console.log(chalk.dim(`Found ${changes.length} changed files.`));
57
59
 
58
- console.log(chalk.dim(`Running ${jobs.length} review(s)...`));
60
+ const entryPoints = await expander.expand(
61
+ config.project.entry_points,
62
+ changes,
63
+ );
64
+ let jobs = jobGen.generateJobs(entryPoints);
59
65
 
60
- const logger = new Logger(config.project.log_dir);
61
- const reporter = new ConsoleReporter();
62
- const runner = new Runner(config, logger, reporter);
66
+ // Filter to only reviews
67
+ jobs = jobs.filter((j) => j.type === "review");
63
68
 
64
- const success = await runner.run(jobs);
65
- process.exit(success ? 0 : 1);
69
+ if (options.gate) {
70
+ jobs = jobs.filter((j) => j.name === options.gate);
71
+ }
66
72
 
67
- } catch (error: any) {
68
- console.error(chalk.red('Error:'), error.message);
69
- process.exit(1);
70
- }
71
- });
73
+ if (jobs.length === 0) {
74
+ console.log(chalk.yellow("No applicable reviews for these changes."));
75
+ process.exit(0);
76
+ }
77
+
78
+ console.log(chalk.dim(`Running ${jobs.length} review(s)...`));
79
+
80
+ const logger = new Logger(config.project.log_dir);
81
+ const reporter = new ConsoleReporter();
82
+ const runner = new Runner(
83
+ config,
84
+ logger,
85
+ reporter,
86
+ undefined,
87
+ undefined,
88
+ effectiveBaseBranch,
89
+ );
90
+
91
+ const success = await runner.run(jobs);
92
+ process.exit(success ? 0 : 1);
93
+ } catch (error: unknown) {
94
+ const err = error as { message?: string };
95
+ console.error(chalk.red("Error:"), err.message);
96
+ process.exit(1);
97
+ }
98
+ });
72
99
  }
@@ -1,25 +1,27 @@
1
- import { describe, it, expect, beforeEach } from 'bun:test';
2
- import { Command } from 'commander';
3
- import { registerRunCommand } from './run.js';
1
+ import { beforeEach, describe, expect, it } from "bun:test";
2
+ import { Command } from "commander";
3
+ import { registerRunCommand } from "./run.js";
4
4
 
5
- describe('Run Command', () => {
6
- let program: Command;
5
+ describe("Run Command", () => {
6
+ let program: Command;
7
7
 
8
- beforeEach(() => {
9
- program = new Command();
10
- registerRunCommand(program);
11
- });
8
+ beforeEach(() => {
9
+ program = new Command();
10
+ registerRunCommand(program);
11
+ });
12
12
 
13
- it('should register the run command', () => {
14
- const runCmd = program.commands.find(cmd => cmd.name() === 'run');
15
- expect(runCmd).toBeDefined();
16
- expect(runCmd?.description()).toBe('Run gates for detected changes');
17
- });
13
+ it("should register the run command", () => {
14
+ const runCmd = program.commands.find((cmd) => cmd.name() === "run");
15
+ expect(runCmd).toBeDefined();
16
+ expect(runCmd?.description()).toBe("Run gates for detected changes");
17
+ });
18
18
 
19
- it('should have correct options', () => {
20
- const runCmd = program.commands.find(cmd => cmd.name() === 'run');
21
- expect(runCmd?.options.some(opt => opt.long === '--gate')).toBe(true);
22
- expect(runCmd?.options.some(opt => opt.long === '--commit')).toBe(true);
23
- expect(runCmd?.options.some(opt => opt.long === '--uncommitted')).toBe(true);
24
- });
19
+ it("should have correct options", () => {
20
+ const runCmd = program.commands.find((cmd) => cmd.name() === "run");
21
+ expect(runCmd?.options.some((opt) => opt.long === "--gate")).toBe(true);
22
+ expect(runCmd?.options.some((opt) => opt.long === "--commit")).toBe(true);
23
+ expect(runCmd?.options.some((opt) => opt.long === "--uncommitted")).toBe(
24
+ true,
25
+ );
26
+ });
25
27
  });
@@ -1,69 +1,96 @@
1
- import type { Command } from 'commander';
2
- import chalk from 'chalk';
3
- import { loadConfig } from '../config/loader.js';
4
- import { ChangeDetector } from '../core/change-detector.js';
5
- import { EntryPointExpander } from '../core/entry-point.js';
6
- import { JobGenerator } from '../core/job.js';
7
- import { Runner } from '../core/runner.js';
8
- import { Logger } from '../output/logger.js';
9
- import { ConsoleReporter } from '../output/console.js';
10
- import { rotateLogs } from './shared.js';
1
+ import chalk from "chalk";
2
+ import type { Command } from "commander";
3
+ import { loadConfig } from "../config/loader.js";
4
+ import { ChangeDetector } from "../core/change-detector.js";
5
+ import { EntryPointExpander } from "../core/entry-point.js";
6
+ import { JobGenerator } from "../core/job.js";
7
+ import { Runner } from "../core/runner.js";
8
+ import { ConsoleReporter } from "../output/console.js";
9
+ import { Logger } from "../output/logger.js";
10
+ import { rotateLogs } from "./shared.js";
11
11
 
12
12
  export function registerRunCommand(program: Command): void {
13
- program
14
- .command('run')
15
- .description('Run gates for detected changes')
16
- .option('-g, --gate <name>', 'Run specific gate only')
17
- .option('-c, --commit <sha>', 'Use diff for a specific commit')
18
- .option('-u, --uncommitted', 'Use diff for current uncommitted changes (staged and unstaged)')
19
- .action(async (options) => {
20
- try {
21
- const config = await loadConfig();
22
-
23
- // Rotate logs before starting
24
- await rotateLogs(config.project.log_dir);
13
+ program
14
+ .command("run")
15
+ .description("Run gates for detected changes")
16
+ .option(
17
+ "-b, --base-branch <branch>",
18
+ "Override base branch for change detection",
19
+ )
20
+ .option("-g, --gate <name>", "Run specific gate only")
21
+ .option("-c, --commit <sha>", "Use diff for a specific commit")
22
+ .option(
23
+ "-u, --uncommitted",
24
+ "Use diff for current uncommitted changes (staged and unstaged)",
25
+ )
26
+ .action(async (options) => {
27
+ try {
28
+ const config = await loadConfig();
25
29
 
26
- const changeDetector = new ChangeDetector(config.project.base_branch, {
27
- commit: options.commit,
28
- uncommitted: options.uncommitted
29
- });
30
- const expander = new EntryPointExpander();
31
- const jobGen = new JobGenerator(config);
32
-
33
- console.log(chalk.dim('Detecting changes...'));
34
- const changes = await changeDetector.getChangedFiles();
35
-
36
- if (changes.length === 0) {
37
- console.log(chalk.green('No changes detected.'));
38
- process.exit(0);
39
- }
40
-
41
- console.log(chalk.dim(`Found ${changes.length} changed files.`));
30
+ // Rotate logs before starting
31
+ await rotateLogs(config.project.log_dir);
42
32
 
43
- const entryPoints = await expander.expand(config.project.entry_points, changes);
44
- let jobs = jobGen.generateJobs(entryPoints);
33
+ // Determine effective base branch
34
+ // Priority: CLI override > CI env var > config
35
+ const effectiveBaseBranch =
36
+ options.baseBranch ||
37
+ (process.env.GITHUB_BASE_REF &&
38
+ (process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true")
39
+ ? process.env.GITHUB_BASE_REF
40
+ : null) ||
41
+ config.project.base_branch;
45
42
 
46
- if (options.gate) {
47
- jobs = jobs.filter(j => j.name === options.gate);
48
- }
43
+ const changeDetector = new ChangeDetector(effectiveBaseBranch, {
44
+ commit: options.commit,
45
+ uncommitted: options.uncommitted,
46
+ });
47
+ const expander = new EntryPointExpander();
48
+ const jobGen = new JobGenerator(config);
49
49
 
50
- if (jobs.length === 0) {
51
- console.log(chalk.yellow('No applicable gates for these changes.'));
52
- process.exit(0);
53
- }
50
+ console.log(chalk.dim("Detecting changes..."));
51
+ const changes = await changeDetector.getChangedFiles();
54
52
 
55
- console.log(chalk.dim(`Running ${jobs.length} gates...`));
53
+ if (changes.length === 0) {
54
+ console.log(chalk.green("No changes detected."));
55
+ process.exit(0);
56
+ }
56
57
 
57
- const logger = new Logger(config.project.log_dir);
58
- const reporter = new ConsoleReporter();
59
- const runner = new Runner(config, logger, reporter);
58
+ console.log(chalk.dim(`Found ${changes.length} changed files.`));
60
59
 
61
- const success = await runner.run(jobs);
62
- process.exit(success ? 0 : 1);
60
+ const entryPoints = await expander.expand(
61
+ config.project.entry_points,
62
+ changes,
63
+ );
64
+ let jobs = jobGen.generateJobs(entryPoints);
63
65
 
64
- } catch (error: any) {
65
- console.error(chalk.red('Error:'), error.message);
66
- process.exit(1);
67
- }
68
- });
66
+ if (options.gate) {
67
+ jobs = jobs.filter((j) => j.name === options.gate);
68
+ }
69
+
70
+ if (jobs.length === 0) {
71
+ console.log(chalk.yellow("No applicable gates for these changes."));
72
+ process.exit(0);
73
+ }
74
+
75
+ console.log(chalk.dim(`Running ${jobs.length} gates...`));
76
+
77
+ const logger = new Logger(config.project.log_dir);
78
+ const reporter = new ConsoleReporter();
79
+ const runner = new Runner(
80
+ config,
81
+ logger,
82
+ reporter,
83
+ undefined,
84
+ undefined,
85
+ effectiveBaseBranch,
86
+ );
87
+
88
+ const success = await runner.run(jobs);
89
+ process.exit(success ? 0 : 1);
90
+ } catch (error: unknown) {
91
+ const err = error as { message?: string };
92
+ console.error(chalk.red("Error:"), err.message);
93
+ process.exit(1);
94
+ }
95
+ });
69
96
  }
@@ -1,44 +1,53 @@
1
- import fs from 'node:fs/promises';
2
- import path from 'node:path';
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
3
 
4
4
  export async function exists(path: string): Promise<boolean> {
5
- try {
6
- await fs.stat(path);
7
- return true;
8
- } catch {
9
- return false;
10
- }
5
+ try {
6
+ await fs.stat(path);
7
+ return true;
8
+ } catch {
9
+ return false;
10
+ }
11
11
  }
12
12
 
13
13
  export async function rotateLogs(logDir: string): Promise<void> {
14
- const previousDir = path.join(logDir, 'previous');
14
+ const previousDir = path.join(logDir, "previous");
15
15
 
16
- try {
17
- // 1. Ensure logDir exists (if not, nothing to rotate, but we should create it for future use if needed,
18
- // though usually the logger creates it. If it doesn't exist, we can just return).
19
- if (!(await exists(logDir))) {
20
- return;
21
- }
16
+ try {
17
+ // 1. Ensure logDir exists (if not, nothing to rotate, but we should create it for future use if needed,
18
+ // though usually the logger creates it. If it doesn't exist, we can just return).
19
+ if (!(await exists(logDir))) {
20
+ return;
21
+ }
22
22
 
23
- // 2. Clear .gauntlet_logs/previous if it exists
24
- if (await exists(previousDir)) {
25
- const previousFiles = await fs.readdir(previousDir);
26
- await Promise.all(
27
- previousFiles.map(file => fs.rm(path.join(previousDir, file), { recursive: true, force: true }))
28
- );
29
- } else {
30
- await fs.mkdir(previousDir, { recursive: true });
31
- }
23
+ // 2. Clear .gauntlet_logs/previous if it exists
24
+ if (await exists(previousDir)) {
25
+ const previousFiles = await fs.readdir(previousDir);
26
+ await Promise.all(
27
+ previousFiles.map((file) =>
28
+ fs.rm(path.join(previousDir, file), { recursive: true, force: true }),
29
+ ),
30
+ );
31
+ } else {
32
+ await fs.mkdir(previousDir, { recursive: true });
33
+ }
32
34
 
33
- // 3. Move all existing files in .gauntlet_logs/ to .gauntlet_logs/previous
34
- const files = await fs.readdir(logDir);
35
- await Promise.all(
36
- files
37
- .filter(file => file !== 'previous')
38
- .map(file => fs.rename(path.join(logDir, file), path.join(previousDir, file)))
39
- );
40
- } catch (error) {
41
- // Log warning but don't crash the run as log rotation failure isn't critical
42
- console.warn(`Failed to rotate logs in ${logDir}:`, error instanceof Error ? error.message : error);
43
- }
35
+ // 3. Move all existing files in .gauntlet_logs/ to .gauntlet_logs/previous
36
+ const files = await fs.readdir(logDir);
37
+ await Promise.all(
38
+ files
39
+ .filter((file) => file !== "previous")
40
+ .map((file) =>
41
+ fs.rename(path.join(logDir, file), path.join(previousDir, file)),
42
+ ),
43
+ );
44
+ } catch (error) {
45
+ // Log warning but don't crash the run as log rotation failure isn't critical
46
+ console.warn(
47
+ "Failed to rotate logs in",
48
+ logDir,
49
+ ":",
50
+ error instanceof Error ? error.message : error,
51
+ );
52
+ }
44
53
  }
@@ -0,0 +1,33 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import YAML from "yaml";
4
+ import { ciConfigSchema } from "./ci-schema.js";
5
+ import type { CIConfig } from "./types.js";
6
+
7
+ const GAUNTLET_DIR = ".gauntlet";
8
+ const CI_FILE = "ci.yml";
9
+
10
+ export async function loadCIConfig(
11
+ rootDir: string = process.cwd(),
12
+ ): Promise<CIConfig> {
13
+ const ciPath = path.join(rootDir, GAUNTLET_DIR, CI_FILE);
14
+
15
+ if (!(await fileExists(ciPath))) {
16
+ throw new Error(
17
+ `CI configuration file not found at ${ciPath}. Run 'agent-gauntlet ci init' to create it.`,
18
+ );
19
+ }
20
+
21
+ const content = await fs.readFile(ciPath, "utf-8");
22
+ const raw = YAML.parse(content);
23
+ return ciConfigSchema.parse(raw);
24
+ }
25
+
26
+ async function fileExists(path: string): Promise<boolean> {
27
+ try {
28
+ const stat = await fs.stat(path);
29
+ return stat.isFile();
30
+ } catch {
31
+ return false;
32
+ }
33
+ }
@@ -0,0 +1,52 @@
1
+ import { z } from "zod";
2
+
3
+ export const runtimeConfigSchema = z.record(
4
+ z.string(),
5
+ z
6
+ .object({
7
+ version: z.string().min(1),
8
+ bundler_cache: z.boolean().optional(),
9
+ })
10
+ .passthrough(),
11
+ );
12
+
13
+ export const serviceConfigSchema = z.record(
14
+ z.string(),
15
+ z
16
+ .object({
17
+ image: z.string().min(1),
18
+ env: z.record(z.string()).optional(),
19
+ ports: z.array(z.string()).optional(),
20
+ options: z.string().optional(),
21
+ health_check: z
22
+ .object({
23
+ cmd: z.string().optional(),
24
+ interval: z.string().optional(),
25
+ timeout: z.string().optional(),
26
+ retries: z.number().optional(),
27
+ })
28
+ .optional(),
29
+ })
30
+ .passthrough(),
31
+ );
32
+
33
+ export const ciSetupStepSchema = z.object({
34
+ name: z.string().min(1),
35
+ run: z.string().min(1),
36
+ working_directory: z.string().optional(),
37
+ if: z.string().optional(),
38
+ });
39
+
40
+ export const ciCheckConfigSchema = z.object({
41
+ name: z.string().min(1),
42
+ requires_runtimes: z.array(z.string()).optional(),
43
+ requires_services: z.array(z.string()).optional(),
44
+ setup: z.array(ciSetupStepSchema).optional(),
45
+ });
46
+
47
+ export const ciConfigSchema = z.object({
48
+ runtimes: runtimeConfigSchema.nullable().optional(),
49
+ services: serviceConfigSchema.nullable().optional(),
50
+ setup: z.array(ciSetupStepSchema).nullable().optional(),
51
+ checks: z.array(ciCheckConfigSchema).nullable().optional(),
52
+ });