agent-gauntlet 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +3 -3
  2. package/package.json +1 -1
  3. package/src/cli-adapters/claude.ts +13 -1
  4. package/src/cli-adapters/gemini.ts +17 -2
  5. package/src/commands/check.ts +108 -12
  6. package/src/commands/ci/list-jobs.ts +3 -2
  7. package/src/commands/clean.ts +29 -0
  8. package/src/commands/help.ts +1 -1
  9. package/src/commands/index.ts +2 -1
  10. package/src/commands/init.ts +4 -4
  11. package/src/commands/review.ts +108 -12
  12. package/src/commands/run.ts +109 -12
  13. package/src/commands/shared.ts +56 -10
  14. package/src/commands/validate.ts +20 -0
  15. package/src/config/schema.ts +5 -0
  16. package/src/config/validator.ts +6 -13
  17. package/src/core/change-detector.ts +1 -0
  18. package/src/core/entry-point.ts +48 -7
  19. package/src/core/runner.ts +90 -56
  20. package/src/gates/result.ts +32 -0
  21. package/src/gates/review.ts +428 -162
  22. package/src/index.ts +4 -2
  23. package/src/output/console-log.ts +146 -0
  24. package/src/output/console.ts +103 -9
  25. package/src/output/logger.ts +52 -8
  26. package/src/templates/run_gauntlet.template.md +20 -13
  27. package/src/utils/log-parser.ts +498 -162
  28. package/src/utils/session-ref.ts +82 -0
  29. package/src/commands/check.test.ts +0 -29
  30. package/src/commands/detect.test.ts +0 -43
  31. package/src/commands/health.test.ts +0 -93
  32. package/src/commands/help.test.ts +0 -44
  33. package/src/commands/init.test.ts +0 -130
  34. package/src/commands/list.test.ts +0 -121
  35. package/src/commands/rerun.ts +0 -160
  36. package/src/commands/review.test.ts +0 -31
  37. package/src/commands/run.test.ts +0 -27
  38. package/src/config/loader.test.ts +0 -151
  39. package/src/core/entry-point.test.ts +0 -61
  40. package/src/gates/review.test.ts +0 -291
@@ -1,31 +0,0 @@
1
- import { beforeEach, describe, expect, it } from "bun:test";
2
- import { Command } from "commander";
3
- import { registerReviewCommand } from "./review.js";
4
-
5
- describe("Review Command", () => {
6
- let program: Command;
7
-
8
- beforeEach(() => {
9
- program = new Command();
10
- registerReviewCommand(program);
11
- });
12
-
13
- it("should register the review command", () => {
14
- const reviewCmd = program.commands.find((cmd) => cmd.name() === "review");
15
- expect(reviewCmd).toBeDefined();
16
- expect(reviewCmd?.description()).toBe(
17
- "Run only applicable reviews for detected changes",
18
- );
19
- });
20
-
21
- it("should have correct options", () => {
22
- const reviewCmd = program.commands.find((cmd) => cmd.name() === "review");
23
- expect(reviewCmd?.options.some((opt) => opt.long === "--gate")).toBe(true);
24
- expect(reviewCmd?.options.some((opt) => opt.long === "--commit")).toBe(
25
- true,
26
- );
27
- expect(reviewCmd?.options.some((opt) => opt.long === "--uncommitted")).toBe(
28
- true,
29
- );
30
- });
31
- });
@@ -1,27 +0,0 @@
1
- import { beforeEach, describe, expect, it } from "bun:test";
2
- import { Command } from "commander";
3
- import { registerRunCommand } from "./run.js";
4
-
5
- describe("Run Command", () => {
6
- let program: Command;
7
-
8
- beforeEach(() => {
9
- program = new Command();
10
- registerRunCommand(program);
11
- });
12
-
13
- it("should register the run command", () => {
14
- const runCmd = program.commands.find((cmd) => cmd.name() === "run");
15
- expect(runCmd).toBeDefined();
16
- expect(runCmd?.description()).toBe("Run gates for detected changes");
17
- });
18
-
19
- it("should have correct options", () => {
20
- const runCmd = program.commands.find((cmd) => cmd.name() === "run");
21
- expect(runCmd?.options.some((opt) => opt.long === "--gate")).toBe(true);
22
- expect(runCmd?.options.some((opt) => opt.long === "--commit")).toBe(true);
23
- expect(runCmd?.options.some((opt) => opt.long === "--uncommitted")).toBe(
24
- true,
25
- );
26
- });
27
- });
@@ -1,151 +0,0 @@
1
- import { afterAll, beforeAll, describe, expect, it } from "bun:test";
2
- import fs from "node:fs/promises";
3
- import path from "node:path";
4
- import { loadConfig } from "./loader.js";
5
-
6
- const TEST_DIR = path.join(process.cwd(), `test-env-${Date.now()}`);
7
- const GAUNTLET_DIR = path.join(TEST_DIR, ".gauntlet");
8
- const CHECKS_DIR = path.join(GAUNTLET_DIR, "checks");
9
- const REVIEWS_DIR = path.join(GAUNTLET_DIR, "reviews");
10
-
11
- describe("Config Loader", () => {
12
- beforeAll(async () => {
13
- // Setup directory structure
14
- await fs.mkdir(TEST_DIR);
15
- await fs.mkdir(GAUNTLET_DIR);
16
- await fs.mkdir(CHECKS_DIR);
17
- await fs.mkdir(REVIEWS_DIR);
18
-
19
- // Write config.yml
20
- await fs.writeFile(
21
- path.join(GAUNTLET_DIR, "config.yml"),
22
- `
23
- base_branch: origin/dev
24
- log_dir: test_logs
25
- cli:
26
- default_preference:
27
- - claude
28
- - gemini
29
- check_usage_limit: false
30
- entry_points:
31
- - path: src/
32
- checks:
33
- - lint
34
- reviews:
35
- - security
36
- `,
37
- );
38
-
39
- // Write a check definition
40
- await fs.writeFile(
41
- path.join(CHECKS_DIR, "lint.yml"),
42
- `
43
- name: lint
44
- command: npm run lint
45
- working_directory: .
46
- `,
47
- );
48
-
49
- // Write a review definition
50
- await fs.writeFile(
51
- path.join(REVIEWS_DIR, "security.md"),
52
- `---
53
- cli_preference:
54
- - gemini
55
- ---
56
-
57
- # Security Review
58
- Check for vulnerabilities.
59
- `,
60
- );
61
-
62
- // Write a review definition without preference
63
- await fs.writeFile(
64
- path.join(REVIEWS_DIR, "style.md"),
65
- `---
66
- num_reviews: 1
67
- ---
68
-
69
- # Style Review
70
- Check style.
71
- `,
72
- );
73
- });
74
-
75
- afterAll(async () => {
76
- // Cleanup
77
- await fs.rm(TEST_DIR, { recursive: true, force: true });
78
- });
79
-
80
- it("should load project configuration correctly", async () => {
81
- const config = await loadConfig(TEST_DIR);
82
-
83
- expect(config.project.base_branch).toBe("origin/dev");
84
- expect(config.project.log_dir).toBe("test_logs");
85
- expect(config.project.entry_points).toHaveLength(1);
86
- expect(config.project.entry_points[0].path).toBe("src/");
87
- });
88
-
89
- it("should load check gates correctly", async () => {
90
- const config = await loadConfig(TEST_DIR);
91
-
92
- expect(Object.keys(config.checks)).toContain("lint");
93
- expect(config.checks.lint.command).toBe("npm run lint");
94
- });
95
-
96
- it("should load review gates correctly", async () => {
97
- const config = await loadConfig(TEST_DIR);
98
-
99
- expect(Object.keys(config.reviews)).toContain("security");
100
- expect(config.reviews.security.name).toBe("security");
101
- expect(config.reviews.security.cli_preference).toEqual(["gemini"]);
102
- expect(config.reviews.security.promptContent).toContain(
103
- "Check for vulnerabilities.",
104
- );
105
- });
106
-
107
- it("should merge default cli preference", async () => {
108
- const config = await loadConfig(TEST_DIR);
109
-
110
- expect(Object.keys(config.reviews)).toContain("style");
111
- expect(config.reviews.style.cli_preference).toEqual(["claude", "gemini"]);
112
- });
113
-
114
- it("should reject check gate with fail_fast when parallel is true", async () => {
115
- await fs.writeFile(
116
- path.join(CHECKS_DIR, "invalid.yml"),
117
- `
118
- name: invalid
119
- command: echo test
120
- parallel: true
121
- fail_fast: true
122
- `,
123
- );
124
-
125
- await expect(loadConfig(TEST_DIR)).rejects.toThrow(
126
- /fail_fast can only be used when parallel is false/,
127
- );
128
- });
129
-
130
- it("should accept check gate with fail_fast when parallel is false", async () => {
131
- // Clean up the invalid file first
132
- try {
133
- await fs.unlink(path.join(CHECKS_DIR, "invalid.yml"));
134
- } catch {}
135
-
136
- await fs.writeFile(
137
- path.join(CHECKS_DIR, "valid.yml"),
138
- `
139
- name: valid
140
- command: echo test
141
- parallel: false
142
- fail_fast: true
143
- `,
144
- );
145
-
146
- const config = await loadConfig(TEST_DIR);
147
- expect(config.checks.valid).toBeDefined();
148
- expect(config.checks.valid.fail_fast).toBe(true);
149
- expect(config.checks.valid.parallel).toBe(false);
150
- });
151
- });
@@ -1,61 +0,0 @@
1
- import { describe, expect, it } from "bun:test";
2
- import type { EntryPointConfig } from "../config/types.js";
3
- import { EntryPointExpander } from "./entry-point.js";
4
-
5
- describe("EntryPointExpander", () => {
6
- const expander = new EntryPointExpander();
7
-
8
- it("should include root entry point if there are any changes", async () => {
9
- const entryPoints: EntryPointConfig[] = [{ path: "." }];
10
- const changes = ["some/file.ts"];
11
-
12
- const result = await expander.expand(entryPoints, changes);
13
-
14
- expect(result).toHaveLength(1);
15
- expect(result[0].path).toBe(".");
16
- });
17
-
18
- it("should match fixed directory entry points", async () => {
19
- const entryPoints: EntryPointConfig[] = [
20
- { path: "apps/api" },
21
- { path: "apps/web" },
22
- ];
23
- const changes = ["apps/api/src/index.ts"];
24
-
25
- const result = await expander.expand(entryPoints, changes);
26
-
27
- // Result should have root (implicit or explicit fallback in code) + matched
28
- // Looking at code: "if (changedFiles.length > 0) ... results.push({ path: '.', ... })"
29
- // Wait, the code creates a default root config if one isn't provided in the list?
30
- // Code: "const rootConfig = rootEntryPoint ?? { path: '.' }; results.push({ path: '.', config: rootConfig });"
31
- // Yes, it always pushes root if changes > 0.
32
-
33
- expect(result.some((r) => r.path === "apps/api")).toBe(true);
34
- expect(result.some((r) => r.path === "apps/web")).toBe(false);
35
- });
36
-
37
- it("should match wildcard entry points", async () => {
38
- const entryPoints: EntryPointConfig[] = [{ path: "packages/*" }];
39
- const changes = [
40
- "packages/ui/button.ts",
41
- "packages/utils/helper.ts",
42
- "other/file.ts",
43
- ];
44
-
45
- const result = await expander.expand(entryPoints, changes);
46
-
47
- const paths = result.map((r) => r.path);
48
- expect(paths).toContain("packages/ui");
49
- expect(paths).toContain("packages/utils");
50
- expect(paths).not.toContain("packages/other");
51
- });
52
-
53
- it("should handle no changes", async () => {
54
- const entryPoints: EntryPointConfig[] = [{ path: "." }];
55
- const changes: string[] = [];
56
-
57
- const result = await expander.expand(entryPoints, changes);
58
-
59
- expect(result).toHaveLength(0);
60
- });
61
- });
@@ -1,291 +0,0 @@
1
- import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test";
2
- import fs from "node:fs/promises";
3
- import path from "node:path";
4
- import type { CLIAdapter } from "../cli-adapters/index.js";
5
- import type {
6
- ReviewGateConfig,
7
- ReviewPromptFrontmatter,
8
- } from "../config/types.js";
9
- import { Logger } from "../output/logger.js";
10
- import type { ReviewGateExecutor } from "./review.js";
11
-
12
- const TEST_DIR = path.join(process.cwd(), `test-review-logs-${Date.now()}`);
13
-
14
- describe("ReviewGateExecutor Logging", () => {
15
- let logger: Logger;
16
- let executor: ReviewGateExecutor;
17
- let originalCI: string | undefined;
18
- let originalGithubActions: string | undefined;
19
- let originalCwd: string;
20
-
21
- beforeEach(async () => {
22
- await fs.mkdir(TEST_DIR, { recursive: true });
23
-
24
- // Save and disable CI mode for this test to avoid complex git ref issues
25
- originalCI = process.env.CI;
26
- originalGithubActions = process.env.GITHUB_ACTIONS;
27
- originalCwd = process.cwd();
28
- delete process.env.CI;
29
- delete process.env.GITHUB_ACTIONS;
30
-
31
- // Change to test directory with its own git repo to avoid issues with the main repo
32
- process.chdir(TEST_DIR);
33
- // Initialize a minimal git repo for the test
34
- const { exec } = await import("node:child_process");
35
- const { promisify } = await import("node:util");
36
- const execAsync = promisify(exec);
37
- await execAsync("git init");
38
- await execAsync('git config user.email "test@test.com"');
39
- await execAsync('git config user.name "Test"');
40
- // Create an initial commit so we have a history
41
- await fs.writeFile("test.txt", "initial");
42
- await execAsync("git add test.txt");
43
- await execAsync('git commit -m "initial"');
44
- // Create a "main" branch
45
- await execAsync("git branch -M main");
46
- // Create src directory for our test
47
- await fs.mkdir("src", { recursive: true });
48
- await fs.writeFile("src/test.ts", "test content");
49
- await execAsync("git add src/test.ts");
50
- await execAsync('git commit -m "add src"');
51
-
52
- // Make uncommitted changes so the diff isn't empty
53
- await fs.writeFile("src/test.ts", "modified test content");
54
-
55
- // Now create the log directory and logger in the test directory
56
- await fs.mkdir("logs", { recursive: true });
57
- logger = new Logger(path.join(process.cwd(), "logs"));
58
-
59
- // Create a factory function for mock adapters that returns the correct name
60
- const createMockAdapter = (name: string): CLIAdapter =>
61
- ({
62
- name,
63
- isAvailable: async () => true,
64
- checkHealth: async () => ({ status: "healthy" }),
65
- // execute returns the raw string output from the LLM, which is then parsed by the executor.
66
- // The real adapter returns a string. In this test, we return a JSON string to simulate
67
- // the LLM returning structured data. This IS intentional and matches the expected contract
68
- // where execute() -> Promise<string>.
69
- execute: async () => {
70
- await new Promise((r) => setTimeout(r, 1)); // Simulate async work
71
- return JSON.stringify({ status: "pass", message: "OK" });
72
- },
73
- getProjectCommandDir: () => null,
74
- getUserCommandDir: () => null,
75
- getCommandExtension: () => "md",
76
- canUseSymlink: () => false,
77
- transformCommand: (c: string) => c,
78
- }) as unknown as CLIAdapter;
79
-
80
- // Mock getAdapter and other exports that may be imported by other modules
81
- mock.module("../cli-adapters/index.js", () => ({
82
- getAdapter: (name: string) => createMockAdapter(name),
83
- getAllAdapters: () => [
84
- createMockAdapter("codex"),
85
- createMockAdapter("claude"),
86
- ],
87
- getProjectCommandAdapters: () => [
88
- createMockAdapter("codex"),
89
- createMockAdapter("claude"),
90
- ],
91
- getUserCommandAdapters: () => [
92
- createMockAdapter("codex"),
93
- createMockAdapter("claude"),
94
- ],
95
- getValidCLITools: () => ["codex", "claude", "gemini"],
96
- }));
97
-
98
- const { ReviewGateExecutor } = await import("./review.js");
99
- executor = new ReviewGateExecutor();
100
- });
101
-
102
- afterEach(async () => {
103
- // Restore working directory first
104
- process.chdir(originalCwd);
105
-
106
- await fs.rm(TEST_DIR, { recursive: true, force: true });
107
- mock.restore();
108
-
109
- // Restore CI env vars
110
- if (originalCI !== undefined) {
111
- process.env.CI = originalCI;
112
- }
113
- if (originalGithubActions !== undefined) {
114
- process.env.GITHUB_ACTIONS = originalGithubActions;
115
- }
116
- });
117
-
118
- it("should only create adapter-specific logs and no generic log", async () => {
119
- const jobId = "review:src:code-quality";
120
- const config: ReviewGateConfig & ReviewPromptFrontmatter = {
121
- name: "code-quality",
122
- cli_preference: ["codex", "claude"],
123
- num_reviews: 2,
124
- };
125
-
126
- const loggerFactory = logger.createLoggerFactory(jobId);
127
-
128
- // We need to mock getDiff since it uses execAsync which we mocked
129
- // Actually ReviewGateExecutor is a class, we can mock its private method if needed
130
- // or just let it run if the mock promisify works.
131
-
132
- const result = await executor.execute(
133
- jobId,
134
- config,
135
- "src/",
136
- loggerFactory,
137
- "main",
138
- );
139
-
140
- // Enhanced error messages for better debugging
141
- if (result.status !== "pass") {
142
- throw new Error(
143
- `Expected result.status to be "pass" but got "${result.status}". Message: ${result.message || "none"}. Duration: ${result.duration}ms`,
144
- );
145
- }
146
-
147
- if (!result.logPaths) {
148
- throw new Error(
149
- `Expected result.logPaths to be defined but got ${JSON.stringify(result.logPaths)}`,
150
- );
151
- }
152
-
153
- if (result.logPaths.length !== 2) {
154
- throw new Error(
155
- `Expected result.logPaths to have length 2 but got ${result.logPaths.length}. Paths: ${JSON.stringify(result.logPaths)}`,
156
- );
157
- }
158
-
159
- if (!result.logPaths[0]?.includes("review_src_code-quality_codex.log")) {
160
- throw new Error(
161
- `Expected result.logPaths[0] to contain "review_src_code-quality_codex.log" but got "${result.logPaths[0]}"`,
162
- );
163
- }
164
-
165
- if (!result.logPaths[1]?.includes("review_src_code-quality_claude.log")) {
166
- throw new Error(
167
- `Expected result.logPaths[1] to contain "review_src_code-quality_claude.log" but got "${result.logPaths[1]}"`,
168
- );
169
- }
170
-
171
- const files = await fs.readdir("logs");
172
- const filesList = files.join(", ");
173
-
174
- if (!files.includes("review_src_code-quality_codex.log")) {
175
- throw new Error(
176
- `Expected log directory to contain "review_src_code-quality_codex.log" but only found: [${filesList}]`,
177
- );
178
- }
179
-
180
- if (!files.includes("review_src_code-quality_claude.log")) {
181
- throw new Error(
182
- `Expected log directory to contain "review_src_code-quality_claude.log" but only found: [${filesList}]`,
183
- );
184
- }
185
-
186
- if (files.includes("review_src_code-quality.log")) {
187
- throw new Error(
188
- `Expected log directory NOT to contain generic log "review_src_code-quality.log" but it was found. All files: [${filesList}]`,
189
- );
190
- }
191
-
192
- // Verify multiplexed content
193
- const codexLog = await fs.readFile(
194
- "logs/review_src_code-quality_codex.log",
195
- "utf-8",
196
- );
197
- if (!codexLog.includes("Starting review: code-quality")) {
198
- throw new Error(
199
- `Expected codex log to contain "Starting review: code-quality" but got: ${codexLog.substring(0, 200)}...`,
200
- );
201
- }
202
- if (!codexLog.includes("Review result (codex): pass")) {
203
- throw new Error(
204
- `Expected codex log to contain "Review result (codex): pass" but got: ${codexLog.substring(0, 200)}...`,
205
- );
206
- }
207
-
208
- const claudeLog = await fs.readFile(
209
- "logs/review_src_code-quality_claude.log",
210
- "utf-8",
211
- );
212
- if (!claudeLog.includes("Starting review: code-quality")) {
213
- throw new Error(
214
- `Expected claude log to contain "Starting review: code-quality" but got: ${claudeLog.substring(0, 200)}...`,
215
- );
216
- }
217
- if (!claudeLog.includes("Review result (claude): pass")) {
218
- throw new Error(
219
- `Expected claude log to contain "Review result (claude): pass" but got: ${claudeLog.substring(0, 200)}...`,
220
- );
221
- }
222
- });
223
-
224
- it("should be handled correctly by ConsoleReporter", async () => {
225
- const jobId = "review:src:code-quality";
226
- const codexPath = "logs/review_src_code-quality_codex.log";
227
- const claudePath = "logs/review_src_code-quality_claude.log";
228
-
229
- await fs.writeFile(
230
- codexPath,
231
- `
232
- [2026-01-14T10:00:00.000Z] Starting review: code-quality
233
- --- Parsed Result (codex) ---
234
- Status: FAIL
235
- Violations:
236
- 1. src/index.ts:10 - Security risk
237
- Fix: Use a safer method
238
- `,
239
- );
240
-
241
- await fs.writeFile(
242
- claudePath,
243
- `
244
- [2026-01-14T10:00:00.000Z] Starting review: code-quality
245
- --- Parsed Result (claude) ---
246
- Status: FAIL
247
- Violations:
248
- 1. src/main.ts:20 - Style issue
249
- Fix: Rename variable
250
- `,
251
- );
252
-
253
- const result = {
254
- jobId,
255
- status: "fail" as const,
256
- duration: 1000,
257
- message: "Found violations",
258
- logPaths: [codexPath, claudePath],
259
- };
260
-
261
- const { ConsoleReporter } = await import("../output/console.js");
262
- const reporter = new ConsoleReporter();
263
-
264
- // We can access extractFailureDetails directly as it is public
265
- const details = await reporter.extractFailureDetails(result);
266
-
267
- // Check for presence of key information rather than exact counts
268
- expect(
269
- details.some(
270
- (d: string) =>
271
- d.includes("src/index.ts") &&
272
- d.includes("10") &&
273
- d.includes("Security risk"),
274
- ),
275
- ).toBe(true);
276
- expect(details.some((d: string) => d.includes("Use a safer method"))).toBe(
277
- true,
278
- );
279
- expect(
280
- details.some(
281
- (d: string) =>
282
- d.includes("src/main.ts") &&
283
- d.includes("20") &&
284
- d.includes("Style issue"),
285
- ),
286
- ).toBe(true);
287
- expect(details.some((d: string) => d.includes("Rename variable"))).toBe(
288
- true,
289
- );
290
- });
291
- });