agent-gauntlet 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -51
- package/package.json +8 -2
- package/src/commands/detect.ts +9 -8
- package/src/commands/init.ts +11 -1
- package/src/commands/shared.ts +7 -0
- package/src/commands/stop-hook.ts +488 -202
- package/src/config/global.ts +5 -3
- package/src/config/loader.ts +149 -53
- package/src/config/schema.ts +90 -22
- package/src/config/stop-hook-config.ts +93 -0
- package/src/config/types.ts +20 -1
- package/src/config/validator.ts +186 -147
- package/src/core/job.ts +9 -8
- package/src/core/run-executor.ts +372 -180
- package/src/core/runner.ts +139 -51
- package/src/gates/check.ts +20 -15
- package/src/gates/result.ts +1 -0
- package/src/gates/review.ts +29 -36
- package/src/index.ts +0 -0
- package/src/output/app-logger.ts +214 -0
- package/src/output/console.ts +18 -16
- package/src/output/sinks/console-sink.ts +59 -0
- package/src/output/sinks/file-sink.ts +110 -0
- package/src/types/gauntlet-status.ts +16 -1
- package/src/utils/debug-log.ts +81 -2
- package/src/utils/log-parser.ts +8 -5
- package/src/utils/session-ref.ts +5 -3
package/README.md
CHANGED
|
@@ -1,4 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+

|
|
2
|
+
|
|
3
|
+
[](https://github.com/pacaplan/agent-gauntlet/actions/workflows/gauntlet.yml)
|
|
4
|
+
[](https://www.npmjs.com/package/agent-gauntlet)
|
|
5
|
+
[](https://www.npmjs.com/package/agent-gauntlet)
|
|
6
|
+
[](https://www.typescriptlang.org/)
|
|
7
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
8
|
+
[](https://coderabbit.ai)
|
|
2
9
|
|
|
3
10
|
> Don't just review the agent's code — put it through the gauntlet.
|
|
4
11
|
|
|
@@ -15,63 +22,29 @@ For AI reviews, it uses the CLI tool of your choice: Gemini, Codex, Claude Code,
|
|
|
15
22
|
- **Leverage existing subscriptions**: Agent Gauntlet is *free* and tool-agnostic, leveraging the AI CLI tools you already have installed.
|
|
16
23
|
- **Easy CI setup**: Define your checks once, run them locally and in GitHub.
|
|
17
24
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
Agent Gauntlet supports three primary usage patterns, each suited for different development workflows:
|
|
21
|
-
1. Run CLI: `agent-gauntlet run`
|
|
22
|
-
2. Run agent command: `/gauntlet`
|
|
23
|
-
3. Automatically run after agent completes task
|
|
24
|
-
|
|
25
|
-
The use cases below illustrate when each of these patterns may be used.
|
|
26
|
-
|
|
27
|
-
### 1. Planning Mode
|
|
28
|
-
|
|
29
|
-
**Use case:** Generate and review high-level implementation plans before coding.
|
|
30
|
-
|
|
31
|
-
**Problem Gauntlet solves:** Catch architectural issues and requirement misunderstandings before coding to avoid costly rework.
|
|
32
|
-
|
|
33
|
-
**Workflow:**
|
|
34
|
-
|
|
35
|
-
1. Create a plan document in your project directory
|
|
36
|
-
2. Run `agent-gauntlet run` from the terminal
|
|
37
|
-
3. Gauntlet detects the new or modified plan and invokes configured AI CLIs to review it
|
|
38
|
-
4. *(Optional)* Ask your assistant to refine the plan based on review feedback
|
|
39
|
-
|
|
40
|
-
**Note:** Review configuration and prompts are fully customizable. Example prompt: *"Review this plan for completeness and potential issues."*
|
|
41
|
-
|
|
42
|
-
### 2. AI-Assisted Development
|
|
43
|
-
|
|
44
|
-
**Use case:** Pair with an AI coding assistant to implement features with continuous quality checks.
|
|
45
|
-
|
|
46
|
-
**Problem Gauntlet solves:** Catch AI-introduced bugs and quality issues through automated checks and multi-LLM review.
|
|
47
|
-
|
|
48
|
-
**Workflow:**
|
|
25
|
+
### vs AI Code Review Tools
|
|
49
26
|
|
|
50
|
-
|
|
51
|
-
2. Run `/gauntlet` from chat
|
|
52
|
-
3. Gauntlet detects changed files and runs configured checks (linter, tests, type checking, etc.)
|
|
53
|
-
4. Simultaneously, Gauntlet invokes AI CLIs for code review
|
|
54
|
-
5. Assistant reviews results, fixes identified issues, and runs `agent-gauntlet run` again
|
|
55
|
-
6. Gauntlet detects existing logs, switches to verification mode, and checks fixes
|
|
56
|
-
7. Process repeats automatically (up to 3 iterations) until all gates pass
|
|
27
|
+
Unlike traditional code review tools designed for PR workflows, Agent Gauntlet provides real-time feedback loops for autonomous coding agents.
|
|
57
28
|
|
|
58
|
-
|
|
29
|
+
| Use Case | Recommended |
|
|
30
|
+
| :--- | :--- |
|
|
31
|
+
| Autonomous agent development | **Agent Gauntlet** |
|
|
32
|
+
| Traditional PR review with human reviewers | Other tools |
|
|
33
|
+
| IDE-integrated review while coding | Other tools |
|
|
34
|
+
| Enterprise with strict compliance requirements | Other tools |
|
|
35
|
+
| Budget-conscious teams with existing AI CLI tools | **Agent Gauntlet** |
|
|
59
36
|
|
|
60
|
-
|
|
37
|
+
[Full comparison →](docs/feature_comparison.md)
|
|
61
38
|
|
|
62
|
-
|
|
39
|
+
## Common Workflows
|
|
63
40
|
|
|
64
|
-
|
|
41
|
+
Agent Gauntlet supports three workflows, ranging from simple CLI execution to fully autonomous agentic integration:
|
|
65
42
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
- **Git hooks:** Use pre-commit hooks to trigger gauntlet
|
|
70
|
-
- **Agent hooks:** Leverage platform features (e.g., Claude's Stop event)
|
|
71
|
-
2. Assign the task to your agent and step away
|
|
72
|
-
3. When you return: the task is complete, reviewed by a different LLM, all issues fixed, and CI checks passing
|
|
43
|
+
- **CLI Mode** — Run checks via command line; ideal for CI pipelines and scripts.
|
|
44
|
+
- **Assistant Mode** — AI assistant runs validation loop, fixing issues iteratively.
|
|
45
|
+
- **Agentic Mode** — Autonomous agent validates and fixes in real-time via stop hook.
|
|
73
46
|
|
|
74
|
-
|
|
47
|
+

|
|
75
48
|
|
|
76
49
|
## Quick Start
|
|
77
50
|
|
|
@@ -88,4 +61,5 @@ For basic usage and configuration guide, see the [Quick Start Guide](docs/quick-
|
|
|
88
61
|
- [Configuration Reference](docs/config-reference.md) — all configuration fields + defaults
|
|
89
62
|
- [Stop Hook Guide](docs/stop-hook-guide.md) — integrate with Claude Code's stop hook
|
|
90
63
|
- [CLI Invocation Details](docs/cli-invocation-details.md) — how we securely invoke AI CLIs
|
|
64
|
+
- [Feature Comparison](docs/feature_comparison.md) — how Agent Gauntlet compares to other tools
|
|
91
65
|
- [Development Guide](docs/development.md) — how to build and develop this project
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-gauntlet",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "A CLI tool for testing AI coding agents",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"author": "Paul Caplan",
|
|
@@ -34,16 +34,22 @@
|
|
|
34
34
|
"scripts": {
|
|
35
35
|
"build": "bun build --compile --minify --sourcemap ./src/index.ts --outfile bin/agent-gauntlet",
|
|
36
36
|
"test": "bun test",
|
|
37
|
-
"lint": "biome check src"
|
|
37
|
+
"lint": "biome check src",
|
|
38
|
+
"changeset": "changeset",
|
|
39
|
+
"version": "changeset version",
|
|
40
|
+
"release": "npm publish"
|
|
38
41
|
},
|
|
39
42
|
"devDependencies": {
|
|
40
43
|
"@biomejs/biome": "^2.3.11",
|
|
44
|
+
"@changesets/changelog-github": "^0.5.2",
|
|
45
|
+
"@changesets/cli": "^2.29.8",
|
|
41
46
|
"@types/bun": "latest"
|
|
42
47
|
},
|
|
43
48
|
"peerDependencies": {
|
|
44
49
|
"typescript": "^5"
|
|
45
50
|
},
|
|
46
51
|
"dependencies": {
|
|
52
|
+
"@logtape/logtape": "^2.0.2",
|
|
47
53
|
"chalk": "^5.6.2",
|
|
48
54
|
"commander": "^14.0.2",
|
|
49
55
|
"gray-matter": "^4.0.3",
|
package/src/commands/detect.ts
CHANGED
|
@@ -68,18 +68,19 @@ export function registerDetectCommand(program: Command): void {
|
|
|
68
68
|
|
|
69
69
|
console.log(chalk.bold(`Would run ${jobs.length} gate(s):\n`));
|
|
70
70
|
|
|
71
|
-
// Group jobs by
|
|
72
|
-
const
|
|
71
|
+
// Group jobs by working directory for better display
|
|
72
|
+
const jobsByWorkDir = new Map<string, Job[]>();
|
|
73
73
|
for (const job of jobs) {
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
const key = job.workingDirectory;
|
|
75
|
+
if (!jobsByWorkDir.has(key)) {
|
|
76
|
+
jobsByWorkDir.set(key, []);
|
|
76
77
|
}
|
|
77
|
-
|
|
78
|
+
jobsByWorkDir.get(key)?.push(job);
|
|
78
79
|
}
|
|
79
80
|
|
|
80
|
-
for (const [
|
|
81
|
-
console.log(chalk.cyan(`
|
|
82
|
-
for (const job of
|
|
81
|
+
for (const [workDir, wdJobs] of jobsByWorkDir.entries()) {
|
|
82
|
+
console.log(chalk.cyan(`Working directory: ${workDir}`));
|
|
83
|
+
for (const job of wdJobs) {
|
|
83
84
|
const typeLabel =
|
|
84
85
|
job.type === "check"
|
|
85
86
|
? chalk.yellow("check")
|
package/src/commands/init.ts
CHANGED
|
@@ -51,6 +51,7 @@ interface InitOptions {
|
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
interface InitConfig {
|
|
54
|
+
baseBranch: string;
|
|
54
55
|
sourceDir: string;
|
|
55
56
|
lintCmd: string | null; // null means not selected, empty string means selected but blank (TODO)
|
|
56
57
|
testCmd: string | null; // null means not selected, empty string means selected but blank (TODO)
|
|
@@ -96,6 +97,7 @@ export function registerInitCommand(program: Command): void {
|
|
|
96
97
|
|
|
97
98
|
if (options.yes) {
|
|
98
99
|
config = {
|
|
100
|
+
baseBranch: "origin/main",
|
|
99
101
|
sourceDir: ".",
|
|
100
102
|
lintCmd: null,
|
|
101
103
|
testCmd: null,
|
|
@@ -296,6 +298,13 @@ async function promptForConfig(
|
|
|
296
298
|
}
|
|
297
299
|
}
|
|
298
300
|
|
|
301
|
+
// Base Branch
|
|
302
|
+
console.log();
|
|
303
|
+
const baseBranchInput = await question(
|
|
304
|
+
"Enter your base branch (e.g., origin/main, origin/develop) [default: origin/main]: ",
|
|
305
|
+
);
|
|
306
|
+
const baseBranch = baseBranchInput || "origin/main";
|
|
307
|
+
|
|
299
308
|
// Source Directory
|
|
300
309
|
console.log();
|
|
301
310
|
const sourceDirInput = await question(
|
|
@@ -325,6 +334,7 @@ async function promptForConfig(
|
|
|
325
334
|
|
|
326
335
|
rl.close();
|
|
327
336
|
return {
|
|
337
|
+
baseBranch,
|
|
328
338
|
sourceDir,
|
|
329
339
|
lintCmd,
|
|
330
340
|
testCmd,
|
|
@@ -356,7 +366,7 @@ function generateConfigYml(config: InitConfig): string {
|
|
|
356
366
|
reviews:
|
|
357
367
|
- code-quality`;
|
|
358
368
|
|
|
359
|
-
return `base_branch:
|
|
369
|
+
return `base_branch: ${config.baseBranch}
|
|
360
370
|
log_dir: gauntlet_logs
|
|
361
371
|
|
|
362
372
|
# Run gates in parallel when possible (default: true)
|
package/src/commands/shared.ts
CHANGED
|
@@ -145,6 +145,12 @@ export async function hasExistingLogs(logDir: string): Promise<boolean> {
|
|
|
145
145
|
/**
|
|
146
146
|
* Get the set of persistent files that should never be moved during clean.
|
|
147
147
|
*/
|
|
148
|
+
/**
|
|
149
|
+
* Marker file used by stop-hook to detect nested invocations.
|
|
150
|
+
* Must match STOP_HOOK_MARKER_FILE in stop-hook.ts.
|
|
151
|
+
*/
|
|
152
|
+
const STOP_HOOK_MARKER_FILE = ".stop-hook-active";
|
|
153
|
+
|
|
148
154
|
function getPersistentFiles(): Set<string> {
|
|
149
155
|
return new Set([
|
|
150
156
|
getExecutionStateFilename(),
|
|
@@ -152,6 +158,7 @@ function getPersistentFiles(): Set<string> {
|
|
|
152
158
|
getDebugLogBackupFilename(),
|
|
153
159
|
LOCK_FILENAME,
|
|
154
160
|
SESSION_REF_FILENAME, // Will be deleted, not moved
|
|
161
|
+
STOP_HOOK_MARKER_FILE, // Cleaned up by stop-hook finally block, not cleanLogs
|
|
155
162
|
]);
|
|
156
163
|
}
|
|
157
164
|
|