npm - agent-gauntlet - Versions diffs - 0.5.0 → 0.7.0 - Mend

agent-gauntlet 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/README.md +25 -51
package/package.json +8 -2
package/src/commands/detect.ts +9 -8
package/src/commands/init.ts +11 -1
package/src/commands/shared.ts +7 -0
package/src/commands/stop-hook.ts +488 -202
package/src/config/global.ts +5 -3
package/src/config/loader.ts +149 -53
package/src/config/schema.ts +90 -22
package/src/config/stop-hook-config.ts +93 -0
package/src/config/types.ts +20 -1
package/src/config/validator.ts +186 -147
package/src/core/job.ts +9 -8
package/src/core/run-executor.ts +372 -180
package/src/core/runner.ts +139 -51
package/src/gates/check.ts +20 -15
package/src/gates/result.ts +1 -0
package/src/gates/review.ts +29 -36
package/src/index.ts +0 -0
package/src/output/app-logger.ts +214 -0
package/src/output/console.ts +18 -16
package/src/output/sinks/console-sink.ts +59 -0
package/src/output/sinks/file-sink.ts +110 -0
package/src/types/gauntlet-status.ts +16 -1
package/src/utils/debug-log.ts +81 -2
package/src/utils/log-parser.ts +8 -5
package/src/utils/session-ref.ts +5 -3

package/README.md CHANGED Viewed

@@ -1,4 +1,11 @@
-# Agent Gauntlet
+![Agent Gauntlet logo](docs/images/logo2.png)
+[![CI](https://github.com/pacaplan/agent-gauntlet/actions/workflows/gauntlet.yml/badge.svg)](https://github.com/pacaplan/agent-gauntlet/actions/workflows/gauntlet.yml)
+[![npm](https://img.shields.io/npm/v/agent-gauntlet)](https://www.npmjs.com/package/agent-gauntlet)
+[![npm downloads](https://img.shields.io/npm/dm/agent-gauntlet)](https://www.npmjs.com/package/agent-gauntlet)
+[![TypeScript](https://img.shields.io/badge/TypeScript-5.0-blue?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
+[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![CodeRabbit](https://img.shields.io/coderabbit/prs/github/pacaplan/agent-gauntlet)](https://coderabbit.ai)
 > Don't just review the agent's code — put it through the gauntlet.
@@ -15,63 +22,29 @@ For AI reviews, it uses the CLI tool of your choice: Gemini, Codex, Claude Code,
 - **Leverage existing subscriptions**: Agent Gauntlet is *free* and tool-agnostic, leveraging the AI CLI tools you already have installed.
 - **Easy CI setup**: Define your checks once, run them locally and in GitHub.
-## Usage Patterns
-Agent Gauntlet supports three primary usage patterns, each suited for different development workflows:
-1. Run CLI: `agent-gauntlet run`
-2. Run agent command: `/gauntlet`
-3. Automatically run after agent completes task
-The use cases below illustrate when each of these patterns may be used.
-### 1. Planning Mode
-**Use case:** Generate and review high-level implementation plans before coding.
-**Problem Gauntlet solves:** Catch architectural issues and requirement misunderstandings before coding to avoid costly rework.
-**Workflow:**
-1. Create a plan document in your project directory
-2. Run `agent-gauntlet run` from the terminal
-3. Gauntlet detects the new or modified plan and invokes configured AI CLIs to review it
-4. *(Optional)* Ask your assistant to refine the plan based on review feedback
-**Note:** Review configuration and prompts are fully customizable. Example prompt: *"Review this plan for completeness and potential issues."*
-### 2. AI-Assisted Development
-**Use case:** Pair with an AI coding assistant to implement features with continuous quality checks.
-**Problem Gauntlet solves:** Catch AI-introduced bugs and quality issues through automated checks and multi-LLM review.
-**Workflow:**
+### vs AI Code Review Tools
-1. Collaborate with your assistant to implement code changes
-2. Run `/gauntlet` from chat
-3. Gauntlet detects changed files and runs configured checks (linter, tests, type checking, etc.)
-4. Simultaneously, Gauntlet invokes AI CLIs for code review
-5. Assistant reviews results, fixes identified issues, and runs `agent-gauntlet run` again
-6. Gauntlet detects existing logs, switches to verification mode, and checks fixes
-7. Process repeats automatically (up to 3 iterations) until all gates pass
+Unlike traditional code review tools designed for PR workflows, Agent Gauntlet provides real-time feedback loops for autonomous coding agents.
-### 3. Agentic Implementation
+| Use Case | Recommended |
+| :--- | :--- |
+| Autonomous agent development | **Agent Gauntlet** |
+| Traditional PR review with human reviewers | Other tools |
+| IDE-integrated review while coding | Other tools |
+| Enterprise with strict compliance requirements | Other tools |
+| Budget-conscious teams with existing AI CLI tools | **Agent Gauntlet** |
-**Use case:** Delegate well-defined tasks to a coding agent for autonomous implementation.
+[Full comparison →](docs/feature_comparison.md)
-**Problem Gauntlet solves:** Enable autonomous agent development with built-in quality gates, eliminating the validation gap when humans aren't in the loop.
+## Common Workflows
-**Workflow:**
+Agent Gauntlet supports three workflows, ranging from simple CLI execution to fully autonomous agentic integration:
-1. Configure your agent to automatically run `/gauntlet` after completing implementation:
-   - **Rules files:** Add to `.cursorrules`, `AGENT.md`, or similar
-   - **Custom commands:** Create a `/my-dev-workflow` that includes gauntlet
-   - **Git hooks:** Use pre-commit hooks to trigger gauntlet
-   - **Agent hooks:** Leverage platform features (e.g., Claude's Stop event)
-2. Assign the task to your agent and step away
-3. When you return: the task is complete, reviewed by a different LLM, all issues fixed, and CI checks passing
+- **CLI Mode** — Run checks via command line; ideal for CI pipelines and scripts.
+- **Assistant Mode** — AI assistant runs validation loop, fixing issues iteratively.
+- **Agentic Mode** — Autonomous agent validates and fixes in real-time via stop hook.
-**Benefit:** Fully autonomous quality assurance without manual intervention.
+![Agent Gauntlet Workflows](docs/images/workflows.png)
 ## Quick Start
@@ -88,4 +61,5 @@ For basic usage and configuration guide, see the [Quick Start Guide](docs/quick-
 - [Configuration Reference](docs/config-reference.md) — all configuration fields + defaults
 - [Stop Hook Guide](docs/stop-hook-guide.md) — integrate with Claude Code's stop hook
 - [CLI Invocation Details](docs/cli-invocation-details.md) — how we securely invoke AI CLIs
+- [Feature Comparison](docs/feature_comparison.md) — how Agent Gauntlet compares to other tools
 - [Development Guide](docs/development.md) — how to build and develop this project

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-gauntlet",
-  "version": "0.5.0",
+  "version": "0.7.0",
   "description": "A CLI tool for testing AI coding agents",
   "license": "Apache-2.0",
   "author": "Paul Caplan",
@@ -34,16 +34,22 @@
   "scripts": {
     "build": "bun build --compile --minify --sourcemap ./src/index.ts --outfile bin/agent-gauntlet",
     "test": "bun test",
-    "lint": "biome check src"
+    "lint": "biome check src",
+    "changeset": "changeset",
+    "version": "changeset version",
+    "release": "npm publish"
   },
   "devDependencies": {
     "@biomejs/biome": "^2.3.11",
+    "@changesets/changelog-github": "^0.5.2",
+    "@changesets/cli": "^2.29.8",
     "@types/bun": "latest"
   },
   "peerDependencies": {
     "typescript": "^5"
   },
   "dependencies": {
+    "@logtape/logtape": "^2.0.2",
     "chalk": "^5.6.2",
     "commander": "^14.0.2",
     "gray-matter": "^4.0.3",

package/src/commands/detect.ts CHANGED Viewed

@@ -68,18 +68,19 @@ export function registerDetectCommand(program: Command): void {
 				console.log(chalk.bold(`Would run ${jobs.length} gate(s):\n`));
-				// Group jobs by entry point for better display
-				const jobsByEntryPoint = new Map<string, Job[]>();
+				// Group jobs by working directory for better display
+				const jobsByWorkDir = new Map<string, Job[]>();
 				for (const job of jobs) {
-					if (!jobsByEntryPoint.has(job.entryPoint)) {
-						jobsByEntryPoint.set(job.entryPoint, []);
+					const key = job.workingDirectory;
+					if (!jobsByWorkDir.has(key)) {
+						jobsByWorkDir.set(key, []);
 					}
-					jobsByEntryPoint.get(job.entryPoint)?.push(job);
+					jobsByWorkDir.get(key)?.push(job);
 				}
-				for (const [entryPoint, entryJobs] of jobsByEntryPoint.entries()) {
-					console.log(chalk.cyan(`Entry point: ${entryPoint}`));
-					for (const job of entryJobs) {
+				for (const [workDir, wdJobs] of jobsByWorkDir.entries()) {
+					console.log(chalk.cyan(`Working directory: ${workDir}`));
+					for (const job of wdJobs) {
 						const typeLabel =
 							job.type === "check"
 								? chalk.yellow("check")

package/src/commands/init.ts CHANGED Viewed

@@ -51,6 +51,7 @@ interface InitOptions {
 }
 interface InitConfig {
+	baseBranch: string;
 	sourceDir: string;
 	lintCmd: string | null; // null means not selected, empty string means selected but blank (TODO)
 	testCmd: string | null; // null means not selected, empty string means selected but blank (TODO)
@@ -96,6 +97,7 @@ export function registerInitCommand(program: Command): void {
 			if (options.yes) {
 				config = {
+					baseBranch: "origin/main",
 					sourceDir: ".",
 					lintCmd: null,
 					testCmd: null,
@@ -296,6 +298,13 @@ async function promptForConfig(
 			}
 		}
+		// Base Branch
+		console.log();
+		const baseBranchInput = await question(
+			"Enter your base branch (e.g., origin/main, origin/develop) [default: origin/main]: ",
+		);
+		const baseBranch = baseBranchInput || "origin/main";
 		// Source Directory
 		console.log();
 		const sourceDirInput = await question(
@@ -325,6 +334,7 @@ async function promptForConfig(
 		rl.close();
 		return {
+			baseBranch,
 			sourceDir,
 			lintCmd,
 			testCmd,
@@ -356,7 +366,7 @@ function generateConfigYml(config: InitConfig): string {
     reviews:
       - code-quality`;
-	return `base_branch: origin/main
+	return `base_branch: ${config.baseBranch}
 log_dir: gauntlet_logs
 # Run gates in parallel when possible (default: true)

package/src/commands/shared.ts CHANGED Viewed

@@ -145,6 +145,12 @@ export async function hasExistingLogs(logDir: string): Promise<boolean> {
 /**
  * Get the set of persistent files that should never be moved during clean.
  */
+/**
+ * Marker file used by stop-hook to detect nested invocations.
+ * Must match STOP_HOOK_MARKER_FILE in stop-hook.ts.
+ */
+const STOP_HOOK_MARKER_FILE = ".stop-hook-active";
 function getPersistentFiles(): Set<string> {
 	return new Set([
 		getExecutionStateFilename(),
@@ -152,6 +158,7 @@ function getPersistentFiles(): Set<string> {
 		getDebugLogBackupFilename(),
 		LOCK_FILENAME,
 		SESSION_REF_FILENAME, // Will be deleted, not moved
+		STOP_HOOK_MARKER_FILE, // Cleaned up by stop-hook finally block, not cleanLogs
 	]);
 }