npm - agent-gauntlet - Versions diffs - 0.1.12 → 0.2.0 - Mend

agent-gauntlet 0.1.12 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/package.json +1 -1
package/src/commands/ci/init.ts +1 -1
package/src/commands/ci/list-jobs.ts +12 -1
package/src/commands/health.test.ts +1 -1
package/src/commands/init.ts +2 -2
package/src/commands/list.test.ts +1 -1
package/src/commands/shared.ts +2 -2
package/src/config/ci-schema.ts +4 -28
package/src/config/loader.ts +51 -4
package/src/config/schema.ts +2 -1
package/src/config/types.ts +6 -1
package/src/core/job.ts +12 -1
package/src/core/runner.ts +6 -2
package/src/gates/check.ts +37 -4
package/src/gates/result.ts +1 -0
package/src/output/console.ts +8 -0
package/src/templates/run_gauntlet.template.md +4 -1
package/src/templates/workflow.yml +4 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-gauntlet",
-  "version": "0.1.12",
+  "version": "0.2.0",
   "description": "A CLI tool for testing AI coding agents",
   "license": "Apache-2.0",
   "author": "Paul Caplan",

package/src/commands/ci/init.ts CHANGED Viewed

@@ -72,7 +72,7 @@ checks:
 			.join("\n");
 		templateContent = templateContent.replace(
-			"# Services will be injected here by agent-gauntlet",
+			"    # Services will be injected here by agent-gauntlet",
 			indentedServices,
 		);
 	} else {

package/src/commands/ci/list-jobs.ts CHANGED Viewed

@@ -13,6 +13,7 @@ export async function listJobs(): Promise<void> {
 		);
 		const matrixJobs = [];
+		const seenJobs = new Set<string>();
 		const globalSetup = formatSetup(ciConfig.setup || undefined);
@@ -32,13 +33,23 @@ export async function listJobs(): Promise<void> {
 							continue;
 						}
+						const workingDirectory = checkDef.working_directory || ep.path;
+						// Include entry point in key to ensure each entry point/check pair is distinct
+						const jobKey = `${ep.path}:${check.name}:${workingDirectory}`;
+						// Skip if we've already created a job for this exact entry point/check combination
+						if (seenJobs.has(jobKey)) {
+							continue;
+						}
+						seenJobs.add(jobKey);
 						const id = `${check.name}-${ep.path.replace(/\//g, "-")}`;
 						matrixJobs.push({
 							id,
 							name: check.name,
 							entry_point: ep.path,
-							working_directory: checkDef.working_directory || ep.path,
+							working_directory: workingDirectory,
 							command: checkDef.command,
 							runtimes: check.requires_runtimes || [],
 							services: check.requires_services || [],

package/src/commands/health.test.ts CHANGED Viewed

@@ -33,7 +33,7 @@ describe("Health Command", () => {
 			path.join(GAUNTLET_DIR, "config.yml"),
 			`
 base_branch: origin/main
-log_dir: .gauntlet_logs
+log_dir: gauntlet_logs
 cli:
   default_preference:
     - gemini

package/src/commands/init.ts CHANGED Viewed

@@ -16,7 +16,7 @@ allowed-tools: Bash
 Execute the autonomous verification suite.
 1. Run \`agent-gauntlet run\`.
-2. If it fails, read the log files in \`.gauntlet_logs/\` to understand exactly what went wrong.
+2. If it fails, read the log files in \`gauntlet_logs/\` to understand exactly what went wrong.
 3. Fix any code or logic errors found by the tools or AI reviewers, prioritizing higher-priority violations (critical > high > medium > low).
 4. If you disagree with AI reviewer feedback, briefly explain your reasoning in the code comments rather than ignoring it silently.
 5. Do NOT commit your changes yet—keep them uncommitted so the rerun command can review them.
@@ -336,7 +336,7 @@ function generateConfigYml(config: InitConfig): string {
       - code-quality`;
 	return `base_branch: origin/main
-log_dir: .gauntlet_logs
+log_dir: gauntlet_logs
 # Run gates in parallel when possible (default: true)
 # allow_parallel: true

package/src/commands/list.test.ts CHANGED Viewed

@@ -37,7 +37,7 @@ describe("List Command", () => {
 			path.join(GAUNTLET_DIR, "config.yml"),
 			`
 base_branch: origin/main
-log_dir: .gauntlet_logs
+log_dir: gauntlet_logs
 cli:
   default_preference:
     - gemini

package/src/commands/shared.ts CHANGED Viewed

@@ -20,7 +20,7 @@ export async function rotateLogs(logDir: string): Promise<void> {
 			return;
 		}
-		// 2. Clear .gauntlet_logs/previous if it exists
+		// 2. Clear gauntlet_logs/previous if it exists
 		if (await exists(previousDir)) {
 			const previousFiles = await fs.readdir(previousDir);
 			await Promise.all(
@@ -32,7 +32,7 @@ export async function rotateLogs(logDir: string): Promise<void> {
 			await fs.mkdir(previousDir, { recursive: true });
 		}
-		// 3. Move all existing files in .gauntlet_logs/ to .gauntlet_logs/previous
+		// 3. Move all existing files in gauntlet_logs/ to gauntlet_logs/previous
 		const files = await fs.readdir(logDir);
 		await Promise.all(
 			files

package/src/config/ci-schema.ts CHANGED Viewed

@@ -1,34 +1,10 @@
 import { z } from "zod";
-export const runtimeConfigSchema = z.record(
-	z.string(),
-	z
-		.object({
-			version: z.string().min(1),
-			bundler_cache: z.boolean().optional(),
-		})
-		.passthrough(),
-);
+// Runtime and service schemas use z.any() to allow flexibility for different CI providers
+// Each provider (GitHub Actions, GitLab CI, etc.) has its own configuration structure
+export const runtimeConfigSchema = z.record(z.string(), z.any());
-export const serviceConfigSchema = z.record(
-	z.string(),
-	z
-		.object({
-			image: z.string().min(1),
-			env: z.record(z.string()).optional(),
-			ports: z.array(z.string()).optional(),
-			options: z.string().optional(),
-			health_check: z
-				.object({
-					cmd: z.string().optional(),
-					interval: z.string().optional(),
-					timeout: z.string().optional(),
-					retries: z.number().optional(),
-				})
-				.optional(),
-		})
-		.passthrough(),
-);
+export const serviceConfigSchema = z.record(z.string(), z.any());
 export const ciSetupStepSchema = z.object({
 	name: z.string().min(1),

package/src/config/loader.ts CHANGED Viewed

@@ -7,7 +7,11 @@ import {
 	gauntletConfigSchema,
 	reviewPromptFrontmatterSchema,
 } from "./schema.js";
-import type { CheckGateConfig, LoadedConfig } from "./types.js";
+import type {
+	CheckGateConfig,
+	LoadedCheckGateConfig,
+	LoadedConfig,
+} from "./types.js";
 const GAUNTLET_DIR = ".gauntlet";
 const CONFIG_FILE = "config.yml";
@@ -31,7 +35,7 @@ export async function loadConfig(
 	// 2. Load checks
 	const checksPath = path.join(gauntletPath, CHECKS_DIR);
-	const checks: Record<string, CheckGateConfig> = {};
+	const checks: Record<string, LoadedCheckGateConfig> = {};
 	if (await dirExists(checksPath)) {
 		const checkFiles = await fs.readdir(checksPath);
@@ -41,8 +45,51 @@ export async function loadConfig(
 				const content = await fs.readFile(filePath, "utf-8");
 				const raw = YAML.parse(content);
 				// Ensure name matches filename if not provided or just use filename as key
-				const parsed = checkGateSchema.parse(raw);
-				checks[parsed.name] = parsed;
+				const parsed: CheckGateConfig = checkGateSchema.parse(raw);
+				// Load fix instructions if specified
+				const loadedCheck: LoadedCheckGateConfig = { ...parsed };
+				if (parsed.fix_instructions) {
+					// Security: Reject absolute paths to prevent reading arbitrary files
+					if (path.isAbsolute(parsed.fix_instructions)) {
+						throw new Error(
+							`Fix instructions path must be relative to .gauntlet/ directory, got absolute path: ${parsed.fix_instructions} (referenced by check "${parsed.name}")`,
+						);
+					}
+					// Security: Resolve and validate the path stays within .gauntlet/
+					const fixInstructionsPath = path.resolve(
+						gauntletPath,
+						parsed.fix_instructions,
+					);
+					const normalizedGauntletPath = path.resolve(gauntletPath);
+					const relativePath = path.relative(
+						normalizedGauntletPath,
+						fixInstructionsPath,
+					);
+					// Ensure path doesn't escape .gauntlet/ (no .. segments or absolute paths)
+					if (
+						relativePath.startsWith("..") ||
+						path.isAbsolute(relativePath) ||
+						relativePath === "." ||
+						relativePath === ""
+					) {
+						throw new Error(
+							`Fix instructions path must stay within .gauntlet/ directory and point to a file: ${parsed.fix_instructions} resolves to ${fixInstructionsPath} (referenced by check "${parsed.name}")`,
+						);
+					}
+					if (!(await fileExists(fixInstructionsPath))) {
+						throw new Error(
+							`Fix instructions file not found: ${fixInstructionsPath} (referenced by check "${parsed.name}")`,
+						);
+					}
+					loadedCheck.fixInstructionsContent = await fs.readFile(
+						fixInstructionsPath,
+						"utf-8",
+					);
+				}
+				checks[parsed.name] = loadedCheck;
 			}
 		}
 	}

package/src/config/schema.ts CHANGED Viewed

@@ -14,6 +14,7 @@ export const checkGateSchema = z
 		run_locally: z.boolean().default(true),
 		timeout: z.number().optional(),
 		fail_fast: z.boolean().optional(),
+		fix_instructions: z.string().optional(), // Path relative to .gauntlet/
 	})
 	.refine(
 		(data) => {
@@ -57,7 +58,7 @@ export const entryPointSchema = z.object({
 export const gauntletConfigSchema = z.object({
 	base_branch: z.string().min(1).default("origin/main"),
-	log_dir: z.string().min(1).default(".gauntlet_logs"),
+	log_dir: z.string().min(1).default("gauntlet_logs"),
 	allow_parallel: z.boolean().default(true),
 	cli: cliConfigSchema,
 	entry_points: z.array(entryPointSchema).min(1),

package/src/config/types.ts CHANGED Viewed

@@ -30,9 +30,14 @@ export type CISetupStep = z.infer<typeof ciSetupStepSchema>;
 export type RuntimeConfig = z.infer<typeof runtimeConfigSchema>;
 export type ServiceConfig = z.infer<typeof serviceConfigSchema>;
+// Extended check config with loaded content
+export interface LoadedCheckGateConfig extends CheckGateConfig {
+	fixInstructionsContent?: string;
+}
 // Combined type for the fully loaded configuration
 export interface LoadedConfig {
 	project: GauntletConfig;
-	checks: Record<string, CheckGateConfig>;
+	checks: Record<string, LoadedCheckGateConfig>;
 	reviews: Record<string, ReviewGateConfig & ReviewPromptFrontmatter>; // Merged with frontmatter
 }

package/src/core/job.ts CHANGED Viewed

@@ -22,6 +22,7 @@ export class JobGenerator {
 	generateJobs(expandedEntryPoints: ExpandedEntryPoint[]): Job[] {
 		const jobs: Job[] = [];
+		const seenJobs = new Set<string>();
 		const isCI =
 			process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true";
@@ -41,13 +42,23 @@ export class JobGenerator {
 					if (isCI && !checkConfig.run_in_ci) continue;
 					if (!isCI && !checkConfig.run_locally) continue;
+					const workingDirectory = checkConfig.working_directory || ep.path;
+					// Include entry point in key to ensure each entry point/check pair is distinct
+					const jobKey = `check:${ep.path}:${checkName}:${workingDirectory}`;
+					// Skip if we've already created a job for this exact entry point/check combination
+					if (seenJobs.has(jobKey)) {
+						continue;
+					}
+					seenJobs.add(jobKey);
 					jobs.push({
 						id: `check:${ep.path}:${checkName}`,
 						type: "check",
 						name: checkName,
 						entryPoint: ep.path,
 						gateConfig: checkConfig,
-						workingDirectory: checkConfig.working_directory || ep.path,
+						workingDirectory: workingDirectory,
 					});
 				}
 			}

package/src/core/runner.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import path from "node:path";
 import { promisify } from "node:util";
 import { getAdapter } from "../cli-adapters/index.js";
 import type {
+	LoadedCheckGateConfig,
 	LoadedConfig,
 	ReviewGateConfig,
 	ReviewPromptFrontmatter,
@@ -78,11 +79,14 @@ export class Runner {
 		if (job.type === "check") {
 			const logPath = this.logger.getLogPath(job.id);
 			const jobLogger = await this.logger.createJobLogger(job.id);
+			const effectiveBaseBranch =
+				this.baseBranchOverride || this.config.project.base_branch;
 			result = await this.checkExecutor.execute(
 				job.id,
-				job.gateConfig as CheckGateConfig,
+				job.gateConfig as LoadedCheckGateConfig,
 				job.workingDirectory,
 				jobLogger,
+				effectiveBaseBranch,
 			);
 			result.logPath = logPath;
 		} else {
@@ -129,7 +133,7 @@ export class Runner {
 			if (this.shouldStop) break;
 			if (job.type === "check") {
 				const commandName = this.getCommandName(
-					(job.gateConfig as CheckGateConfig).command,
+					(job.gateConfig as LoadedCheckGateConfig).command,
 				);
 				if (!commandName) {
 					preflightResults.push(

package/src/gates/check.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { exec } from "node:child_process";
 import { promisify } from "node:util";
-import type { CheckGateConfig } from "../config/types.js";
+import type { LoadedCheckGateConfig } from "../config/types.js";
 import type { GateResult } from "./result.js";
 const execAsync = promisify(exec);
@@ -9,20 +9,24 @@ const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
 export class CheckGateExecutor {
 	async execute(
 		jobId: string,
-		config: CheckGateConfig,
+		config: LoadedCheckGateConfig,
 		workingDirectory: string,
 		logger: (output: string) => Promise<void>,
+		baseBranch?: string,
 	): Promise<GateResult> {
 		const startTime = Date.now();
+		// Substitute variables in command
+		const command = this.substituteVariables(config.command, { baseBranch });
 		try {
 			await logger(
 				`[${new Date().toISOString()}] Starting check: ${config.name}\n`,
 			);
-			await logger(`Executing command: ${config.command}\n`);
+			await logger(`Executing command: ${command}\n`);
 			await logger(`Working directory: ${workingDirectory}\n\n`);
-			const { stdout, stderr } = await execAsync(config.command, {
+			const { stdout, stderr } = await execAsync(command, {
 				cwd: workingDirectory,
 				timeout: config.timeout ? config.timeout * 1000 : undefined,
 				maxBuffer: MAX_BUFFER_BYTES,
@@ -60,8 +64,14 @@ export class CheckGateExecutor {
 					status: "fail",
 					duration: Date.now() - startTime,
 					message: `Timed out after ${config.timeout}s`,
+					fixInstructions: config.fixInstructionsContent,
 				};
 				await logger(`Result: ${result.status} - ${result.message}\n`);
+				if (config.fixInstructionsContent) {
+					await logger(
+						`\n--- Fix Instructions ---\n${config.fixInstructionsContent}\n`,
+					);
+				}
 				return result;
 			}
@@ -72,8 +82,14 @@ export class CheckGateExecutor {
 					status: "fail",
 					duration: Date.now() - startTime,
 					message: `Exited with code ${err.code}`,
+					fixInstructions: config.fixInstructionsContent,
 				};
 				await logger(`Result: ${result.status} - ${result.message}\n`);
+				if (config.fixInstructionsContent) {
+					await logger(
+						`\n--- Fix Instructions ---\n${config.fixInstructionsContent}\n`,
+					);
+				}
 				return result;
 			}
@@ -83,9 +99,26 @@ export class CheckGateExecutor {
 				status: "error",
 				duration: Date.now() - startTime,
 				message: err.message || "Unknown error",
+				fixInstructions: config.fixInstructionsContent,
 			};
 			await logger(`Result: ${result.status} - ${result.message}\n`);
+			if (config.fixInstructionsContent) {
+				await logger(
+					`\n--- Fix Instructions ---\n${config.fixInstructionsContent}\n`,
+				);
+			}
 			return result;
 		}
 	}
+	private substituteVariables(
+		command: string,
+		variables: { baseBranch?: string },
+	): string {
+		let result = command;
+		if (variables.baseBranch) {
+			result = result.replace(/\$\{BASE_BRANCH\}/g, variables.baseBranch);
+		}
+		return result;
+	}
 }

package/src/gates/result.ts CHANGED Viewed

@@ -7,4 +7,5 @@ export interface GateResult {
 	message?: string; // summary message
 	logPath?: string; // path to full log
 	logPaths?: string[]; // paths to multiple logs (e.g. per-agent logs)
+	fixInstructions?: string; // Markdown content for fixing failures
 }

package/src/output/console.ts CHANGED Viewed

@@ -251,6 +251,14 @@ export class ConsoleReporter {
 			console.log(chalk.dim(`  Log: ${result.logPath}`));
 		}
+		if (result.fixInstructions) {
+			console.log(
+				chalk.cyan(
+					`  Fix instructions: available (${result.fixInstructions.split("\n").length} lines)`,
+				),
+			);
+		}
 		console.log(""); // Empty line between failures
 	}
 }

package/src/templates/run_gauntlet.template.md CHANGED Viewed

@@ -6,7 +6,10 @@ allowed-tools: Bash
 Execute the autonomous verification suite.
 1. Run `agent-gauntlet run`.
-2. If it fails, read the log files in `.gauntlet_logs/` to understand exactly what went wrong.
+2. If it fails:
+   - Check the console output for "Fix instructions: available" messages.
+   - Read the log files in `gauntlet_logs/` to understand exactly what went wrong.
+   - If fix instructions are available, they will be in the log file under a "--- Fix Instructions ---" section—carefully read and apply them FIRST before attempting other fixes.
 3. Fix any code or logic errors found by the tools or AI reviewers, prioritizing higher-priority violations (critical > high > medium > low).
 4. If you disagree with AI reviewer feedback, briefly explain your reasoning in the code comments rather than ignoring it silently.
 5. Do NOT commit your changes yet—keep them uncommitted so the rerun command can review them.

package/src/templates/workflow.yml CHANGED Viewed

@@ -19,12 +19,14 @@ jobs:
       - name: Install agent-gauntlet
         run: |
           curl -fsSL https://bun.sh/install | bash
-          ~/.bun/bin/bun add -g agent-gauntlet
+          echo "$HOME/.bun/bin" >> $GITHUB_PATH
+          export PATH="$HOME/.bun/bin:$PATH"
+          bun add -g pacaplan/agent-gauntlet#development
       - name: Discover gauntlet jobs
         id: discover
         run: |
-          output=$(~/.bun/bin/agent-gauntlet ci list-jobs)
+          output=$(agent-gauntlet ci list-jobs)
           echo "matrix=$(echo "$output" | jq -c '.matrix')" >> $GITHUB_OUTPUT
           echo "runtimes=$(echo "$output" | jq -c '.runtimes')" >> $GITHUB_OUTPUT