npm - agent-gauntlet - Versions diffs - 0.2.2 → 0.3.0 - Mend

agent-gauntlet 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/README.md +3 -3
package/package.json +1 -1
package/src/cli-adapters/claude.ts +13 -1
package/src/cli-adapters/gemini.ts +17 -2
package/src/commands/check.ts +98 -12
package/src/commands/ci/list-jobs.ts +3 -2
package/src/commands/clean.ts +29 -0
package/src/commands/help.ts +1 -1
package/src/commands/index.ts +1 -1
package/src/commands/init.ts +4 -4
package/src/commands/review.ts +98 -12
package/src/commands/run.ts +98 -12
package/src/commands/shared.ts +56 -10
package/src/config/schema.ts +4 -0
package/src/config/validator.ts +6 -13
package/src/core/change-detector.ts +1 -0
package/src/core/entry-point.ts +48 -7
package/src/core/runner.ts +57 -47
package/src/gates/result.ts +32 -0
package/src/gates/review.ts +323 -51
package/src/index.ts +2 -2
package/src/output/console.ts +96 -9
package/src/output/logger.ts +40 -7
package/src/templates/run_gauntlet.template.md +20 -13
package/src/utils/log-parser.ts +409 -165
package/src/utils/session-ref.ts +82 -0
package/src/commands/check.test.ts +0 -29
package/src/commands/detect.test.ts +0 -43
package/src/commands/health.test.ts +0 -93
package/src/commands/help.test.ts +0 -44
package/src/commands/init.test.ts +0 -130
package/src/commands/list.test.ts +0 -121
package/src/commands/rerun.ts +0 -160
package/src/commands/review.test.ts +0 -31
package/src/commands/run.test.ts +0 -27
package/src/config/loader.test.ts +0 -151
package/src/core/entry-point.test.ts +0 -61
package/src/gates/review.test.ts +0 -291

package/src/commands/shared.ts CHANGED Viewed

@@ -1,26 +1,71 @@
 import fs from "node:fs/promises";
 import path from "node:path";
+import { clearSessionRef } from "../utils/session-ref";
-export async function exists(path: string): Promise<boolean> {
+const LOCK_FILENAME = ".gauntlet-run.lock";
+export async function exists(filePath: string): Promise<boolean> {
 	try {
-		await fs.stat(path);
+		await fs.stat(filePath);
 		return true;
 	} catch {
 		return false;
 	}
 }
-export async function rotateLogs(logDir: string): Promise<void> {
+export async function acquireLock(logDir: string): Promise<void> {
+	await fs.mkdir(logDir, { recursive: true });
+	const lockPath = path.resolve(logDir, LOCK_FILENAME);
+	try {
+		await fs.writeFile(lockPath, String(process.pid), { flag: "wx" });
+	} catch (err: unknown) {
+		if (
+			typeof err === "object" &&
+			err !== null &&
+			"code" in err &&
+			(err as { code: string }).code === "EEXIST"
+		) {
+			console.error(
+				`Error: A gauntlet run is already in progress (lock file: ${lockPath}).`,
+			);
+			console.error(
+				"If no run is actually in progress, delete the lock file manually.",
+			);
+			process.exit(1);
+		}
+		throw err;
+	}
+}
+export async function releaseLock(logDir: string): Promise<void> {
+	const lockPath = path.resolve(logDir, LOCK_FILENAME);
+	try {
+		await fs.rm(lockPath, { force: true });
+	} catch {
+		// no-op if missing
+	}
+}
+export async function hasExistingLogs(logDir: string): Promise<boolean> {
+	try {
+		const entries = await fs.readdir(logDir);
+		return entries.some(
+			(f) => (f.endsWith(".log") || f.endsWith(".json")) && f !== "previous",
+		);
+	} catch {
+		return false;
+	}
+}
+export async function cleanLogs(logDir: string): Promise<void> {
 	const previousDir = path.join(logDir, "previous");
 	try {
-		// 1. Ensure logDir exists (if not, nothing to rotate, but we should create it for future use if needed,
-		//    though usually the logger creates it. If it doesn't exist, we can just return).
 		if (!(await exists(logDir))) {
 			return;
 		}
-		// 2. Clear gauntlet_logs/previous if it exists
+		// 1. Delete all files in previous/
 		if (await exists(previousDir)) {
 			const previousFiles = await fs.readdir(previousDir);
 			await Promise.all(
@@ -32,19 +77,20 @@ export async function rotateLogs(logDir: string): Promise<void> {
 			await fs.mkdir(previousDir, { recursive: true });
 		}
-		// 3. Move all existing files in gauntlet_logs/ to gauntlet_logs/previous
+		// 2. Move all .log and .json files from logDir root into previous/
 		const files = await fs.readdir(logDir);
 		await Promise.all(
 			files
-				.filter((file) => file !== "previous")
+				.filter((file) => file.endsWith(".log") || file.endsWith(".json"))
 				.map((file) =>
 					fs.rename(path.join(logDir, file), path.join(previousDir, file)),
 				),
 		);
+		await clearSessionRef(logDir);
 	} catch (error) {
-		// Log warning but don't crash the run as log rotation failure isn't critical
 		console.warn(
-			"Failed to rotate logs in",
+			"Failed to clean logs in",
 			logDir,
 			":",
 			error instanceof Error ? error.message : error,

package/src/config/schema.ts CHANGED Viewed

@@ -51,6 +51,7 @@ export const reviewPromptFrontmatterSchema = z.object({
 export const entryPointSchema = z.object({
 	path: z.string().min(1),
+	exclude: z.array(z.string().min(1)).optional(),
 	checks: z.array(z.string().min(1)).optional(),
 	reviews: z.array(z.string().min(1)).optional(),
 });
@@ -59,6 +60,9 @@ export const gauntletConfigSchema = z.object({
 	base_branch: z.string().min(1).default("origin/main"),
 	log_dir: z.string().min(1).default("gauntlet_logs"),
 	allow_parallel: z.boolean().default(true),
+	rerun_new_issue_threshold: z
+		.enum(["critical", "high", "medium", "low"])
+		.default("high"),
 	cli: cliConfigSchema,
 	entry_points: z.array(entryPointSchema).min(1),
 });

package/src/config/validator.ts CHANGED Viewed

@@ -108,12 +108,13 @@ export async function validateConfig(
 				if (file.endsWith(".yml") || file.endsWith(".yaml")) {
 					const filePath = path.join(checksPath, file);
 					filesChecked.push(filePath);
+					const name = path.basename(file, path.extname(file));
 					try {
 						const content = await fs.readFile(filePath, "utf-8");
 						const raw = YAML.parse(content);
 						const parsed = checkGateSchema.parse(raw);
-						existingCheckNames.add(parsed.name); // Track that this check exists
-						checks[parsed.name] = parsed;
+						existingCheckNames.add(name); // Track that this check exists
+						checks[name] = parsed;
 						// Semantic validation
 						if (!parsed.command || parsed.command.trim() === "") {
@@ -125,17 +126,9 @@ export async function validateConfig(
 							});
 						}
 					} catch (error: unknown) {
-						// Try to extract check name from raw YAML even if parsing failed
-						try {
-							const content = await fs.readFile(filePath, "utf-8");
-							const raw = YAML.parse(content);
-							if (raw.name && typeof raw.name === "string") {
-								existingCheckNames.add(raw.name); // Track that this check file exists
-							}
-						} catch {
-							// If we can't even parse the name, that's okay - we'll just skip tracking it
-						}
+						// Track that this check file exists even if parsing failed
+						// Use filename-based name since name is no longer in YAML
+						existingCheckNames.add(name);
 						if (error instanceof ZodError) {
 							error.errors.forEach((err) => {
 								issues.push({

package/src/core/change-detector.ts CHANGED Viewed

@@ -6,6 +6,7 @@ const execAsync = promisify(exec);
 export interface ChangeDetectorOptions {
 	commit?: string; // If provided, get diff for this commit vs its parent
 	uncommitted?: boolean; // If true, only get uncommitted changes (staged + unstaged)
+	fixBase?: string; // If provided, get diff from this ref to current working tree
 }
 export class ChangeDetector {

package/src/core/entry-point.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import fs from "node:fs/promises";
 import path from "node:path";
+import { Glob } from "bun";
 import type { EntryPointConfig } from "../config/types.js";
 export interface ExpandedEntryPoint {
@@ -16,24 +17,37 @@ export class EntryPointExpander {
 		const rootEntryPoint = entryPoints.find((ep) => ep.path === ".");
 		// Always include root entry point if configured and there are ANY changes
-		// Or should it only run if files match root patterns?
-		// Spec says: "A root entry point always exists and applies to repository-wide gates."
-		// Usually root gates run on any change or specific files in root.
-		// For simplicity, if root is configured, we'll include it if there are any changed files.
 		if (changedFiles.length > 0) {
 			const rootConfig = rootEntryPoint ?? { path: "." };
-			results.push({ path: ".", config: rootConfig });
+			// Apply exclusion filtering for root if configured
+			const filteredRootChanges = this.filterExcludedFiles(
+				changedFiles,
+				rootConfig.exclude,
+			);
+			if (filteredRootChanges.length > 0) {
+				results.push({ path: ".", config: rootConfig });
+			}
 		}
 		for (const ep of entryPoints) {
 			if (ep.path === ".") continue; // Handled above
+			// Apply exclusion filtering first!
+			const filteredChanges = this.filterExcludedFiles(
+				changedFiles,
+				ep.exclude,
+			);
+			// If no relevant files remain, skip this entry point
+			if (filteredChanges.length === 0) continue;
 			if (ep.path.endsWith("*")) {
 				// Wildcard directory (e.g., "engines/*")
 				const parentDir = ep.path.slice(0, -2); // "engines"
 				const expandedPaths = await this.expandWildcard(
 					parentDir,
-					changedFiles,
+					filteredChanges,
 				);
 				for (const subDir of expandedPaths) {
@@ -44,7 +58,7 @@ export class EntryPointExpander {
 				}
 			} else {
 				// Fixed directory (e.g., "apps/api")
-				if (this.hasChangesInDir(ep.path, changedFiles)) {
+				if (this.hasChangesInDir(ep.path, filteredChanges)) {
 					results.push({
 						path: ep.path,
 						config: ep,
@@ -81,6 +95,33 @@ export class EntryPointExpander {
 		return results;
 	}
+	private filterExcludedFiles(files: string[], patterns?: string[]): string[] {
+		if (!patterns || patterns.length === 0) {
+			return files;
+		}
+		// Pre-compile globs
+		const globs: Glob[] = [];
+		const prefixes: string[] = [];
+		for (const pattern of patterns) {
+			if (pattern.match(/[*?[{]/)) {
+				globs.push(new Glob(pattern));
+			} else {
+				prefixes.push(pattern);
+			}
+		}
+		return files.filter((file) => {
+			// If matches ANY pattern, exclude it
+			const isExcluded =
+				prefixes.some((p) => file === p || file.startsWith(`${p}/`)) ||
+				globs.some((g) => g.match(file));
+			return !isExcluded;
+		});
+	}
 	private async expandWildcard(
 		parentDir: string,
 		changedFiles: string[],

package/src/core/runner.ts CHANGED Viewed

@@ -64,7 +64,7 @@ export class Runner {
 		await Promise.all([...parallelPromises, sequentialPromise]);
-		await this.reporter.printSummary(this.results);
+		await this.reporter.printSummary(this.results, this.config.project.log_dir);
 		return this.results.every((r) => r.status === "pass");
 	}
@@ -76,36 +76,47 @@ export class Runner {
 		let result: GateResult;
-		if (job.type === "check") {
-			const logPath = this.logger.getLogPath(job.id);
-			const jobLogger = await this.logger.createJobLogger(job.id);
-			const effectiveBaseBranch =
-				this.baseBranchOverride || this.config.project.base_branch;
-			result = await this.checkExecutor.execute(
-				job.id,
-				job.gateConfig as LoadedCheckGateConfig,
-				job.workingDirectory,
-				jobLogger,
-				effectiveBaseBranch,
-			);
-			result.logPath = logPath;
-		} else {
-			// Use sanitized Job ID for lookup because that's what log-parser uses (based on filenames)
-			const safeJobId = sanitizeJobId(job.id);
-			const previousFailures = this.previousFailuresMap?.get(safeJobId);
-			const loggerFactory = this.logger.createLoggerFactory(job.id);
-			const effectiveBaseBranch =
-				this.baseBranchOverride || this.config.project.base_branch;
-			result = await this.reviewExecutor.execute(
-				job.id,
-				job.gateConfig as ReviewGateConfig & ReviewPromptFrontmatter,
-				job.entryPoint,
-				loggerFactory,
-				effectiveBaseBranch,
-				previousFailures,
-				this.changeOptions,
-				this.config.project.cli.check_usage_limit,
-			);
+		try {
+			if (job.type === "check") {
+				const logPath = await this.logger.getLogPath(job.id);
+				const jobLogger = await this.logger.createJobLogger(job.id);
+				const effectiveBaseBranch =
+					this.baseBranchOverride || this.config.project.base_branch;
+				result = await this.checkExecutor.execute(
+					job.id,
+					job.gateConfig as LoadedCheckGateConfig,
+					job.workingDirectory,
+					jobLogger,
+					effectiveBaseBranch,
+				);
+				result.logPath = logPath;
+			} else {
+				// Use sanitized Job ID for lookup because that's what log-parser uses (based on filenames)
+				const safeJobId = sanitizeJobId(job.id);
+				const previousFailures = this.previousFailuresMap?.get(safeJobId);
+				const loggerFactory = this.logger.createLoggerFactory(job.id);
+				const effectiveBaseBranch =
+					this.baseBranchOverride || this.config.project.base_branch;
+				result = await this.reviewExecutor.execute(
+					job.id,
+					job.gateConfig as ReviewGateConfig & ReviewPromptFrontmatter,
+					job.entryPoint,
+					loggerFactory,
+					effectiveBaseBranch,
+					previousFailures,
+					this.changeOptions,
+					this.config.project.cli.check_usage_limit,
+					this.config.project.rerun_new_issue_threshold,
+				);
+			}
+		} catch (err) {
+			console.error("[ERROR] Execution failed for", job.id, ":", err);
+			result = {
+				jobId: job.id,
+				status: "error",
+				duration: 0,
+				message: err instanceof Error ? err.message : String(err),
+			};
 		}
 		this.results.push(result);
@@ -136,9 +147,9 @@ export class Runner {
 					(job.gateConfig as LoadedCheckGateConfig).command,
 				);
 				if (!commandName) {
-					preflightResults.push(
-						await this.recordPreflightFailure(job, "Unable to parse command"),
-					);
+					const msg = "Unable to parse command";
+					console.error(`[PREFLIGHT] ${job.id}: ${msg}`);
+					preflightResults.push(await this.recordPreflightFailure(job, msg));
 					if (this.shouldFailFast(job)) this.shouldStop = true;
 					continue;
 				}
@@ -148,12 +159,9 @@ export class Runner {
 					job.workingDirectory,
 				);
 				if (!available) {
-					preflightResults.push(
-						await this.recordPreflightFailure(
-							job,
-							`Missing command: ${commandName}`,
-						),
-					);
+					const msg = `Missing command: ${commandName}`;
+					console.error(`[PREFLIGHT] ${job.id}: ${msg}`);
+					preflightResults.push(await this.recordPreflightFailure(job, msg));
 					if (this.shouldFailFast(job)) this.shouldStop = true;
 					continue;
 				}
@@ -172,12 +180,9 @@ export class Runner {
 				}
 				if (availableTools.length < required) {
-					preflightResults.push(
-						await this.recordPreflightFailure(
-							job,
-							`Missing CLI tools: need ${required}, found ${availableTools.length}`,
-						),
-					);
+					const msg = `Missing CLI tools: need ${required}, found ${availableTools.length} (${availableTools.join(", ") || "none"})`;
+					console.error(`[PREFLIGHT] ${job.id}: ${msg}`);
+					preflightResults.push(await this.recordPreflightFailure(job, msg));
 					if (this.shouldFailFast(job)) this.shouldStop = true;
 					continue;
 				}
@@ -194,7 +199,7 @@ export class Runner {
 		message: string,
 	): Promise<GateResult> {
 		if (job.type === "check") {
-			const logPath = this.logger.getLogPath(job.id);
+			const logPath = await this.logger.getLogPath(job.id);
 			const jobLogger = await this.logger.createJobLogger(job.id);
 			await jobLogger(
 				`[${new Date().toISOString()}] Health check failed\n${message}\n`,
@@ -222,6 +227,11 @@ export class Runner {
 		const health = await adapter.checkHealth({
 			checkUsageLimit: this.config.project.cli.check_usage_limit,
 		});
+		if (health.status !== "healthy") {
+			console.log(
+				`[DEBUG] Adapter ${name} check failed: ${health.status} - ${health.message}`,
+			);
+		}
 		return health.status === "healthy";
 	}

package/src/gates/result.ts CHANGED Viewed

@@ -1,5 +1,23 @@
 export type GateStatus = "pass" | "fail" | "error";
+export interface PreviousViolation {
+	file: string;
+	line: number | string;
+	issue: string;
+	fix?: string;
+	priority?: "critical" | "high" | "medium" | "low";
+	status?: "new" | "fixed" | "skipped";
+	result?: string | null;
+}
+export interface ReviewFullJsonOutput {
+	adapter: string;
+	timestamp: string;
+	status: "pass" | "fail" | "error";
+	rawOutput: string;
+	violations: PreviousViolation[];
+}
 export interface GateResult {
 	jobId: string;
 	status: GateStatus;
@@ -8,10 +26,24 @@ export interface GateResult {
 	logPath?: string; // path to full log
 	logPaths?: string[]; // paths to multiple logs (e.g. per-agent logs)
 	fixInstructions?: string; // Markdown content for fixing failures
+	errorCount?: number; // Number of active failures/violations
+	skipped?: Array<{
+		file: string;
+		line: number | string;
+		issue: string;
+		result?: string | null;
+	}>;
 	subResults?: Array<{
 		nameSuffix: string;
 		status: GateStatus;
 		message: string;
 		logPath?: string;
+		errorCount?: number;
+		skipped?: Array<{
+			file: string;
+			line: number | string;
+			issue: string;
+			result?: string | null;
+		}>;
 	}>;
 }