npm - agent-gauntlet - Versions diffs - 0.2.2 → 0.4.0 - Mend

agent-gauntlet 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/README.md +3 -3
package/package.json +1 -1
package/src/cli-adapters/claude.ts +13 -1
package/src/cli-adapters/gemini.ts +17 -2
package/src/commands/check.ts +108 -12
package/src/commands/ci/list-jobs.ts +3 -2
package/src/commands/clean.ts +29 -0
package/src/commands/help.ts +1 -1
package/src/commands/index.ts +2 -1
package/src/commands/init.ts +4 -4
package/src/commands/review.ts +108 -12
package/src/commands/run.ts +109 -12
package/src/commands/shared.ts +56 -10
package/src/commands/validate.ts +20 -0
package/src/config/schema.ts +5 -0
package/src/config/validator.ts +6 -13
package/src/core/change-detector.ts +1 -0
package/src/core/entry-point.ts +48 -7
package/src/core/runner.ts +90 -56
package/src/gates/result.ts +32 -0
package/src/gates/review.ts +428 -162
package/src/index.ts +4 -2
package/src/output/console-log.ts +146 -0
package/src/output/console.ts +103 -9
package/src/output/logger.ts +52 -8
package/src/templates/run_gauntlet.template.md +20 -13
package/src/utils/log-parser.ts +498 -162
package/src/utils/session-ref.ts +82 -0
package/src/commands/check.test.ts +0 -29
package/src/commands/detect.test.ts +0 -43
package/src/commands/health.test.ts +0 -93
package/src/commands/help.test.ts +0 -44
package/src/commands/init.test.ts +0 -130
package/src/commands/list.test.ts +0 -121
package/src/commands/rerun.ts +0 -160
package/src/commands/review.test.ts +0 -31
package/src/commands/run.test.ts +0 -27
package/src/config/loader.test.ts +0 -151
package/src/core/entry-point.test.ts +0 -61
package/src/gates/review.test.ts +0 -291

package/src/utils/session-ref.ts ADDED Viewed

@@ -0,0 +1,82 @@
+import { exec } from "node:child_process";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { promisify } from "node:util";
+const SESSION_REF_FILENAME = ".session_ref"; // Name of the file, inside the log directory, that holds the session reference SHA
+// Runner for the git commands issued in this module; a mutable export so tests can swap in a mock (see setExecFn)
+export let execFn: (
+	cmd: string,
+) => Promise<{ stdout: string; stderr: string }> = promisify(exec);
+/**
+ * Replaces execFn with the supplied function (intended for tests); the override stays until execFn is reassigned again.
+ */
+export function setExecFn(
+	fn: (cmd: string) => Promise<{ stdout: string; stderr: string }>,
+): void {
+	execFn = fn;
+}
+/**
+ * Points execFn back at a promisified node:child_process `exec`, undoing any setExecFn override.
+ */
+export function resetExecFn(): void {
+	execFn = promisify(exec);
+}
+/**
+ * Records a commit SHA for the current git state in `<logDir>/.session_ref`, falling back to HEAD
+ * when `git stash create` prints nothing. Never throws: any failure is only reported via console.warn.
+ * NOTE(review): git-stash documents `create [<message>]` without options, so `--include-untracked` is presumably taken as the stash message and untracked files are NOT captured - confirm.
+ */
+export async function writeSessionRef(logDir: string): Promise<void> {
+	try {
+		// Ask git for a stash-style commit of the current state; the intent is to include untracked files (see NOTE above)
+		// The command prints the new commit's SHA and leaves the working tree untouched
+		const { stdout } = await execFn("git stash create --include-untracked");
+		let sha = stdout.trim();
+		if (!sha) {
+			// Empty output means git had nothing to stash, so record the current HEAD commit instead
+			const { stdout: headSha } = await execFn("git rev-parse HEAD");
+			sha = headSha.trim();
+		}
+		// Create the log directory (and any missing parents) before writing the file
+		await fs.mkdir(logDir, { recursive: true });
+		await fs.writeFile(path.join(logDir, SESSION_REF_FILENAME), sha); // SHA is written as-is, without a trailing newline
+	} catch (error) { // best-effort: a failed git or fs call is reported and then dropped
+		console.warn(
+			"Failed to create session reference:",
+			error instanceof Error ? error.message : String(error),
+		);
+	}
+}
+/**
+ * Reads `<logDir>/.session_ref` and returns its content with surrounding whitespace trimmed.
+ * Returns null when the read fails for any reason (e.g. the file is missing); an empty file yields "".
+ */
+export async function readSessionRef(logDir: string): Promise<string | null> {
+	try {
+		const refPath = path.join(logDir, SESSION_REF_FILENAME);
+		const content = await fs.readFile(refPath, "utf-8");
+		return content.trim();
+	} catch { // every read error is mapped to "no session reference"
+		return null;
+	}
+}
+/**
+ * Deletes `<logDir>/.session_ref`; `force: true` makes a missing file a no-op and any other failure is swallowed.
+ */
+export async function clearSessionRef(logDir: string): Promise<void> {
+	try {
+		const refPath = path.join(logDir, SESSION_REF_FILENAME);
+		await fs.rm(refPath, { force: true });
+	} catch {
+		// Best-effort cleanup: failures are deliberately ignored
+	}
+}

package/src/commands/check.test.ts DELETED Viewed

@@ -1,29 +0,0 @@
-import { beforeEach, describe, expect, it } from "bun:test";
-import { Command } from "commander";
-import { registerCheckCommand } from "./check.js";
-describe("Check Command", () => { // Registration-only checks: the command's action is never invoked here
-	let program: Command;
-	beforeEach(() => {
-		program = new Command(); // fresh commander program for every test
-		registerCheckCommand(program);
-	});
-	it("should register the check command", () => {
-		const checkCmd = program.commands.find((cmd) => cmd.name() === "check");
-		expect(checkCmd).toBeDefined();
-		expect(checkCmd?.description()).toBe(
-			"Run only applicable checks for detected changes",
-		);
-	});
-	it("should have correct options", () => {
-		const checkCmd = program.commands.find((cmd) => cmd.name() === "check");
-		expect(checkCmd?.options.some((opt) => opt.long === "--gate")).toBe(true); // options are matched by long flag only
-		expect(checkCmd?.options.some((opt) => opt.long === "--commit")).toBe(true);
-		expect(checkCmd?.options.some((opt) => opt.long === "--uncommitted")).toBe(
-			true,
-		);
-	});
-});

package/src/commands/detect.test.ts DELETED Viewed

@@ -1,43 +0,0 @@
-import { afterEach, beforeEach, describe, expect, it } from "bun:test";
-import { Command } from "commander";
-import { registerDetectCommand } from "./detect.js";
-describe("Detect Command", () => { // Registration-only checks for the detect command
-	let program: Command;
-	const originalConsoleLog = console.log; // saved so afterEach can undo the console patching
-	const originalConsoleError = console.error;
-	let logs: string[];
-	let errors: string[];
-	beforeEach(() => {
-		program = new Command();
-		registerDetectCommand(program);
-		logs = [];
-		errors = [];
-		console.log = (...args: unknown[]) => { // capture output instead of printing; no test in this block asserts on it
-			logs.push(args.join(" "));
-		};
-		console.error = (...args: unknown[]) => {
-			errors.push(args.join(" "));
-		};
-	});
-	afterEach(() => {
-		console.log = originalConsoleLog;
-		console.error = originalConsoleError;
-	});
-	it("should register the detect command", () => {
-		const detectCmd = program.commands.find((cmd) => cmd.name() === "detect");
-		expect(detectCmd).toBeDefined();
-		expect(detectCmd?.description()).toBe(
-			"Show what gates would run for detected changes (without executing them)",
-		);
-		expect(detectCmd?.options.some((opt) => opt.long === "--commit")).toBe(
-			true,
-		);
-		expect(detectCmd?.options.some((opt) => opt.long === "--uncommitted")).toBe(
-			true,
-		);
-	});
-});

package/src/commands/health.test.ts DELETED Viewed

@@ -1,93 +0,0 @@
-import {
-	afterAll,
-	afterEach,
-	beforeAll,
-	beforeEach,
-	describe,
-	expect,
-	it,
-} from "bun:test";
-import fs from "node:fs/promises";
-import path from "node:path";
-import { Command } from "commander";
-import { registerHealthCommand } from "./health.js";
-const TEST_DIR = path.join(process.cwd(), `test-health-${Date.now()}`); // timestamp-named scratch project under the current working directory
-const GAUNTLET_DIR = path.join(TEST_DIR, ".gauntlet");
-const REVIEWS_DIR = path.join(GAUNTLET_DIR, "reviews");
-describe("Health Command", () => { // Runs the health command against an on-disk fixture project
-	let program: Command;
-	const originalConsoleLog = console.log;
-	const originalCwd = process.cwd(); // restored in afterEach because each test chdirs into TEST_DIR
-	let logs: string[];
-	beforeAll(async () => {
-		// Build the fixture once: TEST_DIR/.gauntlet/reviews
-		await fs.mkdir(TEST_DIR, { recursive: true });
-		await fs.mkdir(GAUNTLET_DIR, { recursive: true });
-		await fs.mkdir(REVIEWS_DIR, { recursive: true });
-		// Project config: gemini as the only default CLI, usage-limit check disabled, one entry point at "."
-		await fs.writeFile(
-			path.join(GAUNTLET_DIR, "config.yml"),
-			`
-base_branch: origin/main
-log_dir: gauntlet_logs
-cli:
-  default_preference:
-    - gemini
-  check_usage_limit: false
-entry_points:
-  - path: .
-`,
-		);
-		// Review definition "security" whose front matter prefers the gemini CLI
-		await fs.writeFile(
-			path.join(REVIEWS_DIR, "security.md"),
-			`---
-cli_preference:
-  - gemini
----
-# Security Review
-Review for security.
-`,
-		);
-	});
-	afterAll(async () => {
-		await fs.rm(TEST_DIR, { recursive: true, force: true }); // remove the whole fixture tree
-	});
-	beforeEach(() => {
-		program = new Command();
-		registerHealthCommand(program);
-		logs = [];
-		console.log = (...args: unknown[]) => { // capture stdout lines for the assertions below
-			logs.push(args.join(" "));
-		};
-		process.chdir(TEST_DIR); // the command is run with the fixture project as cwd
-	});
-	afterEach(() => {
-		console.log = originalConsoleLog;
-		process.chdir(originalCwd);
-	});
-	it("should register the health command", () => {
-		const healthCmd = program.commands.find((cmd) => cmd.name() === "health");
-		expect(healthCmd).toBeDefined();
-		expect(healthCmd?.description()).toBe("Check CLI tool availability");
-	});
-	it("should run health check", async () => {
-		const healthCmd = program.commands.find((cmd) => cmd.name() === "health");
-		await healthCmd?.parseAsync(["health"]);
-		const output = logs.join("\n");
-		expect(output).toContain("Config validation:"); // only the section headings are checked, not the per-tool results
-		expect(output).toContain("CLI Tool Health Check:");
-	});
-});

package/src/commands/help.test.ts DELETED Viewed

@@ -1,44 +0,0 @@
-import { afterEach, beforeEach, describe, expect, it } from "bun:test";
-import { Command } from "commander";
-import { registerHelpCommand } from "./help.js";
-describe("Help Command", () => { // Checks registration and the printed help text
-	let program: Command;
-	const originalConsoleLog = console.log;
-	let logs: string[];
-	beforeEach(() => {
-		program = new Command();
-		registerHelpCommand(program);
-		logs = [];
-		console.log = (...args: unknown[]) => { // capture stdout lines for the assertions below
-			logs.push(args.join(" "));
-		};
-	});
-	afterEach(() => {
-		console.log = originalConsoleLog;
-	});
-	it("should register the help command", () => {
-		const helpCmd = program.commands.find((cmd) => cmd.name() === "help");
-		expect(helpCmd).toBeDefined();
-		expect(helpCmd?.description()).toBe("Show help information");
-	});
-	it("should output help information when executed", async () => {
-		const helpCmd = program.commands.find((cmd) => cmd.name() === "help");
-		await helpCmd?.parseAsync(["help"]);
-		const output = logs.join("\n");
-		expect(output).toContain("Agent Gauntlet");
-		expect(output).toContain("Commands:");
-		expect(output).toContain("run"); // substring checks only: each command name must appear somewhere in the output
-		expect(output).toContain("check");
-		expect(output).toContain("review");
-		expect(output).toContain("detect");
-		expect(output).toContain("list");
-		expect(output).toContain("health");
-		expect(output).toContain("init");
-	});
-});

package/src/commands/init.test.ts DELETED Viewed

@@ -1,130 +0,0 @@
-import {
-	afterAll,
-	afterEach,
-	beforeAll,
-	beforeEach,
-	describe,
-	expect,
-	it,
-	mock,
-} from "bun:test";
-import fs from "node:fs/promises";
-import path from "node:path";
-import { Command } from "commander";
-const TEST_DIR = path.join(process.cwd(), `test-init-${Date.now()}`); // timestamp-named scratch directory under the current working directory
-// Two stub CLI adapters that differ only in name, command dir and availability (the first is available, the second is not)
-const mockAdapters = [
-	{
-		name: "mock-cli-1",
-		isAvailable: async () => true,
-		getProjectCommandDir: () => ".mock1",
-		getUserCommandDir: () => null,
-		getCommandExtension: () => ".sh",
-		canUseSymlink: () => false,
-		transformCommand: (content: string) => content, // identity: command content passes through unchanged
-	},
-	{
-		name: "mock-cli-2",
-		isAvailable: async () => false, // Not available
-		getProjectCommandDir: () => ".mock2",
-		getUserCommandDir: () => null,
-		getCommandExtension: () => ".sh",
-		canUseSymlink: () => false,
-		transformCommand: (content: string) => content,
-	},
-];
-mock.module("../cli-adapters/index.js", () => ({ // swap the adapters module for the stubs defined above
-	getAllAdapters: () => mockAdapters,
-	getProjectCommandAdapters: () => mockAdapters,
-	getUserCommandAdapters: () => [],
-	getAdapter: (name: string) => mockAdapters.find((a) => a.name === name),
-	getValidCLITools: () => mockAdapters.map((a) => a.name),
-}));
-// Dynamic import placed after mock.module - presumably so init.js resolves the mocked adapters module
-const { registerInitCommand } = await import("./init.js");
-describe("Init Command", () => { // Runs `init --yes` inside a scratch directory using the mocked adapters
-	let program: Command;
-	const originalConsoleLog = console.log;
-	const originalCwd = process.cwd(); // restored in afterEach because each test chdirs into TEST_DIR
-	let logs: string[];
-	beforeAll(async () => {
-		await fs.mkdir(TEST_DIR, { recursive: true });
-	});
-	afterAll(async () => {
-		await fs.rm(TEST_DIR, { recursive: true, force: true });
-	});
-	beforeEach(() => {
-		program = new Command();
-		registerInitCommand(program);
-		logs = [];
-		console.log = (...args: unknown[]) => { // capture stdout lines for the assertions below
-			logs.push(args.join(" "));
-		};
-		process.chdir(TEST_DIR);
-	});
-	afterEach(() => {
-		console.log = originalConsoleLog;
-		process.chdir(originalCwd);
-		// Remove any .gauntlet directory a test created; the promise is returned and removal errors are ignored
-		return fs
-			.rm(path.join(TEST_DIR, ".gauntlet"), { recursive: true, force: true })
-			.catch(() => {});
-	});
-	it("should register the init command", () => {
-		const initCmd = program.commands.find((cmd) => cmd.name() === "init");
-		expect(initCmd).toBeDefined();
-		expect(initCmd?.description()).toBe("Initialize .gauntlet configuration");
-		expect(initCmd?.options.some((opt) => opt.long === "--yes")).toBe(true);
-	});
-	it("should create .gauntlet directory structure with --yes flag", async () => {
-		// Only mock-cli-1 reports itself available, so it is the adapter expected in the generated files
-		await program.parseAsync(["node", "test", "init", "--yes"]);
-		// Paths that init is expected to have created
-		const gauntletDir = path.join(TEST_DIR, ".gauntlet");
-		const configFile = path.join(gauntletDir, "config.yml");
-		const reviewsDir = path.join(gauntletDir, "reviews");
-		const checksDir = path.join(gauntletDir, "checks");
-		const runGauntletFile = path.join(gauntletDir, "run_gauntlet.md");
-		expect(await fs.stat(gauntletDir)).toBeDefined(); // fs.stat rejects for a missing path, which fails the test
-		expect(await fs.stat(configFile)).toBeDefined();
-		expect(await fs.stat(reviewsDir)).toBeDefined();
-		expect(await fs.stat(checksDir)).toBeDefined();
-		expect(await fs.stat(runGauntletFile)).toBeDefined();
-		// Config must mention the expected keys and only the available adapter
-		const configContent = await fs.readFile(configFile, "utf-8");
-		expect(configContent).toContain("base_branch");
-		expect(configContent).toContain("log_dir");
-		expect(configContent).toContain("mock-cli-1"); // Should be present
-		expect(configContent).not.toContain("mock-cli-2"); // Should not be present (unavailable)
-		// The generated code-quality review must reference the available adapter
-		const reviewFile = path.join(reviewsDir, "code-quality.md");
-		const reviewContent = await fs.readFile(reviewFile, "utf-8");
-		expect(reviewContent).toContain("mock-cli-1");
-	});
-	it("should not create directory if .gauntlet already exists", async () => {
-		// Pre-create .gauntlet so init finds an existing directory
-		const gauntletDir = path.join(TEST_DIR, ".gauntlet");
-		await fs.mkdir(gauntletDir, { recursive: true });
-		await program.parseAsync(["node", "test", "init", "--yes"]);
-		const output = logs.join("\n");
-		expect(output).toContain(".gauntlet directory already exists"); // only the printed message is asserted
-	});
-});

package/src/commands/list.test.ts DELETED Viewed

@@ -1,121 +0,0 @@
-import {
-	afterAll,
-	afterEach,
-	beforeAll,
-	beforeEach,
-	describe,
-	expect,
-	it,
-} from "bun:test";
-import fs from "node:fs/promises";
-import path from "node:path";
-import { Command } from "commander";
-import { registerListCommand } from "./list.js";
-const TEST_DIR = path.join(process.cwd(), `test-list-${Date.now()}`); // timestamp-named scratch project under the current working directory
-const GAUNTLET_DIR = path.join(TEST_DIR, ".gauntlet");
-const CHECKS_DIR = path.join(GAUNTLET_DIR, "checks");
-const REVIEWS_DIR = path.join(GAUNTLET_DIR, "reviews");
-describe("List Command", () => { // Runs the list command against an on-disk fixture project
-	let program: Command;
-	const originalConsoleLog = console.log;
-	const originalConsoleError = console.error;
-	const originalCwd = process.cwd(); // restored in afterEach because each test chdirs into TEST_DIR
-	let logs: string[];
-	let errors: string[];
-	beforeAll(async () => {
-		// Build the fixture once: TEST_DIR/.gauntlet/{checks,reviews}
-		await fs.mkdir(TEST_DIR, { recursive: true });
-		await fs.mkdir(GAUNTLET_DIR, { recursive: true });
-		await fs.mkdir(CHECKS_DIR, { recursive: true });
-		await fs.mkdir(REVIEWS_DIR, { recursive: true });
-		// Project config: one entry point (src/) wired to the "lint" check and the "security" review
-		await fs.writeFile(
-			path.join(GAUNTLET_DIR, "config.yml"),
-			`
-base_branch: origin/main
-log_dir: gauntlet_logs
-cli:
-  default_preference:
-    - gemini
-  check_usage_limit: false
-entry_points:
-  - path: src/
-    checks:
-      - lint
-    reviews:
-      - security
-`,
-		);
-		// Check definition "lint"
-		await fs.writeFile(
-			path.join(CHECKS_DIR, "lint.yml"),
-			`
-name: lint
-command: npm run lint
-working_directory: .
-`,
-		);
-		// Review definition "security" whose front matter prefers the gemini CLI
-		await fs.writeFile(
-			path.join(REVIEWS_DIR, "security.md"),
-			`---
-cli_preference:
-  - gemini
----
-# Security Review
-Review for security.
-`,
-		);
-	});
-	afterAll(async () => {
-		await fs.rm(TEST_DIR, { recursive: true, force: true }); // remove the whole fixture tree
-	});
-	beforeEach(() => {
-		program = new Command();
-		registerListCommand(program);
-		logs = [];
-		errors = [];
-		console.log = (...args: unknown[]) => { // capture stdout lines for the assertions below
-			logs.push(args.join(" "));
-		};
-		console.error = (...args: unknown[]) => { // captured as well, but no test in this block asserts on it
-			errors.push(args.join(" "));
-		};
-		process.chdir(TEST_DIR); // the command is run with the fixture project as cwd
-	});
-	afterEach(() => {
-		console.log = originalConsoleLog;
-		console.error = originalConsoleError;
-		process.chdir(originalCwd);
-	});
-	it("should register the list command", () => {
-		const listCmd = program.commands.find((cmd) => cmd.name() === "list");
-		expect(listCmd).toBeDefined();
-		expect(listCmd?.description()).toBe("List configured gates");
-	});
-	it("should list check gates, review gates, and entry points", async () => {
-		const listCmd = program.commands.find((cmd) => cmd.name() === "list");
-		await listCmd?.parseAsync(["list"]);
-		const output = logs.join("\n");
-		expect(output).toContain("Check Gates:"); // substring checks: section headings plus the names from the fixture
-		expect(output).toContain("lint");
-		expect(output).toContain("Review Gates:");
-		expect(output).toContain("security");
-		expect(output).toContain("gemini");
-		expect(output).toContain("Entry Points:");
-		expect(output).toContain("src/");
-	});
-});

package/src/commands/rerun.ts DELETED Viewed

@@ -1,160 +0,0 @@
-import chalk from "chalk";
-import type { Command } from "commander";
-import { loadConfig } from "../config/loader.js";
-import { ChangeDetector } from "../core/change-detector.js";
-import { EntryPointExpander } from "../core/entry-point.js";
-import { JobGenerator } from "../core/job.js";
-import { Runner } from "../core/runner.js";
-import { ConsoleReporter } from "../output/console.js";
-import { Logger } from "../output/logger.js";
-import {
-	findPreviousFailures,
-	type PreviousViolation,
-} from "../utils/log-parser.js";
-import { rotateLogs } from "./shared.js";
-export function registerRerunCommand(program: Command): void { // Registers the `rerun` subcommand; its action always ends via process.exit
-	program
-		.command("rerun")
-		.description(
-			"Rerun gates (checks & reviews) with previous failures as context (defaults to uncommitted changes)",
-		)
-		.option(
-			"-b, --base-branch <branch>",
-			"Override base branch for change detection",
-		)
-		.option("-g, --gate <name>", "Run specific gate only")
-		.option(
-			"-c, --commit <sha>",
-			"Use diff for a specific commit (overrides default uncommitted mode)",
-		)
-		.action(async (options) => {
-			try {
-				const config = await loadConfig();
-				// Parse previous failures from log files (only for review gates)
-				console.log(chalk.dim("Analyzing previous runs..."));
-				// findPreviousFailures handles errors internally and returns empty array on failure
-				const previousFailures = await findPreviousFailures(
-					config.project.log_dir,
-					options.gate, // optional gate-name filter from --gate
-				);
-				// Create a map: jobId -> (adapterName -> violations)
-				const failuresMap = new Map<string, Map<string, PreviousViolation[]>>();
-				for (const gateFailure of previousFailures) {
-					const adapterMap = new Map<string, PreviousViolation[]>();
-					for (const adapterFailure of gateFailure.adapterFailures) {
-						adapterMap.set(
-							adapterFailure.adapterName,
-							adapterFailure.violations,
-						);
-					}
-					failuresMap.set(gateFailure.jobId, adapterMap);
-				}
-				if (previousFailures.length > 0) {
-					const totalViolations = previousFailures.reduce( // sum of violations over every adapter of every gate
-						(sum, gf) =>
-							sum +
-							gf.adapterFailures.reduce((s, af) => s + af.violations.length, 0),
-						0,
-					);
-					console.log(
-						chalk.yellow(
-							`Found ${previousFailures.length} gate(s) with ${totalViolations} previous violation(s)`,
-						),
-					);
-				} else {
-					console.log(
-						chalk.dim("No previous failures found. Running as normal..."),
-					);
-				}
-				// Rotate logs before starting the new run (done after the analysis above, which presumably reads the current logs - confirm)
-				await rotateLogs(config.project.log_dir);
-				// Determine effective base branch
-				// Priority: CLI override > CI env var > config
-				const effectiveBaseBranch =
-					options.baseBranch ||
-					(process.env.GITHUB_BASE_REF && // only honoured when CI or GITHUB_ACTIONS is "true"
-					(process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true")
-						? process.env.GITHUB_BASE_REF
-						: null) ||
-					config.project.base_branch;
-				// Detect changes (default to uncommitted unless --commit is specified)
-				// Note: Rerun defaults to uncommitted changes for faster iteration loops,
-				// unlike 'run' which defaults to base_branch comparison.
-				const changeOptions = {
-					commit: options.commit,
-					uncommitted: !options.commit, // Default to uncommitted unless commit is specified
-				};
-				const changeDetector = new ChangeDetector(
-					effectiveBaseBranch,
-					changeOptions,
-				);
-				const expander = new EntryPointExpander();
-				const jobGen = new JobGenerator(config);
-				const modeDesc = options.commit
-					? `commit ${options.commit}`
-					: "uncommitted changes";
-				console.log(chalk.dim(`Detecting changes (${modeDesc})...`));
-				const changes = await changeDetector.getChangedFiles();
-				if (changes.length === 0) {
-					console.log(chalk.green("No changes detected."));
-					process.exit(0); // nothing to do is reported as success
-				}
-				console.log(chalk.dim(`Found ${changes.length} changed files.`));
-				const entryPoints = await expander.expand(
-					config.project.entry_points,
-					changes,
-				);
-				let jobs = jobGen.generateJobs(entryPoints);
-				if (options.gate) {
-					jobs = jobs.filter((j) => j.name === options.gate); // exact match on the job name
-				}
-				if (jobs.length === 0) {
-					console.log(chalk.yellow("No applicable gates for these changes."));
-					process.exit(0); // no matching gates is also reported as success
-				}
-				console.log(chalk.dim(`Running ${jobs.length} gates...`));
-				if (previousFailures.length > 0) {
-					console.log(
-						chalk.dim(
-							"Previous failures will be injected as context for matching reviewers.",
-						),
-					);
-				}
-				const logger = new Logger(config.project.log_dir);
-				const reporter = new ConsoleReporter();
-				const runner = new Runner(
-					config,
-					logger,
-					reporter,
-					failuresMap, // Pass previous failures map
-					changeOptions, // Pass change detection options
-					effectiveBaseBranch, // Pass effective base branch
-				);
-				const success = await runner.run(jobs);
-				process.exit(success ? 0 : 1); // exit code mirrors the runner result
-			} catch (error: unknown) {
-				const err = error as { message?: string }; // non-Error throwables print an undefined message
-				console.error(chalk.red("Error:"), err.message);
-				process.exit(1);
-			}
-		});
-}