npm - agent-gauntlet - Versions diffs - 0.2.2 → 0.3.0 - Mend

agent-gauntlet 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/README.md +3 -3
package/package.json +1 -1
package/src/cli-adapters/claude.ts +13 -1
package/src/cli-adapters/gemini.ts +17 -2
package/src/commands/check.ts +98 -12
package/src/commands/ci/list-jobs.ts +3 -2
package/src/commands/clean.ts +29 -0
package/src/commands/help.ts +1 -1
package/src/commands/index.ts +1 -1
package/src/commands/init.ts +4 -4
package/src/commands/review.ts +98 -12
package/src/commands/run.ts +98 -12
package/src/commands/shared.ts +56 -10
package/src/config/schema.ts +4 -0
package/src/config/validator.ts +6 -13
package/src/core/change-detector.ts +1 -0
package/src/core/entry-point.ts +48 -7
package/src/core/runner.ts +57 -47
package/src/gates/result.ts +32 -0
package/src/gates/review.ts +323 -51
package/src/index.ts +2 -2
package/src/output/console.ts +96 -9
package/src/output/logger.ts +40 -7
package/src/templates/run_gauntlet.template.md +20 -13
package/src/utils/log-parser.ts +409 -165
package/src/utils/session-ref.ts +82 -0
package/src/commands/check.test.ts +0 -29
package/src/commands/detect.test.ts +0 -43
package/src/commands/health.test.ts +0 -93
package/src/commands/help.test.ts +0 -44
package/src/commands/init.test.ts +0 -130
package/src/commands/list.test.ts +0 -121
package/src/commands/rerun.ts +0 -160
package/src/commands/review.test.ts +0 -31
package/src/commands/run.test.ts +0 -27
package/src/config/loader.test.ts +0 -151
package/src/core/entry-point.test.ts +0 -61
package/src/gates/review.test.ts +0 -291

package/src/output/console.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
 import chalk from "chalk";
 import type { Job } from "../core/job.js";
 import type { GateResult } from "../gates/result.js";
+import { reconstructHistory } from "../utils/log-parser.js";
 export class ConsoleReporter {
 	onJobStart(job: Job) {
@@ -10,10 +11,12 @@ export class ConsoleReporter {
 	onJobComplete(job: Job, result: GateResult) {
 		const duration = `${(result.duration / 1000).toFixed(2)}s`;
 		const message = result.message ?? "";
 		if (result.subResults && result.subResults.length > 0) {
 			// Print split results
 			for (const sub of result.subResults) {
 				const statusColor =
 					sub.status === "pass"
@@ -21,6 +24,7 @@ export class ConsoleReporter {
 						: sub.status === "fail"
 							? chalk.red
 							: chalk.magenta;
 				const label =
 					sub.status === "pass"
 						? "PASS"
@@ -29,8 +33,15 @@ export class ConsoleReporter {
 							: "ERROR";
 				let logInfo = "";
 				if (sub.status !== "pass" && sub.logPath) {
-					logInfo = `\n      Log: ${sub.logPath}`;
+					// Prefer JSON if it exists for reviews
+					const displayLog = sub.logPath;
+					const logPrefix = displayLog.endsWith(".json") ? "Review:" : "Log:";
+					logInfo = `\n      ${logPrefix} ${displayLog}`;
 				}
 				console.log(
@@ -66,18 +77,94 @@ export class ConsoleReporter {
 		}
 	}
-	async printSummary(results: GateResult[]) {
-		console.log(`\n${chalk.bold("--- Gauntlet Summary ---")}`);
+	async printSummary(results: GateResult[], logDir?: string) {
+		console.log(
+			`\n${chalk.bold("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")}`,
+		);
+		console.log(chalk.bold("RESULTS SUMMARY"));
+		console.log(chalk.bold("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"));
+		if (logDir) {
+			try {
+				const history = await reconstructHistory(logDir);
+				for (const iter of history) {
+					if (iter.fixed.length === 0 && iter.skipped.length === 0) continue;
+					console.log(`\nIteration ${iter.iteration}:`);
+					for (const f of iter.fixed) {
+						const label = f.adapter ? `${f.jobId} (${f.adapter})` : f.jobId;
+						console.log(chalk.green(`  ✓ Fixed: ${label} - ${f.details}`));
+					}
+					for (const s of iter.skipped) {
+						const label = s.adapter ? `${s.jobId} (${s.adapter})` : s.jobId;
+						console.log(
+							chalk.yellow(
+								`  ⊘ Skipped: ${label} - ${s.file}:${s.line} ${s.issue}`,
+							),
+						);
+						if (s.result) {
+							console.log(chalk.dim(`    Reason: ${s.result}`));
+						}
+					}
+				}
+				const totalFixed = history.reduce(
+					(sum, iter) => sum + iter.fixed.length,
+					0,
+				);
+				const totalSkipped = history.reduce(
+					(sum, iter) => sum + iter.skipped.length,
+					0,
+				);
+				let totalFailed = 0;
+				for (const res of results) {
+					if (res.subResults && res.subResults.length > 0) {
+						for (const sub of res.subResults) {
+							if (sub.status === "fail" || sub.status === "error") {
+								totalFailed += sub.errorCount ?? 1;
+							}
+						}
+					} else if (res.status === "fail" || res.status === "error") {
+						totalFailed += res.errorCount ?? 1;
+					}
+				}
+				console.log(
+					`\n${chalk.bold("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")}`,
+				);
+				const iterationsText =
+					history.length > 1 ? ` after ${history.length} iterations` : "";
+				console.log(
+					`Total: ${totalFixed} fixed, ${totalSkipped} skipped, ${totalFailed} failed${iterationsText}`,
+				);
+			} catch (err) {
+				console.warn(
+					chalk.yellow(`Warning: Failed to reconstruct history: ${err}`),
+				);
+			}
+		}
-		const passed = results.filter((r) => r.status === "pass");
 		const failed = results.filter((r) => r.status === "fail");
 		const errored = results.filter((r) => r.status === "error");
+		const anySkipped = results.some((r) => r.skipped && r.skipped.length > 0);
+		let overallStatus = "Passed";
+		let statusColor = chalk.green;
+		if (errored.length > 0) {
+			overallStatus = "Error";
+			statusColor = chalk.magenta;
+		} else if (failed.length > 0) {
+			overallStatus = "Failed";
+			statusColor = chalk.red;
+		} else if (anySkipped) {
+			overallStatus = "Passed with warnings";
+			statusColor = chalk.yellow;
+		}
-		console.log(`Total: ${results.length}`);
-		console.log(chalk.green(`Passed: ${passed.length}`));
-		if (failed.length > 0) console.log(chalk.red(`Failed: ${failed.length}`));
-		if (errored.length > 0)
-			console.log(chalk.magenta(`Errored: ${errored.length}`));
+		console.log(statusColor(`Status: ${overallStatus}`));
+		console.log(chalk.bold("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"));
 	}
 	/** @internal Public for testing */

package/src/output/logger.ts CHANGED Viewed

@@ -6,8 +6,38 @@ function formatTimestamp(): string {
 	return new Date().toISOString();
 }
+/**
+ * Compute the next run number for a given log file prefix.
+ * Scans existing files in logDir and returns max+1 (or 1 if none exist).
+ */
+async function nextRunNumber(logDir: string, prefix: string): Promise<number> {
+	try {
+		const files = await fs.readdir(logDir);
+		let max = 0;
+		const expectedStart = `${prefix}.`;
+		const expectedEnd = ".log";
+		for (const file of files) {
+			if (!file.startsWith(expectedStart) || !file.endsWith(expectedEnd)) {
+				continue;
+			}
+			const middle = file.slice(
+				expectedStart.length,
+				file.length - expectedEnd.length,
+			);
+			if (/^\d+$/.test(middle)) {
+				const n = parseInt(middle, 10);
+				if (n > max) max = n;
+			}
+		}
+		return max + 1;
+	} catch {
+		return 1;
+	}
+}
 export class Logger {
 	private initializedFiles: Set<string> = new Set();
+	private runNumberCache: Map<string, number> = new Map();
 	constructor(private logDir: string) {}
@@ -19,19 +49,22 @@ export class Logger {
 		// No-op - using append mode
 	}
-	getLogPath(jobId: string, adapterName?: string): string {
+	async getLogPath(jobId: string, adapterName?: string): Promise<string> {
 		const safeName = sanitizeJobId(jobId);
-		if (adapterName) {
-			return path.join(this.logDir, `${safeName}_${adapterName}.log`);
+		const prefix = adapterName ? `${safeName}_${adapterName}` : safeName;
+		if (!this.runNumberCache.has(prefix)) {
+			const num = await nextRunNumber(this.logDir, prefix);
+			this.runNumberCache.set(prefix, num);
 		}
-		return path.join(this.logDir, `${safeName}.log`);
+		const runNum = this.runNumberCache.get(prefix) ?? 1;
+		return path.join(this.logDir, `${prefix}.${runNum}.log`);
 	}
 	private async initFile(logPath: string): Promise<void> {
 		if (this.initializedFiles.has(logPath)) {
 			return;
 		}
-		// Add to set BEFORE writing to make this more atomic
 		this.initializedFiles.add(logPath);
 		await fs.writeFile(logPath, "");
 	}
@@ -39,7 +72,7 @@ export class Logger {
 	async createJobLogger(
 		jobId: string,
 	): Promise<(text: string) => Promise<void>> {
-		const logPath = this.getLogPath(jobId);
+		const logPath = await this.getLogPath(jobId);
 		await this.initFile(logPath);
 		return async (text: string) => {
@@ -61,7 +94,7 @@ export class Logger {
 		adapterName?: string,
 	) => Promise<{ logger: (text: string) => Promise<void>; logPath: string }> {
 		return async (adapterName?: string) => {
-			const logPath = this.getLogPath(jobId, adapterName);
+			const logPath = await this.getLogPath(jobId, adapterName);
 			await this.initFile(logPath);
 			const logger = async (text: string) => {

package/src/templates/run_gauntlet.template.md CHANGED Viewed

@@ -18,17 +18,24 @@ Execute the autonomous verification suite.
 **Review trust level: medium** — Fix issues you reasonably agree with or believe the human wants fixed. Skip issues that are purely stylistic, subjective, or that you believe the human would not want changed. When you skip an issue, briefly state what was skipped and why.
-1. Run `agent-gauntlet run`.
+0. Run `agent-gauntlet clean` to archive any previous log files
+1. Run `agent-gauntlet run`
 2. If it fails:
-   - Check the console output for "Fix instructions: available" messages.
-   - Read the log files in `gauntlet_logs/` to understand exactly what went wrong.
-   - If fix instructions are available, they will be in the log file under a "--- Fix Instructions ---" section—carefully read and apply them FIRST before attempting other fixes.
-3. Fix any code or logic errors found by the tools or AI reviewers, prioritizing higher-priority violations (critical > high > medium > low).
-4. Apply the trust level above when deciding whether to act on AI reviewer feedback. If you skip an issue due to the trust threshold, report it with a brief explanation (e.g., "Skipped: [issue summary] — reason: [stylistic/subjective/disagree]").
-5. Do NOT commit your changes yet—keep them uncommitted so the rerun command can review them.
-6. Run `agent-gauntlet rerun` to verify your fixes. The rerun command reviews only uncommitted changes and uses previous failures as context.
-7. Repeat steps 2-6 until one of the following termination conditions is met:
-   - All gates pass
-   - You are skipping remaining issues
-   - Still failing after 3 rerun attempts
-8. Once all gates pass, do NOT commit or push your changes—await the human's review and explicit instruction to commit.
+   - Identify the failed gates from the console output.
+   - For CHECK failures: Read the `.log` file path provided in the output.
+   - For REVIEW failures: Read the `.json` file path provided in the "Review: <path>" output.
+3. Address the violations:
+   - For REVIEW violations: You MUST update the `"status"` and `"result"` fields in the provided `.json` file for EACH violation.
+     - Set `"status": "fixed"` and add a brief description to `"result"` for issues you fix.
+     - Set `"status": "skipped"` and add a brief reason to `"result"` for issues you skip (based on the trust level).
+     - Do NOT modify any other attributes (file, line, issue, priority) in the JSON file.
+   - Apply the trust level above when deciding whether to act on AI reviewer feedback.
+4. Run `agent-gauntlet run` again to verify your fixes. It will detect existing logs and automatically switch to verification mode.
+5. Repeat steps 2-4 until one of the following termination conditions is met:
+   - "Status: Passed" appears in the output (logs are automatically archived)
+   - "Status: Passed with warnings" appears in the output (remaining issues were skipped)
+   - Still failing after 3 attempts -> Run `agent-gauntlet clean` to archive logs and reset state.
+6. Provide a summary of the session:
+   - Issues Fixed: (list key fixes)
+   - Issues Skipped: (list skipped items and reasons)
+   - Outstanding Failures: (if any, explain why they couldn't be resolved)