npm - agent-gauntlet - Versions diffs - 0.1.9 → 0.1.11 - Mend

agent-gauntlet 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

package/README.md +1 -1
package/package.json +4 -2
package/src/cli-adapters/claude.ts +139 -108
package/src/cli-adapters/codex.ts +141 -117
package/src/cli-adapters/cursor.ts +152 -0
package/src/cli-adapters/gemini.ts +171 -139
package/src/cli-adapters/github-copilot.ts +153 -0
package/src/cli-adapters/index.ts +77 -48
package/src/commands/check.test.ts +24 -20
package/src/commands/check.ts +65 -59
package/src/commands/detect.test.ts +38 -32
package/src/commands/detect.ts +74 -61
package/src/commands/health.test.ts +67 -53
package/src/commands/health.ts +167 -145
package/src/commands/help.test.ts +37 -37
package/src/commands/help.ts +30 -22
package/src/commands/index.ts +9 -9
package/src/commands/init.test.ts +118 -107
package/src/commands/init.ts +515 -417
package/src/commands/list.test.ts +87 -70
package/src/commands/list.ts +28 -24
package/src/commands/rerun.ts +142 -119
package/src/commands/review.test.ts +26 -20
package/src/commands/review.ts +65 -59
package/src/commands/run.test.ts +22 -20
package/src/commands/run.ts +64 -58
package/src/commands/shared.ts +44 -35
package/src/config/loader.test.ts +112 -90
package/src/config/loader.ts +132 -123
package/src/config/schema.ts +49 -47
package/src/config/types.ts +15 -13
package/src/config/validator.ts +521 -454
package/src/core/change-detector.ts +122 -104
package/src/core/entry-point.test.ts +60 -62
package/src/core/entry-point.ts +76 -67
package/src/core/job.ts +69 -59
package/src/core/runner.ts +261 -221
package/src/gates/check.ts +78 -69
package/src/gates/result.ts +7 -6
package/src/gates/review.test.ts +188 -0
package/src/gates/review.ts +717 -506
package/src/index.ts +16 -15
package/src/output/console.ts +253 -198
package/src/output/logger.ts +65 -51
package/src/templates/run_gauntlet.template.md +18 -0
package/src/utils/diff-parser.ts +64 -62
package/src/utils/log-parser.ts +227 -206
package/src/utils/sanitizer.ts +1 -1

package/src/cli-adapters/cursor.ts ADDED Viewed

@@ -0,0 +1,152 @@
+import { exec } from "node:child_process";
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { promisify } from "node:util";
+import { type CLIAdapter, isUsageLimit } from "./index.js";
+// Promise-returning form of child_process.exec; callers below await it and read { stdout, stderr }.
+const execAsync = promisify(exec);
+// Cap on captured child-process output (10 MiB), passed as `maxBuffer` when running the agent.
+const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
+/**
+ * CLI adapter that drives the Cursor command-line agent.
+ *
+ * All interaction happens by shelling out to the `agent` binary: availability
+ * is probed with `which agent`, and prompts are delivered to the tool on stdin.
+ */
+export class CursorAdapter implements CLIAdapter {
+	name = "cursor";
+	/**
+	 * Reports whether the Cursor CLI can be located.
+	 *
+	 * @returns `true` when `which agent` succeeds, `false` when it fails.
+	 */
+	async isAvailable(): Promise<boolean> {
+		try {
+			// Note: Cursor's CLI binary is named "agent", not "cursor"
+			await execAsync("which agent");
+			return true;
+		} catch {
+			return false;
+		}
+	}
+	/**
+	 * Checks that the CLI is installed and, optionally, that it is not rate limited.
+	 *
+	 * @param options.checkUsageLimit When set, a trivial prompt (`echo "hello" | agent`,
+	 *   10 second timeout) is sent and its output is inspected with `isUsageLimit`.
+	 * @returns `missing` when the binary is not found; `unhealthy` when the probe
+	 *   reports a usage limit or fails for any other reason; `healthy` otherwise.
+	 */
+	async checkHealth(options?: { checkUsageLimit?: boolean }): Promise<{
+		available: boolean;
+		status: "healthy" | "missing" | "unhealthy";
+		message?: string;
+	}> {
+		const available = await this.isAvailable();
+		if (!available) {
+			return {
+				available: false,
+				status: "missing",
+				message: "Command not found",
+			};
+		}
+		if (options?.checkUsageLimit) {
+			try {
+				// Try a lightweight command to check if we're rate limited
+				const { stdout, stderr } = await execAsync('echo "hello" | agent', {
+					timeout: 10000,
+				});
+				const combined = (stdout || "") + (stderr || "");
+				if (isUsageLimit(combined)) {
+					return {
+						available: true,
+						status: "unhealthy",
+						message: "Usage limit exceeded",
+					};
+				}
+				return { available: true, status: "healthy", message: "Ready" };
+			} catch (error: unknown) {
+				// A rejected exec carries whatever the process printed before failing;
+				// read those fields defensively since the error is untyped.
+				const execError = error as {
+					stderr?: string;
+					stdout?: string;
+					message?: string;
+				};
+				const stderr = execError.stderr || "";
+				const stdout = execError.stdout || "";
+				const combined = stderr + stdout;
+				if (isUsageLimit(combined)) {
+					return {
+						available: true,
+						status: "unhealthy",
+						message: "Usage limit exceeded",
+					};
+				}
+				// Since we sent a valid prompt ("hello"), any other error implies the tool is broken
+				// Report only the first output line so the message stays short.
+				const cleanError =
+					combined.split("\n")[0]?.trim() ||
+					execError.message ||
+					"Command failed";
+				return {
+					available: true,
+					status: "unhealthy",
+					message: `Error: ${cleanError}`,
+				};
+			}
+		}
+		return { available: true, status: "healthy", message: "Ready" };
+	}
+	/** @returns `null`: Cursor does not support custom commands. */
+	getProjectCommandDir(): string | null {
+		return null;
+	}
+	/** @returns `null`: Cursor does not support custom commands. */
+	getUserCommandDir(): string | null {
+		return null;
+	}
+	/** @returns The command file extension, `.md`. */
+	getCommandExtension(): string {
+		return ".md";
+	}
+	/** @returns `false`; not applicable because there is no command directory support. */
+	canUseSymlink(): boolean {
+		return false;
+	}
+	/**
+	 * Returns the markdown unchanged; not applicable because there is no
+	 * command directory support.
+	 */
+	transformCommand(markdownContent: string): string {
+		return markdownContent;
+	}
+	/**
+	 * Runs the Cursor agent on a prompt plus diff and returns what it printed.
+	 *
+	 * @param opts.prompt Instructions placed before the diff.
+	 * @param opts.diff Diff text appended after a `--- DIFF ---` separator.
+	 * @param opts.model Accepted for interface compatibility; not used by this adapter.
+	 * @param opts.timeoutMs Forwarded to exec as its `timeout` option.
+	 * @returns The agent's stdout.
+	 * @throws Whatever the temp-file operations or the underlying exec call reject with.
+	 */
+	async execute(opts: {
+		prompt: string;
+		diff: string;
+		model?: string;
+		timeoutMs?: number;
+	}): Promise<string> {
+		const fullContent = `${opts.prompt}\n\n--- DIFF ---\n${opts.diff}`;
+		// mkdtemp appends random characters to the prefix and creates the directory
+		// for us. A pid + timestamp file name is identical for two calls made by the
+		// same process within one millisecond, and is predictable inside a shared
+		// temp directory; a freshly created private directory avoids both problems.
+		const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "gauntlet-cursor-"));
+		const tmpFile = path.join(tmpDir, "prompt.txt");
+		try {
+			// Written inside the try so a failed write is still cleaned up below.
+			await fs.writeFile(tmpFile, fullContent);
+			// Cursor agent command reads from stdin
+			// Note: As of the current version, the Cursor 'agent' CLI does not expose
+			// flags for restricting tools or enforcing read-only mode (unlike claude's --allowedTools
+			// or codex's --sandbox read-only). The agent is assumed to be repo-scoped and
+			// safe for code review use. If Cursor adds such flags in the future, they should
+			// be added here for defense-in-depth.
+			//
+			// Shell command construction: We use exec() with shell piping
+			// because the agent requires stdin input. The tmpFile path is built from
+			// os.tmpdir() plus an mkdtemp-generated suffix, not from user-supplied text.
+			// Double quotes handle paths with spaces.
+			const cmd = `cat "${tmpFile}" | agent`;
+			const { stdout } = await execAsync(cmd, {
+				timeout: opts.timeoutMs,
+				maxBuffer: MAX_BUFFER_BYTES,
+			});
+			return stdout;
+		} finally {
+			// Best-effort cleanup: a failure here must not mask the review result or error.
+			await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
+		}
+	}
+}

package/src/cli-adapters/gemini.ts CHANGED Viewed

@@ -1,149 +1,181 @@
-import { exec } from 'node:child_process';
-import { promisify } from 'node:util';
-import { type CLIAdapter, isUsageLimit } from './index.js';
-import fs from 'node:fs/promises';
-import path from 'node:path';
-import os from 'node:os';
+import { exec } from "node:child_process";
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { promisify } from "node:util";
+import { type CLIAdapter, isUsageLimit } from "./index.js";
+// Promise-returning form of child_process.exec; callers below await it and read { stdout, stderr }.
 const execAsync = promisify(exec);
+// Cap on captured child-process output (10 MiB), passed as `maxBuffer` when running gemini.
 const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
 export class GeminiAdapter implements CLIAdapter {
-  name = 'gemini';
-  async isAvailable(): Promise<boolean> {
-    try {
-      await execAsync('which gemini');
-      return true;
-    } catch {
-      return false;
-    }
-  }
-  async checkHealth(options?: { checkUsageLimit?: boolean }): Promise<{ available: boolean; status: 'healthy' | 'missing' | 'unhealthy'; message?: string }> {
-    const available = await this.isAvailable();
-    if (!available) {
-      return { available: false, status: 'missing', message: 'Command not found' };
-    }
-    if (options?.checkUsageLimit) {
-      try {
-        const { stdout, stderr } = await execAsync('echo "hello" | gemini --sandbox --output-format text', { timeout: 10000 });
-        const combined = (stdout || '') + (stderr || '');
-        if (isUsageLimit(combined)) {
-           return {
-            available: true,
-            status: 'unhealthy',
-            message: 'Usage limit exceeded'
-          };
-        }
-        return { available: true, status: 'healthy', message: 'Installed' };
-      } catch (error: any) {
-        const stderr = error.stderr || '';
-        const stdout = error.stdout || '';
-        const combined = (stderr + stdout);
-        if (isUsageLimit(combined)) {
-          return {
-            available: true,
-            status: 'unhealthy',
-            message: 'Usage limit exceeded'
-          };
-        }
-        // Since we sent a valid prompt ("hello"), any other error implies the tool is broken
-        const cleanError = combined.split('\n')[0]?.trim() || error.message || 'Command failed';
-        return {
-          available: true,
-          status: 'unhealthy',
-          message: `Error: ${cleanError}`
-        };
-      }
-    }
-    return {
-      available,
-      status: available ? 'healthy' : 'missing',
-      message: available ? 'Installed' : 'Command not found'
-    };
-  }
-  getProjectCommandDir(): string | null {
-    return '.gemini/commands';
-  }
-  getUserCommandDir(): string | null {
-    // Gemini supports user-level commands at ~/.gemini/commands
-    return path.join(os.homedir(), '.gemini', 'commands');
-  }
-  getCommandExtension(): string {
-    return '.toml';
-  }
-  canUseSymlink(): boolean {
-    // Gemini uses TOML format, needs transformation
-    return false;
-  }
-  transformCommand(markdownContent: string): string {
-    // Transform Markdown with YAML frontmatter to Gemini's TOML format
-    const { frontmatter, body } = this.parseMarkdownWithFrontmatter(markdownContent);
-    const description = frontmatter.description || 'Run the gauntlet verification suite';
-    // Escape the body for TOML multi-line string
-    const escapedBody = body.trim();
-    return `description = ${JSON.stringify(description)}
+	name = "gemini";
+	async isAvailable(): Promise<boolean> {
+		try {
+			await execAsync("which gemini");
+			return true;
+		} catch {
+			return false;
+		}
+	}
+	async checkHealth(options?: { checkUsageLimit?: boolean }): Promise<{
+		available: boolean;
+		status: "healthy" | "missing" | "unhealthy";
+		message?: string;
+	}> {
+		const available = await this.isAvailable();
+		if (!available) {
+			return {
+				available: false,
+				status: "missing",
+				message: "Command not found",
+			};
+		}
+		if (options?.checkUsageLimit) {
+			try {
+				const { stdout, stderr } = await execAsync(
+					'echo "hello" | gemini --sandbox --output-format text',
+					{ timeout: 10000 },
+				);
+				const combined = (stdout || "") + (stderr || "");
+				if (isUsageLimit(combined)) {
+					return {
+						available: true,
+						status: "unhealthy",
+						message: "Usage limit exceeded",
+					};
+				}
+				return { available: true, status: "healthy", message: "Installed" };
+			} catch (error: unknown) {
+				const execError = error as {
+					stderr?: string;
+					stdout?: string;
+					message?: string;
+				};
+				const stderr = execError.stderr || "";
+				const stdout = execError.stdout || "";
+				const combined = stderr + stdout;
+				if (isUsageLimit(combined)) {
+					return {
+						available: true,
+						status: "unhealthy",
+						message: "Usage limit exceeded",
+					};
+				}
+				// Since we sent a valid prompt ("hello"), any other error implies the tool is broken
+				const cleanError =
+					combined.split("\n")[0]?.trim() ||
+					execError.message ||
+					"Command failed";
+				return {
+					available: true,
+					status: "unhealthy",
+					message: `Error: ${cleanError}`,
+				};
+			}
+		}
+		return {
+			available,
+			status: available ? "healthy" : "missing",
+			message: available ? "Installed" : "Command not found",
+		};
+	}
+	getProjectCommandDir(): string | null {
+		return ".gemini/commands";
+	}
+	getUserCommandDir(): string | null {
+		// Gemini supports user-level commands at ~/.gemini/commands
+		return path.join(os.homedir(), ".gemini", "commands");
+	}
+	getCommandExtension(): string {
+		return ".toml";
+	}
+	canUseSymlink(): boolean {
+		// Gemini uses TOML format, needs transformation
+		return false;
+	}
+	transformCommand(markdownContent: string): string {
+		// Transform Markdown with YAML frontmatter to Gemini's TOML format
+		const { frontmatter, body } =
+			this.parseMarkdownWithFrontmatter(markdownContent);
+		const description =
+			frontmatter.description || "Run the gauntlet verification suite";
+		// Escape the body for TOML multi-line string
+		const escapedBody = body.trim();
+		return `description = ${JSON.stringify(description)}
 prompt = """
 ${escapedBody}
 """
 `;
-  }
-  private parseMarkdownWithFrontmatter(content: string): { frontmatter: Record<string, string>; body: string } {
-    const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
-    if (!frontmatterMatch) {
-      return { frontmatter: {}, body: content };
-    }
-    const frontmatterStr = frontmatterMatch[1] ?? '';
-    const body = frontmatterMatch[2] ?? '';
-    // Simple YAML parsing for key: value pairs
-    const frontmatter: Record<string, string> = {};
-    for (const line of frontmatterStr.split('\n')) {
-      const kvMatch = line.match(/^([^:]+):\s*(.*)$/);
-      if (kvMatch && kvMatch[1] && kvMatch[2] !== undefined) {
-        frontmatter[kvMatch[1].trim()] = kvMatch[2].trim();
-      }
-    }
-    return { frontmatter, body };
-  }
-  async execute(opts: { prompt: string; diff: string; model?: string; timeoutMs?: number }): Promise<string> {
-    // Construct the full prompt content
-    const fullContent = opts.prompt + "\n\n--- DIFF ---\n" + opts.diff;
-    // Write to a temporary file to avoid shell escaping issues
-    const tmpDir = os.tmpdir();
-    const tmpFile = path.join(tmpDir, `gauntlet-gemini-${Date.now()}.txt`);
-    await fs.writeFile(tmpFile, fullContent);
-    try {
-      // Use gemini CLI with file input
-      // --sandbox: enables the execution sandbox
-      // --allowed-tools: whitelists read-only tools for non-interactive execution
-      // --output-format text: ensures plain text output
-      // Use < for stdin redirection instead of cat pipe (cleaner)
-      const cmd = `gemini --sandbox --allowed-tools read_file,list_directory,glob,search_file_content --output-format text < "${tmpFile}"`;
-      const { stdout } = await execAsync(cmd, { timeout: opts.timeoutMs, maxBuffer: MAX_BUFFER_BYTES });
-      return stdout;
-    } finally {
-      await fs.unlink(tmpFile).catch(() => {});
-    }
-  }
+	}
+	private parseMarkdownWithFrontmatter(content: string): {
+		frontmatter: Record<string, string>;
+		body: string;
+	} {
+		const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
+		if (!frontmatterMatch) {
+			return { frontmatter: {}, body: content };
+		}
+		const frontmatterStr = frontmatterMatch[1] ?? "";
+		const body = frontmatterMatch[2] ?? "";
+		// Simple YAML parsing for key: value pairs
+		const frontmatter: Record<string, string> = {};
+		for (const line of frontmatterStr.split("\n")) {
+			const kvMatch = line.match(/^([^:]+):\s*(.*)$/);
+			if (kvMatch?.[1] && kvMatch[2] !== undefined) {
+				frontmatter[kvMatch[1].trim()] = kvMatch[2].trim();
+			}
+		}
+		return { frontmatter, body };
+	}
+	async execute(opts: {
+		prompt: string;
+		diff: string;
+		model?: string;
+		timeoutMs?: number;
+	}): Promise<string> {
+		// Construct the full prompt content
+		const fullContent = `${opts.prompt}\n\n--- DIFF ---\n${opts.diff}`;
+		// Write to a temporary file to avoid shell escaping issues
+		const tmpDir = os.tmpdir();
+		const tmpFile = path.join(tmpDir, `gauntlet-gemini-${Date.now()}.txt`);
+		await fs.writeFile(tmpFile, fullContent);
+		try {
+			// Use gemini CLI with file input
+			// --sandbox: enables the execution sandbox
+			// --allowed-tools: whitelists read-only tools for non-interactive execution
+			// --output-format text: ensures plain text output
+			// Use < for stdin redirection instead of cat pipe (cleaner)
+			const cmd = `gemini --sandbox --allowed-tools read_file,list_directory,glob,search_file_content --output-format text < "${tmpFile}"`;
+			const { stdout } = await execAsync(cmd, {
+				timeout: opts.timeoutMs,
+				maxBuffer: MAX_BUFFER_BYTES,
+			});
+			return stdout;
+		} finally {
+			await fs.unlink(tmpFile).catch(() => {});
+		}
+	}
 }

package/src/cli-adapters/github-copilot.ts ADDED Viewed

@@ -0,0 +1,153 @@
+import { exec } from "node:child_process";
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { promisify } from "node:util";
+import { type CLIAdapter, isUsageLimit } from "./index.js";
+// Promise-returning form of child_process.exec; callers below await it and read { stdout, stderr }.
+const execAsync = promisify(exec);
+// Cap on captured child-process output (10 MiB), passed as `maxBuffer` when running copilot.
+const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
+/**
+ * CLI adapter that drives the GitHub Copilot command-line tool.
+ *
+ * All interaction happens by shelling out to the `copilot` binary: availability
+ * is probed with `which copilot`, and prompts are delivered to the tool on stdin.
+ */
+export class GitHubCopilotAdapter implements CLIAdapter {
+	name = "github-copilot";
+	/**
+	 * Reports whether the Copilot CLI can be located.
+	 *
+	 * @returns `true` when `which copilot` succeeds, `false` when it fails.
+	 */
+	async isAvailable(): Promise<boolean> {
+		try {
+			await execAsync("which copilot");
+			return true;
+		} catch {
+			return false;
+		}
+	}
+	/**
+	 * Checks that the CLI is installed and, optionally, that it is not rate limited.
+	 *
+	 * @param options.checkUsageLimit When set, a trivial prompt is sent with a
+	 *   minimal tool permission (10 second timeout) and its output is inspected
+	 *   with `isUsageLimit`.
+	 * @returns `missing` when the binary is not found; `unhealthy` when the probe
+	 *   reports a usage limit or fails for any other reason; `healthy` otherwise.
+	 */
+	async checkHealth(options?: { checkUsageLimit?: boolean }): Promise<{
+		available: boolean;
+		status: "healthy" | "missing" | "unhealthy";
+		message?: string;
+	}> {
+		const available = await this.isAvailable();
+		if (!available) {
+			return {
+				available: false,
+				status: "missing",
+				message: "Command not found",
+			};
+		}
+		if (options?.checkUsageLimit) {
+			try {
+				// Try a lightweight command to check if we're rate limited
+				// Use minimal tool permissions for health check
+				const { stdout, stderr } = await execAsync(
+					'echo "hello" | copilot --allow-tool "shell(echo)"',
+					{ timeout: 10000 },
+				);
+				const combined = (stdout || "") + (stderr || "");
+				if (isUsageLimit(combined)) {
+					return {
+						available: true,
+						status: "unhealthy",
+						message: "Usage limit exceeded",
+					};
+				}
+				return { available: true, status: "healthy", message: "Ready" };
+			} catch (error: unknown) {
+				// A rejected exec carries whatever the process printed before failing;
+				// read those fields defensively since the error is untyped.
+				const execError = error as {
+					stderr?: string;
+					stdout?: string;
+					message?: string;
+				};
+				const stderr = execError.stderr || "";
+				const stdout = execError.stdout || "";
+				const combined = stderr + stdout;
+				if (isUsageLimit(combined)) {
+					return {
+						available: true,
+						status: "unhealthy",
+						message: "Usage limit exceeded",
+					};
+				}
+				// Since we sent a valid prompt ("hello"), any other error implies the tool is broken
+				// Report only the first output line so the message stays short.
+				const cleanError =
+					combined.split("\n")[0]?.trim() ||
+					execError.message ||
+					"Command failed";
+				return {
+					available: true,
+					status: "unhealthy",
+					message: `Error: ${cleanError}`,
+				};
+			}
+		}
+		return { available: true, status: "healthy", message: "Ready" };
+	}
+	/** @returns `null`: GitHub Copilot CLI does not support custom commands (feature request #618). */
+	getProjectCommandDir(): string | null {
+		return null;
+	}
+	/** @returns `null`: GitHub Copilot CLI does not support custom commands (feature request #618). */
+	getUserCommandDir(): string | null {
+		return null;
+	}
+	/** @returns The command file extension, `.md`. */
+	getCommandExtension(): string {
+		return ".md";
+	}
+	/** @returns `false`; not applicable because there is no command directory support. */
+	canUseSymlink(): boolean {
+		return false;
+	}
+	/**
+	 * Returns the markdown unchanged; not applicable because there is no
+	 * command directory support.
+	 */
+	transformCommand(markdownContent: string): string {
+		return markdownContent;
+	}
+	/**
+	 * Runs the Copilot CLI on a prompt plus diff and returns what it printed.
+	 *
+	 * @param opts.prompt Instructions placed before the diff.
+	 * @param opts.diff Diff text appended after a `--- DIFF ---` separator.
+	 * @param opts.model Accepted for interface compatibility; not used by this adapter.
+	 * @param opts.timeoutMs Forwarded to exec as its `timeout` option.
+	 * @returns The CLI's stdout.
+	 * @throws Whatever the temp-file operations or the underlying exec call reject with.
+	 */
+	async execute(opts: {
+		prompt: string;
+		diff: string;
+		model?: string;
+		timeoutMs?: number;
+	}): Promise<string> {
+		const fullContent = `${opts.prompt}\n\n--- DIFF ---\n${opts.diff}`;
+		// mkdtemp appends random characters to the prefix and creates the directory
+		// for us. A pid + timestamp file name is identical for two calls made by the
+		// same process within one millisecond, and is predictable inside a shared
+		// temp directory; a freshly created private directory avoids both problems.
+		const tmpDir = await fs.mkdtemp(
+			path.join(os.tmpdir(), "gauntlet-copilot-"),
+		);
+		const tmpFile = path.join(tmpDir, "prompt.txt");
+		try {
+			// Written inside the try so a failed write is still cleaned up below.
+			await fs.writeFile(tmpFile, fullContent);
+			// Copilot reads from stdin when no -p flag is provided
+			// Tool whitelist: cat/grep/ls/find/head/tail are required for the AI to read
+			// and analyze code files during review. While these tools can access files,
+			// they are read-only and necessary for code review functionality.
+			// The copilot CLI is scoped to the repo directory by default.
+			// git is excluded to prevent access to commit history (review should only see diff).
+			//
+			// Each shell(...) value is double-quoted, exactly as the health check does:
+			// parentheses are shell metacharacters, and leaving them bare makes the
+			// shell reject the whole command with a syntax error before copilot runs.
+			const allowedTools = ["cat", "grep", "ls", "find", "head", "tail"]
+				.map((tool) => `--allow-tool "shell(${tool})"`)
+				.join(" ");
+			// Shell command construction: We use exec() with shell piping instead of execFile()
+			// because copilot requires stdin input. The tmpFile path is built from
+			// os.tmpdir() plus an mkdtemp-generated suffix, not from user-supplied text.
+			// Double quotes handle paths with spaces. This pattern matches claude.ts:131.
+			const cmd = `cat "${tmpFile}" | copilot ${allowedTools}`;
+			const { stdout } = await execAsync(cmd, {
+				timeout: opts.timeoutMs,
+				maxBuffer: MAX_BUFFER_BYTES,
+			});
+			return stdout;
+		} finally {
+			// Best-effort cleanup: a failure here must not mask the review result or error.
+			await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
+		}
+	}
+}