npm - agent-gauntlet - Versions diffs - 0.1.7 → 0.1.9 - Mend

agent-gauntlet 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/package.json +1 -1
package/src/commands/check.ts +5 -0
package/src/commands/help.ts +1 -0
package/src/commands/init.test.ts +58 -46
package/src/commands/init.ts +278 -100
package/src/commands/rerun.ts +5 -1
package/src/commands/review.ts +5 -0
package/src/commands/run.ts +5 -0
package/src/commands/shared.ts +34 -0
package/src/gates/review.ts +59 -8
package/src/utils/diff-parser.ts +86 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-gauntlet",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "description": "A CLI tool for testing AI coding agents",
   "license": "Apache-2.0",
   "author": "Paul Caplan",

package/src/commands/check.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { JobGenerator } from '../core/job.js';
 import { Runner } from '../core/runner.js';
 import { Logger } from '../output/logger.js';
 import { ConsoleReporter } from '../output/console.js';
+import { rotateLogs } from './shared.js';
 export function registerCheckCommand(program: Command): void {
   program
@@ -18,6 +19,10 @@ export function registerCheckCommand(program: Command): void {
     .action(async (options) => {
       try {
         const config = await loadConfig();
+        // Rotate logs before starting
+        await rotateLogs(config.project.log_dir);
         const changeDetector = new ChangeDetector(config.project.base_branch, {
           commit: options.commit,
           uncommitted: options.uncommitted

package/src/commands/help.ts CHANGED Viewed

@@ -11,6 +11,7 @@ export function registerHelpCommand(program: Command): void {
       console.log('of your repo that changed, based on a configurable set of entry points.\n');
       console.log(chalk.bold('Commands:\n'));
       console.log('  run      Run gates for detected changes');
+      console.log('  rerun    Rerun gates with previous failure context');
       console.log('  check    Run only applicable checks');
       console.log('  review   Run only applicable reviews');
       console.log('  detect   Show what gates would run (without executing them)');

package/src/commands/init.test.ts CHANGED Viewed

@@ -1,11 +1,41 @@
-import { describe, it, expect, beforeEach, afterEach, beforeAll, afterAll } from 'bun:test';
+import { describe, it, expect, beforeEach, afterEach, beforeAll, afterAll, mock } from 'bun:test';
 import { Command } from 'commander';
-import { registerInitCommand } from './init.js';
 import fs from 'node:fs/promises';
 import path from 'node:path';
 const TEST_DIR = path.join(process.cwd(), 'test-init-' + Date.now());
+// Mock adapters
+const mockAdapters = [
+  {
+    name: 'mock-cli-1',
+    isAvailable: async () => true,
+    getProjectCommandDir: () => '.mock1',
+    getUserCommandDir: () => null,
+    getCommandExtension: () => '.sh',
+    canUseSymlink: () => false,
+    transformCommand: (content: string) => content,
+  },
+  {
+    name: 'mock-cli-2',
+    isAvailable: async () => false, // Not available
+    getProjectCommandDir: () => '.mock2',
+    getUserCommandDir: () => null,
+    getCommandExtension: () => '.sh',
+    canUseSymlink: () => false,
+    transformCommand: (content: string) => content,
+  }
+];
+mock.module('../cli-adapters/index.js', () => ({
+  getAllAdapters: () => mockAdapters,
+  getProjectCommandAdapters: () => mockAdapters,
+  getUserCommandAdapters: () => [],
+}));
+// Import after mocking
+const { registerInitCommand } = await import('./init.js');
 describe('Init Command', () => {
   let program: Command;
   const originalConsoleLog = console.log;
@@ -45,50 +75,33 @@ describe('Init Command', () => {
   });
   it('should create .gauntlet directory structure with --yes flag', async () => {
-    const initCmd = program.commands.find(cmd => cmd.name() === 'init');
+    // We expect it to use the available mock-cli-1
+    await program.parseAsync(['node', 'test', 'init', '--yes']);
+    // Check that files were created
+    const gauntletDir = path.join(TEST_DIR, '.gauntlet');
+    const configFile = path.join(gauntletDir, 'config.yml');
+    const reviewsDir = path.join(gauntletDir, 'reviews');
+    const checksDir = path.join(gauntletDir, 'checks');
+    const runGauntletFile = path.join(gauntletDir, 'run_gauntlet.md');
-    // Use a timeout to prevent hanging if prompts occur
-    let timeoutId: ReturnType<typeof setTimeout> | undefined;
-    const testPromise = initCmd?.parseAsync(['init', '--yes']);
-    const timeoutPromise = new Promise((_, reject) => {
-      timeoutId = setTimeout(() => reject(new Error('Test timed out - init command may be prompting')), 3000);
-    });
+    expect(await fs.stat(gauntletDir)).toBeDefined();
+    expect(await fs.stat(configFile)).toBeDefined();
+    expect(await fs.stat(reviewsDir)).toBeDefined();
+    expect(await fs.stat(checksDir)).toBeDefined();
+    expect(await fs.stat(runGauntletFile)).toBeDefined();
-    try {
-      await Promise.race([testPromise, timeoutPromise]);
-      // Check that files were created
-      const gauntletDir = path.join(TEST_DIR, '.gauntlet');
-      const configFile = path.join(gauntletDir, 'config.yml');
-      const reviewsDir = path.join(gauntletDir, 'reviews');
-      const checksDir = path.join(gauntletDir, 'checks');
-      const runGauntletFile = path.join(gauntletDir, 'run_gauntlet.md');
-      expect(await fs.stat(gauntletDir)).toBeDefined();
-      expect(await fs.stat(configFile)).toBeDefined();
-      expect(await fs.stat(reviewsDir)).toBeDefined();
-      expect(await fs.stat(checksDir)).toBeDefined();
-      expect(await fs.stat(runGauntletFile)).toBeDefined();
-      // Verify config content
-      const configContent = await fs.readFile(configFile, 'utf-8');
-      expect(configContent).toContain('base_branch');
-      expect(configContent).toContain('log_dir');
-      // Verify review file content
-      const reviewFile = path.join(reviewsDir, 'code-quality.md');
-      const reviewContent = await fs.readFile(reviewFile, 'utf-8');
-      expect(reviewContent).toContain('cli_preference');
-    } catch (error: any) {
-      // If it times out, skip this test for now - the command installation part may need more complex mocking
-      if (error.message.includes('timed out')) {
-        console.log('Skipping test due to interactive prompt - command installation requires manual testing');
-        return;
-      }
-      throw error;
-    } finally {
-      if (timeoutId) clearTimeout(timeoutId);
-    }
+    // Verify config content
+    const configContent = await fs.readFile(configFile, 'utf-8');
+    expect(configContent).toContain('base_branch');
+    expect(configContent).toContain('log_dir');
+    expect(configContent).toContain('mock-cli-1'); // Should be present
+    expect(configContent).not.toContain('mock-cli-2'); // Should not be present (unavailable)
+    // Verify review file content
+    const reviewFile = path.join(reviewsDir, 'code-quality.md');
+    const reviewContent = await fs.readFile(reviewFile, 'utf-8');
+    expect(reviewContent).toContain('mock-cli-1');
   });
   it('should not create directory if .gauntlet already exists', async () => {
@@ -96,8 +109,7 @@ describe('Init Command', () => {
     const gauntletDir = path.join(TEST_DIR, '.gauntlet');
     await fs.mkdir(gauntletDir, { recursive: true });
-    const initCmd = program.commands.find(cmd => cmd.name() === 'init');
-    await initCmd?.parseAsync(['init', '--yes']);
+    await program.parseAsync(['node', 'test', 'init', '--yes']);
     const output = logs.join('\n');
     expect(output).toContain('.gauntlet directory already exists');

package/src/commands/init.ts CHANGED Viewed

@@ -4,7 +4,9 @@ import fs from 'node:fs/promises';
 import path from 'node:path';
 import readline from 'node:readline';
 import { exists } from './shared.js';
-import { getAllAdapters, getProjectCommandAdapters, getUserCommandAdapters } from '../cli-adapters/index.js';
+import { getAllAdapters, getProjectCommandAdapters, getUserCommandAdapters, type CLIAdapter } from '../cli-adapters/index.js';
+const MAX_PROMPT_ATTEMPTS = 10;
 const GAUNTLET_COMMAND_CONTENT = `---
 description: Run the full verification gauntlet
@@ -31,11 +33,18 @@ interface InitOptions {
   yes?: boolean;
 }
+interface InitConfig {
+  sourceDir: string;
+  lintCmd: string | null; // null means not selected, empty string means selected but blank (TODO)
+  testCmd: string | null; // null means not selected, empty string means selected but blank (TODO)
+  selectedAdapters: CLIAdapter[];
+}
 export function registerInitCommand(program: Command): void {
   program
     .command('init')
     .description('Initialize .gauntlet configuration')
-    .option('-y, --yes', 'Skip prompts and use defaults (project-level commands for all agents)')
+    .option('-y, --yes', 'Skip prompts and use defaults (all available CLIs, source: ., no extra checks)')
     .action(async (options: InitOptions) => {
       const projectRoot = process.cwd();
       const targetDir = path.join(projectRoot, '.gauntlet');
@@ -45,39 +54,90 @@ export function registerInitCommand(program: Command): void {
         return;
       }
+      // 1. CLI Detection
+      console.log('Detecting available CLI agents...');
+      const availableAdapters = await detectAvailableCLIs();
+      if (availableAdapters.length === 0) {
+        console.log();
+        console.log(chalk.red('Error: No CLI agents found. Install at least one:'));
+        console.log('  - Claude: https://docs.anthropic.com/en/docs/claude-code');
+        console.log('  - Gemini: https://github.com/google-gemini/gemini-cli');
+        console.log('  - Codex: https://github.com/openai/codex');
+        console.log();
+        return;
+      }
+      let config: InitConfig;
+      if (options.yes) {
+        config = {
+          sourceDir: '.',
+          lintCmd: null,
+          testCmd: null,
+          selectedAdapters: availableAdapters,
+        };
+      } else {
+        config = await promptForConfig(availableAdapters);
+      }
       // Create base config structure
       await fs.mkdir(targetDir);
       await fs.mkdir(path.join(targetDir, 'checks'));
       await fs.mkdir(path.join(targetDir, 'reviews'));
-      // Write sample config
-      const sampleConfig = `base_branch: origin/main
-log_dir: .gauntlet_logs
-cli:
-  default_preference:
-    - gemini
-    - codex
-    - claude
-  check_usage_limit: false
-entry_points:
-  - path: "."
-    reviews:
-      - code-quality
-`;
-      await fs.writeFile(path.join(targetDir, 'config.yml'), sampleConfig);
+      // 4. Commented Config Templates
+      // Generate config.yml
+      const configContent = generateConfigYml(config);
+      await fs.writeFile(path.join(targetDir, 'config.yml'), configContent);
       console.log(chalk.green('Created .gauntlet/config.yml'));
-      // Write sample review
-      const sampleReview = `---
-cli_preference:
-  - gemini
-  - codex
+      // Generate check files if selected
+      if (config.lintCmd !== null) {
+        const lintContent = `name: lint
+command: ${config.lintCmd || '# command: TODO - add your lint command (e.g., npm run lint)'}
+# parallel: false
+# run_in_ci: true
+# run_locally: true
+# timeout: 300
+`;
+        await fs.writeFile(path.join(targetDir, 'checks', 'lint.yml'), lintContent);
+        console.log(chalk.green('Created .gauntlet/checks/lint.yml'));
+      }
+      if (config.testCmd !== null) {
+        const testContent = `name: unit-tests
+command: ${config.testCmd || '# command: TODO - add your test command (e.g., npm test)'}
+# parallel: false
+# run_in_ci: true
+# run_locally: true
+# timeout: 300
+`;
+        await fs.writeFile(path.join(targetDir, 'checks', 'unit-tests.yml'), testContent);
+        console.log(chalk.green('Created .gauntlet/checks/unit-tests.yml'));
+      }
+      // 5. Improved Default Code Review Prompt
+      const reviewContent = `---
+num_reviews: 1
+# parallel: true
+# timeout: 300
+# cli_preference:
+#   - ${config.selectedAdapters[0]?.name || 'claude'}
 ---
 # Code Review
-Review this code.
+Review the diff for quality issues:
+- **Bugs**: Logic errors, null handling, edge cases, race conditions
+- **Security**: Input validation, secrets exposure, injection risks
+- **Maintainability**: Unclear code, missing error handling, duplication
+- **Performance**: Unnecessary work, N+1 queries, missing optimizations
+For each issue: cite file:line, explain the problem, suggest a fix.
 `;
-      await fs.writeFile(path.join(targetDir, 'reviews', 'code-quality.md'), sampleReview);
+      await fs.writeFile(path.join(targetDir, 'reviews', 'code-quality.md'), reviewContent);
       console.log(chalk.green('Created .gauntlet/reviews/code-quality.md'));
       // Write the canonical gauntlet command file
@@ -87,52 +147,181 @@ Review this code.
       // Handle command installation
       if (options.yes) {
-        // Default: install at project level for all agents
-        const adapters = getProjectCommandAdapters();
-        await installCommands('project', adapters.map(a => a.name), projectRoot, canonicalCommandPath);
+        // Default: install at project level for all selected agents (if they support it)
+        const adaptersToInstall = config.selectedAdapters.filter(a => a.getProjectCommandDir() !== null);
+        if (adaptersToInstall.length > 0) {
+            await installCommands('project', adaptersToInstall.map(a => a.name), projectRoot, canonicalCommandPath);
+        }
       } else {
-        // Interactive prompts
-        await promptAndInstallCommands(projectRoot, canonicalCommandPath);
+        // Interactive prompts - passing available adapters to avoid re-checking or offering unavailable ones
+        await promptAndInstallCommands(projectRoot, canonicalCommandPath, availableAdapters);
       }
     });
 }
-async function promptAndInstallCommands(projectRoot: string, canonicalCommandPath: string): Promise<void> {
-  // Read all lines from stdin first if not a TTY (piped input)
-  const isTTY = process.stdin.isTTY;
-  let inputLines: string[] = [];
-  let lineIndex = 0;
-  if (!isTTY) {
-    // Read all input at once for piped input
-    const chunks: Buffer[] = [];
-    for await (const chunk of process.stdin) {
-      chunks.push(chunk);
+async function detectAvailableCLIs(): Promise<CLIAdapter[]> {
+  const allAdapters = getAllAdapters();
+  const available: CLIAdapter[] = [];
+  for (const adapter of allAdapters) {
+    const isAvailable = await adapter.isAvailable();
+    if (isAvailable) {
+      console.log(chalk.green(`  ✓ ${adapter.name}`));
+      available.push(adapter);
+    } else {
+      console.log(chalk.dim(`  ✗ ${adapter.name} (not installed)`));
     }
-    const input = Buffer.concat(chunks).toString('utf-8');
-    inputLines = input.split('\n').map(l => l.trim());
   }
+  return available;
+}
-  const rl = isTTY ? readline.createInterface({
+async function promptForConfig(availableAdapters: CLIAdapter[]): Promise<InitConfig> {
+  const rl = readline.createInterface({
     input: process.stdin,
     output: process.stdout
-  }) : null;
-  const question = async (prompt: string): Promise<string> => {
-    if (isTTY && rl) {
-      return new Promise((resolve) => {
-        rl.question(prompt, (answer) => {
-          resolve(answer?.trim() ?? '');
-        });
+  });
+  const question = (prompt: string): Promise<string> => {
+    return new Promise((resolve) => {
+      rl.question(prompt, (answer) => {
+        resolve(answer?.trim() ?? '');
       });
-    } else {
-      // Non-interactive: read from pre-buffered lines
-      process.stdout.write(prompt);
-      const answer = inputLines[lineIndex] ?? '';
-      lineIndex++;
-      console.log(answer); // Echo the answer
-      return answer;
+    });
+  };
+  try {
+    // CLI Selection
+    console.log();
+    console.log('Which CLIs would you like to use?');
+    availableAdapters.forEach((adapter, i) => {
+      console.log(`  ${i + 1}) ${adapter.name}`);
+    });
+    console.log(`  ${availableAdapters.length + 1}) All`);
+    let selectedAdapters: CLIAdapter[] = [];
+    let attempts = 0;
+    while (true) {
+      attempts++;
+      if (attempts > MAX_PROMPT_ATTEMPTS) throw new Error('Too many invalid attempts');
+      const answer = await question(`(comma-separated, e.g., 1,2): `);
+      const selections = answer.split(',').map(s => s.trim()).filter(s => s);
+      if (selections.length === 0) {
+        // Default to all if empty? Or force selection? Plan says "Which CLIs...".
+        // Let's assume user must pick or we default to all if they just hit enter?
+        // Actually, usually enter means default. Let's make All the default if just Enter.
+        selectedAdapters = availableAdapters;
+        break;
+      }
+      let valid = true;
+      const chosen: CLIAdapter[] = [];
+      for (const sel of selections) {
+        const num = parseInt(sel, 10);
+        if (isNaN(num) || num < 1 || num > availableAdapters.length + 1) {
+          console.log(chalk.yellow(`Invalid selection: ${sel}`));
+          valid = false;
+          break;
+        }
+        if (num === availableAdapters.length + 1) {
+          chosen.push(...availableAdapters);
+        } else {
+          chosen.push(availableAdapters[num - 1]);
+        }
+      }
+      if (valid) {
+        selectedAdapters = [...new Set(chosen)];
+        break;
+      }
+    }
+    // Source Directory
+    console.log();
+    const sourceDirInput = await question('Enter your source directory (e.g., src, lib, .) [default: .]: ');
+    const sourceDir = sourceDirInput || '.';
+    // Lint Check
+    console.log();
+    const addLint = await question('Would you like to add a linting check? [y/N]: ');
+    let lintCmd: string | null = null;
+    if (addLint.toLowerCase().startsWith('y')) {
+      lintCmd = await question('Enter lint command (blank to fill later): ');
+    }
+    // Unit Test Check
+    console.log();
+    const addTest = await question('Would you like to add a unit test check? [y/N]: ');
+    let testCmd: string | null = null;
+    if (addTest.toLowerCase().startsWith('y')) {
+      testCmd = await question('Enter test command (blank to fill later): ');
     }
+    rl.close();
+    return {
+      sourceDir,
+      lintCmd,
+      testCmd,
+      selectedAdapters
+    };
+  } catch (error) {
+    rl.close();
+    throw error;
+  }
+}
+function generateConfigYml(config: InitConfig): string {
+  const cliList = config.selectedAdapters.map(a => `    - ${a.name}`).join('\n');
+  let entryPoints = '';
+  // If we have checks, we need a source directory entry point
+  if (config.lintCmd !== null || config.testCmd !== null) {
+    entryPoints += `  - path: "${config.sourceDir}"
+    checks:\n`;
+    if (config.lintCmd !== null) entryPoints += `      - lint\n`;
+    if (config.testCmd !== null) entryPoints += `      - unit-tests\n`;
+  }
+  // Always include root entry point for reviews
+  entryPoints += `  - path: "."
+    reviews:
+      - code-quality`;
+  return `base_branch: origin/main
+log_dir: .gauntlet_logs
+# Run gates in parallel when possible (default: true)
+# allow_parallel: true
+cli:
+  default_preference:
+${cliList}
+  # Check CLI usage quota before running (if unavailable, uses next in list)
+  # check_usage_limit: false
+entry_points:
+${entryPoints}
+`;
+}
+async function promptAndInstallCommands(projectRoot: string, canonicalCommandPath: string, availableAdapters: CLIAdapter[]): Promise<void> {
+  // Only proceed if we have available adapters
+  if (availableAdapters.length === 0) return;
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout
+  });
+  const question = (prompt: string): Promise<string> => {
+    return new Promise((resolve) => {
+      rl.question(prompt, (answer) => {
+        resolve(answer?.trim() ?? '');
+      });
+    });
   };
   try {
@@ -150,14 +339,12 @@ async function promptAndInstallCommands(projectRoot: string, canonicalCommandPat
     let installLevel: InstallLevel = 'none';
     let answer = await question('Select option [1-3]: ');
-    // Handle EOF or empty input for non-TTY
-    if (!isTTY && answer === '' && lineIndex > inputLines.length) {
-      console.log(chalk.dim('\nNo input received, skipping command installation.'));
-      return;
-    }
+    let installLevelAttempts = 0;
     while (true) {
+      installLevelAttempts++;
+      if (installLevelAttempts > MAX_PROMPT_ATTEMPTS) throw new Error('Too many invalid attempts');
       if (answer === '1') {
         installLevel = 'none';
         break;
@@ -169,47 +356,48 @@ async function promptAndInstallCommands(projectRoot: string, canonicalCommandPat
         break;
       } else {
         console.log(chalk.yellow('Please enter 1, 2, or 3'));
-        if (!isTTY && lineIndex >= inputLines.length) {
-          console.log(chalk.dim('\nNo more input, skipping command installation.'));
-          return;
-        }
         answer = await question('Select option [1-3]: ');
       }
     }
     if (installLevel === 'none') {
       console.log(chalk.dim('\nSkipping command installation.'));
-      rl?.close();
+      rl.close();
       return;
     }
-    // Question 2: Which agents
-    const allAdapters = getAllAdapters();
-    const availableAdapters = installLevel === 'project'
-      ? allAdapters.filter(a => a.getProjectCommandDir() !== null)
-      : allAdapters.filter(a => a.getUserCommandDir() !== null);
+    // Filter available adapters based on install level support
+    const installableAdapters = installLevel === 'project'
+      ? availableAdapters.filter(a => a.getProjectCommandDir() !== null)
+      : availableAdapters.filter(a => a.getUserCommandDir() !== null);
+    if (installableAdapters.length === 0) {
+      console.log(chalk.yellow(`No available agents support ${installLevel}-level commands.`));
+      rl.close();
+      return;
+    }
     console.log();
     console.log('Which CLI agents would you like to install the command for?');
-    availableAdapters.forEach((adapter, i) => {
+    installableAdapters.forEach((adapter, i) => {
       console.log(`  ${i + 1}) ${adapter.name}`);
     });
-    console.log(`  ${availableAdapters.length + 1}) All of the above`);
+    console.log(`  ${installableAdapters.length + 1}) All of the above`);
     console.log();
     let selectedAgents: string[] = [];
-    answer = await question(`Select options (comma-separated, e.g., 1,2 or ${availableAdapters.length + 1} for all): `);
+    answer = await question(`Select options (comma-separated, e.g., 1,2 or ${installableAdapters.length + 1} for all): `);
+    let agentSelectionAttempts = 0;
     while (true) {
+      agentSelectionAttempts++;
+      if (agentSelectionAttempts > MAX_PROMPT_ATTEMPTS) throw new Error('Too many invalid attempts');
       const selections = answer.split(',').map(s => s.trim()).filter(s => s);
       if (selections.length === 0) {
-        if (!isTTY && lineIndex >= inputLines.length) {
-          console.log(chalk.dim('\nNo more input, skipping command installation.'));
-          return;
-        }
         console.log(chalk.yellow('Please select at least one option'));
-        answer = await question(`Select options (comma-separated, e.g., 1,2 or ${availableAdapters.length + 1} for all): `);
+        answer = await question(`Select options (comma-separated, e.g., 1,2 or ${installableAdapters.length + 1} for all): `);
         continue;
       }
@@ -218,16 +406,15 @@ async function promptAndInstallCommands(projectRoot: string, canonicalCommandPat
       for (const sel of selections) {
         const num = parseInt(sel, 10);
-        if (isNaN(num) || num < 1 || num > availableAdapters.length + 1) {
+        if (isNaN(num) || num < 1 || num > installableAdapters.length + 1) {
           console.log(chalk.yellow(`Invalid selection: ${sel}`));
           valid = false;
           break;
         }
-        if (num === availableAdapters.length + 1) {
-          // All agents
-          agents.push(...availableAdapters.map(a => a.name));
+        if (num === installableAdapters.length + 1) {
+          agents.push(...installableAdapters.map(a => a.name));
         } else {
-          agents.push(availableAdapters[num - 1].name);
+          agents.push(installableAdapters[num - 1].name);
         }
       }
@@ -235,21 +422,16 @@ async function promptAndInstallCommands(projectRoot: string, canonicalCommandPat
         selectedAgents = [...new Set(agents)]; // Dedupe
         break;
       }
-      if (!isTTY && lineIndex >= inputLines.length) {
-        console.log(chalk.dim('\nNo more input, skipping command installation.'));
-        return;
-      }
-      answer = await question(`Select options (comma-separated, e.g., 1,2 or ${availableAdapters.length + 1} for all): `);
+      answer = await question(`Select options (comma-separated, e.g., 1,2 or ${installableAdapters.length + 1} for all): `);
     }
-    rl?.close();
+    rl.close();
     // Install commands
     await installCommands(installLevel, selectedAgents, projectRoot, canonicalCommandPath);
   } catch (error: any) {
-    rl?.close();
+    rl.close();
     throw error;
   }
 }
@@ -286,11 +468,7 @@ async function installCommands(
     }
     if (!commandDir) {
-      if (level === 'project') {
-        console.log(chalk.yellow(`  ${adapter.name}: No project-level command support, skipping`));
-      } else {
-        console.log(chalk.yellow(`  ${adapter.name}: No user-level command support, skipping`));
-      }
+      // This shouldn't happen if we filtered correctly, but good safety check
       continue;
     }

package/src/commands/rerun.ts CHANGED Viewed

@@ -7,7 +7,8 @@ import { JobGenerator } from '../core/job.js';
 import { Runner } from '../core/runner.js';
 import { Logger } from '../output/logger.js';
 import { ConsoleReporter } from '../output/console.js';
-import { findPreviousFailures, GateFailures, PreviousViolation } from '../utils/log-parser.js';
+import { findPreviousFailures, type GateFailures, type PreviousViolation } from '../utils/log-parser.js';
+import { rotateLogs } from './shared.js';
 export function registerRerunCommand(program: Command): void {
   program
@@ -51,6 +52,9 @@ export function registerRerunCommand(program: Command): void {
           console.log(chalk.dim('No previous failures found. Running as normal...'));
         }
+        // Rotate logs before starting the new run
+        await rotateLogs(config.project.log_dir);
         // Detect changes (default to uncommitted unless --commit is specified)
         // Note: Rerun defaults to uncommitted changes for faster iteration loops,
         // unlike 'run' which defaults to base_branch comparison.

package/src/commands/review.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { JobGenerator } from '../core/job.js';
 import { Runner } from '../core/runner.js';
 import { Logger } from '../output/logger.js';
 import { ConsoleReporter } from '../output/console.js';
+import { rotateLogs } from './shared.js';
 export function registerReviewCommand(program: Command): void {
   program
@@ -18,6 +19,10 @@ export function registerReviewCommand(program: Command): void {
     .action(async (options) => {
       try {
         const config = await loadConfig();
+        // Rotate logs before starting
+        await rotateLogs(config.project.log_dir);
         const changeDetector = new ChangeDetector(config.project.base_branch, {
           commit: options.commit,
           uncommitted: options.uncommitted

package/src/commands/run.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { JobGenerator } from '../core/job.js';
 import { Runner } from '../core/runner.js';
 import { Logger } from '../output/logger.js';
 import { ConsoleReporter } from '../output/console.js';
+import { rotateLogs } from './shared.js';
 export function registerRunCommand(program: Command): void {
   program
@@ -18,6 +19,10 @@ export function registerRunCommand(program: Command): void {
     .action(async (options) => {
       try {
         const config = await loadConfig();
+        // Rotate logs before starting
+        await rotateLogs(config.project.log_dir);
         const changeDetector = new ChangeDetector(config.project.base_branch, {
           commit: options.commit,
           uncommitted: options.uncommitted

package/src/commands/shared.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import fs from 'node:fs/promises';
+import path from 'node:path';
 export async function exists(path: string): Promise<boolean> {
   try {
@@ -8,3 +9,36 @@ export async function exists(path: string): Promise<boolean> {
     return false;
   }
 }
+export async function rotateLogs(logDir: string): Promise<void> {
+  const previousDir = path.join(logDir, 'previous');
+  try {
+    // 1. Ensure logDir exists (if not, nothing to rotate, but we should create it for future use if needed,
+    //    though usually the logger creates it. If it doesn't exist, we can just return).
+    if (!(await exists(logDir))) {
+        return;
+    }
+    // 2. Clear .gauntlet_logs/previous if it exists
+    if (await exists(previousDir)) {
+      const previousFiles = await fs.readdir(previousDir);
+      await Promise.all(
+        previousFiles.map(file => fs.rm(path.join(previousDir, file), { recursive: true, force: true }))
+      );
+    } else {
+      await fs.mkdir(previousDir, { recursive: true });
+    }
+    // 3. Move all existing files in .gauntlet_logs/ to .gauntlet_logs/previous
+    const files = await fs.readdir(logDir);
+    await Promise.all(
+      files
+        .filter(file => file !== 'previous')
+        .map(file => fs.rename(path.join(logDir, file), path.join(previousDir, file)))
+    );
+  } catch (error) {
+    // Log warning but don't crash the run as log rotation failure isn't critical
+    console.warn(`Failed to rotate logs in ${logDir}:`, error instanceof Error ? error.message : error);
+  }
+}

package/src/gates/review.ts CHANGED Viewed

@@ -3,7 +3,9 @@ import { promisify } from 'node:util';
 import { ReviewGateConfig, ReviewPromptFrontmatter } from '../config/types.js';
 import { GateResult } from './result.js';
 import { CLIAdapter, getAdapter } from '../cli-adapters/index.js';
-import { PreviousViolation } from '../utils/log-parser.js';
+import { Logger } from '../output/logger.js';
+import { parseDiff, isValidViolationLocation, type DiffFileRange } from '../utils/diff-parser.js';
+import { type PreviousViolation } from '../utils/log-parser.js';
 const execAsync = promisify(exec);
@@ -13,9 +15,17 @@ const JSON_SYSTEM_INSTRUCTION = `
 You are in a read-only mode. You may read files in the repository to gather context.
 Do NOT attempt to modify files or run shell commands that change system state.
 Do NOT access files outside the repository root.
+Do NOT access the .git/ directory or read git history/commit information.
 Use your available file-reading and search tools to find information.
 If the diff is insufficient or ambiguous, use your tools to read the full file content or related files.
+CRITICAL SCOPE RESTRICTIONS:
+- ONLY review the code changes shown in the diff below
+- DO NOT review commit history or existing code outside the diff
+- All violations MUST reference file paths and line numbers that appear IN THE DIFF
+- The "file" field must match a file from the diff
+- The "line" field must be within a changed region (lines starting with + in the diff)
 IMPORTANT: You must output ONLY a valid JSON object. Do not output any markdown text, explanations, or code blocks outside of the JSON.
 Each violation MUST include a "priority" field with one of: "critical", "high", "medium", "low".
@@ -246,7 +256,11 @@ export class ReviewGateExecutor {
       await adapterLogger(`\n--- Review Output (${adapter.name}) ---\n${output}\n`);
-      const evaluation = this.evaluateOutput(output);
+      const evaluation = this.evaluateOutput(output, diff);
+      if (evaluation.filteredCount && evaluation.filteredCount > 0) {
+        await adapterLogger(`Note: ${evaluation.filteredCount} out-of-scope violations filtered\n`);
+      }
       // Log formatted summary
       if (evaluation.json) {
@@ -408,14 +422,21 @@ export class ReviewGateExecutor {
     return lines.join('\n');
   }
-  public evaluateOutput(output: string): { status: 'pass' | 'fail' | 'error'; message: string; json?: any } {
+  public evaluateOutput(output: string, diff?: string): {
+    status: 'pass' | 'fail' | 'error';
+    message: string;
+    json?: any;
+    filteredCount?: number;
+  } {
+    const diffRanges = diff ? parseDiff(diff) : undefined;
     try {
       // 1. Try to extract from markdown code block first (most reliable)
       const jsonBlockMatch = output.match(/```json\s*([\s\S]*?)\s*```/);
       if (jsonBlockMatch) {
         try {
           const json = JSON.parse(jsonBlockMatch[1]);
-          return this.validateAndReturn(json);
+          return this.validateAndReturn(json, diffRanges);
         } catch {
           // If code block parse fails, fall back to other methods
         }
@@ -433,7 +454,7 @@ export class ReviewGateExecutor {
             const json = JSON.parse(candidate);
             // If we successfully parsed an object with 'status', it's likely our result
             if (json.status) {
-              return this.validateAndReturn(json);
+              return this.validateAndReturn(json, diffRanges);
             }
           } catch {
             // Not valid JSON, keep searching backwards
@@ -448,7 +469,7 @@ export class ReviewGateExecutor {
          try {
             const candidate = output.substring(firstStart, end + 1);
             const json = JSON.parse(candidate);
-            return this.validateAndReturn(json);
+            return this.validateAndReturn(json, diffRanges);
          } catch {
              // Ignore
          }
@@ -461,7 +482,10 @@ export class ReviewGateExecutor {
     }
   }
-  private validateAndReturn(json: any): { status: 'pass' | 'fail' | 'error'; message: string; json?: any } {
+  private validateAndReturn(
+    json: any,
+    diffRanges?: Map<string, DiffFileRange>
+  ): { status: 'pass' | 'fail' | 'error'; message: string; json?: any; filteredCount?: number } {
       // Validate Schema
       if (!json.status || (json.status !== 'pass' && json.status !== 'fail')) {
          return { status: 'error', message: 'Invalid JSON: missing or invalid "status" field', json };
@@ -472,6 +496,33 @@ export class ReviewGateExecutor {
       }
       // json.status === 'fail'
+      let filteredCount = 0;
+      if (Array.isArray(json.violations) && diffRanges?.size) {
+        const originalCount = json.violations.length;
+        json.violations = json.violations.filter((v: any) => {
+          const isValid = isValidViolationLocation(v.file, v.line, diffRanges);
+          if (!isValid) {
+            // Can't easily access logger here, but could return warning info
+            // console.warn(`[WARNING] Filtered violation: ${v.file}:${v.line ?? '?'} (not in diff)`);
+          }
+          return isValid;
+        });
+        filteredCount = originalCount - json.violations.length;
+        // If all filtered out, change to pass
+        if (json.violations.length === 0) {
+          return {
+            status: 'pass',
+            message: `Passed (${filteredCount} out-of-scope violations filtered)`,
+            json: { status: 'pass' },
+            filteredCount
+          };
+        }
+      }
       const violationCount = Array.isArray(json.violations) ? json.violations.length : 'some';
       // Construct a summary message
@@ -481,7 +532,7 @@ export class ReviewGateExecutor {
           msg += `. Example: ${first.issue} in ${first.file}`;
       }
-      return { status: 'fail', message: msg, json };
+      return { status: 'fail', message: msg, json, filteredCount };
   }
   private parseLines(stdout: string): string[] {

package/src/utils/diff-parser.ts ADDED Viewed

@@ -0,0 +1,86 @@
+export type DiffFileRange = Set<number>;
+/**
+ * Parses a unified diff string into a map of filenames to sets of valid line numbers.
+ * Valid line numbers are those that appear in the diff as added or modified lines.
+ */
+export function parseDiff(diff: string): Map<string, DiffFileRange> {
+  const fileRanges = new Map<string, DiffFileRange>();
+  const lines = diff.split('\n');
+  let currentFile: string | null = null;
+  let currentRanges: DiffFileRange | null = null;
+  let currentLineNumber = 0;
+  for (const line of lines) {
+    // Parse file header: diff --git a/path/to/file b/path/to/file
+    if (line.startsWith('diff --git')) {
+      const parts = line.split(' ');
+      if (parts.length >= 4) {
+        // Extract filename from b/path/to/file (target file)
+        const targetPath = parts[3];
+        // Remove 'b/' prefix
+        currentFile = targetPath.startsWith('b/') ? targetPath.substring(2) : targetPath;
+        // Skip .git/ paths
+        if (currentFile.startsWith('.git/')) {
+          currentFile = null;
+          currentRanges = null;
+          continue;
+        }
+        currentRanges = new Set<number>();
+        fileRanges.set(currentFile, currentRanges);
+      }
+      continue;
+    }
+    // Skip if we're ignoring this file (e.g. .git/)
+    if (!currentFile || !currentRanges) continue;
+    // Parse hunk header: @@ -old,count +new,count @@
+    if (line.startsWith('@@')) {
+      const match = line.match(/@@ \-\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
+      if (match && match[1]) {
+        currentLineNumber = parseInt(match[1], 10);
+      }
+      continue;
+    }
+    // Track added lines
+    if (line.startsWith('+') && !line.startsWith('+++')) {
+      currentRanges.add(currentLineNumber);
+      currentLineNumber++;
+    }
+    // Track context lines (unchanged) to keep line count correct
+    else if (line.startsWith(' ')) {
+      currentLineNumber++;
+    }
+    // Removed lines (-) do not increment the new line counter
+  }
+  return fileRanges;
+}
+/**
+ * Checks if a violation is valid based on the parsed diff ranges.
+ */
+export function isValidViolationLocation(
+  file: string,
+  line: number | undefined,
+  diffRanges: Map<string, DiffFileRange> | undefined
+): boolean {
+  // If no diff ranges provided (e.g. full file review), assume valid
+  if (!diffRanges) return true;
+  // Line is required for diff-scoped reviews
+  if (line === undefined) return false;
+  const validLines = diffRanges.get(file);
+  if (!validLines) {
+    // File not in diff
+    return false;
+  }
+  return validLines.has(line);
+}