agent-gauntlet 0.13.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7,7 +7,7 @@ import { Command } from "commander";
7
7
  // package.json
8
8
  var package_default = {
9
9
  name: "agent-gauntlet",
10
- version: "0.13.0",
10
+ version: "0.13.1",
11
11
  description: "A CLI tool for testing AI coding agents",
12
12
  license: "Apache-2.0",
13
13
  author: "Paul Caplan",
@@ -5244,7 +5244,7 @@ import fs19 from "node:fs";
5244
5244
  import fsPromises2 from "node:fs/promises";
5245
5245
  import path17 from "node:path";
5246
5246
  import { inspect } from "node:util";
5247
- var ANSI_REGEX = /\x1b\[[0-9;]*m/g;
5247
+ var ANSI_REGEX = /\x1b(?:\[[0-9;?]*[A-Za-z]|[78])/g;
5248
5248
  function stripAnsi(text) {
5249
5249
  return text.replace(ANSI_REGEX, "");
5250
5250
  }
@@ -5700,16 +5700,16 @@ function registerCheckCommand(program) {
5700
5700
  await debugLogger?.logRunStart(runMode, changes.length, jobs.length);
5701
5701
  const reporter = new ConsoleReporter;
5702
5702
  const runner = new Runner(config, logger, reporter, failuresMap, changeOptions, effectiveBaseBranch, passedSlotsMap, debugLogger ?? undefined, isRerun);
5703
- const success = await runner.run(jobs);
5704
- await debugLogger?.logRunEnd(success ? "pass" : "fail", 0, 0, 0, logger.getRunNumber());
5703
+ const outcome = await runner.run(jobs);
5704
+ await debugLogger?.logRunEnd(outcome.allPassed ? "pass" : "fail", outcome.stats.fixed, outcome.stats.skipped, outcome.stats.failed, logger.getRunNumber());
5705
5705
  await writeExecutionState(config.project.log_dir);
5706
- if (success) {
5706
+ if (outcome.allPassed) {
5707
5707
  await debugLogger?.logClean("auto", "all_passed");
5708
5708
  await cleanLogs(config.project.log_dir);
5709
5709
  }
5710
5710
  await releaseLock(config.project.log_dir);
5711
5711
  restoreConsole?.restore();
5712
- process.exit(success ? 0 : 1);
5712
+ process.exit(outcome.allPassed ? 0 : 1);
5713
5713
  } catch (error) {
5714
5714
  if (config && lockAcquired) {
5715
5715
  try {
@@ -6760,7 +6760,7 @@ allowed-tools: Bash
6760
6760
  if (isRun) {
6761
6761
  steps.push(`3. If it fails:
6762
6762
  - Identify the failed gates from the console output.
6763
- - For CHECK failures: Read the \`.log\` file path provided in the output.
6763
+ - For CHECK failures: Read the \`.log\` file path provided in the output. If the log contains a \`--- Fix Instructions ---\` section, follow those instructions to fix the issue. If it contains a \`--- Fix Skill: <name> ---\` section, invoke that skill.
6764
6764
  - For REVIEW failures: Read the \`.json\` file path provided in the "Review: <path>" output.
6765
6765
  4. Address the violations:
6766
6766
  - For REVIEW violations: You MUST update the \`"status"\` and \`"result"\` fields in the provided \`.json\` file for EACH violation.
@@ -6779,7 +6779,7 @@ allowed-tools: Bash
6779
6779
  - Outstanding Failures: (if retry limit exceeded, list unverified fixes and remaining issues)`);
6780
6780
  } else {
6781
6781
  steps.push(`3. If any checks fail:
6782
- - Read the \`.log\` file path provided in the output for each failed check.
6782
+ - Read the \`.log\` file path provided in the output for each failed check. If the log contains a \`--- Fix Instructions ---\` section, follow those instructions. If it contains a \`--- Fix Skill: <name> ---\` section, invoke that skill.
6783
6783
  - Fix the issues found.
6784
6784
  4. Run \`${command}\` again to verify your fixes. Do NOT run \`agent-gauntlet clean\` between retries.
6785
6785
  5. Repeat steps 3-4 until all checks pass or you've made 3 attempts.
@@ -6837,6 +6837,7 @@ var HELP_SKILL_BUNDLE = {
6837
6837
  };
6838
6838
  var SETUP_SKILL_CONTENT = readSkillTemplate("setup-skill.md");
6839
6839
  var CHECK_CATALOG_REFERENCE = readSkillTemplate("check-catalog.md");
6840
+ var PROJECT_STRUCTURE_REFERENCE = readSkillTemplate("setup-ref-project-structure.md");
6840
6841
  var SKILL_DEFINITIONS = [
6841
6842
  { action: "run", content: GAUNTLET_RUN_SKILL_CONTENT },
6842
6843
  { action: "check", content: GAUNTLET_CHECK_SKILL_CONTENT },
@@ -6852,7 +6853,10 @@ var SKILL_DEFINITIONS = [
6852
6853
  {
6853
6854
  action: "setup",
6854
6855
  content: SETUP_SKILL_CONTENT,
6855
- references: { "check-catalog.md": CHECK_CATALOG_REFERENCE },
6856
+ references: {
6857
+ "check-catalog.md": CHECK_CATALOG_REFERENCE,
6858
+ "project-structure.md": PROJECT_STRUCTURE_REFERENCE
6859
+ },
6856
6860
  skillsOnly: true
6857
6861
  }
6858
6862
  ];
@@ -7006,14 +7010,29 @@ async function addToGitignore(projectRoot, entry) {
7006
7010
  `);
7007
7011
  console.log(chalk9.green(`Added ${entry} to .gitignore`));
7008
7012
  }
7009
- async function detectBaseBranch() {
7013
+ function gitSilent(args, opts) {
7014
+ const { execFileSync } = __require("node:child_process");
7010
7015
  try {
7011
- const { execSync } = await import("node:child_process");
7012
- const ref = execSync("git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null", { encoding: "utf-8" }).trim();
7013
- if (ref) {
7014
- return ref.replace("refs/remotes/", "");
7016
+ return execFileSync("git", args, {
7017
+ encoding: "utf-8",
7018
+ timeout: opts?.timeout,
7019
+ stdio: ["pipe", "pipe", "ignore"]
7020
+ }).trim();
7021
+ } catch {
7022
+ return null;
7023
+ }
7024
+ }
7025
+ async function detectBaseBranch() {
7026
+ gitSilent(["remote", "set-head", "origin", "--auto"], { timeout: 5000 });
7027
+ const ref = gitSilent(["symbolic-ref", "refs/remotes/origin/HEAD"]);
7028
+ if (ref) {
7029
+ return ref.replace("refs/remotes/", "");
7030
+ }
7031
+ for (const candidate of ["origin/main", "origin/master"]) {
7032
+ if (gitSilent(["rev-parse", "--verify", candidate]) !== null) {
7033
+ return candidate;
7015
7034
  }
7016
- } catch {}
7035
+ }
7017
7036
  return "origin/main";
7018
7037
  }
7019
7038
  function buildAdapterSettingsBlock(adapters3) {
@@ -7392,16 +7411,16 @@ function registerReviewCommand(program) {
7392
7411
  await debugLogger?.logRunStart(runMode, changes.length, jobs.length);
7393
7412
  const reporter = new ConsoleReporter;
7394
7413
  const runner = new Runner(config, logger, reporter, failuresMap, changeOptions, effectiveBaseBranch, passedSlotsMap, debugLogger ?? undefined, isRerun);
7395
- const success = await runner.run(jobs);
7396
- await debugLogger?.logRunEnd(success ? "pass" : "fail", 0, 0, 0, logger.getRunNumber());
7414
+ const outcome = await runner.run(jobs);
7415
+ await debugLogger?.logRunEnd(outcome.allPassed ? "pass" : "fail", outcome.stats.fixed, outcome.stats.skipped, outcome.stats.failed, logger.getRunNumber());
7397
7416
  await writeExecutionState(config.project.log_dir);
7398
- if (success) {
7417
+ if (outcome.allPassed) {
7399
7418
  await debugLogger?.logClean("auto", "all_passed");
7400
7419
  await cleanLogs(config.project.log_dir);
7401
7420
  }
7402
7421
  await releaseLock(config.project.log_dir);
7403
7422
  restoreConsole?.restore();
7404
- process.exit(success ? 0 : 1);
7423
+ process.exit(outcome.allPassed ? 0 : 1);
7405
7424
  } catch (error) {
7406
7425
  if (config && lockAcquired) {
7407
7426
  try {
@@ -8358,4 +8377,4 @@ if (process.argv.length < 3) {
8358
8377
  }
8359
8378
  program.parse(process.argv);
8360
8379
 
8361
- //# debugId=D6CA917DC551041A64756E2164756E21
8380
+ //# debugId=61E76D84AB3F58F264756E2164756E21