agent-gauntlet 0.13.0 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -7,7 +7,7 @@ import { Command } from "commander";
|
|
|
7
7
|
// package.json
|
|
8
8
|
var package_default = {
|
|
9
9
|
name: "agent-gauntlet",
|
|
10
|
-
version: "0.13.0",
|
|
10
|
+
version: "0.13.1",
|
|
11
11
|
description: "A CLI tool for testing AI coding agents",
|
|
12
12
|
license: "Apache-2.0",
|
|
13
13
|
author: "Paul Caplan",
|
|
@@ -5244,7 +5244,7 @@ import fs19 from "node:fs";
|
|
|
5244
5244
|
import fsPromises2 from "node:fs/promises";
|
|
5245
5245
|
import path17 from "node:path";
|
|
5246
5246
|
import { inspect } from "node:util";
|
|
5247
|
-
var ANSI_REGEX = /\x1b
|
|
5247
|
+
var ANSI_REGEX = /\x1b(?:\[[0-9;?]*[A-Za-z]|[78])/g;
|
|
5248
5248
|
function stripAnsi(text) {
|
|
5249
5249
|
return text.replace(ANSI_REGEX, "");
|
|
5250
5250
|
}
|
|
@@ -5700,16 +5700,16 @@ function registerCheckCommand(program) {
|
|
|
5700
5700
|
await debugLogger?.logRunStart(runMode, changes.length, jobs.length);
|
|
5701
5701
|
const reporter = new ConsoleReporter;
|
|
5702
5702
|
const runner = new Runner(config, logger, reporter, failuresMap, changeOptions, effectiveBaseBranch, passedSlotsMap, debugLogger ?? undefined, isRerun);
|
|
5703
|
-
const
|
|
5704
|
-
await debugLogger?.logRunEnd(
|
|
5703
|
+
const outcome = await runner.run(jobs);
|
|
5704
|
+
await debugLogger?.logRunEnd(outcome.allPassed ? "pass" : "fail", outcome.stats.fixed, outcome.stats.skipped, outcome.stats.failed, logger.getRunNumber());
|
|
5705
5705
|
await writeExecutionState(config.project.log_dir);
|
|
5706
|
-
if (
|
|
5706
|
+
if (outcome.allPassed) {
|
|
5707
5707
|
await debugLogger?.logClean("auto", "all_passed");
|
|
5708
5708
|
await cleanLogs(config.project.log_dir);
|
|
5709
5709
|
}
|
|
5710
5710
|
await releaseLock(config.project.log_dir);
|
|
5711
5711
|
restoreConsole?.restore();
|
|
5712
|
-
process.exit(
|
|
5712
|
+
process.exit(outcome.allPassed ? 0 : 1);
|
|
5713
5713
|
} catch (error) {
|
|
5714
5714
|
if (config && lockAcquired) {
|
|
5715
5715
|
try {
|
|
@@ -6760,7 +6760,7 @@ allowed-tools: Bash
|
|
|
6760
6760
|
if (isRun) {
|
|
6761
6761
|
steps.push(`3. If it fails:
|
|
6762
6762
|
- Identify the failed gates from the console output.
|
|
6763
|
-
- For CHECK failures: Read the \`.log\` file path provided in the output.
|
|
6763
|
+
- For CHECK failures: Read the \`.log\` file path provided in the output. If the log contains a \`--- Fix Instructions ---\` section, follow those instructions to fix the issue. If it contains a \`--- Fix Skill: <name> ---\` section, invoke that skill.
|
|
6764
6764
|
- For REVIEW failures: Read the \`.json\` file path provided in the "Review: <path>" output.
|
|
6765
6765
|
4. Address the violations:
|
|
6766
6766
|
- For REVIEW violations: You MUST update the \`"status"\` and \`"result"\` fields in the provided \`.json\` file for EACH violation.
|
|
@@ -6779,7 +6779,7 @@ allowed-tools: Bash
|
|
|
6779
6779
|
- Outstanding Failures: (if retry limit exceeded, list unverified fixes and remaining issues)`);
|
|
6780
6780
|
} else {
|
|
6781
6781
|
steps.push(`3. If any checks fail:
|
|
6782
|
-
- Read the \`.log\` file path provided in the output for each failed check.
|
|
6782
|
+
- Read the \`.log\` file path provided in the output for each failed check. If the log contains a \`--- Fix Instructions ---\` section, follow those instructions. If it contains a \`--- Fix Skill: <name> ---\` section, invoke that skill.
|
|
6783
6783
|
- Fix the issues found.
|
|
6784
6784
|
4. Run \`${command}\` again to verify your fixes. Do NOT run \`agent-gauntlet clean\` between retries.
|
|
6785
6785
|
5. Repeat steps 3-4 until all checks pass or you've made 3 attempts.
|
|
@@ -6837,6 +6837,7 @@ var HELP_SKILL_BUNDLE = {
|
|
|
6837
6837
|
};
|
|
6838
6838
|
var SETUP_SKILL_CONTENT = readSkillTemplate("setup-skill.md");
|
|
6839
6839
|
var CHECK_CATALOG_REFERENCE = readSkillTemplate("check-catalog.md");
|
|
6840
|
+
var PROJECT_STRUCTURE_REFERENCE = readSkillTemplate("setup-ref-project-structure.md");
|
|
6840
6841
|
var SKILL_DEFINITIONS = [
|
|
6841
6842
|
{ action: "run", content: GAUNTLET_RUN_SKILL_CONTENT },
|
|
6842
6843
|
{ action: "check", content: GAUNTLET_CHECK_SKILL_CONTENT },
|
|
@@ -6852,7 +6853,10 @@ var SKILL_DEFINITIONS = [
|
|
|
6852
6853
|
{
|
|
6853
6854
|
action: "setup",
|
|
6854
6855
|
content: SETUP_SKILL_CONTENT,
|
|
6855
|
-
references: {
|
|
6856
|
+
references: {
|
|
6857
|
+
"check-catalog.md": CHECK_CATALOG_REFERENCE,
|
|
6858
|
+
"project-structure.md": PROJECT_STRUCTURE_REFERENCE
|
|
6859
|
+
},
|
|
6856
6860
|
skillsOnly: true
|
|
6857
6861
|
}
|
|
6858
6862
|
];
|
|
@@ -7006,14 +7010,29 @@ async function addToGitignore(projectRoot, entry) {
|
|
|
7006
7010
|
`);
|
|
7007
7011
|
console.log(chalk9.green(`Added ${entry} to .gitignore`));
|
|
7008
7012
|
}
|
|
7009
|
-
|
|
7013
|
+
function gitSilent(args, opts) {
|
|
7014
|
+
const { execFileSync } = __require("node:child_process");
|
|
7010
7015
|
try {
|
|
7011
|
-
|
|
7012
|
-
|
|
7013
|
-
|
|
7014
|
-
|
|
7016
|
+
return execFileSync("git", args, {
|
|
7017
|
+
encoding: "utf-8",
|
|
7018
|
+
timeout: opts?.timeout,
|
|
7019
|
+
stdio: ["pipe", "pipe", "ignore"]
|
|
7020
|
+
}).trim();
|
|
7021
|
+
} catch {
|
|
7022
|
+
return null;
|
|
7023
|
+
}
|
|
7024
|
+
}
|
|
7025
|
+
async function detectBaseBranch() {
|
|
7026
|
+
gitSilent(["remote", "set-head", "origin", "--auto"], { timeout: 5000 });
|
|
7027
|
+
const ref = gitSilent(["symbolic-ref", "refs/remotes/origin/HEAD"]);
|
|
7028
|
+
if (ref) {
|
|
7029
|
+
return ref.replace("refs/remotes/", "");
|
|
7030
|
+
}
|
|
7031
|
+
for (const candidate of ["origin/main", "origin/master"]) {
|
|
7032
|
+
if (gitSilent(["rev-parse", "--verify", candidate]) !== null) {
|
|
7033
|
+
return candidate;
|
|
7015
7034
|
}
|
|
7016
|
-
}
|
|
7035
|
+
}
|
|
7017
7036
|
return "origin/main";
|
|
7018
7037
|
}
|
|
7019
7038
|
function buildAdapterSettingsBlock(adapters3) {
|
|
@@ -7392,16 +7411,16 @@ function registerReviewCommand(program) {
|
|
|
7392
7411
|
await debugLogger?.logRunStart(runMode, changes.length, jobs.length);
|
|
7393
7412
|
const reporter = new ConsoleReporter;
|
|
7394
7413
|
const runner = new Runner(config, logger, reporter, failuresMap, changeOptions, effectiveBaseBranch, passedSlotsMap, debugLogger ?? undefined, isRerun);
|
|
7395
|
-
const
|
|
7396
|
-
await debugLogger?.logRunEnd(
|
|
7414
|
+
const outcome = await runner.run(jobs);
|
|
7415
|
+
await debugLogger?.logRunEnd(outcome.allPassed ? "pass" : "fail", outcome.stats.fixed, outcome.stats.skipped, outcome.stats.failed, logger.getRunNumber());
|
|
7397
7416
|
await writeExecutionState(config.project.log_dir);
|
|
7398
|
-
if (
|
|
7417
|
+
if (outcome.allPassed) {
|
|
7399
7418
|
await debugLogger?.logClean("auto", "all_passed");
|
|
7400
7419
|
await cleanLogs(config.project.log_dir);
|
|
7401
7420
|
}
|
|
7402
7421
|
await releaseLock(config.project.log_dir);
|
|
7403
7422
|
restoreConsole?.restore();
|
|
7404
|
-
process.exit(
|
|
7423
|
+
process.exit(outcome.allPassed ? 0 : 1);
|
|
7405
7424
|
} catch (error) {
|
|
7406
7425
|
if (config && lockAcquired) {
|
|
7407
7426
|
try {
|
|
@@ -8358,4 +8377,4 @@ if (process.argv.length < 3) {
|
|
|
8358
8377
|
}
|
|
8359
8378
|
program.parse(process.argv);
|
|
8360
8379
|
|
|
8361
|
-
//# debugId=
|
|
8380
|
+
//# debugId=61E76D84AB3F58F264756E2164756E21
|