agent-gauntlet 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +25 -23
  2. package/dist/index.js +9226 -0
  3. package/dist/index.js.map +65 -0
  4. package/dist/scripts/status.js +280 -0
  5. package/dist/scripts/status.js.map +10 -0
  6. package/package.json +22 -8
  7. package/src/built-in-reviews/code-quality.md +0 -25
  8. package/src/built-in-reviews/index.ts +0 -28
  9. package/src/bun-plugins.d.ts +0 -4
  10. package/src/cli-adapters/claude.ts +0 -327
  11. package/src/cli-adapters/codex.ts +0 -290
  12. package/src/cli-adapters/cursor.ts +0 -128
  13. package/src/cli-adapters/gemini.ts +0 -510
  14. package/src/cli-adapters/github-copilot.ts +0 -141
  15. package/src/cli-adapters/index.ts +0 -250
  16. package/src/cli-adapters/thinking-budget.ts +0 -23
  17. package/src/commands/check.ts +0 -311
  18. package/src/commands/ci/index.ts +0 -15
  19. package/src/commands/ci/init.ts +0 -96
  20. package/src/commands/ci/list-jobs.ts +0 -90
  21. package/src/commands/clean.ts +0 -54
  22. package/src/commands/detect.ts +0 -173
  23. package/src/commands/health.ts +0 -169
  24. package/src/commands/help.ts +0 -34
  25. package/src/commands/index.ts +0 -13
  26. package/src/commands/init.ts +0 -1878
  27. package/src/commands/list.ts +0 -33
  28. package/src/commands/review.ts +0 -311
  29. package/src/commands/run.ts +0 -29
  30. package/src/commands/shared.ts +0 -267
  31. package/src/commands/stop-hook.ts +0 -567
  32. package/src/commands/validate.ts +0 -20
  33. package/src/commands/wait-ci.ts +0 -518
  34. package/src/config/ci-loader.ts +0 -33
  35. package/src/config/ci-schema.ts +0 -28
  36. package/src/config/global.ts +0 -87
  37. package/src/config/loader.ts +0 -301
  38. package/src/config/schema.ts +0 -165
  39. package/src/config/stop-hook-config.ts +0 -130
  40. package/src/config/types.ts +0 -65
  41. package/src/config/validator.ts +0 -592
  42. package/src/core/change-detector.ts +0 -137
  43. package/src/core/diff-stats.ts +0 -442
  44. package/src/core/entry-point.ts +0 -190
  45. package/src/core/job.ts +0 -96
  46. package/src/core/run-executor.ts +0 -621
  47. package/src/core/runner.ts +0 -290
  48. package/src/gates/check.ts +0 -118
  49. package/src/gates/resolve-check-command.ts +0 -21
  50. package/src/gates/result.ts +0 -54
  51. package/src/gates/review.ts +0 -1333
  52. package/src/hooks/adapters/claude-stop-hook.ts +0 -99
  53. package/src/hooks/adapters/cursor-stop-hook.ts +0 -122
  54. package/src/hooks/adapters/types.ts +0 -94
  55. package/src/hooks/stop-hook-handler.ts +0 -748
  56. package/src/index.ts +0 -47
  57. package/src/output/app-logger.ts +0 -214
  58. package/src/output/console-log.ts +0 -168
  59. package/src/output/console.ts +0 -359
  60. package/src/output/logger.ts +0 -126
  61. package/src/output/sinks/console-sink.ts +0 -59
  62. package/src/output/sinks/file-sink.ts +0 -110
  63. package/src/scripts/status.ts +0 -433
  64. package/src/templates/workflow.yml +0 -79
  65. package/src/types/gauntlet-status.ts +0 -79
  66. package/src/utils/debug-log.ts +0 -392
  67. package/src/utils/diff-parser.ts +0 -103
  68. package/src/utils/execution-state.ts +0 -472
  69. package/src/utils/log-parser.ts +0 -696
  70. package/src/utils/sanitizer.ts +0 -3
  71. package/src/utils/session-ref.ts +0 -91
@@ -1,311 +0,0 @@
1
- import chalk from "chalk";
2
- import type { Command } from "commander";
3
- import { loadGlobalConfig } from "../config/global.js";
4
- import { loadConfig } from "../config/loader.js";
5
- import { ChangeDetector } from "../core/change-detector.js";
6
- import { EntryPointExpander } from "../core/entry-point.js";
7
- import { JobGenerator } from "../core/job.js";
8
- import { Runner } from "../core/runner.js";
9
- import { ConsoleReporter } from "../output/console.js";
10
- import {
11
- type ConsoleLogHandle,
12
- startConsoleLog,
13
- } from "../output/console-log.js";
14
- import { Logger } from "../output/logger.js";
15
- import {
16
- getDebugLogger,
17
- initDebugLogger,
18
- mergeDebugLogConfig,
19
- } from "../utils/debug-log.js";
20
- import {
21
- readExecutionState,
22
- resolveFixBase,
23
- writeExecutionState,
24
- } from "../utils/execution-state.js";
25
- import {
26
- findPreviousFailures,
27
- type PassedSlot,
28
- type PreviousViolation,
29
- } from "../utils/log-parser.js";
30
- import {
31
- acquireLock,
32
- cleanLogs,
33
- hasExistingLogs,
34
- performAutoClean,
35
- releaseLock,
36
- shouldAutoClean,
37
- } from "./shared.js";
38
-
39
/** Options commander parses for the `check` command (one field per `.option()` below). */
interface CheckCommandOptions {
  baseBranch?: string;
  gate?: string;
  commit?: string;
  uncommitted?: boolean;
}

/**
 * Turns any thrown value into a printable message.
 *
 * Error-like objects yield their `message`; anything else (strings, numbers,
 * `undefined`, ...) is stringified so the user never sees "Error: undefined".
 */
function describeCheckError(error: unknown): string {
  if (typeof error === "object" && error !== null && "message" in error) {
    const message = (error as { message?: unknown }).message;
    if (typeof message === "string") {
      return message;
    }
  }
  return String(error);
}

/**
 * Registers the `check` subcommand on the given commander program.
 *
 * The action:
 *  1. loads project/global config and initializes the debug logger,
 *  2. resolves the effective base branch and auto-cleans logs when
 *     `shouldAutoClean` asks for it,
 *  3. acquires the log-directory lock, initializes the run logger and starts
 *     console capture,
 *  4. in rerun mode (logs exist and no `--commit`) collects previous failures
 *     and passed slots so the runner can verify them,
 *  5. detects changed files, builds the jobs, keeps only `check` jobs
 *     (optionally a single gate) and runs them,
 *  6. writes execution state, archives logs on success, releases the lock and
 *     exits with 0 on success / 1 on failure.
 *
 * The action always terminates the process via `process.exit`.
 *
 * @param program - The commander program to attach the command to.
 */
export function registerCheckCommand(program: Command): void {
  program
    .command("check")
    .description("Run only applicable checks for detected changes")
    .option(
      "-b, --base-branch <branch>",
      "Override base branch for change detection",
    )
    .option("-g, --gate <name>", "Run specific check gate only")
    .option("-c, --commit <sha>", "Use diff for a specific commit")
    .option(
      "-u, --uncommitted",
      "Use diff for current uncommitted changes (staged and unstaged)",
    )
    .action(async (options: CheckCommandOptions) => {
      // Declared outside the try block so the error handler can see how far we got.
      let config: Awaited<ReturnType<typeof loadConfig>> | undefined;
      let lockAcquired = false;
      let restoreConsole: ConsoleLogHandle | undefined;
      try {
        config = await loadConfig();

        // Initialize debug logger
        const globalConfig = await loadGlobalConfig();
        const debugLogConfig = mergeDebugLogConfig(
          config.project.debug_log,
          globalConfig.debug_log,
        );
        initDebugLogger(config.project.log_dir, debugLogConfig);

        // Log the command invocation (only the flags that were actually given)
        const debugLogger = getDebugLogger();
        const args = [
          options.baseBranch ? `-b ${options.baseBranch}` : "",
          options.gate ? `-g ${options.gate}` : "",
          options.commit ? `-c ${options.commit}` : "",
          options.uncommitted ? "-u" : "",
        ].filter(Boolean);
        await debugLogger?.logCommand("check", args);

        // Determine effective base branch first (needed for auto-clean).
        // Precedence: --base-branch, then GITHUB_BASE_REF (only when CI or
        // GITHUB_ACTIONS is "true"), then the project's configured base branch.
        const runningInCI =
          process.env.CI === "true" || process.env.GITHUB_ACTIONS === "true";
        const ciBaseRef =
          process.env.GITHUB_BASE_REF && runningInCI
            ? process.env.GITHUB_BASE_REF
            : null;
        const effectiveBaseBranch =
          options.baseBranch || ciBaseRef || config.project.base_branch;

        // Auto-clean on context change (branch changed, commit merged)
        const autoCleanResult = await shouldAutoClean(
          config.project.log_dir,
          effectiveBaseBranch,
        );
        if (autoCleanResult.clean) {
          console.log(
            chalk.dim(`Auto-cleaning logs (${autoCleanResult.reason})...`),
          );
          await debugLogger?.logClean(
            "auto",
            autoCleanResult.reason || "unknown",
          );
          await performAutoClean(config.project.log_dir, autoCleanResult);
        }

        // Detect rerun mode after auto-clean (clean may have removed logs)
        const logsExist = await hasExistingLogs(config.project.log_dir);
        const isRerun = logsExist && !options.commit;

        // Acquire lock BEFORE starting console log (prevents orphaned log files)
        await acquireLock(config.project.log_dir);
        lockAcquired = true;

        // Initialize Logger early to get unified run number for console log
        const logger = new Logger(config.project.log_dir);
        await logger.init();
        const runNumber = logger.getRunNumber();

        restoreConsole = await startConsoleLog(
          config.project.log_dir,
          runNumber,
        );

        let failuresMap:
          | Map<string, Map<string, PreviousViolation[]>>
          | undefined;
        let changeOptions:
          | { commit?: string; uncommitted?: boolean; fixBase?: string }
          | undefined;
        let passedSlotsMap: Map<string, Map<number, PassedSlot>> | undefined;

        if (isRerun) {
          console.log(
            chalk.dim(
              "Existing logs detected — running in verification mode...",
            ),
          );
          const { failures: previousFailures, passedSlots } =
            await findPreviousFailures(
              config.project.log_dir,
              options.gate,
              true,
            );

          // Index previous violations as jobId -> (reviewIndex | adapterName) -> violations.
          failuresMap = new Map();
          for (const gateFailure of previousFailures) {
            const adapterMap = new Map<string, PreviousViolation[]>();
            for (const af of gateFailure.adapterFailures) {
              const key = af.reviewIndex
                ? String(af.reviewIndex)
                : af.adapterName;
              adapterMap.set(key, af.violations);
            }
            failuresMap.set(gateFailure.jobId, adapterMap);
          }

          passedSlotsMap = passedSlots;

          if (previousFailures.length > 0) {
            const totalViolations = previousFailures.reduce(
              (sum, gf) =>
                sum +
                gf.adapterFailures.reduce(
                  (s, af) => s + af.violations.length,
                  0,
                ),
              0,
            );
            console.log(
              chalk.yellow(
                `Found ${previousFailures.length} gate(s) with ${totalViolations} previous violation(s)`,
              ),
            );
          }

          changeOptions = { uncommitted: true };
          // Use working_tree_ref from execution state for rerun diff scoping
          const executionState = await readExecutionState(
            config.project.log_dir,
          );
          if (executionState?.working_tree_ref) {
            changeOptions.fixBase = executionState.working_tree_ref;
          }
        } else if (!logsExist) {
          // Post-clean run: check if execution state has a working_tree_ref to use as fixBase
          const executionState = await readExecutionState(
            config.project.log_dir,
          );
          if (executionState) {
            const resolved = await resolveFixBase(
              executionState,
              effectiveBaseBranch,
            );
            if (resolved.warning) {
              console.log(chalk.yellow(`Warning: ${resolved.warning}`));
            }
            if (resolved.fixBase) {
              changeOptions = { fixBase: resolved.fixBase };
            }
          }
        }

        // Allow explicit commit or uncommitted options to override fixBase
        if (options.commit || options.uncommitted) {
          changeOptions = {
            commit: options.commit,
            uncommitted: options.uncommitted,
            fixBase: changeOptions?.fixBase,
          };
        }

        const changeDetector = new ChangeDetector(
          effectiveBaseBranch,
          changeOptions || {
            commit: options.commit,
            uncommitted: options.uncommitted,
          },
        );
        const expander = new EntryPointExpander();
        const jobGen = new JobGenerator(config);

        console.log(chalk.dim("Detecting changes..."));
        const changes = await changeDetector.getChangedFiles();

        if (changes.length === 0) {
          console.log(chalk.green("No changes detected."));
          await writeExecutionState(config.project.log_dir);
          await releaseLock(config.project.log_dir);
          restoreConsole?.restore();
          process.exit(0);
        }

        console.log(chalk.dim(`Found ${changes.length} changed files.`));

        const entryPoints = await expander.expand(
          config.project.entry_points,
          changes,
        );
        let jobs = jobGen.generateJobs(entryPoints);

        // Filter to only checks
        jobs = jobs.filter((j) => j.type === "check");

        if (options.gate) {
          jobs = jobs.filter((j) => j.name === options.gate);
        }

        if (jobs.length === 0) {
          console.log(chalk.yellow("No applicable checks for these changes."));
          await writeExecutionState(config.project.log_dir);
          await releaseLock(config.project.log_dir);
          restoreConsole?.restore();
          process.exit(0);
        }

        console.log(chalk.dim(`Running ${jobs.length} check(s)...`));

        // Log run start
        const runMode = isRerun ? "verification" : "full";
        await debugLogger?.logRunStart(runMode, changes.length, jobs.length);

        const reporter = new ConsoleReporter();
        const runner = new Runner(
          config,
          logger,
          reporter,
          failuresMap,
          changeOptions,
          effectiveBaseBranch,
          passedSlotsMap,
          debugLogger ?? undefined,
          isRerun,
        );

        const success = await runner.run(jobs);

        // Log run end.
        // NOTE(review): the three zeros are passed as-is from the original code;
        // their meaning is defined by logRunEnd — confirm against its signature.
        await debugLogger?.logRunEnd(
          success ? "pass" : "fail",
          0,
          0,
          0,
          logger.getRunNumber(),
        );

        // Write execution state before releasing lock (for interval checks)
        // This now captures working_tree_ref which is used for rerun diff scoping
        await writeExecutionState(config.project.log_dir);

        if (success) {
          await debugLogger?.logClean("auto", "all_passed");
          // Pass the configured retention, matching what the `clean` command does.
          await cleanLogs(
            config.project.log_dir,
            config.project.max_previous_logs,
          );
        }
        await releaseLock(config.project.log_dir);
        restoreConsole?.restore();
        process.exit(success ? 0 : 1);
      } catch (error: unknown) {
        // Write execution state even on error (if lock was acquired)
        if (config && lockAcquired) {
          try {
            await writeExecutionState(config.project.log_dir);
          } catch {
            // Ignore errors writing state during error handling
          }
          try {
            await releaseLock(config.project.log_dir);
          } catch (releaseError: unknown) {
            // A failed release must not hide the original error or skip the exit below.
            console.error(
              chalk.red("Error:"),
              `Failed to release lock: ${describeCheckError(releaseError)}`,
            );
          }
        }
        console.error(chalk.red("Error:"), describeCheckError(error));
        restoreConsole?.restore();
        process.exit(1);
      }
    });
}
@@ -1,15 +0,0 @@
1
- import type { Command } from "commander";
2
- import { initCI } from "./init.js";
3
- import { listJobs } from "./list-jobs.js";
4
-
5
/**
 * Adds the `ci` command group to the program, with its `init` and
 * `list-jobs` subcommands wired to `initCI` and `listJobs`.
 *
 * @param program - The commander program to attach the group to.
 */
export function registerCICommand(program: Command): void {
  const ciGroup = program.command("ci");
  ciGroup.description("Manage CI integration");

  // `ci init`
  const initCommand = ciGroup.command("init");
  initCommand.description("Initialize CI workflow and configuration");
  initCommand.action(initCI);

  // `ci list-jobs`
  const listJobsCommand = ciGroup.command("list-jobs");
  listJobsCommand.description("List CI jobs (used by workflow)");
  listJobsCommand.action(listJobs);
}
@@ -1,96 +0,0 @@
1
- import fs from "node:fs/promises";
2
- import path from "node:path";
3
- import chalk from "chalk";
4
- import YAML from "yaml";
5
- import { loadCIConfig } from "../../config/ci-loader.js";
6
- import type { CIConfig } from "../../config/types.js";
7
- import workflowTemplate from "../../templates/workflow.yml" with {
8
- type: "text",
9
- };
10
-
11
/**
 * Scaffolds CI integration in the current working directory.
 *
 * Steps:
 *  1. Creates a starter `.gauntlet/ci.yml` when that file does not exist yet.
 *  2. Tries to load the CI config; on failure a warning is printed and the
 *     workflow is generated without services.
 *  3. Writes `.github/workflows/gauntlet.yml` from the bundled workflow
 *     template, replacing the services placeholder with the configured
 *     services (or removing the placeholder line when there are none).
 *
 * @returns A promise that resolves once the workflow file has been written.
 */
export async function initCI(): Promise<void> {
  const workflowDir = path.join(process.cwd(), ".github", "workflows");
  const workflowPath = path.join(workflowDir, "gauntlet.yml");
  const gauntletDir = path.join(process.cwd(), ".gauntlet");
  const ciConfigPath = path.join(gauntletDir, "ci.yml");

  // 1. Ensure .gauntlet/ci.yml exists
  if (!(await fileExists(ciConfigPath))) {
    console.log(chalk.yellow("Creating starter .gauntlet/ci.yml..."));
    await fs.mkdir(gauntletDir, { recursive: true });
    const starterContent = `# CI Configuration for Agent Gauntlet
# Define runtimes, services, and which checks to run in CI.

runtimes:
# ruby:
# version: "3.3"
# bundler_cache: true

services:
# postgres:
# image: postgres:16
# ports: ["5432:5432"]

setup:
# - name: Global Setup
# run: echo "Setting up..."

checks:
# - name: linter
# requires_runtimes: [ruby]
`;
    await fs.writeFile(ciConfigPath, starterContent);
  } else {
    console.log(chalk.dim("Found existing .gauntlet/ci.yml"));
  }

  // 2. Load CI config to get services
  let ciConfig: CIConfig | undefined;
  try {
    ciConfig = await loadCIConfig();
  } catch (_e) {
    // Deliberately best-effort: the workflow is still generated, just without services.
    console.warn(
      chalk.yellow(
        "Could not load CI config to inject services. Workflow will have no services defined.",
      ),
    );
  }

  // 3. Generate workflow file
  console.log(chalk.dim(`Generating ${workflowPath}...`));
  await fs.mkdir(workflowDir, { recursive: true });

  let templateContent = workflowTemplate;

  // Inject services
  if (ciConfig?.services && Object.keys(ciConfig.services).length > 0) {
    const servicesYaml = YAML.stringify({ services: ciConfig.services });
    // Indent every non-blank line so the block nests under the job in the template
    const indentedServices = servicesYaml
      .split("\n")
      .map((line) => (line.trim() ? ` ${line}` : line))
      .join("\n");

    // Use a replacer function rather than a replacement string: with a string,
    // `replace` would interpret `$$`, `$&`, `` $` `` and `$'` inside the YAML
    // (e.g. a password such as "pa$$word") and silently corrupt the output.
    templateContent = templateContent.replace(
      " # Services will be injected here by agent-gauntlet",
      () => indentedServices,
    );
  } else {
    // No services configured: drop the placeholder line entirely.
    templateContent = templateContent.replace(
      " # Services will be injected here by agent-gauntlet\n",
      "",
    );
  }

  await fs.writeFile(workflowPath, templateContent);
  console.log(chalk.green("Successfully generated GitHub Actions workflow!"));
}
88
-
89
/**
 * Reports whether `path` refers to an existing regular file.
 *
 * Any failure while stat-ing the path (missing entry, permission problem, ...)
 * is reported as `false`; directories and other non-file entries are `false` too.
 *
 * @param path - File system path to probe.
 * @returns `true` only when the path exists and is a regular file.
 */
async function fileExists(path: string): Promise<boolean> {
  return fs
    .stat(path)
    .then((info) => info.isFile())
    .catch(() => false);
}
@@ -1,90 +0,0 @@
1
- import { loadCIConfig } from "../../config/ci-loader.js";
2
- import { loadConfig } from "../../config/loader.js";
3
- import type { CISetupStep } from "../../config/types.js";
4
- import { EntryPointExpander } from "../../core/entry-point.js";
5
-
6
/**
 * Prints the CI job matrix as a single JSON line on stdout.
 *
 * For every expanded entry point, each check listed in the CI config that is
 * also enabled for that entry point becomes one matrix job. Jobs are
 * de-duplicated by check name plus working directory. The printed object has
 * the keys `matrix`, `services` and `runtimes`.
 *
 * On any error the error is printed to stderr and the process exits with 1.
 */
export async function listJobs(): Promise<void> {
  try {
    const config = await loadConfig();
    const ciConfig = await loadCIConfig();
    const expandedEntryPoints = await new EntryPointExpander().expandAll(
      config.project.entry_points,
    );

    const matrixJobs = [];
    const emittedKeys = new Set<string>();
    const globalSetup = formatSetup(ciConfig.setup || undefined);

    // With no CI checks configured the loops below simply produce no jobs.
    const ciChecks = ciConfig.checks || [];

    for (const entryPoint of expandedEntryPoints) {
      // Checks enabled for this entry point
      const enabledHere = new Set(entryPoint.config.checks || []);

      for (const ciCheck of ciChecks) {
        if (!enabledHere.has(ciCheck.name)) {
          continue;
        }

        // Check definition from .gauntlet/checks/*.yml
        const definition = config.checks[ciCheck.name];
        if (!definition) {
          console.warn(
            `Warning: Check '${ciCheck.name}' found in CI config but not defined in checks/*.yml`,
          );
          continue;
        }

        const workingDirectory =
          definition.working_directory || entryPoint.path;

        // Dedupe on check name + working directory: when two entry points both
        // trigger e.g. "test" with working_directory ".", it runs only once.
        const dedupeKey = `${ciCheck.name}:${workingDirectory}`;
        if (emittedKeys.has(dedupeKey)) {
          continue;
        }
        emittedKeys.add(dedupeKey);

        matrixJobs.push({
          id: `${ciCheck.name}-${entryPoint.path.replace(/\//g, "-")}`,
          name: ciCheck.name,
          entry_point: entryPoint.path,
          working_directory: workingDirectory,
          command: definition.command,
          runtimes: ciCheck.requires_runtimes || [],
          services: ciCheck.requires_services || [],
          setup: formatSetup(ciCheck.setup || undefined),
          global_setup: globalSetup,
        });
      }
    }

    console.log(
      JSON.stringify({
        matrix: matrixJobs,
        services: ciConfig.services || {},
        runtimes: ciConfig.runtimes || {},
      }),
    );
  } catch (e) {
    console.error("Error generating CI jobs:", e);
    process.exit(1);
  }
}
77
-
78
/**
 * Renders setup steps as a shell script fragment.
 *
 * Each step becomes three lines: an `echo "::group::<name>"` line, the step's
 * command (wrapped in a `cd` subshell when the step has a working directory),
 * and an `echo "::endgroup::"` line. Steps are separated by a newline.
 *
 * @param steps - Setup steps; `null`, `undefined` and an empty list are all accepted.
 * @returns The script text, or an empty string when there are no steps.
 */
const formatSetup = (steps: CISetupStep[] | null | undefined): string => {
  if (!steps || steps.length === 0) return "";

  const fragments: string[] = [];
  for (const step of steps) {
    let command = step.run;
    if (step.working_directory) {
      command = `(cd "${step.working_directory}" && ${step.run})`;
    }
    fragments.push(
      `echo "::group::${step.name}"\n${command}\necho "::endgroup::"`,
    );
  }
  return fragments.join("\n");
};
@@ -1,54 +0,0 @@
1
- import chalk from "chalk";
2
- import type { Command } from "commander";
3
- import { loadGlobalConfig } from "../config/global.js";
4
- import { loadConfig } from "../config/loader.js";
5
- import {
6
- getDebugLogger,
7
- initDebugLogger,
8
- mergeDebugLogConfig,
9
- } from "../utils/debug-log.js";
10
- import { acquireLock, cleanLogs, releaseLock } from "./shared.js";
11
-
12
/**
 * Turns any thrown value into a printable message.
 *
 * Error-like objects yield their `message`; anything else is stringified so
 * the user never sees "Error: undefined".
 */
function describeCleanError(error: unknown): string {
  if (typeof error === "object" && error !== null && "message" in error) {
    const message = (error as { message?: unknown }).message;
    if (typeof message === "string") {
      return message;
    }
  }
  return String(error);
}

/**
 * Registers the `clean` subcommand, which archives the project's logs.
 *
 * The action loads the config, initializes the debug logger, takes the
 * log-directory lock, calls `cleanLogs` with the configured
 * `max_previous_logs`, and releases the lock. On failure it releases the lock
 * (if it was taken), prints the error and exits with status 1.
 *
 * @param program - The commander program to attach the command to.
 */
export function registerCleanCommand(program: Command): void {
  program
    .command("clean")
    .description("Archive logs")
    .action(async () => {
      // Declared outside the try block so the error handler can see how far we got.
      let config: Awaited<ReturnType<typeof loadConfig>> | undefined;
      let lockAcquired = false;
      try {
        config = await loadConfig();

        // Initialize debug logger
        const globalConfig = await loadGlobalConfig();
        const debugLogConfig = mergeDebugLogConfig(
          config.project.debug_log,
          globalConfig.debug_log,
        );
        initDebugLogger(config.project.log_dir, debugLogConfig);

        // Acquire lock BEFORE logging - prevents clean from running during active gauntlet run
        await acquireLock(config.project.log_dir);
        lockAcquired = true;

        // Log the command invocation (only after lock acquired)
        const debugLogger = getDebugLogger();
        await debugLogger?.logCommand("clean", []);
        await debugLogger?.logClean("manual", "user_request");

        await cleanLogs(
          config.project.log_dir,
          config.project.max_previous_logs,
        );
        await releaseLock(config.project.log_dir);
        console.log(chalk.green("Logs archived successfully."));
      } catch (error: unknown) {
        if (config && lockAcquired) {
          try {
            await releaseLock(config.project.log_dir);
          } catch (releaseError: unknown) {
            // A failed release must not hide the original error or skip the exit below.
            console.error(
              chalk.red("Error:"),
              `Failed to release lock: ${describeCleanError(releaseError)}`,
            );
          }
        }
        console.error(chalk.red("Error:"), describeCleanError(error));
        process.exit(1);
      }
    });
}