agent-gauntlet 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/README.md +25 -23
  2. package/dist/index.js +9226 -0
  3. package/dist/index.js.map +65 -0
  4. package/dist/scripts/status.js +280 -0
  5. package/dist/scripts/status.js.map +10 -0
  6. package/package.json +22 -8
  7. package/src/built-in-reviews/code-quality.md +0 -25
  8. package/src/built-in-reviews/index.ts +0 -28
  9. package/src/bun-plugins.d.ts +0 -4
  10. package/src/cli-adapters/claude.ts +0 -327
  11. package/src/cli-adapters/codex.ts +0 -290
  12. package/src/cli-adapters/cursor.ts +0 -128
  13. package/src/cli-adapters/gemini.ts +0 -510
  14. package/src/cli-adapters/github-copilot.ts +0 -141
  15. package/src/cli-adapters/index.ts +0 -250
  16. package/src/cli-adapters/thinking-budget.ts +0 -23
  17. package/src/commands/check.ts +0 -311
  18. package/src/commands/ci/index.ts +0 -15
  19. package/src/commands/ci/init.ts +0 -96
  20. package/src/commands/ci/list-jobs.ts +0 -90
  21. package/src/commands/clean.ts +0 -54
  22. package/src/commands/detect.ts +0 -173
  23. package/src/commands/health.ts +0 -169
  24. package/src/commands/help.ts +0 -34
  25. package/src/commands/index.ts +0 -13
  26. package/src/commands/init.ts +0 -1878
  27. package/src/commands/list.ts +0 -33
  28. package/src/commands/review.ts +0 -311
  29. package/src/commands/run.ts +0 -29
  30. package/src/commands/shared.ts +0 -267
  31. package/src/commands/stop-hook.ts +0 -567
  32. package/src/commands/validate.ts +0 -20
  33. package/src/commands/wait-ci.ts +0 -518
  34. package/src/config/ci-loader.ts +0 -33
  35. package/src/config/ci-schema.ts +0 -28
  36. package/src/config/global.ts +0 -87
  37. package/src/config/loader.ts +0 -301
  38. package/src/config/schema.ts +0 -165
  39. package/src/config/stop-hook-config.ts +0 -130
  40. package/src/config/types.ts +0 -65
  41. package/src/config/validator.ts +0 -592
  42. package/src/core/change-detector.ts +0 -137
  43. package/src/core/diff-stats.ts +0 -442
  44. package/src/core/entry-point.ts +0 -190
  45. package/src/core/job.ts +0 -96
  46. package/src/core/run-executor.ts +0 -621
  47. package/src/core/runner.ts +0 -290
  48. package/src/gates/check.ts +0 -118
  49. package/src/gates/resolve-check-command.ts +0 -21
  50. package/src/gates/result.ts +0 -54
  51. package/src/gates/review.ts +0 -1333
  52. package/src/hooks/adapters/claude-stop-hook.ts +0 -99
  53. package/src/hooks/adapters/cursor-stop-hook.ts +0 -122
  54. package/src/hooks/adapters/types.ts +0 -94
  55. package/src/hooks/stop-hook-handler.ts +0 -748
  56. package/src/index.ts +0 -47
  57. package/src/output/app-logger.ts +0 -214
  58. package/src/output/console-log.ts +0 -168
  59. package/src/output/console.ts +0 -359
  60. package/src/output/logger.ts +0 -126
  61. package/src/output/sinks/console-sink.ts +0 -59
  62. package/src/output/sinks/file-sink.ts +0 -110
  63. package/src/scripts/status.ts +0 -433
  64. package/src/templates/workflow.yml +0 -79
  65. package/src/types/gauntlet-status.ts +0 -79
  66. package/src/utils/debug-log.ts +0 -392
  67. package/src/utils/diff-parser.ts +0 -103
  68. package/src/utils/execution-state.ts +0 -472
  69. package/src/utils/log-parser.ts +0 -696
  70. package/src/utils/sanitizer.ts +0 -3
  71. package/src/utils/session-ref.ts +0 -91
@@ -1,290 +0,0 @@
1
- import type {
2
- LoadedCheckGateConfig,
3
- LoadedConfig,
4
- LoadedReviewGateConfig,
5
- } from "../config/types.js";
6
- import { CheckGateExecutor } from "../gates/check.js";
7
- import type { GateResult } from "../gates/result.js";
8
- import { ReviewGateExecutor } from "../gates/review.js";
9
- import type { ConsoleReporter } from "../output/console.js";
10
- import type { Logger } from "../output/logger.js";
11
- import type { DebugLogger } from "../utils/debug-log.js";
12
- import type { PreviousViolation } from "../utils/log-parser.js";
13
- import { sanitizeJobId } from "../utils/sanitizer.js";
14
- import type { Job } from "./job.js";
15
-
16
/**
 * Per-iteration violation tallies, recorded for RUN_END logging.
 */
export interface IterationStats {
  /** How many violations were marked as fixed. */
  fixed: number;
  /** How many violations were marked as skipped. */
  skipped: number;
  /** How many violations are still active (failures). */
  failed: number;
}
27
-
28
/**
 * Structured value returned by Runner.run() so callers can map it to a status.
 */
export interface RunnerOutcome {
  /** True when every gate result has status "pass". */
  allPassed: boolean;
  /** True when at least one result carries skipped violations (passed_with_warnings). */
  anySkipped: boolean;
  /** True when the retry limit was exceeded. */
  retryLimitExceeded: boolean;
  /** True when at least one gate ended with status "error". */
  anyErrors: boolean;
  /** Aggregated fixed/skipped/failed counts for debug logging. */
  stats: IterationStats;
  /** The individual gate results. */
  gateResults: GateResult[];
}
45
-
46
/**
 * Adds the counts of a single gate result (or reviewer sub-result) to `totals`.
 *
 * - `fixedCount` is added when truthy.
 * - `skipped.length` is added when a skipped list is present.
 * - Failures: a truthy `errorCount` is added as-is; otherwise a status of
 *   "fail" or "error" counts as exactly one failure. The fallback covers CHECK
 *   gates, which set a status but no errorCount.
 */
function tallyInto(
  totals: IterationStats,
  entry: {
    status: string;
    errorCount?: number;
    fixedCount?: number;
    skipped?: readonly unknown[];
  },
): void {
  if (entry.fixedCount) totals.fixed += entry.fixedCount;
  if (entry.skipped) totals.skipped += entry.skipped.length;

  if (entry.errorCount) {
    totals.failed += entry.errorCount;
  } else if (entry.status === "fail" || entry.status === "error") {
    totals.failed += 1;
  }
}

/**
 * Calculate iteration statistics from gate results.
 *
 * Every top-level result and every entry of its `subResults` (review gates)
 * is tallied with the same rules, so the returned totals are the sum over both
 * levels.
 *
 * @param results - Gate results collected during a run.
 * @returns Aggregated fixed, skipped and failed counts.
 */
function calculateStats(results: GateResult[]): IterationStats {
  const totals: IterationStats = { fixed: 0, skipped: 0, failed: 0 };

  for (const result of results) {
    tallyInto(totals, result);
    for (const sub of result.subResults ?? []) {
      tallyInto(totals, sub);
    }
  }

  return totals;
}
86
-
87
/**
 * Runs a set of gate jobs (checks and reviews) and summarizes the outcome.
 *
 * When the project allows parallelism, jobs whose gate config is flagged
 * `parallel` are all started up front; every other job runs one after another.
 * A non-passing check gate configured with `fail_fast` prevents jobs that have
 * not started yet from starting.
 */
export class Runner {
  private checkExecutor = new CheckGateExecutor();
  private reviewExecutor = new ReviewGateExecutor();
  private results: GateResult[] = [];
  private shouldStop = false;

  constructor(
    private config: LoadedConfig,
    private logger: Logger,
    private reporter: ConsoleReporter,
    private previousFailuresMap?: Map<string, Map<string, PreviousViolation[]>>,
    private changeOptions?: { commit?: string; uncommitted?: boolean },
    private baseBranchOverride?: string,
    private passedSlotsMap?: Map<
      string,
      Map<number, { adapter: string; passIteration: number }>
    >,
    private debugLogger?: DebugLogger,
    private isRerun?: boolean,
  ) {}

  /**
   * Executes the given jobs and returns a structured outcome.
   *
   * The run is refused (no gate executes, exit code set to 1) when the
   * logger's run number is already beyond `max_retries + 1`. When the run is
   * exactly the last allowed one and not every gate passed, the summary is
   * printed with a "Retry limit exceeded" status.
   *
   * The caller (run-executor, check, review) is expected to have called
   * logger.init() before startConsoleLog so numbering stays unified.
   */
  async run(jobs: Job[]): Promise<RunnerOutcome> {
    const { project } = this.config;

    // Enforce the retry limit before any gate runs.
    const maxRetries = project.max_retries ?? 3;
    const currentRunNumber = this.logger.getRunNumber();
    const maxAllowedRuns = maxRetries + 1;

    if (currentRunNumber > maxAllowedRuns) {
      console.error(
        `Retry limit exceeded: run ${currentRunNumber} exceeds max allowed ${maxAllowedRuns} (max_retries: ${maxRetries}). Human input required on what to do next.`,
      );
      process.exitCode = 1;
      return {
        allPassed: false,
        anySkipped: false,
        retryLimitExceeded: true,
        anyErrors: false,
        stats: { fixed: 0, skipped: 0, failed: 0 },
        gateResults: [],
      };
    }

    // Split jobs into those started together and those run in order.
    const concurrentJobs: Job[] = [];
    let orderedJobs: Job[] = jobs;
    if (project.allow_parallel) {
      orderedJobs = [];
      for (const job of jobs) {
        (job.gateConfig.parallel ? concurrentJobs : orderedJobs).push(job);
      }
    }

    // Parallel jobs start first, then the sequential chain; wait for all.
    const pending: Promise<void>[] = concurrentJobs.map((job) =>
      this.executeJob(job),
    );
    pending.push(this.runInOrder(orderedJobs));
    await Promise.all(pending);

    const allPassed = this.results.every((r) => r.status === "pass");
    const anySkipped = this.results.some(
      (r) => (r.skipped?.length ?? 0) > 0,
    );
    const anyErrors = this.results.some((r) => r.status === "error");
    // On the final allowed run, any non-passing gate exhausts the retries.
    const retryLimitExceeded =
      !allPassed && currentRunNumber === maxAllowedRuns;

    const stats = calculateStats(this.results);

    if (retryLimitExceeded) {
      await this.reporter.printSummary(
        this.results,
        project.log_dir,
        "Retry limit exceeded",
      );
    } else {
      await this.reporter.printSummary(this.results, project.log_dir);
    }

    // allPassed is necessarily false whenever retryLimitExceeded is true.
    return {
      allPassed,
      anySkipped,
      retryLimitExceeded,
      anyErrors,
      stats,
      gateResults: this.results,
    };
  }

  /** Runs jobs one at a time, stopping early once a fail-fast stop is set. */
  private async runInOrder(jobs: Job[]): Promise<void> {
    for (const job of jobs) {
      if (this.shouldStop) break;
      await this.executeJob(job);
    }
  }

  /**
   * Runs one job through the matching executor, records and reports its
   * result, writes it to the debug log, and applies the fail-fast rule.
   * An exception from the executor is turned into an "error" result.
   */
  private async executeJob(job: Job): Promise<void> {
    if (this.shouldStop) return;

    this.reporter.onJobStart(job);

    const effectiveBaseBranch =
      this.baseBranchOverride || this.config.project.base_branch;

    let result: GateResult;
    try {
      result =
        job.type === "check"
          ? await this.runCheckGate(job, effectiveBaseBranch)
          : await this.runReviewGate(job, effectiveBaseBranch);
    } catch (err) {
      console.error("[ERROR] Execution failed for", job.id, ":", err);
      result = {
        jobId: job.id,
        status: "error",
        duration: 0,
        message: err instanceof Error ? err.message : String(err),
      };
    }

    this.results.push(result);
    this.reporter.onJobComplete(job, result);
    await this.logGateResults(job.id, result);

    this.checkFailFast(job, result);
  }

  /** Executes a check gate and attaches its log path to the result. */
  private async runCheckGate(
    job: Job,
    effectiveBaseBranch: typeof this.config.project.base_branch,
  ): Promise<GateResult> {
    const logPath = await this.logger.getLogPath(job.id);
    const jobLogger = await this.logger.createJobLogger(job.id);
    const result = await this.checkExecutor.execute(
      job.id,
      job.gateConfig as LoadedCheckGateConfig,
      job.workingDirectory,
      jobLogger,
      { baseBranch: effectiveBaseBranch, isRerun: this.isRerun },
    );
    result.logPath = logPath;
    return result;
  }

  /** Executes a review gate, passing along prior failures and passed slots. */
  private async runReviewGate(
    job: Job,
    effectiveBaseBranch: typeof this.config.project.base_branch,
  ): Promise<GateResult> {
    // Lookups use the sanitized job ID because that's what log-parser uses
    // (it derives IDs from filenames).
    const safeJobId = sanitizeJobId(job.id);
    return this.reviewExecutor.execute(
      job.id,
      job.gateConfig as LoadedReviewGateConfig,
      job.entryPoint,
      this.logger.createLoggerFactory(job.id),
      effectiveBaseBranch,
      this.previousFailuresMap?.get(safeJobId),
      this.changeOptions,
      this.config.project.rerun_new_issue_threshold,
      this.passedSlotsMap?.get(safeJobId),
      this.config.project.log_dir,
      this.config.project.cli?.adapters,
    );
  }

  /** Requests a stop when a non-passing check gate has `fail_fast` set. */
  private checkFailFast(job: Job, result: GateResult): void {
    if (result.status === "pass" || job.type !== "check") return;

    // Only check gates reach this point, so the cast to the check config is safe.
    if ((job.gateConfig as LoadedCheckGateConfig).fail_fast) {
      this.shouldStop = true;
    }
  }

  /**
   * Writes gate results to the debug log, if a debug logger was supplied.
   * Results with sub-results get one entry per sub-result (reviewer); all
   * other results get a single entry.
   */
  private async logGateResults(
    jobId: string,
    result: GateResult,
  ): Promise<void> {
    const debugLogger = this.debugLogger;
    if (!debugLogger) return;

    const subResults = result.subResults;
    if (!subResults || subResults.length === 0) {
      await debugLogger.logGateResult(jobId, result.status, result.duration, {
        violations: result.errorCount,
      });
      return;
    }

    for (const sub of subResults) {
      // Pull the CLI name out of a "(name@N)" suffix, when present.
      const cli = sub.nameSuffix.match(/\((.+?)@\d+\)/)?.[1];
      await debugLogger.logGateResult(
        jobId,
        sub.status,
        sub.duration ?? result.duration,
        { violations: sub.errorCount, cli },
      );
    }
  }
}
@@ -1,118 +0,0 @@
1
- import { exec } from "node:child_process";
2
- import { promisify } from "node:util";
3
- import type { LoadedCheckGateConfig } from "../config/types.js";
4
- import { resolveCheckCommand } from "./resolve-check-command.js";
5
- import type { GateResult } from "./result.js";
6
-
7
// Promise-returning form of child_process.exec.
const execAsync = promisify(exec);
// Upper bound passed to exec as maxBuffer: 10 MiB.
const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
9
-
10
/**
 * Runs a check gate by executing its shell command and turning the outcome
 * into a GateResult. All output and the final verdict are written through the
 * supplied logger.
 */
export class CheckGateExecutor {
  /**
   * Executes the gate's command in `workingDirectory`.
   *
   * @param jobId - Identifier copied onto the returned result.
   * @param config - Check gate configuration (command, timeout in seconds, fix hints).
   * @param workingDirectory - Directory used as the command's cwd.
   * @param logger - Async sink receiving command output and status lines.
   * @param options - Passed to resolveCheckCommand to pick and fill the command.
   * @returns "pass" when the command resolves; "fail" on timeout or numeric
   *   exit code; "error" for any other failure.
   */
  async execute(
    jobId: string,
    config: LoadedCheckGateConfig,
    workingDirectory: string,
    logger: (output: string) => Promise<void>,
    options?: { baseBranch?: string; isRerun?: boolean },
  ): Promise<GateResult> {
    const startTime = Date.now();

    const command = resolveCheckCommand(config, options);

    try {
      await logger(
        `[${new Date().toISOString()}] Starting check: ${config.name}\n`,
      );
      await logger(`Executing command: ${command}\n`);
      await logger(`Working directory: ${workingDirectory}\n\n`);

      const { stdout, stderr } = await execAsync(command, {
        cwd: workingDirectory,
        // config.timeout is in seconds; exec expects milliseconds.
        timeout: config.timeout ? config.timeout * 1000 : undefined,
        maxBuffer: MAX_BUFFER_BYTES,
      });

      if (stdout) await logger(stdout);
      if (stderr) await logger(`\nSTDERR:\n${stderr}`);

      const result: GateResult = {
        jobId,
        status: "pass",
        duration: Date.now() - startTime,
        message: "Command exited with code 0",
      };

      await logger(`Result: ${result.status} - ${result.message}\n`);
      return result;
    } catch (error: unknown) {
      // exec rejections carry captured output plus signal/code. `code` is a
      // number for a non-zero exit but may be a string for other failures.
      const err = error as {
        stdout?: string;
        stderr?: string;
        message?: string;
        signal?: string;
        code?: number | string | null;
      };
      if (err.stdout) await logger(err.stdout);
      if (err.stderr) await logger(`\nSTDERR:\n${err.stderr}`);

      await logger(`\nCommand failed: ${err.message}`);

      const verdict = this.classifyFailure(err, config.timeout);
      return this.concludeFailure(
        {
          jobId,
          status: verdict.status,
          duration: Date.now() - startTime,
          message: verdict.message,
          fixInstructions: config.fixInstructionsContent,
          fixWithSkill: config.fixWithSkill,
        },
        config,
        logger,
      );
    }
  }

  /**
   * Maps a rejected exec call to a status and summary message.
   * SIGTERM with a configured timeout is a timeout; a numeric code is a
   * non-zero exit; anything else is an execution error.
   */
  private classifyFailure(
    err: { message?: string; signal?: string; code?: number | string | null },
    timeout: LoadedCheckGateConfig["timeout"],
  ): { status: GateResult["status"]; message: string } {
    if (err.signal === "SIGTERM" && timeout) {
      return { status: "fail", message: `Timed out after ${timeout}s` };
    }
    if (typeof err.code === "number") {
      return { status: "fail", message: `Exited with code ${err.code}` };
    }
    return { status: "error", message: err.message || "Unknown error" };
  }

  /** Logs the verdict line and any fix hints, then returns the result. */
  private async concludeFailure(
    result: GateResult,
    config: LoadedCheckGateConfig,
    logger: (output: string) => Promise<void>,
  ): Promise<GateResult> {
    await logger(`Result: ${result.status} - ${result.message}\n`);
    await this.logFixInfo(config, logger);
    return result;
  }

  /** Writes the configured fix instructions and/or fix skill name to the log. */
  private async logFixInfo(
    config: LoadedCheckGateConfig,
    logger: (output: string) => Promise<void>,
  ): Promise<void> {
    if (config.fixInstructionsContent) {
      await logger(
        `\n--- Fix Instructions ---\n${config.fixInstructionsContent}\n`,
      );
    }
    if (config.fixWithSkill) {
      await logger(`\n--- Fix Skill: ${config.fixWithSkill} ---\n`);
    }
  }
}
@@ -1,21 +0,0 @@
1
- import type { LoadedCheckGateConfig } from "../config/types.js";
2
-
3
/**
 * Picks the command string a check gate should run and fills in variables.
 *
 * `rerun_command` is chosen only when `options.isRerun` is truthy and a
 * non-empty `rerun_command` exists; otherwise `command` is used. When a
 * non-empty `options.baseBranch` is given, every literal `${BASE_BRANCH}`
 * occurrence is replaced with it verbatim.
 */
export function resolveCheckCommand(
  config: Pick<LoadedCheckGateConfig, "command" | "rerun_command">,
  options?: { baseBranch?: string; isRerun?: boolean },
): string {
  let template = config.command;
  if (options?.isRerun && config.rerun_command) {
    template = config.rerun_command;
  }

  const baseBranch = options?.baseBranch;
  if (!baseBranch) return template;

  // Literal split/join: the branch name is inserted as-is, with no special
  // handling of "$" sequences.
  return template.split("${BASE_BRANCH}").join(baseBranch);
}
@@ -1,54 +0,0 @@
1
/** Final status a gate (or a reviewer sub-result) can report. */
export type GateStatus = "pass" | "fail" | "error";
2
-
3
/** A single violation entry as it appears in review output. */
export interface PreviousViolation {
  /** File the violation refers to. */
  file: string;
  /** Line reference; either a number or a string. */
  line: number | string;
  /** Description of the issue. */
  issue: string;
  /** Optional suggested fix. */
  fix?: string;
  /** Optional priority level. */
  priority?: "critical" | "high" | "medium" | "low";
  /** Optional lifecycle marker for the violation. */
  status?: "new" | "fixed" | "skipped";
  /** Optional result text; may be null. */
  result?: string | null;
}
12
-
13
/** Full JSON record for one review, including the raw output. */
export interface ReviewFullJsonOutput {
  /** Name of the adapter that produced the review. */
  adapter: string;
  /** Timestamp of the review, as a string. */
  timestamp: string;
  /** Review status; a gate status or "skipped_prior_pass". */
  status: GateStatus | "skipped_prior_pass";
  /** Unparsed output text. */
  rawOutput: string;
  /** Violations extracted from the review. */
  violations: PreviousViolation[];
  /** Only present when status is "skipped_prior_pass". */
  passIteration?: number;
}
21
-
22
/** Outcome of running one gate job. */
export interface GateResult {
  /** ID of the job this result belongs to. */
  jobId: string;
  /** Overall status of the gate. */
  status: GateStatus;
  /** Elapsed time in ms. */
  duration: number;
  /** Summary message. */
  message?: string;
  /** Path to the full log. */
  logPath?: string;
  /** Paths to multiple logs (e.g. per-agent logs). */
  logPaths?: string[];
  /** Markdown content for fixing failures. */
  fixInstructions?: string;
  /** CLI skill name for fixing failures. */
  fixWithSkill?: string;
  /** Number of active failures/violations. */
  errorCount?: number;
  /** Number of violations marked as fixed. */
  fixedCount?: number;
  /** Violations that were skipped. */
  skipped?: Array<{
    file: string;
    line: number | string;
    issue: string;
    result?: string | null;
  }>;
  /** Per-reviewer results, when the gate produced more than one. */
  subResults?: Array<{
    nameSuffix: string;
    status: GateStatus;
    /** Per-reviewer timing in ms. */
    duration?: number;
    message: string;
    logPath?: string;
    errorCount?: number;
    fixedCount?: number;
    skipped?: Array<{
      file: string;
      line: number | string;
      issue: string;
      result?: string | null;
    }>;
  }>;
}