agent-gauntlet 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/LICENSE +201 -0
  2. package/README.md +106 -0
  3. package/package.json +51 -0
  4. package/src/cli-adapters/claude.ts +114 -0
  5. package/src/cli-adapters/codex.ts +123 -0
  6. package/src/cli-adapters/gemini.ts +149 -0
  7. package/src/cli-adapters/index.ts +79 -0
  8. package/src/commands/check.test.ts +25 -0
  9. package/src/commands/check.ts +67 -0
  10. package/src/commands/detect.test.ts +37 -0
  11. package/src/commands/detect.ts +69 -0
  12. package/src/commands/health.test.ts +79 -0
  13. package/src/commands/health.ts +148 -0
  14. package/src/commands/help.test.ts +44 -0
  15. package/src/commands/help.ts +24 -0
  16. package/src/commands/index.ts +9 -0
  17. package/src/commands/init.test.ts +105 -0
  18. package/src/commands/init.ts +330 -0
  19. package/src/commands/list.test.ts +104 -0
  20. package/src/commands/list.ts +29 -0
  21. package/src/commands/rerun.ts +118 -0
  22. package/src/commands/review.test.ts +25 -0
  23. package/src/commands/review.ts +67 -0
  24. package/src/commands/run.test.ts +25 -0
  25. package/src/commands/run.ts +64 -0
  26. package/src/commands/shared.ts +10 -0
  27. package/src/config/loader.test.ts +129 -0
  28. package/src/config/loader.ts +130 -0
  29. package/src/config/schema.ts +63 -0
  30. package/src/config/types.ts +23 -0
  31. package/src/config/validator.ts +493 -0
  32. package/src/core/change-detector.ts +112 -0
  33. package/src/core/entry-point.test.ts +63 -0
  34. package/src/core/entry-point.ts +80 -0
  35. package/src/core/job.ts +74 -0
  36. package/src/core/runner.ts +226 -0
  37. package/src/gates/check.ts +82 -0
  38. package/src/gates/result.ts +9 -0
  39. package/src/gates/review.ts +501 -0
  40. package/src/index.ts +38 -0
  41. package/src/output/console.ts +201 -0
  42. package/src/output/logger.ts +66 -0
  43. package/src/utils/log-parser.ts +228 -0
  44. package/src/utils/sanitizer.ts +3 -0
@@ -0,0 +1,80 @@
1
+ import path from 'node:path';
2
+ import { EntryPointConfig } from '../config/types.js';
3
+
4
/**
 * A concrete directory selected for gate execution, paired with the
 * entry-point configuration that produced it.
 */
export interface ExpandedEntryPoint {
  /** The specific directory (e.g., "engines/billing"). */
  path: string;
  /** The config that generated this entry (e.g., the one declared as "engines/*"). */
  config: EntryPointConfig;
}
8
+
9
/**
 * Resolves configured entry points against a list of changed files and
 * returns the concrete directories whose gates should run.
 *
 * Paths in `changedFiles` are expected to be repository-relative and to use
 * forward slashes, which is the form the prefix checks below compare against.
 */
export class EntryPointExpander {
  /**
   * @param rootDir Root directory of the repository; defaults to the current
   *   working directory.
   */
  constructor(private rootDir: string = process.cwd()) {}

  /**
   * Expands entry point configs into concrete entry points.
   *
   * - The root entry point (".") is included whenever there is at least one
   *   changed file. If no root entry is configured, a bare `{ path: '.' }`
   *   config is used.
   * - A wildcard entry (path ending in "*", e.g. "engines/*") yields one
   *   result per immediate sub-directory that contains a changed file.
   * - Any other entry is included when a changed file equals the path or
   *   lives underneath it.
   *
   * @param entryPoints Configured entry points.
   * @param changedFiles Changed file paths.
   * @returns Expanded entry points, root first, then in configuration order.
   */
  async expand(entryPoints: EntryPointConfig[], changedFiles: string[]): Promise<ExpandedEntryPoint[]> {
    const results: ExpandedEntryPoint[] = [];

    if (changedFiles.length > 0) {
      const rootConfig = entryPoints.find(ep => ep.path === '.') ?? { path: '.' };
      results.push({ path: '.', config: rootConfig });
    }

    for (const ep of entryPoints) {
      if (ep.path === '.') continue; // Handled above

      if (ep.path.endsWith('*')) {
        // Strip the trailing "*" together with the separator before it, if
        // any. Unlike a fixed two-character slice this does not eat a real
        // path character when the pattern is written without "/".
        const parentDir = ep.path.replace(/\/?\*$/, '');
        const expandedPaths = await this.expandWildcard(parentDir, changedFiles);

        for (const subDir of expandedPaths) {
          results.push({ path: subDir, config: ep });
        }
      } else if (this.hasChangesInDir(ep.path, changedFiles)) {
        // Fixed directory (e.g., "apps/api")
        results.push({ path: ep.path, config: ep });
      }
    }

    return results;
  }

  /**
   * Collects the immediate sub-directories of `parentDir` that contain at
   * least one changed file.
   *
   * @param parentDir Directory the wildcard was declared under (e.g.
   *   "engines"); an empty string means the repository root.
   * @param changedFiles Changed file paths.
   * @returns Unique sub-directory paths in first-seen order.
   */
  private async expandWildcard(parentDir: string, changedFiles: string[]): Promise<string[]> {
    const prefix = parentDir === '' ? '' : `${parentDir}/`;
    const affectedSubDirs = new Set<string>();

    for (const file of changedFiles) {
      if (!file.startsWith(prefix)) continue;

      // file: "engines/billing/src/foo.ts", parentDir: "engines"
      // relPath: "billing/src/foo.ts"
      const relPath = file.slice(prefix.length);
      const separatorIndex = relPath.indexOf('/');

      // Without a separator the file sits directly in parentDir (e.g.
      // "engines/README.md"); that is a file, not a sub-directory.
      if (separatorIndex <= 0) continue;

      // Join with POSIX separators so results keep the same "/" form as the
      // changed-file paths on every platform.
      affectedSubDirs.add(path.posix.join(parentDir, relPath.slice(0, separatorIndex)));
    }

    return Array.from(affectedSubDirs);
  }

  /**
   * Reports whether any changed file equals `dirPath` or lives underneath it.
   * The comparison is made against `dirPath + "/"` so that "app" does not
   * match "apple".
   */
  private hasChangesInDir(dirPath: string, changedFiles: string[]): boolean {
    const dirPrefix = dirPath.endsWith('/') ? dirPath : dirPath + '/';
    return changedFiles.some(f => f === dirPath || f.startsWith(dirPrefix));
  }
}
@@ -0,0 +1,74 @@
1
+ import { ExpandedEntryPoint } from './entry-point.js';
2
+ import { LoadedConfig, CheckGateConfig, ReviewGateConfig, ReviewPromptFrontmatter } from '../config/types.js';
3
+
4
/** The two kinds of gate a job can run. */
export type JobType = 'check' | 'review';

/**
 * A single unit of work: one gate applied to one expanded entry point.
 */
export interface Job {
  /** Unique id for logging/tracking. */
  id: string;
  /** Which kind of gate this job runs. */
  type: JobType;
  /** Name of the gate as referenced by the entry point configuration. */
  name: string;
  /** Path of the entry point this job belongs to. */
  entryPoint: string;
  /** Gate configuration; its shape depends on `type`. */
  gateConfig: CheckGateConfig | (ReviewGateConfig & ReviewPromptFrontmatter);
  /** Directory associated with the job for execution. */
  workingDirectory: string;
}
14
+
15
/**
 * Turns expanded entry points into the concrete list of gate jobs to run.
 */
export class JobGenerator {
  constructor(private config: LoadedConfig) {}

  /**
   * Builds one job per (entry point, gate) pair that is enabled for the
   * current environment.
   *
   * The environment counts as CI when `CI` or `GITHUB_ACTIONS` equals the
   * string "true". In CI a gate needs `run_in_ci`; otherwise it needs
   * `run_locally`. A gate name with no matching definition is skipped with a
   * console warning.
   *
   * @param expandedEntryPoints Entry points to generate jobs for.
   * @returns Jobs in entry-point order; within each entry point all checks
   *   come before reviews.
   */
  generateJobs(expandedEntryPoints: ExpandedEntryPoint[]): Job[] {
    const env = process.env;
    const inCI = env.CI === 'true' || env.GITHUB_ACTIONS === 'true';
    const collected: Job[] = [];

    for (const { path: entryPath, config: entryConfig } of expandedEntryPoints) {
      // Checks first.
      for (const gateName of entryConfig.checks || []) {
        const gate = this.config.checks[gateName];
        if (!gate) {
          console.warn(`Warning: Check gate '${gateName}' configured in entry point '${entryPath}' but not found in checks definitions.`);
          continue;
        }

        const enabled = inCI ? gate.run_in_ci : gate.run_locally;
        if (!enabled) continue;

        collected.push({
          id: `check:${entryPath}:${gateName}`,
          type: 'check',
          name: gateName,
          entryPoint: entryPath,
          gateConfig: gate,
          // A check may override where it runs; otherwise it runs in the entry point.
          workingDirectory: gate.working_directory || entryPath
        });
      }

      // Then reviews.
      for (const gateName of entryConfig.reviews || []) {
        const gate = this.config.reviews[gateName];
        if (!gate) {
          console.warn(`Warning: Review gate '${gateName}' configured in entry point '${entryPath}' but not found in reviews definitions.`);
          continue;
        }

        const enabled = inCI ? gate.run_in_ci : gate.run_locally;
        if (!enabled) continue;

        collected.push({
          id: `review:${entryPath}:${gateName}`,
          type: 'review',
          name: gateName,
          entryPoint: entryPath,
          gateConfig: gate,
          // Reviews always run in context of the entry point.
          workingDirectory: entryPath
        });
      }
    }

    return collected;
  }
}
@@ -0,0 +1,226 @@
1
+ import { exec } from 'node:child_process';
2
+ import { promisify } from 'node:util';
3
+ import fs from 'node:fs/promises';
4
+ import { constants as fsConstants } from 'node:fs';
5
+ import path from 'node:path';
6
+ import { Job } from './job.js';
7
+ import { CheckGateExecutor } from '../gates/check.js';
8
+ import { ReviewGateExecutor } from '../gates/review.js';
9
+ import { Logger } from '../output/logger.js';
10
+ import { ConsoleReporter } from '../output/console.js';
11
+ import { GateResult } from '../gates/result.js';
12
+ import { LoadedConfig, ReviewGateConfig, ReviewPromptFrontmatter } from '../config/types.js';
13
+ import { getAdapter } from '../cli-adapters/index.js';
14
+ import { PreviousViolation } from '../utils/log-parser.js';
15
+ import { sanitizeJobId } from '../utils/sanitizer.js';
16
+
17
const execAsync = promisify(exec);

/**
 * Executes a list of gate jobs: pre-flights them, runs the survivors
 * (optionally in parallel), reports progress and collects the results.
 */
export class Runner {
  private checkExecutor = new CheckGateExecutor();
  private reviewExecutor = new ReviewGateExecutor();
  private results: GateResult[] = [];
  private shouldStop = false;

  /**
   * @param config Loaded project configuration.
   * @param logger Log sink used to create per-job loggers and log paths.
   * @param reporter Receives job start/complete events and the final summary.
   * @param previousFailuresMap Optional earlier violations, keyed by
   *   sanitized job id, passed on to review gates.
   * @param changeOptions Optional change selection passed on to review gates.
   */
  constructor(
    private config: LoadedConfig,
    private logger: Logger,
    private reporter: ConsoleReporter,
    private previousFailuresMap?: Map<string, Map<string, PreviousViolation[]>>,
    private changeOptions?: { commit?: string; uncommitted?: boolean }
  ) {}

  /**
   * Runs the given jobs and prints a summary.
   *
   * Jobs that fail the preflight are recorded as `error` results and are not
   * executed. When `project.allow_parallel` is set, jobs whose gate config has
   * `parallel` enabled are started together, while the remaining jobs run one
   * after another alongside that batch; otherwise everything runs
   * sequentially.
   *
   * @returns `true` only when every recorded result has status `pass`.
   */
  async run(jobs: Job[]): Promise<boolean> {
    await this.logger.init();

    const { runnableJobs, preflightResults } = await this.preflight(jobs);
    this.results.push(...preflightResults);

    const parallelEnabled = this.config.project.allow_parallel;
    const parallelJobs = parallelEnabled ? runnableJobs.filter(j => j.gateConfig.parallel) : [];
    const sequentialJobs = parallelEnabled ? runnableJobs.filter(j => !j.gateConfig.parallel) : runnableJobs;

    // Start parallel jobs
    const parallelPromises = parallelJobs.map(job => this.executeJob(job));

    // Sequential jobs run one by one, but concurrently with the parallel batch
    const sequentialPromise = (async () => {
      for (const job of sequentialJobs) {
        if (this.shouldStop) break;
        await this.executeJob(job);
      }
    })();

    await Promise.all([
      ...parallelPromises,
      sequentialPromise
    ]);

    await this.reporter.printSummary(this.results);

    return this.results.every(r => r.status === 'pass');
  }

  /**
   * Runs one job through the matching executor, records its result and
   * notifies the reporter. Does nothing once a fail-fast stop was requested.
   *
   * An exception thrown by an executor is converted into an `error` result
   * instead of propagating, so one broken gate cannot reject `run()` while
   * other jobs are still in flight and before the summary is printed.
   */
  private async executeJob(job: Job): Promise<void> {
    if (this.shouldStop) return;

    this.reporter.onJobStart(job);
    const logPath = this.logger.getLogPath(job.id);
    const jobLogger = await this.logger.createJobLogger(job.id);
    const startedAt = Date.now();

    let result: GateResult;

    try {
      if (job.type === 'check') {
        result = await this.checkExecutor.execute(
          job.id,
          job.gateConfig as any,
          job.workingDirectory,
          jobLogger
        );
      } else {
        // Use sanitized Job ID for lookup because that's what log-parser uses (based on filenames)
        const safeJobId = sanitizeJobId(job.id);
        const previousFailures = this.previousFailuresMap?.get(safeJobId);
        const loggerFactory = this.logger.createLoggerFactory(job.id);
        result = await this.reviewExecutor.execute(
          job.id,
          job.gateConfig as any,
          job.entryPoint,
          loggerFactory,
          this.config.project.base_branch,
          previousFailures,
          this.changeOptions,
          this.config.project.cli.check_usage_limit
        );
      }
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      result = {
        jobId: job.id,
        status: 'error',
        duration: Date.now() - startedAt,
        message: `Gate execution threw: ${reason}`
      };
      try {
        await jobLogger(`[${new Date().toISOString()}] ${result.message}\n`);
      } catch {
        // Best effort only: a failing log write must not mask the gate error.
      }
    }

    result.logPath = logPath;
    this.results.push(result);
    this.reporter.onJobComplete(job, result);

    // Handle Fail Fast (only for checks, and only when parallel is false)
    // fail_fast can only be set on checks when parallel is false (enforced by schema)
    if (result.status !== 'pass' && job.type === 'check' && job.gateConfig.fail_fast) {
      this.shouldStop = true;
    }
  }

  /**
   * Verifies that each job can run before anything is executed.
   *
   * - Check jobs need a parseable command whose executable can be found.
   * - Review jobs need at least `num_reviews` (default 1) healthy CLI tools
   *   from `cli_preference`; adapter health is looked up once per tool name.
   *
   * A failing job produces an `error` result instead of being run, and a
   * failing fail-fast check stops the preflight loop.
   */
  private async preflight(jobs: Job[]): Promise<{ runnableJobs: Job[]; preflightResults: GateResult[] }> {
    const runnableJobs: Job[] = [];
    const preflightResults: GateResult[] = [];
    const cliCache = new Map<string, boolean>();

    for (const job of jobs) {
      if (this.shouldStop) break;
      if (job.type === 'check') {
        const commandName = this.getCommandName((job.gateConfig as any).command);
        if (!commandName) {
          preflightResults.push(await this.recordPreflightFailure(job, 'Unable to parse command'));
          if (this.shouldFailFast(job)) this.shouldStop = true;
          continue;
        }

        const available = await this.commandExists(commandName, job.workingDirectory);
        if (!available) {
          preflightResults.push(await this.recordPreflightFailure(job, `Missing command: ${commandName}`));
          if (this.shouldFailFast(job)) this.shouldStop = true;
          continue;
        }
      } else {
        const reviewConfig = job.gateConfig as ReviewGateConfig & ReviewPromptFrontmatter;
        const required = reviewConfig.num_reviews ?? 1;
        const availableTools: string[] = [];

        for (const toolName of reviewConfig.cli_preference || []) {
          if (availableTools.length >= required) break;
          const cached = cliCache.get(toolName);
          const isAvailable = cached ?? await this.checkAdapter(toolName);
          cliCache.set(toolName, isAvailable);
          if (isAvailable) availableTools.push(toolName);
        }

        if (availableTools.length < required) {
          preflightResults.push(
            await this.recordPreflightFailure(
              job,
              `Missing CLI tools: need ${required}, found ${availableTools.length}`
            )
          );
          if (this.shouldFailFast(job)) this.shouldStop = true;
          continue;
        }
      }

      runnableJobs.push(job);
    }

    return { runnableJobs, preflightResults };
  }

  /**
   * Writes a preflight failure to the job's log and returns the matching
   * `error` result with a duration of 0.
   */
  private async recordPreflightFailure(job: Job, message: string): Promise<GateResult> {
    const logPath = this.logger.getLogPath(job.id);
    const jobLogger = await this.logger.createJobLogger(job.id);
    await jobLogger(`[${new Date().toISOString()}] Health check failed\n${message}\n`);
    return {
      jobId: job.id,
      status: 'error',
      duration: 0,
      message,
      logPath
    };
  }

  /**
   * Reports whether the named CLI adapter exists and its health check
   * returns status `healthy`.
   */
  private async checkAdapter(name: string): Promise<boolean> {
    const adapter = getAdapter(name);
    if (!adapter) return false;
    const health = await adapter.checkHealth({
      checkUsageLimit: this.config.project.cli.check_usage_limit
    });
    return health.status === 'healthy';
  }

  /**
   * Extracts the executable name from a command line, skipping a leading
   * `env` and any `NAME=value` assignments.
   *
   * @returns The first remaining token, or `null` when there is none.
   */
  private getCommandName(command: string): string | null {
    const tokens = this.tokenize(command);
    for (const token of tokens) {
      if (token === 'env') continue;
      if (this.isEnvAssignment(token)) continue;
      return token;
    }
    return null;
  }

  /**
   * Splits a command line on whitespace while keeping quoted segments
   * together, then strips one leading and one trailing quote from each token.
   */
  private tokenize(command: string): string[] {
    const matches = command.match(/(?:[^\s"']+|"[^"]*"|'[^']*')+/g);
    if (!matches) return [];
    return matches.map(token => token.replace(/^['"]|['"]$/g, ''));
  }

  /** Reports whether a token has the form `NAME=...` of an environment assignment. */
  private isEnvAssignment(token: string): boolean {
    return /^[A-Za-z_][A-Za-z0-9_]*=/.test(token);
  }

  /**
   * Wraps a value in single quotes for a POSIX shell, escaping embedded
   * single quotes, so the shell treats it as one literal word.
   */
  private quoteForShell(value: string): string {
    return `'${value.replace(/'/g, `'\\''`)}'`;
  }

  /**
   * Reports whether a command can be executed.
   *
   * A command containing "/" or starting with "." is treated as a path
   * (resolved against `cwd` when relative) and must be executable. Anything
   * else is looked up with the shell's `command -v`.
   */
  private async commandExists(command: string, cwd: string): Promise<boolean> {
    if (command.includes('/') || command.startsWith('.')) {
      const resolved = path.isAbsolute(command) ? command : path.join(cwd, command);
      try {
        await fs.access(resolved, fsConstants.X_OK);
        return true;
      } catch {
        return false;
      }
    }

    try {
      // Quote the name: it comes from a user-written command line, and an
      // unquoted token such as `foo; echo hi` would be run as shell code and
      // make the lookup succeed for a command that does not exist.
      await execAsync(`command -v ${this.quoteForShell(command)}`);
      return true;
    } catch {
      return false;
    }
  }

  /** Reports whether a failure of this job should stop the remaining jobs. */
  private shouldFailFast(job: Job): boolean {
    // Only checks can have fail_fast, and only when parallel is false
    return Boolean(job.type === 'check' && job.gateConfig.fail_fast);
  }
}
@@ -0,0 +1,82 @@
1
+ import { exec } from 'node:child_process';
2
+ import { promisify } from 'node:util';
3
+ import { CheckGateConfig } from '../config/types.js';
4
+ import { GateResult } from './result.js';
5
+
6
const execAsync = promisify(exec);
const MAX_BUFFER_BYTES = 10 * 1024 * 1024;

/**
 * Runs a check gate by executing its shell command and mapping the outcome
 * to a gate result.
 */
export class CheckGateExecutor {
  /**
   * Executes `config.command` in `workingDirectory` and logs its output.
   *
   * Outcomes:
   * - exit code 0: `pass`
   * - killed because `config.timeout` (seconds) elapsed: `fail`
   * - non-zero numeric exit code: `fail`
   * - anything else (terminated by a signal, output buffer exceeded, spawn
   *   failure, ...): `error`
   *
   * @param jobId Id copied into the returned result.
   * @param config Check gate configuration (`name`, `command`, optional `timeout`).
   * @param workingDirectory Directory the command runs in.
   * @param logger Async sink receiving the log text.
   * @returns The gate result; this method reports failures through the
   *   result rather than by throwing, unless `logger` itself rejects while
   *   the failure is being logged.
   */
  async execute(
    jobId: string,
    config: CheckGateConfig,
    workingDirectory: string,
    logger: (output: string) => Promise<void>
  ): Promise<GateResult> {
    const startTime = Date.now();

    // Builds the result, writes the closing "Result:" log line, and returns it.
    const finish = async (status: GateResult['status'], message: string): Promise<GateResult> => {
      const result: GateResult = {
        jobId,
        status,
        duration: Date.now() - startTime,
        message
      };
      await logger(`Result: ${result.status} - ${result.message}\n`);
      return result;
    };

    try {
      await logger(`[${new Date().toISOString()}] Starting check: ${config.name}\n`);
      await logger(`Executing command: ${config.command}\n`);
      await logger(`Working directory: ${workingDirectory}\n\n`);

      const { stdout, stderr } = await execAsync(config.command, {
        cwd: workingDirectory,
        timeout: config.timeout ? config.timeout * 1000 : undefined,
        maxBuffer: MAX_BUFFER_BYTES
      });

      if (stdout) await logger(stdout);
      if (stderr) await logger(`\nSTDERR:\n${stderr}`);

      return await finish('pass', 'Command exited with code 0');
    } catch (error: unknown) {
      // Narrow the unknown failure to the fields an exec rejection may carry.
      const failure = (typeof error === 'object' && error !== null ? error : {}) as {
        stdout?: string;
        stderr?: string;
        message?: string;
        signal?: string | null;
        killed?: boolean;
        code?: number | string | null;
      };

      if (failure.stdout) await logger(failure.stdout);
      if (failure.stderr) await logger(`\nSTDERR:\n${failure.stderr}`);

      // End the line so the following "Result:" entry starts on its own line.
      const failureText = `\nCommand failed: ${failure.message}`;
      await logger(failureText.endsWith('\n') ? failureText : `${failureText}\n`);

      // Timeout: exec sets `killed` when it terminated the child itself. The
      // signal alone is not enough, because a SIGTERM sent by anything else
      // would then be misreported as a timeout.
      if (failure.killed === true && config.timeout) {
        return finish('fail', `Timed out after ${config.timeout}s`);
      }

      // Non-zero exit code
      if (typeof failure.code === 'number') {
        return finish('fail', `Exited with code ${failure.code}`);
      }

      // Other errors
      return finish('error', failure.message || 'Unknown error');
    }
  }
}
@@ -0,0 +1,9 @@
1
/** Outcome category of a gate run. */
export type GateStatus = 'pass' | 'fail' | 'error';

/**
 * Result record produced for one gate job.
 */
export interface GateResult {
  /** Id of the job this result belongs to. */
  jobId: string;
  /** Outcome of the run. */
  status: GateStatus;
  /** Elapsed time in milliseconds. */
  duration: number;
  /** Summary message. */
  message?: string;
  /** Path to the full log. */
  logPath?: string;
}