agent-gauntlet 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +106 -0
  3. package/package.json +51 -0
  4. package/src/cli-adapters/claude.ts +114 -0
  5. package/src/cli-adapters/codex.ts +123 -0
  6. package/src/cli-adapters/gemini.ts +149 -0
  7. package/src/cli-adapters/index.ts +79 -0
  8. package/src/commands/check.test.ts +25 -0
  9. package/src/commands/check.ts +67 -0
  10. package/src/commands/detect.test.ts +37 -0
  11. package/src/commands/detect.ts +69 -0
  12. package/src/commands/health.test.ts +79 -0
  13. package/src/commands/health.ts +148 -0
  14. package/src/commands/help.test.ts +44 -0
  15. package/src/commands/help.ts +24 -0
  16. package/src/commands/index.ts +9 -0
  17. package/src/commands/init.test.ts +105 -0
  18. package/src/commands/init.ts +330 -0
  19. package/src/commands/list.test.ts +104 -0
  20. package/src/commands/list.ts +29 -0
  21. package/src/commands/rerun.ts +118 -0
  22. package/src/commands/review.test.ts +25 -0
  23. package/src/commands/review.ts +67 -0
  24. package/src/commands/run.test.ts +25 -0
  25. package/src/commands/run.ts +64 -0
  26. package/src/commands/shared.ts +10 -0
  27. package/src/config/loader.test.ts +129 -0
  28. package/src/config/loader.ts +130 -0
  29. package/src/config/schema.ts +63 -0
  30. package/src/config/types.ts +23 -0
  31. package/src/config/validator.ts +493 -0
  32. package/src/core/change-detector.ts +112 -0
  33. package/src/core/entry-point.test.ts +63 -0
  34. package/src/core/entry-point.ts +80 -0
  35. package/src/core/job.ts +74 -0
  36. package/src/core/runner.ts +226 -0
  37. package/src/gates/check.ts +82 -0
  38. package/src/gates/result.ts +9 -0
  39. package/src/gates/review.ts +501 -0
  40. package/src/index.ts +38 -0
  41. package/src/output/console.ts +201 -0
  42. package/src/output/logger.ts +66 -0
  43. package/src/utils/log-parser.ts +228 -0
  44. package/src/utils/sanitizer.ts +3 -0
@@ -0,0 +1,201 @@
1
+ import chalk from 'chalk';
2
+ import fs from 'node:fs/promises';
3
+ import { GateResult } from '../gates/result.js';
4
+ import { Job } from '../core/job.js';
5
+
6
/**
 * Prints gate progress and the end-of-run summary to the console.
 *
 * Each job gets a colorized start line and outcome line. For every failed or
 * errored result, the summary additionally prints details extracted from the
 * result's log file (when `logPath` is set and readable).
 */
export class ConsoleReporter {
  /** Prints a `[START]` line for the given job. */
  onJobStart(job: Job): void {
    console.log(chalk.blue(`[START] ${job.id}`));
  }

  /**
   * Prints the outcome line for a finished job.
   *
   * @param job - The job that finished; only its `id` is printed.
   * @param result - Outcome of the job. `duration` is divided by 1000 and shown
   *   with an `s` suffix (presumably milliseconds in — confirm against GateResult).
   */
  onJobComplete(job: Job, result: GateResult): void {
    const duration = (result.duration / 1000).toFixed(2) + 's';
    const message = result.message ?? '';

    switch (result.status) {
      case 'pass':
        console.log(chalk.green(`[PASS] ${job.id} (${duration})`));
        break;
      case 'fail':
        console.log(chalk.red(`[FAIL] ${job.id} (${duration}) - ${message}`));
        break;
      default:
        // Any status other than pass/fail is reported as an error.
        console.log(chalk.magenta(`[ERROR] ${job.id} (${duration}) - ${message}`));
    }
  }

  /**
   * Prints totals per status and, when anything failed or errored, a
   * "Failure Details" section with one entry per such result (failures first,
   * then errors).
   *
   * @param results - All gate results of the run.
   */
  async printSummary(results: GateResult[]): Promise<void> {
    console.log('\n' + chalk.bold('--- Gauntlet Summary ---'));

    const passed = results.filter(r => r.status === 'pass');
    const failed = results.filter(r => r.status === 'fail');
    const errored = results.filter(r => r.status === 'error');

    console.log(`Total: ${results.length}`);
    console.log(chalk.green(`Passed: ${passed.length}`));
    if (failed.length > 0) console.log(chalk.red(`Failed: ${failed.length}`));
    if (errored.length > 0) console.log(chalk.magenta(`Errored: ${errored.length}`));

    if (failed.length === 0 && errored.length === 0) {
      return;
    }

    console.log('\n' + chalk.bold('=== Failure Details ===\n'));

    // Sequential on purpose: output order must follow the result order.
    for (const result of [...failed, ...errored]) {
      const details = await this.extractFailureDetails(result);
      this.printFailureDetails(result, details);
    }
  }

  /**
   * Builds the detail lines for one failed/errored result.
   *
   * Falls back to the result message (or `'Unknown error'`) when the result
   * has no log path or the log file cannot be read.
   */
  private async extractFailureDetails(result: GateResult): Promise<string[]> {
    if (!result.logPath) {
      return [result.message ?? 'Unknown error'];
    }

    try {
      const logContent = await fs.readFile(result.logPath, 'utf-8');
      return this.parseLogContent(logContent, result.jobId);
    } catch {
      return [result.message ?? 'Unknown error', `(Could not read log file: ${result.logPath})`];
    }
  }

  /**
   * Extracts human-readable detail lines from a log.
   *
   * Logs of jobs whose id starts with `review:` are parsed as review logs,
   * everything else as check logs. Never returns an empty array: when nothing
   * could be extracted, a single placeholder line pointing at the log file is
   * returned.
   */
  private parseLogContent(logContent: string, jobId: string): string[] {
    const details = jobId.startsWith('review:')
      ? this.parseReviewLog(logContent)
      : this.parseCheckLog(logContent);

    if (details.length === 0) {
      details.push(' (See log file for details)');
    }

    return details;
  }

  /**
   * Review logs: tries, in order, the formatted "Parsed Result" section, an
   * embedded JSON result object, and finally plain error lines.
   */
  private parseReviewLog(logContent: string): string[] {
    const details: string[] = [];

    const violationsStart = logContent.indexOf('--- Parsed Result ---');
    if (violationsStart !== -1) {
      const sectionLines = logContent.substring(violationsStart).split('\n');

      for (let i = 0; i < sectionLines.length; i++) {
        // Numbered violation line: "1. file:line - issue". The line part may be
        // a number, '?' or 'NaN' (the same set the log parser accepts).
        const violationMatch = sectionLines[i].match(/^\d+\.\s+(.+?):(\d+|NaN|\?)\s+-\s+(.+)$/);
        if (!violationMatch) {
          continue;
        }

        const [, file, lineNum, issue] = violationMatch;
        details.push(` ${chalk.cyan(file)}:${chalk.yellow(lineNum)} - ${issue}`);

        // An optional "Fix:" suggestion sits on the line right after the violation.
        const nextLine = sectionLines[i + 1]?.trim();
        if (nextLine?.startsWith('Fix:')) {
          const fix = nextLine.substring(4).trim();
          details.push(` ${chalk.dim('Fix:')} ${fix}`);
          i++; // Skip the fix line
        }
      }
    }

    // No formatted violations: look for a JSON result object (minified or pretty-printed).
    if (details.length === 0) {
      details.push(...this.parseJsonViolations(logContent));
    }

    // Still nothing: look for error messages.
    if (details.length === 0) {
      const errorIndex = logContent.indexOf('Error:');
      if (errorIndex !== -1) {
        // First line of text following "Error:".
        const afterError = logContent.substring(errorIndex + 'Error:'.length).trim();
        const firstErrorLine = afterError.split('\n')[0].trim();
        // CLI usage/help banners are not useful as an error description.
        if (firstErrorLine && !firstErrorLine.startsWith('Usage:') && !firstErrorLine.startsWith('Commands:')) {
          details.push(` ${firstErrorLine}`);
        }
      }

      if (details.length === 0) {
        const resultMatch = logContent.match(/Result:\s*error(?:\s*-\s*(.+?))?(?:\n|$)/);
        if (resultMatch && resultMatch[1]) {
          details.push(` ${resultMatch[1]}`);
        }
      }
    }

    return details;
  }

  /**
   * Extracts violations from a JSON object embedded in the log, taken as the
   * text between the first `{` and the last `}`. Only objects with
   * `status === 'fail'` and a `violations` array contribute; anything that
   * does not parse or has a different shape yields an empty array.
   */
  private parseJsonViolations(logContent: string): string[] {
    const details: string[] = [];
    const jsonStart = logContent.indexOf('{');
    const jsonEnd = logContent.lastIndexOf('}');
    if (jsonStart === -1 || jsonEnd <= jsonStart) {
      return details;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(logContent.substring(jsonStart, jsonEnd + 1));
    } catch {
      // Not valid JSON; the caller falls through to other parsing.
      return details;
    }

    if (typeof parsed !== 'object' || parsed === null) {
      return details;
    }
    const { status, violations } = parsed as { status?: unknown; violations?: unknown };
    if (status !== 'fail' || !Array.isArray(violations)) {
      return details;
    }

    for (const entry of violations) {
      if (typeof entry !== 'object' || entry === null) {
        continue; // Malformed entry; nothing to display.
      }
      const v = entry as Record<string, unknown>;
      const file = v.file || 'unknown';
      const line = v.line || '?';
      const issue = v.issue || 'Unknown issue';
      details.push(` ${chalk.cyan(file)}:${chalk.yellow(line)} - ${issue}`);
      if (v.fix) {
        details.push(` ${chalk.dim('Fix:')} ${v.fix}`);
      }
    }

    return details;
  }

  /**
   * Check logs: prefers up to 10 non-empty lines following the `STDERR:`
   * marker, then a `Command failed:` line, then a `Result: fail|error - …` line.
   */
  private parseCheckLog(logContent: string): string[] {
    const details: string[] = [];

    const stderrStart = logContent.indexOf('STDERR:');
    if (stderrStart !== -1) {
      const stderrLines = logContent
        .substring(stderrStart + 'STDERR:'.length)
        .trim()
        .split('\n')
        // Skip empty lines and command output markers.
        .filter(line =>
          line.trim() !== '' &&
          !line.includes('STDOUT:') &&
          !line.includes('Command failed:') &&
          !line.includes('Result:'));
      details.push(...stderrLines.slice(0, 10).map(line => ` ${line.trim()}`));
    }

    if (details.length === 0) {
      const errorMatch = logContent.match(/Command failed:\s*(.+?)(?:\n|$)/);
      if (errorMatch) {
        details.push(` ${errorMatch[1]}`);
      } else {
        const resultMatch = logContent.match(/Result:\s*(fail|error)\s*-\s*(.+?)(?:\n|$)/);
        if (resultMatch) {
          details.push(` ${resultMatch[2]}`);
        }
      }
    }

    return details;
  }

  /**
   * Prints one failure entry: status header, optional summary message, the
   * detail lines, the log path (when present) and a trailing blank line.
   */
  private printFailureDetails(result: GateResult, details: string[]): void {
    const isError = result.status === 'error';
    const statusColor = isError ? chalk.magenta : chalk.red;
    const statusLabel = isError ? 'ERROR' : 'FAIL';

    console.log(statusColor(`[${statusLabel}] ${result.jobId}`));
    if (result.message) {
      console.log(chalk.dim(` Summary: ${result.message}`));
    }

    if (details.length > 0) {
      console.log(chalk.dim(' Details:'));
      for (const detail of details) {
        console.log(detail);
      }
    }

    if (result.logPath) {
      console.log(chalk.dim(` Log: ${result.logPath}`));
    }

    console.log(''); // Empty line between failures
  }
}
@@ -0,0 +1,66 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { sanitizeJobId } from '../utils/sanitizer.js';
4
+
5
/** Returns the current instant as an ISO 8601 string (via `Date.prototype.toISOString`). */
function formatTimestamp(): string {
  const now = new Date();
  return now.toISOString();
}
8
+
9
/**
 * Creates per-job (and per-adapter) log files inside a log directory and
 * hands out async writer functions that append timestamped text to them.
 *
 * A log file is truncated the first time this Logger instance hands out a
 * writer for it; later writers for the same path append to the same file.
 */
export class Logger {
  /**
   * In-flight or completed truncation per log path. Storing the promise
   * (rather than marking the path only after the write finishes) guarantees a
   * file is truncated once even when two writers for the same path are
   * requested concurrently.
   */
  private initializedFiles: Map<string, Promise<void>> = new Map();

  /** @param logDir - Directory that will contain all log files. */
  constructor(private logDir: string) {}

  /** Creates the log directory (and any missing parents). */
  async init(): Promise<void> {
    await fs.mkdir(this.logDir, { recursive: true });
  }

  /** No-op: files are written with append calls, so nothing stays open. */
  async close(): Promise<void> {
    // No-op - using append mode
  }

  /**
   * Returns the log file path for a job, optionally specific to an adapter.
   *
   * @param jobId - Job identifier; sanitized via `sanitizeJobId` before use as a file name.
   * @param adapterName - When given, appended to the file name as `_<adapterName>`.
   */
  getLogPath(jobId: string, adapterName?: string): string {
    const safeName = sanitizeJobId(jobId);
    const fileName = adapterName ? `${safeName}_${adapterName}.log` : `${safeName}.log`;
    return path.join(this.logDir, fileName);
  }

  /**
   * Truncates (or creates) the log file the first time it is requested by
   * this instance. A failed truncation is forgotten so a later call can retry.
   */
  private initFile(logPath: string): Promise<void> {
    let pending = this.initializedFiles.get(logPath);
    if (!pending) {
      pending = fs.writeFile(logPath, '').catch((error: unknown) => {
        this.initializedFiles.delete(logPath);
        throw error;
      });
      this.initializedFiles.set(logPath, pending);
    }
    return pending;
  }

  /**
   * Builds the writer for one log file. Each call to the writer appends
   * `text` prefixed with `[<ISO timestamp>] ` (so only the first line of a
   * multi-line text carries the timestamp) and ensures the entry ends with a
   * newline.
   */
  private createWriter(logPath: string): (text: string) => Promise<void> {
    return async (text: string) => {
      const entry = `[${formatTimestamp()}] ${text}`;
      await fs.appendFile(logPath, text.endsWith('\n') ? entry : entry + '\n');
    };
  }

  /**
   * Prepares the job's log file and returns a writer for it.
   *
   * @param jobId - Job whose log file should be written.
   * @returns Async function appending one timestamped entry per call.
   */
  async createJobLogger(jobId: string): Promise<(text: string) => Promise<void>> {
    const logPath = this.getLogPath(jobId);
    await this.initFile(logPath);
    return this.createWriter(logPath);
  }

  /**
   * Returns a factory that lazily prepares a log file per adapter for the
   * given job and resolves to a writer for that file. Calling the factory
   * without an adapter name targets the job's own log file.
   *
   * @param jobId - Job the produced loggers belong to.
   */
  createLoggerFactory(jobId: string): (adapterName?: string) => Promise<(text: string) => Promise<void>> {
    return async (adapterName?: string) => {
      const logPath = this.getLogPath(jobId, adapterName);
      await this.initFile(logPath);
      return this.createWriter(logPath);
    };
  }
}
@@ -0,0 +1,228 @@
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
+
4
/** A single violation recovered from a previous review run's log. */
export interface PreviousViolation {
  /** File the violation was reported against. */
  file: string;
  /** Line number when it was numeric in the log; otherwise the raw marker (`'?'` or `'NaN'`). */
  line: number | string;
  /** Description of the problem. */
  issue: string;
  /** Suggested fix, when the log contained one for this violation. */
  fix?: string;
}

/** All violations one review adapter reported for a gate. */
export interface AdapterFailure {
  adapterName: string; // e.g., 'claude', 'gemini'
  violations: PreviousViolation[];
}

/** Failures recovered from one log file, grouped by adapter. */
export interface GateFailures {
  jobId: string; // Sanitized Job ID (log filename without the .log extension)
  gateName: string; // Currently always '' (cannot be recovered from the sanitized filename)
  entryPoint: string; // Currently always '' (cannot be recovered from the sanitized filename)
  adapterFailures: AdapterFailure[]; // Failures grouped by adapter
  logPath: string;
}

/** Marks the start of one adapter's output: `--- Review Output (adapterName) ---`. */
const REVIEW_SECTION_MARKER = /--- Review Output \(([^)]+)\) ---/g;

/**
 * Captures the text after `--- Parsed Result ---` up to the next marker line
 * (a line starting with `---`, optionally preceded by indentation and a
 * `[timestamp]` prefix) or the end of the section. Anchoring the terminator
 * to the start of a line keeps issue or fix text that merely contains `---`
 * from cutting the section short.
 */
const PARSED_RESULT_SECTION =
  /--- Parsed Result ---([\s\S]*?)(?=^[ \t]*(?:\[[^\]\n]*\]\s+)?---|(?![\s\S]))/m;

/**
 * Extracts violations from the text of a parsed-result section.
 * Matches lines of the form `1. file:line - issue`, where line is a number,
 * `NaN` or `?`, and attaches a following `Fix:` line when it appears before
 * the next numbered line.
 */
function extractParsedViolations(parsedContent: string): PreviousViolation[] {
  const violations: PreviousViolation[] = [];
  const violationRegex = /^\d+\.\s+(.+?):(\d+|NaN|\?)\s+-\s+(.+)$/gm;

  for (const vMatch of parsedContent.matchAll(violationRegex)) {
    const file = vMatch[1].trim();
    const rawLine = vMatch[2];
    // Keep the 'NaN' and '?' markers as strings; convert real line numbers.
    const line: number | string = rawLine === 'NaN' || rawLine === '?' ? rawLine : parseInt(rawLine, 10);
    const issue = vMatch[3].trim();

    // Look for a fix in the text that follows this violation line.
    const remainder = parsedContent.substring((vMatch.index ?? 0) + vMatch[0].length);
    const fixMatch = remainder.match(/^\s+Fix:\s+(.+)$/m);
    const nextViolationIndex = remainder.search(/^\d+\./m);

    // The fix belongs to this violation only if it precedes the next numbered line.
    let fix: string | undefined;
    if (
      fixMatch &&
      fixMatch.index !== undefined &&
      (nextViolationIndex === -1 || fixMatch.index < nextViolationIndex)
    ) {
      fix = fixMatch[1].trim();
    }

    violations.push({ file, line, issue, fix });
  }

  return violations;
}

/**
 * Fallback extraction: parses the text between the first `{` and the last `}`
 * of the section as JSON and reads its `violations` array. Entries without a
 * non-empty string `file` and `issue` are skipped; a missing or falsy `line`
 * becomes 0. Returns an empty array when no valid JSON object is found.
 */
function extractJsonViolations(sectionContent: string): PreviousViolation[] {
  const firstBrace = sectionContent.indexOf('{');
  const lastBrace = sectionContent.lastIndexOf('}');
  if (firstBrace === -1 || lastBrace <= firstBrace) {
    return [];
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(sectionContent.substring(firstBrace, lastBrace + 1));
  } catch {
    // The adapter output was not valid JSON; treat as "no violations found".
    return [];
  }

  if (typeof parsed !== 'object' || parsed === null) {
    return [];
  }
  const rawViolations = (parsed as { violations?: unknown }).violations;
  if (!Array.isArray(rawViolations)) {
    return [];
  }

  const violations: PreviousViolation[] = [];
  for (const raw of rawViolations) {
    if (typeof raw !== 'object' || raw === null) {
      continue;
    }
    const { file, line, issue, fix } = raw as Record<string, unknown>;
    if (typeof file !== 'string' || file === '' || typeof issue !== 'string' || issue === '') {
      continue;
    }
    violations.push({
      file,
      line: (typeof line === 'number' || typeof line === 'string') && line ? line : 0,
      issue,
      fix: typeof fix === 'string' ? fix : undefined
    });
  }
  return violations;
}

/**
 * Parses a single log file to extract failures per adapter.
 * Only processes review gates (ignores check gates): a log without a
 * `--- Review Output` marker yields `null`.
 *
 * For each adapter section, violations come from the `--- Parsed Result ---`
 * block when present (sections reporting `Status: PASS` are skipped), and
 * otherwise from a JSON object embedded in the section. A section that
 * reports `Status: FAIL` without any parsable violation is recorded with a
 * single placeholder violation.
 *
 * @param logPath - Path of the log file to read.
 * @returns The failures found, or `null` when the file cannot be read, is not
 *   a review log, or contains no failures.
 */
export async function parseLogFile(logPath: string): Promise<GateFailures | null> {
  try {
    const content = await fs.readFile(logPath, 'utf-8');

    // Check if it's a review log by content marker
    if (!content.includes('--- Review Output')) {
      return null;
    }

    // Use the sanitized filename as the Job ID key
    const jobId = path.basename(logPath).replace(/\.log$/, '');

    const sections = Array.from(content.matchAll(REVIEW_SECTION_MARKER), match => ({
      adapter: match[1],
      startIndex: match.index ?? 0
    }));
    if (sections.length === 0) {
      return null;
    }

    const adapterFailures: AdapterFailure[] = [];

    for (let i = 0; i < sections.length; i++) {
      const currentSection = sections[i];
      // A section runs until the next adapter marker, or to the end of the file.
      const endIndex = i + 1 < sections.length ? sections[i + 1].startIndex : content.length;
      const sectionContent = content.substring(currentSection.startIndex, endIndex);

      const parsedResultMatch = sectionContent.match(PARSED_RESULT_SECTION);
      let violations: PreviousViolation[];

      if (parsedResultMatch) {
        const parsedContent = parsedResultMatch[1];
        if (parsedContent.includes('Status: PASS')) {
          continue; // No violations for this adapter
        }
        violations = extractParsedViolations(parsedContent);
      } else {
        violations = extractJsonViolations(sectionContent);
      }

      if (violations.length > 0) {
        adapterFailures.push({ adapterName: currentSection.adapter, violations });
      } else if (parsedResultMatch && parsedResultMatch[1].includes('Status: FAIL')) {
        // Track failure even if violations couldn't be parsed
        adapterFailures.push({
          adapterName: currentSection.adapter,
          violations: [{
            file: 'unknown',
            line: '?',
            issue: 'Previous run failed but specific violations could not be parsed'
          }]
        });
      }
    }

    if (adapterFailures.length === 0) {
      return null;
    }

    return {
      jobId,
      // entryPoint/gateName can't be reliably parsed from the sanitized
      // filename; left empty as they aren't critical for the map lookup.
      gateName: '',
      entryPoint: '',
      adapterFailures,
      logPath
    };
  } catch {
    // Best-effort: an unreadable log is treated as "no previous failures".
    return null;
  }
}
189
+
190
/**
 * Finds all previous failures from the log directory.
 *
 * Every `*.log` file in `logDir` is parsed with `parseLogFile`; files that
 * yield no failures are left out.
 *
 * @param logDir - Directory containing the log files.
 * @param gateFilter - When given, only files whose name contains this text are
 *   considered (file names are sanitized, so this is a loose substring check).
 * @returns The failures found; an empty array when the directory does not
 *   exist or any other error occurs while reading it.
 */
export async function findPreviousFailures(
  logDir: string,
  gateFilter?: string
): Promise<GateFailures[]> {
  try {
    const entries = await fs.readdir(logDir);
    const candidates = entries.filter(
      name => name.endsWith('.log') && (!gateFilter || name.includes(gateFilter))
    );

    const collected: GateFailures[] = [];
    for (const name of candidates) {
      const parsed = await parseLogFile(path.join(logDir, name));
      if (parsed) {
        collected.push(parsed);
      }
    }
    return collected;
  } catch {
    // A missing directory (ENOENT) and every other error are handled alike:
    // report no previous failures.
    return [];
  }
}
@@ -0,0 +1,3 @@
1
/**
 * Makes a job id usable as a file name: every UTF-16 code unit other than
 * ASCII letters, digits, `.`, `_` and `-` is replaced with `_`.
 */
export function sanitizeJobId(jobId: string): string {
  const isSafe = (unit: string): boolean => /^[a-zA-Z0-9._-]$/.test(unit);
  return jobId
    .split('')
    .map(unit => (isSafe(unit) ? unit : '_'))
    .join('');
}