agent-gauntlet 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +55 -87
  2. package/package.json +4 -2
  3. package/src/bun-plugins.d.ts +4 -0
  4. package/src/cli-adapters/claude.ts +139 -108
  5. package/src/cli-adapters/codex.ts +141 -117
  6. package/src/cli-adapters/cursor.ts +152 -0
  7. package/src/cli-adapters/gemini.ts +171 -139
  8. package/src/cli-adapters/github-copilot.ts +153 -0
  9. package/src/cli-adapters/index.ts +77 -48
  10. package/src/commands/check.test.ts +24 -20
  11. package/src/commands/check.ts +86 -59
  12. package/src/commands/ci/index.ts +15 -0
  13. package/src/commands/ci/init.ts +96 -0
  14. package/src/commands/ci/list-jobs.ts +78 -0
  15. package/src/commands/detect.test.ts +38 -32
  16. package/src/commands/detect.ts +89 -61
  17. package/src/commands/health.test.ts +67 -53
  18. package/src/commands/health.ts +167 -145
  19. package/src/commands/help.test.ts +37 -37
  20. package/src/commands/help.ts +31 -22
  21. package/src/commands/index.ts +10 -9
  22. package/src/commands/init.test.ts +120 -107
  23. package/src/commands/init.ts +514 -417
  24. package/src/commands/list.test.ts +87 -70
  25. package/src/commands/list.ts +28 -24
  26. package/src/commands/rerun.ts +157 -119
  27. package/src/commands/review.test.ts +26 -20
  28. package/src/commands/review.ts +86 -59
  29. package/src/commands/run.test.ts +22 -20
  30. package/src/commands/run.ts +85 -58
  31. package/src/commands/shared.ts +44 -35
  32. package/src/config/ci-loader.ts +33 -0
  33. package/src/config/ci-schema.ts +52 -0
  34. package/src/config/loader.test.ts +112 -90
  35. package/src/config/loader.ts +132 -123
  36. package/src/config/schema.ts +48 -47
  37. package/src/config/types.ts +28 -13
  38. package/src/config/validator.ts +521 -454
  39. package/src/core/change-detector.ts +122 -104
  40. package/src/core/entry-point.test.ts +60 -62
  41. package/src/core/entry-point.ts +120 -74
  42. package/src/core/job.ts +69 -59
  43. package/src/core/runner.ts +264 -230
  44. package/src/gates/check.ts +78 -69
  45. package/src/gates/result.ts +7 -7
  46. package/src/gates/review.test.ts +277 -138
  47. package/src/gates/review.ts +724 -561
  48. package/src/index.ts +18 -15
  49. package/src/output/console.ts +253 -214
  50. package/src/output/logger.ts +66 -52
  51. package/src/templates/run_gauntlet.template.md +18 -0
  52. package/src/templates/workflow.yml +77 -0
  53. package/src/utils/diff-parser.ts +64 -62
  54. package/src/utils/log-parser.ts +227 -206
  55. package/src/utils/sanitizer.ts +1 -1
@@ -1,228 +1,249 @@
1
- import fs from 'fs/promises';
2
- import path from 'path';
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
3
 
4
4
  export interface PreviousViolation {
5
- file: string;
6
- line: number | string;
7
- issue: string;
8
- fix?: string;
5
+ file: string;
6
+ line: number | string;
7
+ issue: string;
8
+ fix?: string;
9
9
  }
10
10
 
11
11
  export interface AdapterFailure {
12
- adapterName: string; // e.g., 'claude', 'gemini'
13
- violations: PreviousViolation[];
12
+ adapterName: string; // e.g., 'claude', 'gemini'
13
+ violations: PreviousViolation[];
14
14
  }
15
15
 
16
16
  export interface GateFailures {
17
- jobId: string; // This will be the sanitized Job ID (filename without extension)
18
- gateName: string; // Parsed or empty
19
- entryPoint: string; // Parsed or empty
20
- adapterFailures: AdapterFailure[]; // Failures grouped by adapter
21
- logPath: string;
17
+ jobId: string; // This will be the sanitized Job ID (filename without extension)
18
+ gateName: string; // Parsed or empty
19
+ entryPoint: string; // Parsed or empty
20
+ adapterFailures: AdapterFailure[]; // Failures grouped by adapter
21
+ logPath: string;
22
22
  }
23
23
 
24
24
  /**
25
25
  * Parses a single log file to extract failures per adapter.
26
26
  * Only processes review gates (ignores check gates).
27
27
  */
28
- export async function parseLogFile(logPath: string): Promise<GateFailures | null> {
29
- try {
30
- const content = await fs.readFile(logPath, 'utf-8');
31
- const filename = path.basename(logPath);
32
-
33
- // Check if it's a review log by content marker
34
- if (!content.includes('--- Review Output')) {
35
- return null;
36
- }
37
-
38
- // Use the sanitized filename as the Job ID key
39
- const jobId = filename.replace(/\.log$/, '');
40
-
41
- // We can't reliably parse entryPoint/gateName from sanitized filename
42
- // leaving them empty for now as they aren't critical for the map lookup
43
- const gateName = '';
44
- const entryPoint = '';
45
-
46
- const adapterFailures: AdapterFailure[] = [];
47
-
48
- // Split by sections using `--- Review Output (adapterName) ---` markers
49
- const sectionRegex = /--- Review Output \(([^)]+)\) ---/g;
50
-
51
- let match;
52
- const sections: { adapter: string, startIndex: number }[] = [];
53
-
54
- while ((match = sectionRegex.exec(content)) !== null) {
55
- sections.push({
56
- adapter: match[1],
57
- startIndex: match.index
58
- });
59
- }
60
-
61
- if (sections.length === 0) {
62
- return null;
63
- }
64
-
65
- for (let i = 0; i < sections.length; i++) {
66
- const currentSection = sections[i];
67
- const nextSection = sections[i + 1];
68
- const endIndex = nextSection ? nextSection.startIndex : content.length;
69
- const sectionContent = content.substring(currentSection.startIndex, endIndex);
70
-
71
- const violations: PreviousViolation[] = [];
72
-
73
- // 1. Look for "--- Parsed Result ---"
74
- const parsedResultMatch = sectionContent.match(/---\s*Parsed Result(?:\s+\(([^)]+)\))?\s*---([\s\S]*?)(?:$|---)/);
75
-
76
- if (parsedResultMatch) {
77
- const parsedContent = parsedResultMatch[2];
78
-
79
- // Check status
80
- if (parsedContent.includes('Status: PASS')) {
81
- continue; // No violations for this adapter
82
- }
83
-
84
- // Extract violations
85
- // Pattern: 1. src/app.ts:42 - Missing error handling
86
- // Pattern: 1. src/app.ts:? - Missing error handling
87
- // Pattern: 1. src/app.ts:NaN - Missing error handling
88
- /**
89
- * Extract violations from the parsed result section.
90
- * Pattern matches "1. file:line - issue" where line can be a number, NaN, or ?.
91
- */
92
- const violationRegex = /^\d+\.\s+(.+?):(\d+|NaN|\?)\s+-\s+(.+)$/gm;
93
- let vMatch;
94
-
95
- while ((vMatch = violationRegex.exec(parsedContent)) !== null) {
96
- const file = vMatch[1].trim();
97
- let line: number | string = vMatch[2];
98
- if (line !== 'NaN' && line !== '?') {
99
- line = parseInt(line, 10);
100
- }
101
- const issue = vMatch[3].trim();
102
-
103
- // Look for fix in the next line(s)
104
- let fix = undefined;
105
- const remainder = parsedContent.substring(vMatch.index + vMatch[0].length);
106
-
107
- const fixMatch = remainder.match(/^\s+Fix:\s+(.+)$/m);
108
- const nextViolationIndex = remainder.search(/^\d+\./m);
109
-
110
- const isFixBelongingToCurrentViolation = fixMatch?.index !== undefined &&
111
- (nextViolationIndex === -1 || fixMatch.index < nextViolationIndex);
112
-
113
- if (isFixBelongingToCurrentViolation && fixMatch) {
114
- fix = fixMatch[1].trim();
115
- }
116
-
117
- violations.push({
118
- file,
119
- line,
120
- issue,
121
- fix
122
- });
123
- }
124
- } else {
125
- // Fallback: Try to parse JSON
126
- // Extract JSON using first '{' and last '}' to capture the full object
127
- const firstBrace = sectionContent.indexOf('{');
128
- const lastBrace = sectionContent.lastIndexOf('}');
129
-
130
- if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
131
- try {
132
- const jsonStr = sectionContent.substring(firstBrace, lastBrace + 1);
133
- // Try to find the valid JSON object
134
- const json = JSON.parse(jsonStr);
135
-
136
- if (json.violations && Array.isArray(json.violations)) {
137
- for (const v of json.violations) {
138
- if (v.file && v.issue) {
139
- violations.push({
140
- file: v.file,
141
- line: v.line || 0,
142
- issue: v.issue,
143
- fix: v.fix
144
- });
145
- }
146
- }
147
- }
148
- } catch (e: any) {
149
- // Log warning for debugging (commented out to reduce noise in production)
150
- // console.warn(`Warning: Failed to parse JSON for ${currentSection.adapter} in ${jobId}: ${e.message}`);
151
- }
152
- }
153
- }
154
- if (violations.length > 0) {
155
- adapterFailures.push({
156
- adapterName: currentSection.adapter,
157
- violations
158
- });
159
- } else if (parsedResultMatch && parsedResultMatch[1].includes('Status: FAIL')) {
160
- // Track failure even if violations couldn't be parsed
161
- adapterFailures.push({
162
- adapterName: currentSection.adapter,
163
- violations: [{
164
- file: 'unknown',
165
- line: '?',
166
- issue: 'Previous run failed but specific violations could not be parsed'
167
- }]
168
- });
169
- }
170
- }
171
-
172
- if (adapterFailures.length === 0) {
173
- return null;
174
- }
175
-
176
- return {
177
- jobId,
178
- gateName,
179
- entryPoint,
180
- adapterFailures,
181
- logPath
182
- };
183
-
184
- } catch (error) {
185
- // console.warn(`Error parsing log file ${logPath}:`, error);
186
- return null;
187
- }
28
+ export async function parseLogFile(
29
+ logPath: string,
30
+ ): Promise<GateFailures | null> {
31
+ try {
32
+ const content = await fs.readFile(logPath, "utf-8");
33
+ const filename = path.basename(logPath);
34
+
35
+ // Check if it's a review log by content marker
36
+ if (!content.includes("--- Review Output")) {
37
+ return null;
38
+ }
39
+
40
+ // Use the sanitized filename as the Job ID key
41
+ const jobId = filename.replace(/\.log$/, "");
42
+
43
+ // We can't reliably parse entryPoint/gateName from sanitized filename
44
+ // leaving them empty for now as they aren't critical for the map lookup
45
+ const gateName = "";
46
+ const entryPoint = "";
47
+
48
+ const adapterFailures: AdapterFailure[] = [];
49
+
50
+ // Split by sections using `--- Review Output (adapterName) ---` markers
51
+ const sectionRegex = /--- Review Output \(([^)]+)\) ---/g;
52
+
53
+ let match: RegExpExecArray | null;
54
+ const sections: { adapter: string; startIndex: number }[] = [];
55
+
56
+ for (;;) {
57
+ match = sectionRegex.exec(content);
58
+ if (!match) break;
59
+ sections.push({
60
+ adapter: match[1],
61
+ startIndex: match.index,
62
+ });
63
+ }
64
+
65
+ if (sections.length === 0) {
66
+ return null;
67
+ }
68
+
69
+ for (let i = 0; i < sections.length; i++) {
70
+ const currentSection = sections[i];
71
+ const nextSection = sections[i + 1];
72
+ const endIndex = nextSection ? nextSection.startIndex : content.length;
73
+ const sectionContent = content.substring(
74
+ currentSection.startIndex,
75
+ endIndex,
76
+ );
77
+
78
+ const violations: PreviousViolation[] = [];
79
+
80
+ // 1. Look for "--- Parsed Result ---"
81
+ const parsedResultMatch = sectionContent.match(
82
+ /---\s*Parsed Result(?:\s+\(([^)]+)\))?\s*---([\s\S]*?)(?:$|---)/,
83
+ );
84
+
85
+ if (parsedResultMatch) {
86
+ const parsedContent = parsedResultMatch[2];
87
+
88
+ // Check status
89
+ if (parsedContent.includes("Status: PASS")) {
90
+ continue; // No violations for this adapter
91
+ }
92
+
93
+ // Extract violations
94
+ // Pattern: 1. src/app.ts:42 - Missing error handling
95
+ // Pattern: 1. src/app.ts:? - Missing error handling
96
+ // Pattern: 1. src/app.ts:NaN - Missing error handling
97
+ /**
98
+ * Extract violations from the parsed result section.
99
+ * Pattern matches "1. file:line - issue" where line can be a number, NaN, or ?.
100
+ */
101
+ const violationRegex = /^\d+\.\s+(.+?):(\d+|NaN|\?)\s+-\s+(.+)$/gm;
102
+ let vMatch: RegExpExecArray | null;
103
+
104
+ for (;;) {
105
+ vMatch = violationRegex.exec(parsedContent);
106
+ if (!vMatch) break;
107
+ const file = vMatch[1].trim();
108
+ let line: number | string = vMatch[2];
109
+ if (line !== "NaN" && line !== "?") {
110
+ line = parseInt(line, 10);
111
+ }
112
+ const issue = vMatch[3].trim();
113
+
114
+ // Look for fix in the next line(s)
115
+ let fix: string | undefined;
116
+ const remainder = parsedContent.substring(
117
+ vMatch.index + vMatch[0].length,
118
+ );
119
+
120
+ const fixMatch = remainder.match(/^\s+Fix:\s+(.+)$/m);
121
+ const nextViolationIndex = remainder.search(/^\d+\./m);
122
+
123
+ const isFixBelongingToCurrentViolation =
124
+ fixMatch?.index !== undefined &&
125
+ (nextViolationIndex === -1 || fixMatch.index < nextViolationIndex);
126
+
127
+ if (isFixBelongingToCurrentViolation && fixMatch) {
128
+ fix = fixMatch[1].trim();
129
+ }
130
+
131
+ violations.push({
132
+ file,
133
+ line,
134
+ issue,
135
+ fix,
136
+ });
137
+ }
138
+ } else {
139
+ // Fallback: Try to parse JSON
140
+ // Extract JSON using first '{' and last '}' to capture the full object
141
+ const firstBrace = sectionContent.indexOf("{");
142
+ const lastBrace = sectionContent.lastIndexOf("}");
143
+
144
+ if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
145
+ try {
146
+ const jsonStr = sectionContent.substring(firstBrace, lastBrace + 1);
147
+ // Try to find the valid JSON object
148
+ const json = JSON.parse(jsonStr);
149
+
150
+ if (json.violations && Array.isArray(json.violations)) {
151
+ for (const v of json.violations) {
152
+ if (v.file && v.issue) {
153
+ violations.push({
154
+ file: v.file,
155
+ line: v.line || 0,
156
+ issue: v.issue,
157
+ fix: v.fix,
158
+ });
159
+ }
160
+ }
161
+ }
162
+ } catch (_e: unknown) {
163
+ // Log warning for debugging (commented out to reduce noise in production)
164
+ // console.warn(`Warning: Failed to parse JSON for ${currentSection.adapter} in ${jobId}: ${e.message}`);
165
+ }
166
+ }
167
+ }
168
+ if (violations.length > 0) {
169
+ adapterFailures.push({
170
+ adapterName: currentSection.adapter,
171
+ violations,
172
+ });
173
+ } else if (parsedResultMatch?.[1].includes("Status: FAIL")) {
174
+ // Track failure even if violations couldn't be parsed
175
+ adapterFailures.push({
176
+ adapterName: currentSection.adapter,
177
+ violations: [
178
+ {
179
+ file: "unknown",
180
+ line: "?",
181
+ issue:
182
+ "Previous run failed but specific violations could not be parsed",
183
+ },
184
+ ],
185
+ });
186
+ }
187
+ }
188
+
189
+ if (adapterFailures.length === 0) {
190
+ return null;
191
+ }
192
+
193
+ return {
194
+ jobId,
195
+ gateName,
196
+ entryPoint,
197
+ adapterFailures,
198
+ logPath,
199
+ };
200
+ } catch (_error) {
201
+ // console.warn(`Error parsing log file ${logPath}:`, error);
202
+ return null;
203
+ }
188
204
  }
189
205
 
190
206
  /**
191
207
  * Finds all previous failures from the log directory.
192
208
  */
193
209
  export async function findPreviousFailures(
194
- logDir: string,
195
- gateFilter?: string
210
+ logDir: string,
211
+ gateFilter?: string,
196
212
  ): Promise<GateFailures[]> {
197
- try {
198
- const files = await fs.readdir(logDir);
199
- const gateFailures: GateFailures[] = [];
200
-
201
- for (const file of files) {
202
- if (!file.endsWith('.log')) continue;
203
-
204
- // If gate filter provided, check if filename matches
205
- // filename is sanitized, so we do a loose check
206
- if (gateFilter && !file.includes(gateFilter)) {
207
- continue;
208
- }
209
-
210
- const logPath = path.join(logDir, file);
211
- const failure = await parseLogFile(logPath);
212
-
213
- if (failure) {
214
- gateFailures.push(failure);
215
- }
216
- }
217
-
218
- return gateFailures;
219
- } catch (error: any) {
220
- // If directory doesn't exist, return empty
221
- if (typeof error === 'object' && error !== null && 'code' in error && (error as any).code === 'ENOENT') {
222
- return [];
223
- }
224
- // Otherwise log and return empty
225
- // console.warn(`Error reading log directory ${logDir}:`, error instanceof Error ? error.message : String(error));
226
- return [];
227
- }
213
+ try {
214
+ const files = await fs.readdir(logDir);
215
+ const gateFailures: GateFailures[] = [];
216
+
217
+ for (const file of files) {
218
+ if (!file.endsWith(".log")) continue;
219
+
220
+ // If gate filter provided, check if filename matches
221
+ // filename is sanitized, so we do a loose check
222
+ if (gateFilter && !file.includes(gateFilter)) {
223
+ continue;
224
+ }
225
+
226
+ const logPath = path.join(logDir, file);
227
+ const failure = await parseLogFile(logPath);
228
+
229
+ if (failure) {
230
+ gateFailures.push(failure);
231
+ }
232
+ }
233
+
234
+ return gateFailures;
235
+ } catch (error: unknown) {
236
+ // If directory doesn't exist, return empty
237
+ if (
238
+ typeof error === "object" &&
239
+ error !== null &&
240
+ "code" in error &&
241
+ (error as { code: string }).code === "ENOENT"
242
+ ) {
243
+ return [];
244
+ }
245
+ // Otherwise log and return empty
246
+ // console.warn(`Error reading log directory ${logDir}:`, error instanceof Error ? error.message : String(error));
247
+ return [];
248
+ }
228
249
  }
@@ -1,3 +1,3 @@
1
1
  export function sanitizeJobId(jobId: string): string {
2
- return jobId.replace(/[^a-zA-Z0-9._-]/g, '_');
2
+ return jobId.replace(/[^a-zA-Z0-9._-]/g, "_");
3
3
  }