agent-gauntlet 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +4 -2
- package/src/cli-adapters/claude.ts +139 -108
- package/src/cli-adapters/codex.ts +141 -117
- package/src/cli-adapters/cursor.ts +152 -0
- package/src/cli-adapters/gemini.ts +171 -139
- package/src/cli-adapters/github-copilot.ts +153 -0
- package/src/cli-adapters/index.ts +77 -48
- package/src/commands/check.test.ts +24 -20
- package/src/commands/check.ts +65 -59
- package/src/commands/detect.test.ts +38 -32
- package/src/commands/detect.ts +74 -61
- package/src/commands/health.test.ts +67 -53
- package/src/commands/health.ts +167 -145
- package/src/commands/help.test.ts +37 -37
- package/src/commands/help.ts +30 -22
- package/src/commands/index.ts +9 -9
- package/src/commands/init.test.ts +118 -107
- package/src/commands/init.ts +514 -417
- package/src/commands/list.test.ts +87 -70
- package/src/commands/list.ts +28 -24
- package/src/commands/rerun.ts +142 -119
- package/src/commands/review.test.ts +26 -20
- package/src/commands/review.ts +65 -59
- package/src/commands/run.test.ts +22 -20
- package/src/commands/run.ts +64 -58
- package/src/commands/shared.ts +44 -35
- package/src/config/loader.test.ts +112 -90
- package/src/config/loader.ts +132 -123
- package/src/config/schema.ts +49 -47
- package/src/config/types.ts +15 -13
- package/src/config/validator.ts +521 -454
- package/src/core/change-detector.ts +122 -104
- package/src/core/entry-point.test.ts +60 -62
- package/src/core/entry-point.ts +76 -67
- package/src/core/job.ts +69 -59
- package/src/core/runner.ts +261 -230
- package/src/gates/check.ts +78 -69
- package/src/gates/result.ts +7 -7
- package/src/gates/review.test.ts +174 -138
- package/src/gates/review.ts +716 -561
- package/src/index.ts +16 -15
- package/src/output/console.ts +253 -214
- package/src/output/logger.ts +64 -52
- package/src/templates/run_gauntlet.template.md +18 -0
- package/src/utils/diff-parser.ts +64 -62
- package/src/utils/log-parser.ts +227 -206
- package/src/utils/sanitizer.ts +1 -1
package/src/utils/log-parser.ts
CHANGED
|
@@ -1,228 +1,249 @@
|
|
|
1
|
-
import fs from
|
|
2
|
-
import path from
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
3
|
|
|
4
4
|
/**
 * One violation reported by a reviewer in an earlier run, as recovered from
 * that run's log file.
 */
export interface PreviousViolation {
  /** Path of the file the violation was reported against. */
  file: string;
  /**
   * Line number when the log contained a numeric one; otherwise the raw
   * marker text from the log (e.g. "NaN" or "?").
   */
  line: number | string;
  /** Description of the problem. */
  issue: string;
  /** Suggested fix, when the log supplied one. */
  fix?: string;
}
|
|
10
10
|
|
|
11
11
|
/** The violations that a single review adapter reported for one job. */
export interface AdapterFailure {
  /** Name of the adapter that produced the review, e.g. 'claude', 'gemini'. */
  adapterName: string;
  /** Violations recovered from that adapter's section of the log. */
  violations: PreviousViolation[];
}
|
|
15
15
|
|
|
16
16
|
/** Everything recovered from one review log file that contained failures. */
export interface GateFailures {
  /** The sanitized Job ID, i.e. the log filename without its extension. */
  jobId: string;
  /** Gate name when it could be parsed; otherwise an empty string. */
  gateName: string;
  /** Entry point when it could be parsed; otherwise an empty string. */
  entryPoint: string;
  /** Failures grouped by the adapter that reported them. */
  adapterFailures: AdapterFailure[];
  /** Path of the log file these failures were read from. */
  logPath: string;
}
|
|
23
23
|
|
|
24
24
|
/**
 * Extracts the numbered violations listed in the body of a
 * "--- Parsed Result ---" section.
 *
 * Recognised shape: `1. file:line - issue`, where `line` is a number, `NaN`
 * or `?`. An indented `Fix: ...` line is attached to a violation only when it
 * appears before the next numbered entry.
 */
function extractListedViolations(parsedContent: string): PreviousViolation[] {
  const violations: PreviousViolation[] = [];
  const violationRegex = /^\d+\.\s+(.+?):(\d+|NaN|\?)\s+-\s+(.+)$/gm;

  for (const vMatch of parsedContent.matchAll(violationRegex)) {
    const rawLine = vMatch[2];
    // Keep the "NaN" / "?" markers verbatim; only real digits become numbers.
    const line: number | string =
      rawLine === "NaN" || rawLine === "?" ? rawLine : parseInt(rawLine, 10);

    // Everything after this violation's own line.
    const remainder = parsedContent.substring(
      (vMatch.index ?? 0) + vMatch[0].length,
    );
    const fixMatch = remainder.match(/^\s+Fix:\s+(.+)$/m);
    const nextViolationIndex = remainder.search(/^\d+\./m);

    // A "Fix:" found after the next numbered entry belongs to that entry.
    let fix: string | undefined;
    if (
      fixMatch?.index !== undefined &&
      (nextViolationIndex === -1 || fixMatch.index < nextViolationIndex)
    ) {
      fix = fixMatch[1].trim();
    }

    violations.push({
      file: vMatch[1].trim(),
      line,
      issue: vMatch[3].trim(),
      fix,
    });
  }

  return violations;
}

/**
 * Fallback for sections without a "Parsed Result" block: treats the text
 * between the first `{` and the last `}` as JSON and reads its `violations`
 * array. Returns whatever could be collected; malformed JSON yields `[]`.
 */
function extractJsonViolations(sectionContent: string): PreviousViolation[] {
  const violations: PreviousViolation[] = [];
  const firstBrace = sectionContent.indexOf("{");
  const lastBrace = sectionContent.lastIndexOf("}");

  if (firstBrace === -1 || lastBrace <= firstBrace) {
    return violations;
  }

  try {
    const parsed = JSON.parse(
      sectionContent.substring(firstBrace, lastBrace + 1),
    ) as { violations?: unknown };

    if (Array.isArray(parsed.violations)) {
      for (const v of parsed.violations) {
        // Skip null/primitive entries and entries missing the required fields.
        if (v && v.file && v.issue) {
          violations.push({
            file: v.file,
            line: v.line || 0,
            issue: v.issue,
            fix: v.fix,
          });
        }
      }
    }
  } catch (_e: unknown) {
    // The section did not contain valid JSON; deliberately best-effort, so
    // keep whatever was collected (normally nothing).
  }

  return violations;
}

/**
 * Parses a single log file to extract failures per adapter.
 * Only processes review gates (ignores check gates): a file without a
 * "--- Review Output" marker is skipped.
 *
 * The log is split into sections at each `--- Review Output (adapter) ---`
 * header. For every section:
 *  - a "Parsed Result" block containing "Status: PASS" contributes nothing;
 *  - otherwise its numbered violations are collected;
 *  - if the block contains "Status: FAIL" but no violation could be parsed, a
 *    single placeholder violation is recorded so the failure is not lost;
 *  - a section with no "Parsed Result" block falls back to embedded JSON.
 *
 * @param logPath Path of the log file to read.
 * @returns The failures found, or `null` when the file is not a review log,
 *   contains no failures, or cannot be read or parsed (errors are swallowed).
 */
export async function parseLogFile(
  logPath: string,
): Promise<GateFailures | null> {
  try {
    const content = await fs.readFile(logPath, "utf-8");

    // Check if it's a review log by content marker
    if (!content.includes("--- Review Output")) {
      return null;
    }

    // Use the sanitized filename as the Job ID key
    const jobId = path.basename(logPath).replace(/\.log$/, "");

    // Section headers look like `--- Review Output (adapterName) ---`.
    const sections = Array.from(
      content.matchAll(/--- Review Output \(([^)]+)\) ---/g),
      (header) => ({ adapter: header[1], startIndex: header.index ?? 0 }),
    );
    if (sections.length === 0) {
      return null;
    }

    const adapterFailures: AdapterFailure[] = [];

    for (let i = 0; i < sections.length; i++) {
      const currentSection = sections[i];
      // A section runs up to the next header, or to the end of the file.
      const endIndex = sections[i + 1]?.startIndex ?? content.length;
      const sectionContent = content.substring(
        currentSection.startIndex,
        endIndex,
      );

      // Group 1: optional "(name)" suffix of the header. Group 2: the body.
      const parsedResultMatch = sectionContent.match(
        /---\s*Parsed Result(?:\s+\(([^)]+)\))?\s*---([\s\S]*?)(?:$|---)/,
      );

      let violations: PreviousViolation[];
      if (parsedResultMatch) {
        const parsedContent = parsedResultMatch[2];

        if (parsedContent.includes("Status: PASS")) {
          continue; // No violations for this adapter
        }

        violations = extractListedViolations(parsedContent);

        // Track failure even if violations couldn't be parsed. The status
        // lives in the section body (group 2); group 1 is only the optional
        // header suffix and is undefined when that suffix is absent.
        if (violations.length === 0 && parsedContent.includes("Status: FAIL")) {
          violations = [
            {
              file: "unknown",
              line: "?",
              issue:
                "Previous run failed but specific violations could not be parsed",
            },
          ];
        }
      } else {
        violations = extractJsonViolations(sectionContent);
      }

      if (violations.length > 0) {
        adapterFailures.push({
          adapterName: currentSection.adapter,
          violations,
        });
      }
    }

    if (adapterFailures.length === 0) {
      return null;
    }

    return {
      jobId,
      // We can't reliably parse entryPoint/gateName from the sanitized
      // filename; they are left empty as they aren't needed for map lookup.
      gateName: "",
      entryPoint: "",
      adapterFailures,
      logPath,
    };
  } catch (_error: unknown) {
    // Unreadable or unparseable logs are treated as "no previous failures".
    return null;
  }
}
|
|
189
205
|
|
|
190
206
|
/**
 * Finds all previous failures from the log directory.
 *
 * Every `*.log` entry in `logDir` is handed to `parseLogFile`, one at a time
 * in directory-listing order, and the non-null results are collected.
 *
 * @param logDir Directory that holds the log files.
 * @param gateFilter Optional substring a filename must contain to be
 *   considered. Filenames are sanitized, so this is only a loose match.
 * @returns The failures recovered from the matching logs; an empty array when
 *   the directory is missing or cannot be read.
 */
export async function findPreviousFailures(
  logDir: string,
  gateFilter?: string,
): Promise<GateFailures[]> {
  try {
    const candidates = (await fs.readdir(logDir)).filter(
      (name) =>
        name.endsWith(".log") && (!gateFilter || name.includes(gateFilter)),
    );

    const collected: GateFailures[] = [];
    for (const name of candidates) {
      const parsed = await parseLogFile(path.join(logDir, name));
      if (parsed) {
        collected.push(parsed);
      }
    }

    return collected;
  } catch (_error: unknown) {
    // A missing directory (ENOENT) and any other failure are handled the same
    // way: report that there are no previous failures.
    return [];
  }
}
|
package/src/utils/sanitizer.ts
CHANGED