agent-gauntlet 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -87
- package/package.json +4 -2
- package/src/bun-plugins.d.ts +4 -0
- package/src/cli-adapters/claude.ts +139 -108
- package/src/cli-adapters/codex.ts +141 -117
- package/src/cli-adapters/cursor.ts +152 -0
- package/src/cli-adapters/gemini.ts +171 -139
- package/src/cli-adapters/github-copilot.ts +153 -0
- package/src/cli-adapters/index.ts +77 -48
- package/src/commands/check.test.ts +24 -20
- package/src/commands/check.ts +86 -59
- package/src/commands/ci/index.ts +15 -0
- package/src/commands/ci/init.ts +96 -0
- package/src/commands/ci/list-jobs.ts +78 -0
- package/src/commands/detect.test.ts +38 -32
- package/src/commands/detect.ts +89 -61
- package/src/commands/health.test.ts +67 -53
- package/src/commands/health.ts +167 -145
- package/src/commands/help.test.ts +37 -37
- package/src/commands/help.ts +31 -22
- package/src/commands/index.ts +10 -9
- package/src/commands/init.test.ts +120 -107
- package/src/commands/init.ts +514 -417
- package/src/commands/list.test.ts +87 -70
- package/src/commands/list.ts +28 -24
- package/src/commands/rerun.ts +157 -119
- package/src/commands/review.test.ts +26 -20
- package/src/commands/review.ts +86 -59
- package/src/commands/run.test.ts +22 -20
- package/src/commands/run.ts +85 -58
- package/src/commands/shared.ts +44 -35
- package/src/config/ci-loader.ts +33 -0
- package/src/config/ci-schema.ts +52 -0
- package/src/config/loader.test.ts +112 -90
- package/src/config/loader.ts +132 -123
- package/src/config/schema.ts +48 -47
- package/src/config/types.ts +28 -13
- package/src/config/validator.ts +521 -454
- package/src/core/change-detector.ts +122 -104
- package/src/core/entry-point.test.ts +60 -62
- package/src/core/entry-point.ts +120 -74
- package/src/core/job.ts +69 -59
- package/src/core/runner.ts +264 -230
- package/src/gates/check.ts +78 -69
- package/src/gates/result.ts +7 -7
- package/src/gates/review.test.ts +277 -138
- package/src/gates/review.ts +724 -561
- package/src/index.ts +18 -15
- package/src/output/console.ts +253 -214
- package/src/output/logger.ts +66 -52
- package/src/templates/run_gauntlet.template.md +18 -0
- package/src/templates/workflow.yml +77 -0
- package/src/utils/diff-parser.ts +64 -62
- package/src/utils/log-parser.ts +227 -206
- package/src/utils/sanitizer.ts +1 -1
package/src/index.ts
CHANGED
|
@@ -1,28 +1,31 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
|
-
import { Command } from
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import packageJson from "../package.json" with { type: "json" };
|
|
3
4
|
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
5
|
+
registerCheckCommand,
|
|
6
|
+
registerCICommand,
|
|
7
|
+
registerDetectCommand,
|
|
8
|
+
registerHealthCommand,
|
|
9
|
+
registerHelpCommand,
|
|
10
|
+
registerInitCommand,
|
|
11
|
+
registerListCommand,
|
|
12
|
+
registerRerunCommand,
|
|
13
|
+
registerReviewCommand,
|
|
14
|
+
registerRunCommand,
|
|
15
|
+
} from "./commands/index.js";
|
|
14
16
|
|
|
15
17
|
// Root CLI command. The reported version is read from package.json.
const program = new Command();
program.name("agent-gauntlet");
program.description("AI-assisted quality gates");
program.version(packageJson.version);

// Register all commands
const commandRegistrars = [
  registerRunCommand,
  registerRerunCommand,
  registerCheckCommand,
  registerCICommand,
  registerReviewCommand,
  registerDetectCommand,
  registerListCommand,
];
for (const register of commandRegistrars) {
  register(program);
}
|
|
@@ -32,7 +35,7 @@ registerHelpCommand(program);
|
|
|
32
35
|
|
|
33
36
|
// Default action: help
// argv[0] and argv[1] are the runtime and the script path, so fewer than
// three entries means no subcommand was supplied.
const hasSubcommand = process.argv.length >= 3;
if (!hasSubcommand) {
  process.argv.push("help");
}

program.parse(process.argv);
|
package/src/output/console.ts
CHANGED
|
@@ -1,217 +1,256 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import chalk from "chalk";
|
|
3
|
+
import type { Job } from "../core/job.js";
|
|
4
|
+
import type { GateResult } from "../gates/result.js";
|
|
5
5
|
|
|
6
6
|
/**
 * Prints gate progress and a final summary to the console.
 *
 * Per-job lines are emitted through {@link ConsoleReporter.onJobStart} and
 * {@link ConsoleReporter.onJobComplete}; {@link ConsoleReporter.printSummary}
 * prints totals and, for every failed or errored result, details extracted
 * from that result's log file(s).
 */
export class ConsoleReporter {
  /** Matches "--- Parsed Result ---" or "--- Parsed Result (adapter) ---". */
  private static readonly PARSED_RESULT_HEADER =
    /---\s*Parsed Result(?:\s+\(([^)]+)\))?\s*---/;

  /** Matches numbered violation lines: "1. file:line - issue" (line is a number or '?'). */
  private static readonly VIOLATION_LINE = /^\d+\.\s+(.+?):(\d+|\?)\s+-\s+(.+)$/;

  /** Maximum number of STDERR lines echoed for a failed check. */
  private static readonly MAX_STDERR_LINES = 10;

  /** Prints a `[START]` line for the given job. */
  onJobStart(job: Job) {
    console.log(chalk.blue(`[START] ${job.id}`));
  }

  /**
   * Prints a `[PASS]`, `[FAIL]` or `[ERROR]` line for a finished job.
   *
   * `result.duration` is divided by 1000 and shown with an "s" suffix. Any
   * status other than "pass" or "fail" is reported as an error.
   */
  onJobComplete(job: Job, result: GateResult) {
    const duration = `${(result.duration / 1000).toFixed(2)}s`;
    const message = result.message ?? "";

    if (result.status === "pass") {
      console.log(chalk.green(`[PASS] ${job.id} (${duration})`));
    } else if (result.status === "fail") {
      console.log(chalk.red(`[FAIL] ${job.id} (${duration}) - ${message}`));
    } else {
      console.log(
        chalk.magenta(`[ERROR] ${job.id} (${duration}) - ${message}`),
      );
    }
  }

  /**
   * Prints totals per status, followed by a "Failure Details" section when at
   * least one result failed or errored. Failed results are listed before
   * errored ones.
   */
  async printSummary(results: GateResult[]) {
    console.log(`\n${chalk.bold("--- Gauntlet Summary ---")}`);

    const passed = results.filter((r) => r.status === "pass");
    const failed = results.filter((r) => r.status === "fail");
    const errored = results.filter((r) => r.status === "error");

    console.log(`Total: ${results.length}`);
    console.log(chalk.green(`Passed: ${passed.length}`));
    if (failed.length > 0) console.log(chalk.red(`Failed: ${failed.length}`));
    if (errored.length > 0)
      console.log(chalk.magenta(`Errored: ${errored.length}`));

    if (failed.length > 0 || errored.length > 0) {
      console.log(`\n${chalk.bold("=== Failure Details ===\n")}`);

      for (const result of [...failed, ...errored]) {
        const details = await this.extractFailureDetails(result);
        this.printFailureDetails(result, details);
      }
    }
  }

  /**
   * Reads the log file(s) referenced by a result and returns printable
   * detail lines.
   *
   * Falls back to `result.message` (or "Unknown error") when the result
   * references no log file. An unreadable log file contributes a
   * "(Could not read log file: ...)" line instead of failing.
   *
   * @internal Public for testing
   */
  async extractFailureDetails(result: GateResult): Promise<string[]> {
    // An empty `logPaths` array must not hide a populated `logPath`; this
    // mirrors the precedence used by printFailureDetails.
    const logPaths =
      result.logPaths && result.logPaths.length > 0
        ? result.logPaths
        : result.logPath
          ? [result.logPath]
          : [];

    if (logPaths.length === 0) {
      return [result.message ?? "Unknown error"];
    }

    const allDetails: string[] = [];
    for (const logPath of logPaths) {
      try {
        const logContent = await fs.readFile(logPath, "utf-8");
        allDetails.push(...this.parseLogContent(logContent, result.jobId));
      } catch (_error: unknown) {
        // Best effort: a missing or unreadable log must not abort the summary.
        allDetails.push(`(Could not read log file: ${logPath})`);
      }
    }

    return allDetails.length > 0
      ? allDetails
      : [result.message ?? "Unknown error"];
  }

  /**
   * Turns raw log text into detail lines. Job ids starting with "review:"
   * are parsed as review logs, everything else as check logs. Always returns
   * at least one line.
   */
  private parseLogContent(logContent: string, jobId: string): string[] {
    const details = jobId.startsWith("review:")
      ? this.parseReviewLog(logContent)
      : this.parseCheckLog(logContent);

    // Nothing recognisable in the log: point the reader at the file itself.
    if (details.length === 0) {
      details.push(" (See log file for details)");
    }

    return details;
  }

  /** Review logs: formatted violations first, then raw JSON, then error text. */
  private parseReviewLog(logContent: string): string[] {
    const formatted = this.parseFormattedViolations(logContent);
    if (formatted.length > 0) return formatted;

    const fromJson = this.parseJsonViolations(logContent);
    if (fromJson.length > 0) return fromJson;

    return this.parseReviewError(logContent);
  }

  /** Extracts numbered violations (and optional "Fix:" lines) after the "Parsed Result" header. */
  private parseFormattedViolations(logContent: string): string[] {
    const details: string[] = [];
    const header = logContent.match(ConsoleReporter.PARSED_RESULT_HEADER);
    if (!header || header.index === undefined) return details;

    const sectionLines = logContent.substring(header.index).split("\n");
    for (let i = 0; i < sectionLines.length; i++) {
      const violation = (sectionLines[i] ?? "").match(
        ConsoleReporter.VIOLATION_LINE,
      );
      if (!violation) continue;

      const [, file, lineNum, issue] = violation;
      details.push(` ${chalk.cyan(file)}:${chalk.yellow(lineNum)} - ${issue}`);

      // Check next line for "Fix:" suggestion
      const nextLine = sectionLines[i + 1]?.trim();
      if (nextLine?.startsWith("Fix:")) {
        details.push(` ${chalk.dim("Fix:")} ${nextLine.substring(4).trim()}`);
        i++; // Skip the fix line
      }
    }
    return details;
  }

  /**
   * Extracts violations from a JSON object embedded in the log (minified or
   * pretty-printed). The candidate object is the text between the first '{'
   * and the last '}'. Only objects with `status === "fail"` and a
   * `violations` array are used.
   */
  private parseJsonViolations(logContent: string): string[] {
    const details: string[] = [];
    const jsonStart = logContent.indexOf("{");
    const jsonEnd = logContent.lastIndexOf("}");
    if (jsonStart === -1 || jsonEnd <= jsonStart) return details;

    let parsed: unknown;
    try {
      parsed = JSON.parse(logContent.substring(jsonStart, jsonEnd + 1));
    } catch {
      // Not valid JSON; the caller falls through to other parsing.
      return details;
    }
    if (typeof parsed !== "object" || parsed === null) return details;

    const { status, violations } = parsed as {
      status?: unknown;
      violations?: unknown;
    };
    if (status !== "fail" || !Array.isArray(violations)) return details;

    for (const entry of violations as unknown[]) {
      // Skip malformed entries instead of letting one abort the whole list.
      if (typeof entry !== "object" || entry === null) continue;
      const v = entry as {
        file?: string;
        line?: number | string;
        issue?: string;
        fix?: string;
      };

      // `||` is deliberate: empty strings should also fall back.
      const file = v.file || "unknown";
      const line = v.line || "?";
      const issue = v.issue || "Unknown issue";
      details.push(` ${chalk.cyan(file)}:${chalk.yellow(line)} - ${issue}`);
      if (v.fix) {
        details.push(` ${chalk.dim("Fix:")} ${v.fix}`);
      }
    }
    return details;
  }

  /**
   * Last resort for review logs: the first line after "Error:" (unless it is
   * CLI usage output), otherwise the text of a "Result: error - ..." line.
   */
  private parseReviewError(logContent: string): string[] {
    const marker = "Error:";
    const errorIndex = logContent.indexOf(marker);
    if (errorIndex !== -1) {
      const afterError = logContent.substring(errorIndex + marker.length).trim();
      const firstErrorLine = (afterError.split("\n")[0] ?? "").trim();
      if (
        firstErrorLine &&
        !firstErrorLine.startsWith("Usage:") &&
        !firstErrorLine.startsWith("Commands:")
      ) {
        return [` ${firstErrorLine}`];
      }
    }

    // Also check for "Result: error" lines
    const resultMatch = logContent.match(
      /Result:\s*error(?:\s*-\s*(.+?))?(?:\n|$)/,
    );
    return resultMatch?.[1] ? [` ${resultMatch[1]}`] : [];
  }

  /**
   * Check logs: the first lines of the STDERR section, otherwise the
   * "Command failed:" text, otherwise the text of a "Result: fail|error - ..."
   * line.
   */
  private parseCheckLog(logContent: string): string[] {
    const marker = "STDERR:";
    const stderrStart = logContent.indexOf(marker);
    if (stderrStart !== -1) {
      const stderrLines = logContent
        .substring(stderrStart + marker.length)
        .trim()
        .split("\n")
        // Skip empty lines and command output markers
        .filter(
          (line) =>
            line.trim() !== "" &&
            !line.includes("STDOUT:") &&
            !line.includes("Command failed:") &&
            !line.includes("Result:"),
        );
      if (stderrLines.length > 0) {
        return stderrLines
          .slice(0, ConsoleReporter.MAX_STDERR_LINES)
          .map((line) => ` ${line.trim()}`);
      }
    }

    // If no STDERR, look for error messages
    const errorMatch = logContent.match(/Command failed:\s*(.+?)(?:\n|$)/);
    if (errorMatch) {
      return [` ${errorMatch[1]}`];
    }

    // Look for any line with "Result: fail" or "Result: error"
    const resultMatch = logContent.match(
      /Result:\s*(fail|error)\s*-\s*(.+?)(?:\n|$)/,
    );
    return resultMatch ? [` ${resultMatch[2]}`] : [];
  }

  /**
   * Prints one failure block: status header, optional summary, detail lines,
   * and the log path(s), followed by a blank line.
   */
  private printFailureDetails(result: GateResult, details: string[]) {
    const statusColor = result.status === "error" ? chalk.magenta : chalk.red;
    const statusLabel = result.status === "error" ? "ERROR" : "FAIL";

    console.log(statusColor(`[${statusLabel}] ${result.jobId}`));
    if (result.message) {
      console.log(chalk.dim(` Summary: ${result.message}`));
    }

    if (details.length > 0) {
      console.log(chalk.dim(" Details:"));
      for (const detail of details) {
        console.log(detail);
      }
    }

    if (result.logPaths && result.logPaths.length > 0) {
      for (const p of result.logPaths) {
        console.log(chalk.dim(` Log: ${p}`));
      }
    } else if (result.logPath) {
      console.log(chalk.dim(` Log: ${result.logPath}`));
    }

    console.log(""); // Empty line between failures
  }
}
|