agent-gauntlet 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +4 -2
- package/src/cli-adapters/claude.ts +139 -108
- package/src/cli-adapters/codex.ts +141 -117
- package/src/cli-adapters/cursor.ts +152 -0
- package/src/cli-adapters/gemini.ts +171 -139
- package/src/cli-adapters/github-copilot.ts +153 -0
- package/src/cli-adapters/index.ts +77 -48
- package/src/commands/check.test.ts +24 -20
- package/src/commands/check.ts +65 -59
- package/src/commands/detect.test.ts +38 -32
- package/src/commands/detect.ts +74 -61
- package/src/commands/health.test.ts +67 -53
- package/src/commands/health.ts +167 -145
- package/src/commands/help.test.ts +37 -37
- package/src/commands/help.ts +30 -22
- package/src/commands/index.ts +9 -9
- package/src/commands/init.test.ts +118 -107
- package/src/commands/init.ts +514 -417
- package/src/commands/list.test.ts +87 -70
- package/src/commands/list.ts +28 -24
- package/src/commands/rerun.ts +142 -119
- package/src/commands/review.test.ts +26 -20
- package/src/commands/review.ts +65 -59
- package/src/commands/run.test.ts +22 -20
- package/src/commands/run.ts +64 -58
- package/src/commands/shared.ts +44 -35
- package/src/config/loader.test.ts +112 -90
- package/src/config/loader.ts +132 -123
- package/src/config/schema.ts +49 -47
- package/src/config/types.ts +15 -13
- package/src/config/validator.ts +521 -454
- package/src/core/change-detector.ts +122 -104
- package/src/core/entry-point.test.ts +60 -62
- package/src/core/entry-point.ts +76 -67
- package/src/core/job.ts +69 -59
- package/src/core/runner.ts +261 -230
- package/src/gates/check.ts +78 -69
- package/src/gates/result.ts +7 -7
- package/src/gates/review.test.ts +174 -138
- package/src/gates/review.ts +716 -561
- package/src/index.ts +16 -15
- package/src/output/console.ts +253 -214
- package/src/output/logger.ts +64 -52
- package/src/templates/run_gauntlet.template.md +18 -0
- package/src/utils/diff-parser.ts +64 -62
- package/src/utils/log-parser.ts +227 -206
- package/src/utils/sanitizer.ts +1 -1
package/src/output/logger.ts
CHANGED
|
@@ -1,68 +1,80 @@
|
|
|
1
|
-
import fs from
|
|
2
|
-
import path from
|
|
3
|
-
import { sanitizeJobId } from
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { sanitizeJobId } from "../utils/sanitizer.js";
|
|
4
4
|
|
|
5
5
|
function formatTimestamp(): string {
|
|
6
|
-
|
|
6
|
+
return new Date().toISOString();
|
|
7
7
|
}
|
|
8
8
|
|
|
9
9
|
export class Logger {
|
|
10
|
-
|
|
10
|
+
private initializedFiles: Set<string> = new Set();
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
constructor(private logDir: string) {}
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
async init() {
|
|
15
|
+
await fs.mkdir(this.logDir, { recursive: true });
|
|
16
|
+
}
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
18
|
+
async close() {
|
|
19
|
+
// No-op - using append mode
|
|
20
|
+
}
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
22
|
+
getLogPath(jobId: string, adapterName?: string): string {
|
|
23
|
+
const safeName = sanitizeJobId(jobId);
|
|
24
|
+
if (adapterName) {
|
|
25
|
+
return path.join(this.logDir, `${safeName}_${adapterName}.log`);
|
|
26
|
+
}
|
|
27
|
+
return path.join(this.logDir, `${safeName}.log`);
|
|
28
|
+
}
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
30
|
+
private async initFile(logPath: string): Promise<void> {
|
|
31
|
+
if (!this.initializedFiles.has(logPath)) {
|
|
32
|
+
await fs.writeFile(logPath, "");
|
|
33
|
+
this.initializedFiles.add(logPath);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
37
|
+
async createJobLogger(
|
|
38
|
+
jobId: string,
|
|
39
|
+
): Promise<(text: string) => Promise<void>> {
|
|
40
|
+
const logPath = this.getLogPath(jobId);
|
|
41
|
+
await this.initFile(logPath);
|
|
40
42
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
43
|
+
return async (text: string) => {
|
|
44
|
+
const timestamp = formatTimestamp();
|
|
45
|
+
const lines = text.split("\n");
|
|
46
|
+
if (lines.length > 0) {
|
|
47
|
+
lines[0] = `[${timestamp}] ${lines[0]}`;
|
|
48
|
+
}
|
|
49
|
+
await fs.appendFile(
|
|
50
|
+
logPath,
|
|
51
|
+
lines.join("\n") + (text.endsWith("\n") ? "" : "\n"),
|
|
52
|
+
);
|
|
53
|
+
};
|
|
54
|
+
}
|
|
50
55
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
56
|
+
createLoggerFactory(
|
|
57
|
+
jobId: string,
|
|
58
|
+
): (
|
|
59
|
+
adapterName?: string,
|
|
60
|
+
) => Promise<{ logger: (text: string) => Promise<void>; logPath: string }> {
|
|
61
|
+
return async (adapterName?: string) => {
|
|
62
|
+
const logPath = this.getLogPath(jobId, adapterName);
|
|
63
|
+
await this.initFile(logPath);
|
|
55
64
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
65
|
+
const logger = async (text: string) => {
|
|
66
|
+
const timestamp = formatTimestamp();
|
|
67
|
+
const lines = text.split("\n");
|
|
68
|
+
if (lines.length > 0) {
|
|
69
|
+
lines[0] = `[${timestamp}] ${lines[0]}`;
|
|
70
|
+
}
|
|
71
|
+
await fs.appendFile(
|
|
72
|
+
logPath,
|
|
73
|
+
lines.join("\n") + (text.endsWith("\n") ? "" : "\n"),
|
|
74
|
+
);
|
|
75
|
+
};
|
|
64
76
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
77
|
+
return { logger, logPath };
|
|
78
|
+
};
|
|
79
|
+
}
|
|
68
80
|
}
|
package/src/templates/run_gauntlet.template.md
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Run the full verification gauntlet
|
|
3
|
+
allowed-tools: Bash
|
|
4
|
+
---
|
|
5
|
+
# /gauntlet
|
|
6
|
+
Execute the autonomous verification suite.
|
|
7
|
+
|
|
8
|
+
1. Run `agent-gauntlet run`.
|
|
9
|
+
2. If it fails, read the log files in `.gauntlet_logs/` to understand exactly what went wrong.
|
|
10
|
+
3. Fix any code or logic errors found by the tools or AI reviewers, prioritizing higher-priority violations (critical > high > medium > low).
|
|
11
|
+
4. If you disagree with AI reviewer feedback, briefly explain your reasoning in the code comments rather than ignoring it silently.
|
|
12
|
+
5. Do NOT commit your changes yet—keep them uncommitted so the rerun command can review them.
|
|
13
|
+
6. Run `agent-gauntlet rerun` to verify your fixes. The rerun command reviews only uncommitted changes and uses previous failures as context.
|
|
14
|
+
7. Repeat steps 2-6 until one of the following termination conditions is met:
|
|
15
|
+
- All gates pass
|
|
16
|
+
- You disagree with remaining failures (ask the human how to proceed)
|
|
17
|
+
- Still failing after 3 rerun attempts
|
|
18
|
+
8. Once all gates pass, do NOT commit or push your changes—await the human's review and explicit instruction to commit.
|
package/src/utils/diff-parser.ts
CHANGED
|
@@ -5,82 +5,84 @@ export type DiffFileRange = Set<number>;
|
|
|
5
5
|
* Valid line numbers are those that appear in the diff as added or modified lines.
|
|
6
6
|
*/
|
|
7
7
|
export function parseDiff(diff: string): Map<string, DiffFileRange> {
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
const fileRanges = new Map<string, DiffFileRange>();
|
|
9
|
+
const lines = diff.split("\n");
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
let currentFile: string | null = null;
|
|
12
|
+
let currentRanges: DiffFileRange | null = null;
|
|
13
|
+
let currentLineNumber = 0;
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
if (currentFile.startsWith('.git/')) {
|
|
27
|
-
currentFile = null;
|
|
28
|
-
currentRanges = null;
|
|
29
|
-
continue;
|
|
30
|
-
}
|
|
15
|
+
for (const line of lines) {
|
|
16
|
+
// Parse file header: diff --git a/path/to/file b/path/to/file
|
|
17
|
+
if (line.startsWith("diff --git")) {
|
|
18
|
+
const parts = line.split(" ");
|
|
19
|
+
if (parts.length >= 4) {
|
|
20
|
+
// Extract filename from b/path/to/file (target file)
|
|
21
|
+
const targetPath = parts[3];
|
|
22
|
+
// Remove 'b/' prefix
|
|
23
|
+
currentFile = targetPath.startsWith("b/")
|
|
24
|
+
? targetPath.substring(2)
|
|
25
|
+
: targetPath;
|
|
31
26
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
27
|
+
// Skip .git/ paths
|
|
28
|
+
if (currentFile.startsWith(".git/")) {
|
|
29
|
+
currentFile = null;
|
|
30
|
+
currentRanges = null;
|
|
31
|
+
continue;
|
|
32
|
+
}
|
|
37
33
|
|
|
38
|
-
|
|
39
|
-
|
|
34
|
+
currentRanges = new Set<number>();
|
|
35
|
+
fileRanges.set(currentFile, currentRanges);
|
|
36
|
+
}
|
|
37
|
+
continue;
|
|
38
|
+
}
|
|
40
39
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
const match = line.match(/@@ \-\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
|
|
44
|
-
if (match && match[1]) {
|
|
45
|
-
currentLineNumber = parseInt(match[1], 10);
|
|
46
|
-
}
|
|
47
|
-
continue;
|
|
48
|
-
}
|
|
40
|
+
// Skip if we're ignoring this file (e.g. .git/)
|
|
41
|
+
if (!currentFile || !currentRanges) continue;
|
|
49
42
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
}
|
|
59
|
-
// Removed lines (-) do not increment the new line counter
|
|
60
|
-
}
|
|
43
|
+
// Parse hunk header: @@ -old,count +new,count @@
|
|
44
|
+
if (line.startsWith("@@")) {
|
|
45
|
+
const match = line.match(/@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
|
|
46
|
+
if (match?.[1]) {
|
|
47
|
+
currentLineNumber = parseInt(match[1], 10);
|
|
48
|
+
}
|
|
49
|
+
continue;
|
|
50
|
+
}
|
|
61
51
|
|
|
62
|
-
|
|
52
|
+
// Track added lines
|
|
53
|
+
if (line.startsWith("+") && !line.startsWith("+++")) {
|
|
54
|
+
currentRanges.add(currentLineNumber);
|
|
55
|
+
currentLineNumber++;
|
|
56
|
+
}
|
|
57
|
+
// Track context lines (unchanged) to keep line count correct
|
|
58
|
+
else if (line.startsWith(" ")) {
|
|
59
|
+
currentLineNumber++;
|
|
60
|
+
}
|
|
61
|
+
// Removed lines (-) do not increment the new line counter
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
return fileRanges;
|
|
63
65
|
}
|
|
64
66
|
|
|
65
67
|
/**
|
|
66
68
|
* Checks if a violation is valid based on the parsed diff ranges.
|
|
67
69
|
*/
|
|
68
70
|
export function isValidViolationLocation(
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
71
|
+
file: string,
|
|
72
|
+
line: number | undefined,
|
|
73
|
+
diffRanges: Map<string, DiffFileRange> | undefined,
|
|
72
74
|
): boolean {
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
75
|
+
// If no diff ranges provided (e.g. full file review), assume valid
|
|
76
|
+
if (!diffRanges) return true;
|
|
77
|
+
|
|
78
|
+
// Line is required for diff-scoped reviews
|
|
79
|
+
if (line === undefined) return false;
|
|
78
80
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
81
|
+
const validLines = diffRanges.get(file);
|
|
82
|
+
if (!validLines) {
|
|
83
|
+
// File not in diff
|
|
84
|
+
return false;
|
|
85
|
+
}
|
|
84
86
|
|
|
85
|
-
|
|
87
|
+
return validLines.has(line);
|
|
86
88
|
}
|