agent-gauntlet 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +3 -3
  2. package/package.json +1 -1
  3. package/src/cli-adapters/claude.ts +13 -1
  4. package/src/cli-adapters/gemini.ts +17 -2
  5. package/src/commands/check.ts +98 -12
  6. package/src/commands/ci/list-jobs.ts +3 -2
  7. package/src/commands/clean.ts +29 -0
  8. package/src/commands/help.ts +1 -1
  9. package/src/commands/index.ts +1 -1
  10. package/src/commands/init.ts +4 -4
  11. package/src/commands/review.ts +98 -12
  12. package/src/commands/run.ts +98 -12
  13. package/src/commands/shared.ts +56 -10
  14. package/src/config/schema.ts +4 -0
  15. package/src/config/validator.ts +6 -13
  16. package/src/core/change-detector.ts +1 -0
  17. package/src/core/entry-point.ts +48 -7
  18. package/src/core/runner.ts +57 -47
  19. package/src/gates/result.ts +32 -0
  20. package/src/gates/review.ts +323 -51
  21. package/src/index.ts +2 -2
  22. package/src/output/console.ts +96 -9
  23. package/src/output/logger.ts +40 -7
  24. package/src/templates/run_gauntlet.template.md +20 -13
  25. package/src/utils/log-parser.ts +409 -165
  26. package/src/utils/session-ref.ts +82 -0
  27. package/src/commands/check.test.ts +0 -29
  28. package/src/commands/detect.test.ts +0 -43
  29. package/src/commands/health.test.ts +0 -93
  30. package/src/commands/help.test.ts +0 -44
  31. package/src/commands/init.test.ts +0 -130
  32. package/src/commands/list.test.ts +0 -121
  33. package/src/commands/rerun.ts +0 -160
  34. package/src/commands/review.test.ts +0 -31
  35. package/src/commands/run.test.ts +0 -27
  36. package/src/config/loader.test.ts +0 -151
  37. package/src/core/entry-point.test.ts +0 -61
  38. package/src/gates/review.test.ts +0 -291
@@ -1,26 +1,71 @@
1
1
  import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
+ import { clearSessionRef } from "../utils/session-ref";
3
4
 
4
- export async function exists(path: string): Promise<boolean> {
5
+ const LOCK_FILENAME = ".gauntlet-run.lock";
6
+
7
+ export async function exists(filePath: string): Promise<boolean> {
5
8
  try {
6
- await fs.stat(path);
9
+ await fs.stat(filePath);
7
10
  return true;
8
11
  } catch {
9
12
  return false;
10
13
  }
11
14
  }
12
15
 
13
- export async function rotateLogs(logDir: string): Promise<void> {
16
+ export async function acquireLock(logDir: string): Promise<void> {
17
+ await fs.mkdir(logDir, { recursive: true });
18
+ const lockPath = path.resolve(logDir, LOCK_FILENAME);
19
+ try {
20
+ await fs.writeFile(lockPath, String(process.pid), { flag: "wx" });
21
+ } catch (err: unknown) {
22
+ if (
23
+ typeof err === "object" &&
24
+ err !== null &&
25
+ "code" in err &&
26
+ (err as { code: string }).code === "EEXIST"
27
+ ) {
28
+ console.error(
29
+ `Error: A gauntlet run is already in progress (lock file: ${lockPath}).`,
30
+ );
31
+ console.error(
32
+ "If no run is actually in progress, delete the lock file manually.",
33
+ );
34
+ process.exit(1);
35
+ }
36
+ throw err;
37
+ }
38
+ }
39
+
40
+ export async function releaseLock(logDir: string): Promise<void> {
41
+ const lockPath = path.resolve(logDir, LOCK_FILENAME);
42
+ try {
43
+ await fs.rm(lockPath, { force: true });
44
+ } catch {
45
+ // no-op if missing
46
+ }
47
+ }
48
+
49
+ export async function hasExistingLogs(logDir: string): Promise<boolean> {
50
+ try {
51
+ const entries = await fs.readdir(logDir);
52
+ return entries.some(
53
+ (f) => (f.endsWith(".log") || f.endsWith(".json")) && f !== "previous",
54
+ );
55
+ } catch {
56
+ return false;
57
+ }
58
+ }
59
+
60
+ export async function cleanLogs(logDir: string): Promise<void> {
14
61
  const previousDir = path.join(logDir, "previous");
15
62
 
16
63
  try {
17
- // 1. Ensure logDir exists (if not, nothing to rotate, but we should create it for future use if needed,
18
- // though usually the logger creates it. If it doesn't exist, we can just return).
19
64
  if (!(await exists(logDir))) {
20
65
  return;
21
66
  }
22
67
 
23
- // 2. Clear gauntlet_logs/previous if it exists
68
+ // 1. Delete all files in previous/
24
69
  if (await exists(previousDir)) {
25
70
  const previousFiles = await fs.readdir(previousDir);
26
71
  await Promise.all(
@@ -32,19 +77,20 @@ export async function rotateLogs(logDir: string): Promise<void> {
32
77
  await fs.mkdir(previousDir, { recursive: true });
33
78
  }
34
79
 
35
- // 3. Move all existing files in gauntlet_logs/ to gauntlet_logs/previous
80
+ // 2. Move all .log and .json files from logDir root into previous/
36
81
  const files = await fs.readdir(logDir);
37
82
  await Promise.all(
38
83
  files
39
- .filter((file) => file !== "previous")
84
+ .filter((file) => file.endsWith(".log") || file.endsWith(".json"))
40
85
  .map((file) =>
41
86
  fs.rename(path.join(logDir, file), path.join(previousDir, file)),
42
87
  ),
43
88
  );
89
+
90
+ await clearSessionRef(logDir);
44
91
  } catch (error) {
45
- // Log warning but don't crash the run as log rotation failure isn't critical
46
92
  console.warn(
47
- "Failed to rotate logs in",
93
+ "Failed to clean logs in",
48
94
  logDir,
49
95
  ":",
50
96
  error instanceof Error ? error.message : error,
@@ -51,6 +51,7 @@ export const reviewPromptFrontmatterSchema = z.object({
51
51
 
52
52
  export const entryPointSchema = z.object({
53
53
  path: z.string().min(1),
54
+ exclude: z.array(z.string().min(1)).optional(),
54
55
  checks: z.array(z.string().min(1)).optional(),
55
56
  reviews: z.array(z.string().min(1)).optional(),
56
57
  });
@@ -59,6 +60,9 @@ export const gauntletConfigSchema = z.object({
59
60
  base_branch: z.string().min(1).default("origin/main"),
60
61
  log_dir: z.string().min(1).default("gauntlet_logs"),
61
62
  allow_parallel: z.boolean().default(true),
63
+ rerun_new_issue_threshold: z
64
+ .enum(["critical", "high", "medium", "low"])
65
+ .default("high"),
62
66
  cli: cliConfigSchema,
63
67
  entry_points: z.array(entryPointSchema).min(1),
64
68
  });
@@ -108,12 +108,13 @@ export async function validateConfig(
108
108
  if (file.endsWith(".yml") || file.endsWith(".yaml")) {
109
109
  const filePath = path.join(checksPath, file);
110
110
  filesChecked.push(filePath);
111
+ const name = path.basename(file, path.extname(file));
111
112
  try {
112
113
  const content = await fs.readFile(filePath, "utf-8");
113
114
  const raw = YAML.parse(content);
114
115
  const parsed = checkGateSchema.parse(raw);
115
- existingCheckNames.add(parsed.name); // Track that this check exists
116
- checks[parsed.name] = parsed;
116
+ existingCheckNames.add(name); // Track that this check exists
117
+ checks[name] = parsed;
117
118
 
118
119
  // Semantic validation
119
120
  if (!parsed.command || parsed.command.trim() === "") {
@@ -125,17 +126,9 @@ export async function validateConfig(
125
126
  });
126
127
  }
127
128
  } catch (error: unknown) {
128
- // Try to extract check name from raw YAML even if parsing failed
129
- try {
130
- const content = await fs.readFile(filePath, "utf-8");
131
- const raw = YAML.parse(content);
132
- if (raw.name && typeof raw.name === "string") {
133
- existingCheckNames.add(raw.name); // Track that this check file exists
134
- }
135
- } catch {
136
- // If we can't even parse the name, that's okay - we'll just skip tracking it
137
- }
138
-
129
+ // Track that this check file exists even if parsing failed
130
+ // Use filename-based name since name is no longer in YAML
131
+ existingCheckNames.add(name);
139
132
  if (error instanceof ZodError) {
140
133
  error.errors.forEach((err) => {
141
134
  issues.push({
@@ -6,6 +6,7 @@ const execAsync = promisify(exec);
6
6
  export interface ChangeDetectorOptions {
7
7
  commit?: string; // If provided, get diff for this commit vs its parent
8
8
  uncommitted?: boolean; // If true, only get uncommitted changes (staged + unstaged)
9
+ fixBase?: string; // If provided, get diff from this ref to current working tree
9
10
  }
10
11
 
11
12
  export class ChangeDetector {
@@ -1,5 +1,6 @@
1
1
  import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
+ import { Glob } from "bun";
3
4
  import type { EntryPointConfig } from "../config/types.js";
4
5
 
5
6
  export interface ExpandedEntryPoint {
@@ -16,24 +17,37 @@ export class EntryPointExpander {
16
17
  const rootEntryPoint = entryPoints.find((ep) => ep.path === ".");
17
18
 
18
19
  // Always include root entry point if configured and there are ANY changes
19
- // Or should it only run if files match root patterns?
20
- // Spec says: "A root entry point always exists and applies to repository-wide gates."
21
- // Usually root gates run on any change or specific files in root.
22
- // For simplicity, if root is configured, we'll include it if there are any changed files.
23
20
  if (changedFiles.length > 0) {
24
21
  const rootConfig = rootEntryPoint ?? { path: "." };
25
- results.push({ path: ".", config: rootConfig });
22
+ // Apply exclusion filtering for root if configured
23
+ const filteredRootChanges = this.filterExcludedFiles(
24
+ changedFiles,
25
+ rootConfig.exclude,
26
+ );
27
+
28
+ if (filteredRootChanges.length > 0) {
29
+ results.push({ path: ".", config: rootConfig });
30
+ }
26
31
  }
27
32
 
28
33
  for (const ep of entryPoints) {
29
34
  if (ep.path === ".") continue; // Handled above
30
35
 
36
+ // Apply exclusion filtering first!
37
+ const filteredChanges = this.filterExcludedFiles(
38
+ changedFiles,
39
+ ep.exclude,
40
+ );
41
+
42
+ // If no relevant files remain, skip this entry point
43
+ if (filteredChanges.length === 0) continue;
44
+
31
45
  if (ep.path.endsWith("*")) {
32
46
  // Wildcard directory (e.g., "engines/*")
33
47
  const parentDir = ep.path.slice(0, -2); // "engines"
34
48
  const expandedPaths = await this.expandWildcard(
35
49
  parentDir,
36
- changedFiles,
50
+ filteredChanges,
37
51
  );
38
52
 
39
53
  for (const subDir of expandedPaths) {
@@ -44,7 +58,7 @@ export class EntryPointExpander {
44
58
  }
45
59
  } else {
46
60
  // Fixed directory (e.g., "apps/api")
47
- if (this.hasChangesInDir(ep.path, changedFiles)) {
61
+ if (this.hasChangesInDir(ep.path, filteredChanges)) {
48
62
  results.push({
49
63
  path: ep.path,
50
64
  config: ep,
@@ -81,6 +95,33 @@ export class EntryPointExpander {
81
95
  return results;
82
96
  }
83
97
 
98
+ private filterExcludedFiles(files: string[], patterns?: string[]): string[] {
99
+ if (!patterns || patterns.length === 0) {
100
+ return files;
101
+ }
102
+
103
+ // Pre-compile globs
104
+ const globs: Glob[] = [];
105
+ const prefixes: string[] = [];
106
+
107
+ for (const pattern of patterns) {
108
+ if (pattern.match(/[*?[{]/)) {
109
+ globs.push(new Glob(pattern));
110
+ } else {
111
+ prefixes.push(pattern);
112
+ }
113
+ }
114
+
115
+ return files.filter((file) => {
116
+ // If matches ANY pattern, exclude it
117
+ const isExcluded =
118
+ prefixes.some((p) => file === p || file.startsWith(`${p}/`)) ||
119
+ globs.some((g) => g.match(file));
120
+
121
+ return !isExcluded;
122
+ });
123
+ }
124
+
84
125
  private async expandWildcard(
85
126
  parentDir: string,
86
127
  changedFiles: string[],
@@ -64,7 +64,7 @@ export class Runner {
64
64
 
65
65
  await Promise.all([...parallelPromises, sequentialPromise]);
66
66
 
67
- await this.reporter.printSummary(this.results);
67
+ await this.reporter.printSummary(this.results, this.config.project.log_dir);
68
68
 
69
69
  return this.results.every((r) => r.status === "pass");
70
70
  }
@@ -76,36 +76,47 @@ export class Runner {
76
76
 
77
77
  let result: GateResult;
78
78
 
79
- if (job.type === "check") {
80
- const logPath = this.logger.getLogPath(job.id);
81
- const jobLogger = await this.logger.createJobLogger(job.id);
82
- const effectiveBaseBranch =
83
- this.baseBranchOverride || this.config.project.base_branch;
84
- result = await this.checkExecutor.execute(
85
- job.id,
86
- job.gateConfig as LoadedCheckGateConfig,
87
- job.workingDirectory,
88
- jobLogger,
89
- effectiveBaseBranch,
90
- );
91
- result.logPath = logPath;
92
- } else {
93
- // Use sanitized Job ID for lookup because that's what log-parser uses (based on filenames)
94
- const safeJobId = sanitizeJobId(job.id);
95
- const previousFailures = this.previousFailuresMap?.get(safeJobId);
96
- const loggerFactory = this.logger.createLoggerFactory(job.id);
97
- const effectiveBaseBranch =
98
- this.baseBranchOverride || this.config.project.base_branch;
99
- result = await this.reviewExecutor.execute(
100
- job.id,
101
- job.gateConfig as ReviewGateConfig & ReviewPromptFrontmatter,
102
- job.entryPoint,
103
- loggerFactory,
104
- effectiveBaseBranch,
105
- previousFailures,
106
- this.changeOptions,
107
- this.config.project.cli.check_usage_limit,
108
- );
79
+ try {
80
+ if (job.type === "check") {
81
+ const logPath = await this.logger.getLogPath(job.id);
82
+ const jobLogger = await this.logger.createJobLogger(job.id);
83
+ const effectiveBaseBranch =
84
+ this.baseBranchOverride || this.config.project.base_branch;
85
+ result = await this.checkExecutor.execute(
86
+ job.id,
87
+ job.gateConfig as LoadedCheckGateConfig,
88
+ job.workingDirectory,
89
+ jobLogger,
90
+ effectiveBaseBranch,
91
+ );
92
+ result.logPath = logPath;
93
+ } else {
94
+ // Use sanitized Job ID for lookup because that's what log-parser uses (based on filenames)
95
+ const safeJobId = sanitizeJobId(job.id);
96
+ const previousFailures = this.previousFailuresMap?.get(safeJobId);
97
+ const loggerFactory = this.logger.createLoggerFactory(job.id);
98
+ const effectiveBaseBranch =
99
+ this.baseBranchOverride || this.config.project.base_branch;
100
+ result = await this.reviewExecutor.execute(
101
+ job.id,
102
+ job.gateConfig as ReviewGateConfig & ReviewPromptFrontmatter,
103
+ job.entryPoint,
104
+ loggerFactory,
105
+ effectiveBaseBranch,
106
+ previousFailures,
107
+ this.changeOptions,
108
+ this.config.project.cli.check_usage_limit,
109
+ this.config.project.rerun_new_issue_threshold,
110
+ );
111
+ }
112
+ } catch (err) {
113
+ console.error("[ERROR] Execution failed for", job.id, ":", err);
114
+ result = {
115
+ jobId: job.id,
116
+ status: "error",
117
+ duration: 0,
118
+ message: err instanceof Error ? err.message : String(err),
119
+ };
109
120
  }
110
121
 
111
122
  this.results.push(result);
@@ -136,9 +147,9 @@ export class Runner {
136
147
  (job.gateConfig as LoadedCheckGateConfig).command,
137
148
  );
138
149
  if (!commandName) {
139
- preflightResults.push(
140
- await this.recordPreflightFailure(job, "Unable to parse command"),
141
- );
150
+ const msg = "Unable to parse command";
151
+ console.error(`[PREFLIGHT] ${job.id}: ${msg}`);
152
+ preflightResults.push(await this.recordPreflightFailure(job, msg));
142
153
  if (this.shouldFailFast(job)) this.shouldStop = true;
143
154
  continue;
144
155
  }
@@ -148,12 +159,9 @@ export class Runner {
148
159
  job.workingDirectory,
149
160
  );
150
161
  if (!available) {
151
- preflightResults.push(
152
- await this.recordPreflightFailure(
153
- job,
154
- `Missing command: ${commandName}`,
155
- ),
156
- );
162
+ const msg = `Missing command: ${commandName}`;
163
+ console.error(`[PREFLIGHT] ${job.id}: ${msg}`);
164
+ preflightResults.push(await this.recordPreflightFailure(job, msg));
157
165
  if (this.shouldFailFast(job)) this.shouldStop = true;
158
166
  continue;
159
167
  }
@@ -172,12 +180,9 @@ export class Runner {
172
180
  }
173
181
 
174
182
  if (availableTools.length < required) {
175
- preflightResults.push(
176
- await this.recordPreflightFailure(
177
- job,
178
- `Missing CLI tools: need ${required}, found ${availableTools.length}`,
179
- ),
180
- );
183
+ const msg = `Missing CLI tools: need ${required}, found ${availableTools.length} (${availableTools.join(", ") || "none"})`;
184
+ console.error(`[PREFLIGHT] ${job.id}: ${msg}`);
185
+ preflightResults.push(await this.recordPreflightFailure(job, msg));
181
186
  if (this.shouldFailFast(job)) this.shouldStop = true;
182
187
  continue;
183
188
  }
@@ -194,7 +199,7 @@ export class Runner {
194
199
  message: string,
195
200
  ): Promise<GateResult> {
196
201
  if (job.type === "check") {
197
- const logPath = this.logger.getLogPath(job.id);
202
+ const logPath = await this.logger.getLogPath(job.id);
198
203
  const jobLogger = await this.logger.createJobLogger(job.id);
199
204
  await jobLogger(
200
205
  `[${new Date().toISOString()}] Health check failed\n${message}\n`,
@@ -222,6 +227,11 @@ export class Runner {
222
227
  const health = await adapter.checkHealth({
223
228
  checkUsageLimit: this.config.project.cli.check_usage_limit,
224
229
  });
230
+ if (health.status !== "healthy") {
231
+ console.log(
232
+ `[DEBUG] Adapter ${name} check failed: ${health.status} - ${health.message}`,
233
+ );
234
+ }
225
235
  return health.status === "healthy";
226
236
  }
227
237
 
@@ -1,5 +1,23 @@
1
1
  export type GateStatus = "pass" | "fail" | "error";
2
2
 
3
+ export interface PreviousViolation {
4
+ file: string;
5
+ line: number | string;
6
+ issue: string;
7
+ fix?: string;
8
+ priority?: "critical" | "high" | "medium" | "low";
9
+ status?: "new" | "fixed" | "skipped";
10
+ result?: string | null;
11
+ }
12
+
13
+ export interface ReviewFullJsonOutput {
14
+ adapter: string;
15
+ timestamp: string;
16
+ status: "pass" | "fail" | "error";
17
+ rawOutput: string;
18
+ violations: PreviousViolation[];
19
+ }
20
+
3
21
  export interface GateResult {
4
22
  jobId: string;
5
23
  status: GateStatus;
@@ -8,10 +26,24 @@ export interface GateResult {
8
26
  logPath?: string; // path to full log
9
27
  logPaths?: string[]; // paths to multiple logs (e.g. per-agent logs)
10
28
  fixInstructions?: string; // Markdown content for fixing failures
29
+ errorCount?: number; // Number of active failures/violations
30
+ skipped?: Array<{
31
+ file: string;
32
+ line: number | string;
33
+ issue: string;
34
+ result?: string | null;
35
+ }>;
11
36
  subResults?: Array<{
12
37
  nameSuffix: string;
13
38
  status: GateStatus;
14
39
  message: string;
15
40
  logPath?: string;
41
+ errorCount?: number;
42
+ skipped?: Array<{
43
+ file: string;
44
+ line: number | string;
45
+ issue: string;
46
+ result?: string | null;
47
+ }>;
16
48
  }>;
17
49
  }