agent-gauntlet 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +25 -23
  2. package/dist/index.js +9226 -0
  3. package/dist/index.js.map +65 -0
  4. package/dist/scripts/status.js +280 -0
  5. package/dist/scripts/status.js.map +10 -0
  6. package/package.json +22 -8
  7. package/src/built-in-reviews/code-quality.md +0 -25
  8. package/src/built-in-reviews/index.ts +0 -28
  9. package/src/bun-plugins.d.ts +0 -4
  10. package/src/cli-adapters/claude.ts +0 -327
  11. package/src/cli-adapters/codex.ts +0 -290
  12. package/src/cli-adapters/cursor.ts +0 -128
  13. package/src/cli-adapters/gemini.ts +0 -510
  14. package/src/cli-adapters/github-copilot.ts +0 -141
  15. package/src/cli-adapters/index.ts +0 -250
  16. package/src/cli-adapters/thinking-budget.ts +0 -23
  17. package/src/commands/check.ts +0 -311
  18. package/src/commands/ci/index.ts +0 -15
  19. package/src/commands/ci/init.ts +0 -96
  20. package/src/commands/ci/list-jobs.ts +0 -90
  21. package/src/commands/clean.ts +0 -54
  22. package/src/commands/detect.ts +0 -173
  23. package/src/commands/health.ts +0 -169
  24. package/src/commands/help.ts +0 -34
  25. package/src/commands/index.ts +0 -13
  26. package/src/commands/init.ts +0 -1878
  27. package/src/commands/list.ts +0 -33
  28. package/src/commands/review.ts +0 -311
  29. package/src/commands/run.ts +0 -29
  30. package/src/commands/shared.ts +0 -267
  31. package/src/commands/stop-hook.ts +0 -567
  32. package/src/commands/validate.ts +0 -20
  33. package/src/commands/wait-ci.ts +0 -518
  34. package/src/config/ci-loader.ts +0 -33
  35. package/src/config/ci-schema.ts +0 -28
  36. package/src/config/global.ts +0 -87
  37. package/src/config/loader.ts +0 -301
  38. package/src/config/schema.ts +0 -165
  39. package/src/config/stop-hook-config.ts +0 -130
  40. package/src/config/types.ts +0 -65
  41. package/src/config/validator.ts +0 -592
  42. package/src/core/change-detector.ts +0 -137
  43. package/src/core/diff-stats.ts +0 -442
  44. package/src/core/entry-point.ts +0 -190
  45. package/src/core/job.ts +0 -96
  46. package/src/core/run-executor.ts +0 -621
  47. package/src/core/runner.ts +0 -290
  48. package/src/gates/check.ts +0 -118
  49. package/src/gates/resolve-check-command.ts +0 -21
  50. package/src/gates/result.ts +0 -54
  51. package/src/gates/review.ts +0 -1333
  52. package/src/hooks/adapters/claude-stop-hook.ts +0 -99
  53. package/src/hooks/adapters/cursor-stop-hook.ts +0 -122
  54. package/src/hooks/adapters/types.ts +0 -94
  55. package/src/hooks/stop-hook-handler.ts +0 -748
  56. package/src/index.ts +0 -47
  57. package/src/output/app-logger.ts +0 -214
  58. package/src/output/console-log.ts +0 -168
  59. package/src/output/console.ts +0 -359
  60. package/src/output/logger.ts +0 -126
  61. package/src/output/sinks/console-sink.ts +0 -59
  62. package/src/output/sinks/file-sink.ts +0 -110
  63. package/src/scripts/status.ts +0 -433
  64. package/src/templates/workflow.yml +0 -79
  65. package/src/types/gauntlet-status.ts +0 -79
  66. package/src/utils/debug-log.ts +0 -392
  67. package/src/utils/diff-parser.ts +0 -103
  68. package/src/utils/execution-state.ts +0 -472
  69. package/src/utils/log-parser.ts +0 -696
  70. package/src/utils/sanitizer.ts +0 -3
  71. package/src/utils/session-ref.ts +0 -91
@@ -1,141 +0,0 @@
1
- import { exec } from "node:child_process";
2
- import fs from "node:fs/promises";
3
- import os from "node:os";
4
- import path from "node:path";
5
- import { promisify } from "node:util";
6
- import { type CLIAdapter, runStreamingCommand } from "./index.js";
7
-
8
- const execAsync = promisify(exec);
9
- const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
10
-
11
/**
 * CLI adapter that runs reviews through the GitHub Copilot CLI (`copilot`).
 *
 * The prompt and diff are written to a temporary file and fed to the CLI on
 * stdin. Copilot exposes no custom-command or skill directories, so every
 * directory getter returns `null`.
 */
export class GitHubCopilotAdapter implements CLIAdapter {
  name = "github-copilot";

  /** Resolves to `true` when `which copilot` succeeds, `false` otherwise. */
  async isAvailable(): Promise<boolean> {
    try {
      await execAsync("which copilot");
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Reports whether the CLI can be used.
   *
   * Only the presence of the executable is checked, so the result is either
   * `"missing"` or `"healthy"`; `"unhealthy"` is never produced here.
   */
  async checkHealth(): Promise<{
    available: boolean;
    status: "healthy" | "missing" | "unhealthy";
    message?: string;
  }> {
    const available = await this.isAvailable();
    if (!available) {
      return {
        available: false,
        status: "missing",
        message: "Command not found",
      };
    }

    return { available: true, status: "healthy", message: "Ready" };
  }

  getProjectCommandDir(): string | null {
    // GitHub Copilot CLI does not support custom commands (feature request #618)
    return null;
  }

  getUserCommandDir(): string | null {
    // GitHub Copilot CLI does not support custom commands (feature request #618)
    return null;
  }

  getProjectSkillDir(): string | null {
    return null;
  }

  getUserSkillDir(): string | null {
    return null;
  }

  getCommandExtension(): string {
    return ".md";
  }

  canUseSymlink(): boolean {
    // Not applicable - no command directory support
    return false;
  }

  transformCommand(markdownContent: string): string {
    // Not applicable - no command directory support
    return markdownContent;
  }

  /**
   * Sends `prompt` followed by `diff` to the Copilot CLI and returns its stdout.
   *
   * @param opts.prompt - Review instructions placed before the diff.
   * @param opts.diff - Diff text appended after a `--- DIFF ---` separator.
   * @param opts.model - Accepted for interface compatibility; not used by this adapter.
   * @param opts.timeoutMs - Optional time limit for the CLI invocation.
   * @param opts.onOutput - When provided, output is streamed to this callback
   *   as it arrives instead of being buffered.
   * @returns The CLI's stdout.
   * @throws If the temporary input cannot be written or the CLI invocation fails.
   */
  async execute(opts: {
    prompt: string;
    diff: string;
    model?: string;
    timeoutMs?: number;
    onOutput?: (chunk: string) => void;
  }): Promise<string> {
    const fullContent = `${opts.prompt}\n\n--- DIFF ---\n${opts.diff}`;

    // A pid + timestamp file name is not unique: two execute() calls started in
    // the same millisecond by one process would share a path, so one prompt
    // would overwrite the other and the first cleanup would delete the file
    // while the second invocation still needs it. mkdtemp yields a directory
    // with a unique random suffix for every call.
    const tmpDir = await fs.mkdtemp(
      path.join(os.tmpdir(), "gauntlet-copilot-"),
    );
    const tmpFile = path.join(tmpDir, "prompt.txt");

    // Best-effort removal: a failed cleanup must never mask the review result.
    const cleanup = (): Promise<void> =>
      fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});

    try {
      await fs.writeFile(tmpFile, fullContent);
    } catch (err) {
      // Do not leave the temp directory behind when the write itself fails.
      await cleanup();
      throw err;
    }

    // Copilot reads from stdin when no -p flag is provided
    // Tool whitelist: cat/grep/ls/find/head/tail are required for the AI to read
    // and analyze code files during review. While these tools can access files,
    // they are read-only and necessary for code review functionality.
    // The copilot CLI is scoped to the repo directory by default.
    // git is excluded to prevent access to commit history (review should only see diff).
    // Single source of truth for both the streaming and the buffered invocation.
    const allowedTools = ["cat", "grep", "ls", "find", "head", "tail"];
    const args = allowedTools.flatMap((tool) => [
      "--allow-tool",
      `shell(${tool})`,
    ]);

    // If onOutput callback is provided, use spawn for real-time streaming
    if (opts.onOutput) {
      return runStreamingCommand({
        command: "copilot",
        args,
        tmpFile,
        timeoutMs: opts.timeoutMs,
        onOutput: opts.onOutput,
        cleanup,
      });
    }

    // Otherwise use exec for buffered output.
    // Shell command construction: exec() with shell piping is used instead of
    // execFile() because copilot requires stdin input. The tmpFile path is built
    // from os.tmpdir() plus a mkdtemp-generated suffix, not from user-supplied
    // text. Double quotes handle paths with spaces.
    try {
      const quotedArgs = allowedTools
        .map((tool) => `--allow-tool "shell(${tool})"`)
        .join(" ");
      const cmd = `cat "${tmpFile}" | copilot ${quotedArgs}`;
      const { stdout } = await execAsync(cmd, {
        timeout: opts.timeoutMs,
        maxBuffer: MAX_BUFFER_BYTES,
      });
      return stdout;
    } finally {
      await cleanup();
    }
  }
}
@@ -1,250 +0,0 @@
1
- import { type ChildProcess, spawn } from "node:child_process";
2
- import type { FileHandle } from "node:fs/promises";
3
- import fs from "node:fs/promises";
4
-
5
/** Outcome of a CLI adapter health probe, as returned by `checkHealth()`. */
export interface CLIAdapterHealth {
  /** Whether the adapter reports its CLI as available. */
  available: boolean;
  /** Coarse health classification. */
  status: "missing" | "healthy" | "unhealthy";
  /** Optional human-readable detail accompanying `status`. */
  message?: string;
}
10
-
11
/**
 * Subscribes to a child process's stderr and accumulates everything it emits.
 *
 * @param child - Process whose stderr is observed; nothing is collected when
 *   the process has no stderr stream.
 * @param onOutput - Optional callback that receives every chunk as text.
 * @returns A getter yielding all stderr text received so far, concatenated.
 */
export function collectStderr(
  child: ChildProcess,
  onOutput?: (text: string) => void,
): () => string {
  const captured: string[] = [];

  const handleChunk = (data: Buffer): void => {
    const text = data.toString();
    captured.push(text);
    if (onOutput) {
      onOutput(text);
    }
  };

  child.stderr?.on("data", handleChunk);

  return () => captured.join("");
}
27
-
28
/**
 * Creates the Error reported when a process exits with a non-zero code.
 *
 * The message starts with the exit code; any non-empty stdout and stderr text
 * follows (stdout first), each on its own line. Both streams are included so
 * usage-limit messages are captured whichever stream the CLI wrote them to.
 *
 * @param code - Exit code of the process.
 * @param getStderr - Getter for the collected stderr text.
 * @param getStdout - Optional getter for the collected stdout text.
 * @returns An Error describing the exit.
 */
export function processExitError(
  code: number | null,
  getStderr: () => string,
  getStdout?: () => string,
): Error {
  const errText = getStderr();
  let outText = "";
  if (getStdout) {
    outText = getStdout() ?? "";
  }

  const sections: string[] = [];
  if (outText) sections.push(outText);
  if (errText) sections.push(errText);

  const combined = sections.join("\n");
  const suffix = combined ? `\n${combined}` : "";
  return new Error(`Process exited with code ${code}${suffix}`);
}
45
-
46
/**
 * Spawns `command` with `args`, pipes the contents of `tmpFile` into its stdin
 * and forwards stdout/stderr chunks to `onOutput` as they arrive.
 *
 * Settlement after the process closes is delegated to `finalizeProcessClose`.
 * The promise rejects when the input file cannot be opened, the process
 * cannot be spawned, or `timeoutMs` elapses (the child is then sent SIGTERM).
 *
 * @param opts.command - Executable to run.
 * @param opts.args - Arguments passed to the executable.
 * @param opts.tmpFile - Path of the file streamed to the child's stdin.
 * @param opts.timeoutMs - Optional time limit in milliseconds.
 * @param opts.onOutput - Optional callback receiving stdout and stderr chunks.
 * @param opts.cleanup - Called to release the temp input once the run is over.
 * @param opts.env - Optional environment for the child process.
 * @returns The accumulated stdout of the child process.
 */
export async function runStreamingCommand(opts: {
  command: string;
  args: string[];
  tmpFile: string;
  timeoutMs?: number;
  onOutput?: (chunk: string) => void;
  cleanup: () => Promise<void>;
  env?: NodeJS.ProcessEnv;
}): Promise<string> {
  return new Promise((resolve, reject) => {
    const chunks: string[] = [];

    // Cleanup is best-effort. If it throws or rejects, the original error must
    // still reach the caller; otherwise the returned promise would never settle.
    const rejectAfterCleanup = (err: unknown): void => {
      void Promise.resolve()
        .then(() => opts.cleanup())
        .catch(() => {})
        .then(() => reject(err));
    };

    fs.open(opts.tmpFile, "r")
      .then((handle) => {
        try {
          const stream = handle.createReadStream();
          const child = spawn(opts.command, opts.args, {
            stdio: ["pipe", "pipe", "pipe"],
            env: opts.env,
          });

          // The child may exit (or fail to start) before it has consumed all of
          // its input. The resulting EPIPE is emitted as an 'error' event on
          // stdin; without a listener it becomes an uncaught exception. The
          // outcome of the run is reported through the child's own events.
          child.stdin.on("error", () => {});
          // pipe() does not end the destination when the source fails, so close
          // stdin explicitly to avoid leaving the child waiting for more input.
          stream.on("error", () => child.stdin.end());
          stream.pipe(child.stdin);

          let timeoutId: ReturnType<typeof setTimeout> | undefined;
          if (opts.timeoutMs) {
            timeoutId = setTimeout(() => {
              child.kill("SIGTERM");
              reject(new Error("Command timed out"));
            }, opts.timeoutMs);
          }

          child.stdout.on("data", (data: Buffer) => {
            const chunk = data.toString();
            chunks.push(chunk);
            opts.onOutput?.(chunk);
          });

          const getStderr = collectStderr(child, opts.onOutput);

          child.on("close", (code) => {
            // Surface any failure inside finalization instead of discarding it,
            // so the caller is never left with a pending promise.
            finalizeProcessClose({
              code,
              timeoutId,
              handle,
              cleanup: opts.cleanup,
              chunks,
              getStderr,
              resolve,
              reject,
            }).catch(reject);
          });

          child.on("error", (err) => {
            if (timeoutId) clearTimeout(timeoutId);
            handle.close().catch(() => {});
            rejectAfterCleanup(err);
          });
        } catch (err) {
          // spawn() can throw synchronously (e.g. invalid arguments); release
          // the file handle before the failure is reported below.
          handle.close().catch(() => {});
          throw err;
        }
      })
      .catch(rejectAfterCleanup);
  });
}
111
-
112
/**
 * Finishes a streamed command after its process has closed: cancels the
 * timeout, closes the input file handle, runs `cleanup`, then settles the
 * caller's promise.
 *
 * An exit code of `0` or `null` resolves with the joined stdout chunks; any
 * other code rejects with an error built by `processExitError`.
 *
 * Closing the handle and running `cleanup` are both best-effort. A failure in
 * either must not prevent `resolve`/`reject` from being called, otherwise the
 * caller would wait forever.
 *
 * @param opts.code - Exit code reported by the process's close event.
 * @param opts.timeoutId - Pending timeout to cancel, if any.
 * @param opts.handle - File handle of the input that was piped to stdin.
 * @param opts.cleanup - Releases the temp input.
 * @param opts.chunks - Collected stdout chunks.
 * @param opts.getStderr - Getter for the collected stderr text.
 * @param opts.resolve - Settles the caller's promise with stdout.
 * @param opts.reject - Settles the caller's promise with an error.
 */
export async function finalizeProcessClose(opts: {
  code: number | null;
  timeoutId?: ReturnType<typeof setTimeout>;
  handle: FileHandle;
  cleanup: () => Promise<void>;
  chunks: string[];
  getStderr: () => string;
  resolve: (value: string) => void;
  reject: (error: Error) => void;
}): Promise<void> {
  if (opts.timeoutId) clearTimeout(opts.timeoutId);
  await opts.handle.close().catch(() => {});

  try {
    await opts.cleanup();
  } catch {
    // Ignored on purpose: a leftover temp file is preferable to losing the
    // command's result or leaving the caller's promise unsettled.
  }

  if (opts.code === 0 || opts.code === null) {
    opts.resolve(opts.chunks.join(""));
  } else {
    opts.reject(
      processExitError(opts.code, opts.getStderr, () => opts.chunks.join("")),
    );
  }
}
134
-
135
/**
 * Tells whether CLI output contains one of the known usage-limit phrases.
 * Matching is case-insensitive.
 *
 * @param output - Text produced by a CLI.
 * @returns `true` when any known phrase occurs in `output`.
 */
export function isUsageLimit(output: string): boolean {
  const haystack = output.toLowerCase();
  const phrases = [
    "usage limit",
    "quota exceeded",
    "quota will reset",
    "credit balance is too low",
    "out of extra usage",
    "out of usage",
  ];
  return phrases.some((phrase) => haystack.includes(phrase));
}
146
-
147
/**
 * Contract implemented by every CLI integration held in the adapter registry.
 */
export interface CLIAdapter {
  /** Identifier of the adapter. */
  name: string;

  /** Resolves to whether the adapter's CLI is available. */
  isAvailable(): Promise<boolean>;

  /** Resolves to a health report for the adapter's CLI. */
  checkHealth(): Promise<CLIAdapterHealth>;

  /**
   * Runs the CLI with the given prompt and diff and resolves to its output.
   */
  execute(opts: {
    prompt: string;
    diff: string;
    model?: string;
    timeoutMs?: number;
    /** Receives output chunks in real time when supplied. */
    onOutput?: (chunk: string) => void;
    /** Allows tool use for this adapter; treated as true when omitted. */
    allowToolUse?: boolean;
    /** Thinking budget level: off, low, medium or high. */
    thinkingBudget?: string;
  }): Promise<string>;

  /**
   * Project-scoped command directory, relative to the project root,
   * or null when the CLI only supports user-level commands.
   */
  getProjectCommandDir(): string | null;

  /**
   * User-level command directory as an absolute path,
   * or null when the CLI has no user-level commands.
   */
  getUserCommandDir(): string | null;

  /**
   * Project-scoped skill directory, relative to the project root,
   * or null when the CLI does not support the skills model.
   */
  getProjectSkillDir(): string | null;

  /**
   * User-level skill directory as an absolute path,
   * or null when the CLI does not support the skills model.
   */
  getUserSkillDir(): string | null;

  /** File extension this CLI uses for command files. */
  getCommandExtension(): string;

  /**
   * Whether command files may be symlinked, i.e. the CLI consumes the same
   * format as the source Markdown.
   */
  canUseSymlink(): boolean;

  /**
   * Converts gauntlet command content into this CLI's format.
   * The input is always Markdown with YAML frontmatter.
   */
  transformCommand(markdownContent: string): string;
}
197
-
198
- import { ClaudeAdapter } from "./claude.js";
199
- import { CodexAdapter } from "./codex.js";
200
- import { CursorAdapter } from "./cursor.js";
201
- import { GeminiAdapter } from "./gemini.js";
202
- import { GitHubCopilotAdapter } from "./github-copilot.js";
203
-
204
- export {
205
- GeminiAdapter,
206
- CodexAdapter,
207
- ClaudeAdapter,
208
- GitHubCopilotAdapter,
209
- CursorAdapter,
210
- };
211
-
212
- // Adapter registry: keys should use lowercase with hyphens for multi-word names
213
- const adapters: Record<string, CLIAdapter> = {
214
- gemini: new GeminiAdapter(),
215
- codex: new CodexAdapter(),
216
- claude: new ClaudeAdapter(),
217
- "github-copilot": new GitHubCopilotAdapter(),
218
- cursor: new CursorAdapter(),
219
- };
220
-
221
/**
 * Looks up a registered adapter by its registry key.
 *
 * Only the registry's own keys are consulted. A plain property read would
 * also return members inherited from `Object.prototype` (for example
 * `"constructor"` or `"toString"`), which are not adapters.
 *
 * @param name - Registry key, e.g. `"claude"` or `"github-copilot"`.
 * @returns The matching adapter, or `undefined` when none is registered.
 */
export function getAdapter(name: string): CLIAdapter | undefined {
  return Object.prototype.hasOwnProperty.call(adapters, name)
    ? adapters[name]
    : undefined;
}
224
-
225
/** Returns every registered adapter, in registry order. */
export function getAllAdapters(): CLIAdapter[] {
  return Object.entries(adapters).map(([, adapter]) => adapter);
}
228
-
229
/**
 * Lists the registered adapters whose project command directory is not null,
 * i.e. those that support project-scoped commands.
 */
export function getProjectCommandAdapters(): CLIAdapter[] {
  const supported: CLIAdapter[] = [];
  for (const adapter of Object.values(adapters)) {
    if (adapter.getProjectCommandDir() !== null) {
      supported.push(adapter);
    }
  }
  return supported;
}
237
-
238
/**
 * Lists the registered adapters whose user command directory is not null,
 * i.e. those that support user-level commands.
 */
export function getUserCommandAdapters(): CLIAdapter[] {
  const supported: CLIAdapter[] = [];
  for (const adapter of Object.values(adapters)) {
    if (adapter.getUserCommandDir() !== null) {
      supported.push(adapter);
    }
  }
  return supported;
}
244
-
245
/**
 * Lists the names accepted as CLI tools, which are the keys of the adapter
 * registry.
 */
export function getValidCLITools(): string[] {
  return Object.entries(adapters).map(([toolName]) => toolName);
}
@@ -1,23 +0,0 @@
1
/**
 * Claude MAX_THINKING_TOKENS value for each unified thinking budget level
 * (off, low, medium, high).
 */
export const CLAUDE_THINKING_TOKENS: Record<string, number> = {
  off: 0,
  low: 8_000,
  medium: 16_000,
  high: 31_999,
};
8
-
9
/**
 * Codex model_reasoning_effort value for each unified thinking budget level
 * (off, low, medium, high).
 */
export const CODEX_REASONING_EFFORT: Record<string, string> = {
  off: "minimal",
  low: "low",
  medium: "medium",
  high: "high",
};
16
-
17
/**
 * Gemini thinkingBudget value for each unified thinking budget level
 * (off, low, medium, high).
 */
export const GEMINI_THINKING_BUDGET: Record<string, number> = {
  off: 0,
  low: 4_096,
  medium: 8_192,
  high: 24_576,
};