agent-gauntlet 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/src/built-in-reviews/code-quality.md +25 -0
- package/src/built-in-reviews/index.ts +28 -0
- package/src/cli-adapters/claude.ts +236 -84
- package/src/cli-adapters/codex.ts +196 -77
- package/src/cli-adapters/cursor.ts +17 -57
- package/src/cli-adapters/gemini.ts +415 -108
- package/src/cli-adapters/github-copilot.ts +17 -57
- package/src/cli-adapters/index.ts +142 -0
- package/src/cli-adapters/thinking-budget.ts +23 -0
- package/src/commands/check.ts +18 -19
- package/src/commands/clean.ts +10 -5
- package/src/commands/detect.ts +104 -29
- package/src/commands/index.ts +1 -0
- package/src/commands/init.ts +1371 -261
- package/src/commands/review.ts +18 -19
- package/src/commands/shared.ts +60 -32
- package/src/commands/stop-hook.ts +134 -332
- package/src/commands/wait-ci.ts +518 -0
- package/src/config/global.ts +10 -1
- package/src/config/loader.ts +28 -12
- package/src/config/schema.ts +31 -8
- package/src/config/stop-hook-config.ts +80 -43
- package/src/config/types.ts +2 -0
- package/src/config/validator.ts +8 -8
- package/src/core/change-detector.ts +38 -32
- package/src/core/run-executor.ts +50 -46
- package/src/core/runner.ts +50 -24
- package/src/gates/check.ts +3 -14
- package/src/gates/resolve-check-command.ts +21 -0
- package/src/gates/result.ts +1 -0
- package/src/gates/review.ts +44 -5
- package/src/hooks/adapters/claude-stop-hook.ts +99 -0
- package/src/hooks/adapters/cursor-stop-hook.ts +122 -0
- package/src/hooks/adapters/types.ts +94 -0
- package/src/hooks/stop-hook-handler.ts +748 -0
- package/src/index.ts +2 -0
- package/src/output/app-logger.ts +1 -1
- package/src/output/console.ts +3 -3
- package/src/output/sinks/file-sink.ts +2 -2
- package/src/scripts/status.ts +433 -0
- package/src/types/gauntlet-status.ts +14 -2
- package/src/utils/debug-log.ts +55 -3
- package/src/utils/diff-parser.ts +63 -48
- package/src/utils/execution-state.ts +69 -9
- package/src/templates/run_gauntlet.template.md +0 -41
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-gauntlet",
|
|
3
|
-
"version": "0.8.0",
|
|
3
|
+
"version": "0.10.0",
|
|
4
4
|
"description": "A CLI tool for testing AI coding agents",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"author": "Paul Caplan",
|
|
@@ -35,10 +35,10 @@
|
|
|
35
35
|
"build": "bun build --compile --minify --sourcemap ./src/index.ts --outfile bin/agent-gauntlet",
|
|
36
36
|
"test": "bun test",
|
|
37
37
|
"lint": "biome check src",
|
|
38
|
+
"typecheck": "tsc --noEmit && tsc --noEmit -p test/tsconfig.json",
|
|
38
39
|
"changeset": "changeset",
|
|
39
40
|
"version": "changeset version",
|
|
40
|
-
"release": "
|
|
41
|
-
"gen-changeset": "bun scripts/gen-changeset.ts"
|
|
41
|
+
"release": "changeset publish"
|
|
42
42
|
},
|
|
43
43
|
"devDependencies": {
|
|
44
44
|
"@biomejs/biome": "^2.3.11",
|
|
@@ -57,4 +57,4 @@
|
|
|
57
57
|
"yaml": "^2.8.2",
|
|
58
58
|
"zod": "^4.3.5"
|
|
59
59
|
}
|
|
60
|
-
}
|
|
60
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Code Quality Review
|
|
2
|
+
|
|
3
|
+
You are a senior software engineer performing a code review. Your primary goal is to identify **real problems** that could cause bugs, security vulnerabilities, or performance issues in production. Do not report style, formatting, naming conventions, or maintainability suggestions unless you see something egregious.
|
|
4
|
+
|
|
5
|
+
## Focus Areas (in priority order)
|
|
6
|
+
|
|
7
|
+
1. **Bugs** — Logic errors, null/undefined issues, race conditions, unhandled edge cases, resource leaks
|
|
8
|
+
2. **Security** — Injection vulnerabilities, auth/authz flaws, sensitive data exposure, input validation gaps
|
|
9
|
+
3. **Performance** — Algorithmic complexity issues, N+1 queries, blocking operations, memory problems
|
|
10
|
+
4. **Maintainability** — Unclear code, missing error handling, duplication
|
|
11
|
+
|
|
12
|
+
## Do NOT Report
|
|
13
|
+
|
|
14
|
+
- Style, formatting, or naming preferences
|
|
15
|
+
- Missing documentation, comments, or type annotations
|
|
16
|
+
- Suggestions for "better" abstractions or patterns that aren't broken
|
|
17
|
+
- Hypothetical issues that require unlikely preconditions
|
|
18
|
+
- Issues in code that wasn't changed in this diff
|
|
19
|
+
|
|
20
|
+
## Guidelines
|
|
21
|
+
|
|
22
|
+
- **Threshold**: only report issues you would block a PR over
|
|
23
|
+
- Explain **why** each issue is a problem with a concrete failure scenario
|
|
24
|
+
- Provide a **concrete fix** with corrected code
|
|
25
|
+
- If the status quo works correctly, it's not a violation
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
// @ts-expect-error Bun text import
|
|
2
|
+
import codeQualityContent from "./code-quality.md" with { type: "text" };
|
|
3
|
+
|
|
4
|
+
const BUILT_IN_PREFIX = "built-in:";
|
|
5
|
+
|
|
6
|
+
const builtInSources: Record<string, string> = {
|
|
7
|
+
"code-quality": codeQualityContent,
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
/**
 * Report whether a review name carries the built-in prefix.
 *
 * @param name - Review name to inspect.
 * @returns `true` when `name` begins with `BUILT_IN_PREFIX`, otherwise `false`.
 */
export function isBuiltInReview(name: string): boolean {
	const prefixLength = BUILT_IN_PREFIX.length;
	return name.slice(0, prefixLength) === BUILT_IN_PREFIX;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Load a built-in review prompt by name and return its raw markdown content.
 *
 * The lookup only considers keys registered directly on `builtInSources`.
 * A plain index access would also resolve inherited `Object.prototype`
 * members (for example `"constructor"` or `"toString"`), handing callers a
 * function typed as `string`.
 *
 * @param name - Key of the built-in review as registered in `builtInSources`
 *   (e.g. `"code-quality"`). NOTE(review): presumably callers strip the
 *   `built-in:` prefix before calling — confirm against the call sites.
 * @returns The markdown text registered for `name`.
 * @throws Error when no built-in review is registered under `name`.
 */
export function loadBuiltInReview(name: string): string {
	const source = Object.prototype.hasOwnProperty.call(builtInSources, name)
		? builtInSources[name]
		: undefined;

	// Compare against undefined rather than truthiness so that a registered
	// but empty prompt is not reported as an unknown review.
	if (source === undefined) {
		throw new Error(`Unknown built-in review: "${name}"`);
	}

	return source;
}
|
|
@@ -1,14 +1,197 @@
|
|
|
1
|
-
import { exec
|
|
1
|
+
import { exec } from "node:child_process";
|
|
2
2
|
import fs from "node:fs/promises";
|
|
3
3
|
import os from "node:os";
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import { promisify } from "node:util";
|
|
6
6
|
import { GAUNTLET_STOP_HOOK_ACTIVE_ENV } from "../commands/stop-hook.js";
|
|
7
|
-
import
|
|
7
|
+
import { getDebugLogger } from "../utils/debug-log.js";
|
|
8
|
+
import { type CLIAdapter, runStreamingCommand } from "./index.js";
|
|
9
|
+
import { CLAUDE_THINKING_TOKENS } from "./thinking-budget.js";
|
|
8
10
|
|
|
9
11
|
const execAsync = promisify(exec);
|
|
10
12
|
const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
|
|
11
13
|
|
|
14
|
+
// Matches OTel console exporter metric blocks dumped to stdout at process exit.
|
|
15
|
+
// Requires `descriptor`, `dataPointType`, and `dataPoints` fields which are
|
|
16
|
+
// unique to OTel SDK output and won't appear in normal code review content.
|
|
17
|
+
// Optionally matches [otel] prefix that some exporters add.
|
|
18
|
+
const OTEL_METRIC_BLOCK_RE =
|
|
19
|
+
/(?:\[otel\]\s*)?\{\s*\n\s*descriptor:\s*\{[\s\S]*?dataPointType:\s*\d+[\s\S]*?dataPoints:\s*\[[\s\S]*?\]\s*,?\s*\n\}/g;
|
|
20
|
+
|
|
21
|
+
/** Usage figures collected from the OTel console output of one Claude run. */
interface OtelUsage {
	/** Cost, from the `claude_code.cost.usage` metric or summed `cost_usd` attributes. */
	cost?: number;
	/** Input token count. */
	input?: number;
	/** Output token count. */
	output?: number;
	/** Cache-read token count. */
	cacheRead?: number;
	/** Cache-creation token count. */
	cacheCreation?: number;
	/** Number of `claude_code.tool_result` log events seen. */
	toolCalls?: number;
	/** Sum of `tool_result_size_bytes` across tool_result events. */
	toolContentBytes?: number;
	/** Number of `claude_code.api_request` log events seen. */
	apiRequests?: number;
}
|
|
31
|
+
|
|
32
|
+
const TOKEN_TYPES = ["input", "output", "cacheRead", "cacheCreation"] as const;
|
|
33
|
+
|
|
34
|
+
/**
 * Extract the first numeric `value:` field from an OTel cost metric block.
 *
 * Accepts plain decimals (`3`, `0.0123`, `.5`) as well as exponent notation
 * (`2.5e-7`), which JavaScript uses when printing very small numbers. A
 * `value:` that is not followed by a well-formed number is skipped instead
 * of producing `NaN`.
 *
 * @param block - Text of a single metric block.
 * @returns The parsed cost, or `undefined` when no numeric `value:` is present.
 */
function parseCostBlock(block: string): number | undefined {
	const numberText = block.match(
		/value:\s*((?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)/,
	)?.[1];
	if (numberText === undefined) return undefined;

	const cost = Number.parseFloat(numberText);
	return Number.isFinite(cost) ? cost : undefined;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Collect per-type token counts from a `claude_code.token.usage` metric block.
 *
 * Each `type: "<name>"` is paired with the next integer `value:` that follows
 * it; only names listed in `TOKEN_TYPES` are kept. A later data point of the
 * same type overwrites an earlier one.
 *
 * @param block - Text of a single metric block.
 * @returns The token fields found in the block (possibly empty).
 */
function parseTokenBlock(block: string): Partial<OtelUsage> {
	const pairPattern = /type:\s*"(\w+)"[\s\S]*?value:\s*(\d+)(?:,|\s*\})/g;
	const counts: Partial<OtelUsage> = {};

	let hit = pairPattern.exec(block);
	while (hit !== null) {
		const kind = hit[1]! as (typeof TOKEN_TYPES)[number];
		if (TOKEN_TYPES.indexOf(kind) !== -1) {
			counts[kind] = Number.parseInt(hit[2]!, 10);
		}
		hit = pairPattern.exec(block);
	}

	return counts;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Build usage figures from a list of OTel metric blocks.
 *
 * Blocks are dispatched on their double-quoted `name:` field: the cost metric
 * sets `cost`, the token metric merges its per-type counts. Blocks without a
 * name, or with any other name, are ignored.
 *
 * @param blocks - Metric blocks matched in the raw output.
 * @returns Usage populated from the recognised blocks.
 */
function parseOtelMetrics(blocks: string[]): OtelUsage {
	const usage: OtelUsage = {};

	blocks.forEach((block) => {
		const metricName = /name:\s*"([^"]+)"/.exec(block)?.[1];
		switch (metricName) {
			case "claude_code.cost.usage":
				usage.cost = parseCostBlock(block);
				break;
			case "claude_code.token.usage":
				Object.assign(usage, parseTokenBlock(block));
				break;
			default:
				break;
		}
	});

	return usage;
}
|
|
65
|
+
|
|
66
|
+
// Matches OTel console log exporter event records emitted by Claude Code.
|
|
67
|
+
// The Node.js SDK console exporter uses util.inspect() format with unquoted keys
|
|
68
|
+
// and single-quoted strings. Blocks start with `resource:` and contain a `body:`
|
|
69
|
+
// field with the event name (e.g. 'claude_code.tool_result').
|
|
70
|
+
const OTEL_LOG_BLOCK_RE =
|
|
71
|
+
/\{\s*\n\s*resource:\s*\{[\s\S]*?body:\s*'claude_code\.\w+'[\s\S]*?\n\}/g;
|
|
72
|
+
|
|
73
|
+
/** Pre-compiled regexes for extracting single-quoted attribute values from OTel log blocks. */
|
|
74
|
+
const OTEL_ATTR_RE = {
|
|
75
|
+
body: /body:\s*'([^']*)'/,
|
|
76
|
+
tool_result_size_bytes: /tool_result_size_bytes:\s*'([^']*)'/,
|
|
77
|
+
input_tokens: /input_tokens:\s*'([^']*)'/,
|
|
78
|
+
output_tokens: /output_tokens:\s*'([^']*)'/,
|
|
79
|
+
cache_read_tokens: /cache_read_tokens:\s*'([^']*)'/,
|
|
80
|
+
cache_creation_tokens: /cache_creation_tokens:\s*'([^']*)'/,
|
|
81
|
+
cost_usd: /cost_usd:\s*'([^']*)'/,
|
|
82
|
+
} as const;
|
|
83
|
+
|
|
84
|
+
/** Maps OTel api_request attribute regexes to OtelUsage fields. */
|
|
85
|
+
const API_REQUEST_FIELDS: Array<[RegExp, keyof OtelUsage]> = [
|
|
86
|
+
[OTEL_ATTR_RE.input_tokens, "input"],
|
|
87
|
+
[OTEL_ATTR_RE.output_tokens, "output"],
|
|
88
|
+
[OTEL_ATTR_RE.cache_read_tokens, "cacheRead"],
|
|
89
|
+
[OTEL_ATTR_RE.cache_creation_tokens, "cacheCreation"],
|
|
90
|
+
[OTEL_ATTR_RE.cost_usd, "cost"],
|
|
91
|
+
];
|
|
92
|
+
|
|
93
|
+
/**
 * Fold one tool_result log block into `usage`.
 *
 * Always increments `toolCalls`; adds the block's `tool_result_size_bytes`
 * attribute to `toolContentBytes` when that attribute is present.
 *
 * @param block - Text of a single OTel log block.
 * @param usage - Accumulator, mutated in place.
 */
function accumulateToolResult(block: string, usage: OtelUsage): void {
	const callsSoFar = usage.toolCalls || 0;
	usage.toolCalls = callsSoFar + 1;

	const sizeText = OTEL_ATTR_RE.tool_result_size_bytes.exec(block)?.[1];
	if (sizeText === undefined) return;

	const bytesSoFar = usage.toolContentBytes || 0;
	usage.toolContentBytes = bytesSoFar + Number(sizeText);
}
|
|
101
|
+
|
|
102
|
+
/**
 * Fold one api_request log block into `usage`.
 *
 * Always increments `apiRequests`; for every attribute listed in
 * `API_REQUEST_FIELDS` that appears in the block, adds its numeric value to
 * the mapped usage field.
 *
 * @param block - Text of a single OTel log block.
 * @param usage - Accumulator, mutated in place.
 */
function accumulateApiRequest(block: string, usage: OtelUsage): void {
	usage.apiRequests = (usage.apiRequests || 0) + 1;

	API_REQUEST_FIELDS.forEach(([pattern, field]) => {
		const captured = pattern.exec(block)?.[1];
		if (captured === undefined) return;

		const runningTotal = usage[field] || 0;
		usage[field] = runningTotal + Number(captured);
	});
}
|
|
112
|
+
|
|
113
|
+
/**
 * Scan raw output for OTel log blocks and accumulate tool_result and
 * api_request events into `usage`. Blocks with any other body are ignored.
 *
 * @param raw - Complete captured output.
 * @param usage - Accumulator, mutated in place.
 */
function parseOtelLogEvents(raw: string, usage: OtelUsage): void {
	for (const block of raw.match(OTEL_LOG_BLOCK_RE) ?? []) {
		switch (OTEL_ATTR_RE.body.exec(block)?.[1]) {
			case "claude_code.tool_result":
				accumulateToolResult(block, usage);
				break;
			case "claude_code.api_request":
				accumulateApiRequest(block, usage);
				break;
			default:
				break;
		}
	}
}
|
|
126
|
+
|
|
127
|
+
const OTEL_SUMMARY_FIELDS: Array<[keyof OtelUsage, string]> = [
|
|
128
|
+
["input", "in"],
|
|
129
|
+
["output", "out"],
|
|
130
|
+
["cacheRead", "cacheRead"],
|
|
131
|
+
["cacheCreation", "cacheWrite"],
|
|
132
|
+
["toolCalls", "tool_calls"],
|
|
133
|
+
["toolContentBytes", "tool_content_bytes"],
|
|
134
|
+
["apiRequests", "api_requests"],
|
|
135
|
+
];
|
|
136
|
+
|
|
137
|
+
/**
 * Render usage as a single `[otel] key=value ...` line.
 *
 * The cost (four decimals) comes first, followed by every defined field in
 * `OTEL_SUMMARY_FIELDS` order.
 *
 * @param usage - Collected usage figures.
 * @returns The summary line, or `null` when neither `cost` nor `input` is set.
 */
function formatOtelSummary(usage: OtelUsage): string | null {
	const { cost } = usage;
	if (cost === undefined && usage.input === undefined) return null;

	const costPart = cost !== undefined ? [`cost=$${cost.toFixed(4)}`] : [];
	const counterParts = OTEL_SUMMARY_FIELDS.filter(
		([key]) => usage[key] !== undefined,
	).map(([key, label]) => `${label}=${usage[key]}`);

	return `[otel] ${[...costPart, ...counterParts].join(" ")}`;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Pull OTel telemetry out of raw CLI output, report it, and return the
 * output with the telemetry blocks removed.
 *
 * When a summary can be formatted it is sent to `onLog` (if given), written
 * to stderr, and passed to the debug logger's `logTelemetry` (if a logger
 * exists).
 *
 * @param raw - Complete captured output.
 * @param onLog - Optional sink that receives the summary wrapped in newlines.
 * @returns `raw` without metric/log blocks and without trailing whitespace.
 */
function extractOtelMetrics(
	raw: string,
	onLog?: (msg: string) => void,
): string {
	const usage: OtelUsage = {};

	const metricBlocks = raw.match(OTEL_METRIC_BLOCK_RE);
	if (metricBlocks !== null) {
		Object.assign(usage, parseOtelMetrics(metricBlocks));
	}

	// Log events contribute tool call and API request data.
	parseOtelLogEvents(raw, usage);

	const summary = formatOtelSummary(usage);
	if (summary) {
		if (onLog) onLog(`\n${summary}\n`);
		process.stderr.write(`${summary}\n`);
		getDebugLogger()?.logTelemetry({ adapter: "claude", summary });
	}

	// Same removal as stripOtelBlocks: metric blocks, log blocks, trimEnd.
	return stripOtelBlocks(raw);
}
|
|
171
|
+
|
|
172
|
+
/**
 * Build environment overrides that switch on console OTel export.
 *
 * A variable is only included when the current process environment has no
 * (or an empty) value for it, so values already set in `process.env` are
 * left alone.
 *
 * @returns Map of variable name to default value for each unset variable.
 */
function buildOtelEnv(): Record<string, string> {
	const defaults: Array<[string, string]> = [
		["CLAUDE_CODE_ENABLE_TELEMETRY", "1"],
		["OTEL_METRICS_EXPORTER", "console"],
		["OTEL_LOGS_EXPORTER", "console"],
	];

	const overrides: Record<string, string> = {};
	for (const [variable, fallback] of defaults) {
		if (!process.env[variable]) {
			overrides[variable] = fallback;
		}
	}
	return overrides;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Remove OTel metric blocks and log blocks from raw output.
 *
 * @param raw - Complete captured output.
 * @returns `raw` with both kinds of block deleted and trailing whitespace trimmed.
 */
function stripOtelBlocks(raw: string): string {
	const withoutMetrics = raw.replace(OTEL_METRIC_BLOCK_RE, "");
	const withoutLogs = withoutMetrics.replace(OTEL_LOG_BLOCK_RE, "");
	return withoutLogs.trimEnd();
}
|
|
194
|
+
|
|
12
195
|
export class ClaudeAdapter implements CLIAdapter {
|
|
13
196
|
name = "claude";
|
|
14
197
|
|
|
@@ -43,21 +226,26 @@ export class ClaudeAdapter implements CLIAdapter {
|
|
|
43
226
|
}
|
|
44
227
|
|
|
45
228
|
getUserCommandDir(): string | null {
|
|
46
|
-
// Claude supports user-level commands at ~/.claude/commands
|
|
47
229
|
return path.join(os.homedir(), ".claude", "commands");
|
|
48
230
|
}
|
|
49
231
|
|
|
232
|
+
getProjectSkillDir(): string | null {
|
|
233
|
+
return ".claude/skills";
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
getUserSkillDir(): string | null {
|
|
237
|
+
return path.join(os.homedir(), ".claude", "skills");
|
|
238
|
+
}
|
|
239
|
+
|
|
50
240
|
getCommandExtension(): string {
|
|
51
241
|
return ".md";
|
|
52
242
|
}
|
|
53
243
|
|
|
54
244
|
canUseSymlink(): boolean {
|
|
55
|
-
// Claude uses the same Markdown format as our canonical file
|
|
56
245
|
return true;
|
|
57
246
|
}
|
|
58
247
|
|
|
59
248
|
transformCommand(markdownContent: string): string {
|
|
60
|
-
// Claude uses the same Markdown format, no transformation needed
|
|
61
249
|
return markdownContent;
|
|
62
250
|
}
|
|
63
251
|
|
|
@@ -67,107 +255,71 @@ export class ClaudeAdapter implements CLIAdapter {
|
|
|
67
255
|
model?: string;
|
|
68
256
|
timeoutMs?: number;
|
|
69
257
|
onOutput?: (chunk: string) => void;
|
|
258
|
+
allowToolUse?: boolean;
|
|
259
|
+
thinkingBudget?: string;
|
|
70
260
|
}): Promise<string> {
|
|
71
261
|
const fullContent = `${opts.prompt}\n\n--- DIFF ---\n${opts.diff}`;
|
|
72
262
|
|
|
73
263
|
const tmpDir = os.tmpdir();
|
|
74
|
-
// Include process.pid for uniqueness across concurrent processes
|
|
75
264
|
const tmpFile = path.join(
|
|
76
265
|
tmpDir,
|
|
77
266
|
`gauntlet-claude-${process.pid}-${Date.now()}.txt`,
|
|
78
267
|
);
|
|
79
268
|
await fs.writeFile(tmpFile, fullContent);
|
|
80
269
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
270
|
+
const args = ["-p"];
|
|
271
|
+
if (opts.allowToolUse === false) {
|
|
272
|
+
args.push("--tools", "");
|
|
273
|
+
} else {
|
|
274
|
+
args.push("--allowedTools", "Read,Glob,Grep");
|
|
275
|
+
}
|
|
276
|
+
args.push("--max-turns", "10");
|
|
277
|
+
|
|
278
|
+
const otelEnv = buildOtelEnv();
|
|
279
|
+
const thinkingEnv: Record<string, string> = {};
|
|
280
|
+
if (opts.thinkingBudget && opts.thinkingBudget in CLAUDE_THINKING_TOKENS) {
|
|
281
|
+
thinkingEnv.MAX_THINKING_TOKENS = String(
|
|
282
|
+
CLAUDE_THINKING_TOKENS[opts.thinkingBudget],
|
|
283
|
+
);
|
|
284
|
+
}
|
|
92
285
|
|
|
93
286
|
const cleanup = () => fs.unlink(tmpFile).catch(() => {});
|
|
287
|
+
const execEnv = {
|
|
288
|
+
...process.env,
|
|
289
|
+
[GAUNTLET_STOP_HOOK_ACTIVE_ENV]: "1",
|
|
290
|
+
...otelEnv,
|
|
291
|
+
...thinkingEnv,
|
|
292
|
+
};
|
|
94
293
|
|
|
95
|
-
// If onOutput callback is provided, use spawn for real-time streaming
|
|
96
294
|
if (opts.onOutput) {
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
env: {
|
|
109
|
-
...process.env,
|
|
110
|
-
[GAUNTLET_STOP_HOOK_ACTIVE_ENV]: "1",
|
|
111
|
-
},
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
stream.pipe(child.stdin);
|
|
115
|
-
|
|
116
|
-
let timeoutId: ReturnType<typeof setTimeout> | undefined;
|
|
117
|
-
if (opts.timeoutMs) {
|
|
118
|
-
timeoutId = setTimeout(() => {
|
|
119
|
-
child.kill("SIGTERM");
|
|
120
|
-
reject(new Error("Command timed out"));
|
|
121
|
-
}, opts.timeoutMs);
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
child.stdout.on("data", (data: Buffer) => {
|
|
125
|
-
const chunk = data.toString();
|
|
126
|
-
chunks.push(chunk);
|
|
127
|
-
opts.onOutput?.(chunk);
|
|
128
|
-
});
|
|
129
|
-
|
|
130
|
-
child.stderr.on("data", (data: Buffer) => {
|
|
131
|
-
// Only log stderr, don't include in return value
|
|
132
|
-
opts.onOutput?.(data.toString());
|
|
133
|
-
});
|
|
134
|
-
|
|
135
|
-
child.on("close", (code) => {
|
|
136
|
-
if (timeoutId) clearTimeout(timeoutId);
|
|
137
|
-
handle.close().catch(() => {});
|
|
138
|
-
cleanup().then(() => {
|
|
139
|
-
if (code === 0 || code === null) {
|
|
140
|
-
resolve(chunks.join(""));
|
|
141
|
-
} else {
|
|
142
|
-
reject(new Error(`Process exited with code ${code}`));
|
|
143
|
-
}
|
|
144
|
-
});
|
|
145
|
-
});
|
|
146
|
-
|
|
147
|
-
child.on("error", (err) => {
|
|
148
|
-
if (timeoutId) clearTimeout(timeoutId);
|
|
149
|
-
handle.close().catch(() => {});
|
|
150
|
-
cleanup().then(() => reject(err));
|
|
151
|
-
});
|
|
152
|
-
})
|
|
153
|
-
.catch((err) => {
|
|
154
|
-
cleanup().then(() => reject(err));
|
|
155
|
-
});
|
|
295
|
+
const outputBuffer: string[] = [];
|
|
296
|
+
const raw = await runStreamingCommand({
|
|
297
|
+
command: "claude",
|
|
298
|
+
args,
|
|
299
|
+
tmpFile,
|
|
300
|
+
timeoutMs: opts.timeoutMs,
|
|
301
|
+
onOutput: (chunk: string) => {
|
|
302
|
+
outputBuffer.push(chunk);
|
|
303
|
+
},
|
|
304
|
+
cleanup,
|
|
305
|
+
env: execEnv,
|
|
156
306
|
});
|
|
307
|
+
const cleanedOutput = extractOtelMetrics(
|
|
308
|
+
outputBuffer.join(""),
|
|
309
|
+
opts.onOutput,
|
|
310
|
+
);
|
|
311
|
+
opts.onOutput(cleanedOutput);
|
|
312
|
+
return stripOtelBlocks(raw);
|
|
157
313
|
}
|
|
158
314
|
|
|
159
|
-
// Otherwise use exec for buffered output
|
|
160
315
|
try {
|
|
161
|
-
const cmd = `cat "${tmpFile}" | claude
|
|
316
|
+
const cmd = `cat "${tmpFile}" | claude ${args.map((a) => (a === "" ? '""' : a)).join(" ")}`;
|
|
162
317
|
const { stdout } = await execAsync(cmd, {
|
|
163
318
|
timeout: opts.timeoutMs,
|
|
164
319
|
maxBuffer: MAX_BUFFER_BYTES,
|
|
165
|
-
env:
|
|
166
|
-
...process.env,
|
|
167
|
-
[GAUNTLET_STOP_HOOK_ACTIVE_ENV]: "1",
|
|
168
|
-
},
|
|
320
|
+
env: execEnv,
|
|
169
321
|
});
|
|
170
|
-
return stdout;
|
|
322
|
+
return extractOtelMetrics(stdout);
|
|
171
323
|
} finally {
|
|
172
324
|
await cleanup();
|
|
173
325
|
}
|