agent-gauntlet 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -23
- package/dist/index.js +9226 -0
- package/dist/index.js.map +65 -0
- package/dist/scripts/status.js +280 -0
- package/dist/scripts/status.js.map +10 -0
- package/package.json +22 -8
- package/src/built-in-reviews/code-quality.md +0 -25
- package/src/built-in-reviews/index.ts +0 -28
- package/src/bun-plugins.d.ts +0 -4
- package/src/cli-adapters/claude.ts +0 -327
- package/src/cli-adapters/codex.ts +0 -290
- package/src/cli-adapters/cursor.ts +0 -128
- package/src/cli-adapters/gemini.ts +0 -510
- package/src/cli-adapters/github-copilot.ts +0 -141
- package/src/cli-adapters/index.ts +0 -250
- package/src/cli-adapters/thinking-budget.ts +0 -23
- package/src/commands/check.ts +0 -311
- package/src/commands/ci/index.ts +0 -15
- package/src/commands/ci/init.ts +0 -96
- package/src/commands/ci/list-jobs.ts +0 -90
- package/src/commands/clean.ts +0 -54
- package/src/commands/detect.ts +0 -173
- package/src/commands/health.ts +0 -169
- package/src/commands/help.ts +0 -34
- package/src/commands/index.ts +0 -13
- package/src/commands/init.ts +0 -1878
- package/src/commands/list.ts +0 -33
- package/src/commands/review.ts +0 -311
- package/src/commands/run.ts +0 -29
- package/src/commands/shared.ts +0 -267
- package/src/commands/stop-hook.ts +0 -567
- package/src/commands/validate.ts +0 -20
- package/src/commands/wait-ci.ts +0 -518
- package/src/config/ci-loader.ts +0 -33
- package/src/config/ci-schema.ts +0 -28
- package/src/config/global.ts +0 -87
- package/src/config/loader.ts +0 -301
- package/src/config/schema.ts +0 -165
- package/src/config/stop-hook-config.ts +0 -130
- package/src/config/types.ts +0 -65
- package/src/config/validator.ts +0 -592
- package/src/core/change-detector.ts +0 -137
- package/src/core/diff-stats.ts +0 -442
- package/src/core/entry-point.ts +0 -190
- package/src/core/job.ts +0 -96
- package/src/core/run-executor.ts +0 -621
- package/src/core/runner.ts +0 -290
- package/src/gates/check.ts +0 -118
- package/src/gates/resolve-check-command.ts +0 -21
- package/src/gates/result.ts +0 -54
- package/src/gates/review.ts +0 -1333
- package/src/hooks/adapters/claude-stop-hook.ts +0 -99
- package/src/hooks/adapters/cursor-stop-hook.ts +0 -122
- package/src/hooks/adapters/types.ts +0 -94
- package/src/hooks/stop-hook-handler.ts +0 -748
- package/src/index.ts +0 -47
- package/src/output/app-logger.ts +0 -214
- package/src/output/console-log.ts +0 -168
- package/src/output/console.ts +0 -359
- package/src/output/logger.ts +0 -126
- package/src/output/sinks/console-sink.ts +0 -59
- package/src/output/sinks/file-sink.ts +0 -110
- package/src/scripts/status.ts +0 -433
- package/src/templates/workflow.yml +0 -79
- package/src/types/gauntlet-status.ts +0 -79
- package/src/utils/debug-log.ts +0 -392
- package/src/utils/diff-parser.ts +0 -103
- package/src/utils/execution-state.ts +0 -472
- package/src/utils/log-parser.ts +0 -696
- package/src/utils/sanitizer.ts +0 -3
- package/src/utils/session-ref.ts +0 -91
|
@@ -1,327 +0,0 @@
|
|
|
1
|
-
import { exec } from "node:child_process";
|
|
2
|
-
import fs from "node:fs/promises";
|
|
3
|
-
import os from "node:os";
|
|
4
|
-
import path from "node:path";
|
|
5
|
-
import { promisify } from "node:util";
|
|
6
|
-
import { GAUNTLET_STOP_HOOK_ACTIVE_ENV } from "../commands/stop-hook.js";
|
|
7
|
-
import { getDebugLogger } from "../utils/debug-log.js";
|
|
8
|
-
import { type CLIAdapter, runStreamingCommand } from "./index.js";
|
|
9
|
-
import { CLAUDE_THINKING_TOKENS } from "./thinking-budget.js";
|
|
10
|
-
|
|
11
|
-
// Promise-returning wrapper around child_process.exec, so shell commands can be awaited.
const execAsync = promisify(exec);
|
|
12
|
-
// 10 MiB cap passed as `maxBuffer` to buffered exec calls.
const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
|
|
13
|
-
|
|
14
|
-
// Matches OTel console exporter metric blocks dumped to stdout at process exit.
|
|
15
|
-
// Requires `descriptor`, `dataPointType`, and `dataPoints` fields which are
|
|
16
|
-
// unique to OTel SDK output and won't appear in normal code review content.
|
|
17
|
-
// Optionally matches [otel] prefix that some exporters add.
|
|
18
|
-
// The `g` flag is load-bearing: this regex is used with String.match() to collect
// every metric block and with String.replace() to strip all of them.
const OTEL_METRIC_BLOCK_RE =
|
|
19
|
-
/(?:\[otel\]\s*)?\{\s*\n\s*descriptor:\s*\{[\s\S]*?dataPointType:\s*\d+[\s\S]*?dataPoints:\s*\[[\s\S]*?\]\s*,?\s*\n\}/g;
|
|
20
|
-
|
|
21
|
-
/**
 * Usage totals scraped from Claude Code's OTel console output.
 *
 * `cost` and the four token fields are set from the `claude_code.cost.usage` /
 * `claude_code.token.usage` metric blocks and are additionally incremented by
 * `claude_code.api_request` log events. `toolCalls` / `toolContentBytes` come
 * from `claude_code.tool_result` log events; `apiRequests` counts api_request events.
 * Every field is optional: a field stays undefined until something reports it.
 */
interface OtelUsage {
|
|
22
|
-
cost?: number;
|
|
23
|
-
input?: number;
|
|
24
|
-
output?: number;
|
|
25
|
-
cacheRead?: number;
|
|
26
|
-
cacheCreation?: number;
|
|
27
|
-
toolCalls?: number;
|
|
28
|
-
toolContentBytes?: number;
|
|
29
|
-
apiRequests?: number;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
// Token `type` labels accepted from token-usage metric data points; each doubles as an OtelUsage field name.
const TOKEN_TYPES = ["input", "output", "cacheRead", "cacheCreation"] as const;
|
|
33
|
-
|
|
34
|
-
/**
 * Extract the first `value:` number from a cost metric block.
 *
 * Accepts plain decimals as well as exponent notation (`5e-7`), which is how
 * JavaScript prints numbers below 1e-6; the previous `[\d.]+` pattern would
 * have read such a value as `5`.
 *
 * @param block - Text of a single OTel metric block.
 * @returns The parsed cost, or `undefined` when no numeric `value:` is present.
 */
function parseCostBlock(block: string): number | undefined {
  // The trailing lookahead rejects malformed numerals such as `1.2.3`
  // instead of silently parsing their leading digits.
  const captured = /value:\s*(\d*\.?\d+(?:[eE][+-]?\d+)?)(?![\d.])/.exec(block)?.[1];
  if (captured === undefined) return undefined;
  const cost = Number.parseFloat(captured);
  return Number.isFinite(cost) ? cost : undefined;
}
|
|
38
|
-
|
|
39
|
-
/**
 * Read per-type token counts out of a token-usage metric block.
 *
 * Each data point is located by a `type: "<name>"` attribute followed by an
 * integer `value:`. Types not listed in TOKEN_TYPES are ignored; if a type
 * appears more than once, the last occurrence wins.
 *
 * @param block - Text of a single OTel metric block.
 * @returns The token fields found in the block (possibly empty).
 */
function parseTokenBlock(block: string): Partial<OtelUsage> {
  const dataPointRe = /type:\s*"(\w+)"[\s\S]*?value:\s*(\d+)(?:,|\s*\})/g;
  const counts: Partial<OtelUsage> = {};
  for (const hit of block.matchAll(dataPointRe)) {
    const tokenType = hit[1]! as (typeof TOKEN_TYPES)[number];
    if (!TOKEN_TYPES.includes(tokenType)) continue;
    counts[tokenType] = Number.parseInt(hit[2]!, 10);
  }
  return counts;
}
|
|
50
|
-
|
|
51
|
-
/**
 * Fold a list of OTel metric blocks into a single usage record.
 *
 * Blocks are dispatched on their `name: "..."` field: the cost metric sets
 * `cost`, the token metric merges its per-type counts. Blocks without a name,
 * or with any other name, are skipped.
 *
 * @param blocks - Metric block texts, in the order they were printed.
 * @returns The usage collected from the recognised blocks.
 */
function parseOtelMetrics(blocks: string[]): OtelUsage {
  const totals: OtelUsage = {};
  for (const block of blocks) {
    const metricName = /name:\s*"([^"]+)"/.exec(block)?.[1];
    switch (metricName) {
      case "claude_code.cost.usage":
        totals.cost = parseCostBlock(block);
        break;
      case "claude_code.token.usage":
        Object.assign(totals, parseTokenBlock(block));
        break;
      default:
        break;
    }
  }
  return totals;
}
|
|
65
|
-
|
|
66
|
-
// Matches OTel console log exporter event records emitted by Claude Code.
|
|
67
|
-
// The Node.js SDK console exporter uses util.inspect() format with unquoted keys
|
|
68
|
-
// and single-quoted strings. Blocks start with `resource:` and contain a `body:`
|
|
69
|
-
// field with the event name (e.g. 'claude_code.tool_result').
|
|
70
|
-
// The `g` flag is load-bearing: used with String.match() to collect every
// event block and with String.replace() to strip all of them from the output.
const OTEL_LOG_BLOCK_RE =
|
|
71
|
-
/\{\s*\n\s*resource:\s*\{[\s\S]*?body:\s*'claude_code\.\w+'[\s\S]*?\n\}/g;
|
|
72
|
-
|
|
73
|
-
/**
 * Pre-compiled regexes for extracting single-quoted attribute values from OTel log blocks.
 * Capture group 1 holds the raw text between the quotes (always a string, possibly
 * empty); callers convert numeric attributes themselves. None of these carry the
 * `g` flag, so they are stateless and safe to reuse across blocks.
 */
|
|
74
|
-
const OTEL_ATTR_RE = {
|
|
75
|
-
body: /body:\s*'([^']*)'/,
|
|
76
|
-
tool_result_size_bytes: /tool_result_size_bytes:\s*'([^']*)'/,
|
|
77
|
-
input_tokens: /input_tokens:\s*'([^']*)'/,
|
|
78
|
-
output_tokens: /output_tokens:\s*'([^']*)'/,
|
|
79
|
-
cache_read_tokens: /cache_read_tokens:\s*'([^']*)'/,
|
|
80
|
-
cache_creation_tokens: /cache_creation_tokens:\s*'([^']*)'/,
|
|
81
|
-
cost_usd: /cost_usd:\s*'([^']*)'/,
|
|
82
|
-
} as const;
|
|
83
|
-
|
|
84
|
-
/**
 * Maps OTel api_request attribute regexes to OtelUsage fields.
 * Each entry is [regex whose first capture group is the attribute value,
 * OtelUsage field that value is added to].
 */
|
|
85
|
-
const API_REQUEST_FIELDS: Array<[RegExp, keyof OtelUsage]> = [
|
|
86
|
-
[OTEL_ATTR_RE.input_tokens, "input"],
|
|
87
|
-
[OTEL_ATTR_RE.output_tokens, "output"],
|
|
88
|
-
[OTEL_ATTR_RE.cache_read_tokens, "cacheRead"],
|
|
89
|
-
[OTEL_ATTR_RE.cache_creation_tokens, "cacheCreation"],
|
|
90
|
-
[OTEL_ATTR_RE.cost_usd, "cost"],
|
|
91
|
-
];
|
|
92
|
-
|
|
93
|
-
/**
 * Accumulate a tool_result log block into usage.
 *
 * Always counts one tool call. The block's `tool_result_size_bytes` attribute
 * is added to `toolContentBytes` only when it parses to a finite number: a
 * malformed value used to produce NaN, and because `NaN || 0` is 0 the next
 * call would silently discard everything accumulated so far.
 *
 * @param block - Text of one `claude_code.tool_result` log block.
 * @param usage - Running totals, mutated in place.
 */
function accumulateToolResult(block: string, usage: OtelUsage): void {
  usage.toolCalls = (usage.toolCalls ?? 0) + 1;

  const captured = block.match(OTEL_ATTR_RE.tool_result_size_bytes)?.[1];
  if (captured === undefined || captured.trim() === "") return;

  const bytes = Number(captured);
  if (!Number.isFinite(bytes)) return; // skip garbage instead of poisoning the total
  usage.toolContentBytes = (usage.toolContentBytes ?? 0) + bytes;
}
|
|
101
|
-
|
|
102
|
-
/**
 * Accumulate an api_request log block into usage.
 *
 * Always counts one API request, then adds each attribute listed in
 * API_REQUEST_FIELDS to its usage field. Attributes that are absent, empty,
 * or not a finite number are skipped; previously a non-numeric value yielded
 * NaN, which the `|| 0` fallback then turned into a silent reset of the total.
 *
 * @param block - Text of one `claude_code.api_request` log block.
 * @param usage - Running totals, mutated in place.
 */
function accumulateApiRequest(block: string, usage: OtelUsage): void {
  usage.apiRequests = (usage.apiRequests ?? 0) + 1;

  for (const [attrRe, field] of API_REQUEST_FIELDS) {
    const captured = block.match(attrRe)?.[1];
    if (captured === undefined || captured.trim() === "") continue;

    const amount = Number(captured);
    if (!Number.isFinite(amount)) continue;
    usage[field] = (usage[field] ?? 0) + amount;
  }
}
|
|
112
|
-
|
|
113
|
-
/**
 * Scan raw output for OTel log-event blocks and fold the recognised ones into `usage`.
 *
 * Only `claude_code.tool_result` and `claude_code.api_request` events are
 * counted; any other event body is ignored. Does nothing when no block matches.
 *
 * @param raw - Complete captured output of the CLI run.
 * @param usage - Running totals, mutated in place.
 */
function parseOtelLogEvents(raw: string, usage: OtelUsage): void {
  for (const block of raw.match(OTEL_LOG_BLOCK_RE) ?? []) {
    const eventName = OTEL_ATTR_RE.body.exec(block)?.[1];
    switch (eventName) {
      case "claude_code.tool_result":
        accumulateToolResult(block, usage);
        break;
      case "claude_code.api_request":
        accumulateApiRequest(block, usage);
        break;
      default:
        break;
    }
  }
}
|
|
126
|
-
|
|
127
|
-
/** Usage fields printed after the cost, paired with the label used in the summary line. */
const OTEL_SUMMARY_FIELDS: Array<[keyof OtelUsage, string]> = [
  ["input", "in"],
  ["output", "out"],
  ["cacheRead", "cacheRead"],
  ["cacheCreation", "cacheWrite"],
  ["toolCalls", "tool_calls"],
  ["toolContentBytes", "tool_content_bytes"],
  ["apiRequests", "api_requests"],
];

/**
 * Render usage as a one-line `[otel] key=value ...` summary.
 *
 * @param usage - Collected totals.
 * @returns The summary line, or `null` when neither a cost nor an input-token
 *   count is known (other fields alone do not trigger a summary).
 */
function formatOtelSummary(usage: OtelUsage): string | null {
  const { cost, input } = usage;
  if (cost === undefined && input === undefined) return null;

  const labelled = OTEL_SUMMARY_FIELDS.filter(
    ([field]) => usage[field] !== undefined,
  ).map(([field, label]) => `${label}=${usage[field]}`);

  // Cost, when present, always leads and is fixed to four decimals.
  const segments =
    cost === undefined ? labelled : [`cost=$${cost.toFixed(4)}`, ...labelled];

  return `[otel] ${segments.join(" ")}`;
}
|
|
148
|
-
|
|
149
|
-
/**
 * Pull OTel telemetry out of raw CLI output, report it, and return the output
 * with the telemetry blocks removed.
 *
 * Usage is gathered from metric blocks first and then from log-event blocks.
 * When a summary can be formatted it is sent to `onLog` (wrapped in newlines),
 * written to stderr, and passed to the debug logger if one is available.
 *
 * NOTE(review): token and cost fields are set from metric blocks and then
 * incremented again by api_request events — confirm the two sources never
 * report the same usage twice.
 *
 * @param raw - Complete captured output of the CLI run.
 * @param onLog - Optional sink for the human-readable summary line.
 * @returns `raw` without OTel metric/log blocks and without trailing whitespace.
 */
function extractOtelMetrics(
  raw: string,
  onLog?: (msg: string) => void,
): string {
  const metricBlocks = raw.match(OTEL_METRIC_BLOCK_RE);
  const usage: OtelUsage =
    metricBlocks === null ? {} : parseOtelMetrics(metricBlocks);

  // Log events contribute tool-call and API-request data on top of the metrics.
  parseOtelLogEvents(raw, usage);

  const summary = formatOtelSummary(usage);
  if (summary) {
    onLog?.(`\n${summary}\n`);
    process.stderr.write(`${summary}\n`);
    getDebugLogger()?.logTelemetry({ adapter: "claude", summary });
  }

  return stripOtelBlocks(raw);
}
|
|
171
|
-
|
|
172
|
-
/**
 * Compute the environment overrides that switch on console-exported telemetry.
 *
 * A variable is only included when the current process does not already have
 * a non-empty value for it, so explicit user configuration is left alone.
 *
 * @returns A map containing zero to three of the telemetry variables.
 */
function buildOtelEnv(): Record<string, string> {
  const defaults: Array<[string, string]> = [
    ["CLAUDE_CODE_ENABLE_TELEMETRY", "1"],
    ["OTEL_METRICS_EXPORTER", "console"],
    ["OTEL_LOGS_EXPORTER", "console"],
  ];

  const overrides: Record<string, string> = {};
  for (const [name, fallback] of defaults) {
    if (process.env[name]) continue; // already configured by the caller
    overrides[name] = fallback;
  }
  return overrides;
}
|
|
186
|
-
|
|
187
|
-
/**
 * Remove every OTel metric block and log-event block from raw output.
 *
 * @param raw - Captured CLI output.
 * @returns The output without telemetry blocks and without trailing whitespace.
 */
function stripOtelBlocks(raw: string): string {
  const withoutMetrics = raw.replace(OTEL_METRIC_BLOCK_RE, "");
  const withoutLogEvents = withoutMetrics.replace(OTEL_LOG_BLOCK_RE, "");
  return withoutLogEvents.trimEnd();
}
|
|
194
|
-
|
|
195
|
-
/**
 * CLI adapter that runs reviews through the `claude` command-line tool.
 *
 * The prompt and diff are written to a temp file and piped to `claude -p`.
 * Telemetry that the CLI prints to its output is summarised and stripped
 * before the review text is returned.
 */
export class ClaudeAdapter implements CLIAdapter {
  name = "claude";

  /** Resolves to `true` when `which claude` succeeds, `false` otherwise. */
  async isAvailable(): Promise<boolean> {
    try {
      await execAsync("which claude");
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Report whether the CLI can be used.
   * Only presence on PATH is checked; an installed CLI is reported as healthy.
   */
  async checkHealth(): Promise<{
    available: boolean;
    status: "healthy" | "missing" | "unhealthy";
    message?: string;
  }> {
    const available = await this.isAvailable();
    if (!available) {
      return {
        available: false,
        status: "missing",
        message: "Command not found",
      };
    }

    return { available: true, status: "healthy", message: "Ready" };
  }

  /** Project-relative directory for Claude commands. */
  getProjectCommandDir(): string | null {
    return ".claude/commands";
  }

  /** Per-user directory for Claude commands, under the home directory. */
  getUserCommandDir(): string | null {
    return path.join(os.homedir(), ".claude", "commands");
  }

  /** Project-relative directory for Claude skills. */
  getProjectSkillDir(): string | null {
    return ".claude/skills";
  }

  /** Per-user directory for Claude skills, under the home directory. */
  getUserSkillDir(): string | null {
    return path.join(os.homedir(), ".claude", "skills");
  }

  /** File extension used for command files. */
  getCommandExtension(): string {
    return ".md";
  }

  /** Command files may be symlinked because no transformation is applied. */
  canUseSymlink(): boolean {
    return true;
  }

  /** Identity transform: the Markdown is returned unchanged. */
  transformCommand(markdownContent: string): string {
    return markdownContent;
  }

  /**
   * Run a review through the Claude CLI and return its cleaned output.
   *
   * With `onOutput` the command is streamed via `runStreamingCommand`; chunks
   * are buffered so that telemetry blocks can be removed before the cleaned
   * text is forwarded to `onOutput` in one call. Without `onOutput` the
   * command runs through a buffered shell `exec`.
   *
   * NOTE(review): `opts.model` is accepted but not used by this adapter.
   *
   * @param opts.prompt - Review instructions.
   * @param opts.diff - Diff text appended after a `--- DIFF ---` separator.
   * @param opts.timeoutMs - Forwarded as the command timeout.
   * @param opts.onOutput - Receives the telemetry summary and the cleaned output.
   * @param opts.allowToolUse - `false` disables all tools; otherwise Read, Glob and Grep are allowed.
   * @param opts.thinkingBudget - Key into CLAUDE_THINKING_TOKENS; unknown keys are ignored.
   * @returns The CLI output with OTel blocks stripped.
   */
  async execute(opts: {
    prompt: string;
    diff: string;
    model?: string;
    timeoutMs?: number;
    onOutput?: (chunk: string) => void;
    allowToolUse?: boolean;
    thinkingBudget?: string;
  }): Promise<string> {
    const fullContent = `${opts.prompt}\n\n--- DIFF ---\n${opts.diff}`;

    const tmpDir = os.tmpdir();
    const tmpFile = path.join(
      tmpDir,
      `gauntlet-claude-${process.pid}-${Date.now()}.txt`,
    );
    await fs.writeFile(tmpFile, fullContent);

    const args = ["-p"];
    if (opts.allowToolUse === false) {
      args.push("--tools", "");
    } else {
      args.push("--allowedTools", "Read,Glob,Grep");
    }
    args.push("--max-turns", "10");

    const otelEnv = buildOtelEnv();
    const thinkingEnv: Record<string, string> = {};
    // Own-property check: the `in` operator also matches inherited keys such
    // as "toString", which would export a function's source as the budget.
    if (
      opts.thinkingBudget &&
      Object.prototype.hasOwnProperty.call(
        CLAUDE_THINKING_TOKENS,
        opts.thinkingBudget,
      )
    ) {
      thinkingEnv.MAX_THINKING_TOKENS = String(
        CLAUDE_THINKING_TOKENS[opts.thinkingBudget],
      );
    }

    // Best-effort removal of the temp file; unlink failures are ignored.
    const cleanup = () => fs.unlink(tmpFile).catch(() => {});
    const execEnv = {
      ...process.env,
      // Presumably tells the gauntlet stop hook that it is running inside a
      // gauntlet-spawned session — confirm against commands/stop-hook.
      [GAUNTLET_STOP_HOOK_ACTIVE_ENV]: "1",
      ...otelEnv,
      ...thinkingEnv,
    };

    if (opts.onOutput) {
      // Chunks are held back so OTel blocks never reach the caller's sink.
      const outputBuffer: string[] = [];
      const raw = await runStreamingCommand({
        command: "claude",
        args,
        tmpFile,
        timeoutMs: opts.timeoutMs,
        onOutput: (chunk: string) => {
          outputBuffer.push(chunk);
        },
        cleanup,
        env: execEnv,
      });
      const cleanedOutput = extractOtelMetrics(
        outputBuffer.join(""),
        opts.onOutput,
      );
      opts.onOutput(cleanedOutput);
      return stripOtelBlocks(raw);
    }

    try {
      // An empty argument must be written as "" or the shell would drop it.
      const cmd = `cat "${tmpFile}" | claude ${args.map((a) => (a === "" ? '""' : a)).join(" ")}`;
      const { stdout } = await execAsync(cmd, {
        timeout: opts.timeoutMs,
        maxBuffer: MAX_BUFFER_BYTES,
        env: execEnv,
      });
      return extractOtelMetrics(stdout);
    } finally {
      await cleanup();
    }
  }
}
|
|
@@ -1,290 +0,0 @@
|
|
|
1
|
-
import { exec } from "node:child_process";
|
|
2
|
-
import fs from "node:fs/promises";
|
|
3
|
-
import os from "node:os";
|
|
4
|
-
import path from "node:path";
|
|
5
|
-
import { promisify } from "node:util";
|
|
6
|
-
import { getDebugLogger } from "../utils/debug-log.js";
|
|
7
|
-
import { type CLIAdapter, runStreamingCommand } from "./index.js";
|
|
8
|
-
import { CODEX_REASONING_EFFORT } from "./thinking-budget.js";
|
|
9
|
-
|
|
10
|
-
// Promise-returning wrapper around child_process.exec, so shell commands can be awaited.
const execAsync = promisify(exec);
|
|
11
|
-
// 10 MiB cap passed as `maxBuffer` to buffered exec calls.
const MAX_BUFFER_BYTES = 10 * 1024 * 1024;
|
|
12
|
-
|
|
13
|
-
/**
 * Usage totals accumulated from `codex exec --json` events.
 * Token fields and `apiRequests` are summed from `turn.completed` events;
 * `toolCalls` counts `item.completed` events whose item is a command
 * execution, file change or MCP tool call. A field stays undefined until
 * some event reports it.
 */
interface CodexUsage {
|
|
14
|
-
inputTokens?: number;
|
|
15
|
-
cachedInputTokens?: number;
|
|
16
|
-
outputTokens?: number;
|
|
17
|
-
toolCalls?: number;
|
|
18
|
-
apiRequests?: number;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
/**
 * Decode one JSONL line into an event object.
 *
 * @param line - A single line of output (may be empty or not JSON at all).
 * @returns The parsed value when it is truthy and carries a string `type`
 *   property; `undefined` for malformed JSON or any other shape.
 */
function parseJsonlLine(
  line: string,
): { type: string; [key: string]: unknown } | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(line);
  } catch {
    // Non-JSON lines are expected in mixed output; ignore them.
    return undefined;
  }

  if (!parsed) return undefined;
  const candidate = parsed as { type?: unknown; [key: string]: unknown };
  return typeof candidate.type === "string"
    ? (candidate as { type: string; [key: string]: unknown })
    : undefined;
}
|
|
33
|
-
|
|
34
|
-
/** Maps Codex turn usage JSON fields to CodexUsage fields. */
const TURN_USAGE_MAP: Array<[string, keyof CodexUsage]> = [
  ["input_tokens", "inputTokens"],
  ["cached_input_tokens", "cachedInputTokens"],
  ["output_tokens", "outputTokens"],
];

/**
 * Accumulate a turn.completed event's usage into totals.
 *
 * An event that carries a usage object counts as one API request. Each mapped
 * token field is added only when its value is a finite number: the payload is
 * untyped JSON, and adding a string would turn the running total into a
 * concatenated string.
 *
 * @param event - Parsed `turn.completed` event.
 * @param usage - Running totals, mutated in place.
 */
function accumulateTurnUsage(
  event: { type: string; [key: string]: unknown },
  usage: CodexUsage,
): void {
  const turnUsage = event.usage;
  if (typeof turnUsage !== "object" || turnUsage === null) return;

  usage.apiRequests = (usage.apiRequests ?? 0) + 1;

  const fields = turnUsage as Record<string, unknown>;
  for (const [jsonKey, usageKey] of TURN_USAGE_MAP) {
    const amount = fields[jsonKey];
    if (typeof amount !== "number" || !Number.isFinite(amount)) continue;
    usage[usageKey] = (usage[usageKey] ?? 0) + amount;
  }
}
|
|
55
|
-
|
|
56
|
-
/**
 * Tell whether an item.completed event describes a tool invocation.
 *
 * @param event - Parsed event; its `item.type` is inspected.
 * @returns `true` for command executions, file changes and MCP tool calls;
 *   `false` for anything else, including events with no item or no item type.
 */
function isToolCallItem(event: {
  type: string;
  [key: string]: unknown;
}): boolean {
  const itemType = (event.item as { type?: string } | undefined)?.type;
  switch (itemType) {
    case "command_execution":
    case "file_change":
    case "mcp_tool_call":
      return true;
    default:
      return false;
  }
}
|
|
69
|
-
|
|
70
|
-
/**
 * Return the text of an agent_message item, if the event carries one.
 *
 * @param event - Parsed event; its `item` is inspected.
 * @returns `item.text` when the item type is `agent_message` and the text is
 *   a string (an empty string is returned as-is); otherwise `undefined`.
 */
function extractAgentMessage(event: {
  type: string;
  [key: string]: unknown;
}): string | undefined {
  const item = event.item as { type?: string; text?: string } | undefined;
  if (item?.type !== "agent_message") return undefined;
  return typeof item.text === "string" ? item.text : undefined;
}
|
|
81
|
-
|
|
82
|
-
/** Usage fields in print order, paired with the label used in the summary line. */
const SUMMARY_FIELDS: Array<[keyof CodexUsage, string]> = [
  ["inputTokens", "in"],
  ["cachedInputTokens", "cache"],
  ["outputTokens", "out"],
  ["toolCalls", "tool_calls"],
  ["apiRequests", "api_requests"],
];

/**
 * Render usage as a one-line `[codex-telemetry] key=value ...` summary.
 *
 * @param usage - Collected totals.
 * @returns The summary line, or `null` when no field is set.
 */
function formatCodexSummary(usage: CodexUsage): string | null {
  const parts: string[] = [];
  for (const [field, label] of SUMMARY_FIELDS) {
    const value = usage[field];
    if (value === undefined) continue;
    parts.push(`${label}=${value}`);
  }

  if (parts.length === 0) return null;
  return `[codex-telemetry] ${parts.join(" ")}`;
}
|
|
96
|
-
|
|
97
|
-
/**
 * Handle one item.completed event.
 *
 * Increments `usage.toolCalls` when the item is a tool invocation, and hands
 * back the item's agent-message text when it has one.
 *
 * @param event - Parsed `item.completed` event.
 * @param usage - Running totals, mutated in place.
 * @returns The agent message text, or `undefined` if the item is not a message.
 */
function processItemCompleted(
  event: { type: string; [key: string]: unknown },
  usage: CodexUsage,
): string | undefined {
  const message = extractAgentMessage(event);
  if (!isToolCallItem(event)) return message;

  usage.toolCalls = (usage.toolCalls || 0) + 1;
  return message;
}
|
|
107
|
-
|
|
108
|
-
/**
 * Dispatch a parsed JSONL event by its `type`.
 *
 * `turn.completed` events update token usage and yield no message;
 * `item.completed` events may update the tool-call count and yield an agent
 * message. Every other event type is ignored.
 *
 * @param event - Parsed event.
 * @param usage - Running totals, mutated in place.
 * @returns An agent message when the event carried one, otherwise `undefined`.
 */
function processCodexEvent(
  event: { type: string; [key: string]: unknown },
  usage: CodexUsage,
): string | undefined {
  switch (event.type) {
    case "turn.completed":
      accumulateTurnUsage(event, usage);
      return undefined;
    case "item.completed":
      return processItemCompleted(event, usage);
    default:
      return undefined;
  }
}
|
|
122
|
-
|
|
123
|
-
/**
 * Publish the usage summary, if there is anything to report.
 *
 * The line goes to `onLog` (wrapped in newlines), to stderr, and to the debug
 * logger when one is available. Nothing is emitted for empty usage.
 *
 * @param usage - Collected totals.
 * @param onLog - Optional sink for the human-readable summary line.
 */
function emitCodexSummary(
  usage: CodexUsage,
  onLog?: (msg: string) => void,
): void {
  const summary = formatCodexSummary(usage);
  if (summary) {
    onLog?.(`\n${summary}\n`);
    process.stderr.write(`${summary}\n`);
    getDebugLogger()?.logTelemetry({ adapter: "codex", summary });
  }
}
|
|
134
|
-
|
|
135
|
-
/**
 * Walk the JSONL stream produced by `codex exec --json`.
 *
 * Each line is trimmed and parsed; unparseable lines are skipped. Token usage
 * and tool-call counts are accumulated across events, and the most recent
 * agent message is kept as the result text. A telemetry summary is emitted
 * once the whole stream has been processed.
 *
 * @param raw - Complete captured output.
 * @param onLog - Optional sink for the telemetry summary line.
 * @returns The last agent message (empty string if none) and the usage totals.
 */
function parseCodexJsonl(
  raw: string,
  onLog?: (msg: string) => void,
): { text: string; usage: CodexUsage } {
  const usage: CodexUsage = {};
  let text = "";

  raw.split("\n").forEach((rawLine) => {
    const event = parseJsonlLine(rawLine.trim());
    if (event === undefined) return;

    const message = processCodexEvent(event, usage);
    if (message !== undefined) {
      text = message;
    }
  });

  emitCodexSummary(usage, onLog);
  return { text, usage };
}
|
|
156
|
-
|
|
157
|
-
/**
 * CLI adapter that runs reviews through `codex exec --json`.
 *
 * The prompt and diff are written to a temp file and piped to the CLI on
 * stdin; the JSONL event stream is parsed for the final agent message.
 */
export class CodexAdapter implements CLIAdapter {
  name = "codex";

  /** Resolves to `true` when `which codex` succeeds, `false` otherwise. */
  async isAvailable(): Promise<boolean> {
    try {
      await execAsync("which codex");
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Report whether the CLI can be used.
   * Only presence on PATH is checked; an installed CLI is reported as healthy.
   */
  async checkHealth(): Promise<{
    available: boolean;
    status: "healthy" | "missing" | "unhealthy";
    message?: string;
  }> {
    const available = await this.isAvailable();
    if (!available) {
      return {
        available: false,
        status: "missing",
        message: "Command not found",
      };
    }

    return { available: true, status: "healthy", message: "Installed" };
  }

  getProjectCommandDir(): string | null {
    // Codex only supports user-level prompts at ~/.codex/prompts/
    // No project-scoped commands available
    return null;
  }

  getUserCommandDir(): string | null {
    // Codex uses user-level prompts at ~/.codex/prompts/
    return path.join(os.homedir(), ".codex", "prompts");
  }

  /** This adapter defines no project-level skill directory. */
  getProjectSkillDir(): string | null {
    return null;
  }

  /** This adapter defines no user-level skill directory. */
  getUserSkillDir(): string | null {
    return null;
  }

  /** File extension used for command files. */
  getCommandExtension(): string {
    return ".md";
  }

  canUseSymlink(): boolean {
    // Codex uses the same Markdown format as our canonical file
    return true;
  }

  transformCommand(markdownContent: string): string {
    // Codex uses the same Markdown format as Claude, no transformation needed
    return markdownContent;
  }

  /**
   * Assemble the `codex` argument list.
   *
   * Always runs `exec` in the current working directory with a read-only
   * sandbox and approvals disabled, and ends with `--json -` so the prompt is
   * read from stdin.
   *
   * @param allowToolUse - `false` adds `--disable shell_tool`.
   * @param thinkingBudget - Key into CODEX_REASONING_EFFORT; unknown keys are ignored.
   */
  private buildArgs(allowToolUse?: boolean, thinkingBudget?: string): string[] {
    const args = [
      "exec",
      "--cd",
      process.cwd(),
      "--sandbox",
      "read-only",
      "-c",
      'ask_for_approval="never"',
    ];
    if (allowToolUse === false) {
      args.push("--disable", "shell_tool");
    }
    // Own-property check: the `in` operator also matches inherited keys such
    // as "toString", which would inject a function's source into the config.
    if (
      thinkingBudget &&
      Object.prototype.hasOwnProperty.call(
        CODEX_REASONING_EFFORT,
        thinkingBudget,
      )
    ) {
      const effort = CODEX_REASONING_EFFORT[thinkingBudget];
      args.push("-c", `model_reasoning_effort="${effort}"`);
    }
    args.push("--json", "-");
    return args;
  }

  /**
   * Run a review through the Codex CLI and return the final agent message.
   *
   * With `onOutput` the command is streamed and every raw chunk is forwarded
   * as it arrives; otherwise it runs through a buffered shell `exec`. In both
   * cases the raw output (trailing whitespace trimmed) is returned when no
   * agent message can be found in the event stream.
   *
   * NOTE(review): `opts.model` is accepted but not used by this adapter.
   *
   * @param opts.prompt - Review instructions.
   * @param opts.diff - Diff text appended after a `--- DIFF ---` separator.
   * @param opts.timeoutMs - Forwarded as the command timeout.
   * @param opts.onOutput - Receives raw chunks and the telemetry summary.
   * @param opts.allowToolUse - `false` disables the shell tool.
   * @param opts.thinkingBudget - Key into CODEX_REASONING_EFFORT.
   */
  async execute(opts: {
    prompt: string;
    diff: string;
    model?: string;
    timeoutMs?: number;
    onOutput?: (chunk: string) => void;
    allowToolUse?: boolean;
    thinkingBudget?: string;
  }): Promise<string> {
    const fullContent = `${opts.prompt}\n\n--- DIFF ---\n${opts.diff}`;

    const tmpDir = os.tmpdir();
    // The pid keeps concurrent gauntlet processes from writing the same file
    // within one millisecond (same naming scheme as the Claude adapter).
    const tmpFile = path.join(
      tmpDir,
      `gauntlet-codex-${process.pid}-${Date.now()}.txt`,
    );
    await fs.writeFile(tmpFile, fullContent);

    const args = this.buildArgs(opts.allowToolUse, opts.thinkingBudget);

    // Best-effort removal of the temp file; unlink failures are ignored.
    const cleanup = () => fs.unlink(tmpFile).catch(() => {});

    // If onOutput callback is provided, use spawn for real-time streaming
    if (opts.onOutput) {
      const raw = await runStreamingCommand({
        command: "codex",
        args,
        tmpFile,
        timeoutMs: opts.timeoutMs,
        onOutput: (chunk: string) => {
          opts.onOutput?.(chunk);
        },
        cleanup,
      });

      const { text } = parseCodexJsonl(raw, opts.onOutput);
      return text || raw.trimEnd();
    }

    // Otherwise use exec for buffered output
    try {
      // Double-quote each argument, escaping the characters that stay
      // special inside double quotes: " \ $ and backtick.
      const quoteArg = (a: string) => `"${a.replace(/(["\\$`])/g, "\\$1")}"`;
      const cmd = `cat "${tmpFile}" | codex ${args.map(quoteArg).join(" ")}`;
      const { stdout } = await execAsync(cmd, {
        timeout: opts.timeoutMs,
        maxBuffer: MAX_BUFFER_BYTES,
      });
      const { text } = parseCodexJsonl(stdout);
      return text || stdout.trimEnd();
    } finally {
      await cleanup();
    }
  }
}
|