edsger 0.55.4 → 0.56.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/quality-benchmark/index.d.ts +32 -0
- package/dist/commands/quality-benchmark/index.js +124 -0
- package/dist/index.js +24 -0
- package/dist/phases/quality-benchmark/index.d.ts +65 -0
- package/dist/phases/quality-benchmark/index.js +194 -0
- package/dist/phases/quality-benchmark/mcp-server.d.ts +46 -0
- package/dist/phases/quality-benchmark/mcp-server.js +252 -0
- package/dist/phases/quality-benchmark/parsers.d.ts +22 -0
- package/dist/phases/quality-benchmark/parsers.js +1022 -0
- package/dist/phases/quality-benchmark/prompts.d.ts +31 -0
- package/dist/phases/quality-benchmark/prompts.js +154 -0
- package/dist/phases/quality-benchmark/rubric.md +1066 -0
- package/dist/phases/quality-benchmark/tool-catalog.d.ts +33 -0
- package/dist/phases/quality-benchmark/tool-catalog.js +597 -0
- package/dist/phases/quality-benchmark/tool-runner.d.ts +69 -0
- package/dist/phases/quality-benchmark/tool-runner.js +399 -0
- package/dist/phases/quality-benchmark/types.d.ts +312 -0
- package/dist/phases/quality-benchmark/types.js +23 -0
- package/package.json +4 -4
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool execution layer for the quality-benchmark phase.
|
|
3
|
+
*
|
|
4
|
+
* Three operations exposed to the rest of the phase (and ultimately to
|
|
5
|
+
* the LLM via the MCP server in `mcp-server.ts`):
|
|
6
|
+
*
|
|
7
|
+
* probeTool(id, ctx) -> { available, version, install_command }
|
|
8
|
+
* installTool(id, ctx) -> { installed, version, error }
|
|
9
|
+
* executeTool(id, ctx) -> { summary, run, parser_output }
|
|
10
|
+
*
|
|
11
|
+
* All commands come from the authoritative `TOOL_CATALOG` — callers may
|
|
12
|
+
* not pass arbitrary command strings. Outputs are saved to a per-run
|
|
13
|
+
* scratch directory so the LLM never has to ferry megabytes of JSON
|
|
14
|
+
* through its context; only the small `ParsedToolOutput` flows back.
|
|
15
|
+
*
|
|
16
|
+
* Safety rails enforced here (in addition to those documented in the
|
|
17
|
+
* rubric):
|
|
18
|
+
* - All commands run with cwd = repo root (or an explicit override)
|
|
19
|
+
* - Per-command timeout; SIGKILL on overrun
|
|
20
|
+
* - No sudo / no system package managers (refused at install time)
|
|
21
|
+
* - In-memory stdout/stderr captures truncated at about 16 MiB each (the on-disk stdout copy is not capped)
|
|
22
|
+
*/
|
|
23
|
+
import { spawn } from 'node:child_process';
|
|
24
|
+
import { existsSync, mkdirSync, promises as fsp, readFileSync } from 'node:fs';
|
|
25
|
+
import { homedir, tmpdir } from 'node:os';
|
|
26
|
+
import { join } from 'node:path';
|
|
27
|
+
import { parseToolOutput } from './parsers.js';
|
|
28
|
+
import { ALL_INSTALL_COMMANDS, TOOL_CATALOG_BY_ID } from './tool-catalog.js';
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Configuration
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
/** Cap on the in-memory copy of each captured stream (stdout, stderr): 16 MiB. */
const MAX_CAPTURE_BYTES = 16 * 2 ** 20;
/** Amount of pending stdout (1 MiB) at which the buffer is flushed to disk, keeping RSS low on huge logs. */
const STREAM_FLUSH_BYTES = 2 ** 20;
/** Time budget for installer commands: 5 minutes. */
const INSTALL_TIMEOUT_MS = 5 * 60_000;
/** Time budget for probe commands, which should be near-instant: 30 seconds. */
const PROBE_TIMEOUT_MS = 30_000;
|
|
40
|
+
/**
 * Forbidden installer patterns: `sudo` plus system-level package managers
 * (apt/apt-get, brew, yum, dnf, pacman, zypper, apk, choco/chocolatey,
 * scoop, winget). Each pattern is case-insensitive and anchored on word
 * boundaries, so it matches the name anywhere in the command line.
 *
 * The catalog itself never contains these, but we re-check at runtime as a
 * defense in depth — a future PR cannot silently introduce
 * `sudo apt install foo`.
 */
const FORBIDDEN_INSTALL_PATTERNS = [
    /\bsudo\b/i,
    /\bapt(-get)?\b/i,
    /\bbrew\b/i,
    /\byum\b/i,
    /\bdnf\b/i,
    /\bpacman\b/i,
    /\bzypper\b/i,
    /\bapk\b/i,
    /\bchoco(latey)?\b/i,
    /\bscoop\b/i,
    /\bwinget\b/i,
];
|
|
58
|
+
/**
 * Forbidden fragments in tool invocations: anything that mutates the repo.
 * Covers the `--fix`, `--autofix`/`--auto-fix` and `--write` flags, the git
 * subcommands `commit`, `push`, `reset --hard` and `clean -f`, and
 * `rm -rf` on a path starting at `/`.
 *
 * Caught on top of catalog whitelisting because future maintainers might
 * accidentally introduce `--fix`.
 */
const FORBIDDEN_RUN_PATTERNS = [
    // Leading \B: the flag must not be glued to a preceding word character.
    /\B--fix\b/,
    /\B--auto-?fix\b/,
    /\B--write\b/,
    /\bgit\s+(commit|push|reset\s+--hard|clean\s+-f)/i,
    /\brm\s+-rf\s+\//,
];
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
// Scan-dir helpers
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
/**
 * Build the context object passed to the probe/install/execute helpers and
 * make sure the per-run scratch directory exists on disk.
 *
 * @param {object} opts
 * @param {string} opts.repo_root - Copied into the context unchanged.
 * @param {string} [opts.package_manager] - Copied into the context unchanged.
 * @param {boolean} [opts.install_enabled] - Defaults to `true` when nullish.
 * @param {string} [opts.run_id] - Scratch sub-directory name; defaults to the
 *   current ISO timestamp with `:` and `.` replaced by `-`.
 * @param {string} [opts.base_dir] - Parent of the scratch directory; defaults
 *   to `<home or tmp dir>/.edsger/quality-runs`.
 * @returns {{repo_root: string, package_manager: (string|undefined), install_enabled: boolean, scan_dir: string}}
 */
export function createRunnerContext(opts) {
    let runId = opts.run_id;
    if (runId === undefined || runId === null) {
        // Timestamp made filesystem-friendly (no ':' or '.').
        runId = new Date().toISOString().replace(/[:.]/g, '-');
    }
    let baseDir = opts.base_dir;
    if (baseDir === undefined || baseDir === null) {
        const root = homedir() || tmpdir();
        baseDir = join(root, '.edsger', 'quality-runs');
    }
    const scanDir = join(baseDir, runId);
    mkdirSync(scanDir, { recursive: true });
    const installEnabled = opts.install_enabled ?? true;
    return {
        repo_root: opts.repo_root,
        package_manager: opts.package_manager,
        install_enabled: installEnabled,
        scan_dir: scanDir,
    };
}
|
|
85
|
+
// ---------------------------------------------------------------------------
|
|
86
|
+
// Command template resolution
|
|
87
|
+
// ---------------------------------------------------------------------------
|
|
88
|
+
/**
 * Expand the `%REPO_ROOT%`, `%PKG_MANAGER%` and `%SCAN_DIR%` placeholders in
 * a catalog command template.
 *
 * Values are inserted verbatim (no shell quoting is applied here). The
 * substitution is done in a single pass with a replacer function so that:
 *  - `$`-sequences in a value (`$&`, `$$`, `` $` ``, ...) are not interpreted
 *    as replacement patterns, and
 *  - placeholder-looking text inside an inserted value is not expanded again.
 *
 * @param {string} template - Command template from the tool catalog.
 * @param {{repo_root: string, package_manager?: string, scan_dir: string}} ctx
 * @returns {string} The template with every known placeholder substituted;
 *   `%PKG_MANAGER%` falls back to `npm` when `ctx.package_manager` is nullish.
 */
function resolvePlaceholders(template, ctx) {
    const values = {
        REPO_ROOT: ctx.repo_root,
        PKG_MANAGER: ctx.package_manager ?? 'npm',
        SCAN_DIR: ctx.scan_dir,
    };
    return template.replace(
        /%(REPO_ROOT|PKG_MANAGER|SCAN_DIR)%/g,
        (_match, key) => String(values[key]),
    );
}
|
|
94
|
+
/**
 * Look up a tool's catalog entry by id.
 *
 * @param {string} id - Tool id, used as the key into `TOOL_CATALOG_BY_ID`.
 * @returns {object} The catalog entry stored under `id`.
 * @throws {Error} When no (truthy) entry is stored under `id`.
 */
function getEntry(id) {
    const found = TOOL_CATALOG_BY_ID.get(id);
    if (found) {
        return found;
    }
    throw new Error(`Unknown tool id: ${id} (not in TOOL_CATALOG)`);
}
|
|
101
|
+
/**
 * Run a shell command through `bash -lc`, keeping a bounded in-memory copy
 * of stdout/stderr and streaming stdout to a file on disk.
 *
 * @param {string} command - Command line handed to `bash -lc`.
 * @param {object} opts
 * @param {string} opts.cwd - Working directory of the child process.
 * @param {number} opts.timeoutMs - The child is sent SIGKILL after this delay.
 * @param {object} [opts.env] - Extra variables layered over `process.env`.
 * @param {string} [opts.outputPath] - File that receives stdout. Defaults to
 *   a randomly named `.runner-*.out` file in the OS temp directory, so calls
 *   that do not care about the file leave no scratch files inside `cwd`.
 * @returns {Promise<object>} `{ stdout, stderr, exit_code, signal,
 *   duration_ms, timed_out, raw_output_path, bytes_captured }`. `exit_code`
 *   is -1 when the child was terminated by a signal or could not be spawned.
 * @throws {Error} When the output file cannot be opened or written.
 */
async function runCommand(command, opts) {
    const started = Date.now();
    const outPath = opts.outputPath ??
        join(tmpdir(), `.runner-${Math.random().toString(36).slice(2, 10)}.out`);
    // Open the output file BEFORE spawning: if this fails no child is left
    // running, and the stream listeners below are attached in the same tick
    // as spawn() so no early output can be missed.
    const fileHandle = await fsp.open(outPath, 'w');
    let stdout = '';
    let stderr = '';
    let stdoutBytes = 0;
    let stderrBytes = 0;
    let timedOut = false;
    let timer;
    let pendingForDisk = '';
    let writeChain = Promise.resolve();
    let writeError = null;
    // Writes are serialized through `writeChain`: FileHandle.write must not
    // be called again before the previous call settles. The pending text is
    // detached synchronously so data arriving during a write is never lost
    // or written twice. The returned promise never rejects; the first
    // failure is remembered in `writeError` and reported once the run ends.
    const flushBuffer = () => {
        if (pendingForDisk.length > 0) {
            const data = pendingForDisk;
            pendingForDisk = '';
            writeChain = writeChain.then(async () => {
                if (writeError !== null) {
                    return;
                }
                try {
                    await fileHandle.write(data);
                }
                catch (err) {
                    writeError = err;
                }
            });
        }
        return writeChain;
    };
    // Append `str` to `captured` without exceeding MAX_CAPTURE_BYTES
    // (measured in string length, i.e. UTF-16 code units).
    const appendBounded = (captured, str) => {
        const room = MAX_CAPTURE_BYTES - captured.length;
        return room > 0 ? captured + str.slice(0, room) : captured;
    };
    let outcome;
    try {
        outcome = await new Promise((resolve) => {
            const child = spawn('bash', ['-lc', command], {
                cwd: opts.cwd,
                env: { ...process.env, ...(opts.env ?? {}) },
                stdio: ['ignore', 'pipe', 'pipe'],
            });
            // Decode at the stream level so a multi-byte UTF-8 character
            // split across two chunks is not turned into U+FFFD.
            child.stdout?.setEncoding('utf8');
            child.stderr?.setEncoding('utf8');
            child.stdout?.on('data', (str) => {
                stdoutBytes += Buffer.byteLength(str, 'utf8');
                pendingForDisk += str;
                if (pendingForDisk.length >= STREAM_FLUSH_BYTES) {
                    // Intentionally not awaited; flushBuffer never rejects.
                    void flushBuffer();
                }
                stdout = appendBounded(stdout, str);
            });
            child.stderr?.on('data', (str) => {
                stderrBytes += Buffer.byteLength(str, 'utf8');
                stderr = appendBounded(stderr, str);
            });
            // NOTE: SIGKILL targets the bash process itself; descendants that
            // keep the stdio pipes open can delay the 'close' event.
            timer = setTimeout(() => {
                timedOut = true;
                child.kill('SIGKILL');
            }, opts.timeoutMs);
            // Without this listener a spawn failure (bash missing, cwd gone)
            // would surface as an uncaught exception.
            child.on('error', (err) => {
                stderr = appendBounded(stderr, `[runner] child process error: ${err.message}\n`);
                if (child.pid === undefined) {
                    // The process never started, so there is nothing to wait for.
                    resolve({ exit_code: -1, signal: null });
                }
            });
            child.on('close', (code, signal) => {
                resolve({ exit_code: code ?? -1, signal });
            });
        });
    }
    finally {
        clearTimeout(timer);
        await flushBuffer();
        await fileHandle.close();
    }
    if (writeError !== null) {
        throw new Error(`Failed to write command output to ${outPath}`, { cause: writeError });
    }
    return {
        stdout,
        stderr,
        exit_code: outcome.exit_code,
        signal: outcome.signal,
        duration_ms: Date.now() - started,
        timed_out: timedOut,
        raw_output_path: outPath,
        // Audit aids (not used by parsers): total UTF-8 bytes seen per stream,
        // including anything dropped from the bounded in-memory copies.
        bytes_captured: { stdout: stdoutBytes, stderr: stderrBytes },
    };
}
|
|
178
|
+
// ---------------------------------------------------------------------------
|
|
179
|
+
// Probing
|
|
180
|
+
// ---------------------------------------------------------------------------
|
|
181
|
+
/**
 * Pull the first version-looking token out of a probe command's output.
 *
 * A token is `MAJOR.MINOR`, optionally followed by `.PATCH` and a `-`/`+`
 * suffix; a leading `v` is accepted but not returned.
 *
 * @param {string} out - Text to search.
 * @returns {string|null} The version without any leading `v`, or `null` when
 *   nothing matches.
 */
function extractVersion(out) {
    const [, version] = out.match(/v?(\d+\.\d+(?:\.\d+)?(?:[-+][\w.-]+)?)/) ?? [];
    return version ?? null;
}
|
|
186
|
+
/**
 * Check whether a catalog tool is available by running its probe command.
 *
 * @param {string} id - Tool id; must exist in the catalog (see `getEntry`).
 * @param {object} ctx - Runner context; `repo_root` is used as the cwd.
 * @returns {Promise<object>} `{ available, version, install_command,
 *   install_prereq }`, plus `reason` (`'probe_failed'` on timeout, otherwise
 *   `'not_found'`) when the tool is unavailable.
 */
export async function probeTool(id, ctx) {
    const entry = getEntry(id);
    const probeCommand = resolvePlaceholders(entry.probe, ctx);
    const capture = await runCommand(probeCommand, {
        cwd: ctx.repo_root,
        timeoutMs: PROBE_TIMEOUT_MS,
    });
    const installInfo = {
        install_command: entry.install,
        install_prereq: entry.install_prereq,
    };
    // `command -v` exits non-zero when the tool is missing, and combined
    // probes (`command -v X` plus `X --version`) also signal via exit status,
    // so only a clean, non-timed-out exit counts as "available".
    const succeeded = !capture.timed_out && capture.exit_code === 0;
    if (succeeded) {
        // Some tools print their version on stderr; fall back to it.
        const version = extractVersion(capture.stdout) ?? extractVersion(capture.stderr);
        return { available: true, version, ...installInfo };
    }
    return {
        available: false,
        version: null,
        ...installInfo,
        reason: capture.timed_out ? 'probe_failed' : 'not_found',
    };
}
|
|
212
|
+
/**
 * Check whether the installer a tool depends on (pipx, go, cargo, npx, gem)
 * is present on PATH.
 *
 * @param {string|null|undefined} prereq - Installer name; `null`/`undefined`
 *   means the tool has no prerequisite.
 * @param {object} ctx - Runner context; `repo_root` is used as the cwd.
 * @returns {Promise<boolean>} `true` when there is no prerequisite or its
 *   `command -v` probe exits 0; `false` when the probe fails or the
 *   prerequisite name is not one this runner knows how to probe.
 */
export async function probeInstaller(prereq, ctx) {
    if (prereq === null || prereq === undefined) {
        return true;
    }
    // A Map (rather than an object literal) so that names such as
    // "constructor" cannot resolve to inherited properties.
    const probeCommands = new Map([
        ['pipx', 'command -v pipx'],
        ['go', 'command -v go'],
        ['cargo', 'command -v cargo'],
        ['npx', 'command -v npx'],
        ['gem', 'command -v gem'],
    ]);
    const probeCommand = probeCommands.get(prereq);
    if (probeCommand === undefined) {
        // Unknown installer: report it as missing instead of handing
        // `undefined` to the shell runner.
        return false;
    }
    const cap = await runCommand(probeCommand, {
        cwd: ctx.repo_root,
        timeoutMs: PROBE_TIMEOUT_MS,
    });
    return cap.exit_code === 0;
}
|
|
229
|
+
// ---------------------------------------------------------------------------
|
|
230
|
+
// Installing
|
|
231
|
+
// ---------------------------------------------------------------------------
|
|
232
|
+
/**
 * Guard executed right before an install command runs.
 *
 * @param {string} cmd - Install command about to be executed.
 * @throws {Error} When `cmd` is not an exact member of `ALL_INSTALL_COMMANDS`,
 *   or when it matches any of `FORBIDDEN_INSTALL_PATTERNS`.
 */
function assertInstallSafe(cmd) {
    // Whitelist first: the command must appear verbatim in the catalog.
    const inCatalog = ALL_INSTALL_COMMANDS.includes(cmd);
    if (!inCatalog) {
        throw new Error(`Refused install command not present in TOOL_CATALOG: ${cmd.slice(0, 80)}`);
    }
    // Defense in depth: even catalog commands may not use sudo or a system
    // package manager.
    const offending = FORBIDDEN_INSTALL_PATTERNS.find((pattern) => pattern.test(cmd));
    if (offending !== undefined) {
        throw new Error(`Refused install command matching forbidden pattern ${String(offending)}: ${cmd.slice(0, 80)}`);
    }
}
|
|
243
|
+
/**
 * Install a catalog tool with its catalog install command, then re-probe to
 * confirm it is usable.
 *
 * @param {string} id - Tool id; must exist in the catalog (see `getEntry`).
 * @param {object} ctx - Runner context (`install_enabled`, `repo_root`).
 * @returns {Promise<{installed: boolean, version: (string|null), error: (string|undefined)}>}
 *   `error` is one of `'no_install_command'`, `'install_disabled'`,
 *   `'prereq_missing: <name>'`, `'reprobe_failed'`, or a description of the
 *   installer failure (tail of its output, or a timeout / exit-code message
 *   when it produced none).
 * @throws {Error} When the install command fails the `assertInstallSafe` checks.
 */
export async function installTool(id, ctx) {
    const entry = getEntry(id);
    if (!entry.install) {
        return { installed: false, version: null, error: 'no_install_command' };
    }
    if (!ctx.install_enabled) {
        return { installed: false, version: null, error: 'install_disabled' };
    }
    const prereqOk = await probeInstaller(entry.install_prereq, ctx);
    if (!prereqOk) {
        return {
            installed: false,
            version: null,
            error: `prereq_missing: ${entry.install_prereq}`,
        };
    }
    assertInstallSafe(entry.install);
    const cap = await runCommand(entry.install, {
        cwd: ctx.repo_root,
        timeoutMs: INSTALL_TIMEOUT_MS,
    });
    if (cap.exit_code !== 0 || cap.timed_out) {
        const outputTail = tailString(cap.stderr || cap.stdout, 500);
        // An installer that dies silently (or is killed on timeout) would
        // otherwise yield an empty, undiagnosable error string.
        const fallback = cap.timed_out
            ? `install timed out after ${INSTALL_TIMEOUT_MS / 1000}s`
            : `install exited with code ${cap.exit_code}`;
        return {
            installed: false,
            version: null,
            error: outputTail || fallback,
        };
    }
    // Re-probe to confirm the installation actually made the tool available.
    const re = await probeTool(id, ctx);
    return {
        installed: re.available,
        version: re.version,
        error: re.available ? undefined : 'reprobe_failed',
    };
}
|
|
279
|
+
// ---------------------------------------------------------------------------
|
|
280
|
+
// Executing
|
|
281
|
+
// ---------------------------------------------------------------------------
|
|
282
|
+
/**
 * Guard executed right before a tool command runs.
 *
 * @param {string} cmd - Fully resolved command line.
 * @throws {Error} When `cmd` matches any of `FORBIDDEN_RUN_PATTERNS`; the
 *   message names the first matching pattern.
 */
function assertRunSafe(cmd) {
    const offending = FORBIDDEN_RUN_PATTERNS.find((pattern) => pattern.test(cmd));
    if (offending === undefined) {
        return;
    }
    throw new Error(`Refused run command matching forbidden pattern ${String(offending)}: ${cmd.slice(0, 120)}`);
}
|
|
289
|
+
/**
 * Reduce the runner context to the part handed to output parsers.
 *
 * @param {object} ctx - Runner context.
 * @returns {{repo_root: string}} A new object carrying only `repo_root`.
 */
function buildParserContext(ctx) {
    const { repo_root } = ctx;
    return { repo_root };
}
|
|
292
|
+
/**
 * Current time as an ISO-8601 string.
 *
 * @returns {string} Result of `Date.prototype.toISOString()` for "now".
 */
function nowIso() {
    const now = new Date();
    return now.toISOString();
}
|
|
295
|
+
/**
 * Return at most the last `n` characters of `s`.
 *
 * @param {string|null|undefined} s - Source text; any falsy value yields `''`.
 * @param {number} n - Maximum number of trailing characters to keep.
 * @returns {string} The tail of `s`, or `''` when `s` is empty or `n` is not
 *   a positive number.
 */
function tailString(s, n) {
    // `!(n > 0)` also rejects NaN/undefined. Without this guard `slice(-0)`
    // would return the WHOLE string for n === 0, and a negative n would
    // slice from the front instead of the back.
    if (!s || !(n > 0)) {
        return '';
    }
    return s.length <= n ? s : s.slice(s.length - n);
}
|
|
301
|
+
/**
 * Run a catalog tool against the repository and summarize the result.
 *
 * The tool's stdout is streamed to `<scan_dir>/<tool id>.out`. The output is
 * parsed only when the run counts as successful; otherwise a zero-count
 * placeholder summary is produced.
 *
 * @param {string} id - Tool id; must exist in the catalog (see `getEntry`).
 * @param {object} ctx - Runner context (`repo_root`, `scan_dir`, ...).
 * @returns {Promise<{parsed: object, run: object, ok: boolean}>} The parser
 *   output (or placeholder), the run record, and the success flag.
 * @throws {Error} When the resolved command fails the `assertRunSafe` checks.
 */
export async function executeTool(id, ctx) {
    const entry = getEntry(id);
    const command = resolvePlaceholders(entry.command, ctx);
    assertRunSafe(command);
    const rawOutputPath = join(ctx.scan_dir, `${entry.id}.out`);
    const startedAt = nowIso();
    const capture = await runCommand(command, {
        cwd: ctx.repo_root,
        timeoutMs: entry.timeout_minutes * 60 * 1000,
        outputPath: rawOutputPath,
    });
    // A timeout is always a failure. Otherwise the run is fine on exit 0, or
    // on a non-zero exit when the entry tolerates it and stdout is non-empty
    // (e.g. linters that exit non-zero because they reported findings).
    let ok = false;
    if (!capture.timed_out) {
        ok = capture.exit_code === 0 ||
            (entry.tolerate_nonzero_exit === true && capture.stdout.length > 0);
    }
    // When nothing was captured in memory, fall back to the head of the
    // on-disk output file.
    const parserInput = capture.stdout.length > 0
        ? capture.stdout
        : safeReadHead(rawOutputPath);
    let parsed;
    if (ok) {
        parsed = parseToolOutput(id, parserInput, capture.stderr, buildParserContext(ctx));
    }
    else {
        const failureNote = capture.timed_out
            ? `timed out after ${entry.timeout_minutes}m`
            : `exit ${capture.exit_code}`;
        parsed = {
            tool_id: id,
            summary: {
                tier: 'counts',
                counts: { errors: 0, warnings: 0, info: 0 },
            },
            oneliner: failureNote,
        };
    }
    const run = {
        tool_id: id,
        ran_at: startedAt,
        duration_ms: capture.duration_ms,
        exit_code: capture.exit_code,
        findings_count: findingsCount(parsed),
        summary: parsed.oneliner,
        parsed: ok,
        stderr_tail: ok ? undefined : tailString(capture.stderr, 500),
        raw_output_path: rawOutputPath,
    };
    return { parsed, run, ok };
}
|
|
344
|
+
/**
 * Derive a single findings number from a parsed tool output.
 *
 * @param {object} p - Parsed output with a `summary.tier` discriminator.
 * @returns {number} `errors + warnings` for the `counts` tier,
 *   `counts.total` for the `findings` tier, and 0 for `metrics` or any
 *   other tier.
 */
function findingsCount(p) {
    const { summary } = p;
    if (summary.tier === 'counts') {
        return summary.counts.errors + summary.counts.warnings;
    }
    if (summary.tier === 'findings') {
        return summary.counts.total;
    }
    // 'metrics' and unrecognized tiers carry no findings.
    return 0;
}
|
|
356
|
+
/**
 * Best-effort read of the first `maxBytes` bytes of a file as UTF-8 text.
 *
 * @param {string} path - File to read.
 * @param {number} [maxBytes=MAX_CAPTURE_BYTES] - Upper bound on bytes returned.
 * @returns {string} The decoded head of the file, or `''` when the file does
 *   not exist or cannot be read.
 */
function safeReadHead(path, maxBytes = MAX_CAPTURE_BYTES) {
    try {
        if (existsSync(path)) {
            const contents = readFileSync(path);
            // subarray clamps the end index, so a short file is returned whole.
            return contents.subarray(0, maxBytes).toString('utf8');
        }
    }
    catch {
        // Deliberately best-effort: an unreadable file counts as no output.
    }
    return '';
}
|
|
369
|
+
// ---------------------------------------------------------------------------
|
|
370
|
+
// Helpers exported for the MCP server / phase index
|
|
371
|
+
// ---------------------------------------------------------------------------
|
|
372
|
+
/**
 * Convert a failed probe into an unavailable-tool record.
 *
 * @param {string} id - Tool id, stored as the record's `name`.
 * @param {object} probe - Probe result; only `install_command` is read.
 * @param {string} reason - Why the tool is unavailable; stored as-is.
 * @returns {{name: string, category: string, install_command: *, reason: string}}
 *   `category` comes from the catalog entry, or `'lint'` when the id is
 *   unknown or the entry has no category.
 */
export function probeToUnavailable(id, probe, reason) {
    const category = TOOL_CATALOG_BY_ID.get(id)?.category ?? 'lint';
    const { install_command } = probe;
    return { name: id, category, install_command, reason };
}
|
|
382
|
+
/**
 * Convert an install failure into an unavailable-tool record.
 *
 * @param {string} id - Tool id, stored as the record's `name`.
 * @param {{error?: string}} res - Install result; `error` selects the reason
 *   and is copied into `detail`.
 * @returns {{name: string, category: string, install_command: *, reason: string, detail: *}}
 *   `reason` is `'prereq_missing'` when the error starts with that prefix,
 *   `'install_disabled'` on that exact error, and `'install_failed'`
 *   otherwise. `category` falls back to `'lint'` and `install_command` to
 *   `null` when the catalog has no value for them.
 */
export function installFailureToUnavailable(id, res) {
    const entry = TOOL_CATALOG_BY_ID.get(id);
    const failure = res.error;
    let reason;
    if (failure?.startsWith('prereq_missing')) {
        reason = 'prereq_missing';
    }
    else if (failure === 'install_disabled') {
        reason = 'install_disabled';
    }
    else {
        reason = 'install_failed';
    }
    return {
        name: id,
        category: entry?.category ?? 'lint',
        install_command: entry?.install ?? null,
        reason,
        detail: failure,
    };
}
|