edsger 0.55.4 → 0.56.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,399 @@
1
+ /**
2
+ * Tool execution layer for the quality-benchmark phase.
3
+ *
4
+ * Three operations exposed to the rest of the phase (and ultimately to
5
+ * the LLM via the MCP server in `mcp-server.ts`):
6
+ *
7
+ * probeTool(id, ctx) -> { available, version, install_command }
8
+ * installTool(id, ctx) -> { installed, version, error }
9
+ * executeTool(id, ctx) -> { parsed, run, ok }
10
+ *
11
+ * All commands come from the authoritative `TOOL_CATALOG` — callers may
12
+ * not pass arbitrary command strings. Outputs are saved to a per-run
13
+ * scratch directory so the LLM never has to ferry megabytes of JSON
14
+ * through its context; only the small `ParsedToolOutput` flows back.
15
+ *
16
+ * Safety rails enforced here (in addition to those documented in the
17
+ * rubric):
18
+ * - All commands run with cwd = repo root (or an explicit override)
19
+ * - Per-command timeout; SIGKILL on overrun
20
+ * - No sudo / no system package managers (refused at install time)
21
+ * - In-memory captures truncated at 16 MiB (the on-disk copy of stdout is not truncated)
22
+ */
23
+ import { spawn } from 'node:child_process';
24
+ import { existsSync, mkdirSync, promises as fsp, readFileSync } from 'node:fs';
25
+ import { homedir, tmpdir } from 'node:os';
26
+ import { join } from 'node:path';
27
+ import { parseToolOutput } from './parsers.js';
28
+ import { ALL_INSTALL_COMMANDS, TOOL_CATALOG_BY_ID } from './tool-catalog.js';
29
+ // ---------------------------------------------------------------------------
30
+ // Configuration
31
+ // ---------------------------------------------------------------------------
32
/** Upper bound on the stdout/stderr retained per command (16 MiB). */
const MAX_CAPTURE_BYTES = 16 * 2 ** 20;
/** Amount of buffered output (1 MiB) that triggers a flush to disk, keeping RSS low on huge logs. */
const STREAM_FLUSH_BYTES = 2 ** 20;
/** Installer commands are given five minutes by default. */
const INSTALL_TIMEOUT_MS = 5 * 60_000;
/** Probes are expected to be near-instant, so they are clamped to 30 seconds. */
const PROBE_TIMEOUT_MS = 30_000;
40
/**
 * Installer command fragments that are always refused: privilege
 * escalation and system-level package managers. The catalog is not
 * expected to contain any of them; the runtime re-check is defense in
 * depth so a later change cannot quietly add e.g. `sudo apt install foo`.
 * Order matters only for which pattern is named in the error message.
 */
const FORBIDDEN_INSTALL_PATTERNS = [
    // Privilege escalation
    /\bsudo\b/i,
    // Linux / macOS system package managers
    /\bapt(-get)?\b/i, /\bbrew\b/i, /\byum\b/i, /\bdnf\b/i,
    /\bpacman\b/i, /\bzypper\b/i, /\bapk\b/i,
    // Windows package managers
    /\bchoco(latey)?\b/i, /\bscoop\b/i, /\bwinget\b/i,
];
58
/**
 * Fragments that are refused in tool invocations because they would
 * modify the repository. Checked in addition to the catalog whitelist so
 * that an accidentally added `--fix` is caught at runtime.
 */
const FORBIDDEN_RUN_PATTERNS = [
    // Auto-fixing / rewriting flags
    /\B--fix\b/, /\B--auto-?fix\b/, /\B--write\b/,
    // Git commands that change history or the working tree
    /\bgit\s+(commit|push|reset\s+--hard|clean\s+-f)/i,
    // Recursive delete of an absolute path
    /\brm\s+-rf\s+\//,
];
70
+ // ---------------------------------------------------------------------------
71
+ // Scan-dir helpers
72
+ // ---------------------------------------------------------------------------
73
/**
 * Build the runner context for one quality run and create its scratch
 * directory on disk (recursively, so existing directories are fine).
 *
 * @param {object} opts
 * @param {string} opts.repo_root - Copied into the context unchanged.
 * @param {string} [opts.package_manager] - Copied into the context unchanged.
 * @param {boolean} [opts.install_enabled] - Defaults to true when null/undefined.
 * @param {string} [opts.run_id] - Directory name for this run; defaults to
 *   the current ISO timestamp with `:` and `.` replaced by `-`.
 * @param {string} [opts.base_dir] - Parent of the run directory; defaults to
 *   `<home>/.edsger/quality-runs`, using the OS temp dir when the home
 *   directory is reported as empty.
 * @returns {{repo_root: string, package_manager: (string|undefined), install_enabled: boolean, scan_dir: string}}
 */
export function createRunnerContext(opts) {
    const timestampId = () => new Date().toISOString().replace(/[:.]/g, '-');
    const runName = opts.run_id ?? timestampId();
    const runsRoot = opts.base_dir ?? join(homedir() || tmpdir(), '.edsger', 'quality-runs');
    const scan_dir = join(runsRoot, runName);
    mkdirSync(scan_dir, { recursive: true });
    const { repo_root, package_manager } = opts;
    return {
        repo_root,
        package_manager,
        install_enabled: opts.install_enabled ?? true,
        scan_dir,
    };
}
85
+ // ---------------------------------------------------------------------------
86
+ // Command template resolution
87
+ // ---------------------------------------------------------------------------
88
/**
 * Substitute the `%REPO_ROOT%`, `%PKG_MANAGER%` and `%SCAN_DIR%`
 * placeholders in a catalog command template.
 *
 * Values are inserted literally: a replacer function is used so that `$`
 * sequences in a path (`$&`, `$$`, ...) are not treated as replacement
 * patterns, and the single pass means placeholder-looking text inside a
 * value is not expanded again.
 *
 * NOTE(review): values are not shell-quoted here; templates are presumably
 * responsible for quoting — confirm against the catalog.
 *
 * @param {string} template - Command template from the catalog.
 * @param {{repo_root: string, package_manager?: string, scan_dir: string}} ctx
 * @returns {string} The template with every placeholder occurrence replaced.
 */
function resolvePlaceholders(template, ctx) {
    const values = {
        REPO_ROOT: ctx.repo_root,
        PKG_MANAGER: ctx.package_manager ?? 'npm',
        SCAN_DIR: ctx.scan_dir,
    };
    return template.replace(/%(REPO_ROOT|PKG_MANAGER|SCAN_DIR)%/g, (_match, key) => String(values[key]));
}
94
/**
 * Look up a tool's catalog entry by id.
 *
 * @param {string} id - Tool id, used as the key into TOOL_CATALOG_BY_ID.
 * @returns {object} The catalog entry.
 * @throws {Error} When the id has no (truthy) entry in the catalog.
 */
function getEntry(id) {
    const found = TOOL_CATALOG_BY_ID.get(id);
    if (found) {
        return found;
    }
    throw new Error(`Unknown tool id: ${id} (not in TOOL_CATALOG)`);
}
101
/**
 * Run a shell command through `bash -lc` and capture its output.
 *
 * stdout is streamed to a file (audit trail) and stdout/stderr are each
 * kept in memory up to MAX_CAPTURE_BYTES characters. The child is sent
 * SIGKILL once `opts.timeoutMs` elapses.
 *
 * A process that cannot be spawned (e.g. missing cwd or missing bash) does
 * not throw: the result has a non-zero `exit_code` and the spawn error
 * message appended to `stderr`.
 *
 * @param {string} command - Command line handed to `bash -lc`.
 * @param {object} opts
 * @param {string} opts.cwd - Working directory of the child.
 * @param {number} opts.timeoutMs - Time budget before the child is killed.
 * @param {string} [opts.outputPath] - File that receives stdout. Defaults to
 *   a randomly named file in the OS temp directory, so that probes and
 *   installs do not leave files inside the repository being scanned.
 * @param {object} [opts.env] - Extra variables layered over `process.env`.
 * @returns {Promise<{stdout: string, stderr: string, exit_code: number,
 *   signal: (string|null), duration_ms: number, timed_out: boolean,
 *   raw_output_path: string, bytes_captured: {stdout: number, stderr: number}}>}
 * @throws If the output file cannot be opened, written or closed.
 */
async function runCommand(command, opts) {
    const started = Date.now();
    const outPath = opts.outputPath ??
        join(tmpdir(), `.runner-${Math.random().toString(36).slice(2, 10)}.out`);
    // Open the audit file before spawning so that a failure here cannot
    // leave an unsupervised child process behind.
    const fileHandle = await fsp.open(outPath, 'w');
    let stdout = '';
    let stderr = '';
    let stdoutBytes = 0;
    let stderrBytes = 0;
    let timedOut = false;
    let timer;
    let exit_code = -1;
    let signal = null;
    let pendingText = '';
    let writeChain = Promise.resolve();
    let writeError = null;
    // Detach the pending text *before* the asynchronous write so chunks that
    // arrive while the write is in flight are neither lost nor written twice;
    // chaining keeps the writes in arrival order.
    const flushBuffer = () => {
        if (pendingText.length > 0) {
            const text = pendingText;
            pendingText = '';
            writeChain = writeChain
                .then(() => fileHandle.write(text))
                .catch((err) => {
                    if (writeError === null) {
                        writeError = err;
                    }
                });
        }
        return writeChain;
    };
    // Append `text` to `captured` without exceeding the in-memory cap.
    const appendBounded = (captured, text) => {
        const room = MAX_CAPTURE_BYTES - captured.length;
        return room > 0 ? captured + text.slice(0, room) : captured;
    };
    const onStdoutText = (text) => {
        if (text.length === 0) {
            return;
        }
        pendingText += text;
        if (pendingText.length >= STREAM_FLUSH_BYTES) {
            // Never rejects: write failures are recorded in writeError.
            void flushBuffer();
        }
        stdout = appendBounded(stdout, text);
    };
    try {
        const child = spawn('bash', ['-lc', command], {
            cwd: opts.cwd,
            env: { ...process.env, ...(opts.env ?? {}) },
            stdio: ['ignore', 'pipe', 'pipe'],
        });
        // Streaming decoders keep multi-byte UTF-8 sequences intact when they
        // are split across chunks; ignoreBOM keeps a leading BOM in the text,
        // matching what Buffer#toString('utf8') produced.
        const stdoutDecoder = new TextDecoder('utf-8', { ignoreBOM: true });
        const stderrDecoder = new TextDecoder('utf-8', { ignoreBOM: true });
        child.stdout?.on('data', (chunk) => {
            stdoutBytes += chunk.byteLength;
            onStdoutText(stdoutDecoder.decode(chunk, { stream: true }));
        });
        child.stderr?.on('data', (chunk) => {
            stderrBytes += chunk.byteLength;
            stderr = appendBounded(stderr, stderrDecoder.decode(chunk, { stream: true }));
        });
        const settled = new Promise((resolve) => {
            // Without an 'error' listener a spawn failure would surface as an
            // uncaught exception and take the whole process down.
            child.on('error', (err) => {
                stderr = appendBounded(stderr, `${stderr.length > 0 ? '\n' : ''}${err.message}`);
                if (child.pid === undefined) {
                    // The process never started; there is nothing to wait for.
                    resolve({ code: -1, sig: null });
                }
            });
            child.on('close', (code, sig) => {
                resolve({ code: code ?? -1, sig });
            });
        });
        timer = setTimeout(() => {
            timedOut = true;
            child.kill('SIGKILL');
            // Only bash itself is signalled. Descendants that inherited the
            // pipes could keep them open and delay 'close' indefinitely, so
            // drop our ends of the pipes as well.
            child.stdout?.destroy();
            child.stderr?.destroy();
        }, opts.timeoutMs);
        const outcome = await settled;
        exit_code = outcome.code;
        signal = outcome.sig;
        // Emit whatever the decoders were still holding back.
        onStdoutText(stdoutDecoder.decode());
        stderr = appendBounded(stderr, stderrDecoder.decode());
    }
    finally {
        clearTimeout(timer);
        await flushBuffer();
        await fileHandle.close();
    }
    if (writeError !== null) {
        throw writeError;
    }
    return {
        stdout,
        stderr,
        exit_code,
        signal,
        duration_ms: Date.now() - started,
        timed_out: timedOut,
        raw_output_path: outPath,
        // Audit aid (not used by parsers): raw byte counts received per stream.
        bytes_captured: { stdout: stdoutBytes, stderr: stderrBytes },
    };
}
178
+ // ---------------------------------------------------------------------------
179
+ // Probing
180
+ // ---------------------------------------------------------------------------
181
/**
 * Pull the first version-like token out of a probe command's output.
 *
 * A token is `MAJOR.MINOR` with an optional `.PATCH` and an optional
 * `-`/`+` suffix; a leading `v` is accepted but not returned.
 *
 * @param {string} out - Text to search.
 * @returns {string|null} The matched version, or null when none is found.
 */
function extractVersion(out) {
    const versionPattern = /v?(\d+\.\d+(?:\.\d+)?(?:[-+][\w.-]+)?)/;
    return out.match(versionPattern)?.[1] ?? null;
}
186
/**
 * Run a tool's catalog probe command and report whether it is usable.
 *
 * The probe counts as successful only when it neither timed out nor exited
 * non-zero (`command -v` exits non-zero for a missing binary). On success
 * the version is taken from stdout, falling back to stderr.
 *
 * @param {string} id - Tool id; must exist in the catalog.
 * @param {object} ctx - Runner context (`repo_root` is used as cwd).
 * @returns {Promise<object>} `{ available, version, install_command,
 *   install_prereq }`, plus `reason` (`'probe_failed'` on timeout,
 *   otherwise `'not_found'`) when unavailable.
 * @throws {Error} When the id is not in the catalog.
 */
export async function probeTool(id, ctx) {
    const entry = getEntry(id);
    const capture = await runCommand(resolvePlaceholders(entry.probe, ctx), {
        cwd: ctx.repo_root,
        timeoutMs: PROBE_TIMEOUT_MS,
    });
    const installInfo = {
        install_command: entry.install,
        install_prereq: entry.install_prereq,
    };
    const succeeded = !capture.timed_out && capture.exit_code === 0;
    if (succeeded) {
        return {
            available: true,
            version: extractVersion(capture.stdout) ?? extractVersion(capture.stderr),
            ...installInfo,
        };
    }
    return {
        available: false,
        version: null,
        ...installInfo,
        reason: capture.timed_out ? 'probe_failed' : 'not_found',
    };
}
212
/**
 * Check whether the installer a tool depends on is present on PATH.
 *
 * @param {string|null|undefined} prereq - Installer name (`pipx`, `go`,
 *   `cargo`, `npx` or `gem`). `null`/`undefined` means the tool has no
 *   prerequisite.
 * @param {object} ctx - Runner context (`repo_root` is used as cwd).
 * @returns {Promise<boolean>} True when there is no prerequisite or its
 *   `command -v` probe exits with 0. An unrecognised prerequisite name
 *   yields false, since there is no probe to confirm it with (previously
 *   this passed `undefined` to the shell as the command).
 */
export async function probeInstaller(prereq, ctx) {
    // `== null` deliberately covers both null and undefined.
    if (prereq == null) {
        return true;
    }
    // A Map avoids accidental hits on Object.prototype keys such as "constructor".
    const installerProbes = new Map([
        ['pipx', 'command -v pipx'],
        ['go', 'command -v go'],
        ['cargo', 'command -v cargo'],
        ['npx', 'command -v npx'],
        ['gem', 'command -v gem'],
    ]);
    const probeCommand = installerProbes.get(prereq);
    if (probeCommand === undefined) {
        return false;
    }
    const cap = await runCommand(probeCommand, {
        cwd: ctx.repo_root,
        timeoutMs: PROBE_TIMEOUT_MS,
    });
    return cap.exit_code === 0;
}
229
+ // ---------------------------------------------------------------------------
230
+ // Installing
231
+ // ---------------------------------------------------------------------------
232
/**
 * Refuse any install command that is not an exact catalog entry or that
 * matches a forbidden installer pattern.
 *
 * @param {string} cmd - Install command about to be executed.
 * @throws {Error} When `cmd` is absent from ALL_INSTALL_COMMANDS, or when
 *   it matches one of FORBIDDEN_INSTALL_PATTERNS (the first match is named).
 */
function assertInstallSafe(cmd) {
    // Whitelist first: the command must appear verbatim in the catalog.
    const isCatalogued = ALL_INSTALL_COMMANDS.includes(cmd);
    if (!isCatalogued) {
        throw new Error(`Refused install command not present in TOOL_CATALOG: ${cmd.slice(0, 80)}`);
    }
    // Defense in depth: even catalogued commands may not use these.
    const offending = FORBIDDEN_INSTALL_PATTERNS.find((pattern) => pattern.test(cmd));
    if (offending !== undefined) {
        throw new Error(`Refused install command matching forbidden pattern ${String(offending)}: ${cmd.slice(0, 80)}`);
    }
}
243
/**
 * Install a catalog tool using its catalog install command, then re-probe
 * to confirm that the tool is now available.
 *
 * Checks run in this order: the entry has an install command, installs are
 * enabled in `ctx`, the installer prerequisite is present, and the command
 * passes the safety check.
 *
 * @param {string} id - Tool id; must exist in the catalog.
 * @param {object} ctx - Runner context (`install_enabled`, `repo_root`).
 * @returns {Promise<{installed: boolean, version: (string|null), error: (string|undefined)}>}
 *   `error` is one of `no_install_command`, `install_disabled`,
 *   `prereq_missing: <name>`, `reprobe_failed`, or the last 500 characters
 *   of the installer's stderr (stdout when stderr is empty).
 * @throws {Error} For an unknown id or an install command refused by the safety check.
 */
export async function installTool(id, ctx) {
    const entry = getEntry(id);
    const failure = (error) => ({ installed: false, version: null, error });
    if (!entry.install) {
        return failure('no_install_command');
    }
    if (!ctx.install_enabled) {
        return failure('install_disabled');
    }
    const hasPrereq = await probeInstaller(entry.install_prereq, ctx);
    if (!hasPrereq) {
        return failure(`prereq_missing: ${entry.install_prereq}`);
    }
    assertInstallSafe(entry.install);
    const capture = await runCommand(entry.install, {
        cwd: ctx.repo_root,
        timeoutMs: INSTALL_TIMEOUT_MS,
    });
    const succeeded = capture.exit_code === 0 && !capture.timed_out;
    if (!succeeded) {
        return failure(tailString(capture.stderr || capture.stdout, 500));
    }
    // A zero exit is not trusted on its own: probe again.
    const { available, version } = await probeTool(id, ctx);
    return {
        installed: available,
        version,
        error: available ? undefined : 'reprobe_failed',
    };
}
279
+ // ---------------------------------------------------------------------------
280
+ // Executing
281
+ // ---------------------------------------------------------------------------
282
/**
 * Refuse a resolved tool command that matches any forbidden run pattern.
 *
 * @param {string} cmd - Fully resolved command line.
 * @throws {Error} Naming the first matching pattern and the first 120
 *   characters of the command.
 */
function assertRunSafe(cmd) {
    const offending = FORBIDDEN_RUN_PATTERNS.find((pattern) => pattern.test(cmd));
    if (offending === undefined) {
        return;
    }
    throw new Error(`Refused run command matching forbidden pattern ${String(offending)}: ${cmd.slice(0, 120)}`);
}
289
/**
 * Reduce the runner context to the part handed to output parsers.
 *
 * @param {object} ctx - Runner context.
 * @returns {{repo_root: string}} An object holding only `repo_root`.
 */
function buildParserContext(ctx) {
    const { repo_root } = ctx;
    return { repo_root };
}
292
/**
 * Current time as an ISO-8601 string.
 *
 * @returns {string} The result of `Date#toISOString()` for "now".
 */
function nowIso() {
    const now = new Date();
    return now.toISOString();
}
295
/**
 * Return at most the last `n` characters of a string.
 *
 * @param {string|null|undefined} s - Source text; any falsy value yields ''.
 * @param {number} n - Maximum number of trailing characters to keep.
 * @returns {string} `s` itself when it is no longer than `n`, otherwise its
 *   last `n` characters; '' when `n` is zero or negative.
 */
function tailString(s, n) {
    // Guard n <= 0 explicitly: `s.slice(-0)` is `s.slice(0)`, which would
    // return the whole string instead of an empty tail.
    if (!s || n <= 0) {
        return '';
    }
    return s.length <= n ? s : s.slice(-n);
}
301
/**
 * Run a catalog tool against the repository and parse its output.
 *
 * The resolved command must pass the run-safety check. stdout is streamed
 * to `<scan_dir>/<entry.id>.out`. A run is considered OK when it did not
 * time out and either exited with 0 or, for entries flagged
 * `tolerate_nonzero_exit`, produced some stdout (linters with findings
 * exit non-zero). Failed runs get a zeroed `counts` summary instead of
 * parser output.
 *
 * @param {string} id - Tool id; must exist in the catalog.
 * @param {object} ctx - Runner context (`repo_root`, `scan_dir`).
 * @returns {Promise<{parsed: object, run: object, ok: boolean}>}
 * @throws {Error} For an unknown id or a command refused by the safety check.
 */
export async function executeTool(id, ctx) {
    const entry = getEntry(id);
    const cmd = resolvePlaceholders(entry.command, ctx);
    assertRunSafe(cmd);
    const raw_output_path = join(ctx.scan_dir, `${entry.id}.out`);
    const ran_at = nowIso();
    const capture = await runCommand(cmd, {
        cwd: ctx.repo_root,
        timeoutMs: entry.timeout_minutes * 60 * 1000,
        outputPath: raw_output_path,
    });
    const exitAccepted = capture.exit_code === 0 ||
        (entry.tolerate_nonzero_exit === true && capture.stdout.length > 0);
    const ok = !capture.timed_out && exitAccepted;
    // When nothing was captured in memory, fall back to the head of the
    // on-disk output file.
    const parserInput = capture.stdout.length > 0 ? capture.stdout : safeReadHead(raw_output_path);
    let parsed;
    if (ok) {
        parsed = parseToolOutput(id, parserInput, capture.stderr, buildParserContext(ctx));
    }
    else {
        const oneliner = capture.timed_out
            ? `timed out after ${entry.timeout_minutes}m`
            : `exit ${capture.exit_code}`;
        parsed = {
            tool_id: id,
            summary: {
                tier: 'counts',
                counts: { errors: 0, warnings: 0, info: 0 },
            },
            oneliner,
        };
    }
    const run = {
        tool_id: id,
        ran_at,
        duration_ms: capture.duration_ms,
        exit_code: capture.exit_code,
        findings_count: findingsCount(parsed),
        summary: parsed.oneliner,
        parsed: ok,
        stderr_tail: ok ? undefined : tailString(capture.stderr, 500),
        raw_output_path,
    };
    return { parsed, run, ok };
}
344
/**
 * Derive a single findings number from a parsed tool output.
 *
 * @param {{summary: {tier: string, counts?: object}}} p - Parsed output.
 * @returns {number} errors + warnings for the `counts` tier, `counts.total`
 *   for the `findings` tier, and 0 for `metrics` or any other tier.
 */
function findingsCount(p) {
    const { tier, counts } = p.summary;
    if (tier === 'counts') {
        return counts.errors + counts.warnings;
    }
    if (tier === 'findings') {
        return counts.total;
    }
    return 0;
}
356
/**
 * Best-effort read of the beginning of a file as UTF-8 text.
 *
 * @param {string} path - File to read.
 * @param {number} [maxBytes=MAX_CAPTURE_BYTES] - Maximum number of bytes to return.
 * @returns {string} Up to `maxBytes` bytes of the file decoded as UTF-8, or
 *   '' when the file does not exist or cannot be read.
 */
function safeReadHead(path, maxBytes = MAX_CAPTURE_BYTES) {
    let text = '';
    try {
        if (existsSync(path)) {
            const contents = readFileSync(path);
            const fitsWhole = contents.byteLength <= maxBytes;
            const head = fitsWhole ? contents : contents.subarray(0, maxBytes);
            text = head.toString('utf8');
        }
    }
    catch {
        // Deliberately best-effort: an unreadable file is treated as empty.
        text = '';
    }
    return text;
}
369
+ // ---------------------------------------------------------------------------
370
+ // Helpers exported for the MCP server / phase index
371
+ // ---------------------------------------------------------------------------
372
/**
 * Turn a failed probe into an UnavailableTool record.
 *
 * @param {string} id - Tool id, used as the record's `name`.
 * @param {{install_command: (string|null|undefined)}} probe - Probe result.
 * @param {string} reason - Why the tool is unavailable.
 * @returns {{name: string, category: string, install_command: *, reason: string}}
 *   `category` comes from the catalog entry and falls back to `'lint'`.
 */
export function probeToUnavailable(id, probe, reason) {
    const category = TOOL_CATALOG_BY_ID.get(id)?.category ?? 'lint';
    const { install_command } = probe;
    return { name: id, category, install_command, reason };
}
382
/**
 * Turn a failed install result into an UnavailableTool record.
 *
 * @param {string} id - Tool id, used as the record's `name`.
 * @param {{error?: string}} res - Result returned by the install step.
 * @returns {{name: string, category: string, install_command: (string|null), reason: string, detail: (string|undefined)}}
 *   `reason` is `'prereq_missing'` when the error starts with that prefix,
 *   `'install_disabled'` for that exact error, otherwise
 *   `'install_failed'`; `detail` carries the raw error. `category` falls
 *   back to `'lint'` and `install_command` to null when the id is not in
 *   the catalog.
 */
export function installFailureToUnavailable(id, res) {
    const classify = (error) => {
        if (error?.startsWith('prereq_missing')) {
            return 'prereq_missing';
        }
        if (error === 'install_disabled') {
            return 'install_disabled';
        }
        return 'install_failed';
    };
    const catalogEntry = TOOL_CATALOG_BY_ID.get(id);
    return {
        name: id,
        category: catalogEntry?.category ?? 'lint',
        install_command: catalogEntry?.install ?? null,
        reason: classify(res.error),
        detail: res.error,
    };
}