@tagma/sdk 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/runner.ts CHANGED
@@ -1,460 +1,460 @@
1
- import { existsSync, readFileSync, statSync } from 'node:fs';
2
- import { dirname, isAbsolute, join, resolve as pathResolve } from 'node:path';
3
- import type { SpawnSpec, DriverPlugin, TaskResult } from './types';
4
- import { shellArgs } from './utils';
5
-
6
- // Delay before escalating SIGTERM to SIGKILL when killing a timed-out process.
7
- const SIGKILL_DELAY_MS = 3_000;
8
-
9
- /**
10
- * On Windows, proc.kill('SIGTERM') / proc.kill('SIGKILL') only terminate the
11
- * direct child process. When the child is a .cmd/.bat wrapper (e.g. claude.cmd),
12
- * cmd.exe spawns the real process as a grandchild — proc.kill misses it entirely.
13
- * `taskkill /F /T /PID` kills the entire process tree rooted at the given PID.
14
- */
15
- function killProcessTree(pid: number): void {
16
- if (process.platform !== 'win32') return;
17
- try {
18
- const result = Bun.spawnSync(['taskkill', '/F', '/T', '/PID', String(pid)], {
19
- stdout: 'pipe',
20
- stderr: 'pipe',
21
- });
22
- if (result.exitCode !== 0) {
23
- const stderr = new TextDecoder().decode(result.stderr);
24
- // Exit code 128 = process not found (already exited) — not worth warning about
25
- if (result.exitCode !== 128) {
26
- console.error(
27
- `[killProcessTree] taskkill exited ${result.exitCode} for PID ${pid}: ${stderr.trim()}`,
28
- );
29
- }
30
- }
31
- } catch {
32
- /* best-effort — process may have already exited */
33
- }
34
- }
35
-
36
- export interface RunOptions {
37
- readonly timeoutMs?: number;
38
- readonly signal?: AbortSignal; // pipeline-level abort
39
- }
40
-
41
- /**
42
- * On Windows, Bun.spawn does NOT auto-append PATHEXT extensions like
43
- * CreateProcess does. A bare command like `claude` fails with ENOENT if the
44
- * actual file on disk is `claude.cmd` / `claude.bat` / `claude.ps1`. We
45
- * manually resolve the command against PATH + PATHEXT here so Drivers can
46
- * keep using short names (`claude`, `npx`, etc.) cross-platform.
47
- *
48
- * We also auto-unwrap npm-generated .cmd shims into direct `node <js>`
49
- * invocations. Spawning the .cmd routes argv through cmd.exe, which silently
50
- * truncates any argv element at the first newline — a multi-line prompt
51
- * reaches the child as just its first line. By targeting the underlying JS
52
- * entry point directly we bypass cmd.exe entirely and newlines survive.
53
- *
54
- * Results are cached by (cmd, envPath) key so repeated spawns of the same
55
- * command don't block the event loop with synchronous PATH/shim scans.
56
- *
57
- * Returns the original name if resolution fails; Bun will raise the same
58
- * ENOENT it would have otherwise.
59
- */
60
- const RESOLVED_EXE_CACHE_MAX = 128;
61
- // A cache entry is the replacement argv head for the command:
62
- // - [path] — a single resolved executable (e.g. `foo.exe`)
63
- // - [node, jsEntry] — an npm-shim unwrapped into `node <js>`
64
- // - null — resolution failed, leave the original name
65
- const resolvedExeCache = new Map<string, readonly string[] | null>();
66
-
67
- /** Evict the oldest entry when the cache is at capacity. */
68
- function evictIfFull(): void {
69
- if (resolvedExeCache.size >= RESOLVED_EXE_CACHE_MAX) {
70
- // Map iteration order is insertion order — delete the first (oldest) key.
71
- const oldest = resolvedExeCache.keys().next().value;
72
- if (oldest !== undefined) resolvedExeCache.delete(oldest);
73
- }
74
- }
75
-
76
- /**
77
- * Parse an npm-generated .cmd shim and return the underlying JS entry path.
78
- *
79
- * npm's shim has the shape:
80
- * "%_prog%" "%dp0%\node_modules\<pkg>\bin\<script>" %*
81
- *
82
- * We extract the second double-quoted path, substitute `%dp0%` with the
83
- * wrapper's own directory, and return the absolute JS path. Returns null for
84
- * anything that doesn't match the npm-shim pattern (user-written .cmd
85
- * scripts, non-node tools, etc.), which keeps the caller on the .cmd path.
86
- */
87
- function parseNpmCmdShim(wrapperPath: string): string | null {
88
- let contents: string;
89
- try {
90
- contents = readFileSync(wrapperPath, 'utf8');
91
- } catch {
92
- return null;
93
- }
94
- const execLine = contents
95
- .split(/\r?\n/)
96
- .find((l) => l.includes('%*') && l.includes('%dp0%'));
97
- if (!execLine) return null;
98
- const quoted = execLine.match(/"([^"]+)"/g);
99
- if (!quoted || quoted.length < 2) return null;
100
- const rawTarget = quoted[1]!.slice(1, -1); // strip surrounding quotes
101
- const wrapperDir = dirname(wrapperPath);
102
- // %dp0% expands to wrapper dir with a trailing backslash; strip either form.
103
- const expanded = rawTarget.replace(/%dp0%\\?/i, '').replace(/\//g, '\\');
104
- const abs = isAbsolute(expanded) ? expanded : pathResolve(wrapperDir, expanded);
105
- return existsSync(abs) ? abs : null;
106
- }
107
-
108
- /**
109
- * Given a resolved .cmd/.bat path, return the argv prefix that should be
110
- * spawned instead. For npm shims this is `[node, js-entry]`; for everything
111
- * else it's `[wrapperPath]` (unchanged, caller keeps using the wrapper).
112
- */
113
- function unwrapCmdShim(wrapperPath: string): readonly string[] {
114
- if (!/\.(cmd|bat)$/i.test(wrapperPath)) return [wrapperPath];
115
- const jsEntry = parseNpmCmdShim(wrapperPath);
116
- if (!jsEntry) return [wrapperPath];
117
- // Prefer node colocated with the wrapper (npm global bin often ships one).
118
- const colocated = join(dirname(wrapperPath), 'node.exe');
119
- const nodeExe = existsSync(colocated) ? colocated : 'node';
120
- return [nodeExe, jsEntry];
121
- }
122
-
123
- function resolveWindowsExe(args: readonly string[], envPath: string): readonly string[] {
124
- if (process.platform !== 'win32' || args.length === 0) return args;
125
- const cmd = args[0]!;
126
- // Already a full path or has an extension → trust caller. We still attempt
127
- // shim unwrapping when the caller handed us a bare .cmd/.bat so drivers
128
- // that resolve the shim themselves still benefit from the cmd.exe bypass.
129
- if (isAbsolute(cmd) || /\.[a-z0-9]+$/i.test(cmd)) {
130
- if (/\.(cmd|bat)$/i.test(cmd) && existsSync(cmd)) {
131
- const unwrapped = unwrapCmdShim(cmd);
132
- if (unwrapped.length === 2) return [...unwrapped, ...args.slice(1)];
133
- }
134
- return args;
135
- }
136
-
137
- const cacheKey = `${cmd}\x00${envPath}`;
138
- if (resolvedExeCache.has(cacheKey)) {
139
- // ?? null coerces undefined→null so the subsequent guard narrows cleanly.
140
- const cached = resolvedExeCache.get(cacheKey) ?? null;
141
- return cached !== null ? [...cached, ...args.slice(1)] : args;
142
- }
143
-
144
- const exts = (process.env.PATHEXT ?? '.COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC')
145
- .split(';')
146
- .filter(Boolean);
147
- const dirs = envPath.split(';').filter(Boolean);
148
-
149
- for (const dir of dirs) {
150
- for (const ext of exts) {
151
- const candidate = join(dir, cmd + ext);
152
- try {
153
- if (existsSync(candidate) && statSync(candidate).isFile()) {
154
- const head = unwrapCmdShim(candidate);
155
- evictIfFull();
156
- resolvedExeCache.set(cacheKey, head);
157
- return [...head, ...args.slice(1)];
158
- }
159
- } catch {
160
- /* stat race — skip */
161
- }
162
- }
163
- }
164
- evictIfFull();
165
- resolvedExeCache.set(cacheKey, null);
166
- return args;
167
- }
168
-
169
- /**
170
- * H2: Build a "failed before spawn" result. Tagged as 'spawn_error' so the
171
- * engine can show a useful classification ("driver tried to launch X but
172
- * the binary wasn't found") rather than the misleading "timeout".
173
- */
174
- function failResult(stderr: string, durationMs: number): TaskResult {
175
- return {
176
- exitCode: -1,
177
- stdout: '',
178
- stderr,
179
- stderrPath: null,
180
- durationMs,
181
- sessionId: null,
182
- normalizedOutput: null,
183
- failureKind: 'spawn_error',
184
- };
185
- }
186
-
187
- /**
188
- * R2: Validate a SpawnSpec returned by a third-party driver. Returns null on
189
- * success or a human-readable error message describing the first violation.
190
- *
191
- * Catching this here is critical: an undetected bad spec ends up calling
192
- * Bun.spawn with garbage and the resulting TypeError leaks into engine
193
- * processTask's catch block as "Cannot read properties of undefined". By
194
- * validating here we surface a clear "Driver X returned invalid args" message
195
- * instead, and short-circuit before holding any process resources.
196
- */
197
- export function validateSpawnSpec(spec: unknown, driverName: string): string | null {
198
- if (!spec || typeof spec !== 'object') {
199
- return `Driver "${driverName}".buildCommand returned ${spec === null ? 'null' : typeof spec}, expected SpawnSpec object`;
200
- }
201
- const s = spec as Record<string, unknown>;
202
- if (!Array.isArray(s.args)) {
203
- return `Driver "${driverName}".buildCommand returned spec.args of type ${typeof s.args}, expected string[]`;
204
- }
205
- if (s.args.length === 0) {
206
- return `Driver "${driverName}".buildCommand returned an empty spec.args array`;
207
- }
208
- for (let i = 0; i < s.args.length; i++) {
209
- if (typeof s.args[i] !== 'string') {
210
- return `Driver "${driverName}".buildCommand returned spec.args[${i}] of type ${typeof s.args[i]}, expected string`;
211
- }
212
- }
213
- if (typeof s.args[0] !== 'string' || s.args[0].length === 0) {
214
- return `Driver "${driverName}".buildCommand returned an empty executable name in spec.args[0]`;
215
- }
216
- if (s.cwd !== undefined && typeof s.cwd !== 'string') {
217
- return `Driver "${driverName}".buildCommand returned spec.cwd of type ${typeof s.cwd}, expected string or undefined`;
218
- }
219
- if (s.stdin !== undefined && typeof s.stdin !== 'string') {
220
- return `Driver "${driverName}".buildCommand returned spec.stdin of type ${typeof s.stdin}, expected string or undefined`;
221
- }
222
- if (s.env !== undefined) {
223
- if (!s.env || typeof s.env !== 'object' || Array.isArray(s.env)) {
224
- return `Driver "${driverName}".buildCommand returned spec.env that is not a plain object`;
225
- }
226
- for (const [k, v] of Object.entries(s.env as Record<string, unknown>)) {
227
- if (typeof v !== 'string') {
228
- return `Driver "${driverName}".buildCommand returned spec.env.${k} of type ${typeof v}, expected string`;
229
- }
230
- }
231
- }
232
- return null;
233
- }
234
-
235
- export async function runSpawn(
236
- spec: SpawnSpec,
237
- driver: DriverPlugin | null,
238
- opts: RunOptions = {},
239
- ): Promise<TaskResult> {
240
- const { timeoutMs, signal } = opts;
241
- const start = performance.now();
242
- const elapsed = () => Math.round(performance.now() - start);
243
-
244
- if (signal?.aborted) {
245
- return failResult('Pipeline aborted before spawn', 0);
246
- }
247
-
248
- // R2: validate the spec before touching it. A third-party driver that
249
- // returns a malformed SpawnSpec used to crash deep inside Bun.spawn with
250
- // an opaque TypeError; now we report a clear "Driver X returned …" message.
251
- const validationError = validateSpawnSpec(spec, driver?.name ?? '<unknown>');
252
- if (validationError !== null) {
253
- return failResult(validationError, elapsed());
254
- }
255
-
256
- const mergedEnv = { ...process.env, ...(spec.env ?? {}) };
257
- const resolvedArgs = resolveWindowsExe(spec.args, mergedEnv.PATH ?? process.env.PATH ?? '');
258
-
259
- // ── 1. Spawn (catch ENOENT / bad-cwd up front) ────────────────────────
260
- let proc: ReturnType<typeof Bun.spawn>;
261
- try {
262
- proc = Bun.spawn(resolvedArgs as string[], {
263
- cwd: spec.cwd,
264
- env: mergedEnv,
265
- stdout: 'pipe',
266
- stderr: 'pipe',
267
- stdin: spec.stdin ? 'pipe' : undefined,
268
- });
269
- } catch (err) {
270
- return failResult(String(err), elapsed());
271
- }
272
-
273
- // ── 2. Write stdin ─────────────────────────────────────────────────────
274
- // Child may exit before reading (e.g. quick-fail commands that don't
275
- // touch stdin) → swallow EPIPE rather than surfacing it as an
276
- // engine-level error.
277
- if (spec.stdin && proc.stdin && typeof proc.stdin !== 'number') {
278
- try {
279
- await proc.stdin.write(spec.stdin);
280
- await proc.stdin.end();
281
- } catch {
282
- /* ignore EPIPE / closed-pipe errors */
283
- }
284
- }
285
-
286
- // ── 3. Timeout & abort handling ────────────────────────────────────────
287
- let killedByUs = false;
288
- let timedOut = false;
289
- let timer: ReturnType<typeof setTimeout> | null = null;
290
- let forceTimer: ReturnType<typeof setTimeout> | null = null;
291
-
292
- const killGracefully = () => {
293
- if (killedByUs) return;
294
- killedByUs = true;
295
-
296
- if (process.platform === 'win32') {
297
- // On Windows, kill the entire process tree via taskkill. This handles
298
- // .cmd wrappers and nested child processes that proc.kill() misses.
299
- killProcessTree(proc.pid);
300
- } else {
301
- proc.kill('SIGTERM');
302
- // If the child ignores SIGTERM, escalate to SIGKILL after 3 s.
303
- forceTimer = setTimeout(() => {
304
- try {
305
- proc.kill('SIGKILL');
306
- } catch {
307
- /* already exited */
308
- }
309
- }, SIGKILL_DELAY_MS);
310
- }
311
- };
312
-
313
- if (timeoutMs && timeoutMs > 0) {
314
- timer = setTimeout(() => {
315
- timedOut = true;
316
- killGracefully();
317
- }, timeoutMs);
318
- }
319
-
320
- const onAbort = () => killGracefully();
321
- if (signal) {
322
- if (signal.aborted) {
323
- killGracefully();
324
- } else {
325
- signal.addEventListener('abort', onAbort, { once: true });
326
- }
327
- }
328
-
329
- // ── 4. Collect output & wait (parallel to avoid pipe-buffer deadlock) ─
330
- const stdoutStream = typeof proc.stdout === 'object' ? proc.stdout : undefined;
331
- const stderrStream = typeof proc.stderr === 'object' ? proc.stderr : undefined;
332
-
333
- const [exitCode, stdout, stderr] = await Promise.all([
334
- proc.exited,
335
- stdoutStream ? new Response(stdoutStream).text() : Promise.resolve(''),
336
- stderrStream ? new Response(stderrStream).text() : Promise.resolve(''),
337
- ]);
338
-
339
- // ── 5. Cleanup timers & listeners ──────────────────────────────────────
340
- if (timer) clearTimeout(timer);
341
- if (forceTimer) clearTimeout(forceTimer);
342
- if (signal) signal.removeEventListener('abort', onAbort);
343
-
344
- const durationMs = elapsed();
345
-
346
- // We initiated the kill (timeout or abort) — always treat as non-success
347
- // regardless of exit code. A process that catches SIGTERM and exits 0 still
348
- // hit the timeout; letting it pass as success would unblock downstream tasks
349
- // incorrectly. The `timedOut` flag guards against the narrow race where the
350
- // process exits naturally at the exact moment the timeout fires — even if
351
- // killedByUs wasn't set in time, the timeout intention still applies.
352
- if (killedByUs || timedOut) {
353
- return {
354
- exitCode: -1,
355
- stdout,
356
- stderr,
357
- stderrPath: null,
358
- durationMs,
359
- sessionId: null,
360
- normalizedOutput: null,
361
- // H2: explicit kind so engine.ts no longer has to guess "is exitCode -1
362
- // a timeout or a spawn-failure?" Both used to share the same code.
363
- failureKind: 'timeout',
364
- };
365
- }
366
-
367
- // ── 6. Let driver extract metadata ─────────────────────────────────────
368
- // R1: parseResult is third-party code — wrap it in try/catch so a buggy
369
- // extractor doesn't discard a perfectly good spawn result. R5: even on
370
- // success, type-guard sessionId/normalizedOutput so a mistyped return
371
- // value doesn't poison sessionMap/normalizedMap downstream.
372
- let sessionId: string | null = null;
373
- let normalizedOutput: string | null = null;
374
- // M12: drivers can flip a task's terminal status to failed even when the
375
- // process exited 0 (e.g. opencode returning `{type:"error"}` JSON). When
376
- // the flag is set, we synthesize a non-zero exit code and append a reason
377
- // line to stderr so engine.ts marks the task as failed with a useful
378
- // explanation instead of letting the error JSON pass through as success.
379
- let forcedFailureMessage: string | null = null;
380
- if (driver?.parseResult) {
381
- try {
382
- const meta = driver.parseResult(stdout, stderr);
383
- if (meta && typeof meta === 'object') {
384
- if (typeof meta.sessionId === 'string' && /^[\w.-]{1,256}$/.test(meta.sessionId)) {
385
- sessionId = meta.sessionId;
386
- }
387
- if (typeof meta.normalizedOutput === 'string') {
388
- normalizedOutput = meta.normalizedOutput;
389
- }
390
- if (meta.forceFailure === true) {
391
- forcedFailureMessage =
392
- typeof meta.forceFailureReason === 'string'
393
- ? meta.forceFailureReason
394
- : 'Driver flagged task as failed (forceFailure)';
395
- }
396
- }
397
- } catch (err) {
398
- // The spawn itself succeeded; only metadata extraction failed.
399
- // Fall through with sessionId/normalizedOutput = null and append a
400
- // breadcrumb to stderr so the user can see WHY continue_from broke.
401
- const msg = err instanceof Error ? err.message : String(err);
402
- const note = `\n[runner] driver "${driver.name}".parseResult threw: ${msg}`;
403
- return {
404
- exitCode,
405
- stdout,
406
- stderr: stderr + note,
407
- stderrPath: null,
408
- durationMs,
409
- sessionId: null,
410
- normalizedOutput: null,
411
- // H2: parseResult threw — the spawn itself succeeded, so the failure
412
- // is "the process exited but the driver couldn't parse it". Surface
413
- // that as exit_nonzero (when the actual exit was non-zero) or null
414
- // (when the underlying exit was 0 — UI will still mark it failed via
415
- // engine.ts because the result is incomplete).
416
- failureKind: exitCode === 0 ? null : 'exit_nonzero',
417
- };
418
- }
419
- }
420
-
421
- // M12: when the driver forced a failure, treat as exit_nonzero with the
422
- // reason appended to stderr so users see WHY the task failed without
423
- // having to dig through driver-specific JSON.
424
- if (forcedFailureMessage !== null) {
425
- return {
426
- exitCode: exitCode === 0 ? 1 : exitCode,
427
- stdout,
428
- stderr: stderr + (stderr.endsWith('\n') ? '' : '\n') + `[driver] ${forcedFailureMessage}`,
429
- stderrPath: null,
430
- durationMs,
431
- sessionId,
432
- normalizedOutput,
433
- failureKind: 'exit_nonzero',
434
- };
435
- }
436
- return {
437
- exitCode,
438
- stdout,
439
- stderr,
440
- stderrPath: null,
441
- durationMs,
442
- sessionId,
443
- normalizedOutput,
444
- // H2: success vs nonzero exit. Engine uses this to short-circuit the
445
- // timeout branch even if a third-party driver returns -1 by mistake.
446
- failureKind: exitCode === 0 ? null : 'exit_nonzero',
447
- };
448
- }
449
-
450
- export async function runCommand(
451
- command: string,
452
- cwd: string,
453
- opts: RunOptions = {},
454
- ): Promise<TaskResult> {
455
- const spec: SpawnSpec = {
456
- args: shellArgs(command),
457
- cwd,
458
- };
459
- return runSpawn(spec, null, opts);
460
- }
1
+ import { existsSync, readFileSync, statSync } from 'node:fs';
2
+ import { dirname, isAbsolute, join, resolve as pathResolve } from 'node:path';
3
+ import type { SpawnSpec, DriverPlugin, TaskResult } from './types';
4
+ import { shellArgs } from './utils';
5
+
6
+ // Delay before escalating SIGTERM to SIGKILL when killing a timed-out process.
7
+ const SIGKILL_DELAY_MS = 3_000;
8
+
9
+ /**
10
+ * On Windows, proc.kill('SIGTERM') / proc.kill('SIGKILL') only terminate the
11
+ * direct child process. When the child is a .cmd/.bat wrapper (e.g. claude.cmd),
12
+ * cmd.exe spawns the real process as a grandchild — proc.kill misses it entirely.
13
+ * `taskkill /F /T /PID` kills the entire process tree rooted at the given PID.
14
+ */
15
+ function killProcessTree(pid: number): void {
16
+ if (process.platform !== 'win32') return;
17
+ try {
18
+ const result = Bun.spawnSync(['taskkill', '/F', '/T', '/PID', String(pid)], {
19
+ stdout: 'pipe',
20
+ stderr: 'pipe',
21
+ });
22
+ if (result.exitCode !== 0) {
23
+ const stderr = new TextDecoder().decode(result.stderr);
24
+ // Exit code 128 = process not found (already exited) — not worth warning about
25
+ if (result.exitCode !== 128) {
26
+ console.error(
27
+ `[killProcessTree] taskkill exited ${result.exitCode} for PID ${pid}: ${stderr.trim()}`,
28
+ );
29
+ }
30
+ }
31
+ } catch {
32
+ /* best-effort — process may have already exited */
33
+ }
34
+ }
35
+
36
+ export interface RunOptions {
37
+ readonly timeoutMs?: number;
38
+ readonly signal?: AbortSignal; // pipeline-level abort
39
+ }
40
+
41
+ /**
42
+ * On Windows, Bun.spawn does NOT auto-append PATHEXT extensions like
43
+ * CreateProcess does. A bare command like `claude` fails with ENOENT if the
44
+ * actual file on disk is `claude.cmd` / `claude.bat` / `claude.ps1`. We
45
+ * manually resolve the command against PATH + PATHEXT here so Drivers can
46
+ * keep using short names (`claude`, `npx`, etc.) cross-platform.
47
+ *
48
+ * We also auto-unwrap npm-generated .cmd shims into direct `node <js>`
49
+ * invocations. Spawning the .cmd routes argv through cmd.exe, which silently
50
+ * truncates any argv element at the first newline — a multi-line prompt
51
+ * reaches the child as just its first line. By targeting the underlying JS
52
+ * entry point directly we bypass cmd.exe entirely and newlines survive.
53
+ *
54
+ * Results are cached by (cmd, envPath) key so repeated spawns of the same
55
+ * command don't block the event loop with synchronous PATH/shim scans.
56
+ *
57
+ * Returns the original name if resolution fails; Bun will raise the same
58
+ * ENOENT it would have otherwise.
59
+ */
60
+ const RESOLVED_EXE_CACHE_MAX = 128;
61
+ // A cache entry is the replacement argv head for the command:
62
+ // - [path] — a single resolved executable (e.g. `foo.exe`)
63
+ // - [node, jsEntry] — an npm-shim unwrapped into `node <js>`
64
+ // - null — resolution failed, leave the original name
65
+ const resolvedExeCache = new Map<string, readonly string[] | null>();
66
+
67
+ /** Evict the oldest entry when the cache is at capacity. */
68
+ function evictIfFull(): void {
69
+ if (resolvedExeCache.size >= RESOLVED_EXE_CACHE_MAX) {
70
+ // Map iteration order is insertion order — delete the first (oldest) key.
71
+ const oldest = resolvedExeCache.keys().next().value;
72
+ if (oldest !== undefined) resolvedExeCache.delete(oldest);
73
+ }
74
+ }
75
+
76
+ /**
77
+ * Parse an npm-generated .cmd shim and return the underlying JS entry path.
78
+ *
79
+ * npm's shim has the shape:
80
+ * "%_prog%" "%dp0%\node_modules\<pkg>\bin\<script>" %*
81
+ *
82
+ * We extract the second double-quoted path, substitute `%dp0%` with the
83
+ * wrapper's own directory, and return the absolute JS path. Returns null for
84
+ * anything that doesn't match the npm-shim pattern (user-written .cmd
85
+ * scripts, non-node tools, etc.), which keeps the caller on the .cmd path.
86
+ */
87
+ function parseNpmCmdShim(wrapperPath: string): string | null {
88
+ let contents: string;
89
+ try {
90
+ contents = readFileSync(wrapperPath, 'utf8');
91
+ } catch {
92
+ return null;
93
+ }
94
+ const execLine = contents
95
+ .split(/\r?\n/)
96
+ .find((l) => l.includes('%*') && l.includes('%dp0%'));
97
+ if (!execLine) return null;
98
+ const quoted = execLine.match(/"([^"]+)"/g);
99
+ if (!quoted || quoted.length < 2) return null;
100
+ const rawTarget = quoted[1]!.slice(1, -1); // strip surrounding quotes
101
+ const wrapperDir = dirname(wrapperPath);
102
+ // %dp0% expands to wrapper dir with a trailing backslash; strip either form.
103
+ const expanded = rawTarget.replace(/%dp0%\\?/i, '').replace(/\//g, '\\');
104
+ const abs = isAbsolute(expanded) ? expanded : pathResolve(wrapperDir, expanded);
105
+ return existsSync(abs) ? abs : null;
106
+ }
107
+
108
+ /**
109
+ * Given a resolved .cmd/.bat path, return the argv prefix that should be
110
+ * spawned instead. For npm shims this is `[node, js-entry]`; for everything
111
+ * else it's `[wrapperPath]` (unchanged, caller keeps using the wrapper).
112
+ */
113
+ function unwrapCmdShim(wrapperPath: string): readonly string[] {
114
+ if (!/\.(cmd|bat)$/i.test(wrapperPath)) return [wrapperPath];
115
+ const jsEntry = parseNpmCmdShim(wrapperPath);
116
+ if (!jsEntry) return [wrapperPath];
117
+ // Prefer node colocated with the wrapper (npm global bin often ships one).
118
+ const colocated = join(dirname(wrapperPath), 'node.exe');
119
+ const nodeExe = existsSync(colocated) ? colocated : 'node';
120
+ return [nodeExe, jsEntry];
121
+ }
122
+
123
+ function resolveWindowsExe(args: readonly string[], envPath: string): readonly string[] {
124
+ if (process.platform !== 'win32' || args.length === 0) return args;
125
+ const cmd = args[0]!;
126
+ // Already a full path or has an extension → trust caller. We still attempt
127
+ // shim unwrapping when the caller handed us a bare .cmd/.bat so drivers
128
+ // that resolve the shim themselves still benefit from the cmd.exe bypass.
129
+ if (isAbsolute(cmd) || /\.[a-z0-9]+$/i.test(cmd)) {
130
+ if (/\.(cmd|bat)$/i.test(cmd) && existsSync(cmd)) {
131
+ const unwrapped = unwrapCmdShim(cmd);
132
+ if (unwrapped.length === 2) return [...unwrapped, ...args.slice(1)];
133
+ }
134
+ return args;
135
+ }
136
+
137
+ const cacheKey = `${cmd}\x00${envPath}`;
138
+ if (resolvedExeCache.has(cacheKey)) {
139
+ // ?? null coerces undefined→null so the subsequent guard narrows cleanly.
140
+ const cached = resolvedExeCache.get(cacheKey) ?? null;
141
+ return cached !== null ? [...cached, ...args.slice(1)] : args;
142
+ }
143
+
144
+ const exts = (process.env.PATHEXT ?? '.COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC')
145
+ .split(';')
146
+ .filter(Boolean);
147
+ const dirs = envPath.split(';').filter(Boolean);
148
+
149
+ for (const dir of dirs) {
150
+ for (const ext of exts) {
151
+ const candidate = join(dir, cmd + ext);
152
+ try {
153
+ if (existsSync(candidate) && statSync(candidate).isFile()) {
154
+ const head = unwrapCmdShim(candidate);
155
+ evictIfFull();
156
+ resolvedExeCache.set(cacheKey, head);
157
+ return [...head, ...args.slice(1)];
158
+ }
159
+ } catch {
160
+ /* stat race — skip */
161
+ }
162
+ }
163
+ }
164
+ evictIfFull();
165
+ resolvedExeCache.set(cacheKey, null);
166
+ return args;
167
+ }
168
+
169
+ /**
170
+ * H2: Build a "failed before spawn" result. Tagged as 'spawn_error' so the
171
+ * engine can show a useful classification ("driver tried to launch X but
172
+ * the binary wasn't found") rather than the misleading "timeout".
173
+ */
174
+ function failResult(stderr: string, durationMs: number): TaskResult {
175
+ return {
176
+ exitCode: -1,
177
+ stdout: '',
178
+ stderr,
179
+ stderrPath: null,
180
+ durationMs,
181
+ sessionId: null,
182
+ normalizedOutput: null,
183
+ failureKind: 'spawn_error',
184
+ };
185
+ }
186
+
187
+ /**
188
+ * R2: Validate a SpawnSpec returned by a third-party driver. Returns null on
189
+ * success or a human-readable error message describing the first violation.
190
+ *
191
+ * Catching this here is critical: an undetected bad spec ends up calling
192
+ * Bun.spawn with garbage and the resulting TypeError leaks into engine
193
+ * processTask's catch block as "Cannot read properties of undefined". By
194
+ * validating here we surface a clear "Driver X returned invalid args" message
195
+ * instead, and short-circuit before holding any process resources.
196
+ */
197
+ export function validateSpawnSpec(spec: unknown, driverName: string): string | null {
198
+ if (!spec || typeof spec !== 'object') {
199
+ return `Driver "${driverName}".buildCommand returned ${spec === null ? 'null' : typeof spec}, expected SpawnSpec object`;
200
+ }
201
+ const s = spec as Record<string, unknown>;
202
+ if (!Array.isArray(s.args)) {
203
+ return `Driver "${driverName}".buildCommand returned spec.args of type ${typeof s.args}, expected string[]`;
204
+ }
205
+ if (s.args.length === 0) {
206
+ return `Driver "${driverName}".buildCommand returned an empty spec.args array`;
207
+ }
208
+ for (let i = 0; i < s.args.length; i++) {
209
+ if (typeof s.args[i] !== 'string') {
210
+ return `Driver "${driverName}".buildCommand returned spec.args[${i}] of type ${typeof s.args[i]}, expected string`;
211
+ }
212
+ }
213
+ if (typeof s.args[0] !== 'string' || s.args[0].length === 0) {
214
+ return `Driver "${driverName}".buildCommand returned an empty executable name in spec.args[0]`;
215
+ }
216
+ if (s.cwd !== undefined && typeof s.cwd !== 'string') {
217
+ return `Driver "${driverName}".buildCommand returned spec.cwd of type ${typeof s.cwd}, expected string or undefined`;
218
+ }
219
+ if (s.stdin !== undefined && typeof s.stdin !== 'string') {
220
+ return `Driver "${driverName}".buildCommand returned spec.stdin of type ${typeof s.stdin}, expected string or undefined`;
221
+ }
222
+ if (s.env !== undefined) {
223
+ if (!s.env || typeof s.env !== 'object' || Array.isArray(s.env)) {
224
+ return `Driver "${driverName}".buildCommand returned spec.env that is not a plain object`;
225
+ }
226
+ for (const [k, v] of Object.entries(s.env as Record<string, unknown>)) {
227
+ if (typeof v !== 'string') {
228
+ return `Driver "${driverName}".buildCommand returned spec.env.${k} of type ${typeof v}, expected string`;
229
+ }
230
+ }
231
+ }
232
+ return null;
233
+ }
234
+
235
+ export async function runSpawn(
236
+ spec: SpawnSpec,
237
+ driver: DriverPlugin | null,
238
+ opts: RunOptions = {},
239
+ ): Promise<TaskResult> {
240
+ const { timeoutMs, signal } = opts;
241
+ const start = performance.now();
242
+ const elapsed = () => Math.round(performance.now() - start);
243
+
244
+ if (signal?.aborted) {
245
+ return failResult('Pipeline aborted before spawn', 0);
246
+ }
247
+
248
+ // R2: validate the spec before touching it. A third-party driver that
249
+ // returns a malformed SpawnSpec used to crash deep inside Bun.spawn with
250
+ // an opaque TypeError; now we report a clear "Driver X returned …" message.
251
+ const validationError = validateSpawnSpec(spec, driver?.name ?? '<unknown>');
252
+ if (validationError !== null) {
253
+ return failResult(validationError, elapsed());
254
+ }
255
+
256
+ const mergedEnv = { ...process.env, ...(spec.env ?? {}) };
257
+ const resolvedArgs = resolveWindowsExe(spec.args, mergedEnv.PATH ?? process.env.PATH ?? '');
258
+
259
+ // ── 1. Spawn (catch ENOENT / bad-cwd up front) ────────────────────────
260
+ let proc: ReturnType<typeof Bun.spawn>;
261
+ try {
262
+ proc = Bun.spawn(resolvedArgs as string[], {
263
+ cwd: spec.cwd,
264
+ env: mergedEnv,
265
+ stdout: 'pipe',
266
+ stderr: 'pipe',
267
+ stdin: spec.stdin ? 'pipe' : undefined,
268
+ });
269
+ } catch (err) {
270
+ return failResult(String(err), elapsed());
271
+ }
272
+
273
+ // ── 2. Write stdin ─────────────────────────────────────────────────────
274
+ // Child may exit before reading (e.g. quick-fail commands that don't
275
+ // touch stdin) → swallow EPIPE rather than surfacing it as an
276
+ // engine-level error.
277
+ if (spec.stdin && proc.stdin && typeof proc.stdin !== 'number') {
278
+ try {
279
+ await proc.stdin.write(spec.stdin);
280
+ await proc.stdin.end();
281
+ } catch {
282
+ /* ignore EPIPE / closed-pipe errors */
283
+ }
284
+ }
285
+
286
+ // ── 3. Timeout & abort handling ────────────────────────────────────────
287
+ let killedByUs = false;
288
+ let timedOut = false;
289
+ let timer: ReturnType<typeof setTimeout> | null = null;
290
+ let forceTimer: ReturnType<typeof setTimeout> | null = null;
291
+
292
+ const killGracefully = () => {
293
+ if (killedByUs) return;
294
+ killedByUs = true;
295
+
296
+ if (process.platform === 'win32') {
297
+ // On Windows, kill the entire process tree via taskkill. This handles
298
+ // .cmd wrappers and nested child processes that proc.kill() misses.
299
+ killProcessTree(proc.pid);
300
+ } else {
301
+ proc.kill('SIGTERM');
302
+ // If the child ignores SIGTERM, escalate to SIGKILL after 3 s.
303
+ forceTimer = setTimeout(() => {
304
+ try {
305
+ proc.kill('SIGKILL');
306
+ } catch {
307
+ /* already exited */
308
+ }
309
+ }, SIGKILL_DELAY_MS);
310
+ }
311
+ };
312
+
313
+ if (timeoutMs && timeoutMs > 0) {
314
+ timer = setTimeout(() => {
315
+ timedOut = true;
316
+ killGracefully();
317
+ }, timeoutMs);
318
+ }
319
+
320
+ const onAbort = () => killGracefully();
321
+ if (signal) {
322
+ if (signal.aborted) {
323
+ killGracefully();
324
+ } else {
325
+ signal.addEventListener('abort', onAbort, { once: true });
326
+ }
327
+ }
328
+
329
+ // ── 4. Collect output & wait (parallel to avoid pipe-buffer deadlock) ─
330
+ const stdoutStream = typeof proc.stdout === 'object' ? proc.stdout : undefined;
331
+ const stderrStream = typeof proc.stderr === 'object' ? proc.stderr : undefined;
332
+
333
+ const [exitCode, stdout, stderr] = await Promise.all([
334
+ proc.exited,
335
+ stdoutStream ? new Response(stdoutStream).text() : Promise.resolve(''),
336
+ stderrStream ? new Response(stderrStream).text() : Promise.resolve(''),
337
+ ]);
338
+
339
+ // ── 5. Cleanup timers & listeners ──────────────────────────────────────
340
+ if (timer) clearTimeout(timer);
341
+ if (forceTimer) clearTimeout(forceTimer);
342
+ if (signal) signal.removeEventListener('abort', onAbort);
343
+
344
+ const durationMs = elapsed();
345
+
346
+ // We initiated the kill (timeout or abort) — always treat as non-success
347
+ // regardless of exit code. A process that catches SIGTERM and exits 0 still
348
+ // hit the timeout; letting it pass as success would unblock downstream tasks
349
+ // incorrectly. The `timedOut` flag guards against the narrow race where the
350
+ // process exits naturally at the exact moment the timeout fires — even if
351
+ // killedByUs wasn't set in time, the timeout intention still applies.
352
+ if (killedByUs || timedOut) {
353
+ return {
354
+ exitCode: -1,
355
+ stdout,
356
+ stderr,
357
+ stderrPath: null,
358
+ durationMs,
359
+ sessionId: null,
360
+ normalizedOutput: null,
361
+ // H2: explicit kind so engine.ts no longer has to guess "is exitCode -1
362
+ // a timeout or a spawn-failure?" Both used to share the same code.
363
+ failureKind: 'timeout',
364
+ };
365
+ }
366
+
367
+ // ── 6. Let driver extract metadata ─────────────────────────────────────
368
+ // R1: parseResult is third-party code — wrap it in try/catch so a buggy
369
+ // extractor doesn't discard a perfectly good spawn result. R5: even on
370
+ // success, type-guard sessionId/normalizedOutput so a mistyped return
371
+ // value doesn't poison sessionMap/normalizedMap downstream.
372
+ let sessionId: string | null = null;
373
+ let normalizedOutput: string | null = null;
374
+ // M12: drivers can flip a task's terminal status to failed even when the
375
+ // process exited 0 (e.g. opencode returning `{type:"error"}` JSON). When
376
+ // the flag is set, we synthesize a non-zero exit code and append a reason
377
+ // line to stderr so engine.ts marks the task as failed with a useful
378
+ // explanation instead of letting the error JSON pass through as success.
379
+ let forcedFailureMessage: string | null = null;
380
+ if (driver?.parseResult) {
381
+ try {
382
+ const meta = driver.parseResult(stdout, stderr);
383
+ if (meta && typeof meta === 'object') {
384
+ if (typeof meta.sessionId === 'string' && /^[\w.-]{1,256}$/.test(meta.sessionId)) {
385
+ sessionId = meta.sessionId;
386
+ }
387
+ if (typeof meta.normalizedOutput === 'string') {
388
+ normalizedOutput = meta.normalizedOutput;
389
+ }
390
+ if (meta.forceFailure === true) {
391
+ forcedFailureMessage =
392
+ typeof meta.forceFailureReason === 'string'
393
+ ? meta.forceFailureReason
394
+ : 'Driver flagged task as failed (forceFailure)';
395
+ }
396
+ }
397
+ } catch (err) {
398
+ // The spawn itself succeeded; only metadata extraction failed.
399
+ // Fall through with sessionId/normalizedOutput = null and append a
400
+ // breadcrumb to stderr so the user can see WHY continue_from broke.
401
+ const msg = err instanceof Error ? err.message : String(err);
402
+ const note = `\n[runner] driver "${driver.name}".parseResult threw: ${msg}`;
403
+ return {
404
+ exitCode,
405
+ stdout,
406
+ stderr: stderr + note,
407
+ stderrPath: null,
408
+ durationMs,
409
+ sessionId: null,
410
+ normalizedOutput: null,
411
+ // H2: parseResult threw — the spawn itself succeeded, so the failure
412
+ // is "the process exited but the driver couldn't parse it". Surface
413
+ // that as exit_nonzero (when the actual exit was non-zero) or null
414
+ // (when the underlying exit was 0 — UI will still mark it failed via
415
+ // engine.ts because the result is incomplete).
416
+ failureKind: exitCode === 0 ? null : 'exit_nonzero',
417
+ };
418
+ }
419
+ }
420
+
421
+ // M12: when the driver forced a failure, treat as exit_nonzero with the
422
+ // reason appended to stderr so users see WHY the task failed without
423
+ // having to dig through driver-specific JSON.
424
+ if (forcedFailureMessage !== null) {
425
+ return {
426
+ exitCode: exitCode === 0 ? 1 : exitCode,
427
+ stdout,
428
+ stderr: stderr + (stderr.endsWith('\n') ? '' : '\n') + `[driver] ${forcedFailureMessage}`,
429
+ stderrPath: null,
430
+ durationMs,
431
+ sessionId,
432
+ normalizedOutput,
433
+ failureKind: 'exit_nonzero',
434
+ };
435
+ }
436
+ return {
437
+ exitCode,
438
+ stdout,
439
+ stderr,
440
+ stderrPath: null,
441
+ durationMs,
442
+ sessionId,
443
+ normalizedOutput,
444
+ // H2: success vs nonzero exit. Engine uses this to short-circuit the
445
+ // timeout branch even if a third-party driver returns -1 by mistake.
446
+ failureKind: exitCode === 0 ? null : 'exit_nonzero',
447
+ };
448
+ }
449
+
450
/**
 * Runs a raw command string as a task, without any driver involved.
 *
 * The command string is handed to `shellArgs` to obtain the argv array
 * (presumably the platform shell invocation — confirm in `./utils`), which is
 * then executed through `runSpawn` with a `null` driver. With no driver there
 * is no `parseResult` step, so the result's `sessionId` and `normalizedOutput`
 * are left at `null`.
 *
 * @param command - Command line to execute.
 * @param cwd - Working directory for the spawned process.
 * @param opts - Optional timeout (`timeoutMs`) and pipeline-level abort `signal`.
 * @returns The `TaskResult` produced by `runSpawn` for this command.
 */
export async function runCommand(
  command: string,
  cwd: string,
  opts: RunOptions = {},
): Promise<TaskResult> {
  const argv = shellArgs(command);
  const shellSpec: SpawnSpec = { args: argv, cwd };
  // No driver: pass null so the spawn result is returned without metadata extraction.
  return runSpawn(shellSpec, null, opts);
}