@tagma/sdk 0.4.12 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/README.md +572 -566
  2. package/dist/adapters/websocket-approval.d.ts.map +1 -1
  3. package/dist/adapters/websocket-approval.js +3 -1
  4. package/dist/adapters/websocket-approval.js.map +1 -1
  5. package/dist/approval.d.ts.map +1 -1
  6. package/dist/approval.js.map +1 -1
  7. package/dist/completions/exit-code.d.ts.map +1 -1
  8. package/dist/completions/exit-code.js.map +1 -1
  9. package/dist/completions/file-exists.d.ts.map +1 -1
  10. package/dist/completions/file-exists.js.map +1 -1
  11. package/dist/completions/output-check.js +2 -7
  12. package/dist/completions/output-check.js.map +1 -1
  13. package/dist/config-ops.d.ts.map +1 -1
  14. package/dist/config-ops.js +24 -26
  15. package/dist/config-ops.js.map +1 -1
  16. package/dist/dag.d.ts.map +1 -1
  17. package/dist/dag.js +1 -1
  18. package/dist/dag.js.map +1 -1
  19. package/dist/drivers/claude-code.d.ts.map +1 -1
  20. package/dist/drivers/claude-code.js +10 -5
  21. package/dist/drivers/claude-code.js.map +1 -1
  22. package/dist/engine.d.ts.map +1 -1
  23. package/dist/engine.js +54 -27
  24. package/dist/engine.js.map +1 -1
  25. package/dist/hooks.d.ts.map +1 -1
  26. package/dist/hooks.js +1 -3
  27. package/dist/hooks.js.map +1 -1
  28. package/dist/logger.d.ts.map +1 -1
  29. package/dist/logger.js +4 -2
  30. package/dist/logger.js.map +1 -1
  31. package/dist/pipeline-runner.d.ts.map +1 -1
  32. package/dist/pipeline-runner.js +10 -4
  33. package/dist/pipeline-runner.js.map +1 -1
  34. package/dist/registry.d.ts +11 -1
  35. package/dist/registry.d.ts.map +1 -1
  36. package/dist/registry.js +28 -3
  37. package/dist/registry.js.map +1 -1
  38. package/dist/runner.d.ts.map +1 -1
  39. package/dist/runner.js +18 -13
  40. package/dist/runner.js.map +1 -1
  41. package/dist/schema.d.ts.map +1 -1
  42. package/dist/schema.js +14 -14
  43. package/dist/schema.js.map +1 -1
  44. package/dist/schema.test.js +5 -1
  45. package/dist/schema.test.js.map +1 -1
  46. package/dist/sdk.d.ts +2 -2
  47. package/dist/sdk.d.ts.map +1 -1
  48. package/dist/sdk.js +1 -1
  49. package/dist/sdk.js.map +1 -1
  50. package/dist/triggers/file.d.ts.map +1 -1
  51. package/dist/triggers/file.js +11 -4
  52. package/dist/triggers/file.js.map +1 -1
  53. package/dist/triggers/manual.d.ts.map +1 -1
  54. package/dist/triggers/manual.js +2 -1
  55. package/dist/triggers/manual.js.map +1 -1
  56. package/dist/utils.d.ts.map +1 -1
  57. package/dist/utils.js +13 -6
  58. package/dist/utils.js.map +1 -1
  59. package/dist/validate-raw.d.ts.map +1 -1
  60. package/dist/validate-raw.js +40 -11
  61. package/dist/validate-raw.js.map +1 -1
  62. package/package.json +2 -2
  63. package/scripts/preinstall.js +1 -1
  64. package/src/adapters/stdin-approval.ts +106 -106
  65. package/src/adapters/websocket-approval.ts +224 -220
  66. package/src/approval.ts +131 -125
  67. package/src/bootstrap.ts +37 -37
  68. package/src/completions/exit-code.ts +34 -30
  69. package/src/completions/file-exists.ts +66 -60
  70. package/src/completions/output-check.ts +86 -86
  71. package/src/config-ops.ts +307 -322
  72. package/src/dag.ts +234 -228
  73. package/src/drivers/claude-code.ts +250 -240
  74. package/src/engine.ts +1098 -935
  75. package/src/hooks.ts +187 -179
  76. package/src/logger.ts +182 -178
  77. package/src/middlewares/static-context.ts +45 -45
  78. package/src/pipeline-runner.ts +156 -150
  79. package/src/registry.ts +51 -23
  80. package/src/runner.ts +395 -397
  81. package/src/schema.test.ts +5 -1
  82. package/src/schema.ts +338 -328
  83. package/src/sdk.ts +91 -81
  84. package/src/triggers/file.ts +33 -14
  85. package/src/triggers/manual.ts +86 -81
  86. package/src/types.ts +18 -18
  87. package/src/utils.ts +202 -191
  88. package/src/validate-raw.ts +442 -409
package/src/runner.ts CHANGED
@@ -1,397 +1,395 @@
1
- import { existsSync } from 'node:fs';
2
- import { isAbsolute, join } from 'node:path';
3
- import type { SpawnSpec, DriverPlugin, TaskResult } from './types';
4
- import { shellArgs } from './utils';
5
-
6
- // Delay before escalating SIGTERM to SIGKILL when killing a timed-out process.
7
- const SIGKILL_DELAY_MS = 3_000;
8
-
9
- /**
10
- * On Windows, proc.kill('SIGTERM') / proc.kill('SIGKILL') only terminate the
11
- * direct child process. When the child is a .cmd/.bat wrapper (e.g. claude.cmd),
12
- * cmd.exe spawns the real process as a grandchild — proc.kill misses it entirely.
13
- * `taskkill /F /T /PID` kills the entire process tree rooted at the given PID.
14
- */
15
- function killProcessTree(pid: number): void {
16
- if (process.platform !== 'win32') return;
17
- try {
18
- const result = Bun.spawnSync(['taskkill', '/F', '/T', '/PID', String(pid)], {
19
- stdout: 'pipe',
20
- stderr: 'pipe',
21
- });
22
- if (result.exitCode !== 0) {
23
- const stderr = new TextDecoder().decode(result.stderr);
24
- // Exit code 128 = process not found (already exited) — not worth warning about
25
- if (result.exitCode !== 128) {
26
- console.error(`[killProcessTree] taskkill exited ${result.exitCode} for PID ${pid}: ${stderr.trim()}`);
27
- }
28
- }
29
- } catch {
30
- /* best-effort — process may have already exited */
31
- }
32
- }
33
-
34
- export interface RunOptions {
35
- readonly timeoutMs?: number;
36
- readonly signal?: AbortSignal; // pipeline-level abort
37
- }
38
-
39
- /**
40
- * On Windows, Bun.spawn does NOT auto-append PATHEXT extensions like
41
- * CreateProcess does. A bare command like `claude` fails with ENOENT if the
42
- * actual file on disk is `claude.cmd` / `claude.bat` / `claude.ps1`. We
43
- * manually resolve the command against PATH + PATHEXT here so Drivers can
44
- * keep using short names (`claude`, `npx`, etc.) cross-platform.
45
- *
46
- * Results are cached by (cmd, envPath) key so repeated spawns of the same
47
- * command don't block the event loop with synchronous PATH scans.
48
- *
49
- * Returns the original name if resolution fails; Bun will raise the same
50
- * ENOENT it would have otherwise.
51
- */
52
- const RESOLVED_EXE_CACHE_MAX = 128;
53
- const resolvedExeCache = new Map<string, string | null>();
54
-
55
- /** Evict the oldest entry when the cache is at capacity. */
56
- function evictIfFull(): void {
57
- if (resolvedExeCache.size >= RESOLVED_EXE_CACHE_MAX) {
58
- // Map iteration order is insertion order — delete the first (oldest) key.
59
- const oldest = resolvedExeCache.keys().next().value;
60
- if (oldest !== undefined) resolvedExeCache.delete(oldest);
61
- }
62
- }
63
-
64
- function resolveWindowsExe(
65
- args: readonly string[],
66
- envPath: string,
67
- ): readonly string[] {
68
- if (process.platform !== 'win32' || args.length === 0) return args;
69
- const cmd = args[0]!;
70
- // Already a full path or has an extension → trust caller.
71
- if (isAbsolute(cmd) || /\.[a-z0-9]+$/i.test(cmd)) return args;
72
-
73
- const cacheKey = `${cmd}\x00${envPath}`;
74
- if (resolvedExeCache.has(cacheKey)) {
75
- // ?? null coerces undefined→null so cached is string|null and the !== null
76
- // check narrows it to string without a spurious 'undefined' arm.
77
- const cached = resolvedExeCache.get(cacheKey) ?? null;
78
- return cached !== null ? [cached, ...args.slice(1)] : args;
79
- }
80
-
81
- const exts = (
82
- process.env.PATHEXT ??
83
- '.COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC'
84
- )
85
- .split(';')
86
- .filter(Boolean);
87
- const dirs = envPath.split(';').filter(Boolean);
88
-
89
- for (const dir of dirs) {
90
- for (const ext of exts) {
91
- const candidate = join(dir, cmd + ext);
92
- if (existsSync(candidate)) {
93
- evictIfFull();
94
- resolvedExeCache.set(cacheKey, candidate);
95
- return [candidate, ...args.slice(1)];
96
- }
97
- }
98
- }
99
- evictIfFull();
100
- resolvedExeCache.set(cacheKey, null);
101
- return args;
102
- }
103
-
104
- /**
105
- * H2: Build a "failed before spawn" result. Tagged as 'spawn_error' so the
106
- * engine can show a useful classification ("driver tried to launch X but
107
- * the binary wasn't found") rather than the misleading "timeout".
108
- */
109
- function failResult(stderr: string, durationMs: number): TaskResult {
110
- return {
111
- exitCode: -1,
112
- stdout: '',
113
- stderr,
114
- stderrPath: null,
115
- durationMs,
116
- sessionId: null,
117
- normalizedOutput: null,
118
- failureKind: 'spawn_error',
119
- };
120
- }
121
-
122
- /**
123
- * R2: Validate a SpawnSpec returned by a third-party driver. Returns null on
124
- * success or a human-readable error message describing the first violation.
125
- *
126
- * Catching this here is critical: an undetected bad spec ends up calling
127
- * Bun.spawn with garbage and the resulting TypeError leaks into engine
128
- * processTask's catch block as "Cannot read properties of undefined". By
129
- * validating here we surface a clear "Driver X returned invalid args" message
130
- * instead, and short-circuit before holding any process resources.
131
- */
132
- export function validateSpawnSpec(spec: unknown, driverName: string): string | null {
133
- if (!spec || typeof spec !== 'object') {
134
- return `Driver "${driverName}".buildCommand returned ${spec === null ? 'null' : typeof spec}, expected SpawnSpec object`;
135
- }
136
- const s = spec as Record<string, unknown>;
137
- if (!Array.isArray(s.args)) {
138
- return `Driver "${driverName}".buildCommand returned spec.args of type ${typeof s.args}, expected string[]`;
139
- }
140
- if (s.args.length === 0) {
141
- return `Driver "${driverName}".buildCommand returned an empty spec.args array`;
142
- }
143
- for (let i = 0; i < s.args.length; i++) {
144
- if (typeof s.args[i] !== 'string') {
145
- return `Driver "${driverName}".buildCommand returned spec.args[${i}] of type ${typeof s.args[i]}, expected string`;
146
- }
147
- }
148
- if (typeof s.args[0] !== 'string' || s.args[0].length === 0) {
149
- return `Driver "${driverName}".buildCommand returned an empty executable name in spec.args[0]`;
150
- }
151
- if (s.cwd !== undefined && typeof s.cwd !== 'string') {
152
- return `Driver "${driverName}".buildCommand returned spec.cwd of type ${typeof s.cwd}, expected string or undefined`;
153
- }
154
- if (s.stdin !== undefined && typeof s.stdin !== 'string') {
155
- return `Driver "${driverName}".buildCommand returned spec.stdin of type ${typeof s.stdin}, expected string or undefined`;
156
- }
157
- if (s.env !== undefined) {
158
- if (!s.env || typeof s.env !== 'object' || Array.isArray(s.env)) {
159
- return `Driver "${driverName}".buildCommand returned spec.env that is not a plain object`;
160
- }
161
- for (const [k, v] of Object.entries(s.env as Record<string, unknown>)) {
162
- if (typeof v !== 'string') {
163
- return `Driver "${driverName}".buildCommand returned spec.env.${k} of type ${typeof v}, expected string`;
164
- }
165
- }
166
- }
167
- return null;
168
- }
169
-
170
- export async function runSpawn(
171
- spec: SpawnSpec,
172
- driver: DriverPlugin | null,
173
- opts: RunOptions = {},
174
- ): Promise<TaskResult> {
175
- const { timeoutMs, signal } = opts;
176
- const start = performance.now();
177
- const elapsed = () => Math.round(performance.now() - start);
178
-
179
- if (signal?.aborted) {
180
- return failResult('Pipeline aborted before spawn', 0);
181
- }
182
-
183
- // R2: validate the spec before touching it. A third-party driver that
184
- // returns a malformed SpawnSpec used to crash deep inside Bun.spawn with
185
- // an opaque TypeError; now we report a clear "Driver X returned …" message.
186
- const validationError = validateSpawnSpec(spec, driver?.name ?? '<unknown>');
187
- if (validationError !== null) {
188
- return failResult(validationError, elapsed());
189
- }
190
-
191
- const mergedEnv = { ...process.env, ...(spec.env ?? {}) };
192
- const resolvedArgs = resolveWindowsExe(
193
- spec.args,
194
- mergedEnv.PATH ?? process.env.PATH ?? '',
195
- );
196
-
197
- // ── 1. Spawn (catch ENOENT / bad-cwd up front) ────────────────────────
198
- let proc: ReturnType<typeof Bun.spawn>;
199
- try {
200
- proc = Bun.spawn(resolvedArgs as string[], {
201
- cwd: spec.cwd,
202
- env: mergedEnv,
203
- stdout: 'pipe',
204
- stderr: 'pipe',
205
- stdin: spec.stdin ? 'pipe' : undefined,
206
- });
207
- } catch (err) {
208
- return failResult(String(err), elapsed());
209
- }
210
-
211
- // ── 2. Write stdin ─────────────────────────────────────────────────────
212
- // Child may exit before reading (e.g. quick-fail commands that don't
213
- // touch stdin) → swallow EPIPE rather than surfacing it as an
214
- // engine-level error.
215
- if (spec.stdin && proc.stdin && typeof proc.stdin !== 'number') {
216
- try {
217
- proc.stdin.write(spec.stdin);
218
- proc.stdin.end();
219
- } catch {
220
- /* ignore EPIPE / closed-pipe errors */
221
- }
222
- }
223
-
224
- // ── 3. Timeout & abort handling ────────────────────────────────────────
225
- let killedByUs = false;
226
- let timedOut = false;
227
- let timer: ReturnType<typeof setTimeout> | null = null;
228
- let forceTimer: ReturnType<typeof setTimeout> | null = null;
229
-
230
- const killGracefully = () => {
231
- if (killedByUs) return;
232
- killedByUs = true;
233
-
234
- if (process.platform === 'win32') {
235
- // On Windows, kill the entire process tree via taskkill. This handles
236
- // .cmd wrappers and nested child processes that proc.kill() misses.
237
- killProcessTree(proc.pid);
238
- } else {
239
- proc.kill('SIGTERM');
240
- // If the child ignores SIGTERM, escalate to SIGKILL after 3 s.
241
- forceTimer = setTimeout(() => {
242
- try {
243
- proc.kill('SIGKILL');
244
- } catch {
245
- /* already exited */
246
- }
247
- }, SIGKILL_DELAY_MS);
248
- }
249
- };
250
-
251
- if (timeoutMs && timeoutMs > 0) {
252
- timer = setTimeout(() => {
253
- timedOut = true;
254
- killGracefully();
255
- }, timeoutMs);
256
- }
257
-
258
- const onAbort = () => killGracefully();
259
- if (signal) {
260
- if (signal.aborted) {
261
- killGracefully();
262
- } else {
263
- signal.addEventListener('abort', onAbort, { once: true });
264
- }
265
- }
266
-
267
- // ── 4. Collect output & wait (parallel to avoid pipe-buffer deadlock) ─
268
- const stdoutStream = typeof proc.stdout === 'object' ? proc.stdout : undefined;
269
- const stderrStream = typeof proc.stderr === 'object' ? proc.stderr : undefined;
270
-
271
- const [exitCode, stdout, stderr] = await Promise.all([
272
- proc.exited,
273
- stdoutStream ? new Response(stdoutStream).text() : Promise.resolve(''),
274
- stderrStream ? new Response(stderrStream).text() : Promise.resolve(''),
275
- ]);
276
-
277
- // ── 5. Cleanup timers & listeners ──────────────────────────────────────
278
- if (timer) clearTimeout(timer);
279
- if (forceTimer) clearTimeout(forceTimer);
280
- if (signal) signal.removeEventListener('abort', onAbort);
281
-
282
- const durationMs = elapsed();
283
-
284
- // We initiated the kill (timeout or abort) — always treat as non-success
285
- // regardless of exit code. A process that catches SIGTERM and exits 0 still
286
- // hit the timeout; letting it pass as success would unblock downstream tasks
287
- // incorrectly. The `timedOut` flag guards against the narrow race where the
288
- // process exits naturally at the exact moment the timeout fires — even if
289
- // killedByUs wasn't set in time, the timeout intention still applies.
290
- if (killedByUs || timedOut) {
291
- return {
292
- exitCode: -1,
293
- stdout,
294
- stderr,
295
- stderrPath: null,
296
- durationMs,
297
- sessionId: null,
298
- normalizedOutput: null,
299
- // H2: explicit kind so engine.ts no longer has to guess "is exitCode -1
300
- // a timeout or a spawn-failure?" Both used to share the same code.
301
- failureKind: 'timeout',
302
- };
303
- }
304
-
305
- // ── 6. Let driver extract metadata ─────────────────────────────────────
306
- // R1: parseResult is third-party code — wrap it in try/catch so a buggy
307
- // extractor doesn't discard a perfectly good spawn result. R5: even on
308
- // success, type-guard sessionId/normalizedOutput so a mistyped return
309
- // value doesn't poison sessionMap/normalizedMap downstream.
310
- let sessionId: string | null = null;
311
- let normalizedOutput: string | null = null;
312
- // M12: drivers can flip a task's terminal status to failed even when the
313
- // process exited 0 (e.g. opencode returning `{type:"error"}` JSON). When
314
- // the flag is set, we synthesize a non-zero exit code and append a reason
315
- // line to stderr so engine.ts marks the task as failed with a useful
316
- // explanation instead of letting the error JSON pass through as success.
317
- let forcedFailureMessage: string | null = null;
318
- if (driver?.parseResult) {
319
- try {
320
- const meta = driver.parseResult(stdout, stderr);
321
- if (meta && typeof meta === 'object') {
322
- if (typeof meta.sessionId === 'string' && meta.sessionId.length > 0) {
323
- sessionId = meta.sessionId;
324
- }
325
- if (typeof meta.normalizedOutput === 'string') {
326
- normalizedOutput = meta.normalizedOutput;
327
- }
328
- if (meta.forceFailure === true) {
329
- forcedFailureMessage = typeof meta.forceFailureReason === 'string'
330
- ? meta.forceFailureReason
331
- : 'Driver flagged task as failed (forceFailure)';
332
- }
333
- }
334
- } catch (err) {
335
- // The spawn itself succeeded; only metadata extraction failed.
336
- // Fall through with sessionId/normalizedOutput = null and append a
337
- // breadcrumb to stderr so the user can see WHY continue_from broke.
338
- const msg = err instanceof Error ? err.message : String(err);
339
- const note = `\n[runner] driver "${driver.name}".parseResult threw: ${msg}`;
340
- return {
341
- exitCode,
342
- stdout,
343
- stderr: stderr + note,
344
- stderrPath: null,
345
- durationMs,
346
- sessionId: null,
347
- normalizedOutput: null,
348
- // H2: parseResult threw — the spawn itself succeeded, so the failure
349
- // is "the process exited but the driver couldn't parse it". Surface
350
- // that as exit_nonzero (when the actual exit was non-zero) or null
351
- // (when the underlying exit was 0 — UI will still mark it failed via
352
- // engine.ts because the result is incomplete).
353
- failureKind: exitCode === 0 ? null : 'exit_nonzero',
354
- };
355
- }
356
- }
357
-
358
- // M12: when the driver forced a failure, treat as exit_nonzero with the
359
- // reason appended to stderr so users see WHY the task failed without
360
- // having to dig through driver-specific JSON.
361
- if (forcedFailureMessage !== null) {
362
- return {
363
- exitCode: exitCode === 0 ? 1 : exitCode,
364
- stdout,
365
- stderr: stderr + (stderr.endsWith('\n') ? '' : '\n') + `[driver] ${forcedFailureMessage}`,
366
- stderrPath: null,
367
- durationMs,
368
- sessionId,
369
- normalizedOutput,
370
- failureKind: 'exit_nonzero',
371
- };
372
- }
373
- return {
374
- exitCode,
375
- stdout,
376
- stderr,
377
- stderrPath: null,
378
- durationMs,
379
- sessionId,
380
- normalizedOutput,
381
- // H2: success vs nonzero exit. Engine uses this to short-circuit the
382
- // timeout branch even if a third-party driver returns -1 by mistake.
383
- failureKind: exitCode === 0 ? null : 'exit_nonzero',
384
- };
385
- }
386
-
387
- export async function runCommand(
388
- command: string,
389
- cwd: string,
390
- opts: RunOptions = {},
391
- ): Promise<TaskResult> {
392
- const spec: SpawnSpec = {
393
- args: shellArgs(command),
394
- cwd,
395
- };
396
- return runSpawn(spec, null, opts);
397
- }
1
+ import { existsSync, statSync } from 'node:fs';
2
+ import { isAbsolute, join } from 'node:path';
3
+ import type { SpawnSpec, DriverPlugin, TaskResult } from './types';
4
+ import { shellArgs } from './utils';
5
+
6
+ // Delay before escalating SIGTERM to SIGKILL when killing a timed-out process.
7
+ const SIGKILL_DELAY_MS = 3_000;
8
+
9
+ /**
10
+ * On Windows, proc.kill('SIGTERM') / proc.kill('SIGKILL') only terminate the
11
+ * direct child process. When the child is a .cmd/.bat wrapper (e.g. claude.cmd),
12
+ * cmd.exe spawns the real process as a grandchild — proc.kill misses it entirely.
13
+ * `taskkill /F /T /PID` kills the entire process tree rooted at the given PID.
14
+ */
15
+ function killProcessTree(pid: number): void {
16
+ if (process.platform !== 'win32') return;
17
+ try {
18
+ const result = Bun.spawnSync(['taskkill', '/F', '/T', '/PID', String(pid)], {
19
+ stdout: 'pipe',
20
+ stderr: 'pipe',
21
+ });
22
+ if (result.exitCode !== 0) {
23
+ const stderr = new TextDecoder().decode(result.stderr);
24
+ // Exit code 128 = process not found (already exited) — not worth warning about
25
+ if (result.exitCode !== 128) {
26
+ console.error(
27
+ `[killProcessTree] taskkill exited ${result.exitCode} for PID ${pid}: ${stderr.trim()}`,
28
+ );
29
+ }
30
+ }
31
+ } catch {
32
+ /* best-effort — process may have already exited */
33
+ }
34
+ }
35
+
36
+ export interface RunOptions {
37
+ readonly timeoutMs?: number;
38
+ readonly signal?: AbortSignal; // pipeline-level abort
39
+ }
40
+
41
+ /**
42
+ * On Windows, Bun.spawn does NOT auto-append PATHEXT extensions like
43
+ * CreateProcess does. A bare command like `claude` fails with ENOENT if the
44
+ * actual file on disk is `claude.cmd` / `claude.bat` / `claude.ps1`. We
45
+ * manually resolve the command against PATH + PATHEXT here so Drivers can
46
+ * keep using short names (`claude`, `npx`, etc.) cross-platform.
47
+ *
48
+ * Results are cached by (cmd, envPath) key so repeated spawns of the same
49
+ * command don't block the event loop with synchronous PATH scans.
50
+ *
51
+ * Returns the original name if resolution fails; Bun will raise the same
52
+ * ENOENT it would have otherwise.
53
+ */
54
+ const RESOLVED_EXE_CACHE_MAX = 128;
55
+ const resolvedExeCache = new Map<string, string | null>();
56
+
57
+ /** Evict the oldest entry when the cache is at capacity. */
58
+ function evictIfFull(): void {
59
+ if (resolvedExeCache.size >= RESOLVED_EXE_CACHE_MAX) {
60
+ // Map iteration order is insertion order — delete the first (oldest) key.
61
+ const oldest = resolvedExeCache.keys().next().value;
62
+ if (oldest !== undefined) resolvedExeCache.delete(oldest);
63
+ }
64
+ }
65
+
66
+ function resolveWindowsExe(args: readonly string[], envPath: string): readonly string[] {
67
+ if (process.platform !== 'win32' || args.length === 0) return args;
68
+ const cmd = args[0]!;
69
+ // Already a full path or has an extension → trust caller.
70
+ if (isAbsolute(cmd) || /\.[a-z0-9]+$/i.test(cmd)) return args;
71
+
72
+ const cacheKey = `${cmd}\x00${envPath}`;
73
+ if (resolvedExeCache.has(cacheKey)) {
74
+ // ?? null coerces undefined→null so cached is string|null and the !== null
75
+ // check narrows it to string without a spurious 'undefined' arm.
76
+ const cached = resolvedExeCache.get(cacheKey) ?? null;
77
+ return cached !== null ? [cached, ...args.slice(1)] : args;
78
+ }
79
+
80
+ const exts = (process.env.PATHEXT ?? '.COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC')
81
+ .split(';')
82
+ .filter(Boolean);
83
+ const dirs = envPath.split(';').filter(Boolean);
84
+
85
+ for (const dir of dirs) {
86
+ for (const ext of exts) {
87
+ const candidate = join(dir, cmd + ext);
88
+ try {
89
+ if (existsSync(candidate) && statSync(candidate).isFile()) {
90
+ evictIfFull();
91
+ resolvedExeCache.set(cacheKey, candidate);
92
+ return [candidate, ...args.slice(1)];
93
+ }
94
+ } catch {
95
+ /* stat race — skip */
96
+ }
97
+ }
98
+ }
99
+ evictIfFull();
100
+ resolvedExeCache.set(cacheKey, null);
101
+ return args;
102
+ }
103
+
104
+ /**
105
+ * H2: Build a "failed before spawn" result. Tagged as 'spawn_error' so the
106
+ * engine can show a useful classification ("driver tried to launch X but
107
+ * the binary wasn't found") rather than the misleading "timeout".
108
+ */
109
+ function failResult(stderr: string, durationMs: number): TaskResult {
110
+ return {
111
+ exitCode: -1,
112
+ stdout: '',
113
+ stderr,
114
+ stderrPath: null,
115
+ durationMs,
116
+ sessionId: null,
117
+ normalizedOutput: null,
118
+ failureKind: 'spawn_error',
119
+ };
120
+ }
121
+
122
+ /**
123
+ * R2: Validate a SpawnSpec returned by a third-party driver. Returns null on
124
+ * success or a human-readable error message describing the first violation.
125
+ *
126
+ * Catching this here is critical: an undetected bad spec ends up calling
127
+ * Bun.spawn with garbage and the resulting TypeError leaks into engine
128
+ * processTask's catch block as "Cannot read properties of undefined". By
129
+ * validating here we surface a clear "Driver X returned invalid args" message
130
+ * instead, and short-circuit before holding any process resources.
131
+ */
132
+ export function validateSpawnSpec(spec: unknown, driverName: string): string | null {
133
+ if (!spec || typeof spec !== 'object') {
134
+ return `Driver "${driverName}".buildCommand returned ${spec === null ? 'null' : typeof spec}, expected SpawnSpec object`;
135
+ }
136
+ const s = spec as Record<string, unknown>;
137
+ if (!Array.isArray(s.args)) {
138
+ return `Driver "${driverName}".buildCommand returned spec.args of type ${typeof s.args}, expected string[]`;
139
+ }
140
+ if (s.args.length === 0) {
141
+ return `Driver "${driverName}".buildCommand returned an empty spec.args array`;
142
+ }
143
+ for (let i = 0; i < s.args.length; i++) {
144
+ if (typeof s.args[i] !== 'string') {
145
+ return `Driver "${driverName}".buildCommand returned spec.args[${i}] of type ${typeof s.args[i]}, expected string`;
146
+ }
147
+ }
148
+ if (typeof s.args[0] !== 'string' || s.args[0].length === 0) {
149
+ return `Driver "${driverName}".buildCommand returned an empty executable name in spec.args[0]`;
150
+ }
151
+ if (s.cwd !== undefined && typeof s.cwd !== 'string') {
152
+ return `Driver "${driverName}".buildCommand returned spec.cwd of type ${typeof s.cwd}, expected string or undefined`;
153
+ }
154
+ if (s.stdin !== undefined && typeof s.stdin !== 'string') {
155
+ return `Driver "${driverName}".buildCommand returned spec.stdin of type ${typeof s.stdin}, expected string or undefined`;
156
+ }
157
+ if (s.env !== undefined) {
158
+ if (!s.env || typeof s.env !== 'object' || Array.isArray(s.env)) {
159
+ return `Driver "${driverName}".buildCommand returned spec.env that is not a plain object`;
160
+ }
161
+ for (const [k, v] of Object.entries(s.env as Record<string, unknown>)) {
162
+ if (typeof v !== 'string') {
163
+ return `Driver "${driverName}".buildCommand returned spec.env.${k} of type ${typeof v}, expected string`;
164
+ }
165
+ }
166
+ }
167
+ return null;
168
+ }
169
+
170
+ export async function runSpawn(
171
+ spec: SpawnSpec,
172
+ driver: DriverPlugin | null,
173
+ opts: RunOptions = {},
174
+ ): Promise<TaskResult> {
175
+ const { timeoutMs, signal } = opts;
176
+ const start = performance.now();
177
+ const elapsed = () => Math.round(performance.now() - start);
178
+
179
+ if (signal?.aborted) {
180
+ return failResult('Pipeline aborted before spawn', 0);
181
+ }
182
+
183
+ // R2: validate the spec before touching it. A third-party driver that
184
+ // returns a malformed SpawnSpec used to crash deep inside Bun.spawn with
185
+ // an opaque TypeError; now we report a clear "Driver X returned …" message.
186
+ const validationError = validateSpawnSpec(spec, driver?.name ?? '<unknown>');
187
+ if (validationError !== null) {
188
+ return failResult(validationError, elapsed());
189
+ }
190
+
191
+ const mergedEnv = { ...process.env, ...(spec.env ?? {}) };
192
+ const resolvedArgs = resolveWindowsExe(spec.args, mergedEnv.PATH ?? process.env.PATH ?? '');
193
+
194
+ // ── 1. Spawn (catch ENOENT / bad-cwd up front) ────────────────────────
195
+ let proc: ReturnType<typeof Bun.spawn>;
196
+ try {
197
+ proc = Bun.spawn(resolvedArgs as string[], {
198
+ cwd: spec.cwd,
199
+ env: mergedEnv,
200
+ stdout: 'pipe',
201
+ stderr: 'pipe',
202
+ stdin: spec.stdin ? 'pipe' : undefined,
203
+ });
204
+ } catch (err) {
205
+ return failResult(String(err), elapsed());
206
+ }
207
+
208
+ // ── 2. Write stdin ─────────────────────────────────────────────────────
209
+ // Child may exit before reading (e.g. quick-fail commands that don't
210
+ // touch stdin) → swallow EPIPE rather than surfacing it as an
211
+ // engine-level error.
212
+ if (spec.stdin && proc.stdin && typeof proc.stdin !== 'number') {
213
+ try {
214
+ await proc.stdin.write(spec.stdin);
215
+ await proc.stdin.end();
216
+ } catch {
217
+ /* ignore EPIPE / closed-pipe errors */
218
+ }
219
+ }
220
+
221
+ // ── 3. Timeout & abort handling ────────────────────────────────────────
222
+ let killedByUs = false;
223
+ let timedOut = false;
224
+ let timer: ReturnType<typeof setTimeout> | null = null;
225
+ let forceTimer: ReturnType<typeof setTimeout> | null = null;
226
+
227
+ const killGracefully = () => {
228
+ if (killedByUs) return;
229
+ killedByUs = true;
230
+
231
+ if (process.platform === 'win32') {
232
+ // On Windows, kill the entire process tree via taskkill. This handles
233
+ // .cmd wrappers and nested child processes that proc.kill() misses.
234
+ killProcessTree(proc.pid);
235
+ } else {
236
+ proc.kill('SIGTERM');
237
+ // If the child ignores SIGTERM, escalate to SIGKILL after 3 s.
238
+ forceTimer = setTimeout(() => {
239
+ try {
240
+ proc.kill('SIGKILL');
241
+ } catch {
242
+ /* already exited */
243
+ }
244
+ }, SIGKILL_DELAY_MS);
245
+ }
246
+ };
247
+
248
+ if (timeoutMs && timeoutMs > 0) {
249
+ timer = setTimeout(() => {
250
+ timedOut = true;
251
+ killGracefully();
252
+ }, timeoutMs);
253
+ }
254
+
255
+ const onAbort = () => killGracefully();
256
+ if (signal) {
257
+ if (signal.aborted) {
258
+ killGracefully();
259
+ } else {
260
+ signal.addEventListener('abort', onAbort, { once: true });
261
+ }
262
+ }
263
+
264
+ // ── 4. Collect output & wait (parallel to avoid pipe-buffer deadlock) ─
265
+ const stdoutStream = typeof proc.stdout === 'object' ? proc.stdout : undefined;
266
+ const stderrStream = typeof proc.stderr === 'object' ? proc.stderr : undefined;
267
+
268
+ const [exitCode, stdout, stderr] = await Promise.all([
269
+ proc.exited,
270
+ stdoutStream ? new Response(stdoutStream).text() : Promise.resolve(''),
271
+ stderrStream ? new Response(stderrStream).text() : Promise.resolve(''),
272
+ ]);
273
+
274
+ // ── 5. Cleanup timers & listeners ──────────────────────────────────────
275
+ if (timer) clearTimeout(timer);
276
+ if (forceTimer) clearTimeout(forceTimer);
277
+ if (signal) signal.removeEventListener('abort', onAbort);
278
+
279
+ const durationMs = elapsed();
280
+
281
+ // We initiated the kill (timeout or abort) — always treat as non-success
282
+ // regardless of exit code. A process that catches SIGTERM and exits 0 still
283
+ // hit the timeout; letting it pass as success would unblock downstream tasks
284
+ // incorrectly. The `timedOut` flag guards against the narrow race where the
285
+ // process exits naturally at the exact moment the timeout fires — even if
286
+ // killedByUs wasn't set in time, the timeout intention still applies.
287
+ if (killedByUs || timedOut) {
288
+ return {
289
+ exitCode: -1,
290
+ stdout,
291
+ stderr,
292
+ stderrPath: null,
293
+ durationMs,
294
+ sessionId: null,
295
+ normalizedOutput: null,
296
+ // H2: explicit kind so engine.ts no longer has to guess "is exitCode -1
297
+ // a timeout or a spawn-failure?" Both used to share the same code.
298
+ failureKind: 'timeout',
299
+ };
300
+ }
301
+
302
+ // ── 6. Let driver extract metadata ─────────────────────────────────────
303
+ // R1: parseResult is third-party code — wrap it in try/catch so a buggy
304
+ // extractor doesn't discard a perfectly good spawn result. R5: even on
305
+ // success, type-guard sessionId/normalizedOutput so a mistyped return
306
+ // value doesn't poison sessionMap/normalizedMap downstream.
307
+ let sessionId: string | null = null;
308
+ let normalizedOutput: string | null = null;
309
+ // M12: drivers can flip a task's terminal status to failed even when the
310
+ // process exited 0 (e.g. opencode returning `{type:"error"}` JSON). When
311
+ // the flag is set, we synthesize a non-zero exit code and append a reason
312
+ // line to stderr so engine.ts marks the task as failed with a useful
313
+ // explanation instead of letting the error JSON pass through as success.
314
+ let forcedFailureMessage: string | null = null;
315
+ if (driver?.parseResult) {
316
+ try {
317
+ const meta = driver.parseResult(stdout, stderr);
318
+ if (meta && typeof meta === 'object') {
319
+ if (typeof meta.sessionId === 'string' && /^[\w.-]{1,256}$/.test(meta.sessionId)) {
320
+ sessionId = meta.sessionId;
321
+ }
322
+ if (typeof meta.normalizedOutput === 'string') {
323
+ normalizedOutput = meta.normalizedOutput;
324
+ }
325
+ if (meta.forceFailure === true) {
326
+ forcedFailureMessage =
327
+ typeof meta.forceFailureReason === 'string'
328
+ ? meta.forceFailureReason
329
+ : 'Driver flagged task as failed (forceFailure)';
330
+ }
331
+ }
332
+ } catch (err) {
333
+ // The spawn itself succeeded; only metadata extraction failed.
334
+ // Fall through with sessionId/normalizedOutput = null and append a
335
+ // breadcrumb to stderr so the user can see WHY continue_from broke.
336
+ const msg = err instanceof Error ? err.message : String(err);
337
+ const note = `\n[runner] driver "${driver.name}".parseResult threw: ${msg}`;
338
+ return {
339
+ exitCode,
340
+ stdout,
341
+ stderr: stderr + note,
342
+ stderrPath: null,
343
+ durationMs,
344
+ sessionId: null,
345
+ normalizedOutput: null,
346
+ // H2: parseResult threw — the spawn itself succeeded, so the failure
347
+ // is "the process exited but the driver couldn't parse it". Surface
348
+ // that as exit_nonzero (when the actual exit was non-zero) or null
349
+ // (when the underlying exit was 0 — UI will still mark it failed via
350
+ // engine.ts because the result is incomplete).
351
+ failureKind: exitCode === 0 ? null : 'exit_nonzero',
352
+ };
353
+ }
354
+ }
355
+
356
+ // M12: when the driver forced a failure, treat as exit_nonzero with the
357
+ // reason appended to stderr so users see WHY the task failed without
358
+ // having to dig through driver-specific JSON.
359
+ if (forcedFailureMessage !== null) {
360
+ return {
361
+ exitCode: exitCode === 0 ? 1 : exitCode,
362
+ stdout,
363
+ stderr: stderr + (stderr.endsWith('\n') ? '' : '\n') + `[driver] ${forcedFailureMessage}`,
364
+ stderrPath: null,
365
+ durationMs,
366
+ sessionId,
367
+ normalizedOutput,
368
+ failureKind: 'exit_nonzero',
369
+ };
370
+ }
371
+ return {
372
+ exitCode,
373
+ stdout,
374
+ stderr,
375
+ stderrPath: null,
376
+ durationMs,
377
+ sessionId,
378
+ normalizedOutput,
379
+ // H2: success vs nonzero exit. Engine uses this to short-circuit the
380
+ // timeout branch even if a third-party driver returns -1 by mistake.
381
+ failureKind: exitCode === 0 ? null : 'exit_nonzero',
382
+ };
383
+ }
384
+
385
+ export async function runCommand(
386
+ command: string,
387
+ cwd: string,
388
+ opts: RunOptions = {},
389
+ ): Promise<TaskResult> {
390
+ const spec: SpawnSpec = {
391
+ args: shellArgs(command),
392
+ cwd,
393
+ };
394
+ return runSpawn(spec, null, opts);
395
+ }