@tagma/sdk 0.6.4 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/README.md +8 -5
  2. package/dist/dag.test.d.ts +2 -0
  3. package/dist/dag.test.d.ts.map +1 -0
  4. package/dist/dag.test.js +42 -0
  5. package/dist/dag.test.js.map +1 -0
  6. package/dist/engine-ports.test.d.ts +2 -0
  7. package/dist/engine-ports.test.d.ts.map +1 -0
  8. package/dist/engine-ports.test.js +378 -0
  9. package/dist/engine-ports.test.js.map +1 -0
  10. package/dist/engine.d.ts.map +1 -1
  11. package/dist/engine.js +194 -21
  12. package/dist/engine.js.map +1 -1
  13. package/dist/pipeline-runner.d.ts.map +1 -1
  14. package/dist/pipeline-runner.js +3 -0
  15. package/dist/pipeline-runner.js.map +1 -1
  16. package/dist/ports.d.ts +118 -0
  17. package/dist/ports.d.ts.map +1 -0
  18. package/dist/ports.js +365 -0
  19. package/dist/ports.js.map +1 -0
  20. package/dist/ports.test.d.ts +2 -0
  21. package/dist/ports.test.d.ts.map +1 -0
  22. package/dist/ports.test.js +262 -0
  23. package/dist/ports.test.js.map +1 -0
  24. package/dist/prompt-doc.d.ts +35 -1
  25. package/dist/prompt-doc.d.ts.map +1 -1
  26. package/dist/prompt-doc.js +110 -0
  27. package/dist/prompt-doc.js.map +1 -1
  28. package/dist/prompt-doc.test.d.ts +2 -0
  29. package/dist/prompt-doc.test.d.ts.map +1 -0
  30. package/dist/prompt-doc.test.js +145 -0
  31. package/dist/prompt-doc.test.js.map +1 -0
  32. package/dist/runner.d.ts +17 -0
  33. package/dist/runner.d.ts.map +1 -1
  34. package/dist/runner.js +171 -8
  35. package/dist/runner.js.map +1 -1
  36. package/dist/runner.test.d.ts +2 -0
  37. package/dist/runner.test.d.ts.map +1 -0
  38. package/dist/runner.test.js +119 -0
  39. package/dist/runner.test.js.map +1 -0
  40. package/dist/schema-ports.test.d.ts +2 -0
  41. package/dist/schema-ports.test.d.ts.map +1 -0
  42. package/dist/schema-ports.test.js +219 -0
  43. package/dist/schema-ports.test.js.map +1 -0
  44. package/dist/schema.d.ts.map +1 -1
  45. package/dist/schema.js +8 -0
  46. package/dist/schema.js.map +1 -1
  47. package/dist/sdk.d.ts +3 -1
  48. package/dist/sdk.d.ts.map +1 -1
  49. package/dist/sdk.js +5 -1
  50. package/dist/sdk.js.map +1 -1
  51. package/dist/validate-raw-ports.test.d.ts +2 -0
  52. package/dist/validate-raw-ports.test.d.ts.map +1 -0
  53. package/dist/validate-raw-ports.test.js +157 -0
  54. package/dist/validate-raw-ports.test.js.map +1 -0
  55. package/dist/validate-raw.d.ts.map +1 -1
  56. package/dist/validate-raw.js +141 -0
  57. package/dist/validate-raw.js.map +1 -1
  58. package/package.json +2 -7
  59. package/src/dag.test.ts +56 -0
  60. package/src/engine-ports.test.ts +404 -0
  61. package/src/engine.ts +231 -24
  62. package/src/pipeline-runner.ts +3 -0
  63. package/src/ports.test.ts +301 -0
  64. package/src/ports.ts +442 -0
  65. package/src/prompt-doc.test.ts +174 -0
  66. package/src/prompt-doc.ts +121 -1
  67. package/src/runner.test.ts +142 -0
  68. package/src/runner.ts +198 -8
  69. package/src/schema-ports.test.ts +236 -0
  70. package/src/schema.ts +8 -0
  71. package/src/sdk.ts +14 -0
  72. package/src/validate-raw-ports.test.ts +198 -0
  73. package/src/validate-raw.ts +155 -1
package/src/prompt-doc.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { PromptDocument, PromptContextBlock } from './types';
1
+ import type { PortDef, PromptContextBlock, PromptDocument } from './types';
2
2
 
3
3
  /**
4
4
  * Build a fresh `PromptDocument` from a raw task string.
@@ -47,3 +47,123 @@ export function appendContext(
47
47
  ): PromptDocument {
48
48
  return { contexts: [...doc.contexts, block], task: doc.task };
49
49
  }
50
+
51
+ /**
52
+ * Helper: return a new document with the given block PREPENDED. The
53
+ * engine uses this to place port-related context blocks (`[Inputs]`,
54
+ * `[Output Format]`) at the top of the document so middlewares that
55
+ * assemble retrieval context against the task's inputs see them.
56
+ */
57
+ export function prependContext(
58
+ doc: PromptDocument,
59
+ block: PromptContextBlock,
60
+ ): PromptDocument {
61
+ return { contexts: [block, ...doc.contexts], task: doc.task };
62
+ }
63
+
64
+ /**
65
+ * Build an `[Inputs]` context block from a map of resolved port inputs.
66
+ * Each input is rendered on its own line as `name: <value>` with an
67
+ * optional trailing `# <description>` comment so the model has both the
68
+ * value and the reason it matters.
69
+ *
70
+ * The block is *only* useful for AI tasks; command tasks consume inputs
71
+ * through `{{inputs.X}}` substitution in their command line and do not
72
+ * need this context.
73
+ *
74
+ * Returns null when there are no inputs to render — callers should guard
75
+ * the `prependContext` call with an `if (block)` check so empty-input
76
+ * tasks don't grow a noise block in their prompt.
77
+ */
78
+ export function renderInputsBlock(
79
+ inputsDecl: readonly PortDef[] | undefined,
80
+ values: Readonly<Record<string, unknown>>,
81
+ ): PromptContextBlock | null {
82
+ if (!inputsDecl || inputsDecl.length === 0) return null;
83
+ const lines: string[] = [];
84
+ for (const port of inputsDecl) {
85
+ if (!(port.name in values)) continue;
86
+ const raw = values[port.name];
87
+ const rendered = renderInputValue(raw);
88
+ const descr = port.description?.trim();
89
+ lines.push(descr ? `${port.name}: ${rendered} # ${descr}` : `${port.name}: ${rendered}`);
90
+ }
91
+ if (lines.length === 0) return null;
92
+ return { label: 'Inputs', content: lines.join('\n') };
93
+ }
94
+
95
+ function renderInputValue(value: unknown): string {
96
+ if (value === null || value === undefined) return '';
97
+ if (typeof value === 'string') return JSON.stringify(value);
98
+ if (typeof value === 'number' || typeof value === 'boolean') return String(value);
99
+ try {
100
+ return JSON.stringify(value);
101
+ } catch {
102
+ return String(value);
103
+ }
104
+ }
105
+
106
+ /**
107
+ * Build an `[Output Format]` context block from a task's declared output
108
+ * ports. The block instructs the model to emit a final-line JSON object
109
+ * matching the declared schema so `extractTaskOutputs` can pick it up
110
+ * without fragile heuristics. Returns null when the task declares no
111
+ * outputs.
112
+ *
113
+ * The instruction is deliberately short and explicit — a terse "emit
114
+ * this object as JSON on the final line" beats a long schema dump
115
+ * because shorter prompts compose better with downstream middlewares.
116
+ */
117
+ export function renderOutputSchemaBlock(
118
+ outputsDecl: readonly PortDef[] | undefined,
119
+ ): PromptContextBlock | null {
120
+ if (!outputsDecl || outputsDecl.length === 0) return null;
121
+ const lines: string[] = [];
122
+ lines.push(
123
+ 'After your response, emit a single JSON object on the FINAL line with these keys:',
124
+ );
125
+ for (const port of outputsDecl) {
126
+ const descr = port.description?.trim();
127
+ const enumHint =
128
+ port.type === 'enum' && port.enum?.length
129
+ ? ` (one of: ${port.enum.map((v) => JSON.stringify(v)).join(', ')})`
130
+ : '';
131
+ lines.push(
132
+ descr
133
+ ? ` - ${port.name} (${port.type}${enumHint}): ${descr}`
134
+ : ` - ${port.name} (${port.type}${enumHint})`,
135
+ );
136
+ }
137
+ const example = buildExampleObject(outputsDecl);
138
+ lines.push('');
139
+ lines.push(`Example final line: ${JSON.stringify(example)}`);
140
+ return { label: 'Output Format', content: lines.join('\n') };
141
+ }
142
+
143
+ function buildExampleObject(outputsDecl: readonly PortDef[]): Record<string, unknown> {
144
+ const example: Record<string, unknown> = {};
145
+ for (const port of outputsDecl) {
146
+ if (port.default !== undefined) {
147
+ example[port.name] = port.default;
148
+ continue;
149
+ }
150
+ switch (port.type) {
151
+ case 'string':
152
+ example[port.name] = '...';
153
+ break;
154
+ case 'number':
155
+ example[port.name] = 0;
156
+ break;
157
+ case 'boolean':
158
+ example[port.name] = false;
159
+ break;
160
+ case 'enum':
161
+ example[port.name] = port.enum?.[0] ?? '...';
162
+ break;
163
+ case 'json':
164
+ default:
165
+ example[port.name] = null;
166
+ }
167
+ }
168
+ return example;
169
+ }
package/src/runner.test.ts ADDED
@@ -0,0 +1,142 @@
1
+ import { test, expect } from 'bun:test';
2
+ import { mkdtempSync, readFileSync, rmSync, statSync } from 'node:fs';
3
+ import { tmpdir } from 'node:os';
4
+ import { join } from 'node:path';
5
+ import { runSpawn } from './runner';
6
+
7
+ // Portable output producer — node is guaranteed in the bun dev env. Using a
8
+ // known runtime avoids shell-quoting differences between platforms.
9
+ function nodeArg(script: string): string[] {
10
+ return ['node', '-e', script];
11
+ }
12
+
13
+ test('runSpawn: small output is returned whole, persisted byte-identical', async () => {
14
+ const tmp = mkdtempSync(join(tmpdir(), 'tagma-runner-small-'));
15
+ const stdoutPath = join(tmp, 'out');
16
+ const stderrPath = join(tmp, 'err');
17
+ try {
18
+ const result = await runSpawn(
19
+ { args: nodeArg('process.stdout.write("hello world"); process.stderr.write("oops")') },
20
+ null,
21
+ { stdoutPath, stderrPath },
22
+ );
23
+ expect(result.exitCode).toBe(0);
24
+ expect(result.stdout).toBe('hello world');
25
+ expect(result.stderr).toBe('oops');
26
+ expect(result.stdoutBytes).toBe(11);
27
+ expect(result.stderrBytes).toBe(4);
28
+ expect(result.stdoutPath).toBe(stdoutPath);
29
+ expect(result.stderrPath).toBe(stderrPath);
30
+ expect(readFileSync(stdoutPath, 'utf8')).toBe('hello world');
31
+ expect(readFileSync(stderrPath, 'utf8')).toBe('oops');
32
+ } finally {
33
+ rmSync(tmp, { recursive: true, force: true });
34
+ }
35
+ });
36
+
37
+ test('runSpawn: oversized output — bounded tail in memory, full bytes on disk', async () => {
38
+ const tmp = mkdtempSync(join(tmpdir(), 'tagma-runner-big-'));
39
+ const stdoutPath = join(tmp, 'out');
40
+ try {
41
+ // Produce 3 MB of output against a 512 KB cap. The child writes in one
42
+ // shot; the runner should slice the single chunk's tail rather than
43
+ // evicting (the "pathological one-chunk-over-cap" branch).
44
+ const cap = 512 * 1024;
45
+ const totalBytes = 3 * 1024 * 1024;
46
+ const result = await runSpawn(
47
+ {
48
+ args: nodeArg(
49
+ `process.stdout.write("a".repeat(${totalBytes}))`,
50
+ ),
51
+ },
52
+ null,
53
+ { stdoutPath, maxStdoutTailBytes: cap },
54
+ );
55
+ expect(result.exitCode).toBe(0);
56
+ // Total bytes reported match reality
57
+ expect(result.stdoutBytes).toBe(totalBytes);
58
+ // In-memory tail bounded (tail + truncation marker header is a couple
59
+ // hundred bytes at most; give it slack)
60
+ expect(result.stdout.length).toBeLessThan(cap + 1024);
61
+ expect(result.stdout.length).toBeGreaterThan(cap - 1024);
62
+ // Truncation breadcrumb present and points at the full output
63
+ expect(result.stdout).toContain('truncated from head');
64
+ expect(result.stdout).toContain(stdoutPath);
65
+ // The tail ends with the trailing bytes the child wrote ('a')
66
+ expect(result.stdout.endsWith('a')).toBe(true);
67
+ // Disk copy is byte-exact and full-length
68
+ const onDiskBytes = statSync(stdoutPath).size;
69
+ expect(onDiskBytes).toBe(totalBytes);
70
+ } finally {
71
+ rmSync(tmp, { recursive: true, force: true });
72
+ }
73
+ });
74
+
75
+ test('runSpawn: chunked output — tail eviction keeps retained <= cap', async () => {
76
+ const tmp = mkdtempSync(join(tmpdir(), 'tagma-runner-chunked-'));
77
+ const stdoutPath = join(tmp, 'out');
78
+ try {
79
+ // Emit 8 chunks × 64 KB with sync drains between them, so the runner
80
+ // receives them as distinct chunks rather than one blob. Cap at 128 KB
81
+ // forces eviction of older chunks.
82
+ const cap = 128 * 1024;
83
+ const chunkSize = 64 * 1024;
84
+ const nChunks = 8;
85
+ const script = `
86
+ const chunk = 'b'.repeat(${chunkSize});
87
+ (async () => {
88
+ for (let i = 0; i < ${nChunks}; i++) {
89
+ process.stdout.write(chunk);
90
+ await new Promise(r => setImmediate(r));
91
+ }
92
+ })();
93
+ `;
94
+ const result = await runSpawn(
95
+ { args: nodeArg(script) },
96
+ null,
97
+ { stdoutPath, maxStdoutTailBytes: cap },
98
+ );
99
+ expect(result.exitCode).toBe(0);
100
+ expect(result.stdoutBytes).toBe(nChunks * chunkSize);
101
+ // Retained tail should be strictly bounded by cap (eviction case, no
102
+ // single-chunk slice). Allow small overhead for the truncation marker.
103
+ expect(result.stdout.length).toBeLessThan(cap + 1024);
104
+ expect(result.stdout).toContain('truncated from head');
105
+ // Full stream on disk
106
+ expect(statSync(stdoutPath).size).toBe(nChunks * chunkSize);
107
+ } finally {
108
+ rmSync(tmp, { recursive: true, force: true });
109
+ }
110
+ });
111
+
112
+ test('runSpawn: no path configured — memory-only tail, returns null paths', async () => {
113
+ const result = await runSpawn(
114
+ { args: nodeArg('process.stdout.write("inline only")') },
115
+ null,
116
+ {},
117
+ );
118
+ expect(result.exitCode).toBe(0);
119
+ expect(result.stdout).toBe('inline only');
120
+ expect(result.stdoutPath).toBeNull();
121
+ expect(result.stderrPath).toBeNull();
122
+ });
123
+
124
+ test('runSpawn: pre-spawn failure (bad executable) — no paths leak on disk', async () => {
125
+ const tmp = mkdtempSync(join(tmpdir(), 'tagma-runner-bad-'));
126
+ const stdoutPath = join(tmp, 'out');
127
+ try {
128
+ const result = await runSpawn(
129
+ { args: ['this-command-definitely-does-not-exist-xyz123'] },
130
+ null,
131
+ { stdoutPath },
132
+ );
133
+ expect(result.exitCode).toBe(-1);
134
+ expect(result.failureKind).toBe('spawn_error');
135
+ // On pre-spawn failure the runner never opened the file, so stdoutPath
136
+ // is null (not the unopened path). Callers can rely on this to decide
137
+ // whether a disk file exists to read.
138
+ expect(result.stdoutPath).toBeNull();
139
+ } finally {
140
+ rmSync(tmp, { recursive: true, force: true });
141
+ }
142
+ });
package/src/runner.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import { existsSync, readFileSync, statSync } from 'node:fs';
2
+ import { mkdir, open, type FileHandle } from 'node:fs/promises';
2
3
  import { dirname, isAbsolute, join, resolve as pathResolve } from 'node:path';
3
4
  import type { SpawnSpec, DriverPlugin, TaskResult } from './types';
4
5
  import { shellArgs } from './utils';
@@ -6,6 +7,17 @@ import { shellArgs } from './utils';
6
7
  // Delay before escalating SIGTERM to SIGKILL when killing a timed-out process.
7
8
  const SIGKILL_DELAY_MS = 3_000;
8
9
 
10
+ /**
11
+ * Default cap for the in-memory tail retained for each stream. Picked so that
12
+ * a task producing runaway output (AI agent bug, adversarial input) cannot
13
+ * balloon the sidecar's RSS, while still being large enough that typical AI
14
+ * responses (which top out at a few MB of text) are returned whole. Callers
15
+ * that need different limits supply `RunOptions.maxStdoutTailBytes` /
16
+ * `.maxStderrTailBytes`.
17
+ */
18
+ const DEFAULT_STDOUT_TAIL_BYTES = 8 * 1024 * 1024; // 8 MB
19
+ const DEFAULT_STDERR_TAIL_BYTES = 4 * 1024 * 1024; // 4 MB
20
+
9
21
  /**
10
22
  * On Windows, proc.kill('SIGTERM') / proc.kill('SIGKILL') only terminate the
11
23
  * direct child process. When the child is a .cmd/.bat wrapper (e.g. claude.cmd),
@@ -36,6 +48,152 @@ function killProcessTree(pid: number): void {
36
48
  export interface RunOptions {
37
49
  readonly timeoutMs?: number;
38
50
  readonly signal?: AbortSignal; // pipeline-level abort
51
+ /**
52
+ * If set, stream the child's stdout to this file path as it arrives. The
53
+ * returned `TaskResult.stdout` is still a bounded in-memory tail
54
+ * (`maxStdoutTailBytes`) — callers that need the full output should read
55
+ * from the returned `stdoutPath`. Parent directories are created as needed.
56
+ */
57
+ readonly stdoutPath?: string;
58
+ /** Symmetric to `stdoutPath` for stderr. */
59
+ readonly stderrPath?: string;
60
+ /**
61
+ * Cap on bytes retained in memory for the returned `TaskResult.stdout`
62
+ * string. Defaults to `DEFAULT_STDOUT_TAIL_BYTES`. Bytes beyond this cap
63
+ * from the HEAD of the stream are dropped from the in-memory string; the
64
+ * on-disk file (if `stdoutPath` is set) is still the full output.
65
+ */
66
+ readonly maxStdoutTailBytes?: number;
67
+ readonly maxStderrTailBytes?: number;
68
+ }
69
+
70
+ /**
71
+ * Read a stream to completion, persisting every chunk to `filePath` (when
72
+ * provided) while keeping only the last `maxTailBytes` bytes in memory.
73
+ *
74
+ * Why the split: large child outputs (multi-MB AI responses, verbose debug
75
+ * dumps) used to accumulate entirely in memory via `new Response(s).text()`,
76
+ * which let a runaway task balloon the sidecar's RSS. Streaming to disk +
77
+ * bounded tail gives callers: (a) unbounded data fidelity on disk, (b) fixed
78
+ * memory footprint, (c) the tail — which is almost always what callers
79
+ * actually consume (final AI answer, error summary, last N lines).
80
+ *
81
+ * Backpressure: we `await fh.write(chunk)` before the next `read()`, so a slow
82
+ * disk throttles the reader (and, once the pipe buffer fills, the child). What
83
+ * we never do is stop draining the pipe on error: disk errors don't abort the
84
+ * stream; we close the handle, null it, and keep consuming into the tail
85
+ * buffer only (the truncation marker, if one is emitted, notes the failure).
86
+ *
87
+ * Tail eviction: drops whole chunks from the front until total retained is
88
+ * at or below the cap. If a single chunk alone exceeds the cap (rare — would
89
+ * require one read to deliver more than cap bytes), we slice its tail.
90
+ * UTF-8 boundaries at the slice point may emit replacement characters when
91
+ * decoded — acceptable (the trailing/leading codepoint is a cosmetic loss).
92
+ */
93
+ async function collectStream(
94
+ stream: ReadableStream<Uint8Array> | undefined,
95
+ filePath: string | undefined,
96
+ maxTailBytes: number,
97
+ ): Promise<{ text: string; totalBytes: number; path: string | null }> {
98
+ if (!stream) return { text: '', totalBytes: 0, path: null };
99
+
100
+ let fh: FileHandle | null = null;
101
+ let diskWriteFailed = false;
102
+ if (filePath) {
103
+ try {
104
+ await mkdir(dirname(filePath), { recursive: true });
105
+ fh = await open(filePath, 'w');
106
+ } catch (err) {
107
+ console.error(
108
+ `[runner] failed to open ${filePath} for output streaming: ${err instanceof Error ? err.message : String(err)}`,
109
+ );
110
+ diskWriteFailed = true;
111
+ }
112
+ }
113
+
114
+ const chunks: Uint8Array[] = [];
115
+ let tailBytes = 0;
116
+ let totalBytes = 0;
117
+ const reader = stream.getReader();
118
+
119
+ try {
120
+ for (;;) {
121
+ const { done, value } = await reader.read();
122
+ if (done) break;
123
+ totalBytes += value.length;
124
+
125
+ // Disk: persist every byte. Failure here degrades to tail-only mode
126
+ // without interrupting the stream (child must not block on pipe fill).
127
+ if (fh) {
128
+ try {
129
+ await fh.write(value);
130
+ } catch (err) {
131
+ console.error(
132
+ `[runner] disk write failed for ${filePath}: ${err instanceof Error ? err.message : String(err)}`,
133
+ );
134
+ try {
135
+ await fh.close();
136
+ } catch {
137
+ /* ignore */
138
+ }
139
+ fh = null;
140
+ diskWriteFailed = true;
141
+ }
142
+ }
143
+
144
+ // Tail: append then evict whole chunks from the head while the total
145
+ // retained exceeds the cap. Keep at least one chunk so short outputs
146
+ // aren't lost entirely. Post-condition: tailBytes <= maxTailBytes OR
147
+ // only one chunk remains (handled by the next block).
148
+ chunks.push(value);
149
+ tailBytes += value.length;
150
+ while (chunks.length > 1 && tailBytes > maxTailBytes) {
151
+ tailBytes -= chunks.shift()!.length;
152
+ }
153
+ // Pathological: a single chunk larger than the cap. Slice its tail.
154
+ if (chunks.length === 1 && chunks[0]!.length > maxTailBytes) {
155
+ const only = chunks[0]!;
156
+ chunks[0] = only.slice(only.length - maxTailBytes);
157
+ tailBytes = chunks[0]!.length;
158
+ }
159
+ }
160
+ } finally {
161
+ reader.releaseLock();
162
+ if (fh) {
163
+ try {
164
+ await fh.close();
165
+ } catch {
166
+ /* ignore */
167
+ }
168
+ }
169
+ }
170
+
171
+ // Decode retained chunks. `stream: true` lets the decoder buffer partial
172
+ // code points across chunks, handling all boundaries except the very first
173
+ // chunk (which may itself start mid-codepoint after eviction) — that
174
+ // boundary gets a U+FFFD replacement, which is preferable to throwing.
175
+ const decoder = new TextDecoder();
176
+ let text = '';
177
+ for (const c of chunks) text += decoder.decode(c, { stream: true });
178
+ text += decoder.decode();
179
+
180
+ if (totalBytes > tailBytes) {
181
+ const dropped = totalBytes - tailBytes;
182
+ const pathHint = filePath
183
+ ? diskWriteFailed
184
+ ? `${filePath} (partial — disk write failed mid-stream)`
185
+ : filePath
186
+ : 'not persisted (no path configured)';
187
+ text = `[…${dropped} bytes truncated from head — full output at: ${pathHint}]\n${text}`;
188
+ }
189
+
190
+ return {
191
+ text,
192
+ totalBytes,
193
+ // Return the path even when disk persistence failed so operators can inspect
194
+ // whatever bytes were written (if the initial open failed, no file may exist).
195
+ path: filePath ?? null,
196
+ };
39
197
  }
40
198
 
41
199
  /**
@@ -170,13 +328,20 @@ function resolveWindowsExe(args: readonly string[], envPath: string): readonly s
170
328
  * H2: Build a "failed before spawn" result. Tagged as 'spawn_error' so the
171
329
  * engine can show a useful classification ("driver tried to launch X but
172
330
  * the binary wasn't found") rather than the misleading "timeout".
331
+ *
332
+ * Pre-spawn failures never opened the output files, so stdoutPath /
333
+ * stderrPath are null regardless of what the caller passed in opts — there
334
+ * is nothing on disk to point at.
173
335
  */
174
336
  function failResult(stderr: string, durationMs: number): TaskResult {
175
337
  return {
176
338
  exitCode: -1,
177
339
  stdout: '',
178
340
  stderr,
341
+ stdoutPath: null,
179
342
  stderrPath: null,
343
+ stdoutBytes: 0,
344
+ stderrBytes: stderr.length,
180
345
  durationMs,
181
346
  sessionId: null,
182
347
  normalizedOutput: null,
@@ -326,15 +491,28 @@ export async function runSpawn(
326
491
  }
327
492
  }
328
493
 
329
- // ── 4. Collect output & wait (parallel to avoid pipe-buffer deadlock) ─
494
+ // ── 4. Collect output & wait ──────────────────────────────────────────
495
+ // Both streams are drained concurrently with `proc.exited` to avoid the
496
+ // classic pipe-buffer deadlock (child blocks on a full stdout pipe, parent
497
+ // is blocked waiting on exit which the child can't reach). Each stream is
498
+ // persisted to disk via `collectStream` as it arrives so we never hold the
499
+ // full output in memory — only the bounded tail.
330
500
  const stdoutStream = typeof proc.stdout === 'object' ? proc.stdout : undefined;
331
501
  const stderrStream = typeof proc.stderr === 'object' ? proc.stderr : undefined;
502
+ const stdoutCap = opts.maxStdoutTailBytes ?? DEFAULT_STDOUT_TAIL_BYTES;
503
+ const stderrCap = opts.maxStderrTailBytes ?? DEFAULT_STDERR_TAIL_BYTES;
332
504
 
333
- const [exitCode, stdout, stderr] = await Promise.all([
505
+ const [exitCode, stdoutResult, stderrResult] = await Promise.all([
334
506
  proc.exited,
335
- stdoutStream ? new Response(stdoutStream).text() : Promise.resolve(''),
336
- stderrStream ? new Response(stderrStream).text() : Promise.resolve(''),
507
+ collectStream(stdoutStream, opts.stdoutPath, stdoutCap),
508
+ collectStream(stderrStream, opts.stderrPath, stderrCap),
337
509
  ]);
510
+ const stdout = stdoutResult.text;
511
+ const stderr = stderrResult.text;
512
+ const stdoutPath = stdoutResult.path;
513
+ const stderrPath = stderrResult.path;
514
+ const stdoutBytes = stdoutResult.totalBytes;
515
+ const stderrBytes = stderrResult.totalBytes;
338
516
 
339
517
  // ── 5. Cleanup timers & listeners ──────────────────────────────────────
340
518
  if (timer) clearTimeout(timer);
@@ -354,7 +532,10 @@ export async function runSpawn(
354
532
  exitCode: -1,
355
533
  stdout,
356
534
  stderr,
357
- stderrPath: null,
535
+ stdoutPath,
536
+ stderrPath,
537
+ stdoutBytes,
538
+ stderrBytes,
358
539
  durationMs,
359
540
  sessionId: null,
360
541
  normalizedOutput: null,
@@ -404,7 +585,10 @@ export async function runSpawn(
404
585
  exitCode,
405
586
  stdout,
406
587
  stderr: stderr + note,
407
- stderrPath: null,
588
+ stdoutPath,
589
+ stderrPath,
590
+ stdoutBytes,
591
+ stderrBytes,
408
592
  durationMs,
409
593
  sessionId: null,
410
594
  normalizedOutput: null,
@@ -426,7 +610,10 @@ export async function runSpawn(
426
610
  exitCode: exitCode === 0 ? 1 : exitCode,
427
611
  stdout,
428
612
  stderr: stderr + (stderr.endsWith('\n') ? '' : '\n') + `[driver] ${forcedFailureMessage}`,
429
- stderrPath: null,
613
+ stdoutPath,
614
+ stderrPath,
615
+ stdoutBytes,
616
+ stderrBytes,
430
617
  durationMs,
431
618
  sessionId,
432
619
  normalizedOutput,
@@ -437,7 +624,10 @@ export async function runSpawn(
437
624
  exitCode,
438
625
  stdout,
439
626
  stderr,
440
- stderrPath: null,
627
+ stdoutPath,
628
+ stderrPath,
629
+ stdoutBytes,
630
+ stderrBytes,
441
631
  durationMs,
442
632
  sessionId,
443
633
  normalizedOutput,