npm - @tagma/sdk - Versions diffs - 0.6.3 → 0.6.5 - Mend

@tagma/sdk 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

package/README.md +8 -5
package/dist/dag.test.d.ts +2 -0
package/dist/dag.test.d.ts.map +1 -0
package/dist/dag.test.js +42 -0
package/dist/dag.test.js.map +1 -0
package/dist/engine-ports.test.d.ts +2 -0
package/dist/engine-ports.test.d.ts.map +1 -0
package/dist/engine-ports.test.js +378 -0
package/dist/engine-ports.test.js.map +1 -0
package/dist/engine.d.ts.map +1 -1
package/dist/engine.js +194 -21
package/dist/engine.js.map +1 -1
package/dist/pipeline-runner.d.ts.map +1 -1
package/dist/pipeline-runner.js +3 -0
package/dist/pipeline-runner.js.map +1 -1
package/dist/ports.d.ts +118 -0
package/dist/ports.d.ts.map +1 -0
package/dist/ports.js +365 -0
package/dist/ports.js.map +1 -0
package/dist/ports.test.d.ts +2 -0
package/dist/ports.test.d.ts.map +1 -0
package/dist/ports.test.js +262 -0
package/dist/ports.test.js.map +1 -0
package/dist/prompt-doc.d.ts +35 -1
package/dist/prompt-doc.d.ts.map +1 -1
package/dist/prompt-doc.js +110 -0
package/dist/prompt-doc.js.map +1 -1
package/dist/prompt-doc.test.d.ts +2 -0
package/dist/prompt-doc.test.d.ts.map +1 -0
package/dist/prompt-doc.test.js +145 -0
package/dist/prompt-doc.test.js.map +1 -0
package/dist/runner.d.ts +17 -0
package/dist/runner.d.ts.map +1 -1
package/dist/runner.js +171 -8
package/dist/runner.js.map +1 -1
package/dist/runner.test.d.ts +2 -0
package/dist/runner.test.d.ts.map +1 -0
package/dist/runner.test.js +119 -0
package/dist/runner.test.js.map +1 -0
package/dist/schema-ports.test.d.ts +2 -0
package/dist/schema-ports.test.d.ts.map +1 -0
package/dist/schema-ports.test.js +219 -0
package/dist/schema-ports.test.js.map +1 -0
package/dist/schema.d.ts.map +1 -1
package/dist/schema.js +8 -0
package/dist/schema.js.map +1 -1
package/dist/sdk.d.ts +3 -1
package/dist/sdk.d.ts.map +1 -1
package/dist/sdk.js +5 -1
package/dist/sdk.js.map +1 -1
package/dist/validate-raw-ports.test.d.ts +2 -0
package/dist/validate-raw-ports.test.d.ts.map +1 -0
package/dist/validate-raw-ports.test.js +157 -0
package/dist/validate-raw-ports.test.js.map +1 -0
package/dist/validate-raw.d.ts.map +1 -1
package/dist/validate-raw.js +141 -0
package/dist/validate-raw.js.map +1 -1
package/package.json +2 -7
package/src/dag.test.ts +56 -0
package/src/engine-ports.test.ts +404 -0
package/src/engine.ts +231 -24
package/src/pipeline-runner.ts +3 -0
package/src/ports.test.ts +301 -0
package/src/ports.ts +442 -0
package/src/prompt-doc.test.ts +174 -0
package/src/prompt-doc.ts +121 -1
package/src/runner.test.ts +142 -0
package/src/runner.ts +198 -8
package/src/schema-ports.test.ts +236 -0
package/src/schema.ts +8 -0
package/src/sdk.ts +14 -0
package/src/validate-raw-ports.test.ts +198 -0
package/src/validate-raw.ts +155 -1

package/src/prompt-doc.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { PromptDocument, PromptContextBlock } from './types';
+import type { PortDef, PromptContextBlock, PromptDocument } from './types';
 /**
  * Build a fresh `PromptDocument` from a raw task string.
@@ -47,3 +47,123 @@ export function appendContext(
 ): PromptDocument {
   return { contexts: [...doc.contexts, block], task: doc.task };
 }
+/**
+ * Return a copy of `doc` whose context list starts with `block`, followed by
+ * the existing context blocks in their original order. The input document is
+ * not mutated and `task` is carried over as-is.
+ *
+ * The engine uses this for port-related context blocks (`[Inputs]`,
+ * `[Output Format]`) so they sit at the top of the document, ahead of
+ * whatever middlewares append later.
+ */
+export function prependContext(
+  doc: PromptDocument,
+  block: PromptContextBlock,
+): PromptDocument {
+  const { contexts: existing, task } = doc;
+  const contexts = [block, ...existing];
+  return { contexts, task };
+}
+/**
+ * Build an `[Inputs]` context block from a map of resolved port inputs.
+ * Each declared input that has a resolved value is rendered on its own line
+ * as `name: <value>`, with an optional trailing `# <description>` comment so
+ * the model has both the value and the reason it matters. Lines follow the
+ * order of `inputsDecl`.
+ *
+ * The block is *only* useful for AI tasks; command tasks consume inputs
+ * through `{{inputs.X}}` substitution in their command line and do not
+ * need this context.
+ *
+ * Only OWN properties of `values` count as resolved inputs. A plain `in`
+ * check would also match members inherited from `Object.prototype`, so a
+ * port named e.g. `toString` or `constructor` would render a built-in
+ * function instead of being skipped.
+ *
+ * @param inputsDecl Declared input ports of the task, if any.
+ * @param values Resolved input values keyed by port name.
+ * @returns The `Inputs` block, or null when there is nothing to render —
+ *   callers can guard the `prependContext` call with an `if (block)` check
+ *   so empty-input tasks don't grow a noise block in their prompt.
+ */
+export function renderInputsBlock(
+  inputsDecl: readonly PortDef[] | undefined,
+  values: Readonly<Record<string, unknown>>,
+): PromptContextBlock | null {
+  if (!inputsDecl || inputsDecl.length === 0) return null;
+  const lines: string[] = [];
+  for (const port of inputsDecl) {
+    // Own-property check: declared ports without a resolved value are skipped.
+    if (!Object.prototype.hasOwnProperty.call(values, port.name)) continue;
+    const rendered = renderInputValue(values[port.name]);
+    const descr = port.description?.trim();
+    lines.push(descr ? `${port.name}: ${rendered}  # ${descr}` : `${port.name}: ${rendered}`);
+  }
+  if (lines.length === 0) return null;
+  return { label: 'Inputs', content: lines.join('\n') };
+}
+/**
+ * Render a single input value as one-line text for the `[Inputs]` block.
+ *
+ * - `null` / `undefined` → empty string
+ * - strings → JSON-quoted, so embedded quotes and newlines are escaped
+ * - numbers / booleans → `String(value)`
+ * - anything else → `JSON.stringify(value)`, falling back to `String(value)`
+ *   when serialization throws (circular structure, BigInt) or produces no
+ *   text at all.
+ */
+function renderInputValue(value: unknown): string {
+  if (value === null || value === undefined) return '';
+  if (typeof value === 'string') return JSON.stringify(value);
+  if (typeof value === 'number' || typeof value === 'boolean') return String(value);
+  try {
+    // JSON.stringify does not throw for functions, symbols, or objects whose
+    // toJSON() returns undefined — it returns `undefined`, which would break
+    // the declared `string` return type and render as the text "undefined".
+    const json: string | undefined = JSON.stringify(value);
+    return json ?? String(value);
+  } catch {
+    return String(value);
+  }
+}
+/**
+ * Build an `[Output Format]` context block from a task's declared output
+ * ports. The block asks the model to finish with a single JSON object on the
+ * final line whose keys are the declared ports, so `extractTaskOutputs` can
+ * pick it up without fragile heuristics.
+ *
+ * Layout of the block content: one instruction line, one bullet per port
+ * (`name (type[ (one of: …)])[: description]`), a blank line, and an example
+ * final line built from the port defaults / per-type placeholders.
+ *
+ * The instruction is deliberately short and explicit rather than a long
+ * schema dump, because shorter prompts compose better with downstream
+ * middlewares.
+ *
+ * @returns The `Output Format` block, or null when the task declares no
+ *   outputs.
+ */
+export function renderOutputSchemaBlock(
+  outputsDecl: readonly PortDef[] | undefined,
+): PromptContextBlock | null {
+  if (!outputsDecl?.length) return null;
+
+  // One bullet per declared port; enum ports also list their allowed values.
+  const describePort = (port: PortDef): string => {
+    const allowed =
+      port.type === 'enum' && port.enum?.length
+        ? ` (one of: ${port.enum.map((v) => JSON.stringify(v)).join(', ')})`
+        : '';
+    const signature = `  - ${port.name} (${port.type}${allowed})`;
+    const descr = port.description?.trim();
+    return descr ? `${signature}: ${descr}` : signature;
+  };
+
+  const content = [
+    'After your response, emit a single JSON object on the FINAL line with these keys:',
+    ...outputsDecl.map((port) => describePort(port)),
+    '',
+    `Example final line: ${JSON.stringify(buildExampleObject(outputsDecl))}`,
+  ].join('\n');
+  return { label: 'Output Format', content };
+}
+/**
+ * Build the sample object shown in the "Example final line" of the output
+ * format block. Each declared port contributes one key: its `default` when
+ * one is declared, otherwise a placeholder chosen by port type (`'...'` for
+ * strings, `0` for numbers, `false` for booleans, the first enum member — or
+ * `'...'` when none is declared — for enums, and `null` for `json` and any
+ * other type).
+ */
+function buildExampleObject(outputsDecl: readonly PortDef[]): Record<string, unknown> {
+  const placeholderFor = (port: PortDef): unknown => {
+    if (port.type === 'string') return '...';
+    if (port.type === 'number') return 0;
+    if (port.type === 'boolean') return false;
+    if (port.type === 'enum') return port.enum?.[0] ?? '...';
+    // 'json' and anything unrecognised
+    return null;
+  };
+  const example: Record<string, unknown> = {};
+  for (const port of outputsDecl) {
+    example[port.name] = port.default !== undefined ? port.default : placeholderFor(port);
+  }
+  return example;
+}

package/src/runner.test.ts ADDED Viewed

@@ -0,0 +1,142 @@
+import { test, expect } from 'bun:test';
+import { mkdtempSync, readFileSync, rmSync, statSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { runSpawn } from './runner';
+// Build the argv for a child that evaluates `script` via `node -e`. node is
+// guaranteed in the bun dev env, and going through a known runtime sidesteps
+// shell-quoting differences between platforms.
+function nodeArg(script: string): string[] {
+  const interpreter = ['node', '-e'];
+  return [...interpreter, script];
+}
+// Happy path: the child writes 11 bytes to stdout and 4 bytes to stderr and
+// no tail cap is passed. Checks the returned strings, the reported byte
+// counts, that the configured paths are echoed back in the result, and that
+// the files on disk contain exactly what the child wrote.
+test('runSpawn: small output is returned whole, persisted byte-identical', async () => {
+  const tmp = mkdtempSync(join(tmpdir(), 'tagma-runner-small-'));
+  const stdoutPath = join(tmp, 'out');
+  const stderrPath = join(tmp, 'err');
+  try {
+    const result = await runSpawn(
+      { args: nodeArg('process.stdout.write("hello world"); process.stderr.write("oops")') },
+      null,
+      { stdoutPath, stderrPath },
+    );
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout).toBe('hello world');
+    expect(result.stderr).toBe('oops');
+    expect(result.stdoutBytes).toBe(11);
+    expect(result.stderrBytes).toBe(4);
+    expect(result.stdoutPath).toBe(stdoutPath);
+    expect(result.stderrPath).toBe(stderrPath);
+    expect(readFileSync(stdoutPath, 'utf8')).toBe('hello world');
+    expect(readFileSync(stderrPath, 'utf8')).toBe('oops');
+  } finally {
+    // Remove the temp dir even when an assertion above throws.
+    rmSync(tmp, { recursive: true, force: true });
+  }
+});
+// Output far larger than the configured tail cap: the returned string must be
+// a bounded tail carrying a truncation marker, while `stdoutBytes` and the
+// file on disk account for every byte the child wrote.
+test('runSpawn: oversized output — bounded tail in memory, full bytes on disk', async () => {
+  const tmp = mkdtempSync(join(tmpdir(), 'tagma-runner-big-'));
+  const stdoutPath = join(tmp, 'out');
+  try {
+    // Produce 3 MB of output against a 512 KB cap. The child writes in one
+    // shot; the runner should slice the single chunk's tail rather than
+    // evicting (the "pathological one-chunk-over-cap" branch).
+    const cap = 512 * 1024;
+    const totalBytes = 3 * 1024 * 1024;
+    const result = await runSpawn(
+      {
+        args: nodeArg(
+          `process.stdout.write("a".repeat(${totalBytes}))`,
+        ),
+      },
+      null,
+      { stdoutPath, maxStdoutTailBytes: cap },
+    );
+    expect(result.exitCode).toBe(0);
+    // Total bytes reported match reality
+    expect(result.stdoutBytes).toBe(totalBytes);
+    // In-memory tail bounded (tail + truncation marker header is a couple
+    // hundred bytes at most; give it slack). `.length` counts UTF-16 code
+    // units, which equals the byte count here because the payload is ASCII.
+    expect(result.stdout.length).toBeLessThan(cap + 1024);
+    expect(result.stdout.length).toBeGreaterThan(cap - 1024);
+    // Truncation breadcrumb present and points at the full output
+    expect(result.stdout).toContain('truncated from head');
+    expect(result.stdout).toContain(stdoutPath);
+    // The tail ends with the trailing bytes the child wrote ('a')
+    expect(result.stdout.endsWith('a')).toBe(true);
+    // Disk copy is byte-exact and full-length
+    const onDiskBytes = statSync(stdoutPath).size;
+    expect(onDiskBytes).toBe(totalBytes);
+  } finally {
+    rmSync(tmp, { recursive: true, force: true });
+  }
+});
+// Output delivered in several writes whose total exceeds the cap: the
+// returned tail must stay bounded and carry the truncation marker, while the
+// byte count and the on-disk file cover the whole stream.
+test('runSpawn: chunked output — tail eviction keeps retained <= cap', async () => {
+  const tmp = mkdtempSync(join(tmpdir(), 'tagma-runner-chunked-'));
+  const stdoutPath = join(tmp, 'out');
+  try {
+    // Emit 8 chunks × 64 KB, yielding to the event loop (setImmediate)
+    // between writes so the runner is likely to receive them as distinct
+    // chunks rather than one blob. Cap at 128 KB forces eviction of older
+    // chunks. The assertions below hold either way: they only bound the
+    // retained size, they do not depend on how the pipe splits the data.
+    const cap = 128 * 1024;
+    const chunkSize = 64 * 1024;
+    const nChunks = 8;
+    const script = `
+      const chunk = 'b'.repeat(${chunkSize});
+      (async () => {
+        for (let i = 0; i < ${nChunks}; i++) {
+          process.stdout.write(chunk);
+          await new Promise(r => setImmediate(r));
+        }
+      })();
+    `;
+    const result = await runSpawn(
+      { args: nodeArg(script) },
+      null,
+      { stdoutPath, maxStdoutTailBytes: cap },
+    );
+    expect(result.exitCode).toBe(0);
+    expect(result.stdoutBytes).toBe(nChunks * chunkSize);
+    // Retained tail should be strictly bounded by cap (eviction case, no
+    // single-chunk slice). Allow small overhead for the truncation marker.
+    expect(result.stdout.length).toBeLessThan(cap + 1024);
+    expect(result.stdout).toContain('truncated from head');
+    // Full stream on disk
+    expect(statSync(stdoutPath).size).toBe(nChunks * chunkSize);
+  } finally {
+    rmSync(tmp, { recursive: true, force: true });
+  }
+});
+// With an empty options object (no stdoutPath / stderrPath) the output is
+// still returned in memory and both path fields of the result are null.
+test('runSpawn: no path configured — memory-only tail, returns null paths', async () => {
+  const result = await runSpawn(
+    { args: nodeArg('process.stdout.write("inline only")') },
+    null,
+    {},
+  );
+  expect(result.exitCode).toBe(0);
+  expect(result.stdout).toBe('inline only');
+  expect(result.stdoutPath).toBeNull();
+  expect(result.stderrPath).toBeNull();
+});
+// Launching a non-existent executable must resolve (not throw) with
+// exitCode -1, failureKind 'spawn_error', and a null stdoutPath even though
+// a stdoutPath was requested.
+test('runSpawn: pre-spawn failure (bad executable) — no paths leak on disk', async () => {
+  const tmp = mkdtempSync(join(tmpdir(), 'tagma-runner-bad-'));
+  const stdoutPath = join(tmp, 'out');
+  try {
+    const result = await runSpawn(
+      { args: ['this-command-definitely-does-not-exist-xyz123'] },
+      null,
+      { stdoutPath },
+    );
+    expect(result.exitCode).toBe(-1);
+    expect(result.failureKind).toBe('spawn_error');
+    // On pre-spawn failure the runner never opened the file, so stdoutPath
+    // is null (not the unopened path). Callers can rely on this to decide
+    // whether a disk file exists to read.
+    expect(result.stdoutPath).toBeNull();
+  } finally {
+    rmSync(tmp, { recursive: true, force: true });
+  }
+});

package/src/runner.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { existsSync, readFileSync, statSync } from 'node:fs';
+import { mkdir, open, type FileHandle } from 'node:fs/promises';
 import { dirname, isAbsolute, join, resolve as pathResolve } from 'node:path';
 import type { SpawnSpec, DriverPlugin, TaskResult } from './types';
 import { shellArgs } from './utils';
@@ -6,6 +7,17 @@ import { shellArgs } from './utils';
 // Delay before escalating SIGTERM to SIGKILL when killing a timed-out process.
 const SIGKILL_DELAY_MS = 3_000;
+/**
+ * Default cap for the in-memory tail retained for each stream. Picked so that
+ * a task producing runaway output (AI agent bug, adversarial input) cannot
+ * balloon the sidecar's RSS, while still being large enough that typical AI
+ * responses (which top out around low-MB of text) are returned whole. Callers
+ * that need different limits supply `RunOptions.maxStdoutTailBytes` /
+ * `.maxStderrTailBytes`.
+ *
+ * These caps bound only the string returned in the task result; they do not
+ * limit what is streamed to the output files.
+ */
+const DEFAULT_STDOUT_TAIL_BYTES = 8 * 1024 * 1024; // 8 MB
+// stderr gets half of the stdout default.
+const DEFAULT_STDERR_TAIL_BYTES = 4 * 1024 * 1024; // 4 MB
 /**
  * On Windows, proc.kill('SIGTERM') / proc.kill('SIGKILL') only terminate the
  * direct child process. When the child is a .cmd/.bat wrapper (e.g. claude.cmd),
@@ -36,6 +48,152 @@ function killProcessTree(pid: number): void {
 export interface RunOptions {
   readonly timeoutMs?: number;
   readonly signal?: AbortSignal; // pipeline-level abort
+  /**
+   * If set, stream the child's stdout to this file path as it arrives. The
+   * returned `TaskResult.stdout` is still a bounded in-memory tail
+   * (`maxStdoutTailBytes`) — callers that need the full output should read
+   * from the returned `stdoutPath`. Parent directories are created as needed.
+   */
+  readonly stdoutPath?: string;
+  /** Symmetric to `stdoutPath` for stderr. */
+  readonly stderrPath?: string;
+  /**
+   * Cap on bytes retained in memory for the returned `TaskResult.stdout`
+   * string. Defaults to `DEFAULT_STDOUT_TAIL_BYTES`. Bytes beyond this cap
+   * from the HEAD of the stream are dropped from the in-memory string; the
+   * on-disk file (if `stdoutPath` is set) is still the full output.
+   */
+  readonly maxStdoutTailBytes?: number;
+  /**
+   * Symmetric to `maxStdoutTailBytes` for the returned `TaskResult.stderr`
+   * string. Defaults to `DEFAULT_STDERR_TAIL_BYTES`.
+   */
+  readonly maxStderrTailBytes?: number;
+}
+/**
+ * Read a stream to completion, persisting every chunk to `filePath` (when
+ * provided) while keeping only a bounded tail — at most `maxTailBytes`
+ * bytes — in memory.
+ *
+ * Why the split: large child outputs (multi-MB AI responses, verbose debug
+ * dumps) used to accumulate entirely in memory via `new Response(s).text()`,
+ * which let a runaway task balloon the sidecar's RSS. Streaming to disk +
+ * bounded tail gives callers: (a) unbounded data fidelity on disk, (b) fixed
+ * memory footprint, (c) the tail — which is almost always what callers
+ * actually consume (final AI answer, error summary, last N lines).
+ *
+ * Pacing: each chunk is written with `await fh.write(chunk)` before the next
+ * `reader.read()` is issued, so a slow disk slows this loop down. Disk
+ * errors never abort the stream: the handle is closed and dropped, and the
+ * rest of the stream is consumed into the tail buffer only (with a
+ * breadcrumb in the returned text), so the child's pipe keeps being drained.
+ *
+ * Tail eviction: drops whole chunks from the front until total retained is
+ * at or below the cap, so the retained tail can be shorter than the cap. If
+ * a single chunk alone exceeds the cap, its tail is sliced instead.
+ * UTF-8 boundaries at the slice point may emit replacement characters when
+ * decoded — acceptable (the trailing/leading codepoint is a cosmetic loss).
+ *
+ * @param stream Child output stream; `undefined` yields an empty result.
+ * @param filePath Destination for the full output, if any. Parent
+ *   directories are created as needed.
+ * @param maxTailBytes Upper bound on the bytes retained for `text`.
+ * @returns `text` — the decoded tail, prefixed with a truncation marker when
+ *   head bytes were dropped; `totalBytes` — bytes read from the stream;
+ *   `path` — `filePath` when the file was actually opened (even if a later
+ *   write failed, so the persisted head can still be inspected), otherwise
+ *   null so callers never get pointed at a file this call did not write.
+ */
+async function collectStream(
+  stream: ReadableStream<Uint8Array> | undefined,
+  filePath: string | undefined,
+  maxTailBytes: number,
+): Promise<{ text: string; totalBytes: number; path: string | null }> {
+  if (!stream) return { text: '', totalBytes: 0, path: null };
+  let fh: FileHandle | null = null;
+  // True once `open` succeeded, i.e. a file written by this call exists.
+  let fileOpened = false;
+  let diskWriteFailed = false;
+  if (filePath) {
+    try {
+      await mkdir(dirname(filePath), { recursive: true });
+      fh = await open(filePath, 'w');
+      fileOpened = true;
+    } catch (err) {
+      console.error(
+        `[runner] failed to open ${filePath} for output streaming: ${err instanceof Error ? err.message : String(err)}`,
+      );
+      diskWriteFailed = true;
+    }
+  }
+  const chunks: Uint8Array[] = [];
+  let tailBytes = 0;
+  let totalBytes = 0;
+  const reader = stream.getReader();
+  try {
+    for (;;) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      totalBytes += value.length;
+      // Disk: persist every byte. Failure here degrades to tail-only mode
+      // without interrupting the stream (child must not block on pipe fill).
+      if (fh) {
+        try {
+          await fh.write(value);
+        } catch (err) {
+          console.error(
+            `[runner] disk write failed for ${filePath}: ${err instanceof Error ? err.message : String(err)}`,
+          );
+          try {
+            await fh.close();
+          } catch {
+            /* ignore */
+          }
+          fh = null;
+          diskWriteFailed = true;
+        }
+      }
+      // Tail: append then evict whole chunks from the head while the total
+      // retained exceeds the cap. At least one chunk is always kept here;
+      // an oversized lone chunk is handled by the next block.
+      chunks.push(value);
+      tailBytes += value.length;
+      while (chunks.length > 1 && tailBytes > maxTailBytes) {
+        const evicted = chunks.shift();
+        if (evicted) tailBytes -= evicted.length;
+      }
+      // Pathological: a single chunk larger than the cap. Slice its tail
+      // (`slice` copies, so the oversized buffer is no longer referenced).
+      const only = chunks[0];
+      if (chunks.length === 1 && only !== undefined && only.length > maxTailBytes) {
+        const kept = only.slice(only.length - maxTailBytes);
+        chunks[0] = kept;
+        tailBytes = kept.length;
+      }
+    }
+  } finally {
+    reader.releaseLock();
+    if (fh) {
+      try {
+        await fh.close();
+      } catch {
+        /* ignore */
+      }
+    }
+  }
+  // Decode retained chunks. `stream: true` lets the decoder buffer partial
+  // code points across chunks, handling all boundaries except the very first
+  // chunk (which may itself start mid-codepoint after eviction) — that
+  // boundary gets a U+FFFD replacement, which is preferable to throwing.
+  const decoder = new TextDecoder();
+  let text = '';
+  for (const c of chunks) text += decoder.decode(c, { stream: true });
+  text += decoder.decode();
+  if (totalBytes > tailBytes) {
+    const dropped = totalBytes - tailBytes;
+    let pathHint: string;
+    if (!filePath) {
+      pathHint = 'not persisted (no path configured)';
+    } else if (!fileOpened) {
+      // Nothing was ever written: do not advertise the path as a partial copy.
+      pathHint = `not persisted (could not open ${filePath})`;
+    } else if (diskWriteFailed) {
+      pathHint = `${filePath} (partial — disk write failed mid-stream)`;
+    } else {
+      pathHint = filePath;
+    }
+    text = `[…${dropped} bytes truncated from head — full output at: ${pathHint}]\n${text}`;
+  }
+  return {
+    text,
+    totalBytes,
+    // Return the path even on partial-write failure so operators can still
+    // inspect the head bytes we managed to persist — but only when the file
+    // was actually opened; otherwise there is nothing on disk to point at.
+    path: fileOpened && filePath ? filePath : null,
+  };
 }
 /**
@@ -170,13 +328,20 @@ function resolveWindowsExe(args: readonly string[], envPath: string): readonly s
  * H2: Build a "failed before spawn" result. Tagged as 'spawn_error' so the
  * engine can show a useful classification ("driver tried to launch X but
  * the binary wasn't found") rather than the misleading "timeout".
+ *
+ * Pre-spawn failures never opened the output files, so stdoutPath /
+ * stderrPath are null regardless of what the caller passed in opts — there
+ * is nothing on disk to point at.
  */
 function failResult(stderr: string, durationMs: number): TaskResult {
   return {
     exitCode: -1,
     stdout: '',
     stderr,
+    stdoutPath: null,
     stderrPath: null,
+    stdoutBytes: 0,
+    stderrBytes: stderr.length,
     durationMs,
     sessionId: null,
     normalizedOutput: null,
@@ -326,15 +491,28 @@ export async function runSpawn(
     }
   }
-  // ── 4. Collect output & wait (parallel to avoid pipe-buffer deadlock) ─
+  // ── 4. Collect output & wait ──────────────────────────────────────────
+  // Both streams are drained concurrently with `proc.exited` to avoid the
+  // classic pipe-buffer deadlock (child blocks on a full stdout pipe, parent
+  // is blocked waiting on exit which the child can't reach). Each stream is
+  // persisted to disk via `collectStream` as it arrives so we never hold the
+  // full output in memory — only the bounded tail.
   const stdoutStream = typeof proc.stdout === 'object' ? proc.stdout : undefined;
   const stderrStream = typeof proc.stderr === 'object' ? proc.stderr : undefined;
+  const stdoutCap = opts.maxStdoutTailBytes ?? DEFAULT_STDOUT_TAIL_BYTES;
+  const stderrCap = opts.maxStderrTailBytes ?? DEFAULT_STDERR_TAIL_BYTES;
-  const [exitCode, stdout, stderr] = await Promise.all([
+  const [exitCode, stdoutResult, stderrResult] = await Promise.all([
     proc.exited,
-    stdoutStream ? new Response(stdoutStream).text() : Promise.resolve(''),
-    stderrStream ? new Response(stderrStream).text() : Promise.resolve(''),
+    collectStream(stdoutStream, opts.stdoutPath, stdoutCap),
+    collectStream(stderrStream, opts.stderrPath, stderrCap),
   ]);
+  const stdout = stdoutResult.text;
+  const stderr = stderrResult.text;
+  const stdoutPath = stdoutResult.path;
+  const stderrPath = stderrResult.path;
+  const stdoutBytes = stdoutResult.totalBytes;
+  const stderrBytes = stderrResult.totalBytes;
   // ── 5. Cleanup timers & listeners ──────────────────────────────────────
   if (timer) clearTimeout(timer);
@@ -354,7 +532,10 @@ export async function runSpawn(
       exitCode: -1,
       stdout,
       stderr,
-      stderrPath: null,
+      stdoutPath,
+      stderrPath,
+      stdoutBytes,
+      stderrBytes,
       durationMs,
       sessionId: null,
       normalizedOutput: null,
@@ -404,7 +585,10 @@ export async function runSpawn(
         exitCode,
         stdout,
         stderr: stderr + note,
-        stderrPath: null,
+        stdoutPath,
+        stderrPath,
+        stdoutBytes,
+        stderrBytes,
         durationMs,
         sessionId: null,
         normalizedOutput: null,
@@ -426,7 +610,10 @@ export async function runSpawn(
       exitCode: exitCode === 0 ? 1 : exitCode,
       stdout,
       stderr: stderr + (stderr.endsWith('\n') ? '' : '\n') + `[driver] ${forcedFailureMessage}`,
-      stderrPath: null,
+      stdoutPath,
+      stderrPath,
+      stdoutBytes,
+      stderrBytes,
       durationMs,
       sessionId,
       normalizedOutput,
@@ -437,7 +624,10 @@ export async function runSpawn(
     exitCode,
     stdout,
     stderr,
-    stderrPath: null,
+    stdoutPath,
+    stderrPath,
+    stdoutBytes,
+    stderrBytes,
     durationMs,
     sessionId,
     normalizedOutput,