npm - @vellumai/assistant - Versions diffs - 0.5.2 → 0.5.3 - Mend

@vellumai/assistant 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108)

package/ARCHITECTURE.md +109 -0
package/docs/skills.md +100 -0
package/package.json +1 -1
package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
package/src/__tests__/conversation-agent-loop.test.ts +7 -0
package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
package/src/__tests__/conversation-wipe.test.ts +226 -0
package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
package/src/__tests__/inline-command-runner.test.ts +311 -0
package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
package/src/__tests__/list-messages-attachments.test.ts +96 -0
package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
package/src/__tests__/memory-brief-time.test.ts +285 -0
package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
package/src/__tests__/memory-chunk-archive.test.ts +400 -0
package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
package/src/__tests__/memory-episode-archive.test.ts +370 -0
package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
package/src/__tests__/memory-observation-archive.test.ts +375 -0
package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
package/src/__tests__/memory-recall-quality.test.ts +2 -2
package/src/__tests__/memory-reducer-store.test.ts +728 -0
package/src/__tests__/memory-reducer-types.test.ts +699 -0
package/src/__tests__/memory-reducer.test.ts +698 -0
package/src/__tests__/memory-regressions.test.ts +6 -4
package/src/__tests__/memory-simplified-config.test.ts +281 -0
package/src/__tests__/parse-identity-fields.test.ts +129 -0
package/src/__tests__/skill-load-inline-command.test.ts +598 -0
package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
package/src/__tests__/skills-transitive-hash.test.ts +333 -0
package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
package/src/config/feature-flag-registry.json +16 -0
package/src/config/loader.ts +1 -0
package/src/config/raw-config-utils.ts +28 -0
package/src/config/schema.ts +12 -0
package/src/config/schemas/memory-simplified.ts +101 -0
package/src/config/schemas/memory.ts +4 -0
package/src/config/skills.ts +50 -4
package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
package/src/daemon/conversation-agent-loop.ts +71 -1
package/src/daemon/conversation-lifecycle.ts +11 -1
package/src/daemon/conversation-runtime-assembly.ts +2 -1
package/src/daemon/conversation-surfaces.ts +31 -8
package/src/daemon/conversation.ts +40 -23
package/src/daemon/handlers/config-embeddings.ts +10 -2
package/src/daemon/handlers/config-model.ts +0 -9
package/src/daemon/handlers/identity.ts +12 -1
package/src/daemon/lifecycle.ts +9 -1
package/src/daemon/message-types/conversations.ts +0 -1
package/src/daemon/server.ts +1 -1
package/src/followups/followup-store.ts +47 -1
package/src/memory/archive-store.ts +400 -0
package/src/memory/brief-formatting.ts +33 -0
package/src/memory/brief-open-loops.ts +266 -0
package/src/memory/brief-time.ts +161 -0
package/src/memory/brief.ts +75 -0
package/src/memory/conversation-crud.ts +245 -101
package/src/memory/db-init.ts +12 -0
package/src/memory/indexer.ts +106 -15
package/src/memory/job-handlers/embedding.test.ts +1 -0
package/src/memory/job-handlers/embedding.ts +83 -0
package/src/memory/job-utils.ts +1 -1
package/src/memory/jobs-store.ts +6 -0
package/src/memory/jobs-worker.ts +12 -0
package/src/memory/migrations/185-memory-brief-state.ts +52 -0
package/src/memory/migrations/186-memory-archive.ts +109 -0
package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
package/src/memory/migrations/index.ts +3 -0
package/src/memory/qdrant-client.ts +23 -4
package/src/memory/reducer-store.ts +271 -0
package/src/memory/reducer-types.ts +99 -0
package/src/memory/reducer.ts +453 -0
package/src/memory/schema/conversations.ts +3 -0
package/src/memory/schema/index.ts +2 -0
package/src/memory/schema/memory-archive.ts +121 -0
package/src/memory/schema/memory-brief.ts +55 -0
package/src/memory/search/semantic.ts +17 -4
package/src/oauth/oauth-store.ts +3 -1
package/src/permissions/checker.ts +89 -6
package/src/permissions/defaults.ts +14 -0
package/src/runtime/routes/conversation-management-routes.ts +6 -0
package/src/runtime/routes/conversation-query-routes.ts +7 -0
package/src/runtime/routes/conversation-routes.ts +52 -5
package/src/runtime/routes/identity-routes.ts +2 -35
package/src/runtime/routes/llm-context-normalization.ts +14 -1
package/src/runtime/routes/memory-item-routes.ts +90 -5
package/src/runtime/routes/secret-routes.ts +2 -0
package/src/runtime/routes/surface-action-routes.ts +68 -1
package/src/schedule/schedule-store.ts +21 -0
package/src/skills/inline-command-expansions.ts +204 -0
package/src/skills/inline-command-render.ts +127 -0
package/src/skills/inline-command-runner.ts +242 -0
package/src/skills/transitive-version-hash.ts +88 -0
package/src/tasks/task-store.ts +43 -1
package/src/tools/permission-checker.ts +8 -1
package/src/tools/skills/load.ts +140 -6
package/src/util/platform.ts +18 -0
package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
package/src/workspace/migrations/registry.ts +1 -1

package/src/__tests__/db-memory-reducer-checkpoints.test.ts ADDED Viewed

@@ -0,0 +1,273 @@
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { Database } from "bun:sqlite";
+import {
+  afterAll,
+  afterEach,
+  beforeEach,
+  describe,
+  expect,
+  mock,
+  test,
+} from "bun:test";
+import { drizzle } from "drizzle-orm/bun-sqlite";
+const testDir = mkdtempSync(join(tmpdir(), "memory-reducer-checkpoints-")); // Fresh temp directory holding every on-disk artifact of this test file.
+const dbPath = join(testDir, "test.db");
+const originalBunTest = process.env.BUN_TEST; // Captured so afterAll can restore (or unset) it.
+mock.module("../util/platform.js", () => ({ // Point the platform path helpers at testDir.
+  getDataDir: () => testDir,
+  isMacOS: () => process.platform === "darwin",
+  isLinux: () => process.platform === "linux",
+  isWindows: () => process.platform === "win32",
+  getPidPath: () => join(testDir, "test.pid"),
+  getDbPath: () => dbPath,
+  getLogPath: () => join(testDir, "test.log"),
+  ensureDataDir: () => {}, // No-op stub.
+  getConversationsDir: () => join(testDir, "conversations"),
+}));
+mock.module("../util/logger.js", () => ({ // Logger stub: any property read yields a no-op function.
+  getLogger: () =>
+    new Proxy({} as Record<string, unknown>, {
+      get: () => () => {},
+    }),
+}));
+import { initializeDb, resetDb } from "../memory/db.js";
+import { getSqliteFrom } from "../memory/db-connection.js";
+import { migrateMemoryReducerCheckpoints } from "../memory/migrations/187-memory-reducer-checkpoints.js";
+import * as schema from "../memory/schema.js";
+function createTestDb() { // Returns a drizzle handle (bound to the project schema) over a fresh in-memory SQLite database.
+  const sqlite = new Database(":memory:");
+  sqlite.exec("PRAGMA journal_mode=WAL"); // NOTE(review): SQLite documents in-memory DBs as MEMORY/OFF journal modes only, so this likely has no effect here — confirm.
+  sqlite.exec("PRAGMA foreign_keys = ON");
+  return drizzle(sqlite, { schema });
+}
+function getColumnInfo( // Runs PRAGMA table_info(conversations) and returns its rows, typed down to the name and notnull fields.
+  raw: Database,
+): Array<{ name: string; notnull: number }> {
+  return raw.query(`PRAGMA table_info(conversations)`).all() as Array<{
+    name: string;
+    notnull: number;
+  }>;
+}
+function bootstrapPreCheckpointConversations(raw: Database): void { // Creates a conversations table that lacks the three memory_* checkpoint columns the tests below look for.
+  raw.exec(/*sql*/ `
+    CREATE TABLE conversations (
+      id TEXT PRIMARY KEY,
+      title TEXT,
+      created_at INTEGER NOT NULL,
+      updated_at INTEGER NOT NULL,
+      total_input_tokens INTEGER NOT NULL DEFAULT 0,
+      total_output_tokens INTEGER NOT NULL DEFAULT 0,
+      total_estimated_cost REAL NOT NULL DEFAULT 0,
+      context_summary TEXT,
+      context_compacted_message_count INTEGER NOT NULL DEFAULT 0,
+      context_compacted_at INTEGER,
+      conversation_type TEXT NOT NULL DEFAULT 'standard',
+      source TEXT NOT NULL DEFAULT 'user',
+      memory_scope_id TEXT NOT NULL DEFAULT 'default',
+      origin_channel TEXT,
+      origin_interface TEXT,
+      fork_parent_conversation_id TEXT,
+      fork_parent_message_id TEXT,
+      is_auto_title INTEGER NOT NULL DEFAULT 1,
+      schedule_job_id TEXT
+    )
+  `); // Only the conversations table is created; no other tables exist in this bootstrap.
+}
+function removeTestDbFiles(): void { // Deletes the test database file plus its -shm and -wal sidecar files.
+  rmSync(dbPath, { force: true }); // force: true means a missing file is not an error.
+  rmSync(`${dbPath}-shm`, { force: true });
+  rmSync(`${dbPath}-wal`, { force: true });
+}
+describe("memory reducer checkpoint columns migration", () => {
+  beforeEach(() => { // Before each test: call resetDb() and delete any DB files left on disk.
+    process.env.BUN_TEST = "0"; // NOTE(review): the reason for forcing "0" is not visible in this file — confirm against memory/db.
+    resetDb();
+    removeTestDbFiles();
+  });
+  afterEach(() => {
+    resetDb();
+    removeTestDbFiles();
+  });
+  afterAll(() => { // Put BUN_TEST back to its pre-test state, then remove the temp directory.
+    if (originalBunTest === undefined) {
+      delete process.env.BUN_TEST;
+    } else {
+      process.env.BUN_TEST = originalBunTest;
+    }
+    resetDb();
+    removeTestDbFiles();
+    try {
+      rmSync(testDir, { recursive: true });
+    } catch {
+      /* best effort */
+    }
+  });
+  test("fresh DB initialization includes nullable reducer checkpoint columns", () => {
+    initializeDb();
+    const raw = new Database(dbPath); // Separate handle on dbPath; presumably the file initializeDb wrote via the mocked getDbPath — confirm.
+    const columns = getColumnInfo(raw);
+    const checkpointColumns = columns.filter(
+      (c) =>
+        c.name === "memory_reduced_through_message_id" ||
+        c.name === "memory_dirty_tail_since_message_id" ||
+        c.name === "memory_last_reduced_at",
+    );
+    expect(checkpointColumns).toHaveLength(3);
+    expect(checkpointColumns.every((c) => c.notnull === 0)).toBe(true); // notnull === 0 is how table_info reports a nullable column.
+    raw.close(); // NOTE(review): not reached when an expect above throws.
+  });
+  test("migration upgrades the pre-checkpoint schema without disturbing existing rows", () => {
+    const db = createTestDb();
+    const raw = getSqliteFrom(db); // Handle used for raw SQL against the same in-memory database.
+    const now = Date.now();
+    bootstrapPreCheckpointConversations(raw);
+    raw.exec(/*sql*/ `
+      INSERT INTO conversations (
+        id,
+        title,
+        created_at,
+        updated_at,
+        conversation_type,
+        source,
+        memory_scope_id,
+        is_auto_title
+      ) VALUES (
+        'conv-upgrade',
+        'Existing conversation',
+        ${now},
+        ${now},
+        'standard',
+        'user',
+        'default',
+        1
+      )
+    `);
+    migrateMemoryReducerCheckpoints(db);
+    const columnNames = getColumnInfo(raw).map((c) => c.name);
+    expect(columnNames).toContain("memory_reduced_through_message_id");
+    expect(columnNames).toContain("memory_dirty_tail_since_message_id");
+    expect(columnNames).toContain("memory_last_reduced_at");
+    const row = raw
+      .query(
+        `SELECT id, title, memory_reduced_through_message_id, memory_dirty_tail_since_message_id, memory_last_reduced_at
+         FROM conversations WHERE id = 'conv-upgrade'`,
+      )
+      .get() as {
+      id: string;
+      title: string | null;
+      memory_reduced_through_message_id: string | null;
+      memory_dirty_tail_since_message_id: string | null;
+      memory_last_reduced_at: number | null;
+    } | null;
+    expect(row).toEqual({ // The pre-existing row keeps its id and title; the three new columns read back as null.
+      id: "conv-upgrade",
+      title: "Existing conversation",
+      memory_reduced_through_message_id: null,
+      memory_dirty_tail_since_message_id: null,
+      memory_last_reduced_at: null,
+    });
+    raw.close();
+  });
+  test("re-running the migration preserves populated checkpoint values", () => {
+    const db = createTestDb();
+    const raw = getSqliteFrom(db);
+    const now = Date.now();
+    bootstrapPreCheckpointConversations(raw);
+    raw.exec(/*sql*/ `
+      INSERT INTO conversations (
+        id,
+        title,
+        created_at,
+        updated_at,
+        conversation_type,
+        source,
+        memory_scope_id,
+        is_auto_title
+      ) VALUES (
+        'conv-rerun',
+        'Reduced conversation',
+        ${now},
+        ${now},
+        'standard',
+        'user',
+        'default',
+        1
+      )
+    `);
+    migrateMemoryReducerCheckpoints(db); // First run adds the columns so the UPDATE below can populate them.
+    raw.exec(/*sql*/ `
+      UPDATE conversations
+      SET memory_reduced_through_message_id = 'msg-100',
+          memory_dirty_tail_since_message_id = 'msg-101',
+          memory_last_reduced_at = ${now}
+      WHERE id = 'conv-rerun'
+    `);
+    expect(() => migrateMemoryReducerCheckpoints(db)).not.toThrow(); // Second run on an already-migrated table must not throw.
+    const row = raw
+      .query(
+        `SELECT memory_reduced_through_message_id, memory_dirty_tail_since_message_id, memory_last_reduced_at
+         FROM conversations WHERE id = 'conv-rerun'`,
+      )
+      .get() as {
+      memory_reduced_through_message_id: string | null;
+      memory_dirty_tail_since_message_id: string | null;
+      memory_last_reduced_at: number | null;
+    } | null;
+    expect(row).toEqual({ // Values written between the two runs must survive the second run.
+      memory_reduced_through_message_id: "msg-100",
+      memory_dirty_tail_since_message_id: "msg-101",
+      memory_last_reduced_at: now,
+    });
+    raw.close();
+  });
+  test("getConversation exposes the new checkpoint fields as null for new rows", async () => {
+    initializeDb();
+    // Dynamic import to avoid circular module init issues — conversation-crud
+    // depends on getDb being initialized which happens in initializeDb above.
+    const { createConversation, getConversation } =
+      await import("../memory/conversation-crud.js");
+    const created = createConversation("Test conversation");
+    const loaded = getConversation(created.id);
+    expect(loaded).not.toBeNull();
+    expect(loaded!.memoryReducedThroughMessageId).toBeNull();
+    expect(loaded!.memoryDirtyTailSinceMessageId).toBeNull();
+    expect(loaded!.memoryLastReducedAt).toBeNull();
+  });
+});

package/src/__tests__/inline-command-runner.test.ts ADDED Viewed

@@ -0,0 +1,311 @@
+import { describe, expect, mock, test } from "bun:test";
+import type { InlineCommandResult } from "../skills/inline-command-runner.js";
+// ---------------------------------------------------------------------------
+// Mocks — must be declared before the module under test is imported
+// ---------------------------------------------------------------------------
+const mockConfig = { // Static config object served by the mocked config loader below.
+  provider: "anthropic",
+  model: "test",
+  maxTokens: 4096,
+  dataDir: "/tmp",
+  sandbox: { enabled: true },
+  timeouts: {
+    shellDefaultTimeoutSec: 120,
+    shellMaxTimeoutSec: 600,
+    permissionTimeoutSec: 300,
+  },
+  rateLimit: { maxRequestsPerMinute: 0 },
+  secretDetection: {
+    enabled: true,
+    action: "warn" as const,
+    entropyThreshold: 4.0,
+  },
+  auditLog: { retentionDays: 0 },
+};
+mock.module("../config/loader.js", () => ({ // getConfig/loadConfig return mockConfig; the remaining exports are inert stubs.
+  getConfig: () => mockConfig,
+  loadConfig: () => mockConfig,
+  invalidateConfigCache: () => {},
+  saveConfig: () => {},
+  loadRawConfig: () => ({}),
+  saveRawConfig: () => {},
+  getNestedValue: () => undefined,
+  setNestedValue: () => {},
+}));
+mock.module("../util/logger.js", () => ({ // Logger stub: any property read yields a no-op function.
+  getLogger: () =>
+    new Proxy({} as Record<string, unknown>, {
+      get: () => () => {},
+    }),
+}));
+// Track wrapCommand calls to verify sandbox-only execution
+let lastWrapCall: {
+  command: string;
+  workingDir: string;
+  config: { enabled: boolean };
+  options?: { networkMode?: string };
+} | null = null;
+mock.module("../tools/terminal/sandbox.js", () => ({
+  wrapCommand: (
+    command: string,
+    workingDir: string,
+    config: { enabled: boolean },
+    options?: { networkMode?: string },
+  ) => {
+    lastWrapCall = { command, workingDir, config, options }; // Record the arguments so tests can assert on them.
+    return { // The stub hands back a plain `bash -c -- <command>` invocation rather than a sandbox wrapper.
+      command: "bash",
+      args: ["-c", "--", command],
+      sandboxed: false,
+    };
+  },
+}));
+mock.module("../tools/terminal/safe-env.js", () => ({ // Environment stub exposing only PATH and HOME, with fallbacks when unset.
+  buildSanitizedEnv: () => ({
+    PATH: process.env.PATH ?? "/usr/bin:/bin",
+    HOME: process.env.HOME ?? "/tmp",
+  }),
+}));
+import { runInlineCommand } from "../skills/inline-command-runner.js";
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+const CWD = process.cwd(); // Working directory passed to most runInlineCommand calls below.
+function expectOk(result: InlineCommandResult): void { // Asserts a successful result: ok is true and failureReason is undefined.
+  expect(result.ok).toBe(true);
+  expect(result.failureReason).toBeUndefined();
+}
+function expectFailure( // Asserts a failed result: ok is false and failureReason equals `reason`.
+  result: InlineCommandResult,
+  reason: InlineCommandResult["failureReason"],
+): void {
+  expect(result.ok).toBe(false);
+  expect(result.failureReason).toBe(reason);
+}
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+describe("runInlineCommand", () => {
+  // ── Sandbox enforcement ──────────────────────────────────────────────────
+  describe("sandbox enforcement", () => {
+    test("always passes sandbox config with enabled=true", async () => {
+      lastWrapCall = null; // Clear any call recorded by an earlier test.
+      await runInlineCommand("echo sandbox-check", CWD);
+      expect(lastWrapCall).not.toBeNull();
+      expect(lastWrapCall!.config.enabled).toBe(true);
+    });
+    test("always passes networkMode=off", async () => {
+      lastWrapCall = null;
+      await runInlineCommand("echo network-check", CWD);
+      expect(lastWrapCall).not.toBeNull();
+      expect(lastWrapCall!.options?.networkMode).toBe("off");
+    });
+    test("uses the provided workingDir as cwd", async () => {
+      lastWrapCall = null;
+      const customDir = "/tmp/my-project";
+      await runInlineCommand("echo cwd-check", customDir); // Only the argument forwarded to wrapCommand is checked; the command's outcome is not asserted.
+      expect(lastWrapCall).not.toBeNull();
+      expect(lastWrapCall!.workingDir).toBe(customDir);
+    });
+    test("passes the literal command string to the sandbox", async () => {
+      lastWrapCall = null;
+      await runInlineCommand("git log --oneline -n 5", CWD);
+      expect(lastWrapCall).not.toBeNull();
+      expect(lastWrapCall!.command).toBe("git log --oneline -n 5");
+    });
+  });
+  // ── Successful execution ─────────────────────────────────────────────────
+  describe("successful execution", () => {
+    test("captures stdout from a simple echo", async () => {
+      const result = await runInlineCommand("echo hello-world", CWD);
+      expectOk(result);
+      expect(result.output).toBe("hello-world"); // Exact match: echo's trailing newline must not appear in output.
+    });
+    test("captures multi-line stdout", async () => {
+      const result = await runInlineCommand(
+        "printf 'line1\\nline2\\nline3'",
+        CWD,
+      );
+      expectOk(result);
+      expect(result.output).toContain("line1");
+      expect(result.output).toContain("line2");
+      expect(result.output).toContain("line3");
+    });
+    test("returns empty string for command with no output", async () => {
+      const result = await runInlineCommand("true", CWD);
+      expectOk(result);
+      expect(result.output).toBe("");
+    });
+  });
+  // ── ANSI stripping ──────────────────────────────────────────────────────
+  describe("ANSI stripping", () => {
+    test("strips SGR color codes from output", async () => {
+      const result = await runInlineCommand(
+        "printf '\\033[31mred\\033[0m normal'",
+        CWD,
+      );
+      expectOk(result);
+      expect(result.output).toBe("red normal");
+      expect(result.output).not.toContain("\x1b");
+    });
+    test("strips cursor movement sequences", async () => {
+      const result = await runInlineCommand("printf '\\033[2Ahello'", CWD);
+      expectOk(result);
+      expect(result.output).toBe("hello");
+    });
+  });
+  // ── Binary output rejection ──────────────────────────────────────────────
+  describe("binary output rejection", () => {
+    test("rejects binary-ish output", async () => {
+      // Generate output with >10% control characters
+      const result = await runInlineCommand(
+        "printf '\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07abc'",
+        CWD,
+      );
+      expectFailure(result, "binary_output");
+      expect(result.output).toBe("Inline command produced binary output.");
+    });
+  });
+  // ── Output clamping ──────────────────────────────────────────────────────
+  describe("output clamping", () => {
+    test("truncates output exceeding the cap", async () => {
+      // Generate output larger than a small cap
+      const result = await runInlineCommand("printf '%0.s-' {1..200}", CWD, { // {1..200} relies on bash brace expansion; the mocked wrapCommand runs the command through bash.
+        maxOutputChars: 50,
+      });
+      expectOk(result);
+      expect(result.output.length).toBeLessThanOrEqual( // Upper bound = cap plus the length of the truncation marker.
+        50 + "\n[output truncated]".length,
+      );
+      expect(result.output).toContain("[output truncated]");
+    });
+    test("does not truncate output under the cap", async () => {
+      const result = await runInlineCommand("echo short", CWD, {
+        maxOutputChars: 1000,
+      });
+      expectOk(result);
+      expect(result.output).toBe("short");
+      expect(result.output).not.toContain("[output truncated]");
+    });
+  });
+  // ── Timeout handling ─────────────────────────────────────────────────────
+  describe("timeout handling", () => {
+    test("produces deterministic timeout result", async () => {
+      const result = await runInlineCommand("sleep 60", CWD, { // sleep 60 runs far longer than the 200ms timeout.
+        timeoutMs: 200,
+      });
+      expectFailure(result, "timeout");
+      expect(result.output).toBe("Inline command timed out after 200ms.");
+    });
+  });
+  // ── Non-zero exit ────────────────────────────────────────────────────────
+  describe("non-zero exit", () => {
+    test("produces deterministic failure for exit code 1", async () => {
+      const result = await runInlineCommand("exit 1", CWD);
+      expectFailure(result, "non_zero_exit");
+      expect(result.output).toBe("Inline command failed (exit code 1).");
+    });
+    test("produces deterministic failure for exit code 127", async () => {
+      const result = await runInlineCommand(
+        "nonexistent_command_that_does_not_exist_xyz",
+        CWD,
+      );
+      expectFailure(result, "non_zero_exit");
+      expect(result.output).toMatch( // NOTE(review): the pattern accepts any numeric exit code, although the test title names 127.
+        /Inline command failed \(exit code \d+\)\./,
+      );
+    });
+    test("does not expose stderr in the error result", async () => {
+      const result = await runInlineCommand("echo err-msg >&2 && exit 1", CWD);
+      expectFailure(result, "non_zero_exit");
+      expect(result.output).not.toContain("err-msg");
+      expect(result.output).toBe("Inline command failed (exit code 1).");
+    });
+  });
+  // ── Spawn failures ───────────────────────────────────────────────────────
+  describe("spawn failures", () => {
+    test("returns spawn_failure when cwd does not exist", async () => {
+      // When the working directory doesn't exist, the child process fails to
+      // start (ENOENT from posix_spawn). The runner should catch this and
+      // return a deterministic spawn_failure result.
+      const result = await runInlineCommand(
+        "echo hello",
+        "/nonexistent/path/that/does/not/exist",
+      );
+      expectFailure(result, "spawn_failure");
+      expect(result.output).toBe("Inline command could not be started.");
+    });
+  });
+  // ── stderr suppression ─────────────────────────────────────────────────
+  describe("stderr suppression", () => {
+    test("does not include stderr in successful output", async () => {
+      const result = await runInlineCommand(
+        "echo stdout-only && echo stderr-msg >&2",
+        CWD,
+      );
+      // Command may succeed (exit 0) — stderr should not leak into output
+      expectOk(result);
+      expect(result.output).toBe("stdout-only");
+      expect(result.output).not.toContain("stderr-msg");
+    });
+  });
+});