npm - @vellumai/assistant - Versions diffs - 0.5.2 → 0.5.3 - Mend

@vellumai/assistant 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

package/ARCHITECTURE.md +109 -0
package/docs/skills.md +100 -0
package/package.json +1 -1
package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
package/src/__tests__/conversation-agent-loop.test.ts +7 -0
package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
package/src/__tests__/conversation-wipe.test.ts +226 -0
package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
package/src/__tests__/inline-command-runner.test.ts +311 -0
package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
package/src/__tests__/list-messages-attachments.test.ts +96 -0
package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
package/src/__tests__/memory-brief-time.test.ts +285 -0
package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
package/src/__tests__/memory-chunk-archive.test.ts +400 -0
package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
package/src/__tests__/memory-episode-archive.test.ts +370 -0
package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
package/src/__tests__/memory-observation-archive.test.ts +375 -0
package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
package/src/__tests__/memory-recall-quality.test.ts +2 -2
package/src/__tests__/memory-reducer-store.test.ts +728 -0
package/src/__tests__/memory-reducer-types.test.ts +699 -0
package/src/__tests__/memory-reducer.test.ts +698 -0
package/src/__tests__/memory-regressions.test.ts +6 -4
package/src/__tests__/memory-simplified-config.test.ts +281 -0
package/src/__tests__/parse-identity-fields.test.ts +129 -0
package/src/__tests__/skill-load-inline-command.test.ts +598 -0
package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
package/src/__tests__/skills-transitive-hash.test.ts +333 -0
package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
package/src/config/feature-flag-registry.json +16 -0
package/src/config/loader.ts +1 -0
package/src/config/raw-config-utils.ts +28 -0
package/src/config/schema.ts +12 -0
package/src/config/schemas/memory-simplified.ts +101 -0
package/src/config/schemas/memory.ts +4 -0
package/src/config/skills.ts +50 -4
package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
package/src/daemon/conversation-agent-loop.ts +71 -1
package/src/daemon/conversation-lifecycle.ts +11 -1
package/src/daemon/conversation-runtime-assembly.ts +2 -1
package/src/daemon/conversation-surfaces.ts +31 -8
package/src/daemon/conversation.ts +40 -23
package/src/daemon/handlers/config-embeddings.ts +10 -2
package/src/daemon/handlers/config-model.ts +0 -9
package/src/daemon/handlers/identity.ts +12 -1
package/src/daemon/lifecycle.ts +9 -1
package/src/daemon/message-types/conversations.ts +0 -1
package/src/daemon/server.ts +1 -1
package/src/followups/followup-store.ts +47 -1
package/src/memory/archive-store.ts +400 -0
package/src/memory/brief-formatting.ts +33 -0
package/src/memory/brief-open-loops.ts +266 -0
package/src/memory/brief-time.ts +161 -0
package/src/memory/brief.ts +75 -0
package/src/memory/conversation-crud.ts +245 -101
package/src/memory/db-init.ts +12 -0
package/src/memory/indexer.ts +106 -15
package/src/memory/job-handlers/embedding.test.ts +1 -0
package/src/memory/job-handlers/embedding.ts +83 -0
package/src/memory/job-utils.ts +1 -1
package/src/memory/jobs-store.ts +6 -0
package/src/memory/jobs-worker.ts +12 -0
package/src/memory/migrations/185-memory-brief-state.ts +52 -0
package/src/memory/migrations/186-memory-archive.ts +109 -0
package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
package/src/memory/migrations/index.ts +3 -0
package/src/memory/qdrant-client.ts +23 -4
package/src/memory/reducer-store.ts +271 -0
package/src/memory/reducer-types.ts +99 -0
package/src/memory/reducer.ts +453 -0
package/src/memory/schema/conversations.ts +3 -0
package/src/memory/schema/index.ts +2 -0
package/src/memory/schema/memory-archive.ts +121 -0
package/src/memory/schema/memory-brief.ts +55 -0
package/src/memory/search/semantic.ts +17 -4
package/src/oauth/oauth-store.ts +3 -1
package/src/permissions/checker.ts +89 -6
package/src/permissions/defaults.ts +14 -0
package/src/runtime/routes/conversation-management-routes.ts +6 -0
package/src/runtime/routes/conversation-query-routes.ts +7 -0
package/src/runtime/routes/conversation-routes.ts +52 -5
package/src/runtime/routes/identity-routes.ts +2 -35
package/src/runtime/routes/llm-context-normalization.ts +14 -1
package/src/runtime/routes/memory-item-routes.ts +90 -5
package/src/runtime/routes/secret-routes.ts +2 -0
package/src/runtime/routes/surface-action-routes.ts +68 -1
package/src/schedule/schedule-store.ts +21 -0
package/src/skills/inline-command-expansions.ts +204 -0
package/src/skills/inline-command-render.ts +127 -0
package/src/skills/inline-command-runner.ts +242 -0
package/src/skills/transitive-version-hash.ts +88 -0
package/src/tasks/task-store.ts +43 -1
package/src/tools/permission-checker.ts +8 -1
package/src/tools/skills/load.ts +140 -6
package/src/util/platform.ts +18 -0
package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
package/src/workspace/migrations/registry.ts +1 -1

package/src/__tests__/skill-load-inline-includes.test.ts ADDED Viewed

@@ -0,0 +1,644 @@
+/**
+ * Tests for inline command expansion rendering in *included* child skills
+ * during skill_load.
+ *
+ * Validates that:
+ * - A root skill's included children with `!\`command\`` tokens get those
+ *   tokens expanded at skill_load time through the same sandbox-only renderer
+ *   used for root skills.
+ * - Multiple children with a mix of inline-command and static bodies are all
+ *   rendered correctly, preserving existing include ordering.
+ * - A child-render failure is confined to that child's substituted block and
+ *   does not corrupt sibling skill output.
+ */
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  rmSync,
+  writeFileSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
+// ── Test directory ────────────────────────────────────────────────────────────
+const TEST_DIR = mkdtempSync(
+  join(tmpdir(), "vellum-skill-load-inline-includes-test-"),
+);
+// ── Mocks (must be declared before any imports from the project) ─────────────
+/** Builds a path underneath the per-run temporary root. */
+const underTestDir = (...segments: string[]): string =>
+  join(TEST_DIR, ...segments);
+/**
+ * Replacement exports for ../util/platform.js. Path getters resolve inside
+ * TEST_DIR, the token/lockfile/clipboard readers return null, and the OS
+ * checks read process.platform directly.
+ */
+const platformOverrides: Record<string, (...args: unknown[]) => unknown> = {
+  getRootDir: () => TEST_DIR,
+  getDataDir: () => underTestDir("data"),
+  ensureDataDir: () => {},
+  getPidPath: () => underTestDir("vellum.pid"),
+  getDbPath: () => underTestDir("data", "assistant.db"),
+  getLogPath: () => underTestDir("logs", "vellum.log"),
+  getWorkspaceDir: () => underTestDir("workspace"),
+  getWorkspaceSkillsDir: () => underTestDir("skills"),
+  getWorkspaceConfigPath: () => underTestDir("workspace", "config.json"),
+  getWorkspaceHooksDir: () => underTestDir("workspace", "hooks"),
+  getWorkspacePromptPath: (f: unknown) => underTestDir("workspace", String(f)),
+  getInterfacesDir: () => underTestDir("interfaces"),
+  getHooksDir: () => underTestDir("hooks"),
+  getSandboxRootDir: () => underTestDir("sandbox"),
+  getSandboxWorkingDir: () => underTestDir("sandbox", "work"),
+  getHistoryPath: () => underTestDir("history"),
+  getSessionTokenPath: () => underTestDir("session-token"),
+  readSessionToken: () => null,
+  getClipboardCommand: () => null,
+  readLockfile: () => null,
+  normalizeAssistantId: (id: unknown) => String(id),
+  writeLockfile: () => {},
+  getEmbeddingModelsDir: () => underTestDir("embedding-models"),
+  getTCPPort: () => 8765,
+  isTCPEnabled: () => false,
+  getTCPHost: () => "127.0.0.1",
+  isIOSPairingEnabled: () => false,
+  getPlatformTokenPath: () => underTestDir("platform-token"),
+  readPlatformToken: () => null,
+  isMacOS: () => process.platform === "darwin",
+  isLinux: () => process.platform === "linux",
+  isWindows: () => process.platform === "win32",
+  getPlatformName: () => process.platform,
+  getWorkspaceDirDisplay: () => "~/.vellum/workspace",
+  getConversationsDir: () => underTestDir("conversations"),
+};
+mock.module("../util/platform.js", () => platformOverrides);
+mock.module("../util/logger.js", () => {
+  // Any property read on a logger yields a fresh no-op function, so every
+  // logging method the code under test calls is silently accepted.
+  const silentHandler: ProxyHandler<Record<string, unknown>> = {
+    get: () => () => {},
+  };
+  const getLogger = () =>
+    new Proxy({} as Record<string, unknown>, silentHandler);
+  const truncateForLog = (s: unknown) => String(s);
+  return { getLogger, truncateForLog };
+});
+// Track inline command runner calls
+/** One recorded invocation of the mocked inline command runner. */
+type RunInlineCommandCall = {
+  command: string;
+  workingDir: string;
+};
+const runInlineCommandCalls: RunInlineCommandCall[] = [];
+/** Return type matching InlineCommandResult from the runner module. */
+type MockInlineCommandResult = {
+  output: string;
+  ok: boolean;
+  failureReason?:
+    | "timeout"
+    | "non_zero_exit"
+    | "binary_output"
+    | "spawn_failure";
+};
+type MockRunFn = (
+  command: string,
+  workingDir: string,
+) => Promise<MockInlineCommandResult>;
+// Default mock: commands succeed with their command string echoed
+const echoCommandRunner: MockRunFn = (command, workingDir) => {
+  runInlineCommandCalls.push({ command, workingDir });
+  const echoed: MockInlineCommandResult = {
+    output: `result of: ${command}`,
+    ok: true,
+  };
+  return Promise.resolve(echoed);
+};
+let mockRunInlineCommand = mock<MockRunFn>(echoCommandRunner);
+mock.module("../skills/inline-command-runner.js", () => {
+  // Look up mockRunInlineCommand on every call so that later reassignments of
+  // the binding take effect; the options argument is intentionally dropped.
+  const runInlineCommand = (
+    command: string,
+    workingDir: string,
+    _options?: unknown,
+  ) => mockRunInlineCommand(command, workingDir);
+  return { runInlineCommand };
+});
+// Mock autoInstallFromCatalog
+const mockAutoInstall = mock(
+  (_skillId: string): Promise<boolean> => Promise.resolve(false),
+);
+mock.module("../skills/catalog-install.js", () => {
+  // Route installs through the mock so tests can reset and reconfigure it.
+  const autoInstallFromCatalog = (skillId: string) => mockAutoInstall(skillId);
+  // The catalog always resolves to an empty list in this suite.
+  const resolveCatalog = (_skillId?: string) => Promise.resolve([]);
+  return { autoInstallFromCatalog, resolveCatalog };
+});
+/** Configuration shape returned by the mocked config loader. */
+type TestConfig = {
+  permissions: { mode: "strict" | "workspace" };
+  skills: { load: { extraDirs: string[] } };
+  sandbox: { enabled: boolean };
+  assistantFeatureFlagValues?: Record<string, boolean>;
+  [key: string]: unknown;
+};
+// Shared, mutable config object: tests adjust fields on it directly and the
+// loader mock below always hands back this same instance.
+const testConfig: TestConfig = {
+  permissions: { mode: "workspace" },
+  skills: { load: { extraDirs: [] } },
+  sandbox: { enabled: true },
+  assistantFeatureFlagValues: {
+    "feature_flags.inline-skill-commands.enabled": true,
+  },
+};
+mock.module("../config/loader.js", () => {
+  const currentConfig = () => testConfig;
+  const ignore = () => {};
+  return {
+    getConfig: currentConfig,
+    loadConfig: currentConfig,
+    invalidateConfigCache: ignore,
+    saveConfig: ignore,
+    loadRawConfig: () => ({}),
+    saveRawConfig: ignore,
+    getNestedValue: () => undefined,
+    setNestedValue: ignore,
+  };
+});
+// ── Imports (after mocks) ─────────────────────────────────────────────────
+await import("../tools/skills/load.js");
+const { getTool } = await import("../tools/registry.js");
+// ── Helpers ───────────────────────────────────────────────────────────────
+/**
+ * Writes `<TEST_DIR>/skills/<skillId>/SKILL.md` containing YAML front matter
+ * (name, description and, when given, a `metadata.vellum.includes` list)
+ * followed by `body`.
+ *
+ * @param skillId Directory name of the skill under the skills root.
+ * @param name Value of the front matter `name` field.
+ * @param description Value of the front matter `description` field.
+ * @param body Text placed after the front matter.
+ * @param options Optional `includes` list of child skill ids.
+ */
+function writeSkill(
+  skillId: string,
+  name: string,
+  description: string,
+  body: string,
+  options?: { includes?: string[] },
+): void {
+  const skillDir = join(TEST_DIR, "skills", skillId);
+  mkdirSync(skillDir, { recursive: true });
+  const includedIds = options?.includes ?? [];
+  const frontmatterParts = [
+    `---\nname: "${name}"\ndescription: "${description}"`,
+  ];
+  if (includedIds.length > 0) {
+    frontmatterParts.push(`\nmetadata:\n  vellum:\n    includes:\n`);
+    frontmatterParts.push(...includedIds.map((inc) => `      - "${inc}"\n`));
+  }
+  frontmatterParts.push(`\n---\n\n`);
+  const fileContents = `${frontmatterParts.join("")}${body}\n`;
+  writeFileSync(join(skillDir, "SKILL.md"), fileContents);
+}
+/**
+ * Runs the registered `skill_load` tool with a fixed conversation id and
+ * "guardian" trust class, returning only the content and error flag.
+ *
+ * @param input Tool input forwarded unchanged to `tool.execute`.
+ * @param workingDir Working directory placed in the tool context.
+ * @throws Error when no `skill_load` tool is registered.
+ */
+async function executeSkillLoad(
+  input: Record<string, unknown>,
+  workingDir = "/tmp",
+): Promise<{ content: string; isError: boolean }> {
+  const skillLoadTool = getTool("skill_load");
+  if (!skillLoadTool) {
+    throw new Error("skill_load tool was not registered");
+  }
+  const toolContext = {
+    workingDir,
+    conversationId: "conversation-1",
+    trustClass: "guardian",
+  };
+  const { content, isError } = await skillLoadTool.execute(input, toolContext);
+  return { content, isError };
+}
+// ── Tests ─────────────────────────────────────────────────────────────────
+describe("skill_load inline command expansion for included skills", () => {
+  beforeEach(() => {
+    // Fresh state for every test: the skills root exists, no runner calls are
+    // recorded, and the catalog auto-installer declines every request.
+    mkdirSync(join(TEST_DIR, "skills"), { recursive: true });
+    runInlineCommandCalls.length = 0;
+    mockAutoInstall.mockReset();
+    mockAutoInstall.mockImplementation(() => Promise.resolve(false));
+    // Reset to default: commands succeed
+    const echoRunner: MockRunFn = (command, workingDir) => {
+      runInlineCommandCalls.push({ command, workingDir });
+      return Promise.resolve({ output: `result of: ${command}`, ok: true });
+    };
+    mockRunInlineCommand = mock<MockRunFn>(echoRunner);
+    mock.module("../skills/inline-command-runner.js", () => {
+      const runInlineCommand = (
+        command: string,
+        workingDir: string,
+        _options?: unknown,
+      ) => mockRunInlineCommand(command, workingDir);
+      return { runInlineCommand };
+    });
+    // Enable the feature flag
+    testConfig.assistantFeatureFlagValues = {
+      "feature_flags.inline-skill-commands.enabled": true,
+    };
+    testConfig.skills = { load: { extraDirs: [] } };
+  });
+  afterEach(() => {
+    // Nothing to remove when the temporary root is already gone.
+    if (!existsSync(TEST_DIR)) {
+      return;
+    }
+    rmSync(TEST_DIR, { recursive: true, force: true });
+  });
+  // ── Single inline-command child ──────────────────────────────────────
+  describe("single inline-command child", () => {
+    test("included child with inline commands gets tokens expanded", async () => {
+      const rawToken = '!`echo "production"`';
+      writeSkill(
+        "child-dynamic",
+        "Child Dynamic",
+        "A child with inline commands",
+        `Current env: ${rawToken}`,
+      );
+      writeSkill(
+        "parent-skill",
+        "Parent Skill",
+        "A parent that includes a dynamic child",
+        "Parent body content.",
+        { includes: ["child-dynamic"] },
+      );
+      const { content, isError } = await executeSkillLoad({
+        skill: "parent-skill",
+      });
+      expect(isError).toBe(false);
+      // The child's token is replaced by the rendered command output...
+      expect(content).toContain(
+        '<inline_skill_command index="0">result of: echo "production"</inline_skill_command>',
+      );
+      // ...so the raw token itself is gone...
+      expect(content).not.toContain(rawToken);
+      // ...and the parent's own body is still emitted.
+      expect(content).toContain("Parent body content.");
+    });
+    test("passes conversation working directory to child inline command runner", async () => {
+      writeSkill(
+        "child-cwd",
+        "Child CWD",
+        "Check cwd forwarding",
+        "Info: !`pwd`",
+      );
+      writeSkill(
+        "parent-cwd",
+        "Parent CWD",
+        "Parent for cwd test",
+        "Parent body.",
+        { includes: ["child-cwd"] },
+      );
+      const workingDir = "/my/project/root";
+      await executeSkillLoad({ skill: "parent-cwd" }, workingDir);
+      expect(runInlineCommandCalls.length).toBeGreaterThanOrEqual(1);
+      // The recorded call for the child's command must carry the directory
+      // that was supplied in the tool context.
+      const pwdCall = runInlineCommandCalls.find(
+        ({ command }) => command === "pwd",
+      );
+      expect(pwdCall).toBeDefined();
+      expect(pwdCall!.workingDir).toBe(workingDir);
+    });
+  });
+  // ── Multiple children: mixed inline and static ───────────────────────
+  describe("multiple children with mixed bodies", () => {
+    test("renders inline commands in dynamic children while leaving static children unchanged", async () => {
+      // One static child followed by two children carrying one token each.
+      const children: Array<[string, string, string, string]> = [
+        [
+          "child-static",
+          "Static Child",
+          "A static child",
+          "Just plain static content.",
+        ],
+        [
+          "child-dynamic-a",
+          "Dynamic Child A",
+          "Dynamic child A",
+          "Version: !`echo v1`",
+        ],
+        [
+          "child-dynamic-b",
+          "Dynamic Child B",
+          "Dynamic child B",
+          "Host: !`hostname`",
+        ],
+      ];
+      for (const [skillId, name, description, body] of children) {
+        writeSkill(skillId, name, description, body);
+      }
+      writeSkill(
+        "parent-mixed",
+        "Parent Mixed",
+        "Parent with mixed children",
+        "Root body content.",
+        { includes: children.map(([skillId]) => skillId) },
+      );
+      const { content, isError } = await executeSkillLoad({
+        skill: "parent-mixed",
+      });
+      expect(isError).toBe(false);
+      // Static child should appear unchanged
+      expect(content).toContain("Just plain static content.");
+      // Each dynamic child shows its rendered output and loses its raw token
+      expect(content).toContain(
+        '<inline_skill_command index="0">result of: echo v1</inline_skill_command>',
+      );
+      expect(content).not.toContain("!`echo v1`");
+      expect(content).toContain(
+        '<inline_skill_command index="0">result of: hostname</inline_skill_command>',
+      );
+      expect(content).not.toContain("!`hostname`");
+    });
+    test("preserves include ordering in output", async () => {
+      writeSkill("child-first", "First Child", "First child", "First body.");
+      writeSkill(
+        "child-second",
+        "Second Child",
+        "Second child",
+        "Data: !`echo second`",
+      );
+      writeSkill("child-third", "Third Child", "Third child", "Third body.");
+      writeSkill(
+        "parent-ordered",
+        "Parent Ordered",
+        "Parent with ordered includes",
+        "Root.",
+        { includes: ["child-first", "child-second", "child-third"] },
+      );
+      const { content, isError } = await executeSkillLoad({
+        skill: "parent-ordered",
+      });
+      expect(isError).toBe(false);
+      // Locate each child's header; they must all be present and appear in
+      // the order the parent listed them.
+      const positionOf = (header: string): number => content.indexOf(header);
+      const firstIdx = positionOf("--- Included Skill: First Child");
+      const secondIdx = positionOf("--- Included Skill: Second Child");
+      const thirdIdx = positionOf("--- Included Skill: Third Child");
+      expect(firstIdx).toBeGreaterThan(-1);
+      expect(secondIdx).toBeGreaterThan(-1);
+      expect(thirdIdx).toBeGreaterThan(-1);
+      expect(firstIdx).toBeLessThan(secondIdx);
+      expect(secondIdx).toBeLessThan(thirdIdx);
+    });
+  });
+  // ── Child render failures are isolated ───────────────────────────────
+  describe("child render failure isolation", () => {
+    /**
+     * Swaps in a runner that records each call and then defers to `respond`
+     * for the outcome, then re-registers the runner module mock.
+     */
+    const installRunner = (
+      respond: (command: string) => Promise<MockInlineCommandResult>,
+    ): void => {
+      mockRunInlineCommand = mock<MockRunFn>(
+        (command: string, workingDir: string) => {
+          runInlineCommandCalls.push({ command, workingDir });
+          return respond(command);
+        },
+      );
+      mock.module("../skills/inline-command-runner.js", () => ({
+        runInlineCommand: (
+          command: string,
+          workingDir: string,
+          _options?: unknown,
+        ) => mockRunInlineCommand(command, workingDir),
+      }));
+    };
+    /** Successful outcome that echoes the command back as output. */
+    const echoed = (command: string): Promise<MockInlineCommandResult> =>
+      Promise.resolve({
+        output: `result of: ${command}`,
+        ok: true,
+      });
+    test("a failing child command renders a stub without corrupting siblings", async () => {
+      // The "bad-cmd" command fails; others succeed
+      installRunner((command) => {
+        if (command !== "bad-cmd") {
+          return echoed(command);
+        }
+        return Promise.resolve({
+          output: "Inline command failed (exit code 1).",
+          ok: false,
+          failureReason: "non_zero_exit",
+        });
+      });
+      const children: Array<[string, string, string, string]> = [
+        ["child-ok", "OK Child", "Successful child", "Info: !`echo success`"],
+        ["child-fail", "Failing Child", "Failing child", "Data: !`bad-cmd`"],
+        [
+          "child-ok-too",
+          "Also OK Child",
+          "Another successful child",
+          "More: !`echo also-ok`",
+        ],
+      ];
+      for (const [skillId, name, description, body] of children) {
+        writeSkill(skillId, name, description, body);
+      }
+      writeSkill(
+        "parent-isolated",
+        "Parent Isolated",
+        "Tests failure isolation",
+        "Root content.",
+        { includes: ["child-ok", "child-fail", "child-ok-too"] },
+      );
+      const { content, isError } = await executeSkillLoad({
+        skill: "parent-isolated",
+      });
+      expect(isError).toBe(false);
+      // OK child's command should be expanded successfully
+      expect(content).toContain(
+        '<inline_skill_command index="0">result of: echo success</inline_skill_command>',
+      );
+      // Failing child's command should show a failure stub
+      expect(content).toContain(
+        '<inline_skill_command index="0">[inline command unavailable: command failed]</inline_skill_command>',
+      );
+      // Also-OK child's command should be expanded successfully
+      expect(content).toContain(
+        '<inline_skill_command index="0">result of: echo also-ok</inline_skill_command>',
+      );
+      // Root content should be intact
+      expect(content).toContain("Root content.");
+    });
+    test("a child with mixed success/failure renders both correctly", async () => {
+      // Fail the second command within this child
+      installRunner((command) => {
+        if (command !== "fail-me") {
+          return echoed(command);
+        }
+        return Promise.resolve({
+          output: "timed out",
+          ok: false,
+          failureReason: "timeout",
+        });
+      });
+      writeSkill(
+        "child-mixed-cmds",
+        "Mixed Commands Child",
+        "Child with mixed results",
+        "A: !`echo ok` B: !`fail-me` C: !`echo fine`",
+      );
+      writeSkill(
+        "parent-mixed-child",
+        "Parent Mixed Child",
+        "Parent with mixed-result child",
+        "Root.",
+        { includes: ["child-mixed-cmds"] },
+      );
+      const { content, isError } = await executeSkillLoad({
+        skill: "parent-mixed-child",
+      });
+      expect(isError).toBe(false);
+      // First and third succeed
+      expect(content).toContain(
+        '<inline_skill_command index="0">result of: echo ok</inline_skill_command>',
+      );
+      expect(content).toContain(
+        '<inline_skill_command index="2">result of: echo fine</inline_skill_command>',
+      );
+      // Second fails with timeout stub
+      expect(content).toContain(
+        '<inline_skill_command index="1">[inline command unavailable: command timed out]</inline_skill_command>',
+      );
+    });
+    test("render exception in one child does not prevent sibling rendering", async () => {
+      // Simulate a runner that throws synchronously for one child's command
+      // (no rejected promise is returned for it).
+      installRunner((command) => {
+        if (command === "crash-cmd") {
+          throw new Error("Simulated runner crash");
+        }
+        return echoed(command);
+      });
+      writeSkill(
+        "child-crash",
+        "Crashing Child",
+        "Child that crashes",
+        "Data: !`crash-cmd`",
+      );
+      writeSkill(
+        "child-healthy",
+        "Healthy Child",
+        "Healthy child",
+        "Info: !`echo healthy`",
+      );
+      writeSkill(
+        "parent-crash-test",
+        "Parent Crash Test",
+        "Tests exception isolation",
+        "Root body.",
+        { includes: ["child-crash", "child-healthy"] },
+      );
+      const { content, isError } = await executeSkillLoad({
+        skill: "parent-crash-test",
+      });
+      expect(isError).toBe(false);
+      // The crashing child should fall back to raw body (the try/catch in
+      // load.ts catches the exception and leaves the body unmodified)
+      expect(content).toContain("--- Included Skill: Crashing Child");
+      // The healthy child should still have its inline command expanded
+      expect(content).toContain(
+        '<inline_skill_command index="0">result of: echo healthy</inline_skill_command>',
+      );
+      // Root body intact
+      expect(content).toContain("Root body.");
+    });
+  });
+  // ── Feature flag off for child inline commands ────────────────────────
+  describe("feature flag disabled for included skills", () => {
+    test("skill_load returns error when child has inline commands and flag is off", async () => {
+      // Switch the flag off for this test only; beforeEach turns it back on.
+      testConfig.assistantFeatureFlagValues = {
+        "feature_flags.inline-skill-commands.enabled": false,
+      };
+      writeSkill(
+        "child-flag-off",
+        "Flag Off Child",
+        "Child with inline cmds",
+        "Data: !`echo hello`",
+      );
+      writeSkill(
+        "parent-flag-off",
+        "Parent Flag Off",
+        "Parent for flag-off test",
+        "Root content.",
+        { includes: ["child-flag-off"] },
+      );
+      const { content, isError } = await executeSkillLoad({
+        skill: "parent-flag-off",
+      });
+      // Fail closed: the entire skill_load must error when any included child
+      // has inline commands and the feature flag is off, matching the root
+      // skill behavior and the documented fail-closed contract.
+      expect(isError).toBe(true);
+      // The error names the offending child and states the reason.
+      const expectedFragments = [
+        "child-flag-off",
+        "inline-skill-commands feature flag is disabled",
+      ];
+      for (const fragment of expectedFragments) {
+        expect(content).toContain(fragment);
+      }
+      // Runner should not be called
+      expect(runInlineCommandCalls).toHaveLength(0);
+    });
+  });
+  // ── Root with inline + child with inline ──────────────────────────────
+  describe("root and child both have inline commands", () => {
+    test("both root and child inline commands are expanded", async () => {
+      writeSkill(
+        "child-both",
+        "Child Both",
+        "Child with inline",
+        "Child data: !`echo child-output`",
+      );
+      writeSkill(
+        "parent-both",
+        "Parent Both",
+        "Parent with inline",
+        "Root data: !`echo root-output`",
+        { includes: ["child-both"] },
+      );
+      const { content, isError } = await executeSkillLoad({
+        skill: "parent-both",
+      });
+      expect(isError).toBe(false);
+      // Rendered output for the root first, then for the child
+      const renderedBlocks = [
+        '<inline_skill_command index="0">result of: echo root-output</inline_skill_command>',
+        '<inline_skill_command index="0">result of: echo child-output</inline_skill_command>',
+      ];
+      for (const block of renderedBlocks) {
+        expect(content).toContain(block);
+      }
+      // No raw tokens
+      const rawTokens = ["!`echo root-output`", "!`echo child-output`"];
+      for (const token of rawTokens) {
+        expect(content).not.toContain(token);
+      }
+    });
+  });
+});