@vellumai/assistant 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/ARCHITECTURE.md +109 -0
  2. package/docs/architecture/memory.md +105 -0
  3. package/docs/skills.md +100 -0
  4. package/package.json +1 -1
  5. package/src/__tests__/archive-recall.test.ts +560 -0
  6. package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
  7. package/src/__tests__/conversation-agent-loop.test.ts +7 -0
  8. package/src/__tests__/conversation-clear-safety.test.ts +259 -0
  9. package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
  10. package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
  11. package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
  12. package/src/__tests__/conversation-wipe.test.ts +226 -0
  13. package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
  14. package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
  15. package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
  16. package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
  17. package/src/__tests__/inline-command-runner.test.ts +311 -0
  18. package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
  19. package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
  20. package/src/__tests__/list-messages-attachments.test.ts +96 -0
  21. package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
  22. package/src/__tests__/memory-brief-time.test.ts +285 -0
  23. package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
  24. package/src/__tests__/memory-chunk-archive.test.ts +400 -0
  25. package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
  26. package/src/__tests__/memory-episode-archive.test.ts +370 -0
  27. package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
  28. package/src/__tests__/memory-observation-archive.test.ts +375 -0
  29. package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
  30. package/src/__tests__/memory-recall-quality.test.ts +2 -2
  31. package/src/__tests__/memory-reducer-job.test.ts +538 -0
  32. package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
  33. package/src/__tests__/memory-reducer-store.test.ts +728 -0
  34. package/src/__tests__/memory-reducer-types.test.ts +707 -0
  35. package/src/__tests__/memory-reducer.test.ts +704 -0
  36. package/src/__tests__/memory-regressions.test.ts +30 -8
  37. package/src/__tests__/memory-simplified-config.test.ts +281 -0
  38. package/src/__tests__/parse-identity-fields.test.ts +129 -0
  39. package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
  40. package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
  41. package/src/__tests__/skill-load-inline-command.test.ts +598 -0
  42. package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
  43. package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
  44. package/src/__tests__/skills-transitive-hash.test.ts +333 -0
  45. package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
  46. package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
  47. package/src/cli/commands/conversations.ts +18 -0
  48. package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
  49. package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
  50. package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
  51. package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
  52. package/src/config/feature-flag-registry.json +16 -0
  53. package/src/config/raw-config-utils.ts +28 -0
  54. package/src/config/schema.ts +12 -0
  55. package/src/config/schemas/memory-simplified.ts +101 -0
  56. package/src/config/schemas/memory.ts +4 -0
  57. package/src/config/skills.ts +50 -4
  58. package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
  59. package/src/daemon/conversation-agent-loop.ts +71 -1
  60. package/src/daemon/conversation-lifecycle.ts +11 -1
  61. package/src/daemon/conversation-memory.ts +117 -0
  62. package/src/daemon/conversation-runtime-assembly.ts +3 -1
  63. package/src/daemon/conversation-surfaces.ts +31 -8
  64. package/src/daemon/conversation.ts +40 -23
  65. package/src/daemon/handlers/config-embeddings.ts +10 -2
  66. package/src/daemon/handlers/config-model.ts +0 -9
  67. package/src/daemon/handlers/conversations.ts +11 -0
  68. package/src/daemon/handlers/identity.ts +12 -1
  69. package/src/daemon/lifecycle.ts +52 -1
  70. package/src/daemon/message-types/conversations.ts +0 -1
  71. package/src/daemon/server.ts +1 -1
  72. package/src/followups/followup-store.ts +47 -1
  73. package/src/memory/archive-recall.ts +516 -0
  74. package/src/memory/archive-store.ts +400 -0
  75. package/src/memory/brief-formatting.ts +33 -0
  76. package/src/memory/brief-open-loops.ts +266 -0
  77. package/src/memory/brief-time.ts +162 -0
  78. package/src/memory/brief.ts +75 -0
  79. package/src/memory/conversation-crud.ts +455 -101
  80. package/src/memory/conversation-key-store.ts +33 -4
  81. package/src/memory/db-init.ts +16 -0
  82. package/src/memory/indexer.ts +106 -15
  83. package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
  84. package/src/memory/job-handlers/conversation-starters.ts +9 -3
  85. package/src/memory/job-handlers/embedding.test.ts +1 -0
  86. package/src/memory/job-handlers/embedding.ts +83 -0
  87. package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
  88. package/src/memory/job-utils.ts +1 -1
  89. package/src/memory/jobs-store.ts +8 -0
  90. package/src/memory/jobs-worker.ts +20 -0
  91. package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
  92. package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
  93. package/src/memory/migrations/141-rename-verification-table.ts +8 -0
  94. package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
  95. package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
  96. package/src/memory/migrations/185-memory-brief-state.ts +52 -0
  97. package/src/memory/migrations/186-memory-archive.ts +109 -0
  98. package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
  99. package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
  100. package/src/memory/migrations/index.ts +4 -0
  101. package/src/memory/qdrant-client.ts +23 -4
  102. package/src/memory/reducer-scheduler.ts +242 -0
  103. package/src/memory/reducer-store.ts +271 -0
  104. package/src/memory/reducer-types.ts +106 -0
  105. package/src/memory/reducer.ts +467 -0
  106. package/src/memory/schema/conversations.ts +3 -0
  107. package/src/memory/schema/index.ts +2 -0
  108. package/src/memory/schema/infrastructure.ts +1 -0
  109. package/src/memory/schema/memory-archive.ts +121 -0
  110. package/src/memory/schema/memory-brief.ts +55 -0
  111. package/src/memory/search/semantic.ts +17 -4
  112. package/src/oauth/oauth-store.ts +3 -1
  113. package/src/permissions/checker.ts +89 -6
  114. package/src/permissions/defaults.ts +14 -0
  115. package/src/runtime/auth/route-policy.ts +10 -1
  116. package/src/runtime/routes/conversation-management-routes.ts +94 -2
  117. package/src/runtime/routes/conversation-query-routes.ts +7 -0
  118. package/src/runtime/routes/conversation-routes.ts +52 -5
  119. package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
  120. package/src/runtime/routes/identity-routes.ts +2 -35
  121. package/src/runtime/routes/llm-context-normalization.ts +14 -1
  122. package/src/runtime/routes/memory-item-routes.ts +90 -5
  123. package/src/runtime/routes/secret-routes.ts +3 -0
  124. package/src/runtime/routes/surface-action-routes.ts +68 -1
  125. package/src/schedule/schedule-store.ts +28 -0
  126. package/src/schedule/scheduler.ts +6 -2
  127. package/src/skills/inline-command-expansions.ts +204 -0
  128. package/src/skills/inline-command-render.ts +127 -0
  129. package/src/skills/inline-command-runner.ts +242 -0
  130. package/src/skills/transitive-version-hash.ts +88 -0
  131. package/src/tasks/task-store.ts +43 -1
  132. package/src/telemetry/usage-telemetry-reporter.ts +1 -1
  133. package/src/tools/filesystem/edit.ts +6 -1
  134. package/src/tools/filesystem/read.ts +6 -1
  135. package/src/tools/filesystem/write.ts +6 -1
  136. package/src/tools/memory/handlers.ts +129 -1
  137. package/src/tools/permission-checker.ts +8 -1
  138. package/src/tools/schedule/create.ts +3 -0
  139. package/src/tools/schedule/list.ts +5 -1
  140. package/src/tools/schedule/update.ts +6 -0
  141. package/src/tools/skills/load.ts +140 -6
  142. package/src/util/platform.ts +18 -0
  143. package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
  144. package/src/workspace/migrations/registry.ts +1 -1
@@ -0,0 +1,644 @@
1
+ /**
2
+ * Tests for inline command expansion rendering in *included* child skills
3
+ * during skill_load.
4
+ *
5
+ * Validates that:
6
+ * - A root skill's included children with `!\`command\`` tokens get those
7
+ * tokens expanded at skill_load time through the same sandbox-only renderer
8
+ * used for root skills.
9
+ * - Multiple children with a mix of inline-command and static bodies are all
10
+ * rendered correctly, preserving existing include ordering.
11
+ * - A child-render failure is confined to that child's substituted block and
12
+ * does not corrupt sibling skill output.
13
+ */
14
+
15
+ import {
16
+ existsSync,
17
+ mkdirSync,
18
+ mkdtempSync,
19
+ rmSync,
20
+ writeFileSync,
21
+ } from "node:fs";
22
+ import { tmpdir } from "node:os";
23
+ import { join } from "node:path";
24
+ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
25
+
26
+ // ── Test directory ────────────────────────────────────────────────────────────
27
+
28
/**
 * Unique scratch directory created once when this module is evaluated.
 * The name is the OS temp dir plus a fixed prefix and a random suffix
 * appended by `mkdtempSync`.
 */
const TEST_DIR_PREFIX = join(
  tmpdir(),
  "vellum-skill-load-inline-includes-test-",
);
const TEST_DIR = mkdtempSync(TEST_DIR_PREFIX);
31
+
32
+ // ── Mocks (must be declared before any imports from the project) ─────────────
33
+
34
/** Joins the given segments beneath the temporary test root. */
const underTestDir = (...segments: string[]): string =>
  join(TEST_DIR, ...segments);

/**
 * Replacement exports for `../util/platform.js`.
 *
 * Path getters resolve inside TEST_DIR, token/lockfile readers return null,
 * writers are no-ops, and the OS predicates reflect the real
 * `process.platform`.
 */
const platformOverrides: Record<string, (...args: unknown[]) => unknown> = {
  getRootDir: () => TEST_DIR,
  getDataDir: () => underTestDir("data"),
  ensureDataDir: () => {},
  getPidPath: () => underTestDir("vellum.pid"),
  getDbPath: () => underTestDir("data", "assistant.db"),
  getLogPath: () => underTestDir("logs", "vellum.log"),
  getWorkspaceDir: () => underTestDir("workspace"),
  getWorkspaceSkillsDir: () => underTestDir("skills"),
  getWorkspaceConfigPath: () => underTestDir("workspace", "config.json"),
  getWorkspaceHooksDir: () => underTestDir("workspace", "hooks"),
  getWorkspacePromptPath: (f: unknown) => underTestDir("workspace", String(f)),
  getInterfacesDir: () => underTestDir("interfaces"),
  getHooksDir: () => underTestDir("hooks"),
  getSandboxRootDir: () => underTestDir("sandbox"),
  getSandboxWorkingDir: () => underTestDir("sandbox", "work"),
  getHistoryPath: () => underTestDir("history"),
  getSessionTokenPath: () => underTestDir("session-token"),
  readSessionToken: () => null,
  getClipboardCommand: () => null,
  readLockfile: () => null,
  normalizeAssistantId: (id: unknown) => String(id),
  writeLockfile: () => {},
  getEmbeddingModelsDir: () => underTestDir("embedding-models"),
  getTCPPort: () => 8765,
  isTCPEnabled: () => false,
  getTCPHost: () => "127.0.0.1",
  isIOSPairingEnabled: () => false,
  getPlatformTokenPath: () => underTestDir("platform-token"),
  readPlatformToken: () => null,
  isMacOS: () => process.platform === "darwin",
  isLinux: () => process.platform === "linux",
  isWindows: () => process.platform === "win32",
  getPlatformName: () => process.platform,
  getWorkspaceDirDisplay: () => "~/.vellum/workspace",
  getConversationsDir: () => underTestDir("conversations"),
};
mock.module("../util/platform.js", () => platformOverrides);
73
+
74
// Replace the logger module: every property read on a logger yields a no-op
// function, and truncateForLog simply stringifies its argument.
mock.module("../util/logger.js", () => {
  const makeSilentLogger = () =>
    new Proxy({} as Record<string, unknown>, {
      get: () => () => {},
    });

  return {
    getLogger: makeSilentLogger,
    truncateForLog: (s: unknown) => String(s),
  };
});
81
+
82
/** One recorded invocation of the mocked inline command runner. */
interface RunInlineCommandCall {
  /** Command string the runner was asked to execute. */
  command: string;
  /** Working directory that was forwarded to the runner. */
  workingDir: string;
}

/** Call log appended to by the runner mocks; emptied by setting `length = 0`. */
const runInlineCommandCalls: Array<RunInlineCommandCall> = [];
88
+
89
/**
 * Result shape returned by the runner mocks. Intended to match
 * InlineCommandResult from the runner module.
 */
interface MockInlineCommandResult {
  /** Text produced by the command (or a failure description). */
  output: string;
  /** Whether the command counts as successful. */
  ok: boolean;
  /** Failure category; only supplied by the failing mocks. */
  failureReason?: "timeout" | "non_zero_exit" | "binary_output" | "spawn_failure";
}

/** Signature shared by every runner mock implementation in this file. */
type MockRunFn = (
  command: string,
  workingDir: string,
) => Promise<MockInlineCommandResult>;
104
+
105
/**
 * Current runner behaviour. By default every command is logged and then
 * "succeeds" with an output that echoes the command string.
 * Declared with `let` because tests reassign it.
 */
let mockRunInlineCommand = mock<MockRunFn>(
  (command: string, workingDir: string) => {
    runInlineCommandCalls.push({ command, workingDir });
    const success = { output: `result of: ${command}`, ok: true };
    return Promise.resolve(success);
  },
);
115
+
116
// Route the runner module's export through whatever `mockRunInlineCommand`
// currently points at; the options argument is accepted but not forwarded.
mock.module("../skills/inline-command-runner.js", () => {
  const runInlineCommand = (
    command: string,
    workingDir: string,
    _options?: unknown,
  ) => mockRunInlineCommand(command, workingDir);

  return { runInlineCommand };
});
120
+
121
/** Stand-in for autoInstallFromCatalog; resolves to `false` by default. */
const mockAutoInstall = mock((_skillId: string) => Promise.resolve(false));

// Replace the catalog-install module: installs go through the mock above and
// the catalog always resolves to an empty list.
mock.module("../skills/catalog-install.js", () => {
  const autoInstallFromCatalog = (skillId: string) => mockAutoInstall(skillId);
  const resolveCatalog = (_skillId?: string) => Promise.resolve([]);

  return { autoInstallFromCatalog, resolveCatalog };
});
127
+
128
/** Subset of the assistant config that these tests set and mutate. */
interface TestConfig {
  /** Permission mode. */
  permissions: { mode: "strict" | "workspace" };
  /** Skill loading options. */
  skills: { load: { extraDirs: Array<string> } };
  /** Sandbox toggle. */
  sandbox: { enabled: boolean };
  /** Feature flag values keyed by full flag name. */
  assistantFeatureFlagValues?: Record<string, boolean>;
  /** Any other config keys are permitted. */
  [key: string]: unknown;
}

/**
 * Mutable config object handed out by the mocked config loader. Starts with
 * the inline-skill-commands flag switched on.
 */
const testConfig: TestConfig = {
  permissions: { mode: "workspace" },
  skills: {
    load: { extraDirs: [] },
  },
  sandbox: { enabled: true },
  assistantFeatureFlagValues: {
    "feature_flags.inline-skill-commands.enabled": true,
  },
};
144
+
145
// Replace the config loader: readers hand back the shared `testConfig`
// object, writers and cache invalidation do nothing.
mock.module("../config/loader.js", () => {
  const currentConfig = () => testConfig;
  const doNothing = () => {};

  return {
    getConfig: currentConfig,
    loadConfig: currentConfig,
    invalidateConfigCache: doNothing,
    saveConfig: doNothing,
    loadRawConfig: () => ({}),
    saveRawConfig: doNothing,
    getNestedValue: () => undefined,
    setNestedValue: doNothing,
  };
});
155
+
156
+ // ── Imports (after mocks) ─────────────────────────────────────────────────
157
+
158
+ await import("../tools/skills/load.js");
159
+ const { getTool } = await import("../tools/registry.js");
160
+
161
+ // ── Helpers ───────────────────────────────────────────────────────────────
162
+
163
/**
 * Writes a minimal skill fixture to `<TEST_DIR>/skills/<skillId>/SKILL.md`.
 *
 * The file consists of a YAML frontmatter block (name, description and,
 * optionally, a nested `metadata.vellum.includes` list) followed by a blank
 * line and the skill body.
 *
 * @param skillId - Directory name for the skill under the skills folder.
 * @param name - Value written to the frontmatter `name` field.
 * @param description - Value written to the frontmatter `description` field.
 * @param body - Markdown body placed after the frontmatter.
 * @param options - Optional `includes` list of child skill ids; omitted or
 *   empty means no `metadata` section is written.
 */
function writeSkill(
  skillId: string,
  name: string,
  description: string,
  body: string,
  options?: { includes?: string[] },
): void {
  const skillDir = join(TEST_DIR, "skills", skillId);
  mkdirSync(skillDir, { recursive: true });

  let frontmatter = `---\nname: "${name}"\ndescription: "${description}"`;
  if (options?.includes && options.includes.length > 0) {
    // YAML nesting is indentation-sensitive: each level must be indented
    // deeper than its parent, otherwise `vellum` and `includes` become
    // siblings under `metadata` and the include list is no longer nested
    // under `metadata.vellum.includes`.
    frontmatter += `\nmetadata:\n  vellum:\n    includes:\n`;
    for (const inc of options.includes) {
      frontmatter += `      - "${inc}"\n`;
    }
  }
  frontmatter += `\n---\n\n`;

  writeFileSync(join(skillDir, "SKILL.md"), `${frontmatter}${body}\n`);
}
184
+
185
/**
 * Runs the registered `skill_load` tool with a fixed conversation id and
 * trust class, returning only the fields the tests assert on.
 *
 * @param input - Tool input, e.g. `{ skill: "parent-skill" }`.
 * @param workingDir - Working directory placed in the tool context
 *   (defaults to "/tmp").
 * @returns The tool result's `content` and `isError`.
 * @throws Error when no `skill_load` tool is registered.
 */
async function executeSkillLoad(
  input: Record<string, unknown>,
  workingDir = "/tmp",
): Promise<{ content: string; isError: boolean }> {
  const skillLoadTool = getTool("skill_load");
  if (!skillLoadTool) {
    throw new Error("skill_load tool was not registered");
  }

  const { content, isError } = await skillLoadTool.execute(input, {
    workingDir,
    conversationId: "conversation-1",
    trustClass: "guardian",
  });
  return { content, isError };
}
199
+
200
+ // ── Tests ─────────────────────────────────────────────────────────────────
201
+
202
+ describe("skill_load inline command expansion for included skills", () => {
203
beforeEach(() => {
  // The skills directory is recreated here because afterEach removes TEST_DIR.
  mkdirSync(join(TEST_DIR, "skills"), { recursive: true });

  // Drop recorded calls and restore the auto-install stub's default.
  runInlineCommandCalls.length = 0;
  mockAutoInstall.mockReset();
  mockAutoInstall.mockImplementation(() => Promise.resolve(false));

  // Restore the default runner: log the call, then succeed with an echo.
  mockRunInlineCommand = mock<MockRunFn>(
    (command: string, workingDir: string) => {
      runInlineCommandCalls.push({ command, workingDir });
      const success = { output: `result of: ${command}`, ok: true };
      return Promise.resolve(success);
    },
  );
  mock.module("../skills/inline-command-runner.js", () => {
    const runInlineCommand = (
      command: string,
      workingDir: string,
      _options?: unknown,
    ) => mockRunInlineCommand(command, workingDir);

    return { runInlineCommand };
  });

  // Feature flag on and no extra skill directories.
  testConfig.assistantFeatureFlagValues = {
    "feature_flags.inline-skill-commands.enabled": true,
  };
  testConfig.skills = { load: { extraDirs: [] } };
});
233
+
234
afterEach(() => {
  // Remove the whole scratch tree so fixtures never leak between tests.
  const scratchDirPresent = existsSync(TEST_DIR);
  if (!scratchDirPresent) {
    return;
  }
  rmSync(TEST_DIR, { recursive: true, force: true });
});
239
+
240
+ // ── Single inline-command child ──────────────────────────────────────
241
+
242
describe("single inline-command child", () => {
  test("included child with inline commands gets tokens expanded", async () => {
    const rawToken = '!`echo "production"`';
    const expandedToken =
      '<inline_skill_command index="0">result of: echo "production"</inline_skill_command>';

    writeSkill(
      "child-dynamic",
      "Child Dynamic",
      "A child with inline commands",
      'Current env: !`echo "production"`',
    );
    writeSkill(
      "parent-skill",
      "Parent Skill",
      "A parent that includes a dynamic child",
      "Parent body content.",
      { includes: ["child-dynamic"] },
    );

    const { content, isError } = await executeSkillLoad({
      skill: "parent-skill",
    });
    expect(isError).toBe(false);
    // The child's command is rendered and its raw token is gone.
    expect(content).toContain(expandedToken);
    expect(content).not.toContain(rawToken);
    // The parent's own body is still part of the output.
    expect(content).toContain("Parent body content.");
  });

  test("passes conversation working directory to child inline command runner", async () => {
    writeSkill("child-cwd", "Child CWD", "Check cwd forwarding", "Info: !`pwd`");
    writeSkill(
      "parent-cwd",
      "Parent CWD",
      "Parent for cwd test",
      "Parent body.",
      { includes: ["child-cwd"] },
    );

    const workingDir = "/my/project/root";
    await executeSkillLoad({ skill: "parent-cwd" }, workingDir);

    expect(runInlineCommandCalls.length).toBeGreaterThanOrEqual(1);
    const pwdCall = runInlineCommandCalls.find(
      (call) => call.command === "pwd",
    );
    expect(pwdCall).toBeDefined();
    expect(pwdCall?.workingDir).toBe(workingDir);
  });
});
293
+
294
+ // ── Multiple children: mixed inline and static ───────────────────────
295
+
296
describe("multiple children with mixed bodies", () => {
  test("renders inline commands in dynamic children while leaving static children unchanged", async () => {
    writeSkill(
      "child-static",
      "Static Child",
      "A static child",
      "Just plain static content.",
    );
    writeSkill(
      "child-dynamic-a",
      "Dynamic Child A",
      "Dynamic child A",
      "Version: !`echo v1`",
    );
    writeSkill(
      "child-dynamic-b",
      "Dynamic Child B",
      "Dynamic child B",
      "Host: !`hostname`",
    );
    writeSkill(
      "parent-mixed",
      "Parent Mixed",
      "Parent with mixed children",
      "Root body content.",
      { includes: ["child-static", "child-dynamic-a", "child-dynamic-b"] },
    );

    const { content, isError } = await executeSkillLoad({
      skill: "parent-mixed",
    });
    expect(isError).toBe(false);

    // The static child passes through untouched.
    expect(content).toContain("Just plain static content.");

    // Dynamic child A: token replaced by rendered output.
    expect(content).toContain(
      '<inline_skill_command index="0">result of: echo v1</inline_skill_command>',
    );
    expect(content).not.toContain("!`echo v1`");

    // Dynamic child B: token replaced by rendered output.
    expect(content).toContain(
      '<inline_skill_command index="0">result of: hostname</inline_skill_command>',
    );
    expect(content).not.toContain("!`hostname`");
  });

  test("preserves include ordering in output", async () => {
    writeSkill("child-first", "First Child", "First child", "First body.");
    writeSkill(
      "child-second",
      "Second Child",
      "Second child",
      "Data: !`echo second`",
    );
    writeSkill("child-third", "Third Child", "Third child", "Third body.");
    writeSkill(
      "parent-ordered",
      "Parent Ordered",
      "Parent with ordered includes",
      "Root.",
      { includes: ["child-first", "child-second", "child-third"] },
    );

    const { content, isError } = await executeSkillLoad({
      skill: "parent-ordered",
    });
    expect(isError).toBe(false);

    // Each included-skill header must exist, and in declaration order.
    const positionOfFirst = content.indexOf("--- Included Skill: First Child");
    const positionOfSecond = content.indexOf(
      "--- Included Skill: Second Child",
    );
    const positionOfThird = content.indexOf("--- Included Skill: Third Child");

    expect(positionOfFirst).toBeGreaterThan(-1);
    expect(positionOfSecond).toBeGreaterThan(-1);
    expect(positionOfThird).toBeGreaterThan(-1);
    expect(positionOfFirst).toBeLessThan(positionOfSecond);
    expect(positionOfSecond).toBeLessThan(positionOfThird);
  });
});
380
+
381
+ // ── Child render failures are isolated ───────────────────────────────
382
+
383
describe("child render failure isolation", () => {
  /**
   * Makes `impl` the current runner behaviour and re-registers the runner
   * module mock so its export forwards to it.
   */
  const installRunner = (impl: MockRunFn): void => {
    mockRunInlineCommand = mock<MockRunFn>(impl);
    mock.module("../skills/inline-command-runner.js", () => ({
      runInlineCommand: (
        command: string,
        workingDir: string,
        _options?: unknown,
      ) => mockRunInlineCommand(command, workingDir),
    }));
  };

  /** Success result identical to what the default runner mock produces. */
  const succeed = (command: string): Promise<MockInlineCommandResult> =>
    Promise.resolve({ output: `result of: ${command}`, ok: true });

  test("a failing child command renders a stub without corrupting siblings", async () => {
    // Only "bad-cmd" fails; every other command succeeds.
    installRunner((command: string, workingDir: string) => {
      runInlineCommandCalls.push({ command, workingDir });
      if (command === "bad-cmd") {
        return Promise.resolve({
          output: "Inline command failed (exit code 1).",
          ok: false,
          failureReason: "non_zero_exit",
        });
      }
      return succeed(command);
    });

    writeSkill("child-ok", "OK Child", "Successful child", "Info: !`echo success`");
    writeSkill("child-fail", "Failing Child", "Failing child", "Data: !`bad-cmd`");
    writeSkill(
      "child-ok-too",
      "Also OK Child",
      "Another successful child",
      "More: !`echo also-ok`",
    );
    writeSkill(
      "parent-isolated",
      "Parent Isolated",
      "Tests failure isolation",
      "Root content.",
      { includes: ["child-ok", "child-fail", "child-ok-too"] },
    );

    const { content, isError } = await executeSkillLoad({
      skill: "parent-isolated",
    });
    expect(isError).toBe(false);

    // First sibling renders normally.
    expect(content).toContain(
      '<inline_skill_command index="0">result of: echo success</inline_skill_command>',
    );

    // The failing child gets a failure stub in place of output.
    expect(content).toContain(
      '<inline_skill_command index="0">[inline command unavailable: command failed]</inline_skill_command>',
    );

    // The sibling after the failure renders normally too.
    expect(content).toContain(
      '<inline_skill_command index="0">result of: echo also-ok</inline_skill_command>',
    );

    // The root body is untouched.
    expect(content).toContain("Root content.");
  });

  test("a child with mixed success/failure renders both correctly", async () => {
    // The middle command of the child times out; the others succeed.
    installRunner((command: string, workingDir: string) => {
      runInlineCommandCalls.push({ command, workingDir });
      if (command === "fail-me") {
        return Promise.resolve({
          output: "timed out",
          ok: false,
          failureReason: "timeout",
        });
      }
      return succeed(command);
    });

    writeSkill(
      "child-mixed-cmds",
      "Mixed Commands Child",
      "Child with mixed results",
      "A: !`echo ok` B: !`fail-me` C: !`echo fine`",
    );
    writeSkill(
      "parent-mixed-child",
      "Parent Mixed Child",
      "Parent with mixed-result child",
      "Root.",
      { includes: ["child-mixed-cmds"] },
    );

    const { content, isError } = await executeSkillLoad({
      skill: "parent-mixed-child",
    });
    expect(isError).toBe(false);

    // Commands at index 0 and 2 succeed.
    expect(content).toContain(
      '<inline_skill_command index="0">result of: echo ok</inline_skill_command>',
    );
    expect(content).toContain(
      '<inline_skill_command index="2">result of: echo fine</inline_skill_command>',
    );
    // The command at index 1 is replaced by the timeout stub.
    expect(content).toContain(
      '<inline_skill_command index="1">[inline command unavailable: command timed out]</inline_skill_command>',
    );
  });

  test("render exception in one child does not prevent sibling rendering", async () => {
    // The runner throws synchronously for "crash-cmd" to simulate a render
    // exception for that child.
    installRunner((command: string, workingDir: string) => {
      runInlineCommandCalls.push({ command, workingDir });
      if (command === "crash-cmd") {
        throw new Error("Simulated runner crash");
      }
      return succeed(command);
    });

    writeSkill(
      "child-crash",
      "Crashing Child",
      "Child that crashes",
      "Data: !`crash-cmd`",
    );
    writeSkill(
      "child-healthy",
      "Healthy Child",
      "Healthy child",
      "Info: !`echo healthy`",
    );
    writeSkill(
      "parent-crash-test",
      "Parent Crash Test",
      "Tests exception isolation",
      "Root body.",
      { includes: ["child-crash", "child-healthy"] },
    );

    const { content, isError } = await executeSkillLoad({
      skill: "parent-crash-test",
    });
    expect(isError).toBe(false);

    // The crashing child's section is still emitted (load.ts is expected to
    // catch the exception and leave that child's body unmodified).
    expect(content).toContain("--- Included Skill: Crashing Child");

    // The healthy sibling is rendered as usual.
    expect(content).toContain(
      '<inline_skill_command index="0">result of: echo healthy</inline_skill_command>',
    );

    // The root body is untouched.
    expect(content).toContain("Root body.");
  });
});
573
+
574
+ // ── Feature flag off for child inline commands ────────────────────────
575
+
576
describe("feature flag disabled for included skills", () => {
  test("skill_load returns error when child has inline commands and flag is off", async () => {
    // Turn the flag off for this test only; beforeEach switches it back on.
    testConfig.assistantFeatureFlagValues = {
      "feature_flags.inline-skill-commands.enabled": false,
    };

    writeSkill(
      "child-flag-off",
      "Flag Off Child",
      "Child with inline cmds",
      "Data: !`echo hello`",
    );
    writeSkill(
      "parent-flag-off",
      "Parent Flag Off",
      "Parent for flag-off test",
      "Root content.",
      { includes: ["child-flag-off"] },
    );

    const { content, isError } = await executeSkillLoad({
      skill: "parent-flag-off",
    });

    // Fail closed: with the flag off, an included child that carries inline
    // commands must make the whole skill_load error, as for a root skill.
    expect(isError).toBe(true);
    expect(content).toContain("child-flag-off");
    expect(content).toContain("inline-skill-commands feature flag is disabled");

    // Nothing may have been executed.
    expect(runInlineCommandCalls).toHaveLength(0);
  });
});
609
+
610
+ // ── Root with inline + child with inline ──────────────────────────────
611
+
612
describe("root and child both have inline commands", () => {
  test("both root and child inline commands are expanded", async () => {
    writeSkill(
      "child-both",
      "Child Both",
      "Child with inline",
      "Child data: !`echo child-output`",
    );
    writeSkill(
      "parent-both",
      "Parent Both",
      "Parent with inline",
      "Root data: !`echo root-output`",
      { includes: ["child-both"] },
    );

    const { content, isError } = await executeSkillLoad({
      skill: "parent-both",
    });
    expect(isError).toBe(false);

    // Both the root's and the child's commands are rendered.
    const renderedRoot =
      '<inline_skill_command index="0">result of: echo root-output</inline_skill_command>';
    const renderedChild =
      '<inline_skill_command index="0">result of: echo child-output</inline_skill_command>';
    expect(content).toContain(renderedRoot);
    expect(content).toContain(renderedChild);

    // Neither raw token is left behind.
    expect(content).not.toContain("!`echo root-output`");
    expect(content).not.toContain("!`echo child-output`");
  });
});
644
+ });