@vellumai/assistant 0.3.19 → 0.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/ARCHITECTURE.md +151 -15
  2. package/Dockerfile +1 -0
  3. package/README.md +40 -4
  4. package/bun.lock +139 -2
  5. package/docs/architecture/integrations.md +7 -11
  6. package/package.json +2 -1
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +54 -0
  8. package/src/__tests__/approval-primitive.test.ts +540 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +206 -0
  10. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +198 -0
  11. package/src/__tests__/assistant-feature-flags-integration.test.ts +272 -0
  12. package/src/__tests__/call-controller.test.ts +439 -108
  13. package/src/__tests__/channel-invite-transport.test.ts +264 -0
  14. package/src/__tests__/cli.test.ts +42 -1
  15. package/src/__tests__/config-schema.test.ts +11 -127
  16. package/src/__tests__/config-watcher.test.ts +0 -8
  17. package/src/__tests__/daemon-lifecycle.test.ts +1 -0
  18. package/src/__tests__/daemon-server-session-init.test.ts +8 -2
  19. package/src/__tests__/diff.test.ts +22 -0
  20. package/src/__tests__/guardian-action-copy-generator.test.ts +5 -0
  21. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +300 -32
  22. package/src/__tests__/guardian-action-late-reply.test.ts +546 -1
  23. package/src/__tests__/guardian-actions-endpoint.test.ts +774 -0
  24. package/src/__tests__/guardian-control-plane-policy.test.ts +36 -3
  25. package/src/__tests__/guardian-dispatch.test.ts +124 -0
  26. package/src/__tests__/guardian-grant-minting.test.ts +6 -17
  27. package/src/__tests__/inbound-invite-redemption.test.ts +367 -0
  28. package/src/__tests__/invite-redemption-service.test.ts +306 -0
  29. package/src/__tests__/ipc-snapshot.test.ts +57 -0
  30. package/src/__tests__/notification-decision-fallback.test.ts +88 -0
  31. package/src/__tests__/sandbox-diagnostics.test.ts +6 -249
  32. package/src/__tests__/sandbox-host-parity.test.ts +6 -13
  33. package/src/__tests__/scoped-approval-grants.test.ts +6 -6
  34. package/src/__tests__/scoped-grant-security-matrix.test.ts +5 -4
  35. package/src/__tests__/script-proxy-session-manager.test.ts +1 -19
  36. package/src/__tests__/session-load-history-repair.test.ts +169 -2
  37. package/src/__tests__/session-runtime-assembly.test.ts +33 -5
  38. package/src/__tests__/skill-feature-flags-integration.test.ts +171 -0
  39. package/src/__tests__/skill-feature-flags.test.ts +188 -0
  40. package/src/__tests__/skill-load-feature-flag.test.ts +141 -0
  41. package/src/__tests__/skill-mirror-parity.test.ts +1 -0
  42. package/src/__tests__/skill-projection-feature-flag.test.ts +363 -0
  43. package/src/__tests__/system-prompt.test.ts +1 -1
  44. package/src/__tests__/terminal-sandbox.test.ts +142 -9
  45. package/src/__tests__/terminal-tools.test.ts +2 -93
  46. package/src/__tests__/thread-seed-composer.test.ts +18 -0
  47. package/src/__tests__/tool-approval-handler.test.ts +350 -0
  48. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +8 -10
  49. package/src/__tests__/voice-scoped-grant-consumer.test.ts +46 -84
  50. package/src/agent/loop.ts +36 -1
  51. package/src/approvals/approval-primitive.ts +381 -0
  52. package/src/approvals/guardian-decision-primitive.ts +191 -0
  53. package/src/calls/call-controller.ts +252 -209
  54. package/src/calls/call-domain.ts +44 -6
  55. package/src/calls/guardian-dispatch.ts +48 -0
  56. package/src/calls/types.ts +1 -1
  57. package/src/calls/voice-session-bridge.ts +46 -30
  58. package/src/cli/core-commands.ts +0 -4
  59. package/src/cli/mcp.ts +58 -0
  60. package/src/cli.ts +76 -34
  61. package/src/config/__tests__/feature-flag-registry-guard.test.ts +179 -0
  62. package/src/config/assistant-feature-flags.ts +162 -0
  63. package/src/config/bundled-skills/api-mapping/icon.svg +18 -0
  64. package/src/config/bundled-skills/messaging/TOOLS.json +30 -0
  65. package/src/config/bundled-skills/messaging/tools/slack-delete-message.ts +24 -0
  66. package/src/config/bundled-skills/notifications/SKILL.md +1 -1
  67. package/src/config/bundled-skills/reminder/SKILL.md +49 -2
  68. package/src/config/bundled-skills/time-based-actions/SKILL.md +49 -2
  69. package/src/config/bundled-skills/voice-setup/SKILL.md +122 -0
  70. package/src/config/core-schema.ts +1 -1
  71. package/src/config/env-registry.ts +10 -0
  72. package/src/config/feature-flag-registry.json +61 -0
  73. package/src/config/loader.ts +22 -1
  74. package/src/config/mcp-schema.ts +46 -0
  75. package/src/config/sandbox-schema.ts +0 -39
  76. package/src/config/schema.ts +18 -2
  77. package/src/config/skill-state.ts +34 -0
  78. package/src/config/skills-schema.ts +0 -1
  79. package/src/config/skills.ts +9 -0
  80. package/src/config/system-prompt.ts +110 -46
  81. package/src/config/templates/SOUL.md +1 -1
  82. package/src/config/types.ts +19 -1
  83. package/src/config/vellum-skills/catalog.json +1 -1
  84. package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
  85. package/src/config/vellum-skills/sms-setup/SKILL.md +1 -1
  86. package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -5
  87. package/src/config/vellum-skills/trusted-contacts/SKILL.md +105 -3
  88. package/src/config/vellum-skills/twilio-setup/SKILL.md +1 -1
  89. package/src/daemon/config-watcher.ts +0 -1
  90. package/src/daemon/daemon-control.ts +1 -1
  91. package/src/daemon/guardian-invite-intent.ts +124 -0
  92. package/src/daemon/handlers/avatar.ts +68 -0
  93. package/src/daemon/handlers/browser.ts +2 -2
  94. package/src/daemon/handlers/guardian-actions.ts +120 -0
  95. package/src/daemon/handlers/index.ts +4 -0
  96. package/src/daemon/handlers/sessions.ts +19 -0
  97. package/src/daemon/handlers/shared.ts +3 -1
  98. package/src/daemon/install-cli-launchers.ts +58 -13
  99. package/src/daemon/ipc-contract/guardian-actions.ts +53 -0
  100. package/src/daemon/ipc-contract/sessions.ts +8 -2
  101. package/src/daemon/ipc-contract/settings.ts +25 -2
  102. package/src/daemon/ipc-contract-inventory.json +10 -0
  103. package/src/daemon/ipc-contract.ts +4 -0
  104. package/src/daemon/lifecycle.ts +14 -2
  105. package/src/daemon/main.ts +1 -0
  106. package/src/daemon/providers-setup.ts +26 -1
  107. package/src/daemon/server.ts +1 -0
  108. package/src/daemon/session-lifecycle.ts +52 -7
  109. package/src/daemon/session-memory.ts +45 -0
  110. package/src/daemon/session-process.ts +258 -432
  111. package/src/daemon/session-runtime-assembly.ts +12 -0
  112. package/src/daemon/session-skill-tools.ts +14 -1
  113. package/src/daemon/session-tool-setup.ts +5 -0
  114. package/src/daemon/session.ts +11 -0
  115. package/src/daemon/shutdown-handlers.ts +11 -0
  116. package/src/daemon/tool-side-effects.ts +35 -9
  117. package/src/index.ts +2 -2
  118. package/src/mcp/client.ts +152 -0
  119. package/src/mcp/manager.ts +139 -0
  120. package/src/memory/conversation-display-order-migration.ts +44 -0
  121. package/src/memory/conversation-queries.ts +2 -0
  122. package/src/memory/conversation-store.ts +91 -0
  123. package/src/memory/db-init.ts +5 -1
  124. package/src/memory/embedding-local.ts +13 -8
  125. package/src/memory/guardian-action-store.ts +125 -2
  126. package/src/memory/ingress-invite-store.ts +95 -1
  127. package/src/memory/migrations/035-guardian-action-supersession.ts +23 -0
  128. package/src/memory/migrations/index.ts +2 -1
  129. package/src/memory/schema.ts +5 -1
  130. package/src/memory/scoped-approval-grants.ts +14 -5
  131. package/src/messaging/providers/slack/client.ts +12 -0
  132. package/src/messaging/providers/slack/types.ts +5 -0
  133. package/src/notifications/decision-engine.ts +49 -12
  134. package/src/notifications/emit-signal.ts +7 -0
  135. package/src/notifications/signal.ts +7 -0
  136. package/src/notifications/thread-seed-composer.ts +2 -1
  137. package/src/runtime/channel-approval-types.ts +16 -6
  138. package/src/runtime/channel-approvals.ts +19 -15
  139. package/src/runtime/channel-invite-transport.ts +85 -0
  140. package/src/runtime/channel-invite-transports/telegram.ts +105 -0
  141. package/src/runtime/guardian-action-grant-minter.ts +92 -35
  142. package/src/runtime/guardian-action-message-composer.ts +30 -0
  143. package/src/runtime/guardian-decision-types.ts +91 -0
  144. package/src/runtime/http-server.ts +23 -1
  145. package/src/runtime/ingress-service.ts +22 -0
  146. package/src/runtime/invite-redemption-service.ts +181 -0
  147. package/src/runtime/invite-redemption-templates.ts +39 -0
  148. package/src/runtime/routes/call-routes.ts +2 -1
  149. package/src/runtime/routes/guardian-action-routes.ts +206 -0
  150. package/src/runtime/routes/guardian-approval-interception.ts +66 -190
  151. package/src/runtime/routes/identity-routes.ts +73 -0
  152. package/src/runtime/routes/inbound-message-handler.ts +486 -394
  153. package/src/runtime/routes/pairing-routes.ts +4 -0
  154. package/src/security/encrypted-store.ts +31 -17
  155. package/src/security/keychain.ts +176 -2
  156. package/src/security/secure-keys.ts +97 -0
  157. package/src/security/tool-approval-digest.ts +1 -1
  158. package/src/tools/browser/browser-execution.ts +2 -2
  159. package/src/tools/browser/browser-manager.ts +46 -32
  160. package/src/tools/browser/browser-screencast.ts +2 -2
  161. package/src/tools/calls/call-start.ts +1 -1
  162. package/src/tools/executor.ts +22 -17
  163. package/src/tools/mcp/mcp-tool-factory.ts +100 -0
  164. package/src/tools/network/script-proxy/session-manager.ts +1 -5
  165. package/src/tools/registry.ts +64 -1
  166. package/src/tools/skills/load.ts +22 -8
  167. package/src/tools/system/avatar-generator.ts +119 -0
  168. package/src/tools/system/navigate-settings.ts +65 -0
  169. package/src/tools/system/open-system-settings.ts +75 -0
  170. package/src/tools/system/voice-config.ts +121 -32
  171. package/src/tools/terminal/backends/native.ts +40 -19
  172. package/src/tools/terminal/backends/types.ts +3 -3
  173. package/src/tools/terminal/parser.ts +1 -1
  174. package/src/tools/terminal/sandbox-diagnostics.ts +6 -87
  175. package/src/tools/terminal/sandbox.ts +1 -12
  176. package/src/tools/terminal/shell.ts +3 -31
  177. package/src/tools/tool-approval-handler.ts +141 -3
  178. package/src/tools/tool-manifest.ts +6 -0
  179. package/src/tools/types.ts +10 -2
  180. package/src/util/diff.ts +36 -13
  181. package/Dockerfile.sandbox +0 -5
  182. package/src/__tests__/doordash-client.test.ts +0 -187
  183. package/src/__tests__/doordash-session.test.ts +0 -154
  184. package/src/__tests__/signup-e2e.test.ts +0 -354
  185. package/src/__tests__/terminal-sandbox-docker.test.ts +0 -1065
  186. package/src/__tests__/terminal-sandbox.integration.test.ts +0 -180
  187. package/src/cli/doordash.ts +0 -1057
  188. package/src/config/bundled-skills/doordash/SKILL.md +0 -163
  189. package/src/config/templates/LOOKS.md +0 -25
  190. package/src/doordash/cart-queries.ts +0 -787
  191. package/src/doordash/client.ts +0 -1016
  192. package/src/doordash/order-queries.ts +0 -85
  193. package/src/doordash/queries.ts +0 -13
  194. package/src/doordash/query-extractor.ts +0 -94
  195. package/src/doordash/search-queries.ts +0 -203
  196. package/src/doordash/session.ts +0 -84
  197. package/src/doordash/store-queries.ts +0 -246
  198. package/src/doordash/types.ts +0 -367
  199. package/src/tools/terminal/backends/docker.ts +0 -379
@@ -335,11 +335,6 @@ describe('buildChannelAwarenessSection', () => {
335
335
  expect(section).toContain('computer-control permissions on non-dashboard');
336
336
  });
337
337
 
338
- test('includes guardian context contract for channel actors', () => {
339
- const section = buildChannelAwarenessSection();
340
- expect(section).toContain('<guardian_context>');
341
- expect(section).toContain('Never infer guardian status');
342
- });
343
338
  });
344
339
 
345
340
  // ---------------------------------------------------------------------------
@@ -569,6 +564,39 @@ describe('injectGuardianContext', () => {
569
564
  expect(text).toContain('source_channel: sms');
570
565
  expect(text).toContain('</guardian_context>');
571
566
  });
567
+
568
+ test('includes behavioral guidance for non-guardian actors', () => {
569
+ const ctx: GuardianRuntimeContext = {
570
+ sourceChannel: 'telegram',
571
+ actorRole: 'non-guardian',
572
+ guardianExternalUserId: 'guardian-user-1',
573
+ guardianChatId: 'chat-1',
574
+ requesterIdentifier: '@someone',
575
+ requesterExternalUserId: 'other-user-1',
576
+ requesterChatId: 'chat-2',
577
+ };
578
+
579
+ const result = injectGuardianContext(baseUserMessage, ctx);
580
+ const text = (result.content[0] as { type: 'text'; text: string }).text;
581
+ expect(text).toContain('non-guardian account');
582
+ expect(text).toContain('Do not explain the verification system');
583
+ });
584
+
585
+ test('omits non-guardian behavioral guidance for guardian actors', () => {
586
+ const ctx: GuardianRuntimeContext = {
587
+ sourceChannel: 'telegram',
588
+ actorRole: 'guardian',
589
+ guardianExternalUserId: 'guardian-user-1',
590
+ guardianChatId: 'chat-1',
591
+ requesterIdentifier: '@guardian',
592
+ requesterExternalUserId: 'guardian-user-1',
593
+ requesterChatId: 'chat-1',
594
+ };
595
+
596
+ const result = injectGuardianContext(baseUserMessage, ctx);
597
+ const text = (result.content[0] as { type: 'text'; text: string }).text;
598
+ expect(text).not.toContain('non-guardian account');
599
+ });
572
600
  });
573
601
 
574
602
  describe('stripGuardianContext', () => {
@@ -0,0 +1,171 @@
1
+ /**
2
+ * Integration tests for skill feature flag enforcement at system prompt,
3
+ * skill_load, and session-skill-tools projection layers.
4
+ */
5
+ import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
6
+ import { tmpdir } from 'node:os';
7
+ import { join } from 'node:path';
8
+
9
+ import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
10
+
11
+ // ---------------------------------------------------------------------------
12
+ // Test-scoped temp directory and config state
13
+ // ---------------------------------------------------------------------------
14
+
15
+ const TEST_DIR = join(tmpdir(), `vellum-skill-flags-test-${crypto.randomUUID()}`);
16
+
17
+ let currentConfig: Record<string, unknown> = {
18
+ sandbox: { enabled: false, backend: 'native' },
19
+ featureFlags: {},
20
+ };
21
+
22
+ const DECLARED_SKILL_ID = 'hatch-new-assistant';
23
+ const DECLARED_LEGACY_KEY = 'skills.hatch-new-assistant.enabled';
24
+
25
+ mock.module('../util/platform.js', () => ({
26
+ getRootDir: () => TEST_DIR,
27
+ getDataDir: () => TEST_DIR,
28
+ getWorkspaceDir: () => TEST_DIR,
29
+ getWorkspaceConfigPath: () => join(TEST_DIR, 'config.json'),
30
+ getWorkspaceSkillsDir: () => join(TEST_DIR, 'skills'),
31
+ getWorkspaceHooksDir: () => join(TEST_DIR, 'hooks'),
32
+ getWorkspacePromptPath: (file: string) => join(TEST_DIR, file),
33
+ ensureDataDir: () => {},
34
+ getSocketPath: () => join(TEST_DIR, 'vellum.sock'),
35
+ getPidPath: () => join(TEST_DIR, 'vellum.pid'),
36
+ getDbPath: () => join(TEST_DIR, 'data', 'assistant.db'),
37
+ getLogPath: () => join(TEST_DIR, 'logs', 'vellum.log'),
38
+ getHistoryPath: () => join(TEST_DIR, 'history'),
39
+ getHooksDir: () => join(TEST_DIR, 'hooks'),
40
+ getIpcBlobDir: () => join(TEST_DIR, 'ipc-blobs'),
41
+ getSandboxRootDir: () => join(TEST_DIR, 'sandbox'),
42
+ getSandboxWorkingDir: () => TEST_DIR,
43
+ getInterfacesDir: () => join(TEST_DIR, 'interfaces'),
44
+ isMacOS: () => false,
45
+ isLinux: () => false,
46
+ isWindows: () => false,
47
+ getPlatformName: () => 'linux',
48
+ getClipboardCommand: () => null,
49
+ removeSocketFile: () => {},
50
+ migratePath: () => {},
51
+ migrateToWorkspaceLayout: () => {},
52
+ migrateToDataLayout: () => {},
53
+ }));
54
+
55
+ mock.module('../util/logger.js', () => ({
56
+ getLogger: () => new Proxy({} as Record<string, unknown>, {
57
+ get: () => () => {},
58
+ }),
59
+ isDebug: () => false,
60
+ truncateForLog: (v: string) => v,
61
+ }));
62
+
63
+ mock.module('../config/loader.js', () => ({
64
+ getConfig: () => currentConfig,
65
+ }));
66
+
67
+ mock.module('../config/user-reference.js', () => ({
68
+ resolveUserReference: () => 'TestUser',
69
+ }));
70
+
71
+ mock.module('../security/parental-control-store.js', () => ({
72
+ getParentalControlSettings: () => ({ enabled: false, contentRestrictions: [], blockedToolCategories: [] }),
73
+ }));
74
+
75
+ mock.module('../tools/credentials/metadata-store.js', () => ({
76
+ listCredentialMetadata: () => [],
77
+ }));
78
+
79
+ const { buildSystemPrompt } = await import('../config/system-prompt.js');
80
+
81
+ // ---------------------------------------------------------------------------
82
+ // Setup / Teardown
83
+ // ---------------------------------------------------------------------------
84
+
85
+ beforeEach(() => {
86
+ mkdirSync(TEST_DIR, { recursive: true });
87
+ // Reset config to defaults before each test
88
+ currentConfig = {
89
+ sandbox: { enabled: false, backend: 'native' },
90
+ featureFlags: {},
91
+ };
92
+ });
93
+
94
+ afterEach(() => {
95
+ if (existsSync(TEST_DIR)) {
96
+ rmSync(TEST_DIR, { recursive: true, force: true });
97
+ }
98
+ });
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // Helpers
102
+ // ---------------------------------------------------------------------------
103
+
104
+ function createSkillOnDisk(id: string, name: string, description: string): void {
105
+ const skillsDir = join(TEST_DIR, 'skills');
106
+ mkdirSync(join(skillsDir, id), { recursive: true });
107
+ writeFileSync(
108
+ join(skillsDir, id, 'SKILL.md'),
109
+ `---\nname: "${name}"\ndescription: "${description}"\n---\n\nInstructions for ${id}.\n`,
110
+ );
111
+ // Ensure SKILLS.md index references the skill
112
+ const indexPath = join(skillsDir, 'SKILLS.md');
113
+ const existing = existsSync(indexPath) ? readFileSync(indexPath, 'utf-8') : '';
114
+ writeFileSync(indexPath, existing + `- ${id}\n`);
115
+ }
116
+
117
+ // ---------------------------------------------------------------------------
118
+ // System prompt — feature flag filtering
119
+ // ---------------------------------------------------------------------------
120
+
121
+ describe('buildSystemPrompt feature flag filtering', () => {
122
+ test('flag OFF skill does not appear in <available_skills> section', () => {
123
+ createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
124
+ createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
125
+
126
+ currentConfig = {
127
+ sandbox: { enabled: false, backend: 'native' },
128
+ featureFlags: { [DECLARED_LEGACY_KEY]: false },
129
+ };
130
+
131
+ const result = buildSystemPrompt();
132
+
133
+ // twitter should be visible, declared flagged skill should not
134
+ expect(result).toContain('id="twitter"');
135
+ expect(result).not.toContain(`id="${DECLARED_SKILL_ID}"`);
136
+ });
137
+
138
+ test('all skills visible when featureFlags is empty', () => {
139
+ createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
140
+ createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
141
+
142
+ currentConfig = {
143
+ sandbox: { enabled: false, backend: 'native' },
144
+ featureFlags: {},
145
+ };
146
+
147
+ const result = buildSystemPrompt();
148
+
149
+ expect(result).toContain(`id="${DECLARED_SKILL_ID}"`);
150
+ expect(result).toContain('id="twitter"');
151
+ });
152
+
153
+ test('flagged-off skills hidden even when all workspace skill flags are OFF', () => {
154
+ createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
155
+ createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
156
+
157
+ currentConfig = {
158
+ sandbox: { enabled: false, backend: 'native' },
159
+ featureFlags: {
160
+ [DECLARED_LEGACY_KEY]: false,
161
+ 'skills.twitter.enabled': false,
162
+ },
163
+ };
164
+
165
+ const result = buildSystemPrompt();
166
+
167
+ // Both are hidden: declared skill via registry, undeclared via persisted override.
168
+ expect(result).not.toContain(`id="${DECLARED_SKILL_ID}"`);
169
+ expect(result).not.toContain('id="twitter"');
170
+ });
171
+ });
@@ -0,0 +1,188 @@
1
+ import { describe, expect, test } from 'bun:test';
2
+
3
+ import { isAssistantFeatureFlagEnabled } from '../config/assistant-feature-flags.js';
4
+ import type { AssistantConfig } from '../config/schema.js';
5
+ import { isSkillFeatureEnabled, resolveSkillStates } from '../config/skill-state.js';
6
+ import type { SkillSummary } from '../config/skills.js';
7
+
8
+ const DECLARED_FLAG_KEY = 'feature_flags.hatch-new-assistant.enabled';
9
+ const DECLARED_SKILL_ID = 'hatch-new-assistant';
10
+ // ---------------------------------------------------------------------------
11
+ // Helpers
12
+ // ---------------------------------------------------------------------------
13
+
14
+ /** Create a minimal AssistantConfig with optional feature flag values. */
15
+ function makeConfig(overrides: Partial<AssistantConfig> = {}): AssistantConfig {
16
+ return {
17
+ skills: {
18
+ entries: {},
19
+ load: { extraDirs: [], watch: true, watchDebounceMs: 250 },
20
+ install: { nodeManager: 'npm' },
21
+ allowBundled: null,
22
+ remoteProviders: { skillssh: { enabled: true }, clawhub: { enabled: true } },
23
+ remotePolicy: { blockSuspicious: true, blockMalware: true, maxSkillsShRisk: 'medium' },
24
+ },
25
+ ...overrides,
26
+ } as AssistantConfig;
27
+ }
28
+
29
+ /** Create a minimal SkillSummary for testing. */
30
+ function makeSkill(id: string, source: 'bundled' | 'managed' = 'bundled'): SkillSummary {
31
+ return {
32
+ id,
33
+ name: `${id} skill`,
34
+ description: `Description for ${id}`,
35
+ directoryPath: `/fake/skills/${id}`,
36
+ skillFilePath: `/fake/skills/${id}/SKILL.md`,
37
+ bundled: source === 'bundled',
38
+ userInvocable: true,
39
+ disableModelInvocation: false,
40
+ source,
41
+ };
42
+ }
43
+
44
+ // ---------------------------------------------------------------------------
45
+ // isSkillFeatureEnabled (legacy wrapper — backward compat)
46
+ // ---------------------------------------------------------------------------
47
+
48
+ describe('isSkillFeatureEnabled', () => {
49
+ test('returns true when no flag overrides', () => {
50
+ const config = makeConfig();
51
+ expect(isSkillFeatureEnabled(DECLARED_SKILL_ID, config)).toBe(true);
52
+ });
53
+
54
+ test('returns true when skill key is explicitly true', () => {
55
+ const config = makeConfig({
56
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: true },
57
+ });
58
+ expect(isSkillFeatureEnabled(DECLARED_SKILL_ID, config)).toBe(true);
59
+ });
60
+
61
+ test('returns false when skill key is explicitly false', () => {
62
+ const config = makeConfig({
63
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
64
+ });
65
+ expect(isSkillFeatureEnabled(DECLARED_SKILL_ID, config)).toBe(false);
66
+ });
67
+ });
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // isAssistantFeatureFlagEnabled (full canonical key)
71
+ // ---------------------------------------------------------------------------
72
+
73
+ describe('isAssistantFeatureFlagEnabled', () => {
74
+ test('returns true for unknown flags (open by default)', () => {
75
+ const config = makeConfig();
76
+ expect(isAssistantFeatureFlagEnabled('feature_flags.unknown.enabled', config)).toBe(true);
77
+ });
78
+
79
+ test('assistantFeatureFlagValues overrides registry default', () => {
80
+ const config = {
81
+ ...makeConfig(),
82
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
83
+ } as AssistantConfig;
84
+ expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(false);
85
+ });
86
+
87
+ test('falls back to registry default when no override', () => {
88
+ const config = makeConfig();
89
+ // hatch-new-assistant defaults to true in the registry
90
+ expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
91
+ });
92
+
93
+ test('respects persisted overrides for undeclared keys', () => {
94
+ const config = makeConfig({
95
+ assistantFeatureFlagValues: { 'feature_flags.browser.enabled': false },
96
+ });
97
+ expect(isAssistantFeatureFlagEnabled('feature_flags.browser.enabled', config)).toBe(false);
98
+ });
99
+
100
+ test('undeclared keys with no persisted override default to enabled', () => {
101
+ const config = makeConfig();
102
+ expect(isAssistantFeatureFlagEnabled('feature_flags.browser.enabled', config)).toBe(true);
103
+ });
104
+ });
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // resolveSkillStates — feature flag filtering
108
+ // ---------------------------------------------------------------------------
109
+
110
+ describe('resolveSkillStates with feature flags', () => {
111
+ test('flag OFF skill does not appear in resolved list', () => {
112
+ const catalog = [makeSkill(DECLARED_SKILL_ID), makeSkill('twitter')];
113
+ const config = makeConfig({
114
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
115
+ });
116
+
117
+ const resolved = resolveSkillStates(catalog, config);
118
+ const ids = resolved.map((r) => r.summary.id);
119
+
120
+ expect(ids).not.toContain(DECLARED_SKILL_ID);
121
+ expect(ids).toContain('twitter');
122
+ });
123
+
124
+ test('flag ON skill appears normally', () => {
125
+ const catalog = [makeSkill(DECLARED_SKILL_ID), makeSkill('twitter')];
126
+ const config = makeConfig({
127
+ assistantFeatureFlagValues: {
128
+ [DECLARED_FLAG_KEY]: true,
129
+ 'feature_flags.twitter.enabled': true,
130
+ },
131
+ });
132
+
133
+ const resolved = resolveSkillStates(catalog, config);
134
+ const ids = resolved.map((r) => r.summary.id);
135
+
136
+ expect(ids).toContain(DECLARED_SKILL_ID);
137
+ expect(ids).toContain('twitter');
138
+ });
139
+
140
+ test('missing flag key defaults to enabled', () => {
141
+ const catalog = [makeSkill(DECLARED_SKILL_ID)];
142
+ const config = makeConfig();
143
+
144
+ const resolved = resolveSkillStates(catalog, config);
145
+ expect(resolved.length).toBe(1);
146
+ expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
147
+ });
148
+
149
+ test('feature flag OFF takes precedence over user-enabled config entry', () => {
150
+ const catalog = [makeSkill(DECLARED_SKILL_ID)];
151
+ const config = makeConfig({
152
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
153
+ skills: {
154
+ entries: { [DECLARED_SKILL_ID]: { enabled: true } },
155
+ load: { extraDirs: [], watch: true, watchDebounceMs: 250 },
156
+ install: { nodeManager: 'npm' },
157
+ allowBundled: null,
158
+ remoteProviders: { skillssh: { enabled: true }, clawhub: { enabled: true } },
159
+ remotePolicy: { blockSuspicious: true, blockMalware: true, maxSkillsShRisk: 'medium' },
160
+ },
161
+ });
162
+
163
+ const resolved = resolveSkillStates(catalog, config);
164
+ // The skill should not appear at all — feature flag is a higher-priority gate
165
+ expect(resolved.length).toBe(0);
166
+ });
167
+
168
+ test('multiple skills with mixed flags — persisted overrides respected', () => {
169
+ const catalog = [
170
+ makeSkill(DECLARED_SKILL_ID),
171
+ makeSkill('twitter'),
172
+ makeSkill('deploy'),
173
+ ];
174
+ const config = makeConfig({
175
+ assistantFeatureFlagValues: {
176
+ [DECLARED_FLAG_KEY]: false,
177
+ 'feature_flags.deploy.enabled': false,
178
+ },
179
+ });
180
+
181
+ const resolved = resolveSkillStates(catalog, config);
182
+ const ids = resolved.map((r) => r.summary.id);
183
+
184
+ // Both declared (hatch-new-assistant) and undeclared (deploy) skills with
185
+ // persisted false overrides are filtered out; only twitter remains.
186
+ expect(ids).toEqual(['twitter']);
187
+ });
188
+ });
@@ -0,0 +1,141 @@
1
+ /**
2
+ * Tests that skill_load rejects loading a skill whose feature flag is OFF
3
+ * with a deterministic error message.
4
+ */
5
+ import { existsSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
6
+ import { tmpdir } from 'node:os';
7
+ import { join } from 'node:path';
8
+
9
+ import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
10
+
11
+ const TEST_DIR = join(tmpdir(), `vellum-skill-load-flag-test-${crypto.randomUUID()}`);
12
+
13
+ let currentConfig: Record<string, unknown> = {
14
+ featureFlags: {},
15
+ };
16
+
17
+ const DECLARED_SKILL_ID = 'hatch-new-assistant';
18
+ const DECLARED_LEGACY_KEY = 'skills.hatch-new-assistant.enabled';
19
+
20
+ const platformOverrides: Record<string, (...args: unknown[]) => unknown> = {
21
+ getRootDir: () => TEST_DIR,
22
+ getDataDir: () => TEST_DIR,
23
+ ensureDataDir: () => {},
24
+ getSocketPath: () => join(TEST_DIR, 'vellum.sock'),
25
+ getPidPath: () => join(TEST_DIR, 'vellum.pid'),
26
+ getDbPath: () => join(TEST_DIR, 'data', 'assistant.db'),
27
+ getLogPath: () => join(TEST_DIR, 'logs', 'vellum.log'),
28
+ getWorkspaceDir: () => join(TEST_DIR, 'workspace'),
29
+ getWorkspaceSkillsDir: () => join(TEST_DIR, 'skills'),
30
+ getWorkspaceConfigPath: () => join(TEST_DIR, 'workspace', 'config.json'),
31
+ getWorkspaceHooksDir: () => join(TEST_DIR, 'workspace', 'hooks'),
32
+ getWorkspacePromptPath: (f: unknown) => join(TEST_DIR, 'workspace', String(f)),
33
+ getInterfacesDir: () => join(TEST_DIR, 'interfaces'),
34
+ getHooksDir: () => join(TEST_DIR, 'hooks'),
35
+ getIpcBlobDir: () => join(TEST_DIR, 'blobs'),
36
+ getSandboxRootDir: () => join(TEST_DIR, 'sandbox'),
37
+ getSandboxWorkingDir: () => join(TEST_DIR, 'sandbox', 'work'),
38
+ getHistoryPath: () => join(TEST_DIR, 'history'),
39
+ getSessionTokenPath: () => join(TEST_DIR, 'session-token'),
40
+ readSessionToken: () => null,
41
+ getClipboardCommand: () => null,
42
+ isMacOS: () => process.platform === 'darwin',
43
+ isLinux: () => process.platform === 'linux',
44
+ isWindows: () => process.platform === 'win32',
45
+ getPlatformName: () => process.platform,
46
+ migratePath: () => {},
47
+ migrateToWorkspaceLayout: () => {},
48
+ migrateToDataLayout: () => {},
49
+ removeSocketFile: () => {},
50
+ };
51
+ mock.module('../util/platform.js', () => platformOverrides);
52
+
53
+ mock.module('../util/logger.js', () => ({
54
+ getLogger: () => new Proxy({} as Record<string, unknown>, {
55
+ get: () => () => {},
56
+ }),
57
+ }));
58
+
59
+ mock.module('../config/loader.js', () => ({
60
+ getConfig: () => currentConfig,
61
+ }));
62
+
63
+ await import('../tools/skills/load.js');
64
+ const { getTool } = await import('../tools/registry.js');
65
+
66
+ function writeSkill(skillId: string, name: string, description: string, body: string): void {
67
+ const skillDir = join(TEST_DIR, 'skills', skillId);
68
+ mkdirSync(skillDir, { recursive: true });
69
+ writeFileSync(
70
+ join(skillDir, 'SKILL.md'),
71
+ `---\nname: "${name}"\ndescription: "${description}"\n---\n\n${body}\n`,
72
+ );
73
+ }
74
+
75
+ async function executeSkillLoad(input: Record<string, unknown>): Promise<{ content: string; isError: boolean }> {
76
+ const tool = getTool('skill_load');
77
+ if (!tool) throw new Error('skill_load tool was not registered');
78
+
79
+ const result = await tool.execute(input, {
80
+ workingDir: '/tmp',
81
+ sessionId: 'session-1',
82
+ conversationId: 'conversation-1',
83
+ });
84
+ return { content: result.content, isError: result.isError };
85
+ }
86
+
87
+ describe('skill_load feature flag enforcement', () => {
88
+ beforeEach(() => {
89
+ mkdirSync(join(TEST_DIR, 'skills'), { recursive: true });
90
+ currentConfig = { featureFlags: {} };
91
+ });
92
+
93
+ afterEach(() => {
94
+ if (existsSync(TEST_DIR)) {
95
+ rmSync(TEST_DIR, { recursive: true, force: true });
96
+ }
97
+ });
98
+
99
+ test('returns deterministic error for flag OFF skill', async () => {
100
+ writeSkill(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior', 'Use the feature.');
101
+ writeFileSync(join(TEST_DIR, 'skills', 'SKILLS.md'), `- ${DECLARED_SKILL_ID}\n`);
102
+
103
+ currentConfig = {
104
+ featureFlags: { [DECLARED_LEGACY_KEY]: false },
105
+ };
106
+
107
+ const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
108
+
109
+ expect(result.isError).toBe(true);
110
+ expect(result.content).toContain('disabled by feature flag');
111
+ expect(result.content).toContain(DECLARED_SKILL_ID);
112
+ });
113
+
114
+ test('loads skill normally when flag is ON', async () => {
115
+ writeSkill(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior', 'Use the feature.');
116
+ writeFileSync(join(TEST_DIR, 'skills', 'SKILLS.md'), `- ${DECLARED_SKILL_ID}\n`);
117
+
118
+ currentConfig = {
119
+ featureFlags: { [DECLARED_LEGACY_KEY]: true },
120
+ };
121
+
122
+ const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
123
+
124
+ expect(result.isError).toBe(false);
125
+ expect(result.content).toContain('Skill: Hatch New Assistant');
126
+ });
127
+
128
+ test('loads skill normally when flag key is absent (defaults to enabled)', async () => {
129
+ writeSkill(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior', 'Use the feature.');
130
+ writeFileSync(join(TEST_DIR, 'skills', 'SKILLS.md'), `- ${DECLARED_SKILL_ID}\n`);
131
+
132
+ currentConfig = {
133
+ featureFlags: {},
134
+ };
135
+
136
+ const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
137
+
138
+ expect(result.isError).toBe(false);
139
+ expect(result.content).toContain('Skill: Hatch New Assistant');
140
+ });
141
+ });
@@ -36,6 +36,7 @@ const TOPLEVEL_CATALOG = join(TOPLEVEL_SKILLS_DIR, 'catalog.json');
36
36
  // ---------------------------------------------------------------------------
37
37
 
38
38
  const TOPLEVEL_ONLY_SKILLS = new Set([
39
+ 'doordash',
39
40
  'google-oauth-setup',
40
41
  'notion',
41
42
  'notion-oauth-setup',