@vellumai/assistant 0.3.19 → 0.3.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/ARCHITECTURE.md +151 -15
  2. package/Dockerfile +1 -0
  3. package/README.md +40 -4
  4. package/docs/architecture/integrations.md +7 -11
  5. package/package.json +1 -1
  6. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +54 -0
  7. package/src/__tests__/approval-primitive.test.ts +540 -0
  8. package/src/__tests__/assistant-feature-flag-guard.test.ts +206 -0
  9. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +198 -0
  10. package/src/__tests__/assistant-feature-flags-integration.test.ts +272 -0
  11. package/src/__tests__/call-controller.test.ts +439 -108
  12. package/src/__tests__/channel-invite-transport.test.ts +264 -0
  13. package/src/__tests__/cli.test.ts +42 -1
  14. package/src/__tests__/config-schema.test.ts +11 -127
  15. package/src/__tests__/config-watcher.test.ts +0 -8
  16. package/src/__tests__/daemon-lifecycle.test.ts +1 -0
  17. package/src/__tests__/daemon-server-session-init.test.ts +8 -2
  18. package/src/__tests__/diff.test.ts +22 -0
  19. package/src/__tests__/guardian-action-copy-generator.test.ts +5 -0
  20. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +300 -32
  21. package/src/__tests__/guardian-action-late-reply.test.ts +546 -1
  22. package/src/__tests__/guardian-actions-endpoint.test.ts +774 -0
  23. package/src/__tests__/guardian-control-plane-policy.test.ts +36 -3
  24. package/src/__tests__/guardian-dispatch.test.ts +124 -0
  25. package/src/__tests__/guardian-grant-minting.test.ts +6 -17
  26. package/src/__tests__/inbound-invite-redemption.test.ts +367 -0
  27. package/src/__tests__/invite-redemption-service.test.ts +306 -0
  28. package/src/__tests__/ipc-snapshot.test.ts +57 -0
  29. package/src/__tests__/notification-decision-fallback.test.ts +88 -0
  30. package/src/__tests__/sandbox-diagnostics.test.ts +6 -249
  31. package/src/__tests__/sandbox-host-parity.test.ts +6 -13
  32. package/src/__tests__/scoped-approval-grants.test.ts +6 -6
  33. package/src/__tests__/scoped-grant-security-matrix.test.ts +5 -4
  34. package/src/__tests__/script-proxy-session-manager.test.ts +1 -19
  35. package/src/__tests__/session-load-history-repair.test.ts +169 -2
  36. package/src/__tests__/session-runtime-assembly.test.ts +33 -5
  37. package/src/__tests__/skill-feature-flags-integration.test.ts +171 -0
  38. package/src/__tests__/skill-feature-flags.test.ts +188 -0
  39. package/src/__tests__/skill-load-feature-flag.test.ts +141 -0
  40. package/src/__tests__/skill-mirror-parity.test.ts +1 -0
  41. package/src/__tests__/skill-projection-feature-flag.test.ts +363 -0
  42. package/src/__tests__/system-prompt.test.ts +1 -1
  43. package/src/__tests__/terminal-sandbox.test.ts +142 -9
  44. package/src/__tests__/terminal-tools.test.ts +2 -93
  45. package/src/__tests__/thread-seed-composer.test.ts +18 -0
  46. package/src/__tests__/tool-approval-handler.test.ts +350 -0
  47. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +8 -10
  48. package/src/__tests__/voice-scoped-grant-consumer.test.ts +46 -84
  49. package/src/agent/loop.ts +36 -1
  50. package/src/approvals/approval-primitive.ts +381 -0
  51. package/src/approvals/guardian-decision-primitive.ts +191 -0
  52. package/src/calls/call-controller.ts +252 -209
  53. package/src/calls/call-domain.ts +44 -6
  54. package/src/calls/guardian-dispatch.ts +48 -0
  55. package/src/calls/types.ts +1 -1
  56. package/src/calls/voice-session-bridge.ts +46 -30
  57. package/src/cli/core-commands.ts +0 -4
  58. package/src/cli.ts +76 -34
  59. package/src/config/__tests__/feature-flag-registry-guard.test.ts +179 -0
  60. package/src/config/assistant-feature-flags.ts +162 -0
  61. package/src/config/bundled-skills/api-mapping/icon.svg +18 -0
  62. package/src/config/bundled-skills/messaging/TOOLS.json +30 -0
  63. package/src/config/bundled-skills/messaging/tools/slack-delete-message.ts +24 -0
  64. package/src/config/bundled-skills/notifications/SKILL.md +1 -1
  65. package/src/config/bundled-skills/reminder/SKILL.md +49 -2
  66. package/src/config/bundled-skills/time-based-actions/SKILL.md +49 -2
  67. package/src/config/bundled-skills/voice-setup/SKILL.md +122 -0
  68. package/src/config/core-schema.ts +1 -1
  69. package/src/config/env-registry.ts +10 -0
  70. package/src/config/feature-flag-registry.json +61 -0
  71. package/src/config/loader.ts +22 -1
  72. package/src/config/sandbox-schema.ts +0 -39
  73. package/src/config/schema.ts +6 -2
  74. package/src/config/skill-state.ts +34 -0
  75. package/src/config/skills-schema.ts +0 -1
  76. package/src/config/skills.ts +9 -0
  77. package/src/config/system-prompt.ts +110 -46
  78. package/src/config/templates/SOUL.md +1 -1
  79. package/src/config/types.ts +19 -1
  80. package/src/config/vellum-skills/catalog.json +1 -1
  81. package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
  82. package/src/config/vellum-skills/sms-setup/SKILL.md +1 -1
  83. package/src/config/vellum-skills/telegram-setup/SKILL.md +1 -1
  84. package/src/config/vellum-skills/trusted-contacts/SKILL.md +104 -3
  85. package/src/config/vellum-skills/twilio-setup/SKILL.md +1 -1
  86. package/src/daemon/config-watcher.ts +0 -1
  87. package/src/daemon/daemon-control.ts +1 -1
  88. package/src/daemon/guardian-invite-intent.ts +124 -0
  89. package/src/daemon/handlers/avatar.ts +68 -0
  90. package/src/daemon/handlers/browser.ts +2 -2
  91. package/src/daemon/handlers/guardian-actions.ts +120 -0
  92. package/src/daemon/handlers/index.ts +4 -0
  93. package/src/daemon/handlers/sessions.ts +19 -0
  94. package/src/daemon/handlers/shared.ts +3 -1
  95. package/src/daemon/install-cli-launchers.ts +58 -13
  96. package/src/daemon/ipc-contract/guardian-actions.ts +53 -0
  97. package/src/daemon/ipc-contract/sessions.ts +8 -2
  98. package/src/daemon/ipc-contract/settings.ts +25 -2
  99. package/src/daemon/ipc-contract-inventory.json +10 -0
  100. package/src/daemon/ipc-contract.ts +4 -0
  101. package/src/daemon/lifecycle.ts +6 -2
  102. package/src/daemon/main.ts +1 -0
  103. package/src/daemon/server.ts +1 -0
  104. package/src/daemon/session-lifecycle.ts +52 -7
  105. package/src/daemon/session-memory.ts +45 -0
  106. package/src/daemon/session-process.ts +258 -432
  107. package/src/daemon/session-runtime-assembly.ts +12 -0
  108. package/src/daemon/session-skill-tools.ts +14 -1
  109. package/src/daemon/session-tool-setup.ts +5 -0
  110. package/src/daemon/session.ts +11 -0
  111. package/src/daemon/tool-side-effects.ts +35 -9
  112. package/src/index.ts +0 -2
  113. package/src/memory/conversation-display-order-migration.ts +44 -0
  114. package/src/memory/conversation-queries.ts +2 -0
  115. package/src/memory/conversation-store.ts +91 -0
  116. package/src/memory/db-init.ts +5 -1
  117. package/src/memory/embedding-local.ts +13 -8
  118. package/src/memory/guardian-action-store.ts +125 -2
  119. package/src/memory/ingress-invite-store.ts +95 -1
  120. package/src/memory/migrations/035-guardian-action-supersession.ts +23 -0
  121. package/src/memory/migrations/index.ts +2 -1
  122. package/src/memory/schema.ts +5 -1
  123. package/src/memory/scoped-approval-grants.ts +14 -5
  124. package/src/messaging/providers/slack/client.ts +12 -0
  125. package/src/messaging/providers/slack/types.ts +5 -0
  126. package/src/notifications/decision-engine.ts +49 -12
  127. package/src/notifications/emit-signal.ts +7 -0
  128. package/src/notifications/signal.ts +7 -0
  129. package/src/notifications/thread-seed-composer.ts +2 -1
  130. package/src/runtime/channel-approval-types.ts +16 -6
  131. package/src/runtime/channel-approvals.ts +19 -15
  132. package/src/runtime/channel-invite-transport.ts +85 -0
  133. package/src/runtime/channel-invite-transports/telegram.ts +105 -0
  134. package/src/runtime/guardian-action-grant-minter.ts +92 -35
  135. package/src/runtime/guardian-action-message-composer.ts +30 -0
  136. package/src/runtime/guardian-decision-types.ts +91 -0
  137. package/src/runtime/http-server.ts +23 -1
  138. package/src/runtime/ingress-service.ts +22 -0
  139. package/src/runtime/invite-redemption-service.ts +181 -0
  140. package/src/runtime/invite-redemption-templates.ts +39 -0
  141. package/src/runtime/routes/call-routes.ts +2 -1
  142. package/src/runtime/routes/guardian-action-routes.ts +206 -0
  143. package/src/runtime/routes/guardian-approval-interception.ts +66 -190
  144. package/src/runtime/routes/inbound-message-handler.ts +486 -394
  145. package/src/runtime/routes/pairing-routes.ts +4 -0
  146. package/src/security/encrypted-store.ts +31 -17
  147. package/src/security/keychain.ts +176 -2
  148. package/src/security/secure-keys.ts +97 -0
  149. package/src/security/tool-approval-digest.ts +1 -1
  150. package/src/tools/browser/browser-execution.ts +2 -2
  151. package/src/tools/browser/browser-manager.ts +46 -32
  152. package/src/tools/browser/browser-screencast.ts +2 -2
  153. package/src/tools/calls/call-start.ts +1 -1
  154. package/src/tools/executor.ts +22 -17
  155. package/src/tools/network/script-proxy/session-manager.ts +1 -5
  156. package/src/tools/skills/load.ts +22 -8
  157. package/src/tools/system/avatar-generator.ts +119 -0
  158. package/src/tools/system/navigate-settings.ts +65 -0
  159. package/src/tools/system/open-system-settings.ts +75 -0
  160. package/src/tools/system/voice-config.ts +121 -32
  161. package/src/tools/terminal/backends/native.ts +40 -19
  162. package/src/tools/terminal/backends/types.ts +3 -3
  163. package/src/tools/terminal/parser.ts +1 -1
  164. package/src/tools/terminal/sandbox-diagnostics.ts +6 -87
  165. package/src/tools/terminal/sandbox.ts +1 -12
  166. package/src/tools/terminal/shell.ts +3 -31
  167. package/src/tools/tool-approval-handler.ts +141 -3
  168. package/src/tools/tool-manifest.ts +6 -0
  169. package/src/tools/types.ts +6 -0
  170. package/src/util/diff.ts +36 -13
  171. package/Dockerfile.sandbox +0 -5
  172. package/src/__tests__/doordash-client.test.ts +0 -187
  173. package/src/__tests__/doordash-session.test.ts +0 -154
  174. package/src/__tests__/signup-e2e.test.ts +0 -354
  175. package/src/__tests__/terminal-sandbox-docker.test.ts +0 -1065
  176. package/src/__tests__/terminal-sandbox.integration.test.ts +0 -180
  177. package/src/cli/doordash.ts +0 -1057
  178. package/src/config/bundled-skills/doordash/SKILL.md +0 -163
  179. package/src/config/templates/LOOKS.md +0 -25
  180. package/src/doordash/cart-queries.ts +0 -787
  181. package/src/doordash/client.ts +0 -1016
  182. package/src/doordash/order-queries.ts +0 -85
  183. package/src/doordash/queries.ts +0 -13
  184. package/src/doordash/query-extractor.ts +0 -94
  185. package/src/doordash/search-queries.ts +0 -203
  186. package/src/doordash/session.ts +0 -84
  187. package/src/doordash/store-queries.ts +0 -246
  188. package/src/doordash/types.ts +0 -367
  189. package/src/tools/terminal/backends/docker.ts +0 -379
@@ -335,11 +335,6 @@ describe('buildChannelAwarenessSection', () => {
335
335
  expect(section).toContain('computer-control permissions on non-dashboard');
336
336
  });
337
337
 
338
- test('includes guardian context contract for channel actors', () => {
339
- const section = buildChannelAwarenessSection();
340
- expect(section).toContain('<guardian_context>');
341
- expect(section).toContain('Never infer guardian status');
342
- });
343
338
  });
344
339
 
345
340
  // ---------------------------------------------------------------------------
@@ -569,6 +564,39 @@ describe('injectGuardianContext', () => {
569
564
  expect(text).toContain('source_channel: sms');
570
565
  expect(text).toContain('</guardian_context>');
571
566
  });
567
+
568
+ test('includes behavioral guidance for non-guardian actors', () => {
569
+ const ctx: GuardianRuntimeContext = {
570
+ sourceChannel: 'telegram',
571
+ actorRole: 'non-guardian',
572
+ guardianExternalUserId: 'guardian-user-1',
573
+ guardianChatId: 'chat-1',
574
+ requesterIdentifier: '@someone',
575
+ requesterExternalUserId: 'other-user-1',
576
+ requesterChatId: 'chat-2',
577
+ };
578
+
579
+ const result = injectGuardianContext(baseUserMessage, ctx);
580
+ const text = (result.content[0] as { type: 'text'; text: string }).text;
581
+ expect(text).toContain('non-guardian account');
582
+ expect(text).toContain('Do not explain the verification system');
583
+ });
584
+
585
+ test('omits non-guardian behavioral guidance for guardian actors', () => {
586
+ const ctx: GuardianRuntimeContext = {
587
+ sourceChannel: 'telegram',
588
+ actorRole: 'guardian',
589
+ guardianExternalUserId: 'guardian-user-1',
590
+ guardianChatId: 'chat-1',
591
+ requesterIdentifier: '@guardian',
592
+ requesterExternalUserId: 'guardian-user-1',
593
+ requesterChatId: 'chat-1',
594
+ };
595
+
596
+ const result = injectGuardianContext(baseUserMessage, ctx);
597
+ const text = (result.content[0] as { type: 'text'; text: string }).text;
598
+ expect(text).not.toContain('non-guardian account');
599
+ });
572
600
  });
573
601
 
574
602
  describe('stripGuardianContext', () => {
@@ -0,0 +1,171 @@
1
+ /**
2
+ * Integration tests for skill feature flag enforcement at system prompt,
3
+ * skill_load, and session-skill-tools projection layers.
4
+ */
5
+ import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
6
+ import { tmpdir } from 'node:os';
7
+ import { join } from 'node:path';
8
+
9
+ import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
10
+
11
+ // ---------------------------------------------------------------------------
12
+ // Test-scoped temp directory and config state
13
+ // ---------------------------------------------------------------------------
14
+
15
+ const TEST_DIR = join(tmpdir(), `vellum-skill-flags-test-${crypto.randomUUID()}`);
16
+
17
+ let currentConfig: Record<string, unknown> = {
18
+ sandbox: { enabled: false, backend: 'native' },
19
+ featureFlags: {},
20
+ };
21
+
22
+ const DECLARED_SKILL_ID = 'hatch-new-assistant';
23
+ const DECLARED_LEGACY_KEY = 'skills.hatch-new-assistant.enabled';
24
+
25
+ mock.module('../util/platform.js', () => ({
26
+ getRootDir: () => TEST_DIR,
27
+ getDataDir: () => TEST_DIR,
28
+ getWorkspaceDir: () => TEST_DIR,
29
+ getWorkspaceConfigPath: () => join(TEST_DIR, 'config.json'),
30
+ getWorkspaceSkillsDir: () => join(TEST_DIR, 'skills'),
31
+ getWorkspaceHooksDir: () => join(TEST_DIR, 'hooks'),
32
+ getWorkspacePromptPath: (file: string) => join(TEST_DIR, file),
33
+ ensureDataDir: () => {},
34
+ getSocketPath: () => join(TEST_DIR, 'vellum.sock'),
35
+ getPidPath: () => join(TEST_DIR, 'vellum.pid'),
36
+ getDbPath: () => join(TEST_DIR, 'data', 'assistant.db'),
37
+ getLogPath: () => join(TEST_DIR, 'logs', 'vellum.log'),
38
+ getHistoryPath: () => join(TEST_DIR, 'history'),
39
+ getHooksDir: () => join(TEST_DIR, 'hooks'),
40
+ getIpcBlobDir: () => join(TEST_DIR, 'ipc-blobs'),
41
+ getSandboxRootDir: () => join(TEST_DIR, 'sandbox'),
42
+ getSandboxWorkingDir: () => TEST_DIR,
43
+ getInterfacesDir: () => join(TEST_DIR, 'interfaces'),
44
+ isMacOS: () => false,
45
+ isLinux: () => false,
46
+ isWindows: () => false,
47
+ getPlatformName: () => 'linux',
48
+ getClipboardCommand: () => null,
49
+ removeSocketFile: () => {},
50
+ migratePath: () => {},
51
+ migrateToWorkspaceLayout: () => {},
52
+ migrateToDataLayout: () => {},
53
+ }));
54
+
55
+ mock.module('../util/logger.js', () => ({
56
+ getLogger: () => new Proxy({} as Record<string, unknown>, {
57
+ get: () => () => {},
58
+ }),
59
+ isDebug: () => false,
60
+ truncateForLog: (v: string) => v,
61
+ }));
62
+
63
+ mock.module('../config/loader.js', () => ({
64
+ getConfig: () => currentConfig,
65
+ }));
66
+
67
+ mock.module('../config/user-reference.js', () => ({
68
+ resolveUserReference: () => 'TestUser',
69
+ }));
70
+
71
+ mock.module('../security/parental-control-store.js', () => ({
72
+ getParentalControlSettings: () => ({ enabled: false, contentRestrictions: [], blockedToolCategories: [] }),
73
+ }));
74
+
75
+ mock.module('../tools/credentials/metadata-store.js', () => ({
76
+ listCredentialMetadata: () => [],
77
+ }));
78
+
79
+ const { buildSystemPrompt } = await import('../config/system-prompt.js');
80
+
81
+ // ---------------------------------------------------------------------------
82
+ // Setup / Teardown
83
+ // ---------------------------------------------------------------------------
84
+
85
+ beforeEach(() => {
86
+ mkdirSync(TEST_DIR, { recursive: true });
87
+ // Reset config to defaults before each test
88
+ currentConfig = {
89
+ sandbox: { enabled: false, backend: 'native' },
90
+ featureFlags: {},
91
+ };
92
+ });
93
+
94
+ afterEach(() => {
95
+ if (existsSync(TEST_DIR)) {
96
+ rmSync(TEST_DIR, { recursive: true, force: true });
97
+ }
98
+ });
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // Helpers
102
+ // ---------------------------------------------------------------------------
103
+
104
+ function createSkillOnDisk(id: string, name: string, description: string): void {
105
+ const skillsDir = join(TEST_DIR, 'skills');
106
+ mkdirSync(join(skillsDir, id), { recursive: true });
107
+ writeFileSync(
108
+ join(skillsDir, id, 'SKILL.md'),
109
+ `---\nname: "${name}"\ndescription: "${description}"\n---\n\nInstructions for ${id}.\n`,
110
+ );
111
+ // Ensure SKILLS.md index references the skill
112
+ const indexPath = join(skillsDir, 'SKILLS.md');
113
+ const existing = existsSync(indexPath) ? readFileSync(indexPath, 'utf-8') : '';
114
+ writeFileSync(indexPath, existing + `- ${id}\n`);
115
+ }
116
+
117
+ // ---------------------------------------------------------------------------
118
+ // System prompt — feature flag filtering
119
+ // ---------------------------------------------------------------------------
120
+
121
+ describe('buildSystemPrompt feature flag filtering', () => {
122
+ test('flag OFF skill does not appear in <available_skills> section', () => {
123
+ createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
124
+ createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
125
+
126
+ currentConfig = {
127
+ sandbox: { enabled: false, backend: 'native' },
128
+ featureFlags: { [DECLARED_LEGACY_KEY]: false },
129
+ };
130
+
131
+ const result = buildSystemPrompt();
132
+
133
+ // twitter should be visible, declared flagged skill should not
134
+ expect(result).toContain('id="twitter"');
135
+ expect(result).not.toContain(`id="${DECLARED_SKILL_ID}"`);
136
+ });
137
+
138
+ test('all skills visible when featureFlags is empty', () => {
139
+ createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
140
+ createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
141
+
142
+ currentConfig = {
143
+ sandbox: { enabled: false, backend: 'native' },
144
+ featureFlags: {},
145
+ };
146
+
147
+ const result = buildSystemPrompt();
148
+
149
+ expect(result).toContain(`id="${DECLARED_SKILL_ID}"`);
150
+ expect(result).toContain('id="twitter"');
151
+ });
152
+
153
+ test('flagged-off skills hidden even when all workspace skill flags are OFF', () => {
154
+ createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
155
+ createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');
156
+
157
+ currentConfig = {
158
+ sandbox: { enabled: false, backend: 'native' },
159
+ featureFlags: {
160
+ [DECLARED_LEGACY_KEY]: false,
161
+ 'skills.twitter.enabled': false,
162
+ },
163
+ };
164
+
165
+ const result = buildSystemPrompt();
166
+
167
+ // Both are hidden: declared skill via registry, undeclared via persisted override.
168
+ expect(result).not.toContain(`id="${DECLARED_SKILL_ID}"`);
169
+ expect(result).not.toContain('id="twitter"');
170
+ });
171
+ });
@@ -0,0 +1,188 @@
1
+ import { describe, expect, test } from 'bun:test';
2
+
3
+ import { isAssistantFeatureFlagEnabled } from '../config/assistant-feature-flags.js';
4
+ import type { AssistantConfig } from '../config/schema.js';
5
+ import { isSkillFeatureEnabled, resolveSkillStates } from '../config/skill-state.js';
6
+ import type { SkillSummary } from '../config/skills.js';
7
+
8
+ const DECLARED_FLAG_KEY = 'feature_flags.hatch-new-assistant.enabled';
9
+ const DECLARED_SKILL_ID = 'hatch-new-assistant';
10
+ // ---------------------------------------------------------------------------
11
+ // Helpers
12
+ // ---------------------------------------------------------------------------
13
+
14
+ /** Create a minimal AssistantConfig with optional feature flag values. */
15
+ function makeConfig(overrides: Partial<AssistantConfig> = {}): AssistantConfig {
16
+ return {
17
+ skills: {
18
+ entries: {},
19
+ load: { extraDirs: [], watch: true, watchDebounceMs: 250 },
20
+ install: { nodeManager: 'npm' },
21
+ allowBundled: null,
22
+ remoteProviders: { skillssh: { enabled: true }, clawhub: { enabled: true } },
23
+ remotePolicy: { blockSuspicious: true, blockMalware: true, maxSkillsShRisk: 'medium' },
24
+ },
25
+ ...overrides,
26
+ } as AssistantConfig;
27
+ }
28
+
29
+ /** Create a minimal SkillSummary for testing. */
30
+ function makeSkill(id: string, source: 'bundled' | 'managed' = 'bundled'): SkillSummary {
31
+ return {
32
+ id,
33
+ name: `${id} skill`,
34
+ description: `Description for ${id}`,
35
+ directoryPath: `/fake/skills/${id}`,
36
+ skillFilePath: `/fake/skills/${id}/SKILL.md`,
37
+ bundled: source === 'bundled',
38
+ userInvocable: true,
39
+ disableModelInvocation: false,
40
+ source,
41
+ };
42
+ }
43
+
44
+ // ---------------------------------------------------------------------------
45
+ // isSkillFeatureEnabled (legacy wrapper — backward compat)
46
+ // ---------------------------------------------------------------------------
47
+
48
+ describe('isSkillFeatureEnabled', () => {
49
+ test('returns true when no flag overrides', () => {
50
+ const config = makeConfig();
51
+ expect(isSkillFeatureEnabled(DECLARED_SKILL_ID, config)).toBe(true);
52
+ });
53
+
54
+ test('returns true when skill key is explicitly true', () => {
55
+ const config = makeConfig({
56
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: true },
57
+ });
58
+ expect(isSkillFeatureEnabled(DECLARED_SKILL_ID, config)).toBe(true);
59
+ });
60
+
61
+ test('returns false when skill key is explicitly false', () => {
62
+ const config = makeConfig({
63
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
64
+ });
65
+ expect(isSkillFeatureEnabled(DECLARED_SKILL_ID, config)).toBe(false);
66
+ });
67
+ });
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // isAssistantFeatureFlagEnabled (full canonical key)
71
+ // ---------------------------------------------------------------------------
72
+
73
+ describe('isAssistantFeatureFlagEnabled', () => {
74
+ test('returns true for unknown flags (open by default)', () => {
75
+ const config = makeConfig();
76
+ expect(isAssistantFeatureFlagEnabled('feature_flags.unknown.enabled', config)).toBe(true);
77
+ });
78
+
79
+ test('assistantFeatureFlagValues overrides registry default', () => {
80
+ const config = {
81
+ ...makeConfig(),
82
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
83
+ } as AssistantConfig;
84
+ expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(false);
85
+ });
86
+
87
+ test('falls back to registry default when no override', () => {
88
+ const config = makeConfig();
89
+ // hatch-new-assistant defaults to true in the registry
90
+ expect(isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, config)).toBe(true);
91
+ });
92
+
93
+ test('respects persisted overrides for undeclared keys', () => {
94
+ const config = makeConfig({
95
+ assistantFeatureFlagValues: { 'feature_flags.browser.enabled': false },
96
+ });
97
+ expect(isAssistantFeatureFlagEnabled('feature_flags.browser.enabled', config)).toBe(false);
98
+ });
99
+
100
+ test('undeclared keys with no persisted override default to enabled', () => {
101
+ const config = makeConfig();
102
+ expect(isAssistantFeatureFlagEnabled('feature_flags.browser.enabled', config)).toBe(true);
103
+ });
104
+ });
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // resolveSkillStates — feature flag filtering
108
+ // ---------------------------------------------------------------------------
109
+
110
+ describe('resolveSkillStates with feature flags', () => {
111
+ test('flag OFF skill does not appear in resolved list', () => {
112
+ const catalog = [makeSkill(DECLARED_SKILL_ID), makeSkill('twitter')];
113
+ const config = makeConfig({
114
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
115
+ });
116
+
117
+ const resolved = resolveSkillStates(catalog, config);
118
+ const ids = resolved.map((r) => r.summary.id);
119
+
120
+ expect(ids).not.toContain(DECLARED_SKILL_ID);
121
+ expect(ids).toContain('twitter');
122
+ });
123
+
124
+ test('flag ON skill appears normally', () => {
125
+ const catalog = [makeSkill(DECLARED_SKILL_ID), makeSkill('twitter')];
126
+ const config = makeConfig({
127
+ assistantFeatureFlagValues: {
128
+ [DECLARED_FLAG_KEY]: true,
129
+ 'feature_flags.twitter.enabled': true,
130
+ },
131
+ });
132
+
133
+ const resolved = resolveSkillStates(catalog, config);
134
+ const ids = resolved.map((r) => r.summary.id);
135
+
136
+ expect(ids).toContain(DECLARED_SKILL_ID);
137
+ expect(ids).toContain('twitter');
138
+ });
139
+
140
+ test('missing flag key defaults to enabled', () => {
141
+ const catalog = [makeSkill(DECLARED_SKILL_ID)];
142
+ const config = makeConfig();
143
+
144
+ const resolved = resolveSkillStates(catalog, config);
145
+ expect(resolved.length).toBe(1);
146
+ expect(resolved[0].summary.id).toBe(DECLARED_SKILL_ID);
147
+ });
148
+
149
+ test('feature flag OFF takes precedence over user-enabled config entry', () => {
150
+ const catalog = [makeSkill(DECLARED_SKILL_ID)];
151
+ const config = makeConfig({
152
+ assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
153
+ skills: {
154
+ entries: { [DECLARED_SKILL_ID]: { enabled: true } },
155
+ load: { extraDirs: [], watch: true, watchDebounceMs: 250 },
156
+ install: { nodeManager: 'npm' },
157
+ allowBundled: null,
158
+ remoteProviders: { skillssh: { enabled: true }, clawhub: { enabled: true } },
159
+ remotePolicy: { blockSuspicious: true, blockMalware: true, maxSkillsShRisk: 'medium' },
160
+ },
161
+ });
162
+
163
+ const resolved = resolveSkillStates(catalog, config);
164
+ // The skill should not appear at all — feature flag is a higher-priority gate
165
+ expect(resolved.length).toBe(0);
166
+ });
167
+
168
+ test('multiple skills with mixed flags — persisted overrides respected', () => {
169
+ const catalog = [
170
+ makeSkill(DECLARED_SKILL_ID),
171
+ makeSkill('twitter'),
172
+ makeSkill('deploy'),
173
+ ];
174
+ const config = makeConfig({
175
+ assistantFeatureFlagValues: {
176
+ [DECLARED_FLAG_KEY]: false,
177
+ 'feature_flags.deploy.enabled': false,
178
+ },
179
+ });
180
+
181
+ const resolved = resolveSkillStates(catalog, config);
182
+ const ids = resolved.map((r) => r.summary.id);
183
+
184
+ // Both declared (hatch-new-assistant) and undeclared (deploy) skills with
185
+ // persisted false overrides are filtered out; only twitter remains.
186
+ expect(ids).toEqual(['twitter']);
187
+ });
188
+ });
@@ -0,0 +1,141 @@
1
+ /**
2
+ * Tests that skill_load rejects loading a skill whose feature flag is OFF
3
+ * with a deterministic error message.
4
+ */
5
+ import { existsSync, mkdirSync, rmSync, writeFileSync } from 'node:fs';
6
+ import { tmpdir } from 'node:os';
7
+ import { join } from 'node:path';
8
+
9
+ import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
10
+
11
+ const TEST_DIR = join(tmpdir(), `vellum-skill-load-flag-test-${crypto.randomUUID()}`);
12
+
13
+ let currentConfig: Record<string, unknown> = {
14
+ featureFlags: {},
15
+ };
16
+
17
+ const DECLARED_SKILL_ID = 'hatch-new-assistant';
18
+ const DECLARED_LEGACY_KEY = 'skills.hatch-new-assistant.enabled';
19
+
20
+ const platformOverrides: Record<string, (...args: unknown[]) => unknown> = {
21
+ getRootDir: () => TEST_DIR,
22
+ getDataDir: () => TEST_DIR,
23
+ ensureDataDir: () => {},
24
+ getSocketPath: () => join(TEST_DIR, 'vellum.sock'),
25
+ getPidPath: () => join(TEST_DIR, 'vellum.pid'),
26
+ getDbPath: () => join(TEST_DIR, 'data', 'assistant.db'),
27
+ getLogPath: () => join(TEST_DIR, 'logs', 'vellum.log'),
28
+ getWorkspaceDir: () => join(TEST_DIR, 'workspace'),
29
+ getWorkspaceSkillsDir: () => join(TEST_DIR, 'skills'),
30
+ getWorkspaceConfigPath: () => join(TEST_DIR, 'workspace', 'config.json'),
31
+ getWorkspaceHooksDir: () => join(TEST_DIR, 'workspace', 'hooks'),
32
+ getWorkspacePromptPath: (f: unknown) => join(TEST_DIR, 'workspace', String(f)),
33
+ getInterfacesDir: () => join(TEST_DIR, 'interfaces'),
34
+ getHooksDir: () => join(TEST_DIR, 'hooks'),
35
+ getIpcBlobDir: () => join(TEST_DIR, 'blobs'),
36
+ getSandboxRootDir: () => join(TEST_DIR, 'sandbox'),
37
+ getSandboxWorkingDir: () => join(TEST_DIR, 'sandbox', 'work'),
38
+ getHistoryPath: () => join(TEST_DIR, 'history'),
39
+ getSessionTokenPath: () => join(TEST_DIR, 'session-token'),
40
+ readSessionToken: () => null,
41
+ getClipboardCommand: () => null,
42
+ isMacOS: () => process.platform === 'darwin',
43
+ isLinux: () => process.platform === 'linux',
44
+ isWindows: () => process.platform === 'win32',
45
+ getPlatformName: () => process.platform,
46
+ migratePath: () => {},
47
+ migrateToWorkspaceLayout: () => {},
48
+ migrateToDataLayout: () => {},
49
+ removeSocketFile: () => {},
50
+ };
51
+ mock.module('../util/platform.js', () => platformOverrides);
52
+
53
+ mock.module('../util/logger.js', () => ({
54
+ getLogger: () => new Proxy({} as Record<string, unknown>, {
55
+ get: () => () => {},
56
+ }),
57
+ }));
58
+
59
+ mock.module('../config/loader.js', () => ({
60
+ getConfig: () => currentConfig,
61
+ }));
62
+
63
+ await import('../tools/skills/load.js');
64
+ const { getTool } = await import('../tools/registry.js');
65
+
66
+ function writeSkill(skillId: string, name: string, description: string, body: string): void {
67
+ const skillDir = join(TEST_DIR, 'skills', skillId);
68
+ mkdirSync(skillDir, { recursive: true });
69
+ writeFileSync(
70
+ join(skillDir, 'SKILL.md'),
71
+ `---\nname: "${name}"\ndescription: "${description}"\n---\n\n${body}\n`,
72
+ );
73
+ }
74
+
75
+ async function executeSkillLoad(input: Record<string, unknown>): Promise<{ content: string; isError: boolean }> {
76
+ const tool = getTool('skill_load');
77
+ if (!tool) throw new Error('skill_load tool was not registered');
78
+
79
+ const result = await tool.execute(input, {
80
+ workingDir: '/tmp',
81
+ sessionId: 'session-1',
82
+ conversationId: 'conversation-1',
83
+ });
84
+ return { content: result.content, isError: result.isError };
85
+ }
86
+
87
+ describe('skill_load feature flag enforcement', () => {
88
+ beforeEach(() => {
89
+ mkdirSync(join(TEST_DIR, 'skills'), { recursive: true });
90
+ currentConfig = { featureFlags: {} };
91
+ });
92
+
93
+ afterEach(() => {
94
+ if (existsSync(TEST_DIR)) {
95
+ rmSync(TEST_DIR, { recursive: true, force: true });
96
+ }
97
+ });
98
+
99
+ test('returns deterministic error for flag OFF skill', async () => {
100
+ writeSkill(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior', 'Use the feature.');
101
+ writeFileSync(join(TEST_DIR, 'skills', 'SKILLS.md'), `- ${DECLARED_SKILL_ID}\n`);
102
+
103
+ currentConfig = {
104
+ featureFlags: { [DECLARED_LEGACY_KEY]: false },
105
+ };
106
+
107
+ const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
108
+
109
+ expect(result.isError).toBe(true);
110
+ expect(result.content).toContain('disabled by feature flag');
111
+ expect(result.content).toContain(DECLARED_SKILL_ID);
112
+ });
113
+
114
+ test('loads skill normally when flag is ON', async () => {
115
+ writeSkill(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior', 'Use the feature.');
116
+ writeFileSync(join(TEST_DIR, 'skills', 'SKILLS.md'), `- ${DECLARED_SKILL_ID}\n`);
117
+
118
+ currentConfig = {
119
+ featureFlags: { [DECLARED_LEGACY_KEY]: true },
120
+ };
121
+
122
+ const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
123
+
124
+ expect(result.isError).toBe(false);
125
+ expect(result.content).toContain('Skill: Hatch New Assistant');
126
+ });
127
+
128
+ test('loads skill normally when flag key is absent (defaults to enabled)', async () => {
129
+ writeSkill(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior', 'Use the feature.');
130
+ writeFileSync(join(TEST_DIR, 'skills', 'SKILLS.md'), `- ${DECLARED_SKILL_ID}\n`);
131
+
132
+ currentConfig = {
133
+ featureFlags: {},
134
+ };
135
+
136
+ const result = await executeSkillLoad({ skill: DECLARED_SKILL_ID });
137
+
138
+ expect(result.isError).toBe(false);
139
+ expect(result.content).toContain('Skill: Hatch New Assistant');
140
+ });
141
+ });
@@ -36,6 +36,7 @@ const TOPLEVEL_CATALOG = join(TOPLEVEL_SKILLS_DIR, 'catalog.json');
36
36
  // ---------------------------------------------------------------------------
37
37
 
38
38
  const TOPLEVEL_ONLY_SKILLS = new Set([
39
+ 'doordash',
39
40
  'google-oauth-setup',
40
41
  'notion',
41
42
  'notion-oauth-setup',