@vellumai/assistant 0.3.19 → 0.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. package/ARCHITECTURE.md +151 -15
  2. package/Dockerfile +1 -0
  3. package/README.md +40 -4
  4. package/bun.lock +139 -2
  5. package/docs/architecture/integrations.md +7 -11
  6. package/package.json +2 -1
  7. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +54 -0
  8. package/src/__tests__/approval-primitive.test.ts +540 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +206 -0
  10. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +198 -0
  11. package/src/__tests__/assistant-feature-flags-integration.test.ts +272 -0
  12. package/src/__tests__/call-controller.test.ts +439 -108
  13. package/src/__tests__/channel-invite-transport.test.ts +264 -0
  14. package/src/__tests__/cli.test.ts +42 -1
  15. package/src/__tests__/config-schema.test.ts +11 -127
  16. package/src/__tests__/config-watcher.test.ts +0 -8
  17. package/src/__tests__/daemon-lifecycle.test.ts +1 -0
  18. package/src/__tests__/daemon-server-session-init.test.ts +8 -2
  19. package/src/__tests__/diff.test.ts +22 -0
  20. package/src/__tests__/guardian-action-copy-generator.test.ts +5 -0
  21. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +300 -32
  22. package/src/__tests__/guardian-action-late-reply.test.ts +546 -1
  23. package/src/__tests__/guardian-actions-endpoint.test.ts +774 -0
  24. package/src/__tests__/guardian-control-plane-policy.test.ts +36 -3
  25. package/src/__tests__/guardian-dispatch.test.ts +124 -0
  26. package/src/__tests__/guardian-grant-minting.test.ts +6 -17
  27. package/src/__tests__/inbound-invite-redemption.test.ts +367 -0
  28. package/src/__tests__/invite-redemption-service.test.ts +306 -0
  29. package/src/__tests__/ipc-snapshot.test.ts +57 -0
  30. package/src/__tests__/notification-decision-fallback.test.ts +88 -0
  31. package/src/__tests__/sandbox-diagnostics.test.ts +6 -249
  32. package/src/__tests__/sandbox-host-parity.test.ts +6 -13
  33. package/src/__tests__/scoped-approval-grants.test.ts +6 -6
  34. package/src/__tests__/scoped-grant-security-matrix.test.ts +5 -4
  35. package/src/__tests__/script-proxy-session-manager.test.ts +1 -19
  36. package/src/__tests__/session-load-history-repair.test.ts +169 -2
  37. package/src/__tests__/session-runtime-assembly.test.ts +33 -5
  38. package/src/__tests__/skill-feature-flags-integration.test.ts +171 -0
  39. package/src/__tests__/skill-feature-flags.test.ts +188 -0
  40. package/src/__tests__/skill-load-feature-flag.test.ts +141 -0
  41. package/src/__tests__/skill-mirror-parity.test.ts +1 -0
  42. package/src/__tests__/skill-projection-feature-flag.test.ts +363 -0
  43. package/src/__tests__/system-prompt.test.ts +1 -1
  44. package/src/__tests__/terminal-sandbox.test.ts +142 -9
  45. package/src/__tests__/terminal-tools.test.ts +2 -93
  46. package/src/__tests__/thread-seed-composer.test.ts +18 -0
  47. package/src/__tests__/tool-approval-handler.test.ts +350 -0
  48. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +8 -10
  49. package/src/__tests__/voice-scoped-grant-consumer.test.ts +46 -84
  50. package/src/agent/loop.ts +36 -1
  51. package/src/approvals/approval-primitive.ts +381 -0
  52. package/src/approvals/guardian-decision-primitive.ts +191 -0
  53. package/src/calls/call-controller.ts +252 -209
  54. package/src/calls/call-domain.ts +44 -6
  55. package/src/calls/guardian-dispatch.ts +48 -0
  56. package/src/calls/types.ts +1 -1
  57. package/src/calls/voice-session-bridge.ts +46 -30
  58. package/src/cli/core-commands.ts +0 -4
  59. package/src/cli/mcp.ts +58 -0
  60. package/src/cli.ts +76 -34
  61. package/src/config/__tests__/feature-flag-registry-guard.test.ts +179 -0
  62. package/src/config/assistant-feature-flags.ts +162 -0
  63. package/src/config/bundled-skills/api-mapping/icon.svg +18 -0
  64. package/src/config/bundled-skills/messaging/TOOLS.json +30 -0
  65. package/src/config/bundled-skills/messaging/tools/slack-delete-message.ts +24 -0
  66. package/src/config/bundled-skills/notifications/SKILL.md +1 -1
  67. package/src/config/bundled-skills/reminder/SKILL.md +49 -2
  68. package/src/config/bundled-skills/time-based-actions/SKILL.md +49 -2
  69. package/src/config/bundled-skills/voice-setup/SKILL.md +122 -0
  70. package/src/config/core-schema.ts +1 -1
  71. package/src/config/env-registry.ts +10 -0
  72. package/src/config/feature-flag-registry.json +61 -0
  73. package/src/config/loader.ts +22 -1
  74. package/src/config/mcp-schema.ts +46 -0
  75. package/src/config/sandbox-schema.ts +0 -39
  76. package/src/config/schema.ts +18 -2
  77. package/src/config/skill-state.ts +34 -0
  78. package/src/config/skills-schema.ts +0 -1
  79. package/src/config/skills.ts +9 -0
  80. package/src/config/system-prompt.ts +110 -46
  81. package/src/config/templates/SOUL.md +1 -1
  82. package/src/config/types.ts +19 -1
  83. package/src/config/vellum-skills/catalog.json +1 -1
  84. package/src/config/vellum-skills/guardian-verify-setup/SKILL.md +1 -0
  85. package/src/config/vellum-skills/sms-setup/SKILL.md +1 -1
  86. package/src/config/vellum-skills/telegram-setup/SKILL.md +6 -5
  87. package/src/config/vellum-skills/trusted-contacts/SKILL.md +105 -3
  88. package/src/config/vellum-skills/twilio-setup/SKILL.md +1 -1
  89. package/src/daemon/config-watcher.ts +0 -1
  90. package/src/daemon/daemon-control.ts +1 -1
  91. package/src/daemon/guardian-invite-intent.ts +124 -0
  92. package/src/daemon/handlers/avatar.ts +68 -0
  93. package/src/daemon/handlers/browser.ts +2 -2
  94. package/src/daemon/handlers/guardian-actions.ts +120 -0
  95. package/src/daemon/handlers/index.ts +4 -0
  96. package/src/daemon/handlers/sessions.ts +19 -0
  97. package/src/daemon/handlers/shared.ts +3 -1
  98. package/src/daemon/install-cli-launchers.ts +58 -13
  99. package/src/daemon/ipc-contract/guardian-actions.ts +53 -0
  100. package/src/daemon/ipc-contract/sessions.ts +8 -2
  101. package/src/daemon/ipc-contract/settings.ts +25 -2
  102. package/src/daemon/ipc-contract-inventory.json +10 -0
  103. package/src/daemon/ipc-contract.ts +4 -0
  104. package/src/daemon/lifecycle.ts +14 -2
  105. package/src/daemon/main.ts +1 -0
  106. package/src/daemon/providers-setup.ts +26 -1
  107. package/src/daemon/server.ts +1 -0
  108. package/src/daemon/session-lifecycle.ts +52 -7
  109. package/src/daemon/session-memory.ts +45 -0
  110. package/src/daemon/session-process.ts +258 -432
  111. package/src/daemon/session-runtime-assembly.ts +12 -0
  112. package/src/daemon/session-skill-tools.ts +14 -1
  113. package/src/daemon/session-tool-setup.ts +5 -0
  114. package/src/daemon/session.ts +11 -0
  115. package/src/daemon/shutdown-handlers.ts +11 -0
  116. package/src/daemon/tool-side-effects.ts +35 -9
  117. package/src/index.ts +2 -2
  118. package/src/mcp/client.ts +152 -0
  119. package/src/mcp/manager.ts +139 -0
  120. package/src/memory/conversation-display-order-migration.ts +44 -0
  121. package/src/memory/conversation-queries.ts +2 -0
  122. package/src/memory/conversation-store.ts +91 -0
  123. package/src/memory/db-init.ts +5 -1
  124. package/src/memory/embedding-local.ts +13 -8
  125. package/src/memory/guardian-action-store.ts +125 -2
  126. package/src/memory/ingress-invite-store.ts +95 -1
  127. package/src/memory/migrations/035-guardian-action-supersession.ts +23 -0
  128. package/src/memory/migrations/index.ts +2 -1
  129. package/src/memory/schema.ts +5 -1
  130. package/src/memory/scoped-approval-grants.ts +14 -5
  131. package/src/messaging/providers/slack/client.ts +12 -0
  132. package/src/messaging/providers/slack/types.ts +5 -0
  133. package/src/notifications/decision-engine.ts +49 -12
  134. package/src/notifications/emit-signal.ts +7 -0
  135. package/src/notifications/signal.ts +7 -0
  136. package/src/notifications/thread-seed-composer.ts +2 -1
  137. package/src/runtime/channel-approval-types.ts +16 -6
  138. package/src/runtime/channel-approvals.ts +19 -15
  139. package/src/runtime/channel-invite-transport.ts +85 -0
  140. package/src/runtime/channel-invite-transports/telegram.ts +105 -0
  141. package/src/runtime/guardian-action-grant-minter.ts +92 -35
  142. package/src/runtime/guardian-action-message-composer.ts +30 -0
  143. package/src/runtime/guardian-decision-types.ts +91 -0
  144. package/src/runtime/http-server.ts +23 -1
  145. package/src/runtime/ingress-service.ts +22 -0
  146. package/src/runtime/invite-redemption-service.ts +181 -0
  147. package/src/runtime/invite-redemption-templates.ts +39 -0
  148. package/src/runtime/routes/call-routes.ts +2 -1
  149. package/src/runtime/routes/guardian-action-routes.ts +206 -0
  150. package/src/runtime/routes/guardian-approval-interception.ts +66 -190
  151. package/src/runtime/routes/identity-routes.ts +73 -0
  152. package/src/runtime/routes/inbound-message-handler.ts +486 -394
  153. package/src/runtime/routes/pairing-routes.ts +4 -0
  154. package/src/security/encrypted-store.ts +31 -17
  155. package/src/security/keychain.ts +176 -2
  156. package/src/security/secure-keys.ts +97 -0
  157. package/src/security/tool-approval-digest.ts +1 -1
  158. package/src/tools/browser/browser-execution.ts +2 -2
  159. package/src/tools/browser/browser-manager.ts +46 -32
  160. package/src/tools/browser/browser-screencast.ts +2 -2
  161. package/src/tools/calls/call-start.ts +1 -1
  162. package/src/tools/executor.ts +22 -17
  163. package/src/tools/mcp/mcp-tool-factory.ts +100 -0
  164. package/src/tools/network/script-proxy/session-manager.ts +1 -5
  165. package/src/tools/registry.ts +64 -1
  166. package/src/tools/skills/load.ts +22 -8
  167. package/src/tools/system/avatar-generator.ts +119 -0
  168. package/src/tools/system/navigate-settings.ts +65 -0
  169. package/src/tools/system/open-system-settings.ts +75 -0
  170. package/src/tools/system/voice-config.ts +121 -32
  171. package/src/tools/terminal/backends/native.ts +40 -19
  172. package/src/tools/terminal/backends/types.ts +3 -3
  173. package/src/tools/terminal/parser.ts +1 -1
  174. package/src/tools/terminal/sandbox-diagnostics.ts +6 -87
  175. package/src/tools/terminal/sandbox.ts +1 -12
  176. package/src/tools/terminal/shell.ts +3 -31
  177. package/src/tools/tool-approval-handler.ts +141 -3
  178. package/src/tools/tool-manifest.ts +6 -0
  179. package/src/tools/types.ts +10 -2
  180. package/src/util/diff.ts +36 -13
  181. package/Dockerfile.sandbox +0 -5
  182. package/src/__tests__/doordash-client.test.ts +0 -187
  183. package/src/__tests__/doordash-session.test.ts +0 -154
  184. package/src/__tests__/signup-e2e.test.ts +0 -354
  185. package/src/__tests__/terminal-sandbox-docker.test.ts +0 -1065
  186. package/src/__tests__/terminal-sandbox.integration.test.ts +0 -180
  187. package/src/cli/doordash.ts +0 -1057
  188. package/src/config/bundled-skills/doordash/SKILL.md +0 -163
  189. package/src/config/templates/LOOKS.md +0 -25
  190. package/src/doordash/cart-queries.ts +0 -787
  191. package/src/doordash/client.ts +0 -1016
  192. package/src/doordash/order-queries.ts +0 -85
  193. package/src/doordash/queries.ts +0 -13
  194. package/src/doordash/query-extractor.ts +0 -94
  195. package/src/doordash/search-queries.ts +0 -203
  196. package/src/doordash/session.ts +0 -84
  197. package/src/doordash/store-queries.ts +0 -246
  198. package/src/doordash/types.ts +0 -367
  199. package/src/tools/terminal/backends/docker.ts +0 -379
@@ -0,0 +1,206 @@
1
+ import { execFileSync } from 'node:child_process';
2
+ import { readFileSync } from 'node:fs';
3
+ import { join } from 'node:path';
4
+
5
+ import { describe, expect, test } from 'bun:test';
6
+
7
+ /**
8
+ * Guard tests for assistant feature flags.
9
+ *
10
+ * 1. Key format validation: ensure production code uses the canonical
11
+ * `feature_flags.<flagId>.enabled` format, not the legacy
12
+ * `skills.<id>.enabled` format.
13
+ *
14
+ * 2. Registry checks: assistant-scope entries use the canonical key format
15
+ * and have required fields; the bundled registry copy matches meta/.
16
+ *
17
+ * See AGENTS.md "Assistant Feature Flags" for the full convention.
18
+ */
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Helpers
22
+ // ---------------------------------------------------------------------------
23
+
24
+ /** Resolve repo root (tests run from assistant/) */
25
+ function getRepoRoot(): string {
26
+ return join(process.cwd(), '..');
27
+ }
28
+
29
+ function getRegistryPath(): string {
30
+ return join(getRepoRoot(), 'meta', 'feature-flags', 'feature-flag-registry.json');
31
+ }
32
+
33
+ interface RegistryFlag {
34
+ id: string;
35
+ scope: string;
36
+ key: string;
37
+ label: string;
38
+ description: string;
39
+ defaultEnabled: boolean;
40
+ }
41
+
42
+ interface Registry {
43
+ version: number;
44
+ flags: RegistryFlag[];
45
+ }
46
+
47
+ function loadRegistry(): Registry {
48
+ const raw = readFileSync(getRegistryPath(), 'utf-8');
49
+ return JSON.parse(raw);
50
+ }
51
+
52
+ const CANONICAL_KEY_RE = /^feature_flags\.[a-z0-9][a-z0-9._-]*\.enabled$/;
53
+
54
+ /**
55
+ * Files allowed to contain the legacy `skills.<id>.enabled` key format.
56
+ * Keep this list minimal — only files that genuinely need to reference
57
+ * the legacy format for backward compatibility.
58
+ */
59
+ const LEGACY_KEY_ALLOWLIST = new Set([
60
+ // macOS client: fallback reads from legacy config section
61
+ 'clients/macos/vellum-assistant/Features/Settings/SettingsAccountTab.swift',
62
+ ]);
63
+
64
+ function isTestFile(filePath: string): boolean {
65
+ return (
66
+ filePath.includes('/__tests__/') ||
67
+ filePath.includes('/Tests/') ||
68
+ filePath.endsWith('.test.ts') ||
69
+ filePath.endsWith('.test.js') ||
70
+ filePath.endsWith('.spec.ts') ||
71
+ filePath.endsWith('.spec.js') ||
72
+ filePath.endsWith('Tests.swift')
73
+ );
74
+ }
75
+
76
+ // ---------------------------------------------------------------------------
77
+ // Test: key format validation
78
+ // ---------------------------------------------------------------------------
79
+
80
+ describe('assistant feature flag guard', () => {
81
+ test('no production files use legacy skills.<id>.enabled key format outside allowlist', () => {
82
+ // Search for the legacy key pattern in string literals across the codebase.
83
+ // The pattern matches quoted strings like 'skills.browser.enabled',
84
+ // "skills.browser.enabled", or `skills.browser.enabled`.
85
+ const pattern = `['"\`]skills\\.[a-z][a-z0-9._-]*\\.enabled['"\`]`;
86
+
87
+ let grepOutput = '';
88
+ try {
89
+ // Use execFileSync to avoid shell interpretation — the pattern contains
90
+ // backtick characters that would trigger command substitution in /bin/sh
91
+ // if passed through execSync's shell.
92
+ grepOutput = execFileSync(
93
+ 'git',
94
+ ['grep', '-lE', pattern, '--', '*.ts', '*.tsx', '*.js', '*.jsx', '*.swift'],
95
+ { encoding: 'utf-8', cwd: getRepoRoot() },
96
+ ).trim();
97
+ } catch (err) {
98
+ // Exit code 1 means no matches — happy path
99
+ if ((err as { status?: number }).status === 1) {
100
+ return;
101
+ }
102
+ throw err;
103
+ }
104
+
105
+ const files = grepOutput.split('\n').filter((f) => f.length > 0);
106
+ const violations = files.filter((f) => {
107
+ if (isTestFile(f)) return false;
108
+ if (LEGACY_KEY_ALLOWLIST.has(f)) return false;
109
+ return true;
110
+ });
111
+
112
+ if (violations.length > 0) {
113
+ const message = [
114
+ 'Found production files using the legacy `skills.<id>.enabled` key format.',
115
+ 'New code must use the canonical format: `feature_flags.<id>.enabled`.',
116
+ 'See AGENTS.md "Assistant Feature Flags" for the convention.',
117
+ '',
118
+ 'Violations:',
119
+ ...violations.map((f) => ` - ${f}`),
120
+ '',
121
+ 'To fix: replace `skills.<id>.enabled` with `feature_flags.<id>.enabled`.',
122
+ 'If backward-compat access is genuinely needed, add to LEGACY_KEY_ALLOWLIST in assistant-feature-flag-guard.test.ts.',
123
+ ].join('\n');
124
+
125
+ expect(violations, message).toEqual([]);
126
+ }
127
+ });
128
+
129
+ // ---------------------------------------------------------------------------
130
+ // Test: unified registry key format (assistant-scope only)
131
+ // ---------------------------------------------------------------------------
132
+
133
+ test('all assistant-scope keys in the unified registry use the canonical feature_flags.<id>.enabled format', () => {
134
+ const registry = loadRegistry();
135
+ const assistantFlags = registry.flags.filter((f) => f.scope === 'assistant');
136
+ const keys = assistantFlags.map((f) => f.key);
137
+
138
+ const violations = keys.filter((key) => !CANONICAL_KEY_RE.test(key));
139
+
140
+ if (violations.length > 0) {
141
+ const message = [
142
+ 'Found assistant-scope keys in the unified registry that do not match the canonical format.',
143
+ 'Expected format: feature_flags.<flagId>.enabled',
144
+ '',
145
+ 'Violations:',
146
+ ...violations.map((k) => ` - ${k}`),
147
+ ].join('\n');
148
+
149
+ expect(violations, message).toEqual([]);
150
+ }
151
+ });
152
+
153
+ // ---------------------------------------------------------------------------
154
+ // Test: registry entries have required fields
155
+ // ---------------------------------------------------------------------------
156
+
157
+ // ---------------------------------------------------------------------------
158
+ // Test: bundled registry copy stays in sync with canonical meta/ copy
159
+ // ---------------------------------------------------------------------------
160
+
161
+ test('bundled assistant/src/config/feature-flag-registry.json matches canonical meta/ copy', () => {
162
+ const canonicalPath = getRegistryPath();
163
+ const bundledPath = join(process.cwd(), 'src', 'config', 'feature-flag-registry.json');
164
+
165
+ const canonical = JSON.parse(readFileSync(canonicalPath, 'utf-8'));
166
+ const bundled = JSON.parse(readFileSync(bundledPath, 'utf-8'));
167
+
168
+ expect(bundled).toEqual(canonical);
169
+ });
170
+
171
+ // ---------------------------------------------------------------------------
172
+ // Test: registry entries have required fields
173
+ // ---------------------------------------------------------------------------
174
+
175
+ test('all assistant-scope entries in the unified registry have required fields', () => {
176
+ const registry = loadRegistry();
177
+ const assistantFlags = registry.flags.filter((f) => f.scope === 'assistant');
178
+ const violations: string[] = [];
179
+
180
+ for (const flag of assistantFlags) {
181
+ if (typeof flag.defaultEnabled !== 'boolean') {
182
+ violations.push(`${flag.key}: missing or non-boolean 'defaultEnabled'`);
183
+ }
184
+ if (typeof flag.description !== 'string' || flag.description.length === 0) {
185
+ violations.push(`${flag.key}: missing or empty 'description'`);
186
+ }
187
+ if (typeof flag.label !== 'string' || flag.label.length === 0) {
188
+ violations.push(`${flag.key}: missing or empty 'label'`);
189
+ }
190
+ if (typeof flag.id !== 'string' || flag.id.length === 0) {
191
+ violations.push(`${flag.key}: missing or empty 'id'`);
192
+ }
193
+ }
194
+
195
+ if (violations.length > 0) {
196
+ const message = [
197
+ 'Found entries in the unified registry with missing or invalid required fields.',
198
+ '',
199
+ 'Violations:',
200
+ ...violations.map((v) => ` - ${v}`),
201
+ ].join('\n');
202
+
203
+ expect(violations, message).toEqual([]);
204
+ }
205
+ });
206
+ });
@@ -0,0 +1,198 @@
1
+ /**
2
+ * Guard tests for assistant feature flag conventions:
3
+ *
4
+ * 1. Key format: all feature flag keys used in production code must follow the
5
+ * canonical `feature_flags.<flag_id>.enabled` format. Any remaining
6
+ * `skills.<id>.enabled` usage outside of migration/backward-compat code is
7
+ * flagged — including template literal forms like `skills.${skillId}.enabled`.
8
+ *
9
+ * 2. Declaration coverage: all literal keys passed to
10
+ * `isAssistantFeatureFlagEnabled('<key>', ...)` in production code must be
11
+ * declared in the unified registry. This keeps flag usage declarative while
12
+ * allowing skills to exist without corresponding feature flags.
13
+ */
14
+
15
+ import { execSync } from 'node:child_process';
16
+ import { readFileSync } from 'node:fs';
17
+ import { join } from 'node:path';
18
+
19
+ import { describe, expect, test } from 'bun:test';
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Helpers
23
+ // ---------------------------------------------------------------------------
24
+
25
+ /** Resolve the repo root from the assistant/ package directory. */
26
+ function getRepoRoot(): string {
27
+ return join(process.cwd(), '..');
28
+ }
29
+
30
+ interface RegistryFlag {
31
+ id: string;
32
+ scope: string;
33
+ key: string;
34
+ label: string;
35
+ description: string;
36
+ defaultEnabled: boolean;
37
+ }
38
+
39
+ interface Registry {
40
+ version: number;
41
+ flags: RegistryFlag[];
42
+ }
43
+
44
+ function loadRegistry(): Registry {
45
+ const registryPath = join(getRepoRoot(), 'meta', 'feature-flags', 'feature-flag-registry.json');
46
+ return JSON.parse(readFileSync(registryPath, 'utf-8'));
47
+ }
48
+
49
+ /**
50
+ * Files allowed to contain `skills.<id>.enabled` string literals because they
51
+ * are part of the backward-compat / migration layer or are test files
52
+ * exercising legacy paths.
53
+ */
54
+ const LEGACY_KEY_ALLOWLIST = new Set([
55
+ // Legacy wrapper (deprecated, kept for migration)
56
+ 'assistant/src/config/skill-state.ts',
57
+ // Type definitions documenting the legacy format
58
+ 'assistant/src/config/types.ts',
59
+ // macOS client: fallback reads from legacy config section
60
+ 'clients/macos/vellum-assistant/Features/Settings/SettingsAccountTab.swift',
61
+ ]);
62
+
63
+ function isTestFile(filePath: string): boolean {
64
+ return (
65
+ filePath.includes('/__tests__/') ||
66
+ filePath.endsWith('.test.ts') ||
67
+ filePath.endsWith('.test.js') ||
68
+ filePath.endsWith('.spec.ts') ||
69
+ filePath.endsWith('.spec.js')
70
+ );
71
+ }
72
+
73
+ // ---------------------------------------------------------------------------
74
+ // Guard 1: Key format — no stale `skills.<id>.enabled` in production code
75
+ // ---------------------------------------------------------------------------
76
+
77
+ describe('assistant feature flag key format guard', () => {
78
+ test('no production TypeScript files use skills.<id>.enabled outside allowlist', () => {
79
+ const repoRoot = getRepoRoot();
80
+
81
+ // Search .ts files under assistant/src/ and gateway/src/ for
82
+ // `skills.<id>.enabled` literals and `skills.${...}` template forms;
83
+ // test files and allowlisted paths are filtered out afterwards.
84
+ // NOTE(review): the pattern sits inside a double-quoted shell string,
85
+ // so /bin/sh turns `\$` into `$` before grep sees it; verify that the
86
+ // template-literal alternative still matches, or pass argv via execFileSync.
87
+ let grepOutput = '';
88
+ try {
89
+ grepOutput = execSync(
90
+ `git grep -lE "skills\\.[a-z0-9_-]+\\.enabled|skills\\.\\$\\{" -- 'assistant/src/**/*.ts' 'gateway/src/**/*.ts'`,
91
+ { encoding: 'utf-8', cwd: repoRoot },
92
+ ).trim();
93
+ } catch (err) {
94
+ // Exit code 1 means no matches — happy path
95
+ if ((err as { status?: number }).status === 1) {
96
+ return;
97
+ }
98
+ throw err;
99
+ }
100
+
101
+ const files = grepOutput.split('\n').filter((f) => f.length > 0);
102
+ const violations = files.filter((f) => {
103
+ if (isTestFile(f)) return false;
104
+ if (LEGACY_KEY_ALLOWLIST.has(f)) return false;
105
+ return true;
106
+ });
107
+
108
+ if (violations.length > 0) {
109
+ const message = [
110
+ 'Found production TypeScript files using legacy `skills.<id>.enabled` key format.',
111
+ 'Use the canonical `feature_flags.<id>.enabled` format instead.',
112
+ 'Call `isAssistantFeatureFlagEnabled(`feature_flags.${skillId}.enabled`, config)` to check skill flags.',
113
+ '',
114
+ 'Violations:',
115
+ ...violations.map((f) => ` - ${f}`),
116
+ '',
117
+ 'If this is a legitimate backward-compat path, add it to LEGACY_KEY_ALLOWLIST in',
118
+ 'assistant-feature-flag-guardrails.test.ts.',
119
+ ].join('\n');
120
+
121
+ expect(violations, message).toEqual([]);
122
+ }
123
+ });
124
+ });
125
+
126
+ // ---------------------------------------------------------------------------
127
+ // Guard 2: Declaration coverage for literal key usage
128
+ // ---------------------------------------------------------------------------
129
+
130
+ describe('assistant feature flag declaration coverage guard', () => {
131
+ test('all literal flag keys in isAssistantFeatureFlagEnabled calls are declared in the unified registry', () => {
132
+ const repoRoot = getRepoRoot();
133
+
134
+ // Load the unified registry and extract assistant-scope keys
135
+ const registry = loadRegistry();
136
+ const declaredKeys = new Set(
137
+ registry.flags
138
+ .filter((f) => f.scope === 'assistant')
139
+ .map((f) => f.key),
140
+ );
141
+
142
+ // Extract full keys from isAssistantFeatureFlagEnabled('<key>', ...) calls
143
+ // in non-test production files. We read each matching file and apply a
144
+ // multiline regex so that calls split across lines are still caught:
145
+ //
146
+ // isAssistantFeatureFlagEnabled(
147
+ // 'feature_flags.foo.enabled',
148
+ // config,
149
+ // )
150
+ //
151
+ const usedKeys = new Set<string>();
152
+ let matchingFiles = '';
153
+ try {
154
+ matchingFiles = execSync(
155
+ `git grep -l "isAssistantFeatureFlagEnabled" -- 'assistant/src/**/*.ts' ':!assistant/src/__tests__/**'`,
156
+ { encoding: 'utf-8', cwd: repoRoot },
157
+ ).trim();
158
+ } catch (err) {
159
+ if ((err as { status?: number }).status !== 1) throw err;
160
+ }
161
+
162
+ if (matchingFiles) {
163
+ // Multiline regex: match the function name immediately followed by the
164
+ // opening paren, then optional whitespace/newlines and a quoted string key.
165
+ const multilinePattern = /isAssistantFeatureFlagEnabled\(\s*['"]([^'"]+)['"]/g;
166
+ for (const relPath of matchingFiles.split('\n')) {
167
+ if (!relPath) continue;
168
+ const absPath = join(repoRoot, relPath);
169
+ const content = readFileSync(absPath, 'utf-8');
170
+ for (const match of content.matchAll(multilinePattern)) {
171
+ usedKeys.add(match[1]);
172
+ }
173
+ }
174
+ }
175
+
176
+ // Check that all used keys are declared in the registry
177
+ const undeclared: string[] = [];
178
+ for (const key of usedKeys) {
179
+ if (!declaredKeys.has(key)) {
180
+ undeclared.push(key);
181
+ }
182
+ }
183
+
184
+ if (undeclared.length > 0) {
185
+ const message = [
186
+ 'Found feature flag keys used in production code that are NOT declared in the unified registry.',
187
+ `Registry: meta/feature-flags/feature-flag-registry.json`,
188
+ '',
189
+ 'Undeclared keys:',
190
+ ...undeclared.map((k) => ` - ${k}`),
191
+ '',
192
+ 'To fix: add the missing key(s) to the unified registry with scope "assistant".',
193
+ ].join('\n');
194
+
195
+ expect(undeclared, message).toEqual([]);
196
+ }
197
+ });
198
+ });
@@ -0,0 +1,272 @@
1
+ /**
2
+ * Integration tests for assistant feature flag enforcement at system prompt,
3
+ * skill_load, and session-skill-tools projection layers.
4
+ *
5
+ * Covers:
6
+ * - Flag OFF blocks all exposure paths
7
+ * - Missing persisted value falls back to code default
8
+ * - New assistantFeatureFlagValues is the sole override mechanism
9
+ * - Undeclared keys default to enabled
10
+ */
11
+ import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
12
+ import { tmpdir } from 'node:os';
13
+ import { join } from 'node:path';
14
+
15
+ import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // Test-scoped temp directory and config state
19
+ // ---------------------------------------------------------------------------
20
+
21
// Unique per-run scratch directory under the OS temp dir. The mocked platform
// helpers in this file resolve their paths inside it.
const TEST_DIR = join(tmpdir(), 'vellum-asst-flags-test-' + crypto.randomUUID());

// Object returned by the mocked getConfig(); tests reassign it to simulate
// different persisted flag values.
let currentConfig: Record<string, unknown> = {
  sandbox: { enabled: false, backend: 'native' },
};

// Skill id used as the main fixture, and the flag key derived from it.
const DECLARED_SKILL_ID = 'hatch-new-assistant';
const DECLARED_FLAG_KEY = `feature_flags.${DECLARED_SKILL_ID}.enabled` as const;
29
+
30
// Point every path helper at TEST_DIR and replace the platform probes and
// migration hooks with inert stubs.
mock.module('../util/platform.js', () => {
  const atRoot = () => TEST_DIR;
  const under = (...segments: string[]) => () => join(TEST_DIR, ...segments);
  const noop = () => {};
  const no = () => false;

  return {
    getRootDir: atRoot,
    getDataDir: atRoot,
    getWorkspaceDir: atRoot,
    getWorkspaceConfigPath: under('config.json'),
    getWorkspaceSkillsDir: under('skills'),
    getWorkspaceHooksDir: under('hooks'),
    getWorkspacePromptPath: (file: string) => join(TEST_DIR, file),
    ensureDataDir: noop,
    getSocketPath: under('vellum.sock'),
    getPidPath: under('vellum.pid'),
    getDbPath: under('data', 'assistant.db'),
    getLogPath: under('logs', 'vellum.log'),
    getHistoryPath: under('history'),
    getHooksDir: under('hooks'),
    getIpcBlobDir: under('ipc-blobs'),
    getSandboxRootDir: under('sandbox'),
    getSandboxWorkingDir: atRoot,
    getInterfacesDir: under('interfaces'),
    isMacOS: no,
    isLinux: no,
    isWindows: no,
    getPlatformName: () => 'linux',
    getClipboardCommand: () => null,
    removeSocketFile: noop,
    migratePath: noop,
    migrateToWorkspaceLayout: noop,
    migrateToDataLayout: noop,
  };
});

// Silent logger: any property read on it yields a function that does nothing.
mock.module('../util/logger.js', () => {
  const makeSilentLogger = () =>
    new Proxy({} as Record<string, unknown>, {
      get: () => () => {},
    });

  return {
    getLogger: () => makeSilentLogger(),
    isDebug: () => false,
    truncateForLog: (v: string) => v,
  };
});

// getConfig() reads the module-level binding at call time, so each test can
// swap the config by reassigning currentConfig.
mock.module('../config/loader.js', () => {
  return {
    getConfig: () => currentConfig,
  };
});

mock.module('../config/user-reference.js', () => {
  return {
    resolveUserReference: () => 'TestUser',
  };
});

// Parental controls disabled with nothing restricted or blocked.
mock.module('../security/parental-control-store.js', () => {
  return {
    getParentalControlSettings: () => ({
      enabled: false,
      contentRestrictions: [],
      blockedToolCategories: [],
    }),
  };
});

// No stored credential metadata.
mock.module('../tools/credentials/metadata-store.js', () => {
  return {
    listCredentialMetadata: () => [],
  };
});
83
+
84
// Loaded with dynamic import() after the mock.module() registrations in this
// file — presumably so the modules under test pick up the mocked dependencies.
const buildSystemPrompt = (await import('../config/system-prompt.js')).buildSystemPrompt;
const isAssistantFeatureFlagEnabled = (await import('../config/assistant-feature-flags.js'))
  .isAssistantFeatureFlagEnabled;
const isSkillFeatureEnabled = (await import('../config/skill-state.js')).isSkillFeatureEnabled;
87
+
88
+ // ---------------------------------------------------------------------------
89
+ // Setup / Teardown
90
+ // ---------------------------------------------------------------------------
91
+
92
// Before every test: make sure TEST_DIR exists and reset the config to the
// baseline with no flag overrides.
beforeEach(() => {
  mkdirSync(TEST_DIR, { recursive: true });
  currentConfig = {
    sandbox: { enabled: false, backend: 'native' },
  };
});

// After every test: remove everything written under TEST_DIR. `force: true`
// makes rmSync ignore a path that is already gone, so no separate existence
// check is needed.
afterEach(() => {
  rmSync(TEST_DIR, { recursive: true, force: true });
});
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Helpers
107
+ // ---------------------------------------------------------------------------
108
+
109
/**
 * Writes a minimal skill fixture under `TEST_DIR/skills` and lists it in the
 * skills index.
 *
 * Creates `skills/<id>/SKILL.md` with `name`/`description` frontmatter followed
 * by a one-line body, then appends `- <id>` to `skills/SKILLS.md` (creating the
 * index if it does not exist yet).
 *
 * @param id - Skill directory name; also the entry written to the index.
 * @param name - Value of the frontmatter `name` field.
 * @param description - Value of the frontmatter `description` field.
 */
function createSkillOnDisk(id: string, name: string, description: string): void {
  const skillsDir = join(TEST_DIR, 'skills');
  const skillDir = join(skillsDir, id);
  mkdirSync(skillDir, { recursive: true });

  // JSON.stringify emits a double-quoted scalar with `"` and `\` escaped, so a
  // name or description containing quotes cannot terminate the value early.
  // For plain text the output is the same `"..."` form as before.
  const frontmatter = [
    '---',
    `name: ${JSON.stringify(name)}`,
    `description: ${JSON.stringify(description)}`,
    '---',
  ].join('\n');
  writeFileSync(join(skillDir, 'SKILL.md'), `${frontmatter}\n\nInstructions for ${id}.\n`);

  // Append to the index by re-writing the previous contents plus the new entry.
  const indexPath = join(skillsDir, 'SKILLS.md');
  const existing = existsSync(indexPath) ? readFileSync(indexPath, 'utf-8') : '';
  writeFileSync(indexPath, `${existing}- ${id}\n`);
}
120
+
121
+ // ---------------------------------------------------------------------------
122
+ // System prompt — assistant feature flag filtering
123
+ // ---------------------------------------------------------------------------
124
+
125
describe('buildSystemPrompt assistant feature flag filtering', () => {
  /** Marker the assertions search for to decide whether a skill id was rendered. */
  const skillTag = (id: string) => `id="${id}"`;

  /**
   * Installs a fresh config — with `assistantFeatureFlagValues` only when
   * overrides are supplied — and returns the rendered system prompt.
   */
  function renderPrompt(flagOverrides?: Record<string, boolean>) {
    currentConfig =
      flagOverrides === undefined
        ? { sandbox: { enabled: false, backend: 'native' } }
        : {
            sandbox: { enabled: false, backend: 'native' },
            assistantFeatureFlagValues: flagOverrides,
          };
    return buildSystemPrompt();
  }

  test('flag OFF skill does not appear in <available_skills> section', () => {
    createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
    createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');

    const prompt = renderPrompt({ [DECLARED_FLAG_KEY]: false });

    expect(prompt).toContain(skillTag('twitter'));
    expect(prompt).not.toContain(skillTag(DECLARED_SKILL_ID));
  });

  test('all skills visible when no flag overrides set', () => {
    createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
    createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');

    const prompt = renderPrompt();

    expect(prompt).toContain(skillTag(DECLARED_SKILL_ID));
    expect(prompt).toContain(skillTag('twitter'));
  });

  test('flagged-off skills hidden when all flags are OFF', () => {
    createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');
    createSkillOnDisk('twitter', 'Twitter', 'Post to X/Twitter');

    const prompt = renderPrompt({
      [DECLARED_FLAG_KEY]: false,
      'feature_flags.twitter.enabled': false,
    });

    expect(prompt).not.toContain(skillTag(DECLARED_SKILL_ID));
    expect(prompt).not.toContain(skillTag('twitter'));
  });

  test('assistantFeatureFlagValues overrides control visibility', () => {
    createSkillOnDisk(DECLARED_SKILL_ID, 'Hatch New Assistant', 'Toggle hatch new assistant behavior');

    const prompt = renderPrompt({ [DECLARED_FLAG_KEY]: true });

    expect(prompt).toContain(skillTag(DECLARED_SKILL_ID));
  });

  test('persisted overrides for undeclared flags are respected', () => {
    createSkillOnDisk('browser', 'Browser', 'Web browsing automation');

    const prompt = renderPrompt({ 'feature_flags.browser.enabled': false });

    // 'browser' is not in the defaults registry, but the user explicitly
    // disabled it, so the override must still be honored.
    expect(prompt).not.toContain(skillTag('browser'));
  });

  test('undeclared flags with no persisted override default to enabled', () => {
    createSkillOnDisk('browser', 'Browser', 'Web browsing automation');

    const prompt = renderPrompt();

    expect(prompt).toContain(skillTag('browser'));
  });
});
213
+
214
+ // ---------------------------------------------------------------------------
215
+ // Resolver unit tests (within integration context)
216
+ // ---------------------------------------------------------------------------
217
+
218
describe('isAssistantFeatureFlagEnabled', () => {
  const BROWSER_FLAG_KEY = 'feature_flags.browser.enabled';

  /** Config carrying only the given persisted flag overrides. */
  const configWith = (overrides: Record<string, boolean>) =>
    ({ assistantFeatureFlagValues: overrides }) as any;

  /** Config with no persisted flag values at all. */
  const emptyConfig = () => ({}) as any;

  test('reads from assistantFeatureFlagValues', () => {
    const enabled = isAssistantFeatureFlagEnabled(
      DECLARED_FLAG_KEY,
      configWith({ [DECLARED_FLAG_KEY]: true }),
    );

    expect(enabled).toBe(true);
  });

  test('explicit false override in assistantFeatureFlagValues', () => {
    const enabled = isAssistantFeatureFlagEnabled(
      DECLARED_FLAG_KEY,
      configWith({ [DECLARED_FLAG_KEY]: false }),
    );

    expect(enabled).toBe(false);
  });

  test('missing persisted value falls back to defaults registry defaultEnabled', () => {
    // With nothing persisted, the resolver is expected to use the defaults
    // registry entry for hatch-new-assistant (defaultEnabled: true).
    const enabled = isAssistantFeatureFlagEnabled(DECLARED_FLAG_KEY, emptyConfig());

    expect(enabled).toBe(true);
  });

  test('unknown flag defaults to true when no persisted override', () => {
    const enabled = isAssistantFeatureFlagEnabled(
      'feature_flags.unknown-skill.enabled',
      emptyConfig(),
    );

    expect(enabled).toBe(true);
  });

  test('undeclared flag respects persisted canonical override', () => {
    const enabled = isAssistantFeatureFlagEnabled(
      BROWSER_FLAG_KEY,
      configWith({ [BROWSER_FLAG_KEY]: false }),
    );

    expect(enabled).toBe(false);
  });
});
257
+
258
describe('legacy isSkillFeatureEnabled backward compat', () => {
  // The legacy helper is called with a skill id rather than a full flag key.
  test('delegates to the canonical resolver', () => {
    const disabledConfig = {
      assistantFeatureFlagValues: { [DECLARED_FLAG_KEY]: false },
    } as any;

    const enabled = isSkillFeatureEnabled(DECLARED_SKILL_ID, disabledConfig);

    expect(enabled).toBe(false);
  });

  test('enabled when no override set', () => {
    const emptyConfig = {} as any;

    const enabled = isSkillFeatureEnabled(DECLARED_SKILL_ID, emptyConfig);

    expect(enabled).toBe(true);
  });
});