@vellumai/assistant 0.5.9 → 0.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. package/AGENTS.md +9 -1
  2. package/ARCHITECTURE.md +48 -48
  3. package/Dockerfile +2 -0
  4. package/README.md +1 -1
  5. package/docs/architecture/integrations.md +6 -13
  6. package/docs/architecture/memory.md +7 -12
  7. package/docs/architecture/security.md +5 -5
  8. package/docs/credential-execution-service.md +9 -9
  9. package/docs/skills.md +1 -1
  10. package/node_modules/@vellumai/credential-storage/src/index.ts +2 -2
  11. package/node_modules/@vellumai/credential-storage/src/static-credentials.ts +1 -1
  12. package/openapi.yaml +7130 -0
  13. package/package.json +2 -1
  14. package/scripts/generate-openapi.ts +562 -0
  15. package/src/__tests__/acp-session.test.ts +239 -44
  16. package/src/__tests__/assistant-feature-flag-guard.test.ts +8 -8
  17. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +5 -86
  18. package/src/__tests__/assistant-feature-flags-integration.test.ts +7 -14
  19. package/src/__tests__/browser-skill-endstate.test.ts +1 -1
  20. package/src/__tests__/btw-routes.test.ts +8 -0
  21. package/src/__tests__/bundled-skill-retrieval-guard.test.ts +10 -10
  22. package/src/__tests__/channel-approvals.test.ts +7 -7
  23. package/src/__tests__/channel-readiness-service.test.ts +41 -0
  24. package/src/__tests__/config-schema.test.ts +10 -2
  25. package/src/__tests__/context-memory-e2e.test.ts +2 -6
  26. package/src/__tests__/conversation-skill-tools.test.ts +1 -3
  27. package/src/__tests__/conversation-title-service.test.ts +2 -15
  28. package/src/__tests__/credential-execution-feature-gates.test.ts +4 -8
  29. package/src/__tests__/credential-execution-managed-contract.test.ts +8 -8
  30. package/src/__tests__/credential-security-e2e.test.ts +4 -4
  31. package/src/__tests__/credential-security-invariants.test.ts +3 -3
  32. package/src/__tests__/credentials-cli.test.ts +3 -3
  33. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +1 -1
  34. package/src/__tests__/gateway-only-guard.test.ts +3 -0
  35. package/src/__tests__/heartbeat-service.test.ts +35 -0
  36. package/src/__tests__/host-shell-tool.test.ts +1 -1
  37. package/src/__tests__/inline-skill-load-permissions.test.ts +3 -3
  38. package/src/__tests__/llm-request-log-turn-query.test.ts +64 -0
  39. package/src/__tests__/log-export-workspace.test.ts +1 -1
  40. package/src/__tests__/mcp-client-auth.test.ts +1 -1
  41. package/src/__tests__/memory-lifecycle-e2e.test.ts +2 -2
  42. package/src/__tests__/memory-recall-log-store.test.ts +182 -0
  43. package/src/__tests__/memory-recall-quality.test.ts +6 -8
  44. package/src/__tests__/memory-regressions.test.ts +53 -42
  45. package/src/__tests__/memory-retrieval.benchmark.test.ts +5 -9
  46. package/src/__tests__/messaging-skill-split.test.ts +2 -17
  47. package/src/__tests__/oauth-cli.test.ts +98 -551
  48. package/src/__tests__/platform-callback-registration.test.ts +119 -0
  49. package/src/__tests__/secret-ingress-channel.test.ts +261 -0
  50. package/src/__tests__/secret-ingress-cli.test.ts +201 -0
  51. package/src/__tests__/secret-ingress-http.test.ts +312 -0
  52. package/src/__tests__/secret-ingress.test.ts +283 -0
  53. package/src/__tests__/secret-onetime-send.test.ts +4 -4
  54. package/src/__tests__/skill-feature-flags-integration.test.ts +4 -4
  55. package/src/__tests__/skill-feature-flags.test.ts +11 -19
  56. package/src/__tests__/skill-load-feature-flag.test.ts +1 -1
  57. package/src/__tests__/skill-load-inline-command.test.ts +3 -3
  58. package/src/__tests__/skill-load-inline-includes.test.ts +2 -2
  59. package/src/__tests__/skill-memory.test.ts +2 -4
  60. package/src/__tests__/skill-projection-feature-flag.test.ts +2 -4
  61. package/src/__tests__/skill-projection.benchmark.test.ts +1 -3
  62. package/src/__tests__/skills.test.ts +16 -2
  63. package/src/__tests__/slack-channel-config.test.ts +1 -1
  64. package/src/__tests__/slack-skill.test.ts +5 -69
  65. package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +1 -1
  66. package/src/__tests__/workspace-migration-015-migrate-credentials-to-keychain.test.ts +5 -238
  67. package/src/__tests__/workspace-migration-016-migrate-credentials-from-keychain.test.ts +5 -206
  68. package/src/__tests__/workspace-migration-018-rekey-compound-credential-keys.test.ts +181 -0
  69. package/src/__tests__/workspace-migrations-runner.test.ts +15 -7
  70. package/src/acp/client-handler.ts +113 -31
  71. package/src/acp/session-manager.ts +29 -27
  72. package/src/approvals/guardian-request-resolvers.ts +1 -1
  73. package/src/cli/AGENTS.md +73 -0
  74. package/src/cli/commands/autonomy.ts +3 -5
  75. package/src/cli/commands/credential-execution.ts +1 -2
  76. package/src/cli/commands/credentials.ts +4 -4
  77. package/src/cli/commands/memory.ts +2 -3
  78. package/src/cli/commands/oauth/__tests__/connect.test.ts +785 -0
  79. package/src/cli/commands/oauth/__tests__/disconnect.test.ts +760 -0
  80. package/src/cli/commands/oauth/__tests__/mode.test.ts +672 -0
  81. package/src/cli/commands/oauth/__tests__/ping.test.ts +690 -0
  82. package/src/cli/commands/oauth/__tests__/status.test.ts +579 -0
  83. package/src/cli/commands/oauth/__tests__/token.test.ts +467 -0
  84. package/src/cli/commands/oauth/apps.ts +29 -11
  85. package/src/cli/commands/oauth/connect.ts +373 -0
  86. package/src/cli/commands/oauth/connections.ts +14 -493
  87. package/src/cli/commands/oauth/disconnect.ts +333 -0
  88. package/src/cli/commands/oauth/index.ts +62 -10
  89. package/src/cli/commands/oauth/mode.ts +263 -0
  90. package/src/cli/commands/oauth/ping.ts +222 -0
  91. package/src/cli/commands/oauth/providers.ts +30 -3
  92. package/src/cli/commands/oauth/request.ts +576 -0
  93. package/src/cli/commands/oauth/shared.ts +132 -0
  94. package/src/cli/commands/oauth/status.ts +202 -0
  95. package/src/cli/commands/oauth/token.ts +159 -0
  96. package/src/cli/commands/platform.ts +20 -14
  97. package/src/cli.ts +82 -17
  98. package/src/config/assistant-feature-flags.ts +74 -11
  99. package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
  100. package/src/config/bundled-skills/app-builder/tools/app-create.ts +1 -1
  101. package/src/config/bundled-skills/messaging/SKILL.md +13 -36
  102. package/src/config/bundled-skills/messaging/TOOLS.json +9 -9
  103. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +1 -1
  104. package/src/config/bundled-skills/notifications/SKILL.md +1 -1
  105. package/src/config/bundled-skills/schedule/SKILL.md +2 -2
  106. package/src/config/bundled-skills/settings/SKILL.md +5 -3
  107. package/src/config/bundled-skills/settings/TOOLS.json +17 -0
  108. package/src/config/bundled-skills/settings/tools/avatar-get.ts +50 -0
  109. package/src/config/bundled-skills/settings/tools/avatar-remove.ts +7 -0
  110. package/src/config/bundled-skills/settings/tools/avatar-update.ts +6 -1
  111. package/src/config/bundled-skills/settings/tools/identity-avatar.ts +55 -0
  112. package/src/config/bundled-skills/skills-catalog/SKILL.md +3 -3
  113. package/src/config/bundled-skills/slack/SKILL.md +58 -44
  114. package/src/config/bundled-tool-registry.ts +2 -19
  115. package/src/config/env.ts +5 -1
  116. package/src/config/feature-flag-registry.json +57 -41
  117. package/src/config/loader.ts +4 -0
  118. package/src/config/schemas/platform.ts +0 -8
  119. package/src/config/schemas/security.ts +9 -1
  120. package/src/config/schemas/services.ts +1 -1
  121. package/src/config/skill-state.ts +1 -3
  122. package/src/config/skills.ts +2 -4
  123. package/src/credential-execution/feature-gates.ts +9 -16
  124. package/src/credential-execution/process-manager.ts +12 -0
  125. package/src/daemon/config-watcher.ts +4 -0
  126. package/src/daemon/conversation-agent-loop-handlers.ts +10 -0
  127. package/src/daemon/conversation-agent-loop.ts +49 -2
  128. package/src/daemon/conversation-memory.ts +0 -1
  129. package/src/daemon/handlers/config-slack-channel.ts +43 -1
  130. package/src/daemon/handlers/conversations.ts +41 -33
  131. package/src/daemon/lifecycle.ts +28 -5
  132. package/src/daemon/message-types/acp.ts +0 -15
  133. package/src/daemon/message-types/memory.ts +0 -1
  134. package/src/daemon/message-types/messages.ts +9 -1
  135. package/src/daemon/message-types/schedules.ts +9 -0
  136. package/src/daemon/server.ts +19 -7
  137. package/src/email/feature-gate.ts +3 -3
  138. package/src/heartbeat/heartbeat-service.ts +48 -0
  139. package/src/inbound/platform-callback-registration.ts +61 -7
  140. package/src/mcp/mcp-oauth-provider.ts +3 -3
  141. package/src/memory/app-store.ts +3 -3
  142. package/src/memory/conversation-crud.ts +124 -0
  143. package/src/memory/conversation-title-service.ts +7 -17
  144. package/src/memory/db-init.ts +8 -0
  145. package/src/memory/embedding-local.ts +47 -2
  146. package/src/memory/indexer.ts +13 -10
  147. package/src/memory/items-extractor.ts +12 -4
  148. package/src/memory/job-utils.ts +5 -0
  149. package/src/memory/jobs-store.ts +10 -2
  150. package/src/memory/journal-memory.ts +6 -2
  151. package/src/memory/llm-request-log-store.ts +88 -21
  152. package/src/memory/memory-recall-log-store.ts +128 -0
  153. package/src/memory/migrations/194-memory-recall-logs.ts +50 -0
  154. package/src/memory/migrations/195-oauth-providers-ping-config.ts +23 -0
  155. package/src/memory/migrations/index.ts +2 -0
  156. package/src/memory/migrations/validate-migration-state.ts +14 -1
  157. package/src/memory/retriever.test.ts +4 -5
  158. package/src/memory/schema/infrastructure.ts +31 -0
  159. package/src/memory/schema/oauth.ts +3 -0
  160. package/src/messaging/providers/telegram-bot/adapter.ts +1 -1
  161. package/src/oauth/connect-orchestrator.ts +54 -0
  162. package/src/oauth/manual-token-connection.ts +5 -5
  163. package/src/oauth/oauth-store.ts +26 -5
  164. package/src/oauth/seed-providers.ts +10 -1
  165. package/src/permissions/checker.ts +2 -2
  166. package/src/permissions/trust-client.ts +2 -2
  167. package/src/platform/client.ts +2 -2
  168. package/src/prompts/journal-context.ts +6 -1
  169. package/src/providers/anthropic/client.ts +143 -1
  170. package/src/runtime/auth/__tests__/middleware.test.ts +19 -0
  171. package/src/runtime/auth/route-policy.ts +0 -1
  172. package/src/runtime/btw-sidechain.ts +7 -1
  173. package/src/runtime/channel-approvals.ts +2 -2
  174. package/src/runtime/channel-readiness-service.ts +30 -7
  175. package/src/runtime/http-router.ts +31 -0
  176. package/src/runtime/http-server.ts +21 -4
  177. package/src/runtime/http-types.ts +2 -0
  178. package/src/runtime/pending-interactions.ts +21 -3
  179. package/src/runtime/routes/acp-routes.ts +46 -28
  180. package/src/runtime/routes/app-management-routes.ts +123 -0
  181. package/src/runtime/routes/app-routes.ts +31 -0
  182. package/src/runtime/routes/approval-routes.ts +108 -3
  183. package/src/runtime/routes/attachment-routes.ts +45 -0
  184. package/src/runtime/routes/avatar-routes.ts +16 -0
  185. package/src/runtime/routes/brain-graph-routes.ts +18 -0
  186. package/src/runtime/routes/btw-routes.ts +20 -0
  187. package/src/runtime/routes/call-routes.ts +81 -0
  188. package/src/runtime/routes/channel-readiness-routes.ts +48 -7
  189. package/src/runtime/routes/channel-routes.ts +18 -0
  190. package/src/runtime/routes/channel-verification-routes.ts +49 -1
  191. package/src/runtime/routes/contact-routes.ts +77 -0
  192. package/src/runtime/routes/conversation-attention-routes.ts +37 -0
  193. package/src/runtime/routes/conversation-management-routes.ts +94 -0
  194. package/src/runtime/routes/conversation-query-routes.ts +78 -0
  195. package/src/runtime/routes/conversation-routes.ts +115 -38
  196. package/src/runtime/routes/conversation-starter-routes.ts +29 -0
  197. package/src/runtime/routes/debug-routes.ts +23 -0
  198. package/src/runtime/routes/diagnostics-routes.ts +30 -0
  199. package/src/runtime/routes/documents-routes.ts +42 -0
  200. package/src/runtime/routes/events-routes.ts +10 -0
  201. package/src/runtime/routes/global-search-routes.ts +35 -0
  202. package/src/runtime/routes/guardian-action-routes.ts +47 -2
  203. package/src/runtime/routes/guardian-approval-prompt.ts +77 -2
  204. package/src/runtime/routes/heartbeat-routes.ts +278 -0
  205. package/src/runtime/routes/host-bash-routes.ts +16 -1
  206. package/src/runtime/routes/host-cu-routes.ts +23 -1
  207. package/src/runtime/routes/host-file-routes.ts +18 -1
  208. package/src/runtime/routes/identity-routes.ts +35 -0
  209. package/src/runtime/routes/inbound-message-handler.ts +46 -25
  210. package/src/runtime/routes/inbound-stages/secret-ingress-check.ts +30 -2
  211. package/src/runtime/routes/inbound-stages/transcribe-audio.ts +1 -2
  212. package/src/runtime/routes/integrations/twilio.ts +32 -22
  213. package/src/runtime/routes/invite-routes.ts +83 -0
  214. package/src/runtime/routes/log-export-routes.ts +14 -0
  215. package/src/runtime/routes/memory-item-routes.ts +99 -1
  216. package/src/runtime/routes/migration-rollback-routes.ts +25 -0
  217. package/src/runtime/routes/migration-routes.ts +40 -0
  218. package/src/runtime/routes/notification-routes.ts +20 -0
  219. package/src/runtime/routes/oauth-apps.ts +11 -3
  220. package/src/runtime/routes/pairing-routes.ts +15 -0
  221. package/src/runtime/routes/recording-routes.ts +72 -0
  222. package/src/runtime/routes/schedule-routes.ts +77 -5
  223. package/src/runtime/routes/secret-routes.ts +63 -1
  224. package/src/runtime/routes/settings-routes.ts +91 -1
  225. package/src/runtime/routes/skills-routes.ts +98 -16
  226. package/src/runtime/routes/subagents-routes.ts +38 -3
  227. package/src/runtime/routes/surface-action-routes.ts +66 -24
  228. package/src/runtime/routes/surface-content-routes.ts +20 -0
  229. package/src/runtime/routes/telemetry-routes.ts +12 -0
  230. package/src/runtime/routes/trace-event-routes.ts +25 -0
  231. package/src/runtime/routes/trust-rules-routes.ts +46 -0
  232. package/src/runtime/routes/tts-routes.ts +15 -4
  233. package/src/runtime/routes/upgrade-broadcast-routes.ts +38 -0
  234. package/src/runtime/routes/usage-routes.ts +59 -0
  235. package/src/runtime/routes/watch-routes.ts +28 -0
  236. package/src/runtime/routes/work-items-routes.ts +59 -0
  237. package/src/runtime/routes/workspace-commit-routes.ts +12 -0
  238. package/src/runtime/routes/workspace-routes.ts +102 -0
  239. package/src/schedule/scheduler.ts +7 -1
  240. package/src/security/AGENTS.md +7 -0
  241. package/src/security/credential-backend.ts +1 -1
  242. package/src/security/encrypted-store.ts +3 -3
  243. package/src/security/oauth2.ts +55 -0
  244. package/src/security/secret-ingress.ts +174 -0
  245. package/src/security/secret-patterns.ts +133 -0
  246. package/src/security/secret-scanner.ts +28 -117
  247. package/src/signals/confirm.ts +12 -8
  248. package/src/signals/user-message.ts +18 -3
  249. package/src/skills/skill-memory.ts +1 -2
  250. package/src/tasks/task-runner.ts +7 -1
  251. package/src/tools/credentials/broker.ts +1 -1
  252. package/src/tools/credentials/metadata-store.ts +1 -1
  253. package/src/tools/credentials/vault.ts +2 -3
  254. package/src/tools/memory/definitions.ts +1 -1
  255. package/src/tools/memory/handlers.test.ts +2 -4
  256. package/src/tools/skills/load.ts +1 -1
  257. package/src/tools/terminal/safe-env.ts +7 -0
  258. package/src/tools/tool-manifest.ts +1 -1
  259. package/src/util/log-redact.ts +9 -34
  260. package/src/workspace/migrations/015-migrate-credentials-to-keychain.ts +13 -148
  261. package/src/workspace/migrations/016-migrate-credentials-from-keychain.ts +7 -145
  262. package/src/workspace/migrations/AGENTS.md +11 -0
  263. package/src/workspace/migrations/runner.ts +16 -6
  264. package/src/workspace/migrations/types.ts +7 -0
  265. package/docs/architecture/keychain-broker.md +0 -69
  266. package/src/__tests__/keychain-broker-client.test.ts +0 -800
  267. package/src/cli/commands/oauth/platform.ts +0 -525
  268. package/src/config/bundled-skills/slack/TOOLS.json +0 -272
  269. package/src/config/bundled-skills/slack/tools/shared.ts +0 -34
  270. package/src/config/bundled-skills/slack/tools/slack-add-reaction.ts +0 -27
  271. package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +0 -38
  272. package/src/config/bundled-skills/slack/tools/slack-channel-permissions.ts +0 -146
  273. package/src/config/bundled-skills/slack/tools/slack-configure-channels.ts +0 -105
  274. package/src/config/bundled-skills/slack/tools/slack-delete-message.ts +0 -26
  275. package/src/config/bundled-skills/slack/tools/slack-edit-message.ts +0 -27
  276. package/src/config/bundled-skills/slack/tools/slack-leave-channel.ts +0 -25
  277. package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +0 -372
  278. package/src/security/keychain-broker-client.ts +0 -446
@@ -1,11 +1,9 @@
1
1
  import { beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
- import {
4
- resolvePermission,
5
- VellumAcpClientHandler,
6
- } from "../acp/client-handler.js";
3
+ import { VellumAcpClientHandler } from "../acp/client-handler.js";
7
4
  import { AcpSessionManager } from "../acp/session-manager.js";
8
5
  import type { ServerMessage } from "../daemon/message-protocol.js";
6
+ import * as pendingInteractions from "../runtime/pending-interactions.js";
9
7
 
10
8
  // ---------------------------------------------------------------------------
11
9
  // VellumAcpClientHandler tests
@@ -14,18 +12,17 @@ import type { ServerMessage } from "../daemon/message-protocol.js";
14
12
  describe("VellumAcpClientHandler", () => {
15
13
  let sent: ServerMessage[];
16
14
  let sendToVellum: (msg: ServerMessage) => void;
17
- let pendingPermissions: Map<string, { resolve: (optionId: string) => void }>;
18
15
  let handler: VellumAcpClientHandler;
19
16
 
20
17
  beforeEach(() => {
21
18
  sent = [];
22
19
  sendToVellum = (msg) => sent.push(msg);
23
- pendingPermissions = new Map();
24
20
  handler = new VellumAcpClientHandler(
25
21
  "session-1",
26
22
  sendToVellum,
27
- pendingPermissions,
23
+ "conv-parent",
28
24
  );
25
+ pendingInteractions.clear();
29
26
  });
30
27
 
31
28
  describe("sessionUpdate", () => {
@@ -152,7 +149,7 @@ describe("VellumAcpClientHandler", () => {
152
149
  });
153
150
 
154
151
  describe("requestPermission", () => {
155
- test("sends permission request and resolves when permission is granted", async () => {
152
+ test("sends confirmation_request and resolves when permission is granted", async () => {
156
153
  const resultPromise = handler.requestPermission({
157
154
  toolCall: {
158
155
  title: "Run command",
@@ -165,52 +162,135 @@ describe("VellumAcpClientHandler", () => {
165
162
  ],
166
163
  } as any);
167
164
 
168
- // Should have sent a permission request
165
+ // Should have sent a standard confirmation_request with ACP context
169
166
  expect(sent).toHaveLength(1);
170
167
  const msg = sent[0] as any;
171
- expect(msg.type).toBe("acp_permission_request");
172
- expect(msg.acpSessionId).toBe("session-1");
173
- expect(msg.toolTitle).toBe("Run command");
174
- expect(msg.toolKind).toBe("execute");
175
- expect(msg.options).toHaveLength(2);
176
-
177
- // A pending permission should exist
178
- expect(pendingPermissions.size).toBe(1);
168
+ expect(msg.type).toBe("confirmation_request");
169
+ expect(msg.toolName).toBe("ACP Agent: Run command");
170
+ expect(msg.riskLevel).toBe("medium"); // ACP defaults to medium
171
+ expect(msg.persistentDecisionsAllowed).toBe(false);
172
+ expect(msg.allowlistOptions).toEqual([]);
173
+ // ACP-specific fields passed through for client rendering
174
+ expect(msg.acpToolKind).toBe("execute");
175
+ expect(msg.acpOptions).toEqual([
176
+ { optionId: "allow", name: "Allow", kind: "allow_once" },
177
+ { optionId: "deny", name: "Deny", kind: "reject_once" },
178
+ ]);
179
+
179
180
  const requestId = msg.requestId;
180
181
 
181
- // Resolve the permission
182
- resolvePermission(pendingPermissions, requestId, "allow");
182
+ // Resolve via the pendingInteractions tracker (same as POST /v1/confirm)
183
+ const interaction = pendingInteractions.resolve(requestId);
184
+ expect(interaction).toBeDefined();
185
+ expect(interaction!.kind).toBe("acp_confirmation");
186
+ interaction!.directResolve!("allow");
183
187
 
184
188
  const result = await resultPromise;
185
189
  expect(result).toEqual({
186
190
  outcome: { outcome: "selected", optionId: "allow" },
187
191
  });
188
- expect(pendingPermissions.size).toBe(0);
189
192
  });
190
- });
191
- });
192
193
 
193
- // ---------------------------------------------------------------------------
194
- // resolvePermission standalone tests
195
- // ---------------------------------------------------------------------------
194
+ test("maps deny decision to reject_once option", async () => {
195
+ const resultPromise = handler.requestPermission({
196
+ toolCall: {
197
+ title: "Write file",
198
+ kind: "edit",
199
+ rawInput: { path: "/tmp/test.txt" },
200
+ },
201
+ options: [
202
+ { optionId: "opt-allow", name: "Allow", kind: "allow_once" },
203
+ { optionId: "opt-deny", name: "Deny", kind: "reject_once" },
204
+ ],
205
+ } as any);
196
206
 
197
- describe("resolvePermission", () => {
198
- test("resolves and removes the pending entry", () => {
199
- let resolved = "";
200
- const pending = new Map<string, { resolve: (id: string) => void }>();
201
- pending.set("req-1", { resolve: (id) => (resolved = id) });
207
+ const msg = sent[0] as any;
208
+ expect(msg.riskLevel).toBe("medium"); // ACP defaults to medium
202
209
 
203
- resolvePermission(pending, "req-1", "allow");
210
+ const interaction = pendingInteractions.resolve(msg.requestId);
211
+ interaction!.directResolve!("deny");
204
212
 
205
- expect(resolved).toBe("allow");
206
- expect(pending.size).toBe(0);
207
- });
213
+ const result = await resultPromise;
214
+ expect(result).toEqual({
215
+ outcome: { outcome: "selected", optionId: "opt-deny" },
216
+ });
217
+ });
218
+
219
+ test("defaults riskLevel to medium for all ACP permissions", async () => {
220
+ handler.requestPermission({
221
+ toolCall: {
222
+ title: "Read file",
223
+ kind: "read",
224
+ },
225
+ options: [{ optionId: "allow", name: "Allow", kind: "allow_once" }],
226
+ } as any);
227
+
228
+ const msg = sent[0] as any;
229
+ expect(msg.riskLevel).toBe("medium");
230
+ });
231
+
232
+ test("ACP registration survives sendToVellum overwrite (makeEventSender race)", async () => {
233
+ // Simulate makeEventSender: when sendToVellum is called with a
234
+ // confirmation_request, it overwrites the pendingInteractions entry
235
+ // with a normal "confirmation" (no directResolve). This is what
236
+ // happens in production because sendToVellum goes through the
237
+ // conversation's event sender.
238
+ const overwritingSend = (msg: ServerMessage) => {
239
+ sent.push(msg);
240
+ if ((msg as any).type === "confirmation_request") {
241
+ pendingInteractions.register((msg as any).requestId, {
242
+ conversation: {} as any, // fake conversation
243
+ conversationId: "conv-123",
244
+ kind: "confirmation",
245
+ confirmationDetails: {
246
+ toolName: (msg as any).toolName,
247
+ input: (msg as any).input,
248
+ riskLevel: (msg as any).riskLevel,
249
+ allowlistOptions: [],
250
+ scopeOptions: [],
251
+ },
252
+ // NO directResolve — this is the bug scenario
253
+ });
254
+ }
255
+ };
256
+
257
+ // Create handler with the overwriting sender
258
+ const racyHandler = new VellumAcpClientHandler(
259
+ "session-racy",
260
+ overwritingSend,
261
+ "conv-racy",
262
+ );
263
+
264
+ const resultPromise = racyHandler.requestPermission({
265
+ toolCall: {
266
+ title: "Write file",
267
+ kind: "edit",
268
+ rawInput: "test",
269
+ },
270
+ options: [
271
+ { optionId: "yes", name: "Allow", kind: "allow_once" },
272
+ { optionId: "no", name: "Deny", kind: "reject_once" },
273
+ ],
274
+ } as any);
208
275
 
209
- test("is a no-op when request ID is not found", () => {
210
- const pending = new Map<string, { resolve: (id: string) => void }>();
211
- // Should not throw
212
- resolvePermission(pending, "nonexistent", "allow");
213
- expect(pending.size).toBe(0);
276
+ const requestId = (sent[sent.length - 1] as any).requestId;
277
+
278
+ // The critical assertion: after requestPermission completes setup,
279
+ // the pendingInteractions entry must be the ACP one with directResolve,
280
+ // NOT the overwritten "confirmation" without it.
281
+ const interaction = pendingInteractions.resolve(requestId);
282
+ expect(interaction).toBeDefined();
283
+ expect(interaction!.kind).toBe("acp_confirmation");
284
+ expect(interaction!.directResolve).toBeDefined();
285
+
286
+ // Resolve it — this would fail silently if the overwrite won
287
+ interaction!.directResolve!("allow");
288
+
289
+ const result = await resultPromise;
290
+ expect(result).toEqual({
291
+ outcome: { outcome: "selected", optionId: "yes" },
292
+ });
293
+ });
214
294
  });
215
295
  });
216
296
 
@@ -270,11 +350,126 @@ describe("AcpSessionManager", () => {
270
350
  });
271
351
  });
272
352
 
273
- describe("resolvePermission", () => {
274
- test("logs warning for unknown request ID (no throw)", () => {
275
- const manager = new AcpSessionManager(5);
276
- // Should not throw just logs a warning
277
- manager.resolvePermission("unknown-req", "allow");
353
+ describe("session cleanup after prompt", () => {
354
+ test("completed session is removed from the session map", async () => {
355
+ let resolvePrompt: (v: { stopReason: string }) => void;
356
+ const promptPromise = new Promise<{ stopReason: string }>((r) => {
357
+ resolvePrompt = r;
358
+ });
359
+
360
+ const manager = new AcpSessionManager(1);
361
+ const sendToVellum = mock(() => {});
362
+
363
+ // Inject a fake session directly into the manager to avoid needing
364
+ // a real child process.
365
+ const fakeProcess = {
366
+ prompt: () => promptPromise,
367
+ kill: mock(() => {}),
368
+ spawn: mock(() => {}),
369
+ initialize: mock(() => Promise.resolve()),
370
+ createSession: mock(() => Promise.resolve("proto-session")),
371
+ cancel: mock(() => Promise.resolve()),
372
+ };
373
+ const fakeHandler = new VellumAcpClientHandler(
374
+ "test-session",
375
+ sendToVellum,
376
+ "conv-1",
377
+ );
378
+
379
+ // Access private sessions map via any cast
380
+ const sessions = (manager as any).sessions as Map<string, any>;
381
+ const entry = {
382
+ process: fakeProcess,
383
+ state: {
384
+ id: "test-session",
385
+ agentId: "agent-1",
386
+ acpSessionId: "proto-session",
387
+ status: "running",
388
+ startedAt: Date.now(),
389
+ },
390
+ clientHandler: fakeHandler,
391
+ sendToVellum,
392
+ currentPrompt: null as any,
393
+ parentConversationId: "conv-1",
394
+ cwd: "/tmp",
395
+ };
396
+ sessions.set("test-session", entry);
397
+
398
+ // Fire the prompt in the background via the private method
399
+ const bgPromise = (manager as any).firePromptInBackground(
400
+ "test-session",
401
+ entry,
402
+ "proto-session",
403
+ "do something",
404
+ );
405
+ entry.currentPrompt = bgPromise;
406
+
407
+ // Session exists before completion
408
+ expect((manager.getStatus() as any[]).length).toBe(1);
409
+
410
+ // Complete the prompt
411
+ resolvePrompt!({ stopReason: "end_turn" });
412
+ await bgPromise;
413
+
414
+ // Session should be cleaned up
415
+ expect((manager.getStatus() as any[]).length).toBe(0);
416
+ expect(fakeProcess.kill).toHaveBeenCalled();
417
+ });
418
+
419
+ test("failed session is removed from the session map", async () => {
420
+ const manager = new AcpSessionManager(1);
421
+ const sendToVellum = mock(() => {});
422
+
423
+ let rejectPrompt: (e: Error) => void;
424
+ const promptPromise = new Promise<{ stopReason: string }>((_r, rej) => {
425
+ rejectPrompt = rej;
426
+ });
427
+
428
+ const fakeProcess = {
429
+ prompt: () => promptPromise,
430
+ kill: mock(() => {}),
431
+ };
432
+ const fakeHandler = new VellumAcpClientHandler(
433
+ "test-session-2",
434
+ sendToVellum,
435
+ "conv-2",
436
+ );
437
+
438
+ const sessions = (manager as any).sessions as Map<string, any>;
439
+ const entry = {
440
+ process: fakeProcess,
441
+ state: {
442
+ id: "test-session-2",
443
+ agentId: "agent-1",
444
+ acpSessionId: "proto-session-2",
445
+ status: "running",
446
+ startedAt: Date.now(),
447
+ },
448
+ clientHandler: fakeHandler,
449
+ sendToVellum,
450
+ currentPrompt: null as any,
451
+ parentConversationId: "conv-2",
452
+ cwd: "/tmp",
453
+ };
454
+ sessions.set("test-session-2", entry);
455
+
456
+ const bgPromise = (manager as any).firePromptInBackground(
457
+ "test-session-2",
458
+ entry,
459
+ "proto-session-2",
460
+ "do something",
461
+ );
462
+ entry.currentPrompt = bgPromise;
463
+
464
+ expect((manager.getStatus() as any[]).length).toBe(1);
465
+
466
+ // Fail the prompt
467
+ rejectPrompt!(new Error("agent crashed"));
468
+ await bgPromise;
469
+
470
+ // Session should be cleaned up even on failure
471
+ expect((manager.getStatus() as any[]).length).toBe(0);
472
+ expect(fakeProcess.kill).toHaveBeenCalled();
278
473
  });
279
474
  });
280
475
 
@@ -7,11 +7,11 @@ import { describe, expect, test } from "bun:test";
7
7
  * Guard tests for assistant feature flags.
8
8
  *
9
9
  * 1. Key format validation: ensure production code uses the canonical
10
- * `feature_flags.<flagId>.enabled` format, not the legacy
11
- * `skills.<id>.enabled` format.
10
+ * simple kebab-case format (e.g., "browser", "ces-tools"), not the
11
+ * legacy `skills.<id>.enabled` format.
12
12
  *
13
13
  * 2. Declaration coverage: ensure all assistant-scope flag keys in the
14
- * unified registry conform to the canonical format.
14
+ * unified registry conform to the simple kebab-case format.
15
15
  *
16
16
  * See AGENTS.md "Assistant Feature Flags" for the full convention.
17
17
  */
@@ -53,7 +53,7 @@ function loadRegistry(): Registry {
53
53
  return JSON.parse(raw);
54
54
  }
55
55
 
56
- const CANONICAL_KEY_RE = /^feature_flags\.[a-z0-9][a-z0-9._-]*\.enabled$/;
56
+ const CANONICAL_KEY_RE = /^[a-z0-9][a-z0-9-]*$/;
57
57
 
58
58
  /**
59
59
  * Files allowed to contain the legacy `skills.<id>.enabled` key format.
@@ -126,13 +126,13 @@ describe("assistant feature flag guard", () => {
126
126
  if (violations.length > 0) {
127
127
  const message = [
128
128
  "Found production files using the legacy `skills.<id>.enabled` key format.",
129
- "New code must use the canonical format: `feature_flags.<id>.enabled`.",
129
+ 'New code must use the canonical simple kebab-case format (e.g., "browser", "ces-tools").',
130
130
  'See AGENTS.md "Assistant Feature Flags" for the convention.',
131
131
  "",
132
132
  "Violations:",
133
133
  ...violations.map((f) => ` - ${f}`),
134
134
  "",
135
- "To fix: replace `skills.<id>.enabled` with `feature_flags.<id>.enabled`.",
135
+ "To fix: replace `skills.<id>.enabled` with the simple kebab-case format.",
136
136
  "If backward-compat access is genuinely needed, add to LEGACY_KEY_ALLOWLIST in assistant-feature-flag-guard.test.ts.",
137
137
  ].join("\n");
138
138
 
@@ -144,7 +144,7 @@ describe("assistant feature flag guard", () => {
144
144
  // Test: unified registry key format (assistant-scope only)
145
145
  // ---------------------------------------------------------------------------
146
146
 
147
- test("all assistant-scope keys in the unified registry use the canonical feature_flags.<id>.enabled format", () => {
147
+ test("all assistant-scope keys in the unified registry use the canonical simple kebab-case format", () => {
148
148
  const registry = loadRegistry();
149
149
  const assistantFlags = registry.flags.filter(
150
150
  (f) => f.scope === "assistant",
@@ -156,7 +156,7 @@ describe("assistant feature flag guard", () => {
156
156
  if (violations.length > 0) {
157
157
  const message = [
158
158
  "Found assistant-scope keys in the unified registry that do not match the canonical format.",
159
- "Expected format: feature_flags.<flagId>.enabled",
159
+ 'Expected format: simple kebab-case (e.g., "browser", "ces-tools")',
160
160
  "",
161
161
  "Violations:",
162
162
  ...violations.map((k) => ` - ${k}`),
@@ -1,8 +1,8 @@
1
1
  /**
2
2
  * Guard tests for assistant feature flag conventions:
3
3
  *
4
- * 1. Key format: all feature flag keys used in production code must follow the
5
- * canonical `feature_flags.<flag_id>.enabled` format. Any remaining
4
+ * 1. Key format: all feature flag keys used in production code must use
5
+ * simple kebab-case format (e.g., "browser", "ces-tools"). Any remaining
6
6
  * `skills.<id>.enabled` usage outside of migration/backward-compat code is
7
7
  * flagged — including template literal forms like `skills.${skillId}.enabled`.
8
8
  *
@@ -10,11 +10,6 @@
10
10
  * `isAssistantFeatureFlagEnabled('<key>', ...)` in production code must be
11
11
  * declared in the unified registry. This keeps flag usage declarative while
12
12
  * allowing skills to exist without corresponding feature flags.
13
- *
14
- * 3. Indirect key coverage: all `feature_flags.<id>.enabled` string literals
15
- * anywhere in production code (maps, constants, variables, etc.) must be
16
- * declared in the unified registry. This catches indirect key patterns that
17
- * Guard 2 would miss, such as flag keys stored in lookup maps or constants.
18
13
  */
19
14
 
20
15
  import { execSync } from "node:child_process";
@@ -115,8 +110,8 @@ describe("assistant feature flag key format guard", () => {
115
110
  if (violations.length > 0) {
116
111
  const message = [
117
112
  "Found production TypeScript files using legacy `skills.<id>.enabled` key format.",
118
- "Use the canonical `feature_flags.<id>.enabled` format instead.",
119
- "Call `isAssistantFeatureFlagEnabled(`feature_flags.${skillId}.enabled`, config)` to check skill flags.",
113
+ "Use simple kebab-case keys instead (e.g., `contacts`, `browser`).",
114
+ "Call `isAssistantFeatureFlagEnabled(skillId, config)` to check skill flags.",
120
115
  "",
121
116
  "Violations:",
122
117
  ...violations.map((f) => ` - ${f}`),
@@ -149,7 +144,7 @@ describe("assistant feature flag declaration coverage guard", () => {
149
144
  // multiline regex so that calls split across lines are still caught:
150
145
  //
151
146
  // isAssistantFeatureFlagEnabled(
152
- // 'feature_flags.foo.enabled',
147
+ // 'browser',
153
148
  // config,
154
149
  // )
155
150
  //
@@ -202,79 +197,3 @@ describe("assistant feature flag declaration coverage guard", () => {
202
197
  }
203
198
  });
204
199
  });
205
-
206
- // ---------------------------------------------------------------------------
207
- // Guard 3: Indirect key coverage — flag key literals anywhere in production code
208
- // ---------------------------------------------------------------------------
209
-
210
- describe("assistant feature flag indirect key coverage guard", () => {
211
- test("all feature_flags.<id>.enabled string literals in production code are declared in the unified registry", () => {
212
- const repoRoot = getRepoRoot();
213
-
214
- // Load the unified registry and extract all declared keys (any scope)
215
- const registry = loadRegistry();
216
- const declaredKeys = new Set(registry.flags.map((f) => f.key));
217
-
218
- // Search for any string literal matching the canonical key pattern
219
- // in production .ts files under assistant/src/ and gateway/src/.
220
- // This catches keys in maps, constants, variables, or any other
221
- // indirect patterns that Guard 2 would miss.
222
- let grepOutput = "";
223
- try {
224
- grepOutput = execSync(
225
- `git grep -nE "feature_flags\\.[a-z0-9_-]+\\.enabled\\b" -- 'assistant/src/**/*.ts' 'gateway/src/**/*.ts'`,
226
- { encoding: "utf-8", cwd: repoRoot },
227
- ).trim();
228
- } catch (err) {
229
- // Exit code 1 means no matches — happy path
230
- if ((err as { status?: number }).status === 1) {
231
- return;
232
- }
233
- throw err;
234
- }
235
-
236
- const keyPattern = /feature_flags\.[a-z0-9_-]+\.enabled\b/g;
237
- const undeclared: string[] = [];
238
-
239
- for (const line of grepOutput.split("\n")) {
240
- if (!line) continue;
241
-
242
- // Format: "file:line:content"
243
- const colonIdx = line.indexOf(":");
244
- if (colonIdx === -1) continue;
245
- const filePath = line.slice(0, colonIdx);
246
-
247
- // Skip test files and persisted-data migration files (they reference retired flag keys by design)
248
- if (isTestFile(filePath)) continue;
249
- if (
250
- filePath.includes("/workspace/migrations/") ||
251
- filePath.includes("/memory/migrations/")
252
- )
253
- continue;
254
-
255
- // Extract all key occurrences from this line
256
- const content = line.slice(colonIdx + 1);
257
- for (const match of content.matchAll(keyPattern)) {
258
- const key = match[0];
259
- if (!declaredKeys.has(key)) {
260
- undeclared.push(`${filePath}: ${key}`);
261
- }
262
- }
263
- }
264
-
265
- if (undeclared.length > 0) {
266
- const message = [
267
- "Found feature_flags.<id>.enabled string literals in production code that are NOT declared in the unified registry.",
268
- "This catches indirect flag key usage (maps, constants, variables) that the direct-call guard misses.",
269
- `Registry: meta/feature-flags/feature-flag-registry.json`,
270
- "",
271
- "Undeclared keys:",
272
- ...undeclared.map((k) => ` - ${k}`),
273
- "",
274
- "To fix: add the missing key(s) to the unified registry, or remove the stale reference.",
275
- ].join("\n");
276
-
277
- expect(undeclared, message).toEqual([]);
278
- }
279
- });
280
- });
@@ -45,7 +45,7 @@ let currentConfig: Record<string, unknown> = {
45
45
  };
46
46
 
47
47
  const DECLARED_FLAG_ID = "contacts";
48
- const DECLARED_FLAG_KEY = `feature_flags.${DECLARED_FLAG_ID}.enabled`;
48
+ const DECLARED_FLAG_KEY = DECLARED_FLAG_ID;
49
49
  const DECLARED_SKILL_ID = "contacts";
50
50
 
51
51
  // eslint-disable-next-line @typescript-eslint/no-require-imports
@@ -201,7 +201,7 @@ describe("buildSystemPrompt assistant feature flag filtering", () => {
201
201
 
202
202
  _setOverridesForTesting({
203
203
  [DECLARED_FLAG_KEY]: false,
204
- "feature_flags.browser.enabled": true,
204
+ browser: true,
205
205
  });
206
206
 
207
207
  currentConfig = {
@@ -286,7 +286,7 @@ describe("buildSystemPrompt assistant feature flag filtering", () => {
286
286
 
287
287
  _setOverridesForTesting({
288
288
  [DECLARED_FLAG_KEY]: false,
289
- "feature_flags.email-channel.enabled": false,
289
+ "email-channel": false,
290
290
  });
291
291
 
292
292
  currentConfig = {
@@ -356,7 +356,7 @@ describe("buildSystemPrompt assistant feature flag filtering", () => {
356
356
  "browser",
357
357
  );
358
358
 
359
- _setOverridesForTesting({ "feature_flags.browser.enabled": false });
359
+ _setOverridesForTesting({ browser: false });
360
360
 
361
361
  currentConfig = {
362
362
  services: {
@@ -478,21 +478,14 @@ describe("isAssistantFeatureFlagEnabled", () => {
478
478
  test("unknown flag defaults to true when no persisted override", () => {
479
479
  const config = {} as any;
480
480
 
481
- expect(
482
- isAssistantFeatureFlagEnabled(
483
- "feature_flags.unknown-skill.enabled",
484
- config,
485
- ),
486
- ).toBe(true);
481
+ expect(isAssistantFeatureFlagEnabled("unknown-skill", config)).toBe(true);
487
482
  });
488
483
 
489
484
  test("undeclared flag respects persisted override", () => {
490
- _setOverridesForTesting({ "feature_flags.browser.enabled": false });
485
+ _setOverridesForTesting({ browser: false });
491
486
  const config = {} as any;
492
487
 
493
- expect(
494
- isAssistantFeatureFlagEnabled("feature_flags.browser.enabled", config),
495
- ).toBe(false);
488
+ expect(isAssistantFeatureFlagEnabled("browser", config)).toBe(false);
496
489
  });
497
490
  });
498
491
 
@@ -39,7 +39,7 @@ describe("browser skill migration end-state", () => {
39
39
  beforeAll(async () => {
40
40
  __resetRegistryForTesting();
41
41
  _setOverridesForTesting({
42
- "feature_flags.browser.enabled": true,
42
+ browser: true,
43
43
  });
44
44
  await initializeTools();
45
45
  });
@@ -61,6 +61,12 @@ mock.module("../prompts/system-prompt.js", () => ({
61
61
  buildSystemPrompt: mockBuildSystemPrompt,
62
62
  }));
63
63
 
64
+ mock.module("../prompts/persona-resolver.js", () => ({
65
+ resolveGuardianPersona: () => null,
66
+ resolveChannelPersona: () => null,
67
+ resolveUserPersona: () => null,
68
+ }));
69
+
64
70
  // ---------------------------------------------------------------------------
65
71
  // Imports (after mocks)
66
72
  // ---------------------------------------------------------------------------
@@ -304,7 +310,9 @@ describe("POST /v1/btw", () => {
304
310
  // System prompt built by buildSystemPrompt({ excludeBootstrap: true })
305
311
  expect(systemPrompt).toBe(MOCK_SYSTEM_PROMPT);
306
312
  expect(mockBuildSystemPrompt).toHaveBeenCalledWith({
313
+ channelPersona: null,
307
314
  excludeBootstrap: true,
315
+ userPersona: null,
308
316
  });
309
317
 
310
318
  // Options: tool_choice must be "none"