@vellumai/assistant 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/ARCHITECTURE.md +29 -28
  2. package/Dockerfile +1 -0
  3. package/__tests__/permissions/gateway-threshold-reader.test.ts +236 -9
  4. package/bun.lock +3 -0
  5. package/knip.json +1 -0
  6. package/node_modules/@vellumai/ipc-server-utils/bun.lock +24 -0
  7. package/node_modules/@vellumai/ipc-server-utils/package.json +18 -0
  8. package/node_modules/@vellumai/ipc-server-utils/src/index.ts +6 -0
  9. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.test.ts +430 -0
  10. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.ts +221 -0
  11. package/node_modules/@vellumai/ipc-server-utils/tsconfig.json +20 -0
  12. package/openapi.yaml +22 -4
  13. package/package.json +3 -1
  14. package/src/__tests__/annotate-risk-options.test.ts +291 -0
  15. package/src/__tests__/approval-cascade.test.ts +8 -16
  16. package/src/__tests__/approval-routes-http.test.ts +6 -0
  17. package/src/__tests__/auto-analysis-end-to-end.test.ts +12 -25
  18. package/src/__tests__/call-constants.test.ts +10 -1
  19. package/src/__tests__/call-controller.test.ts +127 -0
  20. package/src/__tests__/cli-memory-v2-reembed-skills.test.ts +58 -28
  21. package/src/__tests__/config-loader-platform-defaults.test.ts +284 -1
  22. package/src/__tests__/context-search-memory-source.test.ts +3 -26
  23. package/src/__tests__/context-search-pkb-source.test.ts +12 -6
  24. package/src/__tests__/conversation-abort-tool-results.test.ts +1 -6
  25. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
  26. package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
  27. package/src/__tests__/conversation-agent-loop.test.ts +3 -3
  28. package/src/__tests__/conversation-confirmation-signals.test.ts +5 -13
  29. package/src/__tests__/conversation-init.benchmark.test.ts +1 -1
  30. package/src/__tests__/conversation-process-callsite.test.ts +1 -6
  31. package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -6
  32. package/src/__tests__/conversation-runtime-assembly.test.ts +15 -6
  33. package/src/__tests__/conversation-slash-unknown.test.ts +1 -6
  34. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +170 -9
  35. package/src/__tests__/conversation-surfaces-data-persist.test.ts +73 -1
  36. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +59 -0
  37. package/src/__tests__/conversation-workspace-injection.test.ts +1 -7
  38. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -7
  39. package/src/__tests__/filing-service.test.ts +2 -19
  40. package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +10 -26
  41. package/src/__tests__/injector-chain.test.ts +24 -16
  42. package/src/__tests__/injector-pkb-v2-silenced.test.ts +10 -7
  43. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +154 -67
  44. package/src/__tests__/notification-decision-fallback.test.ts +91 -0
  45. package/src/__tests__/notification-decision-strategy.test.ts +22 -0
  46. package/src/__tests__/oauth-cli.test.ts +121 -0
  47. package/src/__tests__/relay-server.test.ts +46 -2
  48. package/src/__tests__/secret-prompt-log-hygiene.test.ts +7 -5
  49. package/src/__tests__/secret-prompter-channel-fallback.test.ts +7 -5
  50. package/src/__tests__/secret-response-routing.test.ts +7 -5
  51. package/src/__tests__/server-history-render.test.ts +82 -0
  52. package/src/__tests__/skill-include-graph.test.ts +31 -0
  53. package/src/__tests__/skill-load-tool.test.ts +44 -16
  54. package/src/__tests__/skills.test.ts +39 -0
  55. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -42
  56. package/src/__tests__/tool-executor.test.ts +155 -0
  57. package/src/__tests__/voice-session-bridge.test.ts +3 -0
  58. package/src/__tests__/workspace-migration-069-seed-onboarding-threads.test.ts +120 -0
  59. package/src/__tests__/workspace-migration-071-remove-safe-storage-release-note.test.ts +206 -0
  60. package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +15 -27
  61. package/src/agent/loop.ts +11 -0
  62. package/src/approvals/guardian-decision-primitive.ts +0 -13
  63. package/src/approvals/guardian-request-resolvers.ts +4 -32
  64. package/src/calls/call-constants.ts +5 -8
  65. package/src/calls/call-controller.ts +130 -67
  66. package/src/calls/relay-server.ts +7 -1
  67. package/src/calls/voice-session-bridge.ts +1 -1
  68. package/src/cli/commands/memory-v2.ts +7 -7
  69. package/src/cli/commands/oauth/__tests__/connect.test.ts +0 -254
  70. package/src/cli/commands/oauth/connect.ts +10 -52
  71. package/src/config/bundled-skills/app-builder/SKILL.md +1 -3
  72. package/src/config/feature-flag-registry.json +1 -17
  73. package/src/config/loader.ts +72 -19
  74. package/src/config/schemas/memory-v2.ts +1 -1
  75. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +32 -0
  76. package/src/daemon/conversation-agent-loop-handlers.ts +32 -0
  77. package/src/daemon/conversation-agent-loop.ts +13 -10
  78. package/src/daemon/conversation-lifecycle.ts +22 -8
  79. package/src/daemon/conversation-surfaces.ts +16 -14
  80. package/src/daemon/conversation-tool-setup.ts +9 -5
  81. package/src/daemon/conversation.ts +1 -1
  82. package/src/daemon/handlers/shared.ts +26 -0
  83. package/src/daemon/host-bash-proxy.ts +1 -1
  84. package/src/daemon/host-browser-proxy.ts +1 -1
  85. package/src/daemon/host-cu-proxy.ts +1 -1
  86. package/src/daemon/host-file-proxy.ts +1 -1
  87. package/src/daemon/host-transfer-proxy.ts +2 -2
  88. package/src/daemon/lifecycle.ts +88 -73
  89. package/src/daemon/memory-v2-startup.ts +55 -14
  90. package/src/daemon/message-types/messages.ts +19 -1
  91. package/src/documents/document-store.ts +35 -1
  92. package/src/filing/filing-service.ts +2 -3
  93. package/src/heartbeat/heartbeat-service.ts +1 -1
  94. package/src/ipc/assistant-server.ts +93 -36
  95. package/src/ipc/skill-server.ts +99 -42
  96. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +10 -57
  97. package/src/memory/context-search/sources/memory-v2.ts +1 -17
  98. package/src/memory/context-search/sources/memory.ts +2 -2
  99. package/src/memory/context-search/sources/pkb.ts +2 -3
  100. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +104 -61
  101. package/src/memory/graph/__tests__/handle-remember-v2.test.ts +11 -26
  102. package/src/memory/graph/conversation-graph-memory.ts +32 -9
  103. package/src/memory/graph/graph-search.test.ts +6 -5
  104. package/src/memory/graph/graph-search.ts +3 -4
  105. package/src/memory/graph/retriever.test.ts +12 -7
  106. package/src/memory/graph/retriever.ts +4 -5
  107. package/src/memory/graph/tool-handlers.ts +3 -4
  108. package/src/memory/graph/tools.ts +4 -4
  109. package/src/memory/indexer.ts +1 -2
  110. package/src/memory/jobs/__tests__/embed-concept-page.test.ts +116 -0
  111. package/src/memory/jobs/embed-concept-page.ts +223 -87
  112. package/src/memory/jobs-worker.ts +8 -4
  113. package/src/memory/pkb/pkb-search.test.ts +6 -5
  114. package/src/memory/pkb/pkb-search.ts +4 -5
  115. package/src/memory/qdrant-client.ts +3 -0
  116. package/src/memory/search/semantic.ts +4 -5
  117. package/src/memory/v2/__tests__/activation.test.ts +35 -5
  118. package/src/memory/v2/__tests__/consolidation-job.test.ts +21 -32
  119. package/src/memory/v2/__tests__/injection.test.ts +140 -23
  120. package/src/memory/v2/__tests__/qdrant.test.ts +310 -9
  121. package/src/memory/v2/__tests__/sim.test.ts +118 -7
  122. package/src/memory/v2/__tests__/static-context.test.ts +1 -13
  123. package/src/memory/v2/__tests__/sweep-job.test.ts +19 -33
  124. package/src/memory/v2/consolidation-job.ts +7 -8
  125. package/src/memory/v2/injection.ts +32 -12
  126. package/src/memory/v2/page-store.ts +39 -0
  127. package/src/memory/v2/prompts/consolidation.ts +5 -0
  128. package/src/memory/v2/qdrant.ts +209 -48
  129. package/src/memory/v2/sim.ts +67 -26
  130. package/src/memory/v2/static-context.ts +4 -8
  131. package/src/memory/v2/sweep-job.ts +5 -6
  132. package/src/memory/v2/types.ts +7 -0
  133. package/src/notifications/copy-composer.ts +46 -12
  134. package/src/notifications/decision-engine.ts +46 -0
  135. package/src/permissions/gateway-threshold-reader.ts +116 -8
  136. package/src/permissions/prompter.ts +86 -96
  137. package/src/permissions/secret-prompter.ts +31 -31
  138. package/src/plugins/defaults/injectors.ts +1 -2
  139. package/src/proactive-artifact/job.test.ts +51 -4
  140. package/src/proactive-artifact/job.ts +16 -2
  141. package/src/proactive-artifact/message-copy.ts +18 -1
  142. package/src/prompts/templates/SOUL.md +13 -28
  143. package/src/runtime/auth/route-policy.ts +1 -0
  144. package/src/runtime/channel-approvals.ts +3 -2
  145. package/src/runtime/guardian-reply-router.ts +0 -10
  146. package/src/runtime/pending-interactions.ts +19 -15
  147. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +147 -0
  148. package/src/runtime/routes/approval-routes.ts +7 -3
  149. package/src/runtime/routes/consolidation-routes.ts +8 -9
  150. package/src/runtime/routes/conversation-query-routes.ts +44 -1
  151. package/src/runtime/routes/debug-bash-routes.ts +2 -0
  152. package/src/runtime/routes/filing-routes.ts +2 -3
  153. package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +0 -3
  154. package/src/runtime/routes/memory-item-routes.test.ts +3 -9
  155. package/src/runtime/routes/memory-item-routes.ts +5 -6
  156. package/src/runtime/routes/memory-v2-routes.ts +103 -17
  157. package/src/skills/include-graph.ts +35 -13
  158. package/src/tools/document/document-tool.ts +20 -0
  159. package/src/tools/executor.ts +18 -2
  160. package/src/tools/memory/register.test.ts +7 -5
  161. package/src/tools/permission-checker.ts +15 -0
  162. package/src/tools/skills/load.ts +24 -20
  163. package/src/tools/tool-name-aliases.ts +19 -0
  164. package/src/tools/types.ts +19 -1
  165. package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +4 -62
  166. package/src/workspace/migrations/069-seed-onboarding-threads.ts +28 -0
  167. package/src/workspace/migrations/070-memory-v2-summary-schema-rebuild.ts +31 -0
  168. package/src/workspace/migrations/071-remove-safe-storage-release-note.ts +111 -0
  169. package/src/workspace/migrations/registry.ts +6 -0
@@ -12,14 +12,8 @@
12
12
  */
13
13
  import { describe, expect, mock, test } from "bun:test";
14
14
 
15
- import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
16
15
  import type { Message, ProviderResponse } from "../providers/types.js";
17
16
 
18
- // This test exercises v1 conversation routing. The `memory-v2-enabled` flag
19
- // (registry default `true`) flips memory routing to v2 — disable it here so
20
- // the v1 paths under test stay active.
21
- _setOverridesForTesting({ "memory-v2-enabled": false });
22
-
23
17
  // Use an object wrapper so TypeScript doesn't narrow the captured type to
24
18
  // `undefined` based on the initial assignment in the test setup.
25
19
  const captured: {
@@ -83,6 +77,7 @@ mock.module("../config/loader.js", () => ({
83
77
  pricingOverrides: [],
84
78
  },
85
79
  rateLimit: { maxRequestsPerMinute: 0 },
80
+ memory: { v2: { enabled: false } },
86
81
  daemon: {
87
82
  startupSocketWaitMs: 5000,
88
83
  stopTimeoutMs: 5000,
@@ -1,17 +1,11 @@
1
1
  import { beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
3
  import type { AgentEvent } from "../agent/loop.js";
4
- import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
5
4
  import type { UserMessageAttachment } from "../daemon/message-protocol.js";
6
5
  import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
7
6
  import type { Message, ProviderResponse } from "../providers/types.js";
8
7
  import { ProviderError } from "../util/errors.js";
9
8
 
10
- // This test exercises v1 conversation routing. The `memory-v2-enabled` flag
11
- // (registry default `true`) flips memory routing to v2 — disable it here so
12
- // the v1 paths under test stay active.
13
- _setOverridesForTesting({ "memory-v2-enabled": false });
14
-
15
9
  mock.module("../util/logger.js", () => ({
16
10
  getLogger: () =>
17
11
  new Proxy({} as Record<string, unknown>, { get: () => () => {} }),
@@ -63,6 +57,7 @@ mock.module("../config/loader.js", () => ({
63
57
  pricingOverrides: [],
64
58
  },
65
59
  rateLimit: { maxRequestsPerMinute: 0 },
60
+ memory: { v2: { enabled: false } },
66
61
  services: {
67
62
  inference: {
68
63
  mode: "your-own",
@@ -1,11 +1,20 @@
1
1
  import { beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
- import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
4
-
5
- // This test exercises v1 conversation routing. The `memory-v2-enabled` flag
6
- // (registry default `true`) flips memory routing to v2 — disable it here so
7
- // the v1 paths under test stay active.
8
- _setOverridesForTesting({ "memory-v2-enabled": false });
3
+ // This test exercises v1 PKB injection. `config.memory.v2.enabled` (default
4
+ // `true`) makes the PKB injector go silent — force it off here so the v1
5
+ // injection chain assertions stay meaningful.
6
+ const realLoaderForAssemblyTest = await import("../config/loader.js");
7
+ const realGetConfigForAssemblyTest = realLoaderForAssemblyTest.getConfig;
8
+ mock.module("../config/loader.js", () => ({
9
+ ...realLoaderForAssemblyTest,
10
+ getConfig: () => {
11
+ const real = realGetConfigForAssemblyTest();
12
+ return {
13
+ ...real,
14
+ memory: { ...real.memory, v2: { ...real.memory.v2, enabled: false } },
15
+ };
16
+ },
17
+ }));
9
18
 
10
19
  // PKB search is mocked so the reminder-hints tests can assert behavior
11
20
  // without standing up Qdrant. The mock returns whatever is staged in
@@ -5,15 +5,9 @@ import type {
5
5
  CheckpointDecision,
6
6
  CheckpointInfo,
7
7
  } from "../agent/loop.js";
8
- import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
9
8
  import type { ServerMessage } from "../daemon/message-protocol.js";
10
9
  import type { Message, ProviderResponse } from "../providers/types.js";
11
10
 
12
- // This test exercises v1 conversation routing. The `memory-v2-enabled` flag
13
- // (registry default `true`) flips memory routing to v2 — disable it here so
14
- // the v1 paths under test stay active.
15
- _setOverridesForTesting({ "memory-v2-enabled": false });
16
-
17
11
  // ---------------------------------------------------------------------------
18
12
  // Mocks — must precede the Conversation import so Bun applies them at load time.
19
13
  // ---------------------------------------------------------------------------
@@ -66,6 +60,7 @@ mock.module("../config/loader.js", () => ({
66
60
  pricingOverrides: [],
67
61
  },
68
62
  rateLimit: { maxRequestsPerMinute: 0 },
63
+ memory: { v2: { enabled: false } },
69
64
  daemon: {
70
65
  startupSocketWaitMs: 5000,
71
66
  stopTimeoutMs: 5000,
@@ -1,13 +1,19 @@
1
- import { describe, expect, test } from "bun:test";
2
-
3
- import {
4
- createSurfaceMutex,
5
- handleSurfaceAction,
6
- type SurfaceConversationContext,
7
- surfaceProxyResolver,
8
- } from "../daemon/conversation-surfaces.js";
1
+ import { beforeEach, describe, expect, mock, test } from "bun:test";
2
+
3
+ import type { ServerMessage } from "../daemon/message-protocol.js";
4
+
5
+ let broadcastedMessages: ServerMessage[] = [];
6
+ const realEventHub = await import("../runtime/assistant-event-hub.js");
7
+ mock.module("../runtime/assistant-event-hub.js", () => ({
8
+ ...realEventHub,
9
+ broadcastMessage: (msg: ServerMessage) => broadcastedMessages.push(msg),
10
+ }));
11
+
12
+ const { createSurfaceMutex, handleSurfaceAction, surfaceProxyResolver } =
13
+ await import("../daemon/conversation-surfaces.js");
14
+
15
+ import type { SurfaceConversationContext } from "../daemon/conversation-surfaces.js";
9
16
  import type {
10
- ServerMessage,
11
17
  SurfaceData,
12
18
  SurfaceType,
13
19
  UiSurfaceShow,
@@ -81,6 +87,10 @@ function makeContext(sent: ServerMessage[] = []): SurfaceConversationContext & {
81
87
  }
82
88
 
83
89
  describe("surface action delivery to assistant", () => {
90
+ beforeEach(() => {
91
+ broadcastedMessages = [];
92
+ });
93
+
84
94
  test("table action button click triggers processMessage with action content", async () => {
85
95
  const sent: ServerMessage[] = [];
86
96
  const ctx = makeContext(sent);
@@ -199,4 +209,155 @@ describe("surface action delivery to assistant", () => {
199
209
  "[User action on app:",
200
210
  );
201
211
  });
212
+
213
+ test("confirmation surface broadcasts ui_surface_complete on action", async () => {
214
+ const sent: ServerMessage[] = [];
215
+ const ctx = makeContext(sent);
216
+
217
+ const showResult = await surfaceProxyResolver(ctx, "ui_show", {
218
+ surface_type: "confirmation",
219
+ title: "Delete files?",
220
+ data: {
221
+ message: "This will permanently delete 3 files.",
222
+ confirmLabel: "Delete",
223
+ cancelLabel: "Keep",
224
+ },
225
+ });
226
+
227
+ expect(showResult.isError).toBe(false);
228
+ expect(showResult.yieldToUser).toBe(true);
229
+
230
+ const showMessage = sent.find(
231
+ (msg): msg is UiSurfaceShow => msg.type === "ui_surface_show",
232
+ ) as UiSurfaceShow;
233
+ const surfaceId = showMessage.surfaceId;
234
+ expect(ctx.pendingSurfaceActions.has(surfaceId)).toBe(true);
235
+
236
+ await handleSurfaceAction(ctx, surfaceId, "confirm", {});
237
+
238
+ const completeMsg = broadcastedMessages.find(
239
+ (m) =>
240
+ (m as unknown as Record<string, unknown>).type ===
241
+ "ui_surface_complete" &&
242
+ (m as unknown as Record<string, unknown>).surfaceId === surfaceId,
243
+ ) as unknown as Record<string, unknown> | undefined;
244
+ expect(completeMsg).toBeDefined();
245
+ expect(completeMsg?.conversationId).toBe("conv-1");
246
+ expect(completeMsg?.summary).toContain("Delete");
247
+ });
248
+
249
+ test("file_upload surface broadcasts ui_surface_complete on action", async () => {
250
+ const sent: ServerMessage[] = [];
251
+ const ctx = makeContext(sent);
252
+
253
+ const showResult = await surfaceProxyResolver(ctx, "ui_show", {
254
+ surface_type: "file_upload",
255
+ title: "Upload documents",
256
+ data: { accept: ".pdf,.docx", maxFiles: 5 },
257
+ });
258
+
259
+ expect(showResult.isError).toBe(false);
260
+ expect(showResult.yieldToUser).toBe(true);
261
+
262
+ const showMessage = sent.find(
263
+ (msg): msg is UiSurfaceShow => msg.type === "ui_surface_show",
264
+ ) as UiSurfaceShow;
265
+ const surfaceId = showMessage.surfaceId;
266
+ expect(ctx.pendingSurfaceActions.has(surfaceId)).toBe(true);
267
+
268
+ await handleSurfaceAction(ctx, surfaceId, "submit", {
269
+ files: [
270
+ {
271
+ filename: "doc.pdf",
272
+ mimeType: "application/pdf",
273
+ data: "base64encodedcontent",
274
+ },
275
+ ],
276
+ });
277
+
278
+ const completeMsg = broadcastedMessages.find(
279
+ (m) =>
280
+ (m as unknown as Record<string, unknown>).type ===
281
+ "ui_surface_complete" &&
282
+ (m as unknown as Record<string, unknown>).surfaceId === surfaceId,
283
+ ) as unknown as Record<string, unknown> | undefined;
284
+ expect(completeMsg).toBeDefined();
285
+ expect(completeMsg?.conversationId).toBe("conv-1");
286
+ });
287
+
288
+ test("file_upload completion event does not include base64 file blobs", async () => {
289
+ const sent: ServerMessage[] = [];
290
+ const ctx = makeContext(sent);
291
+
292
+ await surfaceProxyResolver(ctx, "ui_show", {
293
+ surface_type: "file_upload",
294
+ title: "Upload",
295
+ data: { accept: "*" },
296
+ });
297
+
298
+ const showMessage = sent.find(
299
+ (msg): msg is UiSurfaceShow => msg.type === "ui_surface_show",
300
+ ) as UiSurfaceShow;
301
+ const surfaceId = showMessage.surfaceId;
302
+
303
+ const largeBase64 = "A".repeat(10_000);
304
+ await handleSurfaceAction(ctx, surfaceId, "submit", {
305
+ files: [
306
+ {
307
+ filename: "big.pdf",
308
+ mimeType: "application/pdf",
309
+ data: largeBase64,
310
+ },
311
+ ],
312
+ });
313
+
314
+ const completeMsg = broadcastedMessages.find(
315
+ (m) =>
316
+ (m as unknown as Record<string, unknown>).type ===
317
+ "ui_surface_complete" &&
318
+ (m as unknown as Record<string, unknown>).surfaceId === surfaceId,
319
+ ) as unknown as Record<string, unknown> | undefined;
320
+ expect(completeMsg).toBeDefined();
321
+
322
+ const submittedData = completeMsg?.submittedData as
323
+ | Record<string, unknown>
324
+ | undefined;
325
+ // The files array with base64 blobs should be stripped from the
326
+ // completion event — only the sanitized payload (without files) is sent.
327
+ expect(submittedData?.files).toBeUndefined();
328
+ // The raw base64 content should not appear anywhere in the event
329
+ expect(JSON.stringify(completeMsg)).not.toContain(largeBase64);
330
+ });
331
+
332
+ test("table surface does NOT broadcast ui_surface_complete (not one-shot)", async () => {
333
+ const sent: ServerMessage[] = [];
334
+ const ctx = makeContext(sent);
335
+
336
+ await surfaceProxyResolver(ctx, "ui_show", {
337
+ surface_type: "table",
338
+ title: "Items",
339
+ data: {
340
+ columns: [{ id: "name", label: "Name" }],
341
+ rows: [{ id: "r1", cells: { name: "Item 1" } }],
342
+ },
343
+ actions: [{ id: "select", label: "Select" }],
344
+ });
345
+
346
+ const showMessage = sent.find(
347
+ (msg): msg is UiSurfaceShow => msg.type === "ui_surface_show",
348
+ ) as UiSurfaceShow;
349
+ const surfaceId = showMessage.surfaceId;
350
+
351
+ broadcastedMessages = [];
352
+ await handleSurfaceAction(ctx, surfaceId, "select", {
353
+ selectedIds: ["r1"],
354
+ });
355
+
356
+ const completeMsg = broadcastedMessages.find(
357
+ (m) =>
358
+ (m as unknown as Record<string, unknown>).type ===
359
+ "ui_surface_complete",
360
+ );
361
+ expect(completeMsg).toBeUndefined();
362
+ });
202
363
  });
@@ -1,5 +1,14 @@
1
1
  import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
+ import type { ServerMessage } from "../daemon/message-protocol.js";
4
+
5
+ const realEventHub = await import("../runtime/assistant-event-hub.js");
6
+
7
+ mock.module("../runtime/assistant-event-hub.js", () => ({
8
+ ...realEventHub,
9
+ broadcastMessage: (_msg: ServerMessage) => {},
10
+ }));
11
+
3
12
  // Mock the persistence layer the surface helpers reach into so we can
4
13
  // observe writes without touching SQLite. We swap this out per test by
5
14
  // re-assigning the spies recorded on the closure below.
@@ -29,15 +38,16 @@ const {
29
38
  flushPendingSurfaceDataPersists,
30
39
  createSurfaceMutex,
31
40
  flushSurfaceDataPersist,
41
+ handleSurfaceAction,
32
42
  markSurfaceCompleted,
33
43
  scheduleSurfaceDataPersist,
44
+ showStandaloneSurface,
34
45
  surfaceProxyResolver,
35
46
  } = await import("../daemon/conversation-surfaces.js");
36
47
 
37
48
  import type { SurfaceConversationContext } from "../daemon/conversation-surfaces.js";
38
49
  import type {
39
50
  CardSurfaceData,
40
- ServerMessage,
41
51
  SurfaceData,
42
52
  SurfaceType,
43
53
  } from "../daemon/message-protocol.js";
@@ -60,6 +70,8 @@ function makeContext(sent: ServerMessage[] = []): SurfaceConversationContext {
60
70
  accumulatedSurfaceState: new Map<string, Record<string, unknown>>(),
61
71
  surfaceActionRequestIds: new Set<string>(),
62
72
  currentTurnSurfaces: [],
73
+ pendingStandaloneSurfaces: new Map(),
74
+ recentlyCompletedStandaloneSurfaces: new Map(),
63
75
  isProcessing: () => false,
64
76
  enqueueMessage: () => ({ queued: false, requestId: "req-1" }),
65
77
  getQueueDepth: () => 0,
@@ -402,3 +414,63 @@ describe("ui_surface_update persistence", () => {
402
414
  cancelPendingSurfaceDataPersists("conv-other");
403
415
  });
404
416
  });
417
+
418
+ describe("standalone surface DB persistence", () => {
419
+ let writes: Array<{ id: string; content: unknown }> = [];
420
+
421
+ beforeEach(() => {
422
+ writes = [];
423
+ updateMessageContentSpy = (id: string, content: string) => {
424
+ writes.push({ id, content: JSON.parse(content) });
425
+ };
426
+ getMessagesImpl = () => [];
427
+ cancelPendingSurfaceDataPersists();
428
+ });
429
+
430
+ afterEach(() => {
431
+ cancelPendingSurfaceDataPersists();
432
+ });
433
+
434
+ test("standalone surface action persists completed state to DB", async () => {
435
+ const ctx = makeContext();
436
+ const surfaceId = "standalone-persist-1";
437
+
438
+ seedRows([
439
+ {
440
+ id: "msg-standalone",
441
+ content: [
442
+ { type: "text", text: "confirm this" },
443
+ {
444
+ type: "ui_surface",
445
+ surfaceId,
446
+ surfaceType: "confirmation",
447
+ data: { message: "Proceed?" },
448
+ },
449
+ ],
450
+ },
451
+ ]);
452
+
453
+ const resultPromise = showStandaloneSurface(
454
+ ctx,
455
+ {
456
+ conversationId: "conv-persist-1",
457
+ surfaceType: "confirmation",
458
+ data: { message: "Proceed?" },
459
+ timeoutMs: 60_000,
460
+ },
461
+ surfaceId,
462
+ );
463
+
464
+ await handleSurfaceAction(ctx, surfaceId, "confirm", {});
465
+ const result = await resultPromise;
466
+ expect(result.status).toBe("submitted");
467
+
468
+ expect(writes.length).toBeGreaterThanOrEqual(1);
469
+ const finalBlocks = writes[writes.length - 1].content as Array<
470
+ Record<string, unknown>
471
+ >;
472
+ const surfaceBlock = finalBlocks.find((b) => b.type === "ui_surface")!;
473
+ expect(surfaceBlock.completed).toBe(true);
474
+ expect(surfaceBlock.completionSummary).toBe("Confirmed");
475
+ });
476
+ });
@@ -264,6 +264,65 @@ describe("session-tool-setup app refresh side effects", () => {
264
264
  });
265
265
  });
266
266
 
267
+ test("canonicalizes create_app skill_execute alias before hooks run", async () => {
268
+ const ctx = makeCtx({ allowedToolNames: new Set(["app_create"]) });
269
+ const executor = makeFakeExecutor({
270
+ content: JSON.stringify({ id: "alias-app-1", name: "Alias App" }),
271
+ isError: false,
272
+ });
273
+
274
+ const toolFn = createToolExecutor(
275
+ executor as unknown as ToolExecutor,
276
+ noopPrompter,
277
+ noopSecretPrompter,
278
+ ctx,
279
+ noopLifecycleHandler,
280
+ );
281
+
282
+ await toolFn("skill_execute", {
283
+ tool: "create_app",
284
+ input: { name: "Alias App" },
285
+ activity: "Building app",
286
+ });
287
+
288
+ const calls = executor.execute.mock.calls as unknown[][];
289
+ expect(calls[0][0]).toBe("app_create");
290
+ expect(calls[0][1]).toEqual({ name: "Alias App" });
291
+ expect(broadcastSpy.mock.calls.length).toBeGreaterThanOrEqual(1);
292
+ expect((broadcastSpy.mock.calls as unknown[][])[0][0]).toEqual({
293
+ type: "app_files_changed",
294
+ appId: "alias-app-1",
295
+ });
296
+ });
297
+
298
+ test("preserves exact active create_app skill tool when app_create is also active", async () => {
299
+ const ctx = makeCtx({
300
+ allowedToolNames: new Set(["create_app", "app_create"]),
301
+ });
302
+ const executor = makeFakeExecutor({
303
+ content: JSON.stringify({ id: "custom-app-1", name: "Custom App" }),
304
+ isError: false,
305
+ });
306
+
307
+ const toolFn = createToolExecutor(
308
+ executor as unknown as ToolExecutor,
309
+ noopPrompter,
310
+ noopSecretPrompter,
311
+ ctx,
312
+ noopLifecycleHandler,
313
+ );
314
+
315
+ await toolFn("skill_execute", {
316
+ tool: "create_app",
317
+ input: { name: "Custom App" },
318
+ activity: "Running custom app tool",
319
+ });
320
+
321
+ const calls = executor.execute.mock.calls as unknown[][];
322
+ expect(calls[0][0]).toBe("create_app");
323
+ expect(broadcastSpy).not.toHaveBeenCalled();
324
+ });
325
+
267
326
  test("skips side effects when app_create result is an error", async () => {
268
327
  const ctx = makeCtx();
269
328
  const executor = makeFakeExecutor({ content: "Error", isError: true });
@@ -1,14 +1,8 @@
1
1
  import { beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
3
  import type { AgentEvent } from "../agent/loop.js";
4
- import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
5
4
  import type { Message, ProviderResponse } from "../providers/types.js";
6
5
 
7
- // This test exercises v1 conversation routing. The `memory-v2-enabled` flag
8
- // (registry default `true`) flips memory routing to v2 — disable it here so
9
- // the v1 paths under test stay active.
10
- _setOverridesForTesting({ "memory-v2-enabled": false });
11
-
12
6
  // ---------------------------------------------------------------------------
13
7
  // Track agent loop calls
14
8
  // ---------------------------------------------------------------------------
@@ -69,7 +63,7 @@ mock.module("../config/loader.js", () => ({
69
63
  pricingOverrides: [],
70
64
  },
71
65
  rateLimit: { maxRequestsPerMinute: 0 },
72
- memory: { enabled: false },
66
+ memory: { enabled: false, v2: { enabled: false } },
73
67
  daemon: {
74
68
  startupSocketWaitMs: 5000,
75
69
  stopTimeoutMs: 5000,
@@ -1,14 +1,8 @@
1
1
  import { beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
3
  import type { AgentEvent } from "../agent/loop.js";
4
- import { _setOverridesForTesting } from "../config/assistant-feature-flags.js";
5
4
  import type { Message, ProviderResponse } from "../providers/types.js";
6
5
 
7
- // This test exercises v1 conversation routing. The `memory-v2-enabled` flag
8
- // (registry default `true`) flips memory routing to v2 — disable it here so
9
- // the v1 paths under test stay active.
10
- _setOverridesForTesting({ "memory-v2-enabled": false });
11
-
12
6
  // ---------------------------------------------------------------------------
13
7
  // Configurable agent loop behavior
14
8
  // ---------------------------------------------------------------------------
@@ -67,7 +61,7 @@ mock.module("../config/loader.js", () => ({
67
61
  pricingOverrides: [],
68
62
  },
69
63
  rateLimit: { maxRequestsPerMinute: 0 },
70
- memory: { enabled: false },
64
+ memory: { enabled: false, v2: { enabled: false } },
71
65
  daemon: {
72
66
  startupSocketWaitMs: 5000,
73
67
  stopTimeoutMs: 5000,
@@ -34,9 +34,6 @@ mock.module("../config/loader.js", () => ({
34
34
  invalidateConfigCache: () => {},
35
35
  }));
36
36
 
37
- const { _setOverridesForTesting } =
38
- await import("../config/assistant-feature-flags.js");
39
-
40
37
  // Mock conversation store
41
38
  const createdConversations: Array<{ title: string; conversationType: string }> =
42
39
  [];
@@ -128,7 +125,6 @@ describe("FilingService", () => {
128
125
  } catch {
129
126
  // best-effort
130
127
  }
131
- _setOverridesForTesting({});
132
128
  });
133
129
 
134
130
  beforeEach(() => {
@@ -331,8 +327,7 @@ describe("FilingService", () => {
331
327
  });
332
328
 
333
329
  describe("memory v2 gate", () => {
334
- test("start() does not schedule timers when v2 flag and config are both on", () => {
335
- _setOverridesForTesting({ "memory-v2-enabled": true });
330
+ test("start() does not schedule timers when memory.v2.enabled is true", () => {
336
331
  mockConfig.memory.v2.enabled = true;
337
332
 
338
333
  const service = createService();
@@ -342,24 +337,12 @@ describe("FilingService", () => {
342
337
  expect(service.nextCompactionAt).toBeNull();
343
338
  });
344
339
 
345
- test("start() does not schedule timers when only the flag is on", () => {
346
- _setOverridesForTesting({ "memory-v2-enabled": true });
340
+ test("start() schedules timers when memory.v2.enabled is false (v1 filing runs)", () => {
347
341
  mockConfig.memory.v2.enabled = false;
348
342
 
349
343
  const service = createService();
350
344
  service.start();
351
345
 
352
- expect(service.nextRunAt).toBeNull();
353
- expect(service.nextCompactionAt).toBeNull();
354
- });
355
-
356
- test("start() schedules timers when only the config is on", () => {
357
- _setOverridesForTesting({ "memory-v2-enabled": false });
358
- mockConfig.memory.v2.enabled = true;
359
-
360
- const service = createService();
361
- service.start();
362
-
363
346
  expect(service.nextRunAt).not.toBeNull();
364
347
  expect(service.nextCompactionAt).not.toBeNull();
365
348
  service.stop();
@@ -5,17 +5,16 @@
5
5
  * One representative call site (the `installSkill` bundled branch) is
6
6
  * exercised — all 5 sites share the same delegation to
7
7
  * `maybeSeedMemoryV2Skills`, so a single suite covers behavior. Validates:
8
- * - flag + config both on → helper invoked after seedSkillGraphNodes
9
- * and the seed observed (callOrder picks up "v2")
10
- * - flag off → helper still invoked, but the seed short-circuits
11
- * - config.memory.v2.enabled off → helper still invoked, seed short-circuits
8
+ * - config on → helper invoked after seedSkillGraphNodes and the seed
9
+ * observed (callOrder picks up "v2")
10
+ * - config off → helper still invoked, but the seed short-circuits
12
11
  *
13
12
  * The handler delegates to `maybeSeedMemoryV2Skills` from
14
13
  * `daemon/memory-v2-startup.ts`. We mock that module directly so the test
15
14
  * does not have to drain the dynamic-import microtask chain. The helper's
16
- * gate semantics (flag + config + rejection swallowing) are covered by
17
- * `lifecycle-memory-v2-seed.test.ts`; here we only verify that the
18
- * handler invokes the helper synchronously with the live config.
15
+ * gate semantics are covered by `lifecycle-memory-v2-seed.test.ts`; here
16
+ * we only verify that the handler invokes the helper synchronously with
17
+ * the live config.
19
18
  */
20
19
  import { beforeEach, describe, expect, mock, test } from "bun:test";
21
20
 
@@ -23,7 +22,7 @@ import { beforeEach, describe, expect, mock, test } from "bun:test";
23
22
  // Programmable test state
24
23
  // ---------------------------------------------------------------------------
25
24
 
26
- const flagsState = { flagEnabled: true, configV2Enabled: true };
25
+ const flagsState = { configV2Enabled: true };
27
26
 
28
27
  const callOrder: string[] = [];
29
28
 
@@ -55,10 +54,7 @@ mock.module("../config/skills.js", () => ({
55
54
  }));
56
55
 
57
56
  mock.module("../config/assistant-feature-flags.js", () => ({
58
- isAssistantFeatureFlagEnabled: (key: string) => {
59
- if (key === "memory-v2-enabled") return flagsState.flagEnabled;
60
- return true;
61
- },
57
+ isAssistantFeatureFlagEnabled: () => true,
62
58
  }));
63
59
 
64
60
  // Stub both `getConfig` and `loadConfig`. `loadConfig` is reached by code
@@ -220,18 +216,17 @@ const { installSkill } = await import("../daemon/handlers/skills.js");
220
216
 
221
217
  describe("v2 skill re-seed gating in skill handlers", () => {
222
218
  beforeEach(() => {
223
- flagsState.flagEnabled = true;
224
219
  flagsState.configV2Enabled = true;
225
220
  callOrder.length = 0;
226
221
  mockSeedSkillGraphNodes.mockClear();
227
222
  mockMaybeSeedMemoryV2Skills.mockClear();
228
223
  mockMaybeSeedMemoryV2Skills.mockImplementation((config) => {
229
- if (!flagsState.flagEnabled || !config.memory.v2.enabled) return;
224
+ if (!config.memory.v2.enabled) return;
230
225
  callOrder.push("v2");
231
226
  });
232
227
  });
233
228
 
234
- test("flag + config both on → maybeSeedMemoryV2Skills invoked after seedSkillGraphNodes", async () => {
229
+ test("config on → maybeSeedMemoryV2Skills invoked after seedSkillGraphNodes", async () => {
235
230
  const result = await installSkill({ slug: "bundled-skill" });
236
231
 
237
232
  expect(result.success).toBe(true);
@@ -240,17 +235,6 @@ describe("v2 skill re-seed gating in skill handlers", () => {
240
235
  expect(callOrder).toEqual(["v1", "v2"]);
241
236
  });
242
237
 
243
- test("flag off → seed mock observes the disabled flag and skips", async () => {
244
- flagsState.flagEnabled = false;
245
-
246
- const result = await installSkill({ slug: "bundled-skill" });
247
-
248
- expect(result.success).toBe(true);
249
- expect(mockSeedSkillGraphNodes).toHaveBeenCalledTimes(1);
250
- expect(mockMaybeSeedMemoryV2Skills).toHaveBeenCalledTimes(1);
251
- expect(callOrder).toEqual(["v1"]);
252
- });
253
-
254
238
  test("config.memory.v2.enabled off → seed mock observes config and skips", async () => {
255
239
  flagsState.configV2Enabled = false;
256
240