@vellumai/assistant 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/ARCHITECTURE.md +29 -28
  2. package/Dockerfile +1 -0
  3. package/__tests__/permissions/gateway-threshold-reader.test.ts +236 -9
  4. package/bun.lock +3 -0
  5. package/knip.json +1 -0
  6. package/node_modules/@vellumai/ipc-server-utils/bun.lock +24 -0
  7. package/node_modules/@vellumai/ipc-server-utils/package.json +18 -0
  8. package/node_modules/@vellumai/ipc-server-utils/src/index.ts +6 -0
  9. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.test.ts +430 -0
  10. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.ts +221 -0
  11. package/node_modules/@vellumai/ipc-server-utils/tsconfig.json +20 -0
  12. package/openapi.yaml +22 -4
  13. package/package.json +3 -1
  14. package/src/__tests__/annotate-risk-options.test.ts +291 -0
  15. package/src/__tests__/approval-cascade.test.ts +8 -16
  16. package/src/__tests__/approval-routes-http.test.ts +6 -0
  17. package/src/__tests__/auto-analysis-end-to-end.test.ts +12 -25
  18. package/src/__tests__/call-constants.test.ts +10 -1
  19. package/src/__tests__/call-controller.test.ts +127 -0
  20. package/src/__tests__/cli-memory-v2-reembed-skills.test.ts +58 -28
  21. package/src/__tests__/config-loader-platform-defaults.test.ts +284 -1
  22. package/src/__tests__/context-search-memory-source.test.ts +3 -26
  23. package/src/__tests__/context-search-pkb-source.test.ts +12 -6
  24. package/src/__tests__/conversation-abort-tool-results.test.ts +1 -6
  25. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
  26. package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
  27. package/src/__tests__/conversation-agent-loop.test.ts +3 -3
  28. package/src/__tests__/conversation-confirmation-signals.test.ts +5 -13
  29. package/src/__tests__/conversation-init.benchmark.test.ts +1 -1
  30. package/src/__tests__/conversation-process-callsite.test.ts +1 -6
  31. package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -6
  32. package/src/__tests__/conversation-runtime-assembly.test.ts +15 -6
  33. package/src/__tests__/conversation-slash-unknown.test.ts +1 -6
  34. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +170 -9
  35. package/src/__tests__/conversation-surfaces-data-persist.test.ts +73 -1
  36. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +59 -0
  37. package/src/__tests__/conversation-workspace-injection.test.ts +1 -7
  38. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -7
  39. package/src/__tests__/filing-service.test.ts +2 -19
  40. package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +10 -26
  41. package/src/__tests__/injector-chain.test.ts +24 -16
  42. package/src/__tests__/injector-pkb-v2-silenced.test.ts +10 -7
  43. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +154 -67
  44. package/src/__tests__/notification-decision-fallback.test.ts +91 -0
  45. package/src/__tests__/notification-decision-strategy.test.ts +22 -0
  46. package/src/__tests__/oauth-cli.test.ts +121 -0
  47. package/src/__tests__/relay-server.test.ts +46 -2
  48. package/src/__tests__/secret-prompt-log-hygiene.test.ts +7 -5
  49. package/src/__tests__/secret-prompter-channel-fallback.test.ts +7 -5
  50. package/src/__tests__/secret-response-routing.test.ts +7 -5
  51. package/src/__tests__/server-history-render.test.ts +82 -0
  52. package/src/__tests__/skill-include-graph.test.ts +31 -0
  53. package/src/__tests__/skill-load-tool.test.ts +44 -16
  54. package/src/__tests__/skills.test.ts +39 -0
  55. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -42
  56. package/src/__tests__/tool-executor.test.ts +155 -0
  57. package/src/__tests__/voice-session-bridge.test.ts +3 -0
  58. package/src/__tests__/workspace-migration-069-seed-onboarding-threads.test.ts +120 -0
  59. package/src/__tests__/workspace-migration-071-remove-safe-storage-release-note.test.ts +206 -0
  60. package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +15 -27
  61. package/src/agent/loop.ts +11 -0
  62. package/src/approvals/guardian-decision-primitive.ts +0 -13
  63. package/src/approvals/guardian-request-resolvers.ts +4 -32
  64. package/src/calls/call-constants.ts +5 -8
  65. package/src/calls/call-controller.ts +130 -67
  66. package/src/calls/relay-server.ts +7 -1
  67. package/src/calls/voice-session-bridge.ts +1 -1
  68. package/src/cli/commands/memory-v2.ts +7 -7
  69. package/src/cli/commands/oauth/__tests__/connect.test.ts +0 -254
  70. package/src/cli/commands/oauth/connect.ts +10 -52
  71. package/src/config/bundled-skills/app-builder/SKILL.md +1 -3
  72. package/src/config/feature-flag-registry.json +1 -17
  73. package/src/config/loader.ts +72 -19
  74. package/src/config/schemas/memory-v2.ts +1 -1
  75. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +32 -0
  76. package/src/daemon/conversation-agent-loop-handlers.ts +32 -0
  77. package/src/daemon/conversation-agent-loop.ts +13 -10
  78. package/src/daemon/conversation-lifecycle.ts +22 -8
  79. package/src/daemon/conversation-surfaces.ts +16 -14
  80. package/src/daemon/conversation-tool-setup.ts +9 -5
  81. package/src/daemon/conversation.ts +1 -1
  82. package/src/daemon/handlers/shared.ts +26 -0
  83. package/src/daemon/host-bash-proxy.ts +1 -1
  84. package/src/daemon/host-browser-proxy.ts +1 -1
  85. package/src/daemon/host-cu-proxy.ts +1 -1
  86. package/src/daemon/host-file-proxy.ts +1 -1
  87. package/src/daemon/host-transfer-proxy.ts +2 -2
  88. package/src/daemon/lifecycle.ts +88 -73
  89. package/src/daemon/memory-v2-startup.ts +55 -14
  90. package/src/daemon/message-types/messages.ts +19 -1
  91. package/src/documents/document-store.ts +35 -1
  92. package/src/filing/filing-service.ts +2 -3
  93. package/src/heartbeat/heartbeat-service.ts +1 -1
  94. package/src/ipc/assistant-server.ts +93 -36
  95. package/src/ipc/skill-server.ts +99 -42
  96. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +10 -57
  97. package/src/memory/context-search/sources/memory-v2.ts +1 -17
  98. package/src/memory/context-search/sources/memory.ts +2 -2
  99. package/src/memory/context-search/sources/pkb.ts +2 -3
  100. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +104 -61
  101. package/src/memory/graph/__tests__/handle-remember-v2.test.ts +11 -26
  102. package/src/memory/graph/conversation-graph-memory.ts +32 -9
  103. package/src/memory/graph/graph-search.test.ts +6 -5
  104. package/src/memory/graph/graph-search.ts +3 -4
  105. package/src/memory/graph/retriever.test.ts +12 -7
  106. package/src/memory/graph/retriever.ts +4 -5
  107. package/src/memory/graph/tool-handlers.ts +3 -4
  108. package/src/memory/graph/tools.ts +4 -4
  109. package/src/memory/indexer.ts +1 -2
  110. package/src/memory/jobs/__tests__/embed-concept-page.test.ts +116 -0
  111. package/src/memory/jobs/embed-concept-page.ts +223 -87
  112. package/src/memory/jobs-worker.ts +8 -4
  113. package/src/memory/pkb/pkb-search.test.ts +6 -5
  114. package/src/memory/pkb/pkb-search.ts +4 -5
  115. package/src/memory/qdrant-client.ts +3 -0
  116. package/src/memory/search/semantic.ts +4 -5
  117. package/src/memory/v2/__tests__/activation.test.ts +35 -5
  118. package/src/memory/v2/__tests__/consolidation-job.test.ts +21 -32
  119. package/src/memory/v2/__tests__/injection.test.ts +140 -23
  120. package/src/memory/v2/__tests__/qdrant.test.ts +310 -9
  121. package/src/memory/v2/__tests__/sim.test.ts +118 -7
  122. package/src/memory/v2/__tests__/static-context.test.ts +1 -13
  123. package/src/memory/v2/__tests__/sweep-job.test.ts +19 -33
  124. package/src/memory/v2/consolidation-job.ts +7 -8
  125. package/src/memory/v2/injection.ts +32 -12
  126. package/src/memory/v2/page-store.ts +39 -0
  127. package/src/memory/v2/prompts/consolidation.ts +5 -0
  128. package/src/memory/v2/qdrant.ts +209 -48
  129. package/src/memory/v2/sim.ts +67 -26
  130. package/src/memory/v2/static-context.ts +4 -8
  131. package/src/memory/v2/sweep-job.ts +5 -6
  132. package/src/memory/v2/types.ts +7 -0
  133. package/src/notifications/copy-composer.ts +46 -12
  134. package/src/notifications/decision-engine.ts +46 -0
  135. package/src/permissions/gateway-threshold-reader.ts +116 -8
  136. package/src/permissions/prompter.ts +86 -96
  137. package/src/permissions/secret-prompter.ts +31 -31
  138. package/src/plugins/defaults/injectors.ts +1 -2
  139. package/src/proactive-artifact/job.test.ts +51 -4
  140. package/src/proactive-artifact/job.ts +16 -2
  141. package/src/proactive-artifact/message-copy.ts +18 -1
  142. package/src/prompts/templates/SOUL.md +13 -28
  143. package/src/runtime/auth/route-policy.ts +1 -0
  144. package/src/runtime/channel-approvals.ts +3 -2
  145. package/src/runtime/guardian-reply-router.ts +0 -10
  146. package/src/runtime/pending-interactions.ts +19 -15
  147. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +147 -0
  148. package/src/runtime/routes/approval-routes.ts +7 -3
  149. package/src/runtime/routes/consolidation-routes.ts +8 -9
  150. package/src/runtime/routes/conversation-query-routes.ts +44 -1
  151. package/src/runtime/routes/debug-bash-routes.ts +2 -0
  152. package/src/runtime/routes/filing-routes.ts +2 -3
  153. package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +0 -3
  154. package/src/runtime/routes/memory-item-routes.test.ts +3 -9
  155. package/src/runtime/routes/memory-item-routes.ts +5 -6
  156. package/src/runtime/routes/memory-v2-routes.ts +103 -17
  157. package/src/skills/include-graph.ts +35 -13
  158. package/src/tools/document/document-tool.ts +20 -0
  159. package/src/tools/executor.ts +18 -2
  160. package/src/tools/memory/register.test.ts +7 -5
  161. package/src/tools/permission-checker.ts +15 -0
  162. package/src/tools/skills/load.ts +24 -20
  163. package/src/tools/tool-name-aliases.ts +19 -0
  164. package/src/tools/types.ts +19 -1
  165. package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +4 -62
  166. package/src/workspace/migrations/069-seed-onboarding-threads.ts +28 -0
  167. package/src/workspace/migrations/070-memory-v2-summary-schema-rebuild.ts +31 -0
  168. package/src/workspace/migrations/071-remove-safe-storage-release-note.ts +111 -0
  169. package/src/workspace/migrations/registry.ts +6 -0
@@ -6,8 +6,6 @@ metadata:
6
6
  emoji: "🏗️"
7
7
  vellum:
8
8
  display-name: "App Builder"
9
- includes:
10
- - "frontend-design"
11
9
  activation-hints:
12
10
  - "User asks to build an app, landing page, website, dashboard, tool, calculator, game, tracker, or interactive page"
13
11
  - "User asks to visualize data or says 'let's visualize this' — use the app sandbox to build interactive visualizations"
@@ -20,7 +18,7 @@ You are an expert app builder and visual designer. When the user asks you to cre
20
18
 
21
19
  **Your default behavior:** Build immediately. The user types "build me a habit tracker" and you deliver a complete, polished app with a domain-matched color palette, atmospheric background, and thoughtful interactions. Don't ask what colors they want. Don't show wireframes. Just build something stunning and let them refine from there.
22
20
 
23
- **Design quality is delegated to the `frontend-design` skill.** That skill defines your aesthetic principles: typography, color strategy, motion, spatial composition, and visual detail. Follow it completely for every build. This skill (app-builder) handles the technical infrastructure: sandbox constraints, data bridge, widget API, app lifecycle, and interaction patterns.
21
+ **Design quality is delegated to the `frontend-design` skill, so you must also load/install that before proceeding.** That skill defines your aesthetic principles: typography, color strategy, motion, spatial composition, and visual detail. Follow it completely for every build. This skill (app-builder) handles the technical infrastructure: sandbox constraints, data bridge, widget API, app lifecycle, and interaction patterns.
24
22
 
25
23
  ## Filesystem Layout
26
24
 
@@ -249,14 +249,6 @@
249
249
  "description": "Enable disk pressure protection flows that block background work and remote actors while storage is critically low.",
250
250
  "defaultEnabled": false
251
251
  },
252
- {
253
- "id": "memory-v2-enabled",
254
- "scope": "assistant",
255
- "key": "memory-v2-enabled",
256
- "label": "Memory v2 (concept-page activation model)",
257
- "description": "Enables the v2 memory subsystem: prose concept pages with bidirectional edges, activation-based retrieval, and hourly LLM-driven consolidation. When on, v1 graph extraction/maintenance and PKB filing are suppressed; flipping the flag back off re-engages the full v1 pipeline.",
258
- "defaultEnabled": true
259
- },
260
252
  {
261
253
  "id": "account-deletion",
262
254
  "scope": "client",
@@ -286,15 +278,7 @@
286
278
  "scope": "assistant",
287
279
  "key": "pro-plan-adjust",
288
280
  "label": "Pro Plan Adjust",
289
- "description": "Show the rich Plan card (current plan, features, Manage/Upgrade CTA) at the top of the macOS Settings → Billing tab. The 'Configure Auto Top Ups' CTA is gated separately on `auto-credit-topup`.",
290
- "defaultEnabled": false
291
- },
292
- {
293
- "id": "auto-credit-topup",
294
- "scope": "assistant",
295
- "key": "auto-credit-topup",
296
- "label": "Auto Credit Top-Up",
297
- "description": "Show the 'Configure Auto Top Ups' CTA in the macOS Settings → Billing tab. Mirrors the platform web flag of the same name that gates the auto-reload card and /v1/organizations/billing/auto-top-up/ API.",
281
+ "description": "Show the rich Plan card (current plan, features, Manage/Upgrade CTA) at the top of the macOS Settings → Billing tab.",
298
282
  "defaultEnabled": false
299
283
  }
300
284
  ]
@@ -108,14 +108,16 @@ function cloneDefaultConfig(): AssistantConfig {
108
108
 
109
109
  /**
110
110
  * Returns deployment-context-aware config defaults that override schema
111
- * defaults for platform-managed assistants. Only applied when initializing
112
- * a fresh config (config.json does not yet exist).
111
+ * defaults for platform-managed assistants. Applied to every `loadConfig()`
112
+ * call as a fill-only pass — they only fill keys that are absent from the
113
+ * raw config on disk, so an explicit user choice (e.g. saving "your-own"
114
+ * via the macOS Models & Services UI) always wins.
113
115
  *
114
116
  * IS_PLATFORM is set by the Vellum platform launcher for all hosted
115
117
  * assistant deployments. Local, Docker, and bare-metal assistants are
116
118
  * unaffected.
117
119
  */
118
- function getDeploymentContextDefaults(): Record<string, unknown> {
120
+ export function getDeploymentContextDefaults(): Record<string, unknown> {
119
121
  if (process.env.IS_PLATFORM !== "true" && process.env.IS_PLATFORM !== "1") {
120
122
  return {};
121
123
  }
@@ -138,6 +140,49 @@ function getDeploymentContextDefaults(): Record<string, unknown> {
138
140
  };
139
141
  }
140
142
 
143
+ /**
144
+ * Apply `contextDefaults` to `target` for any leaf keys that are absent from
145
+ * `fileConfig` (the raw config-on-disk payload). Mutates `target` in place.
146
+ *
147
+ * "Absent" is checked at the leaf level by walking the `contextDefaults`
148
+ * shape: nested objects recurse so a partial override on disk (e.g.
149
+ * `{services: {inference: {model: "x"}}}` with no explicit `mode`) lets the
150
+ * context default for `mode` win while leaving the user's `model` untouched.
151
+ *
152
+ * Pre-condition: `target` has already been passed through `validateWithSchema`
153
+ * so every nested object in `contextDefaults` has a corresponding object in
154
+ * `target`. The defensive whole-subtree assignment in the `!targetChild`
155
+ * branch only fires for malformed inputs.
156
+ */
157
+ export function fillContextDefaultsForMissingKeys(
158
+ target: Record<string, unknown>,
159
+ fileConfig: Record<string, unknown>,
160
+ contextDefaults: Record<string, unknown>,
161
+ ): void {
162
+ for (const [key, value] of Object.entries(contextDefaults)) {
163
+ const fileVal = fileConfig[key];
164
+ if (
165
+ value !== null &&
166
+ typeof value === "object" &&
167
+ !Array.isArray(value)
168
+ ) {
169
+ const targetChild = readPlainObject(target[key]);
170
+ const fileChild = readPlainObject(fileVal);
171
+ if (targetChild) {
172
+ fillContextDefaultsForMissingKeys(
173
+ targetChild,
174
+ fileChild ?? {},
175
+ value as Record<string, unknown>,
176
+ );
177
+ } else {
178
+ target[key] = structuredClone(value);
179
+ }
180
+ } else if (fileVal === undefined) {
181
+ target[key] = value;
182
+ }
183
+ }
184
+ }
185
+
141
186
  /**
142
187
  * Build a filesystem-safe ISO-8601 timestamp for use in quarantine filenames.
143
188
  * Replaces `:` (invalid on Windows, confusing on macOS Finder) with `-` so the
@@ -665,11 +710,31 @@ export function loadConfig(): AssistantConfig {
665
710
  }
666
711
  }
667
712
 
713
+ // Layer deployment-context defaults (e.g. IS_PLATFORM=true → all service
714
+ // modes = "managed") onto the in-memory config for any leaves that aren't
715
+ // explicitly set in `fileConfig`. This runs on every load — not just the
716
+ // first — because the workspace config file is written by upstream
717
+ // lifecycle steps (`mergeDefaultWorkspaceConfig`, `seedInferenceProfiles`)
718
+ // before `loadConfig()` is reached. Gating on `!configFileExisted` would
719
+ // make the context defaults dead code on platform-managed daemons whose
720
+ // config.json was created by those earlier steps without service-mode
721
+ // entries. Explicit user choices on disk are preserved because the helper
722
+ // only fills missing keys.
723
+ const contextDefaults = getDeploymentContextDefaults();
724
+ if (Object.keys(contextDefaults).length > 0) {
725
+ fillContextDefaultsForMissingKeys(
726
+ config as unknown as Record<string, unknown>,
727
+ fileConfig,
728
+ contextDefaults,
729
+ );
730
+ }
731
+
668
732
  // First-launch seed only: when config.json does not exist, write the full
669
- // schema defaults to disk so users can discover and edit all available
670
- // options. When the file already exists, leave it alone disk represents
671
- // user intent, while the in-memory `cached: AssistantConfig` (above) has
672
- // all schema defaults applied via `applyNestedDefaults`/`validateWithSchema`,
733
+ // schema defaults (with any deployment-context overrides already applied
734
+ // above) to disk so users can discover and edit all available options.
735
+ // When the file already exists, leave it alone — disk represents user
736
+ // intent, while the in-memory `cached: AssistantConfig` (above) has all
737
+ // schema defaults applied via `applyNestedDefaults`/`validateWithSchema`,
673
738
  // so consumers calling `getConfig().memory.v2.bm25_b` continue to receive
674
739
  // the schema default whenever the field is absent on disk.
675
740
  //
@@ -687,18 +752,6 @@ export function loadConfig(): AssistantConfig {
687
752
  }
688
753
  // Strip dataDir (runtime-derived) from the persisted config
689
754
  const { dataDir: _, ...persistable } = config;
690
-
691
- // Layer deployment context defaults on top of schema defaults.
692
- // These are overrides the daemon derives from its environment (e.g.
693
- // IS_PLATFORM → all service modes = "managed"). Schema defaults
694
- // remain the fallback for non-platform deployments.
695
- const contextDefaults = getDeploymentContextDefaults();
696
- if (Object.keys(contextDefaults).length > 0) {
697
- deepMergeOverwrite(
698
- persistable as Record<string, unknown>,
699
- contextDefaults,
700
- );
701
- }
702
755
  writeFileSync(configPath, JSON.stringify(persistable, null, 2) + "\n");
703
756
  log.info("Wrote default config to %s", configPath);
704
757
  } catch (err) {
@@ -50,7 +50,7 @@ export const MemoryV2ConfigSchema = z
50
50
  .boolean({ error: "memory.v2.enabled must be a boolean" })
51
51
  .default(true)
52
52
  .describe(
53
- "Whether the v2 memory subsystem (concept-page activation model) is enabled. Independent of the memory-v2-enabled feature flag — both must be true for v2 to run.",
53
+ "Whether the v2 memory subsystem (concept-page activation model) is enabled.",
54
54
  ),
55
55
  sweep_enabled: z
56
56
  .boolean({ error: "memory.v2.sweep_enabled must be a boolean" })
@@ -56,6 +56,16 @@ let autoAnalyzeEnabled = true;
56
56
  // `disposeConversation` must skip the `graph_extract` enqueue.
57
57
  const autoAnalysisConversations = new Set<string>();
58
58
 
59
+ // Toggles the `memory.v2.enabled` flag the disposal code reads via
60
+ // `getConfig()`. Defaults to false so the bulk of the suite — which asserts
61
+ // v1 graph_extract still fires — keeps its semantics. The dedicated v2 cases
62
+ // flip this to true.
63
+ let v2Enabled = false;
64
+
65
+ mock.module("../../config/loader.js", () => ({
66
+ getConfig: () => ({ memory: { v2: { enabled: v2Enabled } } }),
67
+ }));
68
+
59
69
  mock.module("../../memory/auto-analysis-guard.js", () => ({
60
70
  AUTO_ANALYSIS_SOURCE: "auto-analysis",
61
71
  isAutoAnalysisConversation: (conversationId: string) =>
@@ -160,6 +170,7 @@ describe("disposeConversation — auto-analysis enqueue", () => {
160
170
  autoAnalyzeCalls.length = 0;
161
171
  autoAnalyzeEnabled = true;
162
172
  autoAnalysisConversations.clear();
173
+ v2Enabled = false;
163
174
  });
164
175
 
165
176
  test("guardian conversation with auto-analyze ON — enqueues both graph_extract and conversation_analyze (via helper)", () => {
@@ -313,4 +324,25 @@ describe("disposeConversation — auto-analysis enqueue", () => {
313
324
  }));
314
325
  autoAnalyzeEnabled = originalEnabled;
315
326
  });
327
+
328
+ test("memory v2 enabled — graph_extract enqueue is suppressed (auto-analysis still runs)", () => {
329
+ // Under memory v2, the v1 graph has no readers (retrieval is bypassed at
330
+ // conversation-graph-memory.ts), so producing extraction jobs just fills
331
+ // the queue with stale work. Auto-analysis is orthogonal and must keep
332
+ // running.
333
+ v2Enabled = true;
334
+ const ctx = makeDisposeContext({
335
+ conversationId: "conv-v2-on",
336
+ trustClass: "guardian",
337
+ });
338
+
339
+ disposeConversation(ctx);
340
+
341
+ expect(memoryJobCalls).toHaveLength(0);
342
+ expect(autoAnalyzeCalls).toHaveLength(1);
343
+ expect(autoAnalyzeCalls[0]).toEqual({
344
+ conversationId: "conv-v2-on",
345
+ trigger: "lifecycle",
346
+ });
347
+ });
316
348
  });
@@ -170,6 +170,16 @@ export interface EventHandlerState {
170
170
  approvalMode?: string;
171
171
  approvalReason?: string;
172
172
  riskThreshold?: string;
173
+ /** Display-only regex ladder for the rule editor (narrowest → broadest). */
174
+ riskScopeOptions?: Array<{ pattern: string; label: string }>;
175
+ /** Minimatch save patterns for the rule editor (narrowest → broadest). */
176
+ riskAllowlistOptions?: Array<{
177
+ label: string;
178
+ description: string;
179
+ pattern: string;
180
+ }>;
181
+ /** Directory scope ladder for the rule editor. */
182
+ riskDirectoryScopeOptions?: Array<{ scope: string; label: string }>;
173
183
  }
174
184
  >;
175
185
  /** tool_use_ids emitted in the current turn (populated in handleToolUse, cleared after annotation). */
@@ -554,6 +564,14 @@ export function handleToolResult(
554
564
  approvalMode: event.approvalMode,
555
565
  approvalReason: event.approvalReason,
556
566
  riskThreshold: event.riskThreshold,
567
+ // Capture the 3 risk-option arrays so the persisted tool_use block
568
+ // carries the same chip ladder as the live tool_result event. Without
569
+ // these, hydrated chips from chat history fall back to the synthesized
570
+ // `*` allowlist and an empty scope ladder (see the comment on
571
+ // `synthesizeFallbackOption` in web's RuleEditorModal).
572
+ riskScopeOptions: event.riskScopeOptions,
573
+ riskAllowlistOptions: event.riskAllowlistOptions,
574
+ riskDirectoryScopeOptions: event.riskDirectoryScopeOptions,
557
575
  });
558
576
  }
559
577
 
@@ -633,6 +651,7 @@ export function handleToolResult(
633
651
  matchedTrustRuleId: event.matchedTrustRuleId,
634
652
  isContainerized: event.isContainerized,
635
653
  riskScopeOptions: event.riskScopeOptions,
654
+ riskAllowlistOptions: event.riskAllowlistOptions,
636
655
  riskDirectoryScopeOptions: event.riskDirectoryScopeOptions,
637
656
  approvalMode: event.approvalMode,
638
657
  approvalReason: event.approvalReason,
@@ -694,6 +713,19 @@ function annotatePersistedAssistantMessage(
694
713
  if (risk.approvalMode) rec._approvalMode = risk.approvalMode;
695
714
  if (risk.approvalReason) rec._approvalReason = risk.approvalReason;
696
715
  if (risk.riskThreshold) rec._riskThreshold = risk.riskThreshold;
716
+ // Persist the 3 risk-option arrays so the rule editor's chip ladder
717
+ // survives chat-history reload. These mirror the same-named fields
718
+ // on the live `tool_result` event; clients should read them back via
719
+ // `shared.ts` and treat them identically to the live values.
720
+ if (risk.riskScopeOptions && risk.riskScopeOptions.length > 0)
721
+ rec._riskScopeOptions = risk.riskScopeOptions;
722
+ if (risk.riskAllowlistOptions && risk.riskAllowlistOptions.length > 0)
723
+ rec._riskAllowlistOptions = risk.riskAllowlistOptions;
724
+ if (
725
+ risk.riskDirectoryScopeOptions &&
726
+ risk.riskDirectoryScopeOptions.length > 0
727
+ )
728
+ rec._riskDirectoryScopeOptions = risk.riskDirectoryScopeOptions;
697
729
  modified = true;
698
730
  }
699
731
  }
@@ -986,7 +986,7 @@ export async function runAgentLoopImpl(
986
986
  compactableStartIndex: 1,
987
987
  };
988
988
  };
989
- const applySuccessfulCompaction = (
989
+ const applySuccessfulCompaction = async (
990
990
  result: Awaited<ReturnType<typeof ctx.contextWindowManager.maybeCompact>>,
991
991
  compactedBasis?: Message[],
992
992
  ) => {
@@ -1000,7 +1000,7 @@ export async function runAgentLoopImpl(
1000
1000
  provenanceContext,
1001
1001
  result.compactedMessages,
1002
1002
  );
1003
- applyCompactionResult(ctx, result, onEvent, reqId, {
1003
+ await applyCompactionResult(ctx, result, onEvent, reqId, {
1004
1004
  slackContextCompactionWatermarkTs: slackWatermarkTs,
1005
1005
  });
1006
1006
  currentSlackContextSummary = result.summaryText;
@@ -1087,7 +1087,10 @@ export async function runAgentLoopImpl(
1087
1087
  await trackCompactionOutcome(ctx, compacted.summaryFailed, onEvent);
1088
1088
  }
1089
1089
  if (compacted?.compacted) {
1090
- applySuccessfulCompaction(compacted, messagesForStartOfTurnCompaction);
1090
+ await applySuccessfulCompaction(
1091
+ compacted,
1092
+ messagesForStartOfTurnCompaction,
1093
+ );
1091
1094
  shouldInjectWorkspace = true;
1092
1095
  if (compacted.compactedPersistedMessages > 0) {
1093
1096
  compactedThisTurn = true;
@@ -1790,7 +1793,7 @@ export async function runAgentLoopImpl(
1790
1793
  await trackCompactionOutcome(ctx, result.summaryFailed, onEvent);
1791
1794
  }
1792
1795
  if (result.compacted) {
1793
- applySuccessfulCompaction(result, compactedBasis);
1796
+ await applySuccessfulCompaction(result, compactedBasis);
1794
1797
  shouldInjectWorkspace = true;
1795
1798
  }
1796
1799
  },
@@ -2119,7 +2122,7 @@ export async function runAgentLoopImpl(
2119
2122
  );
2120
2123
  }
2121
2124
  if (midLoopCompact.compacted) {
2122
- applySuccessfulCompaction(midLoopCompact, rawHistory);
2125
+ await applySuccessfulCompaction(midLoopCompact, rawHistory);
2123
2126
  reducerCompacted = true;
2124
2127
  shouldInjectWorkspace = true;
2125
2128
  }
@@ -2371,7 +2374,7 @@ export async function runAgentLoopImpl(
2371
2374
  }
2372
2375
 
2373
2376
  if (step.compactionResult?.compacted) {
2374
- applySuccessfulCompaction(
2377
+ await applySuccessfulCompaction(
2375
2378
  step.compactionResult,
2376
2379
  convergenceCompactionBasis,
2377
2380
  );
@@ -2537,7 +2540,7 @@ export async function runAgentLoopImpl(
2537
2540
  );
2538
2541
  }
2539
2542
  if (emergencyCompact?.compacted) {
2540
- applySuccessfulCompaction(emergencyCompact, ctx.messages);
2543
+ await applySuccessfulCompaction(emergencyCompact, ctx.messages);
2541
2544
  reducerCompacted = true;
2542
2545
  shouldInjectWorkspace = true;
2543
2546
  }
@@ -3223,7 +3226,7 @@ export interface CompactionApplyContext {
3223
3226
  * truth for the UI indicator after compaction. Emitting both caused a
3224
3227
  * redundant SwiftUI invalidation on every compaction.
3225
3228
  */
3226
- export function applyCompactionResult(
3229
+ export async function applyCompactionResult(
3227
3230
  ctx: CompactionApplyContext,
3228
3231
  result: {
3229
3232
  messages: Message[];
@@ -3249,12 +3252,12 @@ export function applyCompactionResult(
3249
3252
  options: {
3250
3253
  slackContextCompactionWatermarkTs?: string | null;
3251
3254
  } = {},
3252
- ): void {
3255
+ ): Promise<void> {
3253
3256
  ctx.messages = result.messages;
3254
3257
  ctx.contextCompactedMessageCount += result.compactedPersistedMessages;
3255
3258
  const compactedAt = Date.now();
3256
3259
  ctx.contextCompactedAt = compactedAt;
3257
- ctx.graphMemory.onCompacted(result.compactedPersistedMessages);
3260
+ await ctx.graphMemory.onCompacted(result.compactedPersistedMessages);
3258
3261
  updateConversationContextWindow(
3259
3262
  ctx.conversationId,
3260
3263
  result.summaryText,
@@ -4,6 +4,7 @@
4
4
  * can delegate without exposing its full surface.
5
5
  */
6
6
 
7
+ import { getConfig } from "../config/loader.js";
7
8
  import { createContextSummaryMessage } from "../context/window-manager.js";
8
9
  import type { EventBus } from "../events/bus.js";
9
10
  import type { AssistantDomainEvents } from "../events/domain-events.js";
@@ -373,16 +374,29 @@ export function disposeConversation(ctx: DisposeContext): void {
373
374
  // Best-effort — don't block conversation disposal
374
375
  }
375
376
  if (!isAutoAnalysis) {
377
+ // Suppress v1 graph extraction when memory v2 is active — v2 reads
378
+ // from buffer.md and concept pages, so the v1 graph would be stale
379
+ // data nobody consumes. Mirrors the gate applied in `indexer.ts`
380
+ // for the per-message indexing path. Fail open to v1 if config
381
+ // can't load, since the worker handler also short-circuits.
382
+ let v2Enabled = false;
376
383
  try {
377
- enqueueMemoryJob("graph_extract", {
378
- conversationId: ctx.conversationId,
379
- scopeId: "default",
380
- ...(ctx.activeContextNodeIds?.length
381
- ? { activeContextNodeIds: ctx.activeContextNodeIds }
382
- : {}),
383
- });
384
+ v2Enabled = getConfig().memory.v2.enabled;
384
385
  } catch {
385
- // Best-effort — don't block conversation disposal
386
+ // Best-effort — fall through to legacy v1 enqueue
387
+ }
388
+ if (!v2Enabled) {
389
+ try {
390
+ enqueueMemoryJob("graph_extract", {
391
+ conversationId: ctx.conversationId,
392
+ scopeId: "default",
393
+ ...(ctx.activeContextNodeIds?.length
394
+ ? { activeContextNodeIds: ctx.activeContextNodeIds }
395
+ : {}),
396
+ });
397
+ } catch {
398
+ // Best-effort — don't block conversation disposal
399
+ }
386
400
  }
387
401
  }
388
402
 
@@ -1159,6 +1159,7 @@ export async function handleSurfaceAction(
1159
1159
  summary,
1160
1160
  submittedData: data,
1161
1161
  });
1162
+ markSurfaceCompleted(ctx, surfaceId, summary);
1162
1163
 
1163
1164
  // Cleanup and resolve — order matters: cleanup clears the timer
1164
1165
  // before resolve() unblocks the caller.
@@ -1505,20 +1506,6 @@ export async function handleSurfaceAction(
1505
1506
  surfaceData,
1506
1507
  );
1507
1508
 
1508
- // Forms are one-shot surfaces — auto-complete immediately so the client
1509
- // transitions from the "Submitting…" spinner to a completion chip without
1510
- // requiring the LLM to call ui_dismiss.
1511
- if (pending.surfaceType === "form") {
1512
- broadcastMessage({
1513
- type: "ui_surface_complete",
1514
- conversationId: ctx.conversationId,
1515
- surfaceId,
1516
- summary,
1517
- submittedData: mergedData,
1518
- });
1519
- markSurfaceCompleted(ctx, surfaceId, summary);
1520
- }
1521
-
1522
1509
  // Extract file attachments from action data so they are sent as proper
1523
1510
  // image/file content blocks instead of dumping base64 into the text.
1524
1511
  let pendingAttachments: UserMessageAttachment[] = [];
@@ -1648,6 +1635,21 @@ export async function handleSurfaceAction(
1648
1635
  return;
1649
1636
  }
1650
1637
 
1638
+ // One-shot interactive surfaces — auto-complete now that the message has
1639
+ // been accepted. Deferred until after rejection check so the surface stays
1640
+ // active and retryable if the queue was full.
1641
+ const ONE_SHOT_SURFACE_TYPES = ["form", "confirmation", "file_upload"];
1642
+ if (ONE_SHOT_SURFACE_TYPES.includes(pending.surfaceType)) {
1643
+ broadcastMessage({
1644
+ type: "ui_surface_complete",
1645
+ conversationId: ctx.conversationId,
1646
+ surfaceId,
1647
+ summary,
1648
+ submittedData: mergedDataForText,
1649
+ });
1650
+ markSurfaceCompleted(ctx, surfaceId, summary);
1651
+ }
1652
+
1651
1653
  // One-shot: clear accumulated state now that the message has been accepted.
1652
1654
  // Deferred until after rejection check so state is preserved for retry on rejection.
1653
1655
  if (accumulatedState && Object.keys(accumulatedState).length > 0) {
@@ -30,6 +30,7 @@ import {
30
30
  ACTIVITY_SKIP_SET,
31
31
  injectActivityField,
32
32
  } from "../tools/schema-transforms.js";
33
+ import { resolveToolNameAlias } from "../tools/tool-name-aliases.js";
33
34
  import {
34
35
  isDiskPressureCleanupToolName,
35
36
  type ProxyApprovalCallback,
@@ -122,7 +123,9 @@ export function createToolExecutor(
122
123
  toolUseId?: string,
123
124
  turnContext?: import("../plugins/types.js").TurnContext,
124
125
  ) => {
125
- if (isDoordashCommand(name, input)) {
126
+ const executionName = resolveToolNameAlias(name, ctx.allowedToolNames);
127
+
128
+ if (isDoordashCommand(executionName, input)) {
126
129
  markDoordashStepInProgress(ctx, input);
127
130
  }
128
131
 
@@ -208,8 +211,9 @@ export function createToolExecutor(
208
211
  // route through the full executor pipeline so the underlying tool's
209
212
  // risk level, permission checks, hooks, and lifecycle events all fire
210
213
  // with the real tool name.
211
- if (name === "skill_execute") {
212
- const toolName = typeof input.tool === "string" ? input.tool : "";
214
+ if (executionName === "skill_execute") {
215
+ const rawToolName = typeof input.tool === "string" ? input.tool : "";
216
+ const toolName = resolveToolNameAlias(rawToolName, ctx.allowedToolNames);
213
217
  const rawToolInput =
214
218
  input.input != null && typeof input.input === "object"
215
219
  ? (input.input as Record<string, unknown>)
@@ -242,7 +246,7 @@ export function createToolExecutor(
242
246
  }
243
247
 
244
248
  const result = await executor.execute(
245
- name,
249
+ executionName,
246
250
  input,
247
251
  toolContext,
248
252
  turnContext,
@@ -251,7 +255,7 @@ export function createToolExecutor(
251
255
  ctx.approvedViaPromptThisTurn = true;
252
256
  }
253
257
 
254
- runPostExecutionSideEffects(name, input, result, { ctx });
258
+ runPostExecutionSideEffects(executionName, input, result, { ctx });
255
259
 
256
260
  return result;
257
261
  };
@@ -1052,7 +1052,7 @@ export class Conversation {
1052
1052
  );
1053
1053
  }
1054
1054
  if (result.compacted) {
1055
- applyCompactionResult(this, result, this.sendToClient, null, {
1055
+ await applyCompactionResult(this, result, this.sendToClient, null, {
1056
1056
  slackContextCompactionWatermarkTs: getSlackCompactionWatermarkForPrefix(
1057
1057
  slackChronologicalContext,
1058
1058
  result.compactedMessages,
@@ -63,6 +63,20 @@ export interface HistoryToolCall {
63
63
  approvalReason?: string;
64
64
  /** Snapshot of the auto-approve threshold at execution time. */
65
65
  riskThreshold?: string;
66
+ /**
67
+ * Display-only regex ladder for the rule editor (narrowest → broadest).
68
+ * Persisted on tool_use blocks by `annotatePersistedAssistantMessage` so
69
+ * historical chips render the same ladder as live tool_result events.
70
+ */
71
+ riskScopeOptions?: Array<{ pattern: string; label: string }>;
72
+ /** Minimatch save patterns for the rule editor (narrowest → broadest). */
73
+ riskAllowlistOptions?: Array<{
74
+ label: string;
75
+ description: string;
76
+ pattern: string;
77
+ }>;
78
+ /** Directory scope ladder for the rule editor. */
79
+ riskDirectoryScopeOptions?: Array<{ scope: string; label: string }>;
66
80
  }
67
81
 
68
82
  export interface HistorySurface {
@@ -368,6 +382,18 @@ export function renderHistoryContent(content: unknown): RenderedHistoryContent {
368
382
  entry.approvalReason = block._approvalReason;
369
383
  if (typeof block._riskThreshold === "string")
370
384
  entry.riskThreshold = block._riskThreshold;
385
+ // Read back the 3 risk-option arrays persisted by
386
+ // `annotatePersistedAssistantMessage`. Validate the array shape only
387
+ // — element shapes are best-effort (we trust our own writer).
388
+ if (Array.isArray(block._riskScopeOptions))
389
+ entry.riskScopeOptions =
390
+ block._riskScopeOptions as HistoryToolCall["riskScopeOptions"];
391
+ if (Array.isArray(block._riskAllowlistOptions))
392
+ entry.riskAllowlistOptions =
393
+ block._riskAllowlistOptions as HistoryToolCall["riskAllowlistOptions"];
394
+ if (Array.isArray(block._riskDirectoryScopeOptions))
395
+ entry.riskDirectoryScopeOptions =
396
+ block._riskDirectoryScopeOptions as HistoryToolCall["riskDirectoryScopeOptions"];
371
397
  toolCalls.push(entry);
372
398
  if (id) pendingToolUses.set(id, entry);
373
399
  contentOrder.push(`tool:${toolCalls.length - 1}`);
@@ -166,7 +166,7 @@ export class HostBashProxy {
166
166
  pendingInteractions.register(requestId, {
167
167
  conversationId,
168
168
  kind: "host_bash",
169
- rpcResolve: resolve,
169
+ rpcResolve: resolve as (v: unknown) => void,
170
170
  rpcReject: reject,
171
171
  timer,
172
172
  detachAbort,
@@ -135,7 +135,7 @@ export class HostBrowserProxy {
135
135
  pendingInteractions.register(requestId, {
136
136
  conversationId,
137
137
  kind: "host_browser",
138
- rpcResolve: resolve,
138
+ rpcResolve: resolve as (v: unknown) => void,
139
139
  rpcReject: reject,
140
140
  timer,
141
141
  detachAbort,
@@ -239,7 +239,7 @@ export class HostCuProxy {
239
239
  targetClientId != null
240
240
  ? assistantEventHub.getActorPrincipalIdForClient(targetClientId)
241
241
  : undefined,
242
- rpcResolve: resolve,
242
+ rpcResolve: resolve as (v: unknown) => void,
243
243
  rpcReject: reject,
244
244
  timer,
245
245
  detachAbort,
@@ -186,7 +186,7 @@ export class HostFileProxy {
186
186
  resolvedTargetClientId,
187
187
  )
188
188
  : undefined,
189
- rpcResolve: resolve,
189
+ rpcResolve: resolve as (v: unknown) => void,
190
190
  rpcReject: reject,
191
191
  timer,
192
192
  detachAbort,
@@ -273,7 +273,7 @@ export class HostTransferProxy {
273
273
  resolvedTargetClientId,
274
274
  )
275
275
  : undefined,
276
- rpcResolve: resolve,
276
+ rpcResolve: resolve as (v: unknown) => void,
277
277
  rpcReject: reject,
278
278
  timer,
279
279
  detachAbort,
@@ -462,7 +462,7 @@ export class HostTransferProxy {
462
462
  resolvedTargetClientId,
463
463
  )
464
464
  : undefined,
465
- rpcResolve: resolve,
465
+ rpcResolve: resolve as (v: unknown) => void,
466
466
  rpcReject: reject,
467
467
  timer,
468
468
  detachAbort,