@vellumai/assistant 0.10.3-staging.2 → 0.10.4-staging.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. package/openapi.yaml +73 -56
  2. package/package.json +1 -1
  3. package/src/__tests__/actor-trust-resolver-address-fallback.test.ts +83 -31
  4. package/src/__tests__/assistant-stream-state.test.ts +3 -76
  5. package/src/__tests__/background-workers-disk-pressure.test.ts +4 -2
  6. package/src/__tests__/channel-approval-routes.test.ts +21 -26
  7. package/src/__tests__/channel-delivery-store.test.ts +28 -0
  8. package/src/__tests__/channel-guardian.test.ts +82 -32
  9. package/src/__tests__/channel-inbound-disk-pressure.test.ts +11 -19
  10. package/src/__tests__/channel-reply-delivery.test.ts +6 -2
  11. package/src/__tests__/compaction-ledger-store.test.ts +128 -0
  12. package/src/__tests__/config-loader-backfill.test.ts +148 -0
  13. package/src/__tests__/consult-deadline.test.ts +60 -0
  14. package/src/__tests__/contact-store-interaction-info.test.ts +156 -0
  15. package/src/__tests__/contact-store-user-file.test.ts +7 -10
  16. package/src/__tests__/contacts-relay-reads.test.ts +6 -9
  17. package/src/__tests__/contacts-write.test.ts +0 -2
  18. package/src/__tests__/conversation-agent-loop-overflow.test.ts +4 -2
  19. package/src/__tests__/conversation-agent-loop.test.ts +98 -7
  20. package/src/__tests__/conversation-attention-telegram.test.ts +9 -11
  21. package/src/__tests__/conversation-error.test.ts +18 -0
  22. package/src/__tests__/conversation-fork-crud.test.ts +354 -24
  23. package/src/__tests__/conversation-title-service.test.ts +222 -201
  24. package/src/__tests__/db-compaction-events-migration.test.ts +129 -0
  25. package/src/__tests__/delete-propagation.test.ts +5 -3
  26. package/src/__tests__/dm-backfill.test.ts +6 -4
  27. package/src/__tests__/emit-signal-routing-intent.test.ts +2 -6
  28. package/src/__tests__/guardian-binding-drift-heal.test.ts +43 -23
  29. package/src/__tests__/guardian-dispatch.test.ts +50 -5
  30. package/src/__tests__/guardian-routing-state.test.ts +6 -10
  31. package/src/__tests__/helpers/channel-test-adapter.ts +45 -12
  32. package/src/__tests__/helpers/create-guardian-binding.ts +15 -23
  33. package/src/__tests__/helpers/mock-logger.ts +1 -0
  34. package/src/__tests__/helpers/seed-contact-channel.ts +96 -0
  35. package/src/__tests__/inbound-invite-redemption.test.ts +87 -10
  36. package/src/__tests__/invite-redemption-service.test.ts +273 -53
  37. package/src/__tests__/invite-routes-http.test.ts +34 -0
  38. package/src/__tests__/invite-service-ipc.test.ts +65 -2
  39. package/src/__tests__/list-messages-page-latest.test.ts +173 -4
  40. package/src/__tests__/mcp-config-secret-boundary.test.ts +3 -0
  41. package/src/__tests__/non-member-access-request.test.ts +15 -13
  42. package/src/__tests__/onboarding-persona-write.test.ts +52 -22
  43. package/src/__tests__/persist-onboarding-artifacts.test.ts +1 -0
  44. package/src/__tests__/persona-resolver.test.ts +75 -45
  45. package/src/__tests__/plugin-bootstrap.test.ts +13 -5
  46. package/src/__tests__/plugin-disabled-state.test.ts +190 -0
  47. package/src/__tests__/provider-usage-tracking.test.ts +1 -1
  48. package/src/__tests__/reaction-intercept-cold-cache-warm.test.ts +135 -0
  49. package/src/__tests__/reaction-intercept-member-verdict-warm.test.ts +158 -0
  50. package/src/__tests__/reaction-persistence.test.ts +51 -4
  51. package/src/__tests__/relay-server.test.ts +88 -31
  52. package/src/__tests__/runtime-attachment-metadata.test.ts +9 -11
  53. package/src/__tests__/settings-routes.test.ts +32 -0
  54. package/src/__tests__/slack-block-formatting.test.ts +1 -38
  55. package/src/__tests__/sse-actor-principal-guardian-source.test.ts +13 -36
  56. package/src/__tests__/stt-hints.test.ts +6 -3
  57. package/src/__tests__/subagent-fork-prompt-role.test.ts +195 -0
  58. package/src/__tests__/subagent-fork-spawn.test.ts +6 -7
  59. package/src/__tests__/subagent-role-registry.test.ts +17 -4
  60. package/src/__tests__/subagent-spawn-and-await.test.ts +546 -0
  61. package/src/__tests__/subagent-tools.test.ts +398 -3
  62. package/src/__tests__/thread-backfill.test.ts +3 -3
  63. package/src/__tests__/tool-preview-lifecycle.test.ts +26 -10
  64. package/src/__tests__/tool-start-timestamp.test.ts +4 -3
  65. package/src/__tests__/trusted-contact-approval-notifier.test.ts +37 -51
  66. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +2 -2
  67. package/src/__tests__/trusted-contact-lifecycle-notifications.test.ts +9 -7
  68. package/src/__tests__/trusted-contact-multichannel.test.ts +16 -7
  69. package/src/__tests__/trusted-contact-verification.test.ts +79 -54
  70. package/src/__tests__/voice-guardian-cold-cache-warm.test.ts +137 -0
  71. package/src/__tests__/voice-invite-redemption.test.ts +183 -20
  72. package/src/__tests__/workspace-migration-102-preserve-heartbeat-enabled-for-existing-workspaces.test.ts +3 -3
  73. package/src/__tests__/workspace-migration-111-prune-seeded-callsite-defaults.test.ts +2 -2
  74. package/src/__tests__/workspace-migration-112-remove-advisor-callsite-override.test.ts +170 -0
  75. package/src/__tests__/workspace-migration-drop-user-md.test.ts +196 -238
  76. package/src/a2a/__tests__/e2e-a2a-channel.test.ts +35 -47
  77. package/src/agent/loop-exclusive-tool.test.ts +19 -15
  78. package/src/agent/loop-native-web-search.test.ts +200 -0
  79. package/src/agent/loop.ts +108 -1
  80. package/src/api/responses/conversation-message.ts +9 -0
  81. package/src/approvals/guardian-request-resolvers.ts +16 -4
  82. package/src/calls/__tests__/relay-setup-router.test.ts +10 -18
  83. package/src/calls/guardian-dispatch.ts +14 -11
  84. package/src/calls/inbound-trust-reader.ts +7 -1
  85. package/src/calls/relay-access-wait.ts +6 -6
  86. package/src/calls/relay-server.ts +22 -2
  87. package/src/calls/relay-setup-router.ts +10 -10
  88. package/src/cli/commands/__tests__/conversations-slack.test.ts +1 -0
  89. package/src/cli/commands/contacts.ts +10 -7
  90. package/src/cli/commands/memory/__tests__/worker.test.ts +147 -17
  91. package/src/cli/commands/memory/worker.ts +97 -30
  92. package/src/cli/commands/plugins.ts +3 -146
  93. package/src/cli/lib/__tests__/list-installed-plugins.test.ts +17 -17
  94. package/src/cli/lib/__tests__/publish-plugin.test.ts +98 -0
  95. package/src/cli/lib/publish-plugin.ts +231 -1
  96. package/src/config/__tests__/sync-gated-profiles.test.ts +5 -7
  97. package/src/config/bundled-skills/subagent/SKILL.md +16 -1
  98. package/src/config/bundled-skills/subagent/TOOLS.json +5 -4
  99. package/src/config/call-site-defaults.ts +0 -6
  100. package/src/config/llm-resolver.ts +0 -3
  101. package/src/config/schemas/call-site-catalog.ts +0 -7
  102. package/src/config/schemas/heartbeat.ts +2 -5
  103. package/src/config/schemas/llm.ts +3 -12
  104. package/src/config/schemas/memory-lifecycle.ts +1 -1
  105. package/src/config/seed-inference-profiles.ts +76 -35
  106. package/src/config/sync-gated-profiles.ts +0 -3
  107. package/src/contacts/__tests__/contacts-write-revoke-relay.test.ts +7 -8
  108. package/src/contacts/__tests__/member-write-relay.test.ts +35 -11
  109. package/src/contacts/contact-store.ts +27 -237
  110. package/src/contacts/contacts-write.ts +18 -58
  111. package/src/contacts/gateway-channel-read.ts +51 -0
  112. package/src/contacts/member-write-relay.ts +25 -31
  113. package/src/contacts/types.ts +3 -15
  114. package/src/daemon/__tests__/conversation-tool-setup.test.ts +0 -44
  115. package/src/daemon/conversation-agent-loop-handlers.ts +29 -10
  116. package/src/daemon/conversation-agent-loop.ts +68 -61
  117. package/src/daemon/conversation-error.ts +7 -10
  118. package/src/daemon/conversation-tool-setup.ts +0 -10
  119. package/src/daemon/conversation.ts +10 -0
  120. package/src/daemon/external-plugins-bootstrap.ts +8 -2
  121. package/src/daemon/handlers/__tests__/config-a2a-accept.test.ts +0 -1
  122. package/src/daemon/handlers/__tests__/config-a2a-complete.test.ts +0 -2
  123. package/src/daemon/handlers/__tests__/config-a2a-redeem.test.ts +0 -2
  124. package/src/daemon/handlers/__tests__/config-channels.test.ts +9 -14
  125. package/src/daemon/handlers/config-channels.ts +14 -29
  126. package/src/daemon/lifecycle.ts +16 -4
  127. package/src/daemon/message-types/surfaces.ts +2 -0
  128. package/src/heartbeat/heartbeat-service.ts +5 -0
  129. package/src/home/relationship-state-writer.ts +5 -0
  130. package/src/memory/__tests__/embedding-cache.test.ts +136 -0
  131. package/src/memory/compaction-ledger-store.ts +107 -0
  132. package/src/memory/conversation-crud.ts +136 -61
  133. package/src/memory/conversation-title-service.ts +173 -24
  134. package/src/memory/embedding-backend.ts +8 -1
  135. package/src/memory/embedding-cache.ts +139 -0
  136. package/src/memory/jobs-worker.ts +75 -29
  137. package/src/memory/memory-retrospective-job.ts +5 -0
  138. package/src/memory/migrations/209-strip-thinking-from-consolidated.ts +27 -5
  139. package/src/memory/migrations/302-create-compaction-events.ts +107 -0
  140. package/src/memory/migrations/303-add-conversation-creation-seq.ts +33 -0
  141. package/src/memory/migrations/__tests__/209-strip-thinking-from-consolidated.test.ts +79 -6
  142. package/src/memory/schema/contacts.ts +6 -2
  143. package/src/memory/schema/conversations.ts +39 -0
  144. package/src/memory/steps.ts +1090 -367
  145. package/src/memory/worker-control.ts +104 -18
  146. package/src/memory/worker-process.ts +17 -0
  147. package/src/messaging/channel-binding-metadata.ts +31 -0
  148. package/src/messaging/channel-binding-schema.ts +51 -0
  149. package/src/messaging/providers/__tests__/callback-routing.test.ts +45 -0
  150. package/src/messaging/providers/__tests__/transport-dispatch.test.ts +195 -0
  151. package/src/messaging/providers/a2a/__tests__/deliver.test.ts +11 -0
  152. package/src/messaging/providers/a2a/deliver.ts +5 -1
  153. package/src/messaging/providers/a2a/transport.ts +10 -0
  154. package/src/messaging/providers/callback-routing.ts +48 -0
  155. package/src/messaging/providers/channel-transport.ts +55 -0
  156. package/src/messaging/providers/index.ts +65 -241
  157. package/src/messaging/providers/slack/binding-metadata.ts +62 -0
  158. package/src/messaging/providers/slack/transport.ts +92 -0
  159. package/src/messaging/providers/telegram-bot/transport.ts +51 -0
  160. package/src/messaging/providers/whatsapp/transport.ts +38 -0
  161. package/src/notifications/__tests__/broadcaster.test.ts +0 -8
  162. package/src/notifications/__tests__/connected-channels.test.ts +8 -36
  163. package/src/notifications/__tests__/destination-resolver.test.ts +12 -117
  164. package/src/notifications/destination-resolver.ts +7 -23
  165. package/src/notifications/emit-signal.ts +5 -11
  166. package/src/plugins/defaults/index.ts +0 -35
  167. package/src/plugins/defaults/memory-v3-shadow/__tests__/dense.test.ts +11 -0
  168. package/src/plugins/defaults/memory-v3-shadow/__tests__/section-dense-store.test.ts +243 -2
  169. package/src/plugins/defaults/memory-v3-shadow/section-dense-store.ts +167 -14
  170. package/src/plugins/disabled-state.ts +31 -0
  171. package/src/plugins/registry.ts +55 -12
  172. package/src/prompts/persona-resolver.ts +43 -11
  173. package/src/providers/call-site-routing.ts +41 -0
  174. package/src/providers/provider-send-message.ts +6 -0
  175. package/src/providers/ratelimit.ts +6 -0
  176. package/src/providers/registry.ts +1 -1
  177. package/src/providers/retry.ts +6 -0
  178. package/src/providers/types.ts +13 -0
  179. package/src/providers/usage-tracking.ts +6 -0
  180. package/src/runtime/__tests__/guardian-vellum-migration.test.ts +30 -27
  181. package/src/runtime/__tests__/local-principal-trust.test.ts +16 -18
  182. package/src/runtime/__tests__/member-verdict-cache.test.ts +119 -0
  183. package/src/runtime/__tests__/trust-verdict-consumer.test.ts +115 -168
  184. package/src/runtime/access-request-helper.ts +1 -2
  185. package/src/runtime/actor-trust-resolver.ts +44 -17
  186. package/src/runtime/anchored-guardian.test.ts +7 -54
  187. package/src/runtime/anchored-guardian.ts +4 -53
  188. package/src/runtime/assistant-stream-state.ts +12 -74
  189. package/src/runtime/channel-reply-delivery.ts +3 -8
  190. package/src/runtime/guardian-vellum-migration.ts +18 -16
  191. package/src/runtime/invite-redemption-service.ts +25 -10
  192. package/src/runtime/local-actor-identity.test.ts +108 -0
  193. package/src/runtime/local-actor-identity.ts +27 -20
  194. package/src/runtime/member-verdict-cache.ts +0 -0
  195. package/src/runtime/routes/__tests__/contact-routes.test.ts +100 -7
  196. package/src/runtime/routes/__tests__/global-search-routes.test.ts +1 -2
  197. package/src/runtime/routes/__tests__/surface-action-routes.test.ts +2 -1
  198. package/src/runtime/routes/contact-routes.ts +40 -25
  199. package/src/runtime/routes/conversation-list-routes.ts +1 -29
  200. package/src/runtime/routes/conversation-routes.ts +27 -7
  201. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +0 -10
  202. package/src/runtime/routes/inbound-stages/background-dispatch.ts +4 -8
  203. package/src/runtime/routes/inbound-stages/reaction-intercept.ts +19 -0
  204. package/src/runtime/routes/settings-routes.ts +8 -3
  205. package/src/runtime/services/conversation-serializer.ts +6 -49
  206. package/src/runtime/slack-block-formatting.ts +0 -15
  207. package/src/runtime/trust-verdict-consumer.ts +36 -41
  208. package/src/subagent/__tests__/consult-prompt.test.ts +35 -0
  209. package/src/{plugins/defaults/advisor/__tests__/transcript.test.ts → subagent/__tests__/consult-transcript.test.ts} +47 -10
  210. package/src/{plugins/defaults/advisor/steering.ts → subagent/consult-prompt.ts} +17 -39
  211. package/src/{plugins/defaults/advisor/transcript.ts → subagent/consult-transcript.ts} +18 -8
  212. package/src/subagent/index.ts +1 -1
  213. package/src/subagent/manager.ts +245 -33
  214. package/src/subagent/types.ts +8 -1
  215. package/src/tools/registry.ts +10 -3
  216. package/src/tools/subagent/consult-deadline.ts +49 -0
  217. package/src/tools/subagent/spawn.ts +234 -5
  218. package/src/util/logger.ts +9 -0
  219. package/src/util/platform.ts +14 -0
  220. package/src/workspace/migrations/031-drop-user-md.ts +232 -148
  221. package/src/workspace/migrations/112-remove-advisor-callsite-override.ts +64 -0
  222. package/src/workspace/migrations/registry.ts +2 -0
  223. package/src/plugins/defaults/advisor/__tests__/advisor-gate.test.ts +0 -56
  224. package/src/plugins/defaults/advisor/__tests__/advisor-state-store.test.ts +0 -43
  225. package/src/plugins/defaults/advisor/__tests__/agent-loop-integration.test.ts +0 -137
  226. package/src/plugins/defaults/advisor/__tests__/consult.test.ts +0 -314
  227. package/src/plugins/defaults/advisor/__tests__/context-pack-gating.test.ts +0 -106
  228. package/src/plugins/defaults/advisor/__tests__/context-pack.test.ts +0 -60
  229. package/src/plugins/defaults/advisor/__tests__/hooks.test.ts +0 -138
  230. package/src/plugins/defaults/advisor/advisor-gate.ts +0 -29
  231. package/src/plugins/defaults/advisor/advisor-state-store.ts +0 -94
  232. package/src/plugins/defaults/advisor/config.ts +0 -21
  233. package/src/plugins/defaults/advisor/consult.ts +0 -197
  234. package/src/plugins/defaults/advisor/context-pack.ts +0 -288
  235. package/src/plugins/defaults/advisor/hooks/post-model-call.ts +0 -34
  236. package/src/plugins/defaults/advisor/hooks/pre-model-call.ts +0 -30
  237. package/src/plugins/defaults/advisor/hooks/user-prompt-submit.ts +0 -19
  238. package/src/plugins/defaults/advisor/package.json +0 -14
  239. package/src/plugins/defaults/advisor/tools/advisor.ts +0 -92
@@ -14,21 +14,109 @@ mock.module("../../../../memory/qdrant-client.js", () => ({
14
14
 
15
15
  // Stub the shared embedding backend. Records inputs and returns one
16
16
  // deterministic vector per input so `upsertSections` can map vectors → points.
17
+ // `statusProvider/Model` is what `getMemoryBackendStatus` reports (the cache-read
18
+ // identity); `embedProvider/Model` is what `embedWithBackend` returns — kept
19
+ // separate so a test can simulate a provider rotation between the two.
17
20
  const embedState = {
18
21
  calls: [] as string[][],
19
22
  dim: 4,
23
+ statusProvider: "local" as string | null,
24
+ statusModel: "test-model" as string | null,
25
+ embedProvider: "local",
26
+ embedModel: "test-model",
27
+ // Gemini embedding options that change the vector for identical text. The
28
+ // mocked `geminiCacheExtras` renders these into cache-key fragments exactly
29
+ // as the production helper does, so a test can flip the task type and assert
30
+ // the section cache treats it as a miss.
31
+ geminiTaskType: undefined as string | undefined,
32
+ geminiDimensions: undefined as number | undefined,
20
33
  };
21
34
  mock.module("../../../../memory/embedding-backend.js", () => ({
35
+ getMemoryBackendStatus: async () => ({
36
+ enabled: true,
37
+ degraded: false,
38
+ provider: embedState.statusProvider,
39
+ model: embedState.statusModel,
40
+ reason: null,
41
+ }),
22
42
  embedWithBackend: async (_config: unknown, inputs: string[]) => {
23
43
  embedState.calls.push(inputs);
24
44
  return {
25
- provider: "local",
26
- model: "test-model",
45
+ provider: embedState.embedProvider,
46
+ model: embedState.embedModel,
27
47
  vectors: inputs.map((_input, i) =>
28
48
  Array.from({ length: embedState.dim }, (_v, j) => (i + 1) * (j + 1)),
29
49
  ),
30
50
  };
31
51
  },
52
+ geminiCacheExtras: () => {
53
+ const extras: string[] = [];
54
+ if (embedState.geminiTaskType) {
55
+ extras.push(`task=${embedState.geminiTaskType}`);
56
+ }
57
+ if (embedState.geminiDimensions != null) {
58
+ extras.push(`dim=${embedState.geminiDimensions}`);
59
+ }
60
+ return extras;
61
+ },
62
+ }));
63
+
64
+ // In-memory stand-in for the `memory_embeddings` dense cache. Lets each test
65
+ // program hits/misses without a real DB; keyed exactly as the production helper
66
+ // (`targetType|targetId|provider|model`) with a stored dimension for the
67
+ // dim-match gate. `getDb` is stubbed to a sentinel since the mock ignores it.
68
+ const cacheState = {
69
+ store: new Map<
70
+ string,
71
+ { dense: number[]; contentHash: string; dimensions: number }
72
+ >(),
73
+ reads: [] as string[],
74
+ };
75
+ function cacheKey(k: {
76
+ targetType: string;
77
+ targetId: string;
78
+ provider: string;
79
+ model: string;
80
+ }): string {
81
+ return `${k.targetType}|${k.targetId}|${k.provider}|${k.model}`;
82
+ }
83
+ mock.module("../../../../memory/embedding-cache.js", () => ({
84
+ readEmbeddingCache: (
85
+ _db: unknown,
86
+ key: {
87
+ targetType: string;
88
+ targetId: string;
89
+ provider: string;
90
+ model: string;
91
+ expectedDim: number;
92
+ },
93
+ ) => {
94
+ cacheState.reads.push(cacheKey(key));
95
+ const row = cacheState.store.get(cacheKey(key));
96
+ if (!row || row.dimensions !== key.expectedDim) return null;
97
+ return { dense: row.dense, contentHash: row.contentHash };
98
+ },
99
+ writeEmbeddingCache: (
100
+ _db: unknown,
101
+ params: {
102
+ targetType: string;
103
+ targetId: string;
104
+ provider: string;
105
+ model: string;
106
+ dense: number[];
107
+ contentHash: string;
108
+ },
109
+ ) => {
110
+ cacheState.store.set(cacheKey(params), {
111
+ dense: params.dense,
112
+ contentHash: params.contentHash,
113
+ dimensions: params.dense.length,
114
+ });
115
+ },
116
+ }));
117
+
118
+ mock.module("../../../../memory/db-connection.js", () => ({
119
+ getDb: () => ({}),
32
120
  }));
33
121
 
34
122
  // Mock the underlying @qdrant/js-client-rest package. The mock client records
@@ -159,6 +247,14 @@ function resetState(): void {
159
247
  state.scrollCalls.length = 0;
160
248
  embedState.calls.length = 0;
161
249
  embedState.dim = 4;
250
+ embedState.statusProvider = "local";
251
+ embedState.statusModel = "test-model";
252
+ embedState.embedProvider = "local";
253
+ embedState.embedModel = "test-model";
254
+ embedState.geminiTaskType = undefined;
255
+ embedState.geminiDimensions = undefined;
256
+ cacheState.store.clear();
257
+ cacheState.reads.length = 0;
162
258
  _resetSectionDenseStoreForTests();
163
259
  }
164
260
 
@@ -376,6 +472,151 @@ describe("memory v3 section-dense-store — upsert", () => {
376
472
  });
377
473
  });
378
474
 
475
+ describe("memory v3 section-dense-store — embedding cache", () => {
476
+ beforeEach(resetState);
477
+ afterEach(resetState);
478
+
479
+ test("re-upserting unchanged sections serves from cache (no second embed)", async () => {
480
+ state.collectionExists = true;
481
+ const sections = [
482
+ section("people/alice", 0, "alice lead text"),
483
+ section("people/alice", 1, "alice section one"),
484
+ ];
485
+
486
+ await upsertSections(CONFIG, sections);
487
+ expect(embedState.calls).toHaveLength(1); // cold cache → embedded once
488
+
489
+ await upsertSections(CONFIG, sections);
490
+ // No new backend call — the second pass reused both cached vectors.
491
+ expect(embedState.calls).toHaveLength(1);
492
+ // But the points were still upserted (rebuilt from cache), with identical
493
+ // vectors to the first pass.
494
+ expect(state.upsertCalls).toHaveLength(2);
495
+ expect(state.upsertCalls[1]!.points.map((p) => p.vector)).toEqual(
496
+ state.upsertCalls[0]!.points.map((p) => p.vector),
497
+ );
498
+ });
499
+
500
+ test("a changed section text re-embeds (content hash differs)", async () => {
501
+ state.collectionExists = true;
502
+
503
+ await upsertSections(CONFIG, [section("people/alice", 0, "text A")]);
504
+ await upsertSections(CONFIG, [section("people/alice", 0, "text B")]);
505
+
506
+ expect(embedState.calls).toEqual([["text A"], ["text B"]]);
507
+ });
508
+
509
+ test("partial hit embeds only the changed section, upserts both", async () => {
510
+ state.collectionExists = true;
511
+ await upsertSections(CONFIG, [
512
+ section("people/alice", 0, "lead"),
513
+ section("people/alice", 1, "one"),
514
+ ]);
515
+ embedState.calls.length = 0;
516
+
517
+ await upsertSections(CONFIG, [
518
+ section("people/alice", 0, "lead"), // unchanged → cache hit
519
+ section("people/alice", 1, "one changed"), // changed → miss
520
+ ]);
521
+
522
+ // Only the changed section's text reached the backend.
523
+ expect(embedState.calls).toEqual([["one changed"]]);
524
+ // Both points (cached + freshly embedded) were upserted.
525
+ expect(state.upsertCalls.at(-1)!.points).toHaveLength(2);
526
+ });
527
+
528
+ test("no resolved provider skips the cache and embeds every section", async () => {
529
+ state.collectionExists = true;
530
+ embedState.statusProvider = null;
531
+ embedState.statusModel = null;
532
+
533
+ await upsertSections(CONFIG, [
534
+ section("people/alice", 0, "x"),
535
+ section("people/alice", 1, "y"),
536
+ ]);
537
+
538
+ // Without an embedding identity the cache cannot be keyed, so it is never
539
+ // read and every section is embedded — matching the pre-cache behavior.
540
+ expect(cacheState.reads).toEqual([]);
541
+ expect(embedState.calls).toEqual([["x", "y"]]);
542
+ });
543
+
544
+ test("a provider rotation re-embeds the whole batch under the new identity", async () => {
545
+ state.collectionExists = true;
546
+ await upsertSections(CONFIG, [
547
+ section("people/alice", 0, "lead"),
548
+ section("people/alice", 1, "one"),
549
+ ]);
550
+ embedState.calls.length = 0;
551
+
552
+ // The cache-read identity still resolves to local/test-model (so section 0
553
+ // is a hit), but the backend now answers as a different provider.
554
+ embedState.embedProvider = "openai";
555
+ embedState.embedModel = "text-embedding-3";
556
+
557
+ await upsertSections(CONFIG, [
558
+ section("people/alice", 0, "lead"), // hit under the old identity
559
+ section("people/alice", 1, "one changed"), // miss
560
+ ]);
561
+
562
+ // First the misses are embedded; the rotated provider on that response
563
+ // forces a full re-embed of every section so the collection stays in one
564
+ // embedding space.
565
+ expect(embedState.calls).toEqual([
566
+ ["one changed"],
567
+ ["lead", "one changed"],
568
+ ]);
569
+ });
570
+
571
+ test("a Gemini task-type change re-embeds unchanged text (cache miss)", async () => {
572
+ state.collectionExists = true;
573
+ embedState.statusProvider = "gemini";
574
+ embedState.statusModel = "gemini-embedding-2";
575
+ embedState.embedProvider = "gemini";
576
+ embedState.embedModel = "gemini-embedding-2";
577
+ embedState.geminiTaskType = "RETRIEVAL_DOCUMENT";
578
+
579
+ await upsertSections(CONFIG, [
580
+ section("people/alice", 0, "alice lead text"),
581
+ ]);
582
+ expect(embedState.calls).toHaveLength(1); // cold cache → embedded once
583
+
584
+ // Same text, same provider/model — but a different Gemini task type yields a
585
+ // different vector, so the row cached under the old task type must not be
586
+ // served. The extras are folded into the content hash, so the comparison
587
+ // misses and the section re-embeds under the new task type.
588
+ embedState.geminiTaskType = "SEMANTIC_SIMILARITY";
589
+
590
+ await upsertSections(CONFIG, [
591
+ section("people/alice", 0, "alice lead text"),
592
+ ]);
593
+
594
+ expect(embedState.calls).toEqual([
595
+ ["alice lead text"],
596
+ ["alice lead text"],
597
+ ]);
598
+ });
599
+
600
+ test("an unchanged Gemini task type still serves from cache (no spurious miss)", async () => {
601
+ state.collectionExists = true;
602
+ embedState.statusProvider = "gemini";
603
+ embedState.statusModel = "gemini-embedding-2";
604
+ embedState.embedProvider = "gemini";
605
+ embedState.embedModel = "gemini-embedding-2";
606
+ embedState.geminiTaskType = "RETRIEVAL_DOCUMENT";
607
+
608
+ const sections = [section("people/alice", 0, "alice lead text")];
609
+
610
+ await upsertSections(CONFIG, sections);
611
+ await upsertSections(CONFIG, sections);
612
+
613
+ // Folding the extras into the hash must not break ordinary hits: with the
614
+ // task type unchanged the second pass reuses the cached vector.
615
+ expect(embedState.calls).toHaveLength(1);
616
+ expect(state.upsertCalls).toHaveLength(2);
617
+ });
618
+ });
619
+
379
620
  describe("memory v3 section-dense-store — delete", () => {
380
621
  beforeEach(resetState);
381
622
  afterEach(resetState);
@@ -19,11 +19,23 @@
19
19
  // shared Qdrant client; the read side (`dense.ts`) reuses that client to query
20
20
  // this collection.
21
21
 
22
+ import { createHash } from "node:crypto";
23
+
22
24
  import { QdrantClient as QdrantRestClient } from "@qdrant/js-client-rest";
23
25
  import { v5 as uuidv5 } from "uuid";
24
26
 
25
27
  import type { AssistantConfig } from "../../../config/types.js";
26
- import { embedWithBackend } from "../../../memory/embedding-backend.js";
28
+ import { getDb } from "../../../memory/db-connection.js";
29
+ import {
30
+ embedWithBackend,
31
+ geminiCacheExtras,
32
+ getMemoryBackendStatus,
33
+ } from "../../../memory/embedding-backend.js";
34
+ import {
35
+ readEmbeddingCache,
36
+ writeEmbeddingCache,
37
+ } from "../../../memory/embedding-cache.js";
38
+ import { embeddingInputContentHash } from "../../../memory/embedding-types.js";
27
39
  import { resolveQdrantUrl } from "../../../memory/qdrant-client.js";
28
40
  import { getLogger } from "../../../util/logger.js";
29
41
  import type { Section } from "./types.js";
@@ -166,6 +178,34 @@ export async function ensureSectionCollection(
166
178
  _collectionReady = true;
167
179
  }
168
180
 
181
+ /**
182
+ * `target_type` marker on `memory_embeddings` rows that cache section vectors.
183
+ * Distinct from the v2 `concept_page` rows so the two caches never collide on a
184
+ * shared `(targetType, targetId, provider, model)` key.
185
+ */
186
+ const V3_SECTION_TARGET_TYPE = "v3_section";
187
+
188
+ /** Human-readable cache id for a section: `<article>#<ordinal>`. */
189
+ function sectionCacheId(article: string, ordinal: number): string {
190
+ return `${article}#${ordinal}`;
191
+ }
192
+
193
+ /**
194
+ * Content hash a section's cached vector is keyed by. Folds the provider's
195
+ * embedding-option extras (Gemini task type / output dimensions) into the base
196
+ * text hash, so changing an option that alters the vector for identical text is
197
+ * a cache miss that re-embeds. With no extras the bare text hash is returned
198
+ * unchanged, keeping existing rows valid for non-Gemini and default-Gemini
199
+ * configs.
200
+ */
201
+ function sectionContentHash(text: string, extras: string[]): string {
202
+ const base = embeddingInputContentHash({ type: "text", text });
203
+ if (extras.length === 0) return base;
204
+ return createHash("sha256")
205
+ .update(`${base}\0${extras.join("\0")}`)
206
+ .digest("hex");
207
+ }
208
+
169
209
  /**
170
210
  * Embed each section's `text` and upsert one point per section, keyed by a
171
211
  * deterministic `(article, ordinal)`-derived ID. Stable IDs mean re-upserting
@@ -173,6 +213,10 @@ export async function ensureSectionCollection(
173
213
  * so the operation is idempotent. Payload carries `{ article, ordinal, title }`
174
214
  * for downstream filtering and rendering.
175
215
  *
216
+ * Unchanged sections are served from the `memory_embeddings` cache rather than
217
+ * re-embedded — see {@link embedSectionsCached} — so a maintain pass that
218
+ * re-selects an already-embedded page makes no backend round-trip for it.
219
+ *
176
220
  * An empty `sections` array is a no-op (no embedding round-trip).
177
221
  */
178
222
  export async function upsertSections(
@@ -183,20 +227,25 @@ export async function upsertSections(
183
227
 
184
228
  await ensureSectionCollection(config);
185
229
 
186
- const { vectors } = await embedWithBackend(
187
- config,
188
- sections.map((s) => s.text),
189
- );
230
+ const vectors = await embedSectionsCached(config, sections);
231
+
232
+ const points = sections.flatMap((section, i) => {
233
+ const vector = vectors[i];
234
+ if (!vector) return [];
235
+ return [
236
+ {
237
+ id: pointIdForSection(section.article, section.ordinal),
238
+ vector,
239
+ payload: {
240
+ article: section.article,
241
+ ordinal: section.ordinal,
242
+ title: section.title,
243
+ },
244
+ },
245
+ ];
246
+ });
190
247
 
191
- const points = sections.map((section, i) => ({
192
- id: pointIdForSection(section.article, section.ordinal),
193
- vector: vectors[i]!,
194
- payload: {
195
- article: section.article,
196
- ordinal: section.ordinal,
197
- title: section.title,
198
- },
199
- }));
248
+ if (points.length === 0) return;
200
249
 
201
250
  await getSectionDenseClient(config).upsert(SECTION_COLLECTION, {
202
251
  wait: true,
@@ -204,6 +253,110 @@ export async function upsertSections(
204
253
  });
205
254
  }
206
255
 
256
+ /**
257
+ * Resolve a dense vector per section, reusing cached vectors for sections whose
258
+ * `text` is unchanged and embedding only the misses in a single batched backend
259
+ * call. Returns one entry per input section, index-aligned; a position is left
260
+ * `undefined` only when a fresh embed produced no vector for it.
261
+ *
262
+ * The cache lives in the shared `memory_embeddings` table keyed on
263
+ * `(targetType="v3_section", targetId="<article>#<ordinal>", provider, model)`.
264
+ * It survives the `deleteSectionsForArticle` call that callers run before upserting
265
+ * (that delete clears only Qdrant points), so an unchanged section rebuilds its point
266
+ * from the cache without a backend round-trip. Vectors are stored and upserted
267
+ * raw — the section dense lane applies no anisotropy correction, so the cached
268
+ * vector equals the upserted one.
269
+ */
270
+ async function embedSectionsCached(
271
+ config: AssistantConfig,
272
+ sections: Section[],
273
+ ): Promise<Array<number[] | undefined>> {
274
+ const expectedDim = config.memory.qdrant.vectorSize;
275
+
276
+ // Cache identity: read rows under the currently-selected provider/model. When
277
+ // no provider or model resolves (backend down/disabled), skip the cache and let the
278
+ // batched embed below surface the failure exactly as the uncached path did.
279
+ const status = await getMemoryBackendStatus(config);
280
+ const db = getDb();
281
+
282
+ // Only Gemini's options change the vector for identical text, so fold the
283
+ // extras into the cache identity only when Gemini is the resolved provider;
284
+ // other backends keep the bare text hash. See {@link sectionContentHash}.
285
+ const extras = status.provider === "gemini" ? geminiCacheExtras(config) : [];
286
+ const hashes = sections.map((s) => sectionContentHash(s.text, extras));
287
+
288
+ const result: Array<number[] | undefined> = new Array(sections.length);
289
+ const missIndices: number[] = [];
290
+ if (status.provider && status.model) {
291
+ for (let i = 0; i < sections.length; i++) {
292
+ const section = sections[i]!;
293
+ const cached = readEmbeddingCache(db, {
294
+ targetType: V3_SECTION_TARGET_TYPE,
295
+ targetId: sectionCacheId(section.article, section.ordinal),
296
+ provider: status.provider,
297
+ model: status.model,
298
+ expectedDim,
299
+ });
300
+ if (cached && cached.contentHash === hashes[i]) {
301
+ result[i] = cached.dense;
302
+ } else {
303
+ missIndices.push(i);
304
+ }
305
+ }
306
+ } else {
307
+ for (let i = 0; i < sections.length; i++) missIndices.push(i);
308
+ }
309
+
310
+ if (missIndices.length === 0) return result;
311
+
312
+ // Embed the misses in one batched call (the dominant cost).
313
+ let embedded = await embedWithBackend(
314
+ config,
315
+ missIndices.map((i) => sections[i]!.text),
316
+ );
317
+ let writeProvider = embedded.provider;
318
+ let writeModel = embedded.model;
319
+ let effectiveIndices = missIndices;
320
+
321
+ // A provider/model rotation between the cache read and the embed would mix two
322
+ // embedding spaces in one collection: cached hits carry the old identity, the
323
+ // fresh misses the new. Re-embed every section under the new identity so the
324
+ // whole batch (and the cache rows it writes) shares one space.
325
+ const hadHits = missIndices.length < sections.length;
326
+ const rotated =
327
+ hadHits &&
328
+ (embedded.provider !== status.provider || embedded.model !== status.model);
329
+ if (rotated) {
330
+ effectiveIndices = sections.map((_, i) => i);
331
+ embedded = await embedWithBackend(
332
+ config,
333
+ sections.map((s) => s.text),
334
+ );
335
+ writeProvider = embedded.provider;
336
+ writeModel = embedded.model;
337
+ }
338
+
339
+ const now = Date.now();
340
+ for (let j = 0; j < effectiveIndices.length; j++) {
341
+ const i = effectiveIndices[j]!;
342
+ const vector = embedded.vectors[j];
343
+ if (!vector) continue;
344
+ result[i] = vector;
345
+ const section = sections[i]!;
346
+ writeEmbeddingCache(db, {
347
+ targetType: V3_SECTION_TARGET_TYPE,
348
+ targetId: sectionCacheId(section.article, section.ordinal),
349
+ dense: vector,
350
+ contentHash: hashes[i]!,
351
+ provider: writeProvider,
352
+ model: writeModel,
353
+ now,
354
+ });
355
+ }
356
+
357
+ return result;
358
+ }
359
+
207
360
  /**
208
361
  * Delete every section point belonging to an article. Used by incremental
209
362
  * rebuilds (in a later PR) to clear an article's stale sections before
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Plugin disabled-state check.
3
+ *
4
+ * A plugin is disabled when a `.disabled` sentinel file exists inside its
5
+ * workspace plugin directory (`<workspace>/plugins/<name>/.disabled`). This
6
+ * is the single source of truth for the enabled/disabled state of both
7
+ * user-installed and default plugins.
8
+ *
9
+ * Each surface that exposes plugin contributions (hooks, tools, routes) calls
10
+ * {@link isPluginDisabled} at read time so that toggling a plugin via the CLI
11
+ * (`assistant plugins disable/enable <name>`) takes effect on the next turn
12
+ * without a daemon restart.
13
+ */
14
+
15
+ import { existsSync } from "node:fs";
16
+ import { join } from "node:path";
17
+
18
+ import { getWorkspacePluginsDir } from "../util/platform.js";
19
+
20
+ /**
21
+ * Return `true` when the `.disabled` sentinel exists for `pluginName`.
22
+ *
23
+ * The check is a synchronous `existsSync` — the same primitive already used
24
+ * by `scanPlugins` and `bootstrapPlugins`. It is cheap (one `stat` syscall)
25
+ * and does not need caching for the current call pattern: `getHooksFor` is
26
+ * invoked a handful of times per turn, one per hook event, and the number of
27
+ * default plugins is small.
28
+ */
29
+ export function isPluginDisabled(pluginName: string): boolean {
30
+ return existsSync(join(getWorkspacePluginsDir(), pluginName, ".disabled"));
31
+ }
@@ -19,6 +19,7 @@
19
19
  * Design doc: `.private/plans/agent-plugin-system.md`.
20
20
  */
21
21
 
22
+ import { isPluginDisabled } from "./disabled-state.js";
22
23
  import { getUserHooksFor } from "./mtime-cache.js";
23
24
  import {
24
25
  type HookFunction,
@@ -35,6 +36,25 @@ import {
35
36
  */
36
37
  const registeredPlugins = new Map<string, Plugin>();
37
38
 
39
+ /**
40
+ * Hook registry — the per-surface registry for default plugin hooks. Each
41
+ * hook name maps to an ordered list of `{fn, pluginName}` entries, one per
42
+ * plugin that contributes that hook. The list order matches registration
43
+ * order (i.e. {@link getAllDefaultPlugins} array order), which fixes
44
+ * hook-chain ordering the same way the old `registeredPlugins` map did.
45
+ *
46
+ * Populated by {@link registerPlugin} alongside `registeredPlugins` and
47
+ * pruned by {@link unregisterPlugin}. {@link getHooksFor} reads from this
48
+ * map (not `registeredPlugins`) so it can filter disabled plugins at read
49
+ * time via {@link isPluginDisabled} — the key mechanism that makes
50
+ * `assistant plugins disable default-*` take effect immediately in a
51
+ * running assistant.
52
+ */
53
+ const hookRegistry = new Map<
54
+ string,
55
+ Array<{ fn: HookFunction; pluginName: string }>
56
+ >();
57
+
38
58
  /**
39
59
  * Latch that closes the per-boot registration window. Flipped to `true` by
40
60
  * {@link closeRegistration} once `loadUserPlugins()` has returned. After that,
@@ -126,6 +146,22 @@ export function registerPlugin(plugin: Plugin): void {
126
146
  }
127
147
 
128
148
  registeredPlugins.set(name, plugin);
149
+
150
+ // Register each hook into the per-surface hook registry. `getHooksFor`
151
+ // reads from this map (filtered by `isPluginDisabled`) rather than from
152
+ // `registeredPlugins`, so toggling the `.disabled` sentinel at runtime
153
+ // takes effect on the next turn without mutating the plugin registry.
154
+ if (plugin.hooks) {
155
+ for (const [hookName, fn] of Object.entries(plugin.hooks)) {
156
+ if (typeof fn !== "function") continue;
157
+ let list = hookRegistry.get(hookName);
158
+ if (!list) {
159
+ list = [];
160
+ hookRegistry.set(hookName, list);
161
+ }
162
+ list.push({ fn: fn as HookFunction, pluginName: name });
163
+ }
164
+ }
129
165
  }
130
166
 
131
167
  // ─── Queries ─────────────────────────────────────────────────────────────────
@@ -159,12 +195,14 @@ export function getRegisteredPlugins(): Plugin[] {
159
195
  export async function getHooksFor<TCtx = unknown>(
160
196
  name: string,
161
197
  ): Promise<HookFunction<TCtx>[]> {
162
- // First-party defaults from the registry (synchronous).
198
+ // First-party defaults from the hook registry, filtered by the `.disabled`
199
+ // sentinel at read time. This is what makes `assistant plugins disable
200
+ // default-*` take effect immediately in a running assistant: the hooks stay
201
+ // registered but are filtered out on the next turn.
163
202
  const defaultHooks: HookFunction<TCtx>[] = [];
164
- for (const plugin of registeredPlugins.values()) {
165
- const hook = plugin.hooks?.[name];
166
- if (hook) {
167
- defaultHooks.push(hook as HookFunction<TCtx>);
203
+ for (const entry of hookRegistry.get(name) ?? []) {
204
+ if (!isPluginDisabled(entry.pluginName)) {
205
+ defaultHooks.push(entry.fn as HookFunction<TCtx>);
168
206
  }
169
207
  }
170
208
 
@@ -199,14 +237,18 @@ export function closeRegistration(): void {
199
237
  */
200
238
  export function unregisterPlugin(name: string): void {
201
239
  registeredPlugins.delete(name);
202
- }
203
-
204
- export function getRegisteredPlugin(name: string): Plugin | undefined {
205
- return registeredPlugins.get(name);
206
- }
207
240
 
208
- export function setRegisteredPlugin(plugin: Plugin): void {
209
- registeredPlugins.set(plugin.manifest.name, plugin);
241
+ // Remove all hooks contributed by this plugin from the hook registry.
242
+ // Used by the bootstrap failure path (init threw) and the feature-flag
243
+ // skip path — both are boot-time decisions where the plugin's hooks should
244
+ // never participate in the turn lifecycle.
245
+ for (const [, list] of hookRegistry) {
246
+ for (let i = list.length - 1; i >= 0; i--) {
247
+ if (list[i]!.pluginName === name) {
248
+ list.splice(i, 1);
249
+ }
250
+ }
251
+ }
210
252
  }
211
253
 
212
254
  // ─── Test hooks ──────────────────────────────────────────────────────────────
@@ -228,6 +270,7 @@ export function resetPluginRegistryForTests(): void {
228
270
  );
229
271
  }
230
272
  registeredPlugins.clear();
273
+ hookRegistry.clear();
231
274
  // Re-open the registration window so subsequent tests can register plugins
232
275
  // again. Without this, the latch set by a prior `closeRegistration()` call
233
276
  // would leak across test cases and reject legitimate registrations.