@vellumai/assistant 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. package/ARCHITECTURE.md +29 -28
  2. package/Dockerfile +1 -0
  3. package/__tests__/permissions/gateway-threshold-reader.test.ts +236 -9
  4. package/bun.lock +3 -0
  5. package/knip.json +1 -0
  6. package/node_modules/@vellumai/ipc-server-utils/bun.lock +24 -0
  7. package/node_modules/@vellumai/ipc-server-utils/package.json +18 -0
  8. package/node_modules/@vellumai/ipc-server-utils/src/index.ts +6 -0
  9. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.test.ts +430 -0
  10. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.ts +221 -0
  11. package/node_modules/@vellumai/ipc-server-utils/tsconfig.json +20 -0
  12. package/openapi.yaml +22 -4
  13. package/package.json +3 -1
  14. package/src/__tests__/annotate-risk-options.test.ts +291 -0
  15. package/src/__tests__/approval-cascade.test.ts +8 -16
  16. package/src/__tests__/approval-routes-http.test.ts +6 -0
  17. package/src/__tests__/auto-analysis-end-to-end.test.ts +12 -25
  18. package/src/__tests__/call-constants.test.ts +10 -1
  19. package/src/__tests__/call-controller.test.ts +127 -0
  20. package/src/__tests__/cli-memory-v2-reembed-skills.test.ts +58 -28
  21. package/src/__tests__/config-loader-platform-defaults.test.ts +284 -1
  22. package/src/__tests__/context-search-memory-source.test.ts +3 -26
  23. package/src/__tests__/context-search-pkb-source.test.ts +12 -6
  24. package/src/__tests__/conversation-abort-tool-results.test.ts +1 -6
  25. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
  26. package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
  27. package/src/__tests__/conversation-agent-loop.test.ts +3 -3
  28. package/src/__tests__/conversation-confirmation-signals.test.ts +5 -13
  29. package/src/__tests__/conversation-init.benchmark.test.ts +1 -1
  30. package/src/__tests__/conversation-process-callsite.test.ts +1 -6
  31. package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -6
  32. package/src/__tests__/conversation-runtime-assembly.test.ts +15 -6
  33. package/src/__tests__/conversation-slash-unknown.test.ts +1 -6
  34. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +170 -9
  35. package/src/__tests__/conversation-surfaces-data-persist.test.ts +73 -1
  36. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +59 -0
  37. package/src/__tests__/conversation-workspace-injection.test.ts +1 -7
  38. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -7
  39. package/src/__tests__/filing-service.test.ts +2 -19
  40. package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +10 -26
  41. package/src/__tests__/injector-chain.test.ts +24 -16
  42. package/src/__tests__/injector-pkb-v2-silenced.test.ts +10 -7
  43. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +154 -67
  44. package/src/__tests__/notification-decision-fallback.test.ts +91 -0
  45. package/src/__tests__/notification-decision-strategy.test.ts +22 -0
  46. package/src/__tests__/oauth-cli.test.ts +121 -0
  47. package/src/__tests__/relay-server.test.ts +46 -2
  48. package/src/__tests__/secret-prompt-log-hygiene.test.ts +7 -5
  49. package/src/__tests__/secret-prompter-channel-fallback.test.ts +7 -5
  50. package/src/__tests__/secret-response-routing.test.ts +7 -5
  51. package/src/__tests__/server-history-render.test.ts +82 -0
  52. package/src/__tests__/skill-include-graph.test.ts +31 -0
  53. package/src/__tests__/skill-load-tool.test.ts +44 -16
  54. package/src/__tests__/skills.test.ts +39 -0
  55. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -42
  56. package/src/__tests__/tool-executor.test.ts +155 -0
  57. package/src/__tests__/voice-session-bridge.test.ts +3 -0
  58. package/src/__tests__/workspace-migration-069-seed-onboarding-threads.test.ts +120 -0
  59. package/src/__tests__/workspace-migration-071-remove-safe-storage-release-note.test.ts +206 -0
  60. package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +15 -27
  61. package/src/agent/loop.ts +11 -0
  62. package/src/approvals/guardian-decision-primitive.ts +0 -13
  63. package/src/approvals/guardian-request-resolvers.ts +4 -32
  64. package/src/calls/call-constants.ts +5 -8
  65. package/src/calls/call-controller.ts +130 -67
  66. package/src/calls/relay-server.ts +7 -1
  67. package/src/calls/voice-session-bridge.ts +1 -1
  68. package/src/cli/commands/memory-v2.ts +7 -7
  69. package/src/cli/commands/oauth/__tests__/connect.test.ts +0 -254
  70. package/src/cli/commands/oauth/connect.ts +10 -52
  71. package/src/config/bundled-skills/app-builder/SKILL.md +1 -3
  72. package/src/config/feature-flag-registry.json +1 -17
  73. package/src/config/loader.ts +72 -19
  74. package/src/config/schemas/memory-v2.ts +1 -1
  75. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +32 -0
  76. package/src/daemon/conversation-agent-loop-handlers.ts +32 -0
  77. package/src/daemon/conversation-agent-loop.ts +13 -10
  78. package/src/daemon/conversation-lifecycle.ts +22 -8
  79. package/src/daemon/conversation-surfaces.ts +16 -14
  80. package/src/daemon/conversation-tool-setup.ts +9 -5
  81. package/src/daemon/conversation.ts +1 -1
  82. package/src/daemon/handlers/shared.ts +26 -0
  83. package/src/daemon/host-bash-proxy.ts +1 -1
  84. package/src/daemon/host-browser-proxy.ts +1 -1
  85. package/src/daemon/host-cu-proxy.ts +1 -1
  86. package/src/daemon/host-file-proxy.ts +1 -1
  87. package/src/daemon/host-transfer-proxy.ts +2 -2
  88. package/src/daemon/lifecycle.ts +88 -73
  89. package/src/daemon/memory-v2-startup.ts +55 -14
  90. package/src/daemon/message-types/messages.ts +19 -1
  91. package/src/documents/document-store.ts +35 -1
  92. package/src/filing/filing-service.ts +2 -3
  93. package/src/heartbeat/heartbeat-service.ts +1 -1
  94. package/src/ipc/assistant-server.ts +93 -36
  95. package/src/ipc/skill-server.ts +99 -42
  96. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +10 -57
  97. package/src/memory/context-search/sources/memory-v2.ts +1 -17
  98. package/src/memory/context-search/sources/memory.ts +2 -2
  99. package/src/memory/context-search/sources/pkb.ts +2 -3
  100. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +104 -61
  101. package/src/memory/graph/__tests__/handle-remember-v2.test.ts +11 -26
  102. package/src/memory/graph/conversation-graph-memory.ts +32 -9
  103. package/src/memory/graph/graph-search.test.ts +6 -5
  104. package/src/memory/graph/graph-search.ts +3 -4
  105. package/src/memory/graph/retriever.test.ts +12 -7
  106. package/src/memory/graph/retriever.ts +4 -5
  107. package/src/memory/graph/tool-handlers.ts +3 -4
  108. package/src/memory/graph/tools.ts +4 -4
  109. package/src/memory/indexer.ts +1 -2
  110. package/src/memory/jobs/__tests__/embed-concept-page.test.ts +116 -0
  111. package/src/memory/jobs/embed-concept-page.ts +223 -87
  112. package/src/memory/jobs-worker.ts +8 -4
  113. package/src/memory/pkb/pkb-search.test.ts +6 -5
  114. package/src/memory/pkb/pkb-search.ts +4 -5
  115. package/src/memory/qdrant-client.ts +3 -0
  116. package/src/memory/search/semantic.ts +4 -5
  117. package/src/memory/v2/__tests__/activation.test.ts +35 -5
  118. package/src/memory/v2/__tests__/consolidation-job.test.ts +21 -32
  119. package/src/memory/v2/__tests__/injection.test.ts +140 -23
  120. package/src/memory/v2/__tests__/qdrant.test.ts +310 -9
  121. package/src/memory/v2/__tests__/sim.test.ts +118 -7
  122. package/src/memory/v2/__tests__/static-context.test.ts +1 -13
  123. package/src/memory/v2/__tests__/sweep-job.test.ts +19 -33
  124. package/src/memory/v2/consolidation-job.ts +7 -8
  125. package/src/memory/v2/injection.ts +32 -12
  126. package/src/memory/v2/page-store.ts +39 -0
  127. package/src/memory/v2/prompts/consolidation.ts +5 -0
  128. package/src/memory/v2/qdrant.ts +209 -48
  129. package/src/memory/v2/sim.ts +67 -26
  130. package/src/memory/v2/static-context.ts +4 -8
  131. package/src/memory/v2/sweep-job.ts +5 -6
  132. package/src/memory/v2/types.ts +7 -0
  133. package/src/notifications/copy-composer.ts +46 -12
  134. package/src/notifications/decision-engine.ts +46 -0
  135. package/src/permissions/gateway-threshold-reader.ts +116 -8
  136. package/src/permissions/prompter.ts +86 -96
  137. package/src/permissions/secret-prompter.ts +31 -31
  138. package/src/plugins/defaults/injectors.ts +1 -2
  139. package/src/proactive-artifact/job.test.ts +51 -4
  140. package/src/proactive-artifact/job.ts +16 -2
  141. package/src/proactive-artifact/message-copy.ts +18 -1
  142. package/src/prompts/templates/SOUL.md +13 -28
  143. package/src/runtime/auth/route-policy.ts +1 -0
  144. package/src/runtime/channel-approvals.ts +3 -2
  145. package/src/runtime/guardian-reply-router.ts +0 -10
  146. package/src/runtime/pending-interactions.ts +19 -15
  147. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +147 -0
  148. package/src/runtime/routes/approval-routes.ts +7 -3
  149. package/src/runtime/routes/consolidation-routes.ts +8 -9
  150. package/src/runtime/routes/conversation-query-routes.ts +44 -1
  151. package/src/runtime/routes/debug-bash-routes.ts +2 -0
  152. package/src/runtime/routes/filing-routes.ts +2 -3
  153. package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +0 -3
  154. package/src/runtime/routes/memory-item-routes.test.ts +3 -9
  155. package/src/runtime/routes/memory-item-routes.ts +5 -6
  156. package/src/runtime/routes/memory-v2-routes.ts +103 -17
  157. package/src/skills/include-graph.ts +35 -13
  158. package/src/tools/document/document-tool.ts +20 -0
  159. package/src/tools/executor.ts +18 -2
  160. package/src/tools/memory/register.test.ts +7 -5
  161. package/src/tools/permission-checker.ts +15 -0
  162. package/src/tools/skills/load.ts +24 -20
  163. package/src/tools/tool-name-aliases.ts +19 -0
  164. package/src/tools/types.ts +19 -1
  165. package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +4 -62
  166. package/src/workspace/migrations/069-seed-onboarding-threads.ts +28 -0
  167. package/src/workspace/migrations/070-memory-v2-summary-schema-rebuild.ts +31 -0
  168. package/src/workspace/migrations/071-remove-safe-storage-release-note.ts +111 -0
  169. package/src/workspace/migrations/registry.ts +6 -0
@@ -86,6 +86,10 @@ const upsertCalls: Array<{
86
86
  slug: string;
87
87
  dense: number[];
88
88
  sparse: { indices: number[]; values: number[] };
89
+ summary?: {
90
+ dense: number[];
91
+ sparse: { indices: number[]; values: number[] };
92
+ };
89
93
  updatedAt: number;
90
94
  }> = [];
91
95
 
@@ -96,6 +100,10 @@ mock.module("../../v2/qdrant.js", () => ({
96
100
  slug: string;
97
101
  dense: number[];
98
102
  sparse: { indices: number[]; values: number[] };
103
+ summary?: {
104
+ dense: number[];
105
+ sparse: { indices: number[]; values: number[] };
106
+ };
99
107
  updatedAt: number;
100
108
  }) => {
101
109
  upsertCalls.push(params);
@@ -242,6 +250,114 @@ describe("embedConceptPageJob — happy path", () => {
242
250
  });
243
251
  });
244
252
 
253
+ describe("embedConceptPageJob — summary embedding", () => {
254
+ test("embeds the summary when present and forwards summary vectors to upsert", async () => {
255
+ await writePage(tmpWorkspace, {
256
+ slug: "summarized-page",
257
+ frontmatter: {
258
+ edges: [],
259
+ ref_files: [],
260
+ summary: "A short prose summary that retrieval indexes separately.",
261
+ },
262
+ body: "Long-form body content.\n",
263
+ });
264
+
265
+ await embedConceptPageJob(
266
+ makeJob({ slug: "summarized-page" }),
267
+ TEST_CONFIG,
268
+ );
269
+
270
+ // Body and summary are batched into one backend call (saves a round-trip).
271
+ expect(embedWithBackendCalls).toHaveLength(1);
272
+ expect(embedWithBackendCalls[0].inputs).toHaveLength(2);
273
+ expect(upsertCalls).toHaveLength(1);
274
+ const call = upsertCalls[0];
275
+ expect(call.slug).toBe("summarized-page");
276
+ expect(call.dense).toEqual([0.1, 0.2, 0.3, 0.4]);
277
+ expect(call.sparse).toBeDefined();
278
+ expect(call.summary?.dense).toEqual([0.1, 0.2, 0.3, 0.4]);
279
+ expect(call.summary?.sparse).toBeDefined();
280
+ });
281
+
282
+ test("skips summary embedding when the page has no summary in frontmatter", async () => {
283
+ await writePage(tmpWorkspace, {
284
+ slug: "legacy-page",
285
+ frontmatter: { edges: [], ref_files: [] },
286
+ body: "Body only — no summary in frontmatter.\n",
287
+ });
288
+
289
+ await embedConceptPageJob(makeJob({ slug: "legacy-page" }), TEST_CONFIG);
290
+
291
+ // Only the body was embedded.
292
+ expect(embedWithBackendCalls).toHaveLength(1);
293
+ expect(upsertCalls).toHaveLength(1);
294
+ const call = upsertCalls[0];
295
+ expect(call.summary).toBeUndefined();
296
+ });
297
+
298
+ test("skips summary embedding when the summary is whitespace-only", async () => {
299
+ // Whitespace-only summaries (` `, `\n`) are equivalent to absent — the
300
+ // embedding backend would reject the empty input downstream anyway.
301
+ await writePage(tmpWorkspace, {
302
+ slug: "whitespace-summary",
303
+ frontmatter: {
304
+ edges: [],
305
+ ref_files: [],
306
+ summary: " ",
307
+ },
308
+ body: "Body content.\n",
309
+ });
310
+
311
+ await embedConceptPageJob(
312
+ makeJob({ slug: "whitespace-summary" }),
313
+ TEST_CONFIG,
314
+ );
315
+
316
+ expect(embedWithBackendCalls).toHaveLength(1);
317
+ expect(upsertCalls[0].summary).toBeUndefined();
318
+ });
319
+
320
+ test("body and summary cache rows are independent (summary edit doesn't invalidate body)", async () => {
321
+ // Write a page with a summary, run the job to prime caches.
322
+ await writePage(tmpWorkspace, {
323
+ slug: "cached-summary",
324
+ frontmatter: {
325
+ edges: [],
326
+ ref_files: [],
327
+ summary: "First version of the summary.",
328
+ },
329
+ body: "Stable body that never changes.\n",
330
+ });
331
+ await embedConceptPageJob(
332
+ makeJob({ slug: "cached-summary" }),
333
+ TEST_CONFIG,
334
+ );
335
+ // Body + summary batched into a single backend call on first run.
336
+ expect(embedWithBackendCalls).toHaveLength(1);
337
+ expect(embedWithBackendCalls[0].inputs).toHaveLength(2);
338
+
339
+ // Edit only the summary — body stays identical, only the summary text
340
+ // changes. Re-running the job should hit the body cache (no re-embed)
341
+ // but recompute the summary embedding.
342
+ await writePage(tmpWorkspace, {
343
+ slug: "cached-summary",
344
+ frontmatter: {
345
+ edges: [],
346
+ ref_files: [],
347
+ summary: "Second version of the summary, different wording.",
348
+ },
349
+ body: "Stable body that never changes.\n",
350
+ });
351
+ await embedConceptPageJob(
352
+ makeJob({ slug: "cached-summary" }),
353
+ TEST_CONFIG,
354
+ );
355
+ // One additional backend call with only the summary text — body hit the cache.
356
+ expect(embedWithBackendCalls).toHaveLength(2);
357
+ expect(embedWithBackendCalls[1].inputs).toHaveLength(1);
358
+ });
359
+ });
360
+
245
361
  describe("embedConceptPageJob — cache hit", () => {
246
362
  test("reuses the cached dense vector when content hash matches", async () => {
247
363
  await writePage(tmpWorkspace, {
@@ -98,52 +98,93 @@ export async function embedConceptPageJob(
98
98
  );
99
99
  }
100
100
 
101
- const contentHash = embeddingInputContentHash({ type: "text", text });
102
101
  const expectedDim = config.memory.qdrant.vectorSize;
103
- let provider = status.provider;
104
- let model = status.model!;
102
+ // The status provider is the cache lookup key for any prior row; the
103
+ // *actual* provider/model come back on the embedded result. They usually
104
+ // match, but a backend swap mid-run would surface here — every cache miss
105
+ // (body and/or summary) is embedded in one batch and written under that identity.
106
+ const cacheProvider = status.provider;
107
+ const cacheModel = status.model!;
108
+
109
+ const db = getDb();
105
110
 
106
111
  // Cache lookup: same (targetType, targetId, provider, model) row gets
107
112
  // reused across runs as long as `contentHash` matches. The dim mismatch
108
113
  // check guards against a config change (vectorSize bumped) since the last
109
- // write — in that case we treat the row as stale and re-embed.
110
- const db = getDb();
111
- let cachedRow = db
112
- .select({
113
- vectorBlob: memoryEmbeddings.vectorBlob,
114
- vectorJson: memoryEmbeddings.vectorJson,
115
- dimensions: memoryEmbeddings.dimensions,
116
- contentHash: memoryEmbeddings.contentHash,
117
- })
118
- .from(memoryEmbeddings)
119
- .where(
120
- and(
121
- eq(memoryEmbeddings.targetType, CONCEPT_PAGE_TARGET_TYPE),
122
- eq(memoryEmbeddings.targetId, slug),
123
- eq(memoryEmbeddings.provider, provider),
124
- eq(memoryEmbeddings.model, model),
125
- ),
126
- )
127
- .get();
128
- if (cachedRow && cachedRow.dimensions !== expectedDim) cachedRow = undefined;
129
- if (cachedRow && cachedRow.contentHash !== contentHash) cachedRow = undefined;
130
-
131
- let dense: number[];
132
- let cacheHit = false;
133
- if (cachedRow) {
134
- dense = cachedRow.vectorBlob
135
- ? blobToVector(cachedRow.vectorBlob as Buffer)
136
- : (JSON.parse(cachedRow.vectorJson!) as number[]);
137
- cacheHit = true;
138
- } else {
139
- const embedded = await embedWithBackend(config, [{ type: "text", text }]);
140
- const vector = embedded.vectors[0];
141
- if (!vector) return;
142
- dense = vector;
143
- provider = embedded.provider;
144
- model = embedded.model;
114
+ // write — in that case we treat the row as stale and re-embed. The body
115
+ // and (optional) summary share the same provider/model — but each gets
116
+ // its own cache row keyed by a distinct targetId so summary edits don't
117
+ // invalidate the body cache and vice versa.
118
+ const bodyContentHash = embeddingInputContentHash({ type: "text", text });
119
+ const bodyCache = readEmbeddingCache(
120
+ db,
121
+ slug,
122
+ cacheProvider,
123
+ cacheModel,
124
+ expectedDim,
125
+ );
126
+ const bodyCacheHit = bodyCache?.contentHash === bodyContentHash;
127
+
128
+ // Optional summary embedding — only when the page has a `summary` in its
129
+ // frontmatter. Pages without one fall back to body-only retrieval at
130
+ // query time (the activation pipeline reads the summary score as
131
+ // undefined and uses the body score directly).
132
+ const summaryText = page.frontmatter.summary?.trim() ?? "";
133
+ const hasSummary = summaryText.length > 0;
134
+ const summaryCacheId = `${slug}#summary`;
135
+ const summaryContentHash = hasSummary
136
+ ? embeddingInputContentHash({ type: "text", text: summaryText })
137
+ : undefined;
138
+ const summaryCache = hasSummary
139
+ ? readEmbeddingCache(
140
+ db,
141
+ summaryCacheId,
142
+ cacheProvider,
143
+ cacheModel,
144
+ expectedDim,
145
+ )
146
+ : null;
147
+ const summaryCacheHit =
148
+ hasSummary && summaryCache?.contentHash === summaryContentHash;
149
+
150
+ // Batch all cache misses into one `embedWithBackend` call. Each backend
151
+ // round-trip is the dominant cost — fresh body + fresh summary in a
152
+ // single batch saves a round-trip vs serial calls and gives both vectors
153
+ // the same provider/model regardless of any backend rotation mid-run.
154
+ type Slot = "body" | "summary";
155
+ const toEmbed: Array<{ type: "text"; text: string }> = [];
156
+ const slots: Slot[] = [];
157
+ if (!bodyCacheHit) {
158
+ toEmbed.push({ type: "text", text });
159
+ slots.push("body");
160
+ }
161
+ if (hasSummary && !summaryCacheHit) {
162
+ toEmbed.push({ type: "text", text: summaryText });
163
+ slots.push("summary");
145
164
  }
146
165
 
166
+ let bodyDense: number[] | undefined = bodyCacheHit ? bodyCache!.dense : undefined;
167
+ let summaryDense: number[] | undefined = summaryCacheHit
168
+ ? summaryCache!.dense
169
+ : undefined;
170
+ let writeProvider = cacheProvider;
171
+ let writeModel = cacheModel;
172
+ if (toEmbed.length > 0) {
173
+ const embedded = await embedWithBackend(config, toEmbed);
174
+ writeProvider = embedded.provider;
175
+ writeModel = embedded.model;
176
+ for (let i = 0; i < slots.length; i++) {
177
+ const vector = embedded.vectors[i];
178
+ if (!vector) continue;
179
+ if (slots[i] === "body") bodyDense = vector;
180
+ else summaryDense = vector;
181
+ }
182
+ }
183
+ // Body embedding is the ground truth — without it the page can't surface.
184
+ // (Cache hit paths populate `bodyDense` above; a fresh embed that returned
185
+ // no vectors short-circuits here too.)
186
+ if (!bodyDense) return;
187
+
147
188
  // Sparse is cheap (in-process tokenization) and changes any time the body
148
189
  // changes, so we always recompute it rather than caching alongside dense.
149
190
  // BM25 weights live on the doc side; queries embed binary occurrence in
@@ -151,57 +192,42 @@ export async function embedConceptPageJob(
151
192
  // corpus for the first time), fall back to the legacy TF-only encoding —
152
193
  // the next reembed pass overwrites the page once stats are available.
153
194
  const corpusStats = getConceptPageCorpusStats();
154
- const sparse = corpusStats
155
- ? generateBm25DocEmbedding(text, corpusStats, {
156
- k1: config.memory.v2.bm25_k1,
157
- b: config.memory.v2.bm25_b,
158
- })
159
- : generateSparseEmbedding(text);
195
+ const encodeSparse = (input: string) =>
196
+ corpusStats
197
+ ? generateBm25DocEmbedding(input, corpusStats, {
198
+ k1: config.memory.v2.bm25_k1,
199
+ b: config.memory.v2.bm25_b,
200
+ })
201
+ : generateSparseEmbedding(input);
202
+ const sparse = encodeSparse(text);
203
+ const summarySparse = hasSummary ? encodeSparse(summaryText) : undefined;
160
204
 
161
205
  const now = Date.now();
162
206
  // Persist freshly embedded vectors for cross-restart reuse. On cache hit
163
207
  // the existing row already has identical content + hash, so the write
164
208
  // would be a no-op — skip it. Best-effort: write failure is not fatal,
165
209
  // we still want the Qdrant upsert below to fire.
166
- if (!cacheHit) {
167
- try {
168
- const blobValue = vectorToBlob(dense);
169
- db.insert(memoryEmbeddings)
170
- .values({
171
- id: randomUUID(),
172
- targetType: CONCEPT_PAGE_TARGET_TYPE,
173
- targetId: slug,
174
- provider,
175
- model,
176
- dimensions: dense.length,
177
- vectorBlob: blobValue,
178
- vectorJson: null,
179
- contentHash,
180
- createdAt: now,
181
- updatedAt: now,
182
- })
183
- .onConflictDoUpdate({
184
- target: [
185
- memoryEmbeddings.targetType,
186
- memoryEmbeddings.targetId,
187
- memoryEmbeddings.provider,
188
- memoryEmbeddings.model,
189
- ],
190
- set: {
191
- vectorBlob: blobValue,
192
- vectorJson: null,
193
- dimensions: dense.length,
194
- contentHash,
195
- updatedAt: now,
196
- },
197
- })
198
- .run();
199
- } catch (err) {
200
- log.warn(
201
- { err, slug },
202
- "Failed to write concept-page embedding cache row",
203
- );
204
- }
210
+ if (!bodyCacheHit) {
211
+ writeEmbeddingCache(db, {
212
+ slug,
213
+ cacheId: slug,
214
+ dense: bodyDense,
215
+ contentHash: bodyContentHash,
216
+ provider: writeProvider,
217
+ model: writeModel,
218
+ now,
219
+ });
220
+ }
221
+ if (hasSummary && !summaryCacheHit && summaryDense && summaryContentHash) {
222
+ writeEmbeddingCache(db, {
223
+ slug,
224
+ cacheId: summaryCacheId,
225
+ dense: summaryDense,
226
+ contentHash: summaryContentHash,
227
+ provider: writeProvider,
228
+ model: writeModel,
229
+ now,
230
+ });
205
231
  }
206
232
 
207
233
  // Apply anisotropy correction at the boundary between the (raw) cached
@@ -210,19 +236,129 @@ export async function embedConceptPageJob(
210
236
  // the cache survives and the (cheap) correction math reruns over each
211
237
  // cached vector. Pass-through when no calibration is fit yet.
212
238
  const correctedDense = await applyCorrectionIfCalibrated(
213
- dense,
214
- provider,
215
- model,
239
+ bodyDense,
240
+ writeProvider,
241
+ writeModel,
216
242
  );
243
+ const correctedSummaryDense = summaryDense
244
+ ? await applyCorrectionIfCalibrated(summaryDense, writeProvider, writeModel)
245
+ : undefined;
217
246
 
218
247
  await upsertConceptPageEmbedding({
219
248
  slug,
220
249
  dense: correctedDense,
221
250
  sparse,
251
+ summary:
252
+ correctedSummaryDense && summarySparse
253
+ ? { dense: correctedSummaryDense, sparse: summarySparse }
254
+ : undefined,
222
255
  updatedAt: now,
223
256
  });
224
257
  }
225
258
 
259
+ /** SQLite cache row shape returned by `readEmbeddingCache`. */
260
+ interface EmbeddingCacheEntry {
261
+ dense: number[];
262
+ contentHash: string;
263
+ }
264
+
265
+ /**
266
+ * Look up a cached dense vector keyed on `(targetType, targetId, provider,
267
+ * model)`. Returns the row only when the persisted dimensions match the
268
+ * configured expectation — a stale row from a previous `vectorSize` is
269
+ * treated as a cache miss so the caller re-embeds.
270
+ */
271
+ function readEmbeddingCache(
272
+ db: ReturnType<typeof getDb>,
273
+ cacheId: string,
274
+ provider: string,
275
+ model: string,
276
+ expectedDim: number,
277
+ ): EmbeddingCacheEntry | null {
278
+ const row = db
279
+ .select({
280
+ vectorBlob: memoryEmbeddings.vectorBlob,
281
+ vectorJson: memoryEmbeddings.vectorJson,
282
+ dimensions: memoryEmbeddings.dimensions,
283
+ contentHash: memoryEmbeddings.contentHash,
284
+ })
285
+ .from(memoryEmbeddings)
286
+ .where(
287
+ and(
288
+ eq(memoryEmbeddings.targetType, CONCEPT_PAGE_TARGET_TYPE),
289
+ eq(memoryEmbeddings.targetId, cacheId),
290
+ eq(memoryEmbeddings.provider, provider),
291
+ eq(memoryEmbeddings.model, model),
292
+ ),
293
+ )
294
+ .get();
295
+ if (!row || row.dimensions !== expectedDim) return null;
296
+ // A row without a contentHash is a legacy/corrupt entry — treat as a miss
297
+ // and force a re-embed rather than misalign the cache key.
298
+ if (row.contentHash === null) return null;
299
+ const dense = row.vectorBlob
300
+ ? blobToVector(row.vectorBlob as Buffer)
301
+ : (JSON.parse(row.vectorJson!) as number[]);
302
+ return { dense, contentHash: row.contentHash };
303
+ }
304
+
305
+ /**
306
+ * Persist a freshly embedded dense vector in the SQLite cache. Best-effort:
307
+ * a write failure is logged and swallowed so the Qdrant upsert still runs.
308
+ */
309
+ function writeEmbeddingCache(
310
+ db: ReturnType<typeof getDb>,
311
+ params: {
312
+ slug: string;
313
+ cacheId: string;
314
+ dense: number[];
315
+ contentHash: string;
316
+ provider: string;
317
+ model: string;
318
+ now: number;
319
+ },
320
+ ): void {
321
+ const { slug, cacheId, dense, contentHash, provider, model, now } = params;
322
+ try {
323
+ const blobValue = vectorToBlob(dense);
324
+ db.insert(memoryEmbeddings)
325
+ .values({
326
+ id: randomUUID(),
327
+ targetType: CONCEPT_PAGE_TARGET_TYPE,
328
+ targetId: cacheId,
329
+ provider,
330
+ model,
331
+ dimensions: dense.length,
332
+ vectorBlob: blobValue,
333
+ vectorJson: null,
334
+ contentHash,
335
+ createdAt: now,
336
+ updatedAt: now,
337
+ })
338
+ .onConflictDoUpdate({
339
+ target: [
340
+ memoryEmbeddings.targetType,
341
+ memoryEmbeddings.targetId,
342
+ memoryEmbeddings.provider,
343
+ memoryEmbeddings.model,
344
+ ],
345
+ set: {
346
+ vectorBlob: blobValue,
347
+ vectorJson: null,
348
+ dimensions: dense.length,
349
+ contentHash,
350
+ updatedAt: now,
351
+ },
352
+ })
353
+ .run();
354
+ } catch (err) {
355
+ log.warn(
356
+ { err, slug, cacheId },
357
+ "Failed to write concept-page embedding cache row",
358
+ );
359
+ }
360
+ }
361
+
226
362
  /**
227
363
  * Enqueue an `embed_concept_page` job (async, fire-and-forget). Modeled on
228
364
  * `enqueuePkbIndexJob` — callers that want a slug re-embedded after a write
@@ -11,7 +11,6 @@ import {
11
11
  getLastScheduledCleanupEnqueueMs,
12
12
  markScheduledCleanupEnqueued,
13
13
  } from "./cleanup-schedule-state.js";
14
- import { isMemoryV2ReadActive } from "./context-search/sources/memory-v2.js";
15
14
  import { conversationAnalyzeJob } from "./conversation-analyze-job.js";
16
15
  import { maybeRunDbMaintenance } from "./db-maintenance.js";
17
16
  import { bootstrapFromHistory } from "./graph/bootstrap.js";
@@ -510,6 +509,11 @@ async function processJob(
510
509
  await embedGraphTriggerJob(job, config);
511
510
  return;
512
511
  case "graph_extract":
512
+ // Stale rows enqueued before v2 was enabled (or by any unguarded v1
513
+ // path) must not consume embedding/extraction budget when v2 is on.
514
+ if (config.memory.v2.enabled) {
515
+ return;
516
+ }
513
517
  await graphExtractJob(job, config);
514
518
  return;
515
519
  case "conversation_analyze":
@@ -623,8 +627,8 @@ export const GRAPH_MAINTENANCE_CHECKPOINTS = {
623
627
  * Enqueue periodic graph maintenance jobs.
624
628
  *
625
629
  * Mutually exclusive between v1 and v2:
626
- * - v2 active (both `memory-v2-enabled` flag and `memory.v2.enabled`
627
- * config on) → only `memory_v2_consolidate` is scheduled.
630
+ * - v2 active (`memory.v2.enabled` on) → only `memory_v2_consolidate` is
631
+ * scheduled.
628
632
  * - v2 inactive → the four v1 entries (decay, consolidate, pattern_scan,
629
633
  * narrative) are scheduled instead.
630
634
  *
@@ -643,7 +647,7 @@ export function maybeEnqueueGraphMaintenanceJobs(
643
647
  config: AssistantConfig,
644
648
  nowMs = Date.now(),
645
649
  ): void {
646
- const v2Active = isMemoryV2ReadActive(config);
650
+ const v2Active = config.memory.v2.enabled;
647
651
 
648
652
  const schedule: Array<{
649
653
  key: string;
@@ -1,12 +1,13 @@
1
1
  import { beforeEach, describe, expect, mock, test } from "bun:test";
2
2
 
3
3
  import { makeMockLogger } from "../../__tests__/helpers/mock-logger.js";
4
- import { _setOverridesForTesting } from "../../config/assistant-feature-flags.js";
5
4
 
6
- // This test exercises the v1 PKB search path. The `memory-v2-enabled` flag
7
- // (registry default `true`) makes pkb-search short-circuit to keep traffic
8
- // off the legacy collection — disable it so the v1 path stays under test.
9
- _setOverridesForTesting({ "memory-v2-enabled": false });
5
+ // This test exercises the v1 PKB search path. `config.memory.v2.enabled`
6
+ // (default `true`) makes pkb-search short-circuit to keep traffic off the
7
+ // legacy collection — force it off so the v1 path stays under test.
8
+ mock.module("../../config/loader.js", () => ({
9
+ getConfig: () => ({ memory: { v2: { enabled: false } } }),
10
+ }));
10
11
 
11
12
  mock.module("../../util/logger.js", () => ({
12
13
  getLogger: () => makeMockLogger(),
@@ -4,7 +4,6 @@
4
4
 
5
5
  import { getConfig } from "../../config/loader.js";
6
6
  import { getLogger } from "../../util/logger.js";
7
- import { isMemoryV2ReadActive } from "../context-search/sources/memory-v2.js";
8
7
  import {
9
8
  isQdrantBreakerOpen,
10
9
  withQdrantBreaker,
@@ -42,10 +41,10 @@ export async function searchPkbFiles(
42
41
  limit: number,
43
42
  scopeIds?: string[],
44
43
  ): Promise<PkbSearchResult[]> {
45
- // v2 owns the read path when both gates are on; v2 absorbs PKB as a read
46
- // source, so PKB hint search short-circuits to keep traffic off the v1
47
- // collection (avoiding OOM-crash risk from a corrupted sparse segment).
48
- if (isMemoryV2ReadActive(getConfig())) return [];
44
+ // v2 owns the read path when enabled; v2 absorbs PKB as a read source,
45
+ // so PKB hint search short-circuits to keep traffic off the v1 collection
46
+ // (avoiding OOM-crash risk from a corrupted sparse segment).
47
+ if (getConfig().memory.v2.enabled) return [];
49
48
 
50
49
  if (isQdrantBreakerOpen()) {
51
50
  log.warn("Qdrant circuit breaker open, skipping PKB search");
@@ -248,6 +248,9 @@ export class VellumQdrantClient {
248
248
  m: 16,
249
249
  ef_construct: 100,
250
250
  },
251
+ optimizers_config: {
252
+ default_segment_number: 2,
253
+ },
251
254
  quantization_config:
252
255
  this.quantization === "scalar"
253
256
  ? {
@@ -1,7 +1,6 @@
1
1
  import { inArray } from "drizzle-orm";
2
2
 
3
3
  import { getConfig } from "../../config/loader.js";
4
- import { isMemoryV2ReadActive } from "../context-search/sources/memory-v2.js";
5
4
  import { getDb } from "../db-connection.js";
6
5
  import { withQdrantBreaker } from "../qdrant-circuit-breaker.js";
7
6
  import type {
@@ -56,10 +55,10 @@ export async function semanticSearch(
56
55
  ): Promise<Candidate[]> {
57
56
  if (limit <= 0) return [];
58
57
 
59
- // v2 owns the read path when both gates are on; the v1 `memory` collection
60
- // is in active retirement, and routing semantic recall there would re-enter
61
- // the same corrupted sparse segments that can OOM-crash Qdrant.
62
- if (isMemoryV2ReadActive(getConfig())) return [];
58
+ // v2 owns the read path when enabled; the v1 `memory` collection is in
59
+ // active retirement, and routing semantic recall there would re-enter the
60
+ // same corrupted sparse segments that can OOM-crash Qdrant.
61
+ if (getConfig().memory.v2.enabled) return [];
63
62
 
64
63
  const qdrant = getQdrantClient();
65
64
 
@@ -114,7 +114,10 @@ class MockQdrantClient {
114
114
  limit: params.limit,
115
115
  filter: params.filter,
116
116
  });
117
- const channel = params.using as "dense" | "sparse";
117
+ // The four-channel hybrid query fires body-dense, body-sparse,
118
+ // summary-dense, summary-sparse in order; both dense channels share
119
+ // the dense queue and both sparse channels share the sparse queue.
120
+ const channel = params.using.endsWith("sparse") ? "sparse" : "dense";
118
121
  return state.queryResponses[channel].shift() ?? { points: [] };
119
122
  }
120
123
  }
@@ -223,9 +226,20 @@ function makeConfig(
223
226
  } as unknown as AssistantConfig;
224
227
  }
225
228
 
226
- /** Stage a single dense + sparse pair on the response queues. */
229
+ /**
230
+ * Stage a single hybrid-query response — body channels first, then summary
231
+ * channels (which default to empty). The four-channel hybrid query fires
232
+ * body-dense, body-sparse, summary-dense, summary-sparse in that order, so
233
+ * each logical call consumes 2 dense + 2 sparse queue entries.
234
+ */
227
235
  function stageHybridResponse(
228
- hits: Array<{ slug: string; denseScore?: number; sparseScore?: number }>,
236
+ hits: Array<{
237
+ slug: string;
238
+ denseScore?: number;
239
+ sparseScore?: number;
240
+ summaryDenseScore?: number;
241
+ summarySparseScore?: number;
242
+ }>,
229
243
  ): void {
230
244
  state.queryResponses.dense.push({
231
245
  points: hits
@@ -237,6 +251,22 @@ function stageHybridResponse(
237
251
  .filter((h) => h.sparseScore !== undefined)
238
252
  .map((h) => ({ score: h.sparseScore, payload: { slug: h.slug } })),
239
253
  });
254
+ state.queryResponses.dense.push({
255
+ points: hits
256
+ .filter((h) => h.summaryDenseScore !== undefined)
257
+ .map((h) => ({
258
+ score: h.summaryDenseScore,
259
+ payload: { slug: h.slug },
260
+ })),
261
+ });
262
+ state.queryResponses.sparse.push({
263
+ points: hits
264
+ .filter((h) => h.summarySparseScore !== undefined)
265
+ .map((h) => ({
266
+ score: h.summarySparseScore,
267
+ payload: { slug: h.slug },
268
+ })),
269
+ });
240
270
  }
241
271
 
242
272
  beforeEach(resetState);
@@ -369,7 +399,7 @@ describe("selectCandidates", () => {
369
399
  nowText: "",
370
400
  config: makeConfig(),
371
401
  });
372
- expect(state.queryCalls).toHaveLength(2);
402
+ expect(state.queryCalls).toHaveLength(4);
373
403
  for (const call of state.queryCalls) {
374
404
  expect(call.limit).toBe(1_000_000);
375
405
  expect(call.filter).toBeUndefined();
@@ -385,7 +415,7 @@ describe("selectCandidates", () => {
385
415
  nowText: "",
386
416
  config: makeConfig({ ann_candidate_limit: 25 }),
387
417
  });
388
- expect(state.queryCalls).toHaveLength(2);
418
+ expect(state.queryCalls).toHaveLength(4);
389
419
  for (const call of state.queryCalls) {
390
420
  expect(call.limit).toBe(25);
391
421
  expect(call.filter).toBeUndefined();