@vellumai/assistant 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/ARCHITECTURE.md +29 -28
  2. package/Dockerfile +1 -0
  3. package/__tests__/permissions/gateway-threshold-reader.test.ts +236 -9
  4. package/bun.lock +3 -0
  5. package/knip.json +1 -0
  6. package/node_modules/@vellumai/ipc-server-utils/bun.lock +24 -0
  7. package/node_modules/@vellumai/ipc-server-utils/package.json +18 -0
  8. package/node_modules/@vellumai/ipc-server-utils/src/index.ts +6 -0
  9. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.test.ts +430 -0
  10. package/node_modules/@vellumai/ipc-server-utils/src/socket-watchdog.ts +221 -0
  11. package/node_modules/@vellumai/ipc-server-utils/tsconfig.json +20 -0
  12. package/openapi.yaml +22 -4
  13. package/package.json +3 -1
  14. package/src/__tests__/annotate-risk-options.test.ts +291 -0
  15. package/src/__tests__/approval-cascade.test.ts +8 -16
  16. package/src/__tests__/approval-routes-http.test.ts +6 -0
  17. package/src/__tests__/auto-analysis-end-to-end.test.ts +12 -25
  18. package/src/__tests__/call-constants.test.ts +10 -1
  19. package/src/__tests__/call-controller.test.ts +127 -0
  20. package/src/__tests__/cli-memory-v2-reembed-skills.test.ts +58 -28
  21. package/src/__tests__/config-loader-platform-defaults.test.ts +284 -1
  22. package/src/__tests__/context-search-memory-source.test.ts +3 -26
  23. package/src/__tests__/context-search-pkb-source.test.ts +12 -6
  24. package/src/__tests__/conversation-abort-tool-results.test.ts +1 -6
  25. package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -1
  26. package/src/__tests__/conversation-agent-loop-overflow.test.ts +1 -1
  27. package/src/__tests__/conversation-agent-loop.test.ts +3 -3
  28. package/src/__tests__/conversation-confirmation-signals.test.ts +5 -13
  29. package/src/__tests__/conversation-init.benchmark.test.ts +1 -1
  30. package/src/__tests__/conversation-process-callsite.test.ts +1 -6
  31. package/src/__tests__/conversation-provider-retry-repair.test.ts +1 -6
  32. package/src/__tests__/conversation-runtime-assembly.test.ts +15 -6
  33. package/src/__tests__/conversation-slash-unknown.test.ts +1 -6
  34. package/src/__tests__/conversation-surfaces-action-delivery.test.ts +170 -9
  35. package/src/__tests__/conversation-surfaces-data-persist.test.ts +73 -1
  36. package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +59 -0
  37. package/src/__tests__/conversation-workspace-injection.test.ts +1 -7
  38. package/src/__tests__/conversation-workspace-tool-tracking.test.ts +1 -7
  39. package/src/__tests__/filing-service.test.ts +2 -19
  40. package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +10 -26
  41. package/src/__tests__/injector-chain.test.ts +24 -16
  42. package/src/__tests__/injector-pkb-v2-silenced.test.ts +10 -7
  43. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +154 -67
  44. package/src/__tests__/notification-decision-fallback.test.ts +91 -0
  45. package/src/__tests__/notification-decision-strategy.test.ts +22 -0
  46. package/src/__tests__/oauth-cli.test.ts +121 -0
  47. package/src/__tests__/relay-server.test.ts +46 -2
  48. package/src/__tests__/secret-prompt-log-hygiene.test.ts +7 -5
  49. package/src/__tests__/secret-prompter-channel-fallback.test.ts +7 -5
  50. package/src/__tests__/secret-response-routing.test.ts +7 -5
  51. package/src/__tests__/server-history-render.test.ts +82 -0
  52. package/src/__tests__/skill-include-graph.test.ts +31 -0
  53. package/src/__tests__/skill-load-tool.test.ts +44 -16
  54. package/src/__tests__/skills.test.ts +39 -0
  55. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -42
  56. package/src/__tests__/tool-executor.test.ts +155 -0
  57. package/src/__tests__/voice-session-bridge.test.ts +3 -0
  58. package/src/__tests__/workspace-migration-069-seed-onboarding-threads.test.ts +120 -0
  59. package/src/__tests__/workspace-migration-071-remove-safe-storage-release-note.test.ts +206 -0
  60. package/src/__tests__/workspace-migration-safe-storage-limits-release.test.ts +15 -27
  61. package/src/agent/loop.ts +11 -0
  62. package/src/approvals/guardian-decision-primitive.ts +0 -13
  63. package/src/approvals/guardian-request-resolvers.ts +4 -32
  64. package/src/calls/call-constants.ts +5 -8
  65. package/src/calls/call-controller.ts +130 -67
  66. package/src/calls/relay-server.ts +7 -1
  67. package/src/calls/voice-session-bridge.ts +1 -1
  68. package/src/cli/commands/memory-v2.ts +7 -7
  69. package/src/cli/commands/oauth/__tests__/connect.test.ts +0 -254
  70. package/src/cli/commands/oauth/connect.ts +10 -52
  71. package/src/config/bundled-skills/app-builder/SKILL.md +1 -3
  72. package/src/config/feature-flag-registry.json +1 -17
  73. package/src/config/loader.ts +72 -19
  74. package/src/config/schemas/memory-v2.ts +1 -1
  75. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +32 -0
  76. package/src/daemon/conversation-agent-loop-handlers.ts +32 -0
  77. package/src/daemon/conversation-agent-loop.ts +13 -10
  78. package/src/daemon/conversation-lifecycle.ts +22 -8
  79. package/src/daemon/conversation-surfaces.ts +16 -14
  80. package/src/daemon/conversation-tool-setup.ts +9 -5
  81. package/src/daemon/conversation.ts +1 -1
  82. package/src/daemon/handlers/shared.ts +26 -0
  83. package/src/daemon/host-bash-proxy.ts +1 -1
  84. package/src/daemon/host-browser-proxy.ts +1 -1
  85. package/src/daemon/host-cu-proxy.ts +1 -1
  86. package/src/daemon/host-file-proxy.ts +1 -1
  87. package/src/daemon/host-transfer-proxy.ts +2 -2
  88. package/src/daemon/lifecycle.ts +88 -73
  89. package/src/daemon/memory-v2-startup.ts +55 -14
  90. package/src/daemon/message-types/messages.ts +19 -1
  91. package/src/documents/document-store.ts +35 -1
  92. package/src/filing/filing-service.ts +2 -3
  93. package/src/heartbeat/heartbeat-service.ts +1 -1
  94. package/src/ipc/assistant-server.ts +93 -36
  95. package/src/ipc/skill-server.ts +99 -42
  96. package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +10 -57
  97. package/src/memory/context-search/sources/memory-v2.ts +1 -17
  98. package/src/memory/context-search/sources/memory.ts +2 -2
  99. package/src/memory/context-search/sources/pkb.ts +2 -3
  100. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +104 -61
  101. package/src/memory/graph/__tests__/handle-remember-v2.test.ts +11 -26
  102. package/src/memory/graph/conversation-graph-memory.ts +32 -9
  103. package/src/memory/graph/graph-search.test.ts +6 -5
  104. package/src/memory/graph/graph-search.ts +3 -4
  105. package/src/memory/graph/retriever.test.ts +12 -7
  106. package/src/memory/graph/retriever.ts +4 -5
  107. package/src/memory/graph/tool-handlers.ts +3 -4
  108. package/src/memory/graph/tools.ts +4 -4
  109. package/src/memory/indexer.ts +1 -2
  110. package/src/memory/jobs/__tests__/embed-concept-page.test.ts +116 -0
  111. package/src/memory/jobs/embed-concept-page.ts +223 -87
  112. package/src/memory/jobs-worker.ts +8 -4
  113. package/src/memory/pkb/pkb-search.test.ts +6 -5
  114. package/src/memory/pkb/pkb-search.ts +4 -5
  115. package/src/memory/qdrant-client.ts +3 -0
  116. package/src/memory/search/semantic.ts +4 -5
  117. package/src/memory/v2/__tests__/activation.test.ts +35 -5
  118. package/src/memory/v2/__tests__/consolidation-job.test.ts +21 -32
  119. package/src/memory/v2/__tests__/injection.test.ts +140 -23
  120. package/src/memory/v2/__tests__/qdrant.test.ts +310 -9
  121. package/src/memory/v2/__tests__/sim.test.ts +118 -7
  122. package/src/memory/v2/__tests__/static-context.test.ts +1 -13
  123. package/src/memory/v2/__tests__/sweep-job.test.ts +19 -33
  124. package/src/memory/v2/consolidation-job.ts +7 -8
  125. package/src/memory/v2/injection.ts +32 -12
  126. package/src/memory/v2/page-store.ts +39 -0
  127. package/src/memory/v2/prompts/consolidation.ts +5 -0
  128. package/src/memory/v2/qdrant.ts +209 -48
  129. package/src/memory/v2/sim.ts +67 -26
  130. package/src/memory/v2/static-context.ts +4 -8
  131. package/src/memory/v2/sweep-job.ts +5 -6
  132. package/src/memory/v2/types.ts +7 -0
  133. package/src/notifications/copy-composer.ts +46 -12
  134. package/src/notifications/decision-engine.ts +46 -0
  135. package/src/permissions/gateway-threshold-reader.ts +116 -8
  136. package/src/permissions/prompter.ts +86 -96
  137. package/src/permissions/secret-prompter.ts +31 -31
  138. package/src/plugins/defaults/injectors.ts +1 -2
  139. package/src/proactive-artifact/job.test.ts +51 -4
  140. package/src/proactive-artifact/job.ts +16 -2
  141. package/src/proactive-artifact/message-copy.ts +18 -1
  142. package/src/prompts/templates/SOUL.md +13 -28
  143. package/src/runtime/auth/route-policy.ts +1 -0
  144. package/src/runtime/channel-approvals.ts +3 -2
  145. package/src/runtime/guardian-reply-router.ts +0 -10
  146. package/src/runtime/pending-interactions.ts +19 -15
  147. package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +147 -0
  148. package/src/runtime/routes/approval-routes.ts +7 -3
  149. package/src/runtime/routes/consolidation-routes.ts +8 -9
  150. package/src/runtime/routes/conversation-query-routes.ts +44 -1
  151. package/src/runtime/routes/debug-bash-routes.ts +2 -0
  152. package/src/runtime/routes/filing-routes.ts +2 -3
  153. package/src/runtime/routes/inbound-stages/guardian-reply-intercept.ts +0 -3
  154. package/src/runtime/routes/memory-item-routes.test.ts +3 -9
  155. package/src/runtime/routes/memory-item-routes.ts +5 -6
  156. package/src/runtime/routes/memory-v2-routes.ts +103 -17
  157. package/src/skills/include-graph.ts +35 -13
  158. package/src/tools/document/document-tool.ts +20 -0
  159. package/src/tools/executor.ts +18 -2
  160. package/src/tools/memory/register.test.ts +7 -5
  161. package/src/tools/permission-checker.ts +15 -0
  162. package/src/tools/skills/load.ts +24 -20
  163. package/src/tools/tool-name-aliases.ts +19 -0
  164. package/src/tools/types.ts +19 -1
  165. package/src/workspace/migrations/067-release-notes-safe-storage-limits.ts +4 -62
  166. package/src/workspace/migrations/069-seed-onboarding-threads.ts +28 -0
  167. package/src/workspace/migrations/070-memory-v2-summary-schema-rebuild.ts +31 -0
  168. package/src/workspace/migrations/071-remove-safe-storage-release-note.ts +111 -0
  169. package/src/workspace/migrations/registry.ts +6 -0
@@ -48,21 +48,43 @@ export interface ConceptPagePayload {
48
48
  export interface ConceptPageQueryResult {
49
49
  slug: string;
50
50
  /**
51
- * Dense cosine similarity, when the slug appeared in the dense top-`limit`.
52
- * `undefined` if the slug only appeared in the sparse channel.
51
+ * Dense cosine similarity against the page body, when the slug appeared in
52
+ * the body dense top-`limit`. `undefined` if the slug only appeared in the
53
+ * sparse channel — or in a summary-side channel.
53
54
  */
54
55
  denseScore?: number;
55
56
  /**
56
- * Sparse score, when the slug appeared in the sparse top-`limit`.
57
- * `undefined` if the slug only appeared in the dense channel. Lives on a
58
- * different scale than `denseScore` — callers must normalize before fusing.
57
+ * Sparse score against the page body, when the slug appeared in the body
58
+ * sparse top-`limit`. `undefined` if the slug only appeared in the dense
59
+ * channel. Lives on a different scale than `denseScore` — callers must
60
+ * normalize before fusing.
59
61
  */
60
62
  sparseScore?: number;
63
+ /**
64
+ * Dense cosine similarity against the page's frontmatter `summary`, when
65
+ * the page has a summary embedded and the slug appeared in the summary
66
+ * dense top-`limit`. `undefined` for pages without a summary embedding —
67
+ * those fall back to body-only scoring.
68
+ */
69
+ summaryDenseScore?: number;
70
+ /**
71
+ * Sparse score against the page's frontmatter `summary`, paired with
72
+ * `summaryDenseScore`. `undefined` for pages without a summary embedding.
73
+ */
74
+ summarySparseScore?: number;
61
75
  }
62
76
 
63
77
  let _client: QdrantRestClient | null = null;
64
78
  let _collectionReady = false;
65
- let _collectionReadyPromise: Promise<void> | null = null;
79
+ let _collectionReadyPromise: Promise<{ migrated: boolean }> | null = null;
80
+
81
+ /**
82
+ * Named vectors the v2 concept-page collection must expose. Existing
83
+ * collections that lack any of these get destructively recreated by
84
+ * `ensureConceptPageCollectionOnce` — see the `migrated` return flag.
85
+ */
86
+ const REQUIRED_DENSE_VECTORS = ["dense", "summary_dense"] as const;
87
+ const REQUIRED_SPARSE_VECTORS = ["sparse", "summary_sparse"] as const;
66
88
 
67
89
  /** Lazily create a Qdrant REST client bound to the resolved URL. */
68
90
  function getClient(): QdrantRestClient {
@@ -76,16 +98,19 @@ function getClient(): QdrantRestClient {
76
98
  }
77
99
 
78
100
  /**
79
- * Create the v2 concept-page collection if it does not already exist.
80
- * Idempotent: a no-op when the collection is already present.
81
- *
82
- * Vector layout mirrors `VellumQdrantClient.ensureCollection` named dense
83
- * (cosine, configurable size + on-disk) and sparse vectors. The vector size
84
- * and on-disk flag inherit from `config.memory.qdrant` so v2 stays aligned
85
- * with the user's existing embedding backend without separate knobs.
101
+ * Create the v2 concept-page collection if it does not already exist, or
102
+ * destructively recreate it when the existing schema is missing any of the
103
+ * required named vectors (see `REQUIRED_DENSE_VECTORS` /
104
+ * `REQUIRED_SPARSE_VECTORS`). The latter case is signalled to callers via
105
+ * `{ migrated: true }` so they can enqueue a backfill — pre-#29823
106
+ * collections lack `summary_dense` / `summary_sparse` and every query
107
+ * referencing those named vectors fails with HTTP 400 until the collection
108
+ * is rebuilt. Mirrors `VellumQdrantClient.ensureCollection` for v1.
86
109
  */
87
- export async function ensureConceptPageCollection(): Promise<void> {
88
- if (_collectionReady) return;
110
+ export async function ensureConceptPageCollection(): Promise<{
111
+ migrated: boolean;
112
+ }> {
113
+ if (_collectionReady) return { migrated: false };
89
114
  if (_collectionReadyPromise) return _collectionReadyPromise;
90
115
 
91
116
  _collectionReadyPromise = ensureConceptPageCollectionOnce().finally(() => {
@@ -94,17 +119,46 @@ export async function ensureConceptPageCollection(): Promise<void> {
94
119
  return _collectionReadyPromise;
95
120
  }
96
121
 
97
- async function ensureConceptPageCollectionOnce(): Promise<void> {
122
+ async function ensureConceptPageCollectionOnce(): Promise<{
123
+ migrated: boolean;
124
+ }> {
98
125
  const client = getClient();
99
126
  const config = getConfig();
100
127
  const vectorSize = config.memory.qdrant.vectorSize;
101
128
  const onDisk = config.memory.qdrant.onDisk;
102
129
 
130
+ let migrated = false;
131
+
103
132
  try {
104
133
  const exists = await client.collectionExists(MEMORY_V2_COLLECTION);
105
134
  if (exists.exists) {
106
- _collectionReady = true;
107
- return;
135
+ // Assume compatible on probe failure rather than risk a destructive
136
+ // recreate — mirrors v1's posture in `VellumQdrantClient.ensureCollection`.
137
+ let info: Awaited<ReturnType<typeof client.getCollection>>;
138
+ try {
139
+ info = await client.getCollection(MEMORY_V2_COLLECTION);
140
+ } catch (err) {
141
+ log.warn(
142
+ { err, collection: MEMORY_V2_COLLECTION },
143
+ "Failed to probe v2 collection schema; assuming compatible",
144
+ );
145
+ _collectionReady = true;
146
+ return { migrated: false };
147
+ }
148
+
149
+ const missing = missingNamedVectors(info);
150
+ if (missing.length === 0) {
151
+ _collectionReady = true;
152
+ return { migrated: false };
153
+ }
154
+
155
+ log.warn(
156
+ { collection: MEMORY_V2_COLLECTION, missingNamedVectors: missing },
157
+ "Memory v2 concept-page collection schema drift detected — deleting and recreating; embeddings will be regenerated by background reembed",
158
+ );
159
+ await client.deleteCollection(MEMORY_V2_COLLECTION);
160
+ migrated = true;
161
+ // Fall through to creation below.
108
162
  }
109
163
  } catch (err) {
110
164
  // Treat "not found"-shaped errors as "needs creation" and fall through.
@@ -124,15 +178,28 @@ async function ensureConceptPageCollectionOnce(): Promise<void> {
124
178
  distance: "Cosine",
125
179
  on_disk: onDisk,
126
180
  },
181
+ // Optional second dense vector covering the page's frontmatter
182
+ // `summary`. Pages without a summary store nothing under this name —
183
+ // Qdrant supports per-point named-vector subsets — so the named-vector
184
+ // index stays cheap until summaries are populated.
185
+ summary_dense: {
186
+ size: vectorSize,
187
+ distance: "Cosine",
188
+ on_disk: onDisk,
189
+ },
127
190
  },
128
191
  sparse_vectors: {
129
192
  sparse: {}, // Qdrant auto-infers sparse vector params
193
+ summary_sparse: {}, // BM25 sparse vector for the summary
130
194
  },
131
195
  hnsw_config: {
132
196
  on_disk: onDisk,
133
197
  m: 16,
134
198
  ef_construct: 100,
135
199
  },
200
+ optimizers_config: {
201
+ default_segment_number: 2,
202
+ },
136
203
  on_disk_payload: onDisk,
137
204
  });
138
205
  } catch (err) {
@@ -143,7 +210,7 @@ async function ensureConceptPageCollectionOnce(): Promise<void> {
143
210
  (err as { status: number }).status === 409
144
211
  ) {
145
212
  _collectionReady = true;
146
- return;
213
+ return { migrated };
147
214
  }
148
215
  throw err;
149
216
  }
@@ -156,32 +223,86 @@ async function ensureConceptPageCollectionOnce(): Promise<void> {
156
223
  });
157
224
 
158
225
  _collectionReady = true;
226
+ return { migrated };
227
+ }
228
+
229
+ /**
230
+ * Return the names of required named vectors absent from the collection's
231
+ * current schema. An empty array means the collection is fully migrated.
232
+ *
233
+ * If the response shape is unparseable (e.g. Qdrant returns an unexpected
234
+ * structure) we treat it as "everything is missing" so the caller's drift
235
+ * branch fires — combined with the `getCollection` try/catch in the caller,
236
+ * a thrown probe falls back to "assume compatible" while a parsed-but-empty
237
+ * response triggers the safer recreate.
238
+ */
239
+ function missingNamedVectors(
240
+ info: Awaited<ReturnType<QdrantRestClient["getCollection"]>>,
241
+ ): string[] {
242
+ const params = info.config?.params;
243
+ const dense = params?.vectors;
244
+ const sparse = (params as { sparse_vectors?: unknown } | undefined)
245
+ ?.sparse_vectors;
246
+ const denseNames =
247
+ dense && typeof dense === "object" && !("size" in dense)
248
+ ? new Set(Object.keys(dense))
249
+ : new Set<string>();
250
+ const sparseNames =
251
+ sparse && typeof sparse === "object"
252
+ ? new Set(Object.keys(sparse as Record<string, unknown>))
253
+ : new Set<string>();
254
+
255
+ const missing: string[] = [];
256
+ for (const name of REQUIRED_DENSE_VECTORS) {
257
+ if (!denseNames.has(name)) missing.push(name);
258
+ }
259
+ for (const name of REQUIRED_SPARSE_VECTORS) {
260
+ if (!sparseNames.has(name)) missing.push(name);
261
+ }
262
+ return missing;
159
263
  }
160
264
 
161
265
  /**
162
266
  * Upsert a concept page's dense + sparse embedding. The point ID is derived
163
267
  * deterministically from the slug so subsequent calls for the same slug
164
268
  * replace the prior point in place rather than accumulating duplicates.
269
+ *
270
+ * `summary` is optional — supplied when the page's frontmatter carries a
271
+ * `summary`, omitted otherwise. Pages without a summary store only the body
272
+ * vectors and fall back to body-only scoring at query time. The grouped
273
+ * shape enforces at the type level that summary dense and sparse are
274
+ * always written together.
165
275
  */
166
276
  export async function upsertConceptPageEmbedding(params: {
167
277
  slug: string;
168
278
  dense: number[];
169
279
  sparse: SparseEmbedding;
280
+ summary?: { dense: number[]; sparse: SparseEmbedding };
170
281
  updatedAt: number;
171
282
  }): Promise<void> {
172
283
  await ensureConceptPageCollection();
173
284
 
174
- const { slug, dense, sparse, updatedAt } = params;
285
+ const { slug, dense, sparse, summary, updatedAt } = params;
175
286
  const client = getClient();
176
287
  const pointId = pointIdForSlug(slug);
177
288
 
289
+ // Qdrant lets us upsert any subset of named vectors per point. The summary
290
+ // entries appear only when the caller passed a `summary` block — pairing
291
+ // them at the type level keeps query-time fusion symmetric with the body
292
+ // channels.
293
+ const vector: Record<string, number[] | SparseEmbedding> = { dense, sparse };
294
+ if (summary) {
295
+ vector.summary_dense = summary.dense;
296
+ vector.summary_sparse = summary.sparse;
297
+ }
298
+
178
299
  const upsertOnce = () =>
179
300
  client.upsert(MEMORY_V2_COLLECTION, {
180
301
  wait: true,
181
302
  points: [
182
303
  {
183
304
  id: pointId,
184
- vector: { dense, sparse },
305
+ vector,
185
306
  payload: { slug, updated_at: updatedAt },
186
307
  },
187
308
  ],
@@ -290,6 +411,30 @@ export async function pruneSlugsWithPrefixExcept(
290
411
  }
291
412
  }
292
413
 
414
+ /**
415
+ * Approximate count of points in the v2 concept-page collection. Used by the
416
+ * daemon-startup rebuild hook to detect "collection exists but empty" — the
417
+ * crash-mid-rebuild recovery case where a prior boot dropped + recreated the
418
+ * collection but died before reembed completed. Returns `0` if the collection
419
+ * does not exist or the count call fails (treated as "needs reembed" by the
420
+ * caller).
421
+ */
422
+ export async function countConceptPagePoints(): Promise<number> {
423
+ await ensureConceptPageCollection();
424
+ try {
425
+ const result = await getClient().count(MEMORY_V2_COLLECTION, {
426
+ exact: false,
427
+ });
428
+ return result.count;
429
+ } catch (err) {
430
+ log.warn(
431
+ { err, collection: MEMORY_V2_COLLECTION },
432
+ "Failed to count v2 concept-page collection — treating as empty",
433
+ );
434
+ return 0;
435
+ }
436
+ }
437
+
293
438
  /**
294
439
  * Best-effort delete of the legacy `memory_v2_skills` Qdrant collection. Skill
295
440
  * embeddings now live alongside concept pages in `memory_v2_concept_pages`
@@ -319,9 +464,15 @@ export async function dropLegacySkillsCollection(): Promise<void> {
319
464
  * a normalized weighted-sum — because RRF would discard the score magnitudes
320
465
  * the activation formula needs.
321
466
  *
467
+ * Four channels are queried concurrently: body dense, body sparse, summary
468
+ * dense, summary sparse. The summary channels only return hits for pages whose
469
+ * frontmatter carries a `summary` (and therefore stored `summary_dense` /
470
+ * `summary_sparse` named vectors at upsert time). Pages without a summary
471
+ * surface body-only scores; callers fall back to body-only fusion for those.
472
+ *
322
473
  * Each channel returns up to `limit` hits. A slug is included in the result
323
- * if it appears in either channel; the missing channel's score is left
324
- * `undefined` so callers can detect single-channel matches.
474
+ * if it appears in any channel; missing channel scores stay `undefined` so
475
+ * callers can distinguish "no match in this channel" from "match with score 0".
325
476
  *
326
477
  * `restrictToSlugs`, when provided, filters the search server-side to only
327
478
  * those slugs (Qdrant `slug IN [...]` filter). Used by `simBatch` when the
@@ -355,42 +506,51 @@ export async function hybridQueryConceptPages(
355
506
  // Qdrant 1.13.x sparse-index crash that we've reproduced in the wild.
356
507
  const skipSparse = options?.skipSparse ?? false;
357
508
 
358
- const denseQuery = () =>
509
+ const queryDense = (using: string) =>
359
510
  client.query(MEMORY_V2_COLLECTION, {
360
511
  query: dense,
361
- using: "dense",
512
+ using,
362
513
  limit,
363
514
  with_payload: true,
364
515
  filter,
365
516
  });
366
- const sparseQuery = () =>
517
+ const querySparse = (using: string) =>
367
518
  client.query(MEMORY_V2_COLLECTION, {
368
519
  query: sparse,
369
- using: "sparse",
520
+ using,
370
521
  limit,
371
522
  with_payload: true,
372
523
  filter,
373
524
  });
374
525
 
375
- // Run both queries concurrently — they hit independent named vectors.
376
- // When sparse is gated off we still resolve a Promise so the destructuring
377
- // below stays uniform; the empty `points: []` matches the shape of a
378
- // no-hit Qdrant response.
526
+ // Run all four channels concurrently — they hit independent named vectors.
527
+ // When sparse is gated off the sparse channels still resolve a Promise so
528
+ // the destructuring below stays uniform; the empty `points: []` matches
529
+ // the shape of a no-hit Qdrant response.
379
530
  const emptyResult = {
380
531
  points: [] as Array<{ payload?: unknown; score?: number }>,
381
532
  };
382
533
  const runQueries = async () =>
383
- Promise.all([denseQuery(), skipSparse ? emptyResult : sparseQuery()]);
534
+ Promise.all([
535
+ queryDense("dense"),
536
+ skipSparse ? emptyResult : querySparse("sparse"),
537
+ queryDense("summary_dense"),
538
+ skipSparse ? emptyResult : querySparse("summary_sparse"),
539
+ ]);
384
540
 
385
541
  let denseResults;
386
542
  let sparseResults;
543
+ let summaryDenseResults;
544
+ let summarySparseResults;
387
545
  try {
388
- [denseResults, sparseResults] = await runQueries();
546
+ [denseResults, sparseResults, summaryDenseResults, summarySparseResults] =
547
+ await runQueries();
389
548
  } catch (err) {
390
549
  if (isCollectionMissing(err)) {
391
550
  _collectionReady = false;
392
551
  await ensureConceptPageCollection();
393
- [denseResults, sparseResults] = await runQueries();
552
+ [denseResults, sparseResults, summaryDenseResults, summarySparseResults] =
553
+ await runQueries();
394
554
  } else {
395
555
  throw err;
396
556
  }
@@ -399,21 +559,22 @@ export async function hybridQueryConceptPages(
399
559
  // Merge by slug. Missing-side scores stay undefined so the fuser can tell
400
560
  // "no match in this channel" apart from "match with score 0".
401
561
  const merged = new Map<string, ConceptPageQueryResult>();
402
- for (const point of denseResults.points ?? []) {
403
- const slug = (point.payload as { slug?: unknown } | null)?.slug;
404
- if (typeof slug !== "string") continue;
405
- merged.set(slug, { slug, denseScore: point.score ?? 0 });
406
- }
407
- for (const point of sparseResults.points ?? []) {
408
- const slug = (point.payload as { slug?: unknown } | null)?.slug;
409
- if (typeof slug !== "string") continue;
410
- const existing = merged.get(slug);
411
- if (existing) {
412
- existing.sparseScore = point.score ?? 0;
413
- } else {
414
- merged.set(slug, { slug, sparseScore: point.score ?? 0 });
562
+ const recordHit = (
563
+ points: Array<{ payload?: unknown; score?: number }> | undefined,
564
+ set: (entry: ConceptPageQueryResult, score: number) => void,
565
+ ): void => {
566
+ for (const point of points ?? []) {
567
+ const slug = (point.payload as { slug?: unknown } | null)?.slug;
568
+ if (typeof slug !== "string") continue;
569
+ const existing = merged.get(slug) ?? { slug };
570
+ set(existing, point.score ?? 0);
571
+ merged.set(slug, existing);
415
572
  }
416
- }
573
+ };
574
+ recordHit(denseResults.points, (e, s) => (e.denseScore = s));
575
+ recordHit(sparseResults.points, (e, s) => (e.sparseScore = s));
576
+ recordHit(summaryDenseResults.points, (e, s) => (e.summaryDenseScore = s));
577
+ recordHit(summarySparseResults.points, (e, s) => (e.summarySparseScore = s));
417
578
 
418
579
  return Array.from(merged.values());
419
580
  }
@@ -120,14 +120,18 @@ export function effectiveWeights(
120
120
  * sparse via the in-process TF-IDF encoder).
121
121
  * 2. Run server-side dense + sparse queries against the v2 concept-page
122
122
  * Qdrant collection, restricted to `candidateSlugs` so we don't waste
123
- * query bandwidth on unrelated pages.
124
- * 3. Fuse: per slug, `score = clamp01(dense_weight · denseCosine +
125
- * sparse_weight · normalizedSparse)`. Sparse scores are normalized by
126
- * the per-batch maximum (so the largest is 1.0); slugs missing from a
127
- * channel contribute 0 from that channel.
123
+ * query bandwidth on unrelated pages. The query hits four channels per
124
+ * page: body dense + body sparse, and (for pages that have a summary
125
+ * embedded) summary dense + summary sparse.
126
+ * 3. Fuse: per slug, score = `max(fused(body), fused(summary))`. Each
127
+ * half is `clamp01(dense_weight · denseCosine + sparse_weight ·
128
+ * normalizedSparse)` with sparse normalized by the per-batch maximum.
129
+ * Pages without a summary embedding fall back to body-only fusion —
130
+ * the summary half is undefined and the max collapses to the body
131
+ * score.
128
132
  *
129
133
  * Returns a `Map<slug, score>` containing only the candidate slugs that hit
130
- * in at least one channel. Slugs in `candidateSlugs` that miss both channels
134
+ * in at least one channel. Slugs in `candidateSlugs` that miss every channel
131
135
  * are absent from the map; callers should treat absence as score = 0 (the
132
136
  * activation pipeline does this implicitly when reading back A_o).
133
137
  *
@@ -181,20 +185,52 @@ export async function simBatch(
181
185
  return new Map();
182
186
  }
183
187
 
184
- const maxSparse = computeMaxSparse(hits);
188
+ // Compute per-batch sparse maxima independently for the body and summary
189
+ // channels so each side normalizes against its own scale. Mixing the two
190
+ // — e.g. dividing every sparse score by the larger of the two maxima —
191
+ // would punish whichever channel happened to have lower-magnitude scores
192
+ // even when its hits were the best matches available.
193
+ const maxBodySparse = computeMaxSparse(hits, (h) => h.sparseScore);
194
+ const maxSummarySparse = computeMaxSparse(hits, (h) => h.summarySparseScore);
185
195
  const { dense_weight: baseDense, sparse_weight: baseSparse } =
186
196
  config.memory.v2;
187
- const { dense: denseWeight, sparse: sparseWeight } = effectiveWeights(
188
- hits,
189
- maxSparse,
197
+ const { dense: bodyDenseWeight, sparse: bodySparseWeight } = effectiveWeights(
198
+ hits.map((h) => ({ sparseScore: h.sparseScore })),
199
+ maxBodySparse,
190
200
  baseDense,
191
201
  baseSparse,
192
202
  config,
193
203
  );
204
+ const { dense: summaryDenseWeight, sparse: summarySparseWeight } =
205
+ effectiveWeights(
206
+ hits.map((h) => ({ sparseScore: h.summarySparseScore })),
207
+ maxSummarySparse,
208
+ baseDense,
209
+ baseSparse,
210
+ config,
211
+ );
194
212
 
195
213
  const scores = new Map<string, number>();
196
214
  for (const hit of hits) {
197
- scores.set(hit.slug, fuseHit(hit, maxSparse, denseWeight, sparseWeight));
215
+ const bodyScore = fuseHalf(
216
+ hit.denseScore,
217
+ hit.sparseScore,
218
+ maxBodySparse,
219
+ bodyDenseWeight,
220
+ bodySparseWeight,
221
+ );
222
+ const summaryScore = fuseHalf(
223
+ hit.summaryDenseScore,
224
+ hit.summarySparseScore,
225
+ maxSummarySparse,
226
+ summaryDenseWeight,
227
+ summarySparseWeight,
228
+ );
229
+ // Pages without a summary embedding return undefined for both summary
230
+ // channels; their `summaryScore` falls back to the body score so the
231
+ // max collapses cleanly to body-only behavior.
232
+ const score = Math.max(bodyScore ?? 0, summaryScore ?? bodyScore ?? 0);
233
+ scores.set(hit.slug, score);
198
234
  }
199
235
 
200
236
  return scores;
@@ -207,36 +243,41 @@ function throwIfAborted(signal: AbortSignal | undefined): void {
207
243
  }
208
244
 
209
245
  /**
210
- * Per-batch sparse-score maximum used for normalization. Hits missing from
211
- * the sparse channel contribute 0 (handled by the `undefined` guard).
246
+ * Per-batch sparse-score maximum used for normalization. The accessor picks
247
+ * which sparse channel to scan — `sparseScore` for the body channel,
248
+ * `summarySparseScore` for the summary channel. Hits missing from the
249
+ * channel contribute 0 (handled by the `undefined` guard).
212
250
  */
213
- function computeMaxSparse(
214
- hits: ReadonlyArray<{ sparseScore?: number }>,
251
+ function computeMaxSparse<T>(
252
+ hits: ReadonlyArray<T>,
253
+ accessor: (hit: T) => number | undefined,
215
254
  ): number {
216
255
  let max = 0;
217
256
  for (const hit of hits) {
218
- if (hit.sparseScore !== undefined && hit.sparseScore > max) {
219
- max = hit.sparseScore;
257
+ const value = accessor(hit);
258
+ if (value !== undefined && value > max) {
259
+ max = value;
220
260
  }
221
261
  }
222
262
  return max;
223
263
  }
224
264
 
225
265
  /**
226
- * Fuse a single hit's dense + sparse scores into a normalized [0, 1] score
266
+ * Fuse one half of a hit (body or summary) into a normalized [0, 1] score
227
267
  * via `clamp01(dense_weight · dense + sparse_weight · sparse/maxSparse)`.
228
- * Missing-channel scores contribute 0.
268
+ * Returns `undefined` when neither channel hit a signal — the half had no
269
+ * match at all, so the caller can fall back to the other half cleanly.
229
270
  */
230
- function fuseHit(
231
- hit: { denseScore?: number; sparseScore?: number },
271
+ function fuseHalf(
272
+ denseScore: number | undefined,
273
+ sparseScore: number | undefined,
232
274
  maxSparse: number,
233
275
  denseWeight: number,
234
276
  sparseWeight: number,
235
- ): number {
236
- const dense = hit.denseScore ?? 0;
277
+ ): number | undefined {
278
+ if (denseScore === undefined && sparseScore === undefined) return undefined;
279
+ const dense = denseScore ?? 0;
237
280
  const sparseNormalized =
238
- hit.sparseScore !== undefined && maxSparse > 0
239
- ? hit.sparseScore / maxSparse
240
- : 0;
281
+ sparseScore !== undefined && maxSparse > 0 ? sparseScore / maxSparse : 0;
241
282
  return clamp01(denseWeight * dense + sparseWeight * sparseNormalized);
242
283
  }
@@ -18,7 +18,6 @@
18
18
  // matching the existing PKB auto-inject pattern.
19
19
 
20
20
  import type { ChannelId } from "../../channels/types.js";
21
- import { isAssistantFeatureFlagEnabled } from "../../config/assistant-feature-flags.js";
22
21
  import { loadConfig } from "../../config/loader.js";
23
22
  import { readPromptFile } from "../../prompts/system-prompt.js";
24
23
  import { getWorkspacePromptPath } from "../../util/platform.js";
@@ -36,9 +35,9 @@ const MEMORY_V2_STATIC_BLOCKS: readonly MemoryV2StaticBlock[] = [
36
35
  ];
37
36
 
38
37
  /**
39
- * Build the v2 static memory block, gated on `memory-v2-enabled` +
40
- * `config.memory.v2.enabled`. Empty/missing files are skipped; returns
41
- * `null` when the gate is off or every file is empty.
38
+ * Build the v2 static memory block, gated on `config.memory.v2.enabled`.
39
+ * Empty/missing files are skipped; returns `null` when the gate is off or
40
+ * every file is empty.
42
41
  */
43
42
  export function readMemoryV2StaticContent(): string | null {
44
43
  let config;
@@ -47,10 +46,7 @@ export function readMemoryV2StaticContent(): string | null {
47
46
  } catch {
48
47
  return null;
49
48
  }
50
- if (
51
- !isAssistantFeatureFlagEnabled("memory-v2-enabled", config) ||
52
- !config.memory.v2.enabled
53
- ) {
49
+ if (!config.memory.v2.enabled) {
54
50
  return null;
55
51
  }
56
52
 
@@ -13,10 +13,10 @@
13
13
  * extraction-trigger path. Until then this handler is invoked only by
14
14
  * `memory_v2_sweep` rows enqueued explicitly (tests, future CLI).
15
15
  *
16
- * Skipped entirely when the `memory-v2-enabled` feature flag is off, or when
16
+ * Skipped entirely when `config.memory.v2.enabled` is false, or when
17
17
  * `config.memory.v2.sweep_enabled` is false — keeps the sweep dormant in
18
18
  * v1-only workspaces and in v2 workspaces that haven't opted in, even if a
19
- * stale row sits in the queue at flag-flip time.
19
+ * stale row sits in the queue when v2 is disabled.
20
20
  */
21
21
 
22
22
  import { readFileSync } from "node:fs";
@@ -25,7 +25,6 @@ import { join } from "node:path";
25
25
  import { desc, gt } from "drizzle-orm";
26
26
  import { z } from "zod";
27
27
 
28
- import { isAssistantFeatureFlagEnabled } from "../../config/assistant-feature-flags.js";
29
28
  import type { AssistantConfig } from "../../config/types.js";
30
29
  import { getAssistantName } from "../../daemon/identity-helpers.js";
31
30
  import {
@@ -104,12 +103,12 @@ export async function memoryV2SweepJob(
104
103
  _job: MemoryJob,
105
104
  config: AssistantConfig,
106
105
  ): Promise<number> {
107
- if (!isAssistantFeatureFlagEnabled("memory-v2-enabled", config)) {
108
- log.debug("memory-v2-enabled flag off; sweep skipped");
106
+ if (!config.memory?.v2?.enabled) {
107
+ log.debug("memory.v2.enabled is false; sweep skipped");
109
108
  return 0;
110
109
  }
111
110
 
112
- if (!config.memory?.v2?.sweep_enabled) {
111
+ if (!config.memory.v2.sweep_enabled) {
113
112
  log.debug("memory.v2.sweep_enabled is false; sweep skipped");
114
113
  return 0;
115
114
  }
@@ -26,10 +26,17 @@ import { z } from "zod";
26
26
  * B → A. The full graph is the union of every page's `edges:` list — there
27
27
  * is no separate edges-index file. `ref_files` lists paths to attached media
28
28
  * (images, audio, etc.).
29
+ *
30
+ * `summary` is a 1-4 sentence prose description of the page. When present,
31
+ * retrieval injects the path + summary instead of the full page so the agent
32
+ * can decide whether to read the file. Optional because legacy pages predating
33
+ * the summary field still parse — those fall back to full-page injection and
34
+ * full-page-only similarity.
29
35
  */
30
36
  export const ConceptPageFrontmatterSchema = z.object({
31
37
  edges: z.array(z.string()).default([]),
32
38
  ref_files: z.array(z.string()).default([]),
39
+ summary: z.string().optional(),
33
40
  });
34
41
 
35
42
  export type ConceptPageFrontmatter = z.infer<