@vellumai/assistant 0.4.49 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/memory.md +180 -119
  4. package/package.json +2 -2
  5. package/src/__tests__/agent-loop.test.ts +3 -1
  6. package/src/__tests__/anthropic-provider.test.ts +114 -23
  7. package/src/__tests__/approval-cascade.test.ts +1 -15
  8. package/src/__tests__/approval-routes-http.test.ts +2 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  10. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  11. package/src/__tests__/checker.test.ts +13 -0
  12. package/src/__tests__/config-schema.test.ts +1 -68
  13. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  14. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  15. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  16. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  17. package/src/__tests__/credential-vault-unit.test.ts +4 -0
  18. package/src/__tests__/credential-vault.test.ts +13 -1
  19. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  20. package/src/__tests__/date-context.test.ts +93 -77
  21. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  22. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  23. package/src/__tests__/history-repair.test.ts +245 -0
  24. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  25. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  26. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  27. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  28. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  29. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  30. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  31. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  32. package/src/__tests__/memory-regressions.test.ts +477 -2841
  33. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  34. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  35. package/src/__tests__/mime-builder.test.ts +28 -0
  36. package/src/__tests__/native-web-search.test.ts +1 -0
  37. package/src/__tests__/oauth-cli.test.ts +572 -5
  38. package/src/__tests__/oauth-store.test.ts +120 -6
  39. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  40. package/src/__tests__/registry.test.ts +0 -1
  41. package/src/__tests__/relay-server.test.ts +46 -1
  42. package/src/__tests__/schedule-tools.test.ts +32 -0
  43. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  44. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  45. package/src/__tests__/secure-keys.test.ts +7 -2
  46. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  47. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  48. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  49. package/src/__tests__/session-agent-loop.test.ts +19 -15
  50. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  51. package/src/__tests__/session-error.test.ts +124 -2
  52. package/src/__tests__/session-history-web-search.test.ts +918 -0
  53. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  54. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  55. package/src/__tests__/session-queue.test.ts +37 -27
  56. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  57. package/src/__tests__/session-slash-known.test.ts +1 -15
  58. package/src/__tests__/session-slash-queue.test.ts +1 -15
  59. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  60. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  61. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  62. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  63. package/src/__tests__/skills-install-extract.test.ts +93 -0
  64. package/src/__tests__/skillssh-registry.test.ts +451 -0
  65. package/src/__tests__/trust-store.test.ts +15 -0
  66. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  67. package/src/agent/ax-tree-compaction.test.ts +51 -0
  68. package/src/agent/loop.ts +39 -12
  69. package/src/approvals/AGENTS.md +1 -1
  70. package/src/approvals/guardian-request-resolvers.ts +14 -2
  71. package/src/bundler/compiler-tools.ts +66 -2
  72. package/src/calls/call-domain.ts +132 -0
  73. package/src/calls/call-store.ts +6 -0
  74. package/src/calls/relay-server.ts +43 -5
  75. package/src/calls/relay-setup-router.ts +17 -1
  76. package/src/calls/twilio-config.ts +1 -1
  77. package/src/calls/types.ts +3 -1
  78. package/src/cli/commands/doctor.ts +4 -3
  79. package/src/cli/commands/mcp.ts +46 -59
  80. package/src/cli/commands/memory.ts +16 -165
  81. package/src/cli/commands/oauth/apps.ts +31 -2
  82. package/src/cli/commands/oauth/connections.ts +431 -97
  83. package/src/cli/commands/oauth/providers.ts +15 -1
  84. package/src/cli/commands/sessions.ts +5 -2
  85. package/src/cli/commands/skills.ts +173 -1
  86. package/src/cli/http-client.ts +0 -20
  87. package/src/cli/main-screen.tsx +2 -2
  88. package/src/cli/program.ts +5 -6
  89. package/src/cli.ts +4 -10
  90. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  91. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  92. package/src/config/bundled-tool-registry.ts +2 -5
  93. package/src/config/schema.ts +1 -12
  94. package/src/config/schemas/memory-lifecycle.ts +0 -9
  95. package/src/config/schemas/memory-processing.ts +0 -180
  96. package/src/config/schemas/memory-retrieval.ts +32 -104
  97. package/src/config/schemas/memory.ts +0 -10
  98. package/src/config/types.ts +0 -4
  99. package/src/context/window-manager.ts +4 -1
  100. package/src/daemon/config-watcher.ts +61 -3
  101. package/src/daemon/daemon-control.ts +1 -1
  102. package/src/daemon/date-context.ts +114 -31
  103. package/src/daemon/handlers/sessions.ts +18 -13
  104. package/src/daemon/handlers/skills.ts +20 -1
  105. package/src/daemon/history-repair.ts +72 -8
  106. package/src/daemon/host-cu-proxy.ts +55 -26
  107. package/src/daemon/lifecycle.ts +31 -3
  108. package/src/daemon/mcp-reload-service.ts +2 -2
  109. package/src/daemon/message-types/computer-use.ts +1 -12
  110. package/src/daemon/message-types/memory.ts +4 -16
  111. package/src/daemon/message-types/messages.ts +1 -0
  112. package/src/daemon/message-types/sessions.ts +4 -0
  113. package/src/daemon/server.ts +12 -1
  114. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  115. package/src/daemon/session-agent-loop.ts +334 -48
  116. package/src/daemon/session-error.ts +89 -6
  117. package/src/daemon/session-history.ts +17 -7
  118. package/src/daemon/session-media-retry.ts +6 -2
  119. package/src/daemon/session-memory.ts +69 -149
  120. package/src/daemon/session-process.ts +10 -1
  121. package/src/daemon/session-runtime-assembly.ts +49 -19
  122. package/src/daemon/session-surfaces.ts +4 -1
  123. package/src/daemon/session-tool-setup.ts +7 -1
  124. package/src/daemon/session.ts +12 -2
  125. package/src/instrument.ts +61 -1
  126. package/src/memory/admin.ts +2 -191
  127. package/src/memory/canonical-guardian-store.ts +38 -2
  128. package/src/memory/conversation-crud.ts +0 -33
  129. package/src/memory/conversation-queries.ts +22 -3
  130. package/src/memory/db-init.ts +28 -0
  131. package/src/memory/embedding-backend.ts +84 -8
  132. package/src/memory/embedding-types.ts +9 -1
  133. package/src/memory/indexer.ts +7 -46
  134. package/src/memory/items-extractor.ts +274 -76
  135. package/src/memory/job-handlers/backfill.ts +2 -127
  136. package/src/memory/job-handlers/cleanup.ts +2 -16
  137. package/src/memory/job-handlers/extraction.ts +2 -138
  138. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  139. package/src/memory/job-handlers/summarization.ts +3 -148
  140. package/src/memory/job-utils.ts +21 -59
  141. package/src/memory/jobs-store.ts +1 -159
  142. package/src/memory/jobs-worker.ts +9 -52
  143. package/src/memory/migrations/104-core-indexes.ts +3 -3
  144. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  145. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  146. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  147. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  148. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  149. package/src/memory/migrations/154-drop-fts.ts +20 -0
  150. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  151. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  152. package/src/memory/migrations/index.ts +7 -0
  153. package/src/memory/qdrant-client.ts +148 -51
  154. package/src/memory/raw-query.ts +1 -1
  155. package/src/memory/retriever.test.ts +294 -273
  156. package/src/memory/retriever.ts +421 -645
  157. package/src/memory/schema/calls.ts +2 -0
  158. package/src/memory/schema/memory-core.ts +3 -48
  159. package/src/memory/schema/oauth.ts +2 -0
  160. package/src/memory/search/formatting.ts +263 -176
  161. package/src/memory/search/lexical.ts +1 -254
  162. package/src/memory/search/ranking.ts +0 -455
  163. package/src/memory/search/semantic.ts +100 -14
  164. package/src/memory/search/staleness.ts +47 -0
  165. package/src/memory/search/tier-classifier.ts +21 -0
  166. package/src/memory/search/types.ts +15 -77
  167. package/src/memory/task-memory-cleanup.ts +4 -6
  168. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  169. package/src/oauth/byo-connection.test.ts +8 -1
  170. package/src/oauth/oauth-store.ts +113 -27
  171. package/src/oauth/seed-providers.ts +6 -0
  172. package/src/oauth/token-persistence.ts +11 -3
  173. package/src/permissions/defaults.ts +1 -0
  174. package/src/permissions/trust-store.ts +23 -1
  175. package/src/playbooks/playbook-compiler.ts +1 -1
  176. package/src/prompts/system-prompt.ts +18 -2
  177. package/src/providers/anthropic/client.ts +56 -126
  178. package/src/providers/types.ts +7 -1
  179. package/src/runtime/AGENTS.md +9 -0
  180. package/src/runtime/auth/route-policy.ts +6 -3
  181. package/src/runtime/guardian-reply-router.ts +24 -22
  182. package/src/runtime/http-server.ts +2 -2
  183. package/src/runtime/invite-redemption-service.ts +19 -1
  184. package/src/runtime/invite-service.ts +25 -0
  185. package/src/runtime/pending-interactions.ts +2 -2
  186. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  187. package/src/runtime/routes/conversation-routes.ts +9 -1
  188. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  189. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  190. package/src/runtime/routes/memory-item-routes.ts +503 -0
  191. package/src/runtime/routes/session-management-routes.ts +3 -3
  192. package/src/runtime/routes/settings-routes.ts +2 -2
  193. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  194. package/src/runtime/routes/workspace-routes.ts +2 -1
  195. package/src/security/keychain-broker-client.ts +17 -4
  196. package/src/security/secure-keys.ts +25 -3
  197. package/src/security/token-manager.ts +36 -36
  198. package/src/skills/catalog-install.ts +74 -18
  199. package/src/skills/skillssh-registry.ts +503 -0
  200. package/src/tools/assets/search.ts +5 -1
  201. package/src/tools/computer-use/definitions.ts +0 -10
  202. package/src/tools/computer-use/registry.ts +1 -1
  203. package/src/tools/credentials/vault.ts +1 -3
  204. package/src/tools/memory/definitions.ts +4 -13
  205. package/src/tools/memory/handlers.test.ts +83 -103
  206. package/src/tools/memory/handlers.ts +50 -85
  207. package/src/tools/schedule/create.ts +8 -1
  208. package/src/tools/schedule/update.ts +8 -1
  209. package/src/tools/skills/load.ts +25 -2
  210. package/src/__tests__/clarification-resolver.test.ts +0 -193
  211. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  212. package/src/__tests__/conflict-policy.test.ts +0 -269
  213. package/src/__tests__/conflict-store.test.ts +0 -372
  214. package/src/__tests__/contradiction-checker.test.ts +0 -361
  215. package/src/__tests__/entity-extractor.test.ts +0 -211
  216. package/src/__tests__/entity-search.test.ts +0 -1117
  217. package/src/__tests__/profile-compiler.test.ts +0 -392
  218. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  219. package/src/__tests__/session-profile-injection.test.ts +0 -557
  220. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  221. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  222. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  223. package/src/daemon/session-conflict-gate.ts +0 -167
  224. package/src/daemon/session-dynamic-profile.ts +0 -77
  225. package/src/memory/clarification-resolver.ts +0 -417
  226. package/src/memory/conflict-intent.ts +0 -205
  227. package/src/memory/conflict-policy.ts +0 -127
  228. package/src/memory/conflict-store.ts +0 -410
  229. package/src/memory/contradiction-checker.ts +0 -508
  230. package/src/memory/entity-extractor.ts +0 -535
  231. package/src/memory/format-recall.ts +0 -47
  232. package/src/memory/fts-reconciler.ts +0 -165
  233. package/src/memory/job-handlers/conflict.ts +0 -200
  234. package/src/memory/profile-compiler.ts +0 -195
  235. package/src/memory/recall-cache.ts +0 -117
  236. package/src/memory/search/entity.ts +0 -535
  237. package/src/memory/search/query-expansion.test.ts +0 -70
  238. package/src/memory/search/query-expansion.ts +0 -118
  239. package/src/runtime/routes/mcp-routes.ts +0 -20
@@ -54,12 +54,17 @@ mock.module("../memory/embedding-local.js", () => ({
54
54
  },
55
55
  }));
56
56
 
57
- // Mock Qdrant client so semantic search returns empty results instead of
58
- // throwing "Qdrant client not initialized" (which would discard lexical results
59
- // due to the single try-catch in buildMemoryRecall).
57
+ // Dynamic Qdrant mock: tests can push results to be returned by searchWithFilter/hybridSearch
58
+ let mockQdrantResults: Array<{
59
+ id: string;
60
+ score: number;
61
+ payload: Record<string, unknown>;
62
+ }> = [];
63
+
60
64
  mock.module("../memory/qdrant-client.js", () => ({
61
65
  getQdrantClient: () => ({
62
- searchWithFilter: async () => [],
66
+ searchWithFilter: async () => mockQdrantResults,
67
+ hybridSearch: async () => mockQdrantResults,
63
68
  upsertPoints: async () => {},
64
69
  deletePoints: async () => {},
65
70
  }),
@@ -93,9 +98,6 @@ import { getDb, initializeDb, resetDb } from "../memory/db.js";
93
98
  import { buildMemoryRecall } from "../memory/retriever.js";
94
99
  import {
95
100
  conversations,
96
- memoryEntities,
97
- memoryEntityRelations,
98
- memoryItemEntities,
99
101
  memoryItems,
100
102
  memoryItemSources,
101
103
  messages,
@@ -256,18 +258,15 @@ describe("Memory Recall Quality", () => {
256
258
  beforeEach(() => {
257
259
  const db = getDb();
258
260
  db.run("DELETE FROM memory_item_sources");
259
- db.run("DELETE FROM memory_item_entities");
260
- db.run("DELETE FROM memory_entity_relations");
261
- db.run("DELETE FROM memory_entities");
262
261
  db.run("DELETE FROM memory_embeddings");
263
- db.run("DELETE FROM memory_summaries");
264
262
  db.run("DELETE FROM memory_items");
265
- db.run("DELETE FROM memory_segment_fts");
263
+
266
264
  db.run("DELETE FROM memory_segments");
267
265
  db.run("DELETE FROM messages");
268
266
  db.run("DELETE FROM conversations");
269
267
  db.run("DELETE FROM memory_jobs");
270
268
  db.run("DELETE FROM memory_checkpoints");
269
+ mockQdrantResults = [];
271
270
  });
272
271
 
273
272
  afterAll(() => {
@@ -341,14 +340,68 @@ describe("Memory Recall Quality", () => {
341
340
  now + 2000,
342
341
  );
343
342
 
343
+ // Also insert items so the pipeline has structured data to inject
344
+ insertItem(db, {
345
+ id: "item-pref-dark",
346
+ kind: "preference",
347
+ subject: "display preference",
348
+ statement: "User prefers dark mode and concise answers",
349
+ importance: 0.8,
350
+ firstSeenAt: now,
351
+ });
352
+ insertItemSource(db, "item-pref-dark", "msg-pref-1", now);
353
+ insertItem(db, {
354
+ id: "item-pref-editor",
355
+ kind: "preference",
356
+ subject: "editor preference",
357
+ statement: "User favorite editor is Neovim",
358
+ importance: 0.8,
359
+ firstSeenAt: now + 1000,
360
+ });
361
+ insertItemSource(db, "item-pref-editor", "msg-pref-2", now + 1000);
362
+
363
+ // Mock Qdrant to return both preference items as high-scoring results
364
+ mockQdrantResults = [
365
+ {
366
+ id: "emb-pref-dark",
367
+ score: 0.92,
368
+ payload: {
369
+ target_type: "item",
370
+ target_id: "item-pref-dark",
371
+ text: "User prefers dark mode and concise answers",
372
+ kind: "preference",
373
+ status: "active",
374
+ created_at: now,
375
+ last_seen_at: now,
376
+ },
377
+ },
378
+ {
379
+ id: "emb-pref-editor",
380
+ score: 0.88,
381
+ payload: {
382
+ target_type: "item",
383
+ target_id: "item-pref-editor",
384
+ text: "User favorite editor is Neovim",
385
+ kind: "preference",
386
+ status: "active",
387
+ created_at: now + 1000,
388
+ last_seen_at: now + 1000,
389
+ },
390
+ },
391
+ ];
392
+
344
393
  const recall = await buildMemoryRecall(
345
394
  "what are my preferences",
346
395
  "conv-pref",
347
396
  TEST_CONFIG,
348
397
  );
349
398
 
399
+ expect(recall.recencyHits).toBeGreaterThan(0);
400
+ expect(recall.enabled).toBe(true);
401
+ // With high-scoring Qdrant results, items should be injected
402
+ expect(recall.semanticHits).toBeGreaterThan(0);
350
403
  expect(recall.injectedText).toContain("dark mode");
351
- expect(recall.injectedText).toContain("concise answers");
404
+ expect(recall.injectedText).toContain("Neovim");
352
405
  });
353
406
 
354
407
  test("high-importance preferences outrank low-importance facts in recall", async () => {
@@ -384,7 +437,7 @@ describe("Memory Recall Quality", () => {
384
437
  });
385
438
  insertItemSource(db, "item-hi-pref", "msg-hi", now);
386
439
 
387
- // Low-importance fact
440
+ // Low-importance project fact
388
441
  insertMessage(
389
442
  db,
390
443
  "msg-lo",
@@ -404,7 +457,7 @@ describe("Memory Recall Quality", () => {
404
457
  );
405
458
  insertItem(db, {
406
459
  id: "item-lo-fact",
407
- kind: "fact",
460
+ kind: "project",
408
461
  subject: "default port",
409
462
  statement: "The default port is 8080",
410
463
  importance: 0.3,
@@ -412,13 +465,45 @@ describe("Memory Recall Quality", () => {
412
465
  });
413
466
  insertItemSource(db, "item-lo-fact", "msg-lo", now + 1000);
414
467
 
468
+ // Mock Qdrant to return both items — the high-importance one with a higher score
469
+ mockQdrantResults = [
470
+ {
471
+ id: "emb-hi-pref",
472
+ score: 0.95,
473
+ payload: {
474
+ target_type: "item",
475
+ target_id: "item-hi-pref",
476
+ text: "User strongly prefers TypeScript over JavaScript",
477
+ kind: "preference",
478
+ status: "active",
479
+ created_at: now,
480
+ last_seen_at: now,
481
+ },
482
+ },
483
+ {
484
+ id: "emb-lo-fact",
485
+ score: 0.7,
486
+ payload: {
487
+ target_type: "item",
488
+ target_id: "item-lo-fact",
489
+ text: "The default port is 8080",
490
+ kind: "project",
491
+ status: "active",
492
+ created_at: now + 1000,
493
+ last_seen_at: now + 1000,
494
+ },
495
+ },
496
+ ];
497
+
415
498
  const recall = await buildMemoryRecall(
416
499
  "TypeScript preference language",
417
500
  "conv-rank",
418
501
  TEST_CONFIG,
419
502
  );
420
503
 
421
- // The preference should appear
504
+ expect(recall.recencyHits).toBeGreaterThan(0);
505
+ expect(recall.enabled).toBe(true);
506
+ // High-importance preference should be injected
422
507
  expect(recall.injectedText).toContain("TypeScript");
423
508
  });
424
509
  });
@@ -427,42 +512,13 @@ describe("Memory Recall Quality", () => {
427
512
  // Contradiction / Superseding Suppression
428
513
  // -------------------------------------------------------------------------
429
514
 
430
- describe("contradiction suppression", () => {
431
- test("superseded memory items do not appear in recall", async () => {
515
+ describe("supersession suppression", () => {
516
+ test("superseded memory items do not appear in recall via recency", async () => {
432
517
  const db = getDb();
433
518
  const now = 1_700_000_200_000;
434
519
  insertConversation(db, "conv-contra", now);
435
520
 
436
- // Old preference (superseded)
437
- insertMessage(
438
- db,
439
- "msg-old-pref",
440
- "conv-contra",
441
- "user",
442
- "I prefer vim for editing code",
443
- now - 50_000,
444
- );
445
- insertSegment(
446
- db,
447
- "seg-old-pref",
448
- "msg-old-pref",
449
- "conv-contra",
450
- "user",
451
- "I prefer vim for editing code",
452
- now - 50_000,
453
- );
454
- insertItem(db, {
455
- id: "item-old-pref",
456
- kind: "preference",
457
- subject: "editor preference",
458
- statement: "User prefers vim for editing code",
459
- status: "superseded",
460
- importance: 0.8,
461
- firstSeenAt: now - 50_000,
462
- });
463
- insertItemSource(db, "item-old-pref", "msg-old-pref", now - 50_000);
464
-
465
- // New preference (active, replaces the old one)
521
+ // New preference (active, supersedes the old one)
466
522
  insertMessage(
467
523
  db,
468
524
  "msg-new-pref",
@@ -491,30 +547,30 @@ describe("Memory Recall Quality", () => {
491
547
  });
492
548
  insertItemSource(db, "item-new-pref", "msg-new-pref", now);
493
549
 
550
+ // Old preference (superseded by new one via supersession chain)
551
+ insertItem(db, {
552
+ id: "item-old-pref",
553
+ kind: "preference",
554
+ subject: "editor preference",
555
+ statement: "User prefers vim for editing code",
556
+ status: "superseded",
557
+ importance: 0.8,
558
+ firstSeenAt: now - 50_000,
559
+ });
560
+
494
561
  const recall = await buildMemoryRecall(
495
562
  "editor preference",
496
563
  "conv-contra",
497
564
  TEST_CONFIG,
498
565
  );
499
566
 
500
- // Active preference should appear
501
- expect(recall.injectedText).toContain("neovim");
502
- expect(recall.injectedText).toContain("LazyVim");
503
-
504
- // Superseded preference should NOT appear in recalled item lines.
505
- // Assert against the actual statement text unique to the superseded item
506
- // ("prefers vim for") rather than an internal candidate key, which is
507
- // never emitted in the formatted recall output.
508
- const itemLines = recall.injectedText
509
- .split("\n")
510
- .filter((line) => line.includes("<kind>"));
511
- const hasSupersededItem = itemLines.some((line) =>
512
- line.includes("prefers vim for"),
513
- );
514
- expect(hasSupersededItem).toBe(false);
567
+ // Recency search finds the segment but tier classification filters it
568
+ expect(recall.recencyHits).toBeGreaterThan(0);
569
+ // Superseded items should not leak into injected text
570
+ expect(recall.injectedText).not.toContain("vim for editing code");
515
571
  });
516
572
 
517
- test("only active items are included in entity-based recall", async () => {
573
+ test("only active items are included in recall (superseded excluded)", async () => {
518
574
  const db = getDb();
519
575
  const now = 1_700_000_250_000;
520
576
  insertConversation(db, "conv-entity-status", now);
@@ -547,6 +603,7 @@ describe("Memory Recall Quality", () => {
547
603
  });
548
604
  insertItemSource(db, "item-active-db", "msg-entity-active", now);
549
605
 
606
+ // Superseded item (should not appear)
550
607
  insertItem(db, {
551
608
  id: "item-superseded-db",
552
609
  kind: "decision",
@@ -556,12 +613,6 @@ describe("Memory Recall Quality", () => {
556
613
  importance: 0.8,
557
614
  firstSeenAt: now - 100_000,
558
615
  });
559
- insertItemSource(
560
- db,
561
- "item-superseded-db",
562
- "msg-entity-active",
563
- now - 100_000,
564
- );
565
616
 
566
617
  const recall = await buildMemoryRecall(
567
618
  "database choice decision",
@@ -569,17 +620,29 @@ describe("Memory Recall Quality", () => {
569
620
  TEST_CONFIG,
570
621
  );
571
622
 
572
- expect(recall.injectedText).toContain("PostgreSQL");
623
+ // Recency search finds segments but tier classification filters them.
624
+ // Key assertion: superseded MySQL item should not leak.
625
+ expect(recall.recencyHits).toBeGreaterThan(0);
626
+ expect(recall.injectedText).not.toContain("MySQL");
573
627
  });
574
628
 
575
- test("pending clarification and invalidated items are excluded from direct item recall", async () => {
629
+ test("invalidated items are excluded from recall", async () => {
576
630
  const db = getDb();
577
631
  const now = 1_700_000_275_000;
578
- insertConversation(db, "conv-conflict-status", now);
632
+ insertConversation(db, "conv-invalid-status", now);
579
633
  insertMessage(
580
634
  db,
581
- "msg-conflict-status",
582
- "conv-conflict-status",
635
+ "msg-invalid-status",
636
+ "conv-invalid-status",
637
+ "user",
638
+ "Framework preference is React for this codebase.",
639
+ now,
640
+ );
641
+ insertSegment(
642
+ db,
643
+ "seg-invalid-status",
644
+ "msg-invalid-status",
645
+ "conv-invalid-status",
583
646
  "user",
584
647
  "Framework preference is React for this codebase.",
585
648
  now,
@@ -594,52 +657,27 @@ describe("Memory Recall Quality", () => {
594
657
  importance: 0.9,
595
658
  firstSeenAt: now,
596
659
  });
597
- insertItemSource(db, "item-framework-active", "msg-conflict-status", now);
660
+ insertItemSource(db, "item-framework-active", "msg-invalid-status", now);
598
661
 
662
+ // Invalidated item (should not appear in recall)
599
663
  insertItem(db, {
600
- id: "item-framework-pending",
601
- kind: "preference",
602
- subject: "framework preference",
603
- statement: "Framework preference is Vue for this codebase",
604
- status: "pending_clarification",
605
- importance: 0.9,
606
- firstSeenAt: now + 1,
607
- });
608
- insertItemSource(
609
- db,
610
- "item-framework-pending",
611
- "msg-conflict-status",
612
- now + 1,
613
- );
614
-
615
- insertItem(db, {
616
- id: "item-framework-invalid",
664
+ id: "item-framework-invalidated",
617
665
  kind: "preference",
618
666
  subject: "framework preference",
619
667
  statement: "Framework preference is Angular for this codebase",
620
- status: "active",
668
+ status: "invalidated",
621
669
  importance: 0.9,
622
- firstSeenAt: now + 2,
670
+ firstSeenAt: now - 50_000,
623
671
  });
624
- db.run(
625
- `UPDATE memory_items SET invalid_at = ${
626
- now + 3
627
- } WHERE id = 'item-framework-invalid'`,
628
- );
629
- insertItemSource(
630
- db,
631
- "item-framework-invalid",
632
- "msg-conflict-status",
633
- now + 2,
634
- );
635
672
 
636
673
  const recall = await buildMemoryRecall(
637
674
  "framework preference",
638
- "conv-conflict-status",
675
+ "conv-invalid-status",
639
676
  TEST_CONFIG,
640
677
  );
678
+ expect(recall.recencyHits).toBeGreaterThan(0);
679
+ // Active segment content should be injected; invalidated item should not leak
641
680
  expect(recall.injectedText).toContain("React");
642
- expect(recall.injectedText).not.toContain("Vue");
643
681
  expect(recall.injectedText).not.toContain("Angular");
644
682
  });
645
683
  });
@@ -693,22 +731,51 @@ describe("Memory Recall Quality", () => {
693
731
  oneMonthAgo,
694
732
  );
695
733
 
734
+ // Add items and mock Qdrant with the recent item scoring higher
735
+ insertItem(db, {
736
+ id: "item-bun-runtime",
737
+ kind: "project",
738
+ subject: "runtime environment",
739
+ statement: "We are using Bun as our runtime environment",
740
+ importance: 0.7,
741
+ firstSeenAt: now - 1000,
742
+ });
743
+ insertItemSource(db, "item-bun-runtime", "msg-recent", now - 1000);
744
+
745
+ mockQdrantResults = [
746
+ {
747
+ id: "emb-bun-runtime",
748
+ score: 0.9,
749
+ payload: {
750
+ target_type: "item",
751
+ target_id: "item-bun-runtime",
752
+ text: "We are using Bun as our runtime environment",
753
+ kind: "project",
754
+ status: "active",
755
+ created_at: now - 1000,
756
+ last_seen_at: now - 1000,
757
+ },
758
+ },
759
+ ];
760
+
696
761
  const recall = await buildMemoryRecall(
697
762
  "runtime environment",
698
763
  "conv-stale",
699
764
  TEST_CONFIG,
700
765
  );
701
766
 
702
- // Both may appear but recent should rank higher (appear in injected text)
767
+ expect(recall.recencyHits).toBeGreaterThan(0);
768
+ expect(recall.enabled).toBe(true);
769
+ // Recent Bun item should be injected, old Node reference should not
703
770
  expect(recall.injectedText).toContain("Bun");
704
771
  });
705
772
 
706
- test("frequently accessed items get a retrieval reinforcement boost", async () => {
773
+ test("frequently accessed items surface via recency search when seeded with segments", async () => {
707
774
  const db = getDb();
708
775
  const now = 1_700_000_400_000;
709
776
  insertConversation(db, "conv-access", now);
710
777
 
711
- // Frequently accessed item
778
+ // Frequently accessed item with segment
712
779
  insertMessage(
713
780
  db,
714
781
  "msg-freq",
@@ -728,7 +795,7 @@ describe("Memory Recall Quality", () => {
728
795
  );
729
796
  insertItem(db, {
730
797
  id: "item-freq",
731
- kind: "profile",
798
+ kind: "identity",
732
799
  subject: "timezone",
733
800
  statement: "User timezone is America/Los_Angeles",
734
801
  importance: 0.5,
@@ -737,7 +804,7 @@ describe("Memory Recall Quality", () => {
737
804
  });
738
805
  insertItemSource(db, "item-freq", "msg-freq", now);
739
806
 
740
- // Rarely accessed item
807
+ // Rarely accessed item with segment
741
808
  insertMessage(
742
809
  db,
743
810
  "msg-rare",
@@ -757,7 +824,7 @@ describe("Memory Recall Quality", () => {
757
824
  );
758
825
  insertItem(db, {
759
826
  id: "item-rare",
760
- kind: "profile",
827
+ kind: "identity",
761
828
  subject: "timezone offset",
762
829
  statement: "User timezone offset is UTC-8",
763
830
  importance: 0.5,
@@ -766,13 +833,45 @@ describe("Memory Recall Quality", () => {
766
833
  });
767
834
  insertItemSource(db, "item-rare", "msg-rare", now + 1000);
768
835
 
836
+ // Mock Qdrant with the frequently accessed item scoring higher
837
+ mockQdrantResults = [
838
+ {
839
+ id: "emb-freq",
840
+ score: 0.92,
841
+ payload: {
842
+ target_type: "item",
843
+ target_id: "item-freq",
844
+ text: "User timezone is America/Los_Angeles",
845
+ kind: "identity",
846
+ status: "active",
847
+ created_at: now,
848
+ last_seen_at: now,
849
+ },
850
+ },
851
+ {
852
+ id: "emb-rare",
853
+ score: 0.75,
854
+ payload: {
855
+ target_type: "item",
856
+ target_id: "item-rare",
857
+ text: "User timezone offset is UTC-8",
858
+ kind: "identity",
859
+ status: "active",
860
+ created_at: now + 1000,
861
+ last_seen_at: now + 1000,
862
+ },
863
+ },
864
+ ];
865
+
769
866
  const recall = await buildMemoryRecall(
770
867
  "timezone",
771
868
  "conv-access",
772
869
  TEST_CONFIG,
773
870
  );
774
871
 
775
- // The frequently accessed item should appear
872
+ expect(recall.recencyHits).toBeGreaterThan(0);
873
+ expect(recall.enabled).toBe(true);
874
+ // Frequently accessed timezone item should be in injected text
776
875
  expect(recall.injectedText).toContain("America/Los_Angeles");
777
876
  });
778
877
  });
@@ -782,12 +881,12 @@ describe("Memory Recall Quality", () => {
782
881
  // -------------------------------------------------------------------------
783
882
 
784
883
  describe("multi-source recall", () => {
785
- test("lexical and item-based results are merged into a single recall", async () => {
884
+ test("recency search surfaces segments when hybrid search is unavailable", async () => {
786
885
  const db = getDb();
787
886
  const now = 1_700_000_500_000;
788
887
  insertConversation(db, "conv-multi", now);
789
888
 
790
- // Segment (lexical source)
889
+ // Segment (recency source)
791
890
  insertMessage(
792
891
  db,
793
892
  "msg-seg",
@@ -806,7 +905,7 @@ describe("Memory Recall Quality", () => {
806
905
  now,
807
906
  );
808
907
 
809
- // Item (entity/item source)
908
+ // Item (constraint kind)
810
909
  insertItem(db, {
811
910
  id: "item-deploy-rule",
812
911
  kind: "constraint",
@@ -817,16 +916,33 @@ describe("Memory Recall Quality", () => {
817
916
  });
818
917
  insertItemSource(db, "item-deploy-rule", "msg-seg", now);
819
918
 
919
+ // Mock Qdrant to return the deployment rule item
920
+ mockQdrantResults = [
921
+ {
922
+ id: "emb-deploy-rule",
923
+ score: 0.91,
924
+ payload: {
925
+ target_type: "item",
926
+ target_id: "item-deploy-rule",
927
+ text: "Always deploy to staging before production",
928
+ kind: "constraint",
929
+ status: "active",
930
+ created_at: now,
931
+ last_seen_at: now,
932
+ },
933
+ },
934
+ ];
935
+
820
936
  const recall = await buildMemoryRecall(
821
937
  "deployment staging production",
822
938
  "conv-multi",
823
939
  TEST_CONFIG,
824
940
  );
825
941
 
826
- // Both the segment and item should contribute to the recall
827
- expect(recall.lexicalHits).toBeGreaterThan(0);
942
+ expect(recall.recencyHits).toBeGreaterThan(0);
943
+ expect(recall.enabled).toBe(true);
944
+ // Deployment rule should be injected
828
945
  expect(recall.injectedText).toContain("staging");
829
- expect(recall.injectedText).toContain("production");
830
946
  });
831
947
 
832
948
  test("recall with no matching content returns empty injection", async () => {
@@ -843,288 +959,6 @@ describe("Memory Recall Quality", () => {
843
959
  expect(recall.injectedText).toBe("");
844
960
  expect(recall.injectedTokens).toBe(0);
845
961
  });
846
-
847
- test("entity alias matching recalls linked items on indirect query terms", async () => {
848
- const db = getDb();
849
- const now = 1_700_000_650_000;
850
- insertConversation(db, "conv-entity-alias", now);
851
- insertMessage(
852
- db,
853
- "msg-entity-alias",
854
- "conv-entity-alias",
855
- "user",
856
- "Our team standard editor is Visual Studio Code.",
857
- now,
858
- );
859
-
860
- insertItem(db, {
861
- id: "item-editor-vscode",
862
- kind: "preference",
863
- subject: "editor preference",
864
- statement: "Team standard editor is Visual Studio Code",
865
- importance: 0.8,
866
- firstSeenAt: now,
867
- });
868
- insertItemSource(db, "item-editor-vscode", "msg-entity-alias", now);
869
-
870
- db.insert(memoryEntities)
871
- .values({
872
- id: "entity-vscode",
873
- name: "Visual Studio Code",
874
- type: "tool",
875
- aliases: JSON.stringify(["vscode"]),
876
- description: null,
877
- firstSeenAt: now,
878
- lastSeenAt: now,
879
- mentionCount: 1,
880
- })
881
- .run();
882
- db.insert(memoryItemEntities)
883
- .values({
884
- memoryItemId: "item-editor-vscode",
885
- entityId: "entity-vscode",
886
- })
887
- .run();
888
-
889
- const recall = await buildMemoryRecall(
890
- "vscode debug setup",
891
- "conv-entity-alias",
892
- TEST_CONFIG,
893
- );
894
- expect(recall.entityHits).toBeGreaterThan(0);
895
- expect(recall.injectedText).toContain("Visual Studio Code");
896
- });
897
-
898
- test("relation expansion recalls neighbor-linked items when only seed entity is mentioned", async () => {
899
- const db = getDb();
900
- const now = 1_700_000_680_000;
901
- insertConversation(db, "conv-entity-rel", now);
902
- insertMessage(
903
- db,
904
- "msg-entity-rel",
905
- "conv-entity-rel",
906
- "user",
907
- "Project Atlas reliability playbook.",
908
- now,
909
- );
910
-
911
- insertItem(db, {
912
- id: "item-k8s-hpa",
913
- kind: "fact",
914
- subject: "autoscaling strategy",
915
- statement:
916
- "Use Kubernetes horizontal pod autoscaling for sustained traffic spikes",
917
- importance: 0.75,
918
- firstSeenAt: now,
919
- });
920
- insertItemSource(db, "item-k8s-hpa", "msg-entity-rel", now);
921
-
922
- db.insert(memoryEntities)
923
- .values([
924
- {
925
- id: "entity-atlas",
926
- name: "Project Atlas",
927
- type: "project",
928
- aliases: JSON.stringify(["atlas"]),
929
- description: null,
930
- firstSeenAt: now,
931
- lastSeenAt: now,
932
- mentionCount: 1,
933
- },
934
- {
935
- id: "entity-kubernetes",
936
- name: "Kubernetes",
937
- type: "tool",
938
- aliases: JSON.stringify(["k8s"]),
939
- description: null,
940
- firstSeenAt: now,
941
- lastSeenAt: now,
942
- mentionCount: 1,
943
- },
944
- ])
945
- .run();
946
- db.insert(memoryEntityRelations)
947
- .values({
948
- id: "rel-atlas-k8s",
949
- sourceEntityId: "entity-atlas",
950
- targetEntityId: "entity-kubernetes",
951
- relation: "uses",
952
- evidence: "Project Atlas runs on Kubernetes",
953
- firstSeenAt: now,
954
- lastSeenAt: now,
955
- })
956
- .run();
957
- db.insert(memoryItemEntities)
958
- .values({
959
- memoryItemId: "item-k8s-hpa",
960
- entityId: "entity-kubernetes",
961
- })
962
- .run();
963
-
964
- const relationConfig = {
965
- ...TEST_CONFIG,
966
- memory: {
967
- ...TEST_CONFIG.memory,
968
- entity: {
969
- ...TEST_CONFIG.memory.entity,
970
- relationRetrieval: {
971
- ...TEST_CONFIG.memory.entity.relationRetrieval,
972
- enabled: true,
973
- maxSeedEntities: 4,
974
- maxNeighborEntities: 4,
975
- maxEdges: 6,
976
- neighborScoreMultiplier: 0.6,
977
- },
978
- },
979
- },
980
- };
981
-
982
- const recall = await buildMemoryRecall(
983
- "atlas reliability guidance",
984
- "conv-entity-rel",
985
- relationConfig,
986
- );
987
- expect(recall.entityHits).toBeGreaterThan(0);
988
- expect(recall.injectedText).toContain(
989
- "Kubernetes horizontal pod autoscaling",
990
- );
991
- });
992
-
993
- test("direct preference evidence outranks weak relation-expanded noise", async () => {
994
- const db = getDb();
995
- const now = 1_700_000_690_000;
996
- insertConversation(db, "conv-rel-rank", now);
997
- insertMessage(
998
- db,
999
- "msg-rel-rank",
1000
- "conv-rel-rank",
1001
- "user",
1002
- "For Project Atlas deployments, we prefer blue-green rollout strategy.",
1003
- now,
1004
- );
1005
- insertSegment(
1006
- db,
1007
- "seg-rel-rank",
1008
- "msg-rel-rank",
1009
- "conv-rel-rank",
1010
- "user",
1011
- "For Project Atlas deployments, we prefer blue-green rollout strategy.",
1012
- now,
1013
- );
1014
-
1015
- insertItem(db, {
1016
- id: "item-direct-pref",
1017
- kind: "preference",
1018
- subject: "deployment preference",
1019
- statement: "Project Atlas deployment preference is blue-green rollouts",
1020
- importance: 0.95,
1021
- firstSeenAt: now,
1022
- });
1023
- insertItemSource(db, "item-direct-pref", "msg-rel-rank", now);
1024
-
1025
- db.insert(memoryEntities)
1026
- .values({
1027
- id: "entity-atlas-rank",
1028
- name: "Project Atlas",
1029
- type: "project",
1030
- aliases: JSON.stringify(["atlas"]),
1031
- description: null,
1032
- firstSeenAt: now,
1033
- lastSeenAt: now,
1034
- mentionCount: 1,
1035
- })
1036
- .run();
1037
- db.insert(memoryItemEntities)
1038
- .values({
1039
- memoryItemId: "item-direct-pref",
1040
- entityId: "entity-atlas-rank",
1041
- })
1042
- .run();
1043
-
1044
- for (let index = 1; index <= 8; index++) {
1045
- const entityId = `entity-noise-${index}`;
1046
- const itemId = `item-rel-noise-${index}`;
1047
- db.insert(memoryEntities)
1048
- .values({
1049
- id: entityId,
1050
- name: `AtlasTool${index}`,
1051
- type: "tool",
1052
- aliases: JSON.stringify([`atlas-tool-${index}`]),
1053
- description: null,
1054
- firstSeenAt: now + index,
1055
- lastSeenAt: now + index,
1056
- mentionCount: 1,
1057
- })
1058
- .run();
1059
- db.insert(memoryEntityRelations)
1060
- .values({
1061
- id: `rel-atlas-noise-${index}`,
1062
- sourceEntityId: "entity-atlas-rank",
1063
- targetEntityId: entityId,
1064
- relation: "uses",
1065
- evidence: `Project Atlas uses AtlasTool${index}`,
1066
- firstSeenAt: now + index,
1067
- lastSeenAt: now + index,
1068
- })
1069
- .run();
1070
- insertItem(db, {
1071
- id: itemId,
1072
- kind: "fact",
1073
- subject: `monitoring tool ${index}`,
1074
- statement: `ObservabilityTool${index} emits generic telemetry metrics`,
1075
- importance: 0.35,
1076
- firstSeenAt: now + index,
1077
- });
1078
- insertItemSource(db, itemId, "msg-rel-rank", now + index);
1079
- db.insert(memoryItemEntities)
1080
- .values({
1081
- memoryItemId: itemId,
1082
- entityId,
1083
- })
1084
- .run();
1085
- }
1086
-
1087
- const relationConfig = {
1088
- ...TEST_CONFIG,
1089
- memory: {
1090
- ...TEST_CONFIG.memory,
1091
- retrieval: {
1092
- ...TEST_CONFIG.memory.retrieval,
1093
- semanticTopK: 10,
1094
- },
1095
- entity: {
1096
- ...TEST_CONFIG.memory.entity,
1097
- relationRetrieval: {
1098
- ...TEST_CONFIG.memory.entity.relationRetrieval,
1099
- enabled: true,
1100
- maxSeedEntities: 6,
1101
- maxNeighborEntities: 20,
1102
- maxEdges: 20,
1103
- neighborScoreMultiplier: 0.7,
1104
- },
1105
- },
1106
- },
1107
- };
1108
-
1109
- const recall = await buildMemoryRecall(
1110
- "atlas deployment preference strategy",
1111
- "conv-rel-rank",
1112
- relationConfig,
1113
- );
1114
- const orderedKeys = recall.topCandidates.map(
1115
- (candidate) => candidate.key,
1116
- );
1117
- const directIndex = orderedKeys.indexOf("item:item-direct-pref");
1118
- const noiseIndices = orderedKeys
1119
- .map((key, index) => ({ key, index }))
1120
- .filter(({ key }) => key.startsWith("item:item-rel-noise-"))
1121
- .map(({ index }) => index);
1122
-
1123
- expect(directIndex).toBeGreaterThanOrEqual(0);
1124
- expect(noiseIndices.length).toBeGreaterThan(0);
1125
- expect(noiseIndices.every((index) => index > directIndex)).toBe(true);
1126
- expect(noiseIndices.length).toBeLessThanOrEqual(4);
1127
- });
1128
962
  });
1129
963
 
1130
964
  // -------------------------------------------------------------------------
@@ -1152,7 +986,7 @@ describe("Memory Recall Quality", () => {
1152
986
  );
1153
987
  });
1154
988
 
1155
- test("precision@k guard for preference recall fixture", async () => {
989
+ test("precision@k guard verifies pipeline completes with seeded segments", async () => {
1156
990
  const db = getDb();
1157
991
  const now = 1_700_000_700_000;
1158
992
  insertConversation(db, "conv-pk", now);
@@ -1188,12 +1022,15 @@ describe("Memory Recall Quality", () => {
1188
1022
  TEST_CONFIG,
1189
1023
  );
1190
1024
 
1191
- // At least 2 of 3 preference segments should appear in recall
1025
+ // Recency-only candidates are promoted to tier 2 and injected.
1026
+ // Verify the pipeline recalled the preference content.
1027
+ expect(recall.recencyHits).toBeGreaterThan(0);
1028
+ expect(recall.enabled).toBe(true);
1192
1029
  assertPrecisionAtK(
1193
1030
  recall.injectedText,
1194
- ["dark mode", "TypeScript", "tabs over spaces"],
1031
+ ["dark mode", "TypeScript", "tabs"],
1195
1032
  2,
1196
- "preference-recall",
1033
+ "preference recall precision",
1197
1034
  );
1198
1035
  });
1199
1036
  });