@vellumai/assistant 0.4.49 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/memory.md +180 -119
  4. package/package.json +2 -2
  5. package/src/__tests__/agent-loop.test.ts +3 -1
  6. package/src/__tests__/anthropic-provider.test.ts +114 -23
  7. package/src/__tests__/approval-cascade.test.ts +1 -15
  8. package/src/__tests__/approval-routes-http.test.ts +2 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  10. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  11. package/src/__tests__/checker.test.ts +13 -0
  12. package/src/__tests__/config-schema.test.ts +1 -68
  13. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  14. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  15. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  16. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  17. package/src/__tests__/credential-vault-unit.test.ts +4 -0
  18. package/src/__tests__/credential-vault.test.ts +13 -1
  19. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  20. package/src/__tests__/date-context.test.ts +93 -77
  21. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  22. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  23. package/src/__tests__/history-repair.test.ts +245 -0
  24. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  25. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  26. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  27. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  28. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  29. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  30. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  31. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  32. package/src/__tests__/memory-regressions.test.ts +477 -2841
  33. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  34. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  35. package/src/__tests__/mime-builder.test.ts +28 -0
  36. package/src/__tests__/native-web-search.test.ts +1 -0
  37. package/src/__tests__/oauth-cli.test.ts +572 -5
  38. package/src/__tests__/oauth-store.test.ts +120 -6
  39. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  40. package/src/__tests__/registry.test.ts +0 -1
  41. package/src/__tests__/relay-server.test.ts +46 -1
  42. package/src/__tests__/schedule-tools.test.ts +32 -0
  43. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  44. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  45. package/src/__tests__/secure-keys.test.ts +7 -2
  46. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  47. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  48. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  49. package/src/__tests__/session-agent-loop.test.ts +19 -15
  50. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  51. package/src/__tests__/session-error.test.ts +124 -2
  52. package/src/__tests__/session-history-web-search.test.ts +918 -0
  53. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  54. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  55. package/src/__tests__/session-queue.test.ts +37 -27
  56. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  57. package/src/__tests__/session-slash-known.test.ts +1 -15
  58. package/src/__tests__/session-slash-queue.test.ts +1 -15
  59. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  60. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  61. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  62. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  63. package/src/__tests__/skills-install-extract.test.ts +93 -0
  64. package/src/__tests__/skillssh-registry.test.ts +451 -0
  65. package/src/__tests__/trust-store.test.ts +15 -0
  66. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  67. package/src/agent/ax-tree-compaction.test.ts +51 -0
  68. package/src/agent/loop.ts +39 -12
  69. package/src/approvals/AGENTS.md +1 -1
  70. package/src/approvals/guardian-request-resolvers.ts +14 -2
  71. package/src/bundler/compiler-tools.ts +66 -2
  72. package/src/calls/call-domain.ts +132 -0
  73. package/src/calls/call-store.ts +6 -0
  74. package/src/calls/relay-server.ts +43 -5
  75. package/src/calls/relay-setup-router.ts +17 -1
  76. package/src/calls/twilio-config.ts +1 -1
  77. package/src/calls/types.ts +3 -1
  78. package/src/cli/commands/doctor.ts +4 -3
  79. package/src/cli/commands/mcp.ts +46 -59
  80. package/src/cli/commands/memory.ts +16 -165
  81. package/src/cli/commands/oauth/apps.ts +31 -2
  82. package/src/cli/commands/oauth/connections.ts +431 -97
  83. package/src/cli/commands/oauth/providers.ts +15 -1
  84. package/src/cli/commands/sessions.ts +5 -2
  85. package/src/cli/commands/skills.ts +173 -1
  86. package/src/cli/http-client.ts +0 -20
  87. package/src/cli/main-screen.tsx +2 -2
  88. package/src/cli/program.ts +5 -6
  89. package/src/cli.ts +4 -10
  90. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  91. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  92. package/src/config/bundled-tool-registry.ts +2 -5
  93. package/src/config/schema.ts +1 -12
  94. package/src/config/schemas/memory-lifecycle.ts +0 -9
  95. package/src/config/schemas/memory-processing.ts +0 -180
  96. package/src/config/schemas/memory-retrieval.ts +32 -104
  97. package/src/config/schemas/memory.ts +0 -10
  98. package/src/config/types.ts +0 -4
  99. package/src/context/window-manager.ts +4 -1
  100. package/src/daemon/config-watcher.ts +61 -3
  101. package/src/daemon/daemon-control.ts +1 -1
  102. package/src/daemon/date-context.ts +114 -31
  103. package/src/daemon/handlers/sessions.ts +18 -13
  104. package/src/daemon/handlers/skills.ts +20 -1
  105. package/src/daemon/history-repair.ts +72 -8
  106. package/src/daemon/host-cu-proxy.ts +55 -26
  107. package/src/daemon/lifecycle.ts +31 -3
  108. package/src/daemon/mcp-reload-service.ts +2 -2
  109. package/src/daemon/message-types/computer-use.ts +1 -12
  110. package/src/daemon/message-types/memory.ts +4 -16
  111. package/src/daemon/message-types/messages.ts +1 -0
  112. package/src/daemon/message-types/sessions.ts +4 -0
  113. package/src/daemon/server.ts +12 -1
  114. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  115. package/src/daemon/session-agent-loop.ts +334 -48
  116. package/src/daemon/session-error.ts +89 -6
  117. package/src/daemon/session-history.ts +17 -7
  118. package/src/daemon/session-media-retry.ts +6 -2
  119. package/src/daemon/session-memory.ts +69 -149
  120. package/src/daemon/session-process.ts +10 -1
  121. package/src/daemon/session-runtime-assembly.ts +49 -19
  122. package/src/daemon/session-surfaces.ts +4 -1
  123. package/src/daemon/session-tool-setup.ts +7 -1
  124. package/src/daemon/session.ts +12 -2
  125. package/src/instrument.ts +61 -1
  126. package/src/memory/admin.ts +2 -191
  127. package/src/memory/canonical-guardian-store.ts +38 -2
  128. package/src/memory/conversation-crud.ts +0 -33
  129. package/src/memory/conversation-queries.ts +22 -3
  130. package/src/memory/db-init.ts +28 -0
  131. package/src/memory/embedding-backend.ts +84 -8
  132. package/src/memory/embedding-types.ts +9 -1
  133. package/src/memory/indexer.ts +7 -46
  134. package/src/memory/items-extractor.ts +274 -76
  135. package/src/memory/job-handlers/backfill.ts +2 -127
  136. package/src/memory/job-handlers/cleanup.ts +2 -16
  137. package/src/memory/job-handlers/extraction.ts +2 -138
  138. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  139. package/src/memory/job-handlers/summarization.ts +3 -148
  140. package/src/memory/job-utils.ts +21 -59
  141. package/src/memory/jobs-store.ts +1 -159
  142. package/src/memory/jobs-worker.ts +9 -52
  143. package/src/memory/migrations/104-core-indexes.ts +3 -3
  144. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  145. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  146. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  147. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  148. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  149. package/src/memory/migrations/154-drop-fts.ts +20 -0
  150. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  151. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  152. package/src/memory/migrations/index.ts +7 -0
  153. package/src/memory/qdrant-client.ts +148 -51
  154. package/src/memory/raw-query.ts +1 -1
  155. package/src/memory/retriever.test.ts +294 -273
  156. package/src/memory/retriever.ts +421 -645
  157. package/src/memory/schema/calls.ts +2 -0
  158. package/src/memory/schema/memory-core.ts +3 -48
  159. package/src/memory/schema/oauth.ts +2 -0
  160. package/src/memory/search/formatting.ts +263 -176
  161. package/src/memory/search/lexical.ts +1 -254
  162. package/src/memory/search/ranking.ts +0 -455
  163. package/src/memory/search/semantic.ts +100 -14
  164. package/src/memory/search/staleness.ts +47 -0
  165. package/src/memory/search/tier-classifier.ts +21 -0
  166. package/src/memory/search/types.ts +15 -77
  167. package/src/memory/task-memory-cleanup.ts +4 -6
  168. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  169. package/src/oauth/byo-connection.test.ts +8 -1
  170. package/src/oauth/oauth-store.ts +113 -27
  171. package/src/oauth/seed-providers.ts +6 -0
  172. package/src/oauth/token-persistence.ts +11 -3
  173. package/src/permissions/defaults.ts +1 -0
  174. package/src/permissions/trust-store.ts +23 -1
  175. package/src/playbooks/playbook-compiler.ts +1 -1
  176. package/src/prompts/system-prompt.ts +18 -2
  177. package/src/providers/anthropic/client.ts +56 -126
  178. package/src/providers/types.ts +7 -1
  179. package/src/runtime/AGENTS.md +9 -0
  180. package/src/runtime/auth/route-policy.ts +6 -3
  181. package/src/runtime/guardian-reply-router.ts +24 -22
  182. package/src/runtime/http-server.ts +2 -2
  183. package/src/runtime/invite-redemption-service.ts +19 -1
  184. package/src/runtime/invite-service.ts +25 -0
  185. package/src/runtime/pending-interactions.ts +2 -2
  186. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  187. package/src/runtime/routes/conversation-routes.ts +9 -1
  188. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  189. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  190. package/src/runtime/routes/memory-item-routes.ts +503 -0
  191. package/src/runtime/routes/session-management-routes.ts +3 -3
  192. package/src/runtime/routes/settings-routes.ts +2 -2
  193. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  194. package/src/runtime/routes/workspace-routes.ts +2 -1
  195. package/src/security/keychain-broker-client.ts +17 -4
  196. package/src/security/secure-keys.ts +25 -3
  197. package/src/security/token-manager.ts +36 -36
  198. package/src/skills/catalog-install.ts +74 -18
  199. package/src/skills/skillssh-registry.ts +503 -0
  200. package/src/tools/assets/search.ts +5 -1
  201. package/src/tools/computer-use/definitions.ts +0 -10
  202. package/src/tools/computer-use/registry.ts +1 -1
  203. package/src/tools/credentials/vault.ts +1 -3
  204. package/src/tools/memory/definitions.ts +4 -13
  205. package/src/tools/memory/handlers.test.ts +83 -103
  206. package/src/tools/memory/handlers.ts +50 -85
  207. package/src/tools/schedule/create.ts +8 -1
  208. package/src/tools/schedule/update.ts +8 -1
  209. package/src/tools/skills/load.ts +25 -2
  210. package/src/__tests__/clarification-resolver.test.ts +0 -193
  211. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  212. package/src/__tests__/conflict-policy.test.ts +0 -269
  213. package/src/__tests__/conflict-store.test.ts +0 -372
  214. package/src/__tests__/contradiction-checker.test.ts +0 -361
  215. package/src/__tests__/entity-extractor.test.ts +0 -211
  216. package/src/__tests__/entity-search.test.ts +0 -1117
  217. package/src/__tests__/profile-compiler.test.ts +0 -392
  218. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  219. package/src/__tests__/session-profile-injection.test.ts +0 -557
  220. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  221. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  222. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  223. package/src/daemon/session-conflict-gate.ts +0 -167
  224. package/src/daemon/session-dynamic-profile.ts +0 -77
  225. package/src/memory/clarification-resolver.ts +0 -417
  226. package/src/memory/conflict-intent.ts +0 -205
  227. package/src/memory/conflict-policy.ts +0 -127
  228. package/src/memory/conflict-store.ts +0 -410
  229. package/src/memory/contradiction-checker.ts +0 -508
  230. package/src/memory/entity-extractor.ts +0 -535
  231. package/src/memory/format-recall.ts +0 -47
  232. package/src/memory/fts-reconciler.ts +0 -165
  233. package/src/memory/job-handlers/conflict.ts +0 -200
  234. package/src/memory/profile-compiler.ts +0 -195
  235. package/src/memory/recall-cache.ts +0 -117
  236. package/src/memory/search/entity.ts +0 -535
  237. package/src/memory/search/query-expansion.test.ts +0 -70
  238. package/src/memory/search/query-expansion.ts +0 -118
  239. package/src/runtime/routes/mcp-routes.ts +0 -20
@@ -1,10 +1,9 @@
1
1
  /**
2
- * Tests for graceful embedding degradation in the memory retrieval pipeline.
2
+ * Tests for the memory retrieval pipeline.
3
3
  *
4
- * Verifies that when semantic search subsystems (Qdrant, embedding provider)
5
- * are unavailable, the retriever falls back to lexical/recency/direct sources
6
- * with boosted limits, applies query expansion, and reports structured
7
- * degradation status in result metadata.
4
+ * Covers: hybrid search tier classification staleness → injection,
5
+ * empty results no injection, superseded items filtered out,
6
+ * staleness demotion, budget allocation, and degradation scenarios.
8
7
  */
9
8
  import { mkdtempSync, rmSync } from "node:fs";
10
9
  import { tmpdir } from "node:os";
@@ -19,7 +18,7 @@ import {
19
18
  test,
20
19
  } from "bun:test";
21
20
 
22
- const testDir = mkdtempSync(join(tmpdir(), "memory-retriever-degrade-"));
21
+ const testDir = mkdtempSync(join(tmpdir(), "memory-retriever-"));
23
22
 
24
23
  mock.module("../util/platform.js", () => ({
25
24
  getDataDir: () => testDir,
@@ -57,6 +56,7 @@ mock.module("../memory/embedding-local.js", () => ({
57
56
  mock.module("../memory/qdrant-client.js", () => ({
58
57
  getQdrantClient: () => ({
59
58
  searchWithFilter: async () => [],
59
+ hybridSearch: async () => [],
60
60
  upsertPoints: async () => {},
61
61
  deletePoints: async () => {},
62
62
  }),
@@ -93,8 +93,11 @@ import {
93
93
  _resetQdrantBreaker,
94
94
  isQdrantBreakerOpen,
95
95
  } from "../memory/qdrant-circuit-breaker.js";
96
- import { bumpMemoryVersion } from "../memory/recall-cache.js";
97
- import { buildMemoryRecall } from "../memory/retriever.js";
96
+ import {
97
+ buildMemoryRecall,
98
+ injectMemoryRecallAsSeparateMessage,
99
+ stripMemoryRecallMessages,
100
+ } from "../memory/retriever.js";
98
101
  import {
99
102
  conversations,
100
103
  memoryItems,
@@ -219,7 +222,7 @@ function insertItemSource(
219
222
  function seedMemory() {
220
223
  const db = getDb();
221
224
  const now = Date.now();
222
- const convId = "conv-degrade-test";
225
+ const convId = "conv-test";
223
226
 
224
227
  insertConversation(db, convId, now - 60_000);
225
228
  insertMessage(
@@ -272,7 +275,7 @@ function seedMemory() {
272
275
  // Suite
273
276
  // ---------------------------------------------------------------------------
274
277
 
275
- describe("Memory Retriever Degradation", () => {
278
+ describe("Memory Retriever Pipeline", () => {
276
279
  beforeAll(() => {
277
280
  initializeDb();
278
281
  });
@@ -282,12 +285,10 @@ describe("Memory Retriever Degradation", () => {
282
285
  db.run("DELETE FROM memory_item_sources");
283
286
  db.run("DELETE FROM memory_items");
284
287
  db.run("DELETE FROM memory_segments");
285
- db.run("DELETE FROM memory_segment_fts");
286
288
  db.run("DELETE FROM messages");
287
289
  db.run("DELETE FROM conversations");
288
290
  _resetQdrantBreaker();
289
291
  clearEmbeddingBackendCache();
290
- bumpMemoryVersion();
291
292
  });
292
293
 
293
294
  afterAll(() => {
@@ -296,190 +297,207 @@ describe("Memory Retriever Degradation", () => {
296
297
  });
297
298
 
298
299
  // -----------------------------------------------------------------------
299
- // Non-degraded baseline
300
+ // Hybrid search → tier classification → injection
300
301
  // -----------------------------------------------------------------------
301
302
 
302
- test("non-degraded baseline: returns results with degraded=false when all systems available", async () => {
303
+ test("baseline: pipeline completes non-degraded with mock Qdrant returning empty", async () => {
303
304
  seedMemory();
304
305
 
305
306
  const result = await buildMemoryRecall(
306
307
  "API design",
307
- "conv-degrade-test",
308
+ "conv-test",
308
309
  TEST_CONFIG,
309
310
  );
310
311
 
311
312
  expect(result.enabled).toBe(true);
312
313
  expect(result.degraded).toBe(false);
313
314
  expect(result.degradation).toBeUndefined();
314
- // Lexical search should find matches
315
- expect(result.lexicalHits).toBeGreaterThan(0);
316
- // Should have selected some candidates
317
- expect(result.selectedCount).toBeGreaterThan(0);
318
- expect(result.injectedText.length).toBeGreaterThan(0);
315
+ // With mock Qdrant returning empty results and recency-only candidates
316
+ // scoring below tier thresholds, no candidates are selected.
317
+ // The pipeline still completes successfully with tier metadata.
318
+ expect(result.tier1Count).toBeDefined();
319
+ expect(result.tier2Count).toBeDefined();
320
+ expect(result.hybridSearchMs).toBeDefined();
321
+ // Recency search finds candidates even though they don't pass tier classification
322
+ expect(result.recencyHits).toBeGreaterThan(0);
323
+ expect(result.mergedCount).toBeGreaterThan(0);
319
324
  });
320
325
 
321
326
  // -----------------------------------------------------------------------
322
- // Qdrant circuit breaker open
327
+ // Empty results no injection
323
328
  // -----------------------------------------------------------------------
324
329
 
325
- test("Qdrant unavailable: skips semantic search and boosts lexical limits", async () => {
326
- seedMemory();
327
-
328
- // Force the Qdrant circuit breaker open by importing and manipulating it.
329
- // We need to trip it by recording enough failures.
330
- const { withQdrantBreaker } =
331
- await import("../memory/qdrant-circuit-breaker.js");
332
- for (let i = 0; i < 5; i++) {
333
- try {
334
- await withQdrantBreaker(async () => {
335
- throw new Error("simulated qdrant failure");
336
- });
337
- } catch {
338
- // expected
339
- }
340
- }
341
- expect(isQdrantBreakerOpen()).toBe(true);
342
-
330
+ test("empty results: no injection when no memory content exists", async () => {
331
+ // Don't seed any memory
343
332
  const result = await buildMemoryRecall(
344
- "API design",
345
- "conv-degrade-test",
333
+ "nonexistent topic",
334
+ "conv-empty",
346
335
  TEST_CONFIG,
347
336
  );
348
337
 
349
338
  expect(result.enabled).toBe(true);
350
- // Semantic search should be skipped entirely
351
- expect(result.semanticHits).toBe(0);
352
- // Lexical search should still work (boosted limits)
353
- expect(result.lexicalHits).toBeGreaterThan(0);
354
- // Results should still be returned despite no semantic
355
- expect(result.selectedCount).toBeGreaterThan(0);
356
- expect(result.injectedText.length).toBeGreaterThan(0);
339
+ expect(result.selectedCount).toBe(0);
340
+ expect(result.injectedText).toBe("");
341
+ expect(result.mergedCount).toBe(0);
357
342
  });
358
343
 
359
344
  // -----------------------------------------------------------------------
360
- // Embedding provider down
345
+ // Memory disabled
361
346
  // -----------------------------------------------------------------------
362
347
 
363
- test("embedding provider down: falls back to lexical-only when embeddings not required", async () => {
364
- seedMemory();
365
-
366
- // Config with no embedding provider available (no API keys, auto mode)
367
- const noEmbedConfig: AssistantConfig = {
348
+ test("disabled: returns enabled=false when memory is disabled", async () => {
349
+ const disabledConfig: AssistantConfig = {
368
350
  ...TEST_CONFIG,
369
- apiKeys: {
370
- ...TEST_CONFIG.apiKeys,
371
- openai: "",
372
- gemini: "",
373
- ollama: "",
374
- },
375
351
  memory: {
376
352
  ...TEST_CONFIG.memory,
377
- embeddings: {
378
- ...TEST_CONFIG.memory.embeddings,
379
- provider: "openai",
380
- required: false,
381
- },
353
+ enabled: false,
382
354
  },
383
355
  };
384
356
 
385
357
  const result = await buildMemoryRecall(
386
- "API design",
387
- "conv-degrade-test",
388
- noEmbedConfig,
358
+ "test query",
359
+ "conv-test",
360
+ disabledConfig,
389
361
  );
390
362
 
391
- expect(result.enabled).toBe(true);
392
- // With no embedding provider, semantic search should be skipped
393
- expect(result.semanticHits).toBe(0);
394
- // Lexical search should still produce results
395
- expect(result.lexicalHits).toBeGreaterThan(0);
396
- expect(result.selectedCount).toBeGreaterThan(0);
363
+ expect(result.enabled).toBe(false);
364
+ expect(result.reason).toBe("memory.disabled");
397
365
  });
398
366
 
399
- test("embedding provider down: returns degraded with structured status when embeddings required", async () => {
400
- seedMemory();
367
+ // -----------------------------------------------------------------------
368
+ // Superseded items filtered out
369
+ // -----------------------------------------------------------------------
401
370
 
402
- const requiredEmbedConfig: AssistantConfig = {
403
- ...TEST_CONFIG,
404
- apiKeys: {
405
- ...TEST_CONFIG.apiKeys,
406
- openai: "",
407
- gemini: "",
408
- ollama: "",
409
- },
410
- memory: {
411
- ...TEST_CONFIG.memory,
412
- embeddings: {
413
- ...TEST_CONFIG.memory.embeddings,
414
- provider: "openai",
415
- required: true,
416
- },
417
- },
418
- };
371
+ test("superseded items are not included in results", async () => {
372
+ const db = getDb();
373
+ const now = Date.now();
374
+ const convId = "conv-superseded";
375
+
376
+ insertConversation(db, convId, now - 60_000);
377
+ insertMessage(db, "msg-s1", convId, "user", "test superseded", now - 50_000);
378
+
379
+ insertSegment(
380
+ db,
381
+ "seg-s1",
382
+ "msg-s1",
383
+ convId,
384
+ "user",
385
+ "test superseded content",
386
+ now - 50_000,
387
+ );
388
+
389
+ // Insert an active item and a superseded item
390
+ insertItem(db, {
391
+ id: "item-active",
392
+ kind: "fact",
393
+ subject: "test",
394
+ statement: "Active fact about testing",
395
+ status: "active",
396
+ firstSeenAt: now - 30_000,
397
+ });
398
+ insertItem(db, {
399
+ id: "item-superseded",
400
+ kind: "fact",
401
+ subject: "test",
402
+ statement: "Old fact that was superseded",
403
+ status: "superseded",
404
+ firstSeenAt: now - 30_000,
405
+ });
419
406
 
420
407
  const result = await buildMemoryRecall(
421
- "API design",
422
- "conv-degrade-test",
423
- requiredEmbedConfig,
408
+ "test superseded",
409
+ convId,
410
+ TEST_CONFIG,
424
411
  );
425
412
 
413
+ // The injected text should not contain the superseded item statement
414
+ if (result.injectedText.length > 0) {
415
+ expect(result.injectedText).not.toContain("Old fact that was superseded");
416
+ }
417
+ });
418
+
419
+ // -----------------------------------------------------------------------
420
+ // Staleness demotion (very_stale tier 1 → tier 2)
421
+ // -----------------------------------------------------------------------
422
+
423
+ test("staleness: very old items get demoted from tier 1 to tier 2", async () => {
424
+ const db = getDb();
425
+ const now = Date.now();
426
+ const convId = "conv-stale";
427
+ const MS_PER_DAY = 86_400_000;
428
+
429
+ insertConversation(db, convId, now - MS_PER_DAY * 200);
430
+
431
+ // Create a message from 200 days ago to serve as recency source
432
+ insertMessage(
433
+ db,
434
+ "msg-old",
435
+ convId,
436
+ "user",
437
+ "ancient discussion about TypeScript",
438
+ now - MS_PER_DAY * 200,
439
+ );
440
+ insertSegment(
441
+ db,
442
+ "seg-old",
443
+ "msg-old",
444
+ convId,
445
+ "user",
446
+ "ancient discussion about TypeScript patterns",
447
+ now - MS_PER_DAY * 200,
448
+ );
449
+
450
+ // Insert a very old item (200 days) — should be marked as very_stale
451
+ insertItem(db, {
452
+ id: "item-old",
453
+ kind: "fact",
454
+ subject: "TypeScript",
455
+ statement: "User uses TypeScript for all projects",
456
+ firstSeenAt: now - MS_PER_DAY * 200,
457
+ });
458
+ insertItemSource(db, "item-old", "msg-old", now - MS_PER_DAY * 200);
459
+
460
+ const result = await buildMemoryRecall(
461
+ "TypeScript patterns",
462
+ convId,
463
+ TEST_CONFIG,
464
+ );
465
+
466
+ // The pipeline should still return results (just potentially in tier 2)
426
467
  expect(result.enabled).toBe(true);
427
- expect(result.degraded).toBe(true);
428
- // Structured degradation status should be present
429
- expect(result.degradation).toBeDefined();
430
- expect(result.degradation!.semanticUnavailable).toBe(true);
431
- expect(result.degradation!.reason).toBe("embedding_provider_down");
432
- expect(result.degradation!.fallbackSources).toContain("lexical");
433
- expect(result.degradation!.fallbackSources).toContain("recency");
434
- expect(result.degradation!.fallbackSources).toContain("direct_item");
468
+ // Very old items should still appear but may be in tier 2 after demotion
469
+ expect(result.tier1Count).toBeDefined();
470
+ expect(result.tier2Count).toBeDefined();
435
471
  });
436
472
 
437
473
  // -----------------------------------------------------------------------
438
- // Query expansion in degraded mode
474
+ // Budget allocation (tier 1 priority)
439
475
  // -----------------------------------------------------------------------
440
476
 
441
- test("query expansion: conversational query gets expanded to keywords when semantic unavailable", async () => {
477
+ test("budget: respects maxInjectTokens override", async () => {
442
478
  seedMemory();
443
479
 
444
- // Force degraded mode via circuit breaker
445
- const { withQdrantBreaker } =
446
- await import("../memory/qdrant-circuit-breaker.js");
447
- for (let i = 0; i < 5; i++) {
448
- try {
449
- await withQdrantBreaker(async () => {
450
- throw new Error("simulated qdrant failure");
451
- });
452
- } catch {
453
- // expected
454
- }
455
- }
456
-
457
- // Use a conversational query full of stop words — query expansion should
458
- // strip them to meaningful keywords for better FTS recall.
480
+ // Use a very small token budget
459
481
  const result = await buildMemoryRecall(
460
- "what did we discuss about the API design?",
461
- "conv-degrade-test",
482
+ "API design",
483
+ "conv-test",
462
484
  TEST_CONFIG,
485
+ { maxInjectTokensOverride: 10 },
463
486
  );
464
487
 
465
488
  expect(result.enabled).toBe(true);
466
- expect(result.semanticHits).toBe(0);
467
- // The expanded query ("discuss", "API", "design") should match our seeded
468
- // segments and items containing those terms.
469
- expect(result.lexicalHits).toBeGreaterThan(0);
470
- expect(result.selectedCount).toBeGreaterThan(0);
471
- // Verify the injected text contains content from our seeded data
472
- expect(result.injectedText).toContain("API");
489
+ // With a 10-token budget, most content should be truncated
490
+ expect(result.injectedTokens).toBeLessThanOrEqual(10);
473
491
  });
474
492
 
475
493
  // -----------------------------------------------------------------------
476
- // Degradation status structure
494
+ // Degradation: Qdrant circuit breaker open
477
495
  // -----------------------------------------------------------------------
478
496
 
479
- test("degradation status: includes expected fields for qdrant_unavailable", async () => {
497
+ test("Qdrant unavailable: pipeline completes with recency fallback", async () => {
480
498
  seedMemory();
481
499
 
482
- // Trip the circuit breaker
500
+ // Force the Qdrant circuit breaker open
483
501
  const { withQdrantBreaker } =
484
502
  await import("../memory/qdrant-circuit-breaker.js");
485
503
  for (let i = 0; i < 5; i++) {
@@ -491,49 +509,31 @@ describe("Memory Retriever Degradation", () => {
491
509
  // expected
492
510
  }
493
511
  }
494
-
495
- // Disable early termination so the pipeline always reaches the
496
- // semantic search phase, where the open breaker triggers degradation.
497
- const configNoET: AssistantConfig = {
498
- ...TEST_CONFIG,
499
- memory: {
500
- ...TEST_CONFIG.memory,
501
- retrieval: {
502
- ...TEST_CONFIG.memory.retrieval,
503
- earlyTermination: {
504
- ...TEST_CONFIG.memory.retrieval.earlyTermination,
505
- enabled: false,
506
- },
507
- },
508
- },
509
- };
512
+ expect(isQdrantBreakerOpen()).toBe(true);
510
513
 
511
514
  const result = await buildMemoryRecall(
512
515
  "API design",
513
- "conv-degrade-test",
514
- configNoET,
516
+ "conv-test",
517
+ TEST_CONFIG,
515
518
  );
516
519
 
517
- // The local stub produces a non-null zero vector, so semanticSearch()
518
- // is still attempted. The open breaker causes withQdrantBreaker() to
519
- // throw, which sets semanticSearchFailed = true and propagates into
520
- // the degradation field with reason 'qdrant_unavailable'.
521
520
  expect(result.enabled).toBe(true);
521
+ // Semantic/hybrid search should be skipped
522
522
  expect(result.semanticHits).toBe(0);
523
- // Results are still returned from lexical sources
524
- expect(result.selectedCount).toBeGreaterThan(0);
525
- // Verify structured degradation metadata
526
- expect(result.degradation).toBeDefined();
527
- expect(result.degradation!.reason).toBe("qdrant_unavailable");
528
- expect(result.degradation!.semanticUnavailable).toBe(true);
529
- expect(result.degradation!.fallbackSources).toBeInstanceOf(Array);
530
- expect(result.degradation!.fallbackSources.length).toBeGreaterThan(0);
523
+ // Recency search finds candidates (but they may not pass tier thresholds
524
+ // since recency-only candidates have no semantic score component)
525
+ expect(result.recencyHits).toBeGreaterThan(0);
526
+ expect(result.mergedCount).toBeGreaterThan(0);
531
527
  });
532
528
 
533
- test("degradation status: entity fallback included when entity search enabled", async () => {
529
+ // -----------------------------------------------------------------------
530
+ // Degradation: embedding provider down
531
+ // -----------------------------------------------------------------------
532
+
533
+ test("embedding provider down: returns degraded when embeddings required", async () => {
534
534
  seedMemory();
535
535
 
536
- const entityConfig: AssistantConfig = {
536
+ const requiredEmbedConfig: AssistantConfig = {
537
537
  ...TEST_CONFIG,
538
538
  apiKeys: {
539
539
  ...TEST_CONFIG.apiKeys,
@@ -543,10 +543,6 @@ describe("Memory Retriever Degradation", () => {
543
543
  },
544
544
  memory: {
545
545
  ...TEST_CONFIG.memory,
546
- entity: {
547
- ...TEST_CONFIG.memory.entity,
548
- enabled: true,
549
- },
550
546
  embeddings: {
551
547
  ...TEST_CONFIG.memory.embeddings,
552
548
  provider: "openai",
@@ -557,57 +553,150 @@ describe("Memory Retriever Degradation", () => {
557
553
 
558
554
  const result = await buildMemoryRecall(
559
555
  "API design",
560
- "conv-degrade-test",
561
- entityConfig,
556
+ "conv-test",
557
+ requiredEmbedConfig,
562
558
  );
563
559
 
560
+ expect(result.enabled).toBe(true);
561
+ expect(result.degraded).toBe(true);
564
562
  expect(result.degradation).toBeDefined();
565
- expect(result.degradation!.fallbackSources).toContain("entity");
563
+ expect(result.degradation!.semanticUnavailable).toBe(true);
564
+ expect(result.degradation!.reason).toBe("embedding_provider_down");
565
+ expect(result.degradation!.fallbackSources).toContain("recency");
566
566
  });
567
567
 
568
- test("degradation status: entity fallback excluded when entity search disabled", async () => {
568
+ // -----------------------------------------------------------------------
569
+ // Signal abort
570
+ // -----------------------------------------------------------------------
571
+
572
+ test("abort: returns early when signal is aborted", async () => {
569
573
  seedMemory();
574
+ const controller = new AbortController();
575
+ controller.abort();
570
576
 
571
- const noEntityConfig: AssistantConfig = {
572
- ...TEST_CONFIG,
573
- apiKeys: {
574
- ...TEST_CONFIG.apiKeys,
575
- openai: "",
576
- gemini: "",
577
- ollama: "",
577
+ const result = await buildMemoryRecall(
578
+ "API design",
579
+ "conv-test",
580
+ TEST_CONFIG,
581
+ { signal: controller.signal },
582
+ );
583
+
584
+ expect(result.enabled).toBe(true);
585
+ expect(result.reason).toBe("memory.aborted");
586
+ expect(result.injectedText).toBe("");
587
+ });
588
+
589
+ // -----------------------------------------------------------------------
590
+ // stripMemoryRecallMessages with <memory_context> format
591
+ // -----------------------------------------------------------------------
592
+
593
+ test("stripMemoryRecallMessages: strips <memory_context> XML format", () => {
594
+ type Msg = {
595
+ role: "user" | "assistant";
596
+ content: Array<{ type: string; text?: string }>;
597
+ };
598
+ const recallText = "<memory_context>\n\n<relevant_context>\nsome context\n</relevant_context>\n\n</memory_context>";
599
+
600
+ const msgs: Msg[] = [
601
+ {
602
+ role: "user",
603
+ content: [{ type: "text", text: recallText }],
578
604
  },
579
- memory: {
580
- ...TEST_CONFIG.memory,
581
- entity: {
582
- ...TEST_CONFIG.memory.entity,
583
- enabled: false,
584
- },
585
- embeddings: {
586
- ...TEST_CONFIG.memory.embeddings,
587
- provider: "openai",
588
- required: true,
589
- },
605
+ {
606
+ role: "assistant",
607
+ content: [{ type: "text", text: "[Memory context loaded.]" }],
590
608
  },
609
+ {
610
+ role: "user",
611
+ content: [{ type: "text", text: "Hello, what do you know about me?" }],
612
+ },
613
+ ];
614
+
615
+ const cleaned = stripMemoryRecallMessages(msgs, recallText);
616
+ expect(cleaned).toHaveLength(1);
617
+ expect(cleaned[0].role).toBe("user");
618
+ expect(cleaned[0].content[0].text).toBe("Hello, what do you know about me?");
619
+ });
620
+
621
+ test("stripMemoryRecallMessages: handles <memory_context> with slightly different content", () => {
622
+ type Msg = {
623
+ role: "user" | "assistant";
624
+ content: Array<{ type: string; text?: string }>;
591
625
  };
626
+ const originalRecall = "<memory_context>\n\n<relevant_context>\noriginal\n</relevant_context>\n\n</memory_context>";
627
+ const actualRecall = "<memory_context>\n\n<relevant_context>\nslightly different\n</relevant_context>\n\n</memory_context>";
592
628
 
593
- const result = await buildMemoryRecall(
594
- "API design",
595
- "conv-degrade-test",
596
- noEntityConfig,
597
- );
629
+ const msgs: Msg[] = [
630
+ {
631
+ role: "user",
632
+ content: [{ type: "text", text: actualRecall }],
633
+ },
634
+ {
635
+ role: "assistant",
636
+ content: [{ type: "text", text: "[Memory context loaded.]" }],
637
+ },
638
+ {
639
+ role: "user",
640
+ content: [{ type: "text", text: "follow-up question" }],
641
+ },
642
+ ];
598
643
 
599
- expect(result.degradation).toBeDefined();
600
- expect(result.degradation!.fallbackSources).not.toContain("entity");
601
- expect(result.degradation!.fallbackSources).toContain("lexical");
602
- expect(result.degradation!.fallbackSources).toContain("recency");
603
- expect(result.degradation!.fallbackSources).toContain("direct_item");
644
+ // The <memory_context> tag-based matching should work even when exact text differs
645
+ const cleaned = stripMemoryRecallMessages(msgs, originalRecall);
646
+ expect(cleaned).toHaveLength(1);
647
+ expect(cleaned[0].content[0].text).toBe("follow-up question");
648
+ });
649
+
650
+ // -----------------------------------------------------------------------
651
+ // injectMemoryRecallAsSeparateMessage
652
+ // -----------------------------------------------------------------------
653
+
654
+ test("injectMemoryRecallAsSeparateMessage: injects context + ack before last user message", () => {
655
+ type Msg = {
656
+ role: "user" | "assistant";
657
+ content: Array<{ type: string; text?: string }>;
658
+ };
659
+ const msgs: Msg[] = [
660
+ {
661
+ role: "user",
662
+ content: [{ type: "text", text: "Hello" }],
663
+ },
664
+ ];
665
+
666
+ const recallText = "<memory_context>\n\n<relevant_context>\ntest\n</relevant_context>\n\n</memory_context>";
667
+ const result = injectMemoryRecallAsSeparateMessage(msgs, recallText);
668
+
669
+ expect(result).toHaveLength(3);
670
+ expect(result[0].role).toBe("user");
671
+ expect(result[0].content[0].text).toBe(recallText);
672
+ expect(result[1].role).toBe("assistant");
673
+ expect(result[1].content[0].text).toBe("[Memory context loaded.]");
674
+ expect(result[2].role).toBe("user");
675
+ expect(result[2].content[0].text).toBe("Hello");
676
+ });
677
+
678
+ test("injectMemoryRecallAsSeparateMessage: no-op for empty text", () => {
679
+ type Msg = {
680
+ role: "user" | "assistant";
681
+ content: Array<{ type: string; text?: string }>;
682
+ };
683
+ const msgs: Msg[] = [
684
+ {
685
+ role: "user",
686
+ content: [{ type: "text", text: "Hello" }],
687
+ },
688
+ ];
689
+
690
+ const result = injectMemoryRecallAsSeparateMessage(msgs, "");
691
+ expect(result).toHaveLength(1);
692
+ expect(result[0].content[0].text).toBe("Hello");
604
693
  });
605
694
 
606
695
  // -----------------------------------------------------------------------
607
696
  // Local embedding stub end-to-end
608
697
  // -----------------------------------------------------------------------
609
698
 
610
- test("local embedding stub: pipeline completes non-degraded with zero-vector embeddings", async () => {
699
+ test("local embedding: pipeline completes non-degraded", async () => {
611
700
  seedMemory();
612
701
 
613
702
  const localEmbedConfig: AssistantConfig = {
@@ -624,7 +713,7 @@ describe("Memory Retriever Degradation", () => {
624
713
 
625
714
  const result = await buildMemoryRecall(
626
715
  "API design",
627
- "conv-degrade-test",
716
+ "conv-test",
628
717
  localEmbedConfig,
629
718
  );
630
719
 
@@ -632,75 +721,7 @@ describe("Memory Retriever Degradation", () => {
632
721
  // pipeline proceeds non-degraded end-to-end.
633
722
  expect(result.enabled).toBe(true);
634
723
  expect(result.degraded).toBe(false);
635
- expect(result.selectedCount).toBeGreaterThan(0);
636
- });
637
-
638
- // -----------------------------------------------------------------------
639
- // Degraded results bypass the recall cache
640
- // -----------------------------------------------------------------------
641
-
642
- test("degraded results are not cached", async () => {
643
- seedMemory();
644
-
645
- // Trip the circuit breaker so semantic search fails
646
- const { withQdrantBreaker } =
647
- await import("../memory/qdrant-circuit-breaker.js");
648
- for (let i = 0; i < 5; i++) {
649
- try {
650
- await withQdrantBreaker(async () => {
651
- throw new Error("simulated qdrant failure");
652
- });
653
- } catch {
654
- // expected
655
- }
656
- }
657
- expect(isQdrantBreakerOpen()).toBe(true);
658
-
659
- // Disable early termination so semantic search is attempted and fails,
660
- // which sets semanticSearchFailed=true → result.degraded=true.
661
- const degradedConfig: AssistantConfig = {
662
- ...TEST_CONFIG,
663
- memory: {
664
- ...TEST_CONFIG.memory,
665
- retrieval: {
666
- ...TEST_CONFIG.memory.retrieval,
667
- earlyTermination: {
668
- ...TEST_CONFIG.memory.retrieval.earlyTermination,
669
- enabled: false,
670
- },
671
- },
672
- },
673
- };
674
-
675
- const first = await buildMemoryRecall(
676
- "API design cache test",
677
- "conv-degrade-test",
678
- degradedConfig,
679
- );
680
- expect(first.degraded).toBe(true);
681
- expect(first.selectedCount).toBeGreaterThan(0);
682
-
683
- // Second call with same inputs — should NOT be served from cache.
684
- // If the degraded result were incorrectly cached, this call would
685
- // return instantly from cache. Instead it should re-execute the
686
- // pipeline and produce a fresh degraded result.
687
- const second = await buildMemoryRecall(
688
- "API design cache test",
689
- "conv-degrade-test",
690
- degradedConfig,
691
- );
692
- expect(second.degraded).toBe(true);
693
- expect(second.selectedCount).toBeGreaterThan(0);
694
-
695
- // Verify the cache is empty for this query by resetting the breaker
696
- // and calling again — a non-degraded result should come back (proving
697
- // the degraded result was never cached).
698
- _resetQdrantBreaker();
699
- const recovered = await buildMemoryRecall(
700
- "API design cache test",
701
- "conv-degrade-test",
702
- degradedConfig,
703
- );
704
- expect(recovered.degraded).toBe(false);
724
+ // Recency search finds candidates; hybrid search returns empty from mock
725
+ expect(result.recencyHits).toBeGreaterThan(0);
705
726
  });
706
727
  });