@vellumai/assistant 0.4.49 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/memory.md +180 -119
  4. package/package.json +2 -2
  5. package/src/__tests__/agent-loop.test.ts +3 -1
  6. package/src/__tests__/anthropic-provider.test.ts +114 -23
  7. package/src/__tests__/approval-cascade.test.ts +1 -15
  8. package/src/__tests__/approval-routes-http.test.ts +2 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  10. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  11. package/src/__tests__/checker.test.ts +13 -0
  12. package/src/__tests__/config-schema.test.ts +1 -68
  13. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  14. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  15. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  16. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  17. package/src/__tests__/credential-vault-unit.test.ts +4 -0
  18. package/src/__tests__/credential-vault.test.ts +13 -1
  19. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  20. package/src/__tests__/date-context.test.ts +93 -77
  21. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  22. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  23. package/src/__tests__/history-repair.test.ts +245 -0
  24. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  25. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  26. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  27. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  28. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  29. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  30. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  31. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  32. package/src/__tests__/memory-regressions.test.ts +477 -2841
  33. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  34. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  35. package/src/__tests__/mime-builder.test.ts +28 -0
  36. package/src/__tests__/native-web-search.test.ts +1 -0
  37. package/src/__tests__/oauth-cli.test.ts +572 -5
  38. package/src/__tests__/oauth-store.test.ts +120 -6
  39. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  40. package/src/__tests__/registry.test.ts +0 -1
  41. package/src/__tests__/relay-server.test.ts +46 -1
  42. package/src/__tests__/schedule-tools.test.ts +32 -0
  43. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  44. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  45. package/src/__tests__/secure-keys.test.ts +7 -2
  46. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  47. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  48. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  49. package/src/__tests__/session-agent-loop.test.ts +19 -15
  50. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  51. package/src/__tests__/session-error.test.ts +124 -2
  52. package/src/__tests__/session-history-web-search.test.ts +918 -0
  53. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  54. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  55. package/src/__tests__/session-queue.test.ts +37 -27
  56. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  57. package/src/__tests__/session-slash-known.test.ts +1 -15
  58. package/src/__tests__/session-slash-queue.test.ts +1 -15
  59. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  60. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  61. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  62. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  63. package/src/__tests__/skills-install-extract.test.ts +93 -0
  64. package/src/__tests__/skillssh-registry.test.ts +451 -0
  65. package/src/__tests__/trust-store.test.ts +15 -0
  66. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  67. package/src/agent/ax-tree-compaction.test.ts +51 -0
  68. package/src/agent/loop.ts +39 -12
  69. package/src/approvals/AGENTS.md +1 -1
  70. package/src/approvals/guardian-request-resolvers.ts +14 -2
  71. package/src/bundler/compiler-tools.ts +66 -2
  72. package/src/calls/call-domain.ts +132 -0
  73. package/src/calls/call-store.ts +6 -0
  74. package/src/calls/relay-server.ts +43 -5
  75. package/src/calls/relay-setup-router.ts +17 -1
  76. package/src/calls/twilio-config.ts +1 -1
  77. package/src/calls/types.ts +3 -1
  78. package/src/cli/commands/doctor.ts +4 -3
  79. package/src/cli/commands/mcp.ts +46 -59
  80. package/src/cli/commands/memory.ts +16 -165
  81. package/src/cli/commands/oauth/apps.ts +31 -2
  82. package/src/cli/commands/oauth/connections.ts +431 -97
  83. package/src/cli/commands/oauth/providers.ts +15 -1
  84. package/src/cli/commands/sessions.ts +5 -2
  85. package/src/cli/commands/skills.ts +173 -1
  86. package/src/cli/http-client.ts +0 -20
  87. package/src/cli/main-screen.tsx +2 -2
  88. package/src/cli/program.ts +5 -6
  89. package/src/cli.ts +4 -10
  90. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  91. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  92. package/src/config/bundled-tool-registry.ts +2 -5
  93. package/src/config/schema.ts +1 -12
  94. package/src/config/schemas/memory-lifecycle.ts +0 -9
  95. package/src/config/schemas/memory-processing.ts +0 -180
  96. package/src/config/schemas/memory-retrieval.ts +32 -104
  97. package/src/config/schemas/memory.ts +0 -10
  98. package/src/config/types.ts +0 -4
  99. package/src/context/window-manager.ts +4 -1
  100. package/src/daemon/config-watcher.ts +61 -3
  101. package/src/daemon/daemon-control.ts +1 -1
  102. package/src/daemon/date-context.ts +114 -31
  103. package/src/daemon/handlers/sessions.ts +18 -13
  104. package/src/daemon/handlers/skills.ts +20 -1
  105. package/src/daemon/history-repair.ts +72 -8
  106. package/src/daemon/host-cu-proxy.ts +55 -26
  107. package/src/daemon/lifecycle.ts +31 -3
  108. package/src/daemon/mcp-reload-service.ts +2 -2
  109. package/src/daemon/message-types/computer-use.ts +1 -12
  110. package/src/daemon/message-types/memory.ts +4 -16
  111. package/src/daemon/message-types/messages.ts +1 -0
  112. package/src/daemon/message-types/sessions.ts +4 -0
  113. package/src/daemon/server.ts +12 -1
  114. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  115. package/src/daemon/session-agent-loop.ts +334 -48
  116. package/src/daemon/session-error.ts +89 -6
  117. package/src/daemon/session-history.ts +17 -7
  118. package/src/daemon/session-media-retry.ts +6 -2
  119. package/src/daemon/session-memory.ts +69 -149
  120. package/src/daemon/session-process.ts +10 -1
  121. package/src/daemon/session-runtime-assembly.ts +49 -19
  122. package/src/daemon/session-surfaces.ts +4 -1
  123. package/src/daemon/session-tool-setup.ts +7 -1
  124. package/src/daemon/session.ts +12 -2
  125. package/src/instrument.ts +61 -1
  126. package/src/memory/admin.ts +2 -191
  127. package/src/memory/canonical-guardian-store.ts +38 -2
  128. package/src/memory/conversation-crud.ts +0 -33
  129. package/src/memory/conversation-queries.ts +22 -3
  130. package/src/memory/db-init.ts +28 -0
  131. package/src/memory/embedding-backend.ts +84 -8
  132. package/src/memory/embedding-types.ts +9 -1
  133. package/src/memory/indexer.ts +7 -46
  134. package/src/memory/items-extractor.ts +274 -76
  135. package/src/memory/job-handlers/backfill.ts +2 -127
  136. package/src/memory/job-handlers/cleanup.ts +2 -16
  137. package/src/memory/job-handlers/extraction.ts +2 -138
  138. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  139. package/src/memory/job-handlers/summarization.ts +3 -148
  140. package/src/memory/job-utils.ts +21 -59
  141. package/src/memory/jobs-store.ts +1 -159
  142. package/src/memory/jobs-worker.ts +9 -52
  143. package/src/memory/migrations/104-core-indexes.ts +3 -3
  144. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  145. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  146. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  147. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  148. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  149. package/src/memory/migrations/154-drop-fts.ts +20 -0
  150. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  151. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  152. package/src/memory/migrations/index.ts +7 -0
  153. package/src/memory/qdrant-client.ts +148 -51
  154. package/src/memory/raw-query.ts +1 -1
  155. package/src/memory/retriever.test.ts +294 -273
  156. package/src/memory/retriever.ts +421 -645
  157. package/src/memory/schema/calls.ts +2 -0
  158. package/src/memory/schema/memory-core.ts +3 -48
  159. package/src/memory/schema/oauth.ts +2 -0
  160. package/src/memory/search/formatting.ts +263 -176
  161. package/src/memory/search/lexical.ts +1 -254
  162. package/src/memory/search/ranking.ts +0 -455
  163. package/src/memory/search/semantic.ts +100 -14
  164. package/src/memory/search/staleness.ts +47 -0
  165. package/src/memory/search/tier-classifier.ts +21 -0
  166. package/src/memory/search/types.ts +15 -77
  167. package/src/memory/task-memory-cleanup.ts +4 -6
  168. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  169. package/src/oauth/byo-connection.test.ts +8 -1
  170. package/src/oauth/oauth-store.ts +113 -27
  171. package/src/oauth/seed-providers.ts +6 -0
  172. package/src/oauth/token-persistence.ts +11 -3
  173. package/src/permissions/defaults.ts +1 -0
  174. package/src/permissions/trust-store.ts +23 -1
  175. package/src/playbooks/playbook-compiler.ts +1 -1
  176. package/src/prompts/system-prompt.ts +18 -2
  177. package/src/providers/anthropic/client.ts +56 -126
  178. package/src/providers/types.ts +7 -1
  179. package/src/runtime/AGENTS.md +9 -0
  180. package/src/runtime/auth/route-policy.ts +6 -3
  181. package/src/runtime/guardian-reply-router.ts +24 -22
  182. package/src/runtime/http-server.ts +2 -2
  183. package/src/runtime/invite-redemption-service.ts +19 -1
  184. package/src/runtime/invite-service.ts +25 -0
  185. package/src/runtime/pending-interactions.ts +2 -2
  186. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  187. package/src/runtime/routes/conversation-routes.ts +9 -1
  188. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  189. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  190. package/src/runtime/routes/memory-item-routes.ts +503 -0
  191. package/src/runtime/routes/session-management-routes.ts +3 -3
  192. package/src/runtime/routes/settings-routes.ts +2 -2
  193. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  194. package/src/runtime/routes/workspace-routes.ts +2 -1
  195. package/src/security/keychain-broker-client.ts +17 -4
  196. package/src/security/secure-keys.ts +25 -3
  197. package/src/security/token-manager.ts +36 -36
  198. package/src/skills/catalog-install.ts +74 -18
  199. package/src/skills/skillssh-registry.ts +503 -0
  200. package/src/tools/assets/search.ts +5 -1
  201. package/src/tools/computer-use/definitions.ts +0 -10
  202. package/src/tools/computer-use/registry.ts +1 -1
  203. package/src/tools/credentials/vault.ts +1 -3
  204. package/src/tools/memory/definitions.ts +4 -13
  205. package/src/tools/memory/handlers.test.ts +83 -103
  206. package/src/tools/memory/handlers.ts +50 -85
  207. package/src/tools/schedule/create.ts +8 -1
  208. package/src/tools/schedule/update.ts +8 -1
  209. package/src/tools/skills/load.ts +25 -2
  210. package/src/__tests__/clarification-resolver.test.ts +0 -193
  211. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  212. package/src/__tests__/conflict-policy.test.ts +0 -269
  213. package/src/__tests__/conflict-store.test.ts +0 -372
  214. package/src/__tests__/contradiction-checker.test.ts +0 -361
  215. package/src/__tests__/entity-extractor.test.ts +0 -211
  216. package/src/__tests__/entity-search.test.ts +0 -1117
  217. package/src/__tests__/profile-compiler.test.ts +0 -392
  218. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  219. package/src/__tests__/session-profile-injection.test.ts +0 -557
  220. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  221. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  222. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  223. package/src/daemon/session-conflict-gate.ts +0 -167
  224. package/src/daemon/session-dynamic-profile.ts +0 -77
  225. package/src/memory/clarification-resolver.ts +0 -417
  226. package/src/memory/conflict-intent.ts +0 -205
  227. package/src/memory/conflict-policy.ts +0 -127
  228. package/src/memory/conflict-store.ts +0 -410
  229. package/src/memory/contradiction-checker.ts +0 -508
  230. package/src/memory/entity-extractor.ts +0 -535
  231. package/src/memory/format-recall.ts +0 -47
  232. package/src/memory/fts-reconciler.ts +0 -165
  233. package/src/memory/job-handlers/conflict.ts +0 -200
  234. package/src/memory/profile-compiler.ts +0 -195
  235. package/src/memory/recall-cache.ts +0 -117
  236. package/src/memory/search/entity.ts +0 -535
  237. package/src/memory/search/query-expansion.test.ts +0 -70
  238. package/src/memory/search/query-expansion.ts +0 -118
  239. package/src/runtime/routes/mcp-routes.ts +0 -20
@@ -4,6 +4,13 @@
4
4
  * Measures end-to-end memory recall time with varying database sizes.
5
5
  * Validates latency stays within acceptable bounds and token budget
6
6
  * enforcement works correctly.
7
+ *
8
+ * The new pipeline uses hybrid search (Qdrant) + recency search.
9
+ * With Qdrant mocked and semanticSearch returning empty, only recency
10
+ * search provides candidates. These recency-only candidates have
11
+ * low finalScore (< 0.6) and are filtered out by tier classification,
12
+ * so injectedText is empty. The tests verify pipeline completion,
13
+ * latency bounds, and correct handling of recency hits.
7
14
  */
8
15
  import { mkdtempSync, rmSync } from "node:fs";
9
16
  import { tmpdir } from "node:os";
@@ -38,8 +45,7 @@ mock.module("../util/logger.js", () => ({
38
45
  }),
39
46
  }));
40
47
 
41
- // Counter for semantic search invocations — used to verify early termination
42
- // skips the call entirely rather than relying on flaky wall-clock comparisons.
48
+ // Counter for semantic search invocations
43
49
  let semanticSearchCallCount = 0;
44
50
 
45
51
  mock.module("../memory/search/semantic.js", () => ({
@@ -63,6 +69,8 @@ mock.module("../memory/embedding-backend.js", () => ({
63
69
  model: "mock-embedding",
64
70
  vectors: [new Array(1536).fill(0)],
65
71
  }),
72
+ generateSparseEmbedding: () => ({ indices: [], values: [] }),
73
+ logMemoryEmbeddingWarning: () => {},
66
74
  }));
67
75
 
68
76
  import { DEFAULT_CONFIG } from "../config/defaults.js";
@@ -135,13 +143,7 @@ function makeConfig(overrides?: { maxInjectTokens?: number }): AssistantConfig {
135
143
  },
136
144
  retrieval: {
137
145
  ...DEFAULT_CONFIG.memory.retrieval,
138
- lexicalTopK: 50,
139
- semanticTopK: 20,
140
146
  maxInjectTokens: overrides?.maxInjectTokens ?? 750,
141
- reranking: {
142
- ...DEFAULT_CONFIG.memory.retrieval.reranking,
143
- enabled: false,
144
- },
145
147
  dynamicBudget: {
146
148
  enabled: false,
147
149
  minInjectTokens: 160,
@@ -161,13 +163,9 @@ describe("Memory retrieval benchmark", () => {
161
163
  beforeEach(() => {
162
164
  const db = getDb();
163
165
  db.run("DELETE FROM memory_item_sources");
164
- db.run("DELETE FROM memory_item_entities");
165
- db.run("DELETE FROM memory_entity_relations");
166
- db.run("DELETE FROM memory_entities");
167
166
  db.run("DELETE FROM memory_embeddings");
168
- db.run("DELETE FROM memory_summaries");
169
167
  db.run("DELETE FROM memory_items");
170
- db.run("DELETE FROM memory_segment_fts");
168
+
171
169
  db.run("DELETE FROM memory_segments");
172
170
  db.run("DELETE FROM messages");
173
171
  db.run("DELETE FROM conversations");
@@ -198,8 +196,8 @@ describe("Memory retrieval benchmark", () => {
198
196
 
199
197
  expect(recall.enabled).toBe(true);
200
198
  expect(recall.degraded).toBe(false);
201
- expect(recall.lexicalHits).toBeGreaterThan(0);
202
- expect(recall.selectedCount).toBeGreaterThan(0);
199
+ // Recency search finds conversation-scoped segments
200
+ expect(recall.recencyHits).toBeGreaterThan(0);
203
201
  // Relaxed threshold — guards against severe regressions, not precise benchmarking
204
202
  expect(recall.latencyMs).toBeLessThan(500);
205
203
  });
@@ -218,8 +216,7 @@ describe("Memory retrieval benchmark", () => {
218
216
 
219
217
  expect(recall.enabled).toBe(true);
220
218
  expect(recall.degraded).toBe(false);
221
- expect(recall.lexicalHits).toBeGreaterThan(0);
222
- expect(recall.selectedCount).toBeGreaterThan(0);
219
+ expect(recall.recencyHits).toBeGreaterThan(0);
223
220
  expect(recall.latencyMs).toBeLessThan(1000);
224
221
  });
225
222
 
@@ -237,8 +234,7 @@ describe("Memory retrieval benchmark", () => {
237
234
 
238
235
  expect(recall.enabled).toBe(true);
239
236
  expect(recall.degraded).toBe(false);
240
- expect(recall.lexicalHits).toBeGreaterThan(0);
241
- expect(recall.selectedCount).toBeGreaterThan(0);
237
+ expect(recall.recencyHits).toBeGreaterThan(0);
242
238
  expect(recall.latencyMs).toBeLessThan(2000);
243
239
  });
244
240
 
@@ -256,10 +252,11 @@ describe("Memory retrieval benchmark", () => {
256
252
  );
257
253
 
258
254
  expect(recall.enabled).toBe(true);
255
+ // With Qdrant mocked empty and recency-only candidates below tier threshold,
256
+ // injectedTokens is 0. Verify the budget cap is still respected.
259
257
  expect(recall.injectedTokens).toBeLessThanOrEqual(smallBudget);
260
- expect(recall.injectedTokens).toBeGreaterThan(0);
261
258
 
262
- // Compare against a larger budget to verify the cap actually constrains
259
+ // Compare against a larger budget
263
260
  const largeBudget = 2000;
264
261
  const largeConfig = makeConfig({ maxInjectTokens: largeBudget });
265
262
  const largeRecall = await buildMemoryRecall(
@@ -275,137 +272,20 @@ describe("Memory retrieval benchmark", () => {
275
272
  );
276
273
  });
277
274
 
278
- test("early termination reduces latency when applicable", async () => {
279
- const conversationId = "conv-bench-et";
280
- const now = 1_700_500_000_000;
281
- // Seed enough items that early termination can trigger
282
- seedMemoryItems(conversationId, 500, now);
283
-
284
- // Config with early termination enabled and low thresholds to trigger it
285
- const etConfig: AssistantConfig = {
286
- ...DEFAULT_CONFIG,
287
- memory: {
288
- ...DEFAULT_CONFIG.memory,
289
- embeddings: {
290
- ...DEFAULT_CONFIG.memory.embeddings,
291
- provider: "local" as const,
292
- required: false,
293
- },
294
- retrieval: {
295
- ...DEFAULT_CONFIG.memory.retrieval,
296
- lexicalTopK: 50,
297
- semanticTopK: 20,
298
- maxInjectTokens: 750,
299
- reranking: {
300
- ...DEFAULT_CONFIG.memory.retrieval.reranking,
301
- enabled: false,
302
- },
303
- dynamicBudget: {
304
- enabled: false,
305
- minInjectTokens: 160,
306
- maxInjectTokens: 750,
307
- targetHeadroomTokens: 900,
308
- },
309
- earlyTermination: {
310
- enabled: true,
311
- minCandidates: 5,
312
- minHighConfidence: 3,
313
- confidenceThreshold: 0.3,
314
- },
315
- },
316
- },
317
- };
318
-
319
- const recall = await buildMemoryRecall(
320
- "What do we know about topic-5 and keyword-3?",
321
- conversationId,
322
- etConfig,
323
- );
324
-
325
- expect(recall.enabled).toBe(true);
326
- expect(recall.earlyTerminated).toBe(true);
327
- // Semantic search should be skipped when early termination fires
328
- expect(recall.semanticHits).toBe(0);
329
- expect(recall.selectedCount).toBeGreaterThan(0);
330
- });
331
-
332
- test("early termination skips semantic search entirely", async () => {
333
- const conversationId = "conv-bench-et-skip";
275
+ test("semantic search is invoked when not early terminated", async () => {
276
+ const conversationId = "conv-bench-semantic";
334
277
  const now = 1_700_500_000_000;
335
- seedMemoryItems(conversationId, 500, now);
278
+ seedMemoryItems(conversationId, 100, now);
336
279
 
337
280
  const query = "What do we know about topic-5 and keyword-3?";
338
281
 
339
- const etConfig: AssistantConfig = {
340
- ...DEFAULT_CONFIG,
341
- memory: {
342
- ...DEFAULT_CONFIG.memory,
343
- embeddings: {
344
- ...DEFAULT_CONFIG.memory.embeddings,
345
- provider: "local" as const,
346
- required: false,
347
- },
348
- retrieval: {
349
- ...DEFAULT_CONFIG.memory.retrieval,
350
- lexicalTopK: 50,
351
- semanticTopK: 20,
352
- maxInjectTokens: 750,
353
- reranking: {
354
- ...DEFAULT_CONFIG.memory.retrieval.reranking,
355
- enabled: false,
356
- },
357
- dynamicBudget: {
358
- enabled: false,
359
- minInjectTokens: 160,
360
- maxInjectTokens: 750,
361
- targetHeadroomTokens: 900,
362
- },
363
- earlyTermination: {
364
- enabled: true,
365
- minCandidates: 5,
366
- minHighConfidence: 3,
367
- confidenceThreshold: 0.3,
368
- },
369
- },
370
- },
371
- };
372
-
373
- const noEtConfig: AssistantConfig = {
374
- ...etConfig,
375
- memory: {
376
- ...etConfig.memory,
377
- retrieval: {
378
- ...etConfig.memory.retrieval,
379
- earlyTermination: {
380
- enabled: false,
381
- minCandidates: 5,
382
- minHighConfidence: 3,
383
- confidenceThreshold: 0.3,
384
- },
385
- },
386
- },
387
- };
388
-
389
- // Run with ET enabled — semantic search should be skipped
390
- semanticSearchCallCount = 0;
391
- const etRecall = await buildMemoryRecall(query, conversationId, etConfig);
392
- const etCalls = semanticSearchCallCount;
393
-
394
- expect(etRecall.earlyTerminated).toBe(true);
395
- expect(etRecall.semanticHits).toBe(0);
396
- expect(etCalls).toBe(0);
397
-
398
- // Run without ET — semantic search should be invoked
282
+ // earlyTermination is always false in the new pipeline, so semantic
283
+ // search should always be invoked when a query vector is available.
399
284
  semanticSearchCallCount = 0;
400
- const baselineRecall = await buildMemoryRecall(
401
- query,
402
- conversationId,
403
- noEtConfig,
404
- );
405
- const baselineCalls = semanticSearchCallCount;
285
+ const config = makeConfig();
286
+ await buildMemoryRecall(query, conversationId, config);
406
287
 
407
- expect(baselineRecall.earlyTerminated).toBe(false);
408
- expect(baselineCalls).toBeGreaterThan(0);
288
+ expect(semanticSearchCallCount).toBeGreaterThan(0);
409
289
  });
410
290
 
411
291
  test("recall.latencyMs tracks wall-clock within 50% tolerance", async () => {
@@ -427,14 +307,17 @@ describe("Memory retrieval benchmark", () => {
427
307
  const wallMs = Date.now() - wallStart;
428
308
 
429
309
  expect(recall.enabled).toBe(true);
430
- expect(recall.latencyMs).toBeGreaterThan(0);
310
+ // latencyMs may be 0 when the pipeline runs very fast (< 1ms granularity)
311
+ expect(recall.latencyMs).toBeGreaterThanOrEqual(0);
431
312
 
432
313
  // Self-reported latencyMs should agree with wall-clock within 50%.
433
314
  // Tolerance is wide because both sides use Date.now() (integer ms),
434
315
  // so on fast runs the quantization error can be large relative to
435
316
  // total elapsed time.
436
- const ratio = recall.latencyMs / Math.max(wallMs, 1);
437
- expect(ratio).toBeGreaterThanOrEqual(0.5);
438
- expect(ratio).toBeLessThanOrEqual(1.5);
317
+ if (wallMs > 0) {
318
+ const ratio = recall.latencyMs / wallMs;
319
+ expect(ratio).toBeGreaterThanOrEqual(0.5);
320
+ expect(ratio).toBeLessThanOrEqual(1.5);
321
+ }
439
322
  });
440
323
  });
@@ -1,11 +1,10 @@
1
1
  /**
2
2
  * Atomicity tests for memory UPSERT paths.
3
3
  *
4
- * SQLite is single-writer, and indexMessageNow / createOrUpdatePendingConflict
5
- * are synchronous functions. Because every call runs to completion before the
6
- * next microtask starts, the Promise.all / Promise.resolve().then() pattern
7
- * used here does NOT create true concurrent execution — calls still run
8
- * sequentially.
4
+ * SQLite is single-writer, and indexMessageNow is a synchronous function.
5
+ * Because every call runs to completion before the next microtask starts, the
6
+ * Promise.all / Promise.resolve().then() pattern used here does NOT create
7
+ * true concurrent execution — calls still run sequentially.
9
8
  *
10
9
  * What these tests DO verify is the correctness of the ON CONFLICT /
11
10
  * IMMEDIATE-transaction logic when the same logical operation is repeated many
@@ -47,6 +46,7 @@ mock.module("../util/logger.js", () => ({
47
46
  mock.module("../memory/qdrant-client.js", () => ({
48
47
  getQdrantClient: () => ({
49
48
  searchWithFilter: async () => [],
49
+ hybridSearch: async () => [],
50
50
  upsertPoints: async () => {},
51
51
  deletePoints: async () => {},
52
52
  }),
@@ -75,10 +75,6 @@ mock.module("../config/loader.js", () => ({
75
75
  invalidateConfigCache: () => {},
76
76
  }));
77
77
 
78
- import {
79
- createOrUpdatePendingConflict,
80
- listPendingConflicts,
81
- } from "../memory/conflict-store.js";
82
78
  import { getDb, initializeDb, resetDb } from "../memory/db.js";
83
79
  import { indexMessageNow } from "../memory/indexer.js";
84
80
  import {
@@ -102,15 +98,9 @@ afterAll(() => {
102
98
 
103
99
  function resetTables() {
104
100
  const db = getDb();
105
- db.run("DELETE FROM memory_item_conflicts");
106
- db.run("DELETE FROM memory_item_entities");
107
- db.run("DELETE FROM memory_entity_relations");
108
- db.run("DELETE FROM memory_entities");
109
101
  db.run("DELETE FROM memory_item_sources");
110
102
  db.run("DELETE FROM memory_embeddings");
111
- db.run("DELETE FROM memory_summaries");
112
103
  db.run("DELETE FROM memory_items");
113
- db.run("DELETE FROM memory_segment_fts");
114
104
  db.run("DELETE FROM memory_segments");
115
105
  db.run("DELETE FROM memory_jobs");
116
106
  db.run("DELETE FROM messages");
@@ -151,50 +141,6 @@ function seedConversationAndMessage(
151
141
  .run();
152
142
  }
153
143
 
154
- /** Insert a pair of memory items that can serve as conflict participants. */
155
- function seedItemPair(
156
- suffix: string,
157
- scopeId = "default",
158
- ): { existingItemId: string; candidateItemId: string } {
159
- const db = getDb();
160
- const now = Date.now();
161
- const existingItemId = `existing-${suffix}`;
162
- const candidateItemId = `candidate-${suffix}`;
163
- db.insert(memoryItems)
164
- .values([
165
- {
166
- id: existingItemId,
167
- kind: "preference",
168
- subject: "framework preference",
169
- statement: `Existing statement ${suffix}`,
170
- status: "active",
171
- confidence: 0.8,
172
- importance: 0.7,
173
- fingerprint: `fp-existing-${suffix}`,
174
- verificationState: "assistant_inferred",
175
- scopeId,
176
- firstSeenAt: now,
177
- lastSeenAt: now,
178
- },
179
- {
180
- id: candidateItemId,
181
- kind: "preference",
182
- subject: "framework preference",
183
- statement: `Candidate statement ${suffix}`,
184
- status: "pending_clarification",
185
- confidence: 0.8,
186
- importance: 0.7,
187
- fingerprint: `fp-candidate-${suffix}`,
188
- verificationState: "assistant_inferred",
189
- scopeId,
190
- firstSeenAt: now,
191
- lastSeenAt: now,
192
- },
193
- ])
194
- .run();
195
- return { existingItemId, candidateItemId };
196
- }
197
-
198
144
  // ─────────────────────────────────────────────────────────────────────────────
199
145
  // Test suite: segment UPSERT atomicity under parallel indexer load
200
146
  // ─────────────────────────────────────────────────────────────────────────────
@@ -484,191 +430,6 @@ describe("segment UPSERT atomicity under repeated indexer invocations", () => {
484
430
  });
485
431
  });
486
432
 
487
- // ─────────────────────────────────────────────────────────────────────────────
488
- // Test suite: conflict creation UPSERT atomicity
489
- // ─────────────────────────────────────────────────────────────────────────────
490
-
491
- describe("conflict creation UPSERT atomicity", () => {
492
- beforeEach(() => {
493
- resetTables();
494
- });
495
-
496
- test("repeated createOrUpdatePendingConflict calls for the same pair produce exactly one conflict row", async () => {
497
- // Critical UPSERT path: the same conflict pair inserted multiple times
498
- // (e.g. duplicate worker dispatches, retries). The IMMEDIATE transaction
499
- // guard in createOrUpdatePendingConflict must ensure only one row exists.
500
- const pair = seedItemPair("parallel-create");
501
-
502
- // Call createOrUpdatePendingConflict N times for the same pair. Calls run
503
- // sequentially (synchronous); the test verifies that repeated calls produce
504
- // exactly one conflict row — the IMMEDIATE transaction deduplication path.
505
- const WORKERS = 10;
506
- const results = await Promise.all(
507
- Array.from({ length: WORKERS }, (_, i) =>
508
- Promise.resolve().then(() =>
509
- createOrUpdatePendingConflict({
510
- scopeId: "default",
511
- existingItemId: pair.existingItemId,
512
- candidateItemId: pair.candidateItemId,
513
- relationship: "ambiguous_contradiction",
514
- clarificationQuestion: `Worker ${i} discovered a contradiction`,
515
- }),
516
- ),
517
- ),
518
- );
519
-
520
- // All callers must receive the same conflict ID — the deduplication path
521
- // returns the existing row on the second and subsequent calls.
522
- const firstId = results[0].id;
523
- for (const result of results) {
524
- expect(result.id).toBe(firstId);
525
- }
526
-
527
- // Exactly one pending conflict row in the DB.
528
- const pending = listPendingConflicts("default");
529
- expect(pending).toHaveLength(1);
530
- expect(pending[0].id).toBe(firstId);
531
- });
532
-
533
- test("conflict creation for different pairs produces distinct rows without cross-contamination", async () => {
534
- // Each unique item pair must get its own conflict row — deduplication must
535
- // be scoped to the pair, not global. Also exercises the idempotent
536
- // insert-then-update path within each pair.
537
- const PAIR_COUNT = 6;
538
- const pairs = Array.from({ length: PAIR_COUNT }, (_, i) =>
539
- seedItemPair(`multi-pair-${i}`),
540
- );
541
-
542
- // For each pair, make two calls: one insert and one update. All calls run
543
- // sequentially. The test verifies that each pair ends up with exactly one
544
- // conflict row (no cross-pair contamination, idempotent update path works).
545
- await Promise.all(
546
- pairs.flatMap((pair) => [
547
- // First call: insert with 'contradiction'.
548
- Promise.resolve().then(() =>
549
- createOrUpdatePendingConflict({
550
- scopeId: "default",
551
- existingItemId: pair.existingItemId,
552
- candidateItemId: pair.candidateItemId,
553
- relationship: "contradiction",
554
- }),
555
- ),
556
- // Second call: update to 'ambiguous_contradiction' — tests the idempotent update path.
557
- Promise.resolve().then(() =>
558
- createOrUpdatePendingConflict({
559
- scopeId: "default",
560
- existingItemId: pair.existingItemId,
561
- candidateItemId: pair.candidateItemId,
562
- relationship: "ambiguous_contradiction",
563
- }),
564
- ),
565
- ]),
566
- );
567
-
568
- // Each pair must have produced exactly one pending conflict.
569
- const pending = listPendingConflicts("default");
570
- expect(pending).toHaveLength(PAIR_COUNT);
571
-
572
- // All conflict IDs must be unique.
573
- const ids = pending.map((c) => c.id);
574
- expect(new Set(ids).size).toBe(PAIR_COUNT);
575
-
576
- // Each returned conflict must reference the correct item pair.
577
- for (let i = 0; i < PAIR_COUNT; i++) {
578
- const pair = pairs[i];
579
- const found = pending.find(
580
- (c) =>
581
- c.existingItemId === pair.existingItemId &&
582
- c.candidateItemId === pair.candidateItemId,
583
- );
584
- expect(found).toBeDefined();
585
- // The update call ran after the insert, so relationship is ambiguous_contradiction.
586
- expect(found!.relationship).toBe("ambiguous_contradiction");
587
- }
588
- });
589
-
590
- test("repeated updates to the same conflict row converge to a consistent state", async () => {
591
- // Multiple update calls for the same conflict (e.g. repeated worker runs).
592
- // All updates must succeed (last writer wins is acceptable) and the row
593
- // must remain internally consistent.
594
- const pair = seedItemPair("concurrent-update");
595
- const first = createOrUpdatePendingConflict({
596
- scopeId: "default",
597
- existingItemId: pair.existingItemId,
598
- candidateItemId: pair.candidateItemId,
599
- relationship: "contradiction",
600
- clarificationQuestion: "Initial question",
601
- });
602
-
603
- // Call createOrUpdatePendingConflict N times against the same existing row.
604
- // Calls are sequential; the test verifies the row stays consistent (one row,
605
- // valid status/relationship) after repeated updates — last writer wins.
606
- const UPDATES = 8;
607
- const results = await Promise.all(
608
- Array.from({ length: UPDATES }, (_, i) =>
609
- Promise.resolve().then(() =>
610
- createOrUpdatePendingConflict({
611
- scopeId: "default",
612
- existingItemId: pair.existingItemId,
613
- candidateItemId: pair.candidateItemId,
614
- relationship: "ambiguous_contradiction",
615
- clarificationQuestion: `Updated question from worker ${i}`,
616
- }),
617
- ),
618
- ),
619
- );
620
-
621
- // All calls must return the same conflict ID.
622
- for (const result of results) {
623
- expect(result.id).toBe(first.id);
624
- }
625
-
626
- // Still exactly one row in the DB.
627
- const pending = listPendingConflicts("default");
628
- expect(pending).toHaveLength(1);
629
-
630
- // The row must be consistent: valid status, valid relationship.
631
- const conflict = pending[0];
632
- expect(conflict.status).toBe("pending_clarification");
633
- expect(conflict.relationship).toBe("ambiguous_contradiction");
634
- });
635
-
636
- test("scope isolation ensures conflicts in different scopes do not interfere", async () => {
637
- // Conflicts created in different scopes must not cross-contaminate each
638
- // other's conflict sets — scopeId must be part of the deduplication key.
639
- const SCOPES = ["scope-alpha", "scope-beta", "scope-gamma"];
640
- const scopePairs = SCOPES.map((scope) => ({
641
- scope,
642
- pair: seedItemPair(`scope-${scope}`, scope),
643
- }));
644
-
645
- // Make 3 calls per scope for all scopes. Calls run sequentially; the test
646
- // verifies that each scope produces exactly one conflict row and that there
647
- // is no cross-scope contamination from repeated same-scope calls.
648
- await Promise.all(
649
- scopePairs.flatMap(({ scope, pair }) =>
650
- Array.from({ length: 3 }, () =>
651
- Promise.resolve().then(() =>
652
- createOrUpdatePendingConflict({
653
- scopeId: scope,
654
- existingItemId: pair.existingItemId,
655
- candidateItemId: pair.candidateItemId,
656
- relationship: "contradiction",
657
- }),
658
- ),
659
- ),
660
- ),
661
- );
662
-
663
- for (const scope of SCOPES) {
664
- const pending = listPendingConflicts(scope);
665
- // Exactly one conflict per scope, no cross-scope leakage.
666
- expect(pending).toHaveLength(1);
667
- expect(pending[0].scopeId).toBe(scope);
668
- }
669
- });
670
- });
671
-
672
433
  // ─────────────────────────────────────────────────────────────────────────────
673
434
  // Test suite: memory segment job atomicity
674
435
  // ─────────────────────────────────────────────────────────────────────────────
@@ -81,4 +81,32 @@ describe("buildMultipartMime", () => {
81
81
  expect(decoded).toContain('filename="b.png"');
82
82
  expect(decoded).toContain("Content-Type: image/png");
83
83
  });
84
+
85
+ test("sanitizes CRLF from header values to prevent header injection", () => {
86
+ const result = buildMultipartMime({
87
+ to: "victim@example.com\r\nBcc: attacker@example.com",
88
+ subject: "Fwd: Hello\r\nCc: attacker@example.com",
89
+ body: "Body",
90
+ cc: "team@example.com\nX-Injected: yes",
91
+ bcc: "audit@example.com\r\nX-Another: value",
92
+ inReplyTo: "<id@example.com>\nReferences: <evil@example.com>",
93
+ attachments: [],
94
+ });
95
+
96
+ const decoded = Buffer.from(
97
+ result.replace(/-/g, "+").replace(/_/g, "/"),
98
+ "base64",
99
+ ).toString("utf-8");
100
+
101
+ expect(decoded).toContain("To: victim@example.com Bcc: attacker@example.com");
102
+ expect(decoded).toContain("Subject: Fwd: Hello Cc: attacker@example.com");
103
+ expect(decoded).toContain("Cc: team@example.com X-Injected: yes");
104
+ expect(decoded).toContain("Bcc: audit@example.com X-Another: value");
105
+ expect(decoded).toContain(
106
+ "In-Reply-To: <id@example.com> References: <evil@example.com>",
107
+ );
108
+ expect(decoded).not.toContain("\r\nBcc: attacker@example.com");
109
+ expect(decoded).not.toContain("\r\nCc: attacker@example.com");
110
+ });
111
+
84
112
  });
@@ -431,6 +431,7 @@ describe("Native Web Search — Streaming Events", () => {
431
431
  type: "server_tool_start",
432
432
  name: "web_search",
433
433
  toolUseId: "stu_stream123",
434
+ input: {},
434
435
  });
435
436
  });
436
437