@vellumai/assistant 0.4.49 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/memory.md +180 -119
  4. package/package.json +2 -2
  5. package/src/__tests__/agent-loop.test.ts +3 -1
  6. package/src/__tests__/anthropic-provider.test.ts +114 -23
  7. package/src/__tests__/approval-cascade.test.ts +1 -15
  8. package/src/__tests__/approval-routes-http.test.ts +2 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  10. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  11. package/src/__tests__/checker.test.ts +13 -0
  12. package/src/__tests__/config-schema.test.ts +1 -68
  13. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  14. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  15. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  16. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  17. package/src/__tests__/credential-vault-unit.test.ts +4 -0
  18. package/src/__tests__/credential-vault.test.ts +13 -1
  19. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  20. package/src/__tests__/date-context.test.ts +93 -77
  21. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  22. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  23. package/src/__tests__/history-repair.test.ts +245 -0
  24. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  25. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  26. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  27. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  28. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  29. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  30. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  31. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  32. package/src/__tests__/memory-regressions.test.ts +477 -2841
  33. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  34. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  35. package/src/__tests__/mime-builder.test.ts +28 -0
  36. package/src/__tests__/native-web-search.test.ts +1 -0
  37. package/src/__tests__/oauth-cli.test.ts +572 -5
  38. package/src/__tests__/oauth-store.test.ts +120 -6
  39. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  40. package/src/__tests__/registry.test.ts +0 -1
  41. package/src/__tests__/relay-server.test.ts +46 -1
  42. package/src/__tests__/schedule-tools.test.ts +32 -0
  43. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  44. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  45. package/src/__tests__/secure-keys.test.ts +7 -2
  46. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  47. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  48. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  49. package/src/__tests__/session-agent-loop.test.ts +19 -15
  50. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  51. package/src/__tests__/session-error.test.ts +124 -2
  52. package/src/__tests__/session-history-web-search.test.ts +918 -0
  53. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  54. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  55. package/src/__tests__/session-queue.test.ts +37 -27
  56. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  57. package/src/__tests__/session-slash-known.test.ts +1 -15
  58. package/src/__tests__/session-slash-queue.test.ts +1 -15
  59. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  60. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  61. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  62. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  63. package/src/__tests__/skills-install-extract.test.ts +93 -0
  64. package/src/__tests__/skillssh-registry.test.ts +451 -0
  65. package/src/__tests__/trust-store.test.ts +15 -0
  66. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  67. package/src/agent/ax-tree-compaction.test.ts +51 -0
  68. package/src/agent/loop.ts +39 -12
  69. package/src/approvals/AGENTS.md +1 -1
  70. package/src/approvals/guardian-request-resolvers.ts +14 -2
  71. package/src/bundler/compiler-tools.ts +66 -2
  72. package/src/calls/call-domain.ts +132 -0
  73. package/src/calls/call-store.ts +6 -0
  74. package/src/calls/relay-server.ts +43 -5
  75. package/src/calls/relay-setup-router.ts +17 -1
  76. package/src/calls/twilio-config.ts +1 -1
  77. package/src/calls/types.ts +3 -1
  78. package/src/cli/commands/doctor.ts +4 -3
  79. package/src/cli/commands/mcp.ts +46 -59
  80. package/src/cli/commands/memory.ts +16 -165
  81. package/src/cli/commands/oauth/apps.ts +31 -2
  82. package/src/cli/commands/oauth/connections.ts +431 -97
  83. package/src/cli/commands/oauth/providers.ts +15 -1
  84. package/src/cli/commands/sessions.ts +5 -2
  85. package/src/cli/commands/skills.ts +173 -1
  86. package/src/cli/http-client.ts +0 -20
  87. package/src/cli/main-screen.tsx +2 -2
  88. package/src/cli/program.ts +5 -6
  89. package/src/cli.ts +4 -10
  90. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  91. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  92. package/src/config/bundled-tool-registry.ts +2 -5
  93. package/src/config/schema.ts +1 -12
  94. package/src/config/schemas/memory-lifecycle.ts +0 -9
  95. package/src/config/schemas/memory-processing.ts +0 -180
  96. package/src/config/schemas/memory-retrieval.ts +32 -104
  97. package/src/config/schemas/memory.ts +0 -10
  98. package/src/config/types.ts +0 -4
  99. package/src/context/window-manager.ts +4 -1
  100. package/src/daemon/config-watcher.ts +61 -3
  101. package/src/daemon/daemon-control.ts +1 -1
  102. package/src/daemon/date-context.ts +114 -31
  103. package/src/daemon/handlers/sessions.ts +18 -13
  104. package/src/daemon/handlers/skills.ts +20 -1
  105. package/src/daemon/history-repair.ts +72 -8
  106. package/src/daemon/host-cu-proxy.ts +55 -26
  107. package/src/daemon/lifecycle.ts +31 -3
  108. package/src/daemon/mcp-reload-service.ts +2 -2
  109. package/src/daemon/message-types/computer-use.ts +1 -12
  110. package/src/daemon/message-types/memory.ts +4 -16
  111. package/src/daemon/message-types/messages.ts +1 -0
  112. package/src/daemon/message-types/sessions.ts +4 -0
  113. package/src/daemon/server.ts +12 -1
  114. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  115. package/src/daemon/session-agent-loop.ts +334 -48
  116. package/src/daemon/session-error.ts +89 -6
  117. package/src/daemon/session-history.ts +17 -7
  118. package/src/daemon/session-media-retry.ts +6 -2
  119. package/src/daemon/session-memory.ts +69 -149
  120. package/src/daemon/session-process.ts +10 -1
  121. package/src/daemon/session-runtime-assembly.ts +49 -19
  122. package/src/daemon/session-surfaces.ts +4 -1
  123. package/src/daemon/session-tool-setup.ts +7 -1
  124. package/src/daemon/session.ts +12 -2
  125. package/src/instrument.ts +61 -1
  126. package/src/memory/admin.ts +2 -191
  127. package/src/memory/canonical-guardian-store.ts +38 -2
  128. package/src/memory/conversation-crud.ts +0 -33
  129. package/src/memory/conversation-queries.ts +22 -3
  130. package/src/memory/db-init.ts +28 -0
  131. package/src/memory/embedding-backend.ts +84 -8
  132. package/src/memory/embedding-types.ts +9 -1
  133. package/src/memory/indexer.ts +7 -46
  134. package/src/memory/items-extractor.ts +274 -76
  135. package/src/memory/job-handlers/backfill.ts +2 -127
  136. package/src/memory/job-handlers/cleanup.ts +2 -16
  137. package/src/memory/job-handlers/extraction.ts +2 -138
  138. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  139. package/src/memory/job-handlers/summarization.ts +3 -148
  140. package/src/memory/job-utils.ts +21 -59
  141. package/src/memory/jobs-store.ts +1 -159
  142. package/src/memory/jobs-worker.ts +9 -52
  143. package/src/memory/migrations/104-core-indexes.ts +3 -3
  144. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  145. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  146. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  147. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  148. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  149. package/src/memory/migrations/154-drop-fts.ts +20 -0
  150. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  151. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  152. package/src/memory/migrations/index.ts +7 -0
  153. package/src/memory/qdrant-client.ts +148 -51
  154. package/src/memory/raw-query.ts +1 -1
  155. package/src/memory/retriever.test.ts +294 -273
  156. package/src/memory/retriever.ts +421 -645
  157. package/src/memory/schema/calls.ts +2 -0
  158. package/src/memory/schema/memory-core.ts +3 -48
  159. package/src/memory/schema/oauth.ts +2 -0
  160. package/src/memory/search/formatting.ts +263 -176
  161. package/src/memory/search/lexical.ts +1 -254
  162. package/src/memory/search/ranking.ts +0 -455
  163. package/src/memory/search/semantic.ts +100 -14
  164. package/src/memory/search/staleness.ts +47 -0
  165. package/src/memory/search/tier-classifier.ts +21 -0
  166. package/src/memory/search/types.ts +15 -77
  167. package/src/memory/task-memory-cleanup.ts +4 -6
  168. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  169. package/src/oauth/byo-connection.test.ts +8 -1
  170. package/src/oauth/oauth-store.ts +113 -27
  171. package/src/oauth/seed-providers.ts +6 -0
  172. package/src/oauth/token-persistence.ts +11 -3
  173. package/src/permissions/defaults.ts +1 -0
  174. package/src/permissions/trust-store.ts +23 -1
  175. package/src/playbooks/playbook-compiler.ts +1 -1
  176. package/src/prompts/system-prompt.ts +18 -2
  177. package/src/providers/anthropic/client.ts +56 -126
  178. package/src/providers/types.ts +7 -1
  179. package/src/runtime/AGENTS.md +9 -0
  180. package/src/runtime/auth/route-policy.ts +6 -3
  181. package/src/runtime/guardian-reply-router.ts +24 -22
  182. package/src/runtime/http-server.ts +2 -2
  183. package/src/runtime/invite-redemption-service.ts +19 -1
  184. package/src/runtime/invite-service.ts +25 -0
  185. package/src/runtime/pending-interactions.ts +2 -2
  186. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  187. package/src/runtime/routes/conversation-routes.ts +9 -1
  188. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  189. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  190. package/src/runtime/routes/memory-item-routes.ts +503 -0
  191. package/src/runtime/routes/session-management-routes.ts +3 -3
  192. package/src/runtime/routes/settings-routes.ts +2 -2
  193. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  194. package/src/runtime/routes/workspace-routes.ts +2 -1
  195. package/src/security/keychain-broker-client.ts +17 -4
  196. package/src/security/secure-keys.ts +25 -3
  197. package/src/security/token-manager.ts +36 -36
  198. package/src/skills/catalog-install.ts +74 -18
  199. package/src/skills/skillssh-registry.ts +503 -0
  200. package/src/tools/assets/search.ts +5 -1
  201. package/src/tools/computer-use/definitions.ts +0 -10
  202. package/src/tools/computer-use/registry.ts +1 -1
  203. package/src/tools/credentials/vault.ts +1 -3
  204. package/src/tools/memory/definitions.ts +4 -13
  205. package/src/tools/memory/handlers.test.ts +83 -103
  206. package/src/tools/memory/handlers.ts +50 -85
  207. package/src/tools/schedule/create.ts +8 -1
  208. package/src/tools/schedule/update.ts +8 -1
  209. package/src/tools/skills/load.ts +25 -2
  210. package/src/__tests__/clarification-resolver.test.ts +0 -193
  211. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  212. package/src/__tests__/conflict-policy.test.ts +0 -269
  213. package/src/__tests__/conflict-store.test.ts +0 -372
  214. package/src/__tests__/contradiction-checker.test.ts +0 -361
  215. package/src/__tests__/entity-extractor.test.ts +0 -211
  216. package/src/__tests__/entity-search.test.ts +0 -1117
  217. package/src/__tests__/profile-compiler.test.ts +0 -392
  218. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  219. package/src/__tests__/session-profile-injection.test.ts +0 -557
  220. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  221. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  222. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  223. package/src/daemon/session-conflict-gate.ts +0 -167
  224. package/src/daemon/session-dynamic-profile.ts +0 -77
  225. package/src/memory/clarification-resolver.ts +0 -417
  226. package/src/memory/conflict-intent.ts +0 -205
  227. package/src/memory/conflict-policy.ts +0 -127
  228. package/src/memory/conflict-store.ts +0 -410
  229. package/src/memory/contradiction-checker.ts +0 -508
  230. package/src/memory/entity-extractor.ts +0 -535
  231. package/src/memory/format-recall.ts +0 -47
  232. package/src/memory/fts-reconciler.ts +0 -165
  233. package/src/memory/job-handlers/conflict.ts +0 -200
  234. package/src/memory/profile-compiler.ts +0 -195
  235. package/src/memory/recall-cache.ts +0 -117
  236. package/src/memory/search/entity.ts +0 -535
  237. package/src/memory/search/query-expansion.test.ts +0 -70
  238. package/src/memory/search/query-expansion.ts +0 -118
  239. package/src/runtime/routes/mcp-routes.ts +0 -20
@@ -6,11 +6,17 @@ describe("HostCuProxy", () => {
6
6
  let proxy: InstanceType<typeof HostCuProxy>;
7
7
  let sentMessages: unknown[];
8
8
  let sendToClient: (msg: unknown) => void;
9
+ let resolvedRequestIds: string[];
9
10
 
10
11
  function setup(maxSteps?: number) {
11
12
  sentMessages = [];
13
+ resolvedRequestIds = [];
12
14
  sendToClient = (msg: unknown) => sentMessages.push(msg);
13
- proxy = new HostCuProxy(sendToClient as never, maxSteps);
15
+ proxy = new HostCuProxy(
16
+ sendToClient as never,
17
+ (requestId: string) => resolvedRequestIds.push(requestId),
18
+ maxSteps,
19
+ );
14
20
  }
15
21
 
16
22
  afterEach(() => {
@@ -364,6 +370,60 @@ describe("HostCuProxy", () => {
364
370
  );
365
371
  });
366
372
 
373
+ test("does not emit spurious warning on first observation", async () => {
374
+ setup();
375
+
376
+ // First ever request — no previous AX tree exists
377
+ proxy.recordAction("computer_use_click", { element_id: 1 });
378
+ const p1 = proxy.request(
379
+ "computer_use_click",
380
+ { element_id: 1 },
381
+ "session-1",
382
+ 1,
383
+ );
384
+ const sent1 = sentMessages[0] as Record<string, unknown>;
385
+ proxy.resolve(sent1.requestId as string, {
386
+ axTree: "Button [1]",
387
+ // No axDiff on first observation — this is normal, not unchanged
388
+ });
389
+ const result1 = await p1;
390
+ expect(result1.content).not.toContain("NO VISIBLE EFFECT");
391
+ });
392
+
393
+ test("skips unchanged warning after computer_use_wait", async () => {
394
+ setup();
395
+
396
+ // Establish previous AX tree
397
+ const p1 = proxy.request(
398
+ "computer_use_click",
399
+ { element_id: 1 },
400
+ "session-1",
401
+ 1,
402
+ );
403
+ proxy.recordAction("computer_use_click", { element_id: 1 });
404
+ const sent1 = sentMessages[0] as Record<string, unknown>;
405
+ proxy.resolve(sent1.requestId as string, {
406
+ axTree: "Button [1]",
407
+ });
408
+ await p1;
409
+
410
+ // Wait action with unchanged screen — should NOT warn
411
+ const p2 = proxy.request(
412
+ "computer_use_wait",
413
+ { duration_ms: 2000 },
414
+ "session-1",
415
+ 2,
416
+ );
417
+ proxy.recordAction("computer_use_wait", { duration_ms: 2000 });
418
+ const sent2 = sentMessages[1] as Record<string, unknown>;
419
+ proxy.resolve(sent2.requestId as string, {
420
+ axTree: "Button [1]",
421
+ // No axDiff — screen unchanged, but that's expected after wait
422
+ });
423
+ const result2 = await p2;
424
+ expect(result2.content).not.toContain("NO VISIBLE EFFECT");
425
+ });
426
+
367
427
  test("resets consecutive count when diff is present", async () => {
368
428
  setup();
369
429
 
@@ -507,6 +567,35 @@ describe("HostCuProxy", () => {
507
567
  expect(result.content).toMatch(/<\/ax-tree>$/m);
508
568
  });
509
569
 
570
+ test("includes secondaryWindows after AX tree with cross-window note", () => {
571
+ setup();
572
+
573
+ const result = proxy.formatObservation({
574
+ axTree: "Button [1]\nLabel [2]",
575
+ secondaryWindows: "Safari — Window [10]\n Link [11]",
576
+ });
577
+
578
+ expect(result.content).toContain("Safari — Window [10]");
579
+ expect(result.content).toContain("Link [11]");
580
+ expect(result.content).toContain(
581
+ "Note: The element [ID]s above are from other windows",
582
+ );
583
+ // secondaryWindows should appear after the AX tree
584
+ const axTreeEnd = result.content.indexOf("</ax-tree>");
585
+ const secondaryIdx = result.content.indexOf("Safari — Window [10]");
586
+ expect(axTreeEnd).toBeLessThan(secondaryIdx);
587
+ });
588
+
589
+ test("omits secondaryWindows section when field is absent", () => {
590
+ setup();
591
+
592
+ const result = proxy.formatObservation({
593
+ axTree: "Button [1]",
594
+ });
595
+
596
+ expect(result.content).not.toContain("other windows");
597
+ });
598
+
510
599
  test("includes diff when present", () => {
511
600
  setup();
512
601
 
@@ -576,7 +665,7 @@ describe("HostCuProxy", () => {
576
665
  // -------------------------------------------------------------------------
577
666
 
578
667
  describe("dispose", () => {
579
- test("rejects all pending requests", () => {
668
+ test("rejects all pending requests", async () => {
580
669
  setup();
581
670
 
582
671
  const resultPromise = proxy.request(
@@ -593,7 +682,80 @@ describe("HostCuProxy", () => {
593
682
  proxy.dispose();
594
683
 
595
684
  expect(proxy.hasPendingRequest(requestId)).toBe(false);
596
- expect(resultPromise).rejects.toThrow("Host CU proxy disposed");
685
+ await expect(resultPromise).rejects.toThrow("Host CU proxy disposed");
686
+ });
687
+ });
688
+
689
+ // -------------------------------------------------------------------------
690
+ // onInternalResolve callback
691
+ // -------------------------------------------------------------------------
692
+
693
+ describe("onInternalResolve", () => {
694
+ test("calls onInternalResolve when abort signal fires", async () => {
695
+ setup();
696
+
697
+ const controller = new AbortController();
698
+ const resultPromise = proxy.request(
699
+ "computer_use_click",
700
+ { element_id: 1 },
701
+ "session-1",
702
+ 1,
703
+ undefined,
704
+ controller.signal,
705
+ );
706
+
707
+ const sent = sentMessages[0] as Record<string, unknown>;
708
+ const requestId = sent.requestId as string;
709
+
710
+ controller.abort();
711
+
712
+ await resultPromise;
713
+ expect(resolvedRequestIds).toContain(requestId);
714
+ });
715
+
716
+ test("calls onInternalResolve on dispose", async () => {
717
+ setup();
718
+
719
+ const resultPromise = proxy.request(
720
+ "computer_use_click",
721
+ { element_id: 1 },
722
+ "session-1",
723
+ 1,
724
+ );
725
+
726
+ const sent = sentMessages[0] as Record<string, unknown>;
727
+ const requestId = sent.requestId as string;
728
+
729
+ proxy.dispose();
730
+
731
+ // dispose rejects pending requests — catch to avoid unhandled rejection
732
+ await resultPromise.catch(() => {});
733
+
734
+ expect(resolvedRequestIds).toContain(requestId);
735
+ });
736
+ });
737
+
738
+ // -------------------------------------------------------------------------
739
+ // isAvailable
740
+ // -------------------------------------------------------------------------
741
+
742
+ describe("isAvailable", () => {
743
+ test("returns false by default", () => {
744
+ setup();
745
+ expect(proxy.isAvailable()).toBe(false);
746
+ });
747
+
748
+ test("returns true after updateSender with clientConnected=true", () => {
749
+ setup();
750
+ proxy.updateSender(sendToClient as never, true);
751
+ expect(proxy.isAvailable()).toBe(true);
752
+ });
753
+
754
+ test("returns false after updateSender with clientConnected=false", () => {
755
+ setup();
756
+ proxy.updateSender(sendToClient as never, true);
757
+ proxy.updateSender(sendToClient as never, false);
758
+ expect(proxy.isAvailable()).toBe(false);
597
759
  });
598
760
  });
599
761
 
@@ -170,6 +170,7 @@ function makeSession(overrides: Record<string, unknown> = {}) {
170
170
  setHostBashProxy: () => {},
171
171
  setHostFileProxy: () => {},
172
172
  setHostCuProxy: () => {},
173
+ addPreactivatedSkillId: () => {},
173
174
  emitConfirmationStateChanged: () => {},
174
175
  emitActivityState: () => {},
175
176
  setTurnChannelContext: () => {},
@@ -23,7 +23,10 @@ mock.module("../util/logger.js", () => ({
23
23
  }),
24
24
  }));
25
25
 
26
- import { findContactChannel } from "../contacts/contact-store.js";
26
+ import {
27
+ findContactChannel,
28
+ upsertContact,
29
+ } from "../contacts/contact-store.js";
27
30
  import { upsertContactChannel } from "../contacts/contacts-write.js";
28
31
  import { getSqlite, initializeDb, resetDb } from "../memory/db.js";
29
32
  import {
@@ -278,6 +281,67 @@ describe("invite-redemption-service", () => {
278
281
  expect(outcome).toEqual({ ok: false, reason: "invalid_token" });
279
282
  });
280
283
 
284
+ test("returns invalid_token for a revoked guardian to prevent invite-based reactivation", () => {
285
+ const { rawToken } = createInvite({
286
+ sourceChannel: "telegram",
287
+ maxUses: 5,
288
+ });
289
+
290
+ // Pre-create a guardian contact with a revoked telegram channel
291
+ upsertContact({
292
+ displayName: "Guardian",
293
+ role: "guardian",
294
+ channels: [
295
+ {
296
+ type: "telegram",
297
+ address: "guardian-tg-id",
298
+ externalUserId: "guardian-tg-id",
299
+ status: "revoked",
300
+ },
301
+ ],
302
+ });
303
+
304
+ const outcome = redeemInvite({
305
+ rawToken,
306
+ sourceChannel: "telegram",
307
+ externalUserId: "guardian-tg-id",
308
+ });
309
+
310
+ // Must reject — guardian channels are managed via the binding flow, not invites
311
+ expect(outcome).toEqual({ ok: false, reason: "invalid_token" });
312
+ });
313
+
314
+ test("returns invalid_token for a revoked guardian via 6-digit invite code", () => {
315
+ const code = "123456";
316
+ const inviteCodeHash = hashVoiceCode(code);
317
+ createInvite({
318
+ sourceChannel: "telegram",
319
+ maxUses: 5,
320
+ inviteCodeHash,
321
+ });
322
+
323
+ upsertContact({
324
+ displayName: "Guardian",
325
+ role: "guardian",
326
+ channels: [
327
+ {
328
+ type: "telegram",
329
+ address: "guardian-code-id",
330
+ externalUserId: "guardian-code-id",
331
+ status: "revoked",
332
+ },
333
+ ],
334
+ });
335
+
336
+ const outcome = redeemInviteByCode({
337
+ code,
338
+ sourceChannel: "telegram",
339
+ externalUserId: "guardian-code-id",
340
+ });
341
+
342
+ expect(outcome).toEqual({ ok: false, reason: "invalid_token" });
343
+ });
344
+
281
345
  test("does not return already_member for a revoked member", () => {
282
346
  const { rawToken } = createInvite({
283
347
  sourceChannel: "telegram",
@@ -264,7 +264,7 @@ describe("keychain-broker-client", () => {
264
264
 
265
265
  const client = createBrokerClient();
266
266
  const result = await client.set("my-key", "new-value");
267
- expect(result).toBe(true);
267
+ expect(result).toEqual({ status: "ok" });
268
268
  });
269
269
 
270
270
  test("del returns true on success", async () => {
@@ -434,11 +434,11 @@ describe("keychain-broker-client", () => {
434
434
  expect(result).toBeNull();
435
435
  });
436
436
 
437
- test("set returns false when socket file does not exist", async () => {
437
+ test("set returns unreachable when socket file does not exist", async () => {
438
438
  writeFileSync(TOKEN_PATH, TEST_TOKEN);
439
439
  const client = createBrokerClient();
440
440
  const result = await client.set("test-key", "value");
441
- expect(result).toBe(false);
441
+ expect(result).toEqual({ status: "unreachable" });
442
442
  });
443
443
 
444
444
  test("del returns false when socket file does not exist", async () => {
@@ -470,7 +470,7 @@ describe("keychain-broker-client", () => {
470
470
  }
471
471
  const client = createBrokerClient();
472
472
  expect(await client.get("key")).toBeNull();
473
- expect(await client.set("key", "val")).toBe(false);
473
+ expect(await client.set("key", "val")).toEqual({ status: "unreachable" });
474
474
  expect(await client.del("key")).toBe(false);
475
475
  expect(await client.list()).toEqual([]);
476
476
  expect(await client.ping()).toBeNull();
@@ -5,8 +5,8 @@
5
5
  * - compaction.summaryCalls: 2-6
6
6
  * - compaction.estimatedInputTokens: < previousEstimatedInputTokens
7
7
  * - recall.injectedTokens: <= computed dynamic budget
8
- * - recall.lexicalHits: > 0
9
8
  * - recall.recencyHits: > 0
9
+ * - recall.enabled: true
10
10
  */
11
11
  import { mkdtempSync, rmSync } from "node:fs";
12
12
  import { tmpdir } from "node:os";
@@ -50,6 +50,37 @@ mock.module("../util/logger.js", () => ({
50
50
  }),
51
51
  }));
52
52
 
53
+ // Stub the local embedding backend so the real ONNX model never loads.
54
+ mock.module("../memory/embedding-local.js", () => ({
55
+ LocalEmbeddingBackend: class {
56
+ readonly provider = "local" as const;
57
+ readonly model: string;
58
+ constructor(model: string) {
59
+ this.model = model;
60
+ }
61
+ async embed(texts: string[]): Promise<number[][]> {
62
+ return texts.map(() => new Array(384).fill(0));
63
+ }
64
+ },
65
+ }));
66
+
67
+ // Dynamic Qdrant mock so the benchmark can inject high-scoring results
68
+ let mockQdrantResults: Array<{
69
+ id: string;
70
+ score: number;
71
+ payload: Record<string, unknown>;
72
+ }> = [];
73
+
74
+ mock.module("../memory/qdrant-client.js", () => ({
75
+ getQdrantClient: () => ({
76
+ searchWithFilter: async () => mockQdrantResults,
77
+ hybridSearch: async () => mockQdrantResults,
78
+ upsertPoints: async () => {},
79
+ deletePoints: async () => {},
80
+ }),
81
+ initQdrantClient: () => {},
82
+ }));
83
+
53
84
  function makeLongMessages(turns: number): Message[] {
54
85
  const rows: Message[] = [];
55
86
  const userTail =
@@ -161,18 +192,15 @@ describe("Memory context benchmark", () => {
161
192
  beforeEach(() => {
162
193
  const db = getDb();
163
194
  db.run("DELETE FROM memory_item_sources");
164
- db.run("DELETE FROM memory_item_entities");
165
- db.run("DELETE FROM memory_entity_relations");
166
- db.run("DELETE FROM memory_entities");
167
195
  db.run("DELETE FROM memory_embeddings");
168
- db.run("DELETE FROM memory_summaries");
169
196
  db.run("DELETE FROM memory_items");
170
- db.run("DELETE FROM memory_segment_fts");
197
+
171
198
  db.run("DELETE FROM memory_segments");
172
199
  db.run("DELETE FROM messages");
173
200
  db.run("DELETE FROM conversations");
174
201
  db.run("DELETE FROM memory_jobs");
175
202
  db.run("DELETE FROM memory_checkpoints");
203
+ mockQdrantResults = [];
176
204
  });
177
205
 
178
206
  afterAll(() => {
@@ -225,13 +253,7 @@ describe("Memory context benchmark", () => {
225
253
  },
226
254
  retrieval: {
227
255
  ...DEFAULT_CONFIG.memory.retrieval,
228
- lexicalTopK: 50,
229
- semanticTopK: 20,
230
256
  maxInjectTokens: 750,
231
- reranking: {
232
- ...DEFAULT_CONFIG.memory.retrieval.reranking,
233
- enabled: false,
234
- },
235
257
  dynamicBudget: {
236
258
  enabled: true,
237
259
  minInjectTokens: 160,
@@ -257,6 +279,23 @@ describe("Memory context benchmark", () => {
257
279
  recallConfig.memory.retrieval.dynamicBudget.maxInjectTokens,
258
280
  });
259
281
 
282
+ // Seed Qdrant mock with a representative decision segment so
283
+ // the benchmark validates content quality, not just pipeline completion.
284
+ mockQdrantResults = [
285
+ {
286
+ id: "emb-bench-decision",
287
+ score: 0.9,
288
+ payload: {
289
+ target_type: "segment",
290
+ target_id: "seg-bench-0",
291
+ text: "Decision 0: use Bun test fixtures for memory regressions and recall ranking checks.",
292
+ kind: "segment",
293
+ created_at: now,
294
+ last_seen_at: now,
295
+ },
296
+ },
297
+ ];
298
+
260
299
  const recall = await buildMemoryRecall(
261
300
  "What decisions did we make about Bun tests and retrieval diagnostics?",
262
301
  conversationId,
@@ -264,13 +303,13 @@ describe("Memory context benchmark", () => {
264
303
  { maxInjectTokensOverride: recallBudget },
265
304
  );
266
305
 
267
- // In CI, Qdrant/embedding providers are unavailable, so semantic search
268
- // fails and the retriever marks the result as degraded. The benchmark
269
- // cares about compaction and lexical recall quality, not embedding
270
- // availability, so we do not assert on `recall.degraded`.
271
- expect(recall.lexicalHits).toBeGreaterThan(0);
306
+ // Recency search finds conversation-scoped segments.
272
307
  expect(recall.recencyHits).toBeGreaterThan(0);
308
+ expect(recall.enabled).toBe(true);
309
+ // With Qdrant mock returning a high-scoring result, content should be injected.
273
310
  expect(recall.selectedCount).toBeGreaterThan(0);
311
+ expect(recall.injectedText).toContain("Bun test fixtures");
312
+ expect(recall.injectedTokens).toBeGreaterThan(0);
274
313
  expect(recall.injectedTokens).toBeLessThanOrEqual(recallBudget);
275
314
  expect(recallBudget).toBeGreaterThanOrEqual(
276
315
  recallConfig.memory.retrieval.dynamicBudget.minInjectTokens,
@@ -278,6 +317,5 @@ describe("Memory context benchmark", () => {
278
317
  expect(recallBudget).toBeLessThanOrEqual(
279
318
  recallConfig.memory.retrieval.dynamicBudget.maxInjectTokens,
280
319
  );
281
- expect(recall.injectedText).toContain("Bun test fixtures");
282
320
  });
283
321
  });