@vellumai/assistant 0.4.49 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/memory.md +180 -119
  4. package/package.json +2 -2
  5. package/src/__tests__/agent-loop.test.ts +3 -1
  6. package/src/__tests__/anthropic-provider.test.ts +114 -23
  7. package/src/__tests__/approval-cascade.test.ts +1 -15
  8. package/src/__tests__/approval-routes-http.test.ts +2 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  10. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  11. package/src/__tests__/checker.test.ts +13 -0
  12. package/src/__tests__/config-schema.test.ts +1 -68
  13. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  14. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  15. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  16. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  17. package/src/__tests__/credential-vault-unit.test.ts +4 -0
  18. package/src/__tests__/credential-vault.test.ts +13 -1
  19. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  20. package/src/__tests__/date-context.test.ts +93 -77
  21. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  22. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  23. package/src/__tests__/history-repair.test.ts +245 -0
  24. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  25. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  26. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  27. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  28. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  29. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  30. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  31. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  32. package/src/__tests__/memory-regressions.test.ts +477 -2841
  33. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  34. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  35. package/src/__tests__/mime-builder.test.ts +28 -0
  36. package/src/__tests__/native-web-search.test.ts +1 -0
  37. package/src/__tests__/oauth-cli.test.ts +572 -5
  38. package/src/__tests__/oauth-store.test.ts +120 -6
  39. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  40. package/src/__tests__/registry.test.ts +0 -1
  41. package/src/__tests__/relay-server.test.ts +46 -1
  42. package/src/__tests__/schedule-tools.test.ts +32 -0
  43. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  44. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  45. package/src/__tests__/secure-keys.test.ts +7 -2
  46. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  47. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  48. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  49. package/src/__tests__/session-agent-loop.test.ts +19 -15
  50. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  51. package/src/__tests__/session-error.test.ts +124 -2
  52. package/src/__tests__/session-history-web-search.test.ts +918 -0
  53. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  54. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  55. package/src/__tests__/session-queue.test.ts +37 -27
  56. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  57. package/src/__tests__/session-slash-known.test.ts +1 -15
  58. package/src/__tests__/session-slash-queue.test.ts +1 -15
  59. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  60. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  61. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  62. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  63. package/src/__tests__/skills-install-extract.test.ts +93 -0
  64. package/src/__tests__/skillssh-registry.test.ts +451 -0
  65. package/src/__tests__/trust-store.test.ts +15 -0
  66. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  67. package/src/agent/ax-tree-compaction.test.ts +51 -0
  68. package/src/agent/loop.ts +39 -12
  69. package/src/approvals/AGENTS.md +1 -1
  70. package/src/approvals/guardian-request-resolvers.ts +14 -2
  71. package/src/bundler/compiler-tools.ts +66 -2
  72. package/src/calls/call-domain.ts +132 -0
  73. package/src/calls/call-store.ts +6 -0
  74. package/src/calls/relay-server.ts +43 -5
  75. package/src/calls/relay-setup-router.ts +17 -1
  76. package/src/calls/twilio-config.ts +1 -1
  77. package/src/calls/types.ts +3 -1
  78. package/src/cli/commands/doctor.ts +4 -3
  79. package/src/cli/commands/mcp.ts +46 -59
  80. package/src/cli/commands/memory.ts +16 -165
  81. package/src/cli/commands/oauth/apps.ts +31 -2
  82. package/src/cli/commands/oauth/connections.ts +431 -97
  83. package/src/cli/commands/oauth/providers.ts +15 -1
  84. package/src/cli/commands/sessions.ts +5 -2
  85. package/src/cli/commands/skills.ts +173 -1
  86. package/src/cli/http-client.ts +0 -20
  87. package/src/cli/main-screen.tsx +2 -2
  88. package/src/cli/program.ts +5 -6
  89. package/src/cli.ts +4 -10
  90. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  91. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  92. package/src/config/bundled-tool-registry.ts +2 -5
  93. package/src/config/schema.ts +1 -12
  94. package/src/config/schemas/memory-lifecycle.ts +0 -9
  95. package/src/config/schemas/memory-processing.ts +0 -180
  96. package/src/config/schemas/memory-retrieval.ts +32 -104
  97. package/src/config/schemas/memory.ts +0 -10
  98. package/src/config/types.ts +0 -4
  99. package/src/context/window-manager.ts +4 -1
  100. package/src/daemon/config-watcher.ts +61 -3
  101. package/src/daemon/daemon-control.ts +1 -1
  102. package/src/daemon/date-context.ts +114 -31
  103. package/src/daemon/handlers/sessions.ts +18 -13
  104. package/src/daemon/handlers/skills.ts +20 -1
  105. package/src/daemon/history-repair.ts +72 -8
  106. package/src/daemon/host-cu-proxy.ts +55 -26
  107. package/src/daemon/lifecycle.ts +31 -3
  108. package/src/daemon/mcp-reload-service.ts +2 -2
  109. package/src/daemon/message-types/computer-use.ts +1 -12
  110. package/src/daemon/message-types/memory.ts +4 -16
  111. package/src/daemon/message-types/messages.ts +1 -0
  112. package/src/daemon/message-types/sessions.ts +4 -0
  113. package/src/daemon/server.ts +12 -1
  114. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  115. package/src/daemon/session-agent-loop.ts +334 -48
  116. package/src/daemon/session-error.ts +89 -6
  117. package/src/daemon/session-history.ts +17 -7
  118. package/src/daemon/session-media-retry.ts +6 -2
  119. package/src/daemon/session-memory.ts +69 -149
  120. package/src/daemon/session-process.ts +10 -1
  121. package/src/daemon/session-runtime-assembly.ts +49 -19
  122. package/src/daemon/session-surfaces.ts +4 -1
  123. package/src/daemon/session-tool-setup.ts +7 -1
  124. package/src/daemon/session.ts +12 -2
  125. package/src/instrument.ts +61 -1
  126. package/src/memory/admin.ts +2 -191
  127. package/src/memory/canonical-guardian-store.ts +38 -2
  128. package/src/memory/conversation-crud.ts +0 -33
  129. package/src/memory/conversation-queries.ts +22 -3
  130. package/src/memory/db-init.ts +28 -0
  131. package/src/memory/embedding-backend.ts +84 -8
  132. package/src/memory/embedding-types.ts +9 -1
  133. package/src/memory/indexer.ts +7 -46
  134. package/src/memory/items-extractor.ts +274 -76
  135. package/src/memory/job-handlers/backfill.ts +2 -127
  136. package/src/memory/job-handlers/cleanup.ts +2 -16
  137. package/src/memory/job-handlers/extraction.ts +2 -138
  138. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  139. package/src/memory/job-handlers/summarization.ts +3 -148
  140. package/src/memory/job-utils.ts +21 -59
  141. package/src/memory/jobs-store.ts +1 -159
  142. package/src/memory/jobs-worker.ts +9 -52
  143. package/src/memory/migrations/104-core-indexes.ts +3 -3
  144. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  145. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  146. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  147. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  148. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  149. package/src/memory/migrations/154-drop-fts.ts +20 -0
  150. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  151. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  152. package/src/memory/migrations/index.ts +7 -0
  153. package/src/memory/qdrant-client.ts +148 -51
  154. package/src/memory/raw-query.ts +1 -1
  155. package/src/memory/retriever.test.ts +294 -273
  156. package/src/memory/retriever.ts +421 -645
  157. package/src/memory/schema/calls.ts +2 -0
  158. package/src/memory/schema/memory-core.ts +3 -48
  159. package/src/memory/schema/oauth.ts +2 -0
  160. package/src/memory/search/formatting.ts +263 -176
  161. package/src/memory/search/lexical.ts +1 -254
  162. package/src/memory/search/ranking.ts +0 -455
  163. package/src/memory/search/semantic.ts +100 -14
  164. package/src/memory/search/staleness.ts +47 -0
  165. package/src/memory/search/tier-classifier.ts +21 -0
  166. package/src/memory/search/types.ts +15 -77
  167. package/src/memory/task-memory-cleanup.ts +4 -6
  168. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  169. package/src/oauth/byo-connection.test.ts +8 -1
  170. package/src/oauth/oauth-store.ts +113 -27
  171. package/src/oauth/seed-providers.ts +6 -0
  172. package/src/oauth/token-persistence.ts +11 -3
  173. package/src/permissions/defaults.ts +1 -0
  174. package/src/permissions/trust-store.ts +23 -1
  175. package/src/playbooks/playbook-compiler.ts +1 -1
  176. package/src/prompts/system-prompt.ts +18 -2
  177. package/src/providers/anthropic/client.ts +56 -126
  178. package/src/providers/types.ts +7 -1
  179. package/src/runtime/AGENTS.md +9 -0
  180. package/src/runtime/auth/route-policy.ts +6 -3
  181. package/src/runtime/guardian-reply-router.ts +24 -22
  182. package/src/runtime/http-server.ts +2 -2
  183. package/src/runtime/invite-redemption-service.ts +19 -1
  184. package/src/runtime/invite-service.ts +25 -0
  185. package/src/runtime/pending-interactions.ts +2 -2
  186. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  187. package/src/runtime/routes/conversation-routes.ts +9 -1
  188. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  189. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  190. package/src/runtime/routes/memory-item-routes.ts +503 -0
  191. package/src/runtime/routes/session-management-routes.ts +3 -3
  192. package/src/runtime/routes/settings-routes.ts +2 -2
  193. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  194. package/src/runtime/routes/workspace-routes.ts +2 -1
  195. package/src/security/keychain-broker-client.ts +17 -4
  196. package/src/security/secure-keys.ts +25 -3
  197. package/src/security/token-manager.ts +36 -36
  198. package/src/skills/catalog-install.ts +74 -18
  199. package/src/skills/skillssh-registry.ts +503 -0
  200. package/src/tools/assets/search.ts +5 -1
  201. package/src/tools/computer-use/definitions.ts +0 -10
  202. package/src/tools/computer-use/registry.ts +1 -1
  203. package/src/tools/credentials/vault.ts +1 -3
  204. package/src/tools/memory/definitions.ts +4 -13
  205. package/src/tools/memory/handlers.test.ts +83 -103
  206. package/src/tools/memory/handlers.ts +50 -85
  207. package/src/tools/schedule/create.ts +8 -1
  208. package/src/tools/schedule/update.ts +8 -1
  209. package/src/tools/skills/load.ts +25 -2
  210. package/src/__tests__/clarification-resolver.test.ts +0 -193
  211. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  212. package/src/__tests__/conflict-policy.test.ts +0 -269
  213. package/src/__tests__/conflict-store.test.ts +0 -372
  214. package/src/__tests__/contradiction-checker.test.ts +0 -361
  215. package/src/__tests__/entity-extractor.test.ts +0 -211
  216. package/src/__tests__/entity-search.test.ts +0 -1117
  217. package/src/__tests__/profile-compiler.test.ts +0 -392
  218. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  219. package/src/__tests__/session-profile-injection.test.ts +0 -557
  220. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  221. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  222. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  223. package/src/daemon/session-conflict-gate.ts +0 -167
  224. package/src/daemon/session-dynamic-profile.ts +0 -77
  225. package/src/memory/clarification-resolver.ts +0 -417
  226. package/src/memory/conflict-intent.ts +0 -205
  227. package/src/memory/conflict-policy.ts +0 -127
  228. package/src/memory/conflict-store.ts +0 -410
  229. package/src/memory/contradiction-checker.ts +0 -508
  230. package/src/memory/entity-extractor.ts +0 -535
  231. package/src/memory/format-recall.ts +0 -47
  232. package/src/memory/fts-reconciler.ts +0 -165
  233. package/src/memory/job-handlers/conflict.ts +0 -200
  234. package/src/memory/profile-compiler.ts +0 -195
  235. package/src/memory/recall-cache.ts +0 -117
  236. package/src/memory/search/entity.ts +0 -535
  237. package/src/memory/search/query-expansion.test.ts +0 -70
  238. package/src/memory/search/query-expansion.ts +0 -118
  239. package/src/runtime/routes/mcp-routes.ts +0 -20
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.4.49",
3
+ "version": "0.4.50",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": "./src/index.ts"
@@ -22,7 +22,7 @@
22
22
  "test:stable": "EXCLUDE_EXPERIMENTAL=true bash scripts/test.sh",
23
23
  "test:bench": "find src/__tests__ -maxdepth 1 -type f -name '*.benchmark.test.ts' -print0 | xargs -0 -P 1 -I {} bun test {}",
24
24
  "test:filesystem-tools": "bash scripts/test-filesystem-tools.sh",
25
- "postinstall": "cd .. && git config core.hooksPath || git config core.hooksPath .githooks 2>/dev/null || true"
25
+ "postinstall": "cd .. && (git config core.hooksPath || git config core.hooksPath .githooks 2>/dev/null || true) && (bun run meta/feature-flags/sync-bundled-copies.ts 2>/dev/null || true)"
26
26
  },
27
27
  "dependencies": {
28
28
  "@anthropic-ai/claude-agent-sdk": "^0.2.42",
@@ -869,11 +869,13 @@ describe("AgentLoop", () => {
869
869
  await loop.run([userMessage], () => {}, undefined, undefined, onCheckpoint);
870
870
 
871
871
  expect(checkpoints).toHaveLength(1);
872
- expect(checkpoints[0]).toEqual({
872
+ expect(checkpoints[0]).toMatchObject({
873
873
  turnIndex: 0,
874
874
  toolCount: 1,
875
875
  hasToolUse: true,
876
876
  });
877
+ // history should contain the full conversation at checkpoint time
878
+ expect(checkpoints[0].history.length).toBeGreaterThanOrEqual(3);
877
879
  });
878
880
 
879
881
  // 17. Returning 'continue' lets the loop proceed normally
@@ -392,8 +392,8 @@ describe("AnthropicProvider — Cache-Control Characterization", () => {
392
392
  expect(user.content[1].cache_control).toEqual({ type: "ephemeral" });
393
393
  });
394
394
 
395
- test("workspace + dynamic profile: cache still lands on trailing block", async () => {
396
- // Simulates workspace prepended + dynamic profile appended
395
+ test("workspace + multi-block user message: cache still lands on trailing block", async () => {
396
+ // Simulates workspace prepended + extra context block appended
397
397
  const injectedUser: Message = {
398
398
  role: "user",
399
399
  content: [
@@ -670,7 +670,9 @@ describe("AnthropicProvider — Cache-Control Characterization", () => {
670
670
  // ensureToolPairing — server_tool_use / web_search_tool_result pairing
671
671
  // -----------------------------------------------------------------------
672
672
 
673
- test("server_tool_use with missing web_search_tool_result gets synthetic result injected", async () => {
673
+ test("server_tool_use without web_search_tool_result passes through as-is (no synthetic injection)", async () => {
674
+ // Server-side tools are self-paired within the assistant message.
675
+ // ensureToolPairing should NOT inject synthetic results for them.
674
676
  const messages: Message[] = [
675
677
  userMsg("Search for something"),
676
678
  {
@@ -684,7 +686,7 @@ describe("AnthropicProvider — Cache-Control Characterization", () => {
684
686
  },
685
687
  ],
686
688
  },
687
- userMsg("Thanks"), // user text but no web_search_tool_result
689
+ userMsg("Thanks"),
688
690
  ];
689
691
  await provider.sendMessage(messages);
690
692
 
@@ -697,14 +699,17 @@ describe("AnthropicProvider — Cache-Control Characterization", () => {
697
699
  }>;
698
700
  }>;
699
701
 
700
- // The user message after assistant should contain a synthetic web_search_tool_result
701
- const userAfterAssistant = sent[2];
702
- expect(userAfterAssistant.role).toBe("user");
703
- expect(userAfterAssistant.content[0].type).toBe("web_search_tool_result");
704
- expect(userAfterAssistant.content[0].tool_use_id).toBe("srvtoolu_abc123");
702
+ // server_tool_use stays in the assistant message, no synthetic result injected
703
+ expect(sent).toHaveLength(3);
704
+ expect(sent[1].role).toBe("assistant");
705
+ expect(sent[1].content[0].type).toBe("server_tool_use");
706
+ expect(sent[2].role).toBe("user");
707
+ expect(sent[2].content[0].type).toBe("text"); // original user text, not a synthetic result
705
708
  });
706
709
 
707
- test("server_tool_use at end of messages gets synthetic web_search_tool_result appended", async () => {
710
+ test("server_tool_use at end of messages is not modified (no synthetic append)", async () => {
711
+ // Server-side tools don't need cross-message pairing, so no synthetic
712
+ // user message should be appended.
708
713
  const messages: Message[] = [
709
714
  userMsg("Search something"),
710
715
  {
@@ -726,11 +731,10 @@ describe("AnthropicProvider — Cache-Control Characterization", () => {
726
731
  content: Array<{ type: string; tool_use_id?: string }>;
727
732
  }>;
728
733
 
729
- // A synthetic user message should have been appended
730
- expect(sent).toHaveLength(3);
731
- expect(sent[2].role).toBe("user");
732
- expect(sent[2].content[0].type).toBe("web_search_tool_result");
733
- expect(sent[2].content[0].tool_use_id).toBe("srvtoolu_end");
734
+ // No synthetic user message appended just the original 2 messages
735
+ expect(sent).toHaveLength(2);
736
+ expect(sent[1].role).toBe("assistant");
737
+ expect(sent[1].content[0].type).toBe("server_tool_use");
734
738
  });
735
739
 
736
740
  test("server_tool_use with matching web_search_tool_result passes through unchanged", async () => {
@@ -781,7 +785,85 @@ describe("AnthropicProvider — Cache-Control Characterization", () => {
781
785
  expect(resultBlocks[0].tool_use_id).toBe("srvtoolu_ok");
782
786
  });
783
787
 
784
- test("mixed tool_use and server_tool_use with partial results gets missing ones filled", async () => {
788
+ test("server_tool_use + web_search_tool_result + tool_use in same assistant message stays intact", async () => {
789
+ // This is the core bug scenario: Anthropic returns server_tool_use,
790
+ // web_search_tool_result, text, and tool_use all in one assistant message.
791
+ // The server pair must stay together in the assistant message.
792
+ const messages: Message[] = [
793
+ userMsg("Search and fetch"),
794
+ {
795
+ role: "assistant",
796
+ content: [
797
+ {
798
+ type: "server_tool_use",
799
+ id: "srvtoolu_search",
800
+ name: "web_search",
801
+ input: { query: "test" },
802
+ },
803
+ {
804
+ type: "web_search_tool_result",
805
+ tool_use_id: "srvtoolu_search",
806
+ content: [
807
+ {
808
+ type: "web_search_result",
809
+ url: "https://example.com",
810
+ title: "Example",
811
+ encrypted_content: "enc_123",
812
+ },
813
+ ],
814
+ },
815
+ { type: "text", text: "Based on the search results..." },
816
+ {
817
+ type: "tool_use",
818
+ id: "tu_fetch",
819
+ name: "fetch_url",
820
+ input: { url: "https://example.com" },
821
+ },
822
+ ],
823
+ },
824
+ toolResultMsg("tu_fetch", "page content here"),
825
+ ];
826
+ await provider.sendMessage(messages);
827
+
828
+ const sent = lastStreamParams!.messages as Array<{
829
+ role: string;
830
+ content: Array<{
831
+ type: string;
832
+ id?: string;
833
+ tool_use_id?: string;
834
+ }>;
835
+ }>;
836
+
837
+ // The server_tool_use pair (server_tool_use + web_search_tool_result) should
838
+ // be in the leading portion of the assistant message, before tool_use.
839
+ // splitAssistantForToolPairing: leading=[server_tool_use, web_search_tool_result, text],
840
+ // toolUseBlocks=[tool_use], carryover=[]
841
+ const assistantMsg = sent[1];
842
+ expect(assistantMsg.role).toBe("assistant");
843
+ const blockTypes = assistantMsg.content.map((b) => b.type);
844
+ expect(blockTypes).toContain("server_tool_use");
845
+ expect(blockTypes).toContain("web_search_tool_result");
846
+ expect(blockTypes).toContain("tool_use");
847
+
848
+ // The tool_result for the client-side tool_use should be in the user message
849
+ const userMsg2 = sent[2];
850
+ expect(userMsg2.role).toBe("user");
851
+ expect(
852
+ userMsg2.content.some(
853
+ (b) => b.type === "tool_result" && b.tool_use_id === "tu_fetch",
854
+ ),
855
+ ).toBe(true);
856
+
857
+ // No synthetic web_search_tool_result injected anywhere
858
+ const allBlocks = sent.flatMap((m) => m.content);
859
+ const webSearchResults = allBlocks.filter(
860
+ (b) => b.type === "web_search_tool_result",
861
+ );
862
+ expect(webSearchResults).toHaveLength(1); // only the original one
863
+ expect(webSearchResults[0].tool_use_id).toBe("srvtoolu_search");
864
+ });
865
+
866
+ test("mixed tool_use and server_tool_use — only client-side tool_use gets pairing, server tools pass through", async () => {
785
867
  const messages: Message[] = [
786
868
  userMsg("Do things"),
787
869
  {
@@ -796,7 +878,7 @@ describe("AnthropicProvider — Cache-Control Characterization", () => {
796
878
  },
797
879
  ],
798
880
  },
799
- // Only tu_a has a result
881
+ // Only tu_a has a result — server_tool_use doesn't need one in the user message
800
882
  toolResultMsg("tu_a", "result A"),
801
883
  ];
802
884
  await provider.sendMessage(messages);
@@ -806,20 +888,29 @@ describe("AnthropicProvider — Cache-Control Characterization", () => {
806
888
  content: Array<{
807
889
  type: string;
808
890
  tool_use_id?: string;
891
+ id?: string;
809
892
  }>;
810
893
  }>;
811
894
 
895
+ // Assistant message should have tool_use in paired portion, server_tool_use in carryover
896
+ // ensureToolPairing splits: paired = [tool_use(tu_a)], carryover = [server_tool_use(srvtoolu_b)]
897
+ // Result: assistant(tool_use) → user(tool_result) → assistant(server_tool_use) → user(continue)
898
+ const assistantMsg = sent[1];
899
+ expect(assistantMsg.role).toBe("assistant");
900
+ expect(assistantMsg.content[0].type).toBe("tool_use");
901
+
812
902
  const userAfterAssistant = sent[2];
813
- // Should have tool_result for tu_a and synthetic web_search_tool_result for srvtoolu_b
814
- expect(userAfterAssistant.content).toHaveLength(2);
903
+ expect(userAfterAssistant.role).toBe("user");
904
+ // Only tool_result for tu_a — no synthetic web_search_tool_result
815
905
  expect(userAfterAssistant.content[0]).toMatchObject({
816
906
  type: "tool_result",
817
907
  tool_use_id: "tu_a",
818
908
  });
819
- expect(userAfterAssistant.content[1]).toMatchObject({
820
- type: "web_search_tool_result",
821
- tool_use_id: "srvtoolu_b",
822
- });
909
+
910
+ // server_tool_use preserved in a carryover assistant message
911
+ const carryoverAssistant = sent[3];
912
+ expect(carryoverAssistant.role).toBe("assistant");
913
+ expect(carryoverAssistant.content[0].type).toBe("server_tool_use");
823
914
  });
824
915
 
825
916
  test("assistant message with only unknown blocks gets placeholder text", async () => {
@@ -81,7 +81,6 @@ mock.module("../config/loader.js", () => ({
81
81
  timeouts: { permissionTimeoutSec: 300 },
82
82
  apiKeys: {},
83
83
  skills: { entries: {}, allowBundled: true },
84
- memory: { retrieval: { injectionStrategy: "inline" } },
85
84
  permissions: { mode: "workspace" },
86
85
  }),
87
86
  loadRawConfig: () => ({}),
@@ -130,18 +129,6 @@ mock.module("../security/secret-allowlist.js", () => ({
130
129
  resetAllowlist: () => {},
131
130
  }));
132
131
 
133
- mock.module("../memory/admin.js", () => ({
134
- getMemoryConflictAndCleanupStats: () => ({
135
- conflicts: { pending: 0, resolved: 0, oldestPendingAgeMs: null },
136
- cleanup: {
137
- resolvedBacklog: 0,
138
- supersededBacklog: 0,
139
- resolvedCompleted24h: 0,
140
- supersededCompleted24h: 0,
141
- },
142
- }),
143
- }));
144
-
145
132
  mock.module("../memory/conversation-crud.js", () => ({
146
133
  getConversationThreadType: () => "default",
147
134
  setConversationOriginChannelIfUnset: () => {},
@@ -182,13 +169,12 @@ mock.module("../memory/retriever.js", () => ({
182
169
  enabled: false,
183
170
  degraded: false,
184
171
  injectedText: "",
185
- lexicalHits: 0,
172
+
186
173
  semanticHits: 0,
187
174
  recencyHits: 0,
188
175
  injectedTokens: 0,
189
176
  latencyMs: 0,
190
177
  }),
191
- injectMemoryRecallIntoUserMessage: (msg: Message) => msg,
192
178
  stripMemoryRecallMessages: (msgs: Message[]) => msgs,
193
179
  }));
194
180
 
@@ -119,6 +119,7 @@ function makeIdleSession(opts?: {
119
119
  setHostBashProxy: () => {},
120
120
  setHostFileProxy: () => {},
121
121
  setHostCuProxy: () => {},
122
+ addPreactivatedSkillId: () => {},
122
123
  enqueueMessage: () => ({ queued: false, requestId: "noop" }),
123
124
  hasAnyPendingConfirmation: () => false,
124
125
  runAgentLoop: async (
@@ -183,6 +184,7 @@ function makeConfirmationEmittingSession(opts?: {
183
184
  setHostBashProxy: () => {},
184
185
  setHostFileProxy: () => {},
185
186
  setHostCuProxy: () => {},
187
+ addPreactivatedSkillId: () => {},
186
188
  enqueueMessage: () => ({ queued: false, requestId: "noop" }),
187
189
  hasAnyPendingConfirmation: () => false,
188
190
  runAgentLoop: async (
@@ -170,29 +170,6 @@ describe("assistant feature flag guard", () => {
170
170
  // Test: registry entries have required fields
171
171
  // ---------------------------------------------------------------------------
172
172
 
173
- // ---------------------------------------------------------------------------
174
- // Test: bundled registry copy stays in sync with canonical meta/ copy
175
- // ---------------------------------------------------------------------------
176
-
177
- test("bundled assistant/src/config/feature-flag-registry.json matches canonical meta/ copy", () => {
178
- const canonicalPath = getRegistryPath();
179
- const bundledPath = join(
180
- process.cwd(),
181
- "src",
182
- "config",
183
- "feature-flag-registry.json",
184
- );
185
-
186
- const canonical = JSON.parse(readFileSync(canonicalPath, "utf-8"));
187
- const bundled = JSON.parse(readFileSync(bundledPath, "utf-8"));
188
-
189
- expect(bundled).toEqual(canonical);
190
- });
191
-
192
- // ---------------------------------------------------------------------------
193
- // Test: registry entries have required fields
194
- // ---------------------------------------------------------------------------
195
-
196
173
  test("all assistant-scope entries in the unified registry have required fields", () => {
197
174
  const registry = loadRegistry();
198
175
  const assistantFlags = registry.flags.filter(
@@ -26,11 +26,13 @@ mock.module("../util/logger.js", () => ({
26
26
  import {
27
27
  createCanonicalGuardianDelivery,
28
28
  createCanonicalGuardianRequest,
29
+ expireAllPendingCanonicalRequests,
29
30
  getCanonicalGuardianRequest,
30
31
  listCanonicalGuardianDeliveries,
31
32
  listCanonicalGuardianRequests,
32
33
  listPendingCanonicalGuardianRequestsByDestinationChat,
33
34
  listPendingCanonicalGuardianRequestsByDestinationConversation,
35
+ listPendingRequestsByConversationScope,
34
36
  resolveCanonicalGuardianRequest,
35
37
  updateCanonicalGuardianDelivery,
36
38
  updateCanonicalGuardianRequest,
@@ -717,4 +719,97 @@ describe("canonical-guardian-store", () => {
717
719
  );
718
720
  expect(pending).toHaveLength(0);
719
721
  });
722
+
723
+ // ── listPendingRequestsByConversationScope expiry filtering ─────────
724
+
725
+ test("listPendingRequestsByConversationScope excludes expired requests", () => {
726
+ // Create a pending request that has already expired
727
+ createCanonicalGuardianRequest({
728
+ kind: "tool_approval",
729
+ sourceType: "desktop",
730
+ conversationId: "conv-scope-1",
731
+ guardianPrincipalId: TEST_PRINCIPAL,
732
+ expiresAt: new Date(Date.now() - 10_000).toISOString(),
733
+ });
734
+
735
+ // Create a pending request that has not expired
736
+ const unexpired = createCanonicalGuardianRequest({
737
+ kind: "tool_approval",
738
+ sourceType: "desktop",
739
+ conversationId: "conv-scope-1",
740
+ guardianPrincipalId: TEST_PRINCIPAL,
741
+ expiresAt: new Date(Date.now() + 60_000).toISOString(),
742
+ });
743
+
744
+ const results = listPendingRequestsByConversationScope("conv-scope-1");
745
+ expect(results).toHaveLength(1);
746
+ expect(results[0].id).toBe(unexpired.id);
747
+ });
748
+
749
+ test("listPendingRequestsByConversationScope includes requests with no expiresAt", () => {
750
+ const noExpiry = createCanonicalGuardianRequest({
751
+ kind: "tool_approval",
752
+ sourceType: "desktop",
753
+ conversationId: "conv-scope-2",
754
+ guardianPrincipalId: TEST_PRINCIPAL,
755
+ });
756
+
757
+ const results = listPendingRequestsByConversationScope("conv-scope-2");
758
+ expect(results).toHaveLength(1);
759
+ expect(results[0].id).toBe(noExpiry.id);
760
+ });
761
+
762
+ // ── expireAllPendingCanonicalRequests ───────────────────────────────
763
+
764
+ test("expireAllPendingCanonicalRequests transitions all pending to expired", () => {
765
+ const req1 = createCanonicalGuardianRequest({
766
+ kind: "tool_approval",
767
+ sourceType: "desktop",
768
+ conversationId: "conv-bulk-1",
769
+ guardianPrincipalId: TEST_PRINCIPAL,
770
+ expiresAt: new Date(Date.now() + 60_000).toISOString(),
771
+ });
772
+ const req2 = createCanonicalGuardianRequest({
773
+ kind: "tool_approval",
774
+ sourceType: "channel",
775
+ conversationId: "conv-bulk-2",
776
+ guardianPrincipalId: TEST_PRINCIPAL,
777
+ expiresAt: new Date(Date.now() + 60_000).toISOString(),
778
+ });
779
+
780
+ const count = expireAllPendingCanonicalRequests();
781
+ expect(count).toBe(2);
782
+
783
+ expect(getCanonicalGuardianRequest(req1.id)!.status).toBe("expired");
784
+ expect(getCanonicalGuardianRequest(req2.id)!.status).toBe("expired");
785
+ });
786
+
787
+ test("expireAllPendingCanonicalRequests does not affect already-resolved requests", () => {
788
+ const approved = createCanonicalGuardianRequest({
789
+ kind: "tool_approval",
790
+ sourceType: "desktop",
791
+ conversationId: "conv-bulk-3",
792
+ guardianPrincipalId: TEST_PRINCIPAL,
793
+ });
794
+ updateCanonicalGuardianRequest(approved.id, { status: "approved" });
795
+
796
+ const denied = createCanonicalGuardianRequest({
797
+ kind: "tool_approval",
798
+ sourceType: "desktop",
799
+ conversationId: "conv-bulk-3",
800
+ guardianPrincipalId: TEST_PRINCIPAL,
801
+ });
802
+ updateCanonicalGuardianRequest(denied.id, { status: "denied" });
803
+
804
+ const count = expireAllPendingCanonicalRequests();
805
+ expect(count).toBe(0);
806
+
807
+ expect(getCanonicalGuardianRequest(approved.id)!.status).toBe("approved");
808
+ expect(getCanonicalGuardianRequest(denied.id)!.status).toBe("denied");
809
+ });
810
+
811
+ test("expireAllPendingCanonicalRequests returns 0 when no pending requests exist", () => {
812
+ const count = expireAllPendingCanonicalRequests();
813
+ expect(count).toBe(0);
814
+ });
720
815
  });
@@ -895,6 +895,19 @@ describe("Permission Checker", () => {
895
895
  );
896
896
  });
897
897
 
898
+ test("computer_use_observe prompts by default via computer-use ask rule", async () => {
899
+ const result = await check(
900
+ "computer_use_observe",
901
+ { reason: "Check current screen state before acting" },
902
+ "/tmp",
903
+ );
904
+ expect(result.decision).toBe("prompt");
905
+ expect(result.reason).toContain("ask rule");
906
+ expect(result.matchedRule?.id).toBe(
907
+ "default:ask-computer_use_observe-global",
908
+ );
909
+ });
910
+
898
911
  test("higher-priority allow rule can override default computer-use ask rule", async () => {
899
912
  addRule(
900
913
  "computer_use_click",
@@ -161,60 +161,7 @@ describe("AssistantConfigSchema", () => {
161
161
  expect(result.secretDetection.action).toBe("block");
162
162
  });
163
163
 
164
- test("applies memory.conflicts defaults", () => {
165
- const result = AssistantConfigSchema.parse({});
166
- expect(result.memory.conflicts).toEqual({
167
- enabled: true,
168
- gateMode: "soft",
169
- resolverLlmTimeoutMs: 12000,
170
- relevanceThreshold: 0.3,
171
- conflictableKinds: [
172
- "preference",
173
- "profile",
174
- "constraint",
175
- "instruction",
176
- "style",
177
- ],
178
- });
179
- });
180
-
181
- test("rejects invalid memory.conflicts.relevanceThreshold", () => {
182
- const result = AssistantConfigSchema.safeParse({
183
- memory: { conflicts: { relevanceThreshold: 2 } },
184
- });
185
- expect(result.success).toBe(false);
186
- });
187
-
188
- test("rejects invalid memory.conflicts.conflictableKinds entry", () => {
189
- const result = AssistantConfigSchema.safeParse({
190
- memory: { conflicts: { conflictableKinds: ["invalid_kind"] } },
191
- });
192
- expect(result.success).toBe(false);
193
- });
194
-
195
- test("rejects empty memory.conflicts.conflictableKinds", () => {
196
- const result = AssistantConfigSchema.safeParse({
197
- memory: { conflicts: { conflictableKinds: [] } },
198
- });
199
- expect(result.success).toBe(false);
200
- });
201
-
202
- test("applies memory.profile defaults", () => {
203
- const result = AssistantConfigSchema.parse({});
204
- expect(result.memory.profile).toEqual({
205
- enabled: true,
206
- maxInjectTokens: 800,
207
- });
208
- });
209
-
210
- test("rejects invalid memory.profile.maxInjectTokens", () => {
211
- const result = AssistantConfigSchema.safeParse({
212
- memory: { profile: { maxInjectTokens: 0 } },
213
- });
214
- expect(result.success).toBe(false);
215
- });
216
-
217
- test("applies rollout defaults for dynamic budget and entity relation features", () => {
164
+ test("applies rollout defaults for dynamic budget", () => {
218
165
  const result = AssistantConfigSchema.parse({});
219
166
  expect(result.memory.retrieval.dynamicBudget).toEqual({
220
167
  enabled: true,
@@ -222,19 +169,6 @@ describe("AssistantConfigSchema", () => {
222
169
  maxInjectTokens: 10000,
223
170
  targetHeadroomTokens: 10000,
224
171
  });
225
- expect(result.memory.entity.extractRelations).toEqual({
226
- enabled: true,
227
- backfillBatchSize: 200,
228
- });
229
- expect(result.memory.entity.relationRetrieval).toEqual({
230
- enabled: true,
231
- maxSeedEntities: 8,
232
- maxNeighborEntities: 20,
233
- maxEdges: 40,
234
- neighborScoreMultiplier: 0.7,
235
- maxDepth: 3,
236
- depthDecay: true,
237
- });
238
172
  });
239
173
 
240
174
  test("applies memory.cleanup defaults", () => {
@@ -242,7 +176,6 @@ describe("AssistantConfigSchema", () => {
242
176
  expect(result.memory.cleanup).toEqual({
243
177
  enabled: true,
244
178
  enqueueIntervalMs: 6 * 60 * 60 * 1000,
245
- resolvedConflictRetentionMs: 30 * 24 * 60 * 60 * 1000,
246
179
  supersededItemRetentionMs: 30 * 24 * 60 * 60 * 1000,
247
180
  conversationRetentionDays: 90,
248
181
  });