@vellumai/assistant 0.4.49 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/memory.md +180 -119
  4. package/package.json +2 -2
  5. package/src/__tests__/agent-loop.test.ts +3 -1
  6. package/src/__tests__/anthropic-provider.test.ts +114 -23
  7. package/src/__tests__/approval-cascade.test.ts +1 -15
  8. package/src/__tests__/approval-routes-http.test.ts +2 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  10. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  11. package/src/__tests__/checker.test.ts +13 -0
  12. package/src/__tests__/config-schema.test.ts +1 -68
  13. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  14. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  15. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  16. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  17. package/src/__tests__/credential-vault-unit.test.ts +4 -0
  18. package/src/__tests__/credential-vault.test.ts +13 -1
  19. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  20. package/src/__tests__/date-context.test.ts +93 -77
  21. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  22. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  23. package/src/__tests__/history-repair.test.ts +245 -0
  24. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  25. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  26. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  27. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  28. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  29. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  30. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  31. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  32. package/src/__tests__/memory-regressions.test.ts +477 -2841
  33. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  34. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  35. package/src/__tests__/mime-builder.test.ts +28 -0
  36. package/src/__tests__/native-web-search.test.ts +1 -0
  37. package/src/__tests__/oauth-cli.test.ts +572 -5
  38. package/src/__tests__/oauth-store.test.ts +120 -6
  39. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  40. package/src/__tests__/registry.test.ts +0 -1
  41. package/src/__tests__/relay-server.test.ts +46 -1
  42. package/src/__tests__/schedule-tools.test.ts +32 -0
  43. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  44. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  45. package/src/__tests__/secure-keys.test.ts +7 -2
  46. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  47. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  48. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  49. package/src/__tests__/session-agent-loop.test.ts +19 -15
  50. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  51. package/src/__tests__/session-error.test.ts +124 -2
  52. package/src/__tests__/session-history-web-search.test.ts +918 -0
  53. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  54. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  55. package/src/__tests__/session-queue.test.ts +37 -27
  56. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  57. package/src/__tests__/session-slash-known.test.ts +1 -15
  58. package/src/__tests__/session-slash-queue.test.ts +1 -15
  59. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  60. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  61. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  62. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  63. package/src/__tests__/skills-install-extract.test.ts +93 -0
  64. package/src/__tests__/skillssh-registry.test.ts +451 -0
  65. package/src/__tests__/trust-store.test.ts +15 -0
  66. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  67. package/src/agent/ax-tree-compaction.test.ts +51 -0
  68. package/src/agent/loop.ts +39 -12
  69. package/src/approvals/AGENTS.md +1 -1
  70. package/src/approvals/guardian-request-resolvers.ts +14 -2
  71. package/src/bundler/compiler-tools.ts +66 -2
  72. package/src/calls/call-domain.ts +132 -0
  73. package/src/calls/call-store.ts +6 -0
  74. package/src/calls/relay-server.ts +43 -5
  75. package/src/calls/relay-setup-router.ts +17 -1
  76. package/src/calls/twilio-config.ts +1 -1
  77. package/src/calls/types.ts +3 -1
  78. package/src/cli/commands/doctor.ts +4 -3
  79. package/src/cli/commands/mcp.ts +46 -59
  80. package/src/cli/commands/memory.ts +16 -165
  81. package/src/cli/commands/oauth/apps.ts +31 -2
  82. package/src/cli/commands/oauth/connections.ts +431 -97
  83. package/src/cli/commands/oauth/providers.ts +15 -1
  84. package/src/cli/commands/sessions.ts +5 -2
  85. package/src/cli/commands/skills.ts +173 -1
  86. package/src/cli/http-client.ts +0 -20
  87. package/src/cli/main-screen.tsx +2 -2
  88. package/src/cli/program.ts +5 -6
  89. package/src/cli.ts +4 -10
  90. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  91. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  92. package/src/config/bundled-tool-registry.ts +2 -5
  93. package/src/config/schema.ts +1 -12
  94. package/src/config/schemas/memory-lifecycle.ts +0 -9
  95. package/src/config/schemas/memory-processing.ts +0 -180
  96. package/src/config/schemas/memory-retrieval.ts +32 -104
  97. package/src/config/schemas/memory.ts +0 -10
  98. package/src/config/types.ts +0 -4
  99. package/src/context/window-manager.ts +4 -1
  100. package/src/daemon/config-watcher.ts +61 -3
  101. package/src/daemon/daemon-control.ts +1 -1
  102. package/src/daemon/date-context.ts +114 -31
  103. package/src/daemon/handlers/sessions.ts +18 -13
  104. package/src/daemon/handlers/skills.ts +20 -1
  105. package/src/daemon/history-repair.ts +72 -8
  106. package/src/daemon/host-cu-proxy.ts +55 -26
  107. package/src/daemon/lifecycle.ts +31 -3
  108. package/src/daemon/mcp-reload-service.ts +2 -2
  109. package/src/daemon/message-types/computer-use.ts +1 -12
  110. package/src/daemon/message-types/memory.ts +4 -16
  111. package/src/daemon/message-types/messages.ts +1 -0
  112. package/src/daemon/message-types/sessions.ts +4 -0
  113. package/src/daemon/server.ts +12 -1
  114. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  115. package/src/daemon/session-agent-loop.ts +334 -48
  116. package/src/daemon/session-error.ts +89 -6
  117. package/src/daemon/session-history.ts +17 -7
  118. package/src/daemon/session-media-retry.ts +6 -2
  119. package/src/daemon/session-memory.ts +69 -149
  120. package/src/daemon/session-process.ts +10 -1
  121. package/src/daemon/session-runtime-assembly.ts +49 -19
  122. package/src/daemon/session-surfaces.ts +4 -1
  123. package/src/daemon/session-tool-setup.ts +7 -1
  124. package/src/daemon/session.ts +12 -2
  125. package/src/instrument.ts +61 -1
  126. package/src/memory/admin.ts +2 -191
  127. package/src/memory/canonical-guardian-store.ts +38 -2
  128. package/src/memory/conversation-crud.ts +0 -33
  129. package/src/memory/conversation-queries.ts +22 -3
  130. package/src/memory/db-init.ts +28 -0
  131. package/src/memory/embedding-backend.ts +84 -8
  132. package/src/memory/embedding-types.ts +9 -1
  133. package/src/memory/indexer.ts +7 -46
  134. package/src/memory/items-extractor.ts +274 -76
  135. package/src/memory/job-handlers/backfill.ts +2 -127
  136. package/src/memory/job-handlers/cleanup.ts +2 -16
  137. package/src/memory/job-handlers/extraction.ts +2 -138
  138. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  139. package/src/memory/job-handlers/summarization.ts +3 -148
  140. package/src/memory/job-utils.ts +21 -59
  141. package/src/memory/jobs-store.ts +1 -159
  142. package/src/memory/jobs-worker.ts +9 -52
  143. package/src/memory/migrations/104-core-indexes.ts +3 -3
  144. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  145. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  146. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  147. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  148. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  149. package/src/memory/migrations/154-drop-fts.ts +20 -0
  150. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  151. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  152. package/src/memory/migrations/index.ts +7 -0
  153. package/src/memory/qdrant-client.ts +148 -51
  154. package/src/memory/raw-query.ts +1 -1
  155. package/src/memory/retriever.test.ts +294 -273
  156. package/src/memory/retriever.ts +421 -645
  157. package/src/memory/schema/calls.ts +2 -0
  158. package/src/memory/schema/memory-core.ts +3 -48
  159. package/src/memory/schema/oauth.ts +2 -0
  160. package/src/memory/search/formatting.ts +263 -176
  161. package/src/memory/search/lexical.ts +1 -254
  162. package/src/memory/search/ranking.ts +0 -455
  163. package/src/memory/search/semantic.ts +100 -14
  164. package/src/memory/search/staleness.ts +47 -0
  165. package/src/memory/search/tier-classifier.ts +21 -0
  166. package/src/memory/search/types.ts +15 -77
  167. package/src/memory/task-memory-cleanup.ts +4 -6
  168. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  169. package/src/oauth/byo-connection.test.ts +8 -1
  170. package/src/oauth/oauth-store.ts +113 -27
  171. package/src/oauth/seed-providers.ts +6 -0
  172. package/src/oauth/token-persistence.ts +11 -3
  173. package/src/permissions/defaults.ts +1 -0
  174. package/src/permissions/trust-store.ts +23 -1
  175. package/src/playbooks/playbook-compiler.ts +1 -1
  176. package/src/prompts/system-prompt.ts +18 -2
  177. package/src/providers/anthropic/client.ts +56 -126
  178. package/src/providers/types.ts +7 -1
  179. package/src/runtime/AGENTS.md +9 -0
  180. package/src/runtime/auth/route-policy.ts +6 -3
  181. package/src/runtime/guardian-reply-router.ts +24 -22
  182. package/src/runtime/http-server.ts +2 -2
  183. package/src/runtime/invite-redemption-service.ts +19 -1
  184. package/src/runtime/invite-service.ts +25 -0
  185. package/src/runtime/pending-interactions.ts +2 -2
  186. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  187. package/src/runtime/routes/conversation-routes.ts +9 -1
  188. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  189. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  190. package/src/runtime/routes/memory-item-routes.ts +503 -0
  191. package/src/runtime/routes/session-management-routes.ts +3 -3
  192. package/src/runtime/routes/settings-routes.ts +2 -2
  193. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  194. package/src/runtime/routes/workspace-routes.ts +2 -1
  195. package/src/security/keychain-broker-client.ts +17 -4
  196. package/src/security/secure-keys.ts +25 -3
  197. package/src/security/token-manager.ts +36 -36
  198. package/src/skills/catalog-install.ts +74 -18
  199. package/src/skills/skillssh-registry.ts +503 -0
  200. package/src/tools/assets/search.ts +5 -1
  201. package/src/tools/computer-use/definitions.ts +0 -10
  202. package/src/tools/computer-use/registry.ts +1 -1
  203. package/src/tools/credentials/vault.ts +1 -3
  204. package/src/tools/memory/definitions.ts +4 -13
  205. package/src/tools/memory/handlers.test.ts +83 -103
  206. package/src/tools/memory/handlers.ts +50 -85
  207. package/src/tools/schedule/create.ts +8 -1
  208. package/src/tools/schedule/update.ts +8 -1
  209. package/src/tools/skills/load.ts +25 -2
  210. package/src/__tests__/clarification-resolver.test.ts +0 -193
  211. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  212. package/src/__tests__/conflict-policy.test.ts +0 -269
  213. package/src/__tests__/conflict-store.test.ts +0 -372
  214. package/src/__tests__/contradiction-checker.test.ts +0 -361
  215. package/src/__tests__/entity-extractor.test.ts +0 -211
  216. package/src/__tests__/entity-search.test.ts +0 -1117
  217. package/src/__tests__/profile-compiler.test.ts +0 -392
  218. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  219. package/src/__tests__/session-profile-injection.test.ts +0 -557
  220. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  221. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  222. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  223. package/src/daemon/session-conflict-gate.ts +0 -167
  224. package/src/daemon/session-dynamic-profile.ts +0 -77
  225. package/src/memory/clarification-resolver.ts +0 -417
  226. package/src/memory/conflict-intent.ts +0 -205
  227. package/src/memory/conflict-policy.ts +0 -127
  228. package/src/memory/conflict-store.ts +0 -410
  229. package/src/memory/contradiction-checker.ts +0 -508
  230. package/src/memory/entity-extractor.ts +0 -535
  231. package/src/memory/format-recall.ts +0 -47
  232. package/src/memory/fts-reconciler.ts +0 -165
  233. package/src/memory/job-handlers/conflict.ts +0 -200
  234. package/src/memory/profile-compiler.ts +0 -195
  235. package/src/memory/recall-cache.ts +0 -117
  236. package/src/memory/search/entity.ts +0 -535
  237. package/src/memory/search/query-expansion.test.ts +0 -70
  238. package/src/memory/search/query-expansion.ts +0 -118
  239. package/src/runtime/routes/mcp-routes.ts +0 -20
@@ -1,6 +1,7 @@
1
1
  import type {
2
2
  ContentBlock,
3
3
  Message,
4
+ ServerToolUseContent,
4
5
  ToolResultContent,
5
6
  ToolUseContent,
6
7
  } from "../providers/types.js";
@@ -20,6 +21,11 @@ export interface RepairResult {
20
21
  const SYNTHETIC_RESULT =
21
22
  "<synthesized_result>tool result missing from history</synthesized_result>";
22
23
 
24
+ const SYNTHETIC_WEB_SEARCH_ERROR = {
25
+ type: "web_search_tool_result_error",
26
+ error_code: "unavailable",
27
+ };
28
+
23
29
  export function repairHistory(messages: Message[]): RepairResult {
24
30
  const stats: RepairStats = {
25
31
  assistantToolResultsMigrated: 0,
@@ -45,12 +51,15 @@ export function repairHistory(messages: Message[]): RepairResult {
45
51
  recoveredResults = new Map();
46
52
  }
47
53
 
48
- // Strip tool_result blocks from assistant messages, preserving them
49
- // so they can be migrated to the correct user message position
54
+ // Strip client-side tool_result blocks from assistant messages,
55
+ // preserving them so they can be migrated to the correct user message.
56
+ // Server-side tools (server_tool_use / web_search_tool_result) are
57
+ // self-paired within the assistant message and must NOT be separated.
50
58
  const cleanedContent: ContentBlock[] = [];
51
59
  const newRecovered = new Map<string, ToolResultContent>();
52
60
  for (const block of msg.content) {
53
61
  if (block.type === "tool_result") {
62
+ // guard:allow-tool-result-only — only client-side tool_result belongs in recovered; web_search_tool_result stays in the assistant message
54
63
  const tr = block as ToolResultContent;
55
64
  newRecovered.set(tr.tool_use_id, tr);
56
65
  stats.assistantToolResultsMigrated++;
@@ -59,9 +68,34 @@ export function repairHistory(messages: Message[]): RepairResult {
59
68
  }
60
69
  }
61
70
 
71
+ // Ensure every server_tool_use has a paired web_search_tool_result
72
+ // in the same assistant message (handles interrupted streams)
73
+ const serverToolIds = new Set(
74
+ cleanedContent
75
+ .filter(
76
+ (b): b is ServerToolUseContent => b.type === "server_tool_use",
77
+ )
78
+ .map((b) => b.id),
79
+ );
80
+ const matchedServerIds = new Set(
81
+ cleanedContent
82
+ .filter((b) => b.type === "web_search_tool_result")
83
+ .map((b) => (b as { tool_use_id: string }).tool_use_id),
84
+ );
85
+ for (const id of serverToolIds) {
86
+ if (!matchedServerIds.has(id)) {
87
+ cleanedContent.push({
88
+ type: "web_search_tool_result",
89
+ tool_use_id: id,
90
+ content: SYNTHETIC_WEB_SEARCH_ERROR,
91
+ });
92
+ stats.missingToolResultsInserted++;
93
+ }
94
+ }
95
+
62
96
  result.push({ role: "assistant", content: cleanedContent });
63
97
 
64
- // Collect tool_use IDs from this assistant message
98
+ // Only track client-side tool_use IDs as pending (not server_tool_use)
65
99
  pendingToolUseIds = new Set(
66
100
  cleanedContent
67
101
  .filter((b): b is ToolUseContent => b.type === "tool_use")
@@ -76,14 +110,28 @@ export function repairHistory(messages: Message[]): RepairResult {
76
110
 
77
111
  for (const block of msg.content) {
78
112
  if (block.type === "tool_result") {
113
+ // guard:allow-tool-result-only — matches client-side tool_use; web_search_tool_result is handled separately below
79
114
  const tr = block as ToolResultContent;
80
115
  if (pendingToolUseIds.has(tr.tool_use_id)) {
81
116
  matchedIds.add(tr.tool_use_id);
82
117
  newContent.push(block);
83
118
  } else {
84
119
  stats.orphanToolResultsDowngraded++;
85
- newContent.push(downgradeToolResult(tr));
120
+ newContent.push(downgradeResult(tr));
86
121
  }
122
+ } else if (block.type === "web_search_tool_result") {
123
+ // web_search_tool_result in a user message is orphaned — server-side
124
+ // results belong in the assistant message, not here
125
+ stats.orphanToolResultsDowngraded++;
126
+ newContent.push(
127
+ downgradeResult(
128
+ block as {
129
+ type: "web_search_tool_result";
130
+ tool_use_id: string;
131
+ content: unknown;
132
+ },
133
+ ),
134
+ );
87
135
  } else {
88
136
  newContent.push(block);
89
137
  }
@@ -112,11 +160,21 @@ export function repairHistory(messages: Message[]): RepairResult {
112
160
  pendingToolUseIds = new Set();
113
161
  recoveredResults = new Map();
114
162
  } else {
115
- // No pending tool_use — any tool_result here is orphaned
163
+ // No pending tool_use — any tool_result/web_search_tool_result here is orphaned
116
164
  const newContent: ContentBlock[] = msg.content.map((block) => {
117
165
  if (block.type === "tool_result") {
118
166
  stats.orphanToolResultsDowngraded++;
119
- return downgradeToolResult(block as ToolResultContent);
167
+ return downgradeResult(block as ToolResultContent);
168
+ }
169
+ if (block.type === "web_search_tool_result") {
170
+ stats.orphanToolResultsDowngraded++;
171
+ return downgradeResult(
172
+ block as {
173
+ type: "web_search_tool_result";
174
+ tool_use_id: string;
175
+ content: unknown;
176
+ },
177
+ );
120
178
  }
121
179
  return block;
122
180
  });
@@ -207,9 +265,15 @@ export function deepRepairHistory(messages: Message[]): RepairResult {
207
265
  return repairHistory(merged);
208
266
  }
209
267
 
210
- function downgradeToolResult(tr: ToolResultContent): ContentBlock {
268
+ function downgradeResult(tr: {
269
+ type: string;
270
+ tool_use_id: string;
271
+ content?: unknown;
272
+ }): ContentBlock {
273
+ const content =
274
+ tr.type === "tool_result" ? tr.content : "[web search result]"; // guard:allow-tool-result-only — distinguishes content format between the two types
211
275
  return {
212
276
  type: "text",
213
- text: `[orphaned tool_result for ${tr.tool_use_id}]: ${tr.content}`,
277
+ text: `[orphaned ${tr.type} for ${tr.tool_use_id}]: ${content}`,
214
278
  };
215
279
  }
@@ -9,6 +9,7 @@
9
9
 
10
10
  import { v4 as uuid } from "uuid";
11
11
 
12
+ import { escapeAxTreeContent } from "../agent/loop.js";
12
13
  import type { ContentBlock } from "../providers/types.js";
13
14
  import type { ToolExecutionResult } from "../tools/types.js";
14
15
  import { AssistantError, ErrorCode } from "../util/errors.js";
@@ -65,6 +66,7 @@ interface PendingRequest {
65
66
  export class HostCuProxy {
66
67
  private pending = new Map<string, PendingRequest>();
67
68
  private sendToClient: (msg: ServerMessage) => void;
69
+ private onInternalResolve?: (requestId: string) => void;
68
70
  private clientConnected = false;
69
71
 
70
72
  // CU state tracking (per-conversation)
@@ -76,9 +78,11 @@ export class HostCuProxy {
76
78
 
77
79
  constructor(
78
80
  sendToClient: (msg: ServerMessage) => void,
81
+ onInternalResolve?: (requestId: string) => void,
79
82
  maxSteps = MAX_STEPS,
80
83
  ) {
81
84
  this.sendToClient = sendToClient;
85
+ this.onInternalResolve = onInternalResolve;
82
86
  this._maxSteps = maxSteps;
83
87
  }
84
88
 
@@ -150,6 +154,7 @@ export class HostCuProxy {
150
154
  return new Promise<ToolExecutionResult>((resolve, reject) => {
151
155
  const timer = setTimeout(() => {
152
156
  this.pending.delete(requestId);
157
+ this.onInternalResolve?.(requestId);
153
158
  log.warn({ requestId, toolName }, "Host CU proxy request timed out");
154
159
  resolve({
155
160
  content: "Host CU proxy timed out waiting for client response",
@@ -164,6 +169,7 @@ export class HostCuProxy {
164
169
  if (this.pending.has(requestId)) {
165
170
  clearTimeout(timer);
166
171
  this.pending.delete(requestId);
172
+ this.onInternalResolve?.(requestId);
167
173
  resolve({ content: "Aborted", isError: true });
168
174
  }
169
175
  };
@@ -191,10 +197,13 @@ export class HostCuProxy {
191
197
  clearTimeout(entry.timer);
192
198
  this.pending.delete(requestId);
193
199
 
200
+ // Capture pre-update state so formatObservation sees the correct previous AX tree
201
+ const prevAXTree = this._previousAXTree;
202
+
194
203
  // Update CU state from observation
195
204
  this.updateStateFromObservation(observation);
196
205
 
197
- const result = this.formatObservation(observation);
206
+ const result = this.formatObservation(observation, prevAXTree);
198
207
  entry.resolve(result);
199
208
  }
200
209
 
@@ -202,6 +211,10 @@ export class HostCuProxy {
202
211
  return this.pending.has(requestId);
203
212
  }
204
213
 
214
+ isAvailable(): boolean {
215
+ return this.clientConnected;
216
+ }
217
+
205
218
  // ---------------------------------------------------------------------------
206
219
  // CU state management
207
220
  // ---------------------------------------------------------------------------
@@ -245,7 +258,11 @@ export class HostCuProxy {
245
258
  * (AX tree wrapped in markers, diff, warnings) and optional screenshot
246
259
  * as an image content block.
247
260
  */
248
- formatObservation(obs: CuObservationResult): ToolExecutionResult {
261
+ formatObservation(
262
+ obs: CuObservationResult,
263
+ previousAXTree?: string,
264
+ ): ToolExecutionResult {
265
+ const prevTree = previousAXTree;
249
266
  const parts: string[] = [];
250
267
 
251
268
  // Surface user guidance prominently so the model sees it first
@@ -263,21 +280,30 @@ export class HostCuProxy {
263
280
  if (obs.axDiff) {
264
281
  parts.push(obs.axDiff);
265
282
  parts.push("");
266
- } else if (this._previousAXTree != null && obs.axTree != null) {
267
- // No diff means the screen didn't change
268
- if (
269
- this._consecutiveUnchangedSteps >=
270
- CONSECUTIVE_UNCHANGED_WARNING_THRESHOLD
271
- ) {
272
- parts.push(
273
- `WARNING: ${this._consecutiveUnchangedSteps} consecutive actions had NO VISIBLE EFFECT on the UI. You MUST try a completely different approach.`,
274
- );
275
- } else {
276
- parts.push(
277
- "Your last action had NO VISIBLE EFFECT on the UI. Try something different.",
278
- );
283
+ } else if (prevTree != null && obs.axTree != null) {
284
+ // Skip unchanged warning after wait actions — they intentionally yield no immediate change
285
+ const lastAction =
286
+ this._actionHistory.length > 0
287
+ ? this._actionHistory[this._actionHistory.length - 1]
288
+ : undefined;
289
+ const isWaitAction = lastAction?.toolName === "computer_use_wait";
290
+
291
+ if (!isWaitAction) {
292
+ // No diff means the screen didn't change
293
+ if (
294
+ this._consecutiveUnchangedSteps >=
295
+ CONSECUTIVE_UNCHANGED_WARNING_THRESHOLD
296
+ ) {
297
+ parts.push(
298
+ `WARNING: ${this._consecutiveUnchangedSteps} consecutive actions had NO VISIBLE EFFECT on the UI. You MUST try a completely different approach.`,
299
+ );
300
+ } else {
301
+ parts.push(
302
+ "Your last action had NO VISIBLE EFFECT on the UI. Try something different.",
303
+ );
304
+ }
305
+ parts.push("");
279
306
  }
280
- parts.push("");
281
307
  }
282
308
 
283
309
  // Loop detection: identical actions repeated
@@ -300,10 +326,20 @@ export class HostCuProxy {
300
326
  if (obs.axTree) {
301
327
  parts.push("<ax-tree>");
302
328
  parts.push("CURRENT SCREEN STATE:");
303
- parts.push(HostCuProxy.escapeAxTreeContent(obs.axTree));
329
+ parts.push(escapeAxTreeContent(obs.axTree));
304
330
  parts.push("</ax-tree>");
305
331
  }
306
332
 
333
+ // Secondary windows for cross-app awareness
334
+ if (obs.secondaryWindows) {
335
+ parts.push("");
336
+ parts.push(obs.secondaryWindows);
337
+ parts.push("");
338
+ parts.push(
339
+ "Note: The element [ID]s above are from other windows — you can reference them for context but can only interact with the focused window's elements.",
340
+ );
341
+ }
342
+
307
343
  // Screenshot metadata
308
344
  const screenshotMeta = this.formatScreenshotMetadata(obs);
309
345
  if (screenshotMeta.length > 0) {
@@ -342,8 +378,9 @@ export class HostCuProxy {
342
378
  // ---------------------------------------------------------------------------
343
379
 
344
380
  dispose(): void {
345
- for (const [_requestId, entry] of this.pending) {
381
+ for (const [requestId, entry] of this.pending) {
346
382
  clearTimeout(entry.timer);
383
+ this.onInternalResolve?.(requestId);
347
384
  entry.reject(
348
385
  new AssistantError("Host CU proxy disposed", ErrorCode.INTERNAL_ERROR),
349
386
  );
@@ -390,12 +427,4 @@ export class HostCuProxy {
390
427
  }
391
428
  return lines;
392
429
  }
393
-
394
- /**
395
- * Escapes literal `</ax-tree>` inside AX tree content so compaction
396
- * regex does not stop prematurely.
397
- */
398
- static escapeAxTreeContent(content: string): string {
399
- return content.replace(/<\/ax-tree>/gi, "&lt;/ax-tree&gt;");
400
- }
401
430
  }
@@ -26,13 +26,18 @@ import { closeSentry, initSentry } from "../instrument.js";
26
26
  import { disableLogfire, initLogfire } from "../logfire.js";
27
27
  import { getMcpServerManager } from "../mcp/manager.js";
28
28
  import * as attachmentsStore from "../memory/attachments-store.js";
29
+ import { expireAllPendingCanonicalRequests } from "../memory/canonical-guardian-store.js";
29
30
  import {
30
31
  deleteMessageById,
31
32
  getConversationThreadType,
32
33
  getMessages,
33
34
  } from "../memory/conversation-crud.js";
34
35
  import { initializeDb } from "../memory/db.js";
35
- import { selectEmbeddingBackend } from "../memory/embedding-backend.js";
36
+ import {
37
+ selectEmbeddingBackend,
38
+ SPARSE_EMBEDDING_VERSION,
39
+ } from "../memory/embedding-backend.js";
40
+ import { enqueueMemoryJob } from "../memory/jobs-store.js";
36
41
  import { startMemoryJobsWorker } from "../memory/jobs-worker.js";
37
42
  import { initQdrantClient } from "../memory/qdrant-client.js";
38
43
  import { QdrantManager } from "../memory/qdrant-manager.js";
@@ -165,6 +170,18 @@ export async function runDaemon(): Promise<void> {
165
170
  await backfillManualTokenConnections();
166
171
  log.info("Daemon startup: DB initialized");
167
172
 
173
+ // Expire any pending canonical guardian requests left over from before
174
+ // this process started. Their in-memory pending-interaction session
175
+ // references are gone, so they can never be completed. The agent loop
176
+ // will re-request tool approvals on the next turn.
177
+ const expiredCount = expireAllPendingCanonicalRequests();
178
+ if (expiredCount > 0) {
179
+ log.info(
180
+ { event: "startup_expired_stale_requests", expiredCount },
181
+ `Expired ${expiredCount} stale pending canonical request(s) from previous process`,
182
+ );
183
+ }
184
+
168
185
  // Ensure a vellum guardian binding exists and mint the CLI edge token
169
186
  // as an actor token bound to the guardian principal.
170
187
  let guardianPrincipalId: string | undefined;
@@ -305,9 +322,9 @@ export async function runDaemon(): Promise<void> {
305
322
  await qdrantManager.start();
306
323
  const embeddingSelection = selectEmbeddingBackend(config);
307
324
  const embeddingModel = embeddingSelection.backend
308
- ? `${embeddingSelection.backend.provider}:${embeddingSelection.backend.model}`
325
+ ? `${embeddingSelection.backend.provider}:${embeddingSelection.backend.model}:sparse-v${SPARSE_EMBEDDING_VERSION}`
309
326
  : undefined;
310
- initQdrantClient({
327
+ const qdrantClient = initQdrantClient({
311
328
  url: qdrantUrl,
312
329
  collection: config.memory.qdrant.collection,
313
330
  vectorSize: config.memory.qdrant.vectorSize,
@@ -315,6 +332,17 @@ export async function runDaemon(): Promise<void> {
315
332
  quantization: config.memory.qdrant.quantization,
316
333
  embeddingModel,
317
334
  });
335
+
336
+ // Eagerly ensure the collection exists so we detect migrations
337
+ // (unnamed→named vectors, dimension/model changes) at startup.
338
+ // If a destructive migration occurred, enqueue a rebuild_index job
339
+ // to re-embed all memory items from the SQLite cache.
340
+ const { migrated } = await qdrantClient.ensureCollection();
341
+ if (migrated) {
342
+ enqueueMemoryJob("rebuild_index", {});
343
+ log.info("Qdrant collection was migrated — enqueued rebuild_index job");
344
+ }
345
+
318
346
  log.info("Qdrant vector store initialized");
319
347
  } catch (err) {
320
348
  log.warn(
@@ -1,8 +1,8 @@
1
1
  /**
2
2
  * Shared MCP reload business logic.
3
3
  *
4
- * Used by the HTTP route (`runtime/routes/mcp-routes.ts`) so the reload
5
- * behaviour is defined in exactly one place.
4
+ * Called by the ConfigWatcher when config.json changes or a reload signal
5
+ * file is detected, so the daemon automatically reconnects MCP servers.
6
6
  */
7
7
 
8
8
  import { getConfig, invalidateConfigCache } from "../config/loader.js";
@@ -1,4 +1,4 @@
1
- // Computer use, task routing, and watch observation types.
1
+ // Computer use and watch observation types.
2
2
 
3
3
  import type { CommandIntent, UserMessageAttachment } from "./shared.js";
4
4
 
@@ -89,16 +89,6 @@ export interface RecordingResume {
89
89
  recordingId: string;
90
90
  }
91
91
 
92
- export interface TaskRouted {
93
- type: "task_routed";
94
- sessionId: string;
95
- interactionType: "computer_use" | "text_qa";
96
- /** The task text passed to the escalated session. */
97
- task?: string;
98
- /** Set when a text_qa session escalates to computer_use. */
99
- escalatedFrom?: string;
100
- }
101
-
102
92
  export interface WatchStarted {
103
93
  type: "watch_started";
104
94
  sessionId: string;
@@ -121,7 +111,6 @@ export type _ComputerUseClientMessages =
121
111
  | RecordingStatus;
122
112
 
123
113
  export type _ComputerUseServerMessages =
124
- | TaskRouted
125
114
  | WatchStarted
126
115
  | WatchCompleteRequest
127
116
  | RecordingStart
@@ -11,7 +11,6 @@ export interface MemoryRecalledCandidateDebug {
11
11
  type: string;
12
12
  kind: string;
13
13
  finalScore: number;
14
- lexical: number;
15
14
  semantic: number;
16
15
  recency: number;
17
16
  }
@@ -21,18 +20,14 @@ export interface MemoryRecalled {
21
20
  provider: string;
22
21
  model: string;
23
22
  degradation?: MemoryRecalledDegradation;
24
- lexicalHits: number;
25
23
  semanticHits: number;
26
24
  recencyHits: number;
27
- entityHits: number;
28
- relationSeedEntityCount?: number;
29
- relationTraversedEdgeCount?: number;
30
- relationNeighborEntityCount?: number;
31
- relationExpandedItemCount?: number;
32
- earlyTerminated?: boolean;
25
+ tier1Count: number;
26
+ tier2Count: number;
27
+ hybridSearchLatencyMs: number;
28
+ sparseVectorUsed: boolean;
33
29
  mergedCount: number;
34
30
  selectedCount: number;
35
- rerankApplied: boolean;
36
31
  injectedTokens: number;
37
32
  latencyMs: number;
38
33
  topCandidates: MemoryRecalledCandidateDebug[];
@@ -46,13 +41,6 @@ export interface MemoryStatus {
46
41
  reason?: string;
47
42
  provider?: string;
48
43
  model?: string;
49
- conflictsPending: number;
50
- conflictsResolved: number;
51
- oldestPendingConflictAgeMs: number | null;
52
- cleanupResolvedJobsPending: number;
53
- cleanupSupersededJobsPending: number;
54
- cleanupResolvedJobsCompleted24h: number;
55
- cleanupSupersededJobsCompleted24h: number;
56
44
  }
57
45
 
58
46
  // --- Domain-level union aliases (consumed by the barrel file) ---
@@ -290,6 +290,7 @@ export interface AssistantActivityState {
290
290
  | "tool_result_received"
291
291
  | "confirmation_requested"
292
292
  | "confirmation_resolved"
293
+ | "context_compacting"
293
294
  | "message_complete"
294
295
  | "generation_cancelled"
295
296
  | "error_terminal";
@@ -394,6 +394,8 @@ export type SessionErrorCode =
394
394
  | "PROVIDER_RATE_LIMIT"
395
395
  | "PROVIDER_API"
396
396
  | "PROVIDER_BILLING"
397
+ | "PROVIDER_ORDERING"
398
+ | "PROVIDER_WEB_SEARCH"
397
399
  | "CONTEXT_TOO_LARGE"
398
400
  | "SESSION_ABORTED"
399
401
  | "SESSION_PROCESSING_FAILED"
@@ -407,6 +409,8 @@ export interface SessionErrorMessage {
407
409
  userMessage: string;
408
410
  retryable: boolean;
409
411
  debugDetails?: string;
412
+ /** Machine-readable error category for log report metadata and triage. */
413
+ errorCategory?: string;
410
414
  }
411
415
 
412
416
  /** Server push — broadcast when a schedule creates a conversation, so the client can show it as a chat thread. */
@@ -58,6 +58,7 @@ import type { SkillOperationContext } from "./handlers/skills.js";
58
58
  import { HostBashProxy } from "./host-bash-proxy.js";
59
59
  import { HostCuProxy } from "./host-cu-proxy.js";
60
60
  import { HostFileProxy } from "./host-file-proxy.js";
61
+ import { reloadMcpServers } from "./mcp-reload-service.js";
61
62
  import type { ServerMessage } from "./message-protocol.js";
62
63
  import {
63
64
  DEFAULT_MEMORY_POLICY,
@@ -392,6 +393,11 @@ export class DaemonServer {
392
393
  this.configWatcher.start(
393
394
  () => this.evictSessionsForReload(),
394
395
  () => this.broadcastIdentityChanged(),
396
+ () => {
397
+ reloadMcpServers().catch((err: unknown) => {
398
+ log.error({ err }, "MCP reload triggered by config change failed");
399
+ });
400
+ },
395
401
  );
396
402
 
397
403
  // Broadcast contacts_changed to all clients when any contact mutation occurs.
@@ -663,8 +669,13 @@ export class DaemonServer {
663
669
  );
664
670
  }
665
671
  if (!session.isProcessing() || !session.hostCuProxy) {
666
- session.setHostCuProxy(new HostCuProxy(session.getCurrentSender()));
672
+ session.setHostCuProxy(
673
+ new HostCuProxy(session.getCurrentSender(), (requestId) => {
674
+ pendingInteractions.resolve(requestId);
675
+ }),
676
+ );
667
677
  }
678
+ session.addPreactivatedSkillId("computer-use");
668
679
  } else if (!session.isProcessing()) {
669
680
  session.setHostBashProxy(undefined);
670
681
  session.setHostFileProxy(undefined);
@@ -57,6 +57,8 @@ export interface EventHandlerState {
57
57
  orderingErrorDetected: boolean;
58
58
  deferredOrderingError: string | null;
59
59
  contextTooLargeDetected: boolean;
60
+ /** The raw error message from the provider when context_too_large is detected. */
61
+ contextTooLargeErrorMessage: string | null;
60
62
  providerErrorUserMessage: string | null;
61
63
  lastAssistantMessageId: string | undefined;
62
64
  readonly pendingToolResults: Map<string, PendingToolResult>;
@@ -121,6 +123,7 @@ export function createEventHandlerState(): EventHandlerState {
121
123
  orderingErrorDetected: false,
122
124
  deferredOrderingError: null,
123
125
  contextTooLargeDetected: false,
126
+ contextTooLargeErrorMessage: null,
124
127
  providerErrorUserMessage: null,
125
128
  lastAssistantMessageId: undefined,
126
129
  pendingToolResults: new Map(),
@@ -595,12 +598,22 @@ export function handleError(
595
598
  state.deferredOrderingError = event.error.message;
596
599
  } else if (isContextTooLarge(event.error.message)) {
597
600
  state.contextTooLargeDetected = true;
601
+ state.contextTooLargeErrorMessage = event.error.message;
598
602
  } else {
599
603
  const classified = classifySessionError(event.error, {
600
604
  phase: "agent_loop",
601
605
  });
602
606
  if (classified.code === "CONTEXT_TOO_LARGE") {
603
607
  state.contextTooLargeDetected = true;
608
+ state.contextTooLargeErrorMessage = event.error.message;
609
+ } else if (
610
+ classified.code === "PROVIDER_ORDERING" ||
611
+ classified.code === "PROVIDER_WEB_SEARCH"
612
+ ) {
613
+ // Ordering errors detected via classifySessionError (e.g. from ProviderError
614
+ // with statusCode 400 and ordering message) — trigger the retry path.
615
+ state.orderingErrorDetected = true;
616
+ state.deferredOrderingError = event.error.message;
604
617
  } else {
605
618
  deps.onEvent(
606
619
  buildSessionErrorMessage(deps.ctx.conversationId, classified),
@@ -831,6 +844,31 @@ export async function dispatchAgentEvent(
831
844
  deps.reqId,
832
845
  statusText,
833
846
  );
847
+ // Emit tool_use_start so the client renders a tool chip (like other tools)
848
+ deps.onEvent({
849
+ type: "tool_use_start",
850
+ toolName: event.name,
851
+ input: event.input,
852
+ sessionId: deps.ctx.conversationId,
853
+ toolUseId: event.toolUseId,
854
+ });
855
+ break;
856
+ }
857
+ case "server_tool_complete": {
858
+ deps.ctx.emitActivityState(
859
+ "streaming",
860
+ "tool_result_received",
861
+ "assistant_turn",
862
+ deps.reqId,
863
+ );
864
+ deps.onEvent({
865
+ type: "tool_result",
866
+ toolName: "",
867
+ result: "",
868
+ isError: false,
869
+ sessionId: deps.ctx.conversationId,
870
+ toolUseId: event.toolUseId,
871
+ });
834
872
  break;
835
873
  }
836
874
  case "error":