@vellumai/assistant 0.4.49 → 0.4.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. package/ARCHITECTURE.md +24 -33
  2. package/README.md +3 -3
  3. package/docs/architecture/memory.md +180 -119
  4. package/package.json +2 -2
  5. package/src/__tests__/agent-loop.test.ts +3 -1
  6. package/src/__tests__/anthropic-provider.test.ts +114 -23
  7. package/src/__tests__/approval-cascade.test.ts +1 -15
  8. package/src/__tests__/approval-routes-http.test.ts +2 -0
  9. package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
  10. package/src/__tests__/canonical-guardian-store.test.ts +95 -0
  11. package/src/__tests__/checker.test.ts +13 -0
  12. package/src/__tests__/config-schema.test.ts +1 -68
  13. package/src/__tests__/context-memory-e2e.test.ts +11 -100
  14. package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
  15. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  16. package/src/__tests__/credential-security-e2e.test.ts +1 -0
  17. package/src/__tests__/credential-vault-unit.test.ts +4 -0
  18. package/src/__tests__/credential-vault.test.ts +13 -1
  19. package/src/__tests__/cu-unified-flow.test.ts +532 -0
  20. package/src/__tests__/date-context.test.ts +93 -77
  21. package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
  22. package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
  23. package/src/__tests__/history-repair.test.ts +245 -0
  24. package/src/__tests__/host-cu-proxy.test.ts +165 -3
  25. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  26. package/src/__tests__/invite-redemption-service.test.ts +65 -1
  27. package/src/__tests__/keychain-broker-client.test.ts +4 -4
  28. package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
  29. package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
  30. package/src/__tests__/memory-recall-quality.test.ts +244 -407
  31. package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
  32. package/src/__tests__/memory-regressions.test.ts +477 -2841
  33. package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
  34. package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
  35. package/src/__tests__/mime-builder.test.ts +28 -0
  36. package/src/__tests__/native-web-search.test.ts +1 -0
  37. package/src/__tests__/oauth-cli.test.ts +572 -5
  38. package/src/__tests__/oauth-store.test.ts +120 -6
  39. package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
  40. package/src/__tests__/registry.test.ts +0 -1
  41. package/src/__tests__/relay-server.test.ts +46 -1
  42. package/src/__tests__/schedule-tools.test.ts +32 -0
  43. package/src/__tests__/script-proxy-certs.test.ts +1 -1
  44. package/src/__tests__/secret-onetime-send.test.ts +1 -0
  45. package/src/__tests__/secure-keys.test.ts +7 -2
  46. package/src/__tests__/send-endpoint-busy.test.ts +3 -0
  47. package/src/__tests__/session-abort-tool-results.test.ts +1 -14
  48. package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
  49. package/src/__tests__/session-agent-loop.test.ts +19 -15
  50. package/src/__tests__/session-confirmation-signals.test.ts +1 -15
  51. package/src/__tests__/session-error.test.ts +124 -2
  52. package/src/__tests__/session-history-web-search.test.ts +918 -0
  53. package/src/__tests__/session-pre-run-repair.test.ts +1 -14
  54. package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
  55. package/src/__tests__/session-queue.test.ts +37 -27
  56. package/src/__tests__/session-runtime-assembly.test.ts +54 -0
  57. package/src/__tests__/session-slash-known.test.ts +1 -15
  58. package/src/__tests__/session-slash-queue.test.ts +1 -15
  59. package/src/__tests__/session-slash-unknown.test.ts +1 -15
  60. package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
  61. package/src/__tests__/session-workspace-injection.test.ts +3 -37
  62. package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
  63. package/src/__tests__/skills-install-extract.test.ts +93 -0
  64. package/src/__tests__/skillssh-registry.test.ts +451 -0
  65. package/src/__tests__/trust-store.test.ts +15 -0
  66. package/src/__tests__/voice-invite-redemption.test.ts +32 -1
  67. package/src/agent/ax-tree-compaction.test.ts +51 -0
  68. package/src/agent/loop.ts +39 -12
  69. package/src/approvals/AGENTS.md +1 -1
  70. package/src/approvals/guardian-request-resolvers.ts +14 -2
  71. package/src/bundler/compiler-tools.ts +66 -2
  72. package/src/calls/call-domain.ts +132 -0
  73. package/src/calls/call-store.ts +6 -0
  74. package/src/calls/relay-server.ts +43 -5
  75. package/src/calls/relay-setup-router.ts +17 -1
  76. package/src/calls/twilio-config.ts +1 -1
  77. package/src/calls/types.ts +3 -1
  78. package/src/cli/commands/doctor.ts +4 -3
  79. package/src/cli/commands/mcp.ts +46 -59
  80. package/src/cli/commands/memory.ts +16 -165
  81. package/src/cli/commands/oauth/apps.ts +31 -2
  82. package/src/cli/commands/oauth/connections.ts +431 -97
  83. package/src/cli/commands/oauth/providers.ts +15 -1
  84. package/src/cli/commands/sessions.ts +5 -2
  85. package/src/cli/commands/skills.ts +173 -1
  86. package/src/cli/http-client.ts +0 -20
  87. package/src/cli/main-screen.tsx +2 -2
  88. package/src/cli/program.ts +5 -6
  89. package/src/cli.ts +4 -10
  90. package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
  91. package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
  92. package/src/config/bundled-tool-registry.ts +2 -5
  93. package/src/config/schema.ts +1 -12
  94. package/src/config/schemas/memory-lifecycle.ts +0 -9
  95. package/src/config/schemas/memory-processing.ts +0 -180
  96. package/src/config/schemas/memory-retrieval.ts +32 -104
  97. package/src/config/schemas/memory.ts +0 -10
  98. package/src/config/types.ts +0 -4
  99. package/src/context/window-manager.ts +4 -1
  100. package/src/daemon/config-watcher.ts +61 -3
  101. package/src/daemon/daemon-control.ts +1 -1
  102. package/src/daemon/date-context.ts +114 -31
  103. package/src/daemon/handlers/sessions.ts +18 -13
  104. package/src/daemon/handlers/skills.ts +20 -1
  105. package/src/daemon/history-repair.ts +72 -8
  106. package/src/daemon/host-cu-proxy.ts +55 -26
  107. package/src/daemon/lifecycle.ts +31 -3
  108. package/src/daemon/mcp-reload-service.ts +2 -2
  109. package/src/daemon/message-types/computer-use.ts +1 -12
  110. package/src/daemon/message-types/memory.ts +4 -16
  111. package/src/daemon/message-types/messages.ts +1 -0
  112. package/src/daemon/message-types/sessions.ts +4 -0
  113. package/src/daemon/server.ts +12 -1
  114. package/src/daemon/session-agent-loop-handlers.ts +38 -0
  115. package/src/daemon/session-agent-loop.ts +334 -48
  116. package/src/daemon/session-error.ts +89 -6
  117. package/src/daemon/session-history.ts +17 -7
  118. package/src/daemon/session-media-retry.ts +6 -2
  119. package/src/daemon/session-memory.ts +69 -149
  120. package/src/daemon/session-process.ts +10 -1
  121. package/src/daemon/session-runtime-assembly.ts +49 -19
  122. package/src/daemon/session-surfaces.ts +4 -1
  123. package/src/daemon/session-tool-setup.ts +7 -1
  124. package/src/daemon/session.ts +12 -2
  125. package/src/instrument.ts +61 -1
  126. package/src/memory/admin.ts +2 -191
  127. package/src/memory/canonical-guardian-store.ts +38 -2
  128. package/src/memory/conversation-crud.ts +0 -33
  129. package/src/memory/conversation-queries.ts +22 -3
  130. package/src/memory/db-init.ts +28 -0
  131. package/src/memory/embedding-backend.ts +84 -8
  132. package/src/memory/embedding-types.ts +9 -1
  133. package/src/memory/indexer.ts +7 -46
  134. package/src/memory/items-extractor.ts +274 -76
  135. package/src/memory/job-handlers/backfill.ts +2 -127
  136. package/src/memory/job-handlers/cleanup.ts +2 -16
  137. package/src/memory/job-handlers/extraction.ts +2 -138
  138. package/src/memory/job-handlers/index-maintenance.ts +1 -6
  139. package/src/memory/job-handlers/summarization.ts +3 -148
  140. package/src/memory/job-utils.ts +21 -59
  141. package/src/memory/jobs-store.ts +1 -159
  142. package/src/memory/jobs-worker.ts +9 -52
  143. package/src/memory/migrations/104-core-indexes.ts +3 -3
  144. package/src/memory/migrations/149-oauth-tables.ts +2 -0
  145. package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
  146. package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
  147. package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
  148. package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
  149. package/src/memory/migrations/154-drop-fts.ts +20 -0
  150. package/src/memory/migrations/155-drop-conflicts.ts +7 -0
  151. package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
  152. package/src/memory/migrations/index.ts +7 -0
  153. package/src/memory/qdrant-client.ts +148 -51
  154. package/src/memory/raw-query.ts +1 -1
  155. package/src/memory/retriever.test.ts +294 -273
  156. package/src/memory/retriever.ts +421 -645
  157. package/src/memory/schema/calls.ts +2 -0
  158. package/src/memory/schema/memory-core.ts +3 -48
  159. package/src/memory/schema/oauth.ts +2 -0
  160. package/src/memory/search/formatting.ts +263 -176
  161. package/src/memory/search/lexical.ts +1 -254
  162. package/src/memory/search/ranking.ts +0 -455
  163. package/src/memory/search/semantic.ts +100 -14
  164. package/src/memory/search/staleness.ts +47 -0
  165. package/src/memory/search/tier-classifier.ts +21 -0
  166. package/src/memory/search/types.ts +15 -77
  167. package/src/memory/task-memory-cleanup.ts +4 -6
  168. package/src/messaging/providers/gmail/mime-builder.ts +17 -7
  169. package/src/oauth/byo-connection.test.ts +8 -1
  170. package/src/oauth/oauth-store.ts +113 -27
  171. package/src/oauth/seed-providers.ts +6 -0
  172. package/src/oauth/token-persistence.ts +11 -3
  173. package/src/permissions/defaults.ts +1 -0
  174. package/src/permissions/trust-store.ts +23 -1
  175. package/src/playbooks/playbook-compiler.ts +1 -1
  176. package/src/prompts/system-prompt.ts +18 -2
  177. package/src/providers/anthropic/client.ts +56 -126
  178. package/src/providers/types.ts +7 -1
  179. package/src/runtime/AGENTS.md +9 -0
  180. package/src/runtime/auth/route-policy.ts +6 -3
  181. package/src/runtime/guardian-reply-router.ts +24 -22
  182. package/src/runtime/http-server.ts +2 -2
  183. package/src/runtime/invite-redemption-service.ts +19 -1
  184. package/src/runtime/invite-service.ts +25 -0
  185. package/src/runtime/pending-interactions.ts +2 -2
  186. package/src/runtime/routes/brain-graph-routes.ts +10 -90
  187. package/src/runtime/routes/conversation-routes.ts +9 -1
  188. package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
  189. package/src/runtime/routes/memory-item-routes.test.ts +754 -0
  190. package/src/runtime/routes/memory-item-routes.ts +503 -0
  191. package/src/runtime/routes/session-management-routes.ts +3 -3
  192. package/src/runtime/routes/settings-routes.ts +2 -2
  193. package/src/runtime/routes/trust-rules-routes.ts +14 -0
  194. package/src/runtime/routes/workspace-routes.ts +2 -1
  195. package/src/security/keychain-broker-client.ts +17 -4
  196. package/src/security/secure-keys.ts +25 -3
  197. package/src/security/token-manager.ts +36 -36
  198. package/src/skills/catalog-install.ts +74 -18
  199. package/src/skills/skillssh-registry.ts +503 -0
  200. package/src/tools/assets/search.ts +5 -1
  201. package/src/tools/computer-use/definitions.ts +0 -10
  202. package/src/tools/computer-use/registry.ts +1 -1
  203. package/src/tools/credentials/vault.ts +1 -3
  204. package/src/tools/memory/definitions.ts +4 -13
  205. package/src/tools/memory/handlers.test.ts +83 -103
  206. package/src/tools/memory/handlers.ts +50 -85
  207. package/src/tools/schedule/create.ts +8 -1
  208. package/src/tools/schedule/update.ts +8 -1
  209. package/src/tools/skills/load.ts +25 -2
  210. package/src/__tests__/clarification-resolver.test.ts +0 -193
  211. package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
  212. package/src/__tests__/conflict-policy.test.ts +0 -269
  213. package/src/__tests__/conflict-store.test.ts +0 -372
  214. package/src/__tests__/contradiction-checker.test.ts +0 -361
  215. package/src/__tests__/entity-extractor.test.ts +0 -211
  216. package/src/__tests__/entity-search.test.ts +0 -1117
  217. package/src/__tests__/profile-compiler.test.ts +0 -392
  218. package/src/__tests__/session-conflict-gate.test.ts +0 -1228
  219. package/src/__tests__/session-profile-injection.test.ts +0 -557
  220. package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
  221. package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
  222. package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
  223. package/src/daemon/session-conflict-gate.ts +0 -167
  224. package/src/daemon/session-dynamic-profile.ts +0 -77
  225. package/src/memory/clarification-resolver.ts +0 -417
  226. package/src/memory/conflict-intent.ts +0 -205
  227. package/src/memory/conflict-policy.ts +0 -127
  228. package/src/memory/conflict-store.ts +0 -410
  229. package/src/memory/contradiction-checker.ts +0 -508
  230. package/src/memory/entity-extractor.ts +0 -535
  231. package/src/memory/format-recall.ts +0 -47
  232. package/src/memory/fts-reconciler.ts +0 -165
  233. package/src/memory/job-handlers/conflict.ts +0 -200
  234. package/src/memory/profile-compiler.ts +0 -195
  235. package/src/memory/recall-cache.ts +0 -117
  236. package/src/memory/search/entity.ts +0 -535
  237. package/src/memory/search/query-expansion.test.ts +0 -70
  238. package/src/memory/search/query-expansion.ts +0 -118
  239. package/src/runtime/routes/mcp-routes.ts +0 -20
@@ -0,0 +1,532 @@
1
+ /**
2
+ * End-to-end tests for the unified CU proxy flow.
3
+ *
4
+ * Tests the surfaceProxyResolver's CU tool routing — the integration
5
+ * point between the agent loop and the HostCuProxy.
6
+ */
7
+
8
+ import { afterEach, describe, expect, test } from "bun:test";
9
+
10
+ import { HostCuProxy } from "../daemon/host-cu-proxy.js";
11
+ import type { SurfaceSessionContext } from "../daemon/session-surfaces.js";
12
+ import { surfaceProxyResolver } from "../daemon/session-surfaces.js";
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Test helpers
16
+ // ---------------------------------------------------------------------------
17
+
18
/**
 * Creates a stub SurfaceSessionContext for the CU routing tests.
 *
 * Every member is inert (no-op callbacks, empty collections, fixed return
 * values) apart from `hostCuProxy`, which is passed through unchanged and is
 * left undefined when the caller omits it.
 *
 * @param hostCuProxy Optional proxy to expose on the context.
 * @returns A context whose conversationId is always "test-session".
 */
function buildMockContext(hostCuProxy?: HostCuProxy): SurfaceSessionContext {
  const noop = () => {};

  const stubContext: SurfaceSessionContext = {
    conversationId: "test-session",
    traceEmitter: { emit: noop },
    sendToClient: noop,

    // Surface bookkeeping starts out empty for every test.
    pendingSurfaceActions: new Map(),
    lastSurfaceAction: new Map(),
    surfaceState: new Map(),
    surfaceUndoStacks: new Map(),
    surfaceActionRequestIds: new Set(),
    currentTurnSurfaces: [],

    hostCuProxy: hostCuProxy,

    // Message-queue hooks report an idle session with nothing queued.
    isProcessing: () => false,
    enqueueMessage: () => ({ queued: false, requestId: "r1" }),
    getQueueDepth: () => 0,
    processMessage: async () => "",

    // Runs the callback directly; the surface id is ignored.
    withSurface: async (_id, fn) => fn(),
  };

  return stubContext;
}
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // Tests
44
+ // ---------------------------------------------------------------------------
45
+
46
+ describe("surfaceProxyResolver — CU tool routing", () => {
47
+ let sentMessages: unknown[];
48
+ let proxy: HostCuProxy;
49
+
50
/**
 * Resets the captured-message buffer and builds a HostCuProxy for one test.
 *
 * Everything the proxy sends is appended to `sentMessages`. The created proxy
 * is stored in the suite-level `proxy` binding so tests and the afterEach
 * hook can reach it.
 *
 * @param maxSteps Optional step limit forwarded to the HostCuProxy constructor.
 * @returns A mock context that exposes the new proxy.
 */
function setupProxy(maxSteps?: number): SurfaceSessionContext {
  sentMessages = [];
  const recordOutbound = (msg: unknown) => sentMessages.push(msg);

  proxy = new HostCuProxy(recordOutbound as never, undefined, maxSteps);
  // Flag the client as connected; otherwise requests are not sent.
  proxy.updateSender(recordOutbound as never, true);

  return buildMockContext(proxy);
}
58
+
59
// Dispose the suite-level proxy after every test, if one has been created.
afterEach(() => {
  if (proxy != null) {
    proxy.dispose();
  }
});
62
+
63
+ // -------------------------------------------------------------------------
64
+ // No desktop client connected
65
+ // -------------------------------------------------------------------------
66
+
67
/**
 * CU tools must come back as error results when no usable desktop client
 * exists: either the context carries no HostCuProxy, or the proxy was never
 * marked as connected.
 */
describe("no desktop client connected", () => {
  test("returns error when hostCuProxy is undefined", async () => {
    const ctx = buildMockContext(/* no proxy */);
    const result = await surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 42,
      reasoning: "click the button",
    });

    expect(result.isError).toBe(true);
    expect(result.content).toContain("not available");
    expect(result.content).toContain("no desktop client");
  });

  test("returns error for screenshot tool when no proxy", async () => {
    const ctx = buildMockContext();
    const result = await surfaceProxyResolver(
      ctx,
      "computer_use_screenshot",
      {},
    );

    expect(result.isError).toBe(true);
    expect(result.content).toContain("not available");
  });

  test("returns error when proxy exists but client not connected", async () => {
    const sendToClient = () => {};
    // Default clientConnected is false — do NOT call updateSender with true
    const proxyObj = new HostCuProxy(sendToClient as never);

    try {
      const ctx = buildMockContext(proxyObj);
      const result = await surfaceProxyResolver(ctx, "computer_use_click", {
        element_id: 1,
      });

      expect(result.isError).toBe(true);
      expect(result.content).toContain("not available");
    } finally {
      // This proxy is local to the test, so the suite's afterEach hook never
      // sees it. Dispose it here even when an assertion above throws.
      proxyObj.dispose();
    }
  });

  test("returns error for terminal tools when no proxy", async () => {
    const ctx = buildMockContext();

    // Both terminal tools are rejected when the context has no proxy.
    const doneResult = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "finished",
    });
    expect(doneResult.isError).toBe(true);

    const respondResult = await surfaceProxyResolver(
      ctx,
      "computer_use_respond",
      { answer: "42" },
    );
    expect(respondResult.isError).toBe(true);
  });
});
122
+
123
+ // -------------------------------------------------------------------------
124
+ // Terminal tools (computer_use_done, computer_use_respond)
125
+ // -------------------------------------------------------------------------
126
+
127
/**
 * Terminal tools (computer_use_done, computer_use_respond) are answered by
 * the resolver itself: nothing is sent to the client, and the proxy's step
 * counter and action history are cleared.
 */
describe("terminal tools resolve immediately", () => {
  test("computer_use_done resets proxy and returns summary", async () => {
    const ctx = setupProxy();

    // Seed two recorded steps so the reset below is observable.
    for (const elementId of [1, 2]) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }
    expect(proxy.stepCount).toBe(2);

    const outcome = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "Completed the file upload",
    });

    expect(outcome.isError).toBe(false);
    expect(outcome.content).toBe("Completed the file upload");

    // Terminal tools never reach the client...
    expect(sentMessages).toHaveLength(0);
    // ...and they leave the proxy with no steps and no history.
    expect(proxy.stepCount).toBe(0);
    expect(proxy.actionHistory).toHaveLength(0);
  });

  test("computer_use_respond resets proxy and returns answer", async () => {
    const ctx = setupProxy();

    proxy.recordAction("computer_use_click", { element_id: 1 });

    const respondInput = {
      answer: "The price is $42",
      reasoning: "Found the price on the page",
    };
    const outcome = await surfaceProxyResolver(
      ctx,
      "computer_use_respond",
      respondInput,
    );

    expect(outcome.isError).toBe(false);
    expect(outcome.content).toBe("The price is $42");
    expect(sentMessages).toHaveLength(0);
    expect(proxy.stepCount).toBe(0);
  });

  test("computer_use_done uses default when no summary provided", async () => {
    const ctx = setupProxy();

    const outcome = await surfaceProxyResolver(ctx, "computer_use_done", {});

    expect(outcome.isError).toBe(false);
    expect(outcome.content).toBe("Task complete");
  });

  // NOTE(review): despite its name, this test does not exercise a fallback.
  // It passes `summary` to computer_use_done and `answer` to
  // computer_use_respond, the same fields the earlier tests in this group
  // use. Confirm the resolver's intended fallback behaviour before renaming
  // or extending it.
  test("computer_use_respond falls back to summary then default", async () => {
    const ctx = setupProxy();

    const doneOutcome = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "All done",
    });
    expect(doneOutcome.content).toBe("All done");

    const respondOutcome = await surfaceProxyResolver(
      ctx,
      "computer_use_respond",
      { answer: "The answer is 7" },
    );
    expect(respondOutcome.content).toBe("The answer is 7");
  });
});
190
+
191
+ // -------------------------------------------------------------------------
192
+ // Action tools (computer_use_click, screenshot, etc.) — proxy to client
193
+ // -------------------------------------------------------------------------
194
+
195
/**
 * Action tools are forwarded to the client as `host_cu_request` messages.
 * Each test starts the resolver call, inspects the captured outbound message,
 * answers it through `proxy.resolve`, and then checks the awaited result.
 */
describe("action tools proxy to client", () => {
  test("computer_use_click routes through proxy and returns observation", async () => {
    const ctx = setupProxy();

    const pending = surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 42,
      reasoning: "Click the submit button",
    });

    // Exactly one request has been captured before the client answers.
    expect(sentMessages).toHaveLength(1);
    const outbound = sentMessages[0] as Record<string, unknown>;
    expect(outbound.type).toBe("host_cu_request");
    expect(outbound.toolName).toBe("computer_use_click");
    expect(outbound.input).toEqual({
      element_id: 42,
      reasoning: "Click the submit button",
    });
    expect(outbound.sessionId).toBe("test-session");

    // The step is already counted and logged while the request is pending.
    expect(proxy.stepCount).toBe(1);
    expect(proxy.actionHistory).toHaveLength(1);
    expect(proxy.actionHistory[0].toolName).toBe("computer_use_click");

    // Play the client's part: answer the request with an observation.
    const requestId = outbound.requestId as string;
    proxy.resolve(requestId, {
      axTree: "SubmitButton [1]\nTextField [2]",
      executionResult: "Clicked element 42",
    });

    const outcome = await pending;
    expect(outcome.isError).toBe(false);
    expect(outcome.content).toContain("Clicked element 42");
    expect(outcome.content).toContain("<ax-tree>");
    expect(outcome.content).toContain("SubmitButton [1]");
  });

  test("computer_use_screenshot routes through proxy", async () => {
    const ctx = setupProxy();

    const pending = surfaceProxyResolver(ctx, "computer_use_screenshot", {
      reasoning: "Capture current state",
    });

    expect(sentMessages).toHaveLength(1);
    const outbound = sentMessages[0] as Record<string, unknown>;
    expect(outbound.type).toBe("host_cu_request");
    expect(outbound.toolName).toBe("computer_use_screenshot");

    // Answer with a screenshot payload and its pixel dimensions.
    proxy.resolve(outbound.requestId as string, {
      axTree: "Window [1]",
      screenshot: "base64screenshot",
      screenshotWidthPx: 1920,
      screenshotHeightPx: 1080,
    });

    const outcome = await pending;
    expect(outcome.isError).toBe(false);
    expect(outcome.content).toContain("1920x1080 px");

    // The screenshot comes back as a single base64 JPEG image block.
    expect(outcome.contentBlocks).toHaveLength(1);
    expect(outcome.contentBlocks![0]).toEqual({
      type: "image",
      source: {
        type: "base64",
        media_type: "image/jpeg",
        data: "base64screenshot",
      },
    });
  });

  test("computer_use_type_text routes through proxy", async () => {
    const ctx = setupProxy();

    const pending = surfaceProxyResolver(ctx, "computer_use_type_text", {
      text: "Hello world",
      reasoning: "Type into search box",
    });

    const outbound = sentMessages[0] as Record<string, unknown>;
    expect(outbound.toolName).toBe("computer_use_type_text");
    expect(outbound.input).toEqual({
      text: "Hello world",
      reasoning: "Type into search box",
    });

    proxy.resolve(outbound.requestId as string, {
      axTree: "SearchBox [1] value='Hello world'",
      executionResult: "Typed text",
    });

    const outcome = await pending;
    expect(outcome.isError).toBe(false);
    expect(outcome.content).toContain("Typed text");
  });
});
295
+
296
+ // -------------------------------------------------------------------------
297
+ // Full proxy lifecycle (observe → click → done)
298
+ // -------------------------------------------------------------------------
299
+
300
/**
 * Walks one complete CU session through the resolver: two proxied actions
 * followed by the terminal computer_use_done call.
 */
describe("full proxy lifecycle", () => {
  test("observe → click → done sequence", async () => {
    const ctx = setupProxy();

    // --- Step 1: observe (screenshot) ---
    const observePending = surfaceProxyResolver(
      ctx,
      "computer_use_screenshot",
      { reasoning: "Check what's on screen" },
    );
    const observeRequest = sentMessages[0] as Record<string, unknown>;
    proxy.resolve(observeRequest.requestId as string, {
      axTree: "LoginButton [1]\nUsernameField [2]",
    });
    const observeOutcome = await observePending;
    expect(observeOutcome.isError).toBe(false);
    expect(observeOutcome.content).toContain("LoginButton [1]");
    expect(proxy.stepCount).toBe(1);

    // --- Step 2: click ---
    const clickPending = surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 1,
      reasoning: "Click login button",
    });
    const clickRequest = sentMessages[1] as Record<string, unknown>;
    proxy.resolve(clickRequest.requestId as string, {
      axTree: "PasswordField [1]\nSubmitButton [2]",
      axDiff: "+ PasswordField [1]\n+ SubmitButton [2]\n- LoginButton [1]",
      executionResult: "Clicked element 1",
    });
    const clickOutcome = await clickPending;
    expect(clickOutcome.isError).toBe(false);
    expect(clickOutcome.content).toContain("Clicked element 1");
    expect(clickOutcome.content).toContain("PasswordField [1]");
    expect(proxy.stepCount).toBe(2);

    // --- Step 3: done ---
    const doneOutcome = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "Logged in successfully",
    });
    expect(doneOutcome.isError).toBe(false);
    expect(doneOutcome.content).toBe("Logged in successfully");

    // After done the proxy holds no steps and no history.
    expect(proxy.stepCount).toBe(0);
    expect(proxy.actionHistory).toHaveLength(0);
    // Only the screenshot and the click were sent; done never reaches the client.
    expect(sentMessages).toHaveLength(2);
  });
});
348
+
349
+ // -------------------------------------------------------------------------
350
+ // Step limit enforced through resolver
351
+ // -------------------------------------------------------------------------
352
+
353
/**
 * With maxSteps = 2 and three actions already recorded, further action tools
 * are refused, while computer_use_done still succeeds and clears the counter.
 */
describe("step limit enforcement through resolver", () => {
  test("rejects action tools when step limit exceeded", async () => {
    const ctx = setupProxy(2); // maxSteps = 2

    // Three recorded clicks put the proxy past its two-step limit.
    for (const elementId of [1, 2, 3]) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }
    expect(proxy.stepCount).toBe(3);

    // The next action goes through the resolver and is rejected by the limit.
    const outcome = await surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 4,
      reasoning: "click",
    });

    expect(outcome.isError).toBe(true);
    expect(outcome.content).toContain("Step limit");
    expect(outcome.content).toContain("computer_use_done");
  });

  test("terminal tools still work after step limit exceeded", async () => {
    const ctx = setupProxy(2);

    for (const elementId of [1, 2, 3]) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }

    // computer_use_done is terminal, so it is not blocked by the limit.
    const outcome = await surfaceProxyResolver(ctx, "computer_use_done", {
      summary: "Stopped because step limit",
    });

    expect(outcome.isError).toBe(false);
    expect(outcome.content).toBe("Stopped because step limit");
    expect(proxy.stepCount).toBe(0);
  });
});
391
+
392
+ // -------------------------------------------------------------------------
393
+ // Error from client
394
+ // -------------------------------------------------------------------------
395
+
396
/**
 * An observation that carries `executionError` must come back from the
 * resolver as an error result that includes the client's message.
 */
describe("error from client observation", () => {
  test("returns error result when client reports execution error", async () => {
    const ctx = setupProxy();

    const pending = surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 999,
      reasoning: "click missing element",
    });

    // Answer the captured request with a failure report from the client.
    const outbound = sentMessages[0] as Record<string, unknown>;
    const requestId = outbound.requestId as string;
    proxy.resolve(requestId, {
      executionError: "Element 999 not found in AX tree",
      axTree: "Window [1]",
    });

    const outcome = await pending;
    expect(outcome.isError).toBe(true);
    expect(outcome.content).toContain("Action failed");
    expect(outcome.content).toContain("Element 999 not found");
  });
});
417
+
418
+ // -------------------------------------------------------------------------
419
+ // Reasoning propagation
420
+ // -------------------------------------------------------------------------
421
+
422
/**
 * The `reasoning` field of a tool input must appear both on the outbound
 * request message and on the proxy's recorded action history entry.
 */
describe("reasoning propagation", () => {
  test("reasoning from input is passed to proxy request", async () => {
    const ctx = setupProxy();

    const resultPromise = surfaceProxyResolver(ctx, "computer_use_key", {
      key: "Enter",
      reasoning: "Submit the form",
    });

    const sent = sentMessages[0] as Record<string, unknown>;
    expect(sent.reasoning).toBe("Submit the form");

    // Resolve to avoid unhandled rejection on dispose
    proxy.resolve(sent.requestId as string, { axTree: "..." });
    await resultPromise;
  });

  test("reasoning is recorded in action history", async () => {
    const ctx = setupProxy();

    // Keep the promise so it can be awaited below. Previously it was dropped,
    // which let the test finish, and afterEach dispose the proxy, while the
    // resolver call was still in flight.
    const resultPromise = surfaceProxyResolver(ctx, "computer_use_scroll", {
      direction: "down",
      amount: 3,
      reasoning: "Scroll to see more",
    });

    expect(proxy.actionHistory[0].reasoning).toBe("Scroll to see more");

    // Answer the pending request, then wait for the resolver to settle.
    const sent = sentMessages[0] as Record<string, unknown>;
    proxy.resolve(sent.requestId as string, { axTree: "..." });
    await resultPromise;
  });
});
455
+
456
+ // -------------------------------------------------------------------------
457
+ // Non-CU tools are not handled by CU routing
458
+ // -------------------------------------------------------------------------
459
+
460
/**
 * Tool names outside the computer_use_* family must not be handled by the CU
 * branch: ui_show must not produce the CU "not available" error, and an
 * unrecognised name is reported as an unknown proxy tool.
 */
describe("non-CU tools are not handled by CU routing", () => {
  test("ui_show is not affected by CU routing", async () => {
    const ctx = setupProxy();

    const uiShowInput = {
      surface_type: "confirmation",
      data: { message: "Are you sure?" },
    };
    const outcome = await surfaceProxyResolver(ctx, "ui_show", uiShowInput);

    // Whatever ui_show returns, it must not be the CU-unavailable message.
    expect(outcome.content).not.toContain("not available");
    expect(outcome.content).not.toContain("desktop client");
  });

  test("unknown tool returns error", async () => {
    const ctx = setupProxy();

    const outcome = await surfaceProxyResolver(ctx, "not_a_real_tool", {});

    expect(outcome.isError).toBe(true);
    expect(outcome.content).toContain("Unknown proxy tool");
  });
});
483
+
484
+ // -------------------------------------------------------------------------
485
+ // Multiple sequential CU actions accumulate state
486
+ // -------------------------------------------------------------------------
487
+
488
/**
 * Consecutive action tools accumulate on one proxy: the step counter grows
 * by one per action and the history keeps the tool names in call order.
 */
describe("state accumulation across actions", () => {
  test("step count increments across multiple actions", async () => {
    const ctx = setupProxy();

    // Each row is one action: the tool, its input, and the axTree the
    // simulated client answers with.
    const steps: Array<{
      tool: string;
      input: Record<string, unknown>;
      axTree: string;
    }> = [
      {
        tool: "computer_use_click",
        input: { element_id: 1, reasoning: "first" },
        axTree: "A",
      },
      {
        tool: "computer_use_type_text",
        input: { text: "hello", reasoning: "second" },
        axTree: "B",
      },
      {
        tool: "computer_use_scroll",
        input: { direction: "down", amount: 1, reasoning: "third" },
        axTree: "C",
      },
    ];

    for (const [index, step] of steps.entries()) {
      const pending = surfaceProxyResolver(ctx, step.tool, step.input);
      // The nth action produces the nth captured outbound message.
      const outbound = sentMessages[index] as Record<string, unknown>;
      proxy.resolve(outbound.requestId as string, { axTree: step.axTree });
      await pending;
      expect(proxy.stepCount).toBe(index + 1);
    }

    // History has all 3, in the order they were issued.
    expect(proxy.actionHistory).toHaveLength(3);
    expect(proxy.actionHistory.map((entry) => entry.toolName)).toEqual([
      "computer_use_click",
      "computer_use_type_text",
      "computer_use_scroll",
    ]);
  });
});
532
+ });