@vellumai/assistant 0.4.49 → 0.4.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +24 -33
- package/README.md +3 -3
- package/docs/architecture/memory.md +180 -119
- package/package.json +2 -2
- package/src/__tests__/agent-loop.test.ts +3 -1
- package/src/__tests__/anthropic-provider.test.ts +114 -23
- package/src/__tests__/approval-cascade.test.ts +1 -15
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-feature-flag-guard.test.ts +0 -23
- package/src/__tests__/canonical-guardian-store.test.ts +95 -0
- package/src/__tests__/checker.test.ts +13 -0
- package/src/__tests__/config-schema.test.ts +1 -68
- package/src/__tests__/context-memory-e2e.test.ts +11 -100
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +8 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-security-e2e.test.ts +1 -0
- package/src/__tests__/credential-vault-unit.test.ts +4 -0
- package/src/__tests__/credential-vault.test.ts +13 -1
- package/src/__tests__/cu-unified-flow.test.ts +532 -0
- package/src/__tests__/date-context.test.ts +93 -77
- package/src/__tests__/deterministic-verification-control-plane.test.ts +64 -0
- package/src/__tests__/guardian-routing-invariants.test.ts +93 -0
- package/src/__tests__/history-repair.test.ts +245 -0
- package/src/__tests__/host-cu-proxy.test.ts +165 -3
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/invite-redemption-service.test.ts +65 -1
- package/src/__tests__/keychain-broker-client.test.ts +4 -4
- package/src/__tests__/memory-context-benchmark.benchmark.test.ts +56 -18
- package/src/__tests__/memory-lifecycle-e2e.test.ts +244 -387
- package/src/__tests__/memory-recall-quality.test.ts +244 -407
- package/src/__tests__/memory-regressions.experimental.test.ts +126 -101
- package/src/__tests__/memory-regressions.test.ts +477 -2841
- package/src/__tests__/memory-retrieval.benchmark.test.ts +33 -150
- package/src/__tests__/memory-upsert-concurrency.test.ts +5 -244
- package/src/__tests__/mime-builder.test.ts +28 -0
- package/src/__tests__/native-web-search.test.ts +1 -0
- package/src/__tests__/oauth-cli.test.ts +572 -5
- package/src/__tests__/oauth-store.test.ts +120 -6
- package/src/__tests__/qdrant-collection-migration.test.ts +53 -8
- package/src/__tests__/registry.test.ts +0 -1
- package/src/__tests__/relay-server.test.ts +46 -1
- package/src/__tests__/schedule-tools.test.ts +32 -0
- package/src/__tests__/script-proxy-certs.test.ts +1 -1
- package/src/__tests__/secret-onetime-send.test.ts +1 -0
- package/src/__tests__/secure-keys.test.ts +7 -2
- package/src/__tests__/send-endpoint-busy.test.ts +3 -0
- package/src/__tests__/session-abort-tool-results.test.ts +1 -14
- package/src/__tests__/session-agent-loop-overflow.test.ts +1583 -0
- package/src/__tests__/session-agent-loop.test.ts +19 -15
- package/src/__tests__/session-confirmation-signals.test.ts +1 -15
- package/src/__tests__/session-error.test.ts +124 -2
- package/src/__tests__/session-history-web-search.test.ts +918 -0
- package/src/__tests__/session-pre-run-repair.test.ts +1 -14
- package/src/__tests__/session-provider-retry-repair.test.ts +25 -28
- package/src/__tests__/session-queue.test.ts +37 -27
- package/src/__tests__/session-runtime-assembly.test.ts +54 -0
- package/src/__tests__/session-slash-known.test.ts +1 -15
- package/src/__tests__/session-slash-queue.test.ts +1 -15
- package/src/__tests__/session-slash-unknown.test.ts +1 -15
- package/src/__tests__/session-workspace-cache-state.test.ts +3 -33
- package/src/__tests__/session-workspace-injection.test.ts +3 -37
- package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -37
- package/src/__tests__/skills-install-extract.test.ts +93 -0
- package/src/__tests__/skillssh-registry.test.ts +451 -0
- package/src/__tests__/trust-store.test.ts +15 -0
- package/src/__tests__/voice-invite-redemption.test.ts +32 -1
- package/src/agent/ax-tree-compaction.test.ts +51 -0
- package/src/agent/loop.ts +39 -12
- package/src/approvals/AGENTS.md +1 -1
- package/src/approvals/guardian-request-resolvers.ts +14 -2
- package/src/bundler/compiler-tools.ts +66 -2
- package/src/calls/call-domain.ts +132 -0
- package/src/calls/call-store.ts +6 -0
- package/src/calls/relay-server.ts +43 -5
- package/src/calls/relay-setup-router.ts +17 -1
- package/src/calls/twilio-config.ts +1 -1
- package/src/calls/types.ts +3 -1
- package/src/cli/commands/doctor.ts +4 -3
- package/src/cli/commands/mcp.ts +46 -59
- package/src/cli/commands/memory.ts +16 -165
- package/src/cli/commands/oauth/apps.ts +31 -2
- package/src/cli/commands/oauth/connections.ts +431 -97
- package/src/cli/commands/oauth/providers.ts +15 -1
- package/src/cli/commands/sessions.ts +5 -2
- package/src/cli/commands/skills.ts +173 -1
- package/src/cli/http-client.ts +0 -20
- package/src/cli/main-screen.tsx +2 -2
- package/src/cli/program.ts +5 -6
- package/src/cli.ts +4 -10
- package/src/config/bundled-skills/computer-use/TOOLS.json +1 -1
- package/src/config/bundled-skills/computer-use/tools/computer-use-observe.ts +12 -0
- package/src/config/bundled-tool-registry.ts +2 -5
- package/src/config/schema.ts +1 -12
- package/src/config/schemas/memory-lifecycle.ts +0 -9
- package/src/config/schemas/memory-processing.ts +0 -180
- package/src/config/schemas/memory-retrieval.ts +32 -104
- package/src/config/schemas/memory.ts +0 -10
- package/src/config/types.ts +0 -4
- package/src/context/window-manager.ts +4 -1
- package/src/daemon/config-watcher.ts +61 -3
- package/src/daemon/daemon-control.ts +1 -1
- package/src/daemon/date-context.ts +114 -31
- package/src/daemon/handlers/sessions.ts +18 -13
- package/src/daemon/handlers/skills.ts +20 -1
- package/src/daemon/history-repair.ts +72 -8
- package/src/daemon/host-cu-proxy.ts +55 -26
- package/src/daemon/lifecycle.ts +31 -3
- package/src/daemon/mcp-reload-service.ts +2 -2
- package/src/daemon/message-types/computer-use.ts +1 -12
- package/src/daemon/message-types/memory.ts +4 -16
- package/src/daemon/message-types/messages.ts +1 -0
- package/src/daemon/message-types/sessions.ts +4 -0
- package/src/daemon/server.ts +12 -1
- package/src/daemon/session-agent-loop-handlers.ts +38 -0
- package/src/daemon/session-agent-loop.ts +334 -48
- package/src/daemon/session-error.ts +89 -6
- package/src/daemon/session-history.ts +17 -7
- package/src/daemon/session-media-retry.ts +6 -2
- package/src/daemon/session-memory.ts +69 -149
- package/src/daemon/session-process.ts +10 -1
- package/src/daemon/session-runtime-assembly.ts +49 -19
- package/src/daemon/session-surfaces.ts +4 -1
- package/src/daemon/session-tool-setup.ts +7 -1
- package/src/daemon/session.ts +12 -2
- package/src/instrument.ts +61 -1
- package/src/memory/admin.ts +2 -191
- package/src/memory/canonical-guardian-store.ts +38 -2
- package/src/memory/conversation-crud.ts +0 -33
- package/src/memory/conversation-queries.ts +22 -3
- package/src/memory/db-init.ts +28 -0
- package/src/memory/embedding-backend.ts +84 -8
- package/src/memory/embedding-types.ts +9 -1
- package/src/memory/indexer.ts +7 -46
- package/src/memory/items-extractor.ts +274 -76
- package/src/memory/job-handlers/backfill.ts +2 -127
- package/src/memory/job-handlers/cleanup.ts +2 -16
- package/src/memory/job-handlers/extraction.ts +2 -138
- package/src/memory/job-handlers/index-maintenance.ts +1 -6
- package/src/memory/job-handlers/summarization.ts +3 -148
- package/src/memory/job-utils.ts +21 -59
- package/src/memory/jobs-store.ts +1 -159
- package/src/memory/jobs-worker.ts +9 -52
- package/src/memory/migrations/104-core-indexes.ts +3 -3
- package/src/memory/migrations/149-oauth-tables.ts +2 -0
- package/src/memory/migrations/150-oauth-apps-client-secret-path.ts +98 -0
- package/src/memory/migrations/151-oauth-providers-ping-url.ts +11 -0
- package/src/memory/migrations/152-memory-item-supersession.ts +44 -0
- package/src/memory/migrations/153-drop-entity-tables.ts +15 -0
- package/src/memory/migrations/154-drop-fts.ts +20 -0
- package/src/memory/migrations/155-drop-conflicts.ts +7 -0
- package/src/memory/migrations/156-call-session-invite-metadata.ts +24 -0
- package/src/memory/migrations/index.ts +7 -0
- package/src/memory/qdrant-client.ts +148 -51
- package/src/memory/raw-query.ts +1 -1
- package/src/memory/retriever.test.ts +294 -273
- package/src/memory/retriever.ts +421 -645
- package/src/memory/schema/calls.ts +2 -0
- package/src/memory/schema/memory-core.ts +3 -48
- package/src/memory/schema/oauth.ts +2 -0
- package/src/memory/search/formatting.ts +263 -176
- package/src/memory/search/lexical.ts +1 -254
- package/src/memory/search/ranking.ts +0 -455
- package/src/memory/search/semantic.ts +100 -14
- package/src/memory/search/staleness.ts +47 -0
- package/src/memory/search/tier-classifier.ts +21 -0
- package/src/memory/search/types.ts +15 -77
- package/src/memory/task-memory-cleanup.ts +4 -6
- package/src/messaging/providers/gmail/mime-builder.ts +17 -7
- package/src/oauth/byo-connection.test.ts +8 -1
- package/src/oauth/oauth-store.ts +113 -27
- package/src/oauth/seed-providers.ts +6 -0
- package/src/oauth/token-persistence.ts +11 -3
- package/src/permissions/defaults.ts +1 -0
- package/src/permissions/trust-store.ts +23 -1
- package/src/playbooks/playbook-compiler.ts +1 -1
- package/src/prompts/system-prompt.ts +18 -2
- package/src/providers/anthropic/client.ts +56 -126
- package/src/providers/types.ts +7 -1
- package/src/runtime/AGENTS.md +9 -0
- package/src/runtime/auth/route-policy.ts +6 -3
- package/src/runtime/guardian-reply-router.ts +24 -22
- package/src/runtime/http-server.ts +2 -2
- package/src/runtime/invite-redemption-service.ts +19 -1
- package/src/runtime/invite-service.ts +25 -0
- package/src/runtime/pending-interactions.ts +2 -2
- package/src/runtime/routes/brain-graph-routes.ts +10 -90
- package/src/runtime/routes/conversation-routes.ts +9 -1
- package/src/runtime/routes/inbound-stages/acl-enforcement.ts +21 -12
- package/src/runtime/routes/memory-item-routes.test.ts +754 -0
- package/src/runtime/routes/memory-item-routes.ts +503 -0
- package/src/runtime/routes/session-management-routes.ts +3 -3
- package/src/runtime/routes/settings-routes.ts +2 -2
- package/src/runtime/routes/trust-rules-routes.ts +14 -0
- package/src/runtime/routes/workspace-routes.ts +2 -1
- package/src/security/keychain-broker-client.ts +17 -4
- package/src/security/secure-keys.ts +25 -3
- package/src/security/token-manager.ts +36 -36
- package/src/skills/catalog-install.ts +74 -18
- package/src/skills/skillssh-registry.ts +503 -0
- package/src/tools/assets/search.ts +5 -1
- package/src/tools/computer-use/definitions.ts +0 -10
- package/src/tools/computer-use/registry.ts +1 -1
- package/src/tools/credentials/vault.ts +1 -3
- package/src/tools/memory/definitions.ts +4 -13
- package/src/tools/memory/handlers.test.ts +83 -103
- package/src/tools/memory/handlers.ts +50 -85
- package/src/tools/schedule/create.ts +8 -1
- package/src/tools/schedule/update.ts +8 -1
- package/src/tools/skills/load.ts +25 -2
- package/src/__tests__/clarification-resolver.test.ts +0 -193
- package/src/__tests__/conflict-intent-tokenization.test.ts +0 -160
- package/src/__tests__/conflict-policy.test.ts +0 -269
- package/src/__tests__/conflict-store.test.ts +0 -372
- package/src/__tests__/contradiction-checker.test.ts +0 -361
- package/src/__tests__/entity-extractor.test.ts +0 -211
- package/src/__tests__/entity-search.test.ts +0 -1117
- package/src/__tests__/profile-compiler.test.ts +0 -392
- package/src/__tests__/session-conflict-gate.test.ts +0 -1228
- package/src/__tests__/session-profile-injection.test.ts +0 -557
- package/src/config/bundled-skills/knowledge-graph/SKILL.md +0 -25
- package/src/config/bundled-skills/knowledge-graph/TOOLS.json +0 -66
- package/src/config/bundled-skills/knowledge-graph/tools/graph-query.ts +0 -211
- package/src/daemon/session-conflict-gate.ts +0 -167
- package/src/daemon/session-dynamic-profile.ts +0 -77
- package/src/memory/clarification-resolver.ts +0 -417
- package/src/memory/conflict-intent.ts +0 -205
- package/src/memory/conflict-policy.ts +0 -127
- package/src/memory/conflict-store.ts +0 -410
- package/src/memory/contradiction-checker.ts +0 -508
- package/src/memory/entity-extractor.ts +0 -535
- package/src/memory/format-recall.ts +0 -47
- package/src/memory/fts-reconciler.ts +0 -165
- package/src/memory/job-handlers/conflict.ts +0 -200
- package/src/memory/profile-compiler.ts +0 -195
- package/src/memory/recall-cache.ts +0 -117
- package/src/memory/search/entity.ts +0 -535
- package/src/memory/search/query-expansion.test.ts +0 -70
- package/src/memory/search/query-expansion.ts +0 -118
- package/src/runtime/routes/mcp-routes.ts +0 -20
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* End-to-end tests for the unified CU proxy flow.
|
|
3
|
+
*
|
|
4
|
+
* Tests the surfaceProxyResolver's CU tool routing — the integration
|
|
5
|
+
* point between the agent loop and the HostCuProxy.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { afterEach, describe, expect, test } from "bun:test";
|
|
9
|
+
|
|
10
|
+
import { HostCuProxy } from "../daemon/host-cu-proxy.js";
|
|
11
|
+
import type { SurfaceSessionContext } from "../daemon/session-surfaces.js";
|
|
12
|
+
import { surfaceProxyResolver } from "../daemon/session-surfaces.js";
|
|
13
|
+
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Test helpers
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
/**
 * Creates a stub SurfaceSessionContext for exercising the CU routing path.
 *
 * Every collection starts out empty and every callback is a no-op or returns
 * a fixed value. The only caller-controlled field is `hostCuProxy`, which
 * stays undefined when the argument is omitted.
 */
function buildMockContext(hostCuProxy?: HostCuProxy): SurfaceSessionContext {
  const context: SurfaceSessionContext = {
    conversationId: "test-session",
    traceEmitter: { emit() {} },
    sendToClient() {},
    pendingSurfaceActions: new Map(),
    lastSurfaceAction: new Map(),
    surfaceState: new Map(),
    surfaceUndoStacks: new Map(),
    surfaceActionRequestIds: new Set(),
    currentTurnSurfaces: [],
    hostCuProxy,
    isProcessing() {
      return false;
    },
    enqueueMessage() {
      return { queued: false, requestId: "r1" };
    },
    getQueueDepth() {
      return 0;
    },
    async processMessage() {
      return "";
    },
    // Runs the callback directly; the surface id is ignored.
    async withSurface(_id, fn) {
      return fn();
    },
  };
  return context;
}
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Tests
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
describe("surfaceProxyResolver — CU tool routing", () => {
|
|
47
|
+
let sentMessages: unknown[];
|
|
48
|
+
let proxy: HostCuProxy;
|
|
49
|
+
|
|
50
|
+
/**
 * Resets the captured-message log, builds a fresh HostCuProxy whose sender
 * appends to that log, flags the client as connected, and returns a mock
 * context wired to the new proxy.
 *
 * @param maxSteps Optional value forwarded as the third HostCuProxy constructor argument.
 */
function setupProxy(maxSteps?: number): SurfaceSessionContext {
  sentMessages = [];

  function capture(message: unknown): number {
    return sentMessages.push(message);
  }

  proxy = new HostCuProxy(capture as never, undefined, maxSteps);
  // Flag the client as connected so requests are actually sent.
  proxy.updateSender(capture as never, true);

  return buildMockContext(proxy);
}
|
|
58
|
+
|
|
59
|
+
// Dispose whichever proxy the most recent setupProxy() call created, if any.
afterEach(() => {
  if (proxy) {
    proxy.dispose();
  }
});
|
|
62
|
+
|
|
63
|
+
// -------------------------------------------------------------------------
|
|
64
|
+
// No desktop client connected
|
|
65
|
+
// -------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
describe("no desktop client connected", () => {
|
|
68
|
+
// A context without any proxy must reject a click with an explanatory message.
test("returns error when hostCuProxy is undefined", async () => {
  const clickInput = { element_id: 42, reasoning: "click the button" };

  const { isError, content } = await surfaceProxyResolver(
    buildMockContext(/* no proxy */),
    "computer_use_click",
    clickInput,
  );

  expect(isError).toBe(true);
  expect(content).toContain("not available");
  expect(content).toContain("no desktop client");
});
|
|
79
|
+
|
|
80
|
+
// Same missing-proxy check as for click, but for the screenshot tool.
test("returns error for screenshot tool when no proxy", async () => {
  const outcome = await surfaceProxyResolver(
    buildMockContext(),
    "computer_use_screenshot",
    {},
  );

  expect(outcome.isError).toBe(true);
  expect(outcome.content).toContain("not available");
});
|
|
91
|
+
|
|
92
|
+
// A proxy that was never told a client is connected must behave like no proxy.
test("returns error when proxy exists but client not connected", async () => {
  const sendToClient = () => {};
  // Default clientConnected is false — do NOT call updateSender with true
  const proxyObj = new HostCuProxy(sendToClient as never);

  try {
    const ctx = buildMockContext(proxyObj);
    const result = await surfaceProxyResolver(ctx, "computer_use_click", {
      element_id: 1,
    });

    expect(result.isError).toBe(true);
    expect(result.content).toContain("not available");
  } finally {
    // This proxy is local to the test (afterEach only disposes the shared
    // one), so dispose it even when an assertion above throws.
    proxyObj.dispose();
  }
});
|
|
105
|
+
|
|
106
|
+
// Both terminal tools must also report an error when the context has no proxy.
test("returns error for terminal tools when no proxy", async () => {
  const ctx = buildMockContext();
  const terminalCalls: Array<[string, Record<string, unknown>]> = [
    ["computer_use_done", { summary: "finished" }],
    ["computer_use_respond", { answer: "42" }],
  ];

  // Run the calls one after another, in the listed order.
  for (const [toolName, input] of terminalCalls) {
    const outcome = await surfaceProxyResolver(ctx, toolName, input);
    expect(outcome.isError).toBe(true);
  }
});
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
// -------------------------------------------------------------------------
|
|
124
|
+
// Terminal tools (computer_use_done, computer_use_respond)
|
|
125
|
+
// -------------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
describe("terminal tools resolve immediately", () => {
|
|
128
|
+
// `computer_use_done` echoes the summary, sends nothing, and clears proxy state.
test("computer_use_done resets proxy and returns summary", async () => {
  const ctx = setupProxy();

  // Seed two recorded steps so the reset is observable afterwards.
  for (const elementId of [1, 2]) {
    proxy.recordAction("computer_use_click", { element_id: elementId });
  }
  expect(proxy.stepCount).toBe(2);

  const summary = "Completed the file upload";
  const { isError, content } = await surfaceProxyResolver(
    ctx,
    "computer_use_done",
    { summary },
  );

  expect(isError).toBe(false);
  expect(content).toBe(summary);
  // Terminal tools do not produce a client message.
  expect(sentMessages).toHaveLength(0);
  // Step counter and history are both cleared.
  expect(proxy.stepCount).toBe(0);
  expect(proxy.actionHistory).toHaveLength(0);
});
|
|
148
|
+
|
|
149
|
+
// `computer_use_respond` echoes the answer, sends nothing, and resets the step count.
test("computer_use_respond resets proxy and returns answer", async () => {
  const ctx = setupProxy();
  proxy.recordAction("computer_use_click", { element_id: 1 });

  const respondInput = {
    answer: "The price is $42",
    reasoning: "Found the price on the page",
  };
  const outcome = await surfaceProxyResolver(
    ctx,
    "computer_use_respond",
    respondInput,
  );

  expect(outcome.isError).toBe(false);
  expect(outcome.content).toBe("The price is $42");
  expect(sentMessages).toHaveLength(0);
  expect(proxy.stepCount).toBe(0);
});
|
|
164
|
+
|
|
165
|
+
// With an empty input, `computer_use_done` returns its default text.
test("computer_use_done uses default when no summary provided", async () => {
  const { isError, content } = await surfaceProxyResolver(
    setupProxy(),
    "computer_use_done",
    {},
  );

  expect(isError).toBe(false);
  expect(content).toBe("Task complete");
});
|
|
173
|
+
|
|
174
|
+
// NOTE(review): despite the title, neither call below invokes
// computer_use_respond without `answer`, so the summary/default fallback it
// names is not exercised here — confirm the intended contract and extend.
test("computer_use_respond falls back to summary then default", async () => {
  const ctx = setupProxy();

  // The done tool given a summary returns that summary.
  const doneOutcome = await surfaceProxyResolver(ctx, "computer_use_done", {
    summary: "All done",
  });
  expect(doneOutcome.content).toBe("All done");

  // The respond tool given an answer returns that answer.
  const respondOutcome = await surfaceProxyResolver(
    ctx,
    "computer_use_respond",
    { answer: "The answer is 7" },
  );
  expect(respondOutcome.content).toBe("The answer is 7");
});
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
// -------------------------------------------------------------------------
|
|
192
|
+
// Action tools (computer_use_click, screenshot, etc.) — proxy to client
|
|
193
|
+
// -------------------------------------------------------------------------
|
|
194
|
+
|
|
195
|
+
describe("action tools proxy to client", () => {
|
|
196
|
+
// A click is forwarded to the client as a host_cu_request, recorded as a
// step, and the client's observation is returned as the tool result.
test("computer_use_click routes through proxy and returns observation", async () => {
  const ctx = setupProxy();
  const clickInput = { element_id: 42, reasoning: "Click the submit button" };

  // Pass a copy so the comparison below is against an untouched object.
  const pending = surfaceProxyResolver(ctx, "computer_use_click", {
    ...clickInput,
  });

  // Exactly one request should have reached the client by now.
  expect(sentMessages).toHaveLength(1);
  const request = sentMessages[0] as Record<string, unknown>;
  expect(request.type).toBe("host_cu_request");
  expect(request.toolName).toBe("computer_use_click");
  expect(request.input).toEqual(clickInput);
  expect(request.sessionId).toBe("test-session");

  // The proxy has recorded the action.
  expect(proxy.stepCount).toBe(1);
  expect(proxy.actionHistory).toHaveLength(1);
  expect(proxy.actionHistory[0].toolName).toBe("computer_use_click");

  // Simulate the client's reply for this request.
  proxy.resolve(request.requestId as string, {
    axTree: "SubmitButton [1]\nTextField [2]",
    executionResult: "Clicked element 42",
  });

  const { isError, content } = await pending;
  expect(isError).toBe(false);
  for (const fragment of ["Clicked element 42", "<ax-tree>", "SubmitButton [1]"]) {
    expect(content).toContain(fragment);
  }
});
|
|
233
|
+
|
|
234
|
+
// A screenshot request is forwarded, and the returned image is surfaced as a
// single base64 JPEG content block alongside the pixel dimensions.
test("computer_use_screenshot routes through proxy", async () => {
  const ctx = setupProxy();

  const pending = surfaceProxyResolver(ctx, "computer_use_screenshot", {
    reasoning: "Capture current state",
  });

  expect(sentMessages).toHaveLength(1);
  const request = sentMessages[0] as Record<string, unknown>;
  expect(request.type).toBe("host_cu_request");
  expect(request.toolName).toBe("computer_use_screenshot");

  // Simulate the client's reply, including screenshot data and size.
  const screenshot = "base64screenshot";
  proxy.resolve(request.requestId as string, {
    axTree: "Window [1]",
    screenshot,
    screenshotWidthPx: 1920,
    screenshotHeightPx: 1080,
  });

  const outcome = await pending;
  expect(outcome.isError).toBe(false);
  expect(outcome.content).toContain("1920x1080 px");
  expect(outcome.contentBlocks).toHaveLength(1);
  expect(outcome.contentBlocks![0]).toEqual({
    type: "image",
    source: { type: "base64", media_type: "image/jpeg", data: screenshot },
  });
});
|
|
268
|
+
|
|
269
|
+
// Typing text is forwarded to the client with its input intact, and the
// client's execution result is returned as the tool result.
test("computer_use_type_text routes through proxy", async () => {
  const ctx = setupProxy();

  const resultPromise = surfaceProxyResolver(
    ctx,
    "computer_use_type_text",
    { text: "Hello world", reasoning: "Type into search box" },
  );

  // Assert that a request was sent before indexing into the log, as the
  // sibling click/screenshot tests do; otherwise a missing request would
  // surface as a TypeError on `sent` rather than a clear assertion failure.
  expect(sentMessages).toHaveLength(1);
  const sent = sentMessages[0] as Record<string, unknown>;
  expect(sent.toolName).toBe("computer_use_type_text");
  expect(sent.input).toEqual({
    text: "Hello world",
    reasoning: "Type into search box",
  });

  // Simulate the client's reply for this request.
  proxy.resolve(sent.requestId as string, {
    axTree: "SearchBox [1] value='Hello world'",
    executionResult: "Typed text",
  });

  const result = await resultPromise;
  expect(result.isError).toBe(false);
  expect(result.content).toContain("Typed text");
});
|
|
294
|
+
});
|
|
295
|
+
|
|
296
|
+
// -------------------------------------------------------------------------
|
|
297
|
+
// Full proxy lifecycle (observe → click → done)
|
|
298
|
+
// -------------------------------------------------------------------------
|
|
299
|
+
|
|
300
|
+
describe("full proxy lifecycle", () => {
|
|
301
|
+
// Walks one complete session: two proxied actions followed by a terminal
// `computer_use_done`, checking the step count and message log along the way.
test("observe → click → done sequence", async () => {
  const ctx = setupProxy();

  // Step 1 — observe: request a screenshot and reply with an AX tree.
  const observePending = surfaceProxyResolver(ctx, "computer_use_screenshot", {
    reasoning: "Check what's on screen",
  });
  const observeRequest = sentMessages[0] as Record<string, unknown>;
  proxy.resolve(observeRequest.requestId as string, {
    axTree: "LoginButton [1]\nUsernameField [2]",
  });
  const observed = await observePending;
  expect(observed.isError).toBe(false);
  expect(observed.content).toContain("LoginButton [1]");
  expect(proxy.stepCount).toBe(1);

  // Step 2 — click: reply with the updated tree, a diff and a result string.
  const clickPending = surfaceProxyResolver(ctx, "computer_use_click", {
    element_id: 1,
    reasoning: "Click login button",
  });
  const clickRequest = sentMessages[1] as Record<string, unknown>;
  proxy.resolve(clickRequest.requestId as string, {
    axTree: "PasswordField [1]\nSubmitButton [2]",
    axDiff: "+ PasswordField [1]\n+ SubmitButton [2]\n- LoginButton [1]",
    executionResult: "Clicked element 1",
  });
  const clicked = await clickPending;
  expect(clicked.isError).toBe(false);
  expect(clicked.content).toContain("Clicked element 1");
  expect(clicked.content).toContain("PasswordField [1]");
  expect(proxy.stepCount).toBe(2);

  // Step 3 — done: awaited directly, with no simulated client reply.
  const finished = await surfaceProxyResolver(ctx, "computer_use_done", {
    summary: "Logged in successfully",
  });
  expect(finished.isError).toBe(false);
  expect(finished.content).toBe("Logged in successfully");

  // After done the proxy is back to a clean state.
  expect(proxy.stepCount).toBe(0);
  expect(proxy.actionHistory).toHaveLength(0);
  // Only the screenshot and the click were sent; done added no message.
  expect(sentMessages).toHaveLength(2);
});
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
// -------------------------------------------------------------------------
|
|
350
|
+
// Step limit enforced through resolver
|
|
351
|
+
// -------------------------------------------------------------------------
|
|
352
|
+
|
|
353
|
+
describe("step limit enforcement through resolver", () => {
|
|
354
|
+
// Once more steps are recorded than maxSteps allows, a further action tool
// call must fail and point the caller at `computer_use_done`.
test("rejects action tools when step limit exceeded", async () => {
  const ctx = setupProxy(2); // maxSteps = 2

  // Record three clicks directly to push the counter past the limit.
  for (const elementId of [1, 2, 3]) {
    proxy.recordAction("computer_use_click", { element_id: elementId });
  }
  expect(proxy.stepCount).toBe(3);

  // Going through the resolver reaches the proxy's step-limit check.
  const { isError, content } = await surfaceProxyResolver(
    ctx,
    "computer_use_click",
    { element_id: 4, reasoning: "click" },
  );

  expect(isError).toBe(true);
  expect(content).toContain("Step limit");
  expect(content).toContain("computer_use_done");
});
|
|
373
|
+
|
|
374
|
+
// Exceeding the step limit must not prevent the session from being finished.
test("terminal tools still work after step limit exceeded", async () => {
  const ctx = setupProxy(2);

  for (const elementId of [1, 2, 3]) {
    proxy.recordAction("computer_use_click", { element_id: elementId });
  }

  // computer_use_done is terminal, so it is expected to succeed regardless.
  const summary = "Stopped because step limit";
  const outcome = await surfaceProxyResolver(ctx, "computer_use_done", {
    summary,
  });

  expect(outcome.isError).toBe(false);
  expect(outcome.content).toBe(summary);
  expect(proxy.stepCount).toBe(0);
});
|
|
390
|
+
});
|
|
391
|
+
|
|
392
|
+
// -------------------------------------------------------------------------
|
|
393
|
+
// Error from client
|
|
394
|
+
// -------------------------------------------------------------------------
|
|
395
|
+
|
|
396
|
+
describe("error from client observation", () => {
|
|
397
|
+
// An `executionError` in the client's reply must surface as an error result
// that carries the client's message.
test("returns error result when client reports execution error", async () => {
  const ctx = setupProxy();

  const pending = surfaceProxyResolver(ctx, "computer_use_click", {
    element_id: 999,
    reasoning: "click missing element",
  });

  // Simulate the client failing to execute the action.
  const request = sentMessages[0] as Record<string, unknown>;
  proxy.resolve(request.requestId as string, {
    executionError: "Element 999 not found in AX tree",
    axTree: "Window [1]",
  });

  const { isError, content } = await pending;
  expect(isError).toBe(true);
  expect(content).toContain("Action failed");
  expect(content).toContain("Element 999 not found");
});
|
|
416
|
+
});
|
|
417
|
+
|
|
418
|
+
// -------------------------------------------------------------------------
|
|
419
|
+
// Reasoning propagation
|
|
420
|
+
// -------------------------------------------------------------------------
|
|
421
|
+
|
|
422
|
+
describe("reasoning propagation", () => {
|
|
423
|
+
// The `reasoning` input field must appear as a top-level field on the
// request sent to the client.
test("reasoning from input is passed to proxy request", async () => {
  const ctx = setupProxy();
  const reasoning = "Submit the form";

  const pending = surfaceProxyResolver(ctx, "computer_use_key", {
    key: "Enter",
    reasoning,
  });

  const request = sentMessages[0] as Record<string, unknown>;
  expect(request.reasoning).toBe(reasoning);

  // Settle the call so disposing the proxy does not cause an unhandled rejection.
  proxy.resolve(request.requestId as string, { axTree: "..." });
  await pending;
});
|
|
438
|
+
|
|
439
|
+
// The `reasoning` input field must be stored on the action-history entry
// that the proxy records for the call.
test("reasoning is recorded in action history", async () => {
  const ctx = setupProxy();

  // Keep the promise so the call can be awaited once the reply is simulated.
  const resultPromise = surfaceProxyResolver(ctx, "computer_use_scroll", {
    direction: "down",
    amount: 3,
    reasoning: "Scroll to see more",
  });

  expect(proxy.actionHistory[0].reasoning).toBe("Scroll to see more");

  // Simulate the client's reply so the pending request can settle.
  const sent = sentMessages[0] as Record<string, unknown>;
  proxy.resolve(sent.requestId as string, { axTree: "..." });

  // Await the result: a floating promise could outlive the test and race
  // afterEach's dispose(), and any rejection would go unobserved.
  await resultPromise;
});
|
|
454
|
+
});
|
|
455
|
+
|
|
456
|
+
// -------------------------------------------------------------------------
|
|
457
|
+
// Non-CU tools are not handled by CU routing
|
|
458
|
+
// -------------------------------------------------------------------------
|
|
459
|
+
|
|
460
|
+
describe("non-CU tools are not handled by CU routing", () => {
|
|
461
|
+
// A non-CU surface tool must not receive the CU "no desktop client" errors.
test("ui_show is not affected by CU routing", async () => {
  const ctx = setupProxy();

  const { content } = await surfaceProxyResolver(ctx, "ui_show", {
    surface_type: "confirmation",
    data: { message: "Are you sure?" },
  });

  // Neither CU-path error fragment may appear in the result.
  for (const cuErrorFragment of ["not available", "desktop client"]) {
    expect(content).not.toContain(cuErrorFragment);
  }
});
|
|
473
|
+
|
|
474
|
+
// A tool name the resolver does not recognise yields an explicit error.
test("unknown tool returns error", async () => {
  const { isError, content } = await surfaceProxyResolver(
    setupProxy(),
    "not_a_real_tool",
    {},
  );

  expect(isError).toBe(true);
  expect(content).toContain("Unknown proxy tool");
});
|
|
482
|
+
});
|
|
483
|
+
|
|
484
|
+
// -------------------------------------------------------------------------
|
|
485
|
+
// Multiple sequential CU actions accumulate state
|
|
486
|
+
// -------------------------------------------------------------------------
|
|
487
|
+
|
|
488
|
+
describe("state accumulation across actions", () => {
|
|
489
|
+
// Three sequential proxied actions must each bump the step count by one and
// be appended to the action history in call order.
test("step count increments across multiple actions", async () => {
  const ctx = setupProxy();

  const steps: Array<{
    tool: string;
    input: Record<string, unknown>;
    axTree: string;
  }> = [
    {
      tool: "computer_use_click",
      input: { element_id: 1, reasoning: "first" },
      axTree: "A",
    },
    {
      tool: "computer_use_type_text",
      input: { text: "hello", reasoning: "second" },
      axTree: "B",
    },
    {
      tool: "computer_use_scroll",
      input: { direction: "down", amount: 1, reasoning: "third" },
      axTree: "C",
    },
  ];

  // Issue each action, answer it as the client would, then check the counter.
  for (let index = 0; index < steps.length; index += 1) {
    const step = steps[index];
    const pending = surfaceProxyResolver(ctx, step.tool, step.input);
    const request = sentMessages[index] as Record<string, unknown>;
    proxy.resolve(request.requestId as string, { axTree: step.axTree });
    await pending;
    expect(proxy.stepCount).toBe(index + 1);
  }

  // The history holds all three actions in the order they were issued.
  expect(proxy.actionHistory).toHaveLength(3);
  expect(proxy.actionHistory.map((entry) => entry.toolName)).toEqual([
    "computer_use_click",
    "computer_use_type_text",
    "computer_use_scroll",
  ]);
});
|
|
531
|
+
});
|
|
532
|
+
});
|