@vellumai/assistant 0.4.48 → 0.4.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +2 -2
- package/README.md +2 -23
- package/docs/architecture/integrations.md +45 -41
- package/docs/architecture/keychain-broker.md +3 -3
- package/docs/runbook-trusted-contacts.md +3 -8
- package/hook-templates/debug-prompt-logger/hook.json +1 -1
- package/hook-templates/debug-prompt-logger/run.sh +1 -3
- package/package.json +1 -1
- package/src/__tests__/actor-token-service.test.ts +0 -1
- package/src/__tests__/anthropic-provider.test.ts +156 -0
- package/src/__tests__/approval-cascade.test.ts +810 -0
- package/src/__tests__/approval-primitive.test.ts +0 -1
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-attachments.test.ts +12 -34
- package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
- package/src/__tests__/channel-guardian.test.ts +0 -2
- package/src/__tests__/channel-readiness-routes.test.ts +15 -6
- package/src/__tests__/channel-readiness-service.test.ts +10 -9
- package/src/__tests__/checker.test.ts +9 -29
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
- package/src/__tests__/computer-use-tools.test.ts +2 -19
- package/src/__tests__/config-watcher.test.ts +0 -1
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
- package/src/__tests__/context-image-dimensions.test.ts +332 -0
- package/src/__tests__/context-token-estimator.test.ts +196 -13
- package/src/__tests__/conversation-attention-store.test.ts +0 -1
- package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +144 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-metadata-store.test.ts +64 -73
- package/src/__tests__/credential-security-invariants.test.ts +13 -7
- package/src/__tests__/credential-vault-unit.test.ts +280 -49
- package/src/__tests__/credential-vault.test.ts +138 -16
- package/src/__tests__/credentials-cli.test.ts +71 -0
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
- package/src/__tests__/ephemeral-permissions.test.ts +3 -3
- package/src/__tests__/gateway-only-guard.test.ts +0 -1
- package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
- package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
- package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
- package/src/__tests__/heartbeat-service.test.ts +0 -1
- package/src/__tests__/host-cu-proxy.test.ts +629 -0
- package/src/__tests__/host-shell-tool.test.ts +27 -15
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/ingress-url-consistency.test.ts +14 -21
- package/src/__tests__/integration-status.test.ts +32 -51
- package/src/__tests__/intent-routing.test.ts +0 -1
- package/src/__tests__/invite-routes-http.test.ts +10 -9
- package/src/__tests__/keychain-broker-client.test.ts +11 -43
- package/src/__tests__/notification-routing-intent.test.ts +0 -1
- package/src/__tests__/oauth-cli.test.ts +373 -14
- package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
- package/src/__tests__/oauth-scope-policy.test.ts +4 -6
- package/src/__tests__/oauth-store.test.ts +756 -0
- package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
- package/src/__tests__/provider-error-scenarios.test.ts +0 -1
- package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
- package/src/__tests__/public-ingress-urls.test.ts +15 -21
- package/src/__tests__/recording-handler.test.ts +3 -4
- package/src/__tests__/registry.test.ts +2 -2
- package/src/__tests__/runtime-events-sse.test.ts +55 -7
- package/src/__tests__/schedule-store.test.ts +0 -1
- package/src/__tests__/scheduler-recurrence.test.ts +0 -1
- package/src/__tests__/scoped-approval-grants.test.ts +0 -1
- package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
- package/src/__tests__/secret-ingress-handler.test.ts +0 -1
- package/src/__tests__/send-endpoint-busy.test.ts +21 -6
- package/src/__tests__/sequence-store.test.ts +0 -1
- package/src/__tests__/session-init.benchmark.test.ts +4 -5
- package/src/__tests__/skill-include-graph.test.ts +66 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
- package/src/__tests__/skill-load-tool.test.ts +149 -1
- package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
- package/src/__tests__/skills-uninstall.test.ts +1 -1
- package/src/__tests__/skills.test.ts +3 -3
- package/src/__tests__/slack-channel-config.test.ts +67 -3
- package/src/__tests__/slack-share-routes.test.ts +17 -19
- package/src/__tests__/system-prompt.test.ts +0 -1
- package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
- package/src/__tests__/terminal-tools.test.ts +4 -3
- package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
- package/src/__tests__/tool-approval-handler.test.ts +0 -1
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
- package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
- package/src/__tests__/tool-executor.test.ts +0 -1
- package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
- package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
- package/src/__tests__/trust-store.test.ts +1 -22
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
- package/src/__tests__/twilio-routes.test.ts +0 -16
- package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
- package/src/agent/ax-tree-compaction.test.ts +235 -0
- package/src/agent/loop.ts +76 -130
- package/src/calls/call-domain.ts +1 -6
- package/src/calls/relay-server.ts +9 -13
- package/src/calls/twilio-config.ts +2 -7
- package/src/calls/twilio-routes.ts +1 -2
- package/src/calls/voice-ingress-preflight.ts +1 -1
- package/src/cli/commands/browser-relay.ts +18 -12
- package/src/cli/commands/completions.ts +0 -3
- package/src/cli/commands/credentials.ts +101 -15
- package/src/cli/commands/oauth/apps.ts +255 -0
- package/src/cli/commands/oauth/connections.ts +299 -0
- package/src/cli/commands/oauth/index.ts +52 -0
- package/src/cli/commands/oauth/providers.ts +242 -0
- package/src/cli/commands/skills.ts +4 -338
- package/src/cli/program.ts +1 -5
- package/src/cli/reference.ts +1 -3
- package/src/config/assistant-feature-flags.ts +0 -3
- package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
- package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
- package/src/config/bundled-skills/computer-use/TOOLS.json +22 -4
- package/src/config/bundled-skills/google-calendar/calendar-client.ts +21 -16
- package/src/config/bundled-skills/messaging/tools/shared.ts +1 -4
- package/src/config/bundled-skills/settings/SKILL.md +1 -1
- package/src/config/bundled-skills/settings/TOOLS.json +2 -8
- package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
- package/src/config/env-registry.ts +14 -83
- package/src/config/env.ts +11 -50
- package/src/config/feature-flag-registry.json +16 -16
- package/src/config/loader.ts +0 -6
- package/src/config/schema.ts +3 -1
- package/src/config/skills.ts +21 -2
- package/src/context/image-dimensions.ts +229 -0
- package/src/context/token-estimator.ts +75 -12
- package/src/context/window-manager.ts +49 -10
- package/src/daemon/assistant-attachments.ts +1 -13
- package/src/daemon/handlers/config-ingress.ts +8 -33
- package/src/daemon/handlers/config-slack-channel.ts +49 -46
- package/src/daemon/handlers/config-telegram.ts +32 -16
- package/src/daemon/handlers/sessions.ts +10 -24
- package/src/daemon/handlers/shared.ts +0 -130
- package/src/daemon/host-cu-proxy.ts +401 -0
- package/src/daemon/lifecycle.ts +36 -68
- package/src/daemon/message-protocol.ts +3 -0
- package/src/daemon/message-types/computer-use.ts +2 -119
- package/src/daemon/message-types/host-cu.ts +19 -0
- package/src/daemon/message-types/messages.ts +3 -0
- package/src/daemon/server.ts +14 -21
- package/src/daemon/session-agent-loop-handlers.ts +2 -0
- package/src/daemon/session-attachments.ts +1 -2
- package/src/daemon/session-slash.ts +1 -1
- package/src/daemon/session-surfaces.ts +40 -28
- package/src/daemon/session-tool-setup.ts +2 -9
- package/src/daemon/session.ts +138 -15
- package/src/daemon/tool-side-effects.ts +2 -8
- package/src/daemon/watch-handler.ts +2 -2
- package/src/events/tool-metrics-listener.ts +2 -2
- package/src/hooks/manager.ts +1 -4
- package/src/inbound/public-ingress-urls.ts +7 -7
- package/src/logfire.ts +16 -5
- package/src/memory/conversation-key-store.ts +21 -0
- package/src/memory/db-init.ts +4 -0
- package/src/memory/migrations/149-oauth-tables.ts +60 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/schema/index.ts +1 -0
- package/src/memory/schema/oauth.ts +65 -0
- package/src/messaging/provider.ts +4 -4
- package/src/messaging/providers/gmail/client.ts +82 -2
- package/src/messaging/providers/gmail/people-client.ts +10 -10
- package/src/messaging/providers/telegram-bot/adapter.ts +17 -17
- package/src/messaging/providers/whatsapp/adapter.ts +11 -8
- package/src/messaging/registry.ts +2 -32
- package/src/notifications/copy-composer.ts +0 -5
- package/src/notifications/signal.ts +4 -5
- package/src/oauth/byo-connection.test.ts +126 -25
- package/src/oauth/byo-connection.ts +22 -6
- package/src/oauth/connect-orchestrator.ts +113 -57
- package/src/oauth/connect-types.ts +17 -23
- package/src/oauth/connection-resolver.ts +35 -11
- package/src/oauth/connection.ts +1 -1
- package/src/oauth/manual-token-connection.ts +104 -0
- package/src/oauth/oauth-store.ts +496 -0
- package/src/oauth/platform-connection.test.ts +29 -0
- package/src/oauth/platform-connection.ts +6 -5
- package/src/oauth/provider-behaviors.ts +124 -0
- package/src/oauth/scope-policy.ts +9 -2
- package/src/oauth/seed-providers.ts +161 -0
- package/src/oauth/token-persistence.ts +74 -78
- package/src/permissions/checker.ts +3 -3
- package/src/permissions/defaults.ts +0 -1
- package/src/permissions/prompter.ts +10 -1
- package/src/permissions/trust-store.ts +13 -0
- package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
- package/src/prompts/system-prompt.ts +28 -40
- package/src/providers/anthropic/client.ts +133 -24
- package/src/providers/retry.ts +1 -27
- package/src/runtime/auth/route-policy.ts +0 -3
- package/src/runtime/channel-reply-delivery.ts +0 -40
- package/src/runtime/gateway-client.ts +0 -7
- package/src/runtime/http-server.ts +8 -6
- package/src/runtime/http-types.ts +2 -2
- package/src/runtime/middleware/twilio-validation.ts +1 -11
- package/src/runtime/pending-interactions.ts +14 -12
- package/src/runtime/routes/channel-delivery-routes.ts +0 -1
- package/src/runtime/routes/conversation-routes.ts +73 -19
- package/src/runtime/routes/events-routes.ts +21 -11
- package/src/runtime/routes/host-cu-routes.ts +97 -0
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
- package/src/runtime/routes/integrations/slack/share.ts +6 -7
- package/src/runtime/routes/log-export-routes.ts +126 -8
- package/src/runtime/routes/settings-routes.ts +55 -48
- package/src/runtime/routes/surface-action-routes.ts +1 -1
- package/src/runtime/routes/watch-routes.ts +128 -0
- package/src/schedule/integration-status.ts +10 -9
- package/src/security/credential-key.ts +0 -156
- package/src/security/keychain-broker-client.ts +5 -6
- package/src/security/oauth2.ts +1 -1
- package/src/security/token-manager.ts +119 -46
- package/src/skills/catalog-install.ts +358 -0
- package/src/skills/include-graph.ts +32 -0
- package/src/telegram/bot-username.ts +2 -3
- package/src/tools/browser/network-recorder.ts +1 -1
- package/src/tools/browser/network-recording-types.ts +1 -1
- package/src/tools/computer-use/definitions.ts +46 -11
- package/src/tools/computer-use/registry.ts +4 -5
- package/src/tools/credentials/broker.ts +1 -2
- package/src/tools/credentials/metadata-store.ts +17 -121
- package/src/tools/credentials/vault.ts +94 -167
- package/src/tools/registry.ts +2 -7
- package/src/tools/skills/load.ts +62 -3
- package/src/tools/watch/watch-state.ts +0 -12
- package/src/util/logger.ts +7 -41
- package/src/util/platform.ts +9 -28
- package/src/watcher/providers/google-calendar.ts +2 -1
- package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
- package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
- package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
- package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
- package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
- package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
- package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
- package/src/cli/commands/dev.ts +0 -129
- package/src/cli/commands/map.ts +0 -391
- package/src/cli/commands/oauth.ts +0 -77
- package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +0 -16
- package/src/daemon/computer-use-session.ts +0 -1026
- package/src/daemon/ride-shotgun-handler.ts +0 -569
- package/src/oauth/provider-base-urls.ts +0 -21
- package/src/oauth/provider-profiles.ts +0 -192
- package/src/prompts/computer-use-prompt.ts +0 -98
- package/src/runtime/routes/computer-use-routes.ts +0 -641
- package/src/runtime/telegram-streaming-delivery.test.ts +0 -729
- package/src/runtime/telegram-streaming-delivery.ts +0 -393
- package/src/tools/computer-use/request-computer-control.ts +0 -56
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
import { afterEach, describe, expect, test } from "bun:test";
|
|
2
|
+
|
|
3
|
+
import { HostCuProxy } from "../daemon/host-cu-proxy.js";
|
|
4
|
+
|
|
5
|
+
describe("HostCuProxy", () => {
|
|
6
|
+
let proxy: InstanceType<typeof HostCuProxy>;
|
|
7
|
+
let sentMessages: unknown[];
|
|
8
|
+
let sendToClient: (msg: unknown) => void;
|
|
9
|
+
|
|
10
|
+
/**
 * Resets the shared fixtures for one test: empties the outbound message log
 * and constructs a new HostCuProxy whose transport appends to that log.
 *
 * @param maxSteps Optional step limit, forwarded unchanged to the proxy constructor.
 */
function setup(maxSteps?: number) {
  // The callback reads `sentMessages` at call time, so it always appends to
  // the array assigned just below.
  const recordOutbound = (outbound: unknown) => sentMessages.push(outbound);

  sentMessages = [];
  sendToClient = recordOutbound;
  proxy = new HostCuProxy(recordOutbound as never, maxSteps);
}
|
|
15
|
+
|
|
16
|
+
// Dispose the proxy left behind by the previous test; a test that never
// called setup() leaves nothing to dispose.
afterEach(() => {
  if (proxy != null) {
    proxy.dispose();
  }
});
|
|
19
|
+
|
|
20
|
+
// -------------------------------------------------------------------------
|
|
21
|
+
// Request / resolve lifecycle
|
|
22
|
+
// -------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
describe("request/resolve lifecycle", () => {
  // Views the first message handed to the transport as a plain record.
  const firstOutbound = () => sentMessages[0] as Record<string, unknown>;

  test("sends host_cu_request and resolves with formatted observation", async () => {
    setup();

    const pending = proxy.request(
      "computer_use_click",
      { element_id: 42 },
      "session-1",
      1,
      "Clicking the button",
    );

    // The outbound message is checked before the request promise is awaited.
    expect(sentMessages).toHaveLength(1);
    const outbound = firstOutbound();
    expect(outbound.type).toBe("host_cu_request");
    expect(outbound.sessionId).toBe("session-1");
    expect(outbound.toolName).toBe("computer_use_click");
    expect(outbound.input).toEqual({ element_id: 42 });
    expect(outbound.stepNumber).toBe(1);
    expect(outbound.reasoning).toBe("Clicking the button");
    expect(typeof outbound.requestId).toBe("string");

    const id = outbound.requestId as string;
    expect(proxy.hasPendingRequest(id)).toBe(true);

    proxy.resolve(id, {
      axTree: "Button [1]\nLabel [2]",
      executionResult: "Clicked element 42",
    });

    const observation = await pending;
    expect(observation.content).toContain("Clicked element 42");
    expect(observation.content).toContain("<ax-tree>");
    expect(observation.content).toContain("CURRENT SCREEN STATE:");
    expect(observation.isError).toBe(false);
    expect(proxy.hasPendingRequest(id)).toBe(false);
  });

  test("formats error observation correctly", async () => {
    setup();

    const pending = proxy.request(
      "computer_use_click",
      { element_id: 99 },
      "session-1",
      1,
    );

    const id = firstOutbound().requestId as string;
    proxy.resolve(id, {
      executionError: "Element not found",
      axTree: "Window [1]",
    });

    const observation = await pending;
    expect(observation.isError).toBe(true);
    expect(observation.content).toContain("Action failed: Element not found");
    expect(observation.content).toContain("<ax-tree>");
  });

  test("includes screenshot as content block", async () => {
    setup();

    const pending = proxy.request(
      "computer_use_screenshot",
      {},
      "session-1",
      1,
    );

    const id = firstOutbound().requestId as string;
    proxy.resolve(id, {
      axTree: "Button [1]",
      screenshot: "base64data",
      screenshotWidthPx: 1920,
      screenshotHeightPx: 1080,
    });

    const observation = await pending;
    const expectedImageBlock = {
      type: "image",
      source: {
        type: "base64",
        media_type: "image/jpeg",
        data: "base64data",
      },
    };
    expect(observation.contentBlocks).toBeDefined();
    expect(observation.contentBlocks).toHaveLength(1);
    expect(observation.contentBlocks![0]).toEqual(expectedImageBlock);
    expect(observation.content).toContain("1920x1080 px");
  });

  test("resolves with unknown requestId is silently ignored", () => {
    setup();
    // Passing means only that this call did not throw.
    proxy.resolve("unknown-id", { axTree: "something" });
  });
});
|
|
126
|
+
|
|
127
|
+
// -------------------------------------------------------------------------
|
|
128
|
+
// Timeout
|
|
129
|
+
// -------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
describe("timeout", () => {
  // The timeout timer is not driven in this suite (the wait is described as
  // 60s), so this case pins only the pending -> manually resolved transition
  // and is titled accordingly instead of claiming timeout coverage.
  test("keeps the request pending until it is resolved", async () => {
    setup();

    const resultPromise = proxy.request(
      "computer_use_click",
      { element_id: 1 },
      "session-1",
      1,
    );

    const sent = sentMessages[0] as Record<string, unknown>;
    const requestId = sent.requestId as string;
    expect(proxy.hasPendingRequest(requestId)).toBe(true);

    // Resolve manually so the promise settles and the test cannot hang.
    proxy.resolve(requestId, { axTree: "resolved" });
    const result = await resultPromise;

    // A manual resolve yields a normal observation and clears the pending
    // entry.
    expect(result.isError).toBe(false);
    expect(proxy.hasPendingRequest(requestId)).toBe(false);
  });

  // Placeholder for the behavior the original title promised; needs a way to
  // advance or shorten the proxy's timer before it can be implemented.
  test.todo("resolves with timeout error when timer fires");
});
|
|
153
|
+
|
|
154
|
+
// -------------------------------------------------------------------------
|
|
155
|
+
// Abort signal
|
|
156
|
+
// -------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
describe("abort signal", () => {
  // Issues the canonical click request with no reasoning text and the given
  // abort signal attached.
  const requestWithSignal = (signal: AbortSignal) =>
    proxy.request(
      "computer_use_click",
      { element_id: 1 },
      "session-1",
      1,
      undefined,
      signal,
    );

  test("resolves with abort result when signal fires", async () => {
    setup();

    const abortController = new AbortController();
    const pending = requestWithSignal(abortController.signal);

    const outbound = sentMessages[0] as Record<string, unknown>;
    const id = outbound.requestId as string;
    expect(proxy.hasPendingRequest(id)).toBe(true);

    // Abort while the request is still in flight.
    abortController.abort();

    const outcome = await pending;
    expect(outcome.content).toContain("Aborted");
    expect(outcome.isError).toBe(true);
    expect(proxy.hasPendingRequest(id)).toBe(false);
  });

  test("returns immediately if signal already aborted", async () => {
    setup();

    const abortController = new AbortController();
    abortController.abort();

    const outcome = await requestWithSignal(abortController.signal);

    expect(outcome.content).toContain("Aborted");
    expect(outcome.isError).toBe(true);
    // Nothing may reach the client for a request that was aborted up front.
    expect(sentMessages).toHaveLength(0);
  });
});
|
|
204
|
+
|
|
205
|
+
// -------------------------------------------------------------------------
|
|
206
|
+
// Step limit enforcement
|
|
207
|
+
// -------------------------------------------------------------------------
|
|
208
|
+
|
|
209
|
+
describe("step limit enforcement", () => {
  test("returns error when step count exceeds max", async () => {
    const maxSteps = 3;
    setup(maxSteps);

    // One more recorded action than the limit allows.
    for (const elementId of [1, 2, 3, 4]) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }
    expect(proxy.stepCount).toBe(4);

    // The over-limit request must be answered without contacting the client.
    const rejection = await proxy.request(
      "computer_use_click",
      { element_id: 5 },
      "session-1",
      5,
    );

    expect(rejection.isError).toBe(true);
    expect(rejection.content).toContain("Step limit (3) exceeded");
    expect(rejection.content).toContain("computer_use_done");
    expect(sentMessages).toHaveLength(0);
  });

  test("allows requests within step limit", async () => {
    const maxSteps = 5;
    setup(maxSteps);

    proxy.recordAction("computer_use_click", { element_id: 1 });
    expect(proxy.stepCount).toBe(1);

    const pending = proxy.request(
      "computer_use_click",
      { element_id: 2 },
      "session-1",
      2,
    );

    // Under the limit, the request is forwarded to the client.
    expect(sentMessages).toHaveLength(1);

    const outbound = sentMessages[0] as Record<string, unknown>;
    proxy.resolve(outbound.requestId as string, { axTree: "screen" });

    const outcome = await pending;
    expect(outcome.isError).toBe(false);
  });
});
|
|
257
|
+
|
|
258
|
+
// -------------------------------------------------------------------------
|
|
259
|
+
// Loop detection
|
|
260
|
+
// -------------------------------------------------------------------------
|
|
261
|
+
|
|
262
|
+
describe("loop detection", () => {
  // Records one click action per element id, in the order given.
  const recordClicks = (elementIds: number[]) => {
    for (const elementId of elementIds) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }
  };

  // Formats the same minimal observation in every case.
  const formatButtonObservation = () =>
    proxy.formatObservation({
      axTree: "Button [1]",
    });

  test("injects warning when same action repeated 3 times", () => {
    setup();

    recordClicks([42, 42, 42]);

    const { content } = formatButtonObservation();
    expect(content).toContain(
      "WARNING: You've repeated the same action (computer_use_click) 3 times",
    );
  });

  test("does not warn when actions differ", () => {
    setup();

    recordClicks([1, 2, 3]);

    const { content } = formatButtonObservation();
    expect(content).not.toContain("WARNING: You've repeated");
  });

  test("does not warn with fewer than 3 actions", () => {
    setup();

    recordClicks([42, 42]);

    const { content } = formatButtonObservation();
    expect(content).not.toContain("WARNING: You've repeated");
  });
});
|
|
307
|
+
|
|
308
|
+
// -------------------------------------------------------------------------
|
|
309
|
+
// Consecutive unchanged steps warning
|
|
310
|
+
// -------------------------------------------------------------------------
|
|
311
|
+
|
|
312
|
+
describe("consecutive unchanged steps", () => {
  // Runs one full click round-trip: issue the request, record the action,
  // then resolve it with `observation`. Step N's outbound message is read
  // from index N - 1 of the sent-message log.
  const runClickStep = (
    elementId: number,
    stepNumber: number,
    observation: Parameters<typeof proxy.resolve>[1],
  ) => {
    const pending = proxy.request(
      "computer_use_click",
      { element_id: elementId },
      "session-1",
      stepNumber,
    );
    proxy.recordAction("computer_use_click", { element_id: elementId });
    const outbound = sentMessages[stepNumber - 1] as Record<string, unknown>;
    proxy.resolve(outbound.requestId as string, observation);
    return pending;
  };

  test("warns after 2 consecutive unchanged observations", async () => {
    setup();

    // Step 1 establishes the previous AX tree.
    await runClickStep(1, 1, {
      axTree: "Button [1]",
    });

    // Step 2: identical AX tree and no axDiff, the first unchanged step.
    const afterFirstRepeat = await runClickStep(1, 2, {
      axTree: "Button [1]",
    });
    expect(afterFirstRepeat.content).toContain("NO VISIBLE EFFECT");
    expect(afterFirstRepeat.content).not.toContain("2 consecutive");

    // Step 3: still identical, the second unchanged step in a row.
    const afterSecondRepeat = await runClickStep(1, 3, {
      axTree: "Button [1]",
    });
    expect(afterSecondRepeat.content).toContain(
      "2 consecutive actions had NO VISIBLE EFFECT",
    );
  });

  test("resets consecutive count when diff is present", async () => {
    setup();

    // Step 1 establishes the previous AX tree.
    await runClickStep(1, 1, {
      axTree: "Button [1]",
    });

    // Step 2 leaves the screen unchanged, so the counter reaches 1.
    await runClickStep(1, 2, {
      axTree: "Button [1]",
    });
    expect(proxy.consecutiveUnchangedSteps).toBe(1);

    // Step 3 reports a diff, which must clear the counter.
    await runClickStep(2, 3, {
      axTree: "TextField [1]",
      axDiff: "+ TextField [1]\n- Button [1]",
    });
    expect(proxy.consecutiveUnchangedSteps).toBe(0);
  });
});
|
|
416
|
+
|
|
417
|
+
// -------------------------------------------------------------------------
|
|
418
|
+
// Observation formatting
|
|
419
|
+
// -------------------------------------------------------------------------
|
|
420
|
+
|
|
421
|
+
describe("observation formatting", () => {
|
|
422
|
+
// An AX-tree-only observation is wrapped in <ax-tree> markers under the
// "CURRENT SCREEN STATE:" heading and is not flagged as an error.
test("formats AX tree with markers", () => {
  setup();

  const { content, isError } = proxy.formatObservation({
    axTree: "Button [1]\nLabel [2]",
  });

  expect(content).toContain("<ax-tree>");
  expect(content).toContain("CURRENT SCREEN STATE:");
  expect(content).toContain("Button [1]");
  expect(content).toContain("</ax-tree>");
  expect(isError).toBe(false);
});
|
|
435
|
+
|
|
436
|
+
// User guidance must be present and positioned ahead of the AX tree section.
test("formats user guidance prominently", () => {
  setup();

  const { content } = proxy.formatObservation({
    axTree: "Button [1]",
    userGuidance: "Click the save button",
  });

  expect(content).toContain("USER GUIDANCE: Click the save button");

  const guidancePosition = content.indexOf("USER GUIDANCE");
  const treePosition = content.indexOf("<ax-tree>");
  expect(guidancePosition).toBeLessThan(treePosition);
});
|
|
450
|
+
|
|
451
|
+
// The execution result text is carried into the formatted content.
test("formats execution result", () => {
  setup();

  const { content } = proxy.formatObservation({
    executionResult: "Element clicked successfully",
    axTree: "Button [1]",
  });

  expect(content).toContain("Element clicked successfully");
});
|
|
461
|
+
|
|
462
|
+
test("formats execution error", () => {
|
|
463
|
+
setup();
|
|
464
|
+
|
|
465
|
+
const result = proxy.formatObservation({
|
|
466
|
+
executionError: "Element not found",
|
|
467
|
+
axTree: "Window [1]",
|
|
468
|
+
});
|
|
469
|
+
|
|
470
|
+
expect(result.isError).toBe(true);
|
|
471
|
+
expect(result.content).toContain("Action failed: Element not found");
|
|
472
|
+
});
|
|
473
|
+
|
|
474
|
+
test("returns 'Action executed' when observation is empty", () => {
|
|
475
|
+
setup();
|
|
476
|
+
|
|
477
|
+
const result = proxy.formatObservation({});
|
|
478
|
+
|
|
479
|
+
expect(result.content).toBe("Action executed");
|
|
480
|
+
expect(result.isError).toBe(false);
|
|
481
|
+
});
|
|
482
|
+
|
|
483
|
+
test("includes screenshot metadata", () => {
|
|
484
|
+
setup();
|
|
485
|
+
|
|
486
|
+
const result = proxy.formatObservation({
|
|
487
|
+
screenshot: "base64data",
|
|
488
|
+
screenshotWidthPx: 2560,
|
|
489
|
+
screenshotHeightPx: 1440,
|
|
490
|
+
screenWidthPt: 1280,
|
|
491
|
+
screenHeightPt: 720,
|
|
492
|
+
});
|
|
493
|
+
|
|
494
|
+
expect(result.content).toContain("2560x1440 px");
|
|
495
|
+
expect(result.content).toContain("1280x720 pt");
|
|
496
|
+
});
|
|
497
|
+
|
|
498
|
+
test("escapes </ax-tree> in AX tree content", () => {
|
|
499
|
+
setup();
|
|
500
|
+
|
|
501
|
+
const result = proxy.formatObservation({
|
|
502
|
+
axTree: "Some content with </ax-tree> inside",
|
|
503
|
+
});
|
|
504
|
+
|
|
505
|
+
expect(result.content).toContain("</ax-tree>");
|
|
506
|
+
// Should still have the real closing marker
|
|
507
|
+
expect(result.content).toMatch(/<\/ax-tree>$/m);
|
|
508
|
+
});
|
|
509
|
+
|
|
510
|
+
test("includes diff when present", () => {
|
|
511
|
+
setup();
|
|
512
|
+
|
|
513
|
+
const result = proxy.formatObservation({
|
|
514
|
+
axTree: "TextField [1]",
|
|
515
|
+
axDiff: "+ TextField [1]\n- Button [1]",
|
|
516
|
+
});
|
|
517
|
+
|
|
518
|
+
expect(result.content).toContain("+ TextField [1]");
|
|
519
|
+
expect(result.content).toContain("- Button [1]");
|
|
520
|
+
});
|
|
521
|
+
|
|
522
|
+
test("no screenshot content blocks when screenshot absent", () => {
|
|
523
|
+
setup();
|
|
524
|
+
|
|
525
|
+
const result = proxy.formatObservation({
|
|
526
|
+
axTree: "Button [1]",
|
|
527
|
+
});
|
|
528
|
+
|
|
529
|
+
expect(result.contentBlocks).toBeUndefined();
|
|
530
|
+
});
|
|
531
|
+
});
|
|
532
|
+
|
|
533
|
+
// -------------------------------------------------------------------------
|
|
534
|
+
// CU state: reset
|
|
535
|
+
// -------------------------------------------------------------------------
|
|
536
|
+
|
|
537
|
+
describe("reset", () => {
  test("clears all CU state", () => {
    setup();

    // Record two actions so there is state to clear.
    for (const elementId of [1, 2]) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }
    expect(proxy.stepCount).toBe(2);
    expect(proxy.actionHistory).toHaveLength(2);

    proxy.reset();

    // After reset the counters, history and cached AX tree are back to empty.
    expect(proxy.stepCount).toBe(0);
    expect(proxy.actionHistory).toHaveLength(0);
    expect(proxy.previousAXTree).toBeUndefined();
    expect(proxy.consecutiveUnchangedSteps).toBe(0);
  });
});
|
|
554
|
+
|
|
555
|
+
// -------------------------------------------------------------------------
|
|
556
|
+
// CU state: action history bounding
|
|
557
|
+
// -------------------------------------------------------------------------
|
|
558
|
+
|
|
559
|
+
describe("action history bounding", () => {
  test("keeps only last 10 entries", () => {
    setup();

    // Record 15 clicks with element ids 0..14.
    const totalActions = 15;
    for (let elementId = 0; elementId < totalActions; elementId += 1) {
      proxy.recordAction("computer_use_click", { element_id: elementId });
    }

    const history = proxy.actionHistory;
    expect(history).toHaveLength(10);
    // The oldest retained entry is step 6, i.e. steps 1-5 were dropped,
    // while the running step counter still reflects all 15 actions.
    expect(history[0].step).toBe(6);
    expect(proxy.stepCount).toBe(15);
  });
});
|
|
573
|
+
|
|
574
|
+
// -------------------------------------------------------------------------
|
|
575
|
+
// Dispose
|
|
576
|
+
// -------------------------------------------------------------------------
|
|
577
|
+
|
|
578
|
+
describe("dispose", () => {
  test("rejects all pending requests", async () => {
    setup();

    // Issue a request and leave it unresolved so it is pending at dispose time.
    const resultPromise = proxy.request(
      "computer_use_click",
      { element_id: 1 },
      "session-1",
      1,
    );

    // The request id is read back from the message handed to the sender.
    const sent = sentMessages[0] as Record<string, unknown>;
    const requestId = sent.requestId as string;
    expect(proxy.hasPendingRequest(requestId)).toBe(true);

    proxy.dispose();

    expect(proxy.hasPendingRequest(requestId)).toBe(false);
    // `.rejects` produces a promise; it has to be awaited, otherwise the test
    // can finish before the rejection assertion is evaluated and a failing
    // assertion would go unnoticed.
    await expect(resultPromise).rejects.toThrow("Host CU proxy disposed");
  });
});
|
|
599
|
+
|
|
600
|
+
// -------------------------------------------------------------------------
|
|
601
|
+
// updateSender
|
|
602
|
+
// -------------------------------------------------------------------------
|
|
603
|
+
|
|
604
|
+
describe("updateSender", () => {
  test("uses updated sender for new requests", async () => {
    setup();

    // Swap in a sender that captures messages in its own array.
    const replacementOutbox: unknown[] = [];
    proxy.updateSender((msg) => replacementOutbox.push(msg), true);

    const pendingResult = proxy.request(
      "computer_use_click",
      { element_id: 1 },
      "session-1",
      1,
    );

    expect(sentMessages).toHaveLength(0); // Old sender not used
    expect(replacementOutbox).toHaveLength(1); // New sender used

    // Resolve the request by the id found on the captured message so the
    // awaited promise below can settle.
    const captured = replacementOutbox[0] as Record<string, unknown>;
    const capturedRequestId = captured.requestId as string;
    proxy.resolve(capturedRequestId, {
      axTree: "Button [1]",
    });

    await pendingResult;
  });
});
|
|
629
|
+
});
|