@vellumai/assistant 0.4.48 → 0.4.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +2 -2
- package/README.md +2 -23
- package/docs/architecture/integrations.md +45 -41
- package/docs/architecture/keychain-broker.md +3 -3
- package/docs/runbook-trusted-contacts.md +3 -8
- package/hook-templates/debug-prompt-logger/hook.json +1 -1
- package/hook-templates/debug-prompt-logger/run.sh +1 -3
- package/package.json +1 -1
- package/src/__tests__/actor-token-service.test.ts +0 -1
- package/src/__tests__/anthropic-provider.test.ts +156 -0
- package/src/__tests__/approval-cascade.test.ts +810 -0
- package/src/__tests__/approval-primitive.test.ts +0 -1
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-attachments.test.ts +12 -34
- package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
- package/src/__tests__/channel-guardian.test.ts +0 -2
- package/src/__tests__/channel-readiness-routes.test.ts +15 -6
- package/src/__tests__/channel-readiness-service.test.ts +10 -9
- package/src/__tests__/checker.test.ts +9 -29
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
- package/src/__tests__/computer-use-tools.test.ts +2 -19
- package/src/__tests__/config-watcher.test.ts +0 -1
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
- package/src/__tests__/context-image-dimensions.test.ts +332 -0
- package/src/__tests__/context-token-estimator.test.ts +196 -13
- package/src/__tests__/conversation-attention-store.test.ts +0 -1
- package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +144 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-metadata-store.test.ts +64 -73
- package/src/__tests__/credential-security-invariants.test.ts +13 -7
- package/src/__tests__/credential-vault-unit.test.ts +280 -49
- package/src/__tests__/credential-vault.test.ts +138 -16
- package/src/__tests__/credentials-cli.test.ts +71 -0
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
- package/src/__tests__/ephemeral-permissions.test.ts +3 -3
- package/src/__tests__/gateway-only-guard.test.ts +0 -1
- package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
- package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
- package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
- package/src/__tests__/heartbeat-service.test.ts +0 -1
- package/src/__tests__/host-cu-proxy.test.ts +629 -0
- package/src/__tests__/host-shell-tool.test.ts +27 -15
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/ingress-url-consistency.test.ts +14 -21
- package/src/__tests__/integration-status.test.ts +32 -51
- package/src/__tests__/intent-routing.test.ts +0 -1
- package/src/__tests__/invite-routes-http.test.ts +10 -9
- package/src/__tests__/keychain-broker-client.test.ts +11 -43
- package/src/__tests__/notification-routing-intent.test.ts +0 -1
- package/src/__tests__/oauth-cli.test.ts +373 -14
- package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
- package/src/__tests__/oauth-scope-policy.test.ts +4 -6
- package/src/__tests__/oauth-store.test.ts +756 -0
- package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
- package/src/__tests__/provider-error-scenarios.test.ts +0 -1
- package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
- package/src/__tests__/public-ingress-urls.test.ts +15 -21
- package/src/__tests__/recording-handler.test.ts +3 -4
- package/src/__tests__/registry.test.ts +2 -2
- package/src/__tests__/runtime-events-sse.test.ts +55 -7
- package/src/__tests__/schedule-store.test.ts +0 -1
- package/src/__tests__/scheduler-recurrence.test.ts +0 -1
- package/src/__tests__/scoped-approval-grants.test.ts +0 -1
- package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
- package/src/__tests__/secret-ingress-handler.test.ts +0 -1
- package/src/__tests__/send-endpoint-busy.test.ts +21 -6
- package/src/__tests__/sequence-store.test.ts +0 -1
- package/src/__tests__/session-init.benchmark.test.ts +4 -5
- package/src/__tests__/skill-include-graph.test.ts +66 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
- package/src/__tests__/skill-load-tool.test.ts +149 -1
- package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
- package/src/__tests__/skills-uninstall.test.ts +1 -1
- package/src/__tests__/skills.test.ts +3 -3
- package/src/__tests__/slack-channel-config.test.ts +67 -3
- package/src/__tests__/slack-share-routes.test.ts +17 -19
- package/src/__tests__/system-prompt.test.ts +0 -1
- package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
- package/src/__tests__/terminal-tools.test.ts +4 -3
- package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
- package/src/__tests__/tool-approval-handler.test.ts +0 -1
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
- package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
- package/src/__tests__/tool-executor.test.ts +0 -1
- package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
- package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
- package/src/__tests__/trust-store.test.ts +1 -22
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
- package/src/__tests__/twilio-routes.test.ts +0 -16
- package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
- package/src/agent/ax-tree-compaction.test.ts +235 -0
- package/src/agent/loop.ts +76 -130
- package/src/calls/call-domain.ts +1 -6
- package/src/calls/relay-server.ts +9 -13
- package/src/calls/twilio-config.ts +2 -7
- package/src/calls/twilio-routes.ts +1 -2
- package/src/calls/voice-ingress-preflight.ts +1 -1
- package/src/cli/commands/browser-relay.ts +18 -12
- package/src/cli/commands/completions.ts +0 -3
- package/src/cli/commands/credentials.ts +101 -15
- package/src/cli/commands/oauth/apps.ts +255 -0
- package/src/cli/commands/oauth/connections.ts +299 -0
- package/src/cli/commands/oauth/index.ts +52 -0
- package/src/cli/commands/oauth/providers.ts +242 -0
- package/src/cli/commands/skills.ts +4 -338
- package/src/cli/program.ts +1 -5
- package/src/cli/reference.ts +1 -3
- package/src/config/assistant-feature-flags.ts +0 -3
- package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
- package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
- package/src/config/bundled-skills/computer-use/TOOLS.json +22 -4
- package/src/config/bundled-skills/google-calendar/calendar-client.ts +21 -16
- package/src/config/bundled-skills/messaging/tools/shared.ts +1 -4
- package/src/config/bundled-skills/settings/SKILL.md +1 -1
- package/src/config/bundled-skills/settings/TOOLS.json +2 -8
- package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
- package/src/config/env-registry.ts +14 -83
- package/src/config/env.ts +11 -50
- package/src/config/feature-flag-registry.json +16 -16
- package/src/config/loader.ts +0 -6
- package/src/config/schema.ts +3 -1
- package/src/config/skills.ts +21 -2
- package/src/context/image-dimensions.ts +229 -0
- package/src/context/token-estimator.ts +75 -12
- package/src/context/window-manager.ts +49 -10
- package/src/daemon/assistant-attachments.ts +1 -13
- package/src/daemon/handlers/config-ingress.ts +8 -33
- package/src/daemon/handlers/config-slack-channel.ts +49 -46
- package/src/daemon/handlers/config-telegram.ts +32 -16
- package/src/daemon/handlers/sessions.ts +10 -24
- package/src/daemon/handlers/shared.ts +0 -130
- package/src/daemon/host-cu-proxy.ts +401 -0
- package/src/daemon/lifecycle.ts +36 -68
- package/src/daemon/message-protocol.ts +3 -0
- package/src/daemon/message-types/computer-use.ts +2 -119
- package/src/daemon/message-types/host-cu.ts +19 -0
- package/src/daemon/message-types/messages.ts +3 -0
- package/src/daemon/server.ts +14 -21
- package/src/daemon/session-agent-loop-handlers.ts +2 -0
- package/src/daemon/session-attachments.ts +1 -2
- package/src/daemon/session-slash.ts +1 -1
- package/src/daemon/session-surfaces.ts +40 -28
- package/src/daemon/session-tool-setup.ts +2 -9
- package/src/daemon/session.ts +138 -15
- package/src/daemon/tool-side-effects.ts +2 -8
- package/src/daemon/watch-handler.ts +2 -2
- package/src/events/tool-metrics-listener.ts +2 -2
- package/src/hooks/manager.ts +1 -4
- package/src/inbound/public-ingress-urls.ts +7 -7
- package/src/logfire.ts +16 -5
- package/src/memory/conversation-key-store.ts +21 -0
- package/src/memory/db-init.ts +4 -0
- package/src/memory/migrations/149-oauth-tables.ts +60 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/schema/index.ts +1 -0
- package/src/memory/schema/oauth.ts +65 -0
- package/src/messaging/provider.ts +4 -4
- package/src/messaging/providers/gmail/client.ts +82 -2
- package/src/messaging/providers/gmail/people-client.ts +10 -10
- package/src/messaging/providers/telegram-bot/adapter.ts +17 -17
- package/src/messaging/providers/whatsapp/adapter.ts +11 -8
- package/src/messaging/registry.ts +2 -32
- package/src/notifications/copy-composer.ts +0 -5
- package/src/notifications/signal.ts +4 -5
- package/src/oauth/byo-connection.test.ts +126 -25
- package/src/oauth/byo-connection.ts +22 -6
- package/src/oauth/connect-orchestrator.ts +113 -57
- package/src/oauth/connect-types.ts +17 -23
- package/src/oauth/connection-resolver.ts +35 -11
- package/src/oauth/connection.ts +1 -1
- package/src/oauth/manual-token-connection.ts +104 -0
- package/src/oauth/oauth-store.ts +496 -0
- package/src/oauth/platform-connection.test.ts +29 -0
- package/src/oauth/platform-connection.ts +6 -5
- package/src/oauth/provider-behaviors.ts +124 -0
- package/src/oauth/scope-policy.ts +9 -2
- package/src/oauth/seed-providers.ts +161 -0
- package/src/oauth/token-persistence.ts +74 -78
- package/src/permissions/checker.ts +3 -3
- package/src/permissions/defaults.ts +0 -1
- package/src/permissions/prompter.ts +10 -1
- package/src/permissions/trust-store.ts +13 -0
- package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
- package/src/prompts/system-prompt.ts +28 -40
- package/src/providers/anthropic/client.ts +133 -24
- package/src/providers/retry.ts +1 -27
- package/src/runtime/auth/route-policy.ts +0 -3
- package/src/runtime/channel-reply-delivery.ts +0 -40
- package/src/runtime/gateway-client.ts +0 -7
- package/src/runtime/http-server.ts +8 -6
- package/src/runtime/http-types.ts +2 -2
- package/src/runtime/middleware/twilio-validation.ts +1 -11
- package/src/runtime/pending-interactions.ts +14 -12
- package/src/runtime/routes/channel-delivery-routes.ts +0 -1
- package/src/runtime/routes/conversation-routes.ts +73 -19
- package/src/runtime/routes/events-routes.ts +21 -11
- package/src/runtime/routes/host-cu-routes.ts +97 -0
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
- package/src/runtime/routes/integrations/slack/share.ts +6 -7
- package/src/runtime/routes/log-export-routes.ts +126 -8
- package/src/runtime/routes/settings-routes.ts +55 -48
- package/src/runtime/routes/surface-action-routes.ts +1 -1
- package/src/runtime/routes/watch-routes.ts +128 -0
- package/src/schedule/integration-status.ts +10 -9
- package/src/security/credential-key.ts +0 -156
- package/src/security/keychain-broker-client.ts +5 -6
- package/src/security/oauth2.ts +1 -1
- package/src/security/token-manager.ts +119 -46
- package/src/skills/catalog-install.ts +358 -0
- package/src/skills/include-graph.ts +32 -0
- package/src/telegram/bot-username.ts +2 -3
- package/src/tools/browser/network-recorder.ts +1 -1
- package/src/tools/browser/network-recording-types.ts +1 -1
- package/src/tools/computer-use/definitions.ts +46 -11
- package/src/tools/computer-use/registry.ts +4 -5
- package/src/tools/credentials/broker.ts +1 -2
- package/src/tools/credentials/metadata-store.ts +17 -121
- package/src/tools/credentials/vault.ts +94 -167
- package/src/tools/registry.ts +2 -7
- package/src/tools/skills/load.ts +62 -3
- package/src/tools/watch/watch-state.ts +0 -12
- package/src/util/logger.ts +7 -41
- package/src/util/platform.ts +9 -28
- package/src/watcher/providers/google-calendar.ts +2 -1
- package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
- package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
- package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
- package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
- package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
- package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
- package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
- package/src/cli/commands/dev.ts +0 -129
- package/src/cli/commands/map.ts +0 -391
- package/src/cli/commands/oauth.ts +0 -77
- package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +0 -16
- package/src/daemon/computer-use-session.ts +0 -1026
- package/src/daemon/ride-shotgun-handler.ts +0 -569
- package/src/oauth/provider-base-urls.ts +0 -21
- package/src/oauth/provider-profiles.ts +0 -192
- package/src/prompts/computer-use-prompt.ts +0 -98
- package/src/runtime/routes/computer-use-routes.ts +0 -641
- package/src/runtime/telegram-streaming-delivery.test.ts +0 -729
- package/src/runtime/telegram-streaming-delivery.ts +0 -393
- package/src/tools/computer-use/request-computer-control.ts +0 -56
package/src/util/platform.ts
CHANGED
|
@@ -8,13 +8,7 @@ import {
|
|
|
8
8
|
import { homedir } from "node:os";
|
|
9
9
|
import { join } from "node:path";
|
|
10
10
|
|
|
11
|
-
import {
|
|
12
|
-
getBaseDataDir,
|
|
13
|
-
getDaemonIosPairing,
|
|
14
|
-
getDaemonTcpEnabled,
|
|
15
|
-
getDaemonTcpHost,
|
|
16
|
-
getDaemonTcpPort,
|
|
17
|
-
} from "../config/env-registry.js";
|
|
11
|
+
import { getBaseDataDir } from "../config/env-registry.js";
|
|
18
12
|
|
|
19
13
|
export function isMacOS(): boolean {
|
|
20
14
|
return process.platform === "darwin";
|
|
@@ -245,39 +239,30 @@ export function getInterfacesDir(): string {
|
|
|
245
239
|
|
|
246
240
|
/**
|
|
247
241
|
* Returns the TCP port the daemon should listen on for iOS clients.
|
|
248
|
-
*
|
|
242
|
+
* Hardcoded default: 8765.
|
|
249
243
|
*/
|
|
250
244
|
export function getTCPPort(): number {
|
|
251
|
-
return
|
|
245
|
+
return 8765;
|
|
252
246
|
}
|
|
253
247
|
|
|
254
248
|
/**
|
|
255
249
|
* Returns whether the daemon TCP listener should be enabled.
|
|
256
|
-
*
|
|
257
|
-
*
|
|
258
|
-
* 2. Presence of the flag file ~/.vellum/tcp-enabled (exists → on)
|
|
259
|
-
* 3. Default: false
|
|
250
|
+
* Checks for the presence of the flag file ~/.vellum/tcp-enabled.
|
|
251
|
+
* Default: false.
|
|
260
252
|
*
|
|
261
253
|
* The flag-file check makes it easy to enable TCP in dev without restarting
|
|
262
254
|
* the shell: `touch ~/.vellum/tcp-enabled && kill -USR1 <daemon-pid>`.
|
|
263
|
-
* The macOS CLI (AssistantCli) also sets the env var for bundled-binary deployments.
|
|
264
255
|
*/
|
|
265
256
|
export function isTCPEnabled(): boolean {
|
|
266
|
-
const envValue = getDaemonTcpEnabled();
|
|
267
|
-
if (envValue !== undefined) return envValue;
|
|
268
257
|
return existsSync(join(getRootDir(), "tcp-enabled"));
|
|
269
258
|
}
|
|
270
259
|
|
|
271
260
|
/**
|
|
272
261
|
* Returns the hostname/address for the TCP listener.
|
|
273
|
-
*
|
|
274
|
-
*
|
|
275
|
-
* 2. If iOS pairing is enabled: '0.0.0.0' (LAN-accessible)
|
|
276
|
-
* 3. Default: '127.0.0.1' (localhost only)
|
|
262
|
+
* If iOS pairing is enabled (flag file): '0.0.0.0' (LAN-accessible).
|
|
263
|
+
* Default: '127.0.0.1' (localhost only).
|
|
277
264
|
*/
|
|
278
265
|
export function getTCPHost(): string {
|
|
279
|
-
const override = getDaemonTcpHost();
|
|
280
|
-
if (override) return override;
|
|
281
266
|
if (isIOSPairingEnabled()) return "0.0.0.0";
|
|
282
267
|
return "127.0.0.1";
|
|
283
268
|
}
|
|
@@ -288,17 +273,13 @@ export function getTCPHost(): string {
|
|
|
288
273
|
* instead of 127.0.0.1 (localhost only), making the daemon reachable
|
|
289
274
|
* from iOS devices on the same local network.
|
|
290
275
|
*
|
|
291
|
-
*
|
|
292
|
-
*
|
|
293
|
-
* 2. Presence of the flag file ~/.vellum/ios-pairing-enabled (exists → on)
|
|
294
|
-
* 3. Default: false
|
|
276
|
+
* Checks for the presence of the flag file ~/.vellum/ios-pairing-enabled.
|
|
277
|
+
* Default: false.
|
|
295
278
|
*
|
|
296
279
|
* This is separate from isTCPEnabled() — TCP can be enabled for localhost-only
|
|
297
280
|
* access without exposing the daemon to the LAN.
|
|
298
281
|
*/
|
|
299
282
|
export function isIOSPairingEnabled(): boolean {
|
|
300
|
-
const envValue = getDaemonIosPairing();
|
|
301
|
-
if (envValue !== undefined) return envValue;
|
|
302
283
|
return existsSync(join(getRootDir(), "ios-pairing-enabled"));
|
|
303
284
|
}
|
|
304
285
|
|
|
@@ -13,7 +13,8 @@ import {
|
|
|
13
13
|
import type { CalendarEvent } from "../../config/bundled-skills/google-calendar/types.js";
|
|
14
14
|
import type { OAuthConnection } from "../../oauth/connection.js";
|
|
15
15
|
import { resolveOAuthConnection } from "../../oauth/connection-resolver.js";
|
|
16
|
-
|
|
16
|
+
|
|
17
|
+
const GOOGLE_CALENDAR_BASE_URL = "https://www.googleapis.com/calendar/v3";
|
|
17
18
|
import { getLogger } from "../../util/logger.js";
|
|
18
19
|
import type {
|
|
19
20
|
FetchResult,
|
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
import { ComputerUseSession } from "../daemon/computer-use-session.js";
|
|
4
|
-
import type { Message } from "../providers/types.js";
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Helper to create a user message with a tool_result block containing
|
|
8
|
-
* an AX tree wrapped in markers.
|
|
9
|
-
*/
|
|
10
|
-
function toolResultMsg(content: string): Message {
|
|
11
|
-
return {
|
|
12
|
-
role: "user",
|
|
13
|
-
content: [
|
|
14
|
-
{
|
|
15
|
-
type: "tool_result",
|
|
16
|
-
tool_use_id: "test-id",
|
|
17
|
-
content,
|
|
18
|
-
},
|
|
19
|
-
],
|
|
20
|
-
};
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
describe("ComputerUseSession.escapeAxTreeContent", () => {
|
|
24
|
-
test("escapes a literal closing tag in the content", () => {
|
|
25
|
-
const input = "some text </ax-tree> more text";
|
|
26
|
-
const escaped = ComputerUseSession.escapeAxTreeContent(input);
|
|
27
|
-
expect(escaped).toBe("some text </ax-tree> more text");
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
test("escapes multiple occurrences", () => {
|
|
31
|
-
const input = "</ax-tree> hello </ax-tree>";
|
|
32
|
-
const escaped = ComputerUseSession.escapeAxTreeContent(input);
|
|
33
|
-
expect(escaped).toBe("</ax-tree> hello </ax-tree>");
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
test("is case-insensitive", () => {
|
|
37
|
-
const input = "</AX-TREE> and </Ax-Tree>";
|
|
38
|
-
const escaped = ComputerUseSession.escapeAxTreeContent(input);
|
|
39
|
-
expect(escaped).toBe("</ax-tree> and </ax-tree>");
|
|
40
|
-
});
|
|
41
|
-
|
|
42
|
-
test("leaves content without closing tags unchanged", () => {
|
|
43
|
-
const input = 'Window "My App" [1]\n Button "OK" [2]';
|
|
44
|
-
expect(ComputerUseSession.escapeAxTreeContent(input)).toBe(input);
|
|
45
|
-
});
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
describe("ComputerUseSession.compactHistory", () => {
|
|
49
|
-
test("[experimental] strips old AX trees and keeps the most recent ones", () => {
|
|
50
|
-
const messages: Message[] = [
|
|
51
|
-
{ role: "assistant", content: [{ type: "text", text: "thinking..." }] },
|
|
52
|
-
toolResultMsg(
|
|
53
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [1]</ax-tree>',
|
|
54
|
-
),
|
|
55
|
-
{ role: "assistant", content: [{ type: "text", text: "action 1" }] },
|
|
56
|
-
toolResultMsg(
|
|
57
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [2]</ax-tree>',
|
|
58
|
-
),
|
|
59
|
-
{ role: "assistant", content: [{ type: "text", text: "action 2" }] },
|
|
60
|
-
toolResultMsg(
|
|
61
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
|
|
62
|
-
),
|
|
63
|
-
];
|
|
64
|
-
|
|
65
|
-
const compacted = ComputerUseSession.compactHistory(messages);
|
|
66
|
-
|
|
67
|
-
// First AX tree (index 1) should be stripped
|
|
68
|
-
const firstToolResult = compacted[1].content[0];
|
|
69
|
-
expect(firstToolResult.type).toBe("tool_result");
|
|
70
|
-
if (firstToolResult.type === "tool_result") {
|
|
71
|
-
expect(firstToolResult.content).toContain("<ax_tree_omitted />");
|
|
72
|
-
expect(firstToolResult.content).not.toContain("<ax-tree>");
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
// Last two AX trees should be preserved
|
|
76
|
-
const secondToolResult = compacted[3].content[0];
|
|
77
|
-
if (secondToolResult.type === "tool_result") {
|
|
78
|
-
expect(secondToolResult.content).toContain("<ax-tree>");
|
|
79
|
-
}
|
|
80
|
-
const thirdToolResult = compacted[5].content[0];
|
|
81
|
-
if (thirdToolResult.type === "tool_result") {
|
|
82
|
-
expect(thirdToolResult.content).toContain("<ax-tree>");
|
|
83
|
-
}
|
|
84
|
-
});
|
|
85
|
-
|
|
86
|
-
test("[experimental] handles AX tree content containing literal </ax-tree> (escaped)", () => {
|
|
87
|
-
// Simulate content where the AX tree text includes an escaped closing tag,
|
|
88
|
-
// e.g. user is viewing XML source code with "</ax-tree>" in it.
|
|
89
|
-
const escapedContent =
|
|
90
|
-
'<ax-tree>CURRENT SCREEN STATE:\nTextArea "editor" [1]\n ' +
|
|
91
|
-
"Line: </ax-tree> some xml\n</ax-tree>";
|
|
92
|
-
|
|
93
|
-
const messages: Message[] = [
|
|
94
|
-
{ role: "assistant", content: [{ type: "text", text: "action 0" }] },
|
|
95
|
-
toolResultMsg(escapedContent),
|
|
96
|
-
{ role: "assistant", content: [{ type: "text", text: "action 1" }] },
|
|
97
|
-
toolResultMsg(escapedContent),
|
|
98
|
-
{ role: "assistant", content: [{ type: "text", text: "action 2" }] },
|
|
99
|
-
toolResultMsg(
|
|
100
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
|
|
101
|
-
),
|
|
102
|
-
];
|
|
103
|
-
|
|
104
|
-
const compacted = ComputerUseSession.compactHistory(messages);
|
|
105
|
-
|
|
106
|
-
// The first message with escaped content should be fully stripped
|
|
107
|
-
const firstToolResult = compacted[1].content[0];
|
|
108
|
-
if (firstToolResult.type === "tool_result") {
|
|
109
|
-
expect(firstToolResult.content).not.toContain("<ax-tree>");
|
|
110
|
-
expect(firstToolResult.content).toContain("<ax_tree_omitted />");
|
|
111
|
-
}
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
test("regex fails on unescaped </ax-tree> inside content (demonstrating the bug)", () => {
|
|
115
|
-
// This test demonstrates what happens WITHOUT escaping: the regex
|
|
116
|
-
// only partially removes the AX tree block.
|
|
117
|
-
const unescapedContent =
|
|
118
|
-
'<ax-tree>CURRENT SCREEN STATE:\nTextArea "editor" [1]\n ' +
|
|
119
|
-
"Line: </ax-tree> some xml leftover\n</ax-tree>";
|
|
120
|
-
|
|
121
|
-
const messages: Message[] = [
|
|
122
|
-
{ role: "assistant", content: [{ type: "text", text: "action 0" }] },
|
|
123
|
-
toolResultMsg(unescapedContent),
|
|
124
|
-
{ role: "assistant", content: [{ type: "text", text: "action 1" }] },
|
|
125
|
-
toolResultMsg(unescapedContent),
|
|
126
|
-
{ role: "assistant", content: [{ type: "text", text: "action 2" }] },
|
|
127
|
-
toolResultMsg(
|
|
128
|
-
'<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
|
|
129
|
-
),
|
|
130
|
-
];
|
|
131
|
-
|
|
132
|
-
const compacted = ComputerUseSession.compactHistory(messages);
|
|
133
|
-
|
|
134
|
-
// Without escaping, the first tool result has leftover content after
|
|
135
|
-
// the regex only matched up to the FIRST </ax-tree>.
|
|
136
|
-
const firstToolResult = compacted[1].content[0];
|
|
137
|
-
if (firstToolResult.type === "tool_result") {
|
|
138
|
-
// The non-greedy regex stops at the first </ax-tree>, leaving
|
|
139
|
-
// " some xml leftover\n</ax-tree>" behind.
|
|
140
|
-
expect(firstToolResult.content).toContain("some xml leftover");
|
|
141
|
-
}
|
|
142
|
-
});
|
|
143
|
-
});
|
|
@@ -1,322 +0,0 @@
|
|
|
1
|
-
import { describe, expect, mock, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
// Mock config before importing modules that depend on it.
|
|
4
|
-
// The permissions mode must be 'workspace' so computer-use tools
|
|
5
|
-
// go through normal workspace trust evaluation instead of prompting.
|
|
6
|
-
mock.module("../config/loader.js", () => ({
|
|
7
|
-
getConfig: () => ({
|
|
8
|
-
ui: {},
|
|
9
|
-
|
|
10
|
-
provider: "mock-provider",
|
|
11
|
-
permissions: { mode: "workspace" },
|
|
12
|
-
apiKeys: {},
|
|
13
|
-
sandbox: { enabled: false },
|
|
14
|
-
timeouts: { toolExecutionTimeoutSec: 30, permissionTimeoutSec: 5 },
|
|
15
|
-
skills: { load: { extraDirs: [] } },
|
|
16
|
-
secretDetection: { enabled: false },
|
|
17
|
-
contextWindow: {
|
|
18
|
-
enabled: true,
|
|
19
|
-
maxInputTokens: 180000,
|
|
20
|
-
targetBudgetRatio: 0.3,
|
|
21
|
-
compactThreshold: 0.8,
|
|
22
|
-
summaryBudgetRatio: 0.05,
|
|
23
|
-
},
|
|
24
|
-
}),
|
|
25
|
-
invalidateConfigCache: () => {},
|
|
26
|
-
}));
|
|
27
|
-
|
|
28
|
-
import { ComputerUseSession } from "../daemon/computer-use-session.js";
|
|
29
|
-
import type {
|
|
30
|
-
CuObservation,
|
|
31
|
-
ServerMessage,
|
|
32
|
-
} from "../daemon/message-protocol.js";
|
|
33
|
-
import type { Provider, ProviderResponse } from "../providers/types.js";
|
|
34
|
-
|
|
35
|
-
function createProvider(responses: ProviderResponse[]): {
|
|
36
|
-
provider: Provider;
|
|
37
|
-
getCalls: () => number;
|
|
38
|
-
} {
|
|
39
|
-
let calls = 0;
|
|
40
|
-
const provider: Provider = {
|
|
41
|
-
name: "mock",
|
|
42
|
-
async sendMessage() {
|
|
43
|
-
const response = responses[calls] ?? responses[responses.length - 1];
|
|
44
|
-
calls++;
|
|
45
|
-
return response;
|
|
46
|
-
},
|
|
47
|
-
};
|
|
48
|
-
return { provider, getCalls: () => calls };
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
describe("ComputerUseSession lifecycle", () => {
|
|
52
|
-
test("stops provider loop immediately after terminal computer_use_done tool", async () => {
|
|
53
|
-
const { provider, getCalls } = createProvider([
|
|
54
|
-
{
|
|
55
|
-
content: [
|
|
56
|
-
{
|
|
57
|
-
type: "tool_use",
|
|
58
|
-
id: "tu-1",
|
|
59
|
-
name: "computer_use_done",
|
|
60
|
-
input: { summary: "Task finished" },
|
|
61
|
-
},
|
|
62
|
-
],
|
|
63
|
-
model: "mock-model",
|
|
64
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
65
|
-
stopReason: "tool_use",
|
|
66
|
-
},
|
|
67
|
-
{
|
|
68
|
-
content: [{ type: "text", text: "This should never be requested" }],
|
|
69
|
-
model: "mock-model",
|
|
70
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
71
|
-
stopReason: "end_turn",
|
|
72
|
-
},
|
|
73
|
-
]);
|
|
74
|
-
|
|
75
|
-
const sentMessages: ServerMessage[] = [];
|
|
76
|
-
let terminalCalls = 0;
|
|
77
|
-
|
|
78
|
-
const session = new ComputerUseSession(
|
|
79
|
-
"cu-test-1",
|
|
80
|
-
"test task",
|
|
81
|
-
1440,
|
|
82
|
-
900,
|
|
83
|
-
provider,
|
|
84
|
-
(msg) => {
|
|
85
|
-
sentMessages.push(msg);
|
|
86
|
-
},
|
|
87
|
-
"computer_use",
|
|
88
|
-
() => {
|
|
89
|
-
terminalCalls++;
|
|
90
|
-
},
|
|
91
|
-
);
|
|
92
|
-
|
|
93
|
-
const observation: CuObservation = {
|
|
94
|
-
type: "cu_observation",
|
|
95
|
-
sessionId: "cu-test-1",
|
|
96
|
-
axTree: 'Window "Test" [1]',
|
|
97
|
-
};
|
|
98
|
-
|
|
99
|
-
await session.handleObservation(observation);
|
|
100
|
-
|
|
101
|
-
// If computer_use_done does not abort the loop, we'd see an extra provider call.
|
|
102
|
-
expect(getCalls()).toBe(1);
|
|
103
|
-
expect(session.getState()).toBe("complete");
|
|
104
|
-
expect(terminalCalls).toBe(1);
|
|
105
|
-
|
|
106
|
-
const completes = sentMessages.filter(
|
|
107
|
-
(msg): msg is Extract<ServerMessage, { type: "cu_complete" }> =>
|
|
108
|
-
msg.type === "cu_complete",
|
|
109
|
-
);
|
|
110
|
-
expect(completes).toHaveLength(1);
|
|
111
|
-
expect(completes[0].summary).toBe("Task finished");
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
test("notifies terminal callback only once on repeated abort calls", () => {
|
|
115
|
-
const { provider } = createProvider([
|
|
116
|
-
{
|
|
117
|
-
content: [{ type: "text", text: "unused" }],
|
|
118
|
-
model: "mock-model",
|
|
119
|
-
usage: { inputTokens: 1, outputTokens: 1 },
|
|
120
|
-
stopReason: "end_turn",
|
|
121
|
-
},
|
|
122
|
-
]);
|
|
123
|
-
|
|
124
|
-
let terminalCalls = 0;
|
|
125
|
-
const session = new ComputerUseSession(
|
|
126
|
-
"cu-test-2",
|
|
127
|
-
"test task",
|
|
128
|
-
1440,
|
|
129
|
-
900,
|
|
130
|
-
provider,
|
|
131
|
-
() => {},
|
|
132
|
-
"computer_use",
|
|
133
|
-
() => {
|
|
134
|
-
terminalCalls++;
|
|
135
|
-
},
|
|
136
|
-
);
|
|
137
|
-
|
|
138
|
-
session.abort();
|
|
139
|
-
session.abort();
|
|
140
|
-
|
|
141
|
-
expect(terminalCalls).toBe(1);
|
|
142
|
-
expect(session.getState()).toBe("error");
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
test("CU session passes exactly 10 computer_use_* tools to the agent loop", async () => {
|
|
146
|
-
let capturedTools: string[] = [];
|
|
147
|
-
const provider: Provider = {
|
|
148
|
-
name: "mock",
|
|
149
|
-
async sendMessage(_msgs, tools) {
|
|
150
|
-
capturedTools = (tools ?? []).map((t) => t.name);
|
|
151
|
-
return {
|
|
152
|
-
content: [
|
|
153
|
-
{
|
|
154
|
-
type: "tool_use",
|
|
155
|
-
id: "tu-capture",
|
|
156
|
-
name: "computer_use_done",
|
|
157
|
-
input: { summary: "Done" },
|
|
158
|
-
},
|
|
159
|
-
],
|
|
160
|
-
model: "mock-model",
|
|
161
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
162
|
-
stopReason: "tool_use",
|
|
163
|
-
};
|
|
164
|
-
},
|
|
165
|
-
};
|
|
166
|
-
|
|
167
|
-
const session = new ComputerUseSession(
|
|
168
|
-
"cu-tool-capture",
|
|
169
|
-
"capture tools",
|
|
170
|
-
1440,
|
|
171
|
-
900,
|
|
172
|
-
provider,
|
|
173
|
-
() => {},
|
|
174
|
-
"computer_use",
|
|
175
|
-
);
|
|
176
|
-
|
|
177
|
-
await session.handleObservation({
|
|
178
|
-
type: "cu_observation",
|
|
179
|
-
sessionId: "cu-tool-capture",
|
|
180
|
-
axTree: 'Window "Test" [1]',
|
|
181
|
-
});
|
|
182
|
-
|
|
183
|
-
const cuTools = capturedTools.filter((n) => n.startsWith("computer_use_"));
|
|
184
|
-
expect(cuTools).toHaveLength(10);
|
|
185
|
-
|
|
186
|
-
// Assert exact set of expected CU tool names
|
|
187
|
-
const expectedCuTools = [
|
|
188
|
-
"computer_use_click",
|
|
189
|
-
"computer_use_type_text",
|
|
190
|
-
"computer_use_key",
|
|
191
|
-
"computer_use_scroll",
|
|
192
|
-
"computer_use_drag",
|
|
193
|
-
"computer_use_wait",
|
|
194
|
-
"computer_use_open_app",
|
|
195
|
-
"computer_use_run_applescript",
|
|
196
|
-
"computer_use_done",
|
|
197
|
-
"computer_use_respond",
|
|
198
|
-
];
|
|
199
|
-
for (const name of expectedCuTools) {
|
|
200
|
-
expect(cuTools).toContain(name);
|
|
201
|
-
}
|
|
202
|
-
});
|
|
203
|
-
|
|
204
|
-
test("computer_use_respond is a terminal tool that completes the session", async () => {
|
|
205
|
-
const { provider } = createProvider([
|
|
206
|
-
{
|
|
207
|
-
content: [
|
|
208
|
-
{
|
|
209
|
-
type: "tool_use",
|
|
210
|
-
id: "tu-respond",
|
|
211
|
-
name: "computer_use_respond",
|
|
212
|
-
input: {
|
|
213
|
-
answer: "The meeting is at 3pm",
|
|
214
|
-
reasoning: "Found in calendar",
|
|
215
|
-
},
|
|
216
|
-
},
|
|
217
|
-
],
|
|
218
|
-
model: "mock-model",
|
|
219
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
220
|
-
stopReason: "tool_use",
|
|
221
|
-
},
|
|
222
|
-
]);
|
|
223
|
-
|
|
224
|
-
const sentMessages: ServerMessage[] = [];
|
|
225
|
-
const session = new ComputerUseSession(
|
|
226
|
-
"cu-respond-test",
|
|
227
|
-
"check my schedule",
|
|
228
|
-
1440,
|
|
229
|
-
900,
|
|
230
|
-
provider,
|
|
231
|
-
(msg) => {
|
|
232
|
-
sentMessages.push(msg);
|
|
233
|
-
},
|
|
234
|
-
"computer_use",
|
|
235
|
-
);
|
|
236
|
-
|
|
237
|
-
await session.handleObservation({
|
|
238
|
-
type: "cu_observation",
|
|
239
|
-
sessionId: "cu-respond-test",
|
|
240
|
-
axTree: 'Window "Calendar" [1]',
|
|
241
|
-
});
|
|
242
|
-
|
|
243
|
-
expect(session.getState()).toBe("complete");
|
|
244
|
-
const completes = sentMessages.filter(
|
|
245
|
-
(msg): msg is Extract<ServerMessage, { type: "cu_complete" }> =>
|
|
246
|
-
msg.type === "cu_complete",
|
|
247
|
-
);
|
|
248
|
-
expect(completes).toHaveLength(1);
|
|
249
|
-
expect(completes[0].summary).toBe("The meeting is at 3pm");
|
|
250
|
-
expect(completes[0].isResponse).toBe(true);
|
|
251
|
-
});
|
|
252
|
-
|
|
253
|
-
test("default construction preactivates computer-use skill and provides 10 CU tools", async () => {
|
|
254
|
-
let capturedTools: string[] = [];
|
|
255
|
-
const provider: Provider = {
|
|
256
|
-
name: "mock",
|
|
257
|
-
async sendMessage(_msgs, tools) {
|
|
258
|
-
capturedTools = (tools ?? []).map((t) => t.name);
|
|
259
|
-
return {
|
|
260
|
-
content: [
|
|
261
|
-
{
|
|
262
|
-
type: "tool_use",
|
|
263
|
-
id: "tu-default",
|
|
264
|
-
name: "computer_use_done",
|
|
265
|
-
input: { summary: "Done" },
|
|
266
|
-
},
|
|
267
|
-
],
|
|
268
|
-
model: "mock-model",
|
|
269
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
270
|
-
stopReason: "tool_use",
|
|
271
|
-
};
|
|
272
|
-
},
|
|
273
|
-
};
|
|
274
|
-
|
|
275
|
-
// No preactivatedSkillIds passed — defaults to ['computer-use'] via skill projection
|
|
276
|
-
const session = new ComputerUseSession(
|
|
277
|
-
"cu-default-projection",
|
|
278
|
-
"test default projection",
|
|
279
|
-
1440,
|
|
280
|
-
900,
|
|
281
|
-
provider,
|
|
282
|
-
() => {},
|
|
283
|
-
"computer_use",
|
|
284
|
-
undefined,
|
|
285
|
-
);
|
|
286
|
-
|
|
287
|
-
await session.handleObservation({
|
|
288
|
-
type: "cu_observation",
|
|
289
|
-
sessionId: "cu-default-projection",
|
|
290
|
-
axTree: 'Window "Test" [1]',
|
|
291
|
-
});
|
|
292
|
-
|
|
293
|
-
const cuTools = capturedTools.filter((n) => n.startsWith("computer_use_"));
|
|
294
|
-
expect(cuTools).toHaveLength(10);
|
|
295
|
-
});
|
|
296
|
-
|
|
297
|
-
test("constructor accepts preactivatedSkillIds parameter", () => {
|
|
298
|
-
const { provider } = createProvider([
|
|
299
|
-
{
|
|
300
|
-
content: [{ type: "text", text: "unused" }],
|
|
301
|
-
model: "mock-model",
|
|
302
|
-
usage: { inputTokens: 1, outputTokens: 1 },
|
|
303
|
-
stopReason: "end_turn",
|
|
304
|
-
},
|
|
305
|
-
]);
|
|
306
|
-
|
|
307
|
-
// Should not throw
|
|
308
|
-
const session = new ComputerUseSession(
|
|
309
|
-
"cu-preactivated",
|
|
310
|
-
"test preactivated",
|
|
311
|
-
1440,
|
|
312
|
-
900,
|
|
313
|
-
provider,
|
|
314
|
-
() => {},
|
|
315
|
-
"computer_use",
|
|
316
|
-
undefined,
|
|
317
|
-
["computer-use"],
|
|
318
|
-
);
|
|
319
|
-
|
|
320
|
-
expect(session).toBeDefined();
|
|
321
|
-
});
|
|
322
|
-
});
|