@vellumai/assistant 0.4.48 → 0.4.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +2 -2
- package/README.md +2 -23
- package/docs/architecture/integrations.md +45 -41
- package/docs/architecture/keychain-broker.md +3 -3
- package/docs/runbook-trusted-contacts.md +3 -8
- package/hook-templates/debug-prompt-logger/hook.json +1 -1
- package/hook-templates/debug-prompt-logger/run.sh +1 -3
- package/package.json +1 -1
- package/src/__tests__/actor-token-service.test.ts +0 -1
- package/src/__tests__/anthropic-provider.test.ts +156 -0
- package/src/__tests__/approval-cascade.test.ts +810 -0
- package/src/__tests__/approval-primitive.test.ts +0 -1
- package/src/__tests__/approval-routes-http.test.ts +2 -0
- package/src/__tests__/assistant-attachments.test.ts +12 -34
- package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
- package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
- package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
- package/src/__tests__/channel-guardian.test.ts +0 -2
- package/src/__tests__/channel-readiness-routes.test.ts +15 -6
- package/src/__tests__/channel-readiness-service.test.ts +10 -9
- package/src/__tests__/checker.test.ts +9 -29
- package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
- package/src/__tests__/computer-use-tools.test.ts +2 -19
- package/src/__tests__/config-watcher.test.ts +0 -1
- package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
- package/src/__tests__/context-image-dimensions.test.ts +332 -0
- package/src/__tests__/context-token-estimator.test.ts +196 -13
- package/src/__tests__/conversation-attention-store.test.ts +0 -1
- package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
- package/src/__tests__/conversation-routes-guardian-reply.test.ts +144 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
- package/src/__tests__/credential-metadata-store.test.ts +64 -73
- package/src/__tests__/credential-security-invariants.test.ts +13 -7
- package/src/__tests__/credential-vault-unit.test.ts +280 -49
- package/src/__tests__/credential-vault.test.ts +138 -16
- package/src/__tests__/credentials-cli.test.ts +71 -0
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
- package/src/__tests__/ephemeral-permissions.test.ts +3 -3
- package/src/__tests__/gateway-only-guard.test.ts +0 -1
- package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
- package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
- package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
- package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
- package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
- package/src/__tests__/heartbeat-service.test.ts +0 -1
- package/src/__tests__/host-cu-proxy.test.ts +629 -0
- package/src/__tests__/host-shell-tool.test.ts +27 -15
- package/src/__tests__/http-user-message-parity.test.ts +1 -0
- package/src/__tests__/ingress-url-consistency.test.ts +14 -21
- package/src/__tests__/integration-status.test.ts +32 -51
- package/src/__tests__/intent-routing.test.ts +0 -1
- package/src/__tests__/invite-routes-http.test.ts +10 -9
- package/src/__tests__/keychain-broker-client.test.ts +11 -43
- package/src/__tests__/notification-routing-intent.test.ts +0 -1
- package/src/__tests__/oauth-cli.test.ts +373 -14
- package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
- package/src/__tests__/oauth-scope-policy.test.ts +4 -6
- package/src/__tests__/oauth-store.test.ts +756 -0
- package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
- package/src/__tests__/provider-error-scenarios.test.ts +0 -1
- package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
- package/src/__tests__/public-ingress-urls.test.ts +15 -21
- package/src/__tests__/recording-handler.test.ts +3 -4
- package/src/__tests__/registry.test.ts +2 -2
- package/src/__tests__/runtime-events-sse.test.ts +55 -7
- package/src/__tests__/schedule-store.test.ts +0 -1
- package/src/__tests__/scheduler-recurrence.test.ts +0 -1
- package/src/__tests__/scoped-approval-grants.test.ts +0 -1
- package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
- package/src/__tests__/secret-ingress-handler.test.ts +0 -1
- package/src/__tests__/send-endpoint-busy.test.ts +21 -6
- package/src/__tests__/sequence-store.test.ts +0 -1
- package/src/__tests__/session-init.benchmark.test.ts +4 -5
- package/src/__tests__/skill-include-graph.test.ts +66 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
- package/src/__tests__/skill-load-tool.test.ts +149 -1
- package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
- package/src/__tests__/skills-uninstall.test.ts +1 -1
- package/src/__tests__/skills.test.ts +3 -3
- package/src/__tests__/slack-channel-config.test.ts +67 -3
- package/src/__tests__/slack-share-routes.test.ts +17 -19
- package/src/__tests__/system-prompt.test.ts +0 -1
- package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
- package/src/__tests__/terminal-tools.test.ts +4 -3
- package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
- package/src/__tests__/tool-approval-handler.test.ts +0 -1
- package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
- package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
- package/src/__tests__/tool-executor.test.ts +0 -1
- package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
- package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
- package/src/__tests__/trust-store.test.ts +1 -22
- package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
- package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
- package/src/__tests__/twilio-routes.test.ts +0 -16
- package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
- package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
- package/src/agent/ax-tree-compaction.test.ts +235 -0
- package/src/agent/loop.ts +76 -130
- package/src/calls/call-domain.ts +1 -6
- package/src/calls/relay-server.ts +9 -13
- package/src/calls/twilio-config.ts +2 -7
- package/src/calls/twilio-routes.ts +1 -2
- package/src/calls/voice-ingress-preflight.ts +1 -1
- package/src/cli/commands/browser-relay.ts +18 -12
- package/src/cli/commands/completions.ts +0 -3
- package/src/cli/commands/credentials.ts +101 -15
- package/src/cli/commands/oauth/apps.ts +255 -0
- package/src/cli/commands/oauth/connections.ts +299 -0
- package/src/cli/commands/oauth/index.ts +52 -0
- package/src/cli/commands/oauth/providers.ts +242 -0
- package/src/cli/commands/skills.ts +4 -338
- package/src/cli/program.ts +1 -5
- package/src/cli/reference.ts +1 -3
- package/src/config/assistant-feature-flags.ts +0 -3
- package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
- package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
- package/src/config/bundled-skills/computer-use/TOOLS.json +22 -4
- package/src/config/bundled-skills/google-calendar/calendar-client.ts +21 -16
- package/src/config/bundled-skills/messaging/tools/shared.ts +1 -4
- package/src/config/bundled-skills/settings/SKILL.md +1 -1
- package/src/config/bundled-skills/settings/TOOLS.json +2 -8
- package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
- package/src/config/env-registry.ts +14 -83
- package/src/config/env.ts +11 -50
- package/src/config/feature-flag-registry.json +16 -16
- package/src/config/loader.ts +0 -6
- package/src/config/schema.ts +3 -1
- package/src/config/skills.ts +21 -2
- package/src/context/image-dimensions.ts +229 -0
- package/src/context/token-estimator.ts +75 -12
- package/src/context/window-manager.ts +49 -10
- package/src/daemon/assistant-attachments.ts +1 -13
- package/src/daemon/handlers/config-ingress.ts +8 -33
- package/src/daemon/handlers/config-slack-channel.ts +49 -46
- package/src/daemon/handlers/config-telegram.ts +32 -16
- package/src/daemon/handlers/sessions.ts +10 -24
- package/src/daemon/handlers/shared.ts +0 -130
- package/src/daemon/host-cu-proxy.ts +401 -0
- package/src/daemon/lifecycle.ts +36 -68
- package/src/daemon/message-protocol.ts +3 -0
- package/src/daemon/message-types/computer-use.ts +2 -119
- package/src/daemon/message-types/host-cu.ts +19 -0
- package/src/daemon/message-types/messages.ts +3 -0
- package/src/daemon/server.ts +14 -21
- package/src/daemon/session-agent-loop-handlers.ts +2 -0
- package/src/daemon/session-attachments.ts +1 -2
- package/src/daemon/session-slash.ts +1 -1
- package/src/daemon/session-surfaces.ts +40 -28
- package/src/daemon/session-tool-setup.ts +2 -9
- package/src/daemon/session.ts +138 -15
- package/src/daemon/tool-side-effects.ts +2 -8
- package/src/daemon/watch-handler.ts +2 -2
- package/src/events/tool-metrics-listener.ts +2 -2
- package/src/hooks/manager.ts +1 -4
- package/src/inbound/public-ingress-urls.ts +7 -7
- package/src/logfire.ts +16 -5
- package/src/memory/conversation-key-store.ts +21 -0
- package/src/memory/db-init.ts +4 -0
- package/src/memory/migrations/149-oauth-tables.ts +60 -0
- package/src/memory/migrations/index.ts +1 -0
- package/src/memory/schema/index.ts +1 -0
- package/src/memory/schema/oauth.ts +65 -0
- package/src/messaging/provider.ts +4 -4
- package/src/messaging/providers/gmail/client.ts +82 -2
- package/src/messaging/providers/gmail/people-client.ts +10 -10
- package/src/messaging/providers/telegram-bot/adapter.ts +17 -17
- package/src/messaging/providers/whatsapp/adapter.ts +11 -8
- package/src/messaging/registry.ts +2 -32
- package/src/notifications/copy-composer.ts +0 -5
- package/src/notifications/signal.ts +4 -5
- package/src/oauth/byo-connection.test.ts +126 -25
- package/src/oauth/byo-connection.ts +22 -6
- package/src/oauth/connect-orchestrator.ts +113 -57
- package/src/oauth/connect-types.ts +17 -23
- package/src/oauth/connection-resolver.ts +35 -11
- package/src/oauth/connection.ts +1 -1
- package/src/oauth/manual-token-connection.ts +104 -0
- package/src/oauth/oauth-store.ts +496 -0
- package/src/oauth/platform-connection.test.ts +29 -0
- package/src/oauth/platform-connection.ts +6 -5
- package/src/oauth/provider-behaviors.ts +124 -0
- package/src/oauth/scope-policy.ts +9 -2
- package/src/oauth/seed-providers.ts +161 -0
- package/src/oauth/token-persistence.ts +74 -78
- package/src/permissions/checker.ts +3 -3
- package/src/permissions/defaults.ts +0 -1
- package/src/permissions/prompter.ts +10 -1
- package/src/permissions/trust-store.ts +13 -0
- package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
- package/src/prompts/system-prompt.ts +28 -40
- package/src/providers/anthropic/client.ts +133 -24
- package/src/providers/retry.ts +1 -27
- package/src/runtime/auth/route-policy.ts +0 -3
- package/src/runtime/channel-reply-delivery.ts +0 -40
- package/src/runtime/gateway-client.ts +0 -7
- package/src/runtime/http-server.ts +8 -6
- package/src/runtime/http-types.ts +2 -2
- package/src/runtime/middleware/twilio-validation.ts +1 -11
- package/src/runtime/pending-interactions.ts +14 -12
- package/src/runtime/routes/channel-delivery-routes.ts +0 -1
- package/src/runtime/routes/conversation-routes.ts +73 -19
- package/src/runtime/routes/events-routes.ts +21 -11
- package/src/runtime/routes/host-cu-routes.ts +97 -0
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
- package/src/runtime/routes/integrations/slack/share.ts +6 -7
- package/src/runtime/routes/log-export-routes.ts +126 -8
- package/src/runtime/routes/settings-routes.ts +55 -48
- package/src/runtime/routes/surface-action-routes.ts +1 -1
- package/src/runtime/routes/watch-routes.ts +128 -0
- package/src/schedule/integration-status.ts +10 -9
- package/src/security/credential-key.ts +0 -156
- package/src/security/keychain-broker-client.ts +5 -6
- package/src/security/oauth2.ts +1 -1
- package/src/security/token-manager.ts +119 -46
- package/src/skills/catalog-install.ts +358 -0
- package/src/skills/include-graph.ts +32 -0
- package/src/telegram/bot-username.ts +2 -3
- package/src/tools/browser/network-recorder.ts +1 -1
- package/src/tools/browser/network-recording-types.ts +1 -1
- package/src/tools/computer-use/definitions.ts +46 -11
- package/src/tools/computer-use/registry.ts +4 -5
- package/src/tools/credentials/broker.ts +1 -2
- package/src/tools/credentials/metadata-store.ts +17 -121
- package/src/tools/credentials/vault.ts +94 -167
- package/src/tools/registry.ts +2 -7
- package/src/tools/skills/load.ts +62 -3
- package/src/tools/watch/watch-state.ts +0 -12
- package/src/util/logger.ts +7 -41
- package/src/util/platform.ts +9 -28
- package/src/watcher/providers/google-calendar.ts +2 -1
- package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
- package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
- package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
- package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
- package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
- package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
- package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
- package/src/cli/commands/dev.ts +0 -129
- package/src/cli/commands/map.ts +0 -391
- package/src/cli/commands/oauth.ts +0 -77
- package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +0 -16
- package/src/daemon/computer-use-session.ts +0 -1026
- package/src/daemon/ride-shotgun-handler.ts +0 -569
- package/src/oauth/provider-base-urls.ts +0 -21
- package/src/oauth/provider-profiles.ts +0 -192
- package/src/prompts/computer-use-prompt.ts +0 -98
- package/src/runtime/routes/computer-use-routes.ts +0 -641
- package/src/runtime/telegram-streaming-delivery.test.ts +0 -729
- package/src/runtime/telegram-streaming-delivery.ts +0 -393
- package/src/tools/computer-use/request-computer-control.ts +0 -56
|
@@ -1,166 +0,0 @@
|
|
|
1
|
-
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
import type { CuObservation } from "../daemon/message-protocol.js";
|
|
4
|
-
import type { Provider } from "../providers/types.js";
|
|
5
|
-
|
|
6
|
-
let capturedWorkingDir: string | undefined;
|
|
7
|
-
|
|
8
|
-
const noopLogger = new Proxy({} as Record<string, unknown>, {
|
|
9
|
-
get: (_target, prop) => (prop === "child" ? () => noopLogger : () => {}),
|
|
10
|
-
});
|
|
11
|
-
|
|
12
|
-
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
13
|
-
const realLogger = require("../util/logger.js");
|
|
14
|
-
mock.module("../util/logger.js", () => ({
|
|
15
|
-
...realLogger,
|
|
16
|
-
getLogger: () => noopLogger,
|
|
17
|
-
getCliLogger: () => noopLogger,
|
|
18
|
-
isDebug: () => false,
|
|
19
|
-
truncateForLog: (value: string, maxLen = 500) =>
|
|
20
|
-
value.length > maxLen ? value.slice(0, maxLen) + "..." : value,
|
|
21
|
-
initLogger: () => {},
|
|
22
|
-
pruneOldLogFiles: () => 0,
|
|
23
|
-
}));
|
|
24
|
-
|
|
25
|
-
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
26
|
-
const realPlatform = require("../util/platform.js");
|
|
27
|
-
mock.module("../util/platform.js", () => ({
|
|
28
|
-
...realPlatform,
|
|
29
|
-
getRootDir: () => "/tmp",
|
|
30
|
-
getDataDir: () => "/tmp/data",
|
|
31
|
-
|
|
32
|
-
getSandboxRootDir: () => "/tmp/sandbox",
|
|
33
|
-
getSandboxWorkingDir: () => "/tmp/workspace",
|
|
34
|
-
getInterfacesDir: () => "/tmp/interfaces",
|
|
35
|
-
getWorkspaceDir: () => "/tmp/workspace",
|
|
36
|
-
getWorkspaceConfigPath: () => "/tmp/workspace/config.json",
|
|
37
|
-
getWorkspaceSkillsDir: () => "/tmp/workspace/skills",
|
|
38
|
-
getWorkspaceHooksDir: () => "/tmp/workspace/hooks",
|
|
39
|
-
getWorkspacePromptPath: (file: string) => `/tmp/workspace/${file}`,
|
|
40
|
-
getPlatformName: () => "linux",
|
|
41
|
-
getClipboardCommand: () => null,
|
|
42
|
-
getPidPath: () => "/tmp/test.pid",
|
|
43
|
-
getDbPath: () => "/tmp/data/db/assistant.db",
|
|
44
|
-
getLogPath: () => "/tmp/test.log",
|
|
45
|
-
getHistoryPath: () => "/tmp/data/history",
|
|
46
|
-
getHooksDir: () => "/tmp/hooks",
|
|
47
|
-
readSessionToken: () => null,
|
|
48
|
-
ensureDataDir: () => {},
|
|
49
|
-
isMacOS: () => false,
|
|
50
|
-
isLinux: () => true,
|
|
51
|
-
isWindows: () => false,
|
|
52
|
-
normalizeAssistantId: (id: string) => id,
|
|
53
|
-
readLockfile: () => null,
|
|
54
|
-
writeLockfile: () => {},
|
|
55
|
-
}));
|
|
56
|
-
|
|
57
|
-
mock.module("../config/loader.js", () => ({
|
|
58
|
-
getConfig: () => ({
|
|
59
|
-
ui: {},
|
|
60
|
-
daemon: { standaloneRecording: false },
|
|
61
|
-
provider: "mock-provider",
|
|
62
|
-
model: "mock-model",
|
|
63
|
-
permissions: { mode: "workspace" },
|
|
64
|
-
apiKeys: {},
|
|
65
|
-
sandbox: { enabled: false, backend: "native" },
|
|
66
|
-
timeouts: { toolExecutionTimeoutSec: 30, permissionTimeoutSec: 5 },
|
|
67
|
-
skills: { load: { extraDirs: [] } },
|
|
68
|
-
secretDetection: {
|
|
69
|
-
enabled: false,
|
|
70
|
-
allowOneTimeSend: false,
|
|
71
|
-
customPatterns: [],
|
|
72
|
-
entropyThreshold: 3.5,
|
|
73
|
-
},
|
|
74
|
-
contextWindow: {
|
|
75
|
-
enabled: true,
|
|
76
|
-
maxInputTokens: 180000,
|
|
77
|
-
targetBudgetRatio: 0.30,
|
|
78
|
-
compactThreshold: 0.8, summaryBudgetRatio: 0.05,
|
|
79
|
-
},
|
|
80
|
-
assistantFeatureFlagValues: {},
|
|
81
|
-
}),
|
|
82
|
-
loadConfig: () => ({}),
|
|
83
|
-
loadRawConfig: () => ({}),
|
|
84
|
-
saveConfig: () => {},
|
|
85
|
-
saveRawConfig: () => {},
|
|
86
|
-
invalidateConfigCache: () => {},
|
|
87
|
-
applyNestedDefaults: (config: unknown) => config,
|
|
88
|
-
getNestedValue: () => undefined,
|
|
89
|
-
setNestedValue: () => {},
|
|
90
|
-
syncConfigToLockfile: () => {},
|
|
91
|
-
API_KEY_PROVIDERS: [],
|
|
92
|
-
}));
|
|
93
|
-
|
|
94
|
-
const { ToolExecutor } = await import("../tools/executor.js");
|
|
95
|
-
const { ComputerUseSession } =
|
|
96
|
-
await import("../daemon/computer-use-session.js");
|
|
97
|
-
|
|
98
|
-
const originalExecute = ToolExecutor.prototype.execute;
|
|
99
|
-
|
|
100
|
-
describe("ComputerUseSession working directory", () => {
|
|
101
|
-
beforeEach(() => {
|
|
102
|
-
capturedWorkingDir = undefined;
|
|
103
|
-
ToolExecutor.prototype.execute = async function (
|
|
104
|
-
_name: string,
|
|
105
|
-
_input: Record<string, unknown>,
|
|
106
|
-
context: { workingDir: string },
|
|
107
|
-
) {
|
|
108
|
-
capturedWorkingDir = context.workingDir;
|
|
109
|
-
return { content: "ok", isError: false };
|
|
110
|
-
} as typeof ToolExecutor.prototype.execute;
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
afterEach(() => {
|
|
114
|
-
ToolExecutor.prototype.execute = originalExecute;
|
|
115
|
-
});
|
|
116
|
-
|
|
117
|
-
test("uses sandbox working directory for tool execution context", async () => {
|
|
118
|
-
let providerCalls = 0;
|
|
119
|
-
const provider: Provider = {
|
|
120
|
-
name: "mock-provider",
|
|
121
|
-
async sendMessage() {
|
|
122
|
-
const calls = providerCalls++;
|
|
123
|
-
if (calls === 0) {
|
|
124
|
-
return {
|
|
125
|
-
content: [
|
|
126
|
-
{
|
|
127
|
-
type: "tool_use",
|
|
128
|
-
id: "toolu_1",
|
|
129
|
-
name: "computer_use_click",
|
|
130
|
-
input: { element_id: 1 },
|
|
131
|
-
},
|
|
132
|
-
],
|
|
133
|
-
model: "mock-model",
|
|
134
|
-
usage: { inputTokens: 1, outputTokens: 1 },
|
|
135
|
-
stopReason: "tool_use",
|
|
136
|
-
};
|
|
137
|
-
}
|
|
138
|
-
return {
|
|
139
|
-
content: [{ type: "text", text: "unused" }],
|
|
140
|
-
model: "mock-model",
|
|
141
|
-
usage: { inputTokens: 1, outputTokens: 1 },
|
|
142
|
-
stopReason: "end_turn",
|
|
143
|
-
};
|
|
144
|
-
},
|
|
145
|
-
};
|
|
146
|
-
|
|
147
|
-
const session = new ComputerUseSession(
|
|
148
|
-
"cu-sandbox-1",
|
|
149
|
-
"test task",
|
|
150
|
-
1440,
|
|
151
|
-
900,
|
|
152
|
-
provider,
|
|
153
|
-
() => {},
|
|
154
|
-
);
|
|
155
|
-
|
|
156
|
-
const observation: CuObservation = {
|
|
157
|
-
type: "cu_observation",
|
|
158
|
-
sessionId: "cu-sandbox-1",
|
|
159
|
-
axTree: 'Window "Test" [1]',
|
|
160
|
-
};
|
|
161
|
-
|
|
162
|
-
await session.handleObservation(observation);
|
|
163
|
-
|
|
164
|
-
expect(capturedWorkingDir).toBe("/tmp/workspace");
|
|
165
|
-
});
|
|
166
|
-
});
|
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
import { afterAll, describe, expect, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
import { buildToolDefinitions } from "../daemon/session-tool-setup.js";
|
|
4
|
-
import {
|
|
5
|
-
__resetRegistryForTesting,
|
|
6
|
-
getAllToolDefinitions,
|
|
7
|
-
getAllTools,
|
|
8
|
-
getTool,
|
|
9
|
-
initializeTools,
|
|
10
|
-
} from "../tools/registry.js";
|
|
11
|
-
import {
|
|
12
|
-
assertComputerUseToolsAbsent,
|
|
13
|
-
COMPUTER_USE_TOOL_NAMES,
|
|
14
|
-
} from "./test-support/computer-use-skill-harness.js";
|
|
15
|
-
|
|
16
|
-
afterAll(() => {
|
|
17
|
-
__resetRegistryForTesting();
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
describe("computer-use skill baseline: registry tool surfaces", () => {
|
|
21
|
-
test("no computer_use_* action tools are registered after initializeTools() (migrated to skill)", async () => {
|
|
22
|
-
await initializeTools();
|
|
23
|
-
|
|
24
|
-
for (const name of COMPUTER_USE_TOOL_NAMES) {
|
|
25
|
-
const tool = getTool(name);
|
|
26
|
-
expect(tool).toBeUndefined();
|
|
27
|
-
}
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
test("computer_use_request_control is registered in core after initializeTools()", async () => {
|
|
31
|
-
await initializeTools();
|
|
32
|
-
|
|
33
|
-
const tool = getTool("computer_use_request_control");
|
|
34
|
-
expect(tool).toBeDefined();
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
test("getAllToolDefinitions() excludes all computer_use_* tools (proxy exclusion)", async () => {
|
|
38
|
-
await initializeTools();
|
|
39
|
-
|
|
40
|
-
const defNames = getAllToolDefinitions().map((d) => d.name);
|
|
41
|
-
assertComputerUseToolsAbsent(defNames);
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
test("getAllToolDefinitions() excludes computer_use_request_control (proxy exclusion)", async () => {
|
|
45
|
-
await initializeTools();
|
|
46
|
-
|
|
47
|
-
const defNames = getAllToolDefinitions().map((d) => d.name);
|
|
48
|
-
expect(defNames).not.toContain("computer_use_request_control");
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
test("buildToolDefinitions() includes computer_use_request_control for text sessions", async () => {
|
|
52
|
-
await initializeTools();
|
|
53
|
-
|
|
54
|
-
const defNames = buildToolDefinitions().map((d) => d.name);
|
|
55
|
-
expect(defNames).toContain("computer_use_request_control");
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
test("buildToolDefinitions() excludes all computer_use_* action tools from text sessions", async () => {
|
|
59
|
-
await initializeTools();
|
|
60
|
-
|
|
61
|
-
const defNames = buildToolDefinitions().map((d) => d.name);
|
|
62
|
-
// The only computer_use_* tool in text sessions is the escalation tool
|
|
63
|
-
const cuActionTools = defNames.filter(
|
|
64
|
-
(n) =>
|
|
65
|
-
n.startsWith("computer_use_") && n !== "computer_use_request_control",
|
|
66
|
-
);
|
|
67
|
-
expect(cuActionTools).toHaveLength(0);
|
|
68
|
-
});
|
|
69
|
-
|
|
70
|
-
test("post-cutover count: 1 computer_use_* tool in core registry (escalation only)", async () => {
|
|
71
|
-
await initializeTools();
|
|
72
|
-
|
|
73
|
-
const allTools = getAllTools();
|
|
74
|
-
const cuTools = allTools.filter((t) => t.name.startsWith("computer_use_"));
|
|
75
|
-
expect(cuTools).toHaveLength(1);
|
|
76
|
-
expect(cuTools[0].name).toBe("computer_use_request_control");
|
|
77
|
-
});
|
|
78
|
-
});
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
import { join } from "node:path";
|
|
2
|
-
import { beforeAll, describe, expect, test } from "bun:test";
|
|
3
|
-
|
|
4
|
-
import { getBundledSkillsDir } from "../config/skills.js";
|
|
5
|
-
import { buildToolDefinitions } from "../daemon/session-tool-setup.js";
|
|
6
|
-
import { parseToolManifestFile } from "../skills/tool-manifest.js";
|
|
7
|
-
import {
|
|
8
|
-
__resetRegistryForTesting,
|
|
9
|
-
getAllToolDefinitions,
|
|
10
|
-
getAllTools,
|
|
11
|
-
getTool,
|
|
12
|
-
initializeTools,
|
|
13
|
-
} from "../tools/registry.js";
|
|
14
|
-
import {
|
|
15
|
-
COMPUTER_USE_TOOL_COUNT,
|
|
16
|
-
COMPUTER_USE_TOOL_NAMES,
|
|
17
|
-
} from "./test-support/computer-use-skill-harness.js";
|
|
18
|
-
|
|
19
|
-
beforeAll(async () => {
|
|
20
|
-
__resetRegistryForTesting();
|
|
21
|
-
await initializeTools();
|
|
22
|
-
});
|
|
23
|
-
|
|
24
|
-
describe("computer-use skill end-state", () => {
|
|
25
|
-
// ── Core Registry ──────────────────────────────────────────────────
|
|
26
|
-
|
|
27
|
-
test("core registry contains 1 computer_use_* tool (escalation only)", () => {
|
|
28
|
-
const allTools = getAllTools();
|
|
29
|
-
const cuTools = allTools.filter((t) => t.name.startsWith("computer_use_"));
|
|
30
|
-
expect(cuTools).toHaveLength(1);
|
|
31
|
-
expect(cuTools[0].name).toBe("computer_use_request_control");
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
test("computer_use_request_control is resolvable from core registry", () => {
|
|
35
|
-
expect(getTool("computer_use_request_control")).toBeDefined();
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
test("no action tool from COMPUTER_USE_TOOL_NAMES is resolvable from core registry", () => {
|
|
39
|
-
for (const name of COMPUTER_USE_TOOL_NAMES) {
|
|
40
|
-
expect(getTool(name)).toBeUndefined();
|
|
41
|
-
}
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
// ── getAllToolDefinitions (excludes proxy & skill tools) ──────────
|
|
45
|
-
|
|
46
|
-
test("getAllToolDefinitions() excludes computer_use_* tools", () => {
|
|
47
|
-
const defs = getAllToolDefinitions();
|
|
48
|
-
const cuDefs = defs.filter((d) => d.name.startsWith("computer_use_"));
|
|
49
|
-
expect(cuDefs).toHaveLength(0);
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
test("getAllToolDefinitions() excludes computer_use_request_control (proxy exclusion)", () => {
|
|
53
|
-
const defs = getAllToolDefinitions();
|
|
54
|
-
const found = defs.find((d) => d.name === "computer_use_request_control");
|
|
55
|
-
expect(found).toBeUndefined();
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
// ── buildToolDefinitions (text session tool set) ─────────────────
|
|
59
|
-
|
|
60
|
-
test("buildToolDefinitions() includes computer_use_request_control", () => {
|
|
61
|
-
const defs = buildToolDefinitions();
|
|
62
|
-
const found = defs.find((d) => d.name === "computer_use_request_control");
|
|
63
|
-
expect(found).toBeDefined();
|
|
64
|
-
});
|
|
65
|
-
|
|
66
|
-
test("buildToolDefinitions() excludes computer_use_* action tools", () => {
|
|
67
|
-
const defs = buildToolDefinitions();
|
|
68
|
-
const cuDefs = defs.filter(
|
|
69
|
-
(d) =>
|
|
70
|
-
d.name.startsWith("computer_use_") &&
|
|
71
|
-
d.name !== "computer_use_request_control",
|
|
72
|
-
);
|
|
73
|
-
expect(cuDefs).toHaveLength(0);
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
// ── Bundled Skill Catalog ────────────────────────────────────────
|
|
77
|
-
|
|
78
|
-
test(
|
|
79
|
-
"computer-use skill has exactly " +
|
|
80
|
-
COMPUTER_USE_TOOL_COUNT +
|
|
81
|
-
" tools in TOOLS.json",
|
|
82
|
-
() => {
|
|
83
|
-
const manifestPath = join(
|
|
84
|
-
getBundledSkillsDir(),
|
|
85
|
-
"computer-use",
|
|
86
|
-
"TOOLS.json",
|
|
87
|
-
);
|
|
88
|
-
const manifest = parseToolManifestFile(manifestPath);
|
|
89
|
-
expect(manifest.tools).toHaveLength(COMPUTER_USE_TOOL_COUNT);
|
|
90
|
-
},
|
|
91
|
-
);
|
|
92
|
-
|
|
93
|
-
test("bundled skill tool names match expected computer_use_* names", () => {
|
|
94
|
-
const manifestPath = join(
|
|
95
|
-
getBundledSkillsDir(),
|
|
96
|
-
"computer-use",
|
|
97
|
-
"TOOLS.json",
|
|
98
|
-
);
|
|
99
|
-
const manifest = parseToolManifestFile(manifestPath);
|
|
100
|
-
const toolNames = new Set(manifest.tools.map((t) => t.name));
|
|
101
|
-
for (const name of COMPUTER_USE_TOOL_NAMES) {
|
|
102
|
-
expect(toolNames.has(name)).toBe(true);
|
|
103
|
-
}
|
|
104
|
-
});
|
|
105
|
-
});
|
|
@@ -1,249 +0,0 @@
|
|
|
1
|
-
import { beforeAll, describe, expect, mock, test } from "bun:test";
|
|
2
|
-
|
|
3
|
-
// Mock config before importing modules that depend on it.
|
|
4
|
-
mock.module("../config/loader.js", () => ({
|
|
5
|
-
getConfig: () => ({
|
|
6
|
-
ui: {},
|
|
7
|
-
|
|
8
|
-
provider: "mock-provider",
|
|
9
|
-
permissions: { mode: "workspace" },
|
|
10
|
-
apiKeys: {},
|
|
11
|
-
sandbox: { enabled: false },
|
|
12
|
-
timeouts: { toolExecutionTimeoutSec: 30, permissionTimeoutSec: 5 },
|
|
13
|
-
skills: { load: { extraDirs: [] } },
|
|
14
|
-
secretDetection: { enabled: false },
|
|
15
|
-
contextWindow: {
|
|
16
|
-
enabled: true,
|
|
17
|
-
maxInputTokens: 180000,
|
|
18
|
-
targetBudgetRatio: 0.30,
|
|
19
|
-
compactThreshold: 0.8, summaryBudgetRatio: 0.05,
|
|
20
|
-
},
|
|
21
|
-
}),
|
|
22
|
-
invalidateConfigCache: () => {},
|
|
23
|
-
}));
|
|
24
|
-
|
|
25
|
-
import { ComputerUseSession } from "../daemon/computer-use-session.js";
|
|
26
|
-
import type { CuObservation } from "../daemon/message-protocol.js";
|
|
27
|
-
import type { Provider, ProviderResponse } from "../providers/types.js";
|
|
28
|
-
import {
|
|
29
|
-
__resetRegistryForTesting,
|
|
30
|
-
getAllTools,
|
|
31
|
-
getSkillRefCount,
|
|
32
|
-
initializeTools,
|
|
33
|
-
} from "../tools/registry.js";
|
|
34
|
-
|
|
35
|
-
function createProvider(responses: ProviderResponse[]): Provider {
|
|
36
|
-
let calls = 0;
|
|
37
|
-
return {
|
|
38
|
-
name: "mock",
|
|
39
|
-
async sendMessage() {
|
|
40
|
-
const response = responses[calls] ?? responses[responses.length - 1];
|
|
41
|
-
calls++;
|
|
42
|
-
return response;
|
|
43
|
-
},
|
|
44
|
-
};
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
const doneResponse: ProviderResponse = {
|
|
48
|
-
content: [
|
|
49
|
-
{
|
|
50
|
-
type: "tool_use",
|
|
51
|
-
id: "tu-cleanup",
|
|
52
|
-
name: "computer_use_done",
|
|
53
|
-
input: { summary: "Done" },
|
|
54
|
-
},
|
|
55
|
-
],
|
|
56
|
-
model: "mock-model",
|
|
57
|
-
usage: { inputTokens: 10, outputTokens: 5 },
|
|
58
|
-
stopReason: "tool_use",
|
|
59
|
-
};
|
|
60
|
-
|
|
61
|
-
const observation: CuObservation = {
|
|
62
|
-
type: "cu_observation",
|
|
63
|
-
sessionId: "cleanup-test",
|
|
64
|
-
axTree: 'Window "Test" [1]',
|
|
65
|
-
};
|
|
66
|
-
|
|
67
|
-
/**
 * Lifecycle regression suite for computer-use (CU) sessions.
 *
 * Each test drives a `ComputerUseSession` to a terminal state and then checks
 * that `getSkillRefCount("computer-use")` is 0 and/or that
 * `computer_use_request_control` is the only `computer_use_*` tool returned
 * by `getAllTools()`.
 */
describe("CU session skill tool lifecycle cleanup", () => {
  /**
   * Builds a session using the constructor arguments every test in this suite
   * shares (1440, 900, a no-op callback and the "computer_use" mode string);
   * only the id, task text and provider vary between tests.
   */
  const buildSession = (sessionId: string, task: string, provider: Provider) =>
    new ComputerUseSession(
      sessionId,
      task,
      1440,
      900,
      provider,
      () => {},
      "computer_use",
    );

  /** Returns the shared observation fixture re-addressed to `sessionId`. */
  const observationFor = (sessionId: string): CuObservation => ({
    ...observation,
    sessionId,
  });

  /** Asserts that the escalation tool is the only `computer_use_*` tool registered. */
  const expectOnlyEscalationToolRegistered = () => {
    const cuTools = getAllTools().filter((t) => t.name.startsWith("computer_use_"));
    expect(cuTools).toHaveLength(1);
    expect(cuTools[0].name).toBe("computer_use_request_control");
  };

  beforeAll(async () => {
    // Start from a clean registry so state left by other suites cannot skew the counts.
    __resetRegistryForTesting();
    await initializeTools();
  });

  test("computer-use skill refcount is 0 after session completes via computer_use_done", async () => {
    const session = buildSession(
      "cleanup-done",
      "test cleanup",
      createProvider([doneResponse]),
    );

    // Constructing a session must not take a skill reference by itself.
    expect(getSkillRefCount("computer-use")).toBe(0);

    await session.handleObservation(observationFor("cleanup-done"));

    expect(session.getState()).toBe("complete");
    expect(getSkillRefCount("computer-use")).toBe(0);
  });

  test("computer-use skill refcount is 0 after session is aborted", async () => {
    // Resolved the moment the session actually invokes the provider. This
    // replaces a fixed sleep, so the abort is issued at a deterministic point.
    let markProviderCalled: () => void = () => {};
    const providerCalled = new Promise<void>((resolve) => {
      markProviderCalled = resolve;
    });

    // Use a provider that hangs until the abort signal fires, keeping the
    // session active long enough to abort after skill projection has occurred.
    const hangingProvider: Provider = {
      name: "mock",
      sendMessage: (_msgs, _tools, _sys, opts) =>
        new Promise<ProviderResponse>((_, reject) => {
          const rejectAsAborted = () =>
            reject(new DOMException("Aborted", "AbortError"));
          if (opts?.signal?.aborted) {
            rejectAsAborted();
          } else {
            opts?.signal?.addEventListener("abort", rejectAsAborted, {
              once: true,
            });
          }
          // Signal only after the abort listener is attached, so an abort
          // issued right after this point is guaranteed to be observed.
          markProviderCalled();
        }),
    };

    const session = buildSession(
      "cleanup-abort",
      "test abort cleanup",
      hangingProvider,
    );

    expect(getSkillRefCount("computer-use")).toBe(0);

    // Start the session (don't await — it will hang on the provider call).
    // Skill projection happens synchronously at the start of runAgentLoop,
    // so by the time sendMessage is called the refcount has been incremented.
    const sessionPromise = session.handleObservation(
      observationFor("cleanup-abort"),
    );

    // Wait until the provider has been called. Racing against the session
    // promise means a session that fails before reaching the provider
    // surfaces its error here instead of stalling until the test times out.
    await Promise.race([providerCalled, sessionPromise]);

    session.abort();

    // Let the session finish its cleanup
    await sessionPromise;

    expect(session.getState()).toBe("error");
    expect(getSkillRefCount("computer-use")).toBe(0);
  });

  test("computer-use skill refcount is 0 after session completes via computer_use_respond", async () => {
    const respondResponse: ProviderResponse = {
      content: [
        {
          type: "tool_use",
          id: "tu-respond-cleanup",
          name: "computer_use_respond",
          input: { answer: "Test answer", reasoning: "Test reasoning" },
        },
      ],
      model: "mock-model",
      usage: { inputTokens: 10, outputTokens: 5 },
      stopReason: "tool_use",
    };

    const session = buildSession(
      "cleanup-respond",
      "test respond cleanup",
      createProvider([respondResponse]),
    );

    await session.handleObservation(observationFor("cleanup-respond"));

    expect(session.getState()).toBe("complete");
    expect(getSkillRefCount("computer-use")).toBe(0);
  });

  test("only escalation tool remains in registry after session cleanup", async () => {
    const session = buildSession(
      "cleanup-registry-check",
      "test registry cleanup",
      createProvider([doneResponse]),
    );

    await session.handleObservation(observationFor("cleanup-registry-check"));

    expect(session.getState()).toBe("complete");
    expectOnlyEscalationToolRegistered();
  });

  test("multiple sequential CU sessions do not leak refcounts", async () => {
    // Sessions run strictly one after another; each must reach "complete"
    // before the next one is created.
    for (let i = 0; i < 3; i++) {
      const sessionId = `cleanup-sequential-${i}`;
      const session = buildSession(
        sessionId,
        "test sequential cleanup",
        createProvider([doneResponse]),
      );

      await session.handleObservation(observationFor(sessionId));
      expect(session.getState()).toBe("complete");
    }

    expect(getSkillRefCount("computer-use")).toBe(0);
    expectOnlyEscalationToolRegistered();
  });

  // Cross-suite regression: after CU sessions complete, core registry invariants hold
  test("core registry has 1 computer_use_* tool after CU session lifecycle (escalation only)", () => {
    expectOnlyEscalationToolRegistered();
  });

  test("computer_use_request_control is in core registry after CU session lifecycle", async () => {
    const { getTool } = await import("../tools/registry.js");
    const tool = getTool("computer_use_request_control");
    expect(tool).toBeDefined();
  });
});
|