@vellumai/assistant 0.4.48 → 0.4.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. package/ARCHITECTURE.md +2 -2
  2. package/README.md +2 -23
  3. package/docs/architecture/integrations.md +45 -41
  4. package/docs/architecture/keychain-broker.md +3 -3
  5. package/docs/runbook-trusted-contacts.md +3 -8
  6. package/hook-templates/debug-prompt-logger/hook.json +1 -1
  7. package/hook-templates/debug-prompt-logger/run.sh +1 -3
  8. package/package.json +1 -1
  9. package/src/__tests__/actor-token-service.test.ts +0 -1
  10. package/src/__tests__/anthropic-provider.test.ts +156 -0
  11. package/src/__tests__/approval-cascade.test.ts +810 -0
  12. package/src/__tests__/approval-primitive.test.ts +0 -1
  13. package/src/__tests__/approval-routes-http.test.ts +2 -0
  14. package/src/__tests__/assistant-attachments.test.ts +12 -34
  15. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
  16. package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
  17. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
  18. package/src/__tests__/channel-guardian.test.ts +0 -2
  19. package/src/__tests__/channel-readiness-routes.test.ts +15 -6
  20. package/src/__tests__/channel-readiness-service.test.ts +10 -9
  21. package/src/__tests__/checker.test.ts +9 -29
  22. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
  23. package/src/__tests__/computer-use-tools.test.ts +2 -19
  24. package/src/__tests__/config-watcher.test.ts +0 -1
  25. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
  26. package/src/__tests__/context-image-dimensions.test.ts +332 -0
  27. package/src/__tests__/context-token-estimator.test.ts +196 -13
  28. package/src/__tests__/conversation-attention-store.test.ts +0 -1
  29. package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
  30. package/src/__tests__/conversation-routes-guardian-reply.test.ts +144 -0
  31. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  32. package/src/__tests__/credential-metadata-store.test.ts +64 -73
  33. package/src/__tests__/credential-security-invariants.test.ts +13 -7
  34. package/src/__tests__/credential-vault-unit.test.ts +280 -49
  35. package/src/__tests__/credential-vault.test.ts +138 -16
  36. package/src/__tests__/credentials-cli.test.ts +71 -0
  37. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
  38. package/src/__tests__/ephemeral-permissions.test.ts +3 -3
  39. package/src/__tests__/gateway-only-guard.test.ts +0 -1
  40. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
  41. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
  42. package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
  43. package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
  44. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
  45. package/src/__tests__/heartbeat-service.test.ts +0 -1
  46. package/src/__tests__/host-cu-proxy.test.ts +629 -0
  47. package/src/__tests__/host-shell-tool.test.ts +27 -15
  48. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  49. package/src/__tests__/ingress-url-consistency.test.ts +14 -21
  50. package/src/__tests__/integration-status.test.ts +32 -51
  51. package/src/__tests__/intent-routing.test.ts +0 -1
  52. package/src/__tests__/invite-routes-http.test.ts +10 -9
  53. package/src/__tests__/keychain-broker-client.test.ts +11 -43
  54. package/src/__tests__/notification-routing-intent.test.ts +0 -1
  55. package/src/__tests__/oauth-cli.test.ts +373 -14
  56. package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
  57. package/src/__tests__/oauth-scope-policy.test.ts +4 -6
  58. package/src/__tests__/oauth-store.test.ts +756 -0
  59. package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
  60. package/src/__tests__/provider-error-scenarios.test.ts +0 -1
  61. package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
  62. package/src/__tests__/public-ingress-urls.test.ts +15 -21
  63. package/src/__tests__/recording-handler.test.ts +3 -4
  64. package/src/__tests__/registry.test.ts +2 -2
  65. package/src/__tests__/runtime-events-sse.test.ts +55 -7
  66. package/src/__tests__/schedule-store.test.ts +0 -1
  67. package/src/__tests__/scheduler-recurrence.test.ts +0 -1
  68. package/src/__tests__/scoped-approval-grants.test.ts +0 -1
  69. package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
  70. package/src/__tests__/secret-ingress-handler.test.ts +0 -1
  71. package/src/__tests__/send-endpoint-busy.test.ts +21 -6
  72. package/src/__tests__/sequence-store.test.ts +0 -1
  73. package/src/__tests__/session-init.benchmark.test.ts +4 -5
  74. package/src/__tests__/skill-include-graph.test.ts +66 -0
  75. package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
  76. package/src/__tests__/skill-load-tool.test.ts +149 -1
  77. package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
  78. package/src/__tests__/skills-uninstall.test.ts +1 -1
  79. package/src/__tests__/skills.test.ts +3 -3
  80. package/src/__tests__/slack-channel-config.test.ts +67 -3
  81. package/src/__tests__/slack-share-routes.test.ts +17 -19
  82. package/src/__tests__/system-prompt.test.ts +0 -1
  83. package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
  84. package/src/__tests__/terminal-tools.test.ts +4 -3
  85. package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
  86. package/src/__tests__/tool-approval-handler.test.ts +0 -1
  87. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
  88. package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
  89. package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
  90. package/src/__tests__/tool-executor.test.ts +0 -1
  91. package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
  92. package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
  93. package/src/__tests__/trust-store.test.ts +1 -22
  94. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  95. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
  96. package/src/__tests__/twilio-routes.test.ts +0 -16
  97. package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
  98. package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
  99. package/src/agent/ax-tree-compaction.test.ts +235 -0
  100. package/src/agent/loop.ts +76 -130
  101. package/src/calls/call-domain.ts +1 -6
  102. package/src/calls/relay-server.ts +9 -13
  103. package/src/calls/twilio-config.ts +2 -7
  104. package/src/calls/twilio-routes.ts +1 -2
  105. package/src/calls/voice-ingress-preflight.ts +1 -1
  106. package/src/cli/commands/browser-relay.ts +18 -12
  107. package/src/cli/commands/completions.ts +0 -3
  108. package/src/cli/commands/credentials.ts +101 -15
  109. package/src/cli/commands/oauth/apps.ts +255 -0
  110. package/src/cli/commands/oauth/connections.ts +299 -0
  111. package/src/cli/commands/oauth/index.ts +52 -0
  112. package/src/cli/commands/oauth/providers.ts +242 -0
  113. package/src/cli/commands/skills.ts +4 -338
  114. package/src/cli/program.ts +1 -5
  115. package/src/cli/reference.ts +1 -3
  116. package/src/config/assistant-feature-flags.ts +0 -3
  117. package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
  118. package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
  119. package/src/config/bundled-skills/computer-use/TOOLS.json +22 -4
  120. package/src/config/bundled-skills/google-calendar/calendar-client.ts +21 -16
  121. package/src/config/bundled-skills/messaging/tools/shared.ts +1 -4
  122. package/src/config/bundled-skills/settings/SKILL.md +1 -1
  123. package/src/config/bundled-skills/settings/TOOLS.json +2 -8
  124. package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
  125. package/src/config/env-registry.ts +14 -83
  126. package/src/config/env.ts +11 -50
  127. package/src/config/feature-flag-registry.json +16 -16
  128. package/src/config/loader.ts +0 -6
  129. package/src/config/schema.ts +3 -1
  130. package/src/config/skills.ts +21 -2
  131. package/src/context/image-dimensions.ts +229 -0
  132. package/src/context/token-estimator.ts +75 -12
  133. package/src/context/window-manager.ts +49 -10
  134. package/src/daemon/assistant-attachments.ts +1 -13
  135. package/src/daemon/handlers/config-ingress.ts +8 -33
  136. package/src/daemon/handlers/config-slack-channel.ts +49 -46
  137. package/src/daemon/handlers/config-telegram.ts +32 -16
  138. package/src/daemon/handlers/sessions.ts +10 -24
  139. package/src/daemon/handlers/shared.ts +0 -130
  140. package/src/daemon/host-cu-proxy.ts +401 -0
  141. package/src/daemon/lifecycle.ts +36 -68
  142. package/src/daemon/message-protocol.ts +3 -0
  143. package/src/daemon/message-types/computer-use.ts +2 -119
  144. package/src/daemon/message-types/host-cu.ts +19 -0
  145. package/src/daemon/message-types/messages.ts +3 -0
  146. package/src/daemon/server.ts +14 -21
  147. package/src/daemon/session-agent-loop-handlers.ts +2 -0
  148. package/src/daemon/session-attachments.ts +1 -2
  149. package/src/daemon/session-slash.ts +1 -1
  150. package/src/daemon/session-surfaces.ts +40 -28
  151. package/src/daemon/session-tool-setup.ts +2 -9
  152. package/src/daemon/session.ts +138 -15
  153. package/src/daemon/tool-side-effects.ts +2 -8
  154. package/src/daemon/watch-handler.ts +2 -2
  155. package/src/events/tool-metrics-listener.ts +2 -2
  156. package/src/hooks/manager.ts +1 -4
  157. package/src/inbound/public-ingress-urls.ts +7 -7
  158. package/src/logfire.ts +16 -5
  159. package/src/memory/conversation-key-store.ts +21 -0
  160. package/src/memory/db-init.ts +4 -0
  161. package/src/memory/migrations/149-oauth-tables.ts +60 -0
  162. package/src/memory/migrations/index.ts +1 -0
  163. package/src/memory/schema/index.ts +1 -0
  164. package/src/memory/schema/oauth.ts +65 -0
  165. package/src/messaging/provider.ts +4 -4
  166. package/src/messaging/providers/gmail/client.ts +82 -2
  167. package/src/messaging/providers/gmail/people-client.ts +10 -10
  168. package/src/messaging/providers/telegram-bot/adapter.ts +17 -17
  169. package/src/messaging/providers/whatsapp/adapter.ts +11 -8
  170. package/src/messaging/registry.ts +2 -32
  171. package/src/notifications/copy-composer.ts +0 -5
  172. package/src/notifications/signal.ts +4 -5
  173. package/src/oauth/byo-connection.test.ts +126 -25
  174. package/src/oauth/byo-connection.ts +22 -6
  175. package/src/oauth/connect-orchestrator.ts +113 -57
  176. package/src/oauth/connect-types.ts +17 -23
  177. package/src/oauth/connection-resolver.ts +35 -11
  178. package/src/oauth/connection.ts +1 -1
  179. package/src/oauth/manual-token-connection.ts +104 -0
  180. package/src/oauth/oauth-store.ts +496 -0
  181. package/src/oauth/platform-connection.test.ts +29 -0
  182. package/src/oauth/platform-connection.ts +6 -5
  183. package/src/oauth/provider-behaviors.ts +124 -0
  184. package/src/oauth/scope-policy.ts +9 -2
  185. package/src/oauth/seed-providers.ts +161 -0
  186. package/src/oauth/token-persistence.ts +74 -78
  187. package/src/permissions/checker.ts +3 -3
  188. package/src/permissions/defaults.ts +0 -1
  189. package/src/permissions/prompter.ts +10 -1
  190. package/src/permissions/trust-store.ts +13 -0
  191. package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
  192. package/src/prompts/system-prompt.ts +28 -40
  193. package/src/providers/anthropic/client.ts +133 -24
  194. package/src/providers/retry.ts +1 -27
  195. package/src/runtime/auth/route-policy.ts +0 -3
  196. package/src/runtime/channel-reply-delivery.ts +0 -40
  197. package/src/runtime/gateway-client.ts +0 -7
  198. package/src/runtime/http-server.ts +8 -6
  199. package/src/runtime/http-types.ts +2 -2
  200. package/src/runtime/middleware/twilio-validation.ts +1 -11
  201. package/src/runtime/pending-interactions.ts +14 -12
  202. package/src/runtime/routes/channel-delivery-routes.ts +0 -1
  203. package/src/runtime/routes/conversation-routes.ts +73 -19
  204. package/src/runtime/routes/events-routes.ts +21 -11
  205. package/src/runtime/routes/host-cu-routes.ts +97 -0
  206. package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
  207. package/src/runtime/routes/integrations/slack/share.ts +6 -7
  208. package/src/runtime/routes/log-export-routes.ts +126 -8
  209. package/src/runtime/routes/settings-routes.ts +55 -48
  210. package/src/runtime/routes/surface-action-routes.ts +1 -1
  211. package/src/runtime/routes/watch-routes.ts +128 -0
  212. package/src/schedule/integration-status.ts +10 -9
  213. package/src/security/credential-key.ts +0 -156
  214. package/src/security/keychain-broker-client.ts +5 -6
  215. package/src/security/oauth2.ts +1 -1
  216. package/src/security/token-manager.ts +119 -46
  217. package/src/skills/catalog-install.ts +358 -0
  218. package/src/skills/include-graph.ts +32 -0
  219. package/src/telegram/bot-username.ts +2 -3
  220. package/src/tools/browser/network-recorder.ts +1 -1
  221. package/src/tools/browser/network-recording-types.ts +1 -1
  222. package/src/tools/computer-use/definitions.ts +46 -11
  223. package/src/tools/computer-use/registry.ts +4 -5
  224. package/src/tools/credentials/broker.ts +1 -2
  225. package/src/tools/credentials/metadata-store.ts +17 -121
  226. package/src/tools/credentials/vault.ts +94 -167
  227. package/src/tools/registry.ts +2 -7
  228. package/src/tools/skills/load.ts +62 -3
  229. package/src/tools/watch/watch-state.ts +0 -12
  230. package/src/util/logger.ts +7 -41
  231. package/src/util/platform.ts +9 -28
  232. package/src/watcher/providers/google-calendar.ts +2 -1
  233. package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
  234. package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
  235. package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
  236. package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
  237. package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
  238. package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
  239. package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
  240. package/src/cli/commands/dev.ts +0 -129
  241. package/src/cli/commands/map.ts +0 -391
  242. package/src/cli/commands/oauth.ts +0 -77
  243. package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +0 -16
  244. package/src/daemon/computer-use-session.ts +0 -1026
  245. package/src/daemon/ride-shotgun-handler.ts +0 -569
  246. package/src/oauth/provider-base-urls.ts +0 -21
  247. package/src/oauth/provider-profiles.ts +0 -192
  248. package/src/prompts/computer-use-prompt.ts +0 -98
  249. package/src/runtime/routes/computer-use-routes.ts +0 -641
  250. package/src/runtime/telegram-streaming-delivery.test.ts +0 -729
  251. package/src/runtime/telegram-streaming-delivery.ts +0 -393
  252. package/src/tools/computer-use/request-computer-control.ts +0 -56
@@ -8,13 +8,7 @@ import {
8
8
  import { homedir } from "node:os";
9
9
  import { join } from "node:path";
10
10
 
11
- import {
12
- getBaseDataDir,
13
- getDaemonIosPairing,
14
- getDaemonTcpEnabled,
15
- getDaemonTcpHost,
16
- getDaemonTcpPort,
17
- } from "../config/env-registry.js";
11
+ import { getBaseDataDir } from "../config/env-registry.js";
18
12
 
19
13
  export function isMacOS(): boolean {
20
14
  return process.platform === "darwin";
@@ -245,39 +239,30 @@ export function getInterfacesDir(): string {
245
239
 
246
240
  /**
247
241
  * Returns the TCP port the daemon should listen on for iOS clients.
248
- * Reads VELLUM_DAEMON_TCP_PORT env var; defaults to 8765.
242
+ * Hardcoded default: 8765.
249
243
  */
250
244
  export function getTCPPort(): number {
251
- return getDaemonTcpPort();
245
+ return 8765;
252
246
  }
253
247
 
254
248
  /**
255
249
  * Returns whether the daemon TCP listener should be enabled.
256
- * Resolution order (first match wins):
257
- * 1. VELLUM_DAEMON_TCP_ENABLED env var ('true'/'1' → on, 'false'/'0' → off)
258
- * 2. Presence of the flag file ~/.vellum/tcp-enabled (exists → on)
259
- * 3. Default: false
250
+ * Checks for the presence of the flag file ~/.vellum/tcp-enabled.
251
+ * Default: false.
260
252
  *
261
253
  * The flag-file check makes it easy to enable TCP in dev without restarting
262
254
  * the shell: `touch ~/.vellum/tcp-enabled && kill -USR1 <daemon-pid>`.
263
- * The macOS CLI (AssistantCli) also sets the env var for bundled-binary deployments.
264
255
  */
265
256
  export function isTCPEnabled(): boolean {
266
- const envValue = getDaemonTcpEnabled();
267
- if (envValue !== undefined) return envValue;
268
257
  return existsSync(join(getRootDir(), "tcp-enabled"));
269
258
  }
270
259
 
271
260
  /**
272
261
  * Returns the hostname/address for the TCP listener.
273
- * Resolution order (first match wins):
274
- * 1. VELLUM_DAEMON_TCP_HOST env var (explicit override)
275
- * 2. If iOS pairing is enabled: '0.0.0.0' (LAN-accessible)
276
- * 3. Default: '127.0.0.1' (localhost only)
262
+ * If iOS pairing is enabled (flag file): '0.0.0.0' (LAN-accessible).
263
+ * Default: '127.0.0.1' (localhost only).
277
264
  */
278
265
  export function getTCPHost(): string {
279
- const override = getDaemonTcpHost();
280
- if (override) return override;
281
266
  if (isIOSPairingEnabled()) return "0.0.0.0";
282
267
  return "127.0.0.1";
283
268
  }
@@ -288,17 +273,13 @@ export function getTCPHost(): string {
288
273
  * instead of 127.0.0.1 (localhost only), making the daemon reachable
289
274
  * from iOS devices on the same local network.
290
275
  *
291
- * Resolution order (first match wins):
292
- * 1. VELLUM_DAEMON_IOS_PAIRING env var ('true'/'1' → on, 'false'/'0' → off)
293
- * 2. Presence of the flag file ~/.vellum/ios-pairing-enabled (exists → on)
294
- * 3. Default: false
276
+ * Checks for the presence of the flag file ~/.vellum/ios-pairing-enabled.
277
+ * Default: false.
295
278
  *
296
279
  * This is separate from isTCPEnabled() — TCP can be enabled for localhost-only
297
280
  * access without exposing the daemon to the LAN.
298
281
  */
299
282
  export function isIOSPairingEnabled(): boolean {
300
- const envValue = getDaemonIosPairing();
301
- if (envValue !== undefined) return envValue;
302
283
  return existsSync(join(getRootDir(), "ios-pairing-enabled"));
303
284
  }
304
285
 
@@ -13,7 +13,8 @@ import {
13
13
  import type { CalendarEvent } from "../../config/bundled-skills/google-calendar/types.js";
14
14
  import type { OAuthConnection } from "../../oauth/connection.js";
15
15
  import { resolveOAuthConnection } from "../../oauth/connection-resolver.js";
16
- import { GOOGLE_CALENDAR_BASE_URL } from "../../oauth/provider-base-urls.js";
16
+
17
+ const GOOGLE_CALENDAR_BASE_URL = "https://www.googleapis.com/calendar/v3";
17
18
  import { getLogger } from "../../util/logger.js";
18
19
  import type {
19
20
  FetchResult,
@@ -1,143 +0,0 @@
1
- import { describe, expect, test } from "bun:test";
2
-
3
- import { ComputerUseSession } from "../daemon/computer-use-session.js";
4
- import type { Message } from "../providers/types.js";
5
-
6
- /**
7
- * Helper to create a user message with a tool_result block containing
8
- * an AX tree wrapped in markers.
9
- */
10
- function toolResultMsg(content: string): Message {
11
- return {
12
- role: "user",
13
- content: [
14
- {
15
- type: "tool_result",
16
- tool_use_id: "test-id",
17
- content,
18
- },
19
- ],
20
- };
21
- }
22
-
23
- describe("ComputerUseSession.escapeAxTreeContent", () => {
24
- test("escapes a literal closing tag in the content", () => {
25
- const input = "some text </ax-tree> more text";
26
- const escaped = ComputerUseSession.escapeAxTreeContent(input);
27
- expect(escaped).toBe("some text &lt;/ax-tree&gt; more text");
28
- });
29
-
30
- test("escapes multiple occurrences", () => {
31
- const input = "</ax-tree> hello </ax-tree>";
32
- const escaped = ComputerUseSession.escapeAxTreeContent(input);
33
- expect(escaped).toBe("&lt;/ax-tree&gt; hello &lt;/ax-tree&gt;");
34
- });
35
-
36
- test("is case-insensitive", () => {
37
- const input = "</AX-TREE> and </Ax-Tree>";
38
- const escaped = ComputerUseSession.escapeAxTreeContent(input);
39
- expect(escaped).toBe("&lt;/ax-tree&gt; and &lt;/ax-tree&gt;");
40
- });
41
-
42
- test("leaves content without closing tags unchanged", () => {
43
- const input = 'Window "My App" [1]\n Button "OK" [2]';
44
- expect(ComputerUseSession.escapeAxTreeContent(input)).toBe(input);
45
- });
46
- });
47
-
48
- describe("ComputerUseSession.compactHistory", () => {
49
- test("[experimental] strips old AX trees and keeps the most recent ones", () => {
50
- const messages: Message[] = [
51
- { role: "assistant", content: [{ type: "text", text: "thinking..." }] },
52
- toolResultMsg(
53
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [1]</ax-tree>',
54
- ),
55
- { role: "assistant", content: [{ type: "text", text: "action 1" }] },
56
- toolResultMsg(
57
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [2]</ax-tree>',
58
- ),
59
- { role: "assistant", content: [{ type: "text", text: "action 2" }] },
60
- toolResultMsg(
61
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
62
- ),
63
- ];
64
-
65
- const compacted = ComputerUseSession.compactHistory(messages);
66
-
67
- // First AX tree (index 1) should be stripped
68
- const firstToolResult = compacted[1].content[0];
69
- expect(firstToolResult.type).toBe("tool_result");
70
- if (firstToolResult.type === "tool_result") {
71
- expect(firstToolResult.content).toContain("<ax_tree_omitted />");
72
- expect(firstToolResult.content).not.toContain("<ax-tree>");
73
- }
74
-
75
- // Last two AX trees should be preserved
76
- const secondToolResult = compacted[3].content[0];
77
- if (secondToolResult.type === "tool_result") {
78
- expect(secondToolResult.content).toContain("<ax-tree>");
79
- }
80
- const thirdToolResult = compacted[5].content[0];
81
- if (thirdToolResult.type === "tool_result") {
82
- expect(thirdToolResult.content).toContain("<ax-tree>");
83
- }
84
- });
85
-
86
- test("[experimental] handles AX tree content containing literal </ax-tree> (escaped)", () => {
87
- // Simulate content where the AX tree text includes an escaped closing tag,
88
- // e.g. user is viewing XML source code with "</ax-tree>" in it.
89
- const escapedContent =
90
- '<ax-tree>CURRENT SCREEN STATE:\nTextArea "editor" [1]\n ' +
91
- "Line: &lt;/ax-tree&gt; some xml\n</ax-tree>";
92
-
93
- const messages: Message[] = [
94
- { role: "assistant", content: [{ type: "text", text: "action 0" }] },
95
- toolResultMsg(escapedContent),
96
- { role: "assistant", content: [{ type: "text", text: "action 1" }] },
97
- toolResultMsg(escapedContent),
98
- { role: "assistant", content: [{ type: "text", text: "action 2" }] },
99
- toolResultMsg(
100
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
101
- ),
102
- ];
103
-
104
- const compacted = ComputerUseSession.compactHistory(messages);
105
-
106
- // The first message with escaped content should be fully stripped
107
- const firstToolResult = compacted[1].content[0];
108
- if (firstToolResult.type === "tool_result") {
109
- expect(firstToolResult.content).not.toContain("<ax-tree>");
110
- expect(firstToolResult.content).toContain("<ax_tree_omitted />");
111
- }
112
- });
113
-
114
- test("regex fails on unescaped </ax-tree> inside content (demonstrating the bug)", () => {
115
- // This test demonstrates what happens WITHOUT escaping: the regex
116
- // only partially removes the AX tree block.
117
- const unescapedContent =
118
- '<ax-tree>CURRENT SCREEN STATE:\nTextArea "editor" [1]\n ' +
119
- "Line: </ax-tree> some xml leftover\n</ax-tree>";
120
-
121
- const messages: Message[] = [
122
- { role: "assistant", content: [{ type: "text", text: "action 0" }] },
123
- toolResultMsg(unescapedContent),
124
- { role: "assistant", content: [{ type: "text", text: "action 1" }] },
125
- toolResultMsg(unescapedContent),
126
- { role: "assistant", content: [{ type: "text", text: "action 2" }] },
127
- toolResultMsg(
128
- '<ax-tree>CURRENT SCREEN STATE:\nWindow "App" [3]</ax-tree>',
129
- ),
130
- ];
131
-
132
- const compacted = ComputerUseSession.compactHistory(messages);
133
-
134
- // Without escaping, the first tool result has leftover content after
135
- // the regex only matched up to the FIRST </ax-tree>.
136
- const firstToolResult = compacted[1].content[0];
137
- if (firstToolResult.type === "tool_result") {
138
- // The non-greedy regex stops at the first </ax-tree>, leaving
139
- // " some xml leftover\n</ax-tree>" behind.
140
- expect(firstToolResult.content).toContain("some xml leftover");
141
- }
142
- });
143
- });
@@ -1,322 +0,0 @@
1
- import { describe, expect, mock, test } from "bun:test";
2
-
3
- // Mock config before importing modules that depend on it.
4
- // The permissions mode must be 'workspace' so computer-use tools
5
- // go through normal workspace trust evaluation instead of prompting.
6
- mock.module("../config/loader.js", () => ({
7
- getConfig: () => ({
8
- ui: {},
9
-
10
- provider: "mock-provider",
11
- permissions: { mode: "workspace" },
12
- apiKeys: {},
13
- sandbox: { enabled: false },
14
- timeouts: { toolExecutionTimeoutSec: 30, permissionTimeoutSec: 5 },
15
- skills: { load: { extraDirs: [] } },
16
- secretDetection: { enabled: false },
17
- contextWindow: {
18
- enabled: true,
19
- maxInputTokens: 180000,
20
- targetBudgetRatio: 0.3,
21
- compactThreshold: 0.8,
22
- summaryBudgetRatio: 0.05,
23
- },
24
- }),
25
- invalidateConfigCache: () => {},
26
- }));
27
-
28
- import { ComputerUseSession } from "../daemon/computer-use-session.js";
29
- import type {
30
- CuObservation,
31
- ServerMessage,
32
- } from "../daemon/message-protocol.js";
33
- import type { Provider, ProviderResponse } from "../providers/types.js";
34
-
35
- function createProvider(responses: ProviderResponse[]): {
36
- provider: Provider;
37
- getCalls: () => number;
38
- } {
39
- let calls = 0;
40
- const provider: Provider = {
41
- name: "mock",
42
- async sendMessage() {
43
- const response = responses[calls] ?? responses[responses.length - 1];
44
- calls++;
45
- return response;
46
- },
47
- };
48
- return { provider, getCalls: () => calls };
49
- }
50
-
51
- describe("ComputerUseSession lifecycle", () => {
52
- test("stops provider loop immediately after terminal computer_use_done tool", async () => {
53
- const { provider, getCalls } = createProvider([
54
- {
55
- content: [
56
- {
57
- type: "tool_use",
58
- id: "tu-1",
59
- name: "computer_use_done",
60
- input: { summary: "Task finished" },
61
- },
62
- ],
63
- model: "mock-model",
64
- usage: { inputTokens: 10, outputTokens: 5 },
65
- stopReason: "tool_use",
66
- },
67
- {
68
- content: [{ type: "text", text: "This should never be requested" }],
69
- model: "mock-model",
70
- usage: { inputTokens: 10, outputTokens: 5 },
71
- stopReason: "end_turn",
72
- },
73
- ]);
74
-
75
- const sentMessages: ServerMessage[] = [];
76
- let terminalCalls = 0;
77
-
78
- const session = new ComputerUseSession(
79
- "cu-test-1",
80
- "test task",
81
- 1440,
82
- 900,
83
- provider,
84
- (msg) => {
85
- sentMessages.push(msg);
86
- },
87
- "computer_use",
88
- () => {
89
- terminalCalls++;
90
- },
91
- );
92
-
93
- const observation: CuObservation = {
94
- type: "cu_observation",
95
- sessionId: "cu-test-1",
96
- axTree: 'Window "Test" [1]',
97
- };
98
-
99
- await session.handleObservation(observation);
100
-
101
- // If computer_use_done does not abort the loop, we'd see an extra provider call.
102
- expect(getCalls()).toBe(1);
103
- expect(session.getState()).toBe("complete");
104
- expect(terminalCalls).toBe(1);
105
-
106
- const completes = sentMessages.filter(
107
- (msg): msg is Extract<ServerMessage, { type: "cu_complete" }> =>
108
- msg.type === "cu_complete",
109
- );
110
- expect(completes).toHaveLength(1);
111
- expect(completes[0].summary).toBe("Task finished");
112
- });
113
-
114
- test("notifies terminal callback only once on repeated abort calls", () => {
115
- const { provider } = createProvider([
116
- {
117
- content: [{ type: "text", text: "unused" }],
118
- model: "mock-model",
119
- usage: { inputTokens: 1, outputTokens: 1 },
120
- stopReason: "end_turn",
121
- },
122
- ]);
123
-
124
- let terminalCalls = 0;
125
- const session = new ComputerUseSession(
126
- "cu-test-2",
127
- "test task",
128
- 1440,
129
- 900,
130
- provider,
131
- () => {},
132
- "computer_use",
133
- () => {
134
- terminalCalls++;
135
- },
136
- );
137
-
138
- session.abort();
139
- session.abort();
140
-
141
- expect(terminalCalls).toBe(1);
142
- expect(session.getState()).toBe("error");
143
- });
144
-
145
- test("CU session passes exactly 10 computer_use_* tools to the agent loop", async () => {
146
- let capturedTools: string[] = [];
147
- const provider: Provider = {
148
- name: "mock",
149
- async sendMessage(_msgs, tools) {
150
- capturedTools = (tools ?? []).map((t) => t.name);
151
- return {
152
- content: [
153
- {
154
- type: "tool_use",
155
- id: "tu-capture",
156
- name: "computer_use_done",
157
- input: { summary: "Done" },
158
- },
159
- ],
160
- model: "mock-model",
161
- usage: { inputTokens: 10, outputTokens: 5 },
162
- stopReason: "tool_use",
163
- };
164
- },
165
- };
166
-
167
- const session = new ComputerUseSession(
168
- "cu-tool-capture",
169
- "capture tools",
170
- 1440,
171
- 900,
172
- provider,
173
- () => {},
174
- "computer_use",
175
- );
176
-
177
- await session.handleObservation({
178
- type: "cu_observation",
179
- sessionId: "cu-tool-capture",
180
- axTree: 'Window "Test" [1]',
181
- });
182
-
183
- const cuTools = capturedTools.filter((n) => n.startsWith("computer_use_"));
184
- expect(cuTools).toHaveLength(10);
185
-
186
- // Assert exact set of expected CU tool names
187
- const expectedCuTools = [
188
- "computer_use_click",
189
- "computer_use_type_text",
190
- "computer_use_key",
191
- "computer_use_scroll",
192
- "computer_use_drag",
193
- "computer_use_wait",
194
- "computer_use_open_app",
195
- "computer_use_run_applescript",
196
- "computer_use_done",
197
- "computer_use_respond",
198
- ];
199
- for (const name of expectedCuTools) {
200
- expect(cuTools).toContain(name);
201
- }
202
- });
203
-
204
- test("computer_use_respond is a terminal tool that completes the session", async () => {
205
- const { provider } = createProvider([
206
- {
207
- content: [
208
- {
209
- type: "tool_use",
210
- id: "tu-respond",
211
- name: "computer_use_respond",
212
- input: {
213
- answer: "The meeting is at 3pm",
214
- reasoning: "Found in calendar",
215
- },
216
- },
217
- ],
218
- model: "mock-model",
219
- usage: { inputTokens: 10, outputTokens: 5 },
220
- stopReason: "tool_use",
221
- },
222
- ]);
223
-
224
- const sentMessages: ServerMessage[] = [];
225
- const session = new ComputerUseSession(
226
- "cu-respond-test",
227
- "check my schedule",
228
- 1440,
229
- 900,
230
- provider,
231
- (msg) => {
232
- sentMessages.push(msg);
233
- },
234
- "computer_use",
235
- );
236
-
237
- await session.handleObservation({
238
- type: "cu_observation",
239
- sessionId: "cu-respond-test",
240
- axTree: 'Window "Calendar" [1]',
241
- });
242
-
243
- expect(session.getState()).toBe("complete");
244
- const completes = sentMessages.filter(
245
- (msg): msg is Extract<ServerMessage, { type: "cu_complete" }> =>
246
- msg.type === "cu_complete",
247
- );
248
- expect(completes).toHaveLength(1);
249
- expect(completes[0].summary).toBe("The meeting is at 3pm");
250
- expect(completes[0].isResponse).toBe(true);
251
- });
252
-
253
- test("default construction preactivates computer-use skill and provides 10 CU tools", async () => {
254
- let capturedTools: string[] = [];
255
- const provider: Provider = {
256
- name: "mock",
257
- async sendMessage(_msgs, tools) {
258
- capturedTools = (tools ?? []).map((t) => t.name);
259
- return {
260
- content: [
261
- {
262
- type: "tool_use",
263
- id: "tu-default",
264
- name: "computer_use_done",
265
- input: { summary: "Done" },
266
- },
267
- ],
268
- model: "mock-model",
269
- usage: { inputTokens: 10, outputTokens: 5 },
270
- stopReason: "tool_use",
271
- };
272
- },
273
- };
274
-
275
- // No preactivatedSkillIds passed — defaults to ['computer-use'] via skill projection
276
- const session = new ComputerUseSession(
277
- "cu-default-projection",
278
- "test default projection",
279
- 1440,
280
- 900,
281
- provider,
282
- () => {},
283
- "computer_use",
284
- undefined,
285
- );
286
-
287
- await session.handleObservation({
288
- type: "cu_observation",
289
- sessionId: "cu-default-projection",
290
- axTree: 'Window "Test" [1]',
291
- });
292
-
293
- const cuTools = capturedTools.filter((n) => n.startsWith("computer_use_"));
294
- expect(cuTools).toHaveLength(10);
295
- });
296
-
297
- test("constructor accepts preactivatedSkillIds parameter", () => {
298
- const { provider } = createProvider([
299
- {
300
- content: [{ type: "text", text: "unused" }],
301
- model: "mock-model",
302
- usage: { inputTokens: 1, outputTokens: 1 },
303
- stopReason: "end_turn",
304
- },
305
- ]);
306
-
307
- // Should not throw
308
- const session = new ComputerUseSession(
309
- "cu-preactivated",
310
- "test preactivated",
311
- 1440,
312
- 900,
313
- provider,
314
- () => {},
315
- "computer_use",
316
- undefined,
317
- ["computer-use"],
318
- );
319
-
320
- expect(session).toBeDefined();
321
- });
322
- });