@vellumai/assistant 0.4.46 → 0.4.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (382)
  1. package/ARCHITECTURE.md +7 -7
  2. package/README.md +2 -23
  3. package/docs/architecture/integrations.md +45 -41
  4. package/docs/architecture/keychain-broker.md +3 -3
  5. package/docs/architecture/security.md +5 -5
  6. package/docs/runbook-trusted-contacts.md +3 -8
  7. package/hook-templates/debug-prompt-logger/hook.json +1 -1
  8. package/hook-templates/debug-prompt-logger/run.sh +1 -3
  9. package/package.json +1 -1
  10. package/src/__tests__/actor-token-service.test.ts +0 -1
  11. package/src/__tests__/anthropic-provider.test.ts +156 -0
  12. package/src/__tests__/approval-cascade.test.ts +810 -0
  13. package/src/__tests__/approval-primitive.test.ts +0 -1
  14. package/src/__tests__/approval-routes-http.test.ts +2 -0
  15. package/src/__tests__/assistant-attachments.test.ts +12 -34
  16. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
  17. package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
  18. package/src/__tests__/browser-fill-credential.test.ts +5 -2
  19. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
  20. package/src/__tests__/bundled-skill-retrieval-guard.test.ts +2 -1
  21. package/src/__tests__/channel-guardian.test.ts +0 -2
  22. package/src/__tests__/channel-readiness-routes.test.ts +35 -25
  23. package/src/__tests__/channel-readiness-service.test.ts +10 -9
  24. package/src/__tests__/checker.test.ts +9 -29
  25. package/src/__tests__/cli.test.ts +23 -0
  26. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
  27. package/src/__tests__/computer-use-tools.test.ts +2 -19
  28. package/src/__tests__/config-watcher.test.ts +0 -1
  29. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
  30. package/src/__tests__/context-image-dimensions.test.ts +332 -0
  31. package/src/__tests__/context-token-estimator.test.ts +196 -13
  32. package/src/__tests__/conversation-attention-store.test.ts +0 -1
  33. package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
  34. package/src/__tests__/conversation-routes-guardian-reply.test.ts +144 -0
  35. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  36. package/src/__tests__/credential-broker-browser-fill.test.ts +23 -22
  37. package/src/__tests__/credential-broker-server-use.test.ts +22 -21
  38. package/src/__tests__/credential-broker.test.ts +2 -1
  39. package/src/__tests__/credential-metadata-store.test.ts +239 -26
  40. package/src/__tests__/credential-resolve.test.ts +5 -4
  41. package/src/__tests__/credential-security-e2e.test.ts +8 -8
  42. package/src/__tests__/credential-security-invariants.test.ts +111 -7
  43. package/src/__tests__/credential-vault-unit.test.ts +287 -54
  44. package/src/__tests__/credential-vault.test.ts +406 -12
  45. package/src/__tests__/credentials-cli.test.ts +82 -6
  46. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
  47. package/src/__tests__/ephemeral-permissions.test.ts +3 -3
  48. package/src/__tests__/gateway-only-enforcement.test.ts +4 -2
  49. package/src/__tests__/gateway-only-guard.test.ts +0 -1
  50. package/src/__tests__/gemini-image-service.test.ts +75 -45
  51. package/src/__tests__/gemini-provider.test.ts +9 -6
  52. package/src/__tests__/guardian-action-conversation-turn.test.ts +1 -33
  53. package/src/__tests__/guardian-action-copy-generator.test.ts +0 -20
  54. package/src/__tests__/guardian-action-followup-executor.test.ts +1 -28
  55. package/src/__tests__/guardian-action-followup-store.test.ts +1 -1
  56. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
  57. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
  58. package/src/__tests__/guardian-grant-minting.test.ts +35 -0
  59. package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
  60. package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
  61. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
  62. package/src/__tests__/heartbeat-service.test.ts +0 -1
  63. package/src/__tests__/host-cu-proxy.test.ts +629 -0
  64. package/src/__tests__/host-shell-tool.test.ts +27 -15
  65. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  66. package/src/__tests__/ingress-url-consistency.test.ts +14 -21
  67. package/src/__tests__/integration-status.test.ts +38 -25
  68. package/src/__tests__/intent-routing.test.ts +0 -1
  69. package/src/__tests__/invite-routes-http.test.ts +10 -9
  70. package/src/__tests__/keychain-broker-client.test.ts +11 -43
  71. package/src/__tests__/managed-proxy-context.test.ts +5 -3
  72. package/src/__tests__/media-generate-image.test.ts +63 -2
  73. package/src/__tests__/media-reuse-story.e2e.test.ts +7 -3
  74. package/src/__tests__/messaging-send-tool.test.ts +4 -6
  75. package/src/__tests__/notification-routing-intent.test.ts +0 -1
  76. package/src/__tests__/oauth-cli.test.ts +373 -14
  77. package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
  78. package/src/__tests__/oauth-scope-policy.test.ts +4 -6
  79. package/src/__tests__/oauth-store.test.ts +756 -0
  80. package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
  81. package/src/__tests__/provider-error-scenarios.test.ts +0 -1
  82. package/src/__tests__/provider-fail-open-selection.test.ts +3 -1
  83. package/src/__tests__/provider-managed-proxy-integration.test.ts +70 -6
  84. package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
  85. package/src/__tests__/public-ingress-urls.test.ts +15 -21
  86. package/src/__tests__/recording-handler.test.ts +3 -4
  87. package/src/__tests__/registry.test.ts +2 -2
  88. package/src/__tests__/runtime-events-sse.test.ts +55 -7
  89. package/src/__tests__/schedule-store.test.ts +0 -1
  90. package/src/__tests__/scheduler-recurrence.test.ts +0 -1
  91. package/src/__tests__/schema-transforms.test.ts +226 -0
  92. package/src/__tests__/scoped-approval-grants.test.ts +0 -1
  93. package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
  94. package/src/__tests__/script-proxy-injection-runtime.test.ts +23 -13
  95. package/src/__tests__/script-proxy-policy-runtime.test.ts +1 -1
  96. package/src/__tests__/script-proxy-session-manager.test.ts +1 -1
  97. package/src/__tests__/secret-ingress-handler.test.ts +0 -1
  98. package/src/__tests__/secret-onetime-send.test.ts +5 -3
  99. package/src/__tests__/send-endpoint-busy.test.ts +21 -6
  100. package/src/__tests__/sequence-store.test.ts +0 -1
  101. package/src/__tests__/session-init.benchmark.test.ts +4 -5
  102. package/src/__tests__/session-messaging-secret-redirect.test.ts +5 -4
  103. package/src/__tests__/skill-include-graph.test.ts +66 -0
  104. package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
  105. package/src/__tests__/skill-load-tool.test.ts +149 -1
  106. package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
  107. package/src/__tests__/skills-uninstall.test.ts +3 -3
  108. package/src/__tests__/skills.test.ts +3 -12
  109. package/src/__tests__/slack-channel-config.test.ts +76 -11
  110. package/src/__tests__/slack-share-routes.test.ts +17 -14
  111. package/src/__tests__/system-prompt.test.ts +0 -1
  112. package/src/__tests__/telegram-bot-username-resolution.test.ts +3 -0
  113. package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
  114. package/src/__tests__/terminal-tools.test.ts +4 -3
  115. package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
  116. package/src/__tests__/tool-approval-handler.test.ts +0 -1
  117. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
  118. package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
  119. package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
  120. package/src/__tests__/tool-executor.test.ts +0 -1
  121. package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
  122. package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
  123. package/src/__tests__/trust-store.test.ts +1 -22
  124. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  125. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
  126. package/src/__tests__/twilio-config.test.ts +2 -1
  127. package/src/__tests__/twilio-provider.test.ts +4 -2
  128. package/src/__tests__/twilio-routes.test.ts +5 -20
  129. package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
  130. package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
  131. package/src/agent/ax-tree-compaction.test.ts +235 -0
  132. package/src/agent/loop.ts +76 -130
  133. package/src/calls/call-domain.ts +8 -10
  134. package/src/calls/relay-server.ts +9 -13
  135. package/src/calls/twilio-config.ts +4 -8
  136. package/src/calls/twilio-provider.ts +2 -1
  137. package/src/calls/twilio-rest.ts +2 -1
  138. package/src/calls/twilio-routes.ts +1 -2
  139. package/src/calls/voice-ingress-preflight.ts +1 -1
  140. package/src/cli/commands/browser-relay.ts +46 -15
  141. package/src/cli/commands/completions.ts +0 -3
  142. package/src/cli/commands/credentials.ts +110 -23
  143. package/src/cli/commands/oauth/apps.ts +255 -0
  144. package/src/cli/commands/oauth/connections.ts +299 -0
  145. package/src/cli/commands/oauth/index.ts +52 -0
  146. package/src/cli/commands/oauth/providers.ts +242 -0
  147. package/src/cli/commands/skills.ts +4 -338
  148. package/src/cli/program.ts +1 -5
  149. package/src/cli/reference.ts +1 -3
  150. package/src/cli.ts +3 -2
  151. package/src/config/assistant-feature-flags.ts +0 -3
  152. package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
  153. package/src/config/bundled-skills/claude-code/TOOLS.json +0 -4
  154. package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
  155. package/src/config/bundled-skills/computer-use/TOOLS.json +22 -4
  156. package/src/config/bundled-skills/contacts/tools/google-contacts.ts +29 -32
  157. package/src/config/bundled-skills/gmail/SKILL.md +4 -4
  158. package/src/config/bundled-skills/gmail/tools/gmail-archive.ts +54 -61
  159. package/src/config/bundled-skills/gmail/tools/gmail-attachments.ts +25 -28
  160. package/src/config/bundled-skills/gmail/tools/gmail-draft.ts +14 -17
  161. package/src/config/bundled-skills/gmail/tools/gmail-filters.ts +39 -44
  162. package/src/config/bundled-skills/gmail/tools/gmail-follow-up.ts +61 -58
  163. package/src/config/bundled-skills/gmail/tools/gmail-forward.ts +50 -49
  164. package/src/config/bundled-skills/gmail/tools/gmail-label.ts +11 -13
  165. package/src/config/bundled-skills/gmail/tools/gmail-outreach-scan.ts +148 -146
  166. package/src/config/bundled-skills/gmail/tools/gmail-send-draft.ts +4 -7
  167. package/src/config/bundled-skills/gmail/tools/gmail-sender-digest.ts +175 -173
  168. package/src/config/bundled-skills/gmail/tools/gmail-trash.ts +4 -7
  169. package/src/config/bundled-skills/gmail/tools/gmail-unsubscribe.ts +71 -76
  170. package/src/config/bundled-skills/gmail/tools/gmail-vacation.ts +32 -38
  171. package/src/config/bundled-skills/google-calendar/SKILL.md +2 -2
  172. package/src/config/bundled-skills/google-calendar/calendar-client.ts +90 -44
  173. package/src/config/bundled-skills/google-calendar/tools/calendar-check-availability.ts +9 -10
  174. package/src/config/bundled-skills/google-calendar/tools/calendar-create-event.ts +5 -6
  175. package/src/config/bundled-skills/google-calendar/tools/calendar-get-event.ts +4 -5
  176. package/src/config/bundled-skills/google-calendar/tools/calendar-list-events.ts +14 -15
  177. package/src/config/bundled-skills/google-calendar/tools/calendar-rsvp.ts +37 -37
  178. package/src/config/bundled-skills/google-calendar/tools/shared.ts +4 -9
  179. package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +24 -3
  180. package/src/config/bundled-skills/messaging/SKILL.md +6 -6
  181. package/src/config/bundled-skills/messaging/tools/messaging-analyze-style.ts +62 -63
  182. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +15 -16
  183. package/src/config/bundled-skills/messaging/tools/messaging-auth-test.ts +4 -5
  184. package/src/config/bundled-skills/messaging/tools/messaging-list-conversations.ts +6 -7
  185. package/src/config/bundled-skills/messaging/tools/messaging-mark-read.ts +4 -5
  186. package/src/config/bundled-skills/messaging/tools/messaging-read.ts +14 -15
  187. package/src/config/bundled-skills/messaging/tools/messaging-search.ts +4 -5
  188. package/src/config/bundled-skills/messaging/tools/messaging-send.ts +128 -128
  189. package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +33 -34
  190. package/src/config/bundled-skills/messaging/tools/shared.ts +12 -15
  191. package/src/config/bundled-skills/settings/SKILL.md +1 -1
  192. package/src/config/bundled-skills/settings/TOOLS.json +2 -8
  193. package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
  194. package/src/config/bundled-skills/slack/tools/shared.ts +4 -10
  195. package/src/config/bundled-skills/slack/tools/slack-add-reaction.ts +4 -5
  196. package/src/config/bundled-skills/slack/tools/slack-channel-details.ts +15 -16
  197. package/src/config/bundled-skills/slack/tools/slack-delete-message.ts +4 -5
  198. package/src/config/bundled-skills/slack/tools/slack-edit-message.ts +4 -5
  199. package/src/config/bundled-skills/slack/tools/slack-leave-channel.ts +4 -5
  200. package/src/config/bundled-skills/slack/tools/slack-scan-digest.ts +95 -92
  201. package/src/config/env-registry.ts +14 -83
  202. package/src/config/env.ts +11 -50
  203. package/src/config/feature-flag-registry.json +16 -16
  204. package/src/config/schema.ts +3 -1
  205. package/src/config/skills.ts +21 -2
  206. package/src/context/image-dimensions.ts +229 -0
  207. package/src/context/token-estimator.ts +75 -12
  208. package/src/context/window-manager.ts +49 -10
  209. package/src/daemon/assistant-attachments.ts +1 -13
  210. package/src/daemon/guardian-action-generators.ts +4 -5
  211. package/src/daemon/handlers/config-ingress.ts +8 -33
  212. package/src/daemon/handlers/config-slack-channel.ts +76 -56
  213. package/src/daemon/handlers/config-telegram.ts +53 -24
  214. package/src/daemon/handlers/sessions.ts +10 -24
  215. package/src/daemon/handlers/shared.ts +0 -130
  216. package/src/daemon/host-cu-proxy.ts +401 -0
  217. package/src/daemon/lifecycle.ts +39 -63
  218. package/src/daemon/message-protocol.ts +3 -0
  219. package/src/daemon/message-types/computer-use.ts +2 -119
  220. package/src/daemon/message-types/host-cu.ts +19 -0
  221. package/src/daemon/message-types/integrations.ts +1 -0
  222. package/src/daemon/message-types/messages.ts +3 -0
  223. package/src/daemon/server.ts +14 -21
  224. package/src/daemon/session-agent-loop-handlers.ts +2 -0
  225. package/src/daemon/session-attachments.ts +1 -2
  226. package/src/daemon/session-messaging.ts +3 -1
  227. package/src/daemon/session-slash.ts +1 -1
  228. package/src/daemon/session-surfaces.ts +40 -28
  229. package/src/daemon/session-tool-setup.ts +20 -11
  230. package/src/daemon/session.ts +139 -16
  231. package/src/daemon/tool-side-effects.ts +2 -8
  232. package/src/daemon/watch-handler.ts +2 -2
  233. package/src/email/providers/index.ts +2 -1
  234. package/src/events/tool-metrics-listener.ts +2 -2
  235. package/src/hooks/manager.ts +1 -4
  236. package/src/inbound/public-ingress-urls.ts +7 -7
  237. package/src/instrument.ts +15 -1
  238. package/src/logfire.ts +16 -5
  239. package/src/media/app-icon-generator.ts +30 -4
  240. package/src/media/avatar-router.ts +26 -3
  241. package/src/media/gemini-image-service.ts +28 -2
  242. package/src/memory/conversation-key-store.ts +21 -0
  243. package/src/memory/db-init.ts +4 -0
  244. package/src/memory/guardian-action-store.ts +1 -1
  245. package/src/memory/migrations/149-oauth-tables.ts +60 -0
  246. package/src/memory/migrations/index.ts +1 -0
  247. package/src/memory/schema/guardian.ts +1 -1
  248. package/src/memory/schema/index.ts +1 -0
  249. package/src/memory/schema/oauth.ts +65 -0
  250. package/src/messaging/provider.ts +19 -13
  251. package/src/messaging/providers/gmail/adapter.ts +40 -23
  252. package/src/messaging/providers/gmail/client.ts +283 -122
  253. package/src/messaging/providers/gmail/people-client.ts +32 -24
  254. package/src/messaging/providers/slack/adapter.ts +29 -19
  255. package/src/messaging/providers/slack/client.ts +265 -78
  256. package/src/messaging/providers/telegram-bot/adapter.ts +19 -18
  257. package/src/messaging/providers/whatsapp/adapter.ts +17 -11
  258. package/src/messaging/registry.ts +2 -31
  259. package/src/notifications/copy-composer.ts +0 -5
  260. package/src/notifications/signal.ts +4 -5
  261. package/src/oauth/byo-connection.test.ts +537 -0
  262. package/src/oauth/byo-connection.ts +128 -0
  263. package/src/oauth/connect-orchestrator.ts +139 -56
  264. package/src/oauth/connect-types.ts +17 -23
  265. package/src/oauth/connection-resolver.ts +58 -0
  266. package/src/oauth/connection.ts +38 -0
  267. package/src/oauth/manual-token-connection.ts +104 -0
  268. package/src/oauth/oauth-store.ts +496 -0
  269. package/src/oauth/platform-connection.test.ts +192 -0
  270. package/src/oauth/platform-connection.ts +111 -0
  271. package/src/oauth/provider-behaviors.ts +124 -0
  272. package/src/oauth/scope-policy.ts +9 -2
  273. package/src/oauth/seed-providers.ts +161 -0
  274. package/src/oauth/token-persistence.ts +74 -78
  275. package/src/permissions/checker.ts +8 -4
  276. package/src/permissions/defaults.ts +0 -1
  277. package/src/permissions/prompter.ts +10 -1
  278. package/src/permissions/trust-store.ts +13 -0
  279. package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
  280. package/src/prompts/system-prompt.ts +70 -45
  281. package/src/providers/anthropic/client.ts +133 -24
  282. package/src/providers/gemini/client.ts +15 -6
  283. package/src/providers/managed-proxy/constants.ts +2 -2
  284. package/src/providers/managed-proxy/context.ts +5 -1
  285. package/src/providers/ratelimit.ts +17 -0
  286. package/src/providers/registry.ts +2 -2
  287. package/src/providers/retry.ts +1 -27
  288. package/src/runtime/AGENTS.md +17 -0
  289. package/src/runtime/auth/route-policy.ts +0 -3
  290. package/src/runtime/channel-invite-transports/telegram.ts +2 -1
  291. package/src/runtime/channel-readiness-service.ts +168 -195
  292. package/src/runtime/channel-readiness-types.ts +4 -0
  293. package/src/runtime/channel-reply-delivery.ts +0 -40
  294. package/src/runtime/gateway-client.ts +0 -7
  295. package/src/runtime/guardian-action-conversation-turn.ts +1 -3
  296. package/src/runtime/guardian-action-followup-executor.ts +1 -1
  297. package/src/runtime/guardian-action-message-composer.ts +3 -23
  298. package/src/runtime/http-server.ts +17 -10
  299. package/src/runtime/http-types.ts +2 -3
  300. package/src/runtime/middleware/rate-limiter.ts +74 -20
  301. package/src/runtime/middleware/twilio-validation.ts +1 -11
  302. package/src/runtime/pending-interactions.ts +14 -12
  303. package/src/runtime/routes/channel-delivery-routes.ts +0 -1
  304. package/src/runtime/routes/channel-readiness-routes.ts +2 -0
  305. package/src/runtime/routes/conversation-routes.ts +73 -19
  306. package/src/runtime/routes/diagnostics-routes.ts +11 -9
  307. package/src/runtime/routes/events-routes.ts +21 -11
  308. package/src/runtime/routes/guardian-approval-interception.ts +20 -5
  309. package/src/runtime/routes/host-cu-routes.ts +97 -0
  310. package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
  311. package/src/runtime/routes/integrations/slack/share.ts +6 -6
  312. package/src/runtime/routes/integrations/twilio.ts +6 -5
  313. package/src/runtime/routes/log-export-routes.ts +126 -8
  314. package/src/runtime/routes/secret-routes.ts +3 -2
  315. package/src/runtime/routes/settings-routes.ts +113 -48
  316. package/src/runtime/routes/surface-action-routes.ts +1 -1
  317. package/src/runtime/routes/watch-routes.ts +128 -0
  318. package/src/schedule/integration-status.ts +10 -8
  319. package/src/security/credential-key.ts +14 -0
  320. package/src/security/keychain-broker-client.ts +5 -6
  321. package/src/security/oauth2.ts +1 -1
  322. package/src/security/token-manager.ts +145 -43
  323. package/src/skills/catalog-install.ts +358 -0
  324. package/src/skills/include-graph.ts +32 -0
  325. package/src/telegram/bot-username.ts +2 -3
  326. package/src/tools/apps/definitions.ts +0 -5
  327. package/src/tools/assets/materialize.ts +0 -5
  328. package/src/tools/assets/search.ts +0 -5
  329. package/src/tools/browser/headless-browser.ts +1 -67
  330. package/src/tools/browser/network-recorder.ts +1 -1
  331. package/src/tools/browser/network-recording-types.ts +1 -1
  332. package/src/tools/claude-code/claude-code.ts +0 -5
  333. package/src/tools/computer-use/definitions.ts +46 -11
  334. package/src/tools/computer-use/registry.ts +4 -5
  335. package/src/tools/credentials/broker.ts +5 -4
  336. package/src/tools/credentials/metadata-store.ts +22 -74
  337. package/src/tools/credentials/resolve.ts +2 -1
  338. package/src/tools/credentials/vault.ts +139 -151
  339. package/src/tools/filesystem/edit.ts +1 -6
  340. package/src/tools/filesystem/read.ts +0 -5
  341. package/src/tools/filesystem/write.ts +1 -6
  342. package/src/tools/host-filesystem/edit.ts +1 -6
  343. package/src/tools/host-filesystem/read.ts +1 -6
  344. package/src/tools/host-filesystem/write.ts +1 -6
  345. package/src/tools/mcp/mcp-tool-factory.ts +18 -1
  346. package/src/tools/memory/definitions.ts +0 -5
  347. package/src/tools/network/web-fetch.ts +0 -5
  348. package/src/tools/network/web-search.ts +0 -5
  349. package/src/tools/registry.ts +2 -7
  350. package/src/tools/schema-transforms.ts +99 -0
  351. package/src/tools/skills/load.ts +62 -8
  352. package/src/tools/swarm/delegate.ts +0 -5
  353. package/src/tools/system/avatar-generator.ts +0 -5
  354. package/src/tools/ui-surface/definitions.ts +0 -15
  355. package/src/tools/watch/screen-watch.ts +0 -5
  356. package/src/tools/watch/watch-state.ts +0 -12
  357. package/src/util/logger.ts +7 -41
  358. package/src/util/platform.ts +9 -28
  359. package/src/version.ts +10 -0
  360. package/src/watcher/providers/github.ts +51 -52
  361. package/src/watcher/providers/gmail.ts +88 -80
  362. package/src/watcher/providers/google-calendar.ts +94 -86
  363. package/src/watcher/providers/linear.ts +87 -93
  364. package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
  365. package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
  366. package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
  367. package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
  368. package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
  369. package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
  370. package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
  371. package/src/cli/commands/dev.ts +0 -129
  372. package/src/cli/commands/map.ts +0 -391
  373. package/src/cli/commands/oauth.ts +0 -77
  374. package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +0 -16
  375. package/src/daemon/computer-use-session.ts +0 -1020
  376. package/src/daemon/ride-shotgun-handler.ts +0 -567
  377. package/src/oauth/provider-profiles.ts +0 -192
  378. package/src/prompts/computer-use-prompt.ts +0 -98
  379. package/src/runtime/routes/computer-use-routes.ts +0 -641
  380. package/src/runtime/telegram-streaming-delivery.test.ts +0 -597
  381. package/src/runtime/telegram-streaming-delivery.ts +0 -383
  382. package/src/tools/computer-use/request-computer-control.ts +0 -61
@@ -1,1020 +0,0 @@
1
- /**
2
- * Computer-use session orchestrator.
3
- *
4
- * Manages the observation -> infer -> action loop for computer-use tasks,
5
- * bridging the macOS client (which captures screen state and executes actions)
6
- * with the AgentLoop (which runs inference via the Anthropic API with CU tools).
7
- */
8
-
9
- import { v4 as uuid } from "uuid";
10
-
11
- import { AgentLoop } from "../agent/loop.js";
12
- import { getConfig } from "../config/loader.js";
13
- import { PermissionPrompter } from "../permissions/prompter.js";
14
- import { SecretPrompter } from "../permissions/secret-prompter.js";
15
- import type { UserDecision } from "../permissions/types.js";
16
- import { buildComputerUseSystemPrompt } from "../prompts/computer-use-prompt.js";
17
- import type {
18
- ContentBlock,
19
- Message,
20
- Provider,
21
- ToolDefinition,
22
- } from "../providers/types.js";
23
- import { allComputerUseTools } from "../tools/computer-use/definitions.js";
24
- import { ToolExecutor } from "../tools/executor.js";
25
- import { getTool, registerSkillTools } from "../tools/registry.js";
26
- import type { Tool, ToolExecutionResult } from "../tools/types.js";
27
- import { allUiSurfaceTools } from "../tools/ui-surface/definitions.js";
28
- import { getLogger } from "../util/logger.js";
29
- import { getSandboxWorkingDir } from "../util/platform.js";
30
- import type {
31
- CuObservation,
32
- ServerMessage,
33
- SurfaceData,
34
- SurfaceType,
35
- UiSurfaceShow,
36
- } from "./message-protocol.js";
37
- import { INTERACTIVE_SURFACE_TYPES } from "./message-protocol.js";
38
- import {
39
- projectSkillTools,
40
- resetSkillToolProjection,
41
- type SkillProjectionCache,
42
- } from "./session-skill-tools.js";
43
-
44
- const log = getLogger("computer-use-session");
45
-
46
/** Step cap for a single computer-use session. */
const MAX_STEPS = 50;

/** Wall-clock budget for a session, in milliseconds (30 minutes). */
const SESSION_TIMEOUT_MS = 30 * 60_000;

/** Bound on the number of action-history entries (consumer is outside this view). */
const MAX_HISTORY_ENTRIES = 10;

/** Window size, in actions, used by loop detection (consumer is outside this view). */
const LOOP_DETECTION_WINDOW = 3;

/** Count of consecutive unchanged observations at which a warning applies. */
const CONSECUTIVE_UNCHANGED_WARNING_THRESHOLD = 2;

/** How many of the most recent AX tree snapshots stay in conversation history. */
const MAX_AX_TREES_IN_HISTORY = 2;

/**
 * Matches each `<ax-tree>…</ax-tree>` span (non-greedy, across newlines).
 * Carries the `g` flag, so `lastIndex` is stateful when used with `test`/`exec`.
 */
const AX_TREE_PATTERN = /<ax-tree>[\s\S]*?<\/ax-tree>/g;

/** Text substituted for an AX tree span that has been dropped from history. */
const AX_TREE_PLACEHOLDER = "<ax_tree_omitted />";
58
-
59
/**
 * Lifecycle phases of a computer-use session.
 *
 * "complete" and "error" are terminal: observations and aborts are ignored
 * once the session is in either of them.
 */
type SessionState =
  | "idle" // initial value before the first observation
  | "awaiting_observation" // a tool result is waiting on the next observation
  | "inferring" // the agent loop is running
  | "complete"
  | "error";
65
-
66
/** One entry in the session's action history. */
interface ActionRecord {
  /** Step index the action belongs to. */
  step: number;
  /** Name of the tool that was invoked. */
  toolName: string;
  /** Raw tool input as supplied for the call. */
  input: Record<string, unknown>;
  /** Optional reasoning text recorded with the action. */
  reasoning?: string;
  /** Optional result text recorded for the action. */
  result?: string;
}
73
-
74
- export class ComputerUseSession {
75
- private readonly sessionId: string;
76
- private readonly task: string;
77
- private readonly screenWidth: number;
78
- private readonly screenHeight: number;
79
- private readonly provider: Provider;
80
- private sendToClient: (msg: ServerMessage) => void;
81
- private readonly interactionType: "computer_use" | "text_qa";
82
- private readonly onTerminal?: (sessionId: string) => void;
83
- private readonly preactivatedSkillIds: string[];
84
- private readonly skillProjectionState = new Map<string, string>();
85
- private readonly skillProjectionCache: SkillProjectionCache = {};
86
-
87
- private state: SessionState = "idle";
88
- private stepCount = 0;
89
- private actionHistory: ActionRecord[] = [];
90
- private previousAXTree: string | undefined;
91
- private consecutiveUnchangedSteps = 0;
92
- private abortController: AbortController | null = null;
93
- private sessionTimer: ReturnType<typeof setTimeout> | null = null;
94
-
95
- private pendingObservation: {
96
- resolve: (result: ToolExecutionResult) => void;
97
- } | null = null;
98
-
99
- private pendingSurfaceActions = new Map<
100
- string,
101
- {
102
- resolve: (result: ToolExecutionResult) => void;
103
- }
104
- >();
105
- /** @internal */ surfaceState = new Map<
106
- string,
107
- { surfaceType: SurfaceType; data: SurfaceData; title?: string }
108
- >();
109
- private terminalNotified = false;
110
- private prompter: PermissionPrompter | null = null;
111
-
112
- // Tracks the agent loop promise so callers can await session completion
113
- private loopPromise: Promise<void> | null = null;
114
-
115
/**
 * Stores the session's identity, screen geometry, provider and client
 * callback. No work is started here; the agent loop begins on the first
 * observation.
 *
 * @param sessionId Identifier echoed in log entries and client messages.
 * @param task Task description for the session.
 * @param screenWidth Screen width reported for the session.
 * @param screenHeight Screen height reported for the session.
 * @param provider Provider instance stored for the session.
 * @param sendToClient Callback used to push server messages to the client.
 * @param interactionType Defaults to "computer_use" when omitted.
 * @param onTerminal Optional callback taking the session id.
 * @param preactivatedSkillIds Defaults to `["computer-use"]` when omitted.
 */
constructor(
  sessionId: string,
  task: string,
  screenWidth: number,
  screenHeight: number,
  provider: Provider,
  sendToClient: (msg: ServerMessage) => void,
  interactionType?: "computer_use" | "text_qa",
  onTerminal?: (sessionId: string) => void,
  preactivatedSkillIds?: string[],
) {
  // Identity and task.
  this.sessionId = sessionId;
  this.task = task;

  // Screen geometry.
  this.screenWidth = screenWidth;
  this.screenHeight = screenHeight;

  // Collaborators.
  this.provider = provider;
  this.sendToClient = sendToClient;
  this.onTerminal = onTerminal;

  // Optional settings with their fallbacks.
  this.interactionType = interactionType ?? "computer_use";
  this.preactivatedSkillIds = preactivatedSkillIds ?? ["computer-use"];
}
136
-
137
- // ---------------------------------------------------------------------------
138
- // Public API
139
- // ---------------------------------------------------------------------------
140
-
141
/**
 * Feeds one client observation into the session.
 *
 * - After the session has ended ("complete" / "error") the observation is
 *   logged and dropped.
 * - While a tool call is waiting ("awaiting_observation" with a pending
 *   resolver) the observation becomes that tool's result and the method
 *   returns immediately.
 * - Otherwise the observation is treated as the first one: the abort
 *   controller and timeout timer are created and the agent loop is started;
 *   the returned promise settles when the loop promise settles.
 *
 * NOTE(review): an observation that arrives while the state is already
 * "inferring" also takes the start-up path and would launch a second loop
 * and timer — confirm callers cannot trigger this.
 */
async handleObservation(obs: CuObservation): Promise<void> {
  const sessionEnded = this.state === "complete" || this.state === "error";
  if (sessionEnded) {
    log.warn(
      { sessionId: this.sessionId, state: this.state },
      "Observation received after session ended",
    );
    return;
  }

  // Snapshot this before previousAXTree is overwritten below.
  const hadPreviousAXTree = this.previousAXTree != null;

  // Unchanged-step bookkeeping only applies once at least one step has run.
  if (this.stepCount > 0) {
    if (obs.axDiff != null) {
      // A diff was reported, so the streak of unchanged steps ends.
      this.consecutiveUnchangedSteps = 0;
    } else if (hadPreviousAXTree && obs.axTree != null) {
      // A tree was sent, an earlier tree exists, and no diff was reported.
      this.consecutiveUnchangedSteps++;
    }
  }

  // Remember the latest tree for the next turn.
  if (obs.axTree != null) {
    this.previousAXTree = obs.axTree;
  }

  const waiting = this.pendingObservation;
  if (this.state === "awaiting_observation" && waiting) {
    const content = this.buildObservationResultContent(obs, hadPreviousAXTree);

    let result: ToolExecutionResult;
    if (obs.executionError) {
      result = {
        content: `Action failed: ${obs.executionError}\n\n${content}`,
        isError: true,
      };
    } else {
      result = { content, isError: false };
    }

    // Flip the state before resolving; the agent loop resumes on resolution.
    this.state = "inferring";
    waiting.resolve(result);
    this.pendingObservation = null;
    return;
  }

  // Start-up path: first observation of the session.
  this.state = "inferring";
  this.abortController = new AbortController();

  // Safety net: abort the session once SESSION_TIMEOUT_MS has elapsed.
  const onSessionTimeout = (): void => {
    log.warn(
      { sessionId: this.sessionId, timeoutMs: SESSION_TIMEOUT_MS },
      "Session timeout reached, aborting",
    );
    this.abort();
  };
  this.sessionTimer = setTimeout(onSessionTimeout, SESSION_TIMEOUT_MS);

  // Handles rejections from runAgentLoop, e.g. set-up failures that occur
  // before its own error handling takes over (per the original note).
  const onStartupFailure = (err: unknown): void => {
    const message = err instanceof Error ? err.message : String(err);
    log.error({ err, sessionId: this.sessionId }, "Agent loop startup failed");

    if (this.sessionTimer) {
      clearTimeout(this.sessionTimer);
      this.sessionTimer = null;
    }

    // Report the failure only if the session is not already terminal.
    const alreadyTerminal = this.state === "complete" || this.state === "error";
    if (!alreadyTerminal) {
      this.state = "error";
      this.sendToClient({
        type: "cu_error",
        sessionId: this.sessionId,
        message,
      });
      this.notifyTerminal();
    }
  };

  const messages = this.buildMessages(obs, hadPreviousAXTree);
  this.loopPromise = this.runAgentLoop(messages).catch(onStartupFailure);

  await this.loopPromise;
}
223
-
224
/**
 * Cancels the session unless it has already reached a terminal state
 * ("complete" or "error").
 *
 * Clears the session timer, signals the abort controller, settles every
 * outstanding observation / surface-action promise with an error result,
 * disposes the permission prompter, then marks the session as errored and
 * reports `cu_error` to the client.
 */
abort(): void {
  const alreadyTerminal = this.state === "complete" || this.state === "error";
  if (alreadyTerminal) {
    return;
  }

  log.info({ sessionId: this.sessionId }, "Aborting computer-use session");

  const timer = this.sessionTimer;
  if (timer) {
    clearTimeout(timer);
    this.sessionTimer = null;
  }
  this.abortController?.abort();

  // A proxied tool call may be parked waiting for the next observation;
  // settle it so whoever awaits it is not left hanging.
  const waitingObservation = this.pendingObservation;
  if (waitingObservation) {
    waitingObservation.resolve({ content: "Session aborted", isError: true });
    this.pendingObservation = null;
  }

  // Dispose the prompter (per the original note: clears pending permission
  // timers and rejects their promises).
  this.prompter?.dispose();

  // Settle every surface still waiting on a user action, then drop all
  // surface bookkeeping.
  this.pendingSurfaceActions.forEach((waiter) => {
    waiter.resolve({ content: "Session aborted", isError: true });
  });
  this.pendingSurfaceActions.clear();
  this.surfaceState.clear();

  this.state = "error";
  this.sendToClient({
    type: "cu_error",
    sessionId: this.sessionId,
    message: "Session aborted by user",
  });
  this.notifyTerminal();
}
261
-
262
/** Reports whether the session finished successfully (state is "complete"). */
isComplete(): boolean {
  const finished = this.state === "complete";
  return finished;
}
265
-
266
/** Returns the current session state as a plain string. */
getState(): string {
  const current: string = this.state;
  return current;
}
269
-
270
/**
 * Derives the computer-use tool definitions by projecting the preactivated
 * skills and looking each allowed tool name up in the tool registry.
 *
 * @returns The resolved definitions, or `null` when no skill ids are
 *   configured, when the projection allows no tools, or when projection
 *   throws. `null` tells the caller to fall back to the legacy hardcoded
 *   tool definitions.
 */
private getProjectedCuToolDefinitions(): ToolDefinition[] | null {
  const skillIds = this.preactivatedSkillIds;
  if (skillIds.length === 0) {
    log.warn(
      "No preactivatedSkillIds configured, falling back to legacy CU tools",
    );
    return null;
  }

  try {
    const { allowedToolNames } = projectSkillTools([], {
      preactivatedSkillIds: skillIds,
      previouslyActiveSkillIds: this.skillProjectionState,
      cache: this.skillProjectionCache,
    });

    if (allowedToolNames.size === 0) {
      log.warn(
        { preactivatedSkillIds: skillIds },
        "Skill projection produced no tool definitions, falling back to legacy CU tools",
      );
      return null;
    }

    // projectSkillTools only yields names (tools are dispatched via
    // skill_execute), so definitions are read from the registry. Names the
    // registry does not know are silently dropped.
    return Array.from(allowedToolNames).flatMap((toolName) => {
      const registered = getTool(toolName);
      return registered ? [registered.getDefinition()] : [];
    });
  } catch (err) {
    log.warn(
      { err },
      "Skill projection failed, falling back to legacy CU tools",
    );
    return null;
  }
}
314
-
315
/**
 * Delivers a user action on a UI surface to the tool call that is waiting
 * on that surface.
 *
 * @param surfaceId - Surface the action originated from.
 * @param actionId - Identifier of the action; `"selection_changed"` is
 *   treated as a non-terminal update and ignored.
 * @param data - Optional payload; an empty object is forwarded when omitted.
 */
handleSurfaceAction(
  surfaceId: string,
  actionId: string,
  data?: Record<string, unknown>,
): void {
  const waiter = this.pendingSurfaceActions.get(surfaceId);
  if (waiter === undefined || waiter === null) {
    log.warn({ surfaceId, actionId }, "No pending surface action found");
    return;
  }

  // Selection changes are intermediate state updates: keep the pending entry
  // so a later action button can still resolve it.
  const isTerminalAction = actionId !== "selection_changed";
  if (isTerminalAction) {
    this.pendingSurfaceActions.delete(surfaceId);
    const payload = JSON.stringify({ actionId, data: data ?? {} });
    waiter.resolve({ content: payload, isError: false });
  }
}
336
-
337
- // ---------------------------------------------------------------------------
338
- // Agent loop execution
339
- // ---------------------------------------------------------------------------
340
-
341
/**
 * Runs the agent loop for this computer-use session until it completes,
 * fails, or is aborted.
 *
 * Steps:
 *  1. Resolve the CU tool definitions (skill projection, or the legacy
 *     tools registered as skill-origin tools when projection is unavailable)
 *     and append the UI-surface tool definitions.
 *  2. Build a proxy resolver that turns tool calls into client messages
 *     (`ui_surface_*`, `cu_action`, `cu_complete`, `cu_error`) and parks on a
 *     promise whenever it must wait for the client.
 *  3. Run `AgentLoop` with `toolChoice: { type: "any" }` through a provider
 *     wrapper that compacts old AX-tree snapshots before every request.
 *
 * Failures are reported to the client at most once: a `cu_error` is only
 * sent while the session is not yet in a terminal state ("complete" or
 * "error"), matching the guards used by the other error paths of the class.
 *
 * @param messages - Initial conversation handed to the agent loop.
 */
private async runAgentLoop(messages: Message[]): Promise<void> {
  const systemPrompt = buildComputerUseSystemPrompt(
    this.screenWidth,
    this.screenHeight,
  );

  // Moves the session to "error" and tells the client, unless a terminal
  // state was already reached (in which case the client has already been
  // sent cu_complete or cu_error and must not receive a second report).
  const failSession = (message: string): void => {
    if (this.state === "complete" || this.state === "error") return;
    this.state = "error";
    this.sendToClient({
      type: "cu_error",
      sessionId: this.sessionId,
      message,
    });
    this.notifyTerminal();
  };

  let cuToolDefs = this.getProjectedCuToolDefinitions();
  if (!cuToolDefs) {
    // Fallback: register the legacy CU tools as skill-origin tools so
    // ToolExecutor can resolve them via getTool(), but using the same
    // ownerSkillId as the bundled computer-use skill. This avoids
    // core-vs-skill collisions that would permanently block skill
    // projection recovery on subsequent sessions.
    const fallbackSkillId = this.preactivatedSkillIds[0] ?? "computer-use";
    const fallbackTools: Tool[] = allComputerUseTools.map((t) => ({
      ...t,
      origin: "skill" as const,
      ownerSkillId: fallbackSkillId,
      ownerSkillBundled: true,
    }));
    registerSkillTools(fallbackTools);
    // Track in the session map so resetSkillToolProjection cleans up
    this.skillProjectionState.set(fallbackSkillId, "fallback");
    cuToolDefs = allComputerUseTools.map((t) => t.getDefinition());
  }

  const toolDefs: ToolDefinition[] = [
    ...cuToolDefs,
    ...allUiSurfaceTools.map((t) => t.getDefinition()),
  ];

  this.prompter = new PermissionPrompter(this.sendToClient);
  const prompter = this.prompter;
  const secretPrompter = new SecretPrompter(this.sendToClient);
  const executor = new ToolExecutor(prompter);

  const proxyResolver = async (
    toolName: string,
    input: Record<string, unknown>,
  ): Promise<ToolExecutionResult> => {
    // ── Surface tool proxying ──────────────────────────────────────
    if (toolName === "ui_show") {
      const surfaceId = uuid();
      const surfaceType = input.surface_type as SurfaceType;
      const title = typeof input.title === "string" ? input.title : undefined;
      const data = input.data as SurfaceData;
      const actions = input.actions as
        | Array<{ id: string; label: string; style?: string }>
        | undefined;
      // Interactive surfaces default to awaiting user action.
      // Cards, tables and lists only block when explicit action buttons are
      // provided; selectionMode alone should not gate blocking because
      // selection_changed fires on every click and would immediately resolve
      // multi-select surfaces.
      const hasActions = Array.isArray(actions) && actions.length > 0;
      const blocksOnlyWithActions =
        surfaceType === "card" ||
        surfaceType === "list" ||
        surfaceType === "table";
      const isInteractive = blocksOnlyWithActions
        ? hasActions
        : INTERACTIVE_SURFACE_TYPES.includes(surfaceType);
      const awaitAction = (input.await_action as boolean) ?? isInteractive;

      // Track surface state for ui_update merging
      this.surfaceState.set(surfaceId, { surfaceType, data, title });

      this.sendToClient({
        type: "ui_surface_show",
        sessionId: this.sessionId,
        surfaceId,
        surfaceType,
        title,
        data,
        actions: actions?.map((a) => ({
          id: a.id,
          label: a.label,
          style: (a.style ?? "secondary") as
            | "primary"
            | "secondary"
            | "destructive",
        })),
      } as unknown as UiSurfaceShow);

      if (awaitAction) {
        // Resolved later by handleSurfaceAction() or abort().
        return new Promise<ToolExecutionResult>((resolve) => {
          this.pendingSurfaceActions.set(surfaceId, { resolve });
        });
      }
      return { content: JSON.stringify({ surfaceId }), isError: false };
    }

    if (toolName === "ui_update") {
      const surfaceId = input.surface_id as string;
      const patch = input.data as Record<string, unknown>;

      // Merge the partial patch into the stored full surface data; an
      // untracked surface just receives the patch as-is.
      const stored = this.surfaceState.get(surfaceId);
      let mergedData: SurfaceData;
      if (stored) {
        mergedData = { ...stored.data, ...patch } as SurfaceData;
        stored.data = mergedData;
      } else {
        mergedData = patch as unknown as SurfaceData;
      }

      this.sendToClient({
        type: "ui_surface_update",
        sessionId: this.sessionId,
        surfaceId,
        data: mergedData,
      });
      return { content: "Surface updated", isError: false };
    }

    if (toolName === "ui_dismiss") {
      const surfaceId = input.surface_id as string;
      this.sendToClient({
        type: "ui_surface_dismiss",
        sessionId: this.sessionId,
        surfaceId,
      });
      this.pendingSurfaceActions.delete(surfaceId);
      this.surfaceState.delete(surfaceId);
      return { content: "Surface dismissed", isError: false };
    }

    // ── Computer-use tool proxying ─────────────────────────────────
    const reasoning =
      typeof input.reasoning === "string" ? input.reasoning : undefined;

    // Record action in history
    this.actionHistory.push({
      step: this.stepCount + 1,
      toolName,
      input,
      reasoning,
    });

    // Check for terminal tools
    if (
      toolName === "computer_use_done" ||
      toolName === "computer_use_respond"
    ) {
      const summary =
        toolName === "computer_use_done"
          ? typeof input.summary === "string"
            ? input.summary
            : "Task completed"
          : typeof input.answer === "string"
            ? input.answer
            : "No answer provided";

      this.sendToClient({
        type: "cu_complete",
        sessionId: this.sessionId,
        summary,
        stepCount: this.stepCount,
        isResponse: toolName === "computer_use_respond" ? true : undefined,
      });
      this.state = "complete";
      // Stop AgentLoop immediately so terminal tools cannot trigger extra provider calls.
      this.abortController?.abort();
      this.notifyTerminal();
      return { content: "Session complete", isError: false };
    }

    this.stepCount++;

    // Enforce step limit — abort the loop so toolChoice:'any' can't force another turn
    if (this.stepCount > MAX_STEPS) {
      this.state = "error";
      this.sendToClient({
        type: "cu_error",
        sessionId: this.sessionId,
        message: `Step limit (${MAX_STEPS}) exceeded`,
      });
      this.abortController?.abort();
      this.notifyTerminal();
      return { content: `Step limit (${MAX_STEPS}) exceeded`, isError: true };
    }

    // Send action to client for execution
    this.sendToClient({
      type: "cu_action",
      sessionId: this.sessionId,
      toolName,
      input,
      reasoning,
      stepNumber: this.stepCount,
    });

    // Wait for next observation from client
    this.state = "awaiting_observation";
    return new Promise<ToolExecutionResult>((resolve) => {
      this.pendingObservation = { resolve };
    });
  };

  // Build a set of tool names the CU session is allowed to execute.
  // This prevents tools registered globally (e.g. computer_use_request_control)
  // but not advertised to the CU model from executing during CU sessions.
  const allowedToolNames = new Set(toolDefs.map((td) => td.name));

  const toolExecutor = async (
    name: string,
    input: Record<string, unknown>,
  ): Promise<ToolExecutionResult> => {
    return executor.execute(name, input, {
      workingDir: getSandboxWorkingDir(),
      sessionId: this.sessionId,
      conversationId: this.sessionId,
      trustClass: "guardian",
      proxyToolResolver: proxyResolver,
      allowedToolNames,
      requestSecret: async (params) => {
        return secretPrompter.prompt(
          params.service,
          params.field,
          params.label,
          params.description,
          params.placeholder,
          this.sessionId,
          params.purpose,
          params.allowedTools,
          params.allowedDomains,
        );
      },
    });
  };

  // Wrap the provider so that old AX tree snapshots are stripped from
  // conversation history before each API call, keeping only the most recent
  // MAX_AX_TREES_IN_HISTORY entries. This prevents TTFT from growing
  // linearly with step count.
  const compactingProvider: Provider = {
    name: this.provider.name,
    sendMessage: (msgs, tools, sys, opts) => {
      const compacted = ComputerUseSession.compactHistory(msgs);
      return this.provider.sendMessage(compacted, tools, sys, opts);
    },
  };

  const cuConfig = getConfig();
  const agentLoop = new AgentLoop(
    compactingProvider,
    systemPrompt,
    {
      maxTokens: 4096,
      maxInputTokens: cuConfig.contextWindow.maxInputTokens,
      toolChoice: { type: "any" },
    },
    toolDefs,
    toolExecutor,
  );

  try {
    await agentLoop.run(
      messages,
      (event) => {
        switch (event.type) {
          case "error":
            log.error(
              { err: event.error, sessionId: this.sessionId },
              "Agent loop error",
            );
            failSession(event.error.message);
            break;
          case "usage":
            log.info(
              {
                sessionId: this.sessionId,
                inputTokens: event.inputTokens,
                outputTokens: event.outputTokens,
                model: event.model,
              },
              "Usage",
            );
            break;
          // Other events (text_delta, thinking_delta, etc.) are not surfaced to the CU client
        }
      },
      this.abortController?.signal,
    );

    // If the loop exits without reaching a terminal state, treat as error
    failSession("Agent loop ended unexpectedly");
  } catch (err) {
    // An aborted signal means a terminal path (abort(), terminal tool, step
    // limit) already reported the outcome; nothing more to send.
    if (this.abortController?.signal.aborted) {
      log.info({ sessionId: this.sessionId }, "Agent loop aborted");
      return;
    }
    const message = err instanceof Error ? err.message : String(err);
    log.error({ err, sessionId: this.sessionId }, "Agent loop failed");
    failSession(message);
  } finally {
    // Always clean up skill projection state and session timer
    resetSkillToolProjection(this.skillProjectionState);
    if (this.sessionTimer) {
      clearTimeout(this.sessionTimer);
      this.sessionTimer = null;
    }
  }
}
669
-
670
/**
 * Fires the one-time terminal notification: resets the session's skill tool
 * projection and invokes the optional `onTerminal` callback with the session
 * id. Calls after the first are no-ops.
 */
private notifyTerminal(): void {
  if (!this.terminalNotified) {
    // Latch first so nothing triggered below can notify a second time.
    this.terminalNotified = true;
    resetSkillToolProjection(this.skillProjectionState);
    this.onTerminal?.(this.sessionId);
  }
}
676
-
677
- // ---------------------------------------------------------------------------
678
- // History compaction — strip old AX tree snapshots from tool results
679
- // ---------------------------------------------------------------------------
680
-
681
/**
 * Replaces the `<ax-tree>` blocks of older tool results with a short
 * placeholder, keeping only the `MAX_AX_TREES_IN_HISTORY` most recent user
 * messages that carry one. This keeps the conversation context small so
 * that TTFT does not grow linearly with step count.
 *
 * @param messages - Conversation history; never mutated.
 * @returns `messages` itself when nothing needs stripping, otherwise a new
 *   array in which only the affected messages and blocks are copied.
 */
static compactHistory(messages: Message[]): Message[] {
  // Positions of user messages holding at least one tool result with an
  // <ax-tree> block, in conversation order.
  const axTreeMessageIndices: number[] = [];
  messages.forEach((message, position) => {
    if (message.role !== "user") return;
    const carriesAxTree = message.content.some(
      (block) =>
        block.type === "tool_result" &&
        typeof block.content === "string" &&
        block.content.includes("<ax-tree>"),
    );
    if (carriesAxTree) axTreeMessageIndices.push(position);
  });

  if (axTreeMessageIndices.length <= MAX_AX_TREES_IN_HISTORY) {
    return messages;
  }

  // Everything except the trailing MAX_AX_TREES_IN_HISTORY positions is stale.
  const staleIndices = new Set(
    axTreeMessageIndices.slice(0, -MAX_AX_TREES_IN_HISTORY),
  );

  return messages.map((message, position) =>
    staleIndices.has(position)
      ? {
          ...message,
          content: message.content.map((block) =>
            block.type === "tool_result" &&
            typeof block.content === "string" &&
            block.content.includes("<ax-tree>")
              ? {
                  ...block,
                  content: block.content.replace(
                    AX_TREE_PATTERN,
                    AX_TREE_PLACEHOLDER,
                  ),
                }
              : block,
          ),
        }
      : message,
  );
}
736
-
737
/**
 * Neutralises literal `</ax-tree>` closing tags (any letter case) inside AX
 * tree text by HTML-escaping their angle brackets.
 *
 * Without this, the non-greedy compaction regex (`AX_TREE_PATTERN`) would
 * stop early whenever the screen itself shows XML/HTML source containing
 * that tag. Nothing unescapes the text later: compaction swaps the whole
 * block for a placeholder.
 *
 * @param content - Raw AX tree text.
 * @returns The text with every closing tag escaped.
 */
static escapeAxTreeContent(content: string): string {
  const closingTag = /<\/ax-tree>/gi;
  const escapedClosingTag = "&lt;/ax-tree&gt;";
  return content.replace(closingTag, escapedClosingTag);
}
747
-
748
- // ---------------------------------------------------------------------------
749
- // Build rich tool-result content from an observation so the model sees
750
- // updated screen state on each turn (not just "Action executed").
751
- // ---------------------------------------------------------------------------
752
-
753
/**
 * Renders an observation as the text of a tool result.
 *
 * Sections, in order: user guidance, execution result, AX diff (or a
 * "no visible effect" notice when the tree is unchanged), the current AX
 * tree wrapped in `<ax-tree>` markers, and screenshot metadata.
 *
 * @param obs - Observation reported by the client.
 * @param hadPreviousAXTree - Whether an AX tree was known before this
 *   observation; required for the "unchanged" notice.
 * @returns The joined, trimmed text, or `"Action executed"` if it is empty.
 */
private buildObservationResultContent(
  obs: CuObservation,
  hadPreviousAXTree: boolean,
): string {
  const sections: string[] = [];

  // Guidance goes first so the model reads it before anything else.
  if (obs.userGuidance) {
    sections.push(`USER GUIDANCE: ${obs.userGuidance}`, "");
  }

  if (obs.executionResult) {
    sections.push(obs.executionResult, "");
  }

  if (obs.axDiff) {
    sections.push(obs.axDiff, "");
  } else if (hadPreviousAXTree && obs.axTree != null) {
    // No diff although a tree exists on both sides: the UI did not change.
    const unchangedCount = this.consecutiveUnchangedSteps;
    const previous = this.actionHistory[this.actionHistory.length - 1];
    const previousWasWait = previous?.toolName === "computer_use_wait";
    if (unchangedCount >= CONSECUTIVE_UNCHANGED_WARNING_THRESHOLD) {
      sections.push(
        `WARNING: ${unchangedCount} consecutive actions had NO VISIBLE EFFECT on the UI. You MUST try a completely different approach.`,
      );
    } else if (!previousWasWait) {
      // A wait below the threshold gets no notice.
      sections.push(
        "Your last action had NO VISIBLE EFFECT on the UI. Try something different.",
      );
    }
    sections.push("");
  }

  // The markers let compactHistory find and strip old snapshots later.
  if (obs.axTree) {
    sections.push(
      "<ax-tree>",
      "CURRENT SCREEN STATE:",
      ComputerUseSession.escapeAxTreeContent(obs.axTree),
      "</ax-tree>",
    );
  }

  const metadataLines = this.formatScreenshotMetadata(obs);
  if (metadataLines.length > 0) {
    sections.push("", ...metadataLines);
  }

  const rendered = sections.join("\n").trim();
  return rendered || "Action executed";
}
808
-
809
- // ---------------------------------------------------------------------------
810
- // Message building (replicates AnthropicProvider.buildMessages from Swift)
811
- // ---------------------------------------------------------------------------
812
-
813
/**
 * Builds the single user message that seeds the agent loop from an
 * observation.
 *
 * The message holds an optional JPEG screenshot block followed by one text
 * block containing, in order: the task, what changed since the last action,
 * the current screen state, the (windowed) action history, a loop-detection
 * warning, user guidance, and a closing prompt for the next action.
 *
 * @param obs - Observation reported by the client.
 * @param hadPreviousAXTree - Whether an AX tree was known before this
 *   observation; required for the "unchanged" notice.
 * @returns A one-element message array with role `"user"`.
 */
private buildMessages(
  obs: CuObservation,
  hadPreviousAXTree: boolean,
): Message[] {
  const history = this.actionHistory;
  const hasHistory = history.length > 0;
  const blocks: ContentBlock[] = [];
  const lines: string[] = [];

  // Screenshot image block
  if (obs.screenshot) {
    blocks.push({
      type: "image",
      source: {
        type: "base64",
        media_type: "image/jpeg",
        data: obs.screenshot,
      },
    });
  }

  // Task
  const task = this.task.trim();
  lines.push(task ? `TASK: ${task}` : "TASK: No explicit task provided.", "");

  // Changes since the last action: either the diff itself, or a notice that
  // nothing changed. Both only apply once at least one action was taken.
  if (obs.axDiff && hasHistory) {
    lines.push(obs.axDiff, "");
  } else if (hadPreviousAXTree && obs.axTree != null && hasHistory) {
    const previous = history[history.length - 1];
    const previousWasWait = previous?.toolName === "computer_use_wait";
    let notice: string;
    if (
      this.consecutiveUnchangedSteps >= CONSECUTIVE_UNCHANGED_WARNING_THRESHOLD
    ) {
      notice = `WARNING: ${this.consecutiveUnchangedSteps} consecutive actions had NO VISIBLE EFFECT on the UI. You MUST try a completely different approach — do not repeat any of your recent actions.`;
    } else if (previousWasWait) {
      notice =
        "No visible changes detected — the UI is identical to the previous step.";
    } else {
      notice = `Your last action (${previous?.toolName ?? "unknown"}) had NO VISIBLE EFFECT on the UI. The screen is identical to the previous step. Do NOT repeat the same action — try something different.`;
    }
    lines.push("CHANGES SINCE LAST ACTION:", notice, "");
  }

  // Current screen state: AX tree if present, else screenshot only, else none.
  if (obs.axTree) {
    lines.push(
      "CURRENT SCREEN STATE (accessibility tree of the focused window):",
      obs.axTree,
      "",
      "Use element_id with the [ID] numbers shown above to target elements.",
    );

    // Secondary windows for cross-app awareness
    if (obs.secondaryWindows) {
      lines.push(
        "",
        obs.secondaryWindows,
        "",
        "Note: The element [ID]s above are from other windows — you can reference them for context but can only interact with the focused window's elements.",
      );
    }

    if (obs.screenshot) {
      lines.push(
        "",
        "A screenshot of the FULL SCREEN is also attached above. Use it to see content outside the focused window (e.g., reference documents, PDFs, other apps visible behind the current window).",
        ...this.formatScreenshotMetadata(obs),
      );
    }
  } else if (obs.screenshot) {
    lines.push(
      "CURRENT SCREEN STATE:",
      "See the screenshot above. No accessibility tree available — estimate coordinates from the image.",
      ...this.formatScreenshotMetadata(obs),
    );
  } else {
    lines.push("CURRENT SCREEN STATE:", "No screen data available.");
  }

  // Action history, limited to the most recent MAX_HISTORY_ENTRIES records.
  if (hasHistory) {
    lines.push("", "ACTIONS TAKEN SO FAR:");
    const omittedCount = history.length - MAX_HISTORY_ENTRIES;
    const shown: ActionRecord[] =
      omittedCount > 0 ? history.slice(-MAX_HISTORY_ENTRIES) : history;
    if (omittedCount > 0) {
      lines.push(` [... ${omittedCount} earlier actions omitted]`);
    }
    lines.push(
      ...shown.map(
        (entry) =>
          ` ${entry.step}. ${entry.toolName} → ${entry.result ?? "executed"}`,
      ),
    );
  }

  // Loop detection: warn when the last LOOP_DETECTION_WINDOW actions used
  // the same tool with the same JSON-serialised input.
  if (history.length >= LOOP_DETECTION_WINDOW) {
    const recent = history.slice(-LOOP_DETECTION_WINDOW);
    const stuckInLoop = recent.every(
      (entry) =>
        entry.toolName === recent[0].toolName &&
        JSON.stringify(entry.input) === JSON.stringify(recent[0].input),
    );
    if (stuckInLoop) {
      lines.push(
        "",
        `WARNING: You have repeated the exact same action (${recent[0].toolName}) ${LOOP_DETECTION_WINDOW} times in a row. You MUST try a completely different approach or call computer_use_done with an explanation of why you are stuck.`,
      );
    }
  }

  // Surface user guidance prominently
  if (obs.userGuidance) {
    lines.push("", `USER GUIDANCE: ${obs.userGuidance}`);
  }

  // Prompt for next action
  lines.push(
    "",
    hasHistory
      ? "Decide the next action to take."
      : "This is the first action. Examine the screen state and decide what to do first.",
  );

  blocks.push({ type: "text", text: lines.join("\n") });

  return [{ role: "user", content: blocks }];
}
977
-
978
/**
 * Describes the attached screenshot as human-readable metadata lines.
 *
 * @param obs - Observation reported by the client.
 * @returns Zero or more lines (pixel size, screen size in points, coordinate
 *   origin, capture display id), each emitted only when its fields are
 *   present. Always empty when the observation has no screenshot.
 */
private formatScreenshotMetadata(obs: CuObservation): string[] {
  const metadata: string[] = [];
  if (!obs.screenshot) {
    return metadata;
  }

  const {
    screenshotWidthPx,
    screenshotHeightPx,
    screenWidthPt,
    screenHeightPt,
    coordinateOrigin,
    captureDisplayId,
  } = obs;

  // Dimensions are only reported as a complete width/height pair.
  if (screenshotWidthPx != null && screenshotHeightPx != null) {
    metadata.push(
      `Screenshot metadata: ${screenshotWidthPx}x${screenshotHeightPx} px`,
    );
  }
  if (screenWidthPt != null && screenHeightPt != null) {
    metadata.push(`Screen metadata: ${screenWidthPt}x${screenHeightPt} pt`);
  }
  if (coordinateOrigin) {
    metadata.push(`Coordinate origin: ${coordinateOrigin}`);
  }
  if (captureDisplayId != null) {
    metadata.push(`Capture display ID: ${captureDisplayId}`);
  }
  return metadata;
}
1000
-
1001
/**
 * Reports whether the session's permission prompter has a pending request
 * with the given id. Returns `false` when no prompter exists yet.
 */
hasPendingConfirmation(requestId: string): boolean {
  const activePrompter = this.prompter;
  if (activePrompter == null) {
    return false;
  }
  return activePrompter.hasPendingRequest(requestId) ?? false;
}
1004
-
1005
/**
 * Forwards a user's confirmation decision to the session's permission
 * prompter. Does nothing when no prompter exists yet.
 *
 * @param requestId - Id of the confirmation request being answered.
 * @param decision - The user's decision.
 * @param selectedPattern - Optional pattern chosen by the user.
 * @param selectedScope - Optional scope chosen by the user.
 * @param decisionContext - Optional context attached to the decision.
 */
handleConfirmationResponse(
  requestId: string,
  decision: UserDecision,
  selectedPattern?: string,
  selectedScope?: string,
  decisionContext?: string,
): void {
  const activePrompter = this.prompter;
  if (activePrompter == null) {
    return;
  }
  activePrompter.resolveConfirmation(
    requestId,
    decision,
    selectedPattern,
    selectedScope,
    decisionContext,
  );
}
1020
- }