@vellumai/assistant 0.4.48 → 0.4.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252) hide show
  1. package/ARCHITECTURE.md +2 -2
  2. package/README.md +2 -23
  3. package/docs/architecture/integrations.md +45 -41
  4. package/docs/architecture/keychain-broker.md +3 -3
  5. package/docs/runbook-trusted-contacts.md +3 -8
  6. package/hook-templates/debug-prompt-logger/hook.json +1 -1
  7. package/hook-templates/debug-prompt-logger/run.sh +1 -3
  8. package/package.json +1 -1
  9. package/src/__tests__/actor-token-service.test.ts +0 -1
  10. package/src/__tests__/anthropic-provider.test.ts +156 -0
  11. package/src/__tests__/approval-cascade.test.ts +810 -0
  12. package/src/__tests__/approval-primitive.test.ts +0 -1
  13. package/src/__tests__/approval-routes-http.test.ts +2 -0
  14. package/src/__tests__/assistant-attachments.test.ts +12 -34
  15. package/src/__tests__/assistant-feature-flag-guardrails.test.ts +76 -0
  16. package/src/__tests__/assistant-feature-flags-integration.test.ts +0 -1
  17. package/src/__tests__/browser-skill-baseline-tool-payload.test.ts +2 -2
  18. package/src/__tests__/channel-guardian.test.ts +0 -2
  19. package/src/__tests__/channel-readiness-routes.test.ts +15 -6
  20. package/src/__tests__/channel-readiness-service.test.ts +10 -9
  21. package/src/__tests__/checker.test.ts +9 -29
  22. package/src/__tests__/computer-use-skill-manifest-regression.test.ts +1 -1
  23. package/src/__tests__/computer-use-tools.test.ts +2 -19
  24. package/src/__tests__/config-watcher.test.ts +0 -1
  25. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +0 -1
  26. package/src/__tests__/context-image-dimensions.test.ts +332 -0
  27. package/src/__tests__/context-token-estimator.test.ts +196 -13
  28. package/src/__tests__/conversation-attention-store.test.ts +0 -1
  29. package/src/__tests__/conversation-attention-telegram.test.ts +0 -1
  30. package/src/__tests__/conversation-routes-guardian-reply.test.ts +144 -0
  31. package/src/__tests__/conversation-routes-slash-commands.test.ts +1 -0
  32. package/src/__tests__/credential-metadata-store.test.ts +64 -73
  33. package/src/__tests__/credential-security-invariants.test.ts +13 -7
  34. package/src/__tests__/credential-vault-unit.test.ts +280 -49
  35. package/src/__tests__/credential-vault.test.ts +138 -16
  36. package/src/__tests__/credentials-cli.test.ts +71 -0
  37. package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
  38. package/src/__tests__/ephemeral-permissions.test.ts +3 -3
  39. package/src/__tests__/gateway-only-guard.test.ts +0 -1
  40. package/src/__tests__/guardian-action-grant-mint-consume.test.ts +0 -1
  41. package/src/__tests__/guardian-decision-primitive-canonical.test.ts +0 -1
  42. package/src/__tests__/guardian-routing-invariants.test.ts +0 -1
  43. package/src/__tests__/guardian-verification-voice-binding.test.ts +0 -1
  44. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +0 -39
  45. package/src/__tests__/heartbeat-service.test.ts +0 -1
  46. package/src/__tests__/host-cu-proxy.test.ts +629 -0
  47. package/src/__tests__/host-shell-tool.test.ts +27 -15
  48. package/src/__tests__/http-user-message-parity.test.ts +1 -0
  49. package/src/__tests__/ingress-url-consistency.test.ts +14 -21
  50. package/src/__tests__/integration-status.test.ts +32 -51
  51. package/src/__tests__/intent-routing.test.ts +0 -1
  52. package/src/__tests__/invite-routes-http.test.ts +10 -9
  53. package/src/__tests__/keychain-broker-client.test.ts +11 -43
  54. package/src/__tests__/notification-routing-intent.test.ts +0 -1
  55. package/src/__tests__/oauth-cli.test.ts +373 -14
  56. package/src/__tests__/oauth-provider-profiles.test.ts +9 -9
  57. package/src/__tests__/oauth-scope-policy.test.ts +4 -6
  58. package/src/__tests__/oauth-store.test.ts +756 -0
  59. package/src/__tests__/onboarding-starter-tasks.test.ts +0 -1
  60. package/src/__tests__/provider-error-scenarios.test.ts +0 -1
  61. package/src/__tests__/provider-streaming.benchmark.test.ts +0 -1
  62. package/src/__tests__/public-ingress-urls.test.ts +15 -21
  63. package/src/__tests__/recording-handler.test.ts +3 -4
  64. package/src/__tests__/registry.test.ts +2 -2
  65. package/src/__tests__/runtime-events-sse.test.ts +55 -7
  66. package/src/__tests__/schedule-store.test.ts +0 -1
  67. package/src/__tests__/scheduler-recurrence.test.ts +0 -1
  68. package/src/__tests__/scoped-approval-grants.test.ts +0 -1
  69. package/src/__tests__/scoped-grant-security-matrix.test.ts +0 -1
  70. package/src/__tests__/secret-ingress-handler.test.ts +0 -1
  71. package/src/__tests__/send-endpoint-busy.test.ts +21 -6
  72. package/src/__tests__/sequence-store.test.ts +0 -1
  73. package/src/__tests__/session-init.benchmark.test.ts +4 -5
  74. package/src/__tests__/skill-include-graph.test.ts +66 -0
  75. package/src/__tests__/skill-load-feature-flag.test.ts +0 -1
  76. package/src/__tests__/skill-load-tool.test.ts +149 -1
  77. package/src/__tests__/skill-projection-feature-flag.test.ts +0 -1
  78. package/src/__tests__/skills-uninstall.test.ts +1 -1
  79. package/src/__tests__/skills.test.ts +3 -3
  80. package/src/__tests__/slack-channel-config.test.ts +67 -3
  81. package/src/__tests__/slack-share-routes.test.ts +17 -19
  82. package/src/__tests__/system-prompt.test.ts +0 -1
  83. package/src/__tests__/telegram-invite-adapter.test.ts +18 -22
  84. package/src/__tests__/terminal-tools.test.ts +4 -3
  85. package/src/__tests__/test-support/computer-use-skill-harness.ts +3 -2
  86. package/src/__tests__/tool-approval-handler.test.ts +0 -1
  87. package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -1
  88. package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
  89. package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
  90. package/src/__tests__/tool-executor.test.ts +0 -1
  91. package/src/__tests__/tool-grant-request-escalation.test.ts +0 -1
  92. package/src/__tests__/trust-store-pattern-matches.test.ts +29 -0
  93. package/src/__tests__/trust-store.test.ts +1 -22
  94. package/src/__tests__/trusted-contact-approval-notifier.test.ts +0 -1
  95. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +0 -1
  96. package/src/__tests__/twilio-routes.test.ts +0 -16
  97. package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
  98. package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
  99. package/src/agent/ax-tree-compaction.test.ts +235 -0
  100. package/src/agent/loop.ts +76 -130
  101. package/src/calls/call-domain.ts +1 -6
  102. package/src/calls/relay-server.ts +9 -13
  103. package/src/calls/twilio-config.ts +2 -7
  104. package/src/calls/twilio-routes.ts +1 -2
  105. package/src/calls/voice-ingress-preflight.ts +1 -1
  106. package/src/cli/commands/browser-relay.ts +18 -12
  107. package/src/cli/commands/completions.ts +0 -3
  108. package/src/cli/commands/credentials.ts +101 -15
  109. package/src/cli/commands/oauth/apps.ts +255 -0
  110. package/src/cli/commands/oauth/connections.ts +299 -0
  111. package/src/cli/commands/oauth/index.ts +52 -0
  112. package/src/cli/commands/oauth/providers.ts +242 -0
  113. package/src/cli/commands/skills.ts +4 -338
  114. package/src/cli/program.ts +1 -5
  115. package/src/cli/reference.ts +1 -3
  116. package/src/config/assistant-feature-flags.ts +0 -3
  117. package/src/config/bundled-skills/_shared/CLI_RETRIEVAL_PATTERN.md +1 -1
  118. package/src/config/bundled-skills/computer-use/SKILL.md +3 -6
  119. package/src/config/bundled-skills/computer-use/TOOLS.json +22 -4
  120. package/src/config/bundled-skills/google-calendar/calendar-client.ts +21 -16
  121. package/src/config/bundled-skills/messaging/tools/shared.ts +1 -4
  122. package/src/config/bundled-skills/settings/SKILL.md +1 -1
  123. package/src/config/bundled-skills/settings/TOOLS.json +2 -8
  124. package/src/config/bundled-skills/settings/tools/voice-config-update.ts +5 -33
  125. package/src/config/env-registry.ts +14 -83
  126. package/src/config/env.ts +11 -50
  127. package/src/config/feature-flag-registry.json +16 -16
  128. package/src/config/loader.ts +0 -6
  129. package/src/config/schema.ts +3 -1
  130. package/src/config/skills.ts +21 -2
  131. package/src/context/image-dimensions.ts +229 -0
  132. package/src/context/token-estimator.ts +75 -12
  133. package/src/context/window-manager.ts +49 -10
  134. package/src/daemon/assistant-attachments.ts +1 -13
  135. package/src/daemon/handlers/config-ingress.ts +8 -33
  136. package/src/daemon/handlers/config-slack-channel.ts +49 -46
  137. package/src/daemon/handlers/config-telegram.ts +32 -16
  138. package/src/daemon/handlers/sessions.ts +10 -24
  139. package/src/daemon/handlers/shared.ts +0 -130
  140. package/src/daemon/host-cu-proxy.ts +401 -0
  141. package/src/daemon/lifecycle.ts +36 -68
  142. package/src/daemon/message-protocol.ts +3 -0
  143. package/src/daemon/message-types/computer-use.ts +2 -119
  144. package/src/daemon/message-types/host-cu.ts +19 -0
  145. package/src/daemon/message-types/messages.ts +3 -0
  146. package/src/daemon/server.ts +14 -21
  147. package/src/daemon/session-agent-loop-handlers.ts +2 -0
  148. package/src/daemon/session-attachments.ts +1 -2
  149. package/src/daemon/session-slash.ts +1 -1
  150. package/src/daemon/session-surfaces.ts +40 -28
  151. package/src/daemon/session-tool-setup.ts +2 -9
  152. package/src/daemon/session.ts +138 -15
  153. package/src/daemon/tool-side-effects.ts +2 -8
  154. package/src/daemon/watch-handler.ts +2 -2
  155. package/src/events/tool-metrics-listener.ts +2 -2
  156. package/src/hooks/manager.ts +1 -4
  157. package/src/inbound/public-ingress-urls.ts +7 -7
  158. package/src/logfire.ts +16 -5
  159. package/src/memory/conversation-key-store.ts +21 -0
  160. package/src/memory/db-init.ts +4 -0
  161. package/src/memory/migrations/149-oauth-tables.ts +60 -0
  162. package/src/memory/migrations/index.ts +1 -0
  163. package/src/memory/schema/index.ts +1 -0
  164. package/src/memory/schema/oauth.ts +65 -0
  165. package/src/messaging/provider.ts +4 -4
  166. package/src/messaging/providers/gmail/client.ts +82 -2
  167. package/src/messaging/providers/gmail/people-client.ts +10 -10
  168. package/src/messaging/providers/telegram-bot/adapter.ts +17 -17
  169. package/src/messaging/providers/whatsapp/adapter.ts +11 -8
  170. package/src/messaging/registry.ts +2 -32
  171. package/src/notifications/copy-composer.ts +0 -5
  172. package/src/notifications/signal.ts +4 -5
  173. package/src/oauth/byo-connection.test.ts +126 -25
  174. package/src/oauth/byo-connection.ts +22 -6
  175. package/src/oauth/connect-orchestrator.ts +113 -57
  176. package/src/oauth/connect-types.ts +17 -23
  177. package/src/oauth/connection-resolver.ts +35 -11
  178. package/src/oauth/connection.ts +1 -1
  179. package/src/oauth/manual-token-connection.ts +104 -0
  180. package/src/oauth/oauth-store.ts +496 -0
  181. package/src/oauth/platform-connection.test.ts +29 -0
  182. package/src/oauth/platform-connection.ts +6 -5
  183. package/src/oauth/provider-behaviors.ts +124 -0
  184. package/src/oauth/scope-policy.ts +9 -2
  185. package/src/oauth/seed-providers.ts +161 -0
  186. package/src/oauth/token-persistence.ts +74 -78
  187. package/src/permissions/checker.ts +3 -3
  188. package/src/permissions/defaults.ts +0 -1
  189. package/src/permissions/prompter.ts +10 -1
  190. package/src/permissions/trust-store.ts +13 -0
  191. package/src/prompts/__tests__/build-cli-reference-section.test.ts +3 -1
  192. package/src/prompts/system-prompt.ts +28 -40
  193. package/src/providers/anthropic/client.ts +133 -24
  194. package/src/providers/retry.ts +1 -27
  195. package/src/runtime/auth/route-policy.ts +0 -3
  196. package/src/runtime/channel-reply-delivery.ts +0 -40
  197. package/src/runtime/gateway-client.ts +0 -7
  198. package/src/runtime/http-server.ts +8 -6
  199. package/src/runtime/http-types.ts +2 -2
  200. package/src/runtime/middleware/twilio-validation.ts +1 -11
  201. package/src/runtime/pending-interactions.ts +14 -12
  202. package/src/runtime/routes/channel-delivery-routes.ts +0 -1
  203. package/src/runtime/routes/conversation-routes.ts +73 -19
  204. package/src/runtime/routes/events-routes.ts +21 -11
  205. package/src/runtime/routes/host-cu-routes.ts +97 -0
  206. package/src/runtime/routes/inbound-stages/background-dispatch.ts +12 -111
  207. package/src/runtime/routes/integrations/slack/share.ts +6 -7
  208. package/src/runtime/routes/log-export-routes.ts +126 -8
  209. package/src/runtime/routes/settings-routes.ts +55 -48
  210. package/src/runtime/routes/surface-action-routes.ts +1 -1
  211. package/src/runtime/routes/watch-routes.ts +128 -0
  212. package/src/schedule/integration-status.ts +10 -9
  213. package/src/security/credential-key.ts +0 -156
  214. package/src/security/keychain-broker-client.ts +5 -6
  215. package/src/security/oauth2.ts +1 -1
  216. package/src/security/token-manager.ts +119 -46
  217. package/src/skills/catalog-install.ts +358 -0
  218. package/src/skills/include-graph.ts +32 -0
  219. package/src/telegram/bot-username.ts +2 -3
  220. package/src/tools/browser/network-recorder.ts +1 -1
  221. package/src/tools/browser/network-recording-types.ts +1 -1
  222. package/src/tools/computer-use/definitions.ts +46 -11
  223. package/src/tools/computer-use/registry.ts +4 -5
  224. package/src/tools/credentials/broker.ts +1 -2
  225. package/src/tools/credentials/metadata-store.ts +17 -121
  226. package/src/tools/credentials/vault.ts +94 -167
  227. package/src/tools/registry.ts +2 -7
  228. package/src/tools/skills/load.ts +62 -3
  229. package/src/tools/watch/watch-state.ts +0 -12
  230. package/src/util/logger.ts +7 -41
  231. package/src/util/platform.ts +9 -28
  232. package/src/watcher/providers/google-calendar.ts +2 -1
  233. package/src/__tests__/computer-use-session-compaction.test.ts +0 -143
  234. package/src/__tests__/computer-use-session-lifecycle.test.ts +0 -322
  235. package/src/__tests__/computer-use-session-working-dir.test.ts +0 -166
  236. package/src/__tests__/computer-use-skill-baseline.test.ts +0 -78
  237. package/src/__tests__/computer-use-skill-endstate.test.ts +0 -105
  238. package/src/__tests__/computer-use-skill-lifecycle-cleanup.test.ts +0 -249
  239. package/src/__tests__/ride-shotgun-handler.test.ts +0 -452
  240. package/src/cli/commands/dev.ts +0 -129
  241. package/src/cli/commands/map.ts +0 -391
  242. package/src/cli/commands/oauth.ts +0 -77
  243. package/src/config/bundled-skills/computer-use/tools/computer-use-request-control.ts +0 -16
  244. package/src/daemon/computer-use-session.ts +0 -1026
  245. package/src/daemon/ride-shotgun-handler.ts +0 -569
  246. package/src/oauth/provider-base-urls.ts +0 -21
  247. package/src/oauth/provider-profiles.ts +0 -192
  248. package/src/prompts/computer-use-prompt.ts +0 -98
  249. package/src/runtime/routes/computer-use-routes.ts +0 -641
  250. package/src/runtime/telegram-streaming-delivery.test.ts +0 -729
  251. package/src/runtime/telegram-streaming-delivery.ts +0 -393
  252. package/src/tools/computer-use/request-computer-control.ts +0 -56
@@ -35,6 +35,7 @@ import * as pendingInteractions from "../../runtime/pending-interactions.js";
35
35
  import { getSubagentManager } from "../../subagent/index.js";
36
36
  import { truncate } from "../../util/truncate.js";
37
37
  import { HostBashProxy } from "../host-bash-proxy.js";
38
+ import { HostCuProxy } from "../host-cu-proxy.js";
38
39
  import { HostFileProxy } from "../host-file-proxy.js";
39
40
  import type {
40
41
  CancelRequest,
@@ -60,7 +61,6 @@ import {
60
61
  type HandlerContext,
61
62
  log,
62
63
  pendingStandaloneSecrets,
63
- wireEscalationHandler,
64
64
  } from "./shared.js";
65
65
 
66
66
  /**
@@ -165,6 +165,12 @@ export function makeEventSender(params: {
165
165
  conversationId,
166
166
  kind: "host_file",
167
167
  });
168
+ } else if (event.type === "host_cu_request") {
169
+ pendingInteractions.register(event.requestId, {
170
+ session,
171
+ conversationId,
172
+ kind: "host_cu",
173
+ });
168
174
  }
169
175
 
170
176
  ctx.send(event);
@@ -195,21 +201,6 @@ export function handleConfirmationResponse(
195
201
  }
196
202
  }
197
203
 
198
- // Also check computer-use sessions — they have their own PermissionPrompter
199
- for (const [, cuSession] of ctx.cuSessions) {
200
- if (cuSession.hasPendingConfirmation(msg.requestId)) {
201
- cuSession.handleConfirmationResponse(
202
- msg.requestId,
203
- msg.decision,
204
- msg.selectedPattern,
205
- msg.selectedScope,
206
- );
207
- syncCanonicalStatusFromConfirmationDecision(msg.requestId, msg.decision);
208
- pendingInteractions.resolve(msg.requestId);
209
- return;
210
- }
211
- }
212
-
213
204
  log.warn(
214
205
  { requestId: msg.requestId },
215
206
  "No session found with pending confirmation for requestId",
@@ -362,7 +353,6 @@ export async function handleSessionCreate(
362
353
  maxResponseTokens: msg.maxResponseTokens,
363
354
  transport: msg.transport,
364
355
  });
365
- wireEscalationHandler(session, ctx);
366
356
 
367
357
  // Pre-activate skills before sending session_info so they're available
368
358
  // for the initial message processing.
@@ -431,6 +421,8 @@ export async function handleSessionCreate(
431
421
  pendingInteractions.resolve(requestId);
432
422
  });
433
423
  session.setHostFileProxy(fileProxy);
424
+ const cuProxy = new HostCuProxy(sendEvent);
425
+ session.setHostCuProxy(cuProxy);
434
426
  }
435
427
  session.updateClient(sendEvent, false);
436
428
  session
@@ -492,13 +484,7 @@ export async function switchSession(
492
484
  // Load the session without rebinding the client — the session stays headless
493
485
  await ctx.getOrCreateSession(sessionId);
494
486
  } else {
495
- const session = await ctx.getOrCreateSession(sessionId);
496
- // Only wire the escalation handler if one isn't already set — handleTaskSubmit
497
- // sets a handler with the client's actual screen dimensions, and overwriting it
498
- // here would replace those dimensions with the daemon's defaults.
499
- if (!session.hasEscalationHandler()) {
500
- wireEscalationHandler(session, ctx);
501
- }
487
+ await ctx.getOrCreateSession(sessionId);
502
488
  }
503
489
 
504
490
  return {
@@ -1,17 +1,12 @@
1
- import { execSync } from "node:child_process";
2
-
3
1
  import { v4 as uuid } from "uuid";
4
2
 
5
3
  import { getConfig } from "../../config/loader.js";
6
4
  import type { HeartbeatService } from "../../heartbeat/heartbeat-service.js";
7
5
  import type { SecretPromptResult } from "../../permissions/secret-prompter.js";
8
- import { RateLimitProvider } from "../../providers/ratelimit.js";
9
- import { getFailoverProvider } from "../../providers/registry.js";
10
6
  import type { AuthContext } from "../../runtime/auth/types.js";
11
7
  import type { DebouncerMap } from "../../util/debounce.js";
12
8
  import { getLogger } from "../../util/logger.js";
13
9
  import { estimateBase64Bytes } from "../assistant-attachments.js";
14
- import { ComputerUseSession } from "../computer-use-session.js";
15
10
  import type {
16
11
  ServerMessage,
17
12
  SessionTransportMetadata,
@@ -28,9 +23,6 @@ export const CONFIG_RELOAD_DEBOUNCE_MS = 300;
28
23
 
29
24
  const HISTORY_ATTACHMENT_TEXT_LIMIT = 500;
30
25
 
31
- export const FALLBACK_SCREEN = { width: 1920, height: 1080 };
32
- let cachedScreenDims: { width: number; height: number } | null = null;
33
-
34
26
  // Module-level map for non-session secret prompts (e.g. publish_page)
35
27
  export const pendingStandaloneSecrets = new Map<
36
28
  string,
@@ -150,8 +142,6 @@ export interface SessionCreateOptions {
150
142
  */
151
143
  export interface HandlerContext {
152
144
  sessions: Map<string, Session>;
153
- cuSessions: Map<string, ComputerUseSession>;
154
- cuObservationParseSequence: Map<string, number>;
155
145
  sharedRequestTimestamps: number[];
156
146
  debounceTimers: DebouncerMap;
157
147
  suppressConfigReload: boolean;
@@ -170,126 +160,6 @@ export interface HandlerContext {
170
160
  heartbeatService?: HeartbeatService;
171
161
  }
172
162
 
173
- /**
174
- * Query the main display dimensions via CoreGraphics.
175
- * Cached after the first successful call; falls back to 1920x1080.
176
- */
177
- export function getScreenDimensions(): { width: number; height: number } {
178
- if (cachedScreenDims) return cachedScreenDims;
179
- if (process.platform !== "darwin") return FALLBACK_SCREEN;
180
- try {
181
- // Use osascript (JXA) instead of `swift` to avoid the
182
- // "Install Command Line Developer Tools" popup on fresh macOS installs.
183
- const out = execSync(
184
- `osascript -l JavaScript -e 'ObjC.import("AppKit"); var f = $.NSScreen.mainScreen.frame; Math.round(f.size.width) + "x" + Math.round(f.size.height)'`,
185
- { timeout: 10_000, encoding: "utf-8" },
186
- ).trim();
187
- const [w, h] = out.split("x").map(Number);
188
- if (w > 0 && h > 0) {
189
- cachedScreenDims = { width: w, height: h };
190
- return cachedScreenDims;
191
- }
192
- } catch (err) {
193
- log.debug({ err }, "Failed to query screen dimensions, using fallback");
194
- }
195
- return FALLBACK_SCREEN;
196
- }
197
-
198
- /**
199
- * Wire the escalation handler on a text_qa session so that invoking
200
- * `computer_use_request_control` creates a CU session and notifies the client.
201
- *
202
- * In the HTTP-only world, the escalation handler broadcasts events via
203
- * `ctx.broadcast` instead of targeting a specific socket.
204
- */
205
- export function wireEscalationHandler(
206
- session: Session,
207
- ctx: HandlerContext,
208
- explicitWidth?: number,
209
- explicitHeight?: number,
210
- ): void {
211
- const dims =
212
- explicitWidth && explicitHeight
213
- ? { width: explicitWidth, height: explicitHeight }
214
- : getScreenDimensions();
215
- const screenWidth = dims.width;
216
- const screenHeight = dims.height;
217
- session.setEscalationHandler(
218
- (task: string, sourceSessionId: string): boolean => {
219
- const cuSessionId = uuid();
220
-
221
- // Inline CU session creation (previously delegated to deleted handlers/computer-use.ts)
222
- const existingSession = ctx.cuSessions.get(cuSessionId);
223
- if (existingSession) {
224
- existingSession.abort();
225
- ctx.cuSessions.delete(cuSessionId);
226
- ctx.cuObservationParseSequence.delete(cuSessionId);
227
- }
228
-
229
- const config = getConfig();
230
- let provider = getFailoverProvider(config.provider, config.providerOrder);
231
- const { rateLimit } = config;
232
- if (
233
- rateLimit.maxRequestsPerMinute > 0 ||
234
- rateLimit.maxTokensPerSession > 0
235
- ) {
236
- provider = new RateLimitProvider(
237
- provider,
238
- rateLimit,
239
- ctx.sharedRequestTimestamps,
240
- );
241
- }
242
-
243
- const sendToClient = (serverMsg: ServerMessage) => {
244
- ctx.send(serverMsg);
245
- };
246
-
247
- const sessionRef: { current?: ComputerUseSession } = {};
248
- const onTerminal = (sid: string) => {
249
- const current = ctx.cuSessions.get(sid);
250
- if (sessionRef.current && current && current !== sessionRef.current) {
251
- return;
252
- }
253
- ctx.cuSessions.delete(sid);
254
- ctx.cuObservationParseSequence.delete(sid);
255
- log.info(
256
- { sessionId: sid },
257
- "Computer-use session cleaned up after terminal state",
258
- );
259
- };
260
-
261
- const cuSession = new ComputerUseSession(
262
- cuSessionId,
263
- task,
264
- screenWidth,
265
- screenHeight,
266
- provider,
267
- sendToClient,
268
- "computer_use",
269
- onTerminal,
270
- );
271
- sessionRef.current = cuSession;
272
-
273
- ctx.cuSessions.set(cuSessionId, cuSession);
274
-
275
- log.info(
276
- { sessionId: cuSessionId, taskLength: task.length },
277
- "Computer-use session created via escalation",
278
- );
279
-
280
- ctx.broadcast({
281
- type: "task_routed",
282
- sessionId: cuSessionId,
283
- interactionType: "computer_use",
284
- task,
285
- escalatedFrom: sourceSessionId,
286
- });
287
-
288
- return true;
289
- },
290
- );
291
- }
292
-
293
163
  export function isRecord(value: unknown): value is Record<string, unknown> {
294
164
  return typeof value === "object" && value != null;
295
165
  }
@@ -0,0 +1,401 @@
1
+ /**
2
+ * Host computer-use proxy.
3
+ *
4
+ * Proxies computer-use actions to the desktop client when running as a
5
+ * managed assistant, following the same request/resolve pattern as
6
+ * HostBashProxy. Also owns CU-specific state tracking (step counting,
7
+ * loop detection, observation formatting) for the unified agent loop.
8
+ */
9
+
10
+ import { v4 as uuid } from "uuid";
11
+
12
+ import type { ContentBlock } from "../providers/types.js";
13
+ import type { ToolExecutionResult } from "../tools/types.js";
14
+ import { AssistantError, ErrorCode } from "../util/errors.js";
15
+ import { getLogger } from "../util/logger.js";
16
+ import type { ServerMessage } from "./message-protocol.js";
17
+
18
+ const log = getLogger("host-cu-proxy");
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Constants
22
+ // ---------------------------------------------------------------------------
23
+
24
+ const REQUEST_TIMEOUT_SEC = 60;
25
+ const MAX_STEPS = 50;
26
+ const MAX_HISTORY_ENTRIES = 10;
27
+ const LOOP_DETECTION_WINDOW = 3;
28
+ const CONSECUTIVE_UNCHANGED_WARNING_THRESHOLD = 2;
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Types
32
+ // ---------------------------------------------------------------------------
33
+
34
+ export interface CuObservationResult {
35
+ axTree?: string;
36
+ axDiff?: string;
37
+ secondaryWindows?: string;
38
+ screenshot?: string; // base64 JPEG
39
+ screenshotWidthPx?: number;
40
+ screenshotHeightPx?: number;
41
+ screenWidthPt?: number;
42
+ screenHeightPt?: number;
43
+ executionResult?: string;
44
+ executionError?: string;
45
+ userGuidance?: string;
46
+ }
47
+
48
+ export interface ActionRecord {
49
+ step: number;
50
+ toolName: string;
51
+ input: Record<string, unknown>;
52
+ reasoning?: string;
53
+ }
54
+
55
+ interface PendingRequest {
56
+ resolve: (result: ToolExecutionResult) => void;
57
+ reject: (err: Error) => void;
58
+ timer: ReturnType<typeof setTimeout>;
59
+ }
60
+
61
+ // ---------------------------------------------------------------------------
62
+ // HostCuProxy
63
+ // ---------------------------------------------------------------------------
64
+
65
+ export class HostCuProxy {
66
+ private pending = new Map<string, PendingRequest>();
67
+ private sendToClient: (msg: ServerMessage) => void;
68
+ private clientConnected = false;
69
+
70
+ // CU state tracking (per-conversation)
71
+ private _stepCount = 0;
72
+ private _maxSteps: number;
73
+ private _previousAXTree: string | undefined;
74
+ private _consecutiveUnchangedSteps = 0;
75
+ private _actionHistory: ActionRecord[] = [];
76
+
77
+ constructor(
78
+ sendToClient: (msg: ServerMessage) => void,
79
+ maxSteps = MAX_STEPS,
80
+ ) {
81
+ this.sendToClient = sendToClient;
82
+ this._maxSteps = maxSteps;
83
+ }
84
+
85
+ // ---------------------------------------------------------------------------
86
+ // CU state accessors (for testing / external inspection)
87
+ // ---------------------------------------------------------------------------
88
+
89
+ get stepCount(): number {
90
+ return this._stepCount;
91
+ }
92
+
93
+ get maxSteps(): number {
94
+ return this._maxSteps;
95
+ }
96
+
97
+ get previousAXTree(): string | undefined {
98
+ return this._previousAXTree;
99
+ }
100
+
101
+ get consecutiveUnchangedSteps(): number {
102
+ return this._consecutiveUnchangedSteps;
103
+ }
104
+
105
+ get actionHistory(): readonly ActionRecord[] {
106
+ return this._actionHistory;
107
+ }
108
+
109
+ // ---------------------------------------------------------------------------
110
+ // Sender management
111
+ // ---------------------------------------------------------------------------
112
+
113
+ updateSender(
114
+ sendToClient: (msg: ServerMessage) => void,
115
+ clientConnected: boolean,
116
+ ): void {
117
+ this.sendToClient = sendToClient;
118
+ this.clientConnected = clientConnected;
119
+ }
120
+
121
+ // ---------------------------------------------------------------------------
122
+ // Request / resolve lifecycle
123
+ // ---------------------------------------------------------------------------
124
+
125
+ request(
126
+ toolName: string,
127
+ input: Record<string, unknown>,
128
+ sessionId: string,
129
+ stepNumber: number,
130
+ reasoning?: string,
131
+ signal?: AbortSignal,
132
+ ): Promise<ToolExecutionResult> {
133
+ if (signal?.aborted) {
134
+ return Promise.resolve({
135
+ content: "Aborted",
136
+ isError: true,
137
+ });
138
+ }
139
+
140
+ // Enforce step limit before sending to client
141
+ if (this._stepCount > this._maxSteps) {
142
+ return Promise.resolve({
143
+ content: `Step limit (${this._maxSteps}) exceeded. Call computer_use_done to finish.`,
144
+ isError: true,
145
+ });
146
+ }
147
+
148
+ const requestId = uuid();
149
+
150
+ return new Promise<ToolExecutionResult>((resolve, reject) => {
151
+ const timer = setTimeout(() => {
152
+ this.pending.delete(requestId);
153
+ log.warn({ requestId, toolName }, "Host CU proxy request timed out");
154
+ resolve({
155
+ content: "Host CU proxy timed out waiting for client response",
156
+ isError: true,
157
+ });
158
+ }, REQUEST_TIMEOUT_SEC * 1000);
159
+
160
+ this.pending.set(requestId, { resolve, reject, timer });
161
+
162
+ if (signal) {
163
+ const onAbort = () => {
164
+ if (this.pending.has(requestId)) {
165
+ clearTimeout(timer);
166
+ this.pending.delete(requestId);
167
+ resolve({ content: "Aborted", isError: true });
168
+ }
169
+ };
170
+ signal.addEventListener("abort", onAbort, { once: true });
171
+ }
172
+
173
+ this.sendToClient({
174
+ type: "host_cu_request",
175
+ requestId,
176
+ sessionId,
177
+ toolName,
178
+ input,
179
+ stepNumber,
180
+ reasoning,
181
+ } as ServerMessage);
182
+ });
183
+ }
184
+
185
+ resolve(requestId: string, observation: CuObservationResult): void {
186
+ const entry = this.pending.get(requestId);
187
+ if (!entry) {
188
+ log.warn({ requestId }, "No pending host CU request for response");
189
+ return;
190
+ }
191
+ clearTimeout(entry.timer);
192
+ this.pending.delete(requestId);
193
+
194
+ // Update CU state from observation
195
+ this.updateStateFromObservation(observation);
196
+
197
+ const result = this.formatObservation(observation);
198
+ entry.resolve(result);
199
+ }
200
+
201
+ hasPendingRequest(requestId: string): boolean {
202
+ return this.pending.has(requestId);
203
+ }
204
+
205
+ // ---------------------------------------------------------------------------
206
+ // CU state management
207
+ // ---------------------------------------------------------------------------
208
+
209
+ /**
210
+ * Increment step count and record an action. Call this before sending
211
+ * each non-terminal tool request.
212
+ */
213
+ recordAction(
214
+ toolName: string,
215
+ input: Record<string, unknown>,
216
+ reasoning?: string,
217
+ ): void {
218
+ this._stepCount++;
219
+ this._actionHistory.push({
220
+ step: this._stepCount,
221
+ toolName,
222
+ input,
223
+ reasoning,
224
+ });
225
+ // Keep history bounded
226
+ if (this._actionHistory.length > MAX_HISTORY_ENTRIES) {
227
+ this._actionHistory = this._actionHistory.slice(-MAX_HISTORY_ENTRIES);
228
+ }
229
+ }
230
+
231
/**
 * Clears all CU state: action history, unchanged-step counter, previous AX
 * tree and step count. Called on terminal tools (computer_use_done, etc.).
 */
reset(): void {
  this._actionHistory = [];
  this._consecutiveUnchangedSteps = 0;
  this._previousAXTree = undefined;
  this._stepCount = 0;
}
238
+
239
+ // ---------------------------------------------------------------------------
240
+ // Observation formatting
241
+ // ---------------------------------------------------------------------------
242
+
243
/**
 * Renders a CU observation as a ToolExecutionResult.
 *
 * Text content is assembled in this order: user guidance, execution result,
 * AX diff (or a "no visible effect" warning when there is no diff but both a
 * previous and a current AX tree exist), a repeated-action warning, the AX
 * tree wrapped in `<ax-tree>` markers, and screenshot metadata. A screenshot,
 * when present, is attached as a base64 `image/jpeg` content block. When
 * `executionError` is set, the result is flagged as an error and the content
 * is prefixed with the failure message.
 */
formatObservation(obs: CuObservationResult): ToolExecutionResult {
  const parts: string[] = [];

  // Appends one paragraph followed by a blank separator line.
  const addSection = (text: string): void => {
    parts.push(text, "");
  };

  // User guidance goes first so it is the first thing the model reads.
  if (obs.userGuidance) {
    addSection(`USER GUIDANCE: ${obs.userGuidance}`);
  }

  if (obs.executionResult) {
    addSection(obs.executionResult);
  }

  // Either show the AX diff, or warn that nothing changed on screen.
  if (obs.axDiff) {
    addSection(obs.axDiff);
  } else if (this._previousAXTree != null && obs.axTree != null) {
    const unchangedSteps = this._consecutiveUnchangedSteps;
    const repeatedlyStuck =
      unchangedSteps >= CONSECUTIVE_UNCHANGED_WARNING_THRESHOLD;
    addSection(
      repeatedlyStuck
        ? `WARNING: ${unchangedSteps} consecutive actions had NO VISIBLE EFFECT on the UI. You MUST try a completely different approach.`
        : "Your last action had NO VISIBLE EFFECT on the UI. Try something different.",
    );
  }

  // Loop detection: warn when the most recent actions are all the same
  // tool call with the same (JSON-serialized) input.
  if (this._actionHistory.length >= LOOP_DETECTION_WINDOW) {
    const recent = this._actionHistory.slice(-LOOP_DETECTION_WINDOW);
    const reference = recent[0];
    const referenceInput = JSON.stringify(reference.input);
    const hasDifferentAction = recent.some(
      (action) =>
        action.toolName !== reference.toolName ||
        JSON.stringify(action.input) !== referenceInput,
    );
    if (!hasDifferentAction) {
      addSection(
        `WARNING: You've repeated the same action (${reference.toolName}) ${LOOP_DETECTION_WINDOW} times. Try something different.`,
      );
    }
  }

  // Current screen state, wrapped in markers for history compaction.
  if (obs.axTree) {
    parts.push(
      "<ax-tree>",
      "CURRENT SCREEN STATE:",
      HostCuProxy.escapeAxTreeContent(obs.axTree),
      "</ax-tree>",
    );
  }

  // Screenshot metadata, separated from the preceding text by a blank line.
  const metadataLines = this.formatScreenshotMetadata(obs);
  if (metadataLines.length > 0) {
    parts.push("", ...metadataLines);
  }

  const joined = parts.join("\n").trim();
  const content = joined || "Action executed";

  // The screenshot itself travels as an image content block.
  const contentBlocks: ContentBlock[] = obs.screenshot
    ? [
        {
          type: "image",
          source: {
            type: "base64",
            media_type: "image/jpeg",
            data: obs.screenshot,
          },
        },
      ]
    : [];

  const isError = obs.executionError != null;
  const finalContent = isError
    ? `Action failed: ${obs.executionError}\n\n${content}`
    : content;

  return {
    content: finalContent,
    isError,
    // Only include the key when there is at least one block.
    ...(contentBlocks.length > 0 ? { contentBlocks } : {}),
  };
}
339
+
340
+ // ---------------------------------------------------------------------------
341
+ // Dispose
342
+ // ---------------------------------------------------------------------------
343
+
344
/**
 * Tears down the proxy: clears the timer of every pending request, rejects
 * each one with an INTERNAL_ERROR AssistantError, then empties the pending map.
 */
dispose(): void {
  for (const pendingEntry of this.pending.values()) {
    clearTimeout(pendingEntry.timer);
    const disposedError = new AssistantError(
      "Host CU proxy disposed",
      ErrorCode.INTERNAL_ERROR,
    );
    pendingEntry.reject(disposedError);
  }
  this.pending.clear();
}
353
+
354
+ // ---------------------------------------------------------------------------
355
+ // Private helpers
356
+ // ---------------------------------------------------------------------------
357
+
358
/**
 * Folds an incoming observation into the consecutive-unchanged tracking.
 *
 * Once at least one step has been recorded: an observation carrying a diff
 * resets the unchanged counter; one without a diff bumps it, but only when
 * both a previous tree and a current tree are available. The current tree,
 * when present, then becomes the previous tree.
 */
private updateStateFromObservation(obs: CuObservationResult): void {
  const hasRecordedSteps = this._stepCount > 0;
  if (hasRecordedSteps) {
    if (obs.axDiff != null) {
      this._consecutiveUnchangedSteps = 0;
    } else if (this._previousAXTree != null && obs.axTree != null) {
      this._consecutiveUnchangedSteps++;
    }
  }

  if (obs.axTree != null) {
    this._previousAXTree = obs.axTree;
  }
}
376
+
377
/**
 * Builds the text lines describing the screenshot's pixel size and the
 * screen's point size. Returns an empty array when the observation has no
 * screenshot; each line is emitted only when both of its dimensions are set.
 */
private formatScreenshotMetadata(obs: CuObservationResult): string[] {
  const metadata: string[] = [];
  if (obs.screenshot) {
    const {
      screenshotWidthPx: widthPx,
      screenshotHeightPx: heightPx,
      screenWidthPt: widthPt,
      screenHeightPt: heightPt,
    } = obs;

    if (widthPx != null && heightPx != null) {
      metadata.push(`Screenshot metadata: ${widthPx}x${heightPx} px`);
    }
    if (widthPt != null && heightPt != null) {
      metadata.push(`Screen metadata: ${widthPt}x${heightPt} pt`);
    }
  }
  return metadata;
}
393
+
394
/**
 * Replaces every literal `</ax-tree>` (matched case-insensitively) inside AX
 * tree content with its HTML-entity form, so the compaction regex does not
 * stop prematurely at a closing marker that is part of the content.
 */
static escapeAxTreeContent(content: string): string {
  const closingMarker = /<\/ax-tree>/i;
  const escapedMarker = "&lt;/ax-tree&gt;";
  // Splitting on the marker and re-joining with the escaped form replaces
  // every occurrence.
  return content.split(closingMarker).join(escapedMarker);
}
401
+ }