@vellumai/assistant 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/.env.example +3 -0
  2. package/ARCHITECTURE.md +40 -3
  3. package/README.md +43 -35
  4. package/package.json +1 -1
  5. package/scripts/ipc/generate-swift.ts +1 -0
  6. package/src/__tests__/__snapshots__/ipc-snapshot.test.ts.snap +58 -120
  7. package/src/__tests__/actor-token-service.test.ts +1099 -0
  8. package/src/__tests__/agent-loop.test.ts +51 -0
  9. package/src/__tests__/approval-routes-http.test.ts +2 -0
  10. package/src/__tests__/assistant-events-sse-hardening.test.ts +7 -5
  11. package/src/__tests__/assistant-id-boundary-guard.test.ts +125 -0
  12. package/src/__tests__/call-controller.test.ts +49 -0
  13. package/src/__tests__/call-pointer-message-composer.test.ts +171 -0
  14. package/src/__tests__/call-pointer-messages.test.ts +93 -3
  15. package/src/__tests__/call-pointer-no-hardcoded-copy.guard.test.ts +42 -0
  16. package/src/__tests__/callback-handoff-copy.test.ts +186 -0
  17. package/src/__tests__/channel-approval-routes.test.ts +133 -12
  18. package/src/__tests__/channel-guardian.test.ts +0 -87
  19. package/src/__tests__/channel-readiness-service.test.ts +10 -16
  20. package/src/__tests__/checker.test.ts +33 -12
  21. package/src/__tests__/config-schema.test.ts +4 -0
  22. package/src/__tests__/confirmation-request-guardian-bridge.test.ts +410 -0
  23. package/src/__tests__/conversation-routes-guardian-reply.test.ts +256 -0
  24. package/src/__tests__/conversation-routes.test.ts +12 -3
  25. package/src/__tests__/credential-security-invariants.test.ts +1 -1
  26. package/src/__tests__/daemon-server-session-init.test.ts +4 -0
  27. package/src/__tests__/guardian-actions-endpoint.test.ts +19 -14
  28. package/src/__tests__/guardian-dispatch.test.ts +8 -0
  29. package/src/__tests__/guardian-outbound-http.test.ts +4 -4
  30. package/src/__tests__/guardian-question-mode.test.ts +200 -0
  31. package/src/__tests__/guardian-routing-invariants.test.ts +178 -0
  32. package/src/__tests__/guardian-routing-state.test.ts +525 -0
  33. package/src/__tests__/handle-user-message-secret-resume.test.ts +2 -0
  34. package/src/__tests__/handlers-telegram-config.test.ts +0 -83
  35. package/src/__tests__/handlers-user-message-approval-consumption.test.ts +55 -0
  36. package/src/__tests__/headless-browser-navigate.test.ts +2 -0
  37. package/src/__tests__/ipc-snapshot.test.ts +18 -51
  38. package/src/__tests__/non-member-access-request.test.ts +131 -8
  39. package/src/__tests__/notification-decision-fallback.test.ts +129 -4
  40. package/src/__tests__/notification-decision-strategy.test.ts +62 -2
  41. package/src/__tests__/notification-guardian-path.test.ts +3 -0
  42. package/src/__tests__/recording-intent-handler.test.ts +1 -0
  43. package/src/__tests__/relay-server.test.ts +841 -39
  44. package/src/__tests__/send-endpoint-busy.test.ts +5 -0
  45. package/src/__tests__/session-agent-loop.test.ts +1 -0
  46. package/src/__tests__/session-confirmation-signals.test.ts +523 -0
  47. package/src/__tests__/session-init.benchmark.test.ts +0 -1
  48. package/src/__tests__/session-surfaces-task-progress.test.ts +1 -1
  49. package/src/__tests__/session-tool-setup-app-refresh.test.ts +81 -2
  50. package/src/__tests__/session-tool-setup-memory-scope.test.ts +1 -1
  51. package/src/__tests__/session-tool-setup-side-effect-flag.test.ts +1 -1
  52. package/src/__tests__/tool-executor.test.ts +21 -2
  53. package/src/__tests__/tool-grant-request-escalation.test.ts +333 -27
  54. package/src/__tests__/trusted-contact-approval-notifier.test.ts +678 -0
  55. package/src/__tests__/trusted-contact-inline-approval-integration.test.ts +1064 -0
  56. package/src/__tests__/twilio-config.test.ts +2 -13
  57. package/src/agent/loop.ts +1 -1
  58. package/src/approvals/guardian-decision-primitive.ts +10 -2
  59. package/src/approvals/guardian-request-resolvers.ts +128 -9
  60. package/src/calls/call-constants.ts +21 -0
  61. package/src/calls/call-controller.ts +9 -2
  62. package/src/calls/call-domain.ts +28 -7
  63. package/src/calls/call-pointer-message-composer.ts +154 -0
  64. package/src/calls/call-pointer-messages.ts +106 -27
  65. package/src/calls/guardian-dispatch.ts +4 -2
  66. package/src/calls/relay-server.ts +424 -12
  67. package/src/calls/twilio-config.ts +4 -11
  68. package/src/calls/twilio-routes.ts +1 -1
  69. package/src/calls/types.ts +3 -1
  70. package/src/cli.ts +5 -4
  71. package/src/config/bundled-skills/agentmail/SKILL.md +4 -0
  72. package/src/config/bundled-skills/app-builder/SKILL.md +146 -10
  73. package/src/config/bundled-skills/app-builder/TOOLS.json +1 -1
  74. package/src/config/bundled-skills/email-setup/SKILL.md +1 -1
  75. package/src/config/bundled-skills/google-oauth-setup/SKILL.md +105 -81
  76. package/src/config/bundled-skills/messaging/SKILL.md +61 -12
  77. package/src/config/bundled-skills/messaging/TOOLS.json +58 -0
  78. package/src/config/bundled-skills/messaging/tools/gmail-sender-digest.ts +6 -1
  79. package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +35 -0
  80. package/src/config/bundled-skills/messaging/tools/messaging-sender-digest.ts +52 -0
  81. package/src/config/bundled-skills/phone-calls/SKILL.md +30 -39
  82. package/src/config/bundled-skills/twitter/SKILL.md +3 -3
  83. package/src/config/bundled-skills/vercel-token-setup/SKILL.md +1 -0
  84. package/src/config/calls-schema.ts +24 -0
  85. package/src/config/env.ts +22 -0
  86. package/src/config/feature-flag-registry.json +8 -0
  87. package/src/config/schema.ts +2 -2
  88. package/src/config/skills.ts +11 -0
  89. package/src/config/system-prompt.ts +11 -1
  90. package/src/config/templates/SOUL.md +2 -0
  91. package/src/config/vellum-skills/sms-setup/SKILL.md +71 -82
  92. package/src/config/vellum-skills/trusted-contacts/SKILL.md +10 -9
  93. package/src/config/vellum-skills/twilio-setup/SKILL.md +88 -73
  94. package/src/daemon/call-pointer-generators.ts +59 -0
  95. package/src/daemon/computer-use-session.ts +2 -5
  96. package/src/daemon/handlers/apps.ts +76 -20
  97. package/src/daemon/handlers/config-channels.ts +5 -55
  98. package/src/daemon/handlers/config-inbox.ts +9 -3
  99. package/src/daemon/handlers/config-ingress.ts +28 -3
  100. package/src/daemon/handlers/config-telegram.ts +12 -0
  101. package/src/daemon/handlers/config.ts +2 -6
  102. package/src/daemon/handlers/pairing.ts +2 -0
  103. package/src/daemon/handlers/sessions.ts +48 -3
  104. package/src/daemon/handlers/shared.ts +17 -2
  105. package/src/daemon/ipc-contract/integrations.ts +1 -99
  106. package/src/daemon/ipc-contract/messages.ts +47 -1
  107. package/src/daemon/ipc-contract/notifications.ts +11 -0
  108. package/src/daemon/ipc-contract-inventory.json +2 -4
  109. package/src/daemon/lifecycle.ts +17 -0
  110. package/src/daemon/server.ts +14 -1
  111. package/src/daemon/session-agent-loop-handlers.ts +20 -0
  112. package/src/daemon/session-agent-loop.ts +22 -11
  113. package/src/daemon/session-lifecycle.ts +1 -1
  114. package/src/daemon/session-process.ts +11 -1
  115. package/src/daemon/session-runtime-assembly.ts +3 -0
  116. package/src/daemon/session-surfaces.ts +3 -2
  117. package/src/daemon/session.ts +88 -1
  118. package/src/daemon/tool-side-effects.ts +22 -0
  119. package/src/home-base/prebuilt/brain-graph.html +1483 -0
  120. package/src/home-base/prebuilt/index.html +40 -0
  121. package/src/inbound/platform-callback-registration.ts +157 -0
  122. package/src/memory/canonical-guardian-store.ts +1 -1
  123. package/src/memory/db-init.ts +4 -0
  124. package/src/memory/migrations/038-actor-token-records.ts +39 -0
  125. package/src/memory/migrations/index.ts +1 -0
  126. package/src/memory/schema.ts +16 -0
  127. package/src/messaging/provider-types.ts +24 -0
  128. package/src/messaging/provider.ts +7 -0
  129. package/src/messaging/providers/gmail/adapter.ts +127 -0
  130. package/src/messaging/providers/sms/adapter.ts +40 -37
  131. package/src/notifications/adapters/macos.ts +45 -2
  132. package/src/notifications/broadcaster.ts +16 -0
  133. package/src/notifications/copy-composer.ts +39 -1
  134. package/src/notifications/decision-engine.ts +22 -9
  135. package/src/notifications/destination-resolver.ts +16 -2
  136. package/src/notifications/emit-signal.ts +16 -8
  137. package/src/notifications/guardian-question-mode.ts +419 -0
  138. package/src/notifications/signal.ts +14 -3
  139. package/src/permissions/checker.ts +13 -1
  140. package/src/permissions/prompter.ts +14 -0
  141. package/src/providers/anthropic/client.ts +20 -0
  142. package/src/providers/provider-send-message.ts +15 -3
  143. package/src/runtime/access-request-helper.ts +71 -1
  144. package/src/runtime/actor-token-service.ts +234 -0
  145. package/src/runtime/actor-token-store.ts +236 -0
  146. package/src/runtime/channel-approvals.ts +5 -3
  147. package/src/runtime/channel-readiness-service.ts +23 -64
  148. package/src/runtime/channel-readiness-types.ts +3 -4
  149. package/src/runtime/channel-retry-sweep.ts +4 -1
  150. package/src/runtime/confirmation-request-guardian-bridge.ts +197 -0
  151. package/src/runtime/guardian-action-followup-executor.ts +1 -1
  152. package/src/runtime/guardian-context-resolver.ts +82 -0
  153. package/src/runtime/guardian-outbound-actions.ts +0 -3
  154. package/src/runtime/guardian-reply-router.ts +67 -30
  155. package/src/runtime/guardian-vellum-migration.ts +57 -0
  156. package/src/runtime/http-server.ts +65 -12
  157. package/src/runtime/http-types.ts +13 -0
  158. package/src/runtime/invite-redemption-service.ts +8 -0
  159. package/src/runtime/local-actor-identity.ts +76 -0
  160. package/src/runtime/middleware/actor-token.ts +271 -0
  161. package/src/runtime/routes/approval-routes.ts +82 -7
  162. package/src/runtime/routes/brain-graph-routes.ts +222 -0
  163. package/src/runtime/routes/channel-readiness-routes.ts +71 -0
  164. package/src/runtime/routes/conversation-routes.ts +140 -52
  165. package/src/runtime/routes/events-routes.ts +20 -5
  166. package/src/runtime/routes/guardian-action-routes.ts +45 -3
  167. package/src/runtime/routes/guardian-approval-interception.ts +29 -0
  168. package/src/runtime/routes/guardian-bootstrap-routes.ts +145 -0
  169. package/src/runtime/routes/inbound-message-handler.ts +143 -2
  170. package/src/runtime/routes/integration-routes.ts +7 -15
  171. package/src/runtime/routes/pairing-routes.ts +163 -0
  172. package/src/runtime/routes/twilio-routes.ts +934 -0
  173. package/src/runtime/tool-grant-request-helper.ts +3 -1
  174. package/src/security/oauth2.ts +27 -2
  175. package/src/security/token-manager.ts +46 -10
  176. package/src/tools/browser/browser-execution.ts +4 -3
  177. package/src/tools/browser/browser-handoff.ts +10 -18
  178. package/src/tools/browser/browser-manager.ts +80 -25
  179. package/src/tools/browser/browser-screencast.ts +35 -119
  180. package/src/tools/permission-checker.ts +15 -4
  181. package/src/tools/tool-approval-handler.ts +242 -18
  182. package/src/__tests__/handlers-twilio-config.test.ts +0 -1928
  183. package/src/daemon/handlers/config-twilio.ts +0 -1082
@@ -128,6 +128,7 @@ export function createOrReuseToolGrantRequest(
128
128
  questionText,
129
129
  expiresAt: new Date(Date.now() + GUARDIAN_APPROVAL_TTL_MS).toISOString(),
130
130
  });
131
+ const requestCode = canonicalRequest.requestCode ?? canonicalRequest.id.slice(0, 6).toUpperCase();
131
132
 
132
133
  // Emit notification so guardian is alerted. Uses 'guardian.question' as
133
134
  // sourceEventName so that existing request-code guidance in the notification
@@ -145,7 +146,8 @@ export function createOrReuseToolGrantRequest(
145
146
  },
146
147
  contextPayload: {
147
148
  requestId: canonicalRequest.id,
148
- requestCode: canonicalRequest.requestCode,
149
+ requestKind: 'tool_grant_request',
150
+ requestCode,
149
151
  sourceChannel,
150
152
  requesterExternalUserId,
151
153
  requesterChatId: requesterChatId ?? null,
@@ -179,12 +179,19 @@ async function runGatewayFlow(
179
179
  codeChallenge: string,
180
180
  state: string,
181
181
  ): Promise<OAuth2FlowResult> {
182
+ // Dynamic imports required here to avoid circular dependencies with
183
+ // config/loader → security → oauth2 module chains.
182
184
  const { loadConfig } = await import('../config/loader.js');
183
185
  const { getOAuthCallbackUrl } = await import('../inbound/public-ingress-urls.js');
186
+ const { resolveCallbackUrl } = await import('../inbound/platform-callback-registration.js');
184
187
  const { registerPendingCallback } = await import('./oauth-callback-registry.js');
185
188
 
186
189
  const appConfig = loadConfig();
187
- const redirectUri = getOAuthCallbackUrl(appConfig);
190
+ const redirectUri = await resolveCallbackUrl(
191
+ () => getOAuthCallbackUrl(appConfig),
192
+ 'webhooks/oauth/callback',
193
+ 'oauth',
194
+ );
188
195
 
189
196
  const codePromise = new Promise<string>((resolve, reject) => {
190
197
  registerPendingCallback(state, resolve, reject);
@@ -385,12 +392,19 @@ export async function prepareOAuth2Flow(
385
392
  return prepareLoopbackFlow(config, options?.loopbackPort);
386
393
  }
387
394
 
395
+ // Dynamic imports required here to avoid circular dependencies with
396
+ // config/loader → security → oauth2 module chains.
388
397
  const { loadConfig } = await import('../config/loader.js');
389
398
  const { getOAuthCallbackUrl } = await import('../inbound/public-ingress-urls.js');
399
+ const { resolveCallbackUrl } = await import('../inbound/platform-callback-registration.js');
390
400
  const { registerPendingCallback } = await import('./oauth-callback-registry.js');
391
401
 
392
402
  const appConfig = loadConfig();
393
- const redirectUri = getOAuthCallbackUrl(appConfig);
403
+ const redirectUri = await resolveCallbackUrl(
404
+ () => getOAuthCallbackUrl(appConfig),
405
+ 'webhooks/oauth/callback',
406
+ 'oauth',
407
+ );
394
408
 
395
409
  const codeVerifier = generateCodeVerifier();
396
410
  const codeChallenge = generateCodeChallenge(codeVerifier);
@@ -590,6 +604,8 @@ export async function startOAuth2Flow(
590
604
 
591
605
  let hasPublicUrl = false;
592
606
  try {
607
+ // Dynamic imports required here to avoid circular dependencies with
608
+ // config/loader → security → oauth2 module chains.
593
609
  const { loadConfig } = await import('../config/loader.js');
594
610
  const { getPublicBaseUrl } = await import('../inbound/public-ingress-urls.js');
595
611
  getPublicBaseUrl(loadConfig());
@@ -598,6 +614,15 @@ export async function startOAuth2Flow(
598
614
  // No public URL configured
599
615
  }
600
616
 
617
+ // When containerized with a platform, callback routes are registered
618
+ // through the platform gateway — treat as having a public URL.
619
+ if (!hasPublicUrl) {
620
+ const { shouldUsePlatformCallbacks } = await import('../inbound/platform-callback-registration.js');
621
+ if (shouldUsePlatformCallbacks()) {
622
+ hasPublicUrl = true;
623
+ }
624
+ }
625
+
601
626
  // Determine transport: explicit option > auto-detect from config
602
627
  const transport = options?.callbackTransport
603
628
  ?? (hasPublicUrl ? 'gateway' : 'loopback');
@@ -13,6 +13,16 @@ import { getSecureKey, setSecureKey } from './secure-keys.js';
13
13
 
14
14
  const log = getLogger('token-manager');
15
15
 
16
+ const MESSAGING_SERVICES = new Set(['integration:gmail', 'integration:slack']);
17
+
18
+ function recoveryHint(service: string): string {
19
+ const shortName = service.startsWith('integration:') ? service.slice('integration:'.length) : service;
20
+ if (MESSAGING_SERVICES.has(service)) {
21
+ return ` Reconnect ${shortName} — follow the Error Recovery steps in the messaging skill. Do not present options or explain the error to the user.`;
22
+ }
23
+ return ` Re-authorization required for ${shortName}. Do not present options or explain the error to the user.`;
24
+ }
25
+
16
26
  /** Buffer before expiry to trigger proactive refresh (5 minutes). */
17
27
  const EXPIRY_BUFFER_MS = 5 * 60 * 1000;
18
28
 
@@ -113,7 +123,7 @@ function isTokenExpired(service: string): boolean {
113
123
  async function doRefresh(service: string): Promise<string> {
114
124
  const refreshToken = getSecureKey(`credential:${service}:refresh_token`);
115
125
  if (!refreshToken) {
116
- throw new TokenExpiredError(service, `No refresh token available for "${service}". Re-authorization required.`);
126
+ throw new TokenExpiredError(service, `No refresh token available for "${service}". Re-authorization required.${recoveryHint(service)}`);
117
127
  }
118
128
 
119
129
  const meta = getCredentialMetadata(service, 'access_token');
@@ -131,7 +141,7 @@ async function doRefresh(service: string): Promise<string> {
131
141
  : '';
132
142
  throw new TokenExpiredError(
133
143
  service,
134
- `Missing OAuth2 refresh config for "${service}".${hint} Please reconnect via chat to re-authorize.`,
144
+ `Missing OAuth2 refresh config for "${service}".${hint}${recoveryHint(service)}`,
135
145
  );
136
146
  }
137
147
 
@@ -145,7 +155,7 @@ async function doRefresh(service: string): Promise<string> {
145
155
  throw new TokenExpiredError(
146
156
  service,
147
157
  `Token refresh for "${service}" is temporarily suspended after ${state.consecutiveFailures} consecutive failures. ` +
148
- `Retrying in ${Math.ceil(remainingMs / 1000)}s. Please try again later or re-authorize.`,
158
+ `Retrying in ${Math.ceil(remainingMs / 1000)}s.${recoveryHint(service)}`,
149
159
  );
150
160
  }
151
161
 
@@ -156,16 +166,23 @@ async function doRefresh(service: string): Promise<string> {
156
166
  result = await refreshOAuth2Token(resolvedTokenUrl, clientId, refreshToken, clientSecret, authMethod);
157
167
  } catch (err) {
158
168
  recordRefreshFailure(service);
169
+ if (isCredentialError(err)) {
170
+ const msg = err instanceof Error ? err.message : String(err);
171
+ throw new TokenExpiredError(service, `Token refresh failed for "${service}": ${msg}.${recoveryHint(service)}`);
172
+ }
173
+ // Transient errors (network failures, 5xx) are re-thrown as-is so
174
+ // upstream retry/backoff logic can handle them without triggering
175
+ // unnecessary reauthorization flows.
159
176
  throw err;
160
177
  }
161
178
 
162
179
  if (!setSecureKey(`credential:${service}:access_token`, result.accessToken)) {
163
- throw new Error(`Failed to store refreshed access token for "${service}"`);
180
+ throw new TokenExpiredError(service, `Failed to store refreshed access token for "${service}".`);
164
181
  }
165
182
 
166
183
  if (result.refreshToken) {
167
184
  if (!setSecureKey(`credential:${service}:refresh_token`, result.refreshToken)) {
168
- throw new Error(`Failed to store refreshed refresh token for "${service}"`);
185
+ throw new TokenExpiredError(service, `Failed to store refreshed refresh token for "${service}".`);
169
186
  }
170
187
  }
171
188
 
@@ -197,11 +214,7 @@ export async function withValidToken<T>(
197
214
  ): Promise<T> {
198
215
  let token = getSecureKey(`credential:${service}:access_token`);
199
216
  if (!token) {
200
- const isGoogle = service === 'integration:gmail';
201
- const googleHint = isGoogle
202
- ? ' Do NOT fabricate credentials. Install and load the "google-oauth-setup" skill to set up OAuth credentials properly.'
203
- : '';
204
- throw new TokenExpiredError(service, `No access token found for "${service}". Authorization required.${googleHint}`);
217
+ throw new TokenExpiredError(service, `No access token found for "${service}". Authorization required.${recoveryHint(service)}`);
205
218
  }
206
219
 
207
220
  // Proactively refresh if expired or about to expire.
@@ -227,3 +240,26 @@ function is401Error(err: unknown): boolean {
227
240
  }
228
241
  return false;
229
242
  }
243
+
244
+ /**
245
+ * Distinguish credential-specific refresh failures (which need reauthorization)
246
+ * from transient errors (network timeouts, 5xx) that can be retried.
247
+ *
248
+ * refreshOAuth2Token() throws Error with messages like:
249
+ * "OAuth2 token refresh failed (HTTP 401: invalid_client)"
250
+ * "OAuth2 token refresh failed (HTTP 400: invalid_grant)"
251
+ * "OAuth2 token refresh failed (HTTP 500)"
252
+ *
253
+ * Credential errors: 400 with invalid_grant or invalid_client, 401, 403.
254
+ * Everything else (5xx, network errors, non-credential 400s) is transient.
255
+ */
256
+ function isCredentialError(err: unknown): boolean {
257
+ if (!(err instanceof Error)) return false;
258
+ const msg = err.message;
259
+ // 401/403 are always credential errors
260
+ if (/HTTP\s+40[13]\b/.test(msg)) return true;
261
+ // 400 with invalid_grant means the refresh token is revoked/expired;
262
+ // invalid_client means client credentials are bad/rotated
263
+ if (/HTTP\s+400\b/.test(msg) && /invalid_grant|invalid_client/.test(msg)) return true;
264
+ return false;
265
+ }
@@ -248,9 +248,10 @@ export async function executeBrowserNavigate(
248
248
  routeHandler = null;
249
249
  }
250
250
 
251
- // In CDP mode, keep the browser minimized unless a handoff is active.
252
- if (browserManager.browserMode === 'cdp' && !browserManager.isInteractive(context.sessionId)) {
253
- await browserManager.moveWindowOffscreen();
251
+ // Reposition the browser window after navigation so the user can watch.
252
+ // positionWindowSidebar() is a no-op when browserCdpSession is unavailable.
253
+ if (!browserManager.isInteractive(context.sessionId)) {
254
+ await browserManager.positionWindowSidebar();
254
255
  }
255
256
 
256
257
  if (blockedUrl) {
@@ -1,7 +1,7 @@
1
1
  import type { ServerMessage } from '../../daemon/ipc-contract.js';
2
2
  import { getLogger } from '../../util/logger.js';
3
3
  import { browserManager } from './browser-manager.js';
4
- import { getScreencastSurfaceId } from './browser-screencast.js';
4
+ import { isScreencastActive } from './browser-screencast.js';
5
5
 
6
6
  const log = getLogger('browser-handoff');
7
7
 
@@ -28,6 +28,8 @@ export async function startHandoff(
28
28
  log.info({ sessionId, reason: options.reason }, 'Starting handoff to user');
29
29
 
30
30
  // Bring Chrome to the front so the user can interact directly.
31
+ // The window is already sized/positioned in top-right via positionWindowSidebar(),
32
+ // so no repositioning needed.
31
33
  if (options.bringToFront) {
32
34
  try {
33
35
  const page = await browserManager.getOrCreateSessionPage(sessionId);
@@ -35,24 +37,19 @@ export async function startHandoff(
35
37
  } catch (err) {
36
38
  log.warn({ err, sessionId }, 'Failed to bring browser to front');
37
39
  }
38
- await browserManager.moveWindowOnscreen();
39
40
  }
40
41
 
41
- const surfaceId = getScreencastSurfaceId(sessionId);
42
- if (!surfaceId) {
43
- log.warn({ sessionId }, 'No active screencast surface for handoff');
44
- // Move window back offscreen if we brought it to front
45
- if (options.bringToFront) {
46
- await browserManager.moveWindowOffscreen();
47
- }
42
+ if (!isScreencastActive(sessionId)) {
43
+ log.warn({ sessionId }, 'No active browser session for handoff');
48
44
  return;
49
45
  }
50
46
 
51
- // Send interactive mode change with reason and message
47
+ // Send interactive mode change with reason and message.
48
+ // surfaceId uses sessionId as a stable identifier since PiP surfaces are removed.
52
49
  sendToClient({
53
50
  type: 'browser_interactive_mode_changed',
54
51
  sessionId,
55
- surfaceId,
52
+ surfaceId: sessionId,
56
53
  enabled: true,
57
54
  reason: options.reason,
58
55
  message: options.message,
@@ -60,18 +57,13 @@ export async function startHandoff(
60
57
 
61
58
  browserManager.setInteractiveMode(sessionId, true);
62
59
 
63
- // Wait for user to hand back control (5 min timeout)
60
+ // Wait for user to hand back control (5 min timeout, or auto-detect URL change)
64
61
  await browserManager.waitForHandoffComplete(sessionId);
65
62
 
66
- // Move Chrome back offscreen after handoff.
67
- if (options.bringToFront) {
68
- await browserManager.moveWindowOffscreen();
69
- }
70
-
71
63
  sendToClient({
72
64
  type: 'browser_interactive_mode_changed',
73
65
  sessionId,
74
- surfaceId,
66
+ surfaceId: sessionId,
75
67
  enabled: false,
76
68
  } as ServerMessage);
77
69
 
@@ -10,10 +10,19 @@ import { checkBrowserRuntime } from './runtime-check.js';
10
10
 
11
11
  const log = getLogger('browser-manager');
12
12
 
13
+ /**
14
+ * Returns true when the host has a GUI capable of displaying a browser window.
15
+ * macOS and Windows always have a display; Linux requires DISPLAY or WAYLAND_DISPLAY.
16
+ */
17
+ function canDisplayGui(): boolean {
18
+ if (process.platform === 'darwin' || process.platform === 'win32') return true;
19
+ return !!(process.env.DISPLAY || process.env.WAYLAND_DISPLAY);
20
+ }
21
+
13
22
  // Screencast capture dimensions — used by coordinate math across the browser module
14
23
  // to map between page coordinates and screencast-frame coordinates.
15
- export const SCREENCAST_WIDTH = 800;
16
- export const SCREENCAST_HEIGHT = 600;
24
+ export const SCREENCAST_WIDTH = 1280;
25
+ export const SCREENCAST_HEIGHT = 800;
17
26
 
18
27
  function getDownloadsDir(): string {
19
28
  const dir = join(getDataDir(), 'browser-downloads');
@@ -260,9 +269,12 @@ class BrowserManager {
260
269
  }
261
270
 
262
271
  if (invokingSessionId && this.sessionSenders.get(invokingSessionId) && this._browserMode === 'headless') {
272
+ const willBeHeaded = canDisplayGui();
263
273
  log.info(
264
- { sessionId: invokingSessionId },
265
- 'CDP unavailable/declined; staying in headless mode (no visible browser window will be auto-launched)',
274
+ { sessionId: invokingSessionId, willBeHeaded },
275
+ willBeHeaded
276
+ ? 'CDP unavailable/declined; launching visible browser (display available)'
277
+ : 'CDP unavailable/declined; staying in headless mode (no display available)',
266
278
  );
267
279
  }
268
280
  }
@@ -304,8 +316,10 @@ class BrowserManager {
304
316
  }
305
317
 
306
318
  const launch = launchPersistentContext ?? await getDefaultLaunchFn();
307
- const ctx = await launch(profileDir, { headless: true });
308
- log.info({ profileDir }, 'Browser context created');
319
+ const headless = !canDisplayGui();
320
+ const ctx = await launch(profileDir, { headless });
321
+ this._browserLaunched = true;
322
+ log.info({ profileDir, headless }, headless ? 'Browser context created (headless)' : 'Browser context created (visible)');
309
323
  return ctx;
310
324
  })();
311
325
 
@@ -391,9 +405,22 @@ class BrowserManager {
391
405
  // Track downloads for this page
392
406
  this.setupDownloadTracking(sessionId, page);
393
407
 
394
- // In CDP mode, keep the window minimized unless we're in an active handoff.
395
- if (this._browserMode === 'cdp' && !this.interactiveModeSessions.has(sessionId)) {
396
- await this.moveWindowOffscreen();
408
+ // For launched browsers (not CDP-connected), create a page-level CDP session
409
+ // so we can position the browser window. Browser domain commands (setWindowBounds,
410
+ // getWindowForTarget) are accessible from page-level CDP sessions.
411
+ if (!this.browserCdpSession && this._browserLaunched && this._browserMode !== 'cdp') {
412
+ try {
413
+ const rawPage = page as unknown as RawPlaywrightPage;
414
+ this.browserCdpSession = await rawPage.context().newCDPSession(rawPage);
415
+ await this.ensureBrowserWindowId();
416
+ } catch (err) {
417
+ log.warn({ err }, 'Failed to create CDP session for window positioning');
418
+ }
419
+ }
420
+
421
+ // Position the browser window so the user can watch.
422
+ if (this.browserCdpSession && !this.interactiveModeSessions.has(sessionId)) {
423
+ await this.positionWindowSidebar();
397
424
  }
398
425
 
399
426
  log.debug({ sessionId }, 'Session page created');
@@ -530,7 +557,7 @@ class BrowserManager {
530
557
 
531
558
  await cdp.send('Page.startScreencast', {
532
559
  format: 'jpeg',
533
- quality: 30,
560
+ quality: 45,
534
561
  maxWidth: SCREENCAST_WIDTH,
535
562
  maxHeight: SCREENCAST_HEIGHT,
536
563
  everyNthFrame: 4,
@@ -569,7 +596,7 @@ class BrowserManager {
569
596
 
570
597
  /**
571
598
  * Create a browser-level CDP session and discover the window ID.
572
- * Called once after browser launch/connect so moveWindowOffscreen/Onscreen can work.
599
+ * Called once after browser launch/connect so positionWindowSidebar/moveWindowOnscreen can work.
573
600
  */
574
601
  private async initBrowserCdpSession(): Promise<void> {
575
602
  if (!this.cdpBrowser) return;
@@ -607,28 +634,24 @@ class BrowserManager {
607
634
  }
608
635
 
609
636
  /**
610
- * Hide the browser window during non-handoff automation to avoid focus theft.
637
+ * Position the browser window small on the right side of the screen so the
638
+ * user can watch automation while still seeing assistant messages on the left.
611
639
  */
612
- async moveWindowOffscreen(): Promise<void> {
640
+ async positionWindowSidebar(): Promise<void> {
613
641
  if (!this.browserCdpSession) return;
614
642
  const windowId = await this.ensureBrowserWindowId();
615
643
  if (windowId == null) return;
616
644
  try {
617
645
  await this.browserCdpSession.send('Browser.setWindowBounds', {
618
646
  windowId,
619
- bounds: { windowState: 'minimized' },
647
+ bounds: { left: 480, top: 40, width: 940, height: 700, windowState: 'normal' },
620
648
  });
621
- log.debug('moveWindowOffscreen: minimized browser window via CDP');
649
+ log.debug('positionWindowSidebar: placed browser window in top-right');
622
650
  } catch (err) {
623
- log.warn({ err }, 'moveWindowOffscreen: minimize failed, attempting offscreen bounds');
624
- try {
625
- await this.browserCdpSession.send('Browser.setWindowBounds', {
626
- windowId,
627
- bounds: { left: -32000, top: -32000, windowState: 'normal' },
628
- });
629
- } catch (boundsErr) {
630
- log.warn({ err: boundsErr }, 'moveWindowOffscreen: offscreen bounds failed');
631
- }
651
+ log.warn({ err }, 'positionWindowSidebar: failed to position window');
652
+ // CDP session may be stale (e.g. page closed) — clear it so it gets recreated
653
+ this.browserCdpSession = null;
654
+ this.browserWindowId = null;
632
655
  }
633
656
  }
634
657
 
@@ -642,7 +665,7 @@ class BrowserManager {
642
665
  try {
643
666
  await this.browserCdpSession.send('Browser.setWindowBounds', {
644
667
  windowId,
645
- bounds: { left: 100, top: 100, width: 1280, height: 960, windowState: 'normal' },
668
+ bounds: { left: 200, top: 40, width: 1100, height: 820, windowState: 'normal' },
646
669
  });
647
670
  log.debug('moveWindowOnscreen: moved window onscreen via CDP');
648
671
  } catch (err) {
@@ -676,9 +699,16 @@ class BrowserManager {
676
699
  existing();
677
700
  }
678
701
 
702
+ // Capture the initial URL so we can auto-detect page changes
703
+ const page = this.pages.get(sessionId);
704
+ const initialUrl = page && !page.isClosed() ? page.url() : null;
705
+
679
706
  return new Promise<void>((resolve) => {
707
+ let pollTimer: ReturnType<typeof setInterval> | null = null;
708
+
680
709
  const resolver = () => {
681
710
  clearTimeout(timer);
711
+ if (pollTimer) clearInterval(pollTimer);
682
712
  if (this.handoffResolvers.get(sessionId) === resolver) {
683
713
  this.handoffResolvers.delete(sessionId);
684
714
  }
@@ -686,6 +716,7 @@ class BrowserManager {
686
716
  };
687
717
 
688
718
  const timer = setTimeout(() => {
719
+ if (pollTimer) clearInterval(pollTimer);
689
720
  if (this.handoffResolvers.get(sessionId) === resolver) {
690
721
  this.handoffResolvers.delete(sessionId);
691
722
  }
@@ -694,6 +725,30 @@ class BrowserManager {
694
725
  }, timeoutMs);
695
726
 
696
727
  this.handoffResolvers.set(sessionId, resolver);
728
+
729
+ // Poll for URL changes — auto-resolve when the page navigates
730
+ // (e.g., CAPTCHA solved, login redirect)
731
+ if (initialUrl && page) {
732
+ pollTimer = setInterval(() => {
733
+ try {
734
+ if (page.isClosed()) {
735
+ this.interactiveModeSessions.delete(sessionId);
736
+ resolver();
737
+ return;
738
+ }
739
+ const currentUrl = page.url();
740
+ if (currentUrl !== initialUrl) {
741
+ log.info({ sessionId, from: initialUrl, to: currentUrl }, 'Handoff auto-resolved: URL changed');
742
+ this.interactiveModeSessions.delete(sessionId);
743
+ resolver();
744
+ }
745
+ } catch {
746
+ // Page may have been closed — resolve gracefully
747
+ this.interactiveModeSessions.delete(sessionId);
748
+ resolver();
749
+ }
750
+ }, 2000);
751
+ }
697
752
  });
698
753
  }
699
754