@swarmclawai/swarmclaw 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197):
  1. package/README.md +81 -22
  2. package/package.json +1 -1
  3. package/src/app/api/agents/[id]/route.ts +26 -0
  4. package/src/app/api/agents/[id]/thread/route.ts +36 -7
  5. package/src/app/api/agents/route.ts +12 -1
  6. package/src/app/api/auth/route.ts +76 -7
  7. package/src/app/api/chatrooms/[id]/chat/route.ts +7 -2
  8. package/src/app/api/chats/[id]/browser/route.ts +5 -1
  9. package/src/app/api/chats/[id]/chat/route.ts +7 -3
  10. package/src/app/api/chats/[id]/main-loop/route.ts +7 -88
  11. package/src/app/api/chats/[id]/messages/route.ts +19 -13
  12. package/src/app/api/chats/[id]/route.ts +18 -0
  13. package/src/app/api/chats/[id]/stop/route.ts +6 -1
  14. package/src/app/api/chats/route.ts +16 -0
  15. package/src/app/api/connectors/[id]/doctor/route.ts +26 -0
  16. package/src/app/api/connectors/doctor/route.ts +13 -0
  17. package/src/app/api/files/open/route.ts +16 -14
  18. package/src/app/api/memory/maintenance/route.ts +11 -1
  19. package/src/app/api/openclaw/agent-files/route.ts +27 -4
  20. package/src/app/api/openclaw/skills/route.ts +11 -3
  21. package/src/app/api/plugins/dependencies/route.ts +24 -0
  22. package/src/app/api/plugins/install/route.ts +15 -92
  23. package/src/app/api/plugins/route.ts +3 -26
  24. package/src/app/api/plugins/settings/route.ts +17 -12
  25. package/src/app/api/plugins/ui/route.ts +1 -0
  26. package/src/app/api/settings/route.ts +49 -7
  27. package/src/app/api/tasks/[id]/route.ts +15 -6
  28. package/src/app/api/tasks/bulk/route.ts +2 -2
  29. package/src/app/api/tasks/route.ts +9 -4
  30. package/src/app/api/webhooks/[id]/route.ts +8 -1
  31. package/src/app/page.tsx +9 -2
  32. package/src/cli/index.js +4 -0
  33. package/src/cli/index.ts +3 -10
  34. package/src/components/agents/agent-card.tsx +15 -12
  35. package/src/components/agents/agent-chat-list.tsx +101 -1
  36. package/src/components/agents/agent-list.tsx +46 -9
  37. package/src/components/agents/agent-sheet.tsx +207 -16
  38. package/src/components/agents/inspector-panel.tsx +108 -48
  39. package/src/components/auth/access-key-gate.tsx +36 -97
  40. package/src/components/chat/chat-area.tsx +29 -13
  41. package/src/components/chat/chat-card.tsx +4 -20
  42. package/src/components/chat/chat-header.tsx +255 -353
  43. package/src/components/chat/chat-list.tsx +7 -9
  44. package/src/components/chat/checkpoint-timeline.tsx +1 -1
  45. package/src/components/chat/message-list.tsx +3 -1
  46. package/src/components/chatrooms/chatroom-view.tsx +347 -205
  47. package/src/components/connectors/connector-list.tsx +265 -127
  48. package/src/components/connectors/connector-sheet.tsx +217 -0
  49. package/src/components/home/home-view.tsx +128 -4
  50. package/src/components/layout/app-layout.tsx +383 -194
  51. package/src/components/layout/mobile-header.tsx +26 -8
  52. package/src/components/plugins/plugin-list.tsx +15 -3
  53. package/src/components/plugins/plugin-sheet.tsx +118 -9
  54. package/src/components/projects/project-detail.tsx +183 -0
  55. package/src/components/shared/agent-picker-list.tsx +2 -2
  56. package/src/components/shared/command-palette.tsx +111 -24
  57. package/src/components/shared/settings/plugin-manager.tsx +20 -4
  58. package/src/components/shared/settings/section-capability-policy.tsx +105 -0
  59. package/src/components/shared/settings/section-heartbeat.tsx +77 -0
  60. package/src/components/shared/settings/section-orchestrator.tsx +3 -3
  61. package/src/components/shared/settings/section-runtime-loop.tsx +5 -5
  62. package/src/components/shared/settings/section-secrets.tsx +6 -6
  63. package/src/components/shared/settings/section-user-preferences.tsx +1 -1
  64. package/src/components/shared/settings/section-voice.tsx +5 -1
  65. package/src/components/shared/settings/section-web-search.tsx +10 -2
  66. package/src/components/shared/settings/settings-page.tsx +245 -46
  67. package/src/components/tasks/approvals-panel.tsx +205 -18
  68. package/src/components/tasks/task-board.tsx +242 -46
  69. package/src/components/usage/metrics-dashboard.tsx +74 -1
  70. package/src/components/wallets/wallet-panel.tsx +17 -5
  71. package/src/components/webhooks/webhook-sheet.tsx +7 -7
  72. package/src/lib/auth.ts +17 -0
  73. package/src/lib/chat-streaming-state.test.ts +108 -0
  74. package/src/lib/chat-streaming-state.ts +108 -0
  75. package/src/lib/openclaw-agent-id.test.ts +14 -0
  76. package/src/lib/openclaw-agent-id.ts +31 -0
  77. package/src/lib/server/agent-assignment.test.ts +112 -0
  78. package/src/lib/server/agent-assignment.ts +169 -0
  79. package/src/lib/server/approval-connector-notify.test.ts +253 -0
  80. package/src/lib/server/approvals-auto-approve.test.ts +205 -0
  81. package/src/lib/server/approvals.ts +483 -75
  82. package/src/lib/server/autonomy-runtime.test.ts +341 -0
  83. package/src/lib/server/browser-state.test.ts +118 -0
  84. package/src/lib/server/browser-state.ts +123 -0
  85. package/src/lib/server/build-llm.test.ts +36 -0
  86. package/src/lib/server/build-llm.ts +11 -4
  87. package/src/lib/server/builtin-plugins.ts +34 -0
  88. package/src/lib/server/chat-execution-heartbeat.test.ts +40 -0
  89. package/src/lib/server/chat-execution-tool-events.test.ts +134 -0
  90. package/src/lib/server/chat-execution.ts +250 -61
  91. package/src/lib/server/chatroom-health.test.ts +26 -0
  92. package/src/lib/server/chatroom-health.ts +2 -3
  93. package/src/lib/server/chatroom-helpers.test.ts +67 -2
  94. package/src/lib/server/chatroom-helpers.ts +45 -5
  95. package/src/lib/server/connectors/discord.ts +175 -11
  96. package/src/lib/server/connectors/doctor.test.ts +80 -0
  97. package/src/lib/server/connectors/doctor.ts +116 -0
  98. package/src/lib/server/connectors/manager.ts +946 -110
  99. package/src/lib/server/connectors/policy.test.ts +222 -0
  100. package/src/lib/server/connectors/policy.ts +452 -0
  101. package/src/lib/server/connectors/slack.ts +188 -9
  102. package/src/lib/server/connectors/telegram.ts +65 -15
  103. package/src/lib/server/connectors/thread-context.test.ts +44 -0
  104. package/src/lib/server/connectors/thread-context.ts +72 -0
  105. package/src/lib/server/connectors/types.ts +41 -11
  106. package/src/lib/server/daemon-state.ts +59 -1
  107. package/src/lib/server/data-dir.ts +13 -0
  108. package/src/lib/server/delegation-jobs.test.ts +140 -0
  109. package/src/lib/server/delegation-jobs.ts +248 -0
  110. package/src/lib/server/document-utils.test.ts +47 -0
  111. package/src/lib/server/document-utils.ts +397 -0
  112. package/src/lib/server/heartbeat-service.ts +13 -39
  113. package/src/lib/server/heartbeat-source.test.ts +22 -0
  114. package/src/lib/server/heartbeat-source.ts +7 -0
  115. package/src/lib/server/identity-continuity.test.ts +77 -0
  116. package/src/lib/server/identity-continuity.ts +127 -0
  117. package/src/lib/server/mailbox-utils.ts +347 -0
  118. package/src/lib/server/main-agent-loop.ts +27 -967
  119. package/src/lib/server/memory-db.ts +4 -6
  120. package/src/lib/server/memory-tiers.ts +40 -0
  121. package/src/lib/server/openclaw-agent-resolver.test.ts +70 -0
  122. package/src/lib/server/openclaw-agent-resolver.ts +128 -0
  123. package/src/lib/server/openclaw-exec-config.ts +5 -6
  124. package/src/lib/server/openclaw-skills-normalize.test.ts +56 -0
  125. package/src/lib/server/openclaw-skills-normalize.ts +136 -0
  126. package/src/lib/server/openclaw-sync.ts +3 -2
  127. package/src/lib/server/orchestrator-lg.ts +17 -6
  128. package/src/lib/server/orchestrator.ts +2 -2
  129. package/src/lib/server/playwright-proxy.mjs +27 -3
  130. package/src/lib/server/plugins.test.ts +207 -0
  131. package/src/lib/server/plugins.ts +822 -69
  132. package/src/lib/server/provider-health.ts +33 -3
  133. package/src/lib/server/queue.ts +3 -20
  134. package/src/lib/server/scheduler.ts +2 -0
  135. package/src/lib/server/session-archive-memory.test.ts +85 -0
  136. package/src/lib/server/session-archive-memory.ts +230 -0
  137. package/src/lib/server/session-mailbox.ts +8 -18
  138. package/src/lib/server/session-reset-policy.test.ts +99 -0
  139. package/src/lib/server/session-reset-policy.ts +311 -0
  140. package/src/lib/server/session-run-manager.ts +33 -80
  141. package/src/lib/server/session-tools/autonomy-tools.test.ts +105 -0
  142. package/src/lib/server/session-tools/calendar.ts +2 -12
  143. package/src/lib/server/session-tools/connector.ts +109 -8
  144. package/src/lib/server/session-tools/context.ts +14 -2
  145. package/src/lib/server/session-tools/crawl.ts +447 -0
  146. package/src/lib/server/session-tools/crud.ts +70 -32
  147. package/src/lib/server/session-tools/delegate-fallback.test.ts +219 -0
  148. package/src/lib/server/session-tools/delegate.ts +406 -20
  149. package/src/lib/server/session-tools/discovery.ts +22 -4
  150. package/src/lib/server/session-tools/document.ts +283 -0
  151. package/src/lib/server/session-tools/email.ts +1 -3
  152. package/src/lib/server/session-tools/extract.ts +137 -0
  153. package/src/lib/server/session-tools/file-normalize.test.ts +93 -0
  154. package/src/lib/server/session-tools/file-send.test.ts +84 -1
  155. package/src/lib/server/session-tools/file.ts +237 -24
  156. package/src/lib/server/session-tools/human-loop.ts +227 -0
  157. package/src/lib/server/session-tools/image-gen.ts +1 -3
  158. package/src/lib/server/session-tools/index.ts +56 -1
  159. package/src/lib/server/session-tools/mailbox.ts +276 -0
  160. package/src/lib/server/session-tools/memory.ts +35 -3
  161. package/src/lib/server/session-tools/monitor.ts +150 -7
  162. package/src/lib/server/session-tools/normalize-tool-args.ts +17 -14
  163. package/src/lib/server/session-tools/platform-normalize.test.ts +142 -0
  164. package/src/lib/server/session-tools/platform.ts +142 -4
  165. package/src/lib/server/session-tools/plugin-creator.ts +86 -23
  166. package/src/lib/server/session-tools/primitive-tools.test.ts +257 -0
  167. package/src/lib/server/session-tools/replicate.ts +1 -3
  168. package/src/lib/server/session-tools/schedule.ts +20 -10
  169. package/src/lib/server/session-tools/session-info.ts +36 -3
  170. package/src/lib/server/session-tools/session-tools-wiring.test.ts +31 -17
  171. package/src/lib/server/session-tools/subagent.ts +193 -27
  172. package/src/lib/server/session-tools/table.ts +587 -0
  173. package/src/lib/server/session-tools/wallet.ts +13 -10
  174. package/src/lib/server/session-tools/web-browser-config.test.ts +39 -0
  175. package/src/lib/server/session-tools/web.ts +896 -100
  176. package/src/lib/server/storage.ts +226 -7
  177. package/src/lib/server/stream-agent-chat.ts +46 -21
  178. package/src/lib/server/structured-extract.test.ts +72 -0
  179. package/src/lib/server/structured-extract.ts +373 -0
  180. package/src/lib/server/task-mention.test.ts +16 -2
  181. package/src/lib/server/task-mention.ts +61 -10
  182. package/src/lib/server/tool-aliases.ts +44 -7
  183. package/src/lib/server/tool-capability-policy.ts +6 -0
  184. package/src/lib/server/tool-retry.ts +2 -0
  185. package/src/lib/server/watch-jobs.test.ts +173 -0
  186. package/src/lib/server/watch-jobs.ts +532 -0
  187. package/src/lib/server/ws-hub.ts +5 -3
  188. package/src/lib/validation/schemas.test.ts +26 -0
  189. package/src/lib/validation/schemas.ts +7 -0
  190. package/src/lib/ws-client.ts +14 -12
  191. package/src/proxy.ts +5 -5
  192. package/src/stores/use-app-store.ts +0 -6
  193. package/src/stores/use-chat-store.ts +31 -2
  194. package/src/types/index.ts +287 -44
  195. package/src/components/chat/new-chat-sheet.tsx +0 -253
  196. package/src/lib/server/main-session.ts +0 -17
  197. package/src/lib/server/session-run-manager.test.ts +0 -26
@@ -13,6 +13,14 @@ import { withRetry } from '../tool-retry'
13
13
  import type { Plugin, PluginHooks } from '@/types'
14
14
  import { getPluginManager } from '../plugins'
15
15
  import { normalizeToolInputArgs } from './normalize-tool-args'
16
+ import {
17
+ ensureSessionBrowserProfileId,
18
+ getBrowserProfileDir,
19
+ markBrowserSessionClosed,
20
+ recordBrowserObservation,
21
+ removeBrowserSessionRecord,
22
+ upsertBrowserSessionRecord,
23
+ } from '../browser-state'
16
24
 
17
25
  // --- Search result compression logic ---
18
26
  async function compressSearchResults(results: any[], query: string, bctx: any): Promise<string | null> {
@@ -48,13 +56,91 @@ async function compressSearchResults(results: any[], query: string, bctx: any):
48
56
  return compressed.trim() || null
49
57
  }
50
58
 
51
- export const activeBrowsers = new Map<string, { client: any; server: any; createdAt: number }>()
59
+ type BrowserRuntimeEntry = {
60
+ client: any
61
+ server: any
62
+ createdAt: number
63
+ profileId: string
64
+ profileDir: string
65
+ refCount: number
66
+ }
67
+
68
+ export const activeBrowsers = new Map<string, BrowserRuntimeEntry>()
69
+ const pendingBrowserInitializations = new Map<string, Promise<BrowserRuntimeEntry>>()
70
+
71
+ export function buildBrowserConnectionOptions(profileDir: string) {
72
+ return {
73
+ browser: {
74
+ userDataDir: profileDir,
75
+ launchOptions: { headless: true },
76
+ contextOptions: {
77
+ viewport: { width: 1440, height: 900 },
78
+ },
79
+ },
80
+ imageResponses: 'allow' as const,
81
+ capabilities: ['core', 'pdf', 'vision', 'network', 'storage'],
82
+ // Keep browser state isolated per session/profile. The upstream shared
83
+ // context mode is process-global and causes unrelated agent sessions to
84
+ // contend with each other.
85
+ sharedBrowserContext: false,
86
+ timeouts: {
87
+ action: 15_000,
88
+ navigation: 60_000,
89
+ },
90
+ }
91
+ }
92
+
93
+ export function buildBrowserStdioServerParams(profileDir: string) {
94
+ const cliCandidates = [
95
+ path.join(process.cwd(), 'node_modules', '@playwright', 'mcp', 'cli.js'),
96
+ path.join(process.cwd(), '[project]', 'node_modules', '@playwright', 'mcp', 'cli.js'),
97
+ ]
98
+ const cliPath = cliCandidates.find((candidate) => fs.existsSync(candidate)) || cliCandidates[0]
99
+ const outputDir = path.join(profileDir, 'mcp-output')
100
+ const env = sanitizePlaywrightMcpEnv()
101
+ return {
102
+ command: process.execPath,
103
+ args: [
104
+ cliPath,
105
+ '--headless',
106
+ '--user-data-dir', profileDir,
107
+ '--output-dir', outputDir,
108
+ '--caps', 'vision,pdf',
109
+ '--image-responses', 'allow',
110
+ '--output-mode', 'file',
111
+ '--timeout-action', '15000',
112
+ '--timeout-navigation', '60000',
113
+ ],
114
+ env: {
115
+ ...env,
116
+ PLAYWRIGHT_MCP_USER_DATA_DIR: profileDir,
117
+ PLAYWRIGHT_MCP_HEADLESS: '1',
118
+ PLAYWRIGHT_MCP_IMAGE_RESPONSES: 'allow',
119
+ PLAYWRIGHT_MCP_OUTPUT_DIR: outputDir,
120
+ PLAYWRIGHT_MCP_OUTPUT_MODE: 'file',
121
+ PLAYWRIGHT_MCP_TIMEOUT_ACTION: '15000',
122
+ PLAYWRIGHT_MCP_TIMEOUT_NAVIGATION: '60000',
123
+ },
124
+ stderr: 'inherit' as const,
125
+ }
126
+ }
127
+
128
+ export function sanitizePlaywrightMcpEnv(baseEnv: NodeJS.ProcessEnv = process.env): NodeJS.ProcessEnv {
129
+ const env: NodeJS.ProcessEnv = { ...baseEnv }
130
+ for (const key of Object.keys(env)) {
131
+ if (!key.toUpperCase().startsWith('PLAYWRIGHT_MCP_')) continue
132
+ delete env[key]
133
+ }
134
+ return env
135
+ }
52
136
  export function sweepOrphanedBrowsers(maxAgeMs = 30 * 60 * 1000): number {
53
137
  const now = Date.now(); let cleaned = 0
54
138
  for (const [key, entry] of activeBrowsers) {
55
139
  if (now - entry.createdAt > maxAgeMs) {
56
140
  try { entry.client?.close?.() } catch { /* ignore */ }
57
141
  try { entry.server?.close?.() } catch { /* ignore */ }
142
+ pendingBrowserInitializations.delete(key)
143
+ markBrowserSessionClosed(key, 'Browser was swept after inactivity.')
58
144
  activeBrowsers.delete(key); cleaned++
59
145
  }
60
146
  }
@@ -66,6 +152,8 @@ export function cleanupSessionBrowser(sessionId: string): void {
66
152
  try { entry.client?.close?.() } catch { /* ignore */ }
67
153
  try { entry.server?.close?.() } catch { /* ignore */ }
68
154
  activeBrowsers.delete(sessionId)
155
+ pendingBrowserInitializations.delete(sessionId)
156
+ markBrowserSessionClosed(sessionId)
69
157
  }
70
158
  }
71
159
  export function getActiveBrowserCount(): number { return activeBrowsers.size }
@@ -180,35 +268,108 @@ export function buildWebTools(bctx: ToolBuildContext): StructuredToolInterface[]
180
268
  // Browser tool (kept as direct injection for now due to complexity)
181
269
  if (bctx.hasPlugin('browser')) {
182
270
  const sessionKey = ctx?.sessionId || `anon-${Date.now()}`
271
+ const currentSession = bctx.resolveCurrentSession?.()
272
+ const profileInfo = currentSession?.id
273
+ ? ensureSessionBrowserProfileId(sessionKey)
274
+ : { profileId: sessionKey, inheritedFromSessionId: null as string | null }
275
+ const profileDir = getBrowserProfileDir(profileInfo.profileId)
183
276
  let mcpClient: any = null
184
277
  let mcpServer: any = null
185
278
  let mcpInitializing: Promise<void> | null = null
279
+ let browserLeaseHeld = false
280
+
281
+ upsertBrowserSessionRecord({
282
+ sessionId: sessionKey,
283
+ profileId: profileInfo.profileId,
284
+ profileDir,
285
+ inheritedFromSessionId: profileInfo.inheritedFromSessionId,
286
+ status: 'idle',
287
+ })
186
288
 
187
289
  const ensureMcp = (): Promise<void> => {
188
290
  if (mcpClient) return Promise.resolve()
189
291
  if (mcpInitializing) return mcpInitializing
292
+ const acquireExistingEntry = (entry: BrowserRuntimeEntry) => {
293
+ mcpClient = entry.client
294
+ mcpServer = entry.server
295
+ if (!browserLeaseHeld) {
296
+ entry.refCount = Math.max(0, entry.refCount || 0) + 1
297
+ activeBrowsers.set(sessionKey, entry)
298
+ browserLeaseHeld = true
299
+ }
300
+ }
301
+ const existing = activeBrowsers.get(sessionKey)
302
+ if (existing) {
303
+ acquireExistingEntry(existing)
304
+ return Promise.resolve()
305
+ }
190
306
  mcpInitializing = (async () => {
191
- const { createConnection } = await import('@playwright/mcp')
192
- const { Client } = await import('@modelcontextprotocol/sdk/client/index.js')
193
- const { InMemoryTransport } = await import('@modelcontextprotocol/sdk/inMemory.js')
194
- const server = await createConnection({
195
- browser: { launchOptions: { headless: true }, isolated: true },
196
- imageResponses: 'allow', capabilities: ['core', 'pdf', 'vision', 'network'],
197
- })
198
- const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair()
199
- const client = new Client({ name: 'swarmclaw', version: '1.0' })
200
- await Promise.all([client.connect(clientTransport), server.connect(serverTransport)])
201
- mcpClient = client; mcpServer = server
202
- activeBrowsers.set(sessionKey, { client, server, createdAt: Date.now() })
307
+ try {
308
+ const pending = pendingBrowserInitializations.get(sessionKey)
309
+ if (pending) {
310
+ acquireExistingEntry(await pending)
311
+ return
312
+ }
313
+
314
+ const connectPromise = (async () => {
315
+ const { Client } = await import('@modelcontextprotocol/sdk/client/index.js')
316
+ const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js')
317
+ const transport = new StdioClientTransport(buildBrowserStdioServerParams(profileDir))
318
+ const client = new Client({ name: 'swarmclaw', version: '1.0' })
319
+ await client.connect(transport)
320
+ return {
321
+ client,
322
+ server: transport,
323
+ createdAt: Date.now(),
324
+ profileId: profileInfo.profileId,
325
+ profileDir,
326
+ refCount: 0,
327
+ }
328
+ })()
329
+ pendingBrowserInitializations.set(sessionKey, connectPromise)
330
+ const entry = await connectPromise
331
+ acquireExistingEntry(entry)
332
+ upsertBrowserSessionRecord({
333
+ sessionId: sessionKey,
334
+ profileId: profileInfo.profileId,
335
+ profileDir,
336
+ inheritedFromSessionId: profileInfo.inheritedFromSessionId,
337
+ status: 'active',
338
+ lastAction: 'browser_open',
339
+ })
340
+ } finally {
341
+ if (pendingBrowserInitializations.get(sessionKey)) {
342
+ pendingBrowserInitializations.delete(sessionKey)
343
+ }
344
+ mcpInitializing = null
345
+ }
203
346
  })()
204
347
  return mcpInitializing
205
348
  }
206
349
 
207
350
  cleanupFns.push(async () => {
208
- try { mcpClient?.close?.() } catch { /* ignore */ }
209
- try { mcpServer?.close?.() } catch { /* ignore */ }
210
- activeBrowsers.delete(sessionKey)
211
- mcpClient = null; mcpServer = null
351
+ pendingBrowserInitializations.delete(sessionKey)
352
+ const entry = activeBrowsers.get(sessionKey)
353
+ const ownsActiveEntry = !!entry && entry.client === mcpClient && entry.server === mcpServer
354
+ if (ownsActiveEntry && browserLeaseHeld) {
355
+ entry.refCount = Math.max(0, (entry.refCount || 1) - 1)
356
+ if (entry.refCount === 0) {
357
+ try { entry.client?.close?.() } catch { /* ignore */ }
358
+ try { entry.server?.close?.() } catch { /* ignore */ }
359
+ activeBrowsers.delete(sessionKey)
360
+ markBrowserSessionClosed(sessionKey)
361
+ } else {
362
+ activeBrowsers.set(sessionKey, entry)
363
+ }
364
+ } else {
365
+ try { mcpClient?.close?.() } catch { /* ignore */ }
366
+ try { mcpServer?.close?.() } catch { /* ignore */ }
367
+ if (browserLeaseHeld) markBrowserSessionClosed(sessionKey)
368
+ }
369
+ mcpClient = null
370
+ mcpServer = null
371
+ mcpInitializing = null
372
+ browserLeaseHeld = false
212
373
  })
213
374
 
214
375
  const cleanPlaywrightOutput = (text: string): string => {
@@ -222,68 +383,257 @@ export function buildWebTools(bctx: ToolBuildContext): StructuredToolInterface[]
222
383
  return text.replace(/\n{3,}/g, '\n').trim()
223
384
  }
224
385
 
386
+ const extractJsonPayload = (text: string): Record<string, unknown> | unknown[] | null => {
387
+ const candidates = [
388
+ [text.indexOf('{'), text.lastIndexOf('}')],
389
+ [text.indexOf('['), text.lastIndexOf(']')],
390
+ ]
391
+ for (const [start, end] of candidates) {
392
+ if (start === -1 || end === -1 || end <= start) continue
393
+ try {
394
+ return JSON.parse(text.slice(start, end + 1))
395
+ } catch {
396
+ // try next candidate
397
+ }
398
+ }
399
+ return null
400
+ }
401
+
402
+ const stringifyStructured = (value: unknown): string => truncate(JSON.stringify(value, null, 2), MAX_OUTPUT)
403
+
404
+ const captureStructuredObservation = async () => {
405
+ const expression = `(() => {
406
+ const normalize = (value) => String(value || '').replace(/\\s+/g, ' ').trim();
407
+ const visible = (el) => {
408
+ if (!el) return false;
409
+ const style = window.getComputedStyle(el);
410
+ return style && style.display !== 'none' && style.visibility !== 'hidden';
411
+ };
412
+ const links = Array.from(document.querySelectorAll('a[href]'))
413
+ .filter(visible)
414
+ .slice(0, 25)
415
+ .map((a) => ({
416
+ text: normalize(a.innerText || a.textContent || a.getAttribute('aria-label')),
417
+ href: a.href || a.getAttribute('href') || '',
418
+ }))
419
+ .filter((entry) => entry.href);
420
+ const forms = Array.from(document.forms).slice(0, 5).map((form, index) => ({
421
+ index,
422
+ action: form.getAttribute('action') || form.action || null,
423
+ method: normalize(form.getAttribute('method') || form.method || 'get') || 'get',
424
+ fields: Array.from(form.elements).slice(0, 20).map((el) => ({
425
+ name: el.getAttribute?.('name') || null,
426
+ label: normalize(el.labels?.[0]?.innerText || el.getAttribute?.('aria-label') || el.getAttribute?.('placeholder')) || null,
427
+ type: normalize(el.getAttribute?.('type') || el.tagName || 'field').toLowerCase(),
428
+ required: !!el.required,
429
+ })),
430
+ }));
431
+ const tables = Array.from(document.querySelectorAll('table')).slice(0, 3).map((table, index) => {
432
+ const headerCells = Array.from(table.querySelectorAll('thead th')).map((th) => normalize(th.innerText || th.textContent));
433
+ const bodyRows = Array.from(table.querySelectorAll('tbody tr')).slice(0, 5).map((tr) =>
434
+ Array.from(tr.querySelectorAll('th, td')).map((cell) => normalize(cell.innerText || cell.textContent))
435
+ );
436
+ return {
437
+ index,
438
+ headers: headerCells,
439
+ rowCount: table.querySelectorAll('tbody tr').length,
440
+ rows: bodyRows,
441
+ };
442
+ });
443
+ const errors = Array.from(document.querySelectorAll('[aria-invalid="true"], .error, .field-error, .invalid, [role="alert"]'))
444
+ .filter(visible)
445
+ .slice(0, 10)
446
+ .map((el) => normalize(el.innerText || el.textContent))
447
+ .filter(Boolean);
448
+ return JSON.stringify({
449
+ url: window.location.href,
450
+ title: document.title || null,
451
+ textPreview: normalize(document.body?.innerText || document.body?.textContent || '').slice(0, 1200),
452
+ links,
453
+ forms,
454
+ tables,
455
+ errors,
456
+ });
457
+ })()`
458
+ const raw = await callMcpTool('browser_evaluate', { expression })
459
+ const parsed = extractJsonPayload(raw)
460
+ if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
461
+ const observation = {
462
+ capturedAt: Date.now(),
463
+ ...parsed,
464
+ } as any
465
+ recordBrowserObservation(sessionKey, observation)
466
+ return observation
467
+ }
468
+ const fallback = {
469
+ capturedAt: Date.now(),
470
+ url: null,
471
+ title: null,
472
+ textPreview: cleanPlaywrightOutput(raw).slice(0, 1200),
473
+ }
474
+ recordBrowserObservation(sessionKey, fallback)
475
+ return fallback
476
+ }
477
+
225
478
  const MCP_CALL_TIMEOUT_MS = 30000 // 30s timeout per browser action
226
479
  const callMcpTool = async (toolName: string, args: Record<string, any>, options?: { saveTo?: string }): Promise<string> => {
227
- await ensureMcp()
228
- const result = await Promise.race([
229
- mcpClient.callTool({ name: toolName, arguments: args }),
230
- new Promise<never>((_resolve, reject) =>
231
- setTimeout(() => reject(new Error(`Browser action "${toolName}" timed out after ${MCP_CALL_TIMEOUT_MS / 1000}s`)), MCP_CALL_TIMEOUT_MS)
232
- ),
233
- ])
234
- const isError = result?.isError === true; const content = result?.content; const savedPaths: string[] = []
235
- const saveArtifact = (buffer: Buffer, suggestedExt: string): void => {
236
- const rawSaveTo = options?.saveTo?.trim()
237
- if (!rawSaveTo) return
238
- let resolved = safePath(cwd, rawSaveTo)
239
- if (!path.extname(resolved) && suggestedExt) resolved = `${resolved}.${suggestedExt}`
240
- fs.mkdirSync(path.dirname(resolved), { recursive: true }); fs.writeFileSync(resolved, buffer)
241
- savedPaths.push(resolved)
242
- }
243
- if (Array.isArray(content)) {
244
- let parts: string[] = []
245
- const isScreenshotTool = toolName === 'browser_take_screenshot'
246
- const contentHasBinaryImage = content.some((c) => c.type === 'image' && !!c.data)
247
- for (const c of content) {
248
- if (c.type === 'image' && c.data) {
249
- const imageBuffer = Buffer.from(c.data, 'base64'); const filename = `screenshot-${Date.now()}.png`
250
- const filepath = path.join(UPLOAD_DIR, filename); fs.writeFileSync(filepath, imageBuffer)
251
- saveArtifact(imageBuffer, 'png'); parts.push(`![Screenshot](/api/uploads/${filename})`)
252
- } else if (c.type === 'resource' && c.resource?.blob) {
253
- const ext = c.resource.mimeType?.includes('pdf') ? 'pdf' : 'bin'
254
- const resourceBuffer = Buffer.from(c.resource.blob, 'base64'); const filename = `browser-${Date.now()}.${ext}`
255
- const filepath = path.join(UPLOAD_DIR, filename); fs.writeFileSync(filepath, resourceBuffer)
256
- saveArtifact(resourceBuffer, ext); parts.push(`[Download ${filename}](/api/uploads/${filename})`)
257
- } else {
258
- let text = c.text || ''
259
- const fileMatch = text.match(/\]\((\.\.\/[^\s)]+|\/[^\s)]+\.(pdf|png|jpg|jpeg|gif|webp|html|mp4|webm))\)/)
260
- if (fileMatch) {
261
- const rawPath = fileMatch[1]; const srcPath = rawPath.startsWith('/') ? rawPath : path.resolve(process.cwd(), rawPath)
262
- if (fs.existsSync(srcPath)) {
263
- const ext = path.extname(srcPath).slice(1).toLowerCase(); const IMAGE_EXTS = ['png', 'jpg', 'jpeg', 'gif', 'webp']
264
- if (IMAGE_EXTS.includes(ext) && contentHasBinaryImage) parts.push(isError ? text : cleanPlaywrightOutput(text))
265
- else {
266
- const filename = `browser-${Date.now()}.${ext}`; const destPath = path.join(UPLOAD_DIR, filename); fs.copyFileSync(srcPath, destPath)
267
- if (options?.saveTo?.trim()) {
268
- let targetPath = safePath(cwd, options.saveTo.trim())
269
- if (!path.extname(targetPath)) targetPath = `${targetPath}.${ext}`
270
- fs.mkdirSync(path.dirname(targetPath), { recursive: true }); fs.copyFileSync(srcPath, targetPath)
271
- savedPaths.push(targetPath)
480
+ const rawCall = async (): Promise<string> => {
481
+ try {
482
+ await ensureMcp()
483
+ const result = await Promise.race([
484
+ mcpClient.callTool({ name: toolName, arguments: args }),
485
+ new Promise<never>((_resolve, reject) =>
486
+ setTimeout(() => reject(new Error(`Browser action "${toolName}" timed out after ${MCP_CALL_TIMEOUT_MS / 1000}s`)), MCP_CALL_TIMEOUT_MS),
487
+ ),
488
+ ])
489
+ const isError = result?.isError === true
490
+ const content = result?.content
491
+ const savedPaths: string[] = []
492
+ const artifacts: Array<{ kind: 'snapshot' | 'screenshot' | 'download' | 'pdf'; path: string; url?: string | null; filename?: string | null; createdAt: number }> = []
493
+ const saveArtifact = (buffer: Buffer, suggestedExt: string): void => {
494
+ const rawSaveTo = options?.saveTo?.trim()
495
+ if (!rawSaveTo) return
496
+ let resolved = safePath(cwd, rawSaveTo)
497
+ if (!path.extname(resolved) && suggestedExt) resolved = `${resolved}.${suggestedExt}`
498
+ fs.mkdirSync(path.dirname(resolved), { recursive: true })
499
+ fs.writeFileSync(resolved, buffer)
500
+ savedPaths.push(resolved)
501
+ }
502
+ if (Array.isArray(content)) {
503
+ let parts: string[] = []
504
+ const isScreenshotTool = toolName === 'browser_take_screenshot'
505
+ const contentHasBinaryImage = content.some((c) => c.type === 'image' && !!c.data)
506
+ for (const c of content) {
507
+ if (c.type === 'image' && c.data) {
508
+ const imageBuffer = Buffer.from(c.data, 'base64')
509
+ const filename = `screenshot-${Date.now()}.png`
510
+ const filepath = path.join(UPLOAD_DIR, filename)
511
+ fs.writeFileSync(filepath, imageBuffer)
512
+ saveArtifact(imageBuffer, 'png')
513
+ artifacts.push({ kind: 'screenshot', path: filepath, url: `/api/uploads/${filename}`, filename, createdAt: Date.now() })
514
+ parts.push(`Screenshot saved to /api/uploads/${filename}`)
515
+ parts.push(`![Screenshot](/api/uploads/${filename})`)
516
+ } else if (c.type === 'resource' && c.resource?.blob) {
517
+ const ext = c.resource.mimeType?.includes('pdf') ? 'pdf' : 'bin'
518
+ const resourceBuffer = Buffer.from(c.resource.blob, 'base64')
519
+ const filename = `browser-${Date.now()}.${ext}`
520
+ const filepath = path.join(UPLOAD_DIR, filename)
521
+ fs.writeFileSync(filepath, resourceBuffer)
522
+ saveArtifact(resourceBuffer, ext)
523
+ artifacts.push({
524
+ kind: ext === 'pdf' ? 'pdf' : 'download',
525
+ path: filepath,
526
+ url: `/api/uploads/${filename}`,
527
+ filename,
528
+ createdAt: Date.now(),
529
+ })
530
+ parts.push(`[Download ${filename}](/api/uploads/${filename})`)
531
+ } else {
532
+ const text = c.text || ''
533
+ const fileMatch = text.match(/\]\((\.\.\/[^\s)]+|\/[^\s)]+\.(pdf|png|jpg|jpeg|gif|webp|html|mp4|webm))\)/)
534
+ if (fileMatch) {
535
+ const rawPath = fileMatch[1]
536
+ const srcPath = rawPath.startsWith('/') ? rawPath : path.resolve(process.cwd(), rawPath)
537
+ if (fs.existsSync(srcPath)) {
538
+ const ext = path.extname(srcPath).slice(1).toLowerCase()
539
+ const IMAGE_EXTS = ['png', 'jpg', 'jpeg', 'gif', 'webp']
540
+ if (IMAGE_EXTS.includes(ext) && contentHasBinaryImage) {
541
+ continue
542
+ } else {
543
+ const filename = `browser-${Date.now()}.${ext}`
544
+ const destPath = path.join(UPLOAD_DIR, filename)
545
+ fs.copyFileSync(srcPath, destPath)
546
+ if (options?.saveTo?.trim()) {
547
+ let targetPath = safePath(cwd, options.saveTo.trim())
548
+ if (!path.extname(targetPath)) targetPath = `${targetPath}.${ext}`
549
+ fs.mkdirSync(path.dirname(targetPath), { recursive: true })
550
+ fs.copyFileSync(srcPath, targetPath)
551
+ savedPaths.push(targetPath)
552
+ }
553
+ artifacts.push({
554
+ kind: ext === 'pdf' ? 'pdf' : 'download',
555
+ path: destPath,
556
+ url: `/api/uploads/${filename}`,
557
+ filename,
558
+ createdAt: Date.now(),
559
+ })
560
+ parts.push(IMAGE_EXTS.includes(ext) ? `![Screenshot](/api/uploads/${filename})` : `[Download ${filename}](/api/uploads/${filename})`)
561
+ }
562
+ } else {
563
+ parts.push(isError ? text : cleanPlaywrightOutput(text))
272
564
  }
273
- parts.push(IMAGE_EXTS.includes(ext) ? `![Screenshot](/api/uploads/${filename})` : `[Download ${filename}](/api/uploads/${filename})`)
565
+ } else {
566
+ parts.push(isError ? text : cleanPlaywrightOutput(text))
274
567
  }
275
- } else parts.push(isError ? text : cleanPlaywrightOutput(text))
276
- } else parts.push(isError ? text : cleanPlaywrightOutput(text))
568
+ }
569
+ }
570
+ if (isScreenshotTool) parts = dedupeScreenshotMarkdownLines(parts)
571
+ if (savedPaths.length > 0) {
572
+ const unique = Array.from(new Set(savedPaths))
573
+ parts.push(`Saved to: ${unique.map((p) => path.relative(cwd, p) || '.').join(', ')}`)
574
+ }
575
+ upsertBrowserSessionRecord({
576
+ sessionId: sessionKey,
577
+ profileId: profileInfo.profileId,
578
+ profileDir,
579
+ status: 'active',
580
+ lastAction: toolName,
581
+ lastError: isError ? parts.join('\n').slice(0, 1000) : null,
582
+ artifacts,
583
+ })
584
+ return parts.join('\n')
277
585
  }
586
+ const fallback = JSON.stringify(result)
587
+ upsertBrowserSessionRecord({
588
+ sessionId: sessionKey,
589
+ profileId: profileInfo.profileId,
590
+ profileDir,
591
+ status: 'active',
592
+ lastAction: toolName,
593
+ lastError: isError ? fallback.slice(0, 1000) : null,
594
+ })
595
+ return fallback
596
+ } catch (err: unknown) {
597
+ const message = err instanceof Error ? err.message : String(err)
598
+ upsertBrowserSessionRecord({
599
+ sessionId: sessionKey,
600
+ profileId: profileInfo.profileId,
601
+ profileDir,
602
+ status: 'error',
603
+ lastAction: toolName,
604
+ lastError: message,
605
+ })
606
+ return `Error: ${message}`
278
607
  }
279
- if (isScreenshotTool) parts = dedupeScreenshotMarkdownLines(parts)
280
- if (savedPaths.length > 0) {
281
- const unique = Array.from(new Set(savedPaths))
282
- parts.push(`Saved to: ${unique.map((p) => path.relative(cwd, p) || '.').join(', ')}`)
283
- }
284
- return parts.join('\n')
285
608
  }
286
- return JSON.stringify(result)
609
+
610
+ return withRetry(rawCall, undefined, {
611
+ maxAttempts: 3,
612
+ backoffMs: 1000,
613
+ retryable: [
614
+ /timed out/i,
615
+ /ERR_ABORTED/i,
616
+ /Target closed/i,
617
+ /Execution context was destroyed/i,
618
+ /SharedContextFactory already exists/i,
619
+ /ECONNRESET/i,
620
+ /temporarily unavailable/i,
621
+ ],
622
+ onRetry: async (_attempt, result) => {
623
+ if (/SharedContextFactory already exists/i.test(result)) {
624
+ cleanupSessionBrowser(sessionKey)
625
+ upsertBrowserSessionRecord({
626
+ sessionId: sessionKey,
627
+ profileId: profileInfo.profileId,
628
+ profileDir,
629
+ inheritedFromSessionId: profileInfo.inheritedFromSessionId,
630
+ status: 'idle',
631
+ lastAction: 'browser_recover',
632
+ lastError: 'Recovered browser transport after Playwright shared-context startup conflict.',
633
+ })
634
+ }
635
+ },
636
+ })
287
637
  }
288
638
 
289
639
  const dismissCookieBanners = async (mcpCall: (toolName: string, args: Record<string, unknown>) => Promise<string>) => {
@@ -298,72 +648,518 @@ export function buildWebTools(bctx: ToolBuildContext): StructuredToolInterface[]
298
648
  await mcpCall('browser_evaluate', { expression: js })
299
649
  }
300
650
 
651
/**
 * Fills a list of form fields through the browser MCP tools.
 *
 * Each entry of `params.fields` is expected to be an object carrying a `ref`
 * and/or `element` locator, an optional `type` (defaults to `text`) and a
 * `value`. `select` fields go through `browser_select_option`, truthy
 * `checkbox`/`radio` fields are clicked, everything else is typed.
 *
 * @param params Tool arguments; reads `fields` and `slowly`.
 * @returns `{ ok: true, filled }` on success. `{ ok: false, error }` when no
 *   fields were supplied, when no entry had a usable locator, or when an MCP
 *   call reported a failure (in the latter two cases `filled` lists whatever
 *   did succeed).
 */
const performFillForm = async (params: Record<string, unknown>) => {
  const fields = Array.isArray(params.fields) ? params.fields : []
  if (fields.length === 0) return { ok: false, error: 'fields is required for fill_form.' }

  const filled: Array<Record<string, unknown>> = []
  const failures: string[] = []
  // callMcpTool reports failures as an "Error: ..." string rather than throwing,
  // so every result has to be inspected explicitly.
  const isMcpError = (result: unknown): result is string =>
    typeof result === 'string' && result.startsWith('Error:')

  for (const field of fields) {
    if (!field || typeof field !== 'object') continue
    const entry = field as Record<string, unknown>
    const ref = typeof entry.ref === 'string' ? entry.ref : undefined
    const element = typeof entry.element === 'string' ? entry.element : undefined
    const fieldType = String(entry.type || 'text').toLowerCase()
    const value = entry.value
    if (!ref && !element) continue

    let result: unknown = null
    if (fieldType === 'select') {
      const values = Array.isArray(value) ? value.map(String) : [String(value ?? '')]
      result = await callMcpTool('browser_select_option', { ref, element, values })
    } else if (fieldType === 'checkbox' || fieldType === 'radio') {
      // Only a truthy value triggers a click; a falsy value leaves the control untouched.
      if (value === true || value === 'true' || value === 'on' || value === 'checked') {
        result = await callMcpTool('browser_click', { ref, element })
      }
    } else {
      result = await callMcpTool('browser_type', {
        ref,
        element,
        text: String(value ?? ''),
        // Passwords are never typed slowly, whatever the caller asked for.
        slowly: fieldType === 'password' ? false : params.slowly === true,
      })
    }

    if (isMcpError(result)) {
      failures.push(`${element || ref}: ${result}`)
      continue
    }
    filled.push({
      ref: ref || null,
      element: element || null,
      type: fieldType,
      value: value ?? null,
    })
  }

  if (failures.length > 0) return { ok: false, error: failures.join('; '), filled }
  if (filled.length === 0) {
    return { ok: false, error: 'No usable fields for fill_form: each field needs a ref or element.', filled }
  }
  return { ok: true, filled }
}
687
+
688
/**
 * Submits the current form and waits briefly for the page to settle.
 *
 * Clicks the control named by `submitRef`/`submitElement` when either is
 * given, otherwise presses `key` (default `Enter`). The settle delay is
 * `waitMs` clamped to 250..5000 ms (default 1000 ms); it runs inside the page
 * when possible and falls back to a local timer when the in-page wait fails.
 *
 * @param params Tool arguments; reads `submitRef`, `submitElement`, `key`, `waitMs`.
 * @returns `{ ok: true, submitted: true, page }`, or
 *   `{ ok: false, submitted: false, error, page }` when the click/key press
 *   itself reported a failure.
 */
const submitForm = async (params: Record<string, unknown>) => {
  // callMcpTool returns "Error: ..." strings instead of throwing.
  const isMcpError = (result: unknown): result is string =>
    typeof result === 'string' && result.startsWith('Error:')

  const hasSubmitControl = typeof params.submitRef === 'string' || typeof params.submitElement === 'string'
  const submitResult = hasSubmitControl
    ? await callMcpTool('browser_click', {
      ref: typeof params.submitRef === 'string' ? params.submitRef : undefined,
      element: typeof params.submitElement === 'string' ? params.submitElement : undefined,
    })
    : await callMcpTool('browser_press_key', { key: typeof params.key === 'string' ? params.key : 'Enter' })

  if (isMcpError(submitResult)) {
    return {
      ok: false,
      submitted: false,
      error: submitResult,
      page: await captureStructuredObservation(),
    }
  }

  // Clamp once so the in-page wait and the local fallback agree; NaN/Infinity fall back to the default.
  const requestedWait = typeof params.waitMs === 'number' && Number.isFinite(params.waitMs) ? params.waitMs : 1000
  const waitMs = Math.min(Math.max(250, requestedWait), 5000)

  let waitedInPage = false
  try {
    const waitResult = await callMcpTool('browser_evaluate', {
      expression: `await new Promise(resolve => setTimeout(resolve, ${waitMs}))`,
    })
    waitedInPage = !isMcpError(waitResult)
  } catch {
    waitedInPage = false
  }
  // A submit that navigates can make the in-page wait fail; wait locally instead.
  if (!waitedInPage) {
    await new Promise((resolve) => setTimeout(resolve, waitMs))
  }

  return {
    ok: true,
    submitted: true,
    page: await captureStructuredObservation(),
  }
}
713
+
714
/**
 * Scrolls the page down until some text or a CSS selector is present.
 *
 * The page is probed before each scroll and once more after the final scroll,
 * so content that only appears after the last scroll is still detected.
 *
 * @param params Tool arguments; reads `containsText` (or `text`), `selector`
 *   and `maxScrolls` (1..20, default 8; non-finite values use the default).
 * @returns `{ ok: false, error }` when neither target is given, otherwise
 *   `{ ok, found, matchedAtStep, page }` where `matchedAtStep` is the number
 *   of scrolls performed before the match (or `null`).
 */
const scrollUntil = async (params: Record<string, unknown>) => {
  const containsText = typeof params.containsText === 'string'
    ? params.containsText
    : typeof params.text === 'string'
      ? params.text
      : ''
  const selector = typeof params.selector === 'string' ? params.selector : ''
  if (!containsText && !selector) return { ok: false, error: 'containsText or selector is required for scroll_until.' }

  const requestedScrolls = typeof params.maxScrolls === 'number' && Number.isFinite(params.maxScrolls)
    ? Math.floor(params.maxScrolls)
    : 8
  const maxScrolls = Math.max(1, Math.min(20, requestedScrolls))

  let matchedAtStep = -1
  // maxScrolls scrolls need maxScrolls + 1 probes: the last probe only checks.
  for (let index = 0; index <= maxScrolls; index += 1) {
    const scrollStatement = index < maxScrolls
      ? `window.scrollBy({ top: Math.max(window.innerHeight * 0.85, 600), behavior: 'instant' });`
      : ''
    const result = await callMcpTool('browser_evaluate', {
      expression: `(() => {
        const bodyText = String(document.body?.innerText || document.body?.textContent || '');
        const selector = ${JSON.stringify(selector)};
        const containsText = ${JSON.stringify(containsText)};
        const match = (selector && !!document.querySelector(selector))
          || (containsText && bodyText.includes(containsText));
        if (match) return JSON.stringify({ found: true, scrollY: window.scrollY, step: ${index} });
        ${scrollStatement}
        return JSON.stringify({ found: false, scrollY: window.scrollY, step: ${index} });
      })()`,
    })
    const payload = extractJsonPayload(result)
    if (payload && typeof payload === 'object' && !Array.isArray(payload) && (payload as Record<string, unknown>).found === true) {
      matchedAtStep = index
      break
    }
  }

  const page = await captureStructuredObservation()
  return {
    ok: matchedAtStep >= 0,
    found: matchedAtStep >= 0,
    matchedAtStep: matchedAtStep >= 0 ? matchedAtStep : null,
    page,
  }
}
753
+
754
/**
 * Works out which URL a download request refers to.
 *
 * A non-blank `params.url` wins outright. Otherwise the current page is
 * searched for the first `<a href>` whose visible text contains `linkText`
 * or whose href contains `hrefContains` (both case-insensitive).
 *
 * @param params Tool arguments; reads `url`, `linkText`, `hrefContains`.
 * @returns The trimmed URL/href, or `null` when nothing was requested or no
 *   matching link was found.
 */
const resolveDownloadUrl = async (params: Record<string, unknown>) => {
  const trimmedString = (candidate: unknown): string => (typeof candidate === 'string' ? candidate.trim() : '')

  const explicitUrl = trimmedString(params.url)
  if (explicitUrl) return explicitUrl

  const linkText = trimmedString(params.linkText)
  const hrefContains = trimmedString(params.hrefContains)
  if (!(linkText || hrefContains)) return null

  // In-page lookup: returns the first matching anchor's href as a JSON string.
  const expression = `(() => {
    const linkText = ${JSON.stringify(linkText)};
    const hrefContains = ${JSON.stringify(hrefContains)};
    const links = Array.from(document.querySelectorAll('a[href]'));
    const match = links.find((link) => {
      const text = String(link.innerText || link.textContent || '').trim();
      const href = String(link.href || link.getAttribute('href') || '').trim();
      if (!href) return false;
      if (linkText && text.toLowerCase().includes(linkText.toLowerCase())) return true;
      if (hrefContains && href.toLowerCase().includes(hrefContains.toLowerCase())) return true;
      return false;
    });
    return JSON.stringify({ href: match ? (match.href || match.getAttribute('href') || '') : null });
  })()`

  const payload = extractJsonPayload(await callMcpTool('browser_evaluate', { expression }))
  if (!payload || typeof payload !== 'object' || Array.isArray(payload)) return null

  const href = (payload as Record<string, unknown>).href
  if (typeof href !== 'string') return null
  const cleanedHref = href.trim()
  return cleanedHref ? cleanedHref : null
}
782
+
783
/**
 * Downloads a file over HTTP(S), stores it on disk and records it as a
 * browser-session artifact.
 *
 * The URL comes from `resolveDownloadUrl`. Relative URLs are resolved against
 * the current page URL; anything that is not `http:`/`https:` is rejected
 * before any request is made. The response body is buffered fully in memory.
 * The file is written to `saveTo` (resolved via `safePath` under `cwd`) or,
 * by default, into `UPLOAD_DIR` under a name inferred from the URL path.
 *
 * @param params Tool arguments; reads `url`, `linkText`, `hrefContains`, `saveTo`.
 * @returns `{ ok: false, error, url? }` on failure, otherwise
 *   `{ ok: true, url, path, artifactUrl, filename, sizeBytes, contentType }`.
 */
const downloadFile = async (params: Record<string, unknown>) => {
  const downloadUrl = await resolveDownloadUrl(params)
  if (!downloadUrl) return { ok: false, error: 'url, linkText, or hrefContains is required for download_file.' }

  let target: URL
  try {
    target = new URL(downloadUrl)
  } catch {
    // Not an absolute URL: only now do we need the page observation, for its base URL.
    const current = await captureStructuredObservation()
    const base = typeof current.url === 'string' && current.url ? current.url : undefined
    if (!base) return { ok: false, error: 'Relative download URL requires an active page URL.' }
    try {
      target = new URL(downloadUrl, base)
    } catch {
      return { ok: false, error: `Invalid download URL: ${downloadUrl}` }
    }
  }
  // fetch() also understands schemes such as data:, so restrict explicitly.
  if (target.protocol !== 'http:' && target.protocol !== 'https:') {
    return {
      ok: false,
      error: `Unsupported download URL scheme "${target.protocol}"; only http and https are allowed.`,
      url: target.toString(),
    }
  }
  const resolvedUrl = target.toString()

  const res = await fetch(resolvedUrl, {
    headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SwarmClaw/1.0)' },
    signal: AbortSignal.timeout(30_000),
  })
  if (!res.ok) return { ok: false, error: `HTTP ${res.status}: ${res.statusText}`, url: resolvedUrl }

  const data = Buffer.from(await res.arrayBuffer())
  const inferredName = path.basename(target.pathname) || `download-${Date.now()}`
  const targetPath = typeof params.saveTo === 'string' && params.saveTo.trim()
    ? safePath(cwd, params.saveTo.trim())
    : path.join(UPLOAD_DIR, inferredName)
  fs.mkdirSync(path.dirname(targetPath), { recursive: true })
  fs.writeFileSync(targetPath, data)

  // The artifact URL is built from the bare filename, so the artifact must sit
  // directly inside UPLOAD_DIR (presumably what /api/uploads serves — confirm).
  // Compare directories rather than string prefixes so sibling folders such as
  // "<UPLOAD_DIR>-old" and nested sub-folders are both copied in.
  const storedInUploadRoot = path.dirname(path.resolve(targetPath)) === path.resolve(UPLOAD_DIR)
  const artifactPath = storedInUploadRoot
    ? targetPath
    : path.join(UPLOAD_DIR, `${Date.now()}-${path.basename(targetPath)}`)
  if (artifactPath !== targetPath) {
    fs.mkdirSync(path.dirname(artifactPath), { recursive: true })
    fs.copyFileSync(targetPath, artifactPath)
  }
  const filename = path.basename(artifactPath)
  upsertBrowserSessionRecord({
    sessionId: sessionKey,
    profileId: profileInfo.profileId,
    profileDir,
    status: 'active',
    lastAction: 'download_file',
    artifacts: [{
      kind: 'download',
      path: artifactPath,
      url: `/api/uploads/${filename}`,
      filename,
      createdAt: Date.now(),
    }],
  })

  return {
    ok: true,
    url: resolvedUrl,
    path: targetPath,
    artifactUrl: `/api/uploads/${filename}`,
    filename: path.basename(targetPath),
    sizeBytes: data.byteLength,
    contentType: res.headers.get('content-type') || null,
  }
}
848
+
849
/**
 * Runs the optional post-task checks requested by the caller.
 *
 * Each of `expectText`, `expectElement` and `expectValue` is checked only when
 * it is a non-blank string; the raw MCP tool output is stored under the same
 * key in the returned record.
 *
 * @param params Tool arguments; reads `expectText`, `expectElement`,
 *   `expectValue`, `expectValueElement`.
 * @returns A record holding one entry per check that was run.
 */
const verifyOutcome = async (params: Record<string, unknown>) => {
  // Trimmed text for non-blank strings, null for anything else.
  const nonBlank = (candidate: unknown): string | null => {
    if (typeof candidate !== 'string') return null
    const trimmed = candidate.trim()
    return trimmed ? trimmed : null
  }

  const verification: Record<string, unknown> = {}

  const expectedText = nonBlank(params.expectText)
  if (expectedText !== null) {
    verification.expectText = await callMcpTool('browser_verify_text_visible', { text: expectedText })
  }

  const expectedElement = nonBlank(params.expectElement)
  if (expectedElement !== null) {
    verification.expectElement = await callMcpTool('browser_verify_element_visible', { element: expectedElement })
  }

  const expectedValue = nonBlank(params.expectValue)
  if (expectedValue !== null) {
    const valueElement = typeof params.expectValueElement === 'string' ? params.expectValueElement : undefined
    verification.expectValue = await callMcpTool('browser_verify_value', {
      element: valueElement,
      value: expectedValue,
    })
  }

  return verification
}
865
+
866
/**
 * Runs a multi-step browser workflow in one call: optional navigation,
 * scroll-until, form fill, submit, download and verification.
 *
 * @param params Tool arguments; reads `url`, `scrollUntilText`,
 *   `scrollUntilSelector`, `maxScrolls`, `fields`, `submit`, `download`,
 *   `downloadUrl`, `linkText`, `hrefContains`, `saveTo`, `goal`, plus whatever
 *   the called helpers read from the same object.
 * @returns The failed fill result when form filling fails; otherwise a summary
 *   with `steps`, `verification`, `initialPage`, `page` and `download`.
 *   `ok` is `false` when the submit or the download reported a failure.
 */
const completeWebTask = async (params: Record<string, unknown>) => {
  const steps: string[] = []
  const pageUrl = typeof params.url === 'string' ? params.url.trim() : ''
  if (pageUrl) {
    await callMcpTool('browser_navigate', { url: pageUrl })
    steps.push(`navigate:${pageUrl}`)
    try { await dismissCookieBanners(callMcpTool) } catch { /* ignore */ }
  }

  let initialPage = await captureStructuredObservation()
  if (typeof params.scrollUntilText === 'string' || typeof params.scrollUntilSelector === 'string') {
    const scroll = await scrollUntil({
      containsText: typeof params.scrollUntilText === 'string' ? params.scrollUntilText : undefined,
      selector: typeof params.scrollUntilSelector === 'string' ? params.scrollUntilSelector : undefined,
      maxScrolls: typeof params.maxScrolls === 'number' ? params.maxScrolls : undefined,
    })
    steps.push('scroll_until')
    if (scroll.ok) initialPage = scroll.page
  }

  if (Array.isArray(params.fields) && params.fields.length > 0) {
    const filled = await performFillForm(params)
    if (!filled.ok) return filled
    steps.push('fill_form')
  }

  let submitFailed = false
  if (params.submit === true) {
    const submitted = await submitForm(params)
    submitFailed = submitted.ok === false
    steps.push('submit_form')
  }

  let download: Record<string, unknown> | null = null
  if (params.download === true || typeof params.downloadUrl === 'string' || typeof params.linkText === 'string' || typeof params.hrefContains === 'string') {
    const isNonBlank = (candidate: unknown): boolean => typeof candidate === 'string' && candidate.trim() !== ''
    const hasLinkHint = isNonBlank(params.linkText) || isNonBlank(params.hrefContains)
    // `params.url` is the page that was navigated to. Only reuse it as the
    // download URL when there is neither an explicit downloadUrl nor a link
    // hint; otherwise it would short-circuit the link lookup and the page
    // itself would be downloaded instead of the linked file.
    const explicitUrl = isNonBlank(params.downloadUrl) ? params.downloadUrl : undefined
    download = await downloadFile({
      url: explicitUrl ?? (hasLinkHint ? undefined : params.url),
      linkText: params.linkText,
      hrefContains: params.hrefContains,
      saveTo: params.saveTo,
    })
    steps.push('download_file')
  }

  const verification = await verifyOutcome(params)
  const page = await captureStructuredObservation()
  const downloadFailed = download !== null && download.ok === false
  return {
    ok: !submitFailed && !downloadFailed,
    goal: typeof params.goal === 'string' ? params.goal : null,
    steps,
    verification,
    initialPage,
    page,
    download,
  }
}
919
+
301
920
  const MCP_TOOL_MAP: Record<string, string> = {
302
- navigate: 'browser_navigate', screenshot: 'browser_take_screenshot', snapshot: 'browser_snapshot', click: 'browser_click',
303
- type: 'browser_type', press_key: 'browser_press_key', select: 'browser_select_option', evaluate: 'browser_evaluate',
304
- pdf: 'browser_pdf_save', upload: 'browser_file_upload', wait: 'browser_wait_for',
921
+ navigate: 'browser_navigate',
922
+ back: 'browser_navigate_back',
923
+ close: 'browser_close',
924
+ screenshot: 'browser_take_screenshot',
925
+ snapshot: 'browser_snapshot',
926
+ click: 'browser_click',
927
+ hover: 'browser_hover',
928
+ type: 'browser_type',
929
+ press_key: 'browser_press_key',
930
+ select: 'browser_select_option',
931
+ fill_form: 'browser_fill_form',
932
+ dialog: 'browser_handle_dialog',
933
+ evaluate: 'browser_evaluate',
934
+ run_code: 'browser_run_code',
935
+ pdf: 'browser_pdf_save',
936
+ upload: 'browser_file_upload',
937
+ wait: 'browser_wait_for',
938
+ tabs: 'browser_tabs',
939
+ network: 'browser_network_requests',
940
+ verify_text: 'browser_verify_text_visible',
941
+ verify_element: 'browser_verify_element_visible',
942
+ verify_list: 'browser_verify_list_visible',
943
+ verify_value: 'browser_verify_value',
305
944
  }
306
945
 
307
946
  tools.push(
308
947
  tool(
309
- async (params) => {
948
+ async (rawParams) => {
949
+ const params = normalizeToolInputArgs((rawParams ?? {}) as Record<string, unknown>)
310
950
  try {
311
- const { action, ...rest } = params
951
+ const action = String(params.action || '').trim()
952
+
953
+ if (action === 'profile') {
954
+ const state = upsertBrowserSessionRecord({
955
+ sessionId: sessionKey,
956
+ profileId: profileInfo.profileId,
957
+ profileDir,
958
+ inheritedFromSessionId: profileInfo.inheritedFromSessionId,
959
+ status: activeBrowsers.has(sessionKey) ? 'active' : 'idle',
960
+ })
961
+ return stringifyStructured({
962
+ sessionId: sessionKey,
963
+ active: activeBrowsers.has(sessionKey),
964
+ profileId: state.profileId,
965
+ profileDir: state.profileDir,
966
+ inheritedFromSessionId: state.inheritedFromSessionId,
967
+ currentUrl: state.currentUrl,
968
+ pageTitle: state.pageTitle,
969
+ lastObservation: state.lastObservation,
970
+ })
971
+ }
972
+
973
+ if (action === 'reset_profile') {
974
+ cleanupSessionBrowser(sessionKey)
975
+ fs.rmSync(profileDir, { recursive: true, force: true })
976
+ removeBrowserSessionRecord(sessionKey)
977
+ return stringifyStructured({
978
+ ok: true,
979
+ sessionId: sessionKey,
980
+ profileId: profileInfo.profileId,
981
+ profileDir,
982
+ reset: true,
983
+ })
984
+ }
985
+
986
+ if (action === 'read_page') {
987
+ const url = typeof params.url === 'string' ? params.url : ''
988
+ if (url) {
989
+ await callMcpTool('browser_navigate', { url })
990
+ try { await dismissCookieBanners(callMcpTool) } catch { /* ignore */ }
991
+ }
992
+ return stringifyStructured(await captureStructuredObservation())
993
+ }
994
+
995
+ if (action === 'extract_links') {
996
+ const observation = await captureStructuredObservation() as Record<string, unknown>
997
+ return stringifyStructured({
998
+ url: observation.url || null,
999
+ title: observation.title || null,
1000
+ links: Array.isArray(observation.links) ? observation.links : [],
1001
+ })
1002
+ }
1003
+
1004
+ if (action === 'extract_form_fields') {
1005
+ const observation = await captureStructuredObservation() as Record<string, unknown>
1006
+ return stringifyStructured({
1007
+ url: observation.url || null,
1008
+ title: observation.title || null,
1009
+ forms: Array.isArray(observation.forms) ? observation.forms : [],
1010
+ })
1011
+ }
1012
+
1013
+ if (action === 'extract_table') {
1014
+ const observation = await captureStructuredObservation() as Record<string, unknown>
1015
+ const tables = Array.isArray(observation.tables) ? observation.tables : []
1016
+ const tableIndex = typeof params.tableIndex === 'number' ? params.tableIndex : 0
1017
+ return stringifyStructured({
1018
+ url: observation.url || null,
1019
+ title: observation.title || null,
1020
+ table: tables[tableIndex] || null,
1021
+ tables,
1022
+ })
1023
+ }
1024
+
1025
+ if (action === 'fill_form') {
1026
+ const filled = await performFillForm(params)
1027
+ if (!filled.ok) return `Error: ${filled.error}`
1028
+ if (params.submit === true) {
1029
+ await submitForm(params)
1030
+ }
1031
+ return stringifyStructured({
1032
+ ok: true,
1033
+ filled: filled.filled,
1034
+ submitted: params.submit === true,
1035
+ page: await captureStructuredObservation(),
1036
+ })
1037
+ }
1038
+
1039
+ if (action === 'submit_form') {
1040
+ return stringifyStructured(await submitForm(params))
1041
+ }
1042
+
1043
+ if (action === 'scroll_until') {
1044
+ return stringifyStructured(await scrollUntil(params))
1045
+ }
1046
+
1047
+ if (action === 'download_file') {
1048
+ return stringifyStructured(await downloadFile(params))
1049
+ }
1050
+
1051
+ if (action === 'complete_web_task') {
1052
+ return stringifyStructured(await completeWebTask(params))
1053
+ }
1054
+
312
1055
  const mcpTool = MCP_TOOL_MAP[action]
313
1056
  if (!mcpTool) return `Unknown browser action: "${action}"`
314
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
1057
+ const rest = { ...params }
1058
+ delete rest.action
315
1059
  const args: Record<string, any> = {}
316
- for (const [k, v] of Object.entries(rest)) { if (v !== undefined && v !== null && v !== '') args[k] = v }
1060
+ for (const [k, v] of Object.entries(rest)) {
1061
+ if (v !== undefined && v !== null && v !== '') args[k] = v
1062
+ }
317
1063
 
318
- // If screenshot includes a url, navigate first then capture
319
- if (action === 'screenshot' && args.url) {
1064
+ if (action === 'tabs') {
1065
+ args.action = typeof params.tabAction === 'string' ? params.tabAction : 'list'
1066
+ delete args.tabAction
1067
+ }
1068
+ if (action === 'network') {
1069
+ args.includeStatic = params.includeStatic === true
1070
+ if (typeof params.filename !== 'string') delete args.filename
1071
+ }
1072
+ if (action === 'select' && args.option !== undefined) {
1073
+ args.values = Array.isArray(args.option) ? args.option : [String(args.option)]
1074
+ delete args.option
1075
+ }
1076
+
1077
+ if ((action === 'screenshot' || action === 'snapshot') && args.url) {
320
1078
  const navUrl = args.url
321
1079
  delete args.url
322
1080
  await callMcpTool('browser_navigate', { url: navUrl })
323
1081
  try { await dismissCookieBanners(callMcpTool) } catch { /* ignore */ }
324
1082
  }
325
1083
 
326
- // Wait for the page to finish rendering before capturing
327
- if (action === 'screenshot') {
1084
+ if (action === 'screenshot' || action === 'snapshot') {
328
1085
  try {
329
1086
  await callMcpTool('browser_evaluate', {
330
1087
  expression: `await new Promise(resolve => {
331
1088
  if (document.readyState === 'complete') {
332
- setTimeout(resolve, 1500);
1089
+ setTimeout(resolve, 1200);
333
1090
  } else {
334
- window.addEventListener('load', () => setTimeout(resolve, 1500), { once: true });
1091
+ window.addEventListener('load', () => setTimeout(resolve, 1200), { once: true });
335
1092
  setTimeout(resolve, 5000);
336
1093
  }
337
1094
  })`,
338
1095
  })
339
- } catch { /* page may not support evaluate — fall back to a flat delay */
340
- await new Promise((r) => setTimeout(r, 2000))
1096
+ } catch {
1097
+ await new Promise((r) => setTimeout(r, 1200))
341
1098
  }
342
1099
  }
343
1100
 
344
- let result = await callMcpTool(mcpTool, args, { saveTo: params.saveTo })
345
-
346
- // Playwright throws ERR_ABORTED on server-side redirects (e.g. Wikipedia Special:Random).
347
- // The browser follows the redirect fine — the original navigation just gets "aborted".
348
- // Recover by taking a snapshot of the page the browser actually landed on.
1101
+ let result = await callMcpTool(mcpTool, args, { saveTo: typeof params.saveTo === 'string' ? params.saveTo : undefined })
349
1102
  if (action === 'navigate' && result.includes('ERR_ABORTED')) {
350
1103
  await new Promise((r) => setTimeout(r, 1000))
351
1104
  result = await callMcpTool('browser_snapshot', {})
352
1105
  }
1106
+ if (action === 'navigate') {
1107
+ try { await dismissCookieBanners(callMcpTool) } catch { /* ignore */ }
1108
+ }
1109
+
1110
+ if (['navigate', 'back', 'click', 'type', 'select', 'fill_form', 'submit_form', 'press_key', 'scroll_until', 'complete_web_task'].includes(action)) {
1111
+ try { await captureStructuredObservation() } catch { /* ignore */ }
1112
+ }
1113
+
1114
+ if (action === 'close') {
1115
+ cleanupSessionBrowser(sessionKey)
1116
+ }
353
1117
 
354
- if (action === 'navigate') { try { await dismissCookieBanners(callMcpTool) } catch { /* ignore */ } }
355
1118
  return result
356
- } catch (err: unknown) { return `Error: ${err instanceof Error ? err.message : String(err)}` }
1119
+ } catch (err: unknown) {
1120
+ return `Error: ${err instanceof Error ? err.message : String(err)}`
1121
+ }
357
1122
  },
358
1123
  {
359
1124
  name: 'browser',
360
- description: 'Control the browser. Actions: navigate, screenshot, snapshot, click, type, press_key, select, evaluate, pdf, upload, wait.',
1125
+ description: 'Control a persistent browser profile. Supports low-level actions plus higher-level workflows like read_page, extract_links, extract_form_fields, extract_table, fill_form, submit_form, scroll_until, download_file, complete_web_task, profile, and reset_profile.',
361
1126
  schema: z.object({
362
- action: z.enum(['navigate', 'screenshot', 'snapshot', 'click', 'type', 'press_key', 'select', 'evaluate', 'pdf', 'upload', 'wait']),
363
- url: z.string().optional(), element: z.string().optional(), ref: z.string().optional(), text: z.string().optional(),
364
- key: z.string().optional(), option: z.string().optional(), expression: z.string().optional(),
365
- paths: z.array(z.string()).optional(), timeout: z.number().optional(), saveTo: z.string().optional(),
366
- }),
1127
+ action: z.enum([
1128
+ 'navigate',
1129
+ 'back',
1130
+ 'close',
1131
+ 'screenshot',
1132
+ 'snapshot',
1133
+ 'click',
1134
+ 'hover',
1135
+ 'type',
1136
+ 'fill_form',
1137
+ 'submit_form',
1138
+ 'scroll_until',
1139
+ 'press_key',
1140
+ 'select',
1141
+ 'dialog',
1142
+ 'evaluate',
1143
+ 'run_code',
1144
+ 'pdf',
1145
+ 'upload',
1146
+ 'wait',
1147
+ 'tabs',
1148
+ 'network',
1149
+ 'read_page',
1150
+ 'extract_links',
1151
+ 'extract_form_fields',
1152
+ 'extract_table',
1153
+ 'download_file',
1154
+ 'complete_web_task',
1155
+ 'verify_text',
1156
+ 'verify_element',
1157
+ 'verify_list',
1158
+ 'verify_value',
1159
+ 'profile',
1160
+ 'reset_profile',
1161
+ ]),
1162
+ }).passthrough(),
367
1163
  },
368
1164
  ),
369
1165
  )