@farazirfan/costar-server-executor 1.7.30 → 1.7.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. package/dist/agent/agent.d.ts +105 -0
  2. package/dist/agent/agent.d.ts.map +1 -1
  3. package/dist/agent/agent.js +535 -1
  4. package/dist/agent/agent.js.map +1 -1
  5. package/dist/agent/model-fallback.d.ts.map +1 -1
  6. package/dist/agent/model-fallback.js +14 -0
  7. package/dist/agent/model-fallback.js.map +1 -1
  8. package/dist/agent/pi-embedded-runner/compact.d.ts +7 -0
  9. package/dist/agent/pi-embedded-runner/compact.d.ts.map +1 -1
  10. package/dist/agent/pi-embedded-runner/compact.js +100 -4
  11. package/dist/agent/pi-embedded-runner/compact.js.map +1 -1
  12. package/dist/agent/pi-embedded-runner/run.d.ts +7 -0
  13. package/dist/agent/pi-embedded-runner/run.d.ts.map +1 -1
  14. package/dist/agent/pi-embedded-runner/run.js +362 -122
  15. package/dist/agent/pi-embedded-runner/run.js.map +1 -1
  16. package/dist/agent/pi-embedded-runner/subscribe.d.ts +10 -0
  17. package/dist/agent/pi-embedded-runner/subscribe.d.ts.map +1 -1
  18. package/dist/agent/pi-embedded-runner/subscribe.js +20 -0
  19. package/dist/agent/pi-embedded-runner/subscribe.js.map +1 -1
  20. package/dist/agent/pi-embedded-runner/tool-result-context-guard.d.ts +33 -0
  21. package/dist/agent/pi-embedded-runner/tool-result-context-guard.d.ts.map +1 -0
  22. package/dist/agent/pi-embedded-runner/tool-result-context-guard.js +287 -0
  23. package/dist/agent/pi-embedded-runner/tool-result-context-guard.js.map +1 -0
  24. package/dist/agent/pi-embedded-runner/tools.d.ts +8 -1
  25. package/dist/agent/pi-embedded-runner/tools.d.ts.map +1 -1
  26. package/dist/agent/pi-embedded-runner/tools.js +10 -2
  27. package/dist/agent/pi-embedded-runner/tools.js.map +1 -1
  28. package/dist/agent/pi-embedded-runner/types.d.ts +11 -0
  29. package/dist/agent/pi-embedded-runner/types.d.ts.map +1 -1
  30. package/dist/api/chat.d.ts.map +1 -1
  31. package/dist/api/chat.js +26 -0
  32. package/dist/api/chat.js.map +1 -1
  33. package/dist/browser/cdp-helpers.d.ts +13 -0
  34. package/dist/browser/cdp-helpers.d.ts.map +1 -0
  35. package/dist/browser/cdp-helpers.js +179 -0
  36. package/dist/browser/cdp-helpers.js.map +1 -0
  37. package/dist/browser/cdp.d.ts.map +1 -1
  38. package/dist/browser/cdp.helpers.d.ts +5 -2
  39. package/dist/browser/cdp.helpers.d.ts.map +1 -1
  40. package/dist/browser/cdp.helpers.js +70 -34
  41. package/dist/browser/cdp.helpers.js.map +1 -1
  42. package/dist/browser/cdp.js +30 -15
  43. package/dist/browser/cdp.js.map +1 -1
  44. package/dist/browser/chrome-executables.d.ts +13 -0
  45. package/dist/browser/chrome-executables.d.ts.map +1 -0
  46. package/dist/browser/chrome-executables.js +559 -0
  47. package/dist/browser/chrome-executables.js.map +1 -0
  48. package/dist/browser/chrome-profile-decoration.d.ts +11 -0
  49. package/dist/browser/chrome-profile-decoration.d.ts.map +1 -0
  50. package/dist/browser/chrome-profile-decoration.js +148 -0
  51. package/dist/browser/chrome-profile-decoration.js.map +1 -0
  52. package/dist/browser/chrome.d.ts +4 -4
  53. package/dist/browser/chrome.d.ts.map +1 -1
  54. package/dist/browser/chrome.executables.d.ts.map +1 -1
  55. package/dist/browser/chrome.executables.js +98 -49
  56. package/dist/browser/chrome.executables.js.map +1 -1
  57. package/dist/browser/chrome.js +72 -28
  58. package/dist/browser/chrome.js.map +1 -1
  59. package/dist/browser/chrome.profile-decoration.d.ts +1 -1
  60. package/dist/browser/chrome.profile-decoration.d.ts.map +1 -1
  61. package/dist/browser/chrome.profile-decoration.js +11 -8
  62. package/dist/browser/chrome.profile-decoration.js.map +1 -1
  63. package/dist/browser/config.d.ts +10 -13
  64. package/dist/browser/config.d.ts.map +1 -1
  65. package/dist/browser/config.js +3 -178
  66. package/dist/browser/config.js.map +1 -1
  67. package/dist/browser/constants.d.ts +3 -3
  68. package/dist/browser/constants.d.ts.map +1 -1
  69. package/dist/browser/constants.js +3 -3
  70. package/dist/browser/constants.js.map +1 -1
  71. package/dist/browser/index.d.ts +19 -0
  72. package/dist/browser/index.d.ts.map +1 -0
  73. package/dist/browser/index.js +29 -0
  74. package/dist/browser/index.js.map +1 -0
  75. package/dist/browser/pw-ai-module.d.ts.map +1 -1
  76. package/dist/browser/pw-ai-module.js +29 -6
  77. package/dist/browser/pw-ai-module.js.map +1 -1
  78. package/dist/browser/pw-ai-state.d.ts +3 -0
  79. package/dist/browser/pw-ai-state.d.ts.map +1 -0
  80. package/dist/browser/pw-ai-state.js +8 -0
  81. package/dist/browser/pw-ai-state.js.map +1 -0
  82. package/dist/browser/pw-ai.d.ts +1 -1
  83. package/dist/browser/pw-ai.d.ts.map +1 -1
  84. package/dist/browser/pw-ai.js +3 -1
  85. package/dist/browser/pw-ai.js.map +1 -1
  86. package/dist/browser/pw-role-snapshot.d.ts.map +1 -1
  87. package/dist/browser/pw-role-snapshot.js +77 -42
  88. package/dist/browser/pw-role-snapshot.js.map +1 -1
  89. package/dist/browser/pw-session.d.ts +33 -1
  90. package/dist/browser/pw-session.d.ts.map +1 -1
  91. package/dist/browser/pw-session.js +257 -48
  92. package/dist/browser/pw-session.js.map +1 -1
  93. package/dist/browser/pw-tools-core.activity.d.ts.map +1 -1
  94. package/dist/browser/pw-tools-core.activity.js +4 -2
  95. package/dist/browser/pw-tools-core.activity.js.map +1 -1
  96. package/dist/browser/pw-tools-core.downloads.d.ts.map +1 -1
  97. package/dist/browser/pw-tools-core.downloads.js +74 -42
  98. package/dist/browser/pw-tools-core.downloads.js.map +1 -1
  99. package/dist/browser/pw-tools-core.interactions.d.ts +8 -1
  100. package/dist/browser/pw-tools-core.interactions.d.ts.map +1 -1
  101. package/dist/browser/pw-tools-core.interactions.js +139 -34
  102. package/dist/browser/pw-tools-core.interactions.js.map +1 -1
  103. package/dist/browser/pw-tools-core.responses.d.ts.map +1 -1
  104. package/dist/browser/pw-tools-core.responses.js +19 -10
  105. package/dist/browser/pw-tools-core.responses.js.map +1 -1
  106. package/dist/browser/pw-tools-core.shared.d.ts.map +1 -1
  107. package/dist/browser/pw-tools-core.shared.js +2 -1
  108. package/dist/browser/pw-tools-core.shared.js.map +1 -1
  109. package/dist/browser/pw-tools-core.snapshot.d.ts.map +1 -1
  110. package/dist/browser/pw-tools-core.snapshot.js +2 -1
  111. package/dist/browser/pw-tools-core.snapshot.js.map +1 -1
  112. package/dist/browser/pw-tools-core.state.d.ts.map +1 -1
  113. package/dist/browser/pw-tools-core.state.js +13 -7
  114. package/dist/browser/pw-tools-core.state.js.map +1 -1
  115. package/dist/browser/pw-tools-core.storage.d.ts.map +1 -1
  116. package/dist/browser/pw-tools-core.storage.js +6 -3
  117. package/dist/browser/pw-tools-core.storage.js.map +1 -1
  118. package/dist/browser/target-id.d.ts.map +1 -1
  119. package/dist/browser/target-id.js +8 -4
  120. package/dist/browser/target-id.js.map +1 -1
  121. package/dist/cli.js +1 -1
  122. package/dist/cron/index.d.ts +1 -1
  123. package/dist/cron/index.d.ts.map +1 -1
  124. package/dist/cron/schedule.d.ts +46 -0
  125. package/dist/cron/schedule.d.ts.map +1 -0
  126. package/dist/cron/schedule.js +109 -0
  127. package/dist/cron/schedule.js.map +1 -0
  128. package/dist/cron/scheduler.d.ts +86 -40
  129. package/dist/cron/scheduler.d.ts.map +1 -1
  130. package/dist/cron/scheduler.js +525 -159
  131. package/dist/cron/scheduler.js.map +1 -1
  132. package/dist/cron/types.d.ts +16 -0
  133. package/dist/cron/types.d.ts.map +1 -1
  134. package/dist/heartbeat/runner.d.ts +9 -4
  135. package/dist/heartbeat/runner.d.ts.map +1 -1
  136. package/dist/heartbeat/runner.js +116 -48
  137. package/dist/heartbeat/runner.js.map +1 -1
  138. package/dist/sandbox/browser-container.d.ts +22 -0
  139. package/dist/sandbox/browser-container.d.ts.map +1 -0
  140. package/dist/sandbox/browser-container.js +141 -0
  141. package/dist/sandbox/browser-container.js.map +1 -0
  142. package/dist/sandbox/constants.d.ts +19 -0
  143. package/dist/sandbox/constants.d.ts.map +1 -0
  144. package/dist/sandbox/constants.js +19 -0
  145. package/dist/sandbox/constants.js.map +1 -0
  146. package/dist/sandbox/index.d.ts +8 -0
  147. package/dist/sandbox/index.d.ts.map +1 -0
  148. package/dist/sandbox/index.js +7 -0
  149. package/dist/sandbox/index.js.map +1 -0
  150. package/dist/sandbox/types.d.ts +32 -0
  151. package/dist/sandbox/types.d.ts.map +1 -0
  152. package/dist/sandbox/types.js +5 -0
  153. package/dist/sandbox/types.js.map +1 -0
  154. package/dist/server.d.ts +1 -1
  155. package/dist/server.d.ts.map +1 -1
  156. package/dist/server.js +24 -3
  157. package/dist/server.js.map +1 -1
  158. package/dist/supabase/cron-jobs.d.ts.map +1 -1
  159. package/dist/supabase/cron-jobs.js +16 -6
  160. package/dist/supabase/cron-jobs.js.map +1 -1
  161. package/dist/tools/browser.d.ts +5 -3
  162. package/dist/tools/browser.d.ts.map +1 -1
  163. package/dist/tools/browser.js +361 -1360
  164. package/dist/tools/browser.js.map +1 -1
  165. package/dist/web-server.d.ts.map +1 -1
  166. package/dist/web-server.js +10 -0
  167. package/dist/web-server.js.map +1 -1
  168. package/package.json +7 -4
  169. package/public/index.html +324 -56
  170. package/scripts/postinstall.js +81 -1
  171. package/scripts/sandbox-browser-entrypoint.sh +67 -0
  172. package/scripts/sandbox-browser-setup.sh +7 -0
@@ -1,78 +1,55 @@
1
1
  /**
2
- * CoStar Server Executor - Complete Browser Tool
3
- * Replicates OpenClaw's browser-tool.ts behavior using direct Playwright
2
+ * CoStar Server Executor - Browser Tool
4
3
  *
5
- * Implements all 15 actions with AI snapshots, labeled screenshots, and full automation
4
+ * Same architecture as OpenClaw: Chrome child process + CDP connection.
5
+ * All pw-* functions receive { cdpUrl, targetId } — identical to OpenClaw.
6
6
  */
7
7
  import { Type } from "@sinclair/typebox";
8
- import { chromium } from "playwright";
9
8
  import { jsonResult, readStringParam, readBooleanParam, readNumberParam, readArrayParam, } from "../utils/tool-helpers.js";
10
- // OpenClaw exact BROWSER_ACT_KINDS (from browser-tool.schema.ts)
9
+ // ─── OpenClaw browser modules (verbatim copy) ──────────────────────
10
+ import {
11
+ // Session state
12
+ ensurePageState, getPageForTargetId, closePlaywrightBrowserConnection, listPagesViaPlaywright, createPageViaPlaywright, closePageByTargetIdViaPlaywright, focusPageByTargetIdViaPlaywright,
13
+ // Interactions (all take { cdpUrl, targetId })
14
+ clickViaPlaywright, hoverViaPlaywright, dragViaPlaywright, selectOptionViaPlaywright, pressKeyViaPlaywright, typeViaPlaywright, fillFormViaPlaywright, evaluateViaPlaywright, waitForViaPlaywright, takeScreenshotViaPlaywright, screenshotWithLabelsViaPlaywright,
15
+ // Snapshots & navigation
16
+ snapshotRoleViaPlaywright, navigateViaPlaywright, resizeViewportViaPlaywright, pdfViaPlaywright,
17
+ // Downloads & uploads
18
+ armFileUploadViaPlaywright, armDialogViaPlaywright,
19
+ // Process isolation (Chrome child process + CDP)
20
+ launchCostarChrome, stopCostarChrome, } from "../browser/index.js";
21
+ // ─── Sandbox browser (Docker container) ─────────────────────────────
22
+ import { startSandboxBrowser, stopSandboxBrowser, } from "../sandbox/index.js";
23
+ // ─── Schema definitions (OpenClaw exact) ────────────────────────────
11
24
  const BROWSER_ACT_KINDS = [
12
- "click",
13
- "type",
14
- "press",
15
- "hover",
16
- "drag",
17
- "select",
18
- "fill",
19
- "resize",
20
- "wait",
21
- "evaluate",
22
- "close",
25
+ "click", "type", "press", "hover", "drag",
26
+ "select", "fill", "resize", "wait", "evaluate", "close",
23
27
  ];
24
- // OpenClaw exact BROWSER_TOOL_ACTIONS (from browser-tool.schema.ts)
25
28
  const BROWSER_TOOL_ACTIONS = [
26
- "status",
27
- "start",
28
- "stop",
29
- "profiles",
30
- "tabs",
31
- "open",
32
- "focus",
33
- "close",
34
- "snapshot",
35
- "screenshot",
36
- "navigate",
37
- "console",
38
- "pdf",
39
- "upload",
40
- "dialog",
41
- "act",
29
+ "status", "start", "stop", "profiles", "tabs", "open", "focus", "close",
30
+ "snapshot", "screenshot", "navigate", "console", "pdf", "upload", "dialog", "act",
42
31
  ];
43
- // OpenClaw exact BrowserActSchema (from browser-tool.schema.ts)
44
32
  const BrowserActSchema = Type.Object({
45
33
  kind: Type.Union(BROWSER_ACT_KINDS.map(k => Type.Literal(k))),
46
- // Common fields
47
34
  targetId: Type.Optional(Type.String()),
48
35
  ref: Type.Optional(Type.String()),
49
- // click
50
36
  doubleClick: Type.Optional(Type.Boolean()),
51
37
  button: Type.Optional(Type.String()),
52
38
  modifiers: Type.Optional(Type.Array(Type.String())),
53
- // type
54
39
  text: Type.Optional(Type.String()),
55
40
  submit: Type.Optional(Type.Boolean()),
56
41
  slowly: Type.Optional(Type.Boolean()),
57
- // press
58
42
  key: Type.Optional(Type.String()),
59
- // drag
60
43
  startRef: Type.Optional(Type.String()),
61
44
  endRef: Type.Optional(Type.String()),
62
- // select
63
45
  values: Type.Optional(Type.Array(Type.String())),
64
- // fill - use permissive array of objects
65
46
  fields: Type.Optional(Type.Array(Type.Object({}, { additionalProperties: true }))),
66
- // resize
67
47
  width: Type.Optional(Type.Number()),
68
48
  height: Type.Optional(Type.Number()),
69
- // wait
70
49
  timeMs: Type.Optional(Type.Number()),
71
50
  textGone: Type.Optional(Type.String()),
72
- // evaluate
73
51
  fn: Type.Optional(Type.String()),
74
52
  });
75
- // OpenClaw exact BrowserToolSchema (from browser-tool.schema.ts)
76
53
  const BrowserSchema = Type.Object({
77
54
  action: Type.Union(BROWSER_TOOL_ACTIONS.map(a => Type.Literal(a))),
78
55
  profile: Type.Optional(Type.String()),
@@ -80,8 +57,8 @@ const BrowserSchema = Type.Object({
80
57
  targetId: Type.Optional(Type.String()),
81
58
  limit: Type.Optional(Type.Number()),
82
59
  maxChars: Type.Optional(Type.Number()),
83
- mode: Type.Optional(Type.String()), // "efficient"
84
- refs: Type.Optional(Type.String()), // "role" or "aria"
60
+ mode: Type.Optional(Type.String()),
61
+ refs: Type.Optional(Type.String()),
85
62
  interactive: Type.Optional(Type.Boolean()),
86
63
  compact: Type.Optional(Type.Boolean()),
87
64
  depth: Type.Optional(Type.Number()),
@@ -91,7 +68,7 @@ const BrowserSchema = Type.Object({
91
68
  fullPage: Type.Optional(Type.Boolean()),
92
69
  ref: Type.Optional(Type.String()),
93
70
  element: Type.Optional(Type.String()),
94
- type: Type.Optional(Type.String()), // "png" or "jpeg"
71
+ type: Type.Optional(Type.String()),
95
72
  level: Type.Optional(Type.String()),
96
73
  paths: Type.Optional(Type.Array(Type.String())),
97
74
  inputRef: Type.Optional(Type.String()),
@@ -100,1076 +77,308 @@ const BrowserSchema = Type.Object({
100
77
  promptText: Type.Optional(Type.String()),
101
78
  request: Type.Optional(BrowserActSchema),
102
79
  });
103
- // OpenClaw's INTERACTIVE_ROLES - elements that get refs
104
- const INTERACTIVE_ROLES = new Set([
105
- "button",
106
- "link",
107
- "textbox",
108
- "checkbox",
109
- "radio",
110
- "combobox",
111
- "listbox",
112
- "menuitem",
113
- "menuitemcheckbox",
114
- "menuitemradio",
115
- "option",
116
- "searchbox",
117
- "slider",
118
- "spinbutton",
119
- "switch",
120
- "tab",
121
- "treeitem",
122
- ]);
123
- const CONTENT_ROLES = new Set([
124
- "heading",
125
- "cell",
126
- "gridcell",
127
- "columnheader",
128
- "rowheader",
129
- "listitem",
130
- "article",
131
- "region",
132
- "main",
133
- "navigation",
134
- ]);
135
- /**
136
- * Parse Playwright's ariaSnapshot output and build refs map (OpenClaw style)
137
- */
138
- function buildRoleSnapshotFromAriaSnapshot(ariaSnapshot, options = {}) {
139
- const lines = ariaSnapshot.split("\n");
140
- const refs = {};
141
- // Track role+name combinations for nth handling
142
- const counts = new Map();
143
- const refsByKey = new Map();
144
- const getKey = (role, name) => `${role}:${name ?? ""}`;
145
- let counter = 0;
146
- const nextRef = () => {
147
- counter += 1;
148
- return `e${counter}`;
149
- };
150
- const getIndentLevel = (line) => {
151
- const match = line.match(/^(\s*)/);
152
- return match ? Math.floor(match[1].length / 2) : 0;
153
- };
154
- const result = [];
155
- for (const line of lines) {
156
- const depth = getIndentLevel(line);
157
- if (options.maxDepth !== undefined && depth > options.maxDepth)
158
- continue;
159
- // Parse line: "- role "name" [other stuff]"
160
- const match = line.match(/^(\s*-\s*)(\w+)(?:\s+"([^"]*)")?(.*)$/);
161
- if (!match) {
162
- if (!options.interactive)
163
- result.push(line);
164
- continue;
165
- }
166
- const [, prefix, roleRaw, name, suffix] = match;
167
- if (roleRaw.startsWith("/")) {
168
- if (!options.interactive)
169
- result.push(line);
170
- continue;
171
- }
172
- const role = roleRaw.toLowerCase();
173
- const isInteractive = INTERACTIVE_ROLES.has(role);
174
- const isContent = CONTENT_ROLES.has(role);
175
- // If interactive-only mode, skip non-interactive
176
- if (options.interactive && !isInteractive)
177
- continue;
178
- // Determine if this element should have a ref
179
- const shouldHaveRef = isInteractive || (isContent && name);
180
- if (!shouldHaveRef) {
181
- if (!options.interactive)
182
- result.push(line);
183
- continue;
184
- }
185
- const ref = nextRef();
186
- const key = getKey(role, name);
187
- const nth = counts.get(key) ?? 0;
188
- counts.set(key, nth + 1);
189
- const keyRefs = refsByKey.get(key) ?? [];
190
- keyRefs.push(ref);
191
- refsByKey.set(key, keyRefs);
192
- refs[ref] = { role, name, nth };
193
- // Build enhanced line with ref
194
- let enhanced = `${prefix}${roleRaw}`;
195
- if (name)
196
- enhanced += ` "${name}"`;
197
- enhanced += ` [ref=${ref}]`;
198
- if (nth > 0)
199
- enhanced += ` [nth=${nth}]`;
200
- if (suffix && suffix.includes("["))
201
- enhanced += suffix;
202
- result.push(enhanced);
203
- }
204
- // Remove nth from non-duplicates
205
- for (const [_key, keyRefs] of refsByKey) {
206
- if (keyRefs.length === 1) {
207
- const ref = keyRefs[0];
208
- if (refs[ref])
209
- delete refs[ref].nth;
210
- }
211
- }
212
- return {
213
- snapshot: result.join("\n") || "(empty)",
214
- refs,
215
- };
216
- }
217
- // OpenClaw-style WeakMap to store state per Page object
218
- const pageStates = new WeakMap();
219
- // Global cache by targetId (backup for when Playwright returns different Page object)
220
- const roleRefsByTarget = new Map();
221
- const MAX_ROLE_REFS_CACHE = 50;
222
- // Ensure page has a state object (OpenClaw-style)
223
- function ensurePageState(page) {
224
- const existing = pageStates.get(page);
225
- if (existing)
226
- return existing;
227
- const newState = {
228
- console: [],
229
- errors: [],
230
- requests: [],
231
- };
232
- pageStates.set(page, newState);
233
- return newState;
234
- }
235
- // Global browser state (singleton)
80
+ const DEFAULT_CDP_PORT = 18800;
236
81
  const state = {
237
- browser: null,
238
- context: null,
239
- pages: new Map(),
240
- consoleLogs: new Map(),
241
- networkRequests: new Map(),
242
- pageErrors: new Map(),
243
82
  running: false,
244
- connectionMode: "launched",
83
+ cdpUrl: "",
84
+ mode: "local",
245
85
  };
246
- /**
247
- * Get the page to use for an action, tracking last active (OpenClaw-style)
248
- */
249
- function getPageForAction(targetId) {
250
- if (targetId) {
251
- const existingPage = state.pages.get(targetId);
252
- if (!existingPage) {
253
- throw new Error(`Tab ${targetId} not found. Run action=tabs to see available tabs.`);
254
- }
255
- state.lastActiveTargetId = targetId;
256
- return { page: existingPage, usedTargetId: targetId };
257
- }
258
- if (state.pages.size === 0) {
259
- throw new Error("No tabs open. Use action=open to open a URL first.");
260
- }
261
- // Prefer last active tab (the one most recently used for snapshot/action)
262
- if (state.lastActiveTargetId && state.pages.has(state.lastActiveTargetId)) {
263
- const page = state.pages.get(state.lastActiveTargetId);
264
- return { page, usedTargetId: state.lastActiveTargetId };
265
- }
266
- // Fall back to first tab
267
- const firstEntry = Array.from(state.pages.entries())[0];
268
- state.lastActiveTargetId = firstEntry[0];
269
- return { page: firstEntry[1], usedTargetId: firstEntry[0] };
270
- }
271
- /**
272
- * Restore role refs for a target (OpenClaw-style)
273
- * This ensures refs are available for the page even if they were stored under a different targetId
274
- */
275
- /**
276
- * Store role refs for a target (OpenClaw-style)
277
- * Stores BOTH on page's WeakMap state AND in global cache
278
- */
279
- function storeRoleRefsForTarget(opts) {
280
- // Store on page's WeakMap state
281
- const pageState = ensurePageState(opts.page);
282
- pageState.roleRefs = opts.refs;
283
- pageState.roleRefsFrameSelector = opts.frameSelector;
284
- pageState.roleRefsMode = opts.mode;
285
- // Store in global cache by targetId (for backup)
286
- const targetId = opts.targetId?.trim();
287
- if (!targetId)
288
- return;
289
- roleRefsByTarget.set(targetId, {
290
- refs: opts.refs,
291
- ...(opts.frameSelector ? { frameSelector: opts.frameSelector } : {}),
292
- mode: opts.mode,
86
+ // ─── Browser lifecycle (OpenClaw pattern: always Chrome child process) ──
87
+ async function isDockerAvailable() {
88
+ const { execFile } = await import("node:child_process");
89
+ return new Promise((resolve) => {
90
+ execFile("docker", ["info"], { timeout: 5000 }, (err) => resolve(!err));
293
91
  });
294
- // Limit cache size
295
- while (roleRefsByTarget.size > MAX_ROLE_REFS_CACHE) {
296
- const first = roleRefsByTarget.keys().next();
297
- if (first.done)
298
- break;
299
- roleRefsByTarget.delete(first.value);
300
- }
301
- console.log(`[BROWSER] Stored ${Object.keys(opts.refs).length} refs for targetId "${targetId}"`);
302
92
  }
303
93
  /**
304
- * Restore role refs for a target (OpenClaw-style)
305
- * Copies from global cache to page's WeakMap state
94
+ * Start the browser in the best available mode.
95
+ * Priority: docker sandbox > local Chrome child process.
96
+ * Override: BROWSER_SANDBOX=false forces local mode.
306
97
  */
307
- function restoreRoleRefsForTarget(page, targetId) {
308
- const tid = targetId?.trim() || "";
309
- if (!tid)
310
- return;
311
- // Check if page already has refs (no need to restore)
312
- const pageState = ensurePageState(page);
313
- if (pageState.roleRefs) {
314
- console.log(`[BROWSER] Page already has ${Object.keys(pageState.roleRefs).length} refs`);
315
- return;
316
- }
317
- // Try to restore from global cache
318
- const cached = roleRefsByTarget.get(tid);
319
- if (cached) {
320
- pageState.roleRefs = cached.refs;
321
- pageState.roleRefsFrameSelector = cached.frameSelector;
322
- pageState.roleRefsMode = cached.mode;
323
- console.log(`[BROWSER] Restored ${Object.keys(cached.refs).length} refs from cache for "${tid}"`);
98
+ async function startBrowser(options) {
99
+ if (state.running)
324
100
  return;
325
- }
326
- // Fallback: try any targetId that has refs
327
- for (const [storedTargetId, storedData] of roleRefsByTarget.entries()) {
328
- if (Object.keys(storedData.refs).length > 0) {
329
- pageState.roleRefs = storedData.refs;
330
- pageState.roleRefsFrameSelector = storedData.frameSelector;
331
- pageState.roleRefsMode = storedData.mode;
332
- console.log(`[BROWSER] Restored ${Object.keys(storedData.refs).length} refs from "${storedTargetId}" (fallback)`);
101
+ const forceLocal = process.env.BROWSER_SANDBOX === "false";
102
+ // ─── Try Docker sandbox (production on Hetzner, local dev with Docker Desktop) ───
103
+ if (!forceLocal && await isDockerAvailable()) {
104
+ const headless = options?.headless ?? process.env.BROWSER_SANDBOX_HEADLESS !== "false";
105
+ console.log(`[BROWSER] Starting Docker sandbox browser (headless: ${headless})...`);
106
+ try {
107
+ const sandboxState = await startSandboxBrowser({ headless });
108
+ state.sandboxState = sandboxState;
109
+ state.cdpUrl = sandboxState.cdpUrl;
110
+ state.mode = "sandbox";
111
+ state.running = true;
112
+ console.log(`[BROWSER] Docker sandbox ready, CDP: ${state.cdpUrl}`);
333
113
  return;
334
114
  }
115
+ catch (err) {
116
+ console.warn(`[BROWSER] Docker sandbox failed, falling back to local Chrome: ${err instanceof Error ? err.message : err}`);
117
+ }
335
118
  }
336
- console.log(`[BROWSER] No refs available to restore`);
337
- }
338
- function generateTargetId() {
339
- return `page-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
340
- }
341
- async function startBrowser(options = {}) {
342
- if (state.running) {
343
- return;
344
- }
345
- // Check if we should run in headless mode (default true, can be overridden by env var or param)
346
- const headlessMode = options.headless ?? process.env.BROWSER_HEADLESS !== "false";
347
- const launchOptions = {
348
- headless: headlessMode,
349
- args: ["--no-sandbox", "--disable-setuid-sandbox"],
350
- };
351
- // Support persistent profile directory
352
- if (options.profilePath) {
353
- console.log(`[BROWSER] Using profile: ${options.profilePath}`);
354
- // For persistent context, we use launchPersistentContext instead
355
- state.context = await chromium.launchPersistentContext(options.profilePath, {
356
- ...launchOptions,
357
- viewport: { width: 1280, height: 720 },
358
- userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
359
- });
360
- state.browser = null; // Persistent context doesn't have separate browser
119
+ // ─── Local mode: Chrome child process (fallback) ───
120
+ {
121
+ const headlessMode = options?.headless ?? process.env.BROWSER_HEADLESS !== "false";
122
+ const cdpPort = DEFAULT_CDP_PORT;
123
+ const resolvedConfig = {
124
+ enabled: true,
125
+ evaluateEnabled: true,
126
+ controlPort: 0,
127
+ cdpProtocol: "http",
128
+ cdpHost: "127.0.0.1",
129
+ cdpIsLoopback: true,
130
+ remoteCdpTimeoutMs: 5000,
131
+ remoteCdpHandshakeTimeoutMs: 5000,
132
+ color: "#FF4500",
133
+ headless: headlessMode,
134
+ noSandbox: true,
135
+ attachOnly: false,
136
+ defaultProfile: "costar",
137
+ profiles: {},
138
+ extraArgs: [],
139
+ };
140
+ const resolvedProfile = {
141
+ name: "costar",
142
+ cdpPort,
143
+ cdpUrl: `http://127.0.0.1:${cdpPort}`,
144
+ cdpHost: "127.0.0.1",
145
+ cdpIsLoopback: true,
146
+ color: "#FF4500",
147
+ driver: "openclaw",
148
+ };
149
+ console.log(`[BROWSER] Starting Chrome (headless: ${headlessMode}, CDP port: ${cdpPort})...`);
150
+ const running = await launchCostarChrome(resolvedConfig, resolvedProfile);
151
+ state.chromeProcess = running;
152
+ state.cdpUrl = resolvedProfile.cdpUrl;
153
+ state.mode = "local";
361
154
  state.running = true;
362
- state.connectionMode = "launched";
363
- console.log(`[BROWSER] Started with persistent profile (headless: ${headlessMode})`);
364
- return;
155
+ console.log(`[BROWSER] Chrome started (pid: ${running.pid}), CDP: ${state.cdpUrl}`);
365
156
  }
366
- console.log(`[BROWSER] Starting Chromium... (headless: ${headlessMode})`);
367
- state.browser = await chromium.launch(launchOptions);
368
- state.context = await state.browser.newContext({
369
- viewport: { width: 1280, height: 720 },
370
- userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
371
- });
372
- state.running = true;
373
- state.connectionMode = "launched";
374
- console.log("[BROWSER] Started");
375
- }
376
- /**
377
- * Set up page event listeners (extracted for reuse)
378
- */
379
- function setupPageListeners(page, targetId) {
380
- let nextRequestId = 0;
381
- const requestIds = new WeakMap();
382
- // Capture console logs
383
- page.on("console", (msg) => {
384
- const logs = state.consoleLogs.get(targetId) || [];
385
- logs.push(msg);
386
- if (logs.length > 500)
387
- logs.shift();
388
- state.consoleLogs.set(targetId, logs);
389
- });
390
- // Capture page errors
391
- page.on("pageerror", (err) => {
392
- const errors = state.pageErrors.get(targetId) || [];
393
- errors.push({
394
- message: err?.message || String(err),
395
- name: err?.name,
396
- stack: err?.stack,
397
- timestamp: new Date().toISOString(),
398
- });
399
- if (errors.length > 200)
400
- errors.shift();
401
- state.pageErrors.set(targetId, errors);
402
- });
403
- // Capture network requests
404
- page.on("request", (req) => {
405
- nextRequestId += 1;
406
- const id = `r${nextRequestId}`;
407
- requestIds.set(req, id);
408
- const requests = state.networkRequests.get(targetId) || [];
409
- requests.push({
410
- id,
411
- timestamp: new Date().toISOString(),
412
- method: req.method(),
413
- url: req.url(),
414
- resourceType: req.resourceType(),
415
- });
416
- if (requests.length > 500)
417
- requests.shift();
418
- state.networkRequests.set(targetId, requests);
419
- });
420
- page.on("response", (resp) => {
421
- const req = resp.request();
422
- const id = requestIds.get(req);
423
- if (!id)
424
- return;
425
- const requests = state.networkRequests.get(targetId) || [];
426
- const rec = requests.find((r) => r.id === id);
427
- if (rec) {
428
- rec.status = resp.status();
429
- rec.ok = resp.ok();
430
- }
431
- });
432
- page.on("requestfailed", (req) => {
433
- const id = requestIds.get(req);
434
- if (!id)
435
- return;
436
- const requests = state.networkRequests.get(targetId) || [];
437
- const rec = requests.find((r) => r.id === id);
438
- if (rec) {
439
- rec.failureText = req.failure()?.errorText;
440
- rec.ok = false;
441
- }
442
- });
443
157
  }
444
158
  async function stopBrowser() {
445
- if (!state.running) {
159
+ if (!state.running)
446
160
  return;
161
+ console.log("[BROWSER] Stopping...");
162
+ // Close Playwright connection managed by pw-session
163
+ await closePlaywrightBrowserConnection().catch(() => { });
164
+ if (state.mode === "sandbox" && state.sandboxState) {
165
+ // Stop Docker container
166
+ await stopSandboxBrowser(state.sandboxState);
167
+ state.sandboxState = undefined;
447
168
  }
448
- const isConnected = state.connectionMode === "connected";
449
- console.log(`[BROWSER] Stopping... (mode: ${state.connectionMode})`);
450
- // Close all pages we opened (but not pre-existing pages in CDP mode)
451
- for (const [id, page] of state.pages.entries()) {
452
- try {
453
- await page.close();
454
- }
455
- catch (err) {
456
- console.warn(`[BROWSER] Error closing page ${id}:`, err);
457
- }
458
- }
459
- state.pages.clear();
460
- state.consoleLogs.clear();
461
- state.networkRequests.clear();
462
- state.pageErrors.clear();
463
- roleRefsByTarget.clear(); // Clear the OpenClaw-style global cache
464
- // For CDP connections, we disconnect but don't close the browser
465
- if (isConnected) {
466
- if (state.browser) {
467
- // disconnect() keeps the browser running
468
- state.browser.close(); // This disconnects for CDP
469
- state.browser = null;
470
- }
471
- state.context = null;
472
- state.wsEndpoint = undefined;
473
- console.log("[BROWSER] Disconnected from CDP");
474
- }
475
- else {
476
- // For launched browsers, close everything
477
- if (state.context) {
478
- await state.context.close();
479
- state.context = null;
480
- }
481
- if (state.browser) {
482
- await state.browser.close();
483
- state.browser = null;
484
- }
485
- console.log("[BROWSER] Stopped");
169
+ else if (state.chromeProcess) {
170
+ // Kill Chrome child process
171
+ console.log(`[BROWSER] Stopping Chrome (pid: ${state.chromeProcess.pid})...`);
172
+ await stopCostarChrome(state.chromeProcess);
173
+ state.chromeProcess = undefined;
486
174
  }
487
175
  state.running = false;
488
- state.connectionMode = "launched";
176
+ state.cdpUrl = "";
177
+ state.mode = "local";
178
+ state.lastActiveTargetId = undefined;
179
+ console.log("[BROWSER] Stopped");
489
180
  }
490
// ─── Helpers ────────────────────────────────────────────────────────
/**
 * Guard used before any action that needs a live browser.
 *
 * @returns {string} the current `state.cdpUrl`.
 * @throws {Error} when `state.running` is falsy or `state.cdpUrl` is empty.
 */
function requireRunning() {
    const { running, cdpUrl } = state;
    if (running && cdpUrl) {
        return cdpUrl;
    }
    throw new Error("Browser not running. Use action=start first.");
}
507
/**
 * Pick the tab (target) an action should run against.
 *
 * An explicit `targetId` parameter wins and is remembered in
 * `state.lastActiveTargetId`; otherwise the remembered id is returned,
 * which may be `undefined` if no tab has been recorded yet.
 *
 * @param {object} params - tool parameters; only `targetId` is read.
 * @returns {string | undefined} the resolved target id.
 */
function resolveTargetId(params) {
    const explicitId = readStringParam(params, "targetId");
    if (!explicitId) {
        return state.lastActiveTargetId;
    }
    state.lastActiveTargetId = explicitId;
    return explicitId;
}
572
// ─── Snapshot helper ────────────────────────────────────────────────
/**
 * Build a text snapshot of a page and, optionally, a labelled screenshot.
 *
 * The accessibility snapshot and its refs come from
 * `snapshotRoleViaPlaywright`; the labelled screenshot (only when
 * `options.labels` is truthy and at least one ref exists) comes from
 * `screenshotWithLabelsViaPlaywright` and is written to the OS temp dir.
 *
 * @param {string} cdpUrl - forwarded unchanged to the Playwright helpers.
 * @param {string} [targetId] - tab to snapshot; forwarded unchanged.
 * @param {object} [options]
 * @param {number} [options.maxChars=50000] - truncation limit for the text
 *   (a falsy value, including 0, falls back to the default).
 * @param {boolean} [options.labels=false] - also produce a labelled screenshot.
 * @param {string} [options.refs="role"] - passed to the helper as `refsMode`.
 * @param {string} [options.selector] - passed through to the helper.
 * @param {string} [options.frame] - passed to the helper as `frameSelector`.
 * @param {boolean} [options.interactive] - passed through to the helper.
 * @param {number} [options.maxDepth] - passed through to the helper.
 * @param {boolean} [options.compact] - passed through to the helper.
 * @returns {Promise<{snapshot: string, imagePath: (string|undefined),
 *   labelsCount: number, refs: (object|undefined), targetId: (string|undefined)}>}
 *   `refs` is `undefined` when the snapshot produced no refs.
 */
async function getAISnapshot(cdpUrl, targetId, options) {
    const maxChars = options?.maxChars || 50000;
    const labels = options?.labels || false;
    const refsMode = options?.refs || "role";
    const snapshotResult = await snapshotRoleViaPlaywright({
        cdpUrl,
        targetId,
        selector: options?.selector,
        frameSelector: options?.frame,
        refsMode,
        options: {
            interactive: options?.interactive,
            maxDepth: options?.maxDepth,
            compact: options?.compact,
        },
    });
    // A result without `refs` is treated as "no refs" instead of crashing
    // in Object.keys(undefined).
    const refsMap = snapshotResult.refs ?? {};
    const refCount = Object.keys(refsMap).length;
    console.log(`[BROWSER] Snapshot: ${refCount} refs`);
    let imagePath;
    let labelsCount = 0;
    if (labels && refCount > 0) {
        const { buffer, labels: labelCount, skipped } = await screenshotWithLabelsViaPlaywright({
            cdpUrl,
            targetId,
            refs: refsMap,
            maxLabels: 150,
            type: "png",
        });
        labelsCount = labelCount;
        console.log(`[BROWSER] Labels screenshot: ${labelCount} labels, ${skipped} skipped`);
        const [fs, pathMod, os, { randomUUID }] = await Promise.all([
            import("fs/promises"),
            import("path"),
            import("os"),
            import("crypto"),
        ]);
        // Date.now() alone collides when two snapshots land in the same
        // millisecond; the UUID suffix keeps each screenshot in its own file
        // while preserving the "browser-snapshot-" prefix and ".png" suffix.
        const filename = `browser-snapshot-${Date.now()}-${randomUUID()}.png`;
        imagePath = pathMod.join(os.tmpdir(), filename);
        await fs.writeFile(imagePath, buffer);
    }
    // Build final snapshot text
    const page = await getPageForTargetId({ cdpUrl, targetId });
    // A failing title lookup degrades to an empty title rather than aborting.
    const title = await page.title().catch(() => "");
    const url = page.url();
    let snapshot = `# ${title}\n\nURL: ${url}\n\n`;
    if (snapshotResult.snapshot && snapshotResult.snapshot !== "(empty)") {
        snapshot += "## Accessibility Tree\n";
        snapshot += snapshotResult.snapshot;
        snapshot += "\n\n";
    }
    if (snapshot.length > maxChars) {
        snapshot = snapshot.slice(0, maxChars) + "\n\n... (content truncated)";
    }
    return {
        snapshot,
        imagePath,
        labelsCount,
        refs: refCount > 0 ? refsMap : undefined,
        targetId,
    };
}
804
- /**
805
- * Resolve a ref to a Playwright locator (OpenClaw-style)
806
- * Uses stored role+name info instead of DOM attributes for stability across navigation
807
- */
808
- /**
809
- * Normalize timeout to safe range (OpenClaw-style exact copy)
810
- * From: pw-tools-core.shared.ts - uses 120s max instead of 60s
811
- */
812
- function normalizeTimeoutMs(timeoutMs, fallback) {
813
- return Math.max(500, Math.min(120_000, timeoutMs ?? fallback));
814
- }
815
- /**
816
- * Resolve ref to locator (OpenClaw-style)
817
- * Reads from page's WeakMap state (restoreRoleRefsForTarget must be called first)
818
- */
819
- function refLocator(page, ref) {
820
- // Normalize ref (remove @ or ref= prefix if present) - OpenClaw style
821
- const normalized = ref.startsWith("@")
822
- ? ref.slice(1)
823
- : ref.startsWith("ref=")
824
- ? ref.slice(4)
825
- : ref;
826
- // Check if it's an element ref (e1, e2, etc.)
827
- if (/^e\d+$/.test(normalized)) {
828
- // Read from page's WeakMap state (OpenClaw-style)
829
- const pageState = pageStates.get(page);
830
- // Handle aria mode - refs are self-resolving via aria-ref= syntax
831
- if (pageState?.roleRefsMode === "aria") {
832
- const scope = pageState.roleRefsFrameSelector
833
- ? page.frameLocator(pageState.roleRefsFrameSelector)
834
- : page;
835
- return scope.locator(`aria-ref=${normalized}`);
836
- }
837
- // Role mode: Get ref info from page state
838
- const info = pageState?.roleRefs?.[normalized];
839
- if (!info) {
840
- throw new Error(`Element "${normalized}" not found or not visible. ` +
841
- `Run a new snapshot to see current page elements.`);
842
- }
843
- // Build locator using getByRole (OpenClaw-style)
844
- const scope = pageState?.roleRefsFrameSelector
845
- ? page.frameLocator(pageState.roleRefsFrameSelector)
846
- : page;
847
- const locAny = scope;
848
- const locator = info.name
849
- ? locAny.getByRole(info.role, { name: info.name, exact: true })
850
- : locAny.getByRole(info.role);
851
- return info.nth !== undefined ? locator.nth(info.nth) : locator;
852
- }
853
- // Not an element ref - treat as aria-ref (OpenClaw-style)
854
- return page.locator(`aria-ref=${normalized}`);
855
- }
856
- /**
857
- * Transform Playwright errors into AI-friendly messages (OpenClaw-style exact copy)
858
- * From: pw-tools-core.shared.ts
859
- */
860
- function toAIFriendlyError(error, selector) {
861
- const message = error instanceof Error ? error.message : String(error);
862
- // Multiple elements matched
863
- if (message.includes("strict mode violation")) {
864
- const countMatch = message.match(/resolved to (\d+) elements/);
865
- const count = countMatch ? countMatch[1] : "multiple";
866
- return new Error(`Selector "${selector}" matched ${count} elements. ` +
867
- `Run a new snapshot to get updated refs, or use a different ref.`);
868
- }
869
- // Element not found or not visible
870
- if ((message.includes("Timeout") || message.includes("waiting for")) &&
871
- (message.includes("to be visible") || message.includes("not visible"))) {
872
- return new Error(`Element "${selector}" not found or not visible. ` +
873
- `Run a new snapshot to see current page elements.`);
874
- }
875
- // Element covered by overlay or not interactable (OpenClaw exact)
876
- if (message.includes("intercepts pointer events") ||
877
- message.includes("not visible") ||
878
- message.includes("not receive pointer events")) {
879
- return new Error(`Element "${selector}" is not interactable (hidden or covered). ` +
880
- `Try scrolling it into view, closing overlays, or re-snapshotting.`);
881
- }
882
- return error instanceof Error ? error : new Error(message);
883
- }
884
// ─── Act dispatcher (OpenClaw cdpUrl pattern) ───────────────────────
/**
 * Dispatch one browser "act" request to the matching `*ViaPlaywright` helper.
 *
 * Every helper receives `cdpUrl` and `targetId` plus the request fields that
 * belong to its kind; argument validation beyond what is done here is left to
 * the helpers, and any error they throw propagates to the caller.
 *
 * Supported kinds: click, type, press, hover, drag, select, fill, resize,
 * wait, evaluate, close.
 *
 * @param {string} cdpUrl - forwarded unchanged to every helper.
 * @param {object} request - act request; `request.kind` selects the action.
 * @param {string} [targetId] - tab to act on; mandatory for kind "close".
 * @returns {Promise<{ok: true, result?: *}>} `result` is present only for
 *   kind "evaluate" and carries the helper's return value.
 * @throws {Error} for an unknown kind, for "fill" without a `fields` array or
 *   with a null/undefined entry in it, and for "close" without `targetId`.
 */
async function executeBrowserAct(cdpUrl, request, targetId) {
    const kind = request.kind;
    switch (kind) {
        case "click": {
            await clickViaPlaywright({
                cdpUrl,
                targetId,
                ref: request.ref,
                doubleClick: request.doubleClick,
                button: request.button || "left",
                modifiers: request.modifiers,
                timeoutMs: request.timeoutMs,
            });
            return { ok: true };
        }
        case "type": {
            await typeViaPlaywright({
                cdpUrl,
                targetId,
                ref: request.ref,
                text: String(request.text ?? ""),
                submit: request.submit,
                slowly: request.slowly,
                timeoutMs: request.timeoutMs,
            });
            return { ok: true };
        }
        case "press": {
            await pressKeyViaPlaywright({
                cdpUrl,
                targetId,
                key: String(request.key ?? ""),
                delayMs: request.delayMs,
            });
            return { ok: true };
        }
        case "hover": {
            await hoverViaPlaywright({
                cdpUrl,
                targetId,
                ref: request.ref,
                timeoutMs: request.timeoutMs,
            });
            return { ok: true };
        }
        case "drag": {
            await dragViaPlaywright({
                cdpUrl,
                targetId,
                startRef: request.startRef,
                endRef: request.endRef,
                timeoutMs: request.timeoutMs,
            });
            return { ok: true };
        }
        case "select": {
            await selectOptionViaPlaywright({
                cdpUrl,
                targetId,
                ref: request.ref,
                values: request.values,
                timeoutMs: request.timeoutMs,
            });
            return { ok: true };
        }
        case "fill": {
            const fields = request.fields;
            if (!fields || !Array.isArray(fields)) {
                throw new Error("fields array is required for fill action");
            }
            const normalizedFields = fields.map((field, index) => {
                // A null/undefined entry used to surface as an opaque
                // "Cannot read properties of null" TypeError; name the
                // offending index so the caller can correct the request.
                if (field === null || field === undefined) {
                    throw new Error(`fields[${index}] must be an object with ref/type/value (got ${field})`);
                }
                return {
                    ref: String(field.ref ?? ""),
                    type: String(field.type ?? ""),
                    value: field.value,
                };
            });
            await fillFormViaPlaywright({
                cdpUrl,
                targetId,
                fields: normalizedFields,
                timeoutMs: request.timeoutMs,
            });
            return { ok: true };
        }
        case "resize": {
            await resizeViewportViaPlaywright({
                cdpUrl,
                targetId,
                width: request.width,
                height: request.height,
            });
            return { ok: true };
        }
        case "wait": {
            await waitForViaPlaywright({
                cdpUrl,
                targetId,
                timeMs: request.timeMs,
                text: request.text,
                textGone: request.textGone,
                selector: request.selector,
                url: request.url,
                loadState: request.loadState,
                fn: request.fn,
                timeoutMs: request.timeoutMs,
            });
            return { ok: true };
        }
        case "evaluate": {
            const result = await evaluateViaPlaywright({
                cdpUrl,
                targetId,
                fn: String(request.fn ?? ""),
                ref: request.ref,
                timeoutMs: request.timeoutMs,
            });
            return { ok: true, result };
        }
        case "close": {
            // Closing needs an explicit tab id to close.
            if (!targetId) {
                throw new Error("targetId is required for close action");
            }
            await closePageByTargetIdViaPlaywright({ cdpUrl, targetId });
            return { ok: true };
        }
        default:
            throw new Error(`Unknown act kind: ${kind}`);
    }
}
1144
// ─── Image result helper ────────────────────────────────────────────
/**
 * Read an image file and wrap it as a tool result with base64 image content.
 *
 * @param {object} options
 * @param {string} options.path - file to read.
 * @param {string} [options.extraText] - when truthy, emitted as a text item
 *   placed before the image item.
 * @param {string} [options.mimeType="image/png"] - MIME type reported for the
 *   image; defaults to PNG so existing callers are unaffected.
 * @param {*} [options.details] - returned unchanged as `details`.
 * @returns {Promise<{content: Array<object>, details: *}>}
 * @throws whatever `fs.readFile` rejects with (e.g. missing file).
 */
async function imageResultFromFile(options) {
    const fs = await import("fs/promises");
    const buffer = await fs.readFile(options.path);
    const base64 = buffer.toString("base64");
    const content = [];
    if (options.extraText) {
        content.push({ type: "text", text: options.extraText });
    }
    content.push({
        type: "image",
        data: base64,
        mimeType: options.mimeType ?? "image/png",
    });
    return { content, details: options.details };
}
377
+ // ─── Tool export ────────────────────────────────────────────────────
1168
378
  export function createBrowserTool() {
1169
379
  return {
1170
380
  name: "browser",
1171
381
  label: "Browser",
1172
- // OpenClaw exact description (from browser-tool.ts)
1173
382
  description: [
1174
383
  "Control the browser via Playwright (status/start/stop/profiles/tabs/open/snapshot/screenshot/actions).",
1175
384
  "When using refs from snapshot (e.g. e12), keep the same tab: prefer passing targetId from the snapshot response into subsequent actions (act/click/type/etc).",
@@ -1184,446 +393,238 @@ export function createBrowserTool() {
1184
393
  case "status":
1185
394
  return jsonResult({
1186
395
  running: state.running,
1187
- connectionMode: state.connectionMode,
1188
- wsEndpoint: state.wsEndpoint,
1189
- tabCount: state.pages.size,
1190
- tabs: Array.from(state.pages.keys()),
396
+ cdpUrl: state.cdpUrl,
397
+ mode: state.mode,
398
+ pid: state.chromeProcess?.pid,
399
+ sandbox: state.sandboxState ? {
400
+ containerId: state.sandboxState.containerId,
401
+ containerName: state.sandboxState.containerName,
402
+ vncUrl: state.sandboxState.vncUrl,
403
+ } : undefined,
1191
404
  });
1192
405
  case "start": {
1193
406
  const headless = readBooleanParam(params, "headless");
1194
- const profilePath = readStringParam(params, "profilePath");
1195
- await startBrowser({
1196
- headless: headless ?? undefined,
1197
- profilePath: profilePath ?? undefined,
1198
- });
407
+ await startBrowser({ headless: headless ?? undefined });
1199
408
  return jsonResult({
1200
409
  running: state.running,
1201
- connectionMode: state.connectionMode,
1202
- message: profilePath
1203
- ? `Browser started with profile: ${profilePath}`
1204
- : "Browser started successfully",
410
+ cdpUrl: state.cdpUrl,
411
+ mode: state.mode,
412
+ pid: state.chromeProcess?.pid,
413
+ vncUrl: state.sandboxState?.vncUrl ?? null,
414
+ message: "Browser started",
1205
415
  });
1206
416
  }
1207
- case "stop":
417
+ case "stop": {
418
+ const wasMode = state.mode;
1208
419
  await stopBrowser();
1209
- return jsonResult({
1210
- running: state.running,
1211
- message: state.connectionMode === "connected"
1212
- ? "Disconnected from browser"
1213
- : "Browser stopped successfully",
1214
- });
1215
- case "profiles": {
1216
- // OpenClaw has profiles - we just return a simple response
1217
- return jsonResult({
1218
- profiles: ["default"],
1219
- message: "Single default profile available",
1220
- });
420
+ return jsonResult({ running: false, mode: wasMode, message: "Browser stopped" });
1221
421
  }
422
+ case "profiles":
423
+ return jsonResult({ profiles: ["costar"], message: "Single profile available" });
1222
424
  case "tabs": {
1223
- if (!state.running || state.pages.size === 0) {
1224
- return jsonResult({ tabs: [], message: "No tabs open" });
1225
- }
1226
- const tabs = await Promise.all(Array.from(state.pages.entries()).map(async ([id, page]) => {
1227
- try {
1228
- return {
1229
- targetId: id,
1230
- title: await page.title(),
1231
- url: page.url(),
1232
- };
1233
- }
1234
- catch (err) {
1235
- return {
1236
- targetId: id,
1237
- title: "(error)",
1238
- url: "(error)",
1239
- error: String(err),
1240
- };
1241
- }
425
+ const cdpUrl = requireRunning();
426
+ const pages = await listPagesViaPlaywright({ cdpUrl });
427
+ const tabs = pages.map((p) => ({
428
+ targetId: p.targetId,
429
+ title: p.title,
430
+ url: p.url,
1242
431
  }));
1243
432
  return jsonResult({ tabs });
1244
433
  }
1245
434
  case "open": {
435
+ if (!state.running)
436
+ await startBrowser();
437
+ const cdpUrl = requireRunning();
1246
438
  const targetUrl = readStringParam(params, "targetUrl", { required: true });
1247
- const { targetId: newTargetId, page } = await openPage(targetUrl);
439
+ const result = await createPageViaPlaywright({ cdpUrl, url: targetUrl });
440
+ state.lastActiveTargetId = result.targetId;
1248
441
  return jsonResult({
1249
- targetId: newTargetId,
1250
- url: page.url(),
1251
- title: await page.title(),
1252
- message: `Opened ${targetUrl} in new tab`,
442
+ targetId: result.targetId,
443
+ url: result.url,
444
+ title: result.title,
445
+ message: `Opened ${targetUrl}`,
1253
446
  });
1254
447
  }
1255
448
  case "focus": {
1256
- const targetId = readStringParam(params, "targetId", { required: true });
1257
- const page = state.pages.get(targetId);
1258
- if (!page) {
1259
- throw new Error(`Tab ${targetId} not found. Run action=tabs to see available tabs.`);
1260
- }
1261
- await page.bringToFront();
1262
- return jsonResult({
1263
- ok: true,
1264
- targetId,
1265
- message: `Focused tab ${targetId}`,
1266
- });
449
+ const cdpUrl = requireRunning();
450
+ const targetId = readStringParam(params, "targetId");
451
+ if (!targetId)
452
+ throw new Error("targetId is required for focus action");
453
+ await focusPageByTargetIdViaPlaywright({ cdpUrl, targetId });
454
+ return jsonResult({ ok: true, message: "Focused tab" });
1267
455
  }
1268
456
  case "navigate": {
457
+ const cdpUrl = requireRunning();
1269
458
  const targetUrl = readStringParam(params, "targetUrl", { required: true });
1270
- const targetId = readStringParam(params, "targetId");
1271
- let page;
1272
- let usedTargetId;
1273
- if (targetId) {
1274
- const existingPage = state.pages.get(targetId);
1275
- if (!existingPage) {
1276
- throw new Error(`Tab ${targetId} not found. Run action=tabs to see available tabs or use action=open to create a new tab.`);
1277
- }
1278
- page = existingPage;
1279
- usedTargetId = targetId;
1280
- }
1281
- else {
1282
- // Use first page or create new one
1283
- if (state.pages.size > 0) {
1284
- const firstEntry = Array.from(state.pages.entries())[0];
1285
- page = firstEntry[1];
1286
- usedTargetId = firstEntry[0];
1287
- }
1288
- else {
1289
- const result = await openPage(targetUrl);
1290
- return jsonResult({
1291
- targetId: result.targetId,
1292
- url: result.page.url(),
1293
- title: await result.page.title(),
1294
- message: `Created new tab and navigated to ${targetUrl}`,
1295
- });
1296
- }
1297
- }
1298
- await page.goto(targetUrl, { waitUntil: "domcontentloaded", timeout: 30000 });
459
+ const targetId = resolveTargetId(params);
460
+ const result = await navigateViaPlaywright({ cdpUrl, targetId, url: targetUrl });
461
+ const page = await getPageForTargetId({ cdpUrl, targetId });
1299
462
  return jsonResult({
1300
- targetId: usedTargetId,
1301
- url: page.url(),
463
+ targetId,
464
+ url: result.url,
1302
465
  title: await page.title(),
1303
466
  message: `Navigated to ${targetUrl}`,
1304
467
  });
1305
468
  }
1306
469
  case "snapshot": {
1307
- if (!state.running || state.pages.size === 0) {
1308
- throw new Error("No tabs open. Use action=open to open a URL first.");
1309
- }
1310
- const targetId = readStringParam(params, "targetId");
470
+ const cdpUrl = requireRunning();
471
+ const targetId = resolveTargetId(params);
1311
472
  const maxChars = readNumberParam(params, "maxChars") || 50000;
1312
473
  const labels = readBooleanParam(params, "labels", false);
1313
474
  const refs = (readStringParam(params, "refs") || "role");
1314
475
  const selector = readStringParam(params, "selector");
1315
476
  const frame = readStringParam(params, "frame");
1316
477
  const interactive = readBooleanParam(params, "interactive", false);
1317
- // Use helper to get page and track last active
1318
- const { page, usedTargetId } = getPageForAction(targetId ?? undefined);
1319
- // OpenClaw-style: Only generate screenshot if labels explicitly requested
1320
- const snapshotResult = await getAISnapshot(page, usedTargetId, {
1321
- maxChars,
1322
- labels, // Only generate screenshot when explicitly requested
1323
- refs,
1324
- selector,
1325
- frame,
478
+ const snapshotResult = await getAISnapshot(cdpUrl, targetId, {
479
+ maxChars, labels, refs,
480
+ selector: selector ?? undefined,
481
+ frame: frame ?? undefined,
1326
482
  interactive,
1327
483
  });
1328
- // If labels requested, return image with text
1329
484
  if (labels && snapshotResult.imagePath) {
485
+ const page = await getPageForTargetId({ cdpUrl, targetId });
1330
486
  return await imageResultFromFile({
1331
487
  path: snapshotResult.imagePath,
1332
488
  extraText: snapshotResult.snapshot,
1333
489
  details: {
1334
- targetId: usedTargetId,
490
+ targetId,
1335
491
  url: page.url(),
1336
492
  title: await page.title(),
1337
493
  format: "ai",
1338
494
  labels: true,
1339
495
  labelsCount: snapshotResult.labelsCount,
1340
496
  refs: snapshotResult.refs,
1341
- contentLength: snapshotResult.snapshot.length,
1342
- truncated: snapshotResult.snapshot.includes("(content truncated"),
1343
497
  },
1344
498
  });
1345
499
  }
1346
- // Default: Return tree only (no screenshot)
500
+ const page = await getPageForTargetId({ cdpUrl, targetId });
1347
501
  return {
1348
- content: [
1349
- {
1350
- type: "text",
1351
- text: snapshotResult.snapshot,
1352
- },
1353
- ],
502
+ content: [{ type: "text", text: snapshotResult.snapshot }],
1354
503
  details: {
1355
- targetId: usedTargetId,
504
+ targetId,
1356
505
  url: page.url(),
1357
506
  title: await page.title(),
1358
507
  format: "ai",
1359
508
  refs: snapshotResult.refs,
1360
- contentLength: snapshotResult.snapshot.length,
1361
- truncated: snapshotResult.snapshot.includes("(content truncated"),
1362
509
  },
1363
510
  };
1364
511
  }
1365
512
  case "screenshot": {
1366
- if (!state.running || state.pages.size === 0) {
1367
- throw new Error("No tabs open. Use action=open to open a URL first.");
1368
- }
1369
- const targetId = readStringParam(params, "targetId");
513
+ const cdpUrl = requireRunning();
514
+ const targetId = resolveTargetId(params);
1370
515
  const fullPage = readBooleanParam(params, "fullPage", false);
1371
- let page;
1372
- let usedTargetId;
1373
- if (targetId) {
1374
- const existingPage = state.pages.get(targetId);
1375
- if (!existingPage) {
1376
- throw new Error(`Tab ${targetId} not found. Run action=tabs to see available tabs.`);
1377
- }
1378
- page = existingPage;
1379
- usedTargetId = targetId;
1380
- }
1381
- else {
1382
- const firstEntry = Array.from(state.pages.entries())[0];
1383
- page = firstEntry[1];
1384
- usedTargetId = firstEntry[0];
1385
- }
1386
- const screenshot = await page.screenshot({
1387
- fullPage,
1388
- type: "png",
516
+ const ref = readStringParam(params, "ref");
517
+ const element = readStringParam(params, "element");
518
+ const screenshotType = (readStringParam(params, "type") || "png");
519
+ const { buffer } = await takeScreenshotViaPlaywright({
520
+ cdpUrl, targetId,
521
+ ref: ref ?? undefined,
522
+ element: element ?? undefined,
523
+ fullPage, type: screenshotType,
1389
524
  });
1390
- // Save screenshot to temp file
1391
525
  const fs = await import("fs/promises");
1392
- const path = await import("path");
526
+ const pathMod = await import("path");
1393
527
  const tmpDir = await import("os").then((os) => os.tmpdir());
1394
- const filename = `browser-screenshot-${Date.now()}.png`;
1395
- const filepath = path.join(tmpDir, filename);
1396
- await fs.writeFile(filepath, screenshot);
528
+ const filename = `browser-screenshot-${Date.now()}.${screenshotType}`;
529
+ const filepath = pathMod.join(tmpDir, filename);
530
+ await fs.writeFile(filepath, buffer);
531
+ const page = await getPageForTargetId({ cdpUrl, targetId });
1397
532
  return await imageResultFromFile({
1398
533
  path: filepath,
1399
- details: {
1400
- targetId: usedTargetId,
1401
- url: page.url(),
1402
- title: await page.title(),
1403
- path: filepath,
1404
- fullPage,
1405
- },
534
+ details: { targetId, url: page.url(), path: filepath, fullPage },
1406
535
  });
1407
536
  }
1408
537
  case "console": {
1409
- const targetId = readStringParam(params, "targetId");
538
+ const cdpUrl = requireRunning();
539
+ const targetId = resolveTargetId(params);
1410
540
  const level = readStringParam(params, "level");
1411
- let usedTargetId;
1412
- let messages;
1413
- if (targetId) {
1414
- if (!state.consoleLogs.has(targetId)) {
1415
- throw new Error(`Tab ${targetId} not found. Run action=tabs to see available tabs.`);
1416
- }
1417
- messages = state.consoleLogs.get(targetId);
1418
- usedTargetId = targetId;
1419
- }
1420
- else {
1421
- if (state.pages.size === 0) {
1422
- throw new Error("No tabs open. Use action=open to open a URL first.");
1423
- }
1424
- const firstId = Array.from(state.pages.keys())[0];
1425
- messages = state.consoleLogs.get(firstId) || [];
1426
- usedTargetId = firstId;
1427
- }
1428
- // Filter by level if specified
1429
- if (level) {
1430
- messages = messages.filter((msg) => msg.type() === level);
1431
- }
1432
- const formattedMessages = messages.map((msg) => ({
1433
- type: msg.type(),
1434
- text: msg.text(),
1435
- location: msg.location(),
1436
- }));
541
+ const page = await getPageForTargetId({ cdpUrl, targetId });
542
+ const pageState = ensurePageState(page);
543
+ let messages = pageState.console ?? [];
544
+ if (level)
545
+ messages = messages.filter((msg) => msg.type === level);
1437
546
  return jsonResult({
1438
547
  ok: true,
1439
- targetId: usedTargetId,
1440
- messages: formattedMessages,
548
+ messages: messages.map((msg) => ({
549
+ type: msg.type,
550
+ text: msg.text,
551
+ location: msg.location,
552
+ })),
1441
553
  });
1442
554
  }
1443
555
  case "pdf": {
1444
- const targetId = readStringParam(params, "targetId");
1445
- let page;
1446
- let usedTargetId;
1447
- if (targetId) {
1448
- const existingPage = state.pages.get(targetId);
1449
- if (!existingPage) {
1450
- throw new Error(`Tab ${targetId} not found. Run action=tabs to see available tabs.`);
1451
- }
1452
- page = existingPage;
1453
- usedTargetId = targetId;
1454
- }
1455
- else {
1456
- if (state.pages.size === 0) {
1457
- throw new Error("No tabs open. Use action=open to open a URL first.");
1458
- }
1459
- const firstEntry = Array.from(state.pages.entries())[0];
1460
- page = firstEntry[1];
1461
- usedTargetId = firstEntry[0];
1462
- }
1463
- const path = await import("path");
556
+ const cdpUrl = requireRunning();
557
+ const targetId = resolveTargetId(params);
558
+ const { buffer } = await pdfViaPlaywright({ cdpUrl, targetId });
559
+ const pathMod = await import("path");
560
+ const fs = await import("fs/promises");
1464
561
  const tmpDir = await import("os").then((os) => os.tmpdir());
1465
562
  const filename = `browser-pdf-${Date.now()}.pdf`;
1466
- const filepath = path.join(tmpDir, filename);
1467
- await page.pdf({
1468
- path: filepath,
1469
- format: "A4",
1470
- });
1471
- return {
1472
- content: [
1473
- {
1474
- type: "text",
1475
- text: `PDF saved to ${filepath}`,
1476
- },
1477
- ],
1478
- details: {
1479
- ok: true,
1480
- targetId: usedTargetId,
1481
- path: filepath,
1482
- },
1483
- };
563
+ const filepath = pathMod.join(tmpDir, filename);
564
+ await fs.writeFile(filepath, buffer);
565
+ return { content: [{ type: "text", text: `PDF saved to ${filepath}` }], details: { ok: true, path: filepath } };
1484
566
  }
1485
567
  case "upload": {
568
+ const cdpUrl = requireRunning();
569
+ const targetId = resolveTargetId(params);
1486
570
  const paths = readArrayParam(params, "paths", { required: true });
1487
- const targetId = readStringParam(params, "targetId");
1488
571
  const timeoutMs = readNumberParam(params, "timeoutMs") || 30000;
1489
- let page;
1490
- let usedTargetId;
1491
- if (targetId) {
1492
- const existingPage = state.pages.get(targetId);
1493
- if (!existingPage) {
1494
- throw new Error(`Tab ${targetId} not found. Run action=tabs to see available tabs.`);
1495
- }
1496
- page = existingPage;
1497
- usedTargetId = targetId;
1498
- }
1499
- else {
1500
- if (state.pages.size === 0) {
1501
- throw new Error("No tabs open. Use action=open to open a URL first.");
1502
- }
1503
- const firstEntry = Array.from(state.pages.entries())[0];
1504
- page = firstEntry[1];
1505
- usedTargetId = firstEntry[0];
1506
- }
1507
- // Set up file chooser handler
1508
- const fileChooserPromise = page.waitForEvent("filechooser", { timeout: timeoutMs });
1509
- // Wait for file chooser to appear (user must trigger it)
1510
- const fileChooser = await fileChooserPromise;
1511
- await fileChooser.setFiles(paths);
1512
- return jsonResult({
1513
- ok: true,
1514
- targetId: usedTargetId,
1515
- filesUploaded: paths.length,
1516
- message: `File chooser armed and files uploaded: ${paths.join(", ")}`,
1517
- });
572
+ await armFileUploadViaPlaywright({ cdpUrl, targetId, paths, timeoutMs });
573
+ return jsonResult({ ok: true, filesUploaded: paths.length, message: `Upload armed: ${paths.join(", ")}` });
1518
574
  }
1519
575
  case "dialog": {
576
+ const cdpUrl = requireRunning();
577
+ const targetId = resolveTargetId(params);
1520
578
  const accept = readBooleanParam(params, "accept", true);
1521
579
  const promptText = readStringParam(params, "promptText");
1522
- const targetId = readStringParam(params, "targetId");
1523
580
  const timeoutMs = readNumberParam(params, "timeoutMs") || 30000;
1524
- let page;
1525
- let usedTargetId;
1526
- if (targetId) {
1527
- const existingPage = state.pages.get(targetId);
1528
- if (!existingPage) {
1529
- throw new Error(`Tab ${targetId} not found. Run action=tabs to see available tabs.`);
1530
- }
1531
- page = existingPage;
1532
- usedTargetId = targetId;
1533
- }
1534
- else {
1535
- if (state.pages.size === 0) {
1536
- throw new Error("No tabs open. Use action=open to open a URL first.");
1537
- }
1538
- const firstEntry = Array.from(state.pages.entries())[0];
1539
- page = firstEntry[1];
1540
- usedTargetId = firstEntry[0];
1541
- }
1542
- // Set up dialog handler
1543
- const dialogPromise = page.waitForEvent("dialog", { timeout: timeoutMs });
1544
- // Wait for dialog to appear
1545
- const dialog = await dialogPromise;
1546
- if (accept) {
1547
- await dialog.accept(promptText);
1548
- }
1549
- else {
1550
- await dialog.dismiss();
1551
- }
1552
- return jsonResult({
1553
- ok: true,
1554
- targetId: usedTargetId,
1555
- dialogType: dialog.type(),
1556
- dialogMessage: dialog.message(),
1557
- accepted: accept,
1558
- });
581
+ await armDialogViaPlaywright({ cdpUrl, targetId, accept, promptText: promptText ?? undefined, timeoutMs });
582
+ return jsonResult({ ok: true, message: `Dialog handler armed (accept: ${accept})` });
1559
583
  }
1560
584
  case "close": {
585
+ const cdpUrl = requireRunning();
1561
586
  const targetId = readStringParam(params, "targetId");
1562
- if (!targetId) {
1563
- throw new Error("targetId is required for close action. Run action=tabs to see available targetIds.");
1564
- }
1565
- const page = state.pages.get(targetId);
1566
- if (!page) {
1567
- throw new Error(`Tab ${targetId} not found. Run action=tabs to see available tabs.`);
1568
- }
1569
- await page.close();
1570
- state.pages.delete(targetId);
1571
- state.consoleLogs.delete(targetId);
1572
- return jsonResult({
1573
- targetId,
1574
- message: `Closed tab ${targetId}`,
1575
- remainingTabs: state.pages.size,
1576
- });
587
+ if (!targetId)
588
+ throw new Error("targetId is required for close action");
589
+ await closePageByTargetIdViaPlaywright({ cdpUrl, targetId });
590
+ return jsonResult({ ok: true, message: "Closed tab" });
1577
591
  }
1578
592
  case "act": {
1579
- // Support both OpenClaw-style (request object) and direct parameters
593
+ const cdpUrl = requireRunning();
1580
594
  const request = params.request;
1581
595
  const actParams = request || params;
1582
- // Check if we have act parameters (either kind or any act-specific param)
1583
596
  if (!actParams.kind && !actParams.ref && !actParams.text && !actParams.key) {
1584
- throw new Error("act action requires 'kind' parameter or use 'request' object with act parameters");
597
+ throw new Error("act requires 'kind' parameter");
1585
598
  }
1586
- const targetId = readStringParam(actParams, "targetId");
1587
- // Use helper to get page and track last active
1588
- const { page, usedTargetId } = getPageForAction(targetId ?? undefined);
1589
- const result = await executeBrowserAct(page, actParams, usedTargetId);
1590
- return jsonResult({
1591
- ok: result.ok,
1592
- targetId: usedTargetId,
1593
- url: page.url(),
1594
- result: result.result,
1595
- });
599
+ const targetId = resolveTargetId(actParams);
600
+ const result = await executeBrowserAct(cdpUrl, actParams, targetId);
601
+ const page = await getPageForTargetId({ cdpUrl, targetId });
602
+ return jsonResult({ ok: result.ok, targetId, url: page.url(), result: result.result });
1596
603
  }
1597
604
  default:
1598
- throw new Error(`Unknown action: ${action}. Valid actions: ${BROWSER_TOOL_ACTIONS.join(", ")}`);
605
+ throw new Error(`Unknown action: ${action}. Valid: ${BROWSER_TOOL_ACTIONS.join(", ")}`);
1599
606
  }
1600
607
  }
1601
608
  catch (error) {
1602
609
  const message = error instanceof Error ? error.message : String(error);
1603
- console.error(`[BROWSER] Error in action ${action}:`, message);
1604
- // Return structured error
610
+ console.error(`[BROWSER] Error in ${action}:`, message);
1605
611
  return {
1606
- content: [
1607
- {
1608
- type: "text",
1609
- text: `Browser error in action "${action}": ${message}`,
1610
- },
1611
- ],
1612
- details: {
1613
- error: "browser_error",
1614
- action,
1615
- message,
1616
- },
612
+ content: [{ type: "text", text: `Browser error in "${action}": ${message}` }],
613
+ details: { error: "browser_error", action, message },
1617
614
  };
1618
615
  }
1619
616
  },
1620
617
  };
1621
618
  }
619
+ // ─── Public getters (for REST API) ───────────────────────────────
620
+ /** Returns the sandbox VNC URL if the browser is running in sandbox mode, else null. */
621
+ export function getBrowserVncUrl() {
622
+ if (state.running && state.mode === "sandbox" && state.sandboxState?.vncUrl) {
623
+ return state.sandboxState.vncUrl;
624
+ }
625
+ return null;
626
+ }
1622
627
  // Cleanup on process exit
1623
- process.on("SIGINT", async () => {
1624
- await stopBrowser();
1625
- });
1626
- process.on("SIGTERM", async () => {
1627
- await stopBrowser();
1628
- });
628
+ process.on("SIGINT", async () => { await stopBrowser(); });
629
+ process.on("SIGTERM", async () => { await stopBrowser(); });
1629
630
  //# sourceMappingURL=browser.js.map