@greatlhd/ailo-desktop 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/copy-static.mjs +11 -0
  2. package/dist/browser_control.js +767 -0
  3. package/dist/browser_snapshot.js +174 -0
  4. package/dist/cli.js +36 -0
  5. package/dist/code_executor.js +95 -0
  6. package/dist/config_server.js +658 -0
  7. package/dist/connection_util.js +14 -0
  8. package/dist/constants.js +2 -0
  9. package/dist/desktop_state_store.js +57 -0
  10. package/dist/desktop_types.js +1 -0
  11. package/dist/desktop_verifier.js +40 -0
  12. package/dist/dingtalk-handler.js +173 -0
  13. package/dist/dingtalk-types.js +1 -0
  14. package/dist/email_handler.js +501 -0
  15. package/dist/exec_tool.js +90 -0
  16. package/dist/feishu-handler.js +620 -0
  17. package/dist/feishu-types.js +8 -0
  18. package/dist/feishu-utils.js +162 -0
  19. package/dist/fs_tools.js +398 -0
  20. package/dist/index.js +433 -0
  21. package/dist/mcp/config-manager.js +64 -0
  22. package/dist/mcp/index.js +3 -0
  23. package/dist/mcp/rpc.js +109 -0
  24. package/dist/mcp/session.js +140 -0
  25. package/dist/mcp_manager.js +253 -0
  26. package/dist/mouse_keyboard.js +516 -0
  27. package/dist/qq-handler.js +153 -0
  28. package/dist/qq-types.js +15 -0
  29. package/dist/qq-ws.js +178 -0
  30. package/dist/screenshot.js +271 -0
  31. package/dist/skills_hub.js +212 -0
  32. package/dist/skills_manager.js +103 -0
  33. package/dist/static/AGENTS.md +25 -0
  34. package/dist/static/app.css +539 -0
  35. package/dist/static/app.html +292 -0
  36. package/dist/static/app.js +380 -0
  37. package/dist/static/chat.html +994 -0
  38. package/dist/time_tool.js +22 -0
  39. package/dist/utils.js +15 -0
  40. package/package.json +38 -0
  41. package/src/browser_control.ts +739 -0
  42. package/src/browser_snapshot.ts +196 -0
  43. package/src/cli.ts +44 -0
  44. package/src/code_executor.ts +101 -0
  45. package/src/config_server.ts +723 -0
  46. package/src/connection_util.ts +23 -0
  47. package/src/constants.ts +2 -0
  48. package/src/desktop_state_store.ts +64 -0
  49. package/src/desktop_types.ts +44 -0
  50. package/src/desktop_verifier.ts +45 -0
  51. package/src/dingtalk-types.ts +26 -0
  52. package/src/exec_tool.ts +93 -0
  53. package/src/feishu-handler.ts +722 -0
  54. package/src/feishu-types.ts +66 -0
  55. package/src/feishu-utils.ts +174 -0
  56. package/src/fs_tools.ts +411 -0
  57. package/src/index.ts +474 -0
  58. package/src/mcp/config-manager.ts +85 -0
  59. package/src/mcp/index.ts +7 -0
  60. package/src/mcp/rpc.ts +131 -0
  61. package/src/mcp/session.ts +182 -0
  62. package/src/mcp_manager.ts +273 -0
  63. package/src/mouse_keyboard.ts +526 -0
  64. package/src/qq-types.ts +49 -0
  65. package/src/qq-ws.ts +223 -0
  66. package/src/screenshot.ts +297 -0
  67. package/src/static/app.css +539 -0
  68. package/src/static/app.html +292 -0
  69. package/src/static/app.js +380 -0
  70. package/src/static/chat.html +994 -0
  71. package/src/time_tool.ts +24 -0
  72. package/src/utils.ts +22 -0
  73. package/tsconfig.json +13 -0
@@ -0,0 +1,739 @@
1
/**
 * Browser automation tool using Playwright (Node.js).
 * Single tool with an action-based API: start, stop, open, navigate, navigate_back,
 * snapshot, screenshot (alias: take_screenshot), click, type, eval, evaluate, resize,
 * console_messages, handle_dialog, file_upload, fill_form, press_key, network_requests,
 * drag, hover, select_option, tabs, wait_for, pdf, close, install.
 */
7
+
8
+ import { writeFile } from "fs/promises";
9
+ import { spawn } from "child_process";
10
+ import {
11
+ type BrowserContext,
12
+ type Browser,
13
+ type Page,
14
+ type ConsoleMessage,
15
+ type Request,
16
+ type Response,
17
+ type Dialog,
18
+ type FileChooser,
19
+ chromium,
20
+ } from "playwright";
21
+ import { buildRoleSnapshotFromAria, type RefInfo } from "./browser_snapshot.js";
22
+ import type { ContentPart } from "@greatlhd/ailo-endpoint-sdk";
23
+
24
/**
 * Bookkeeping for the single browser session managed by this module.
 * Every per-page map is keyed by the tool's own page id string.
 */
interface BrowserState {
  /** Launched browser, or null while none is running. */
  browser: Browser | null;
  /** Context new pages are created in, or null while none exists. */
  context: BrowserContext | null;
  /** Registered pages by page id. */
  pages: Map<string, Page>;
  /** Element refs produced by the most recent snapshot of each page. */
  refs: Map<string, Map<string, RefInfo>>;
  /** Frame selector used for the most recent snapshot of each page ("" when none). */
  refsFrame: Map<string, string>;
  /** Console messages captured per page. */
  consoleLogs: Map<string, { level: string; text: string }[]>;
  /** Network requests captured per page; `status` is filled in once a response arrives. */
  networkRequests: Map<string, { url: string; method: string; resourceType?: string; status?: number }[]>;
  /** Dialogs that were raised and are waiting for handle_dialog. */
  pendingDialogs: Map<string, Dialog[]>;
  /** File choosers that were opened and are waiting for file_upload. */
  pendingFileChoosers: Map<string, FileChooser[]>;
  /** Whether the browser is (or will be) launched headless. */
  headless: boolean;
  /** Page id used when a call does not name one. */
  currentPageId: string | null;
  /** Counter behind generated `page_N` ids. */
  pageCounter: number;
}
38
+
39
/** The one module-level session record; starts with no browser and headless mode on. */
const state: BrowserState = {
  // Session handles.
  browser: null,
  context: null,

  // Per-page registries, all keyed by page id.
  pages: new Map(),
  refs: new Map(),
  refsFrame: new Map(),
  consoleLogs: new Map(),
  networkRequests: new Map(),
  pendingDialogs: new Map(),
  pendingFileChoosers: new Map(),

  // Scalars.
  headless: true,
  currentPageId: null,
  pageCounter: 0,
};
53
+
54
/**
 * Builds a success result: one text part holding pretty-printed JSON of
 * `{ ok: true, ...data }`.
 */
function ok(data: Record<string, unknown>): ContentPart[] {
  const payload = { ok: true, ...data };
  const text = JSON.stringify(payload, null, 2);
  return [{ type: "text", text }];
}
57
/**
 * Builds a failure result: one text part holding pretty-printed JSON of
 * `{ ok: false, error }`.
 */
function fail(error: string): ContentPart[] {
  const payload = { ok: false, error };
  const text = JSON.stringify(payload, null, 2);
  return [{ type: "text", text }];
}
60
+
61
/** Advances the page counter and returns the matching `page_N` id. */
function nextPageId(): string {
  state.pageCounter += 1;
  return "page_" + state.pageCounter;
}
64
+
65
/** Looks up a registered page by id; undefined when the id is unknown. */
function getPage(pageId: string): Page | undefined {
  const found = state.pages.get(pageId);
  return found;
}
68
+
69
/** Returns the ref table for a page, creating and storing an empty one on first use. */
function getRefs(pageId: string): Map<string, RefInfo> {
  const existing = state.refs.get(pageId);
  if (existing) return existing;
  const created = new Map<string, RefInfo>();
  state.refs.set(pageId, created);
  return created;
}
73
+
74
/**
 * Picks the search root for locators: a frame locator when a non-blank frame
 * selector is given, otherwise the page itself.
 */
function getRoot(page: Page, frameSelector?: string) {
  const frame = frameSelector?.trim();
  return frame ? page.frameLocator(frame) : page;
}
78
+
79
/**
 * Resolves a snapshot ref to a role-based locator.
 *
 * @param page          Page the ref belongs to.
 * @param pageId        Page id whose ref table is consulted.
 * @param ref           Ref key taken from the latest snapshot.
 * @param frameSelector Optional frame to search inside.
 * @returns The locator, or null when the ref is unknown or carries no role.
 */
function getLocatorByRef(page: Page, pageId: string, ref: string, frameSelector?: string) {
  const info = getRefs(pageId).get(ref);
  if (!info) return null;
  const role: string = info.role ?? "";
  if (!role) return null;
  const root = getRoot(page, frameSelector);
  const locator = root.getByRole(role as any, { name: info.name || undefined });
  // Apply the index whenever one was recorded, including 0: a ref recorded with
  // nth 0 must still pin the first match, otherwise the locator can match
  // several elements and actions on it may be rejected as ambiguous.
  const hasIndex = typeof info.nth === "number" && info.nth >= 0;
  return hasIndex ? locator.nth(info.nth as number) : locator;
}
90
+
91
/**
 * Starts capturing console messages, network requests, dialogs and file
 * choosers for a page, storing each buffer in module state under `pageId`.
 * Any buffers previously stored under the same id are replaced.
 */
function attachPageListeners(page: Page, pageId: string): void {
  const logs: Array<{ level: string; text: string }> = [];
  state.consoleLogs.set(pageId, logs);
  page.on("console", (msg: ConsoleMessage) => logs.push({ level: msg.type(), text: msg.text() }));

  type RequestEntry = { url: string; method: string; resourceType?: string; status?: number };
  const reqs: RequestEntry[] = [];
  state.networkRequests.set(pageId, reqs);
  // Pair each response with the request object that produced it. Matching on
  // the URL alone needs a linear scan per response and can attach a status to
  // the wrong entry when the same URL is requested more than once.
  const entryByRequest = new WeakMap<object, RequestEntry>();
  page.on("request", (req: Request) => {
    const entry: RequestEntry = { url: req.url(), method: req.method(), resourceType: req.resourceType() };
    entryByRequest.set(req, entry);
    reqs.push(entry);
  });
  page.on("response", (res: Response) => {
    const entry = entryByRequest.get(res.request());
    if (entry) entry.status = res.status();
  });

  // Dialogs are queued rather than answered here; handle_dialog consumes them.
  const dialogs: Dialog[] = [];
  state.pendingDialogs.set(pageId, dialogs);
  page.on("dialog", (d: Dialog) => dialogs.push(d));

  // File choosers are queued the same way for file_upload.
  const choosers: FileChooser[] = [];
  state.pendingFileChoosers.set(pageId, choosers);
  page.on("filechooser", (c: FileChooser) => choosers.push(c));
}
112
+
113
/**
 * Registers pages that the browser opens by itself (popups / new tabs spawned
 * by another page) under a freshly generated page id and makes them current.
 *
 * The context "page" event is documented to fire for every new page, which
 * would include pages this module creates through `context.newPage()`. Those
 * are registered by their creator, so registering them here as well would
 * list the same page under two ids. Only pages that report an opener are
 * registered here.
 */
function attachContextListeners(context: BrowserContext): void {
  context.on("page", (page: Page) => {
    void (async () => {
      try {
        const opener = await page.opener();
        if (!opener) return; // created explicitly; the creator registers it
        if (state.context !== context || page.isClosed()) return; // session changed meanwhile
        if ([...state.pages.values()].includes(page)) return; // already known
        const newId = nextPageId();
        state.refs.set(newId, new Map());
        attachPageListeners(page, newId);
        state.pages.set(newId, page);
        state.currentPageId = newId;
      } catch {
        // The page went away before it could be inspected; nothing to register.
      }
    })();
  });
}
122
+
123
/** Returns the module state to its initial values without closing anything. */
function resetState(): void {
  const registries: Array<Map<string, unknown>> = [
    state.pages,
    state.refs,
    state.refsFrame,
    state.consoleLogs,
    state.networkRequests,
    state.pendingDialogs,
    state.pendingFileChoosers,
  ];
  for (const registry of registries) registry.clear();

  state.browser = null;
  state.context = null;
  state.currentPageId = null;
  state.pageCounter = 0;
  state.headless = true;
}
137
+
138
/**
 * Parses `value` as JSON when it is a non-blank string.
 * Returns `fallback` for non-strings, blank strings and invalid JSON.
 */
function parseJson(value: unknown, fallback: unknown = undefined): unknown {
  const usable = typeof value === "string" && value.trim().length > 0;
  if (!usable) return fallback;
  try {
    return JSON.parse(value as string);
  } catch {
    return fallback;
  }
}
142
+
143
+ // --- Actions ---
144
+
145
/**
 * Launches Chromium and a fresh context.
 *
 * When a browser is already running it is reused, except when a visible
 * window is requested while the running one is headless; then it is closed
 * and relaunched.
 *
 * @param headed True to launch with a visible window.
 */
async function actionStart(headed: boolean): Promise<ContentPart[]> {
  const running = state.browser;
  if (running) {
    const needsVisibleRelaunch = headed && state.headless;
    if (!needsVisibleRelaunch) return ok({ message: "Browser already running" });
    try { await running.close(); } catch {}
    resetState();
    // Wait for the process to really exit, to avoid port conflicts.
    await new Promise((resolve) => setTimeout(resolve, 500));
  }

  state.headless = !headed;
  try {
    const launched = await chromium.launch({ headless: state.headless });
    const ctx = await launched.newContext();
    attachContextListeners(ctx);
    state.browser = launched;
    state.context = ctx;
    const message = headed ? "Browser started (visible window)" : "Browser started";
    return ok({ message });
  } catch (e: any) {
    return fail(`Browser start failed: ${e.message}`);
  }
}
168
+
169
/** Closes the browser (ignoring close errors) and resets module state. */
async function actionStop(): Promise<ContentPart[]> {
  const running = state.browser;
  if (!running) return ok({ message: "Browser not running" });
  try {
    await running.close();
  } catch {}
  resetState();
  return ok({ message: "Browser stopped" });
}
175
+
176
/**
 * Makes sure a browser and context exist, launching them when needed with the
 * current headless setting. A browser left without a context is closed first.
 *
 * @returns True when a usable browser/context pair is available.
 */
async function ensureBrowser(): Promise<boolean> {
  const { browser: running, context: current } = state;
  if (running && current) return true;

  if (running) {
    try { await running.close(); } catch {}
    resetState();
  }

  try {
    const launched = await chromium.launch({ headless: state.headless });
    const ctx = await launched.newContext();
    attachContextListeners(ctx);
    state.browser = launched;
    state.context = ctx;
    return true;
  } catch {
    return false;
  }
}
191
+
192
/**
 * Opens `url` in a new page and registers that page under `pageId`,
 * starting the browser first when necessary.
 */
async function actionOpen(url: string, pageId: string): Promise<ContentPart[]> {
  const hasUrl = Boolean(url?.trim());
  if (!hasUrl) return fail("url required for open");

  const ready = await ensureBrowser();
  if (!ready) return fail("Browser not started");

  try {
    const created = await state.context!.newPage();
    // Listeners go on before navigation so load-time events are captured.
    state.refs.set(pageId, new Map());
    attachPageListeners(created, pageId);
    await created.goto(url);
    state.pages.set(pageId, created);
    state.currentPageId = pageId;
    return ok({ message: `Opened ${url}`, page_id: pageId, url });
  } catch (e: any) {
    return fail(`Open failed: ${e.message}`);
  }
}
207
+
208
/** Navigates an existing page to `url` and makes that page current. */
async function actionNavigate(url: string, pageId: string): Promise<ContentPart[]> {
  const hasUrl = Boolean(url?.trim());
  if (!hasUrl) return fail("url required for navigate");

  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    await target.goto(url);
    state.currentPageId = pageId;
    // Report the URL the page actually ended up on.
    const landedOn = target.url();
    return ok({ message: `Navigated to ${url}`, url: landedOn });
  } catch (e: any) {
    return fail(`Navigate failed: ${e.message}`);
  }
}
220
+
221
/** Goes one step back in the page's history and reports the resulting URL. */
async function actionNavigateBack(pageId: string): Promise<ContentPart[]> {
  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    await target.goBack();
    const landedOn = target.url();
    return ok({ message: "Navigated back", url: landedOn });
  } catch (e: any) {
    return fail(`Navigate back failed: ${e.message}`);
  }
}
231
+
232
/**
 * Takes an ARIA snapshot of the page (or of one frame), stores the resulting
 * refs for later ref-based actions, and optionally writes the snapshot text
 * to `filename`.
 */
async function actionSnapshot(pageId: string, filename?: string, frameSelector?: string): Promise<ContentPart[]> {
  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    const scope = getRoot(target, frameSelector);
    const raw = await scope.locator(":root").ariaSnapshot();
    const built = buildRoleSnapshotFromAria(raw ?? "");

    // Remember both the refs and the frame they were taken from.
    state.refs.set(pageId, built.refs);
    state.refsFrame.set(pageId, frameSelector?.trim() ?? "");

    const out: Record<string, unknown> = {
      snapshot: built.snapshot,
      refs: [...built.refs.keys()],
      url: target.url(),
    };
    if (frameSelector?.trim()) out.frame_selector = frameSelector.trim();
    if (filename?.trim()) {
      const destination = filename.trim();
      await writeFile(destination, built.snapshot, "utf-8");
      out.filename = destination;
    }
    return ok(out);
  } catch (e: any) {
    return fail(`Snapshot failed: ${e.message}`);
  }
}
254
+
255
/**
 * Saves a screenshot of an element (by ref), of a frame's body, or of the
 * page. When no path is given, a timestamped file name is generated.
 * Any `screenshotType` other than "jpeg" is treated as "png".
 */
async function actionScreenshot(
  pageId: string, path?: string, fullPage = false,
  screenshotType = "png", ref?: string, frameSelector?: string,
): Promise<ContentPart[]> {
  const wantsJpeg = screenshotType === "jpeg";
  if (!path?.trim()) {
    const extension = wantsJpeg ? "jpeg" : "png";
    path = `page-${Date.now()}.${extension}`;
  }

  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    const type = wantsJpeg ? "jpeg" : "png";
    const refKey = ref?.trim();
    if (refKey) {
      const element = getLocatorByRef(target, pageId, refKey, frameSelector);
      if (!element) return fail(`Unknown ref: ${ref}`);
      await element.screenshot({ path, type });
    } else if (frameSelector?.trim()) {
      const frameBody = getRoot(target, frameSelector).locator("body").first();
      await frameBody.screenshot({ path, type });
    } else {
      await target.screenshot({ path, fullPage, type });
    }
    return ok({ message: `Screenshot saved to ${path}`, path });
  } catch (e: any) {
    return fail(`Screenshot failed: ${e.message}`);
  }
}
279
+
280
/**
 * Clicks (or double-clicks) an element found by snapshot ref or by selector.
 *
 * @param pageId        Page to act on.
 * @param selector      Selector, used when no ref is given.
 * @param ref           Snapshot ref; takes precedence over `selector`.
 * @param wait          Delay in milliseconds before clicking.
 * @param doubleClick   True to double-click.
 * @param button        "left", "right" or "middle"; anything else means "left".
 * @param modifiersJson JSON array (or single JSON string) of modifier keys.
 * @param frameSelector Optional frame to search inside.
 */
async function actionClick(
  pageId: string, selector?: string, ref?: string,
  wait = 0, doubleClick = false, button = "left",
  modifiersJson?: string, frameSelector?: string,
): Promise<ContentPart[]> {
  if (!ref?.trim() && !selector?.trim()) return fail("selector or ref required for click");
  const page = getPage(pageId);
  if (!page) return fail(`Page '${pageId}' not found`);
  try {
    if (wait > 0) await new Promise((r) => setTimeout(r, wait));

    // The JSON may legitimately parse to something other than an array
    // (e.g. "Shift" or {}); calling .filter on that would throw, so normalise first.
    const parsed = parseJson(modifiersJson, []);
    const candidates: unknown[] = Array.isArray(parsed) ? parsed : typeof parsed === "string" ? [parsed] : [];
    const allowedMods = ["Alt", "Control", "ControlOrMeta", "Meta", "Shift"];
    const validMods = candidates.filter((m): m is string => typeof m === "string" && allowedMods.includes(m));

    const opts: any = { button: ["left", "right", "middle"].includes(button) ? button : "left" };
    if (validMods.length) opts.modifiers = validMods;

    let locator;
    if (ref?.trim()) {
      locator = getLocatorByRef(page, pageId, ref.trim(), frameSelector);
      if (!locator) return fail(`Unknown ref: ${ref}`);
    } else {
      locator = getRoot(page, frameSelector).locator(selector!).first();
    }
    if (doubleClick) await locator.dblclick(opts); else await locator.click(opts);
    return ok({ message: `Clicked ${ref || selector}` });
  } catch (e: unknown) {
    return fail(`Click failed: ${e instanceof Error ? e.message : String(e)}`);
  }
}
309
+
310
/**
 * Enters text into an element found by ref or selector: key by key when
 * `slowly` is set, otherwise as a single fill. Presses Enter afterwards when
 * `submit` is set.
 */
async function actionType(
  pageId: string, selector?: string, ref?: string,
  text = "", submit = false, slowly = false, frameSelector?: string,
): Promise<ContentPart[]> {
  const hasTarget = Boolean(ref?.trim()) || Boolean(selector?.trim());
  if (!hasTarget) return fail("selector or ref required for type");

  const page = getPage(pageId);
  if (!page) return fail(`Page '${pageId}' not found`);

  try {
    let field;
    if (ref?.trim()) {
      field = getLocatorByRef(page, pageId, ref.trim(), frameSelector);
      if (!field) return fail(`Unknown ref: ${ref}`);
    } else {
      field = getRoot(page, frameSelector).locator(selector!).first();
    }

    if (slowly) {
      await field.pressSequentially(text);
    } else {
      await field.fill(text);
    }
    if (submit) await field.press("Enter");

    return ok({ message: `Typed into ${ref || selector}` });
  } catch (e: any) {
    return fail(`Type failed: ${e.message}`);
  }
}
333
+
334
/**
 * Turns user-supplied JavaScript into one self-invoking expression string.
 *
 * - Code that is a single function or parenthesised expression is evaluated
 *   and, when the value is a function, called with no arguments.
 * - Code that contains statements is run as the body of an async function, so
 *   `return` and `await` are usable.
 * - Anything else is treated as one expression and its value returned.
 *
 * The result is self-invoking so it does not depend on the evaluator calling
 * function-valued strings on the caller's behalf.
 */
function buildEvalExpression(code: string): string {
  const src = code.trim();
  // Trailing semicolons would be a syntax error inside the parentheses below.
  const bare = src.replace(/[;\s]+$/, "");
  const parsesAsExpression = (text: string): boolean => {
    try {
      // Compiles only; the function is never called.
      new Function(`return (${text}\n);`);
      return true;
    } catch {
      return false;
    }
  };
  // Must be checked before the statement test: that test also matches a
  // leading `function` or `async`, which would otherwise bury a function
  // expression inside a wrapper that never calls it.
  const looksCallable = /^(?:\(|function\b|async\s*(?:\(|function\b|[A-Za-z_$][\w$]*\s*=>))/.test(bare);
  if (looksCallable && parsesAsExpression(bare)) {
    return `(() => { const __v = (${bare}\n); return typeof __v === "function" ? __v() : __v; })()`;
  }
  const hasStatements = /^(if|for|while|switch|try|catch|return|const|let|var|function|class|async|await)\b/m.test(src);
  if (hasStatements) return `(async () => { ${src}\n })()`;
  return `(() => { return (${bare}\n); })()`;
}

/**
 * Evaluates JavaScript in the page and returns its (serialisable) result.
 * NOTE(review): this runs arbitrary caller-supplied code in the page by design.
 */
async function actionEval(pageId: string, code: string): Promise<ContentPart[]> {
  if (!code?.trim()) return fail("code required for eval");
  const page = getPage(pageId);
  if (!page) return fail(`Page '${pageId}' not found`);
  try {
    const result = await page.evaluate(buildEvalExpression(code));
    return ok({ result });
  } catch (e: unknown) {
    return fail(`Eval failed: ${e instanceof Error ? e.message : String(e)}`);
  }
}
350
+
351
/**
 * Turns user-supplied JavaScript into one self-invoking expression string.
 *
 * - Code that is a single function or parenthesised expression is evaluated
 *   and, when the value is a function, called with no arguments.
 * - Code that contains statements is run as the body of an async function, so
 *   `return` and `await` are usable.
 * - Anything else is treated as one expression and its value returned.
 *
 * The result is self-invoking so it does not depend on the evaluator calling
 * function-valued strings on the caller's behalf.
 */
function buildEvaluateExpression(code: string): string {
  const src = code.trim();
  // Trailing semicolons would be a syntax error inside the parentheses below.
  const bare = src.replace(/[;\s]+$/, "");
  const parsesAsExpression = (text: string): boolean => {
    try {
      // Compiles only; the function is never called.
      new Function(`return (${text}\n);`);
      return true;
    } catch {
      return false;
    }
  };
  // Must be checked before the statement test: that test also matches a
  // leading `function` or `async`, which would otherwise bury a function
  // expression inside a wrapper that never calls it.
  const looksCallable = /^(?:\(|function\b|async\s*(?:\(|function\b|[A-Za-z_$][\w$]*\s*=>))/.test(bare);
  if (looksCallable && parsesAsExpression(bare)) {
    return `(() => { const __v = (${bare}\n); return typeof __v === "function" ? __v() : __v; })()`;
  }
  const hasStatements = /^(if|for|while|switch|try|catch|return|const|let|var|function|class|async|await)\b/m.test(src);
  if (hasStatements) return `(async () => { ${src}\n })()`;
  return `(() => { return (${bare}\n); })()`;
}

/**
 * Evaluates JavaScript either against one element (when `ref` is given) or in
 * the page, and returns the (serialisable) result.
 * NOTE(review): this runs arbitrary caller-supplied code in the page by design.
 */
async function actionEvaluate(
  pageId: string, code: string, ref?: string, frameSelector?: string,
): Promise<ContentPart[]> {
  if (!code?.trim()) return fail("code required for evaluate");
  const page = getPage(pageId);
  if (!page) return fail(`Page '${pageId}' not found`);
  try {
    let result;
    if (ref?.trim()) {
      const locator = getLocatorByRef(page, pageId, ref.trim(), frameSelector);
      if (!locator) return fail(`Unknown ref: ${ref}`);
      // NOTE(review): the code is handed over as a plain string, unchanged from
      // before; confirm how the element is made available to string code.
      result = await locator.evaluate(code);
    } else {
      result = await page.evaluate(buildEvaluateExpression(code));
    }
    return ok({ result });
  } catch (e: unknown) {
    return fail(`Evaluate failed: ${e instanceof Error ? e.message : String(e)}`);
  }
}
376
+
377
/** Sets the page viewport to `width` x `height`; both must be positive. */
async function actionResize(pageId: string, width: number, height: number): Promise<ContentPart[]> {
  const nonPositive = width <= 0 || height <= 0;
  if (nonPositive) return fail("width and height must be positive");

  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    const viewport = { width, height };
    await target.setViewportSize(viewport);
    return ok({ message: `Resized to ${width}x${height}` });
  } catch (e: any) {
    return fail(`Resize failed: ${e.message}`);
  }
}
388
+
389
/**
 * Maps a console message type to a severity rank: 0 error, 1 warning,
 * 2 info, 3 debug. Types without an obvious severity (such as "log") rank as
 * info so that they are subject to the level filter like everything else.
 */
function consoleLevelRank(type: string): number {
  switch (type) {
    case "error":
    case "assert":
      return 0;
    case "warning":
      return 1;
    case "debug":
      return 3;
    default:
      return 2;
  }
}

/**
 * Returns the console messages captured for a page, limited to `level` and
 * anything more severe, or writes them to `filename` when one is given.
 *
 * @param level "error", "warning", "info" or "debug" (case-insensitive);
 *              any other value disables filtering.
 */
async function actionConsoleMessages(pageId: string, level = "info", filename?: string): Promise<ContentPart[]> {
  const page = getPage(pageId);
  if (!page) return fail(`Page '${pageId}' not found`);
  const order = ["error", "warning", "info", "debug"];
  const idx = order.indexOf(level.toLowerCase());
  const logs = state.consoleLogs.get(pageId) ?? [];
  // Rank each message explicitly: looking the raw type up in `order` yields -1
  // for types like "log", which would slip through every filter, even "error".
  const filtered = idx >= 0 ? logs.filter((m) => consoleLevelRank(m.level) <= idx) : logs;
  const text = filtered.map((m) => `[${m.level}] ${m.text}`).join("\n");
  if (filename?.trim()) {
    await writeFile(filename.trim(), text, "utf-8");
    return ok({ message: `Console messages saved to ${filename}`, filename: filename.trim() });
  }
  return ok({ messages: filtered, text });
}
403
+
404
/**
 * Answers the oldest queued dialog of a page: accepts it (optionally with
 * prompt text) or dismisses it.
 */
async function actionHandleDialog(pageId: string, accept = true, promptText?: string): Promise<ContentPart[]> {
  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  const queue = state.pendingDialogs.get(pageId) ?? [];
  if (queue.length === 0) return fail("No pending dialog");

  try {
    const next = queue.shift();
    if (!next) return fail("No pending dialog");

    if (!accept) {
      await next.dismiss();
    } else if (promptText) {
      await next.accept(promptText);
    } else {
      await next.accept();
    }
    return ok({ message: "Dialog handled" });
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    return fail(`Handle dialog failed: ${reason}`);
  }
}
422
+
423
/**
 * Feeds file paths (a JSON array) to the oldest queued file chooser of a
 * page. An empty list is reported as a cancelled chooser.
 */
async function actionFileUpload(pageId: string, pathsJson?: string): Promise<ContentPart[]> {
  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  const files = (parseJson(pathsJson, []) as string[]) ?? [];
  const queue = state.pendingFileChoosers.get(pageId) ?? [];
  if (queue.length === 0) return fail("No chooser. Click upload then file_upload.");

  try {
    const next = queue.shift();
    if (!next) return fail("No file chooser");
    await next.setFiles(files);
    const message = files.length ? `Uploaded ${files.length} file(s)` : "File chooser cancelled";
    return ok({ message });
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    return fail(`File upload failed: ${reason}`);
  }
}
438
+
439
/**
 * Fills several form fields identified by snapshot refs.
 *
 * @param pageId     Page to act on.
 * @param fieldsJson JSON array of `{ ref, type?, value }`. `type` may be
 *                   "checkbox", "radio" or "combobox"; anything else is
 *                   treated as a text field.
 * @returns On success, the number of fields actually filled plus, when any
 *          entry was skipped (missing or unknown ref), a `skipped` list.
 */
async function actionFillForm(pageId: string, fieldsJson?: string): Promise<ContentPart[]> {
  const page = getPage(pageId);
  if (!page) return fail(`Page '${pageId}' not found`);
  const fields = parseJson(fieldsJson, []) as Array<{ ref?: unknown; type?: unknown; value?: unknown }>;
  if (!Array.isArray(fields) || !fields.length) return fail("fields required (JSON array)");
  const refs = getRefs(pageId);
  // Refs belong to the frame the latest snapshot was taken in.
  const frame = state.refsFrame.get(pageId) ?? "";
  let filled = 0;
  const skipped: string[] = [];
  try {
    for (const f of fields) {
      // Coerce before trimming: a numeric or missing ref must not throw.
      const refId = String(f?.ref ?? "").trim();
      const locator = refId && refs.has(refId) ? getLocatorByRef(page, pageId, refId, frame) : null;
      if (!locator) {
        skipped.push(refId || "(missing ref)");
        continue;
      }
      const fieldType = String(f.type ?? "textbox").toLowerCase();
      const value = f.value;
      if (fieldType === "checkbox") {
        const checked = typeof value === "string"
          ? ["true", "1", "yes"].includes(value.toLowerCase()) : !!value;
        await locator.setChecked(checked);
      } else if (fieldType === "radio") {
        await locator.setChecked(true);
      } else if (fieldType === "combobox") {
        await locator.selectOption(typeof value === "string" ? { label: value } : { value: String(value) });
      } else {
        await locator.fill(value != null ? String(value) : "");
      }
      filled += 1;
    }
    // Report what was really done rather than how many entries were supplied.
    const out: Record<string, unknown> = { message: `Filled ${filled} field(s)` };
    if (skipped.length) out.skipped = skipped;
    return ok(out);
  } catch (e: unknown) {
    return fail(`Fill form failed: ${e instanceof Error ? e.message : String(e)}`);
  }
}
470
+
471
/** Presses a key (or key combination) on the page keyboard. */
async function actionPressKey(pageId: string, key: string): Promise<ContentPart[]> {
  const hasKey = Boolean(key?.trim());
  if (!hasKey) return fail("key required for press_key");

  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    const combo = key.trim();
    await target.keyboard.press(combo);
    return ok({ message: `Pressed key ${key}` });
  } catch (e: any) {
    return fail(`Press key failed: ${e.message}`);
  }
}
482
+
483
/**
 * Returns the network requests captured for a page, or writes them to
 * `filename`. Image, stylesheet, font and media requests are left out unless
 * `includeStatic` is set.
 */
async function actionNetworkRequests(pageId: string, includeStatic = false, filename?: string): Promise<ContentPart[]> {
  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  const captured = state.networkRequests.get(pageId) ?? [];
  const staticKinds = ["image", "stylesheet", "font", "media"];
  const visible = includeStatic
    ? captured
    : captured.filter((entry) => !staticKinds.includes(entry.resourceType ?? ""));

  const lines = visible.map((entry) => `${entry.method} ${entry.url} ${entry.status ?? ""}`);
  const text = lines.join("\n");

  if (filename?.trim()) {
    const destination = filename.trim();
    await writeFile(destination, text, "utf-8");
    return ok({ message: `Network requests saved to ${filename}`, filename: destination });
  }
  return ok({ requests: visible, text });
}
498
+
499
/**
 * Drags one element onto another. Source and target are given either as a
 * pair of snapshot refs or as a pair of selectors; refs win when both pairs
 * are complete.
 */
async function actionDrag(
  pageId: string, startRef?: string, endRef?: string,
  startSelector?: string, endSelector?: string, frameSelector?: string,
): Promise<ContentPart[]> {
  const haveRefs = Boolean(startRef?.trim() && endRef?.trim());
  const haveSelectors = Boolean(startSelector?.trim() && endSelector?.trim());
  if (!(haveRefs || haveSelectors)) {
    return fail("drag needs (start_ref,end_ref) or (start_selector,end_selector)");
  }

  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    let from, to;
    if (haveRefs) {
      from = getLocatorByRef(target, pageId, startRef!.trim(), frameSelector);
      to = getLocatorByRef(target, pageId, endRef!.trim(), frameSelector);
      if (!from || !to) return fail("Unknown ref for drag");
    } else {
      const scope = getRoot(target, frameSelector);
      from = scope.locator(startSelector!).first();
      to = scope.locator(endSelector!).first();
    }
    await from.dragTo(to);
    return ok({ message: "Drag completed" });
  } catch (e: any) {
    return fail(`Drag failed: ${e.message}`);
  }
}
525
+
526
/** Moves the pointer over an element found by snapshot ref or by selector. */
async function actionHover(
  pageId: string, ref?: string, selector?: string, frameSelector?: string,
): Promise<ContentPart[]> {
  const hasTarget = Boolean(ref?.trim()) || Boolean(selector?.trim());
  if (!hasTarget) return fail("hover requires ref or selector");

  const page = getPage(pageId);
  if (!page) return fail(`Page '${pageId}' not found`);

  try {
    let element;
    if (ref?.trim()) {
      element = getLocatorByRef(page, pageId, ref.trim(), frameSelector);
      if (!element) return fail(`Unknown ref: ${ref}`);
    } else {
      element = getRoot(page, frameSelector).locator(selector!).first();
    }
    await element.hover();
    return ok({ message: `Hovered ${ref || selector}` });
  } catch (e: any) {
    return fail(`Hover failed: ${e.message}`);
  }
}
547
+
548
/**
 * Reads the option values for select_option.
 * Accepts a JSON array (returned unchanged), a single JSON string or number,
 * or - when the text is not valid JSON - a comma-separated list.
 * Returns an empty array when nothing usable is supplied.
 */
function parseSelectValues(raw: unknown): string[] {
  if (typeof raw !== "string" || !raw.trim()) return [];
  const text = raw.trim();
  let parsed: unknown;
  let isJson = true;
  try {
    parsed = JSON.parse(text);
  } catch {
    isJson = false;
  }
  if (!isJson) {
    // Not JSON: honour the comma-separated form the error message advertises.
    return text.split(",").map((part) => part.trim()).filter((part) => part.length > 0);
  }
  if (Array.isArray(parsed)) return parsed as string[];
  if (typeof parsed === "string" || typeof parsed === "number") return [String(parsed)];
  return [];
}

/**
 * Selects one or more options on the element identified by a snapshot ref.
 *
 * @param valuesJson JSON array of values, or a comma-separated list.
 */
async function actionSelectOption(
  pageId: string, ref?: string, valuesJson?: string, frameSelector?: string,
): Promise<ContentPart[]> {
  if (!ref?.trim()) return fail("ref required for select_option");
  const values = parseSelectValues(valuesJson);
  if (!values.length) return fail("values required (JSON array or comma-separated)");
  const page = getPage(pageId);
  if (!page) return fail(`Page '${pageId}' not found`);
  try {
    const locator = getLocatorByRef(page, pageId, ref.trim(), frameSelector);
    if (!locator) return fail(`Unknown ref: ${ref}`);
    await locator.selectOption(values);
    return ok({ message: `Selected ${JSON.stringify(values)}` });
  } catch (e: unknown) {
    return fail(`Select option failed: ${e instanceof Error ? e.message : String(e)}`);
  }
}
565
+
566
/**
 * Tab management: "list" the registered page ids, open a "new" blank page,
 * "close" a page, or "select" a page as current. For close/select, a valid
 * `index` into the id list takes precedence over `pageId`.
 */
async function actionTabs(pageId: string, tabAction: string, index: number): Promise<ContentPart[]> {
  if (!tabAction?.trim()) return fail("tab_action required (list, new, close, select)");

  const ids = [...state.pages.keys()];
  const verb = tabAction.trim().toLowerCase();
  // Only meaningful for close/select; computing it early has no side effects.
  const indexInRange = index >= 0 && index < ids.length;
  const targetId = indexInRange ? ids[index] : pageId;

  switch (verb) {
    case "list":
      return ok({ tabs: ids, count: ids.length });

    case "new": {
      const haveContext = state.context ? true : await ensureBrowser();
      if (!haveContext) return fail("Browser not started");
      try {
        const created = await state.context!.newPage();
        const createdId = nextPageId();
        state.refs.set(createdId, new Map());
        attachPageListeners(created, createdId);
        state.pages.set(createdId, created);
        state.currentPageId = createdId;
        return ok({ page_id: createdId, tabs: [...state.pages.keys()] });
      } catch (e: any) {
        return fail(`New tab failed: ${e.message}`);
      }
    }

    case "close":
      return actionClose(targetId);

    case "select":
      if (!state.pages.has(targetId)) return fail(`Page '${targetId}' not found`);
      state.currentPageId = targetId;
      return ok({ message: `Use page_id=${targetId} for later actions`, page_id: targetId });

    default:
      return fail(`Unknown tab_action: ${tabAction}`);
  }
}
597
+
598
/**
 * Waits, in order: for `waitTime` seconds, then for `text` to become visible,
 * then for `textGone` to become hidden. Each text wait times out after 30 s.
 */
async function actionWaitFor(pageId: string, waitTime: number, text?: string, textGone?: string): Promise<ContentPart[]> {
  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    if (waitTime > 0) {
      const delayMs = waitTime * 1000;
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
    if (text?.trim()) {
      const appearing = target.getByText(text.trim());
      await appearing.waitFor({ state: "visible", timeout: 30000 });
    }
    if (textGone?.trim()) {
      const vanishing = target.getByText(textGone.trim());
      await vanishing.waitFor({ state: "hidden", timeout: 30000 });
    }
    return ok({ message: "Wait completed" });
  } catch (e: any) {
    return fail(`Wait failed: ${e.message}`);
  }
}
610
+
611
/** Saves the page as a PDF; the file defaults to "page.pdf" when no path is given. */
async function actionPdf(pageId: string, path?: string): Promise<ContentPart[]> {
  const destination = path?.trim() || "page.pdf";

  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    await target.pdf({ path: destination });
    return ok({ message: `PDF saved to ${destination}`, path: destination });
  } catch (e: any) {
    return fail(`PDF failed: ${e.message}`);
  }
}
622
+
623
/**
 * Closes a page and forgets everything recorded for it. When the closed page
 * was the current one, the first remaining page (if any) becomes current.
 */
async function actionClose(pageId: string): Promise<ContentPart[]> {
  const target = getPage(pageId);
  if (!target) return fail(`Page '${pageId}' not found`);

  try {
    await target.removeAllListeners();
    await target.close();

    // Drop every per-page record for this id.
    const registries: Array<Map<string, unknown>> = [
      state.pages,
      state.refs,
      state.refsFrame,
      state.consoleLogs,
      state.networkRequests,
      state.pendingDialogs,
      state.pendingFileChoosers,
    ];
    for (const registry of registries) registry.delete(pageId);

    if (state.currentPageId === pageId) {
      const [firstLeft] = [...state.pages.keys()];
      state.currentPageId = firstLeft ?? null;
    }
    return ok({ message: `Closed page '${pageId}'` });
  } catch (e: any) {
    return fail(`Close failed: ${e.message}`);
  }
}
645
+
646
+ // --- Main entry ---
647
+
648
/**
 * Tool entry point: reads `args.action`, resolves which page the call refers
 * to, and dispatches to the matching action handler.
 *
 * Page resolution: an explicit non-empty `page_id` wins; otherwise the current
 * page is used when it is still registered; otherwise the id "default".
 *
 * @param args Raw tool arguments (snake_case keys).
 * @returns The handler's result; any thrown error is turned into a failure result.
 */
export async function browserUse(args: Record<string, unknown>): Promise<ContentPart[]> {
  const action = String(args.action ?? "").trim().toLowerCase();
  if (!action) return fail("action required");

  const requestedId = String(args.page_id ?? "").trim();
  let pageId: string;
  if (requestedId.length > 0 && requestedId !== "undefined") {
    pageId = requestedId;
  } else if (state.currentPageId && state.pages.has(state.currentPageId)) {
    pageId = state.currentPageId;
  } else {
    pageId = "default";
  }

  // Small readers for the loosely typed argument bag.
  const raw = (key: string) => args[key] as string;
  const text = (key: string, fallback = "") => String(args[key] ?? fallback);
  const num = (key: string, fallback: number) => Number(args[key] ?? fallback);
  const flag = (key: string) => !!args[key];
  const frame = raw("frame_selector");

  try {
    switch (action) {
      case "start":
        return await actionStart(flag("headed"));
      case "stop":
        return await actionStop();
      case "open":
        return await actionOpen(text("url"), pageId);
      case "navigate":
        return await actionNavigate(text("url"), pageId);
      case "navigate_back":
        return await actionNavigateBack(pageId);
      case "snapshot":
        return await actionSnapshot(pageId, raw("filename"), frame);
      case "screenshot":
      case "take_screenshot":
        return await actionScreenshot(
          pageId, (args.path ?? args.filename) as string,
          flag("full_page"), text("screenshot_type", "png"),
          raw("ref"), frame,
        );
      case "click":
        return await actionClick(
          pageId, raw("selector"), raw("ref"),
          num("wait", 0), flag("double_click"),
          text("button", "left"), raw("modifiers_json"), frame,
        );
      case "type":
        return await actionType(
          pageId, raw("selector"), raw("ref"),
          text("text"), flag("submit"), flag("slowly"), frame,
        );
      case "eval":
        return await actionEval(pageId, text("code"));
      case "evaluate":
        return await actionEvaluate(pageId, text("code"), raw("ref"), frame);
      case "resize":
        return await actionResize(pageId, num("width", 0), num("height", 0));
      case "console_messages":
        return await actionConsoleMessages(pageId, text("level", "info"), (args.filename ?? args.path) as string);
      case "handle_dialog":
        // Only an explicit boolean false dismisses; everything else accepts.
        return await actionHandleDialog(pageId, args.accept !== false, raw("prompt_text"));
      case "file_upload":
        return await actionFileUpload(pageId, raw("paths_json"));
      case "fill_form":
        return await actionFillForm(pageId, raw("fields_json"));
      case "press_key":
        return await actionPressKey(pageId, text("key"));
      case "network_requests":
        return await actionNetworkRequests(pageId, flag("include_static"), (args.filename ?? args.path) as string);
      case "drag":
        return await actionDrag(
          pageId, raw("start_ref"), raw("end_ref"),
          raw("start_selector"), raw("end_selector"), frame,
        );
      case "hover":
        return await actionHover(pageId, raw("ref"), raw("selector"), frame);
      case "select_option":
        return await actionSelectOption(pageId, raw("ref"), raw("values_json"), frame);
      case "tabs":
        return await actionTabs(pageId, text("tab_action"), num("index", -1));
      case "wait_for":
        return await actionWaitFor(pageId, num("wait_time", 0), raw("text"), raw("text_gone"));
      case "pdf":
        return await actionPdf(pageId, raw("path"));
      case "close":
        return await actionClose(pageId);
      case "install":
        return await actionInstall();
      default:
        return fail(`Unknown action: ${action}`);
    }
  } catch (e: any) {
    return fail(e.message);
  }
}
716
+
717
/**
 * Installs the Chromium build used by Playwright by running
 * `npx playwright install chromium`, with a two-minute limit.
 *
 * @returns A success result on exit code 0, otherwise a failure result that
 *          includes the collected stdout/stderr.
 */
async function actionInstall(): Promise<ContentPart[]> {
  return new Promise((resolve) => {
    // "error" and "close" can both fire for one child; report only once.
    let settled = false;
    const finish = (parts: ContentPart[]): void => {
      if (settled) return;
      settled = true;
      resolve(parts);
    };

    const child = spawn("npx", ["playwright", "install", "chromium"], {
      stdio: "pipe",
      timeout: 120000,
      // On Windows npx is a .cmd shim, which cannot be spawned without a shell.
      // The command line is fixed, so no untrusted text reaches the shell.
      shell: process.platform === "win32",
    });

    let output = "";
    child.stdout?.on("data", (d: Buffer) => { output += d.toString(); });
    child.stderr?.on("data", (d: Buffer) => { output += d.toString(); });

    child.on("close", (code: number | null, signal: NodeJS.Signals | null) => {
      if (code === 0) {
        finish(ok({ message: "Chromium browser installed" }));
      } else if (code === null) {
        // No exit code means the child was killed, e.g. by the timeout above.
        finish(fail(`Install failed (terminated by ${signal ?? "signal"}): ${output}`));
      } else {
        finish(fail(`Install failed (exit ${code}): ${output}`));
      }
    });
    child.on("error", (e: Error) => finish(fail(`Install failed: ${e.message}`)));
  });
}
733
+
734
/**
 * Shutdown hook: closes the browser if one is running (ignoring close errors)
 * and resets module state. Does nothing when no browser is running.
 */
export async function stopBrowser(): Promise<void> {
  const running = state.browser;
  if (!running) return;
  try {
    await running.close();
  } catch {}
  resetState();
}