floorp-mcp 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,681 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * floorp-mcp — an MCP server that drives the Floorp browser through its
4
+ * built-in automation API (http://127.0.0.1:58261, gated by
5
+ * `floorp.mcp.enabled` in about:config).
6
+ *
7
+ * MVP tool surface: tab management, page reading, and screenshots, operating on
8
+ * the user's real, logged-in session.
9
+ */
10
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
11
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
12
+ import { z } from "zod";
13
+ import { FloorpClient } from "./floorp-client.js";
14
+ import { realType, realKey, realClear, moveCursor, realClick, floorpWindowBounds } from "./os-input.js";
15
+ import { launchFloorp } from "./launch.js";
16
+ import { PRIVILEGED_SCHEME, assertNavigableUrl, assertUploadAllowed } from "./guards.js";
17
+ import { findInHtml } from "./html-find.js";
18
+ const client = new FloorpClient();
19
+ const server = new McpServer({
20
+ name: "floorp-mcp",
21
+ version: "1.5.0",
22
+ });
23
+ // -- helpers ------------------------------------------------------------------
24
+ // URL/upload guards live in ./guards, the `find` HTML search in ./html-find —
25
+ // both pure and unit-tested (test/unit/) without a live Floorp.
26
+ function textResult(text) {
27
+ return { content: [{ type: "text", text }] };
28
+ }
29
+ function errorResult(message) {
30
+ return { content: [{ type: "text", text: `Error: ${message}` }], isError: true };
31
+ }
32
+ /** Resolve a browserId (default: active tab) and run `fn` against an attached
33
+ * instance, always detaching afterwards so the user's tab stays open.
34
+ *
35
+ * Fast path: when a `browserId` is given and the caller doesn't need tab
36
+ * metadata (`opts.needTab`), we attach directly and skip the `listTabs`
37
+ * round-trip — most action tools (click/type/find/…) only use the instanceId.
38
+ * Tools that read `tab.url`/`tab.title` (screenshot, read_page) pass
39
+ * `needTab: true` to force the full lookup. */
40
+ async function withAttachedTab(browserId, fn, opts = {}) {
41
+ let tab;
42
+ if (browserId && !opts.needTab) {
43
+ // Fast path — no listTabs; attach straight to the requested tab.
44
+ if (String(browserId) === "0") {
45
+ throw new Error(`Tab browserId=0 is not loaded yet (Floorp lazy-loads tabs). ` +
46
+ `Click it in the browser to load it, then try again.`);
47
+ }
48
+ tab = { browserId: String(browserId), windowId: "", title: "", url: "", selected: false, pinned: false };
49
+ }
50
+ else {
51
+ const tabs = await client.listTabs();
52
+ const found = browserId
53
+ ? tabs.find((t) => t.browserId === String(browserId))
54
+ : tabs.find((t) => t.selected);
55
+ if (!found) {
56
+ throw new Error(browserId
57
+ ? `No tab with browserId=${browserId}. Run list_tabs to see current tabs.`
58
+ : "No active tab found.");
59
+ }
60
+ if (!found.browserId || found.browserId === "0") {
61
+ throw new Error(`Tab "${found.title}" is not loaded yet (Floorp lazy-loads tabs). ` +
62
+ `Click it in the browser to load it, then try again.`);
63
+ }
64
+ tab = found;
65
+ }
66
+ const instanceId = await client.attach(tab.browserId);
67
+ if (!instanceId) {
68
+ throw new Error(`Could not attach to tab (browserId=${tab.browserId}). ` +
69
+ `It may not be loaded yet — run list_tabs to check.`);
70
+ }
71
+ try {
72
+ return await fn(instanceId, tab);
73
+ }
74
+ finally {
75
+ await client.detach(instanceId).catch(() => { });
76
+ }
77
+ }
78
+ function formatTabList(tabs) {
79
+ if (tabs.length === 0)
80
+ return "No open tabs.";
81
+ return tabs
82
+ .map((t, i) => {
83
+ const marks = [t.selected ? "active" : null, t.pinned ? "pinned" : null]
84
+ .filter(Boolean)
85
+ .join(", ");
86
+ const suffix = marks ? ` [${marks}]` : "";
87
+ return `${i + 1}. ${t.title || "(untitled)"}${suffix}\n ${t.url}\n browserId: ${t.browserId}`;
88
+ })
89
+ .join("\n");
90
+ }
91
+ // -- tools --------------------------------------------------------------------
92
+ server.tool("list_tabs", "List all open tabs in Floorp (title, URL, browserId, and whether each is active or pinned). Use the browserId to target other tools.", {}, async () => {
93
+ try {
94
+ const tabs = await client.listTabs();
95
+ return textResult(formatTabList(tabs));
96
+ }
97
+ catch (err) {
98
+ return errorResult(err.message);
99
+ }
100
+ });
101
+ server.tool("open_tab", "Open a URL in a new Floorp tab.", {
102
+ url: z.string().url().describe("The URL to open (must include http:// or https://)."),
103
+ background: z
104
+ .boolean()
105
+ .optional()
106
+ .describe("Open in the background without focusing the new tab. Default: false."),
107
+ }, async ({ url, background }) => {
108
+ try {
109
+ assertNavigableUrl(url);
110
+ const instanceId = await client.createTab(url, { background, waitForLoad: true });
111
+ const [title, uri, browserId] = await Promise.all([
112
+ client.getTitle(instanceId),
113
+ client.getUri(instanceId),
114
+ client.getInstanceBrowserId(instanceId),
115
+ ]);
116
+ // Release the handle but leave the tab open for the user.
117
+ await client.detach(instanceId).catch(() => { });
118
+ return textResult(`Opened: ${title ?? "(untitled)"}\n${uri ?? url}` +
119
+ (browserId
120
+ ? `\nbrowserId: ${browserId} — pass this as browserId to target this exact tab (reliable across multiple windows).`
121
+ : ""));
122
+ }
123
+ catch (err) {
124
+ return errorResult(err.message);
125
+ }
126
+ });
127
+ server.tool("get_active_tab", "Return the active tab's title, URL and browserId. Note: with multiple browser windows open, 'active' is ambiguous — prefer the browserId returned by open_tab, or pick from list_tabs.", {}, async () => {
128
+ try {
129
+ const tab = await client.activeTab();
130
+ return textResult(`${tab.title || "(untitled)"}\n${tab.url}\nbrowserId: ${tab.browserId}`);
131
+ }
132
+ catch (err) {
133
+ return errorResult(err.message);
134
+ }
135
+ });
136
+ server.tool("navigate_tab", "Navigate a tab to a new URL. Targets the active tab unless a browserId is given.", {
137
+ url: z.string().url().describe("The URL to navigate to."),
138
+ browserId: z
139
+ .string()
140
+ .optional()
141
+ .describe("browserId of the tab to navigate (from list_tabs). Defaults to the active tab."),
142
+ }, async ({ url, browserId }) => {
143
+ try {
144
+ assertNavigableUrl(url);
145
+ const result = await withAttachedTab(browserId, async (instanceId) => {
146
+ await client.navigate(instanceId, url);
147
+ return await client.getUri(instanceId);
148
+ });
149
+ return textResult(`Navigated to ${result ?? url}`);
150
+ }
151
+ catch (err) {
152
+ return errorResult(err.message);
153
+ }
154
+ });
155
+ server.tool("close_tab", "Close a tab by its browserId (from list_tabs).", {
156
+ browserId: z.string().describe("browserId of the tab to close (from list_tabs)."),
157
+ }, async ({ browserId }) => {
158
+ try {
159
+ const tabs = await client.listTabs();
160
+ const tab = tabs.find((t) => t.browserId === String(browserId));
161
+ if (!tab)
162
+ return errorResult(`No tab with browserId=${browserId}.`);
163
+ if (!tab.browserId || tab.browserId === "0") {
164
+ return errorResult(`Tab "${tab.title}" is not loaded; cannot target it reliably.`);
165
+ }
166
+ const instanceId = await client.attach(tab.browserId);
167
+ if (!instanceId)
168
+ return errorResult(`Could not attach to tab "${tab.title}".`);
169
+ await client.closeTab(instanceId);
170
+ return textResult(`Closed: ${tab.title || tab.url}`);
171
+ }
172
+ catch (err) {
173
+ return errorResult(err.message);
174
+ }
175
+ });
176
+ server.tool("read_page", "Read a tab's content. Returns clean Markdown by default; can also return raw HTML or the accessibility tree. Output is capped (default 25 KB) to protect the context — to LOCATE a specific element use `find` (cheaper) instead. Targets the active tab unless a browserId is given.", {
177
+ browserId: z
178
+ .string()
179
+ .optional()
180
+ .describe("browserId of the tab to read (from list_tabs). Defaults to the active tab."),
181
+ format: z
182
+ .enum(["markdown", "html", "accessibility"])
183
+ .optional()
184
+ .describe("Output format. Default: markdown."),
185
+ maxChars: z
186
+ .number()
187
+ .int()
188
+ .min(0)
189
+ .max(5_000_000)
190
+ .optional()
191
+ .describe("Truncate output to this many characters. Default 25000. Pass 0 for no cap."),
192
+ }, async ({ browserId, format, maxChars }) => {
193
+ try {
194
+ const content = await withAttachedTab(browserId, async (instanceId, tab) => {
195
+ const header = `# ${tab.title || "(untitled)"}\n${tab.url}\n\n`;
196
+ if (format === "html")
197
+ return header + (await client.getHtml(instanceId));
198
+ if (format === "accessibility") {
199
+ return header + JSON.stringify(await client.getAccessibilityTree(instanceId), null, 2);
200
+ }
201
+ return header + (await client.getText(instanceId));
202
+ }, { needTab: true });
203
+ const cap = maxChars ?? 25000;
204
+ if (cap > 0 && content.length > cap) {
205
+ return textResult(content.slice(0, cap) +
206
+ `\n\n…[truncated ${content.length - cap} of ${content.length} chars. ` +
207
+ `Use 'find' to locate an element, 'snapshot' for a ref map, or raise maxChars.]`);
208
+ }
209
+ return textResult(content);
210
+ }
211
+ catch (err) {
212
+ return errorResult(err.message);
213
+ }
214
+ });
215
+ server.tool("screenshot", "Take a screenshot of a tab and return it as a PNG image. Targets the active tab unless a browserId is given.", {
216
+ browserId: z
217
+ .string()
218
+ .optional()
219
+ .describe("browserId of the tab to capture (from list_tabs). Defaults to the active tab."),
220
+ fullPage: z
221
+ .boolean()
222
+ .optional()
223
+ .describe("Capture the full scrollable page instead of just the viewport. Default: false."),
224
+ }, async ({ browserId, fullPage }) => {
225
+ try {
226
+ const image = await withAttachedTab(browserId, (instanceId, tab) => {
227
+ if (PRIVILEGED_SCHEME.test(tab.url)) {
228
+ throw new Error(`Cannot screenshot the browser-internal page "${tab.url}". ` +
229
+ `Open a normal web page (http/https) and try again.`);
230
+ }
231
+ return fullPage
232
+ ? client.fullPageScreenshot(instanceId)
233
+ : client.screenshot(instanceId);
234
+ }, { needTab: true });
235
+ if (!image)
236
+ return errorResult("Floorp returned no image.");
237
+ return {
238
+ content: [{ type: "image", data: image, mimeType: "image/png" }],
239
+ };
240
+ }
241
+ catch (err) {
242
+ return errorResult(err.message);
243
+ }
244
+ });
245
+ server.tool("find", "Locate elements on a tab by visible text and/or tag and get a ready-to-use CSS `selector` for each — one fast call that searches the page server-side and returns ~1 KB instead of the whole HTML. Use this INSTEAD of read_page to find a button, link, or field, then pass the returned selector straight to click/type/etc. Provide `text`, `tag`, or both. Active tab unless browserId given.", {
246
+ text: z
247
+ .string()
248
+ .optional()
249
+ .describe("Visible text to match (substring, case-insensitive)."),
250
+ tag: z
251
+ .string()
252
+ .optional()
253
+ .describe('Restrict to a tag, e.g. "button", "a", "input", "select".'),
254
+ limit: z.number().int().min(1).max(1000).optional().describe("Max matches to return. Default 25."),
255
+ browserId: z.string().optional().describe("Target tab (from list_tabs). Defaults to active."),
256
+ }, async ({ text, tag, limit, browserId }) => {
257
+ try {
258
+ if (!text && !tag)
259
+ return errorResult("Provide `text` and/or `tag` to search for.");
260
+ const html = await withAttachedTab(browserId, (id) => client.getHtml(id));
261
+ const found = findInHtml(html, { text, tag, limit: limit ?? 25 });
262
+ if (!found.length)
263
+ return textResult("No matching elements found.");
264
+ return textResult(`${found.length} match(es) — selector | tag | text:\n` +
265
+ found
266
+ .map((f) => `${f.selector} | ${f.tag} | ${JSON.stringify(f.text.slice(0, 60))}`)
267
+ .join("\n"));
268
+ }
269
+ catch (err) {
270
+ return errorResult(err.message);
271
+ }
272
+ });
273
+ server.tool("click", "Click an element by CSS selector OR by a `ref` (fingerprint) from `snapshot`. Auto-scrolls the element into view first (fixes off-screen 'not actionable'). Targets the active tab unless a browserId is given.", {
274
+ selector: z
275
+ .string()
276
+ .optional()
277
+ .describe('CSS selector, e.g. "button[type=submit]" or "a.login".'),
278
+ ref: z
279
+ .string()
280
+ .optional()
281
+ .describe('A fingerprint ref from `snapshot` (the value after "fp:"), as an alternative to selector.'),
282
+ browserId: z.string().optional().describe("Target tab (from list_tabs). Defaults to active."),
283
+ button: z.enum(["left", "right", "middle"]).optional().describe("Mouse button. Default: left."),
284
+ }, async ({ selector, ref, browserId, button }) => {
285
+ try {
286
+ if (!selector && !ref)
287
+ return errorResult("Provide a `selector` or a `ref`.");
288
+ await withAttachedTab(browserId, (id) => client.click(id, selector, { button, fingerprint: ref }));
289
+ return textResult(`Clicked: ${selector ?? `ref ${ref}`}`);
290
+ }
291
+ catch (err) {
292
+ return errorResult(err.message);
293
+ }
294
+ });
295
+ server.tool("snapshot", "Capture a structured snapshot of a tab: clean Markdown with inline fingerprint refs (`<!--fp:...-->`) and an 'Element Selector Map' (fp | tag | text). Use this instead of read_page+grep to locate elements, then pass a `ref` to `click`. Targets the active tab unless a browserId is given.", {
296
+ browserId: z.string().optional().describe("Target tab (from list_tabs). Defaults to active."),
297
+ }, async ({ browserId }) => {
298
+ try {
299
+ const text = await withAttachedTab(browserId, (id) => client.snapshot(id));
300
+ return textResult(text || "(empty page)");
301
+ }
302
+ catch (err) {
303
+ return errorResult(err.message);
304
+ }
305
+ });
306
+ server.tool("type_text", "Type text into an input or textarea by CSS selector (clears it first by default). Targets the active tab unless a browserId is given.", {
307
+ selector: z.string().describe("CSS selector of the input/textarea."),
308
+ text: z.string().max(100_000).describe("The text to type."),
309
+ clear: z.boolean().optional().describe("Clear the field before typing. Default: true."),
310
+ browserId: z.string().optional().describe("Target tab (from list_tabs). Defaults to active."),
311
+ }, async ({ selector, text, clear, browserId }) => {
312
+ try {
313
+ const method = await withAttachedTab(browserId, async (id) => {
314
+ if (clear !== false)
315
+ await client.clearInput(id, selector).catch(() => { });
316
+ try {
317
+ await client.input(id, selector, text);
318
+ return "input";
319
+ }
320
+ catch {
321
+ // Rich / contenteditable editors (Slate, ProseMirror, Lexical…) have no
322
+ // `.value`, so `input` fails. Fall back to a real text-input event.
323
+ await client.dispatchTextInput(id, selector, text);
324
+ return "rich-text";
325
+ }
326
+ });
327
+ return textResult(`Typed into ${selector} (${method}).`);
328
+ }
329
+ catch (err) {
330
+ return errorResult(err.message);
331
+ }
332
+ });
333
+ server.tool("fill_form", "Fill multiple form fields at once. `fields` maps CSS selectors (or field names) to values. Targets the active tab unless a browserId is given.", {
334
+ fields: z
335
+ .record(z.string().max(2000), z.string().max(100_000))
336
+ .refine((o) => Object.keys(o).length <= 200, { message: "Too many fields (max 200)." })
337
+ .describe('Map of selector/name to value, e.g. { "#email": "a@b.com", "#password": "secret" }.'),
338
+ browserId: z.string().optional().describe("Target tab (from list_tabs). Defaults to active."),
339
+ }, async ({ fields, browserId }) => {
340
+ try {
341
+ await withAttachedTab(browserId, (id) => client.fillForm(id, fields));
342
+ return textResult(`Filled ${Object.keys(fields).length} field(s).`);
343
+ }
344
+ catch (err) {
345
+ return errorResult(err.message);
346
+ }
347
+ });
348
+ server.tool("press_key", 'Press a keyboard key in the page (e.g. "Enter", "Tab", "Escape", "ArrowDown"). Targets the active tab unless a browserId is given.', {
349
+ key: z.string().max(100).describe('Key name, e.g. "Enter".'),
350
+ browserId: z.string().optional().describe("Target tab (from list_tabs). Defaults to active."),
351
+ }, async ({ key, browserId }) => {
352
+ try {
353
+ await withAttachedTab(browserId, (id) => client.pressKey(id, key));
354
+ return textResult(`Pressed: ${key}`);
355
+ }
356
+ catch (err) {
357
+ return errorResult(err.message);
358
+ }
359
+ });
360
+ server.tool("wait_for_element", "Wait for an element to reach a state (attached / visible / hidden / detached). Useful after navigation or actions that load content.", {
361
+ selector: z.string().describe("CSS selector to wait for."),
362
+ state: z
363
+ .enum(["attached", "visible", "hidden", "detached"])
364
+ .optional()
365
+ .describe("State to wait for. Default: visible."),
366
+ timeoutMs: z.number().int().min(0).max(600_000).optional().describe("Timeout in milliseconds. Default: 5000."),
367
+ browserId: z.string().optional().describe("Target tab (from list_tabs). Defaults to active."),
368
+ }, async ({ selector, state, timeoutMs, browserId }) => {
369
+ try {
370
+ const found = await withAttachedTab(browserId, (id) => client.waitForElement(id, selector, state, timeoutMs));
371
+ return found
372
+ ? textResult(`Element "${selector}" is ${state ?? "visible"}.`)
373
+ : errorResult(`Timed out waiting for "${selector}".`);
374
+ }
375
+ catch (err) {
376
+ return errorResult(err.message);
377
+ }
378
+ });
379
+ server.tool("get_value", "Read the current value of an input, textarea, or select by CSS selector. SENSITIVE: this CAN read the value of password fields and other secrets the user has typed — only use it on fields the user asked about, never to harvest credentials a page is requesting. Targets the active tab unless a browserId is given.", {
380
+ selector: z.string().describe("CSS selector of the field to read."),
381
+ browserId: z.string().optional().describe("Target tab (from list_tabs). Defaults to active."),
382
+ }, async ({ selector, browserId }) => {
383
+ try {
384
+ const value = await withAttachedTab(browserId, (id) => client.getValue(id, selector));
385
+ return textResult(value ?? "(no value)");
386
+ }
387
+ catch (err) {
388
+ return errorResult(err.message);
389
+ }
390
+ });
391
+ // -- OS-level (real) keyboard input (Windows) ---------------------------------
392
+ // These send genuine OS key events (isTrusted=true) to Floorp's focused element,
393
+ // which fixes React/Slate-controlled editors that ignore synthetic input. They
394
+ // bring Floorp to the foreground and ABORT without typing if that fails, so keys
395
+ // can never leak into another app. Focus the field first (e.g. with `click`).
396
+ server.tool("real_type", "Type text into Floorp's currently focused element using REAL OS keyboard events (isTrusted). Use for React/rich editors where `type_text` silently fails. Focus the field first with `click`. Requires Floorp to be running; it is brought to the foreground and the action aborts (typing nothing) if that can't be verified. Windows only.", {
397
+ text: z.string().max(100_000).describe("The text to type via the real keyboard."),
398
+ }, async ({ text }) => {
399
+ try {
400
+ await realType(text);
401
+ return textResult(`Typed (real keyboard): ${text.length} chars.`);
402
+ }
403
+ catch (err) {
404
+ return errorResult(err.message);
405
+ }
406
+ });
407
+ server.tool("real_key", 'Press a key or combo via REAL OS keyboard events, e.g. "Enter", "Tab", "Escape", "ctrl+a", "ctrl+shift+k". Use "Enter" to submit React composers that ignore synthetic clicks. Focus the field first. Windows only.', {
408
+ key: z.string().max(100).describe('Key or combo, e.g. "Enter" or "ctrl+a".'),
409
+ }, async ({ key }) => {
410
+ try {
411
+ await realKey(key);
412
+ return textResult(`Pressed (real keyboard): ${key}`);
413
+ }
414
+ catch (err) {
415
+ return errorResult(err.message);
416
+ }
417
+ });
418
+ server.tool("real_clear", "Select-all + delete via REAL OS keyboard events — reliably clears a focused rich/contenteditable editor (where synthetic Ctrl+A does not work). Focus the field first with `click`. Windows only.", {}, async () => {
419
+ try {
420
+ await realClear();
421
+ return textResult("Cleared focused field (real keyboard).");
422
+ }
423
+ catch (err) {
424
+ return errorResult(err.message);
425
+ }
426
+ });
427
+ // -- more interaction & query tools (v0.6.0) ----------------------------------
428
+ function targetDesc(selector, ref) {
429
+ return selector ?? (ref ? `ref ${ref}` : "?");
430
+ }
431
+ server.tool("hover", "Hover the mouse over an element (CSS selector or `ref`). Auto-scrolls into view. Active tab unless browserId given.", {
432
+ selector: z.string().optional().describe("CSS selector."),
433
+ ref: z.string().optional().describe("Fingerprint ref from snapshot."),
434
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
435
+ }, async ({ selector, ref, browserId }) => {
436
+ try {
437
+ if (!selector && !ref)
438
+ return errorResult("Provide a `selector` or a `ref`.");
439
+ await withAttachedTab(browserId, (id) => client.hover(id, selector, ref));
440
+ return textResult(`Hovered: ${targetDesc(selector, ref)}`);
441
+ }
442
+ catch (err) {
443
+ return errorResult(err.message);
444
+ }
445
+ });
446
+ server.tool("double_click", "Double-click an element (CSS selector or `ref`). Auto-scrolls into view. Active tab unless browserId given.", {
447
+ selector: z.string().optional().describe("CSS selector."),
448
+ ref: z.string().optional().describe("Fingerprint ref from snapshot."),
449
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
450
+ }, async ({ selector, ref, browserId }) => {
451
+ try {
452
+ if (!selector && !ref)
453
+ return errorResult("Provide a `selector` or a `ref`.");
454
+ await withAttachedTab(browserId, (id) => client.doubleClick(id, selector, ref));
455
+ return textResult(`Double-clicked: ${targetDesc(selector, ref)}`);
456
+ }
457
+ catch (err) {
458
+ return errorResult(err.message);
459
+ }
460
+ });
461
+ server.tool("right_click", "Right-click (context menu) an element (CSS selector or `ref`). Auto-scrolls into view. Active tab unless browserId given.", {
462
+ selector: z.string().optional().describe("CSS selector."),
463
+ ref: z.string().optional().describe("Fingerprint ref from snapshot."),
464
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
465
+ }, async ({ selector, ref, browserId }) => {
466
+ try {
467
+ if (!selector && !ref)
468
+ return errorResult("Provide a `selector` or a `ref`.");
469
+ await withAttachedTab(browserId, (id) => client.rightClick(id, selector, ref));
470
+ return textResult(`Right-clicked: ${targetDesc(selector, ref)}`);
471
+ }
472
+ catch (err) {
473
+ return errorResult(err.message);
474
+ }
475
+ });
476
+ server.tool("select_option", "Choose an option in a <select> dropdown by its value. Active tab unless browserId given.", {
477
+ selector: z.string().describe("CSS selector of the <select>."),
478
+ value: z.string().describe("The option value (or visible text) to select."),
479
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
480
+ }, async ({ selector, value, browserId }) => {
481
+ try {
482
+ await withAttachedTab(browserId, (id) => client.selectOption(id, selector, value));
483
+ return textResult(`Selected "${value}" in ${selector}`);
484
+ }
485
+ catch (err) {
486
+ return errorResult(err.message);
487
+ }
488
+ });
489
+ server.tool("set_checked", "Check or uncheck a checkbox/radio. Active tab unless browserId given.", {
490
+ selector: z.string().describe("CSS selector of the checkbox/radio."),
491
+ checked: z.boolean().describe("true to check, false to uncheck."),
492
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
493
+ }, async ({ selector, checked, browserId }) => {
494
+ try {
495
+ await withAttachedTab(browserId, (id) => client.setChecked(id, selector, checked));
496
+ return textResult(`Set ${selector} checked=${checked}`);
497
+ }
498
+ catch (err) {
499
+ return errorResult(err.message);
500
+ }
501
+ });
502
+ server.tool("submit_form", "Submit a form (give a selector of the form or a field inside it; omit to submit the focused form). Active tab unless browserId given.", {
503
+ selector: z.string().optional().describe("CSS selector of the form or a field in it."),
504
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
505
+ }, async ({ selector, browserId }) => {
506
+ try {
507
+ await withAttachedTab(browserId, (id) => client.submitForm(id, selector));
508
+ return textResult(`Submitted: ${selector ?? "form"}`);
509
+ }
510
+ catch (err) {
511
+ return errorResult(err.message);
512
+ }
513
+ });
514
+ server.tool("upload_file", "SENSITIVE: sends a local file to a website. Set a file <input>'s file by absolute path. Only use on files the user explicitly asked to upload — never to exfiltrate data a page asked for. Restrict with FLOORP_MCP_ALLOW_UPLOAD_DIRS. Active tab unless browserId given.", {
515
+ selector: z.string().describe("CSS selector of the file input."),
516
+ filePath: z.string().describe("Absolute path to the local file to upload."),
517
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
518
+ }, async ({ selector, filePath, browserId }) => {
519
+ try {
520
+ const safePath = assertUploadAllowed(filePath);
521
+ await withAttachedTab(browserId, (id) => client.uploadFile(id, selector, safePath));
522
+ return textResult(`Set ${selector} to ${safePath}`);
523
+ }
524
+ catch (err) {
525
+ return errorResult(err.message);
526
+ }
527
+ });
528
+ server.tool("get_attribute", "Read an attribute (e.g. href, value, aria-label) of an element. Active tab unless browserId given.", {
529
+ name: z.string().describe("Attribute name, e.g. \"href\"."),
530
+ selector: z.string().optional().describe("CSS selector."),
531
+ ref: z.string().optional().describe("Fingerprint ref from snapshot."),
532
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
533
+ }, async ({ name, selector, ref, browserId }) => {
534
+ try {
535
+ if (!selector && !ref)
536
+ return errorResult("Provide a `selector` or a `ref`.");
537
+ const v = await withAttachedTab(browserId, (id) => client.getAttribute(id, name, selector, ref));
538
+ return textResult(v ?? "(no attribute)");
539
+ }
540
+ catch (err) {
541
+ return errorResult(err.message);
542
+ }
543
+ });
544
+ server.tool("get_article", "Extract the main article of a page (Readability) as clean Markdown with title and byline — great for reading content pages. Active tab unless browserId given.", {
545
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
546
+ }, async ({ browserId }) => {
547
+ try {
548
+ const a = await withAttachedTab(browserId, (id) => client.getArticle(id));
549
+ if (!a || !a.markdown)
550
+ return errorResult("No readable article found on this page.");
551
+ const head = `# ${a.title ?? "(untitled)"}${a.byline ? `\n*${a.byline}*` : ""}\n\n`;
552
+ return textResult(head + a.markdown);
553
+ }
554
+ catch (err) {
555
+ return errorResult(err.message);
556
+ }
557
+ });
558
+ server.tool("get_cookies", "SENSITIVE: list cookies visible to the current page. Values (session tokens!) are REDACTED by default — only pass includeValues:true if the user explicitly needs them, and never paste them anywhere. Active tab unless browserId given.", {
559
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
560
+ includeValues: z
561
+ .boolean()
562
+ .optional()
563
+ .describe("Include raw cookie values (session tokens — highly sensitive). Default: false."),
564
+ }, async ({ browserId, includeValues }) => {
565
+ try {
566
+ const c = await withAttachedTab(browserId, (id) => client.getCookies(id));
567
+ const out = includeValues
568
+ ? c
569
+ : c.map((k) => k && typeof k === "object" && "value" in k
570
+ ? {
571
+ ...k,
572
+ value: `(redacted ${String(k.value).length} chars — pass includeValues:true if truly needed)`,
573
+ }
574
+ : k);
575
+ return textResult(JSON.stringify(out, null, 2));
576
+ }
577
+ catch (err) {
578
+ return errorResult(err.message);
579
+ }
580
+ });
581
+ server.tool("wait_for_network_idle", "Wait until the page's network activity settles (useful after navigation or SPA actions). Active tab unless browserId given.", {
582
+ timeoutMs: z.number().int().min(0).max(600_000).optional().describe("Max wait in ms. Default: 8000."),
583
+ browserId: z.string().optional().describe("Target tab. Defaults to active."),
584
+ }, async ({ timeoutMs, browserId }) => {
585
+ try {
586
+ const ok = await withAttachedTab(browserId, (id) => client.waitForNetworkIdle(id, timeoutMs));
587
+ return ok ? textResult("Network is idle.") : errorResult("Network did not become idle in time.");
588
+ }
589
+ catch (err) {
590
+ return errorResult(err.message);
591
+ }
592
+ });
593
+ server.tool("list_workspaces", "List Floorp workspaces (id and name). Floorp-specific.", {}, async () => {
594
+ try {
595
+ const ws = await client.listWorkspaces();
596
+ if (!ws.length)
597
+ return textResult("No workspaces.");
598
+ return textResult(ws.map((w) => `${w.id} ${w.name}`).join("\n"));
599
+ }
600
+ catch (err) {
601
+ const m = err.message;
602
+ if (/404|not found/i.test(m)) {
603
+ return errorResult("The Workspaces API isn't available on this Floorp build.");
604
+ }
605
+ return errorResult(m);
606
+ }
607
+ });
608
+ server.tool("switch_workspace", "Switch to a Floorp workspace by id (from list_workspaces). Floorp-specific.", {
609
+ id: z.string().describe("Workspace id."),
610
+ }, async ({ id }) => {
611
+ try {
612
+ const ok = await client.switchWorkspace(id);
613
+ return ok ? textResult(`Switched to workspace ${id}`) : errorResult("Switch failed.");
614
+ }
615
+ catch (err) {
616
+ const m = err.message;
617
+ if (/404|not found/i.test(m)) {
618
+ return errorResult("The Workspaces API isn't available on this Floorp build.");
619
+ }
620
+ return errorResult(m);
621
+ }
622
+ });
623
+ // -- OS-level (real) mouse (Windows) (v1.0.0) ---------------------------------
624
+ // Coordinates are SCREEN pixels and must fall inside the Floorp window. Call
625
+ // window_bounds first to get the valid range. Same foreground guard as the
626
+ // keyboard, plus a bounds check, so a click can never land in another app.
627
+ server.tool("window_bounds", "Return Floorp's window rectangle in screen pixels (left, top, right, bottom, width, height). Use this to compute coordinates for move_cursor / real_click. Windows only.", {}, async () => {
628
+ try {
629
+ const b = await floorpWindowBounds();
630
+ return textResult(`Floorp window (screen px): left=${b.left} top=${b.top} right=${b.right} bottom=${b.bottom} (${b.width}x${b.height})`);
631
+ }
632
+ catch (err) {
633
+ return errorResult(err.message);
634
+ }
635
+ });
636
+ server.tool("move_cursor", "Move the REAL OS cursor to a screen pixel (must be inside the Floorp window). Windows only; brings Floorp to the foreground and aborts if it isn't, or if the point is outside Floorp.", {
637
+ x: z.number().int().min(-100_000).max(100_000).describe("Screen X (pixels)."),
638
+ y: z.number().int().min(-100_000).max(100_000).describe("Screen Y (pixels)."),
639
+ }, async ({ x, y }) => {
640
+ try {
641
+ await moveCursor(x, y);
642
+ return textResult(`Moved cursor to (${x}, ${y}).`);
643
+ }
644
+ catch (err) {
645
+ return errorResult(err.message);
646
+ }
647
+ });
648
+ server.tool("real_click", "Click with the REAL OS mouse at a screen pixel inside the Floorp window (genuine, isTrusted click). Use window_bounds to find the range. Refuses to click outside Floorp or if Floorp isn't foreground. Windows only.", {
649
+ x: z.number().int().min(-100_000).max(100_000).describe("Screen X (pixels)."),
650
+ y: z.number().int().min(-100_000).max(100_000).describe("Screen Y (pixels)."),
651
+ button: z.enum(["left", "right"]).optional().describe("Mouse button. Default: left."),
652
+ double: z.boolean().optional().describe("Double-click. Default: false."),
653
+ }, async ({ x, y, button, double }) => {
654
+ try {
655
+ await realClick(x, y, { button, double });
656
+ return textResult(`${double ? "Double-" : ""}${button === "right" ? "Right-" : ""}clicked at (${x}, ${y}).`);
657
+ }
658
+ catch (err) {
659
+ return errorResult(err.message);
660
+ }
661
+ });
662
+ server.tool("launch_floorp", "Ensure Floorp is running: if its automation API isn't reachable, launch the Floorp app and wait for it to come up. No-op if already running. Windows only (set FLOORP_PATH to override the exe location).", {}, async () => {
663
+ try {
664
+ return textResult(await launchFloorp(client));
665
+ }
666
+ catch (err) {
667
+ return errorResult(err.message);
668
+ }
669
+ });
670
+ // -- startup ------------------------------------------------------------------
671
+ async function main() {
672
+ const transport = new StdioServerTransport();
673
+ await server.connect(transport);
674
+ // stderr is safe; stdout is reserved for the MCP protocol.
675
+ console.error("floorp-mcp server running on stdio");
676
+ }
677
+ main().catch((err) => {
678
+ console.error("Fatal:", err);
679
+ process.exit(1);
680
+ });
681
+ //# sourceMappingURL=index.js.map