mcpbrowser 0.2.34 → 0.2.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +149 -4
- package/package.json +4 -2
- package/server.json +1 -1
- package/src/actions/click-element.js +176 -0
- package/src/actions/close-tab.js +100 -0
- package/src/actions/fetch-page.js +130 -0
- package/src/actions/get-current-html.js +53 -0
- package/src/actions/type-text.js +107 -0
- package/src/core/auth.js +130 -0
- package/src/core/browser.js +256 -0
- package/src/core/html.js +136 -0
- package/src/core/page.js +122 -0
- package/src/mcp-browser.js +147 -818
- package/src/utils.js +78 -0
- package/tests/README.md +147 -48
- package/tests/actions/click-element.test.js +75 -0
- package/tests/actions/close-tab.test.js +368 -0
- package/tests/{integration.test.js → actions/fetch-page.test.js} +57 -11
- package/tests/actions/get-current-html.test.js +101 -0
- package/tests/actions/type-text.test.js +84 -0
- package/tests/{auth-flow.test.js → core/auth.test.js} +1 -1
- package/tests/{domain-tab-pooling.test.js → core/browser.test.js} +1 -1
- package/tests/{prepare-html.test.js → core/html.test.js} +46 -26
- package/tests/{redirect-detection.test.js → core/page.test.js} +1 -1
- package/tests/mcp-browser.test.js +190 -0
- package/tests/run-all.js +100 -33
- package/tests/run-unit.js +98 -0
- package/tests/mcp-server.test.js +0 -154
package/src/mcp-browser.js
CHANGED
|
@@ -1,806 +1,32 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import puppeteer from "puppeteer-core";
|
|
3
|
-
import { existsSync } from "fs";
|
|
4
|
-
import os from "os";
|
|
5
|
-
import path from "path";
|
|
6
|
-
import { spawn } from "child_process";
|
|
7
|
-
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
8
|
-
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
9
|
-
import { ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js";
|
|
10
|
-
|
|
11
|
-
// Host of the Chrome DevTools HTTP endpoint; overridable via CHROME_REMOTE_DEBUG_HOST.
const chromeHost = process.env.CHROME_REMOTE_DEBUG_HOST || "127.0.0.1";
// Port of the Chrome DevTools endpoint. A non-numeric or out-of-range
// CHROME_REMOTE_DEBUG_PORT would otherwise become NaN (or an unusable port) and be
// interpolated into the DevTools URL and the --remote-debugging-port flag, so any
// value that is not a valid TCP port falls back to the default 9222.
const chromePort = (() => {
  const parsed = Number(process.env.CHROME_REMOTE_DEBUG_PORT);
  return Number.isInteger(parsed) && parsed > 0 && parsed <= 65535 ? parsed : 9222;
})();
// Optional WebSocket endpoint supplied by the user via CHROME_WS_ENDPOINT.
const explicitWSEndpoint = process.env.CHROME_WS_ENDPOINT;
|
|
14
|
-
|
|
15
|
-
/**
 * Build the path of the dedicated Chrome debugging profile directory.
 * The directory lives under the user's home folder, in a location chosen per
 * operating system, so it does not collide with the user's main Chrome profile.
 * @returns {string} Platform-specific path to the Chrome debug profile directory
 */
function getDefaultUserDataDir() {
  // Home-relative profile locations for the platforms that have a special case.
  const profileDirByPlatform = {
    win32: "AppData/Local/MCPBrowser/ChromeDebug",
    darwin: "Library/Application Support/MCPBrowser/ChromeDebug",
  };
  // Every other platform uses the ~/.config location.
  const relativeDir = profileDirByPlatform[os.platform()] ?? ".config/MCPBrowser/ChromeDebug";

  return path.join(os.homedir(), relativeDir);
}
|
|
33
|
-
|
|
34
|
-
const userDataDir = process.env.CHROME_USER_DATA_DIR || getDefaultUserDataDir();
|
|
35
|
-
const chromePathEnv = process.env.CHROME_PATH;
|
|
36
|
-
|
|
37
|
-
/**
 * List the locations where Chrome, Edge or Chromium executables are looked for
 * on the current operating system.
 * @returns {string[]} Candidate browser executable paths, in lookup order
 */
function getDefaultChromePaths() {
  switch (os.platform()) {
    case "win32":
      return [
        "C:/Program Files/Google/Chrome/Application/chrome.exe",
        "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe",
        "C:/Program Files/Microsoft/Edge/Application/msedge.exe",
        "C:/Program Files (x86)/Microsoft/Edge/Application/msedge.exe",
      ];
    case "darwin":
      return [
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
      ];
    default:
      // Any other platform gets the Linux-style install locations.
      return [
        "/usr/bin/google-chrome",
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
        "/usr/bin/microsoft-edge",
        "/opt/microsoft/msedge/msedge",
      ];
  }
}
|
|
67
|
-
|
|
68
|
-
const defaultChromePaths = getDefaultChromePaths();
|
|
69
|
-
|
|
70
|
-
let cachedBrowser = null;
|
|
71
|
-
let domainPages = new Map(); // hostname -> page mapping for tab reuse across domains
|
|
72
|
-
let chromeLaunchPromise = null; // prevent multiple simultaneous launches
|
|
73
|
-
|
|
74
|
-
/**
 * Check whether the Chrome DevTools Protocol endpoint is up and responding.
 * Queries `/json/version` on the configured host/port and reports success only
 * when the response carries a `webSocketDebuggerUrl`.
 * Any failure (connection refused, timeout, non-2xx status, invalid JSON) is
 * reported as "not available" rather than thrown.
 * @returns {Promise<boolean>} True if the DevTools endpoint is accessible, false otherwise
 */
async function devtoolsAvailable() {
  try {
    const url = `http://${chromeHost}:${chromePort}/json/version`;
    // Bound the probe: without a timeout, a port that accepts the connection but
    // never answers would stall every caller (including the launch polling loop).
    const res = await fetch(url, { method: "GET", signal: AbortSignal.timeout(5000) });
    if (!res.ok) return false;
    const data = await res.json();
    return Boolean(data.webSocketDebuggerUrl);
  } catch {
    return false;
  }
}
|
|
89
|
-
|
|
90
|
-
/**
 * Locate a Chrome/Edge executable on disk.
 * The path from the CHROME_PATH environment variable (when set) is tried first,
 * followed by the platform default locations; the first existing path wins.
 * @returns {string|undefined} Path to the browser executable, or undefined if none exists
 */
function findChromePath() {
  const candidates = chromePathEnv
    ? [chromePathEnv, ...defaultChromePaths]
    : defaultChromePaths;

  return candidates.find((candidate) => existsSync(candidate));
}
|
|
98
|
-
|
|
99
|
-
/**
 * Launch Chrome with remote debugging enabled if it is not already running.
 * Does nothing when an explicit WebSocket endpoint is configured or when the
 * DevTools endpoint already answers. Concurrent callers share one in-flight
 * launch via `chromeLaunchPromise`. After spawning, polls the DevTools endpoint
 * every 500 ms for up to 20 seconds.
 * @returns {Promise<void>}
 * @throws {Error} If no browser executable is found, the process fails to spawn,
 *   or DevTools does not become available before the deadline
 */
async function launchChromeIfNeeded() {
  if (explicitWSEndpoint) return; // user provided explicit endpoint; assume managed externally

  // If Chrome is already available, don't launch
  if (await devtoolsAvailable()) return;

  // If a launch is already in progress, wait for it
  if (chromeLaunchPromise) {
    return await chromeLaunchPromise;
  }

  // Create a new launch promise to prevent multiple simultaneous launches
  chromeLaunchPromise = (async () => {
    try {
      // Double-check after acquiring the launch lock
      if (await devtoolsAvailable()) return;

      const chromePath = findChromePath();
      if (!chromePath) {
        throw new Error("Chrome/Edge not found. Set CHROME_PATH to your browser executable.");
      }

      const args = [
        `--remote-debugging-port=${chromePort}`,
        `--user-data-dir=${userDataDir}`,
        '--no-first-run', // Skip first run experience
        '--no-default-browser-check', // Skip default browser check
        '--disable-sync', // Disable Chrome sync prompts
        'about:blank' // Open with a blank page
      ];
      const child = spawn(chromePath, args, { detached: true, stdio: "ignore" });

      // A ChildProcess reports spawn failures (EACCES, ENOENT, ...) through an
      // 'error' event. Without a listener that event becomes an uncaught
      // exception and takes the whole server down, so capture it and surface it
      // from the polling loop below instead.
      let spawnError = null;
      child.on("error", (err) => {
        spawnError = err;
      });
      child.unref();

      // Wait for DevTools to come up
      const deadline = Date.now() + 20000;
      while (Date.now() < deadline) {
        if (spawnError) {
          throw new Error(
            `Failed to launch browser at ${chromePath}: ${spawnError.message}`,
            { cause: spawnError }
          );
        }
        if (await devtoolsAvailable()) return;
        await new Promise((r) => setTimeout(r, 500));
      }
      throw new Error("Chrome did not become available on DevTools port; check CHROME_PATH/port/profile.");
    } finally {
      // Release the launch lock whether the launch succeeded or failed.
      chromeLaunchPromise = null;
    }
  })();

  return await chromeLaunchPromise;
}
|
|
153
|
-
|
|
154
|
-
/**
 * Determine the WebSocket URL used to connect to Chrome over the DevTools Protocol.
 * An explicitly configured endpoint is returned as-is; otherwise the URL is read
 * from the `/json/version` document served by the DevTools HTTP endpoint.
 * @returns {Promise<string>} The WebSocket URL for connecting to Chrome
 * @throws {Error} If DevTools answers with a non-OK status or the response has no
 *   `webSocketDebuggerUrl`
 */
async function resolveWSEndpoint() {
  if (explicitWSEndpoint) {
    return explicitWSEndpoint;
  }

  const versionUrl = `http://${chromeHost}:${chromePort}/json/version`;
  const response = await fetch(versionUrl);
  if (!response.ok) {
    throw new Error(`Unable to reach Chrome devtools at ${versionUrl}: ${response.status}`);
  }

  const versionInfo = await response.json();
  const endpoint = versionInfo.webSocketDebuggerUrl;
  if (!endpoint) {
    throw new Error("No webSocketDebuggerUrl in /json/version response");
  }
  return endpoint;
}
|
|
173
|
-
|
|
174
|
-
/**
 * Populate the hostname-to-tab map from tabs that are already open in the browser,
 * so that tabs can be reused after a reconnection.
 * Tabs showing about:blank, chrome://, chrome-extension:// or devtools:// URLs are
 * ignored, and a hostname that is already mapped keeps its existing tab.
 * Failures are logged as warnings and never thrown.
 * @param {Browser} browser - The Puppeteer browser instance
 * @returns {Promise<void>}
 */
async function rebuildDomainPagesMap(browser) {
  const internalPrefixes = ['chrome://', 'chrome-extension://', 'devtools://'];
  const isInternalUrl = (candidate) =>
    !candidate ||
    candidate === 'about:blank' ||
    internalPrefixes.some((prefix) => candidate.startsWith(prefix));

  try {
    const openTabs = await browser.pages();
    console.error(`[MCPBrowser] Reconnected to browser with ${openTabs.length} existing tabs`);

    for (const tab of openTabs) {
      try {
        const tabUrl = tab.url();
        if (isInternalUrl(tabUrl)) {
          continue;
        }

        const tabHost = new URL(tabUrl).hostname;
        if (tabHost && !domainPages.has(tabHost)) {
          domainPages.set(tabHost, tab);
          console.error(`[MCPBrowser] Mapped existing tab for domain: ${tabHost} (${tabUrl})`);
        }
      } catch {
        // Tabs that are inaccessible or have unparseable URLs are simply skipped.
      }
    }

    if (domainPages.size > 0) {
      console.error(`[MCPBrowser] Restored ${domainPages.size} domain-to-tab mappings`);
    }
  } catch (err) {
    console.error(`[MCPBrowser] Warning: Could not rebuild domain pages map: ${err.message}`);
  }
}
|
|
216
|
-
|
|
217
|
-
/**
 * Get or create a connection to the Chrome browser.
 * Ensures Chrome is running, returns the cached connection while it is still
 * connected, and otherwise opens a new Puppeteer connection and rebuilds the
 * domain-to-tab mapping from the tabs that are already open.
 * @returns {Promise<Browser>} Connected Puppeteer browser instance
 */
async function getBrowser() {
  await launchChromeIfNeeded();
  if (cachedBrowser && cachedBrowser.isConnected()) return cachedBrowser;

  const wsEndpoint = await resolveWSEndpoint();
  const browser = await puppeteer.connect({
    browserWSEndpoint: wsEndpoint,
    defaultViewport: null,
  });

  browser.on("disconnected", () => {
    // Only reset shared state when this connection is still the active one.
    // Otherwise a late "disconnected" event from a superseded connection would
    // wipe the cache and tab mappings that belong to a newer connection.
    if (cachedBrowser !== browser) return;
    cachedBrowser = null;
    domainPages.clear(); // Clear all domain page mappings
  });
  cachedBrowser = browser;

  // Rebuild domainPages map from existing tabs to enable reuse across reconnections
  await rebuildDomainPagesMap(browser);

  return browser;
}
|
|
241
|
-
|
|
242
|
-
// Common two-label public suffixes under which the registrable domain has three
// labels. This is a small curated subset, not the complete Public Suffix List.
const MULTI_LABEL_PUBLIC_SUFFIXES = new Set([
  'co.uk', 'org.uk', 'ac.uk', 'gov.uk',
  'com.au', 'net.au', 'org.au',
  'co.nz', 'co.jp', 'co.kr', 'co.in', 'co.za',
  'com.br', 'com.cn', 'com.mx', 'com.tr', 'com.sg', 'com.hk', 'com.tw',
]);

/**
 * Extract the base domain from a hostname (e.g., "mail.google.com" → "google.com").
 *
 * - IP address literals are returned unchanged, since taking their last two
 *   labels ("1.10" for "192.168.1.10") is meaningless.
 * - For hostnames ending in a known two-label public suffix such as "co.uk",
 *   three labels are kept ("www.example.co.uk" → "example.co.uk"), so that
 *   unrelated sites under the same suffix do not share a base domain.
 * - Single-label hostnames (e.g. "localhost") are returned unchanged.
 *
 * @param {string} hostname - The hostname to parse
 * @returns {string} The base domain
 */
function getBaseDomain(hostname) {
  // IPv4 dotted quad, or anything containing ':' (IPv6 literal such as "[::1]").
  const isIpLiteral = /^\d{1,3}(\.\d{1,3}){3}$/.test(hostname) || hostname.includes(':');
  if (isIpLiteral) {
    return hostname;
  }

  const parts = hostname.split('.');
  if (parts.length < 2) {
    return hostname;
  }

  const lastTwo = parts.slice(-2).join('.');
  if (parts.length >= 3 && MULTI_LABEL_PUBLIC_SUFFIXES.has(lastTwo.toLowerCase())) {
    return parts.slice(-3).join('.');
  }
  return lastTwo;
}
|
|
254
|
-
|
|
255
|
-
/**
 * Detect whether a URL looks like an authentication page.
 *
 * A URL is treated as auth-related when either:
 * - its path is exactly one of the known auth segments (e.g. "/login") or starts
 *   with one followed by "/" (query string and fragment are ignored), or
 * - its hostname starts with a known auth subdomain label (e.g. "login.").
 *
 * Strings that cannot be parsed as absolute URLs are treated as a raw path, with
 * any query string or fragment stripped before matching.
 *
 * @param {string} url - The URL to check
 * @returns {boolean} True if URL appears to be auth-related
 */
function isLikelyAuthUrl(url) {
  // Path-based patterns (strict - must sit at the start of the path, on a segment boundary)
  const pathPatterns = [
    '/login', '/signin', '/sign-in', '/auth', '/sso', '/oauth',
    '/authenticate', '/saml', '/openid'
  ];

  // Subdomain patterns (must be at the start of the hostname)
  const subdomainPatterns = [
    'login.', 'auth.', 'sso.', 'accounts.', 'id.', 'identity.',
    'signin.', 'authentication.', 'idp.'
  ];

  // Parse once and reuse both parts. URL.pathname never contains the query or
  // fragment, so matching on it keeps "/docs/oauth#setup" from being flagged
  // merely because an auth word precedes a "#" somewhere in the URL.
  let pathname;
  let hostname = '';
  try {
    const parsed = new URL(url);
    pathname = parsed.pathname.toLowerCase();
    hostname = parsed.hostname.toLowerCase();
  } catch {
    // Not an absolute URL: fall back to the raw string, minus query/fragment.
    pathname = url.toLowerCase().split(/[?#]/, 1)[0];
  }

  const hasAuthPath = pathPatterns.some(
    (pattern) => pathname === pattern || pathname.startsWith(`${pattern}/`)
  );
  const hasAuthSubdomain = subdomainPatterns.some((pattern) => hostname.startsWith(pattern));

  return hasAuthPath || hasAuthSubdomain;
}
|
|
305
|
-
|
|
306
|
-
/**
 * Return the tab to use for a hostname, reusing the tab recorded for that
 * hostname when allowed and still open, and opening a new tab otherwise.
 * If opening a new tab fails, the first already-open tab that is not a
 * chrome:// or chrome-extension:// page is used instead. The chosen tab is
 * recorded in the hostname-to-tab map.
 * @param {Browser} browser - The Puppeteer browser instance
 * @param {string} hostname - The hostname to get/create a page for
 * @param {boolean} reuseLastKeptPage - Whether to reuse existing tabs
 * @returns {Promise<Page>} The page for this domain
 * @throws {Error} If no tab can be created and no usable existing tab is found
 */
async function getOrCreatePage(browser, hostname, reuseLastKeptPage = true) {
  // Fast path: a live tab is already associated with this hostname.
  if (reuseLastKeptPage && domainPages.has(hostname)) {
    const knownTab = domainPages.get(hostname);
    if (!knownTab.isClosed()) {
      await knownTab.bringToFront().catch(() => {});
      console.error(`[MCPBrowser] Reusing existing tab for domain: ${hostname}`);
      return knownTab;
    }
    // The tab was closed outside of this server; forget the stale entry.
    domainPages.delete(hostname);
  }

  let tab = null;
  try {
    tab = await browser.newPage();
  } catch {
    // newPage() can fail with some profiles; fall back to an already-open tab.
    const openTabs = await browser.pages();
    for (const candidate of openTabs) {
      try {
        const candidateUrl = candidate.url();
        const isInternal =
          candidateUrl.startsWith('chrome://') || candidateUrl.startsWith('chrome-extension://');
        if (isInternal) {
          continue;
        }
        tab = candidate;
        break;
      } catch {
        // Tabs we cannot inspect are skipped.
      }
    }
    if (!tab) {
      throw new Error('Unable to create or find a controllable page');
    }
  }

  domainPages.set(hostname, tab);
  console.error(`[MCPBrowser] Created new tab for domain: ${hostname}`);
  return tab;
}
|
|
359
|
-
|
|
360
|
-
/**
 * Navigate to a URL with a fallback strategy for slow pages.
 * The first attempt uses the requested wait condition. If it fails with a
 * timeout/navigation error, the navigation is retried once waiting only for the
 * 'load' event. After a successful navigation the function pauses briefly so
 * that JavaScript-driven redirects can take effect before the caller reads the
 * page URL.
 * @param {Page} page - The Puppeteer page instance
 * @param {string} url - The URL to navigate to
 * @param {string} waitUntil - Wait condition (networkidle0, load, etc.)
 * @param {number} timeout - Navigation timeout in ms
 * @param {number} [settleDelayMs=2000] - Pause after navigation, in ms, to let
 *   JS redirects happen; 0 disables the pause
 * @returns {Promise<void>}
 * @throws Rethrows errors that are not timeout/navigation failures, and any
 *   error raised by the fallback attempt
 */
async function navigateToUrl(page, url, waitUntil, timeout, settleDelayMs = 2000) {
  console.error(`[MCPBrowser] Navigating to: ${url}`);

  try {
    await page.goto(url, { waitUntil, timeout });
  } catch (error) {
    // Tolerate rejections that are not Error instances instead of failing on
    // `.message.includes` and hiding the original problem.
    const message = String(error?.message ?? error);
    const isNavigationFailure = message.includes('timeout') || message.includes('Navigation');
    if (!isNavigationFailure) {
      throw error;
    }
    // The requested wait condition (e.g. networkidle0) may never be reached on
    // busy pages, so retry with the more lenient 'load' condition.
    console.error(`[MCPBrowser] Navigation slow, trying fallback load strategy...`);
    await page.goto(url, { waitUntil: 'load', timeout });
  }

  // Wait briefly for potential JS redirects
  if (settleDelayMs > 0) {
    await new Promise((resolve) => setTimeout(resolve, settleDelayMs));
  }
}
|
|
403
|
-
|
|
404
2
|
/**
|
|
405
|
-
*
|
|
406
|
-
*
|
|
407
|
-
*
|
|
408
|
-
* @param {string} currentUrl - Current page URL
|
|
409
|
-
* @param {string} currentHostname - Current page hostname
|
|
410
|
-
* @returns {Object} Object with redirect type and related info
|
|
3
|
+
* MCP Browser Server - Main Entry Point
|
|
4
|
+
* A Model Context Protocol server that provides browser automation capabilities
|
|
5
|
+
* with support for authentication flows, tab reuse, and interactive actions.
|
|
411
6
|
*/
|
|
412
|
-
/**
 * Classify how the page that was finally loaded relates to the page that was requested.
 *
 * Result types:
 * - 'requested_auth': an auth-looking URL was requested and the browser stayed on the same hostname
 * - 'none': same hostname and no switch onto an auth page
 * - 'permanent': the final page is not an auth page (reached only for a different hostname)
 * - 'auth': the final page looks like an auth page, on another hostname or on
 *   the same hostname under an auth path
 *
 * @param {string} url - Requested URL
 * @param {string} hostname - Hostname of the requested URL
 * @param {string} currentUrl - Current page URL
 * @param {string} currentHostname - Current page hostname
 * @returns {Object} Object with redirect type and related info
 */
function detectRedirectType(url, hostname, currentUrl, currentHostname) {
  const stayedOnHost = currentHostname === hostname;
  const askedForAuthPage = isLikelyAuthUrl(url);
  const landedOnAuthPage = isLikelyAuthUrl(currentUrl);
  const isSameDomainAuthPath = stayedOnHost && landedOnAuthPage && !askedForAuthPage;

  // The caller asked for an auth page and got it on the same host: just return its content.
  if (askedForAuthPage && stayedOnHost) {
    return { type: 'requested_auth', currentHostname };
  }

  // Same host and not bounced onto an auth path: nothing to handle.
  if (stayedOnHost && !isSameDomainAuthPath) {
    return { type: 'none' };
  }

  // Ended up somewhere that does not look like authentication: accept the redirect.
  if (!landedOnAuthPage) {
    return { type: 'permanent', currentHostname };
  }

  // Remaining case: an authentication flow is in progress.
  return {
    type: 'auth',
    flowType: isSameDomainAuthPath ? 'same-domain path change' : 'cross-domain redirect',
    originalBase: getBaseDomain(hostname),
    currentBase: getBaseDomain(currentHostname),
    currentUrl,
    hostname,
    currentHostname
  };
}
|
|
448
7
|
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
/**
 * Poll the page to see whether authentication completes without user interaction.
 * Authentication counts as complete once the page is back on the original
 * hostname (or any hostname with the same base domain) and its URL no longer
 * looks like an auth URL. The page is checked every 500 ms until the timeout.
 * @param {Page} page - The Puppeteer page instance
 * @param {string} hostname - Original hostname
 * @param {string} originalBase - Original base domain
 * @param {number} timeoutMs - How long to wait for auto-auth
 * @returns {Promise<Object>} Object with success status and final hostname
 */
async function waitForAutoAuth(page, hostname, originalBase, timeoutMs = 5000) {
  console.error(`[MCPBrowser] Checking for auto-authentication (${timeoutMs / 1000} sec)...`);

  const pollIntervalMs = 500;
  const stopAt = Date.now() + timeoutMs;

  while (Date.now() < stopAt) {
    try {
      const liveUrl = page.url();
      const liveHostname = new URL(liveUrl).hostname;
      const liveBase = getBaseDomain(liveHostname);
      const backOnSite = liveHostname === hostname || liveBase === originalBase;

      // Back on the original domain/base and no longer on an auth URL
      if (backOnSite && !isLikelyAuthUrl(liveUrl)) {
        console.error(`[MCPBrowser] Auto-authentication successful! Now at: ${liveUrl}`);
        return { success: true, hostname: liveHostname };
      }
    } catch {
      // The URL may be momentarily unreadable/unparseable mid-navigation; retry.
    }

    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  return { success: false };
}
|
|
482
|
-
|
|
483
|
-
/**
 * Wait until the user finishes logging in manually in the browser tab.
 * Authentication counts as complete once the page is on the original hostname
 * (or any hostname with the same base domain) and its URL no longer looks like
 * an auth URL. The page is checked every 2 seconds until the timeout.
 * @param {Page} page - The Puppeteer page instance
 * @param {string} hostname - Original hostname
 * @param {string} originalBase - Original base domain
 * @param {number} timeoutMs - How long to wait for manual auth
 * @returns {Promise<Object>} Object with success status, final hostname, and optional error
 */
async function waitForManualAuth(page, hostname, originalBase, timeoutMs = 600000) {
  console.error(`[MCPBrowser] Auto-authentication did not complete. Waiting for user...`);
  console.error(`[MCPBrowser] Will wait for return to ${hostname} or same base domain (${originalBase})`);

  const pollIntervalMs = 2000;
  const stopAt = Date.now() + timeoutMs;

  while (Date.now() < stopAt) {
    try {
      const liveUrl = page.url();
      const liveHostname = new URL(liveUrl).hostname;
      const liveBase = getBaseDomain(liveHostname);
      const backOnSite = liveHostname === hostname || liveBase === originalBase;

      // Done when back on the original domain (or same base domain) and off the auth page
      if (backOnSite && !isLikelyAuthUrl(liveUrl)) {
        console.error(`[MCPBrowser] Authentication completed! Now at: ${liveUrl}`);

        if (liveHostname !== hostname) {
          console.error(`[MCPBrowser] Landed on different subdomain: ${liveHostname}`);
        }

        return { success: true, hostname: liveHostname };
      }
    } catch {
      // The URL may be momentarily unreadable/unparseable mid-navigation; retry.
    }

    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  // Timed out: report where the tab was left so the user can finish and retry.
  const stuckAt = page.url();
  return {
    success: false,
    error: "Authentication timeout - user did not complete login",
    hint: `Authentication timeout. Tab is left open at ${stuckAt}. Complete authentication and retry the same URL.`
  };
}
|
|
528
|
-
|
|
529
|
-
/**
 * Give the page time to settle after authentication.
 * Pauses for a fixed 3 seconds, then waits up to 10 seconds for
 * `document.readyState` to become 'complete'. A failure of that second wait is ignored.
 * @param {Page} page - The Puppeteer page instance
 * @returns {Promise<void>}
 */
async function waitForPageStability(page) {
  const settleDelayMs = 3000;
  const readyOptions = { timeout: 10000 };

  console.error(`[MCPBrowser] Waiting for page to stabilize...`);
  await new Promise((done) => {
    setTimeout(done, settleDelayMs);
  });

  try {
    await page.waitForFunction(() => document.readyState === 'complete', readyOptions);
  } catch {
    // Best effort only: a timeout here is not treated as an error.
  }
}
|
|
544
|
-
|
|
545
|
-
/**
 * Read the page's current HTML and post-process it.
 * The document's outer HTML is taken from the live page (empty string when there
 * is no document element), optionally passed through `cleanHtml`, and then passed
 * through `enrichHtml` together with the page's current URL.
 * @param {Page} page - The Puppeteer page instance
 * @param {boolean} removeUnnecessaryHTML - Whether to clean the HTML
 * @returns {Promise<string>} The processed HTML
 */
async function extractAndProcessHtml(page, removeUnnecessaryHTML) {
  const rawHtml = await page.evaluate(() => document.documentElement?.outerHTML || "");
  const htmlToEnrich = removeUnnecessaryHTML ? cleanHtml(rawHtml) : rawHtml;

  return enrichHtml(htmlToEnrich, page.url());
}
|
|
564
|
-
|
|
565
|
-
/**
 * Fetch a web page using Chrome browser, with support for authentication flows and tab reuse.
 *
 * Reuses the tab recorded for the URL's hostname when possible, navigates to the
 * URL, and classifies the result via `detectRedirectType`:
 * - a redirect to another, non-auth hostname is accepted and the tab is re-keyed,
 * - an authentication flow is first given a short automatic window and then
 *   waits for the user to log in (up to 10 minutes),
 * - otherwise the page content is returned directly.
 * The tab is always left open.
 *
 * @param {Object} params - Fetch parameters
 * @param {string} params.url - The URL to fetch
 * @param {boolean} [params.removeUnnecessaryHTML=true] - Whether to clean HTML (removes scripts, styles, etc.)
 * @returns {Promise<Object>} `{ success: true, url, html }` on success, or
 *   `{ success: false, error, pageKeptOpen, hint }` on failure
 * @throws {Error} If `url` is missing or not a valid absolute URL (browser
 *   connection errors from `getBrowser` also propagate)
 */
async function fetchPage({ url, removeUnnecessaryHTML = true }) {
  // Hardcoded smart defaults
  const waitUntil = "networkidle0";
  const navigationTimeout = 60000;
  const authCompletionTimeout = 600000;
  const reuseLastKeptPage = true;

  if (!url) {
    throw new Error("url parameter is required");
  }

  // Parse hostname for domain-based tab reuse
  let hostname;
  try {
    hostname = new URL(url).hostname;
  } catch {
    throw new Error(`Invalid URL: ${url}`);
  }

  const browser = await getBrowser();
  let page = null;

  // Re-key the tab in the domain map when navigation ended on another hostname,
  // so the next request for that hostname finds this tab. No-op when unchanged.
  const adoptHostname = (finalHostname) => {
    if (finalHostname === hostname) return;
    domainPages.delete(hostname);
    domainPages.set(finalHostname, page);
    hostname = finalHostname;
  };

  try {
    // Get or create page for this domain
    page = await getOrCreatePage(browser, hostname, reuseLastKeptPage);

    // Navigate to URL with fallback strategy
    await navigateToUrl(page, url, waitUntil, navigationTimeout);

    const currentUrl = page.url();
    const currentHostname = new URL(currentUrl).hostname;
    console.error(`[MCPBrowser] Navigation completed: ${currentUrl}`);

    // Detect redirect type and handle accordingly
    const redirectInfo = detectRedirectType(url, hostname, currentUrl, currentHostname);

    if (redirectInfo.type === 'requested_auth') {
      console.error(`[MCPBrowser] User requested auth page directly, returning content`);
      adoptHostname(redirectInfo.currentHostname);
    } else if (redirectInfo.type === 'permanent') {
      console.error(`[MCPBrowser] Permanent redirect detected: ${hostname} → ${redirectInfo.currentHostname}`);
      console.error(`[MCPBrowser] Accepting redirect and updating domain mapping`);
      adoptHostname(redirectInfo.currentHostname);
    } else if (redirectInfo.type === 'auth') {
      console.error(`[MCPBrowser] Authentication flow detected (${redirectInfo.flowType})`);
      console.error(`[MCPBrowser] Current location: ${redirectInfo.currentUrl}`);

      // Try auto-auth first
      const autoAuthResult = await waitForAutoAuth(page, redirectInfo.hostname, redirectInfo.originalBase);

      if (autoAuthResult.success) {
        adoptHostname(autoAuthResult.hostname);
      } else {
        // Wait for manual auth
        const manualAuthResult = await waitForManualAuth(
          page,
          redirectInfo.hostname,
          redirectInfo.originalBase,
          authCompletionTimeout
        );

        if (!manualAuthResult.success) {
          return {
            success: false,
            error: manualAuthResult.error,
            pageKeptOpen: true,
            hint: manualAuthResult.hint
          };
        }

        adoptHostname(manualAuthResult.hostname);
      }

      // Wait for page stability after auth
      await waitForPageStability(page);
    }

    // Extract and process HTML
    const processedHtml = await extractAndProcessHtml(page, removeUnnecessaryHTML);

    return {
      success: true,
      url: page.url(),
      html: processedHtml
    };
  } catch (err) {
    // The tab is deliberately left open (domain-aware reuse handles it later),
    // so failures are reported as a result object rather than thrown.
    const hint = "Tab is left open. Complete sign-in there, then call fetch_webpage_protected again with just the URL.";
    return { success: false, error: err.message || String(err), pageKeptOpen: true, hint };
  }
}
|
|
679
|
-
|
|
680
|
-
/**
 * Shorten a string to at most `max` characters.
 * When the string is longer than `max`, the first `max` characters are kept and
 * the marker "... [truncated]" is appended. Falsy input yields an empty string.
 * @param {string} str - The string to truncate
 * @param {number} max - Maximum length
 * @returns {string} The original or truncated string
 */
function truncate(str, max) {
  if (!str) {
    return "";
  }
  if (str.length <= max) {
    return str;
  }
  const head = str.slice(0, max);
  return `${head}... [truncated]`;
}
|
|
8
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
9
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
10
|
+
import { ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js";
|
|
11
|
+
import { fileURLToPath } from 'url';
|
|
690
12
|
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
*/
|
|
698
|
-
/**
 * Strip non-content markup from an HTML string to shrink it for text
 * consumption.
 *
 * Removes, in order: comments; <script>, <style>, <noscript> and <svg>
 * elements with their content; <meta> and <link> tags; the presentational /
 * behavioral attributes style, class, id, role, data-*, aria-* and on*
 * event handlers; then collapses whitespace runs to a single space and drops
 * whitespace between adjacent tags.
 *
 * @param {string} html - Raw HTML markup
 * @returns {string} The reduced markup, or "" for empty/nullish input
 */
function cleanHtml(html) {
  if (!html) return "";

  // A quoted attribute value must close with the SAME quote that opened it,
  // so values such as style="font-family: 'Arial'" or onclick="go('x')" are
  // matched whole instead of stopping at the first inner quote.
  const quotedValue = `(?:"[^"]*"|'[^']*')`;
  const removableAttribute = new RegExp(
    `\\s+(?:style|class|id|role|data-[a-z0-9-]+|aria-[a-z0-9-]+|on[a-z]+)\\s*=\\s*${quotedValue}`,
    'gi'
  );

  return html
    // HTML comments
    .replace(/<!--[\s\S]*?-->/g, '')
    // Elements whose content is never useful as page text
    .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
    .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
    .replace(/<noscript\b[^<]*(?:(?!<\/noscript>)<[^<]*)*<\/noscript>/gi, '')
    // SVG is often large and carries no text content
    .replace(/<svg\b[^<]*(?:(?!<\/svg>)<[^<]*)*<\/svg>/gi, '')
    // Void head tags (metadata, stylesheets, preload hints, ...)
    .replace(/<meta\b[^>]*>/gi, '')
    .replace(/<link\b[^>]*>/gi, '')
    // Presentational / behavioral attributes
    .replace(removableAttribute, '')
    // Collapse whitespace runs, then drop whitespace between tags
    .replace(/\s+/g, ' ')
    .replace(/>\s+</g, '><');
}
|
|
13
|
+
// Import core functionality
|
|
14
|
+
import { fetchPage } from './actions/fetch-page.js';
|
|
15
|
+
import { clickElement } from './actions/click-element.js';
|
|
16
|
+
import { typeText } from './actions/type-text.js';
|
|
17
|
+
import { closeTab } from './actions/close-tab.js';
|
|
18
|
+
import { getCurrentHtml } from './actions/get-current-html.js';
|
|
753
19
|
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
let enriched = html;
|
|
761
|
-
|
|
762
|
-
// Convert relative URLs to absolute in href attributes
|
|
763
|
-
enriched = enriched.replace(/href=["']([^"']+)["']/gi, (match, url) => {
|
|
764
|
-
if (!url || url.startsWith('http://') || url.startsWith('https://') || url.startsWith('//') || url.startsWith('#') || url.startsWith('mailto:') || url.startsWith('tel:')) {
|
|
765
|
-
return match;
|
|
766
|
-
}
|
|
767
|
-
try {
|
|
768
|
-
const absoluteUrl = new URL(url, baseUrl).href;
|
|
769
|
-
return `href="${absoluteUrl}"`;
|
|
770
|
-
} catch {
|
|
771
|
-
return match;
|
|
772
|
-
}
|
|
773
|
-
});
|
|
774
|
-
|
|
775
|
-
// Convert relative URLs to absolute in src attributes
|
|
776
|
-
enriched = enriched.replace(/src=["']([^"']+)["']/gi, (match, url) => {
|
|
777
|
-
if (!url || url.startsWith('http://') || url.startsWith('https://') || url.startsWith('//') || url.startsWith('data:')) {
|
|
778
|
-
return match;
|
|
779
|
-
}
|
|
780
|
-
try {
|
|
781
|
-
const absoluteUrl = new URL(url, baseUrl).href;
|
|
782
|
-
return `src="${absoluteUrl}"`;
|
|
783
|
-
} catch {
|
|
784
|
-
return match;
|
|
785
|
-
}
|
|
786
|
-
});
|
|
787
|
-
|
|
788
|
-
return enriched;
|
|
789
|
-
}
|
|
790
|
-
|
|
791
|
-
/**
 * Convenience wrapper that runs an HTML string through cleanHtml and then
 * enrichHtml in one call.
 *
 * @deprecated Use cleanHtml and enrichHtml separately for better control
 * @param {string} html - Raw HTML markup; empty/nullish input yields ""
 * @param {string} baseUrl - Passed through to enrichHtml as its second argument
 * @returns {string} The result of enrichHtml(cleanHtml(html), baseUrl), or ""
 */
function prepareHtml(html, baseUrl) {
  return html ? enrichHtml(cleanHtml(html), baseUrl) : "";
}
|
|
20
|
+
// Import functions for testing exports
|
|
21
|
+
import { getBrowser, closeBrowser } from './core/browser.js';
|
|
22
|
+
import { getOrCreatePage, navigateToUrl, extractAndProcessHtml, waitForPageStability } from './core/page.js';
|
|
23
|
+
import { detectRedirectType, waitForAutoAuth, waitForManualAuth } from './core/auth.js';
|
|
24
|
+
import { cleanHtml, enrichHtml, prepareHtml } from './core/html.js';
|
|
25
|
+
import { getBaseDomain, isLikelyAuthUrl } from './utils.js';
|
|
800
26
|
|
|
801
27
|
/**
|
|
802
28
|
* Main entry point for the MCP server.
|
|
803
|
-
* Sets up the Model Context Protocol server with
|
|
29
|
+
* Sets up the Model Context Protocol server with all available tools,
|
|
804
30
|
* configures request handlers, and starts the stdio transport.
|
|
805
31
|
* @returns {Promise<void>}
|
|
806
32
|
*/
|
|
@@ -809,19 +35,95 @@ async function main() {
|
|
|
809
35
|
|
|
810
36
|
const tools = [
|
|
811
37
|
{
|
|
812
|
-
name: "
|
|
38
|
+
name: "fetch_webpage",
|
|
813
39
|
description: "Fetches web pages using Chrome/Edge browser. Handles auth-required pages, CAPTCHA, SSO, anti-bot protection, and JavaScript-heavy sites.\n\nWaits for user interaction (login, CAPTCHA) if needed, then returns content automatically.\n\nIMPORTANT: Call ONE URL at a time only. Never parallel - causes conflicts. Wait for completion before next URL.",
|
|
814
40
|
inputSchema: {
|
|
815
41
|
type: "object",
|
|
816
42
|
properties: {
|
|
817
43
|
url: { type: "string", description: "The URL to fetch" },
|
|
44
|
+
removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%.", default: true },
|
|
45
|
+
postLoadWait: { type: "number", description: "Milliseconds to wait after page load for SPAs to render dynamic content.", default: 1000 }
|
|
46
|
+
},
|
|
47
|
+
required: ["url"],
|
|
48
|
+
additionalProperties: false,
|
|
49
|
+
},
|
|
50
|
+
annotations: {
|
|
51
|
+
title: "Fetch Web Page"
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
name: "click_element",
|
|
56
|
+
description: "Clicks on an element on the page. Works with any clickable element including buttons, links, or elements with onclick handlers. Can target by CSS selector or text content. Waits for page stability and returns updated HTML by default. The page must be already loaded via fetch_webpage first.",
|
|
57
|
+
inputSchema: {
|
|
58
|
+
type: "object",
|
|
59
|
+
properties: {
|
|
60
|
+
url: { type: "string", description: "The URL of the page (must match a previously fetched page)" },
|
|
61
|
+
selector: { type: "string", description: "CSS selector for the element to click (e.g., '#submit-btn', '.login-button')" },
|
|
62
|
+
text: { type: "string", description: "Text content to search for if selector is not provided (e.g., 'Sign In', 'Submit')" },
|
|
63
|
+
waitForElementTimeout: { type: "number", description: "Maximum time to wait for element in milliseconds", default: 1000 },
|
|
64
|
+
returnHtml: { type: "boolean", description: "Whether to wait for stability and return HTML after clicking. Set to false for fast form interactions (checkboxes, radio buttons).", default: true },
|
|
65
|
+
removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%. Only used when returnHtml is true.", default: true },
|
|
66
|
+
postClickWait: { type: "number", description: "Milliseconds to wait after click for SPAs to render dynamic content.", default: 1000 }
|
|
67
|
+
},
|
|
68
|
+
required: ["url"],
|
|
69
|
+
additionalProperties: false,
|
|
70
|
+
},
|
|
71
|
+
annotations: {
|
|
72
|
+
title: "Click Element"
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
name: "type_text",
|
|
77
|
+
description: "Types text into an input field, textarea, or other editable element. The page must be already loaded via fetch_webpage first.",
|
|
78
|
+
inputSchema: {
|
|
79
|
+
type: "object",
|
|
80
|
+
properties: {
|
|
81
|
+
url: { type: "string", description: "The URL of the page (must match a previously fetched page)" },
|
|
82
|
+
selector: { type: "string", description: "CSS selector for the input element (e.g., '#username', 'input[name=\"email\"]')" },
|
|
83
|
+
text: { type: "string", description: "Text to type into the field" },
|
|
84
|
+
clear: { type: "boolean", description: "Whether to clear existing text first", default: true },
|
|
85
|
+
typeDelay: { type: "number", description: "Delay between keystrokes in milliseconds (simulates human typing)", default: 50 },
|
|
86
|
+
waitForElementTimeout: { type: "number", description: "Maximum time to wait for element in milliseconds", default: 5000 },
|
|
87
|
+
returnHtml: { type: "boolean", description: "Whether to wait for stability and return HTML after typing.", default: true },
|
|
88
|
+
removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%. Only used when returnHtml is true.", default: true },
|
|
89
|
+
postTypeWait: { type: "number", description: "Milliseconds to wait after typing for SPAs to render dynamic content.", default: 1000 }
|
|
90
|
+
},
|
|
91
|
+
required: ["url", "selector", "text"],
|
|
92
|
+
additionalProperties: false,
|
|
93
|
+
},
|
|
94
|
+
annotations: {
|
|
95
|
+
title: "Type Text"
|
|
96
|
+
}
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
name: "close_tab",
|
|
100
|
+
description: "Closes the browser tab for the given URL's hostname. This removes the page from the tab pool and forces a fresh session on the next visit to that hostname. Useful for memory management or when you need to clear session state. Note: Uses exact hostname match (www.example.com and example.com are treated as different tabs).",
|
|
101
|
+
inputSchema: {
|
|
102
|
+
type: "object",
|
|
103
|
+
properties: {
|
|
104
|
+
url: { type: "string", description: "The URL whose hostname tab should be closed" }
|
|
105
|
+
},
|
|
106
|
+
required: ["url"],
|
|
107
|
+
additionalProperties: false,
|
|
108
|
+
},
|
|
109
|
+
annotations: {
|
|
110
|
+
title: "Close Tab"
|
|
111
|
+
}
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
name: "get_current_html",
|
|
115
|
+
description: "Gets the current HTML from an already-loaded page WITHOUT navigating/reloading. Use this after interactions (click, type, wait) to get the updated DOM state efficiently. Much faster than fetch_webpage since it only extracts HTML from the current page state.",
|
|
116
|
+
inputSchema: {
|
|
117
|
+
type: "object",
|
|
118
|
+
properties: {
|
|
119
|
+
url: { type: "string", description: "The URL of the page (must match a previously fetched page)" },
|
|
818
120
|
removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%.", default: true }
|
|
819
121
|
},
|
|
820
122
|
required: ["url"],
|
|
821
123
|
additionalProperties: false,
|
|
822
124
|
},
|
|
823
125
|
annotations: {
|
|
824
|
-
title: "
|
|
126
|
+
title: "Get Current HTML"
|
|
825
127
|
}
|
|
826
128
|
},
|
|
827
129
|
];
|
|
@@ -830,27 +132,50 @@ async function main() {
|
|
|
830
132
|
|
|
831
133
|
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
832
134
|
const { name, arguments: args } = request.params;
|
|
833
|
-
if (name !== "fetch_webpage_protected") {
|
|
834
|
-
throw new Error(`Unknown tool: ${name}`);
|
|
835
|
-
}
|
|
836
135
|
const safeArgs = args || {};
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
136
|
+
|
|
137
|
+
let result;
|
|
138
|
+
|
|
139
|
+
switch (name) {
|
|
140
|
+
case "fetch_webpage":
|
|
141
|
+
const fallbackUrl = process.env.DEFAULT_FETCH_URL || process.env.MCP_DEFAULT_FETCH_URL;
|
|
142
|
+
if (!safeArgs.url) {
|
|
143
|
+
if (fallbackUrl) {
|
|
144
|
+
safeArgs.url = fallbackUrl;
|
|
145
|
+
} else {
|
|
146
|
+
return {
|
|
147
|
+
content: [
|
|
148
|
+
{
|
|
149
|
+
type: "text",
|
|
150
|
+
text: JSON.stringify({ success: false, error: "Missing url and no DEFAULT_FETCH_URL/MCP_DEFAULT_FETCH_URL configured" }),
|
|
151
|
+
},
|
|
152
|
+
],
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
result = await fetchPage(safeArgs);
|
|
157
|
+
break;
|
|
158
|
+
|
|
159
|
+
case "click_element":
|
|
160
|
+
result = await clickElement(safeArgs);
|
|
161
|
+
break;
|
|
162
|
+
|
|
163
|
+
case "type_text":
|
|
164
|
+
result = await typeText(safeArgs);
|
|
165
|
+
break;
|
|
166
|
+
|
|
167
|
+
case "close_tab":
|
|
168
|
+
result = await closeTab(safeArgs);
|
|
169
|
+
break;
|
|
170
|
+
|
|
171
|
+
case "get_current_html":
|
|
172
|
+
result = await getCurrentHtml(safeArgs);
|
|
173
|
+
break;
|
|
174
|
+
|
|
175
|
+
default:
|
|
176
|
+
throw new Error(`Unknown tool: ${name}`);
|
|
851
177
|
}
|
|
852
|
-
|
|
853
|
-
const result = await fetchPage(safeArgs);
|
|
178
|
+
|
|
854
179
|
return {
|
|
855
180
|
content: [
|
|
856
181
|
{
|
|
@@ -868,7 +193,8 @@ async function main() {
|
|
|
868
193
|
// Export for testing
|
|
869
194
|
export {
|
|
870
195
|
fetchPage,
|
|
871
|
-
getBrowser,
|
|
196
|
+
getBrowser,
|
|
197
|
+
closeBrowser,
|
|
872
198
|
prepareHtml,
|
|
873
199
|
cleanHtml,
|
|
874
200
|
enrichHtml,
|
|
@@ -880,11 +206,14 @@ export {
|
|
|
880
206
|
waitForPageStability,
|
|
881
207
|
extractAndProcessHtml,
|
|
882
208
|
getBaseDomain,
|
|
883
|
-
isLikelyAuthUrl
|
|
209
|
+
isLikelyAuthUrl,
|
|
210
|
+
clickElement,
|
|
211
|
+
typeText,
|
|
212
|
+
closeTab,
|
|
213
|
+
getCurrentHtml
|
|
884
214
|
};
|
|
885
215
|
|
|
886
216
|
// Run the MCP server only if this is the main module (not imported for testing)
|
|
887
|
-
import { fileURLToPath } from 'url';
|
|
888
217
|
if (import.meta.url === new URL(process.argv[1], 'file://').href ||
|
|
889
218
|
fileURLToPath(import.meta.url) === process.argv[1]) {
|
|
890
219
|
main().catch((err) => {
|