mcpbrowser 0.2.18 → 0.2.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
# CI workflow: runs the project's test suites on every push and pull request
# targeting main, once per Node.js version listed in the matrix.
name: MCP Server Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

# The job only needs to read the repository, so request a read-only token
# explicitly instead of relying on the repository's default token scope.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    # Abort a hung run early; without this a stuck test would hold the runner
    # until the 360-minute default job timeout.
    timeout-minutes: 15

    strategy:
      matrix:
        node-version: [18.x, 20.x, 22.x]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'npm'

      - name: Install dependencies
        run: npm ci

      # Runs tests/mcp-server.test.js on its own as a separately reported step.
      - name: Run MCP server tests
        run: npm run test:mcp

      # Runs tests/*.test.js (this glob matches mcp-server.test.js again).
      - name: Run all tests
        run: npm test
package/README.md CHANGED
@@ -117,7 +117,7 @@ Restart VS Code or reload the window for the changes to take effect.
117
117
  In Copilot Chat, you should see the `MCPBrowser` server listed. Ask it to fetch an authenticated URL and it will drive your signed-in Chrome session.
118
118
 
119
119
  ## How it works
120
- - Tool `fetch_webpage_protected` (inside the MCP server) drives your live Chrome (DevTools Protocol) so it inherits your auth cookies, returning `text` and `html` (truncated up to 2M chars per field) for analysis.
120
+ - Tool `fetch_webpage_protected` (inside the MCP server) drives your live Chrome (DevTools Protocol) so it inherits your auth cookies, returning `html` (truncated up to 2M chars) for analysis.
121
121
  - **Smart confirmation**: Copilot asks for confirmation ONLY on first request to a new domain - explains browser will open for authentication. Subsequent requests to same domain work automatically (session preserved).
122
122
  - **Domain-aware tab reuse**: Automatically reuses the same tab for URLs on the same domain, preserving authentication session. Different domains open new tabs.
123
123
  - **Automatic web page fetching**: Waits for network idle (`networkidle0`) by default, ensuring JavaScript-heavy web pages (SPAs, dashboards) fully load before returning content.
@@ -2,7 +2,7 @@
2
2
  "name": "mcpbrowser",
3
3
  "displayName": "MCP Browser",
4
4
  "description": "Lightweight MCP server-extension that allows Copilot to fetch protected web pages you can authenticate to via browser - handles login, SSO, and anti-crawler restrictions. Should be used when standard fetch_webpage fails",
5
- "version": "0.2.18",
5
+ "version": "0.2.21",
6
6
  "publisher": "cherchyk",
7
7
  "icon": "icon.png",
8
8
  "engines": {
@@ -92,7 +92,7 @@ async function configureMcpBrowser() {
92
92
  type: "stdio",
93
93
  command: "npx",
94
94
  args: ["-y", "mcpbrowser@latest"],
95
- description: "**USE AUTOMATICALLY** when normal URL fetch fails due to: 401/403 errors, login/auth requirements, SSO prompts, anti-crawler/bot detection. **FIRST FETCH**: Ask confirmation ONLY for first request to a domain - explain browser will open for authentication. **SUBSEQUENT REQUESTS**: Use automatically (session preserved). Fetches web pages using authenticated Chrome session via DevTools Protocol (works for Microsoft, GitHub, AWS, Google, corporate sites, etc.). Waits for web page fetch automatically (up to 60s, handles redirects)."
95
+ description: "Use AUTOMATICALLY on 401/403 errors, login pages, SSO prompts, or anti-bot blocks. First domain request: ask user confirmation (browser opens for auth). Subsequent same-domain: use automatically (session preserved). Returns HTML from authenticated Chrome session. Handles Microsoft, GitHub, AWS, Google, corporate sites."
96
96
  };
97
97
 
98
98
  // Write back to file with pretty formatting
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  { "name": "mcpbrowser",
2
- "version": "0.2.18",
2
+ "version": "0.2.21",
3
3
  "mcpName": "io.github.cherchyk/browser",
4
4
  "type": "module",
5
5
  "description": "MCP server that fetches protected web pages using Chrome DevTools Protocol",
@@ -8,7 +8,9 @@
8
8
  "mcpbrowser": "src/mcp-browser.js"
9
9
  },
10
10
  "scripts": {
11
- "mcp": "node src/mcp-browser.js"
11
+ "mcp": "node src/mcp-browser.js",
12
+ "test": "node --test tests/*.test.js",
13
+ "test:mcp": "node --test tests/mcp-server.test.js"
12
14
  },
13
15
  "keywords": [
14
16
  "mcp",
package/server.json CHANGED
@@ -6,7 +6,7 @@
6
6
  "url": "https://github.com/cherchyk/MCPBrowser",
7
7
  "source": "github"
8
8
  },
9
- "version": "0.2.18",
9
+ "version": "0.2.21",
10
10
  "packages": [
11
11
  {
12
12
  "registryType": "npm",
@@ -60,7 +60,7 @@ function getDefaultChromePaths() {
60
60
  const defaultChromePaths = getDefaultChromePaths();
61
61
 
62
62
  let cachedBrowser = null;
63
- let lastKeptPage = null; // reuse the same tab when requested
63
+ let domainPages = new Map(); // hostname -> page mapping for tab reuse across domains
64
64
  let chromeLaunchPromise = null; // prevent multiple simultaneous launches
65
65
 
66
66
  async function devtoolsAvailable() {
@@ -152,19 +152,16 @@ async function getBrowser() {
152
152
  });
153
153
  cachedBrowser.on("disconnected", () => {
154
154
  cachedBrowser = null;
155
- lastKeptPage = null;
155
+ domainPages.clear(); // Clear all domain page mappings
156
156
  });
157
157
  return cachedBrowser;
158
158
  }
159
159
 
160
- async function fetchPage({
161
- url,
162
- keepPageOpen = true,
163
- outputFormat = "HTML",
164
- }) {
160
+ async function fetchPage({ url }) {
165
161
  // Hardcoded smart defaults
166
162
  const waitUntil = "networkidle0";
167
- const timeoutMs = 60000;
163
+ const navigationTimeout = 60000; // Initial navigation timeout
164
+ const authCompletionTimeout = 600000; // 10 minutes for user to complete authentication
168
165
  const reuseLastKeptPage = true;
169
166
 
170
167
  if (!url) {
@@ -173,36 +170,29 @@ async function fetchPage({
173
170
 
174
171
  const browser = await getBrowser();
175
172
  let page = null;
173
+ let hostname;
176
174
 
177
- // Smart tab reuse: only reuse if same domain (preserves auth within domain)
178
- if (reuseLastKeptPage && lastKeptPage && !lastKeptPage.isClosed()) {
179
- let newHostname;
180
- try {
181
- newHostname = new URL(url).hostname;
182
- } catch {
183
- throw new Error(`Invalid URL: ${url}`);
184
- }
185
- const currentUrl = lastKeptPage.url();
186
-
187
- if (currentUrl) {
188
- try {
189
- const currentHostname = new URL(currentUrl).hostname;
190
- // Reuse tab only if same domain (keeps auth session alive)
191
- if (currentHostname === newHostname) {
192
- page = lastKeptPage;
193
- await page.bringToFront().catch(() => {});
194
- } else {
195
- // Different domain - close old tab and create new one
196
- await lastKeptPage.close().catch(() => {});
197
- lastKeptPage = null;
198
- }
199
- } catch {
200
- // If URL parsing fails, create new tab
201
- }
175
+ // Parse hostname for domain-based tab reuse
176
+ try {
177
+ hostname = new URL(url).hostname;
178
+ } catch {
179
+ throw new Error(`Invalid URL: ${url}`);
180
+ }
181
+
182
+ // Check if we have an existing page for this domain
183
+ if (reuseLastKeptPage && domainPages.has(hostname)) {
184
+ const existingPage = domainPages.get(hostname);
185
+ if (!existingPage.isClosed()) {
186
+ page = existingPage;
187
+ await page.bringToFront().catch(() => {});
188
+ console.error(`[MCPBrowser] Reusing existing tab for domain: ${hostname}`);
189
+ } else {
190
+ // Page was closed externally, remove from map
191
+ domainPages.delete(hostname);
202
192
  }
203
193
  }
204
194
 
205
- // Create new tab if no reuse
195
+ // Create new tab if no existing page for this domain
206
196
  if (!page) {
207
197
  try {
208
198
  page = await browser.newPage();
@@ -225,50 +215,85 @@ async function fetchPage({
225
215
  throw new Error('Unable to create or find a controllable page');
226
216
  }
227
217
  }
218
+ // Add new page to domain map
219
+ domainPages.set(hostname, page);
220
+ console.error(`[MCPBrowser] Created new tab for domain: ${hostname}`);
228
221
  }
229
222
 
230
- let shouldKeepOpen = keepPageOpen || page === lastKeptPage;
223
+ let shouldKeepOpen = true;
231
224
  let wasSuccess = false;
232
225
  try {
233
226
  console.error(`[MCPBrowser] Navigating to: ${url}`);
234
- await page.goto(url, { waitUntil, timeout: timeoutMs });
235
- console.error(`[MCPBrowser] Navigation completed: ${page.url()}`);
227
+ await page.goto(url, { waitUntil, timeout: navigationTimeout });
236
228
 
237
- // Extract content based on outputFormat
238
- const result = { success: true, url: page.url() };
229
+ const currentUrl = page.url();
230
+ const currentHostname = new URL(currentUrl).hostname;
239
231
 
240
- if (outputFormat === "HTML" || outputFormat === "BOTH") {
241
- const html = await page.evaluate(() => document.documentElement?.outerHTML || "");
242
- result.html = truncate(html, 2000000);
243
- }
232
+ console.error(`[MCPBrowser] Navigation completed: ${currentUrl}`);
244
233
 
245
- if (outputFormat === "TEXT" || outputFormat === "BOTH") {
246
- const text = await page.evaluate(() => document.body?.innerText || "");
247
- result.text = truncate(text, 2000000);
234
+ // Check if we were redirected to a different domain (likely authentication)
235
+ if (currentHostname !== hostname) {
236
+ console.error(`[MCPBrowser] Detected redirect to authentication domain: ${currentHostname}`);
237
+ console.error(`[MCPBrowser] Waiting for user to complete authentication...`);
238
+ console.error(`[MCPBrowser] Will wait up to ${authCompletionTimeout / 1000} seconds for return to ${hostname}`);
239
+
240
+ // Wait for navigation back to the original domain
241
+ const authDeadline = Date.now() + authCompletionTimeout;
242
+ let authCompleted = false;
243
+
244
+ while (Date.now() < authDeadline) {
245
+ try {
246
+ // Check current URL
247
+ const checkUrl = page.url();
248
+ const checkHostname = new URL(checkUrl).hostname;
249
+
250
+ if (checkHostname === hostname) {
251
+ console.error(`[MCPBrowser] Authentication completed! Returned to: ${checkUrl}`);
252
+ authCompleted = true;
253
+ break;
254
+ }
255
+
256
+ // Wait a bit before checking again
257
+ await new Promise(resolve => setTimeout(resolve, 2000));
258
+ } catch (error) {
259
+ // Page might be navigating, continue waiting
260
+ await new Promise(resolve => setTimeout(resolve, 2000));
261
+ }
262
+ }
263
+
264
+ if (!authCompleted) {
265
+ const hint = `Authentication timeout. Tab is left open at ${page.url()}. Complete authentication and retry the same URL.`;
266
+ return { success: false, error: "Authentication timeout - user did not complete login", pageKeptOpen: true, hint };
267
+ }
268
+
269
+ // Wait for page to fully stabilize after auth redirect
270
+ console.error(`[MCPBrowser] Waiting for page to stabilize after authentication...`);
271
+ await new Promise(resolve => setTimeout(resolve, 3000)); // Give page time to settle
272
+
273
+ // Ensure page is ready
274
+ try {
275
+ await page.waitForFunction(() => document.readyState === 'complete', { timeout: 10000 });
276
+ } catch {
277
+ // Ignore timeout - page might already be ready
278
+ }
248
279
  }
249
280
 
281
+ // Extract HTML content
282
+ const html = await page.evaluate(() => document.documentElement?.outerHTML || "");
283
+ const preparedHtml = prepareHtml(html, page.url());
284
+ const result = {
285
+ success: true,
286
+ url: page.url(),
287
+ html: preparedHtml
288
+ };
289
+
250
290
  wasSuccess = true;
251
- if (keepPageOpen && lastKeptPage !== page) {
252
- // Close old kept page if we're keeping a different one
253
- if (lastKeptPage && !lastKeptPage.isClosed()) {
254
- await lastKeptPage.close().catch(() => {});
255
- }
256
- lastKeptPage = page;
257
- }
258
291
  return result;
259
292
  } catch (err) {
260
- shouldKeepOpen = shouldKeepOpen || keepPageOpen;
261
- const hint = shouldKeepOpen
262
- ? "Tab is left open. Complete sign-in there, then call fetch_webpage_protected again with just the URL."
263
- : undefined;
264
- return { success: false, error: err.message || String(err), pageKeptOpen: shouldKeepOpen, hint };
293
+ const hint = "Tab is left open. Complete sign-in there, then call fetch_webpage_protected again with just the URL.";
294
+ return { success: false, error: err.message || String(err), pageKeptOpen: true, hint };
265
295
  } finally {
266
- if (!shouldKeepOpen && lastKeptPage === page) {
267
- lastKeptPage = null;
268
- }
269
- if (!shouldKeepOpen) {
270
- await page.close().catch(() => {});
271
- }
296
+ // Tab always stays open - domain-aware reuse handles cleanup
272
297
  }
273
298
  }
274
299
 
@@ -277,19 +302,107 @@ function truncate(str, max) {
277
302
  return str.length > max ? `${str.slice(0, max)}... [truncated]` : str;
278
303
  }
279
304
 
305
/**
 * Prepares raw page HTML for consumption by:
 * 1. Removing non-content elements (comments, script, style, noscript, svg,
 *    meta and link tags)
 * 2. Converting relative URLs in href/src attributes to absolute URLs
 * 3. Removing code-related attributes (style, class, id, role, data-*, aria-*,
 *    event handlers)
 * 4. Collapsing whitespace runs and dropping whitespace between tags
 *
 * Attribute values are matched from their opening quote to the SAME closing
 * quote, so values that contain the other quote character (for example
 * onclick="go('x')" or style="font-family: 'Arial'") are handled as a whole
 * instead of being cut at the first inner quote.
 *
 * @param {string} html - Markup to clean; any falsy value yields "".
 * @param {string} baseUrl - URL that relative href/src values are resolved against.
 * @returns {string} The cleaned, whitespace-collapsed markup.
 */
function prepareHtml(html, baseUrl) {
  if (!html) return "";

  // Builds a replace() callback that resolves one href/src value against
  // baseUrl. Values starting with a passthrough prefix, and values the URL
  // parser rejects, are returned untouched.
  const absolutize = (attribute, passthroughPrefixes) => (match, _quote, url) => {
    if (passthroughPrefixes.some((prefix) => url.startsWith(prefix))) {
      return match;
    }
    try {
      // The attribute is re-emitted with double quotes, so a literal double
      // quote that survives URL serialisation must be escaped.
      const absoluteUrl = new URL(url, baseUrl).href.replace(/"/g, '&quot;');
      return `${attribute}="${absoluteUrl}"`;
    } catch {
      return match;
    }
  };

  let cleaned = html;

  // Remove HTML comments
  cleaned = cleaned.replace(/<!--[\s\S]*?-->/g, '');

  // Remove elements whose content is not readable text
  cleaned = cleaned.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
  cleaned = cleaned.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');
  cleaned = cleaned.replace(/<noscript\b[^<]*(?:(?!<\/noscript>)<[^<]*)*<\/noscript>/gi, '');
  cleaned = cleaned.replace(/<svg\b[^<]*(?:(?!<\/svg>)<[^<]*)*<\/svg>/gi, '');

  // Remove meta tags and link tags (stylesheets, preload, etc.)
  cleaned = cleaned.replace(/<meta\b[^>]*>/gi, '');
  cleaned = cleaned.replace(/<link\b[^>]*>/gi, '');

  // Convert relative URLs to absolute in href attributes
  cleaned = cleaned.replace(
    /href=(["'])((?:(?!\1)[\s\S])+)\1/gi,
    absolutize('href', ['http://', 'https://', '//', '#', 'mailto:', 'tel:']),
  );

  // Convert relative URLs to absolute in src attributes
  cleaned = cleaned.replace(
    /src=(["'])((?:(?!\1)[\s\S])+)\1/gi,
    absolutize('src', ['http://', 'https://', '//', 'data:']),
  );

  // Remove style, class, id, role, data-* and aria-* attributes as well as
  // event handlers (onclick, onload, ...). Only event handlers tolerate
  // whitespace around "=", matching the previous behaviour.
  cleaned = cleaned.replace(
    /\s+(?:(?:style|class|id|role|data-[a-z0-9-]+|aria-[a-z0-9-]+)=|on[a-z]+\s*=\s*)(["'])(?:(?!\1)[\s\S])*\1/gi,
    '',
  );

  // Collapse multiple whitespace/newlines into single space
  cleaned = cleaned.replace(/\s+/g, ' ');

  // Remove spaces between tags
  cleaned = cleaned.replace(/>\s+</g, '><');

  return cleaned;
}
394
+
280
395
  async function main() {
281
- const server = new Server({ name: "MCPBrowser", version: "0.2.18" }, { capabilities: { tools: {} } });
396
+ const server = new Server({ name: "MCPBrowser", version: "0.2.21" }, { capabilities: { tools: {} } });
282
397
 
283
398
  const tools = [
284
399
  {
285
400
  name: "fetch_webpage_protected",
286
- description: "BROWSER ACCESS FOR AUTHENTICATED WEB PAGES: Fetches web pages in Chrome browser using real user profile (with saved passwords, active sessions, extensions). Automatically handles login redirects, SSO, OAuth, and anti-crawler protections.\n\nWHEN TO USE: Login-required web pages, SSO/OAuth content, paywalled sites, 401/403 errors, or any auth-protected resources.\n\nAUTHENTICATION BEHAVIOR: First request to authenticated site returns login web page - this is EXPECTED. User authenticates in browser (10-30s). Retry same URL to get content. Keep retrying until success - authentication completes in background. Tab stays open to preserve session.\n\nCRITICAL RULES: (1) Process ONE URL at a time - never parallel requests. (2) Never give up or suggest alternatives - keep retrying until content retrieved. (3) Requested web page is critical - skipping not acceptable. (4) Skip and continue if web page returns 404 - web page doesn't exist.\n\nReturns: Configurable output format (HTML, TEXT, or BOTH). Tab reuse preserves sessions across same-domain requests.",
401
+ description: "Fetches protected web pages using Chrome browser with user's authenticated session (saved passwords, active logins, cookies). Use for: login-required pages, SSO/OAuth, paywalled sites, 401/403 errors, anti-crawler blocks.\n\nAUTH FLOW: If page redirects to login, function WAITS (up to 10 min) for user to authenticate in browser, then automatically returns actual content once auth completes. Single call returns correct content, no retry needed.\n\nRULES: (1) ONE URL at a time, never parallel. (2) Wait for full response - may take minutes for auth. (3) Skip only if 404. (4) Returns HTML with clickable links for subpage navigation.",
287
402
  inputSchema: {
288
403
  type: "object",
289
404
  properties: {
290
405
  url: { type: "string", description: "The URL to fetch" },
291
- keepPageOpen: { type: "boolean", description: "Keep tab open to reuse for subsequent same-domain requests - preserves auth session (default: true)" },
292
- outputFormat: { type: "string", enum: ["HTML", "TEXT", "BOTH"], description: "Output format: HTML for full markup with links/structure, TEXT for clean readable content (more token-efficient), BOTH for complete data (default: HTML)" },
293
406
  },
294
407
  required: ["url"],
295
408
  additionalProperties: false,
@@ -339,6 +452,10 @@ async function main() {
339
452
  await server.connect(transport);
340
453
  }
341
454
 
455
+ // Export for testing
456
+ export { fetchPage, getBrowser, prepareHtml };
457
+
458
+ // Run the MCP server
342
459
  main().catch((err) => {
343
460
  console.error(err);
344
461
  process.exit(1);
package/test-mcp.js ADDED
@@ -0,0 +1,63 @@
1
#!/usr/bin/env node
/**
 * Manual smoke test for the MCP stdio server in src/mcp-browser.js.
 *
 * Spawns the server, performs the MCP handshake (initialize request followed
 * by the notifications/initialized notification), requests tools/list and
 * pretty-prints every response. Each step is sent only after the previous
 * response arrived, and the process exits with 0 once the tool list was
 * received or with 1 on an error response, an early server exit, a spawn
 * failure or a timeout.
 */
import { spawn } from 'child_process';

// Upper bound for the whole exchange. It only matters when the server never
// answers, because the happy path finishes as soon as tools/list responds.
const OVERALL_TIMEOUT_MS = 10000;

// Use the interpreter running this script rather than whatever "node" is on PATH.
const mcpProcess = spawn(process.execPath, ['src/mcp-browser.js'], {
  cwd: process.cwd(),
  stdio: ['pipe', 'pipe', 'inherit'],
});

const initRequest = {
  jsonrpc: '2.0',
  id: 1,
  method: 'initialize',
  params: {
    protocolVersion: '2024-11-05',
    capabilities: {},
    clientInfo: { name: 'test', version: '1.0' },
  },
};

// Notifications carry no id; this one tells the server the handshake is done.
const initializedNotification = {
  jsonrpc: '2.0',
  method: 'notifications/initialized',
};

const listToolsRequest = {
  jsonrpc: '2.0',
  id: 2,
  method: 'tools/list',
  params: {},
};

let finished = false;

const watchdog = setTimeout(() => {
  finish(1, `Timed out after ${OVERALL_TIMEOUT_MS} ms waiting for the server`);
}, OVERALL_TIMEOUT_MS);

/** Stops the server and exits exactly once with the given code. */
function finish(exitCode, reason) {
  if (finished) return;
  finished = true;
  clearTimeout(watchdog);
  console.log(reason);
  mcpProcess.kill();
  process.exit(exitCode);
}

/** Writes one newline-delimited JSON-RPC message to the server's stdin. */
function send(message) {
  mcpProcess.stdin.write(`${JSON.stringify(message)}\n`);
}

/** Prints a parsed server message and advances the handshake. */
function handleMessage(message) {
  console.log('Response:', JSON.stringify(message, null, 2));
  if (message === null || typeof message !== 'object') return;

  if (message.error) {
    finish(1, `Server returned an error for request ${message.id}`);
    return;
  }

  if (message.id === initRequest.id) {
    send(initializedNotification);
    console.log('Sending tools/list request...');
    send(listToolsRequest);
  } else if (message.id === listToolsRequest.id) {
    finish(0, 'Closing...');
  }
}

let responseBuffer = '';
mcpProcess.stdout.on('data', (data) => {
  // A chunk may end mid-line: keep the trailing partial line for the next chunk.
  responseBuffer += data.toString();
  const lines = responseBuffer.split('\n');
  responseBuffer = lines.pop() || '';

  for (const line of lines) {
    if (!line.trim()) continue;

    let message;
    try {
      message = JSON.parse(line);
    } catch {
      console.log('Raw output:', line);
      continue;
    }
    handleMessage(message);
  }
});

mcpProcess.on('error', (err) => {
  console.error('Error:', err);
  finish(1, 'Failed to start the server process');
});

mcpProcess.on('exit', (code, signal) => {
  finish(1, `Server exited before the test completed (code=${code}, signal=${signal})`);
});

// Writing to a server that already died surfaces as a stream error; log it
// and let the exit handler above decide the outcome.
mcpProcess.stdin.on('error', (err) => {
  console.error('Error:', err);
});

console.log('Sending initialize request...');
send(initRequest);