mcpbrowser 0.2.18 → 0.2.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/extension/package.json +1 -1
- package/extension/src/extension.js +1 -1
- package/package.json +1 -1
- package/server.json +1 -1
- package/src/mcp-browser.js +191 -72
- package/tests/domain-tab-pooling.test.js +329 -0
- package/tests/integration.test.js +158 -0
- package/tests/prepare-html.test.js +307 -0
package/README.md
CHANGED
|
@@ -117,7 +117,7 @@ Restart VS Code or reload the window for the changes to take effect.
|
|
|
117
117
|
In Copilot Chat, you should see the `MCPBrowser` server listed. Ask it to fetch an authenticated URL and it will drive your signed-in Chrome session.
|
|
118
118
|
|
|
119
119
|
## How it works
|
|
120
|
-
- Tool `fetch_webpage_protected` (inside the MCP server) drives your live Chrome (DevTools Protocol) so it inherits your auth cookies, returning `
|
|
120
|
+
- Tool `fetch_webpage_protected` (inside the MCP server) drives your live Chrome (DevTools Protocol) so it inherits your auth cookies, returning cleaned `html` (scripts, styles, and non-content attributes removed; relative links converted to absolute URLs) for analysis.
|
|
121
121
|
- **Smart confirmation**: Copilot asks for confirmation ONLY on first request to a new domain - explains browser will open for authentication. Subsequent requests to same domain work automatically (session preserved).
|
|
122
122
|
- **Domain-aware tab reuse**: Automatically reuses the same tab for URLs on the same domain, preserving authentication session. Different domains open new tabs.
|
|
123
123
|
- **Automatic web page fetching**: Waits for network idle (`networkidle0`) by default, ensuring JavaScript-heavy web pages (SPAs, dashboards) fully load before returning content.
|
package/extension/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "mcpbrowser",
|
|
3
3
|
"displayName": "MCP Browser",
|
|
4
4
|
"description": "Lightweight MCP server-extension that allows Copilot to fetch protected web pages you can authenticate to via browser - handles login, SSO, and anti-crawler restrictions. Should be used when standard fetch_webpage fails",
|
|
5
|
-
"version": "0.2.
|
|
5
|
+
"version": "0.2.19",
|
|
6
6
|
"publisher": "cherchyk",
|
|
7
7
|
"icon": "icon.png",
|
|
8
8
|
"engines": {
|
|
@@ -92,7 +92,7 @@ async function configureMcpBrowser() {
|
|
|
92
92
|
type: "stdio",
|
|
93
93
|
command: "npx",
|
|
94
94
|
args: ["-y", "mcpbrowser@latest"],
|
|
95
|
-
description: "
|
|
95
|
+
description: "Use AUTOMATICALLY on 401/403 errors, login pages, SSO prompts, or anti-bot blocks. First domain request: ask user confirmation (browser opens for auth). Subsequent same-domain: use automatically (session preserved). Returns HTML from authenticated Chrome session. Handles Microsoft, GitHub, AWS, Google, corporate sites."
|
|
96
96
|
};
|
|
97
97
|
|
|
98
98
|
// Write back to file with pretty formatting
|
package/package.json
CHANGED
package/server.json
CHANGED
package/src/mcp-browser.js
CHANGED
|
@@ -60,7 +60,7 @@ function getDefaultChromePaths() {
|
|
|
60
60
|
const defaultChromePaths = getDefaultChromePaths();
|
|
61
61
|
|
|
62
62
|
let cachedBrowser = null;
|
|
63
|
-
let
|
|
63
|
+
let domainPages = new Map(); // hostname -> page mapping for tab reuse across domains
|
|
64
64
|
let chromeLaunchPromise = null; // prevent multiple simultaneous launches
|
|
65
65
|
|
|
66
66
|
async function devtoolsAvailable() {
|
|
@@ -152,19 +152,16 @@ async function getBrowser() {
|
|
|
152
152
|
});
|
|
153
153
|
cachedBrowser.on("disconnected", () => {
|
|
154
154
|
cachedBrowser = null;
|
|
155
|
-
|
|
155
|
+
domainPages.clear(); // Clear all domain page mappings
|
|
156
156
|
});
|
|
157
157
|
return cachedBrowser;
|
|
158
158
|
}
|
|
159
159
|
|
|
160
|
-
async function fetchPage({
|
|
161
|
-
url,
|
|
162
|
-
keepPageOpen = true,
|
|
163
|
-
outputFormat = "HTML",
|
|
164
|
-
}) {
|
|
160
|
+
async function fetchPage({ url }) {
|
|
165
161
|
// Hardcoded smart defaults
|
|
166
162
|
const waitUntil = "networkidle0";
|
|
167
|
-
const
|
|
163
|
+
const navigationTimeout = 60000; // Initial navigation timeout
|
|
164
|
+
const authCompletionTimeout = 600000; // 10 minutes for user to complete authentication
|
|
168
165
|
const reuseLastKeptPage = true;
|
|
169
166
|
|
|
170
167
|
if (!url) {
|
|
@@ -173,36 +170,29 @@ async function fetchPage({
|
|
|
173
170
|
|
|
174
171
|
const browser = await getBrowser();
|
|
175
172
|
let page = null;
|
|
173
|
+
let hostname;
|
|
176
174
|
|
|
177
|
-
//
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
if (
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
} else {
|
|
195
|
-
// Different domain - close old tab and create new one
|
|
196
|
-
await lastKeptPage.close().catch(() => {});
|
|
197
|
-
lastKeptPage = null;
|
|
198
|
-
}
|
|
199
|
-
} catch {
|
|
200
|
-
// If URL parsing fails, create new tab
|
|
201
|
-
}
|
|
175
|
+
// Parse hostname for domain-based tab reuse
|
|
176
|
+
try {
|
|
177
|
+
hostname = new URL(url).hostname;
|
|
178
|
+
} catch {
|
|
179
|
+
throw new Error(`Invalid URL: ${url}`);
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Check if we have an existing page for this domain
|
|
183
|
+
if (reuseLastKeptPage && domainPages.has(hostname)) {
|
|
184
|
+
const existingPage = domainPages.get(hostname);
|
|
185
|
+
if (!existingPage.isClosed()) {
|
|
186
|
+
page = existingPage;
|
|
187
|
+
await page.bringToFront().catch(() => {});
|
|
188
|
+
console.error(`[MCPBrowser] Reusing existing tab for domain: ${hostname}`);
|
|
189
|
+
} else {
|
|
190
|
+
// Page was closed externally, remove from map
|
|
191
|
+
domainPages.delete(hostname);
|
|
202
192
|
}
|
|
203
193
|
}
|
|
204
194
|
|
|
205
|
-
// Create new tab if no
|
|
195
|
+
// Create new tab if no existing page for this domain
|
|
206
196
|
if (!page) {
|
|
207
197
|
try {
|
|
208
198
|
page = await browser.newPage();
|
|
@@ -225,50 +215,85 @@ async function fetchPage({
|
|
|
225
215
|
throw new Error('Unable to create or find a controllable page');
|
|
226
216
|
}
|
|
227
217
|
}
|
|
218
|
+
// Add new page to domain map
|
|
219
|
+
domainPages.set(hostname, page);
|
|
220
|
+
console.error(`[MCPBrowser] Created new tab for domain: ${hostname}`);
|
|
228
221
|
}
|
|
229
222
|
|
|
230
|
-
let shouldKeepOpen =
|
|
223
|
+
let shouldKeepOpen = true;
|
|
231
224
|
let wasSuccess = false;
|
|
232
225
|
try {
|
|
233
226
|
console.error(`[MCPBrowser] Navigating to: ${url}`);
|
|
234
|
-
await page.goto(url, { waitUntil, timeout:
|
|
235
|
-
console.error(`[MCPBrowser] Navigation completed: ${page.url()}`);
|
|
227
|
+
await page.goto(url, { waitUntil, timeout: navigationTimeout });
|
|
236
228
|
|
|
237
|
-
|
|
238
|
-
const
|
|
229
|
+
const currentUrl = page.url();
|
|
230
|
+
const currentHostname = new URL(currentUrl).hostname;
|
|
239
231
|
|
|
240
|
-
|
|
241
|
-
const html = await page.evaluate(() => document.documentElement?.outerHTML || "");
|
|
242
|
-
result.html = truncate(html, 2000000);
|
|
243
|
-
}
|
|
232
|
+
console.error(`[MCPBrowser] Navigation completed: ${currentUrl}`);
|
|
244
233
|
|
|
245
|
-
if
|
|
246
|
-
|
|
247
|
-
|
|
234
|
+
// Check if we were redirected to a different domain (likely authentication)
|
|
235
|
+
if (currentHostname !== hostname) {
|
|
236
|
+
console.error(`[MCPBrowser] Detected redirect to authentication domain: ${currentHostname}`);
|
|
237
|
+
console.error(`[MCPBrowser] Waiting for user to complete authentication...`);
|
|
238
|
+
console.error(`[MCPBrowser] Will wait up to ${authCompletionTimeout / 1000} seconds for return to ${hostname}`);
|
|
239
|
+
|
|
240
|
+
// Wait for navigation back to the original domain
|
|
241
|
+
const authDeadline = Date.now() + authCompletionTimeout;
|
|
242
|
+
let authCompleted = false;
|
|
243
|
+
|
|
244
|
+
while (Date.now() < authDeadline) {
|
|
245
|
+
try {
|
|
246
|
+
// Check current URL
|
|
247
|
+
const checkUrl = page.url();
|
|
248
|
+
const checkHostname = new URL(checkUrl).hostname;
|
|
249
|
+
|
|
250
|
+
if (checkHostname === hostname) {
|
|
251
|
+
console.error(`[MCPBrowser] Authentication completed! Returned to: ${checkUrl}`);
|
|
252
|
+
authCompleted = true;
|
|
253
|
+
break;
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
// Wait a bit before checking again
|
|
257
|
+
await new Promise(resolve => setTimeout(resolve, 2000));
|
|
258
|
+
} catch (error) {
|
|
259
|
+
// Page might be navigating, continue waiting
|
|
260
|
+
await new Promise(resolve => setTimeout(resolve, 2000));
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
if (!authCompleted) {
|
|
265
|
+
const hint = `Authentication timeout. Tab is left open at ${page.url()}. Complete authentication and retry the same URL.`;
|
|
266
|
+
return { success: false, error: "Authentication timeout - user did not complete login", pageKeptOpen: true, hint };
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
// Wait for page to fully stabilize after auth redirect
|
|
270
|
+
console.error(`[MCPBrowser] Waiting for page to stabilize after authentication...`);
|
|
271
|
+
await new Promise(resolve => setTimeout(resolve, 3000)); // Give page time to settle
|
|
272
|
+
|
|
273
|
+
// Ensure page is ready
|
|
274
|
+
try {
|
|
275
|
+
await page.waitForFunction(() => document.readyState === 'complete', { timeout: 10000 });
|
|
276
|
+
} catch {
|
|
277
|
+
// Ignore timeout - page might already be ready
|
|
278
|
+
}
|
|
248
279
|
}
|
|
249
280
|
|
|
281
|
+
// Extract HTML content
|
|
282
|
+
const html = await page.evaluate(() => document.documentElement?.outerHTML || "");
|
|
283
|
+
const preparedHtml = prepareHtml(html, page.url());
|
|
284
|
+
const result = {
|
|
285
|
+
success: true,
|
|
286
|
+
url: page.url(),
|
|
287
|
+
html: preparedHtml
|
|
288
|
+
};
|
|
289
|
+
|
|
250
290
|
wasSuccess = true;
|
|
251
|
-
if (keepPageOpen && lastKeptPage !== page) {
|
|
252
|
-
// Close old kept page if we're keeping a different one
|
|
253
|
-
if (lastKeptPage && !lastKeptPage.isClosed()) {
|
|
254
|
-
await lastKeptPage.close().catch(() => {});
|
|
255
|
-
}
|
|
256
|
-
lastKeptPage = page;
|
|
257
|
-
}
|
|
258
291
|
return result;
|
|
259
292
|
} catch (err) {
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
? "Tab is left open. Complete sign-in there, then call fetch_webpage_protected again with just the URL."
|
|
263
|
-
: undefined;
|
|
264
|
-
return { success: false, error: err.message || String(err), pageKeptOpen: shouldKeepOpen, hint };
|
|
293
|
+
const hint = "Tab is left open. Complete sign-in there, then call fetch_webpage_protected again with just the URL.";
|
|
294
|
+
return { success: false, error: err.message || String(err), pageKeptOpen: true, hint };
|
|
265
295
|
} finally {
|
|
266
|
-
|
|
267
|
-
lastKeptPage = null;
|
|
268
|
-
}
|
|
269
|
-
if (!shouldKeepOpen) {
|
|
270
|
-
await page.close().catch(() => {});
|
|
271
|
-
}
|
|
296
|
+
// Tab always stays open - domain-aware reuse handles cleanup
|
|
272
297
|
}
|
|
273
298
|
}
|
|
274
299
|
|
|
@@ -277,19 +302,107 @@ function truncate(str, max) {
|
|
|
277
302
|
return str.length > max ? `${str.slice(0, max)}... [truncated]` : str;
|
|
278
303
|
}
|
|
279
304
|
|
|
305
|
+
/**
 * Prepares raw page HTML for consumption by:
 * 1. Removing comments and non-content elements (script, style, noscript, svg, meta, link)
 * 2. Converting relative URLs in href/src attributes to absolute URLs
 * 3. Removing code-related attributes (style, class, id, data-*, on*, role, aria-*)
 * 4. Collapsing whitespace
 *
 * Attribute values are matched quote-aware: a double-quoted value may contain
 * single quotes and vice versa (e.g. onclick="go('a')"), so such attributes are
 * handled as a whole instead of being cut at the first quote of either kind.
 *
 * @param {string} html - Raw HTML markup (typically document.documentElement.outerHTML).
 * @param {string} baseUrl - URL of the page, used to resolve relative href/src values.
 * @returns {string} The cleaned HTML, or "" when html is empty/missing.
 */
function prepareHtml(html, baseUrl) {
  if (!html) return "";

  // A complete quoted attribute value: "..." or '...'
  const QUOTED_VALUE = `(?:"[^"]*"|'[^']*')`;

  let cleaned = html;

  // Remove HTML comments
  cleaned = cleaned.replace(/<!--[\s\S]*?-->/g, '');

  // Remove non-content elements together with their content
  for (const tag of ['script', 'style', 'noscript', 'svg']) {
    const element = new RegExp(`<${tag}\\b[^<]*(?:(?!</${tag}>)<[^<]*)*</${tag}>`, 'gi');
    cleaned = cleaned.replace(element, '');
  }

  // Remove void head elements (meta; link: stylesheets, preload, etc.)
  cleaned = cleaned.replace(/<meta\b[^>]*>/gi, '');
  cleaned = cleaned.replace(/<link\b[^>]*>/gi, '');

  // Rewrites attr="relative" to attr="absolute"; values starting with one of
  // the passthrough prefixes (already absolute, fragments, etc.) are left untouched.
  const absolutize = (attr, passthroughPrefixes) => {
    const pattern = new RegExp(`${attr}=(?:"([^"]*)"|'([^']*)')`, 'gi');
    cleaned = cleaned.replace(pattern, (match, doubleQuoted, singleQuoted) => {
      const url = doubleQuoted ?? singleQuoted;
      if (!url || passthroughPrefixes.some((prefix) => url.startsWith(prefix))) {
        return match;
      }
      try {
        const absoluteUrl = new URL(url, baseUrl).href;
        // Output is always double-quoted, so a literal " must be escaped.
        return `${attr}="${absoluteUrl.replace(/"/g, '&quot;')}"`;
      } catch {
        // Unresolvable value (or invalid baseUrl): keep the attribute as written.
        return match;
      }
    });
  };

  absolutize('href', ['http://', 'https://', '//', '#', 'mailto:', 'tel:']);
  absolutize('src', ['http://', 'https://', '//', 'data:']);

  // Remove presentation/scripting/accessibility attributes that carry no content
  const strippedAttributes = [
    '\\s+style=',
    '\\s+class=',
    '\\s+id=',
    '\\s+data-[a-z0-9-]+=',
    '\\s+on[a-z]+\\s*=\\s*', // event handlers (onclick, onload, etc.)
    '\\s+role=',
    '\\s+aria-[a-z0-9-]+=',
  ];
  for (const attributePrefix of strippedAttributes) {
    cleaned = cleaned.replace(new RegExp(attributePrefix + QUOTED_VALUE, 'gi'), '');
  }

  // Collapse multiple whitespace/newlines into single space
  cleaned = cleaned.replace(/\s+/g, ' ');

  // Remove spaces between tags
  cleaned = cleaned.replace(/>\s+</g, '><');

  return cleaned;
}
|
|
394
|
+
|
|
280
395
|
async function main() {
|
|
281
|
-
const server = new Server({ name: "MCPBrowser", version: "0.2.
|
|
396
|
+
const server = new Server({ name: "MCPBrowser", version: "0.2.19" }, { capabilities: { tools: {} } });
|
|
282
397
|
|
|
283
398
|
const tools = [
|
|
284
399
|
{
|
|
285
400
|
name: "fetch_webpage_protected",
|
|
286
|
-
description: "
|
|
401
|
+
description: "Fetches protected web pages using Chrome browser with user's authenticated session (saved passwords, active logins, cookies). Use for: login-required pages, SSO/OAuth, paywalled sites, 401/403 errors, anti-crawler blocks.\n\nAUTH FLOW: If page redirects to login, function WAITS (up to 10 min) for user to authenticate in browser, then automatically returns actual content once auth completes. Single call returns correct content, no retry needed.\n\nRULES: (1) ONE URL at a time, never parallel. (2) Wait for full response - may take minutes for auth. (3) Skip only if 404. (4) Returns HTML with clickable links for subpage navigation.",
|
|
287
402
|
inputSchema: {
|
|
288
403
|
type: "object",
|
|
289
404
|
properties: {
|
|
290
405
|
url: { type: "string", description: "The URL to fetch" },
|
|
291
|
-
keepPageOpen: { type: "boolean", description: "Keep tab open to reuse for subsequent same-domain requests - preserves auth session (default: true)" },
|
|
292
|
-
outputFormat: { type: "string", enum: ["HTML", "TEXT", "BOTH"], description: "Output format: HTML for full markup with links/structure, TEXT for clean readable content (more token-efficient), BOTH for complete data (default: HTML)" },
|
|
293
406
|
},
|
|
294
407
|
required: ["url"],
|
|
295
408
|
additionalProperties: false,
|
|
@@ -339,7 +452,13 @@ async function main() {
|
|
|
339
452
|
await server.connect(transport);
|
|
340
453
|
}
|
|
341
454
|
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
455
|
+
// Export for testing
|
|
456
|
+
export { fetchPage, getBrowser, prepareHtml };
|
|
457
|
+
|
|
458
|
+
// Only run main if this is the entry point (not when imported, e.g. by tests).
//
// Comparing import.meta.url with `file://${process.argv[1]}` is unreliable:
// on Windows the URL form is file:///C:/..., special characters in the path
// are percent-encoded in the URL, and process.argv[1] may be a symlink
// (for example an npm bin link) rather than the real file path.
// Converting the resolved script path with pathToFileURL handles all of these.
// The modules are loaded dynamically so this block does not depend on the
// file's static import list.
async function isMainModule() {
  const entryScript = process.argv[1];
  if (!entryScript) return false;

  const [{ pathToFileURL }, { realpathSync }] = await Promise.all([
    import("node:url"),
    import("node:fs"),
  ]);

  let resolvedEntry = entryScript;
  try {
    resolvedEntry = realpathSync(entryScript);
  } catch {
    // Path could not be resolved; fall back to comparing the value as given.
  }
  return import.meta.url === pathToFileURL(resolvedEntry).href;
}

if (await isMainModule()) {
  main().catch((err) => {
    console.error(err);
    process.exit(1);
  });
}
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* UNIT TESTS - Automated tests using mock objects (NO browser required)
|
|
3
|
+
* These tests validate domain pooling logic without opening Chrome
|
|
4
|
+
* Run with: node tests/domain-tab-pooling.test.js
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// Mock domain pages map and browser
|
|
8
|
+
/**
 * Minimal in-memory stand-in for a browser page.
 * Tracks the current URL, a closed flag, and canned HTML that is served for
 * the simulated eng.ms Geneva documentation URLs.
 */
class MockPage {
  constructor(url) {
    Object.assign(this, { _url: url, _closed: false, _content: '' });
  }

  /** Current URL of the mock page. */
  url() {
    return this._url;
  }

  /** Whether close() has been called. */
  isClosed() {
    return this._closed;
  }

  /** Marks the page as closed. */
  close() {
    this._closed = true;
  }

  /** No-op; present so callers can treat the mock like a real page. */
  async bringToFront() {}

  /**
   * Records the navigation target; Geneva docs URLs additionally load
   * canned markup containing several same-domain links.
   */
  async goto(url) {
    this._url = url;
    if (!url.includes('eng.ms/docs/products/geneva')) {
      return;
    }
    // Simulate eng.ms page with multiple same-domain links
    this._content = `
        <html>
          <body>
            <h1>Geneva Documentation</h1>
            <a href="https://eng.ms/docs/products/geneva/getting-started">Getting Started</a>
            <a href="https://eng.ms/docs/products/geneva/configuration">Configuration</a>
            <a href="https://eng.ms/docs/products/geneva/monitoring">Monitoring</a>
            <a href="https://eng.ms/docs/products/geneva/alerts">Alerts</a>
            <a href="https://eng.ms/docs/products/geneva/best-practices">Best Practices</a>
            <a href="https://external.com/link">External Link</a>
          </body>
        </html>
      `;
  }

  /**
   * Returns the canned markup when content is loaded and the callback's
   * source mentions outerHTML; otherwise simply invokes the callback.
   */
  async evaluate(fn) {
    const wantsMarkup = Boolean(this._content) && fn.toString().includes('outerHTML');
    return wantsMarkup ? this._content : fn();
  }
}
|
|
45
|
+
|
|
46
|
+
/**
 * Minimal in-memory stand-in for a browser: hands out MockPage instances
 * and remembers every page it has created.
 */
class MockBrowser {
  constructor() {
    this._pages = [];
  }

  /** Creates a blank mock page, registers it, and returns it. */
  async newPage() {
    const created = new MockPage('about:blank');
    this._pages.push(created);
    return created;
  }

  /** Returns the list of pages created so far. */
  async pages() {
    return this._pages;
  }
}
|
|
61
|
+
|
|
62
|
+
// Test framework
|
|
63
|
+
let testsPassed = 0;
|
|
64
|
+
let testsFailed = 0;
|
|
65
|
+
|
|
66
|
+
/**
 * Records and reports a single assertion.
 * A truthy condition logs a pass and bumps testsPassed; a falsy one logs a
 * failure, bumps testsFailed, and throws so the surrounding test stops.
 */
function assert(condition, message) {
  if (condition) {
    console.log(`✅ PASSED: ${message}`);
    testsPassed++;
    return;
  }

  console.error(`❌ FAILED: ${message}`);
  testsFailed++;
  throw new Error(message);
}
|
|
76
|
+
|
|
77
|
+
/**
 * Runs one named test case and reports any error it throws.
 *
 * Failed assertions are already counted by assert(). An error thrown by
 * anything else (for example a TypeError in the test body) is counted here,
 * so a crashed test cannot leave the suite reporting zero failures.
 *
 * @param {string} name - Human-readable test name, printed before running.
 * @param {() => Promise<void>} fn - The test body.
 */
async function test(name, fn) {
  console.log(`\n🧪 Test: ${name}`);
  const failuresBefore = testsFailed;
  try {
    await fn();
  } catch (error) {
    // Only count the error if no assertion has recorded a failure for this test.
    if (testsFailed === failuresBefore) {
      testsFailed++;
    }
    console.error(` Error: ${error?.message ?? error}`);
  }
}
|
|
85
|
+
|
|
86
|
+
// Tests
|
|
87
|
+
/**
 * Runs the domain tab pooling test cases in order against the mock browser,
 * prints a summary of passed/failed assertions, and exits with code 1 when
 * any failure was recorded.
 */
async function runTests() {
  console.log('🚀 Starting Domain Tab Pooling Tests\n');

  await test('Should create new tab for first domain', async () => {
    const tabsByHost = new Map();
    const mockBrowser = new MockBrowser();
    const { hostname: host } = new URL('https://github.com/user/repo');

    // Nothing registered for this host yet
    assert(!tabsByHost.has(host), 'Domain should not exist in map initially');

    // Register a freshly created tab
    const tab = await mockBrowser.newPage();
    tabsByHost.set(host, tab);

    assert(tabsByHost.has(host), 'Domain should be added to map');
    assert(tabsByHost.get(host) === tab, 'Correct page should be stored');
  });

  await test('Should reuse tab for same domain', async () => {
    const tabsByHost = new Map();
    const mockBrowser = new MockBrowser();
    const host = 'github.com';

    // Initial tab for the host
    const firstTab = await mockBrowser.newPage();
    await firstTab.goto('https://github.com/repo1');
    tabsByHost.set(host, firstTab);

    // A second lookup for the same host must yield the same tab
    const lookedUp = tabsByHost.get(host);
    assert(lookedUp === firstTab, 'Should return same page for same domain');
    assert(!lookedUp.isClosed(), 'Page should still be open');
  });

  await test('Should create new tab for different domain', async () => {
    const tabsByHost = new Map();
    const mockBrowser = new MockBrowser();

    // First host
    const githubTab = await mockBrowser.newPage();
    await githubTab.goto('https://github.com/repo');
    tabsByHost.set('github.com', githubTab);

    // Second host gets its own tab
    const secondHost = 'microsoft.com';
    assert(!tabsByHost.has(secondHost), 'Second domain should not exist yet');

    const microsoftTab = await mockBrowser.newPage();
    await microsoftTab.goto('https://microsoft.com/docs');
    tabsByHost.set(secondHost, microsoftTab);

    assert(tabsByHost.has('github.com'), 'First domain should still exist');
    assert(tabsByHost.has('microsoft.com'), 'Second domain should now exist');
    assert(githubTab !== microsoftTab, 'Should be different page objects');
    assert(!githubTab.isClosed(), 'First page should still be open');
  });

  await test('Should reuse tab when returning to previous domain', async () => {
    const tabsByHost = new Map();
    const mockBrowser = new MockBrowser();

    const githubTab = await mockBrowser.newPage();
    tabsByHost.set('github.com', githubTab);

    const microsoftTab = await mockBrowser.newPage();
    tabsByHost.set('microsoft.com', microsoftTab);

    // Going back to the first host
    const revisited = tabsByHost.get('github.com');
    assert(revisited === githubTab, 'Should reuse original page for domain 1');
    assert(!revisited.isClosed(), 'Reused page should still be open');
    assert(tabsByHost.size === 2, 'Should have 2 domains in map');
  });

  await test('Should handle closed tabs gracefully', async () => {
    const tabsByHost = new Map();
    const mockBrowser = new MockBrowser();
    const host = 'github.com';

    const tab = await mockBrowser.newPage();
    tabsByHost.set(host, tab);

    // Simulate the user closing the tab
    tab.close();

    // Stale entries are dropped on lookup
    const stored = tabsByHost.get(host);
    if (stored?.isClosed()) {
      tabsByHost.delete(host);
    }

    assert(!tabsByHost.has(host), 'Closed page should be removed from map');
  });

  await test('Should extract hostname correctly from URLs', async () => {
    const expectations = [
      ['https://github.com/user/repo', 'github.com'],
      ['https://microsoft.com/docs/page', 'microsoft.com'],
      ['https://subdomain.example.com/path', 'subdomain.example.com'],
      ['http://localhost:3000/test', 'localhost'],
    ];

    for (const [url, expected] of expectations) {
      const { hostname } = new URL(url);
      assert(hostname === expected, `Hostname for ${url} should be ${expected}, got ${hostname}`);
    }
  });

  await test('Should handle invalid URLs', async () => {
    let errorThrown = false;
    try {
      new URL('not-a-valid-url');
    } catch {
      errorThrown = true;
    }
    assert(errorThrown, 'Invalid URL should throw error');
  });

  await test('Should clear all pages on browser disconnect', async () => {
    const tabsByHost = new Map();
    const mockBrowser = new MockBrowser();

    // Register several hosts
    for (const host of ['github.com', 'microsoft.com', 'google.com']) {
      const tab = await mockBrowser.newPage();
      tabsByHost.set(host, tab);
    }

    assert(tabsByHost.size === 3, 'Should have 3 domains before disconnect');

    // Simulate browser disconnect
    tabsByHost.clear();

    assert(tabsByHost.size === 0, 'All domains should be cleared after disconnect');
  });

  await test('Should handle multiple requests to same domain', async () => {
    const tabsByHost = new Map();
    const mockBrowser = new MockBrowser();
    const host = 'github.com';

    // First request
    const tab = await mockBrowser.newPage();
    await tab.goto('https://github.com/repo1');
    tabsByHost.set(host, tab);

    // Requests 2 through 5 all hit the same host
    for (const requestNumber of [2, 3, 4, 5]) {
      const stored = tabsByHost.get(host);
      assert(stored === tab, `Request ${requestNumber} should reuse same page`);
      await stored.goto(`https://github.com/repo${requestNumber}`);
    }

    assert(tabsByHost.size === 1, 'Should still have only 1 domain in map');
  });

  await test('Should open internal eng.ms page', async () => {
    const tabsByHost = new Map();
    const mockBrowser = new MockBrowser();
    const target = 'https://eng.ms/docs/products/geneva';
    const { hostname: host } = new URL(target);

    // First request to the eng.ms host
    assert(!tabsByHost.has(host), 'eng.ms domain should not exist initially');

    const tab = await mockBrowser.newPage();
    await tab.goto(target);
    tabsByHost.set(host, tab);

    assert(tabsByHost.has(host), 'eng.ms domain should be added to map');
    assert(tab.url() === target, 'Page URL should match requested URL');
    assert(!tab.isClosed(), 'Page should remain open');
  });

  await test('Should extract and load 5 URLs from same domain', async () => {
    const tabsByHost = new Map();
    const mockBrowser = new MockBrowser();
    const startUrl = 'https://eng.ms/docs/products/geneva';
    const { hostname: host } = new URL(startUrl);

    // Load the starting page
    const tab = await mockBrowser.newPage();
    await tab.goto(startUrl);
    tabsByHost.set(host, tab);

    // Pull its markup
    const html = await tab.evaluate(() => document.documentElement.outerHTML);
    assert(html.includes('Geneva Documentation'), 'Page should contain Geneva content');

    // Collect up to five same-domain links
    const sameDomainLink = /href="(https:\/\/eng\.ms\/[^"]+)"/g;
    const links = [...html.matchAll(sameDomainLink)].slice(0, 5).map((found) => found[1]);

    assert(links.length === 5, `Should extract 5 URLs, got ${links.length}`);

    // Every collected link must belong to the starting host
    for (const link of links) {
      const linkHost = new URL(link).hostname;
      assert(linkHost === host, `All URLs should be from ${host}, got ${linkHost}`);
    }

    // Visit each link through the pooled tab
    const pooledTab = tabsByHost.get(host);
    assert(pooledTab === tab, 'Should reuse same page for same domain');

    for (const [index, link] of links.entries()) {
      await pooledTab.goto(link);
      assert(pooledTab.url() === link, `URL ${index + 1} should be loaded: ${link}`);
      assert(!pooledTab.isClosed(), `Page should remain open after loading URL ${index + 1}`);
    }

    assert(tabsByHost.size === 1, 'Should still have only 1 domain (eng.ms) in map after all loads');
  });

  // Summary
  const divider = '='.repeat(50);
  console.log(`\n${divider}`);
  console.log(`✅ Tests Passed: ${testsPassed}`);
  console.log(`❌ Tests Failed: ${testsFailed}`);
  console.log(divider);

  if (testsFailed > 0) {
    process.exit(1);
  }
}

// Run tests
runTests().catch((error) => {
  console.error('Test suite failed:', error);
  process.exit(1);
});
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests - REQUIRES REAL CHROME AND USER AUTHENTICATION
|
|
3
|
+
* These tests will actually open Chrome browser and require manual login
|
|
4
|
+
* Run with: node tests/integration.test.js
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { fileURLToPath } from 'url';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import { fetchPage } from '../src/mcp-browser.js';
|
|
10
|
+
|
|
11
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
12
|
+
const __dirname = path.dirname(__filename);
|
|
13
|
+
|
|
14
|
+
// Test framework
|
|
15
|
+
let testsPassed = 0;
|
|
16
|
+
let testsFailed = 0;
|
|
17
|
+
|
|
18
|
+
/**
 * Records and reports a single assertion.
 * A truthy condition logs a pass and bumps testsPassed; a falsy one logs a
 * failure, bumps testsFailed, and throws so the surrounding test stops.
 */
function assert(condition, message) {
  if (condition) {
    console.log(`✅ PASSED: ${message}`);
    testsPassed++;
    return;
  }

  console.error(`❌ FAILED: ${message}`);
  testsFailed++;
  throw new Error(message);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Runs a single named test case and reports any error it raises.
 *
 * Failures raised through the local `assert` helper are counted by that helper
 * before it throws. Any other exception (for example a TypeError inside the
 * test body or a rejected promise) is counted here, exactly once, so that a
 * crashed test can no longer leave the failure counter at zero.
 *
 * @param {string} name - Test title printed before the body runs.
 * @param {() => (void|Promise<void>)} fn - Test body; may be async.
 * @returns {Promise<void>} Resolves once the test body has settled.
 */
async function test(name, fn) {
  console.log(`\n🧪 Test: ${name}`);
  // Snapshot the counter so we can tell whether the thrown error was already
  // recorded by assert() or is an uncounted, unexpected exception.
  const failuresBefore = testsFailed;
  try {
    await fn();
  } catch (error) {
    // Fall back to the raw value when something other than an Error was thrown.
    console.error(` Error: ${error?.message ?? error}`);
    if (testsFailed === failuresBefore) {
      testsFailed++;
    }
  }
}
|
|
37
|
+
|
|
38
|
+
// Integration Tests
|
|
39
|
+
// File extensions that identify static assets rather than navigable pages.
const STATIC_ASSET_EXTENSIONS = ['.css', '.js', '.ico', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.woff', '.woff2', '.ttf', '.eot'];

/**
 * Returns true when `hostname` is `domain` itself or one of its subdomains.
 *
 * @param {string} hostname - Host name to check (no port).
 * @param {string} domain - Registrable domain, e.g. `eng.ms`.
 * @returns {boolean}
 */
function isHostInDomain(hostname, domain) {
  return hostname === domain || hostname.endsWith(`.${domain}`);
}

/**
 * Collects absolute page URLs on `domain` from every `href="..."` in `html`.
 *
 * Skips hrefs containing a fragment, hrefs that are neither root-relative nor
 * start with `http`, hrefs that do not parse, and static assets.
 *
 * @param {string} html - Markup to scan.
 * @param {URL} baseUrl - URL that relative hrefs are resolved against.
 * @param {string} domain - Only URLs hosted on this domain (or a subdomain) are kept.
 * @returns {string[]} Matching URLs in document order; may contain duplicates.
 */
function extractPageUrls(html, baseUrl, domain) {
  const hrefPattern = /href=["']([^"']+)["']/g;
  const pageUrls = [];

  for (const [, href] of html.matchAll(hrefPattern)) {
    // Skip anchor links
    if (href.includes('#')) continue;
    // Skip path-relative links and non-http schemes (mailto:, javascript:, ...)
    if (!href.startsWith('/') && !href.startsWith('http')) continue;

    let resolved;
    try {
      // URL resolution handles both "/path" and protocol-relative "//host/path";
      // plain origin concatenation would mangle the latter.
      resolved = new URL(href, baseUrl);
    } catch {
      continue; // Not a parseable URL
    }

    // Check the extension on the path only, so query strings do not interfere.
    const pathname = resolved.pathname.toLowerCase();
    if (STATIC_ASSET_EXTENSIONS.some(ext => pathname.endsWith(ext))) continue;

    // Compare the host, not the whole string: a substring test would also
    // match e.g. "https://other.com/?next=eng.ms".
    if (isHostInDomain(resolved.hostname, domain)) {
      pageUrls.push(resolved.href);
    }
  }

  return pageUrls;
}

/**
 * Returns up to `count` items chosen uniformly at random, without mutating `items`.
 *
 * Uses a Fisher–Yates shuffle; sorting with a random comparator is biased and
 * its result is implementation-defined.
 *
 * @param {Array} items - Candidates.
 * @param {number} count - Maximum number of items to return.
 * @returns {Array}
 */
function pickRandom(items, count) {
  const pool = [...items];
  for (let i = pool.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [pool[i], pool[j]] = [pool[j], pool[i]];
  }
  return pool.slice(0, count);
}

/**
 * Runs the integration scenario against a real Chrome instance: fetches an
 * eng.ms page through `fetchPage`, extracts same-domain links from the returned
 * HTML, fetches up to five of them, then prints a summary and exits the process
 * (status 1 if any failure was counted, otherwise 0).
 *
 * @returns {Promise<void>} In practice never settles: the `finally` block calls `process.exit`.
 */
async function runIntegrationTests() {
  console.log('🚀 Starting Integration Tests (REAL CHROME)\n');
  console.log('⚠️ This will open Chrome browser and may require authentication');
  console.log('⚠️ fetchPage function will WAIT for you to complete authentication\n');

  try {
    await test('Should fetch eng.ms page, extract links, and load them (full Copilot workflow)', async () => {
      const url = 'https://eng.ms/docs/products/geneva';

      // Step 1: Fetch initial page (with auth waiting)
      console.log(` 📄 Step 1: Fetching ${url}`);
      console.log(` ⏳ Function will wait up to 10 minutes for authentication...`);
      console.log(` 💡 Complete login in the browser that opens`);

      const result = await fetchPage({ url });

      console.log(` ✅ Result: ${result.success ? 'SUCCESS' : 'FAILED'}`);
      if (result.success) {
        console.log(` 🔗 Final URL: ${result.url}`);
        console.log(` 📄 HTML length: ${result.html?.length || 0} chars`);
      } else {
        console.log(` ❌ Error: ${result.error}`);
        console.log(` 💡 Hint: ${result.hint}`);
      }

      assert(result.success, 'Should successfully fetch page after authentication');
      const baseUrl = new URL(result.url);
      assert(isHostInDomain(baseUrl.hostname, 'eng.ms'), `URL should be from eng.ms domain, got: ${result.url}`);
      assert(result.html && result.html.length > 0, 'Should return HTML content');

      // Step 2: Extract ALL links from HTML, then pick 5 randomly
      console.log(`\n 📋 Step 2: Extracting all links from HTML...`);

      const allUrls = extractPageUrls(result.html, baseUrl, 'eng.ms');
      console.log(` 📊 Total page URLs found: ${allUrls.length}`);

      // Remove duplicates
      const uniqueUrls = [...new Set(allUrls)];
      console.log(` 🔗 Unique page URLs: ${uniqueUrls.length}`);

      // Randomly pick 5 URLs
      const extractedUrls = pickRandom(uniqueUrls, 5);

      console.log(` 🎲 Randomly selected ${extractedUrls.length} URLs to test:`);
      extractedUrls.forEach((link, i) => console.log(` ${i+1}. ${link}`));

      assert(extractedUrls.length > 0, `Should extract at least one eng.ms URL, found ${extractedUrls.length}`);

      // Step 3: Load each extracted URL (tab reuse)
      console.log(`\n 🔄 Step 3: Loading extracted links (using same tab)...`);

      for (const [i, link] of extractedUrls.entries()) {
        console.log(` 📄 Loading link ${i+1}/${extractedUrls.length}: ${link}`);

        const linkResult = await fetchPage({ url: link });

        console.log(` ✅ Loaded: ${linkResult.url}`);
        assert(linkResult.success, `Should successfully load link ${i+1}: ${link}`);
        assert(linkResult.html && linkResult.html.length > 0, `Link ${i+1} should return HTML content`);
      }
    });

  } catch (error) {
    console.error('\n❌ Test suite error:', error.message);
    testsFailed++;
  } finally {
    // Summary
    console.log('\n' + '='.repeat(50));
    console.log(`✅ Tests Passed: ${testsPassed}`);
    console.log(`❌ Tests Failed: ${testsFailed}`);
    console.log('='.repeat(50));
    console.log('\n💡 Browser left open for manual inspection');

    // Exit immediately without waiting for browser
    process.exit(testsFailed > 0 ? 1 : 0);
  }
}
|
|
153
|
+
|
|
154
|
+
// Entry point: start the integration suite; a rejection that escapes it is fatal.
runIntegrationTests().then(undefined, (suiteError) => {
  console.error('Test suite failed:', suiteError);
  process.exit(1);
});
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
import assert from 'assert';
|
|
2
|
+
import { prepareHtml } from '../src/mcp-browser.js';
|
|
3
|
+
|
|
4
|
+
console.log('🧪 Testing prepareHtml function\n');
|
|
5
|
+
|
|
6
|
+
let testsPassed = 0;
|
|
7
|
+
let testsFailed = 0;
|
|
8
|
+
|
|
9
|
+
/**
 * Runs one synchronous test case, prints a pass/fail line for it and updates
 * the module-level `testsPassed` / `testsFailed` counters.
 *
 * @param {string} description - Label printed next to the result marker.
 * @param {() => void} fn - Test body; a thrown error marks the case as failed.
 */
function test(description, fn) {
  let thrown;
  let didThrow = false;

  try {
    fn();
  } catch (err) {
    didThrow = true;
    thrown = err;
  }

  if (didThrow) {
    console.log(`❌ ${description}`);
    console.log(` Error: ${thrown.message}`);
    testsFailed += 1;
    return;
  }

  console.log(`✅ ${description}`);
  testsPassed += 1;
}
|
|
20
|
+
|
|
21
|
+
// Test 1: HTML comments are stripped, surrounding text is kept
test('Should remove HTML comments', () => {
  const markup = '<div>Content<!-- This is a comment --></div>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('<!--'), 'Should not contain comment start');
  assert.ok(!cleaned.includes('-->'), 'Should not contain comment end');
  assert.ok(cleaned.includes('Content'), 'Should preserve content');
});

// Test 2: <script> elements are dropped together with their body
test('Should remove script tags and their content', () => {
  const markup = '<div>Keep this</div><script>alert("remove");</script><div>And this</div>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('<script'), 'Should not contain script tag');
  assert.ok(!cleaned.includes('alert'), 'Should not contain script content');
  assert.ok(cleaned.includes('Keep this'), 'Should preserve content');
});

// Test 3: <style> elements are dropped together with their rules
test('Should remove style tags and their content', () => {
  const markup = '<div>Content</div><style>.class { color: red; }</style>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('<style'), 'Should not contain style tag');
  assert.ok(!cleaned.includes('color: red'), 'Should not contain style content');
  assert.ok(cleaned.includes('Content'), 'Should preserve content');
});

// Test 4: <meta> elements are dropped
test('Should remove meta tags', () => {
  const markup = '<head><meta charset="utf-8"><meta name="viewport" content="width=device-width"></head><body>Content</body>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('<meta'), 'Should not contain meta tags');
  assert.ok(cleaned.includes('Content'), 'Should preserve content');
});
|
|
55
|
+
|
|
56
|
+
// Test 5: root-relative href is resolved against the page URL
test('Should convert relative href URLs to absolute', () => {
  const markup = '<a href="/docs/page">Link</a>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(cleaned.includes('href="https://example.com/docs/page"'), 'Should convert relative href to absolute');
});

// Test 6: an href that is already absolute is left alone
test('Should keep absolute href URLs unchanged', () => {
  const markup = '<a href="https://other.com/page">Link</a>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(cleaned.includes('href="https://other.com/page"'), 'Should keep absolute href unchanged');
});

// Test 7: root-relative src is resolved against the page URL
test('Should convert relative src URLs to absolute', () => {
  const markup = '<img src="/images/logo.png">';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(cleaned.includes('src="https://example.com/images/logo.png"'), 'Should convert relative src to absolute');
});

// Test 8: a src that is already absolute is left alone
test('Should keep absolute src URLs unchanged', () => {
  const markup = '<img src="https://cdn.example.com/logo.png">';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(cleaned.includes('src="https://cdn.example.com/logo.png"'), 'Should keep absolute src unchanged');
});

// Test 9: in-page anchor links must pass through untouched
test('Should not modify anchor links', () => {
  const markup = '<a href="#section">Jump</a>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(cleaned.includes('href="#section"'), 'Should keep anchor links unchanged');
});

// Test 10: mailto: and tel: links must pass through untouched
test('Should not modify mailto and tel links', () => {
  const markup = '<a href="mailto:test@example.com">Email</a><a href="tel:+1234567890">Call</a>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(cleaned.includes('href="mailto:test@example.com"'), 'Should keep mailto unchanged');
  assert.ok(cleaned.includes('href="tel:+1234567890"'), 'Should keep tel unchanged');
});
|
|
98
|
+
|
|
99
|
+
// Test 11: Handle data URIs in src (should not modify)
// The input must actually carry a data: URI; an empty src="" would pass even
// if data URIs were rewritten, so the case would verify nothing.
test('Should not modify data URIs', () => {
  const dataUri = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==';
  const html = `<img src="${dataUri}">`;
  const result = prepareHtml(html, 'https://example.com');
  assert(result.includes(`src="${dataUri}"`), 'Should keep data URI unchanged');
});
|
|
105
|
+
|
|
106
|
+
// Test 12: protocol-relative URLs ("//host/path") must pass through untouched
test('Should not modify protocol-relative URLs', () => {
  const markup = '<img src="//cdn.example.com/image.png">';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(cleaned.includes('src="//cdn.example.com/image.png"'), 'Should keep protocol-relative URL unchanged');
});

// Test 13: empty and null input both yield an empty string
test('Should handle empty HTML', () => {
  const cleaned = prepareHtml('', 'https://example.com');

  assert.strictEqual(cleaned, '', 'Should return empty string');
});

test('Should handle null HTML', () => {
  const cleaned = prepareHtml(null, 'https://example.com');

  assert.strictEqual(cleaned, '', 'Should return empty string for null');
});
|
|
123
|
+
|
|
124
|
+
// Test 14: a whole document mixing removable elements, links and images
test('Should handle complex HTML with multiple elements', () => {
  const documentMarkup = `
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Test Page</title>
<style>.test { color: blue; }</style>
<script>console.log("test");</script>
</head>
<body>
<!-- Main content -->
<div>
<a href="/page1">Page 1</a>
<a href="https://external.com">External</a>
<img src="/images/pic.jpg">
<script>alert("inline");</script>
</div>
</body>
</html>
`;
  const cleaned = prepareHtml(documentMarkup, 'https://example.com/test/');

  // Non-content elements are gone
  assert.ok(!cleaned.includes('<meta'), 'Should remove meta');
  assert.ok(!cleaned.includes('<style'), 'Should remove style');
  assert.ok(!cleaned.includes('<script'), 'Should remove script');
  assert.ok(!cleaned.includes('<!--'), 'Should remove comments');

  // Root-relative URLs are resolved against the page origin
  assert.ok(cleaned.includes('href="https://example.com/page1"'), 'Should convert relative href');
  assert.ok(cleaned.includes('src="https://example.com/images/pic.jpg"'), 'Should convert relative src');

  // Absolute URLs are untouched
  assert.ok(cleaned.includes('href="https://external.com"'), 'Should keep absolute href');

  // Visible text survives
  assert.ok(cleaned.includes('Page 1'), 'Should preserve content');
});
|
|
164
|
+
|
|
165
|
+
// Test 15: <script> carrying attributes is removed like a bare one
test('Should remove script tags with various attributes', () => {
  const markup = '<script type="text/javascript" async defer src="/app.js">console.log("test");</script>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('<script'), 'Should remove script with attributes');
  assert.ok(!cleaned.includes('app.js'), 'Should remove script content');
});

// Test 16: inline style attributes are stripped
test('Should remove inline style attributes', () => {
  const markup = '<div style="color: red; font-size: 14px;">Content</div>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('style='), 'Should remove style attribute');
  assert.ok(cleaned.includes('Content'), 'Should preserve content');
});

// Test 17: class attributes are stripped
test('Should remove class attributes', () => {
  const markup = '<div class="container main-content">Text</div>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('class='), 'Should remove class attribute');
  assert.ok(cleaned.includes('Text'), 'Should preserve content');
});

// Test 18: id attributes are stripped
test('Should remove id attributes', () => {
  const markup = '<div id="main-section">Content</div>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('id='), 'Should remove id attribute');
  assert.ok(cleaned.includes('Content'), 'Should preserve content');
});

// Test 19: data-* attributes are stripped
test('Should remove data-* attributes', () => {
  const markup = '<div data-id="123" data-value="test">Content</div>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('data-'), 'Should remove data attributes');
  assert.ok(cleaned.includes('Content'), 'Should preserve content');
});

// Test 20: on* event handler attributes are stripped
test('Should remove event handler attributes', () => {
  const markup = '<button onclick="handleClick()" onmouseover="hover()">Click</button>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('onclick='), 'Should remove onclick');
  assert.ok(!cleaned.includes('onmouseover='), 'Should remove onmouseover');
  assert.ok(cleaned.includes('Click'), 'Should preserve content');
});

// Test 21: <svg> elements are dropped together with their children
test('Should remove SVG tags and content', () => {
  const markup = '<div>Text</div><svg width="100" height="100"><circle cx="50" cy="50" r="40"/></svg>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('<svg'), 'Should remove svg tag');
  assert.ok(!cleaned.includes('circle'), 'Should remove svg content');
  assert.ok(cleaned.includes('Text'), 'Should preserve content');
});

// Test 22: <noscript> elements are dropped together with their text
test('Should remove noscript tags and content', () => {
  const markup = '<div>Content</div><noscript>JavaScript is disabled</noscript>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('<noscript'), 'Should remove noscript tag');
  assert.ok(!cleaned.includes('JavaScript is disabled'), 'Should remove noscript content');
  assert.ok(cleaned.includes('Content'), 'Should preserve content');
});

// Test 23: <link> elements are dropped
test('Should remove link tags', () => {
  const markup = '<head><link rel="stylesheet" href="/style.css"><link rel="preload" as="script"></head>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('<link'), 'Should remove link tags');
});

// Test 24: role attributes are stripped
test('Should remove role attributes', () => {
  const markup = '<nav role="navigation">Menu</nav>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('role='), 'Should remove role attribute');
  assert.ok(cleaned.includes('Menu'), 'Should preserve content');
});

// Test 25: aria-* attributes are stripped
test('Should remove aria-* attributes', () => {
  const markup = '<button aria-label="Close" aria-pressed="false">X</button>';
  const cleaned = prepareHtml(markup, 'https://example.com');

  assert.ok(!cleaned.includes('aria-'), 'Should remove aria attributes');
  assert.ok(cleaned.includes('X'), 'Should preserve content');
});
|
|
254
|
+
|
|
255
|
+
// Test 26: Collapse whitespace
// The input contains runs of newlines, spaces and tabs. The "multiple spaces"
// check must look for two consecutive spaces: a single-space probe would
// contradict the 'Line 1' check below, which itself contains one space.
test('Should collapse multiple whitespace into single space', () => {
  const html = '<div>Line 1\n\n\n    Line 2\t\t\tLine 3</div>';
  const result = prepareHtml(html, 'https://example.com');
  assert(!result.includes('\n\n'), 'Should remove multiple newlines');
  assert(!result.includes('  '), 'Should remove multiple spaces');
  assert(result.includes('Line 1'), 'Should preserve content');
});
|
|
263
|
+
|
|
264
|
+
// Test 27: one fragment exercising every kind of removal at once
test('Should handle HTML with all types of removals', () => {
  const fragment = `
<div class="container" id="main" style="color: blue;" data-test="value" onclick="alert()">
<svg width="100"><circle/></svg>
<script>console.log("test");</script>
<style>.test { color: red; }</style>
<noscript>Enable JS</noscript>
<link rel="stylesheet" href="/style.css">
<div role="main" aria-label="content">
<a href="/page">Link</a>
<p>Text content</p>
</div>
</div>
`;
  const cleaned = prepareHtml(fragment, 'https://example.com/test/');

  // Presentation / scripting attributes are gone
  assert.ok(!cleaned.includes('class='), 'Should remove class');
  assert.ok(!cleaned.includes('id='), 'Should remove id');
  assert.ok(!cleaned.includes('style='), 'Should remove style');
  assert.ok(!cleaned.includes('data-'), 'Should remove data attributes');
  assert.ok(!cleaned.includes('onclick='), 'Should remove onclick');
  assert.ok(!cleaned.includes('role='), 'Should remove role');
  assert.ok(!cleaned.includes('aria-'), 'Should remove aria');

  // Non-content elements are gone
  assert.ok(!cleaned.includes('<svg'), 'Should remove svg');
  assert.ok(!cleaned.includes('<script'), 'Should remove script');
  assert.ok(!cleaned.includes('<style'), 'Should remove style');
  assert.ok(!cleaned.includes('<noscript'), 'Should remove noscript');
  assert.ok(!cleaned.includes('<link'), 'Should remove link');

  // Text survives and the relative link is made absolute
  assert.ok(cleaned.includes('href="https://example.com/page"'), 'Should convert relative URL');
  assert.ok(cleaned.includes('Text content'), 'Should preserve text');
});
|
|
301
|
+
|
|
302
|
+
// Print the totals between two rules, then exit non-zero if anything failed.
const summaryRule = '==================================================';
console.log(`\n${summaryRule}`);
console.log(`✅ Tests Passed: ${testsPassed}`);
console.log(`❌ Tests Failed: ${testsFailed}`);
console.log(`${summaryRule}\n`);

const exitCode = testsFailed > 0 ? 1 : 0;
process.exit(exitCode);
|