mcpbrowser 0.2.28 → 0.2.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,279 @@
1
+ import assert from 'assert';
2
+ import { waitForAutoAuth, waitForManualAuth } from '../src/mcp-browser.js';
3
+
4
+ console.log('๐Ÿงช Testing authentication flow functions\n');
5
+
6
+ let testsPassed = 0;
7
+ let testsFailed = 0;
8
+
9
/**
 * Runs a single test case and records its outcome in the module-level
 * `testsPassed` / `testsFailed` counters.
 *
 * The returned promise never rejects: a failing test is logged and counted so
 * that the remaining top-level `await test(...)` calls still run.
 *
 * @param {string} description - Label printed next to the pass/fail marker.
 * @param {Function} fn - Test body. It signals failure by throwing or by
 *   returning a promise that rejects; any other outcome counts as a pass.
 * @returns {Promise<void>} Settles once the outcome has been logged and counted.
 */
async function test(description, fn) {
  try {
    // Awaiting inside try/catch handles rejections, synchronous throws and
    // plain (non-promise) return values through the same path.
    await fn();
  } catch (err) {
    console.log(`❌ ${description}`);
    // `err` may be any thrown value (even null/undefined), so read `message`
    // defensively and fall back to the value itself.
    console.log(` Error: ${err?.message ?? err}`);
    testsFailed++;
    return;
  }

  console.log(`✅ ${description}`);
  testsPassed++;
}
25
+
26
+ // Mock page object for testing
27
/**
 * Minimal stand-in for a browser page that only implements `url()`.
 *
 * Each `url()` call is counted, and the reported URL moves one entry forward
 * through `urls` after every `stride` calls, stopping at the last entry.
 * The stride is 2 for a plain array, or `timing.changeAfterCalls` (falling
 * back to 3) when a config object with `timing` is supplied.
 *
 * Note: the counter is incremented before it is divided by the stride, so the
 * first URL is reported for `stride - 1` calls and every later URL for
 * `stride` calls.
 */
class MockPage {
  /**
   * @param {string[]|{urls: string[], timing?: {changeAfterCalls?: number}}} urlsOrConfig
   *   Either the URL sequence itself, or a config object carrying the sequence
   *   and an optional timing override.
   */
  constructor(urlsOrConfig) {
    const isPlainList = Array.isArray(urlsOrConfig);

    this.urls = isPlainList ? urlsOrConfig : urlsOrConfig.urls;
    this.timing = isPlainList ? null : urlsOrConfig.timing || null;
    this.currentIndex = 0;
    this.callCount = 0;
  }

  /**
   * Counts the call and returns the URL for the current position.
   *
   * @returns {string} Entry of `urls` selected by the call count and stride.
   */
  url() {
    this.callCount++;

    // Timed pages use their configured stride (default 3); plain lists use 2.
    const stride = this.timing ? this.timing.changeAfterCalls || 3 : 2;
    const lastIndex = this.urls.length - 1;
    const position = Math.min(Math.floor(this.callCount / stride), lastIndex);

    return this.urls[position];
  }

  /** Restores the counters to their initial values. */
  reset() {
    this.currentIndex = 0;
    this.callCount = 0;
  }
}
69
+
70
+ // ============================================================================
71
+ // waitForAutoAuth Tests
72
+ // ============================================================================
73
+
74
+ console.log('\n๐Ÿ“‹ Testing waitForAutoAuth()');
75
+
76
// Each case feeds waitForAutoAuth a scripted URL sequence and checks the
// reported outcome.

await test('Should detect successful auto-auth', async () => {
  const loginUrl = 'https://login.example.com/auth';
  const page = new MockPage([
    loginUrl,
    loginUrl,
    'https://app.example.com/dashboard', // Returns to app domain
  ]);

  const outcome = await waitForAutoAuth(page, 'example.com', 'example.com', 2000);

  assert.strictEqual(outcome.success, true, 'Should succeed');
  assert.strictEqual(outcome.hostname, 'app.example.com', 'Should return final hostname');
});

await test('Should detect auto-auth to original domain', async () => {
  const page = new MockPage([
    'https://login.example.com/auth',
    'https://example.com/dashboard', // Returns to exact original domain
  ]);

  const outcome = await waitForAutoAuth(page, 'example.com', 'example.com', 2000);

  assert.strictEqual(outcome.success, true, 'Should succeed');
  assert.strictEqual(outcome.hostname, 'example.com', 'Should return original hostname');
});

await test('Should timeout if auto-auth does not complete', async () => {
  // Single entry: the page stays on the auth URL.
  const page = new MockPage(['https://login.example.com/auth']);

  const outcome = await waitForAutoAuth(page, 'example.com', 'example.com', 1000);

  assert.strictEqual(outcome.success, false, 'Should fail on timeout');
  assert.strictEqual(outcome.hostname, undefined, 'Should not have hostname');
});

await test('Should NOT accept return to auth URL on same domain', async () => {
  const page = new MockPage([
    'https://auth.site.com/login',
    'https://site.com/login', // Returns to site.com but still on /login
  ]);

  const outcome = await waitForAutoAuth(page, 'site.com', 'site.com', 1000);

  assert.strictEqual(outcome.success, false, 'Should fail - still on auth URL');
});

await test('Should accept return to different subdomain', async () => {
  const signInUrl = 'https://accounts.google.com/signin';
  const page = new MockPage([
    signInUrl,
    signInUrl,
    'https://mail.google.com', // Different subdomain but same base
  ]);

  const outcome = await waitForAutoAuth(page, 'gmail.com', 'google.com', 2000);

  assert.strictEqual(outcome.success, true, 'Should succeed');
  assert.strictEqual(outcome.hostname, 'mail.google.com', 'Should return new hostname');
});

await test('Should handle page navigation errors gracefully', async () => {
  // Every url() call throws, so no URL is ever reported.
  const brokenPage = {
    url() {
      throw new Error('Page not accessible');
    },
  };

  const outcome = await waitForAutoAuth(brokenPage, 'example.com', 'example.com', 1000);

  assert.strictEqual(outcome.success, false, 'Should handle errors and timeout');
});
147
+
148
+ // ============================================================================
149
+ // waitForManualAuth Tests
150
+ // ============================================================================
151
+
152
+ console.log('\n๐Ÿ“‹ Testing waitForManualAuth()');
153
+
154
// Each case feeds waitForManualAuth a scripted URL sequence and checks the
// reported outcome.

await test('Should detect successful manual auth', async () => {
  // Manual auth polls every 2 seconds, so the URL must change after ~2 polling attempts
  const page = new MockPage({
    urls: [
      'https://login.microsoftonline.com/oauth',
      'https://app.example.com/dashboard', // User completes auth
    ],
    timing: { changeAfterCalls: 3 }, // Change URL after 3 calls (simulating ~4 seconds)
  });

  const outcome = await waitForManualAuth(page, 'example.com', 'example.com', 10000);

  assert.strictEqual(outcome.success, true, 'Should succeed');
  assert.strictEqual(outcome.hostname, 'app.example.com', 'Should return final hostname');
});

await test('Should return to original domain after auth', async () => {
  const page = new MockPage({
    urls: [
      'https://accounts.google.com/signin',
      'https://myapp.com/home', // Returns to original
    ],
    timing: { changeAfterCalls: 2 },
  });

  const outcome = await waitForManualAuth(page, 'myapp.com', 'myapp.com', 10000);

  assert.strictEqual(outcome.success, true, 'Should succeed');
  assert.strictEqual(outcome.hostname, 'myapp.com', 'Should return to myapp.com');
});

await test('Should timeout if user does not complete auth', async () => {
  // Single entry: the user never leaves the auth page.
  const page = new MockPage(['https://login.example.com/auth']);

  const outcome = await waitForManualAuth(page, 'example.com', 'example.com', 2000);

  assert.strictEqual(outcome.success, false, 'Should timeout');
  assert.ok(outcome.error, 'Should have error message');
  assert.ok(outcome.hint, 'Should have hint for user');
  assert.ok(outcome.hint.includes('Authentication timeout'), 'Hint should mention timeout');
});

await test('Should detect landing on different subdomain', async () => {
  const page = new MockPage({
    urls: [
      'https://sso.company.com/login',
      'https://dashboard.company.com', // Different subdomain, same base
    ],
    timing: { changeAfterCalls: 3 },
  });

  const outcome = await waitForManualAuth(page, 'company.com', 'company.com', 10000);

  assert.strictEqual(outcome.success, true, 'Should succeed');
  assert.strictEqual(outcome.hostname, 'dashboard.company.com', 'Should accept different subdomain');
});

await test('Should NOT accept return to auth page on same base domain', async () => {
  const ssoLoginUrl = 'https://auth0.company.com/login';
  const page = new MockPage([
    ssoLoginUrl,
    ssoLoginUrl,
    'https://company.com/login', // Returns to base but still on /login
  ]);

  const outcome = await waitForManualAuth(page, 'company.com', 'company.com', 2000);

  assert.strictEqual(outcome.success, false, 'Should timeout - still on auth page');
});

await test('Should handle page navigation errors', async () => {
  // The first two url() calls throw; from the third call on a URL is returned.
  let attempts = 0;
  const flakyPage = {
    url() {
      attempts++;
      if (attempts < 3) {
        throw new Error('Navigation in progress');
      }
      return 'https://app.example.com/home';
    },
  };

  const outcome = await waitForManualAuth(flakyPage, 'example.com', 'example.com', 5000);

  assert.strictEqual(outcome.success, true, 'Should handle temporary errors and succeed');
});

await test('Should include current URL in timeout hint', async () => {
  const page = new MockPage(['https://stuck.on.auth.com/page']);

  const outcome = await waitForManualAuth(page, 'example.com', 'example.com', 1000);

  assert.strictEqual(outcome.success, false);
  assert.ok(outcome.hint.includes('stuck.on.auth.com/page'), 'Should include stuck URL in hint');
});

await test('Should accept cross-domain SSO completion', async () => {
  const page = new MockPage({
    urls: [
      'https://accounts.google.com/signin',
      'https://myapp.com', // Different domain entirely
    ],
    timing: { changeAfterCalls: 3 },
  });

  const outcome = await waitForManualAuth(page, 'myapp.com', 'myapp.com', 10000);

  assert.strictEqual(outcome.success, true, 'Should succeed for cross-domain SSO');
  assert.strictEqual(outcome.hostname, 'myapp.com', 'Should return to original app');
});
267
+
268
+ // ============================================================================
269
+ // Summary
270
+ // ============================================================================
271
+
272
// Print the pass/fail tally between two 50-character rules.
const divider = '='.repeat(50);
console.log(`\n${divider}`);
console.log(`Tests passed: ${testsPassed}`);
console.log(`Tests failed: ${testsFailed}`);
console.log(divider);

// A non-zero exit code tells the calling process that at least one test failed.
if (testsFailed > 0) {
  process.exit(1);
}
@@ -4,183 +4,191 @@
4
4
  * Run with: node tests/integration.test.js
5
5
  */
6
6
 
7
- import { fileURLToPath } from 'url';
8
- import path from 'path';
7
+ import assert from 'assert';
9
8
  import { fetchPage } from '../src/mcp-browser.js';
10
9
 
11
- const __filename = fileURLToPath(import.meta.url);
12
- const __dirname = path.dirname(__filename);
10
+ console.log('๐Ÿš€ Starting Integration Tests (REAL CHROME)\n');
11
+ console.log('โš ๏ธ This will open Chrome browser and may require authentication');
12
+ console.log('โš ๏ธ fetchPage function will WAIT for you to complete authentication\n');
13
13
 
14
- // Test framework
15
14
  let testsPassed = 0;
16
15
  let testsFailed = 0;
17
16
 
18
- function assert(condition, message) {
19
- if (!condition) {
20
- console.error(`โŒ FAILED: ${message}`);
21
- testsFailed++;
22
- throw new Error(message);
23
- } else {
24
- console.log(`โœ… PASSED: ${message}`);
25
- testsPassed++;
26
- }
27
- }
28
-
29
- async function test(name, fn) {
30
- console.log(`\n๐Ÿงช Test: ${name}`);
31
- try {
32
- await fn();
33
- } catch (error) {
34
- console.error(` Error: ${error.message}`);
35
- }
17
+ function test(description, fn) {
18
+ return new Promise((resolve) => {
19
+ fn()
20
+ .then(() => {
21
+ console.log(`โœ… ${description}`);
22
+ testsPassed++;
23
+ resolve();
24
+ })
25
+ .catch((err) => {
26
+ console.log(`โŒ ${description}`);
27
+ console.log(` Error: ${err.message}`);
28
+ testsFailed++;
29
+ resolve();
30
+ });
31
+ });
36
32
  }
37
33
 
34
+ // ============================================================================
38
35
  // Integration Tests
39
- async function runIntegrationTests() {
40
- console.log('๐Ÿš€ Starting Integration Tests (REAL CHROME)\n');
41
- console.log('โš ๏ธ This will open Chrome browser and may require authentication');
42
- console.log('โš ๏ธ fetchPage function will WAIT for you to complete authentication\n');
43
-
44
- try {
45
- await test('Should fetch eng.ms page, extract links, and load them (full Copilot workflow)', async () => {
46
- const url = 'https://eng.ms/docs/products/geneva';
47
-
48
- // Step 1: Fetch initial page (with auth waiting)
49
- console.log(` ๐Ÿ“„ Step 1: Fetching ${url}`);
50
- console.log(` โณ Function will wait up to 10 minutes for authentication...`);
51
- console.log(` ๐Ÿ’ก Complete login in the browser that opens`);
52
-
53
- const result = await fetchPage({ url });
54
-
55
- console.log(` โœ… Result: ${result.success ? 'SUCCESS' : 'FAILED'}`);
56
- if (result.success) {
57
- console.log(` ๐Ÿ”— Final URL: ${result.url}`);
58
- console.log(` ๐Ÿ“„ HTML length: ${result.html?.length || 0} chars`);
59
- } else {
60
- console.log(` โŒ Error: ${result.error}`);
61
- console.log(` ๐Ÿ’ก Hint: ${result.hint}`);
62
- }
63
-
64
- assert(result.success, 'Should successfully fetch page after authentication');
65
- assert(result.url.includes('eng.ms'), `URL should be from eng.ms domain, got: ${result.url}`);
66
- assert(result.html && result.html.length > 0, 'Should return HTML content');
67
-
68
- // Step 2: Extract ALL links from HTML, then pick 5 randomly
69
- console.log(`\n ๐Ÿ“‹ Step 2: Extracting all links from HTML...`);
70
-
71
- const baseUrl = new URL(result.url);
72
- const urlPattern = /href=["']([^"']+)["']/g;
73
- const allUrls = [];
74
- let match;
75
-
76
- // Static asset extensions to skip
77
- const skipExtensions = ['.css', '.js', '.ico', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.woff', '.woff2', '.ttf', '.eot'];
78
-
79
- // Extract ALL URLs first
80
- while ((match = urlPattern.exec(result.html)) !== null) {
81
- let foundUrl = match[1];
82
-
83
- // Skip anchor links
84
- if (foundUrl.includes('#')) continue;
85
-
86
- // Convert relative URLs to absolute
87
- if (foundUrl.startsWith('/')) {
88
- foundUrl = `${baseUrl.origin}${foundUrl}`;
89
- } else if (!foundUrl.startsWith('http')) {
90
- continue; // Skip other relative URLs
91
- }
92
-
93
- // Skip static assets (check path without query string)
94
- const urlWithoutQuery = foundUrl.split('?')[0];
95
- if (skipExtensions.some(ext => urlWithoutQuery.toLowerCase().endsWith(ext))) continue;
96
-
97
- // Only include eng.ms URLs (pages)
98
- if (foundUrl.includes('eng.ms')) {
99
- allUrls.push(foundUrl);
100
- }
101
- }
102
-
103
- console.log(` ๐Ÿ“Š Total page URLs found: ${allUrls.length}`);
104
-
105
- // Remove duplicates
106
- const uniqueUrls = [...new Set(allUrls)];
107
- console.log(` ๐Ÿ”— Unique page URLs: ${uniqueUrls.length}`);
108
-
109
- // Randomly pick 5 URLs
110
- const shuffled = uniqueUrls.sort(() => Math.random() - 0.5);
111
- const extractedUrls = shuffled.slice(0, 5);
112
-
113
- console.log(` ๐ŸŽฒ Randomly selected ${extractedUrls.length} URLs to test:`);
114
- extractedUrls.forEach((link, i) => console.log(` ${i+1}. ${link}`));
115
-
116
- assert(extractedUrls.length > 0, `Should extract at least one eng.ms URL, found ${extractedUrls.length}`);
117
-
118
- // Step 3: Load each extracted URL (tab reuse)
119
- console.log(`\n ๐Ÿ”„ Step 3: Loading extracted links (using same tab)...`);
120
-
121
- const linksToTest = extractedUrls.slice(0, Math.min(5, extractedUrls.length));
122
- for (let i = 0; i < linksToTest.length; i++) {
123
- const link = linksToTest[i];
124
- console.log(` ๐Ÿ“„ Loading link ${i+1}/${linksToTest.length}: ${link}`);
125
-
126
- const linkResult = await fetchPage({ url: link });
127
-
128
- console.log(` โœ… Loaded: ${linkResult.url}`);
129
- assert(linkResult.success, `Should successfully load link ${i+1}: ${link}`);
130
- assert(linkResult.html && linkResult.html.length > 0, `Link ${i+1} should return HTML content`);
131
- }
132
- });
36
+ // ============================================================================
37
+
38
+ await test('Should handle gmail.com โ†’ mail.google.com permanent redirect', async () => {
39
+ const url = 'https://gmail.com';
40
+
41
+ console.log(` ๐Ÿ“„ Fetching ${url}`);
42
+ console.log(` ๐Ÿ’ก This should detect permanent redirect and return content immediately`);
43
+
44
+ const result = await fetchPage({ url });
45
+
46
+ console.log(` โœ… Result: ${result.success ? 'SUCCESS' : 'FAILED'}`);
47
+ if (result.success) {
48
+ console.log(` ๐Ÿ”— Final URL: ${result.url}`);
49
+ console.log(` ๐Ÿ“„ HTML length: ${result.html?.length || 0} chars`);
50
+ } else {
51
+ console.log(` โŒ Error: ${result.error}`);
52
+ }
53
+
54
+ assert.strictEqual(result.success, true, 'Should successfully fetch gmail.com');
55
+ assert.ok(result.url.includes('mail.google.com'), `Should redirect to mail.google.com, got: ${result.url}`);
56
+ assert.ok(result.html && result.html.length > 0, 'Should return HTML content');
57
+ assert.ok(result.html.includes('Gmail') || result.html.includes('Google'), 'HTML should contain Gmail or Google content');
58
+
59
+ console.log(` โœ… Permanent redirect handled correctly (gmail.com โ†’ mail.google.com)`);
60
+ });
133
61
 
134
- await test('Should support removeUnnecessaryHTML parameter', async () => {
135
- const url = 'https://eng.ms/docs/products/geneva';
136
-
137
- console.log(` ๐Ÿ“„ Fetching with removeUnnecessaryHTML=true (default)`);
138
- const cleanResult = await fetchPage({ url, removeUnnecessaryHTML: true });
139
-
140
- assert(cleanResult.success, 'Should successfully fetch with removeUnnecessaryHTML=true');
141
- assert(cleanResult.html && cleanResult.html.length > 0, 'Should return cleaned HTML');
142
- assert(!cleanResult.html.includes('<script'), 'Cleaned HTML should not contain script tags');
143
- assert(!cleanResult.html.includes('<style'), 'Cleaned HTML should not contain style tags');
144
- assert(!cleanResult.html.includes('class='), 'Cleaned HTML should not contain class attributes');
145
- console.log(` โœ… Cleaned HTML length: ${cleanResult.html.length} chars`);
146
-
147
- console.log(` ๐Ÿ“„ Fetching with removeUnnecessaryHTML=false`);
148
- const rawResult = await fetchPage({ url, removeUnnecessaryHTML: false });
149
-
150
- assert(rawResult.success, 'Should successfully fetch with removeUnnecessaryHTML=false');
151
- assert(rawResult.html && rawResult.html.length > 0, 'Should return raw HTML');
152
- console.log(` โœ… Raw HTML length: ${rawResult.html.length} chars`);
153
-
154
- // Raw HTML should be larger than cleaned HTML
155
- assert(rawResult.html.length > cleanResult.html.length,
156
- `Raw HTML (${rawResult.html.length}) should be larger than cleaned (${cleanResult.html.length})`);
157
-
158
- const reductionPercent = ((rawResult.html.length - cleanResult.html.length) / rawResult.html.length * 100).toFixed(1);
159
- console.log(` ๐Ÿ“Š Size reduction: ${reductionPercent}% (${rawResult.html.length} โ†’ ${cleanResult.html.length} chars)`);
160
- });
62
+ await test('Should fetch eng.ms page, extract links, and load them (full Copilot workflow)', async () => {
63
+ const url = 'https://eng.ms/docs/products/geneva';
64
+
65
+ // Step 1: Fetch initial page (with auth waiting)
66
+ console.log(` ๐Ÿ“„ Step 1: Fetching ${url}`);
67
+ console.log(` โณ Function will wait up to 10 minutes for authentication...`);
68
+ console.log(` ๐Ÿ’ก Complete login in the browser that opens`);
69
+
70
+ const result = await fetchPage({ url });
71
+
72
+ console.log(` โœ… Result: ${result.success ? 'SUCCESS' : 'FAILED'}`);
73
+ if (result.success) {
74
+ console.log(` ๐Ÿ”— Final URL: ${result.url}`);
75
+ console.log(` ๐Ÿ“„ HTML length: ${result.html?.length || 0} chars`);
76
+ } else {
77
+ console.log(` โŒ Error: ${result.error}`);
78
+ console.log(` ๐Ÿ’ก Hint: ${result.hint}`);
79
+ }
80
+
81
+ assert.strictEqual(result.success, true, 'Should successfully fetch page after authentication');
82
+ assert.ok(result.url.includes('eng.ms'), `URL should be from eng.ms domain, got: ${result.url}`);
83
+ assert.ok(result.html && result.html.length > 0, 'Should return HTML content');
84
+
85
+ // Step 2: Extract ALL links from HTML, then pick 5 randomly
86
+ console.log(`\n ๐Ÿ“‹ Step 2: Extracting all links from HTML...`);
87
+
88
+ const baseUrl = new URL(result.url);
89
+ const urlPattern = /href=["']([^"']+)["']/g;
90
+ const allUrls = [];
91
+ let match;
92
+
93
+ // Static asset extensions to skip
94
+ const skipExtensions = ['.css', '.js', '.ico', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.woff', '.woff2', '.ttf', '.eot'];
95
+
96
+ // Extract ALL URLs first
97
+ while ((match = urlPattern.exec(result.html)) !== null) {
98
+ let foundUrl = match[1];
161
99
 
162
- } catch (error) {
163
- console.error('\nโŒ Test suite error:', error.message);
164
- testsFailed++;
165
- } finally {
166
- // Summary
167
- console.log('\n' + '='.repeat(50));
168
- console.log(`โœ… Tests Passed: ${testsPassed}`);
169
- console.log(`โŒ Tests Failed: ${testsFailed}`);
170
- console.log('='.repeat(50));
171
- console.log('\n๐Ÿ’ก Browser left open for manual inspection');
100
+ // Skip anchor links
101
+ if (foundUrl.includes('#')) continue;
172
102
 
173
- if (testsFailed > 0) {
174
- process.exit(1);
103
+ // Convert relative URLs to absolute
104
+ if (foundUrl.startsWith('/')) {
105
+ foundUrl = `${baseUrl.origin}${foundUrl}`;
106
+ } else if (!foundUrl.startsWith('http')) {
107
+ continue; // Skip other relative URLs
175
108
  }
176
109
 
177
- // Exit immediately without waiting for browser
178
- process.exit(0);
110
+ // Skip static assets (check path without query string)
111
+ const urlWithoutQuery = foundUrl.split('?')[0];
112
+ if (skipExtensions.some(ext => urlWithoutQuery.toLowerCase().endsWith(ext))) continue;
113
+
114
+ // Only include eng.ms URLs (pages)
115
+ if (foundUrl.includes('eng.ms')) {
116
+ allUrls.push(foundUrl);
117
+ }
179
118
  }
180
- }
119
+
120
+ console.log(` ๐Ÿ“Š Total page URLs found: ${allUrls.length}`);
121
+
122
+ // Remove duplicates
123
+ const uniqueUrls = [...new Set(allUrls)];
124
+ console.log(` ๐Ÿ”— Unique page URLs: ${uniqueUrls.length}`);
125
+
126
+ // Randomly pick 5 URLs
127
+ const shuffled = uniqueUrls.sort(() => Math.random() - 0.5);
128
+ const extractedUrls = shuffled.slice(0, 5);
129
+
130
+ console.log(` ๐ŸŽฒ Randomly selected ${extractedUrls.length} URLs to test:`);
131
+ extractedUrls.forEach((link, i) => console.log(` ${i+1}. ${link}`));
132
+
133
+ assert.ok(extractedUrls.length > 0, `Should extract at least one eng.ms URL, found ${extractedUrls.length}`);
134
+
135
+ // Step 3: Load each extracted URL (tab reuse)
136
+ console.log(`\n ๐Ÿ”„ Step 3: Loading extracted links (using same tab)...`);
137
+
138
+ const linksToTest = extractedUrls.slice(0, Math.min(5, extractedUrls.length));
139
+ for (let i = 0; i < linksToTest.length; i++) {
140
+ const link = linksToTest[i];
141
+ console.log(` ๐Ÿ“„ Loading link ${i+1}/${linksToTest.length}: ${link}`);
142
+
143
+ const linkResult = await fetchPage({ url: link });
144
+
145
+ console.log(` โœ… Loaded: ${linkResult.url}`);
146
+ assert.strictEqual(linkResult.success, true, `Should successfully load link ${i+1}: ${link}`);
147
+ assert.ok(linkResult.html && linkResult.html.length > 0, `Link ${i+1} should return HTML content`);
148
+ }
149
+ });
181
150
 
182
- // Run tests
183
- runIntegrationTests().catch(error => {
184
- console.error('Test suite failed:', error);
185
- process.exit(1);
151
+ await test('Should support removeUnnecessaryHTML parameter', async () => {
152
+ const url = 'https://eng.ms/docs/products/geneva';
153
+
154
+ console.log(` ๐Ÿ“„ Fetching with removeUnnecessaryHTML=true (default)`);
155
+ const cleanResult = await fetchPage({ url, removeUnnecessaryHTML: true });
156
+
157
+ assert.strictEqual(cleanResult.success, true, 'Should successfully fetch with removeUnnecessaryHTML=true');
158
+ assert.ok(cleanResult.html && cleanResult.html.length > 0, 'Should return cleaned HTML');
159
+ assert.ok(!cleanResult.html.includes('<script'), 'Cleaned HTML should not contain script tags');
160
+ assert.ok(!cleanResult.html.includes('<style'), 'Cleaned HTML should not contain style tags');
161
+ assert.ok(!cleanResult.html.includes('class='), 'Cleaned HTML should not contain class attributes');
162
+ console.log(` โœ… Cleaned HTML length: ${cleanResult.html.length} chars`);
163
+
164
+ console.log(` ๐Ÿ“„ Fetching with removeUnnecessaryHTML=false`);
165
+ const rawResult = await fetchPage({ url, removeUnnecessaryHTML: false });
166
+
167
+ assert.strictEqual(rawResult.success, true, 'Should successfully fetch with removeUnnecessaryHTML=false');
168
+ assert.ok(rawResult.html && rawResult.html.length > 0, 'Should return raw HTML');
169
+ console.log(` โœ… Raw HTML length: ${rawResult.html.length} chars`);
170
+
171
+ // Raw HTML should be larger than cleaned HTML
172
+ assert.ok(rawResult.html.length > cleanResult.html.length,
173
+ `Raw HTML (${rawResult.html.length}) should be larger than cleaned (${cleanResult.html.length})`);
174
+
175
+ const reductionPercent = ((rawResult.html.length - cleanResult.html.length) / rawResult.html.length * 100).toFixed(1);
176
+ console.log(` ๐Ÿ“Š Size reduction: ${reductionPercent}% (${rawResult.html.length} โ†’ ${cleanResult.html.length} chars)`);
186
177
  });
178
+
179
+ // ============================================================================
180
+ // Summary
181
+ // ============================================================================
182
+
183
+ console.log('\n' + '='.repeat(50));
184
+ console.log(`Tests passed: ${testsPassed}`);
185
+ console.log(`Tests failed: ${testsFailed}`);
186
+ console.log('='.repeat(50));
187
+ console.log('\n๐Ÿ’ก Browser left open for manual inspection');
188
+
189
+ if (testsFailed > 0) {
190
+ process.exit(1);
191
+ }
192
+
193
+ // Exit immediately without waiting for browser
194
+ process.exit(0);