real-browser-mcp-server 1.1.7 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/lib/cjs/index.js +384 -0
  2. package/{lib → dist/lib}/cjs/module/pageController.js +27 -29
  3. package/{lib → dist/lib}/cjs/module/turnstile.js +23 -12
  4. package/dist/src/ai/action-parser.js +229 -0
  5. package/dist/src/ai/core.js +367 -0
  6. package/dist/src/ai/element-finder.js +409 -0
  7. package/{src → dist/src}/ai/index.js +35 -50
  8. package/dist/src/ai/page-analyzer.js +264 -0
  9. package/dist/src/ai/selector-healer.js +215 -0
  10. package/dist/src/index.js +116 -0
  11. package/dist/src/mcp/handlers/browser.js +230 -0
  12. package/dist/src/mcp/handlers/dom.js +550 -0
  13. package/dist/src/mcp/handlers/extract.js +451 -0
  14. package/dist/src/mcp/handlers/helpers.js +514 -0
  15. package/dist/src/mcp/handlers/index.js +63 -0
  16. package/dist/src/mcp/handlers/misc.js +1224 -0
  17. package/dist/src/mcp/handlers/network.js +1134 -0
  18. package/dist/src/mcp/handlers/state.js +215 -0
  19. package/dist/src/mcp/handlers/vision.js +475 -0
  20. package/dist/src/mcp/index.js +166 -0
  21. package/dist/src/mcp/server.js +117 -0
  22. package/{src → dist/src}/mcp/tools.js +12 -11
  23. package/dist/src/shared/tools.js +598 -0
  24. package/{test → dist/test}/cjs/test.js +119 -169
  25. package/dist/test/mcp/smoke-test.js +131 -0
  26. package/lib/esm/module/pageController.mjs +21 -18
  27. package/lib/esm/module/turnstile.mjs +7 -0
  28. package/package.json +22 -11
  29. package/.github/ISSUE_TEMPLATE/general_issue.yaml +0 -58
  30. package/.github/SETUP.md +0 -111
  31. package/.github/workflows/publish.yml +0 -162
  32. package/Dockerfile +0 -78
  33. package/lib/cjs/adblocker.bin +0 -0
  34. package/lib/cjs/index.js +0 -396
  35. package/src/ai/action-parser.js +0 -269
  36. package/src/ai/core.js +0 -379
  37. package/src/ai/element-finder.js +0 -466
  38. package/src/ai/page-analyzer.js +0 -295
  39. package/src/ai/selector-healer.js +0 -236
  40. package/src/index.js +0 -128
  41. package/src/mcp/handlers.js +0 -5306
  42. package/src/mcp/index.js +0 -190
  43. package/src/mcp/server.js +0 -141
  44. package/src/shared/tools.js +0 -625
  45. package/test/esm/test.mjs +0 -299
  46. package/test/mcp/smoke-test.js +0 -141
@@ -0,0 +1,451 @@
1
+ "use strict";
2
// Module-interop helpers (these look like compiler-emitted CommonJS boilerplate — confirm against the build tooling).
// Each helper reuses an existing definition on `this` when one is present.
//
// __createBinding(o, m, k, k2): exposes property `k` of `m` on `o` under the name `k2` (defaults to `k`).
// Where Object.create exists it installs a property descriptor, substituting an enumerable getter that
// reads m[k] when the original descriptor is missing, is an accessor on a non-__esModule object, or is
// writable/configurable; otherwise it falls back to a plain assignment.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __setModuleDefault(o, v): stores `v` on `o` as an enumerable "default" property.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// __importStar(mod): returns `mod` unchanged when it is flagged __esModule; otherwise builds a new object
// that re-exposes every own property of `mod` except "default" and stores `mod` itself under "default".
+ var __importStar = (this && this.__importStar) || (function () {
19
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or a for-in/hasOwnProperty
// fallback when that is unavailable, then delegates to the chosen implementation.
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.extractHandlers = void 0;
37
+ // @ts-nocheck
38
+ const path = __importStar(require("path"));
39
+ const fs = __importStar(require("fs"));
40
+ const state_1 = require("./state");
41
+ // Auto-generated extract handlers
42
+ exports.extractHandlers = {
43
+ async get_content(params = {}) {
44
+ const { page } = (0, state_1.requireBrowser)();
45
+ const { format = 'text', selector, rawHttpUrl } = params;
46
+ (0, state_1.notifyProgress)('get_content', 'started', `Extracting ${format} content${selector ? ` from ${selector}` : ''}`);
47
+ // === rawHttp mode: fetch raw HTTP response without JS rendering ===
48
+ if (format === 'rawHttp') {
49
+ const url = rawHttpUrl || page.url();
50
+ (0, state_1.notifyProgress)('get_content', 'in_progress', `Fetching raw HTTP (no JS) from: ${url}`);
51
+ try {
52
+ const cookies = await page.context().cookies(url);
53
+ const cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
54
+ const response = await fetch(url, {
55
+ headers: {
56
+ 'User-Agent': await page.evaluate(() => navigator.userAgent),
57
+ 'Cookie': cookieStr,
58
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
59
+ 'Referer': page.url()
60
+ },
61
+ redirect: 'follow'
62
+ });
63
+ const rawHtml = await response.text();
64
+ const renderedHtml = await page.content();
65
+ const diff = {
66
+ rawLength: rawHtml.length,
67
+ renderedLength: renderedHtml.length,
68
+ sizeDifference: renderedHtml.length - rawHtml.length,
69
+ jsLoadedContent: renderedHtml.length > rawHtml.length * 1.1
70
+ };
71
+ (0, state_1.notifyProgress)('get_content', 'completed', `Raw: ${diff.rawLength} chars, Rendered: ${diff.renderedLength} chars`);
72
+ return {
73
+ success: true, rawHtml, renderedHtml, diff,
74
+ url, finalUrl: response.url, statusCode: response.status, format: 'rawHttp'
75
+ };
76
+ }
77
+ catch (e) {
78
+ return { success: false, error: `Raw HTTP fetch failed: ${e.message}` };
79
+ }
80
+ }
81
+ let content;
82
+ // === markdown: real HTML→Markdown conversion (no external deps) ===
83
+ if (format === 'markdown') {
84
+ if (selector) {
85
+ const exists = await page.$(selector);
86
+ if (!exists) {
87
+ (0, state_1.notifyProgress)('get_content', 'error', `Element not found: ${selector}`);
88
+ return { success: false, error: `Element not found: ${selector}` };
89
+ }
90
+ }
91
+ content = await page.evaluate((sel) => {
92
+ const root = sel ? document.querySelector(sel) : document.body;
93
+ if (!root)
94
+ return '';
95
+ const skip = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'IFRAME', 'SVG', 'CANVAS']);
96
+ const inline = (node) => {
97
+ let out = '';
98
+ node.childNodes.forEach(c => {
99
+ if (c.nodeType === 3) {
100
+ out += c.textContent.replace(/\s+/g, ' ');
101
+ return;
102
+ }
103
+ if (c.nodeType !== 1 || skip.has(c.tagName))
104
+ return;
105
+ const t = c.tagName;
106
+ if (t === 'A') {
107
+ const h = c.getAttribute('href') || '';
108
+ const x = inline(c).trim();
109
+ out += h ? `[${x}](${h})` : x;
110
+ }
111
+ else if (t === 'STRONG' || t === 'B')
112
+ out += `**${inline(c).trim()}**`;
113
+ else if (t === 'EM' || t === 'I')
114
+ out += `*${inline(c).trim()}*`;
115
+ else if (t === 'CODE')
116
+ out += '`' + c.textContent.trim() + '`';
117
+ else if (t === 'IMG') {
118
+ const a = c.getAttribute('alt') || '';
119
+ const s = c.getAttribute('src') || '';
120
+ if (s)
121
+ out += `![${a}](${s})`;
122
+ }
123
+ else if (t === 'BR')
124
+ out += '\n';
125
+ else
126
+ out += inline(c);
127
+ });
128
+ return out;
129
+ };
130
+ const lines = [];
131
+ const walk = (node) => {
132
+ node.childNodes.forEach(c => {
133
+ if (c.nodeType === 3) {
134
+ const x = c.textContent.trim();
135
+ if (x)
136
+ lines.push(x);
137
+ return;
138
+ }
139
+ if (c.nodeType !== 1 || skip.has(c.tagName))
140
+ return;
141
+ const t = c.tagName;
142
+ if (/^H[1-6]$/.test(t))
143
+ lines.push('\n' + '#'.repeat(+t[1]) + ' ' + inline(c).trim() + '\n');
144
+ else if (t === 'P') {
145
+ const x = inline(c).trim();
146
+ if (x)
147
+ lines.push(x + '\n');
148
+ }
149
+ else if (t === 'UL' || t === 'OL') {
150
+ let i = 1;
151
+ c.querySelectorAll(':scope > li').forEach(li => lines.push((t === 'OL' ? (i++) + '. ' : '- ') + inline(li).trim()));
152
+ lines.push('');
153
+ }
154
+ else if (t === 'BLOCKQUOTE')
155
+ lines.push('> ' + inline(c).trim() + '\n');
156
+ else if (t === 'PRE')
157
+ lines.push('```\n' + c.textContent.trim() + '\n```\n');
158
+ else if (t === 'HR')
159
+ lines.push('\n---\n');
160
+ else if (['A', 'STRONG', 'B', 'EM', 'I', 'CODE', 'IMG', 'SPAN', 'LABEL'].includes(t)) {
161
+ const x = inline(c).trim();
162
+ if (x)
163
+ lines.push(x);
164
+ }
165
+ else
166
+ walk(c);
167
+ });
168
+ };
169
+ walk(root);
170
+ return lines.join('\n').replace(/\n{3,}/g, '\n\n').trim();
171
+ }, selector || null);
172
+ }
173
+ else if (selector) {
174
+ const element = await page.$(selector);
175
+ if (!element) {
176
+ (0, state_1.notifyProgress)('get_content', 'error', `Element not found: ${selector}`);
177
+ return { success: false, error: `Element not found: ${selector}` };
178
+ }
179
+ if (format === 'html') {
180
+ content = await element.evaluate(el => el.outerHTML);
181
+ }
182
+ else {
183
+ content = await element.evaluate(el => el.textContent);
184
+ }
185
+ }
186
+ else {
187
+ if (format === 'html') {
188
+ content = await page.content();
189
+ }
190
+ else {
191
+ content = await page.evaluate(() => document.body.innerText);
192
+ }
193
+ }
194
+ (0, state_1.notifyProgress)('get_content', 'completed', `Extracted ${content.length} characters`, { format, length: content.length });
195
+ return {
196
+ success: true,
197
+ content,
198
+ url: page.url(),
199
+ format
200
+ };
201
+ },
202
+ async save_content_as_markdown(params) {
203
+ const { page } = (0, state_1.requireBrowser)();
204
+ const { filename, selector, includeImages = true, includeMeta = true } = params;
205
+ (0, state_1.notifyProgress)('save_content_as_markdown', 'started', `Saving to: ${filename}`);
206
+ let markdown = '';
207
+ if (includeMeta) {
208
+ const title = await page.title();
209
+ const url = page.url();
210
+ markdown += `# ${title}\n\n`;
211
+ markdown += `> Source: ${url}\n\n`;
212
+ }
213
+ const content = selector
214
+ ? await page.$eval(selector, el => el.innerText)
215
+ : await page.evaluate(() => document.body.innerText);
216
+ markdown += content;
217
+ const outputPath = path.resolve(filename);
218
+ fs.writeFileSync(outputPath, markdown);
219
+ (0, state_1.notifyProgress)('save_content_as_markdown', 'completed', `Saved ${markdown.length} bytes to ${filename}`, { filename: outputPath, size: markdown.length });
220
+ return { success: true, filename: outputPath, size: markdown.length };
221
+ },
222
+ async extract_json(params = {}) {
223
+ const { page } = (0, state_1.requireBrowser)();
224
+ const { source = 'page', selector, jsonPath } = params;
225
+ (0, state_1.notifyProgress)('extract_json', 'started', `Extracting JSON from: ${source}`);
226
+ let jsonData = [];
227
+ if (source === 'ld+json') {
228
+ jsonData = await page.$$eval('script[type="application/ld+json"]', scripts => scripts.map(s => {
229
+ try {
230
+ return JSON.parse(s.textContent);
231
+ }
232
+ catch {
233
+ return null;
234
+ }
235
+ }).filter(Boolean));
236
+ }
237
+ else if (source === 'scripts') {
238
+ const content = await page.$$eval('script', scripts => scripts.map(s => s.textContent).join('\n'));
239
+ const jsonRegex = /\{[^{}]*\}|\[[^\[\]]*\]/g;
240
+ const matches = content.match(jsonRegex) || [];
241
+ jsonData = matches.slice(0, 20).map(m => {
242
+ try {
243
+ return JSON.parse(m);
244
+ }
245
+ catch {
246
+ return null;
247
+ }
248
+ }).filter(Boolean);
249
+ }
250
+ else if (selector) {
251
+ const text = await page.$eval(selector, el => el.textContent);
252
+ try {
253
+ jsonData = [JSON.parse(text)];
254
+ }
255
+ catch { }
256
+ }
257
+ (0, state_1.notifyProgress)('extract_json', 'completed', `Extracted ${jsonData.length} JSON objects`, { count: jsonData.length });
258
+ return { success: true, source, count: jsonData.length, data: jsonData };
259
+ },
260
+ async scrape_meta_tags(params = {}) {
261
+ const { page } = (0, state_1.requireBrowser)();
262
+ const { types = ['all'] } = params;
263
+ (0, state_1.notifyProgress)('scrape_meta_tags', 'started', 'Extracting meta tags...');
264
+ const meta = await page.evaluate(() => {
265
+ const result = { meta: {}, og: {}, twitter: {} };
266
+ document.querySelectorAll('meta').forEach(tag => {
267
+ const name = tag.getAttribute('name') || tag.getAttribute('property');
268
+ const content = tag.getAttribute('content');
269
+ if (name && content) {
270
+ if (name.startsWith('og:')) {
271
+ result.og[name.replace('og:', '')] = content;
272
+ }
273
+ else if (name.startsWith('twitter:')) {
274
+ result.twitter[name.replace('twitter:', '')] = content;
275
+ }
276
+ else {
277
+ result.meta[name] = content;
278
+ }
279
+ }
280
+ });
281
+ result.title = document.title;
282
+ result.canonical = document.querySelector('link[rel="canonical"]')?.href;
283
+ return result;
284
+ });
285
+ const tagCount = Object.keys(meta.meta).length + Object.keys(meta.og).length + Object.keys(meta.twitter).length;
286
+ (0, state_1.notifyProgress)('scrape_meta_tags', 'completed', `Extracted ${tagCount} meta tags`, { tagCount });
287
+ return { success: true, ...meta };
288
+ },
289
+ async link_harvester(params = {}) {
290
+ const { page } = (0, state_1.requireBrowser)();
291
+ const { types = ['all'], selector, includeText = true, includeHidden = true, searchIframes = false } = params;
292
+ (0, state_1.notifyProgress)('link_harvester', 'started', 'Harvesting links (enhanced mode)...');
293
+ const currentHost = new URL(page.url()).hostname;
294
+ // Enhanced link extraction
295
+ const extractLinks = async (context) => {
296
+ return await context.evaluate(({ includeText, includeHidden }) => {
297
+ const allLinks = [];
298
+ const seenUrls = new Set();
299
+ const addLink = (href, text, source, element) => {
300
+ if (!href || seenUrls.has(href))
301
+ return;
302
+ if (!href.startsWith('http') && !href.startsWith('//'))
303
+ return;
304
+ // Handle protocol-relative URLs
305
+ if (href.startsWith('//')) {
306
+ href = window.location.protocol + href;
307
+ }
308
+ seenUrls.add(href);
309
+ allLinks.push({
310
+ href,
311
+ text: includeText ? (text || '').trim().substring(0, 100) : undefined,
312
+ source,
313
+ hidden: element ? (element.offsetParent === null ||
314
+ getComputedStyle(element).display === 'none' ||
315
+ getComputedStyle(element).visibility === 'hidden') : false
316
+ });
317
+ };
318
+ // 1. Standard anchor tags
319
+ document.querySelectorAll('a[href]').forEach(a => {
320
+ addLink(a.href, a.textContent, 'anchor', a);
321
+ });
322
+ // 2. Data attributes containing URLs
323
+ const dataAttrs = ['data-href', 'data-url', 'data-link', 'data-src', 'data-file', 'data-download'];
324
+ dataAttrs.forEach(attr => {
325
+ document.querySelectorAll(`[${attr}]`).forEach(el => {
326
+ const url = el.getAttribute(attr);
327
+ addLink(url, el.textContent, `${attr}`, el);
328
+ });
329
+ });
330
+ // 3. OnClick handlers with URLs
331
+ if (includeHidden) {
332
+ document.querySelectorAll('[onclick]').forEach(el => {
333
+ const onclick = el.getAttribute('onclick');
334
+ // Look for URL patterns in onclick
335
+ const urlMatches = onclick.match(/https?:\/\/[^\s"'<>]+/gi) || [];
336
+ urlMatches.forEach(url => {
337
+ addLink(url, el.textContent, 'onclick', el);
338
+ });
339
+ // Look for location.href assignments
340
+ const hrefMatch = onclick.match(/location\.href\s*=\s*['"]([^'"]+)['"]/);
341
+ if (hrefMatch) {
342
+ addLink(hrefMatch[1], el.textContent, 'onclick-location', el);
343
+ }
344
+ // Look for window.open calls
345
+ const openMatch = onclick.match(/window\.open\s*\(\s*['"]([^'"]+)['"]/);
346
+ if (openMatch) {
347
+ addLink(openMatch[1], el.textContent, 'onclick-window-open', el);
348
+ }
349
+ });
350
+ }
351
+ // 4. JavaScript href links
352
+ document.querySelectorAll('a[href^="javascript:"]').forEach(a => {
353
+ const href = a.getAttribute('href');
354
+ const urlMatch = href.match(/https?:\/\/[^\s"'<>]+/gi);
355
+ if (urlMatch) {
356
+ urlMatch.forEach(url => addLink(url, a.textContent, 'javascript-href', a));
357
+ }
358
+ });
359
+ // 5. Hidden inputs with URLs
360
+ document.querySelectorAll('input[type="hidden"]').forEach(input => {
361
+ const value = input.value;
362
+ if (value && (value.startsWith('http') || value.startsWith('//'))) {
363
+ addLink(value, input.name || input.id, 'hidden-input', input);
364
+ }
365
+ });
366
+ // 6. Script content analysis for URLs (limited for performance)
367
+ if (includeHidden) {
368
+ const scripts = [...document.querySelectorAll('script')].slice(0, 20);
369
+ scripts.forEach(script => {
370
+ const content = script.textContent || '';
371
+ // Look for download/stream URLs
372
+ const patterns = [
373
+ /["']?(https?:\/\/[^"'\s<>]+\.(mp4|mkv|avi|m3u8|mpd|zip|rar|pdf))[^"'\s<>]*["']?/gi,
374
+ /download[_-]?url\s*[:=]\s*["']([^"']+)["']/gi,
375
+ /file\s*[:=]\s*["']([^"']+)["']/gi
376
+ ];
377
+ patterns.forEach(pattern => {
378
+ let match;
379
+ while ((match = pattern.exec(content)) !== null) {
380
+ addLink(match[1], 'script-extracted', 'script', null);
381
+ }
382
+ });
383
+ });
384
+ }
385
+ // 7. Meta refresh URLs
386
+ const metaRefresh = document.querySelector('meta[http-equiv="refresh"]');
387
+ if (metaRefresh) {
388
+ const content = metaRefresh.getAttribute('content');
389
+ const urlMatch = content?.match(/url=(.+)/i);
390
+ if (urlMatch) {
391
+ addLink(urlMatch[1].trim().replace(/['"]/g, ''), 'meta-refresh', 'meta', null);
392
+ }
393
+ }
394
+ // 8. Iframe sources
395
+ document.querySelectorAll('iframe[src]').forEach(iframe => {
396
+ addLink(iframe.src, 'iframe', 'iframe', iframe);
397
+ });
398
+ return allLinks;
399
+ }, { includeText, includeHidden }).catch(() => []);
400
+ };
401
+ let links = await extractLinks(page);
402
+ // Search iframes if enabled
403
+ if (searchIframes) {
404
+ const frames = page.frames();
405
+ for (let i = 1; i < frames.length && i < 5; i++) {
406
+ try {
407
+ const frame = frames[i];
408
+ if (frame.url() && frame.url() !== 'about:blank') {
409
+ const frameLinks = await extractLinks(frame);
410
+ frameLinks.forEach(link => link.source = `iframe:${link.source}`);
411
+ links = [...links, ...frameLinks];
412
+ }
413
+ }
414
+ catch (e) { }
415
+ }
416
+ }
417
+ // Filter by type
418
+ if (!types.includes('all')) {
419
+ links = links.filter(link => {
420
+ const isInternal = link.href.includes(currentHost);
421
+ const isMedia = /\.(jpg|jpeg|png|gif|mp4|mp3|mkv|avi|pdf|zip|rar|m3u8|mpd)/i.test(link.href);
422
+ const isDownload = /download|file|drive/i.test(link.href);
423
+ if (types.includes('internal') && isInternal)
424
+ return true;
425
+ if (types.includes('external') && !isInternal)
426
+ return true;
427
+ if (types.includes('media') && isMedia)
428
+ return true;
429
+ if (types.includes('download') && isDownload)
430
+ return true;
431
+ if (types.includes('hidden') && link.hidden)
432
+ return true;
433
+ return false;
434
+ });
435
+ }
436
+ // Remove hidden links if not requested
437
+ if (!includeHidden) {
438
+ links = links.filter(link => !link.hidden);
439
+ }
440
+ // Deduplicate
441
+ const seen = new Set();
442
+ links = links.filter(link => {
443
+ if (seen.has(link.href))
444
+ return false;
445
+ seen.add(link.href);
446
+ return true;
447
+ });
448
+ (0, state_1.notifyProgress)('link_harvester', 'completed', `Found ${links.length} links (including hidden)`, { count: links.length });
449
+ return { success: true, count: links.length, links };
450
+ }
451
+ };