brave-real-browser-mcp-server 2.17.10 → 2.17.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,257 +0,0 @@
1
- // @ts-nocheck
2
- import { getPageInstance } from '../browser-manager.js';
3
- import Tesseract from 'tesseract.js';
4
- import { sleep } from '../system-utils.js';
5
/**
 * Pick the image that OCR should run on.
 *
 * Sources are tried in this order: `imageBuffer` (base64 string), `imageUrl`,
 * then a screenshot of the element matching `selector` on the current page.
 * `url` is only consulted on the selector path.
 *
 * @returns {Promise<Buffer|string>} Image bytes, or the image URL unchanged.
 * @throws {Error} When no source is given, the browser is not initialised,
 *   or the selector matches nothing.
 */
async function resolveOcrImageSource({ url, selector, imageUrl, imageBuffer }) {
    if (imageBuffer) {
        return Buffer.from(imageBuffer, 'base64');
    }
    if (imageUrl) {
        return imageUrl;
    }
    if (!selector) {
        throw new Error('No image source provided');
    }
    const page = getPageInstance();
    if (!page) {
        throw new Error('Browser not initialized. Call browser_init first.');
    }
    if (url && page.url() !== url) {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    }
    // Screenshot only the matched element so OCR sees just the captcha image.
    const element = await page.$(selector);
    if (!element) {
        throw new Error(`Element not found: ${selector}`);
    }
    const screenshot = await element.screenshot({ encoding: 'base64' });
    return Buffer.from(screenshot, 'base64');
}
/**
 * OCR Engine - Extract text from captcha images using OCR.
 *
 * @param {object} [args] - Tool arguments.
 * @param {string} [args.url] - Page to open before screenshotting `selector`.
 * @param {string} [args.selector] - CSS selector of the image element.
 * @param {string} [args.imageUrl] - Image URL handed to the recogniser as-is.
 * @param {string} [args.imageBuffer] - Base64-encoded image data.
 * @param {string} [args.language='eng'] - Language passed to `Tesseract.recognize`.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report. Failures are returned with `isError: true`, never thrown.
 */
export async function handleOCREngine(args) {
    try {
        // Destructure inside the try so a missing `args` becomes an error result
        // instead of a rejected promise.
        const { url, selector, imageUrl, imageBuffer, language = 'eng' } = args ?? {};
        const imageSource = await resolveOcrImageSource({ url, selector, imageUrl, imageBuffer });
        const { data } = await Tesseract.recognize(imageSource, language, {
            logger: () => { } // Suppress logs
        });
        const text = (data.text ?? '').trim();
        const confidence = Number(data.confidence ?? 0);
        // `words` and `lines` are treated as optional so that a result carrying
        // only text and confidence still produces a report.
        const words = (data.words ?? []).map((word) => ({
            text: word.text,
            confidence: word.confidence,
            bbox: word.bbox
        }));
        const lineCount = data.lines?.length ?? 0;
        const wordDetail = words
            .map((w) => ` "${w.text}" (confidence: ${Number(w.confidence ?? 0).toFixed(2)}%)`)
            .join('\n');
        return {
            content: [
                {
                    type: "text",
                    text: `OCR Results:\n- Extracted Text: ${text}\n- Confidence: ${confidence.toFixed(2)}%\n- Words Found: ${words.length}\n- Lines: ${lineCount}\n- Language: ${language}\n\nWords Detail:\n${wordDetail}`
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `OCR Engine Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
71
/**
 * Fetch `audioSource` in a temporary tab and write the bytes to `downloadPath`.
 *
 * A separate tab is used so the page holding the captcha is not navigated away.
 *
 * @returns {Promise<boolean>} `true` when a successful response was written to disk.
 */
async function downloadCaptchaAudio(page, audioSource, downloadPath) {
    const tab = await page.browser().newPage();
    try {
        const response = await tab.goto(audioSource);
        // Do not save error pages (non-2xx) as if they were the audio clip.
        if (!response || !response.ok()) {
            return false;
        }
        const fs = await import('fs/promises');
        const buffer = await response.buffer();
        await fs.writeFile(downloadPath, buffer);
        return true;
    }
    finally {
        await tab.close();
    }
}
/**
 * Audio Captcha Solver - Locate (and optionally download) a captcha's audio clip.
 *
 * This only finds and saves the audio. It does not transcribe it.
 *
 * @param {object} [args] - Tool arguments.
 * @param {string} [args.url] - Page to open first if it is not the current URL.
 * @param {string} [args.audioSelector] - Selector of the audio element; used only
 *   when `audioUrl` is not given.
 * @param {string} [args.audioUrl] - Direct URL of the audio clip.
 * @param {string} [args.downloadPath] - When set, the clip is saved to this path.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report. Failures are returned with `isError: true`, never thrown.
 */
export async function handleAudioCaptchaSolver(args) {
    try {
        const { url, audioSelector, audioUrl, downloadPath } = args ?? {};
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        let audioSource = audioUrl;
        // If selector provided, extract audio URL
        if (audioSelector && !audioUrl) {
            audioSource = await page.evaluate((sel) => {
                const element = document.querySelector(sel);
                if (!element) {
                    return null;
                }
                let raw;
                if (element.tagName === 'AUDIO') {
                    raw = element.src || element.currentSrc;
                }
                else if (element.tagName === 'SOURCE') {
                    raw = element.src;
                }
                else {
                    raw = element.getAttribute('src') || element.getAttribute('data-src');
                }
                if (!raw) {
                    return null;
                }
                // Attribute values may be relative; resolve them against the
                // document so the URL is usable outside the page.
                try {
                    return new URL(raw, document.baseURI).href;
                }
                catch {
                    return raw;
                }
            }, audioSelector);
        }
        if (!audioSource) {
            throw new Error('No audio source found');
        }
        // Download audio if path provided
        const downloaded = downloadPath
            ? await downloadCaptchaAudio(page, audioSource, downloadPath)
            : false;
        return {
            content: [
                {
                    type: "text",
                    text: `Audio Captcha Analysis:\n- Audio URL: ${audioSource}\n- Downloaded: ${downloaded ? 'Yes' : 'No'}${downloaded ? `\n- Download Path: ${downloadPath}` : ''}\n\nNote: Audio captcha solving requires external speech-to-text API (Google Speech, AWS Transcribe, etc.)`
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `Audio Captcha Solver Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
135
/**
 * Press the mouse at the centre of `box` and drag it `distance` pixels to the
 * right in `steps` increments, pausing a random 50-100 ms between increments.
 * The button is released even if a move fails part-way.
 */
async function dragPuzzleSlider(page, box, distance, steps = 20) {
    const startX = box.x + box.width / 2;
    const y = box.y + box.height / 2;
    await page.mouse.move(startX, y);
    await page.mouse.down();
    try {
        // Start at 1 so the final increment lands exactly on `distance`.
        for (let i = 1; i <= steps; i++) {
            await page.mouse.move(startX + (distance * i) / steps, y, { steps: 5 });
            await sleep(50 + Math.random() * 50); // Random delay for human-like behavior
        }
    }
    finally {
        await page.mouse.up();
    }
}
/**
 * Puzzle Captcha Handler - Handle slider and puzzle captchas.
 *
 * Reports the geometry of the puzzle and slider elements. With `method`
 * `'auto'` and a slider selector it also performs a plain left-to-right drag
 * of the slider. The drag distance is the puzzle width (300 when unknown);
 * no image analysis is done to find the real gap.
 *
 * @param {object} [args] - Tool arguments.
 * @param {string} [args.url] - Page to open first if it is not the current URL.
 * @param {string} [args.puzzleSelector] - Selector of the puzzle container.
 * @param {string} [args.sliderSelector] - Selector of the slider handle.
 * @param {string} [args.method='auto'] - `'auto'` attempts the drag; any other
 *   value only reports geometry.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report. Failures are returned with `isError: true`, never thrown.
 */
export async function handlePuzzleCaptchaHandler(args) {
    try {
        const { url, puzzleSelector, sliderSelector, method = 'auto' } = args ?? {};
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const result = await page.evaluate((puzzleSel, sliderSel) => {
            const puzzleElement = puzzleSel ? document.querySelector(puzzleSel) : null;
            const sliderElement = sliderSel ? document.querySelector(sliderSel) : null;
            if (!puzzleElement && !sliderElement) {
                throw new Error('No puzzle or slider element found');
            }
            const info = {
                puzzleFound: !!puzzleElement,
                sliderFound: !!sliderElement
            };
            // Get puzzle dimensions if exists
            if (puzzleElement) {
                const rect = puzzleElement.getBoundingClientRect();
                info.puzzle = {
                    width: rect.width,
                    height: rect.height,
                    top: rect.top,
                    left: rect.left,
                    visible: rect.width > 0 && rect.height > 0
                };
                // Check for puzzle piece
                const puzzlePiece = puzzleElement.querySelector('.puzzle-piece, [class*="piece"], [class*="puzzle"]');
                if (puzzlePiece) {
                    const pieceRect = puzzlePiece.getBoundingClientRect();
                    info.puzzlePiece = {
                        width: pieceRect.width,
                        height: pieceRect.height,
                        top: pieceRect.top,
                        left: pieceRect.left
                    };
                }
            }
            // Get slider info if exists
            if (sliderElement) {
                const rect = sliderElement.getBoundingClientRect();
                info.slider = {
                    width: rect.width,
                    height: rect.height,
                    top: rect.top,
                    left: rect.left,
                    visible: rect.width > 0 && rect.height > 0,
                    tagName: sliderElement.tagName.toLowerCase()
                };
            }
            return info;
        }, puzzleSelector || '', sliderSelector || '');
        // If auto method, attempt to solve
        if (method === 'auto' && sliderSelector) {
            try {
                const sliderHandle = await page.$(sliderSelector);
                const box = sliderHandle ? await sliderHandle.boundingBox() : null;
                if (box) {
                    // Record the attempt before dragging so that a failure
                    // part-way through is still shown in the summary.
                    result.attemptedSolve = true;
                    result.method = 'automated_drag';
                    await dragPuzzleSlider(page, box, result.puzzle?.width || 300);
                }
            }
            catch (solveError) {
                result.solveError = solveError.message;
            }
        }
        let summary = `Puzzle Captcha Analysis:\n- Puzzle Found: ${result.puzzleFound ? 'Yes' : 'No'}\n- Slider Found: ${result.sliderFound ? 'Yes' : 'No'}`;
        if (result.puzzle) {
            summary += `\n\nPuzzle Details:\n- Dimensions: ${result.puzzle.width}x${result.puzzle.height}\n- Position: (${result.puzzle.left}, ${result.puzzle.top})\n- Visible: ${result.puzzle.visible ? 'Yes' : 'No'}`;
        }
        if (result.puzzlePiece) {
            summary += `\n\nPuzzle Piece:\n- Dimensions: ${result.puzzlePiece.width}x${result.puzzlePiece.height}\n- Position: (${result.puzzlePiece.left}, ${result.puzzlePiece.top})`;
        }
        if (result.slider) {
            summary += `\n\nSlider Details:\n- Dimensions: ${result.slider.width}x${result.slider.height}\n- Position: (${result.slider.left}, ${result.slider.top})\n- Visible: ${result.slider.visible ? 'Yes' : 'No'}\n- Tag: ${result.slider.tagName}`;
        }
        if (result.attemptedSolve) {
            summary += `\n\nSolve Attempt:\n- Method: ${result.method}\n- Status: ${result.solveError ? 'Failed' : 'Completed'}${result.solveError ? `\n- Error: ${result.solveError}` : ''}`;
        }
        summary += `\n\nNote: Advanced puzzle solving requires computer vision libraries (OpenCV, TensorFlow)`;
        return {
            content: [
                {
                    type: "text",
                    text: summary
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `Puzzle Captcha Handler Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
@@ -1,82 +0,0 @@
1
- // @ts-nocheck
2
- import Ajv from 'ajv/dist/2020.js';
3
- const ajv = new Ajv();
4
- /**
5
- * Data Deduplication - Remove duplicate entries from scraped data
6
- */
7
- /**
8
- * Missing Data Handler - Detect and handle missing data
9
- */
10
/**
 * Data Type Validator - Validate data types against schema.
 *
 * Compiles `schema` with the module-level `ajv` instance and validates either
 * every element of `data` (when it is an array) or `data` itself as one item.
 *
 * @param {object} [args] - Tool arguments.
 * @param {*} [args.data] - A single value or an array of values to validate.
 * @param {object} [args.schema] - Schema handed to `ajv.compile`. Required.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report with counts, up to five invalid items, and a schema preview.
 *   Failures are returned with `isError: true`, never thrown.
 */
export async function handleDataTypeValidator(args) {
    try {
        const { data, schema } = args ?? {};
        if (!schema) {
            throw new Error('Schema is required');
        }
        const validate = ajv.compile(schema);
        const isList = Array.isArray(data);
        const items = isList ? data : [data];
        const validItems = [];
        const invalidItems = [];
        items.forEach((item, index) => {
            if (validate(item)) {
                validItems.push(item);
                return;
            }
            invalidItems.push({
                item,
                // A lone value has no position; only array elements get an index.
                index: isList ? index : undefined,
                errors: validate.errors
            });
        });
        const total = items.length;
        // An empty array would otherwise divide by zero and print "NaN%".
        const validationRate = total === 0
            ? 'N/A (no items)'
            : `${((validItems.length / total) * 100).toFixed(2)}%`;
        let summary = `Data Type Validation Results:\n\nStatistics:\n- Total Items: ${total}\n- Valid Items: ${validItems.length}\n- Invalid Items: ${invalidItems.length}\n- Validation Rate: ${validationRate}`;
        if (invalidItems.length > 0) {
            summary += `\n\nInvalid Items (Top 5):\n${invalidItems.slice(0, 5).map((inv, i) => {
                const errorMsgs = inv.errors?.map((e) => `${e.instancePath || 'root'}: ${e.message}`).join(', ') || 'Unknown error';
                // `??` rather than `||` so that index 0 is printed as 0.
                return `${i + 1}. Index ${inv.index ?? 'N/A'}:\n Errors: ${errorMsgs}`;
            }).join('\n')}`;
        }
        // Decide on the ellipsis from the same string that is being truncated.
        const prettySchema = JSON.stringify(schema, null, 2);
        const schemaPreview = prettySchema.length > 200
            ? `${prettySchema.substring(0, 200)}...`
            : prettySchema;
        summary += `\n\nSchema: ${schemaPreview}`;
        return {
            content: [
                {
                    type: "text",
                    text: summary
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `Data Type Validator Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
80
- /**
81
- * Consistency Checker - Check data consistency across fields
82
- */
@@ -1,264 +0,0 @@
1
- import { getPageInstance } from '../browser-manager.js';
2
/**
 * Keyword Search - Advanced keyword search in page content.
 *
 * Walks the text nodes under `document.body` and records every occurrence of
 * each keyword. Keywords are matched literally: characters with a special
 * meaning in regular expressions are escaped. Empty or missing keywords are
 * ignored.
 *
 * @param {object} [args] - Tool arguments.
 * @param {string} [args.url] - Page to open first if it is not the current URL.
 * @param {string|string[]} [args.keywords] - One keyword or a list of keywords.
 * @param {boolean} [args.caseSensitive=false] - Match case exactly.
 * @param {boolean} [args.wholeWord=false] - Reject matches that touch a word
 *   character (`\w`) on either side.
 * @param {number} [args.context=50] - Characters of surrounding text kept on
 *   each side of a match.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report. Failures are returned with `isError: true`, never thrown.
 */
export async function handleKeywordSearch(args) {
    try {
        const { url, keywords, caseSensitive = false, wholeWord = false, context = 50 } = args ?? {};
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        // An empty keyword would match at every position and never advance, and
        // an undefined one would be searched as the literal text "undefined".
        const keywordList = (Array.isArray(keywords) ? keywords : [keywords])
            .filter((keyword) => keyword != null && String(keyword) !== '')
            .map(String);
        if (keywordList.length === 0) {
            throw new Error('At least one non-empty keyword is required');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const results = await page.evaluate((kws, caseSens, whole, ctx) => {
            const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const flags = caseSens ? 'g' : 'gi';
            const allMatches = [];
            // Prototype-free map so keywords such as "constructor" cannot
            // collide with inherited Object properties.
            const grouped = Object.create(null);
            for (const keyword of kws) {
                const escaped = escapeRegExp(keyword);
                // Lookarounds instead of \b so keywords that begin or end with
                // punctuation (e.g. "c++") can still match as whole words.
                const pattern = whole ? `(?<!\\w)${escaped}(?!\\w)` : escaped;
                const regex = new RegExp(pattern, flags);
                const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null);
                let node;
                while ((node = walker.nextNode())) {
                    const text = node.textContent || '';
                    regex.lastIndex = 0;
                    let match;
                    while ((match = regex.exec(text)) !== null) {
                        const start = Math.max(0, match.index - ctx);
                        const end = Math.min(text.length, match.index + match[0].length + ctx);
                        // Get element info
                        const element = node.parentElement;
                        const entry = {
                            keyword,
                            match: match[0],
                            position: match.index,
                            context: text.substring(start, end),
                            element: {
                                tag: element?.tagName.toLowerCase() || 'text',
                                class: element?.className || '',
                                id: element?.id || ''
                            }
                        };
                        allMatches.push(entry);
                        (grouped[keyword] ??= []).push(entry);
                    }
                }
            }
            return {
                totalMatches: allMatches.length,
                matchesByKeyword: grouped,
                allMatches: allMatches.slice(0, 100) // Limit to first 100
            };
        }, keywordList, caseSensitive, wholeWord, context);
        const resultText = `✅ Keyword Search Results\n\nTotal Matches: ${results.totalMatches}\n\nKeywords searched: ${Array.isArray(keywords) ? keywords.join(', ') : keywords}\n\nMatches by keyword:\n${JSON.stringify(results.matchesByKeyword, null, 2)}\n\nFirst 100 matches:\n${JSON.stringify(results.allMatches, null, 2)}`;
        return {
            content: [
                {
                    type: 'text',
                    text: resultText,
                },
            ],
        };
    }
    catch (error) {
        return {
            content: [{ type: 'text', text: `❌ Keyword search failed: ${error.message}` }],
            isError: true,
        };
    }
}
81
/**
 * Regex Pattern Matcher - Search using regular expressions.
 *
 * Runs `pattern` over the text of the element matching `selector`, or over
 * `document.body.innerText` when no selector is given. At most 1000 matches
 * are collected and the first 100 are reported.
 *
 * @param {object} [args] - Tool arguments.
 * @param {string} [args.url] - Page to open first if it is not the current URL.
 * @param {string} [args.pattern] - Regular expression source. Required.
 * @param {string} [args.flags='g'] - Regular expression flags. The search is
 *   always global; `g` is added when it is absent.
 * @param {string} [args.selector] - Restrict the search to this element's text.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report. Failures are returned with `isError: true`, never thrown.
 */
export async function handleRegexPatternMatcher(args) {
    try {
        const { url, pattern, flags = 'g', selector } = args ?? {};
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        // `new RegExp(undefined)` is the empty pattern, which matches everywhere.
        if (typeof pattern !== 'string' || pattern === '') {
            throw new Error('Pattern is required');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const results = await page.evaluate((pat, flgs, sel, maxMatches) => {
            const content = sel
                ? (document.querySelector(sel)?.textContent || '')
                : document.body.innerText;
            // Without `g` a match loop never advances and reports the first
            // match over and over, so the search is forced to be global.
            const requested = String(flgs ?? '');
            const regex = new RegExp(pat, requested.includes('g') ? requested : `${requested}g`);
            const matches = [];
            // matchAll steps past zero-length matches on its own.
            for (const match of content.matchAll(regex)) {
                if (matches.length >= maxMatches) {
                    break;
                }
                matches.push({
                    match: match[0],
                    index: match.index,
                    groups: match.slice(1),
                    context: content.substring(Math.max(0, match.index - 50), Math.min(content.length, match.index + match[0].length + 50))
                });
            }
            return {
                totalMatches: matches.length,
                matches: matches.slice(0, 100),
                pattern: pat,
                flags: flgs
            };
        }, pattern, flags, selector || '', 1000);
        const resultText = `✅ Regex Pattern Matcher Results\n\nPattern: ${results.pattern}\nFlags: ${results.flags}\nTotal Matches: ${results.totalMatches}\n\nMatches (first 100):\n${JSON.stringify(results.matches, null, 2)}`;
        return {
            content: [{ type: 'text', text: resultText }],
        };
    }
    catch (error) {
        return { content: [{ type: 'text', text: `❌ Regex pattern matcher failed: ${error.message}` }], isError: true };
    }
}
136
/**
 * XPath Support - Query elements using XPath.
 *
 * Evaluates `xpath` against the document. Node results are described one
 * entry per node (element, text or attribute). Expressions that evaluate to a
 * number, string or boolean (for example `count(//a)`) are reported as a
 * single `{ type, value }` entry.
 *
 * @param {object} [args] - Tool arguments.
 * @param {string} [args.url] - Page to open first if it is not the current URL.
 * @param {string} [args.xpath] - XPath expression to evaluate.
 * @param {string} [args.returnType='elements'] - `'html'` additionally includes
 *   the first 500 characters of each element's `innerHTML`.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report. Failures are returned with `isError: true`, never thrown.
 */
export async function handleXPathSupport(args) {
    try {
        const { url, xpath, returnType = 'elements' } = args ?? {};
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const results = await page.evaluate((xp, type) => {
            const xpathResult = document.evaluate(xp, document, null, XPathResult.ANY_TYPE, null);
            // With ANY_TYPE the result may be a primitive. iterateNext() is only
            // valid on node iterators, so primitives are read from their value field.
            const primitive = (kind, value) => ({ count: 1, elements: [{ type: kind, value }] });
            switch (xpathResult.resultType) {
                case XPathResult.NUMBER_TYPE:
                    return primitive('number', xpathResult.numberValue);
                case XPathResult.STRING_TYPE:
                    return primitive('string', xpathResult.stringValue);
                case XPathResult.BOOLEAN_TYPE:
                    return primitive('boolean', xpathResult.booleanValue);
                default:
                    break;
            }
            const elements = [];
            for (let node = xpathResult.iterateNext(); node; node = xpathResult.iterateNext()) {
                if (node.nodeType === Node.ELEMENT_NODE) {
                    elements.push({
                        tagName: node.tagName.toLowerCase(),
                        id: node.id,
                        className: node.className,
                        text: node.textContent?.substring(0, 200),
                        attributes: Object.fromEntries(Array.from(node.attributes, (attr) => [attr.name, attr.value])),
                        innerHTML: type === 'html' ? node.innerHTML.substring(0, 500) : undefined
                    });
                }
                else if (node.nodeType === Node.TEXT_NODE) {
                    elements.push({
                        type: 'text',
                        content: node.textContent?.trim()
                    });
                }
                else if (node.nodeType === Node.ATTRIBUTE_NODE) {
                    elements.push({
                        type: 'attribute',
                        name: node.name,
                        value: node.value
                    });
                }
            }
            return {
                count: elements.length,
                elements
            };
        }, xpath, returnType);
        const resultText = `✅ XPath Query Results\n\nXPath: ${xpath}\nElements Found: ${results.count}\n\nElements:\n${JSON.stringify(results.elements, null, 2)}`;
        return {
            content: [{ type: 'text', text: resultText }],
        };
    }
    catch (error) {
        return { content: [{ type: 'text', text: `❌ XPath query failed: ${error.message}` }], isError: true };
    }
}
198
/**
 * Advanced CSS Selectors - Support for complex CSS selectors.
 *
 * Resolves `selector` on the current page and describes the first 10 matching
 * elements; the reported count always covers every match.
 *
 * @param {object} [args] - Tool arguments.
 * @param {string} [args.url] - Page to open first if it is not the current URL.
 * @param {string} [args.selector] - CSS selector to resolve.
 * @param {string} [args.operation='query'] - `'query'` (all matches), `'closest'`
 *   (`closest(selector)` of the first match), or `'matches'` (every element for
 *   which `matches(selector)` is true). Unknown values behave like `'query'`.
 * @param {string} [args.returnType='elements'] - `'styles'` adds a few computed
 *   style properties; `'html'` adds the first 500 characters of `innerHTML`.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report. Failures are returned with `isError: true`, never thrown.
 */
export async function handleAdvancedCSSSelectors(args) {
    try {
        const { url, selector, operation = 'query', returnType = 'elements' } = args ?? {};
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        // Only this many elements are printed, so only this many are described
        // and sent back from the page.
        const previewLimit = 10;
        const results = await page.evaluate((sel, op, type, limit) => {
            let elements;
            if (op === 'closest') {
                const firstEl = document.querySelector(sel);
                elements = firstEl ? [firstEl.closest(sel)].filter(Boolean) : [];
            }
            else if (op === 'matches') {
                elements = Array.from(document.querySelectorAll('*')).filter((el) => el.matches(sel));
            }
            else {
                elements = Array.from(document.querySelectorAll(sel));
            }
            const described = elements.slice(0, limit).map((element) => {
                // Copy the rectangle into a plain object: DOMRect exposes its
                // fields through prototype getters, which are prone to being
                // lost (yielding `{}`) when the value is returned from the page.
                const { x, y, width, height, top, right, bottom, left } = element.getBoundingClientRect();
                let computedStyles;
                if (type === 'styles') {
                    const computed = window.getComputedStyle(element);
                    computedStyles = {
                        display: computed.display,
                        visibility: computed.visibility,
                        position: computed.position,
                        width: computed.width,
                        height: computed.height,
                        color: computed.color,
                        backgroundColor: computed.backgroundColor
                    };
                }
                return {
                    tagName: element.tagName.toLowerCase(),
                    id: element.id,
                    className: element.className,
                    text: element.textContent?.substring(0, 200),
                    attributes: Object.fromEntries(Array.from(element.attributes, (attr) => [attr.name, attr.value])),
                    computedStyles,
                    innerHTML: type === 'html' ? element.innerHTML.substring(0, 500) : undefined,
                    boundingRect: { x, y, width, height, top, right, bottom, left }
                };
            });
            return {
                count: elements.length,
                elements: described
            };
        }, selector, operation, returnType, previewLimit);
        const resultText = `✅ Advanced CSS Selector Results\n\nSelector: ${selector}\nOperation: ${operation}\nElements Found: ${results.count}\n\nElements (first 10):\n${JSON.stringify(results.elements.slice(0, previewLimit), null, 2)}`;
        return {
            content: [{ type: 'text', text: resultText }],
        };
    }
    catch (error) {
        return { content: [{ type: 'text', text: `❌ CSS selector query failed: ${error.message}` }], isError: true };
    }
}