brave-real-browser-mcp-server 2.17.10 → 2.17.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-manager.js +0 -8
- package/dist/debug-logger.js +28 -0
- package/dist/handlers/advanced-extraction-handlers.js +0 -80
- package/dist/handlers/deep-analysis-handler.js +119 -0
- package/dist/handlers/multi-element-handlers.js +0 -60
- package/dist/handlers/smart-data-extractors.js +0 -475
- package/dist/handlers/unified-captcha-handler.js +137 -0
- package/dist/handlers/unified-search-handler.js +137 -0
- package/dist/index.js +87 -63
- package/dist/tool-definitions.js +58 -186
- package/dist/workflows/forensic-media-extractor.js +5 -15
- package/dist/workflows/media-extraction-workflow.js +3 -8
- package/package.json +1 -1
- package/dist/handlers/advanced-video-media-handlers.js +0 -139
- package/dist/handlers/captcha-handlers.js +0 -257
- package/dist/handlers/data-quality-handlers.js +0 -82
- package/dist/handlers/search-filter-handlers.js +0 -264
|
@@ -1,257 +0,0 @@
|
|
|
1
|
-
// @ts-nocheck
|
|
2
|
-
import { getPageInstance } from '../browser-manager.js';
|
|
3
|
-
import Tesseract from 'tesseract.js';
|
|
4
|
-
import { sleep } from '../system-utils.js';
|
|
5
|
-
/**
 * OCR Engine - Extract text from captcha images using OCR.
 *
 * The image is taken from the first source that is present, in this order:
 *   1. `imageBuffer` - base64-encoded image data,
 *   2. `imageUrl`    - passed straight to Tesseract,
 *   3. `selector`    - a screenshot of the first matching element on the
 *      current page (the page is navigated to `url` first when `url` is given
 *      and differs from the page's current URL).
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open before resolving `selector`.
 * @param {string} [args.selector] - CSS selector of the element to screenshot.
 * @param {string} [args.imageUrl] - Image location handed to Tesseract as-is.
 * @param {string} [args.imageBuffer] - Base64-encoded image bytes.
 * @param {string} [args.language='eng'] - Tesseract language code.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report on success; on any failure a result with `isError: true`
 *   (errors are never thrown to the caller).
 */
export async function handleOCREngine(args) {
    const { url, selector, imageUrl, imageBuffer, language = 'eng' } = args;
    try {
        let imageSource;
        if (imageBuffer) {
            imageSource = Buffer.from(imageBuffer, 'base64');
        }
        else if (imageUrl) {
            imageSource = imageUrl;
        }
        else if (selector) {
            const page = getPageInstance();
            if (!page) {
                throw new Error('Browser not initialized. Call browser_init first.');
            }
            if (url && page.url() !== url) {
                await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
            }
            // Get image element and take screenshot
            const element = await page.$(selector);
            if (!element) {
                throw new Error(`Element not found: ${selector}`);
            }
            try {
                const screenshot = await element.screenshot({ encoding: 'base64' });
                imageSource = Buffer.from(screenshot, 'base64');
            }
            finally {
                // Release the handle; a failed cleanup must not mask the real outcome.
                await element.dispose().catch(() => { });
            }
        }
        else {
            throw new Error('No image source provided');
        }
        // Perform OCR
        const result = await Tesseract.recognize(imageSource, language, {
            logger: () => { } // Suppress logs
        });
        // NOTE(review): depending on the tesseract.js version/output options the
        // `words` and `lines` arrays may be absent - treat them as empty instead
        // of crashing on `.map` / `.length`.
        const data = result?.data ?? {};
        const text = (data.text ?? '').trim();
        const confidence = Number(data.confidence ?? 0);
        const words = (data.words ?? []).map((word) => ({
            text: word.text,
            confidence: Number(word.confidence ?? 0),
            bbox: word.bbox
        }));
        const lineCount = (data.lines ?? []).length;
        const wordsDetail = words
            .map((w) => ` "${w.text}" (confidence: ${w.confidence.toFixed(2)}%)`)
            .join('\n');
        return {
            content: [
                {
                    type: "text",
                    text: `OCR Results:\n- Extracted Text: ${text}\n- Confidence: ${confidence.toFixed(2)}%\n- Words Found: ${words.length}\n- Lines: ${lineCount}\n- Language: ${language}\n\nWords Detail:\n${wordsDetail}`
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `OCR Engine Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
|
|
71
|
-
/**
 * Audio Captcha Solver - Locate (and optionally download) a captcha's audio.
 *
 * This does not transcribe anything: it reports the audio URL and, when
 * `downloadPath` is given, saves the audio bytes there.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first when it differs from the current URL.
 * @param {string} [args.audioSelector] - CSS selector of an element carrying the
 *   audio location (`<audio>`, `<source>`, or any element with `src`/`data-src`).
 *   Ignored when `audioUrl` is given.
 * @param {string} [args.audioUrl] - Explicit audio URL.
 * @param {string} [args.downloadPath] - File path to write the audio to.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report on success; on any failure a result with `isError: true`.
 */
export async function handleAudioCaptchaSolver(args) {
    const { url, audioSelector, audioUrl, downloadPath } = args;
    try {
        // Fail fast: without either input there is nothing to look up.
        if (!audioUrl && !audioSelector) {
            throw new Error('No audio source found');
        }
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        let audioSource = audioUrl;
        // If selector provided, extract audio URL
        if (audioSelector && !audioUrl) {
            audioSource = await page.evaluate((sel) => {
                const element = document.querySelector(sel);
                if (!element) {
                    return null;
                }
                let raw;
                if (element.tagName === 'AUDIO') {
                    raw = element.src || element.currentSrc;
                }
                else if (element.tagName === 'SOURCE') {
                    raw = element.src;
                }
                else {
                    raw = element.getAttribute('src') || element.getAttribute('data-src');
                }
                if (!raw) {
                    return null;
                }
                // Attribute values may be relative; resolve against the document
                // so the URL is usable outside the page.
                try {
                    return new URL(raw, document.baseURI).href;
                }
                catch {
                    return raw;
                }
            }, audioSelector);
        }
        if (!audioSource) {
            throw new Error('No audio source found');
        }
        // Download audio if path provided
        let downloaded = false;
        if (downloadPath) {
            // Use a throwaway tab in the same browser context (same cookies) so
            // the captcha page itself is not navigated away from.
            const downloadPage = await page.browserContext().newPage();
            try {
                const response = await downloadPage.goto(audioSource);
                // Only persist successful responses; an error page is not audio.
                if (response && response.ok()) {
                    const fs = await import('fs/promises');
                    const buffer = await response.buffer();
                    await fs.writeFile(downloadPath, buffer);
                    downloaded = true;
                }
            }
            finally {
                // Best-effort cleanup; never let it hide the real result.
                await downloadPage.close().catch(() => { });
            }
        }
        return {
            content: [
                {
                    type: "text",
                    text: `Audio Captcha Analysis:\n- Audio URL: ${audioSource}\n- Downloaded: ${downloaded ? 'Yes' : 'No'}${downloaded ? `\n- Download Path: ${downloadPath}` : ''}\n\nNote: Audio captcha solving requires external speech-to-text API (Google Speech, AWS Transcribe, etc.)`
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `Audio Captcha Solver Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
|
|
135
|
-
/**
 * Puzzle Captcha Handler - Inspect slider/puzzle captchas and optionally try a
 * naive drag.
 *
 * The handler measures the puzzle and slider elements in the page. When
 * `method` is `'auto'` and a `sliderSelector` is given it then drags the slider
 * horizontally by the puzzle's width (300px when no puzzle was measured). No
 * image analysis is done, so the drag distance is only a guess.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first when it differs from the current URL.
 * @param {string} [args.puzzleSelector] - CSS selector of the puzzle container.
 * @param {string} [args.sliderSelector] - CSS selector of the slider handle.
 * @param {string} [args.method='auto'] - `'auto'` attempts the drag; any other
 *   value only reports measurements.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report on success; on any failure a result with `isError: true`.
 *   A failed drag is reported inside the text, not as `isError`.
 */
export async function handlePuzzleCaptchaHandler(args) {
    const { url, puzzleSelector, sliderSelector, method = 'auto' } = args;
    try {
        // Without any selector nothing can be found; fail before touching the page.
        if (!puzzleSelector && !sliderSelector) {
            throw new Error('No puzzle or slider element found');
        }
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const result = await page.evaluate((puzzleSel, sliderSel) => {
            const puzzleElement = puzzleSel ? document.querySelector(puzzleSel) : null;
            const sliderElement = sliderSel ? document.querySelector(sliderSel) : null;
            if (!puzzleElement && !sliderElement) {
                throw new Error('No puzzle or slider element found');
            }
            const info = {
                puzzleFound: !!puzzleElement,
                sliderFound: !!sliderElement
            };
            // Get puzzle dimensions if exists
            if (puzzleElement) {
                const rect = puzzleElement.getBoundingClientRect();
                info.puzzle = {
                    width: rect.width,
                    height: rect.height,
                    top: rect.top,
                    left: rect.left,
                    visible: rect.width > 0 && rect.height > 0
                };
                // Check for puzzle piece
                const puzzlePiece = puzzleElement.querySelector('.puzzle-piece, [class*="piece"], [class*="puzzle"]');
                if (puzzlePiece) {
                    const pieceRect = puzzlePiece.getBoundingClientRect();
                    info.puzzlePiece = {
                        width: pieceRect.width,
                        height: pieceRect.height,
                        top: pieceRect.top,
                        left: pieceRect.left
                    };
                }
            }
            // Get slider info if exists
            if (sliderElement) {
                const rect = sliderElement.getBoundingClientRect();
                info.slider = {
                    width: rect.width,
                    height: rect.height,
                    top: rect.top,
                    left: rect.left,
                    visible: rect.width > 0 && rect.height > 0,
                    tagName: sliderElement.tagName.toLowerCase()
                };
            }
            return info;
        }, puzzleSelector || '', sliderSelector || '');
        // If auto method, attempt to solve
        if (method === 'auto' && sliderSelector) {
            try {
                const sliderElement = await page.$(sliderSelector);
                const box = sliderElement ? await sliderElement.boundingBox() : null;
                if (box) {
                    // Simulate drag - this is a basic implementation
                    // Real puzzle solving would need image analysis
                    const startX = box.x + box.width / 2;
                    const startY = box.y + box.height / 2;
                    // Record the attempt up front so a failure part-way through
                    // is still reported as a (failed) attempt in the summary.
                    result.attemptedSolve = true;
                    result.method = 'automated_drag';
                    await page.mouse.move(startX, startY);
                    await page.mouse.down();
                    try {
                        // Move in small increments
                        const targetDistance = result.puzzle?.width || 300;
                        const steps = 20;
                        const stepSize = targetDistance / steps;
                        // 1..steps so the last move lands exactly on the target distance.
                        for (let i = 1; i <= steps; i++) {
                            await page.mouse.move(startX + stepSize * i, startY, { steps: 5 });
                            await sleep(50 + Math.random() * 50); // Random delay for human-like behavior
                        }
                    }
                    finally {
                        // Always release the button, even if a move failed.
                        await page.mouse.up();
                    }
                }
            }
            catch (solveError) {
                result.solveError = solveError.message;
            }
        }
        let summary = `Puzzle Captcha Analysis:\n- Puzzle Found: ${result.puzzleFound ? 'Yes' : 'No'}\n- Slider Found: ${result.sliderFound ? 'Yes' : 'No'}`;
        if (result.puzzle) {
            summary += `\n\nPuzzle Details:\n- Dimensions: ${result.puzzle.width}x${result.puzzle.height}\n- Position: (${result.puzzle.left}, ${result.puzzle.top})\n- Visible: ${result.puzzle.visible ? 'Yes' : 'No'}`;
        }
        if (result.puzzlePiece) {
            summary += `\n\nPuzzle Piece:\n- Dimensions: ${result.puzzlePiece.width}x${result.puzzlePiece.height}\n- Position: (${result.puzzlePiece.left}, ${result.puzzlePiece.top})`;
        }
        if (result.slider) {
            summary += `\n\nSlider Details:\n- Dimensions: ${result.slider.width}x${result.slider.height}\n- Position: (${result.slider.left}, ${result.slider.top})\n- Visible: ${result.slider.visible ? 'Yes' : 'No'}\n- Tag: ${result.slider.tagName}`;
        }
        if (result.attemptedSolve) {
            summary += `\n\nSolve Attempt:\n- Method: ${result.method}\n- Status: ${result.solveError ? 'Failed' : 'Completed'}${result.solveError ? `\n- Error: ${result.solveError}` : ''}`;
        }
        summary += `\n\nNote: Advanced puzzle solving requires computer vision libraries (OpenCV, TensorFlow)`;
        return {
            content: [
                {
                    type: "text",
                    text: summary
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `Puzzle Captcha Handler Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
// @ts-nocheck
|
|
2
|
-
import Ajv from 'ajv/dist/2020.js';
|
|
3
|
-
const ajv = new Ajv();
|
|
4
|
-
/**
|
|
5
|
-
* Data Deduplication - Remove duplicate entries from scraped data
|
|
6
|
-
*/
|
|
7
|
-
/**
|
|
8
|
-
* Missing Data Handler - Detect and handle missing data
|
|
9
|
-
*/
|
|
10
|
-
/**
 * Data Type Validator - Validate data against a JSON schema.
 *
 * When `data` is an array each item is validated on its own; any other value
 * is validated as a single item. The report lists totals, the validation rate,
 * up to five invalid items with their Ajv error messages, and a preview of the
 * schema (first 200 characters of its pretty-printed JSON).
 *
 * @param {object} args
 * @param {*} args.data - A single value or an array of values to validate.
 * @param {object|boolean} args.schema - JSON schema compiled with the
 *   module-level Ajv instance. Required.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report on success; on any failure (missing or invalid schema) a
 *   result with `isError: true`.
 */
export async function handleDataTypeValidator(args) {
    const { data, schema } = args;
    try {
        if (!schema) {
            throw new Error('Schema is required');
        }
        const isList = Array.isArray(data);
        const items = isList ? data : [data];
        const validItems = [];
        const invalidItems = [];
        try {
            const validate = ajv.compile(schema);
            items.forEach((item, index) => {
                if (validate(item)) {
                    validItems.push(item);
                    return;
                }
                invalidItems.push({
                    item,
                    // Only array input has a meaningful position.
                    index: isList ? index : undefined,
                    // Snapshot: `validate.errors` is reassigned on every call.
                    errors: validate.errors ? [...validate.errors] : validate.errors
                });
            });
        }
        finally {
            // The Ajv instance is shared by all calls. Drop this schema again so
            // the cache does not grow per request and so a later schema with the
            // same `$id` does not fail to compile as a duplicate.
            if (typeof schema === 'object') {
                ajv.removeSchema(schema);
            }
        }
        const total = items.length;
        // Avoid "NaN%" for an empty array.
        const validationRate = total === 0 ? '0.00' : ((validItems.length / total) * 100).toFixed(2);
        let summary = `Data Type Validation Results:\n\nStatistics:\n- Total Items: ${total}\n- Valid Items: ${validItems.length}\n- Invalid Items: ${invalidItems.length}\n- Validation Rate: ${validationRate}%`;
        if (invalidItems.length > 0) {
            summary += `\n\nInvalid Items (Top 5):\n${invalidItems.slice(0, 5).map((inv, i) => {
                const errorMsgs = inv.errors?.map((e) => `${e.instancePath || 'root'}: ${e.message}`).join(', ') || 'Unknown error';
                // `??` rather than `||`: index 0 is a real position, not "N/A".
                return `${i + 1}. Index ${inv.index ?? 'N/A'}:\n Errors: ${errorMsgs}`;
            }).join('\n')}`;
        }
        // Decide on the ellipsis from the same string that gets truncated.
        const schemaText = JSON.stringify(schema, null, 2);
        summary += `\n\nSchema: ${schemaText.substring(0, 200)}${schemaText.length > 200 ? '...' : ''}`;
        return {
            content: [
                {
                    type: "text",
                    text: summary
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `Data Type Validator Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
|
|
80
|
-
/**
|
|
81
|
-
* Consistency Checker - Check data consistency across fields
|
|
82
|
-
*/
|
|
@@ -1,264 +0,0 @@
|
|
|
1
|
-
import { getPageInstance } from '../browser-manager.js';
|
|
2
|
-
/**
 * Keyword Search - Find literal keywords in the text nodes of the current page.
 *
 * Every text node under `document.body` is scanned for each keyword. Keywords
 * are matched literally (regex metacharacters are escaped); use the regex
 * pattern matcher tool for regular expressions.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first when it differs from the current URL.
 * @param {string|string[]} args.keywords - One keyword or a list of keywords.
 *   Empty and null/undefined entries are ignored; at least one must remain.
 * @param {boolean} [args.caseSensitive=false] - Match case exactly.
 * @param {boolean} [args.wholeWord=false] - Wrap each keyword in `\b...\b`.
 * @param {number} [args.context=50] - Characters of surrounding text to
 *   include on each side of a match.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report (total count, matches grouped by keyword, first 100
 *   matches) on success; on any failure a result with `isError: true`.
 */
export async function handleKeywordSearch(args) {
    const { url, keywords, caseSensitive = false, wholeWord = false, context = 50 } = args;
    try {
        // Normalise before touching the page. An empty keyword would build a
        // regex that matches the empty string and never advances.
        const keywordList = (Array.isArray(keywords) ? keywords : [keywords])
            .filter((kw) => kw !== undefined && kw !== null)
            .map(String)
            .filter((kw) => kw.length > 0);
        if (keywordList.length === 0) {
            throw new Error('At least one non-empty keyword is required');
        }
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const results = await page.evaluate((kws, caseSens, whole, ctx) => {
            // Keywords are plain text, not patterns: neutralise regex metacharacters.
            const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const flags = caseSens ? 'g' : 'gi';
            // Collect the text nodes once; they are the same for every keyword.
            const textNodes = [];
            const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null);
            let current = walker.nextNode();
            while (current) {
                textNodes.push(current);
                current = walker.nextNode();
            }
            const allMatches = [];
            for (const keyword of kws) {
                const literal = escapeRegExp(keyword);
                const pattern = whole ? `\\b${literal}\\b` : literal;
                for (const node of textNodes) {
                    const text = node.textContent || '';
                    // Fresh regex per node so `lastIndex` always starts at 0.
                    const nodeRegex = new RegExp(pattern, flags);
                    let match;
                    while ((match = nodeRegex.exec(text)) !== null) {
                        const start = Math.max(0, match.index - ctx);
                        const end = Math.min(text.length, match.index + match[0].length + ctx);
                        // Get element info
                        const element = node.parentElement;
                        allMatches.push({
                            keyword,
                            match: match[0],
                            position: match.index,
                            context: text.substring(start, end),
                            element: {
                                tag: element?.tagName.toLowerCase() || 'text',
                                // Read the attribute: `className` is not a string on SVG elements.
                                class: element?.getAttribute('class') || '',
                                id: element?.id || ''
                            }
                        });
                    }
                }
            }
            // Group by keyword. A Map keeps arbitrary keywords (e.g. "__proto__")
            // from colliding with Object.prototype members.
            const grouped = new Map();
            for (const m of allMatches) {
                if (!grouped.has(m.keyword)) {
                    grouped.set(m.keyword, []);
                }
                grouped.get(m.keyword).push(m);
            }
            return {
                totalMatches: allMatches.length,
                matchesByKeyword: Object.fromEntries(grouped),
                allMatches: allMatches.slice(0, 100) // Limit to first 100
            };
        }, keywordList, caseSensitive, wholeWord, context);
        const resultText = `✅ Keyword Search Results\n\nTotal Matches: ${results.totalMatches}\n\nKeywords searched: ${keywordList.join(', ')}\n\nMatches by keyword:\n${JSON.stringify(results.matchesByKeyword, null, 2)}\n\nFirst 100 matches:\n${JSON.stringify(results.allMatches, null, 2)}`;
        return {
            content: [
                {
                    type: 'text',
                    text: resultText,
                },
            ],
        };
    }
    catch (error) {
        return {
            content: [{ type: 'text', text: `❌ Keyword search failed: ${error.message}` }],
            isError: true,
        };
    }
}
|
|
81
|
-
/**
 * Regex Pattern Matcher - Search page text using a regular expression.
 *
 * The pattern is run against the `textContent` of the first element matching
 * `selector`, or against `document.body.innerText` when no selector is given.
 * At most 1000 matches are collected and the first 100 are reported, each with
 * its capture groups and 50 characters of context on either side.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first when it differs from the current URL.
 * @param {string} args.pattern - Regular expression source. Required, non-empty.
 * @param {string} [args.flags='g'] - Regular expression flags. The global flag
 *   is added when missing because all matches are collected.
 * @param {string} [args.selector] - CSS selector limiting the searched text.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report on success; on any failure (including an invalid pattern) a
 *   result with `isError: true`.
 */
export async function handleRegexPatternMatcher(args) {
    const { url, pattern, flags = 'g', selector } = args;
    try {
        // `new RegExp(undefined)` silently becomes /(?:)/, which matches everywhere.
        if (typeof pattern !== 'string' || pattern.length === 0) {
            throw new Error('A non-empty regex pattern string is required');
        }
        // Without "g" (or "y") `exec` never advances and would yield the same
        // first match on every iteration of the loop below.
        const requestedFlags = String(flags ?? '');
        const effectiveFlags = requestedFlags.includes('g') ? requestedFlags : `${requestedFlags}g`;
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const results = await page.evaluate((pat, flgs, sel) => {
            let content;
            if (sel) {
                const element = document.querySelector(sel);
                content = element ? element.textContent || '' : '';
            }
            else {
                content = document.body.innerText;
            }
            const regex = new RegExp(pat, flgs);
            const matches = [];
            let match;
            // Hard cap so a pattern with very many matches cannot run unbounded.
            let count = 0;
            while ((match = regex.exec(content)) !== null && count < 1000) {
                count++;
                matches.push({
                    match: match[0],
                    index: match.index,
                    groups: match.slice(1),
                    context: content.substring(Math.max(0, match.index - 50), Math.min(content.length, match.index + match[0].length + 50))
                });
                // Zero-length match: step forward manually or exec would repeat it.
                if (match.index === regex.lastIndex) {
                    regex.lastIndex++;
                }
            }
            return {
                totalMatches: matches.length,
                matches: matches.slice(0, 100),
                pattern: pat,
                flags: flgs
            };
        }, pattern, effectiveFlags, selector || '');
        const resultText = `✅ Regex Pattern Matcher Results\n\nPattern: ${results.pattern}\nFlags: ${results.flags}\nTotal Matches: ${results.totalMatches}\n\nMatches (first 100):\n${JSON.stringify(results.matches, null, 2)}`;
        return {
            content: [{ type: 'text', text: resultText }],
        };
    }
    catch (error) {
        return { content: [{ type: 'text', text: `❌ Regex pattern matcher failed: ${error.message}` }], isError: true };
    }
}
|
|
136
|
-
/**
 * XPath Support - Query the current page using an XPath expression.
 *
 * Node-set results are reported per node: elements with tag, id, class, the
 * first 200 characters of text, and attributes (plus the first 500 characters
 * of `innerHTML` when `returnType` is `'html'`); text nodes with their trimmed
 * content; attribute nodes with name and value. Expressions that evaluate to a
 * number, string or boolean (for example `count(//a)`) are reported as a
 * single value entry.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first when it differs from the current URL.
 * @param {string} args.xpath - XPath expression. Required, non-blank.
 * @param {string} [args.returnType='elements'] - `'html'` adds `innerHTML` to
 *   element entries.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report on success; on any failure (including an invalid
 *   expression) a result with `isError: true`.
 */
export async function handleXPathSupport(args) {
    const { url, xpath, returnType = 'elements' } = args;
    try {
        if (typeof xpath !== 'string' || xpath.trim() === '') {
            throw new Error('An XPath expression is required');
        }
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const results = await page.evaluate((xp, type) => {
            const xpathResult = document.evaluate(xp, document, null, XPathResult.ANY_TYPE, null);
            // With ANY_TYPE the result may be a primitive; `iterateNext()` throws
            // on those, so read the matching value property instead.
            switch (xpathResult.resultType) {
                case XPathResult.NUMBER_TYPE:
                    return { count: 1, elements: [{ type: 'number', value: xpathResult.numberValue }] };
                case XPathResult.STRING_TYPE:
                    return { count: 1, elements: [{ type: 'string', value: xpathResult.stringValue }] };
                case XPathResult.BOOLEAN_TYPE:
                    return { count: 1, elements: [{ type: 'boolean', value: xpathResult.booleanValue }] };
                default:
                    break;
            }
            const elements = [];
            let node = xpathResult.iterateNext();
            while (node) {
                if (node.nodeType === Node.ELEMENT_NODE) {
                    const attributes = {};
                    for (const attr of Array.from(node.attributes)) {
                        attributes[attr.name] = attr.value;
                    }
                    elements.push({
                        tagName: node.tagName.toLowerCase(),
                        id: node.id,
                        // Read the attribute: `className` is not a string on SVG elements.
                        className: node.getAttribute('class') ?? '',
                        text: node.textContent?.substring(0, 200),
                        attributes,
                        innerHTML: type === 'html' ? node.innerHTML.substring(0, 500) : undefined
                    });
                }
                else if (node.nodeType === Node.TEXT_NODE) {
                    elements.push({
                        type: 'text',
                        content: node.textContent?.trim()
                    });
                }
                else if (node.nodeType === Node.ATTRIBUTE_NODE) {
                    elements.push({
                        type: 'attribute',
                        name: node.name,
                        value: node.value
                    });
                }
                node = xpathResult.iterateNext();
            }
            return {
                count: elements.length,
                elements
            };
        }, xpath, returnType);
        const resultText = `✅ XPath Query Results\n\nXPath: ${xpath}\nElements Found: ${results.count}\n\nElements:\n${JSON.stringify(results.elements, null, 2)}`;
        return {
            content: [{ type: 'text', text: resultText }],
        };
    }
    catch (error) {
        return { content: [{ type: 'text', text: `❌ XPath query failed: ${error.message}` }], isError: true };
    }
}
|
|
198
|
-
/**
 * Advanced CSS Selectors - Query the current page with a CSS selector.
 *
 * Operations:
 *   - `'query'` (default, also used for unknown values): all elements matching
 *     the selector.
 *   - `'closest'`: the first match passed through `closest(selector)`.
 *     NOTE(review): an element always matches its own selector, so this looks
 *     equivalent to "first match only" - confirm that is the intent.
 *   - `'matches'`: every element in the document for which
 *     `matches(selector)` is true.
 *
 * The total match count is reported together with details for the first 10
 * elements: tag, id, class, first 200 characters of text, attributes and
 * bounding rectangle, plus a fixed set of computed styles when `returnType` is
 * `'styles'` or the first 500 characters of `innerHTML` when it is `'html'`.
 *
 * @param {object} args
 * @param {string} [args.url] - Page to open first when it differs from the current URL.
 * @param {string} args.selector - CSS selector. Required, non-blank.
 * @param {string} [args.operation='query'] - One of the operations above.
 * @param {string} [args.returnType='elements'] - `'styles'` or `'html'` to add
 *   the extra detail described above.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text report on success; on any failure (including an invalid selector)
 *   a result with `isError: true`.
 */
export async function handleAdvancedCSSSelectors(args) {
    const { url, selector, operation = 'query', returnType = 'elements' } = args;
    try {
        if (typeof selector !== 'string' || selector.trim() === '') {
            throw new Error('A CSS selector is required');
        }
        const page = getPageInstance();
        if (!page) {
            throw new Error('Browser not initialized. Call browser_init first.');
        }
        if (url && page.url() !== url) {
            await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        }
        const results = await page.evaluate((sel, op, type) => {
            let elements;
            switch (op) {
                case 'closest': {
                    const firstEl = document.querySelector(sel);
                    elements = firstEl ? [firstEl.closest(sel)].filter(Boolean) : [];
                    break;
                }
                case 'matches':
                    elements = Array.from(document.querySelectorAll('*')).filter((el) => el.matches(sel));
                    break;
                case 'query':
                default:
                    elements = Array.from(document.querySelectorAll(sel));
            }
            // Only the first 10 elements are ever reported, so only those are
            // described and sent back; the full count is still returned.
            const described = elements.slice(0, 10).map((element) => {
                const attributes = {};
                for (const attr of Array.from(element.attributes)) {
                    attributes[attr.name] = attr.value;
                }
                let computedStyles;
                if (type === 'styles') {
                    const computed = window.getComputedStyle(element);
                    computedStyles = {
                        display: computed.display,
                        visibility: computed.visibility,
                        position: computed.position,
                        width: computed.width,
                        height: computed.height,
                        color: computed.color,
                        backgroundColor: computed.backgroundColor
                    };
                }
                // Copy the DOMRect into a plain object: its fields are prototype
                // getters and are not reliably kept when the value is serialized
                // out of the page.
                const { x, y, width, height, top, right, bottom, left } = element.getBoundingClientRect();
                return {
                    tagName: element.tagName.toLowerCase(),
                    id: element.id,
                    className: element.className,
                    text: element.textContent?.substring(0, 200),
                    attributes,
                    computedStyles,
                    innerHTML: type === 'html' ? element.innerHTML.substring(0, 500) : undefined,
                    boundingRect: { x, y, width, height, top, right, bottom, left }
                };
            });
            return {
                count: elements.length,
                elements: described
            };
        }, selector, operation, returnType);
        const resultText = `✅ Advanced CSS Selector Results\n\nSelector: ${selector}\nOperation: ${operation}\nElements Found: ${results.count}\n\nElements (first 10):\n${JSON.stringify(results.elements.slice(0, 10), null, 2)}`;
        return {
            content: [{ type: 'text', text: resultText }],
        };
    }
    catch (error) {
        return { content: [{ type: 'text', text: `❌ CSS selector query failed: ${error.message}` }], isError: true };
    }
}
|