brave-real-browser-mcp-server 2.15.4 → 2.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -40
- package/dist/handlers/data-extraction-handlers.js +0 -76
- package/dist/handlers/data-quality-handlers.js +0 -141
- package/dist/handlers/dynamic-session-handlers.js +0 -75
- package/dist/handlers/monitoring-reporting-handlers.js +0 -83
- package/dist/handlers/multi-element-handlers.js +0 -67
- package/dist/handlers/navigation-handlers.js +59 -0
- package/dist/handlers/search-filter-handlers.js +0 -121
- package/dist/handlers/visual-tools-handlers.js +0 -52
- package/dist/index.js +10 -40
- package/dist/tool-definitions.js +1 -150
- package/package.json +2 -2
- package/scripts/full-verification.ts +98 -0
- package/scripts/live-verification.ts +61 -0
- package/dist/handlers/data-processing-handlers.js +0 -91
- package/dist/handlers/pagination-handlers.js +0 -115
package/README.md
CHANGED
|
@@ -4,15 +4,15 @@
|
|
|
4
4
|
|
|
5
5
|
<div align="center">
|
|
6
6
|
|
|
7
|
-

|
|
8
8
|

|
|
9
|
-

|
|
10
10
|

|
|
11
11
|

|
|
12
12
|
|
|
13
|
-
**सभी AI IDEs के लिए Universal MCP Server |
|
|
13
|
+
**सभी AI IDEs के लिए Universal MCP Server | 66+ Tools | Browser Automation | Web Scraping | CAPTCHA Solving**
|
|
14
14
|
|
|
15
|
-
[Installation](#-installation) | [Quick Start](#-quick-start) | [Qoder AI Setup](#-qoder-ai---complete-integration-guide) | [Tools](#-available-tools-
|
|
15
|
+
[Installation](#-installation) | [Quick Start](#-quick-start) | [Qoder AI Setup](#-qoder-ai---complete-integration-guide) | [Tools](#-available-tools-66) | [IDE Configurations](#-ide-configurations)
|
|
16
16
|
|
|
17
17
|
</div>
|
|
18
18
|
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
**Brave Real Browser MCP Server** एक powerful automation tool है जो:
|
|
24
24
|
|
|
25
25
|
- ✅ **20+ AI IDEs में काम करता है** (Antigravity, Warp AI, Zed, Cursor, Claude, Windsurf, Cline, Qoder AI, etc.)
|
|
26
|
-
- ✅ **
|
|
26
|
+
- ✅ **66+ Automation Tools** - Browser control, scraping, CAPTCHA solving, video extraction
|
|
27
27
|
- ✅ **MCP Protocol (STDIO)** - Fast and secure local communication
|
|
28
28
|
- ✅ **Auto-Detection** - Automatically detects your IDE
|
|
29
29
|
- ✅ **Real Brave Browser** - Anti-detection features, bypass Cloudflare
|
|
@@ -146,10 +146,11 @@ npm install -g brave-real-browser-mcp-server@latest
|
|
|
146
146
|
|
|
147
147
|
```json
|
|
148
148
|
{
|
|
149
|
-
"
|
|
149
|
+
"context_servers": {
|
|
150
150
|
"brave-real-browser": {
|
|
151
|
-
"command": "npx",
|
|
152
|
-
"args": ["-y", "brave-real-browser-mcp-server@latest"]
|
|
151
|
+
"command": "npx.cmd",
|
|
152
|
+
"args": ["-y", "brave-real-browser-mcp-server@latest"],
|
|
153
|
+
"env": {}
|
|
153
154
|
}
|
|
154
155
|
}
|
|
155
156
|
}
|
|
@@ -189,7 +190,7 @@ npm install -g brave-real-browser-mcp-server@latest
|
|
|
189
190
|
|
|
190
191
|
---
|
|
191
192
|
|
|
192
|
-
## 🛠️ Available Tools (
|
|
193
|
+
## 🛠️ Available Tools (66)
|
|
193
194
|
|
|
194
195
|
### 🌐 Browser Management (2 tools)
|
|
195
196
|
|
|
@@ -214,14 +215,13 @@ npm install -g brave-real-browser-mcp-server@latest
|
|
|
214
215
|
| `random_scroll` | Human-like scrolling |
|
|
215
216
|
| `solve_captcha` | Solve CAPTCHA (reCAPTCHA, hCaptcha, Turnstile, etc.) |
|
|
216
217
|
|
|
217
|
-
### 📄 Content Extraction (
|
|
218
|
+
### 📄 Content Extraction (9 tools)
|
|
218
219
|
|
|
219
220
|
| Tool | Description |
|
|
220
221
|
| -------------------------- | ----------------------------------------- |
|
|
221
222
|
| `get_content` | Extract page content (HTML/Text/Markdown) |
|
|
222
223
|
| `find_selector` | Find CSS selectors for elements |
|
|
223
224
|
| `scrape_table` | Extract table data with headers |
|
|
224
|
-
| `extract_list` | Extract list items |
|
|
225
225
|
| `extract_json` | Extract JSON data from page |
|
|
226
226
|
| `scrape_meta_tags` | Extract meta tags and SEO info |
|
|
227
227
|
| `extract_schema` | Extract schema.org structured data |
|
|
@@ -284,7 +284,7 @@ npm install -g brave-real-browser-mcp-server@latest
|
|
|
284
284
|
| `audio_captcha_solver` | Solve audio CAPTCHAs |
|
|
285
285
|
| `puzzle_captcha_handler` | Handle puzzle CAPTCHAs |
|
|
286
286
|
|
|
287
|
-
### 🔧 Data Processing (
|
|
287
|
+
### 🔧 Data Processing (5 tools)
|
|
288
288
|
|
|
289
289
|
| Tool | Description |
|
|
290
290
|
| ------------------------- | ---------------------------------- |
|
|
@@ -293,28 +293,17 @@ npm install -g brave-real-browser-mcp-server@latest
|
|
|
293
293
|
| `contact_extractor` | Extract contact information |
|
|
294
294
|
| `schema_validator` | Validate data against schema |
|
|
295
295
|
| `required_fields_checker` | Check for required fields |
|
|
296
|
-
| `duplicate_remover` | Remove duplicate entries |
|
|
297
|
-
| `data_deduplication` | Advanced deduplication |
|
|
298
|
-
| `missing_data_handler` | Handle missing data |
|
|
299
|
-
| `data_type_validator` | Validate data types |
|
|
300
296
|
|
|
301
|
-
### 📊 Data Quality (
|
|
297
|
+
### 📊 Data Quality (0 tools)
|
|
302
298
|
|
|
303
|
-
|
|
304
|
-
| ---------------------- | ------------------------ |
|
|
305
|
-
| `outlier_detection` | Detect data outliers |
|
|
306
|
-
| `consistency_checker` | Check data consistency |
|
|
307
|
-
| `data_quality_metrics` | Generate quality metrics |
|
|
299
|
+
*Advanced data quality tools removed for optimization.*
|
|
308
300
|
|
|
309
|
-
### 🤖 AI-Powered Tools (
|
|
301
|
+
### 🤖 AI-Powered Tools (2 tools)
|
|
310
302
|
|
|
311
303
|
| Tool | Description |
|
|
312
304
|
| -------------------------- | --------------------------- |
|
|
313
305
|
| `smart_selector_generator` | Auto-generate CSS selectors |
|
|
314
306
|
| `content_classification` | Classify content type |
|
|
315
|
-
| `sentiment_analysis` | Analyze text sentiment |
|
|
316
|
-
| `summary_generator` | Generate content summaries |
|
|
317
|
-
| `translation_support` | Translate content |
|
|
318
307
|
|
|
319
308
|
### 🔎 Search & Filter (5 tools)
|
|
320
309
|
|
|
@@ -336,11 +325,10 @@ npm install -g brave-real-browser-mcp-server@latest
|
|
|
336
325
|
| `sitemap_parser` | Parse and navigate sitemaps |
|
|
337
326
|
| `breadcrumb_navigator` | Navigate using breadcrumbs |
|
|
338
327
|
|
|
339
|
-
### 🔒 Session Management (
|
|
328
|
+
### 🔒 Session Management (6 tools)
|
|
340
329
|
|
|
341
330
|
| Tool | Description |
|
|
342
331
|
| ----------------------- | -------------------------- |
|
|
343
|
-
| `cookie_manager` | Manage cookies |
|
|
344
332
|
| `session_persistence` | Persist sessions |
|
|
345
333
|
| `form_auto_fill` | Auto-fill forms |
|
|
346
334
|
| `ajax_content_waiter` | Wait for AJAX content |
|
|
@@ -348,25 +336,21 @@ npm install -g brave-real-browser-mcp-server@latest
|
|
|
348
336
|
| `login_session_manager` | Manage login sessions |
|
|
349
337
|
| `shadow_dom_extractor` | Extract Shadow DOM content |
|
|
350
338
|
|
|
351
|
-
### 📸 Visual Tools (
|
|
339
|
+
### 📸 Visual Tools (4 tools)
|
|
352
340
|
|
|
353
341
|
| Tool | Description |
|
|
354
342
|
| ---------------------- | --------------------------- |
|
|
355
|
-
| `full_page_screenshot` | Full page screenshot |
|
|
356
343
|
| `element_screenshot` | Screenshot specific element |
|
|
357
344
|
| `pdf_generation` | Generate PDF from page |
|
|
358
345
|
| `video_recording` | Record page as video |
|
|
359
|
-
| `visual_comparison` | Compare screenshots |
|
|
360
346
|
|
|
361
|
-
### 📈 Monitoring & Reporting (
|
|
347
|
+
### 📈 Monitoring & Reporting (3 tools)
|
|
362
348
|
|
|
363
349
|
| Tool | Description |
|
|
364
350
|
| ----------------------- | ------------------------- |
|
|
365
351
|
| `progress_tracker` | Track automation progress |
|
|
366
352
|
| `success_rate_reporter` | Report success rates |
|
|
367
|
-
| `data_quality_metrics` | Data quality metrics |
|
|
368
353
|
| `performance_monitor` | Monitor performance |
|
|
369
|
-
| `monitoring_summary` | Get monitoring summary |
|
|
370
354
|
|
|
371
355
|
### 🛡️ Advanced Extraction & Obfuscation (4 tools)
|
|
372
356
|
|
|
@@ -376,11 +360,8 @@ npm install -g brave-real-browser-mcp-server@latest
|
|
|
376
360
|
| `multi_layer_redirect_trace` | Trace multi-layer redirects |
|
|
377
361
|
| `ad_protection_detector` | Detect ad protection |
|
|
378
362
|
|
|
379
|
-
|
|
380
363
|
## 🔧 Environment Variables
|
|
381
364
|
|
|
382
|
-
|
|
383
|
-
|
|
384
365
|
You can configure the server using the local `.env` file directly.
|
|
385
366
|
|
|
386
367
|
Edit `.env` to set your preferences:
|
|
@@ -401,9 +382,9 @@ PROXY_URL=http://localhost:8080
|
|
|
401
382
|
|
|
402
383
|
## 📊 Supported Protocols
|
|
403
384
|
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
385
|
+
| Protocol | Used By | Auto-Config | Status |
|
|
386
|
+
| --------------- | --------------------------------------------- | ----------- | ---------- |
|
|
387
|
+
| **MCP (STDIO)** | Claude Desktop, Cursor, Windsurf, Cline, Warp | ✅ | 🟢 Working |
|
|
407
388
|
|
|
408
389
|
## 📄 License
|
|
409
390
|
|
|
@@ -5,82 +5,6 @@
|
|
|
5
5
|
import { getCurrentPage } from '../browser-manager.js';
|
|
6
6
|
import { validateWorkflow } from '../workflow-validation.js';
|
|
7
7
|
import { withErrorHandling } from '../system-utils.js';
|
|
8
|
-
/**
|
|
9
|
-
* Bullet lists और numbered lists से data extract करता है
|
|
10
|
-
*/
|
|
11
|
-
export async function handleExtractList(args) {
|
|
12
|
-
return await withErrorHandling(async () => {
|
|
13
|
-
validateWorkflow('extract_list', {
|
|
14
|
-
requireBrowser: true,
|
|
15
|
-
requirePage: true,
|
|
16
|
-
});
|
|
17
|
-
const page = getCurrentPage();
|
|
18
|
-
const selector = args.selector || 'ul, ol';
|
|
19
|
-
const includeNested = args.includeNested !== false;
|
|
20
|
-
const maxItems = args.maxItems || 500;
|
|
21
|
-
const listData = await page.evaluate(({ selector, includeNested, maxItems }) => {
|
|
22
|
-
const lists = document.querySelectorAll(selector);
|
|
23
|
-
const results = [];
|
|
24
|
-
lists.forEach((list) => {
|
|
25
|
-
const items = [];
|
|
26
|
-
const listType = list.tagName.toLowerCase();
|
|
27
|
-
let hasNested = false;
|
|
28
|
-
const extractItems = (element, depth = 0) => {
|
|
29
|
-
if (items.length >= maxItems)
|
|
30
|
-
return;
|
|
31
|
-
const children = Array.from(element.children);
|
|
32
|
-
children.forEach((child) => {
|
|
33
|
-
if (child.tagName.toLowerCase() === 'li') {
|
|
34
|
-
const text = Array.from(child.childNodes)
|
|
35
|
-
.filter((node) => node.nodeType === Node.TEXT_NODE)
|
|
36
|
-
.map((node) => node.textContent?.trim())
|
|
37
|
-
.filter((text) => text)
|
|
38
|
-
.join(' ');
|
|
39
|
-
if (text) {
|
|
40
|
-
const indent = ' '.repeat(depth);
|
|
41
|
-
items.push(`${indent}${text}`);
|
|
42
|
-
}
|
|
43
|
-
// Check for nested lists
|
|
44
|
-
const nestedList = child.querySelector('ul, ol');
|
|
45
|
-
if (nestedList && includeNested) {
|
|
46
|
-
hasNested = true;
|
|
47
|
-
extractItems(nestedList, depth + 1);
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
});
|
|
51
|
-
};
|
|
52
|
-
extractItems(list);
|
|
53
|
-
if (items.length > 0) {
|
|
54
|
-
results.push({
|
|
55
|
-
items,
|
|
56
|
-
type: listType,
|
|
57
|
-
nested: hasNested,
|
|
58
|
-
count: items.length,
|
|
59
|
-
});
|
|
60
|
-
}
|
|
61
|
-
});
|
|
62
|
-
return results;
|
|
63
|
-
}, { selector, includeNested, maxItems });
|
|
64
|
-
if (listData.length === 0) {
|
|
65
|
-
return {
|
|
66
|
-
content: [
|
|
67
|
-
{
|
|
68
|
-
type: 'text',
|
|
69
|
-
text: `❌ No lists found with selector "${selector}"`,
|
|
70
|
-
},
|
|
71
|
-
],
|
|
72
|
-
};
|
|
73
|
-
}
|
|
74
|
-
return {
|
|
75
|
-
content: [
|
|
76
|
-
{
|
|
77
|
-
type: 'text',
|
|
78
|
-
text: `✅ Extracted ${listData.length} list(s)\n\n${JSON.stringify(listData, null, 2)}`,
|
|
79
|
-
},
|
|
80
|
-
],
|
|
81
|
-
};
|
|
82
|
-
}, 'Failed to extract list');
|
|
83
|
-
}
|
|
84
8
|
/**
|
|
85
9
|
* Page में embedded JSON/API data खोजता और extract करता है
|
|
86
10
|
*/
|
|
@@ -1,76 +1,9 @@
|
|
|
1
1
|
// @ts-nocheck
|
|
2
2
|
import Ajv from 'ajv/dist/2020.js';
|
|
3
|
-
import * as crypto from 'crypto';
|
|
4
3
|
const ajv = new Ajv();
|
|
5
4
|
/**
|
|
6
5
|
* Data Deduplication - Remove duplicate entries from scraped data
|
|
7
6
|
*/
|
|
8
|
-
export async function handleDataDeduplication(args) {
|
|
9
|
-
const { data, uniqueKeys, fuzzyMatch = false, threshold = 0.9 } = args;
|
|
10
|
-
try {
|
|
11
|
-
if (!Array.isArray(data)) {
|
|
12
|
-
throw new Error('Data must be an array');
|
|
13
|
-
}
|
|
14
|
-
const unique = [];
|
|
15
|
-
const duplicates = [];
|
|
16
|
-
const seen = new Set();
|
|
17
|
-
data.forEach((item, index) => {
|
|
18
|
-
let key;
|
|
19
|
-
if (uniqueKeys && Array.isArray(uniqueKeys)) {
|
|
20
|
-
// Create composite key from specified fields
|
|
21
|
-
const keyParts = uniqueKeys.map(k => {
|
|
22
|
-
const value = typeof item === 'object' ? item[k] : item;
|
|
23
|
-
return String(value || '');
|
|
24
|
-
});
|
|
25
|
-
key = keyParts.join('|');
|
|
26
|
-
}
|
|
27
|
-
else {
|
|
28
|
-
// Use entire object as key
|
|
29
|
-
key = JSON.stringify(item);
|
|
30
|
-
}
|
|
31
|
-
if (fuzzyMatch) {
|
|
32
|
-
// Normalize key for fuzzy matching
|
|
33
|
-
key = key.toLowerCase().replace(/\s+/g, ' ').trim();
|
|
34
|
-
}
|
|
35
|
-
const hash = crypto.createHash('md5').update(key).digest('hex');
|
|
36
|
-
if (seen.has(hash)) {
|
|
37
|
-
duplicates.push({ item, index, hash });
|
|
38
|
-
}
|
|
39
|
-
else {
|
|
40
|
-
seen.add(hash);
|
|
41
|
-
unique.push(item);
|
|
42
|
-
}
|
|
43
|
-
});
|
|
44
|
-
const dedupRate = ((duplicates.length / data.length) * 100).toFixed(2);
|
|
45
|
-
let summary = `Data Deduplication Results:\n\nStatistics:\n- Original Items: ${data.length}\n- Unique Items: ${unique.length}\n- Duplicates Found: ${duplicates.length}\n- Deduplication Rate: ${dedupRate}%`;
|
|
46
|
-
if (uniqueKeys) {
|
|
47
|
-
summary += `\n- Unique Keys Used: ${uniqueKeys.join(', ')}`;
|
|
48
|
-
}
|
|
49
|
-
summary += `\n- Fuzzy Matching: ${fuzzyMatch ? 'Enabled' : 'Disabled'}`;
|
|
50
|
-
if (duplicates.length > 0) {
|
|
51
|
-
summary += `\n\nSample Duplicates (Top 5):\n${duplicates.slice(0, 5).map((d, i) => `${i + 1}. Index ${d.index}: ${JSON.stringify(d.item).substring(0, 100)}...`).join('\n')}`;
|
|
52
|
-
}
|
|
53
|
-
return {
|
|
54
|
-
content: [
|
|
55
|
-
{
|
|
56
|
-
type: "text",
|
|
57
|
-
text: summary
|
|
58
|
-
}
|
|
59
|
-
]
|
|
60
|
-
};
|
|
61
|
-
}
|
|
62
|
-
catch (error) {
|
|
63
|
-
return {
|
|
64
|
-
content: [
|
|
65
|
-
{
|
|
66
|
-
type: "text",
|
|
67
|
-
text: `Data Deduplication Error: ${error.message}`
|
|
68
|
-
}
|
|
69
|
-
],
|
|
70
|
-
isError: true
|
|
71
|
-
};
|
|
72
|
-
}
|
|
73
|
-
}
|
|
74
7
|
/**
|
|
75
8
|
* Missing Data Handler - Detect and handle missing data
|
|
76
9
|
*/
|
|
@@ -147,77 +80,3 @@ export async function handleDataTypeValidator(args) {
|
|
|
147
80
|
/**
|
|
148
81
|
* Consistency Checker - Check data consistency across fields
|
|
149
82
|
*/
|
|
150
|
-
export async function handleConsistencyChecker(args) {
|
|
151
|
-
const { data, rules } = args;
|
|
152
|
-
try {
|
|
153
|
-
if (!Array.isArray(data)) {
|
|
154
|
-
throw new Error('Data must be an array');
|
|
155
|
-
}
|
|
156
|
-
if (!rules || !Array.isArray(rules)) {
|
|
157
|
-
if (!rules)
|
|
158
|
-
return { content: [{ type: "text", text: "No rules provided. Pass." }] };
|
|
159
|
-
throw new Error('Rules must be an array');
|
|
160
|
-
}
|
|
161
|
-
const report = {
|
|
162
|
-
totalItems: data.length,
|
|
163
|
-
passedItems: 0,
|
|
164
|
-
failedItems: 0,
|
|
165
|
-
failures: []
|
|
166
|
-
};
|
|
167
|
-
data.forEach((item, index) => {
|
|
168
|
-
let itemPassed = true;
|
|
169
|
-
const itemFailures = [];
|
|
170
|
-
rules.forEach((rule) => {
|
|
171
|
-
try {
|
|
172
|
-
if (rule.type === 'dependency') {
|
|
173
|
-
if (item[rule.field] && !item[rule.dependentField]) {
|
|
174
|
-
itemPassed = false;
|
|
175
|
-
itemFailures.push(`Field '${rule.field}' requires '${rule.dependentField}'`);
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
|
-
else if (rule.type === 'value_match') {
|
|
179
|
-
if (item[rule.field] === rule.value && item[rule.targetField] !== rule.targetValue) {
|
|
180
|
-
itemPassed = false;
|
|
181
|
-
itemFailures.push(`When '${rule.field}' is '${rule.value}', '${rule.targetField}' must be '${rule.targetValue}'`);
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
catch (e) {
|
|
186
|
-
itemPassed = false;
|
|
187
|
-
// @ts-ignore
|
|
188
|
-
itemFailures.push(`Rule execution error: ${e.message}`);
|
|
189
|
-
}
|
|
190
|
-
});
|
|
191
|
-
if (itemPassed) {
|
|
192
|
-
report.passedItems++;
|
|
193
|
-
}
|
|
194
|
-
else {
|
|
195
|
-
report.failedItems++;
|
|
196
|
-
report.failures.push({
|
|
197
|
-
index,
|
|
198
|
-
item,
|
|
199
|
-
errors: itemFailures
|
|
200
|
-
});
|
|
201
|
-
}
|
|
202
|
-
});
|
|
203
|
-
return {
|
|
204
|
-
content: [
|
|
205
|
-
{
|
|
206
|
-
type: "text",
|
|
207
|
-
text: `Consistency Check Results:\nTotal: ${report.totalItems}\nPassed: ${report.passedItems}\nFailed: ${report.failedItems}\n\nFailures:\n${JSON.stringify(report.failures, null, 2)}`
|
|
208
|
-
}
|
|
209
|
-
]
|
|
210
|
-
};
|
|
211
|
-
}
|
|
212
|
-
catch (error) {
|
|
213
|
-
return {
|
|
214
|
-
content: [
|
|
215
|
-
{
|
|
216
|
-
type: "text",
|
|
217
|
-
text: `Consistency Checker Error: ${error.message}`
|
|
218
|
-
}
|
|
219
|
-
],
|
|
220
|
-
isError: true
|
|
221
|
-
};
|
|
222
|
-
}
|
|
223
|
-
}
|
|
@@ -49,81 +49,6 @@ export async function handleShadowDOMExtractor(args) {
|
|
|
49
49
|
/**
|
|
50
50
|
* Cookie Manager - Manage cookies
|
|
51
51
|
*/
|
|
52
|
-
export async function handleCookieManager(args) {
|
|
53
|
-
return await withErrorHandling(async () => {
|
|
54
|
-
validateWorkflow('cookie_manager', {
|
|
55
|
-
requireBrowser: true,
|
|
56
|
-
requirePage: true,
|
|
57
|
-
});
|
|
58
|
-
const page = getCurrentPage();
|
|
59
|
-
const action = args.action || 'get'; // get, set, delete, clear
|
|
60
|
-
if (action === 'get') {
|
|
61
|
-
const cookies = await page.cookies();
|
|
62
|
-
return {
|
|
63
|
-
content: [{
|
|
64
|
-
type: 'text',
|
|
65
|
-
text: `✅ Retrieved ${cookies.length} cookies\n\n${JSON.stringify(cookies, null, 2)}`,
|
|
66
|
-
}],
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
if (action === 'set') {
|
|
70
|
-
const cookie = args.cookie;
|
|
71
|
-
if (!cookie || !cookie.name || !cookie.value) {
|
|
72
|
-
throw new Error('Cookie name and value are required');
|
|
73
|
-
}
|
|
74
|
-
await page.setCookie({
|
|
75
|
-
name: cookie.name,
|
|
76
|
-
value: cookie.value,
|
|
77
|
-
domain: cookie.domain || new URL(page.url()).hostname,
|
|
78
|
-
path: cookie.path || '/',
|
|
79
|
-
expires: cookie.expires,
|
|
80
|
-
httpOnly: cookie.httpOnly || false,
|
|
81
|
-
secure: cookie.secure || false,
|
|
82
|
-
sameSite: cookie.sameSite || 'Lax',
|
|
83
|
-
});
|
|
84
|
-
return {
|
|
85
|
-
content: [{
|
|
86
|
-
type: 'text',
|
|
87
|
-
text: `✅ Cookie set: ${cookie.name} = ${cookie.value}`,
|
|
88
|
-
}],
|
|
89
|
-
};
|
|
90
|
-
}
|
|
91
|
-
if (action === 'delete') {
|
|
92
|
-
const cookieName = args.cookieName;
|
|
93
|
-
if (!cookieName) {
|
|
94
|
-
throw new Error('Cookie name is required');
|
|
95
|
-
}
|
|
96
|
-
const cookies = await page.cookies();
|
|
97
|
-
const cookieToDelete = cookies.find(c => c.name === cookieName);
|
|
98
|
-
if (cookieToDelete) {
|
|
99
|
-
await page.deleteCookie(cookieToDelete);
|
|
100
|
-
return {
|
|
101
|
-
content: [{
|
|
102
|
-
type: 'text',
|
|
103
|
-
text: `✅ Cookie deleted: ${cookieName}`,
|
|
104
|
-
}],
|
|
105
|
-
};
|
|
106
|
-
}
|
|
107
|
-
return {
|
|
108
|
-
content: [{
|
|
109
|
-
type: 'text',
|
|
110
|
-
text: `⚠️ Cookie not found: ${cookieName}`,
|
|
111
|
-
}],
|
|
112
|
-
};
|
|
113
|
-
}
|
|
114
|
-
if (action === 'clear') {
|
|
115
|
-
const cookies = await page.cookies();
|
|
116
|
-
await Promise.all(cookies.map(cookie => page.deleteCookie(cookie)));
|
|
117
|
-
return {
|
|
118
|
-
content: [{
|
|
119
|
-
type: 'text',
|
|
120
|
-
text: `✅ Cleared all ${cookies.length} cookies`,
|
|
121
|
-
}],
|
|
122
|
-
};
|
|
123
|
-
}
|
|
124
|
-
throw new Error(`Unknown action: ${action}`);
|
|
125
|
-
}, 'Failed to manage cookies');
|
|
126
|
-
}
|
|
127
52
|
/**
|
|
128
53
|
* Session Persistence - Save and restore browser session
|
|
129
54
|
*/
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
// Monitoring & Reporting Module
|
|
2
2
|
// Progress tracking, error logging, success rate reporting, metrics
|
|
3
3
|
// @ts-nocheck
|
|
4
|
-
import { getCurrentPage } from '../browser-manager.js';
|
|
5
|
-
import { validateWorkflow } from '../workflow-validation.js';
|
|
6
4
|
import { withErrorHandling } from '../system-utils.js';
|
|
7
5
|
// Global monitoring state
|
|
8
6
|
const monitoringState = {
|
|
@@ -116,87 +114,6 @@ export async function handleProgressTracker(args) {
|
|
|
116
114
|
/**
|
|
117
115
|
* Data Quality Metrics - Report data quality metrics
|
|
118
116
|
*/
|
|
119
|
-
export async function handleDataQualityMetrics(args) {
|
|
120
|
-
return await withErrorHandling(async () => {
|
|
121
|
-
validateWorkflow('data_quality_metrics', {
|
|
122
|
-
requireBrowser: true,
|
|
123
|
-
requirePage: true,
|
|
124
|
-
});
|
|
125
|
-
const page = getCurrentPage();
|
|
126
|
-
const data = args.data || [];
|
|
127
|
-
// Calculate metrics
|
|
128
|
-
const metrics = {
|
|
129
|
-
totalRecords: data.length,
|
|
130
|
-
completeRecords: 0,
|
|
131
|
-
incompleteRecords: 0,
|
|
132
|
-
emptyFields: 0,
|
|
133
|
-
duplicates: 0,
|
|
134
|
-
dataTypes: {},
|
|
135
|
-
fieldCompleteness: {},
|
|
136
|
-
};
|
|
137
|
-
if (data.length === 0) {
|
|
138
|
-
return {
|
|
139
|
-
content: [{
|
|
140
|
-
type: 'text',
|
|
141
|
-
text: `ℹ️ No data provided for quality metrics`,
|
|
142
|
-
}],
|
|
143
|
-
};
|
|
144
|
-
}
|
|
145
|
-
// Analyze data quality
|
|
146
|
-
const seenRecords = new Set();
|
|
147
|
-
const allFields = new Set();
|
|
148
|
-
data.forEach((record) => {
|
|
149
|
-
const recordStr = JSON.stringify(record);
|
|
150
|
-
// Check for duplicates
|
|
151
|
-
if (seenRecords.has(recordStr)) {
|
|
152
|
-
metrics.duplicates++;
|
|
153
|
-
}
|
|
154
|
-
else {
|
|
155
|
-
seenRecords.add(recordStr);
|
|
156
|
-
}
|
|
157
|
-
// Collect all fields
|
|
158
|
-
Object.keys(record).forEach(key => allFields.add(key));
|
|
159
|
-
// Check completeness
|
|
160
|
-
let hasAllFields = true;
|
|
161
|
-
Object.values(record).forEach(value => {
|
|
162
|
-
if (value === null || value === undefined || value === '') {
|
|
163
|
-
metrics.emptyFields++;
|
|
164
|
-
hasAllFields = false;
|
|
165
|
-
}
|
|
166
|
-
});
|
|
167
|
-
if (hasAllFields) {
|
|
168
|
-
metrics.completeRecords++;
|
|
169
|
-
}
|
|
170
|
-
else {
|
|
171
|
-
metrics.incompleteRecords++;
|
|
172
|
-
}
|
|
173
|
-
});
|
|
174
|
-
// Field completeness
|
|
175
|
-
allFields.forEach(field => {
|
|
176
|
-
const nonEmptyCount = data.filter((record) => {
|
|
177
|
-
const value = record[field];
|
|
178
|
-
return value !== null && value !== undefined && value !== '';
|
|
179
|
-
}).length;
|
|
180
|
-
metrics.fieldCompleteness[field] = ((nonEmptyCount / data.length) * 100).toFixed(2) + '%';
|
|
181
|
-
});
|
|
182
|
-
// Data type analysis
|
|
183
|
-
allFields.forEach(field => {
|
|
184
|
-
const types = new Set();
|
|
185
|
-
data.forEach((record) => {
|
|
186
|
-
const value = record[field];
|
|
187
|
-
types.add(typeof value);
|
|
188
|
-
});
|
|
189
|
-
metrics.dataTypes[field] = Array.from(types).join(', ');
|
|
190
|
-
});
|
|
191
|
-
const qualityScore = ((metrics.completeRecords / metrics.totalRecords) * 100).toFixed(2);
|
|
192
|
-
return {
|
|
193
|
-
content: [{
|
|
194
|
-
type: 'text',
|
|
195
|
-
text: `✅ Data Quality Metrics\n\nTotal Records: ${metrics.totalRecords}\nComplete Records: ${metrics.completeRecords}\nIncomplete Records: ${metrics.incompleteRecords}\nDuplicates: ${metrics.duplicates}\nEmpty Fields: ${metrics.emptyFields}\n\nQuality Score: ${qualityScore}%\n\nField Completeness:\n${JSON.stringify(metrics.fieldCompleteness, null, 2)}\n\nData Types:\n${JSON.stringify(metrics.dataTypes, null, 2)}`,
|
|
196
|
-
}],
|
|
197
|
-
};
|
|
198
|
-
}, 'Failed data quality metrics');
|
|
199
|
-
}
|
|
200
117
|
/**
|
|
201
118
|
* Performance Monitor - Monitor browser and page performance
|
|
202
119
|
*/
|
|
@@ -67,73 +67,6 @@ export async function handleBatchElementScraper(args) {
|
|
|
67
67
|
};
|
|
68
68
|
}, 'Failed to batch scrape elements');
|
|
69
69
|
}
|
|
70
|
-
/**
|
|
71
|
-
* Parent-child relationships maintain करते हुए data निकालता है
|
|
72
|
-
*/
|
|
73
|
-
export async function handleNestedDataExtraction(args) {
|
|
74
|
-
return await withErrorHandling(async () => {
|
|
75
|
-
validateWorkflow('nested_data_extraction', {
|
|
76
|
-
requireBrowser: true,
|
|
77
|
-
requirePage: true,
|
|
78
|
-
});
|
|
79
|
-
const page = getCurrentPage();
|
|
80
|
-
const parentSelector = args.parentSelector;
|
|
81
|
-
const childSelector = args.childSelector;
|
|
82
|
-
const maxParents = args.maxParents || 50;
|
|
83
|
-
const nestedData = await page.evaluate(({ parentSelector, childSelector, maxParents }) => {
|
|
84
|
-
const parents = document.querySelectorAll(parentSelector);
|
|
85
|
-
const results = [];
|
|
86
|
-
let count = 0;
|
|
87
|
-
parents.forEach((parent) => {
|
|
88
|
-
if (count >= maxParents)
|
|
89
|
-
return;
|
|
90
|
-
const parentData = {
|
|
91
|
-
selector: parentSelector,
|
|
92
|
-
text: Array.from(parent.childNodes)
|
|
93
|
-
.filter((node) => node.nodeType === Node.TEXT_NODE)
|
|
94
|
-
.map((node) => node.textContent?.trim())
|
|
95
|
-
.filter((text) => text)
|
|
96
|
-
.join(' '),
|
|
97
|
-
attributes: {},
|
|
98
|
-
};
|
|
99
|
-
// Get parent attributes
|
|
100
|
-
Array.from(parent.attributes).forEach((attr) => {
|
|
101
|
-
parentData.attributes[attr.name] = attr.value;
|
|
102
|
-
});
|
|
103
|
-
// Get children
|
|
104
|
-
const children = parent.querySelectorAll(childSelector);
|
|
105
|
-
const childrenData = [];
|
|
106
|
-
children.forEach((child) => {
|
|
107
|
-
const childData = {
|
|
108
|
-
selector: childSelector,
|
|
109
|
-
text: child.textContent?.trim() || '',
|
|
110
|
-
attributes: {},
|
|
111
|
-
};
|
|
112
|
-
Array.from(child.attributes).forEach((attr) => {
|
|
113
|
-
childData.attributes[attr.name] = attr.value;
|
|
114
|
-
});
|
|
115
|
-
childrenData.push(childData);
|
|
116
|
-
});
|
|
117
|
-
if (childrenData.length > 0) {
|
|
118
|
-
results.push({
|
|
119
|
-
parent: parentData,
|
|
120
|
-
children: childrenData,
|
|
121
|
-
});
|
|
122
|
-
count++;
|
|
123
|
-
}
|
|
124
|
-
});
|
|
125
|
-
return results;
|
|
126
|
-
}, { parentSelector, childSelector, maxParents });
|
|
127
|
-
return {
|
|
128
|
-
content: [
|
|
129
|
-
{
|
|
130
|
-
type: 'text',
|
|
131
|
-
text: `✅ Extracted ${nestedData.length} parent-child relationships\n\n${JSON.stringify(nestedData, null, 2)}`,
|
|
132
|
-
},
|
|
133
|
-
],
|
|
134
|
-
};
|
|
135
|
-
}, 'Failed to extract nested data');
|
|
136
|
-
}
|
|
137
70
|
/**
|
|
138
71
|
* सभी elements के attributes (href, src, data-*) collect करता है
|
|
139
72
|
*/
|