brave-real-browser-mcp-server 2.14.8 → 2.14.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/handlers/ai-powered-handlers.js +0 -197
- package/dist/handlers/data-extraction-handlers.js +0 -98
- package/dist/handlers/data-processing-handlers.js +0 -173
- package/dist/handlers/data-quality-handlers.js +0 -220
- package/dist/handlers/dynamic-session-handlers.js +0 -204
- package/dist/handlers/multi-element-handlers.js +0 -55
- package/dist/handlers/pagination-handlers.js +0 -191
- package/dist/index.js +8 -59
- package/dist/mcp-server.js +2 -8
- package/dist/tool-definitions.js +0 -216
- package/package.json +2 -11
|
@@ -74,94 +74,6 @@ export async function handleDataDeduplication(args) {
|
|
|
74
74
|
/**
 * Missing Data Handler - Detect and handle missing data
 *
 * A required field counts as missing when its value is `undefined`, `null`
 * or the empty string. Every record is then processed according to `strategy`:
 *   - 'remove': records with at least one missing required field are dropped
 *   - 'fill':   missing fields receive a placeholder (0, 'N/A' or null)
 *   - 'flag':   `_hasMissingData` / `_missingFields` are added to a copy
 *   - 'report' (default, and any unrecognised value): records are kept as-is
 *
 * Only a textual summary is returned; the processed records themselves are
 * counted ("Processed Items") but not included in the response.
 *
 * @param {object} args
 * @param {Array} args.data - Records to inspect.
 * @param {string[]} [args.requiredFields] - Field names that must be populated.
 *   Duplicates are ignored. When omitted, nothing is reported as missing.
 * @param {string} [args.strategy='report'] - One of 'report', 'remove', 'fill', 'flag'.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   The summary, or an `isError: true` result describing the failure. Errors
 *   are never thrown to the caller.
 */
export async function handleMissingDataHandler(args) {
    try {
        // Destructure inside the try so a missing `args` becomes an error result, not a rejection.
        const { data, requiredFields, strategy = 'report' } = args ?? {};
        if (!Array.isArray(data)) {
            throw new Error('Data must be an array');
        }
        const isMissing = (value) => value === undefined || value === null || value === '';
        // De-duplicate so a field listed twice is not counted twice.
        const fields = Array.isArray(requiredFields) ? [...new Set(requiredFields)] : [];
        // The type of a missing value says nothing about the field, so the 'fill'
        // placeholder is derived from the first populated value of that field.
        const fieldKinds = new Map();
        const placeholderFor = (field, currentValue) => {
            if (!fieldKinds.has(field)) {
                const sample = data.find((record) => record != null && !isMissing(record[field]));
                fieldKinds.set(field, sample == null ? undefined : typeof sample[field]);
            }
            const kind = fieldKinds.get(field);
            if (kind === 'number') {
                return 0;
            }
            if (kind === 'string' || currentValue === '') {
                return 'N/A';
            }
            return null;
        };
        const results = [];
        // A Map (not a plain object) because keys are caller-supplied field names.
        const missingReport = new Map();
        let totalMissing = 0;
        let itemsWithMissing = 0;
        data.forEach((item, index) => {
            // A null/undefined entry has no fields at all: every required field is missing.
            const record = item ?? {};
            const missingFields = fields.filter((field) => isMissing(record[field]));
            const hasMissing = missingFields.length > 0;
            if (hasMissing) {
                itemsWithMissing++;
            }
            for (const field of missingFields) {
                totalMissing++;
                let entry = missingReport.get(field);
                if (!entry) {
                    entry = { count: 0, indices: [] };
                    missingReport.set(field, entry);
                }
                entry.count++;
                entry.indices.push(index);
            }
            // Work on a shallow copy so the caller's records are never mutated.
            const processedItem = { ...record };
            switch (strategy) {
                case 'remove':
                    if (!hasMissing) {
                        results.push(processedItem);
                    }
                    break;
                case 'fill':
                    for (const field of missingFields) {
                        processedItem[field] = placeholderFor(field, record[field]);
                    }
                    results.push(processedItem);
                    break;
                case 'flag':
                    processedItem._hasMissingData = hasMissing;
                    processedItem._missingFields = missingFields;
                    results.push(processedItem);
                    break;
                case 'report':
                default:
                    results.push(processedItem);
                    break;
            }
        });
        // Guard the empty data set: 0 / 0 would otherwise be rendered as "NaN%".
        const missingRate = data.length === 0
            ? '0.00'
            : ((totalMissing / (data.length * (fields.length || 1))) * 100).toFixed(2);
        let summary = `Missing Data Analysis:\n\nStatistics:\n- Total Items: ${data.length}\n- Items with Missing Data: ${itemsWithMissing}\n- Total Missing Fields: ${totalMissing}\n- Missing Rate: ${missingRate}%\n- Strategy: ${strategy}`;
        if (missingReport.size > 0) {
            summary += `\n\nMissing Fields Report:\n${[...missingReport].map(([field, info]) => `- ${field}: ${info.count} occurrences (indices: ${info.indices.slice(0, 5).join(', ')}${info.indices.length > 5 ? '...' : ''})`).join('\n')}`;
        }
        summary += `\n\nProcessed Items: ${results.length}`;
        return {
            content: [
                {
                    type: "text",
                    text: summary
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `Missing Data Handler Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
|
|
165
77
|
/**
|
|
166
78
|
* Data Type Validator - Validate data types against schema
|
|
167
79
|
*/
|
|
@@ -343,135 +255,3 @@ export async function handleOutlierDetection(args) {
|
|
|
343
255
|
/**
 * Consistency Checker - Check data consistency across fields
 *
 * Every record in `data` is evaluated against every rule. Supported rule shapes:
 *   - { type: 'required',   field }                      value must not be undefined, null or ''
 *   - { type: 'range',      field, min, max }            parseFloat(value) must lie in [min, max]
 *   - { type: 'pattern',    field, pattern }             String(value) must match the regular expression
 *   - { type: 'dependency', field, dependsOn }           if `field` is populated, `dependsOn` must be too
 *   - { type: 'unique',     field }                      value must occur only once in the data set
 *   - { type: 'comparison', field1, operator, field2 }   operator is one of > < >= <= == !=
 * Each rule also carries a `name`, used to group the results.
 *
 * A rule that throws while being evaluated (for example an invalid pattern, an
 * unknown rule type, or a null record) is recorded as a violation of that rule
 * for that record instead of aborting the run.
 *
 * The consistency rate is the share of records with no violation at all.
 *
 * @param {object} args
 * @param {Array<object>} args.data - Records to check.
 * @param {Array<object>} args.rules - Rules as described above.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 *   A text summary, or an `isError: true` result when the arguments are invalid.
 */
export async function handleConsistencyChecker(args) {
    try {
        // Destructure inside the try so a missing `args` becomes an error result, not a rejection.
        const { data, rules } = args ?? {};
        if (!Array.isArray(data)) {
            throw new Error('Data must be an array');
        }
        if (!rules || !Array.isArray(rules)) {
            throw new Error('Rules must be provided as an array');
        }
        const isBlank = (value) => value === undefined || value === null || value === '';
        // A Map keeps caller-supplied operators from resolving to inherited object properties.
        const comparators = new Map([
            ['>', (a, b) => a > b],
            ['<', (a, b) => a < b],
            ['>=', (a, b) => a >= b],
            ['<=', (a, b) => a <= b],
            ['==', (a, b) => a === b],
            ['!=', (a, b) => a !== b],
        ]);
        // Occurrence counts per field, built once on first use instead of rescanning per record.
        const valueCounts = new Map();
        const countsFor = (field) => {
            let counts = valueCounts.get(field);
            if (!counts) {
                counts = new Map();
                for (const entry of data) {
                    if (entry == null) {
                        continue;
                    }
                    const value = entry[field];
                    counts.set(value, (counts.get(value) ?? 0) + 1);
                }
                valueCounts.set(field, counts);
            }
            return counts;
        };
        // Compile each distinct pattern once; an invalid pattern throws on every use.
        const regexCache = new Map();
        const regexFor = (pattern) => {
            let regex = regexCache.get(pattern);
            if (!regex) {
                regex = new RegExp(pattern);
                regexCache.set(pattern, regex);
            }
            // Cached instances are shared, so clear any state left by a sticky/global flag.
            regex.lastIndex = 0;
            return regex;
        };
        // Returns the violation reason, or null when the record satisfies the rule.
        const check = (rule, item) => {
            switch (rule.type) {
                case 'required': {
                    return isBlank(item[rule.field])
                        ? `Field '${rule.field}' is required but missing`
                        : null;
                }
                case 'range': {
                    const value = parseFloat(item[rule.field]);
                    if (Number.isNaN(value) || value < rule.min || value > rule.max) {
                        return `Field '${rule.field}' value ${value} is outside range [${rule.min}, ${rule.max}]`;
                    }
                    return null;
                }
                case 'pattern': {
                    if (!regexFor(rule.pattern).test(String(item[rule.field]))) {
                        return `Field '${rule.field}' does not match pattern ${rule.pattern}`;
                    }
                    return null;
                }
                case 'dependency': {
                    // Presence check rather than truthiness: 0 and false are populated values.
                    if (!isBlank(item[rule.field]) && isBlank(item[rule.dependsOn])) {
                        return `Field '${rule.field}' requires '${rule.dependsOn}' to be present`;
                    }
                    return null;
                }
                case 'unique': {
                    const value = item[rule.field];
                    const occurrences = countsFor(rule.field).get(value) ?? 0;
                    // NaN never equals itself, so it is never reported as a duplicate.
                    if (occurrences > 1 && !Number.isNaN(value)) {
                        return `Field '${rule.field}' value '${value}' is not unique`;
                    }
                    return null;
                }
                case 'comparison': {
                    const val1 = parseFloat(item[rule.field1]);
                    const val2 = parseFloat(item[rule.field2]);
                    const compare = comparators.get(rule.operator);
                    // An unsupported operator fails the comparison.
                    if (!compare || !compare(val1, val2)) {
                        return `Comparison failed: ${rule.field1} (${val1}) ${rule.operator} ${rule.field2} (${val2})`;
                    }
                    return null;
                }
                default:
                    // A misspelt rule type must not silently pass every record.
                    throw new Error(`Unknown rule type '${rule.type}'`);
            }
        };
        const violations = [];
        const ruleResults = new Map();
        for (const rule of rules) {
            ruleResults.set(String(rule.name), { passed: 0, failed: 0, violations: [] });
        }
        const violatingIndices = new Set();
        data.forEach((item, index) => {
            for (const rule of rules) {
                let reason;
                try {
                    reason = check(rule, item);
                }
                catch (err) {
                    reason = `Error checking rule: ${err.message}`;
                }
                const tally = ruleResults.get(String(rule.name));
                if (reason === null) {
                    tally.passed++;
                    continue;
                }
                tally.failed++;
                const violation = {
                    index,
                    item,
                    rule: rule.name,
                    reason
                };
                tally.violations.push(violation);
                violations.push(violation);
                violatingIndices.add(index);
            }
        });
        // Count violating records, not violations: one record can break several rules,
        // which would otherwise push the rate below zero. An empty data set is fully consistent.
        const consistencyRate = data.length === 0
            ? '100.00'
            : (((data.length - violatingIndices.size) / data.length) * 100).toFixed(2);
        let summary = `Consistency Check Results:\n\nSummary:\n- Total Items: ${data.length}\n- Rules Checked: ${rules.length}\n- Total Violations: ${violations.length}\n- Consistency Rate: ${consistencyRate}%`;
        summary += `\n\nRule Results:\n${[...ruleResults].map(([name, result]) => `- ${name}: ${result.passed} passed, ${result.failed} failed`).join('\n')}`;
        if (violations.length > 0) {
            summary += `\n\nViolations (Top 10):\n${violations.slice(0, 10).map((v, i) => `${i + 1}. Index ${v.index} - Rule: ${v.rule}\n Reason: ${v.reason}`).join('\n')}`;
        }
        return {
            content: [
                {
                    type: "text",
                    text: summary
                }
            ]
        };
    }
    catch (error) {
        return {
            content: [
                {
                    type: "text",
                    text: `Consistency Checker Error: ${error.message}`
                }
            ],
            isError: true
        };
    }
}
|
|
@@ -127,85 +127,6 @@ export async function handleCookieManager(args) {
|
|
|
127
127
|
/**
 * Session Persistence - Save and restore browser session
 *
 * `args.action` selects the operation (defaults to 'save'):
 *   - 'save':    reads the page's cookies, localStorage and sessionStorage and
 *                returns them, with the current URL and an ISO timestamp, as
 *                pretty-printed JSON inside the text response.
 *   - 'restore': re-applies `args.sessionData` (cookies, localStorage,
 *                sessionStorage; each part optional) to the current page.
 * Any other action throws `Unknown action: <action>` inside the error wrapper.
 */
export async function handleSessionPersistence(args) {
    // Both helpers are handed to page.evaluate, so they must not reference
    // anything outside their own parameters.
    const readStorage = (storageName) => {
        const store = window[storageName];
        const snapshot = {};
        for (let idx = 0; idx < store.length; idx += 1) {
            const name = store.key(idx);
            if (name) {
                snapshot[name] = store.getItem(name);
            }
        }
        return snapshot;
    };
    const writeStorage = (storageName, entries) => {
        Object.entries(entries).forEach(([name, value]) => {
            window[storageName].setItem(name, value);
        });
    };
    return await withErrorHandling(async () => {
        validateWorkflow('session_persistence', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const action = args.action || 'save';
        switch (action) {
            case 'save': {
                const cookies = await page.cookies();
                const localStorage = await page.evaluate(readStorage, 'localStorage');
                const sessionStorage = await page.evaluate(readStorage, 'sessionStorage');
                const snapshot = {
                    url: page.url(),
                    cookies,
                    localStorage,
                    sessionStorage,
                    timestamp: new Date().toISOString(),
                };
                const text = `✅ Session saved\n\n${JSON.stringify(snapshot, null, 2)}`;
                return { content: [{ type: 'text', text }] };
            }
            case 'restore': {
                const { sessionData } = args;
                if (!sessionData) {
                    throw new Error('Session data is required for restore');
                }
                if (sessionData.cookies) {
                    const pending = sessionData.cookies.map((cookie) => page.setCookie(cookie));
                    await Promise.all(pending);
                }
                if (sessionData.localStorage) {
                    await page.evaluate(writeStorage, 'localStorage', sessionData.localStorage);
                }
                if (sessionData.sessionStorage) {
                    await page.evaluate(writeStorage, 'sessionStorage', sessionData.sessionStorage);
                }
                const text = `✅ Session restored from ${sessionData.timestamp}`;
                return { content: [{ type: 'text', text }] };
            }
            default:
                throw new Error(`Unknown action: ${action}`);
        }
    }, 'Failed session persistence');
}
|
|
209
130
|
/**
|
|
210
131
|
* Form Auto Fill - Automatically fill form fields
|
|
211
132
|
*/
|
|
@@ -385,131 +306,6 @@ export async function handleAjaxContentWaiter(args) {
|
|
|
385
306
|
/**
 * Modal Popup Handler - Handle modal popups
 *
 * `args.action` selects the operation (defaults to 'detect'):
 *   - 'detect': lists visible elements matching a fixed set of modal-like
 *               selectors. Each match is reported once per selector it matches.
 *   - 'close':  clicks `args.closeSelector` (or a default close-button
 *               selector); if the click fails, presses Escape instead.
 * Any other action throws `Unknown action: <action>` inside the error wrapper.
 */
export async function handleModalPopupHandler(args) {
    const textResult = (text) => ({ content: [{ type: 'text', text }] });
    return await withErrorHandling(async () => {
        validateWorkflow('modal_popup_handler', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const action = args.action || 'detect';
        switch (action) {
            case 'detect': {
                const modals = await page.evaluate(() => {
                    const candidateSelectors = [
                        '.modal',
                        '[role="dialog"]',
                        '[class*="popup"]',
                        '[class*="overlay"]',
                        '.dialog',
                    ];
                    // An element is treated as visible when it has a non-zero layout box.
                    const hasLayoutBox = (el) => el.offsetWidth > 0 && el.offsetHeight > 0;
                    return candidateSelectors.flatMap((selector) => Array.from(document.querySelectorAll(selector))
                        .filter(hasLayoutBox)
                        .map((el) => ({
                            selector,
                            id: el.id || null,
                            className: el.className,
                            text: el.textContent?.trim().substring(0, 200) || '',
                        })));
                });
                return textResult(`✅ Found ${modals.length} visible modals\n\n${JSON.stringify(modals, null, 2)}`);
            }
            case 'close': {
                const closeSelector = args.closeSelector || '.close, [aria-label="Close"], button[class*="close"]';
                let clicked = true;
                try {
                    await page.click(closeSelector);
                }
                catch (e) {
                    // Best-effort fallback: no usable close control, so try the Escape key.
                    clicked = false;
                    await page.keyboard.press('Escape');
                }
                return textResult(clicked ? `✅ Modal closed` : `✅ Pressed Escape key to close modal`);
            }
            default:
                throw new Error(`Unknown action: ${action}`);
        }
    }, 'Failed modal popup handler');
}
|
|
454
309
|
/**
 * Login Session Manager - Manage login sessions
 *
 * `args.action` selects the operation (defaults to 'check'):
 *   - 'check': heuristically reports whether the page looks logged in
 *              (logout/profile/account elements, session/token cookies, a
 *              `token` localStorage entry) and lists cookies whose name
 *              contains 'session', 'token' or 'auth'.
 *   - 'login': types `args.username` / `args.password` into the matching
 *              inputs, clicks the submit control and waits up to 10 s for a
 *              navigation to settle. Selectors can be overridden through
 *              `usernameSelector`, `passwordSelector` and `submitSelector`.
 * Any other action (including 'logout', which is not implemented) throws
 * `Unknown action: <action>` inside the error wrapper.
 *
 * The response for 'login' echoes the username and the resulting URL; the
 * password is never included.
 */
export async function handleLoginSessionManager(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('login_session_manager', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const action = args.action || 'check'; // check, login
        if (action === 'check') {
            const isLoggedIn = await page.evaluate(() => {
                // Check common indicators of logged-in state
                const indicators = [
                    document.querySelector('[class*="logout"]'),
                    document.querySelector('[class*="profile"]'),
                    document.querySelector('[class*="account"]'),
                    document.cookie.includes('session') || document.cookie.includes('token'),
                    localStorage.getItem('token') !== null,
                ];
                return indicators.some(indicator => Boolean(indicator));
            });
            const cookies = await page.cookies();
            const sessionCookies = cookies.filter(c => c.name.toLowerCase().includes('session') ||
                c.name.toLowerCase().includes('token') ||
                c.name.toLowerCase().includes('auth'));
            return {
                content: [{
                        type: 'text',
                        // Only cookie names and domains are reported, never their values.
                        text: `✅ Login Status Check\n\nLikely Logged In: ${isLoggedIn}\nSession Cookies: ${sessionCookies.length}\n\n${JSON.stringify(sessionCookies.map(c => ({ name: c.name, domain: c.domain })), null, 2)}`,
                    }],
            };
        }
        if (action === 'login') {
            const username = args.username;
            const password = args.password;
            const usernameSelector = args.usernameSelector || 'input[type="email"], input[type="text"], input[name*="user"], input[name*="email"]';
            const passwordSelector = args.passwordSelector || 'input[type="password"]';
            const submitSelector = args.submitSelector || 'button[type="submit"], input[type="submit"]';
            if (!username || !password) {
                throw new Error('Username and password are required');
            }
            // Fill username
            await page.waitForSelector(usernameSelector, { timeout: 5000 });
            await page.type(usernameSelector, username);
            // Fill password
            await page.waitForSelector(passwordSelector, { timeout: 5000 });
            await page.type(passwordSelector, password);
            // Submit. The navigation wait must be registered before the click is
            // dispatched; awaiting the click first can miss a fast navigation and
            // then sit out the full timeout. A timeout is tolerated on purpose:
            // some login forms update the page without navigating.
            await Promise.all([
                page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 10000 }).catch(() => { }),
                page.click(submitSelector),
            ]);
            return {
                content: [{
                        type: 'text',
                        text: `✅ Login attempted\n\nUsername: ${username}\nCurrent URL: ${page.url()}`,
                    }],
            };
        }
        throw new Error(`Unknown action: ${action}`);
    }, 'Failed login session manager');
}
|
|
@@ -194,61 +194,6 @@ export async function handleAttributeHarvester(args) {
|
|
|
194
194
|
};
|
|
195
195
|
}, 'Failed to harvest attributes');
|
|
196
196
|
}
|
|
197
|
-
/**
 * Extracts all images together with their URLs, alt text and dimensions.
 *
 * Options read from `args`:
 *   - `selector` (default 'img'): which elements to inspect
 *   - `includeDataUrls` (default false): keep entries whose source is a `data:` URL
 *   - `includeDimensions` (default true): add `width` / `height` to each entry
 * Each entry carries its position among the matched elements (`index`), `src`,
 * `alt` and `title`, plus `srcset` and `loading` when the element has them.
 */
export async function handleImageScraper(args) {
    return await withErrorHandling(async () => {
        validateWorkflow('image_scraper', {
            requireBrowser: true,
            requirePage: true,
        });
        const page = getCurrentPage();
        const options = {
            selector: args.selector || 'img',
            includeDataUrls: args.includeDataUrls || false,
            includeDimensions: args.includeDimensions !== false,
        };
        const imageData = await page.evaluate((opts) => {
            const collected = [];
            const nodes = Array.from(document.querySelectorAll(opts.selector));
            for (const [index, img] of nodes.entries()) {
                // Lazy-loaded images often keep the real URL in data-src.
                const src = img.src || img.getAttribute('data-src') || '';
                if (!opts.includeDataUrls && src.startsWith('data:')) {
                    continue;
                }
                const entry = {
                    index,
                    src,
                    alt: img.alt || '',
                    title: img.title || '',
                };
                if (opts.includeDimensions) {
                    entry.width = img.naturalWidth || img.width || 0;
                    entry.height = img.naturalHeight || img.height || 0;
                }
                const srcset = img.srcset || img.getAttribute('data-srcset');
                if (srcset) {
                    entry.srcset = srcset;
                }
                if (img.loading) {
                    entry.loading = img.loading;
                }
                collected.push(entry);
            }
            return collected;
        }, options);
        const text = `✅ Scraped ${imageData.length} images\n\n${JSON.stringify(imageData, null, 2)}`;
        return { content: [{ type: 'text', text }] };
    }, 'Failed to scrape images');
}
|
|
252
197
|
/**
|
|
253
198
|
* Collects links, classifying each one as internal or external
|
|
254
199
|
*/
|