brave-real-browser-mcp-server 2.14.7 → 2.14.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-manager.js +5 -0
- package/dist/handlers/APPLY_OPTIMIZATION_PATTERN.js +2 -2
- package/dist/handlers/advanced-video-media-handlers.js +7 -7
- package/dist/handlers/ai-powered-handlers.js +0 -197
- package/dist/handlers/data-extraction-handlers.js +0 -98
- package/dist/handlers/data-processing-handlers.js +0 -275
- package/dist/handlers/data-quality-handlers.js +0 -220
- package/dist/handlers/dynamic-session-handlers.js +0 -204
- package/dist/handlers/monitoring-reporting-handlers.js +0 -170
- package/dist/handlers/multi-element-handlers.js +0 -144
- package/dist/handlers/pagination-handlers.js +0 -191
- package/dist/handlers/visual-tools-handlers.js +0 -56
- package/dist/index.js +10 -85
- package/dist/mcp-server.js +2 -11
- package/dist/optimization-utils.js +3 -3
- package/dist/tool-definitions.js +0 -313
- package/package.json +3 -12
|
@@ -127,85 +127,6 @@ export async function handleCookieManager(args) {
|
|
|
127
127
|
/**
|
|
128
128
|
* Session Persistence - Save and restore browser session
|
|
129
129
|
*/
|
|
130
|
-
export async function handleSessionPersistence(args) {
|
|
131
|
-
return await withErrorHandling(async () => {
|
|
132
|
-
validateWorkflow('session_persistence', {
|
|
133
|
-
requireBrowser: true,
|
|
134
|
-
requirePage: true,
|
|
135
|
-
});
|
|
136
|
-
const page = getCurrentPage();
|
|
137
|
-
const action = args.action || 'save'; // save, restore
|
|
138
|
-
if (action === 'save') {
|
|
139
|
-
const cookies = await page.cookies();
|
|
140
|
-
const localStorage = await page.evaluate(() => {
|
|
141
|
-
const items = {};
|
|
142
|
-
for (let i = 0; i < window.localStorage.length; i++) {
|
|
143
|
-
const key = window.localStorage.key(i);
|
|
144
|
-
if (key) {
|
|
145
|
-
items[key] = window.localStorage.getItem(key);
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
return items;
|
|
149
|
-
});
|
|
150
|
-
const sessionStorage = await page.evaluate(() => {
|
|
151
|
-
const items = {};
|
|
152
|
-
for (let i = 0; i < window.sessionStorage.length; i++) {
|
|
153
|
-
const key = window.sessionStorage.key(i);
|
|
154
|
-
if (key) {
|
|
155
|
-
items[key] = window.sessionStorage.getItem(key);
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
return items;
|
|
159
|
-
});
|
|
160
|
-
const sessionData = {
|
|
161
|
-
url: page.url(),
|
|
162
|
-
cookies,
|
|
163
|
-
localStorage,
|
|
164
|
-
sessionStorage,
|
|
165
|
-
timestamp: new Date().toISOString(),
|
|
166
|
-
};
|
|
167
|
-
return {
|
|
168
|
-
content: [{
|
|
169
|
-
type: 'text',
|
|
170
|
-
text: `✅ Session saved\n\n${JSON.stringify(sessionData, null, 2)}`,
|
|
171
|
-
}],
|
|
172
|
-
};
|
|
173
|
-
}
|
|
174
|
-
if (action === 'restore') {
|
|
175
|
-
const sessionData = args.sessionData;
|
|
176
|
-
if (!sessionData) {
|
|
177
|
-
throw new Error('Session data is required for restore');
|
|
178
|
-
}
|
|
179
|
-
// Restore cookies
|
|
180
|
-
if (sessionData.cookies) {
|
|
181
|
-
await Promise.all(sessionData.cookies.map((cookie) => page.setCookie(cookie)));
|
|
182
|
-
}
|
|
183
|
-
// Restore localStorage
|
|
184
|
-
if (sessionData.localStorage) {
|
|
185
|
-
await page.evaluate((items) => {
|
|
186
|
-
for (const [key, value] of Object.entries(items)) {
|
|
187
|
-
window.localStorage.setItem(key, value);
|
|
188
|
-
}
|
|
189
|
-
}, sessionData.localStorage);
|
|
190
|
-
}
|
|
191
|
-
// Restore sessionStorage
|
|
192
|
-
if (sessionData.sessionStorage) {
|
|
193
|
-
await page.evaluate((items) => {
|
|
194
|
-
for (const [key, value] of Object.entries(items)) {
|
|
195
|
-
window.sessionStorage.setItem(key, value);
|
|
196
|
-
}
|
|
197
|
-
}, sessionData.sessionStorage);
|
|
198
|
-
}
|
|
199
|
-
return {
|
|
200
|
-
content: [{
|
|
201
|
-
type: 'text',
|
|
202
|
-
text: `✅ Session restored from ${sessionData.timestamp}`,
|
|
203
|
-
}],
|
|
204
|
-
};
|
|
205
|
-
}
|
|
206
|
-
throw new Error(`Unknown action: ${action}`);
|
|
207
|
-
}, 'Failed session persistence');
|
|
208
|
-
}
|
|
209
130
|
/**
|
|
210
131
|
* Form Auto Fill - Automatically fill form fields
|
|
211
132
|
*/
|
|
@@ -385,131 +306,6 @@ export async function handleAjaxContentWaiter(args) {
|
|
|
385
306
|
/**
|
|
386
307
|
* Modal Popup Handler - Handle modal popups
|
|
387
308
|
*/
|
|
388
|
-
export async function handleModalPopupHandler(args) {
|
|
389
|
-
return await withErrorHandling(async () => {
|
|
390
|
-
validateWorkflow('modal_popup_handler', {
|
|
391
|
-
requireBrowser: true,
|
|
392
|
-
requirePage: true,
|
|
393
|
-
});
|
|
394
|
-
const page = getCurrentPage();
|
|
395
|
-
const action = args.action || 'detect'; // detect, close, interact
|
|
396
|
-
if (action === 'detect') {
|
|
397
|
-
const modals = await page.evaluate(() => {
|
|
398
|
-
const results = [];
|
|
399
|
-
const modalSelectors = [
|
|
400
|
-
'.modal',
|
|
401
|
-
'[role="dialog"]',
|
|
402
|
-
'[class*="popup"]',
|
|
403
|
-
'[class*="overlay"]',
|
|
404
|
-
'.dialog',
|
|
405
|
-
];
|
|
406
|
-
modalSelectors.forEach(selector => {
|
|
407
|
-
const elements = document.querySelectorAll(selector);
|
|
408
|
-
elements.forEach((el) => {
|
|
409
|
-
const isVisible = el.offsetWidth > 0 && el.offsetHeight > 0;
|
|
410
|
-
if (isVisible) {
|
|
411
|
-
results.push({
|
|
412
|
-
selector,
|
|
413
|
-
id: el.id || null,
|
|
414
|
-
className: el.className,
|
|
415
|
-
text: el.textContent?.trim().substring(0, 200) || '',
|
|
416
|
-
});
|
|
417
|
-
}
|
|
418
|
-
});
|
|
419
|
-
});
|
|
420
|
-
return results;
|
|
421
|
-
});
|
|
422
|
-
return {
|
|
423
|
-
content: [{
|
|
424
|
-
type: 'text',
|
|
425
|
-
text: `✅ Found ${modals.length} visible modals\n\n${JSON.stringify(modals, null, 2)}`,
|
|
426
|
-
}],
|
|
427
|
-
};
|
|
428
|
-
}
|
|
429
|
-
if (action === 'close') {
|
|
430
|
-
const closeSelector = args.closeSelector || '.close, [aria-label="Close"], button[class*="close"]';
|
|
431
|
-
try {
|
|
432
|
-
await page.click(closeSelector);
|
|
433
|
-
return {
|
|
434
|
-
content: [{
|
|
435
|
-
type: 'text',
|
|
436
|
-
text: `✅ Modal closed`,
|
|
437
|
-
}],
|
|
438
|
-
};
|
|
439
|
-
}
|
|
440
|
-
catch (e) {
|
|
441
|
-
// Try pressing Escape
|
|
442
|
-
await page.keyboard.press('Escape');
|
|
443
|
-
return {
|
|
444
|
-
content: [{
|
|
445
|
-
type: 'text',
|
|
446
|
-
text: `✅ Pressed Escape key to close modal`,
|
|
447
|
-
}],
|
|
448
|
-
};
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
throw new Error(`Unknown action: ${action}`);
|
|
452
|
-
}, 'Failed modal popup handler');
|
|
453
|
-
}
|
|
454
309
|
/**
|
|
455
310
|
* Login Session Manager - Manage login sessions
|
|
456
311
|
*/
|
|
457
|
-
export async function handleLoginSessionManager(args) {
|
|
458
|
-
return await withErrorHandling(async () => {
|
|
459
|
-
validateWorkflow('login_session_manager', {
|
|
460
|
-
requireBrowser: true,
|
|
461
|
-
requirePage: true,
|
|
462
|
-
});
|
|
463
|
-
const page = getCurrentPage();
|
|
464
|
-
const action = args.action || 'check'; // check, login, logout
|
|
465
|
-
if (action === 'check') {
|
|
466
|
-
const isLoggedIn = await page.evaluate(() => {
|
|
467
|
-
// Check common indicators of logged-in state
|
|
468
|
-
const indicators = [
|
|
469
|
-
document.querySelector('[class*="logout"]'),
|
|
470
|
-
document.querySelector('[class*="profile"]'),
|
|
471
|
-
document.querySelector('[class*="account"]'),
|
|
472
|
-
document.cookie.includes('session') || document.cookie.includes('token'),
|
|
473
|
-
localStorage.getItem('token') !== null,
|
|
474
|
-
];
|
|
475
|
-
return indicators.some(indicator => Boolean(indicator));
|
|
476
|
-
});
|
|
477
|
-
const cookies = await page.cookies();
|
|
478
|
-
const sessionCookies = cookies.filter(c => c.name.toLowerCase().includes('session') ||
|
|
479
|
-
c.name.toLowerCase().includes('token') ||
|
|
480
|
-
c.name.toLowerCase().includes('auth'));
|
|
481
|
-
return {
|
|
482
|
-
content: [{
|
|
483
|
-
type: 'text',
|
|
484
|
-
text: `✅ Login Status Check\n\nLikely Logged In: ${isLoggedIn}\nSession Cookies: ${sessionCookies.length}\n\n${JSON.stringify(sessionCookies.map(c => ({ name: c.name, domain: c.domain })), null, 2)}`,
|
|
485
|
-
}],
|
|
486
|
-
};
|
|
487
|
-
}
|
|
488
|
-
if (action === 'login') {
|
|
489
|
-
const username = args.username;
|
|
490
|
-
const password = args.password;
|
|
491
|
-
const usernameSelector = args.usernameSelector || 'input[type="email"], input[type="text"], input[name*="user"], input[name*="email"]';
|
|
492
|
-
const passwordSelector = args.passwordSelector || 'input[type="password"]';
|
|
493
|
-
const submitSelector = args.submitSelector || 'button[type="submit"], input[type="submit"]';
|
|
494
|
-
if (!username || !password) {
|
|
495
|
-
throw new Error('Username and password are required');
|
|
496
|
-
}
|
|
497
|
-
// Fill username
|
|
498
|
-
await page.waitForSelector(usernameSelector, { timeout: 5000 });
|
|
499
|
-
await page.type(usernameSelector, username);
|
|
500
|
-
// Fill password
|
|
501
|
-
await page.waitForSelector(passwordSelector, { timeout: 5000 });
|
|
502
|
-
await page.type(passwordSelector, password);
|
|
503
|
-
// Submit
|
|
504
|
-
await page.click(submitSelector);
|
|
505
|
-
await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 10000 }).catch(() => { });
|
|
506
|
-
return {
|
|
507
|
-
content: [{
|
|
508
|
-
type: 'text',
|
|
509
|
-
text: `✅ Login attempted\n\nUsername: ${username}\nCurrent URL: ${page.url()}`,
|
|
510
|
-
}],
|
|
511
|
-
};
|
|
512
|
-
}
|
|
513
|
-
throw new Error(`Unknown action: ${action}`);
|
|
514
|
-
}, 'Failed login session manager');
|
|
515
|
-
}
|
|
@@ -110,91 +110,9 @@ export async function handleProgressTracker(args) {
|
|
|
110
110
|
throw new Error(`Unknown action: ${action}`);
|
|
111
111
|
}, 'Failed progress tracker');
|
|
112
112
|
}
|
|
113
|
-
/**
|
|
114
|
-
* Error Logger - Log and track errors
|
|
115
|
-
*/
|
|
116
|
-
export async function handleErrorLogger(args) {
|
|
117
|
-
return await withErrorHandling(async () => {
|
|
118
|
-
const action = args.action || 'log'; // log, get, clear
|
|
119
|
-
if (action === 'log') {
|
|
120
|
-
const error = {
|
|
121
|
-
id: `err_${Date.now()}`,
|
|
122
|
-
message: args.message || 'Unknown error',
|
|
123
|
-
type: args.type || 'error',
|
|
124
|
-
timestamp: new Date().toISOString(),
|
|
125
|
-
context: args.context || {},
|
|
126
|
-
stackTrace: args.stackTrace || null,
|
|
127
|
-
};
|
|
128
|
-
monitoringState.errors.push(error);
|
|
129
|
-
return {
|
|
130
|
-
content: [{
|
|
131
|
-
type: 'text',
|
|
132
|
-
text: `✅ Error logged: ${error.message}\n\n${JSON.stringify(error, null, 2)}`,
|
|
133
|
-
}],
|
|
134
|
-
};
|
|
135
|
-
}
|
|
136
|
-
if (action === 'get') {
|
|
137
|
-
const limit = args.limit || 10;
|
|
138
|
-
const errorType = args.type;
|
|
139
|
-
let filteredErrors = monitoringState.errors;
|
|
140
|
-
if (errorType) {
|
|
141
|
-
filteredErrors = filteredErrors.filter((err) => err.type === errorType);
|
|
142
|
-
}
|
|
143
|
-
const recentErrors = filteredErrors.slice(-limit);
|
|
144
|
-
return {
|
|
145
|
-
content: [{
|
|
146
|
-
type: 'text',
|
|
147
|
-
text: `✅ Retrieved ${recentErrors.length} error(s)\n\nTotal Errors: ${monitoringState.errors.length}\n\n${JSON.stringify(recentErrors, null, 2)}`,
|
|
148
|
-
}],
|
|
149
|
-
};
|
|
150
|
-
}
|
|
151
|
-
if (action === 'clear') {
|
|
152
|
-
const count = monitoringState.errors.length;
|
|
153
|
-
monitoringState.errors = [];
|
|
154
|
-
return {
|
|
155
|
-
content: [{
|
|
156
|
-
type: 'text',
|
|
157
|
-
text: `✅ Cleared ${count} error(s)`,
|
|
158
|
-
}],
|
|
159
|
-
};
|
|
160
|
-
}
|
|
161
|
-
throw new Error(`Unknown action: ${action}`);
|
|
162
|
-
}, 'Failed error logger');
|
|
163
|
-
}
|
|
164
113
|
/**
|
|
165
114
|
* Success Rate Reporter - Report success metrics
|
|
166
115
|
*/
|
|
167
|
-
export async function handleSuccessRateReporter(args) {
|
|
168
|
-
return await withErrorHandling(async () => {
|
|
169
|
-
const timeRange = args.timeRange || 'all'; // all, last_hour, last_day
|
|
170
|
-
let operations = monitoringState.operations;
|
|
171
|
-
if (timeRange === 'last_hour') {
|
|
172
|
-
const hourAgo = Date.now() - (60 * 60 * 1000);
|
|
173
|
-
operations = operations.filter((op) => op.startTime >= hourAgo);
|
|
174
|
-
}
|
|
175
|
-
else if (timeRange === 'last_day') {
|
|
176
|
-
const dayAgo = Date.now() - (24 * 60 * 60 * 1000);
|
|
177
|
-
operations = operations.filter((op) => op.startTime >= dayAgo);
|
|
178
|
-
}
|
|
179
|
-
const total = operations.length;
|
|
180
|
-
const completed = operations.filter((op) => op.status === 'completed').length;
|
|
181
|
-
const failed = operations.filter((op) => op.status === 'failed').length;
|
|
182
|
-
const inProgress = operations.filter((op) => op.status === 'in_progress').length;
|
|
183
|
-
const successRate = total > 0 ? ((completed / total) * 100).toFixed(2) : 0;
|
|
184
|
-
const failureRate = total > 0 ? ((failed / total) * 100).toFixed(2) : 0;
|
|
185
|
-
// Calculate average duration
|
|
186
|
-
const completedOps = operations.filter((op) => op.duration !== undefined);
|
|
187
|
-
const avgDuration = completedOps.length > 0
|
|
188
|
-
? (completedOps.reduce((sum, op) => sum + op.duration, 0) / completedOps.length).toFixed(2)
|
|
189
|
-
: 0;
|
|
190
|
-
return {
|
|
191
|
-
content: [{
|
|
192
|
-
type: 'text',
|
|
193
|
-
text: `✅ Success Rate Report (${timeRange})\n\nTotal Operations: ${total}\nCompleted: ${completed}\nFailed: ${failed}\nIn Progress: ${inProgress}\n\nSuccess Rate: ${successRate}%\nFailure Rate: ${failureRate}%\nAverage Duration: ${avgDuration}ms\n\nUptime: ${((Date.now() - monitoringState.startTime) / 1000 / 60).toFixed(2)} minutes`,
|
|
194
|
-
}],
|
|
195
|
-
};
|
|
196
|
-
}, 'Failed success rate reporter');
|
|
197
|
-
}
|
|
198
116
|
/**
|
|
199
117
|
* Data Quality Metrics - Report data quality metrics
|
|
200
118
|
*/
|
|
@@ -282,94 +200,6 @@ export async function handleDataQualityMetrics(args) {
|
|
|
282
200
|
/**
|
|
283
201
|
* Performance Monitor - Monitor browser and page performance
|
|
284
202
|
*/
|
|
285
|
-
export async function handlePerformanceMonitor(args) {
|
|
286
|
-
return await withErrorHandling(async () => {
|
|
287
|
-
validateWorkflow('performance_monitor', {
|
|
288
|
-
requireBrowser: true,
|
|
289
|
-
requirePage: true,
|
|
290
|
-
});
|
|
291
|
-
const page = getCurrentPage();
|
|
292
|
-
const performanceData = await page.evaluate(() => {
|
|
293
|
-
const perfData = {
|
|
294
|
-
navigation: {},
|
|
295
|
-
resources: [],
|
|
296
|
-
memory: {},
|
|
297
|
-
timing: {},
|
|
298
|
-
};
|
|
299
|
-
// Navigation timing
|
|
300
|
-
if (performance.timing) {
|
|
301
|
-
const timing = performance.timing;
|
|
302
|
-
perfData.navigation = {
|
|
303
|
-
domContentLoaded: timing.domContentLoadedEventEnd - timing.navigationStart,
|
|
304
|
-
loadComplete: timing.loadEventEnd - timing.navigationStart,
|
|
305
|
-
domInteractive: timing.domInteractive - timing.navigationStart,
|
|
306
|
-
firstPaint: timing.responseEnd - timing.requestStart,
|
|
307
|
-
};
|
|
308
|
-
}
|
|
309
|
-
// Resource timing
|
|
310
|
-
if (performance.getEntriesByType) {
|
|
311
|
-
const resources = performance.getEntriesByType('resource');
|
|
312
|
-
perfData.resources = resources.slice(0, 20).map((resource) => ({
|
|
313
|
-
name: resource.name.substring(0, 100),
|
|
314
|
-
type: resource.initiatorType,
|
|
315
|
-
duration: Math.round(resource.duration),
|
|
316
|
-
size: resource.transferSize || 0,
|
|
317
|
-
}));
|
|
318
|
-
}
|
|
319
|
-
// Memory (if available)
|
|
320
|
-
if (performance.memory) {
|
|
321
|
-
const memory = performance.memory;
|
|
322
|
-
perfData.memory = {
|
|
323
|
-
usedJSHeapSize: Math.round(memory.usedJSHeapSize / 1024 / 1024) + ' MB',
|
|
324
|
-
totalJSHeapSize: Math.round(memory.totalJSHeapSize / 1024 / 1024) + ' MB',
|
|
325
|
-
jsHeapSizeLimit: Math.round(memory.jsHeapSizeLimit / 1024 / 1024) + ' MB',
|
|
326
|
-
};
|
|
327
|
-
}
|
|
328
|
-
// Performance marks and measures
|
|
329
|
-
if (performance.getEntriesByType) {
|
|
330
|
-
const marks = performance.getEntriesByType('mark');
|
|
331
|
-
const measures = performance.getEntriesByType('measure');
|
|
332
|
-
perfData.timing.marks = marks.length;
|
|
333
|
-
perfData.timing.measures = measures.length;
|
|
334
|
-
}
|
|
335
|
-
return perfData;
|
|
336
|
-
});
|
|
337
|
-
// Calculate summary
|
|
338
|
-
const resourceCount = performanceData.resources.length;
|
|
339
|
-
const totalResourceDuration = performanceData.resources.reduce((sum, r) => sum + r.duration, 0);
|
|
340
|
-
const avgResourceDuration = resourceCount > 0 ? (totalResourceDuration / resourceCount).toFixed(2) : 0;
|
|
341
|
-
return {
|
|
342
|
-
content: [{
|
|
343
|
-
type: 'text',
|
|
344
|
-
text: `✅ Performance Monitor\n\nPage Load Metrics:\n${JSON.stringify(performanceData.navigation, null, 2)}\n\nResources (showing ${resourceCount}):\nAverage Load Time: ${avgResourceDuration}ms\n\nMemory Usage:\n${JSON.stringify(performanceData.memory, null, 2)}\n\nTop Resources:\n${JSON.stringify(performanceData.resources.slice(0, 10), null, 2)}`,
|
|
345
|
-
}],
|
|
346
|
-
};
|
|
347
|
-
}, 'Failed performance monitor');
|
|
348
|
-
}
|
|
349
203
|
/**
|
|
350
204
|
* Get Monitoring Summary - Get overall monitoring summary
|
|
351
205
|
*/
|
|
352
|
-
export async function handleGetMonitoringSummary(args) {
|
|
353
|
-
return await withErrorHandling(async () => {
|
|
354
|
-
const summary = {
|
|
355
|
-
uptime: ((Date.now() - monitoringState.startTime) / 1000 / 60).toFixed(2) + ' minutes',
|
|
356
|
-
operations: {
|
|
357
|
-
total: monitoringState.operations.length,
|
|
358
|
-
completed: monitoringState.operations.filter((op) => op.status === 'completed').length,
|
|
359
|
-
failed: monitoringState.operations.filter((op) => op.status === 'failed').length,
|
|
360
|
-
inProgress: monitoringState.operations.filter((op) => op.status === 'in_progress').length,
|
|
361
|
-
},
|
|
362
|
-
errors: {
|
|
363
|
-
total: monitoringState.errors.length,
|
|
364
|
-
recent: monitoringState.errors.slice(-5),
|
|
365
|
-
},
|
|
366
|
-
metrics: monitoringState.metrics,
|
|
367
|
-
};
|
|
368
|
-
return {
|
|
369
|
-
content: [{
|
|
370
|
-
type: 'text',
|
|
371
|
-
text: `✅ Monitoring Summary\n\n${JSON.stringify(summary, null, 2)}`,
|
|
372
|
-
}],
|
|
373
|
-
};
|
|
374
|
-
}, 'Failed to get monitoring summary');
|
|
375
|
-
}
|
|
@@ -194,61 +194,6 @@ export async function handleAttributeHarvester(args) {
|
|
|
194
194
|
};
|
|
195
195
|
}, 'Failed to harvest attributes');
|
|
196
196
|
}
|
|
197
|
-
/**
|
|
198
|
-
* सभी images URLs, alt text, dimensions के साथ extract करता है
|
|
199
|
-
*/
|
|
200
|
-
export async function handleImageScraper(args) {
|
|
201
|
-
return await withErrorHandling(async () => {
|
|
202
|
-
validateWorkflow('image_scraper', {
|
|
203
|
-
requireBrowser: true,
|
|
204
|
-
requirePage: true,
|
|
205
|
-
});
|
|
206
|
-
const page = getCurrentPage();
|
|
207
|
-
const selector = args.selector || 'img';
|
|
208
|
-
const includeDataUrls = args.includeDataUrls || false;
|
|
209
|
-
const includeDimensions = args.includeDimensions !== false;
|
|
210
|
-
const imageData = await page.evaluate(({ selector, includeDataUrls, includeDimensions }) => {
|
|
211
|
-
const images = document.querySelectorAll(selector);
|
|
212
|
-
const results = [];
|
|
213
|
-
images.forEach((img, index) => {
|
|
214
|
-
const src = img.src || img.getAttribute('data-src') || '';
|
|
215
|
-
// Skip data URLs if not included
|
|
216
|
-
if (!includeDataUrls && src.startsWith('data:')) {
|
|
217
|
-
return;
|
|
218
|
-
}
|
|
219
|
-
const imageInfo = {
|
|
220
|
-
index,
|
|
221
|
-
src,
|
|
222
|
-
alt: img.alt || '',
|
|
223
|
-
title: img.title || '',
|
|
224
|
-
};
|
|
225
|
-
if (includeDimensions) {
|
|
226
|
-
imageInfo.width = img.naturalWidth || img.width || 0;
|
|
227
|
-
imageInfo.height = img.naturalHeight || img.height || 0;
|
|
228
|
-
}
|
|
229
|
-
// Additional attributes
|
|
230
|
-
const srcset = img.srcset || img.getAttribute('data-srcset');
|
|
231
|
-
if (srcset) {
|
|
232
|
-
imageInfo.srcset = srcset;
|
|
233
|
-
}
|
|
234
|
-
const loading = img.loading;
|
|
235
|
-
if (loading) {
|
|
236
|
-
imageInfo.loading = loading;
|
|
237
|
-
}
|
|
238
|
-
results.push(imageInfo);
|
|
239
|
-
});
|
|
240
|
-
return results;
|
|
241
|
-
}, { selector, includeDataUrls, includeDimensions });
|
|
242
|
-
return {
|
|
243
|
-
content: [
|
|
244
|
-
{
|
|
245
|
-
type: 'text',
|
|
246
|
-
text: `✅ Scraped ${imageData.length} images\n\n${JSON.stringify(imageData, null, 2)}`,
|
|
247
|
-
},
|
|
248
|
-
],
|
|
249
|
-
};
|
|
250
|
-
}, 'Failed to scrape images');
|
|
251
|
-
}
|
|
252
197
|
/**
|
|
253
198
|
* Internal/external links classification के साथ collect करता है
|
|
254
199
|
*/
|
|
@@ -441,92 +386,3 @@ export async function handleMediaExtractor(args) {
|
|
|
441
386
|
};
|
|
442
387
|
}, 'Failed to extract media');
|
|
443
388
|
}
|
|
444
|
-
/**
|
|
445
|
-
* Downloadable files (PDF, DOC, etc.) detect करता है
|
|
446
|
-
*/
|
|
447
|
-
export async function handlePDFLinkFinder(args) {
|
|
448
|
-
return await withErrorHandling(async () => {
|
|
449
|
-
validateWorkflow('pdf_link_finder', {
|
|
450
|
-
requireBrowser: true,
|
|
451
|
-
requirePage: true,
|
|
452
|
-
});
|
|
453
|
-
const page = getCurrentPage();
|
|
454
|
-
const selector = args.selector || 'a[href]';
|
|
455
|
-
const includeOtherFiles = args.includeOtherFiles !== false;
|
|
456
|
-
const fileData = await page.evaluate(({ selector, includeOtherFiles }) => {
|
|
457
|
-
const links = document.querySelectorAll(selector);
|
|
458
|
-
const results = {
|
|
459
|
-
pdfs: [],
|
|
460
|
-
documents: [],
|
|
461
|
-
archives: [],
|
|
462
|
-
others: [],
|
|
463
|
-
};
|
|
464
|
-
const fileExtensions = {
|
|
465
|
-
pdf: ['pdf'],
|
|
466
|
-
documents: ['doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'odt', 'ods', 'odp'],
|
|
467
|
-
archives: ['zip', 'rar', '7z', 'tar', 'gz', 'bz2'],
|
|
468
|
-
others: ['txt', 'csv', 'json', 'xml'],
|
|
469
|
-
};
|
|
470
|
-
links.forEach((link, index) => {
|
|
471
|
-
const href = link.href;
|
|
472
|
-
const text = link.textContent?.trim() || '';
|
|
473
|
-
// Check file extension
|
|
474
|
-
const url = href.toLowerCase();
|
|
475
|
-
let fileType = '';
|
|
476
|
-
let category = '';
|
|
477
|
-
for (const [cat, exts] of Object.entries(fileExtensions)) {
|
|
478
|
-
for (const ext of exts) {
|
|
479
|
-
if (url.endsWith(`.${ext}`) || url.includes(`.${ext}?`) || url.includes(`.${ext}#`)) {
|
|
480
|
-
fileType = ext;
|
|
481
|
-
category = cat;
|
|
482
|
-
break;
|
|
483
|
-
}
|
|
484
|
-
}
|
|
485
|
-
if (fileType)
|
|
486
|
-
break;
|
|
487
|
-
}
|
|
488
|
-
if (!fileType && !includeOtherFiles) {
|
|
489
|
-
return;
|
|
490
|
-
}
|
|
491
|
-
const fileInfo = {
|
|
492
|
-
index,
|
|
493
|
-
href,
|
|
494
|
-
text,
|
|
495
|
-
fileType,
|
|
496
|
-
fileName: href.split('/').pop()?.split('?')[0] || '',
|
|
497
|
-
};
|
|
498
|
-
// Get file size if available
|
|
499
|
-
const sizeAttr = link.getAttribute('data-size') || link.getAttribute('title');
|
|
500
|
-
if (sizeAttr) {
|
|
501
|
-
fileInfo.size = sizeAttr;
|
|
502
|
-
}
|
|
503
|
-
// Categorize
|
|
504
|
-
if (category === 'pdf') {
|
|
505
|
-
results.pdfs.push(fileInfo);
|
|
506
|
-
}
|
|
507
|
-
else if (category === 'documents') {
|
|
508
|
-
results.documents.push(fileInfo);
|
|
509
|
-
}
|
|
510
|
-
else if (category === 'archives') {
|
|
511
|
-
results.archives.push(fileInfo);
|
|
512
|
-
}
|
|
513
|
-
else if (includeOtherFiles) {
|
|
514
|
-
results.others.push(fileInfo);
|
|
515
|
-
}
|
|
516
|
-
});
|
|
517
|
-
return results;
|
|
518
|
-
}, { selector, includeOtherFiles });
|
|
519
|
-
const totalFiles = fileData.pdfs.length +
|
|
520
|
-
fileData.documents.length +
|
|
521
|
-
fileData.archives.length +
|
|
522
|
-
fileData.others.length;
|
|
523
|
-
return {
|
|
524
|
-
content: [
|
|
525
|
-
{
|
|
526
|
-
type: 'text',
|
|
527
|
-
text: `✅ Found ${totalFiles} downloadable files\n\n${JSON.stringify(fileData, null, 2)}`,
|
|
528
|
-
},
|
|
529
|
-
],
|
|
530
|
-
};
|
|
531
|
-
}, 'Failed to find PDF links');
|
|
532
|
-
}
|