crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,986 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrowserProcessor - JavaScript-rendered content handling using Playwright
|
|
3
|
+
* Handles dynamic content, SPAs, and JavaScript-heavy websites
|
|
4
|
+
* Enhanced with stealth mode capabilities for anti-detection
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { chromium } from 'playwright';
|
|
8
|
+
import { z } from 'zod';
|
|
9
|
+
import StealthBrowserManager from '../StealthBrowserManager.js';
|
|
10
|
+
import HumanBehaviorSimulator from '../../utils/HumanBehaviorSimulator.js';
|
|
11
|
+
import LocalizationManager from '../LocalizationManager.js';
|
|
12
|
+
|
|
13
|
+
// One cookie entry accepted by `options.cookies`.
const cookieEntrySchema = z.object({
    name: z.string(),
    value: z.string(),
    domain: z.string().optional(),
    path: z.string().default('/'),
    expires: z.number().optional(),
    httpOnly: z.boolean().default(false),
    secure: z.boolean().default(false),
    sameSite: z.enum(['Strict', 'Lax', 'None']).default('Lax')
});

// Stealth mode options
const stealthModeSchema = z.object({
    enabled: z.boolean().default(false),
    level: z.enum(['basic', 'medium', 'advanced']).default('medium'),
    randomizeFingerprint: z.boolean().default(true),
    simulateHumanBehavior: z.boolean().default(true),
    customUserAgent: z.string().optional(),
    hideWebDriver: z.boolean().default(true),
    blockWebRTC: z.boolean().default(true)
});

// Human behavior simulation options
const humanBehaviorSchema = z.object({
    enabled: z.boolean().default(false),
    mouseMovements: z.boolean().default(true),
    typingVariation: z.boolean().default(true),
    scrollBehavior: z.boolean().default(true),
    idlePeriods: z.boolean().default(true),
    readingTime: z.boolean().default(true)
});

// Explicit coordinates that take the place of a country-derived location.
const customLocationSchema = z.object({
    latitude: z.number().min(-90).max(90),
    longitude: z.number().min(-180).max(180),
    accuracy: z.number().min(1).max(100).optional()
});

// Localization options
const localizationSchema = z.object({
    enabled: z.boolean().default(false),
    countryCode: z.string().length(2).optional(),
    language: z.string().optional(),
    timezone: z.string().optional(),
    customLocation: customLocationSchema.optional(),
    enableTimezoneSpoof: z.boolean().default(true),
    enableGeoLocationSpoof: z.boolean().default(true)
});

/**
 * Input contract for BrowserProcessor.processURL: a required URL plus an
 * optional bag of browser options (defaults to an empty object).
 */
const BrowserProcessorSchema = z.object({
    url: z.string().url(),
    options: z.object({
        waitForSelector: z.string().optional(),
        waitForFunction: z.string().optional(),
        waitForTimeout: z.number().min(0).max(60000).default(5000),
        viewportWidth: z.number().min(320).max(1920).default(1280),
        viewportHeight: z.number().min(240).max(1080).default(720),
        userAgent: z.string().optional(),
        enableJavaScript: z.boolean().default(true),
        enableImages: z.boolean().default(false),
        blockResources: z.array(z.string()).default(['font', 'stylesheet']),
        extraHeaders: z.record(z.string()).optional(),
        cookies: z.array(cookieEntrySchema).optional(),
        scrollToBottom: z.boolean().default(false),
        executeScript: z.string().optional(),
        captureScreenshot: z.boolean().default(false),
        mobileEmulation: z.boolean().default(false),
        stealthMode: stealthModeSchema.optional(),
        humanBehavior: humanBehaviorSchema.optional(),
        localization: localizationSchema.optional()
    }).optional().default({})
});
|
|
78
|
+
|
|
79
|
+
// Findings about client-side rendering on the processed page.
const dynamicContentSchema = z.object({
    detectedFrameworks: z.array(z.string()),
    hasLazyLoading: z.boolean(),
    hasDynamicContent: z.boolean(),
    scriptCount: z.number(),
    ajaxRequests: z.array(z.string())
});

// Page timing figures; the paint metrics may be absent.
const performanceMetricsSchema = z.object({
    domContentLoaded: z.number(),
    loadComplete: z.number(),
    firstContentfulPaint: z.number().optional(),
    largestContentfulPaint: z.number().optional()
});

/**
 * Shape of the object produced for one processed URL.
 */
const BrowserResult = z.object({
    url: z.string(),
    html: z.string(),
    text: z.string(),
    title: z.string(),
    screenshot: z.string().optional(),
    loadTime: z.number(),
    dynamicContent: dynamicContentSchema,
    metrics: performanceMetricsSchema,
    processedAt: z.string(),
    success: z.boolean(),
    error: z.string().optional()
});
|
|
103
|
+
|
|
104
|
+
export class BrowserProcessor {
|
|
105
|
+
constructor() {
|
|
106
|
+
this.browser = null;
|
|
107
|
+
this.stealthManager = null;
|
|
108
|
+
this.humanBehaviorSimulator = null;
|
|
109
|
+
this.localizationManager = null;
|
|
110
|
+
this.activeContexts = new Map();
|
|
111
|
+
|
|
112
|
+
this.defaultOptions = {
|
|
113
|
+
waitForTimeout: 5000,
|
|
114
|
+
viewportWidth: 1280,
|
|
115
|
+
viewportHeight: 720,
|
|
116
|
+
enableJavaScript: true,
|
|
117
|
+
enableImages: false,
|
|
118
|
+
blockResources: ['font', 'stylesheet'],
|
|
119
|
+
scrollToBottom: false,
|
|
120
|
+
captureScreenshot: false,
|
|
121
|
+
mobileEmulation: false,
|
|
122
|
+
stealthMode: {
|
|
123
|
+
enabled: false,
|
|
124
|
+
level: 'medium',
|
|
125
|
+
randomizeFingerprint: true,
|
|
126
|
+
simulateHumanBehavior: true,
|
|
127
|
+
hideWebDriver: true,
|
|
128
|
+
blockWebRTC: true
|
|
129
|
+
},
|
|
130
|
+
humanBehavior: {
|
|
131
|
+
enabled: false,
|
|
132
|
+
mouseMovements: true,
|
|
133
|
+
typingVariation: true,
|
|
134
|
+
scrollBehavior: true,
|
|
135
|
+
idlePeriods: true,
|
|
136
|
+
readingTime: true
|
|
137
|
+
},
|
|
138
|
+
localization: {
|
|
139
|
+
enabled: false,
|
|
140
|
+
enableTimezoneSpoof: true,
|
|
141
|
+
enableGeoLocationSpoof: true
|
|
142
|
+
}
|
|
143
|
+
};
|
|
144
|
+
|
|
145
|
+
// Initialize localization manager
|
|
146
|
+
this.localizationManager = new LocalizationManager();
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Process URL with browser automation
|
|
151
|
+
* @param {Object} params - Processing parameters
|
|
152
|
+
* @param {string} params.url - URL to process
|
|
153
|
+
* @param {Object} params.options - Browser options
|
|
154
|
+
* @returns {Promise<Object>} - Processing result with rendered content
|
|
155
|
+
*/
|
|
156
|
+
async processURL(params) {
|
|
157
|
+
const startTime = Date.now();
|
|
158
|
+
|
|
159
|
+
try {
|
|
160
|
+
const validated = BrowserProcessorSchema.parse(params);
|
|
161
|
+
const { url, options } = validated;
|
|
162
|
+
const processingOptions = { ...this.defaultOptions, ...options };
|
|
163
|
+
|
|
164
|
+
const result = {
|
|
165
|
+
url,
|
|
166
|
+
processedAt: new Date().toISOString(),
|
|
167
|
+
success: false,
|
|
168
|
+
loadTime: 0
|
|
169
|
+
};
|
|
170
|
+
|
|
171
|
+
// Initialize browser and page (with stealth if enabled)
|
|
172
|
+
const page = await this.initializePage(processingOptions);
|
|
173
|
+
|
|
174
|
+
try {
|
|
175
|
+
// Navigate and wait for content
|
|
176
|
+
const navigationResult = await this.navigateAndWait(page, url, processingOptions);
|
|
177
|
+
|
|
178
|
+
// Extract content and metadata
|
|
179
|
+
const contentResult = await this.extractContent(page, processingOptions);
|
|
180
|
+
|
|
181
|
+
// Analyze dynamic content
|
|
182
|
+
const dynamicAnalysis = await this.analyzeDynamicContent(page);
|
|
183
|
+
|
|
184
|
+
// Get performance metrics
|
|
185
|
+
const metrics = await this.getPerformanceMetrics(page);
|
|
186
|
+
|
|
187
|
+
// Capture screenshot if requested
|
|
188
|
+
let screenshot = null;
|
|
189
|
+
if (processingOptions.captureScreenshot) {
|
|
190
|
+
screenshot = await this.captureScreenshot(page);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// Combine results
|
|
194
|
+
Object.assign(result, {
|
|
195
|
+
...contentResult,
|
|
196
|
+
screenshot,
|
|
197
|
+
dynamicContent: dynamicAnalysis,
|
|
198
|
+
metrics,
|
|
199
|
+
loadTime: Date.now() - startTime,
|
|
200
|
+
success: true
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
} finally {
|
|
204
|
+
// Always close the page
|
|
205
|
+
await page.close();
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
return result;
|
|
209
|
+
|
|
210
|
+
} catch (error) {
|
|
211
|
+
return {
|
|
212
|
+
url: params.url || 'unknown',
|
|
213
|
+
processedAt: new Date().toISOString(),
|
|
214
|
+
success: false,
|
|
215
|
+
error: `Browser processing failed: ${error.message}`,
|
|
216
|
+
loadTime: Date.now() - startTime,
|
|
217
|
+
html: '',
|
|
218
|
+
text: '',
|
|
219
|
+
title: '',
|
|
220
|
+
dynamicContent: {
|
|
221
|
+
detectedFrameworks: [],
|
|
222
|
+
hasLazyLoading: false,
|
|
223
|
+
hasDynamicContent: false,
|
|
224
|
+
scriptCount: 0,
|
|
225
|
+
ajaxRequests: []
|
|
226
|
+
},
|
|
227
|
+
metrics: {
|
|
228
|
+
domContentLoaded: 0,
|
|
229
|
+
loadComplete: Date.now() - startTime
|
|
230
|
+
}
|
|
231
|
+
};
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* Initialize browser instance
|
|
237
|
+
* @returns {Promise<void>}
|
|
238
|
+
*/
|
|
239
|
+
async initBrowser() {
|
|
240
|
+
if (!this.browser) {
|
|
241
|
+
this.browser = await chromium.launch({
|
|
242
|
+
headless: true,
|
|
243
|
+
args: [
|
|
244
|
+
'--no-sandbox',
|
|
245
|
+
'--disable-dev-shm-usage',
|
|
246
|
+
'--disable-gpu',
|
|
247
|
+
'--disable-web-security',
|
|
248
|
+
'--disable-background-timer-throttling',
|
|
249
|
+
'--disable-backgrounding-occluded-windows',
|
|
250
|
+
'--disable-renderer-backgrounding'
|
|
251
|
+
]
|
|
252
|
+
});
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
/**
|
|
257
|
+
* Initialize page with stealth capabilities if enabled
|
|
258
|
+
* @param {Object} options - Processing options
|
|
259
|
+
* @returns {Promise<Page>} - Playwright page
|
|
260
|
+
*/
|
|
261
|
+
async initializePage(options) {
|
|
262
|
+
// Apply localization if enabled
|
|
263
|
+
let processedOptions = options;
|
|
264
|
+
if (options.localization?.enabled) {
|
|
265
|
+
processedOptions = await this.applyLocalization(options);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
// Check if stealth mode is enabled
|
|
269
|
+
if (processedOptions.stealthMode && processedOptions.stealthMode.enabled) {
|
|
270
|
+
return await this.createStealthPage(processedOptions);
|
|
271
|
+
} else {
|
|
272
|
+
// Standard browser initialization
|
|
273
|
+
await this.initBrowser();
|
|
274
|
+
return await this.createPage(processedOptions);
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* Create stealth page with anti-detection measures
|
|
280
|
+
* @param {Object} options - Processing options
|
|
281
|
+
* @returns {Promise<Page>} - Stealth-enabled page
|
|
282
|
+
*/
|
|
283
|
+
async createStealthPage(options) {
|
|
284
|
+
// Initialize stealth manager if needed
|
|
285
|
+
if (!this.stealthManager) {
|
|
286
|
+
this.stealthManager = new StealthBrowserManager();
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
// Initialize human behavior simulator if needed
|
|
290
|
+
if (!this.humanBehaviorSimulator && options.humanBehavior?.enabled) {
|
|
291
|
+
this.humanBehaviorSimulator = new HumanBehaviorSimulator({
|
|
292
|
+
mouseMovements: {
|
|
293
|
+
enabled: options.humanBehavior.mouseMovements,
|
|
294
|
+
speed: 'normal',
|
|
295
|
+
accuracy: 0.8,
|
|
296
|
+
naturalCurves: true
|
|
297
|
+
},
|
|
298
|
+
typing: {
|
|
299
|
+
enabled: options.humanBehavior.typingVariation,
|
|
300
|
+
speed: 'normal',
|
|
301
|
+
variability: 0.3,
|
|
302
|
+
mistakes: {
|
|
303
|
+
enabled: true,
|
|
304
|
+
frequency: 0.02
|
|
305
|
+
}
|
|
306
|
+
},
|
|
307
|
+
scrolling: {
|
|
308
|
+
enabled: options.humanBehavior.scrollBehavior,
|
|
309
|
+
naturalAcceleration: true,
|
|
310
|
+
randomPauses: true
|
|
311
|
+
},
|
|
312
|
+
interactions: {
|
|
313
|
+
hoverBeforeClick: true,
|
|
314
|
+
focusBlurSimulation: true,
|
|
315
|
+
idlePeriods: {
|
|
316
|
+
enabled: options.humanBehavior.idlePeriods,
|
|
317
|
+
frequency: 0.1
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
});
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
// Launch stealth browser
|
|
324
|
+
await this.stealthManager.launchStealthBrowser({
|
|
325
|
+
level: options.stealthMode.level,
|
|
326
|
+
randomizeFingerprint: options.stealthMode.randomizeFingerprint,
|
|
327
|
+
hideWebDriver: options.stealthMode.hideWebDriver,
|
|
328
|
+
blockWebRTC: options.stealthMode.blockWebRTC,
|
|
329
|
+
customUserAgent: options.stealthMode.customUserAgent || options.userAgent
|
|
330
|
+
});
|
|
331
|
+
|
|
332
|
+
// Create stealth context
|
|
333
|
+
const { context, contextId } = await this.stealthManager.createStealthContext({
|
|
334
|
+
level: options.stealthMode.level,
|
|
335
|
+
customViewport: {
|
|
336
|
+
width: options.viewportWidth,
|
|
337
|
+
height: options.viewportHeight
|
|
338
|
+
}
|
|
339
|
+
});
|
|
340
|
+
|
|
341
|
+
// Create stealth page
|
|
342
|
+
const page = await this.stealthManager.createStealthPage(contextId);
|
|
343
|
+
|
|
344
|
+
// Store context for cleanup
|
|
345
|
+
this.activeContexts.set(contextId, { context, page });
|
|
346
|
+
|
|
347
|
+
// Apply additional stealth configurations
|
|
348
|
+
await this.applyStealthMiddleware(page, options);
|
|
349
|
+
|
|
350
|
+
return page;
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
/**
|
|
354
|
+
* Apply additional stealth middleware to page
|
|
355
|
+
* @param {Page} page - Playwright page
|
|
356
|
+
* @param {Object} options - Processing options
|
|
357
|
+
* @returns {Promise<void>}
|
|
358
|
+
*/
|
|
359
|
+
async applyStealthMiddleware(page, options) {
|
|
360
|
+
// Set cookies if provided
|
|
361
|
+
if (options.cookies && options.cookies.length > 0) {
|
|
362
|
+
await page.context().addCookies(options.cookies);
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
// Block unnecessary resources with stealth considerations
|
|
366
|
+
if (options.blockResources && options.blockResources.length > 0) {
|
|
367
|
+
await page.route('**/*', (route) => {
|
|
368
|
+
const resourceType = route.request().resourceType();
|
|
369
|
+
const url = route.request().url();
|
|
370
|
+
|
|
371
|
+
// Don't block detection-related resources
|
|
372
|
+
if (url.includes('webdriver') || url.includes('selenium') || url.includes('puppeteer')) {
|
|
373
|
+
route.abort();
|
|
374
|
+
return;
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
if (options.blockResources.includes(resourceType)) {
|
|
378
|
+
route.abort();
|
|
379
|
+
} else {
|
|
380
|
+
route.continue();
|
|
381
|
+
}
|
|
382
|
+
});
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
// Disable images if requested (with stealth considerations)
|
|
386
|
+
if (!options.enableImages) {
|
|
387
|
+
await page.route('**/*.{jpg,jpeg,png,gif,webp,svg}', (route) => {
|
|
388
|
+
// Allow favicon and small images that might be used for tracking
|
|
389
|
+
const url = route.request().url();
|
|
390
|
+
if (url.includes('favicon') || url.includes('tracking') || url.includes('analytics')) {
|
|
391
|
+
route.continue();
|
|
392
|
+
} else {
|
|
393
|
+
route.abort();
|
|
394
|
+
}
|
|
395
|
+
});
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
// Add extra stealth protections
|
|
399
|
+
await page.addInitScript(() => {
|
|
400
|
+
// Additional webdriver detection removal
|
|
401
|
+
delete window.navigator.__proto__.webdriver;
|
|
402
|
+
|
|
403
|
+
// Override chrome runtime
|
|
404
|
+
window.chrome = {
|
|
405
|
+
runtime: {
|
|
406
|
+
onConnect: undefined,
|
|
407
|
+
onMessage: undefined
|
|
408
|
+
}
|
|
409
|
+
};
|
|
410
|
+
|
|
411
|
+
// Mock notification permission
|
|
412
|
+
Object.defineProperty(Notification, 'permission', {
|
|
413
|
+
get: () => 'granted'
|
|
414
|
+
});
|
|
415
|
+
|
|
416
|
+
// Hide headless indicators
|
|
417
|
+
Object.defineProperty(navigator, 'hardwareConcurrency', {
|
|
418
|
+
get: () => 4
|
|
419
|
+
});
|
|
420
|
+
});
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
/**
|
|
424
|
+
* Apply localization settings to browser options
|
|
425
|
+
* @param {Object} options - Original options
|
|
426
|
+
* @returns {Object} - Localized options
|
|
427
|
+
*/
|
|
428
|
+
async applyLocalization(options) {
|
|
429
|
+
const { localization } = options;
|
|
430
|
+
|
|
431
|
+
try {
|
|
432
|
+
// Get localization configuration
|
|
433
|
+
const localizationConfig = await this.localizationManager.localizeBrowserContext(
|
|
434
|
+
options,
|
|
435
|
+
localization.countryCode
|
|
436
|
+
);
|
|
437
|
+
|
|
438
|
+
// Merge localized settings
|
|
439
|
+
const localizedOptions = {
|
|
440
|
+
...options,
|
|
441
|
+
...localizationConfig,
|
|
442
|
+
|
|
443
|
+
// Override specific browser settings
|
|
444
|
+
locale: localizationConfig.locale,
|
|
445
|
+
timezoneId: localizationConfig.timezoneId,
|
|
446
|
+
geolocation: localization.customLocation || localizationConfig.geolocation,
|
|
447
|
+
extraHeaders: {
|
|
448
|
+
...options.extraHeaders,
|
|
449
|
+
...localizationConfig.extraHTTPHeaders
|
|
450
|
+
},
|
|
451
|
+
userAgent: localizationConfig.userAgent || options.userAgent
|
|
452
|
+
};
|
|
453
|
+
|
|
454
|
+
// Add timezone spoofing script if enabled
|
|
455
|
+
if (localization.enableTimezoneSpoof) {
|
|
456
|
+
const timezoneScript = await this.localizationManager.generateTimezoneSpoof(
|
|
457
|
+
localization.countryCode
|
|
458
|
+
);
|
|
459
|
+
localizedOptions.timezoneSpoof = timezoneScript;
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
return localizedOptions;
|
|
463
|
+
|
|
464
|
+
} catch (error) {
|
|
465
|
+
console.warn('Failed to apply localization, using default options:', error.message);
|
|
466
|
+
return options;
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
/**
|
|
471
|
+
* Create new page with specified options
|
|
472
|
+
* @param {Object} options - Page options
|
|
473
|
+
* @returns {Promise<Page>} - Playwright page
|
|
474
|
+
*/
|
|
475
|
+
async createPage(options) {
|
|
476
|
+
const contextOptions = {
|
|
477
|
+
viewport: {
|
|
478
|
+
width: options.viewportWidth,
|
|
479
|
+
height: options.viewportHeight
|
|
480
|
+
},
|
|
481
|
+
userAgent: options.userAgent,
|
|
482
|
+
extraHTTPHeaders: options.extraHeaders,
|
|
483
|
+
deviceScaleFactor: options.mobileEmulation ? 2 : 1,
|
|
484
|
+
isMobile: options.mobileEmulation,
|
|
485
|
+
hasTouch: options.mobileEmulation
|
|
486
|
+
};
|
|
487
|
+
|
|
488
|
+
// Add localization-specific context options
|
|
489
|
+
if (options.locale) {
|
|
490
|
+
contextOptions.locale = options.locale;
|
|
491
|
+
}
|
|
492
|
+
if (options.timezoneId) {
|
|
493
|
+
contextOptions.timezoneId = options.timezoneId;
|
|
494
|
+
}
|
|
495
|
+
if (options.geolocation) {
|
|
496
|
+
contextOptions.geolocation = options.geolocation;
|
|
497
|
+
}
|
|
498
|
+
if (options.proxy) {
|
|
499
|
+
contextOptions.proxy = options.proxy;
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
const context = await this.browser.newContext(contextOptions);
|
|
503
|
+
const page = await context.newPage();
|
|
504
|
+
|
|
505
|
+
// Inject timezone spoofing script if provided
|
|
506
|
+
if (options.timezoneSpoof) {
|
|
507
|
+
await page.addInitScript(options.timezoneSpoof);
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
// Set cookies if provided
|
|
511
|
+
if (options.cookies && options.cookies.length > 0) {
|
|
512
|
+
await context.addCookies(options.cookies);
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
// Block unnecessary resources
|
|
516
|
+
if (options.blockResources && options.blockResources.length > 0) {
|
|
517
|
+
await page.route('**/*', (route) => {
|
|
518
|
+
const resourceType = route.request().resourceType();
|
|
519
|
+
if (options.blockResources.includes(resourceType)) {
|
|
520
|
+
route.abort();
|
|
521
|
+
} else {
|
|
522
|
+
route.continue();
|
|
523
|
+
}
|
|
524
|
+
});
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
// Disable images if requested
|
|
528
|
+
if (!options.enableImages) {
|
|
529
|
+
await page.route('**/*.{jpg,jpeg,png,gif,webp,svg}', (route) => {
|
|
530
|
+
route.abort();
|
|
531
|
+
});
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
// Disable JavaScript if requested
|
|
535
|
+
if (!options.enableJavaScript) {
|
|
536
|
+
await context.setExtraHTTPHeaders({
|
|
537
|
+
'Content-Security-Policy': 'script-src \'none\''
|
|
538
|
+
});
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
return page;
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
/**
|
|
545
|
+
* Navigate to URL and wait for content to load
|
|
546
|
+
* @param {Page} page - Playwright page
|
|
547
|
+
* @param {string} url - URL to navigate to
|
|
548
|
+
* @param {Object} options - Navigation options
|
|
549
|
+
* @returns {Promise<Object>} - Navigation result
|
|
550
|
+
*/
|
|
551
|
+
async navigateAndWait(page, url, options) {
|
|
552
|
+
const startTime = Date.now();
|
|
553
|
+
|
|
554
|
+
// Navigate to URL
|
|
555
|
+
await page.goto(url, {
|
|
556
|
+
waitUntil: 'domcontentloaded',
|
|
557
|
+
timeout: 30000
|
|
558
|
+
});
|
|
559
|
+
|
|
560
|
+
// Wait for specific selector if provided
|
|
561
|
+
if (options.waitForSelector) {
|
|
562
|
+
try {
|
|
563
|
+
await page.waitForSelector(options.waitForSelector, {
|
|
564
|
+
timeout: options.waitForTimeout
|
|
565
|
+
});
|
|
566
|
+
} catch (error) {
|
|
567
|
+
console.warn(`Selector "${options.waitForSelector}" not found within timeout`);
|
|
568
|
+
}
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
// Wait for custom function if provided
|
|
572
|
+
if (options.waitForFunction) {
|
|
573
|
+
try {
|
|
574
|
+
await page.waitForFunction(options.waitForFunction, {
|
|
575
|
+
timeout: options.waitForTimeout
|
|
576
|
+
});
|
|
577
|
+
} catch (error) {
|
|
578
|
+
console.warn(`Wait function failed: ${error.message}`);
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
// General timeout wait
|
|
583
|
+
await page.waitForTimeout(Math.min(options.waitForTimeout, 10000));
|
|
584
|
+
|
|
585
|
+
// Scroll to bottom if requested (for lazy loading)
|
|
586
|
+
if (options.scrollToBottom) {
|
|
587
|
+
await this.scrollToBottom(page, options);
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
// Execute custom script if provided
|
|
591
|
+
if (options.executeScript) {
|
|
592
|
+
try {
|
|
593
|
+
await page.evaluate(options.executeScript);
|
|
594
|
+
} catch (error) {
|
|
595
|
+
console.warn(`Custom script execution failed: ${error.message}`);
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
|
|
599
|
+
return {
|
|
600
|
+
navigationTime: Date.now() - startTime
|
|
601
|
+
};
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
/**
|
|
605
|
+
* Extract content from page
|
|
606
|
+
* @param {Page} page - Playwright page
|
|
607
|
+
* @param {Object} options - Extraction options
|
|
608
|
+
* @returns {Promise<Object>} - Extracted content
|
|
609
|
+
*/
|
|
610
|
+
async extractContent(page, options) {
|
|
611
|
+
// Get HTML content
|
|
612
|
+
const html = await page.content();
|
|
613
|
+
|
|
614
|
+
// Get text content
|
|
615
|
+
const text = await page.evaluate(() => {
|
|
616
|
+
// Remove script and style elements
|
|
617
|
+
const scripts = document.querySelectorAll('script, style, noscript');
|
|
618
|
+
scripts.forEach(el => el.remove());
|
|
619
|
+
|
|
620
|
+
return document.body ? document.body.innerText : '';
|
|
621
|
+
});
|
|
622
|
+
|
|
623
|
+
// Get page title
|
|
624
|
+
const title = await page.title();
|
|
625
|
+
|
|
626
|
+
return {
|
|
627
|
+
html,
|
|
628
|
+
text: text.trim(),
|
|
629
|
+
title
|
|
630
|
+
};
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
/**
|
|
634
|
+
* Analyze dynamic content characteristics
|
|
635
|
+
* @param {Page} page - Playwright page
|
|
636
|
+
* @returns {Promise<Object>} - Dynamic content analysis
|
|
637
|
+
*/
|
|
638
|
+
async analyzeDynamicContent(page) {
|
|
639
|
+
return await page.evaluate(() => {
|
|
640
|
+
const analysis = {
|
|
641
|
+
detectedFrameworks: [],
|
|
642
|
+
hasLazyLoading: false,
|
|
643
|
+
hasDynamicContent: false,
|
|
644
|
+
scriptCount: 0,
|
|
645
|
+
ajaxRequests: []
|
|
646
|
+
};
|
|
647
|
+
|
|
648
|
+
// Count scripts
|
|
649
|
+
analysis.scriptCount = document.querySelectorAll('script').length;
|
|
650
|
+
|
|
651
|
+
// Detect frameworks
|
|
652
|
+
if (window.React || document.querySelector('[data-reactroot]')) {
|
|
653
|
+
analysis.detectedFrameworks.push('React');
|
|
654
|
+
}
|
|
655
|
+
if (window.Vue || document.querySelector('[data-v-]')) {
|
|
656
|
+
analysis.detectedFrameworks.push('Vue.js');
|
|
657
|
+
}
|
|
658
|
+
if (window.angular || document.querySelector('[ng-app], [data-ng-app]')) {
|
|
659
|
+
analysis.detectedFrameworks.push('Angular');
|
|
660
|
+
}
|
|
661
|
+
if (window.jQuery || window.$) {
|
|
662
|
+
analysis.detectedFrameworks.push('jQuery');
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
// Check for lazy loading
|
|
666
|
+
const lazyImages = document.querySelectorAll('[loading="lazy"], [data-src], .lazy');
|
|
667
|
+
analysis.hasLazyLoading = lazyImages.length > 0;
|
|
668
|
+
|
|
669
|
+
// Check for dynamic content indicators
|
|
670
|
+
const dynamicIndicators = document.querySelectorAll(
|
|
671
|
+
'[data-bind], [v-if], [v-for], [ng-if], [ng-repeat], [*ngFor], [*ngIf]'
|
|
672
|
+
);
|
|
673
|
+
analysis.hasDynamicContent = dynamicIndicators.length > 0 || analysis.detectedFrameworks.length > 0;
|
|
674
|
+
|
|
675
|
+
return analysis;
|
|
676
|
+
});
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
/**
|
|
680
|
+
* Get performance metrics
|
|
681
|
+
* @param {Page} page - Playwright page
|
|
682
|
+
* @returns {Promise<Object>} - Performance metrics
|
|
683
|
+
*/
|
|
684
|
+
async getPerformanceMetrics(page) {
|
|
685
|
+
return await page.evaluate(() => {
|
|
686
|
+
const metrics = {
|
|
687
|
+
domContentLoaded: 0,
|
|
688
|
+
loadComplete: 0
|
|
689
|
+
};
|
|
690
|
+
|
|
691
|
+
if (window.performance && window.performance.timing) {
|
|
692
|
+
const timing = window.performance.timing;
|
|
693
|
+
metrics.domContentLoaded = timing.domContentLoadedEventEnd - timing.navigationStart;
|
|
694
|
+
metrics.loadComplete = timing.loadEventEnd - timing.navigationStart;
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// Try to get Paint Timing metrics
|
|
698
|
+
if (window.performance && window.performance.getEntriesByType) {
|
|
699
|
+
const paintEntries = window.performance.getEntriesByType('paint');
|
|
700
|
+
paintEntries.forEach(entry => {
|
|
701
|
+
if (entry.name === 'first-contentful-paint') {
|
|
702
|
+
metrics.firstContentfulPaint = entry.startTime;
|
|
703
|
+
}
|
|
704
|
+
});
|
|
705
|
+
|
|
706
|
+
const navigationEntries = window.performance.getEntriesByType('largest-contentful-paint');
|
|
707
|
+
if (navigationEntries.length > 0) {
|
|
708
|
+
metrics.largestContentfulPaint = navigationEntries[navigationEntries.length - 1].startTime;
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
return metrics;
|
|
713
|
+
});
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
/**
|
|
717
|
+
* Capture screenshot
|
|
718
|
+
* @param {Page} page - Playwright page
|
|
719
|
+
* @returns {Promise<string>} - Base64 encoded screenshot
|
|
720
|
+
*/
|
|
721
|
+
async captureScreenshot(page) {
|
|
722
|
+
try {
|
|
723
|
+
const screenshot = await page.screenshot({
|
|
724
|
+
type: 'png',
|
|
725
|
+
fullPage: false,
|
|
726
|
+
encoding: 'base64'
|
|
727
|
+
});
|
|
728
|
+
return screenshot;
|
|
729
|
+
} catch (error) {
|
|
730
|
+
console.warn(`Screenshot capture failed: ${error.message}`);
|
|
731
|
+
return null;
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
/**
|
|
736
|
+
* Scroll to bottom of page to trigger lazy loading
|
|
737
|
+
* @param {Page} page - Playwright page
|
|
738
|
+
* @returns {Promise<void>}
|
|
739
|
+
*/
|
|
740
|
+
async scrollToBottom(page, options = {}) {
|
|
741
|
+
// Use human behavior simulation if available
|
|
742
|
+
if (this.humanBehaviorSimulator && options.humanBehavior?.enabled) {
|
|
743
|
+
const scrollHeight = await page.evaluate(() => document.body.scrollHeight);
|
|
744
|
+
const viewportHeight = await page.evaluate(() => window.innerHeight);
|
|
745
|
+
const totalDistance = scrollHeight - viewportHeight;
|
|
746
|
+
|
|
747
|
+
if (totalDistance > 0) {
|
|
748
|
+
await this.humanBehaviorSimulator.simulateScroll(page, {
|
|
749
|
+
direction: 'down',
|
|
750
|
+
distance: totalDistance,
|
|
751
|
+
duration: 2000 + Math.random() * 3000 // 2-5 seconds
|
|
752
|
+
});
|
|
753
|
+
}
|
|
754
|
+
} else {
|
|
755
|
+
// Standard scroll behavior
|
|
756
|
+
await page.evaluate(async () => {
|
|
757
|
+
await new Promise(resolve => {
|
|
758
|
+
let totalHeight = 0;
|
|
759
|
+
const distance = 100;
|
|
760
|
+
const timer = setInterval(() => {
|
|
761
|
+
const scrollHeight = document.body.scrollHeight;
|
|
762
|
+
window.scrollBy(0, distance);
|
|
763
|
+
totalHeight += distance;
|
|
764
|
+
|
|
765
|
+
if (totalHeight >= scrollHeight) {
|
|
766
|
+
clearInterval(timer);
|
|
767
|
+
resolve();
|
|
768
|
+
}
|
|
769
|
+
}, 100);
|
|
770
|
+
});
|
|
771
|
+
});
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
// Wait a bit for any lazy content to load
|
|
775
|
+
await page.waitForTimeout(2000);
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
/**
|
|
779
|
+
* Process multiple URLs concurrently
|
|
780
|
+
* @param {Array} urls - Array of URLs to process
|
|
781
|
+
* @param {Object} options - Processing options
|
|
782
|
+
* @returns {Promise<Array>} - Array of processing results
|
|
783
|
+
*/
|
|
784
|
+
async processMultipleURLs(urls, options = {}) {
|
|
785
|
+
const concurrency = options.concurrency || 3;
|
|
786
|
+
const results = [];
|
|
787
|
+
|
|
788
|
+
// Initialize browser once for all requests
|
|
789
|
+
await this.initBrowser();
|
|
790
|
+
|
|
791
|
+
try {
|
|
792
|
+
// Process in batches
|
|
793
|
+
for (let i = 0; i < urls.length; i += concurrency) {
|
|
794
|
+
const batch = urls.slice(i, i + concurrency);
|
|
795
|
+
const batchPromises = batch.map(url => {
|
|
796
|
+
const params = typeof url === 'string'
|
|
797
|
+
? { url, options }
|
|
798
|
+
: { ...url, options: { ...options, ...url.options } };
|
|
799
|
+
|
|
800
|
+
return this.processURL(params).catch(error => ({
|
|
801
|
+
url: params.url,
|
|
802
|
+
success: false,
|
|
803
|
+
error: error.message,
|
|
804
|
+
processedAt: new Date().toISOString(),
|
|
805
|
+
loadTime: 0,
|
|
806
|
+
html: '',
|
|
807
|
+
text: '',
|
|
808
|
+
title: '',
|
|
809
|
+
dynamicContent: {
|
|
810
|
+
detectedFrameworks: [],
|
|
811
|
+
hasLazyLoading: false,
|
|
812
|
+
hasDynamicContent: false,
|
|
813
|
+
scriptCount: 0,
|
|
814
|
+
ajaxRequests: []
|
|
815
|
+
},
|
|
816
|
+
metrics: {
|
|
817
|
+
domContentLoaded: 0,
|
|
818
|
+
loadComplete: 0
|
|
819
|
+
}
|
|
820
|
+
}));
|
|
821
|
+
});
|
|
822
|
+
|
|
823
|
+
const batchResults = await Promise.all(batchPromises);
|
|
824
|
+
results.push(...batchResults);
|
|
825
|
+
}
|
|
826
|
+
} finally {
|
|
827
|
+
// Clean up browser
|
|
828
|
+
await this.cleanup();
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
return results;
|
|
832
|
+
}
|
|
833
|
+
|
|
834
|
+
/**
|
|
835
|
+
* Clean up browser resources
|
|
836
|
+
* @returns {Promise<void>}
|
|
837
|
+
*/
|
|
838
|
+
async cleanup() {
|
|
839
|
+
// Clean up stealth contexts first
|
|
840
|
+
for (const [contextId, contextData] of this.activeContexts.entries()) {
|
|
841
|
+
try {
|
|
842
|
+
await contextData.page.close();
|
|
843
|
+
await contextData.context.close();
|
|
844
|
+
} catch (error) {
|
|
845
|
+
console.warn(`Failed to close stealth context ${contextId}:`, error.message);
|
|
846
|
+
}
|
|
847
|
+
}
|
|
848
|
+
this.activeContexts.clear();
|
|
849
|
+
|
|
850
|
+
// Clean up stealth manager
|
|
851
|
+
if (this.stealthManager) {
|
|
852
|
+
await this.stealthManager.cleanup();
|
|
853
|
+
this.stealthManager = null;
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
// Clean up regular browser
|
|
857
|
+
if (this.browser) {
|
|
858
|
+
await this.browser.close();
|
|
859
|
+
this.browser = null;
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
// Reset human behavior simulator
|
|
863
|
+
if (this.humanBehaviorSimulator) {
|
|
864
|
+
this.humanBehaviorSimulator.resetStats();
|
|
865
|
+
this.humanBehaviorSimulator = null;
|
|
866
|
+
}
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
/**
|
|
870
|
+
* Check if URL likely requires JavaScript rendering
|
|
871
|
+
* @param {string} url - URL to check
|
|
872
|
+
* @param {string} html - Optional HTML content for analysis
|
|
873
|
+
* @returns {Promise<Object>} - Analysis result
|
|
874
|
+
*/
|
|
875
|
+
async requiresJavaScript(url, html = null) {
|
|
876
|
+
const analysis = {
|
|
877
|
+
likely: false,
|
|
878
|
+
confidence: 0,
|
|
879
|
+
indicators: []
|
|
880
|
+
};
|
|
881
|
+
|
|
882
|
+
// URL-based indicators
|
|
883
|
+
const urlIndicators = [
|
|
884
|
+
{ pattern: /\.(js|jsx|ts|tsx)$/, weight: 0.9, name: 'JavaScript file extension' },
|
|
885
|
+
{ pattern: /#/, weight: 0.3, name: 'Hash-based routing' },
|
|
886
|
+
{ pattern: /\/(app|spa|dashboard|admin)/, weight: 0.4, name: 'SPA-like path' }
|
|
887
|
+
];
|
|
888
|
+
|
|
889
|
+
urlIndicators.forEach(indicator => {
|
|
890
|
+
if (indicator.pattern.test(url)) {
|
|
891
|
+
analysis.confidence += indicator.weight;
|
|
892
|
+
analysis.indicators.push(indicator.name);
|
|
893
|
+
}
|
|
894
|
+
});
|
|
895
|
+
|
|
896
|
+
// HTML-based indicators (if provided)
|
|
897
|
+
if (html) {
|
|
898
|
+
const htmlIndicators = [
|
|
899
|
+
{ pattern: /data-reactroot|ReactDOM\.render/i, weight: 0.8, name: 'React framework' },
|
|
900
|
+
{ pattern: /ng-app|angular\.module/i, weight: 0.8, name: 'Angular framework' },
|
|
901
|
+
{ pattern: /v-if|v-for|new Vue/i, weight: 0.8, name: 'Vue.js framework' },
|
|
902
|
+
{ pattern: /<script[^>]*src.*\.js/gi, weight: 0.1, name: 'External JavaScript' },
|
|
903
|
+
{ pattern: /data-bind|knockout/i, weight: 0.6, name: 'Knockout.js' },
|
|
904
|
+
{ pattern: /ember-application|Ember\.Application/i, weight: 0.7, name: 'Ember.js' }
|
|
905
|
+
];
|
|
906
|
+
|
|
907
|
+
htmlIndicators.forEach(indicator => {
|
|
908
|
+
const matches = html.match(indicator.pattern);
|
|
909
|
+
if (matches) {
|
|
910
|
+
const weight = indicator.weight * Math.min(matches.length, 3);
|
|
911
|
+
analysis.confidence += weight;
|
|
912
|
+
analysis.indicators.push(`${indicator.name} (${matches.length} matches)`);
|
|
913
|
+
}
|
|
914
|
+
});
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
analysis.likely = analysis.confidence > 0.5;
|
|
918
|
+
analysis.confidence = Math.min(1, analysis.confidence);
|
|
919
|
+
|
|
920
|
+
return analysis;
|
|
921
|
+
}
|
|
922
|
+
|
|
923
|
+
/**
|
|
924
|
+
* Get stealth mode statistics
|
|
925
|
+
* @returns {Object} Stealth statistics
|
|
926
|
+
*/
|
|
927
|
+
getStealthStats() {
|
|
928
|
+
const stats = {
|
|
929
|
+
stealthManagerActive: !!this.stealthManager,
|
|
930
|
+
humanBehaviorActive: !!this.humanBehaviorSimulator,
|
|
931
|
+
activeContexts: this.activeContexts.size,
|
|
932
|
+
stealthStats: null,
|
|
933
|
+
behaviorStats: null
|
|
934
|
+
};
|
|
935
|
+
|
|
936
|
+
if (this.stealthManager) {
|
|
937
|
+
stats.stealthStats = this.stealthManager.getStats();
|
|
938
|
+
}
|
|
939
|
+
|
|
940
|
+
if (this.humanBehaviorSimulator) {
|
|
941
|
+
stats.behaviorStats = this.humanBehaviorSimulator.getStats();
|
|
942
|
+
}
|
|
943
|
+
|
|
944
|
+
return stats;
|
|
945
|
+
}
|
|
946
|
+
|
|
947
|
+
/**
|
|
948
|
+
* Update stealth configuration
|
|
949
|
+
* @param {Object} stealthConfig - New stealth configuration
|
|
950
|
+
* @returns {void}
|
|
951
|
+
*/
|
|
952
|
+
updateStealthConfig(stealthConfig) {
|
|
953
|
+
// Update default options
|
|
954
|
+
this.defaultOptions.stealthMode = {
|
|
955
|
+
...this.defaultOptions.stealthMode,
|
|
956
|
+
...stealthConfig
|
|
957
|
+
};
|
|
958
|
+
|
|
959
|
+
// If human behavior simulator exists, update its config
|
|
960
|
+
if (this.humanBehaviorSimulator && stealthConfig.humanBehavior) {
|
|
961
|
+
this.humanBehaviorSimulator.updateConfig(stealthConfig.humanBehavior);
|
|
962
|
+
}
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
/**
|
|
966
|
+
* Enable stealth mode with specified level
|
|
967
|
+
* @param {string} level - Stealth level ('basic', 'medium', 'advanced')
|
|
968
|
+
* @returns {void}
|
|
969
|
+
*/
|
|
970
|
+
enableStealthMode(level = 'medium') {
|
|
971
|
+
this.defaultOptions.stealthMode.enabled = true;
|
|
972
|
+
this.defaultOptions.stealthMode.level = level;
|
|
973
|
+
this.defaultOptions.humanBehavior.enabled = true;
|
|
974
|
+
}
|
|
975
|
+
|
|
976
|
+
/**
|
|
977
|
+
* Disable stealth mode
|
|
978
|
+
* @returns {void}
|
|
979
|
+
*/
|
|
980
|
+
disableStealthMode() {
|
|
981
|
+
this.defaultOptions.stealthMode.enabled = false;
|
|
982
|
+
this.defaultOptions.humanBehavior.enabled = false;
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
|
|
986
|
+
export default BrowserProcessor;
|