crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,652 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wave 3 Security Validation Utilities
|
|
3
|
+
* Provides security validation, sanitization, and protection functions
|
|
4
|
+
* for Wave 3 features in the CrawlForge MCP Server project.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { z } from 'zod';
|
|
8
|
+
import { createHash, randomBytes, timingSafeEqual } from 'crypto';
|
|
9
|
+
import path from 'path';
|
|
10
|
+
import { URL } from 'url';
|
|
11
|
+
import DOMPurify from 'isomorphic-dompurify';
|
|
12
|
+
|
|
13
|
+
// Security configuration shared by every helper in this module.
const SECURITY_CONFIG = {
  // SSRF protection: hosts that may be contacted. An entry containing "*" is a wildcard rule.
  allowedDomains: [
    'google.com',
    'bing.com',
    'duckduckgo.com',
    'wikipedia.org',
    'archive.org',
    '*.edu',
    '*.gov'
  ],

  // SSRF protection: hosts that must never be contacted ("*" is a wildcard).
  blockedDomains: [
    'localhost',
    '127.0.0.1',
    '[::1]', // IPv6 loopback, written the way URL.hostname reports it
    '0.0.0.0',
    '10.*',
    // RFC 1918 reserves 172.16.0.0/12, i.e. 172.16.* through 172.31.*,
    // not just 172.16.*.
    ...Array.from({ length: 16 }, (_, offset) => `172.${16 + offset}.*`),
    '192.168.*',
    '169.254.*', // Link-local range; includes the cloud metadata address 169.254.169.254
    'metadata.google.internal'
  ],

  // Resource limits
  maxContentSize: 50 * 1024 * 1024, // 50MB
  maxSnapshotSize: 100 * 1024 * 1024, // 100MB
  maxResearchUrls: 50,
  maxResearchTime: 120000, // 2 minutes

  // Path validation: directories (relative to the working directory) that may be used
  allowedDirectories: [
    './snapshots',
    './cache',
    './temp'
  ],

  // Content validation
  maxStringLength: 10000,
  maxArrayLength: 100,

  // Rate limiting
  defaultRateLimit: {
    windowMs: 60 * 1000, // 1 minute
    max: 100 // requests per window
  }
};
|
|
60
|
+
|
|
61
|
+
/**
 * SSRF Protection Functions
 *
 * Checks outbound URLs against the protocol allow-list and the
 * SECURITY_CONFIG.blockedDomains / SECURITY_CONFIG.allowedDomains rules
 * before anything is requested.
 */
export class SSRFProtection {
  /**
   * Tests a hostname against a single allow/block rule.
   *
   * A rule containing "*" is a wildcard: every "*" matches any run of
   * characters, everything else is matched literally, and the rule must match
   * the WHOLE hostname. Without anchoring and escaping, "*.edu" would accept
   * "myeducation.evil.com" and "10.*" would reject "site10x.com".
   * A rule without "*" matches the host itself and any of its subdomains.
   *
   * @param {string} hostname - Lower-cased hostname without a trailing dot.
   * @param {string} rule - Entry from the allow or block list.
   * @returns {boolean} True when the hostname is covered by the rule.
   */
  static #hostMatchesRule(hostname, rule) {
    if (rule.includes('*')) {
      const source = rule
        .split('*')
        .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('.*');
      return new RegExp(`^${source}$`).test(hostname);
    }
    return hostname === rule || hostname.endsWith(`.${rule}`);
  }

  /**
   * Parses and validates a URL.
   *
   * Checks run in this order: syntax, protocol, block list, allow list (only
   * when it is non-empty), direct IP literal.
   *
   * @param {string} url - URL to validate.
   * @returns {URL} The parsed URL when every check passes.
   * @throws {Error} 'Invalid URL format', 'Invalid protocol. ...',
   *   'Blocked domain: <host>', 'Domain not in whitelist: <host>' or
   *   'Direct IP access not allowed'.
   */
  static validateUrl(url) {
    let parsedUrl;
    try {
      parsedUrl = new URL(url);
    } catch (error) {
      if (error instanceof TypeError) {
        throw new Error('Invalid URL format', { cause: error });
      }
      throw error;
    }

    // Check protocol
    if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
      throw new Error('Invalid protocol. Only HTTP and HTTPS are allowed.');
    }

    // Drop the DNS root dot so "localhost." cannot slip past exact-match rules.
    const hostname = parsedUrl.hostname.toLowerCase().replace(/\.$/, '');

    // Check for blocked domains
    for (const blocked of SECURITY_CONFIG.blockedDomains) {
      if (SSRFProtection.#hostMatchesRule(hostname, blocked)) {
        throw new Error(`Blocked domain: ${hostname}`);
      }
    }

    // Check for allowed domains (if whitelist is used)
    const { allowedDomains } = SECURITY_CONFIG;
    if (allowedDomains.length > 0) {
      const isAllowed = allowedDomains.some((allowed) =>
        SSRFProtection.#hostMatchesRule(hostname, allowed)
      );
      if (!isAllowed) {
        throw new Error(`Domain not in whitelist: ${hostname}`);
      }
    }

    // Reject IP literals: dotted-quad IPv4, and IPv6 (URL.hostname reports
    // IPv6 literals wrapped in square brackets).
    const ipv4Pattern = /^(\d{1,3}\.){3}\d{1,3}$/;
    if (ipv4Pattern.test(hostname) || hostname.startsWith('[')) {
      throw new Error('Direct IP access not allowed');
    }

    return parsedUrl;
  }

  /**
   * Validates a URL and then fetches it.
   *
   * The caller's options object (including its headers) is never mutated.
   * Authorization and Cookie headers are stripped regardless of letter case,
   * and a default User-Agent is added unless the caller supplied one.
   *
   * NOTE(review): only the initial URL is validated. fetch follows redirects
   * by default, so a redirect to an internal host is not re-checked here;
   * pass `redirect: 'manual'` (or 'error') in options when that matters.
   *
   * @param {string} url - URL to fetch.
   * @param {object} [options] - fetch options; `timeout` (ms) defaults to 30000.
   * @returns {Promise<Response>} Result of fetch().
   * @throws {Error} Any validateUrl error.
   */
  static async validateAndFetch(url, options = {}) {
    const validatedUrl = SSRFProtection.validateUrl(url);
    const timeout = options.timeout || 30000;

    // Accept plain objects as well as Headers instances / arrays of pairs.
    const supplied = options.headers;
    const suppliedEntries =
      supplied != null && typeof supplied[Symbol.iterator] === 'function'
        ? Object.fromEntries(supplied)
        : { ...supplied };

    // Remove sensitive headers (header names are case-insensitive).
    const sensitiveHeaders = ['authorization', 'cookie'];
    const headers = {};
    let hasUserAgent = false;
    for (const [name, value] of Object.entries(suppliedEntries)) {
      const lowered = name.toLowerCase();
      if (sensitiveHeaders.includes(lowered)) {
        continue;
      }
      if (lowered === 'user-agent') {
        hasUserAgent = true;
      }
      headers[name] = value;
    }
    if (!hasUserAgent) {
      headers['User-Agent'] = 'CrawlForge/3.0 (Security-Hardened)';
    }

    const fetchOptions = {
      ...options,
      timeout,
      // Native fetch ignores `timeout`; enforce it with an abort signal unless
      // the caller provided a signal of their own.
      signal: options.signal ?? AbortSignal.timeout(timeout),
      headers
    };

    return fetch(validatedUrl.toString(), fetchOptions);
  }
}
|
|
141
|
+
|
|
142
|
+
/**
 * Path Traversal Protection
 *
 * Helpers that keep file-system paths inside approved directories.
 */
export class PathSecurity {
  /**
   * Reports whether `candidate` is `root` itself or lies beneath it.
   * The comparison is made on a path-separator boundary, so "/data/cache-evil"
   * is not considered to be inside "/data/cache".
   *
   * @param {string} candidate - Absolute path to test.
   * @param {string} root - Absolute directory path.
   * @returns {boolean}
   */
  static #isInside(candidate, root) {
    if (candidate === root) {
      return true;
    }
    const prefix = root.endsWith(path.sep) ? root : root + path.sep;
    return candidate.startsWith(prefix);
  }

  /**
   * Resolves a user-supplied path against a base directory and rejects any
   * result that escapes that directory.
   *
   * @param {string} userPath - Untrusted relative (or absolute) path.
   * @param {string} baseDirectory - Directory the result must stay within.
   * @returns {string} Absolute, normalized path inside baseDirectory.
   * @throws {Error} 'Path traversal detected' when the path escapes the base.
   */
  static sanitizePath(userPath, baseDirectory) {
    // Normalize the path to resolve any .. or . components
    const normalizedPath = path.normalize(userPath);

    // Resolve the absolute path
    const absolutePath = path.resolve(baseDirectory, normalizedPath);
    const absoluteBase = path.resolve(baseDirectory);

    // Ensure the resolved path is within the base directory
    if (!PathSecurity.#isInside(absolutePath, absoluteBase)) {
      throw new Error('Path traversal detected');
    }

    return absolutePath;
  }

  /**
   * Validates a snapshot identifier.
   *
   * @param {string} snapshotId - Candidate identifier.
   * @returns {string} The identifier, unchanged.
   * @throws {Error} 'Invalid snapshot ID format' unless the value is a string
   *   of 1-64 letters, digits, underscores or hyphens.
   */
  static validateSnapshotId(snapshotId) {
    // The type guard matters: RegExp#test would coerce e.g. the number 12345
    // to a string and accept it. The character class already excludes ".",
    // "/" and "\", so no separate traversal check is needed.
    if (typeof snapshotId !== 'string' || !/^[a-zA-Z0-9_-]{1,64}$/.test(snapshotId)) {
      throw new Error('Invalid snapshot ID format');
    }

    return snapshotId;
  }

  /**
   * Checks that a directory is one of SECURITY_CONFIG.allowedDirectories or a
   * descendant of one.
   *
   * @param {string} directory - Directory to validate.
   * @returns {string} Absolute path of the directory.
   * @throws {Error} 'Directory not allowed' when no allowed directory contains it.
   */
  static validateDirectory(directory) {
    const absoluteDir = path.resolve(path.normalize(directory));

    // Check against allowed directories
    for (const allowed of SECURITY_CONFIG.allowedDirectories) {
      if (PathSecurity.#isInside(absoluteDir, path.resolve(allowed))) {
        return absoluteDir;
      }
    }

    throw new Error('Directory not allowed');
  }
}
|
|
192
|
+
|
|
193
|
+
/**
 * Input Validation and Sanitization
 */
export class InputSecurity {
  // Deny-list used by sanitizeJavaScript. The patterns carry no `g` flag, so
  // RegExp#test keeps no state between calls and the list can be shared.
  // NOTE(review): /Function\s*\(/i is case-insensitive and therefore also
  // rejects ordinary anonymous `function (...) {}` expressions.
  static #dangerousJsPatterns = Object.freeze([
    /eval\s*\(/i,
    /Function\s*\(/i,
    /setTimeout\s*\(/i,
    /setInterval\s*\(/i,
    /document\.write/i,
    /innerHTML/i,
    /outerHTML/i,
    /execScript/i,
    /script:/i,
    /javascript:/i,
    /data:/i,
    /vbscript:/i
  ]);

  /**
   * Sanitizes HTML with DOMPurify, keeping only basic text-formatting tags
   * and no attributes at all.
   *
   * @param {string} html - Untrusted HTML.
   * @returns {string} Sanitized HTML.
   */
  static sanitizeHtml(html) {
    return DOMPurify.sanitize(html, {
      ALLOWED_TAGS: ['p', 'br', 'strong', 'em', 'u', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
      ALLOWED_ATTR: []
    });
  }

  /**
   * Validates a string's type and length and strips control characters.
   *
   * @param {string} str - Input string.
   * @param {number} [maxLength] - Maximum length; defaults to
   *   SECURITY_CONFIG.maxStringLength. Checked before stripping.
   * @returns {string} The string without characters in U+0000-U+001F and
   *   U+007F-U+009F (this includes tabs and line breaks).
   * @throws {Error} When the input is not a string or is too long.
   */
  static sanitizeString(str, maxLength = SECURITY_CONFIG.maxStringLength) {
    if (typeof str !== 'string') {
      throw new Error('Input must be a string');
    }

    if (str.length > maxLength) {
      throw new Error(`String too long. Maximum length is ${maxLength}`);
    }

    // Remove null bytes and control characters
    return str.replace(/[\x00-\x1F\x7F-\x9F]/g, '');
  }

  /**
   * Validates that a value is an array within the length limit.
   * The elements themselves are not inspected or copied.
   *
   * @param {Array} arr - Input array.
   * @param {number} [maxLength] - Maximum element count; defaults to
   *   SECURITY_CONFIG.maxArrayLength.
   * @returns {Array} The same array instance.
   * @throws {Error} When the input is not an array or is too long.
   */
  static sanitizeArray(arr, maxLength = SECURITY_CONFIG.maxArrayLength) {
    if (!Array.isArray(arr)) {
      throw new Error('Input must be an array');
    }

    if (arr.length > maxLength) {
      throw new Error(`Array too long. Maximum length is ${maxLength}`);
    }

    return arr;
  }

  /**
   * Measures content in UTF-8 bytes and enforces SECURITY_CONFIG.maxContentSize.
   *
   * @param {string|Buffer} content - Content to measure.
   * @returns {number} Size in bytes.
   * @throws {Error} When the content exceeds the limit.
   */
  static validateContentSize(content) {
    const size = Buffer.byteLength(content, 'utf8');
    if (size > SECURITY_CONFIG.maxContentSize) {
      throw new Error(`Content too large: ${size} bytes (max: ${SECURITY_CONFIG.maxContentSize})`);
    }
    return size;
  }

  /**
   * Rejects JavaScript source that matches any deny-listed pattern.
   * Nothing is rewritten: the code is either returned untouched or refused.
   *
   * @param {string} jsCode - Script source to inspect.
   * @returns {string} The unchanged source when no pattern matches.
   * @throws {Error} 'Input must be a string' for non-string input, or
   *   'Dangerous JavaScript pattern detected' on a match.
   */
  static sanitizeJavaScript(jsCode) {
    // Only real strings are inspected. A non-string would be coerced by
    // RegExp#test on every check and then handed back unchanged, so an object
    // with a stateful toString() could look harmless here and differ later.
    if (typeof jsCode !== 'string') {
      throw new Error('Input must be a string');
    }

    if (InputSecurity.#dangerousJsPatterns.some((pattern) => pattern.test(jsCode))) {
      throw new Error('Dangerous JavaScript pattern detected');
    }

    return jsCode;
  }
}
|
|
264
|
+
|
|
265
|
+
/**
 * Cryptographic Security
 */
export class CryptoSecurity {
  /**
   * Generates a random identifier from the crypto module's randomBytes.
   *
   * @param {number} [length=32] - Number of random BYTES; the returned hex
   *   string is twice this many characters long.
   * @returns {string} Lower-case hex string.
   */
  static generateSecureId(length = 32) {
    return randomBytes(length).toString('hex');
  }

  /**
   * Generates a session identifier: 16 random bytes as 32 hex characters.
   *
   * @returns {string}
   */
  static generateSessionId() {
    return CryptoSecurity.generateSecureId(16);
  }

  /**
   * Hashes content and returns the hex digest.
   *
   * @param {string|Buffer} content - Data to hash.
   * @param {string} [algorithm='sha256'] - Any algorithm createHash accepts.
   * @returns {string} Hex digest.
   */
  static hashContent(content, algorithm = 'sha256') {
    return createHash(algorithm).update(content).digest('hex');
  }

  /**
   * Compares two strings without short-circuiting on the first difference.
   *
   * @param {string} a
   * @param {string} b
   * @returns {boolean} True when both strings are identical.
   * @throws {Error} 'Both values must be strings' for non-string input.
   */
  static timingSafeCompare(a, b) {
    if (typeof a !== 'string' || typeof b !== 'string') {
      throw new Error('Both values must be strings');
    }

    const bufferA = Buffer.from(a, 'utf8');
    const bufferB = Buffer.from(b, 'utf8');

    // timingSafeEqual requires equal BYTE lengths. Comparing string lengths is
    // not enough: 'é' and 'e' are both one character long but encode to two
    // and one bytes respectively, which would make timingSafeEqual throw.
    if (bufferA.length !== bufferB.length) {
      return false;
    }

    return timingSafeEqual(bufferA, bufferB);
  }

  /**
   * Computes the webhook signature: hex SHA-256 of payload followed by secret.
   *
   * NOTE(review): this is a plain hash of `payload + secret`, not an HMAC.
   * HMAC-SHA256 (crypto.createHmac) is the standard construction for keyed
   * signatures; switching changes every signature value, so it has to be
   * coordinated with the parties that verify them.
   *
   * @param {string} payload - Serialized webhook body.
   * @param {string} secret - Shared secret.
   * @returns {string} Hex signature.
   */
  static generateWebhookSignature(payload, secret) {
    return createHash('sha256')
      .update(payload + secret)
      .digest('hex');
  }

  /**
   * Checks a received webhook signature against the expected one.
   *
   * @param {string} payload - Serialized webhook body.
   * @param {string} signature - Signature supplied by the sender.
   * @param {string} secret - Shared secret.
   * @returns {boolean} True only when the signature matches; a missing or
   *   non-string signature is simply invalid and yields false.
   */
  static validateWebhookSignature(payload, signature, secret) {
    if (typeof signature !== 'string') {
      return false;
    }
    const expectedSignature = CryptoSecurity.generateWebhookSignature(payload, secret);
    return CryptoSecurity.timingSafeCompare(signature, expectedSignature);
  }
}
|
|
307
|
+
|
|
308
|
+
/**
 * Resource Management Security
 *
 * Normalizes caller-supplied limits so they always fall inside safe bounds.
 */
export class ResourceSecurity {
  /**
   * Coerces a requested limit to a number and clamps it into [min, max].
   * Missing, zero and non-numeric requests fall back to `fallback`.
   *
   * @param {*} requested - Caller-supplied value.
   * @param {number} fallback - Value used when nothing usable was supplied.
   * @param {number} min - Lower bound (inclusive).
   * @param {number} max - Upper bound (inclusive); may be Infinity.
   * @returns {number}
   */
  static #clamp(requested, fallback, min, max) {
    const numeric = Number(requested);
    const chosen = Number.isNaN(numeric) || numeric === 0 ? fallback : numeric;
    return Math.min(Math.max(chosen, min), max);
  }

  /**
   * Bounds the limits of a research request.
   *
   * @param {object} [options] - May carry maxUrls, timeLimit, maxDepth, concurrency.
   * @returns {{maxUrls: number, timeLimit: number, maxDepth: number, concurrency: number}}
   *   maxUrls in 1..SECURITY_CONFIG.maxResearchUrls (default 10),
   *   timeLimit in 1..SECURITY_CONFIG.maxResearchTime ms (default 60000),
   *   maxDepth in 1..5 (default 3), concurrency in 1..5 (default 3).
   *   Only these four keys are returned.
   */
  static validateResearchLimits(options) {
    const requested = options ?? {};

    // Every value gets a lower bound of 1 as well as an upper bound, so
    // negative requests cannot pass through unchanged.
    return {
      maxUrls: ResourceSecurity.#clamp(requested.maxUrls, 10, 1, SECURITY_CONFIG.maxResearchUrls),
      timeLimit: ResourceSecurity.#clamp(requested.timeLimit, 60000, 1, SECURITY_CONFIG.maxResearchTime),
      maxDepth: ResourceSecurity.#clamp(requested.maxDepth, 3, 1, 5),
      concurrency: ResourceSecurity.#clamp(requested.concurrency, 3, 1, 5)
    };
  }

  /**
   * Bounds the limits of snapshot storage.
   *
   * @param {object} [options] - May carry maxSnapshots, maxAge, maxStorageSize,
   *   compressionThreshold.
   * @returns {{maxSnapshots: number, maxAge: number, maxStorageSize: number, compressionThreshold: number}}
   */
  static validateSnapshotLimits(options) {
    const requested = options ?? {};

    return {
      maxSnapshots: ResourceSecurity.#clamp(requested.maxSnapshots, 100, 1, 1000),
      maxAge: ResourceSecurity.#clamp(requested.maxAge, 86400000, 3600000, Infinity), // Min 1 hour
      maxStorageSize: ResourceSecurity.#clamp(requested.maxStorageSize, 1073741824, 1, 10737418240), // Max 10GB
      compressionThreshold: ResourceSecurity.#clamp(requested.compressionThreshold, 1024, 1024, Infinity)
    };
  }
}
|
|
334
|
+
|
|
335
|
+
/**
 * Browser Security
 */
export class BrowserSecurity {
  // Deny-list used by sanitizeInjectedScript. `\s*` before "(" keeps spellings
  // such as `eval (x)` from slipping through; no `g` flag, so RegExp#test keeps
  // no state between calls.
  static #dangerousScriptPatterns = Object.freeze([
    /eval\s*\(/i,
    /Function\s*\(/i,
    /setTimeout\s*\(/i,
    /setInterval\s*\(/i,
    /XMLHttpRequest/i,
    /fetch\s*\(/i,
    /import\s*\(/i,
    /require\s*\(/i
  ]);

  /**
   * Returns the command-line arguments used to launch the browser.
   *
   * @returns {string[]} A new array on every call.
   */
  static getSecureBrowserArgs() {
    return [
      // Security-focused args (remove dangerous ones from original)
      '--disable-blink-features=AutomationControlled',
      '--disable-dev-shm-usage',
      '--disable-extensions',
      '--disable-plugins',
      '--disable-images', // Reduce attack surface
      '--disable-javascript-harmony-shipping',
      '--disable-background-timer-throttling',
      '--disable-backgrounding-occluded-windows',
      '--disable-renderer-backgrounding',
      '--disable-field-trial-config',
      '--disable-back-forward-cache',
      '--disable-hang-monitor',
      '--disable-prompt-on-repost',
      '--disable-sync',
      '--disable-translate',
      '--metrics-recording-only',
      '--no-first-run',
      '--safebrowsing-disable-auto-update',
      '--password-store=basic',
      '--use-mock-keychain',

      // Security enhancements
      '--disable-file-system',
      '--disable-databases',
      '--disable-local-storage',
      '--disable-session-storage',
      '--disable-application-cache',
      '--disable-notifications',
      '--disable-geolocation',
      '--disable-microphone',
      '--disable-camera'

      // DO NOT include these dangerous args from original:
      // --no-sandbox (removes critical security boundary)
      // --disable-web-security (removes Same Origin Policy)
      // --disable-features=VizDisplayCompositor (removes security features)
    ];
  }

  /**
   * Returns a copy of a stealth configuration with the privacy switches forced
   * on and the security-weakening switches forced off, whatever the caller set.
   *
   * @param {object} config - Requested stealth configuration.
   * @returns {object} New configuration object; the input is not modified.
   */
  static validateStealthConfig(config) {
    // Ensure stealth mode doesn't compromise security
    return {
      ...config,
      hideWebDriver: true, // This is fine
      blockWebRTC: true, // This is fine for privacy
      spoofTimezone: true, // This is fine for privacy
      randomizeHeaders: true, // This is fine for privacy

      // Remove dangerous options
      disableSecurity: false,
      allowUnsafeInlineScripts: false,
      bypassCSP: false
    };
  }

  /**
   * Rejects a script that matches any deny-listed pattern.
   * Nothing is rewritten: the script is either returned untouched or refused.
   *
   * @param {string} script - Script source destined for page injection.
   * @returns {string} The unchanged script when no pattern matches.
   * @throws {Error} 'Script must be a string' for non-string input, or
   *   'Dangerous script pattern detected' on a match.
   */
  static sanitizeInjectedScript(script) {
    // Only real strings are inspected; anything else would be coerced for the
    // checks and then returned unchanged.
    if (typeof script !== 'string') {
      throw new Error('Script must be a string');
    }

    if (BrowserSecurity.#dangerousScriptPatterns.some((pattern) => pattern.test(script))) {
      throw new Error('Dangerous script pattern detected');
    }

    return script;
  }
}
|
|
421
|
+
|
|
422
|
+
/**
 * Validation Schemas for Wave 3 Components
 */

// Predicate for zod `.refine`: true when the URL passes SSRFProtection.validateUrl.
const passesSsrfValidation = (url) => {
  try {
    SSRFProtection.validateUrl(url);
    return true;
  } catch {
    return false;
  }
};

export const Wave3SecuritySchemas = {
  // Research Tool Security Schema
  researchRequest: z.object({
    topic: z.string().min(3).max(500).refine(val => !/<script/i.test(val), 'No script tags allowed'),
    maxDepth: z.number().min(1).max(5),
    maxUrls: z.number().min(1).max(50),
    timeLimit: z.number().min(30000).max(120000),
    concurrency: z.number().min(1).max(5)
  }),

  // Stealth Browser Security Schema
  stealthConfig: z.object({
    level: z.enum(['basic', 'medium']), // Remove 'advanced' - too dangerous
    randomizeFingerprint: z.boolean(),
    hideWebDriver: z.boolean(),
    blockWebRTC: z.boolean(),
    spoofTimezone: z.boolean(),
    customUserAgent: z.string().max(500).optional(),
    customViewport: z.object({
      width: z.number().min(800).max(1920),
      height: z.number().min(600).max(1080)
    }).optional()
  }),

  // Localization Security Schema
  localizationConfig: z.object({
    countryCode: z.string().length(2).regex(/^[A-Z]{2}$/),
    language: z.string().max(10).regex(/^[a-z]{2}-[A-Z]{2}$/),
    // IANA zone names may contain digits, "+" and "-" (e.g. "Etc/GMT+5",
    // "America/Port-au-Prince") in addition to letters, "_" and "/".
    timezone: z.string().max(50).regex(/^[A-Za-z0-9_+\-\/]+$/),
    currency: z.string().length(3).regex(/^[A-Z]{3}$/)
  }),

  // Snapshot Security Schema
  snapshotRequest: z.object({
    url: z.string().url().refine(passesSsrfValidation, 'Invalid or blocked URL'),
    // NOTE: z.string().max() limits the character count, not the byte size.
    content: z.string().max(SECURITY_CONFIG.maxContentSize),
    snapshotId: z.string().regex(/^[a-zA-Z0-9_-]{1,64}$/).optional()
  }),

  // Change Tracking Security Schema
  changeTrackingRequest: z.object({
    // Tracked URLs get the same SSRF validation as snapshot URLs.
    url: z.string().url().refine(passesSsrfValidation, 'Invalid or blocked URL'),
    granularity: z.enum(['page', 'section', 'element', 'text']),
    customSelectors: z.array(z.string().max(100)).max(10).optional(),
    excludeSelectors: z.array(z.string().max(100)).max(20).optional()
  })
};
|
|
479
|
+
|
|
480
|
+
/**
 * Security Middleware Factory
 *
 * Each static method returns a function that performs one check.
 */
export class SecurityMiddleware {
  /**
   * Creates an in-memory sliding-window rate limiter.
   *
   * @param {{windowMs: number, max: number}} [options] - Window length in
   *   milliseconds and the number of calls allowed per identifier inside it.
   *   Defaults to SECURITY_CONFIG.defaultRateLimit.
   * @returns {(identifier: *) => void} Function that records one call for the
   *   identifier and throws Error('Rate limit exceeded') once the identifier
   *   already has `max` calls inside the window.
   */
  static rateLimiter(options = SECURITY_CONFIG.defaultRateLimit) {
    const requests = new Map();

    return (identifier) => {
      const now = Date.now();
      const windowStart = now - options.windowMs;

      // Clean old requests. Identifiers with nothing left in the window are
      // removed entirely; otherwise the map would keep one entry for every
      // identifier ever seen.
      for (const [key, timestamps] of requests) {
        const recent = timestamps.filter((time) => time > windowStart);
        if (recent.length === 0) {
          requests.delete(key);
        } else {
          requests.set(key, recent);
        }
      }

      // Check current requests
      const userRequests = requests.get(identifier) ?? [];
      if (userRequests.length >= options.max) {
        throw new Error('Rate limit exceeded');
      }

      // Add current request
      userRequests.push(now);
      requests.set(identifier, userRequests);
    };
  }

  /**
   * Creates a validator around a schema.
   *
   * @param {{parse: Function}} schema - Object whose parse(data) returns the
   *   parsed value or throws (for example a zod schema).
   * @returns {(data: *) => *} Function returning the parsed data, or throwing
   *   Error('Validation failed: <message>') with the original error as `cause`.
   */
  static validateRequest(schema) {
    return (data) => {
      try {
        return schema.parse(data);
      } catch (error) {
        throw new Error(`Validation failed: ${error.message}`, { cause: error });
      }
    };
  }

  /**
   * Creates an audit logger.
   *
   * NOTE(review): entries are written with console.log, i.e. to stdout. If
   * this server exchanges protocol messages over stdout, confirm that these
   * lines cannot interfere with that stream.
   *
   * @returns {(operation: string, details: *) => void} Function that prints
   *   one JSON entry; object details are stored as a JSON string.
   */
  static auditLogger() {
    return (operation, details) => {
      const logEntry = {
        timestamp: new Date().toISOString(),
        operation,
        details: typeof details === 'object' ? JSON.stringify(details) : details,
        level: 'SECURITY'
      };

      console.log('[SECURITY AUDIT]', JSON.stringify(logEntry));
    };
  }
}
|
|
531
|
+
|
|
532
|
+
/**
 * Security Testing Utilities
 */
export class SecurityTesting {
  /**
   * Returns sample attack strings grouped by category.
   *
   * @returns {{ssrf: string[], pathTraversal: string[], xss: string[], injection: string[]}}
   *   A fresh object on every call.
   */
  static generateMaliciousPayloads() {
    return {
      ssrf: [
        'http://localhost:3000/admin',
        'http://127.0.0.1:22',
        'http://169.254.169.254/latest/meta-data/',
        'file:///etc/passwd',
        'ftp://internal.server.com'
      ],

      pathTraversal: [
        '../../../etc/passwd',
        '..\\..\\..\\windows\\system32\\config\\sam',
        '/etc/shadow',
        'C:\\Windows\\System32\\drivers\\etc\\hosts'
      ],

      xss: [
        '<script>alert("XSS")</script>',
        'javascript:alert("XSS")',
        '<img src="x" onerror="alert(\'XSS\')">',
        '<svg onload="alert(\'XSS\')"></svg>'
      ],

      injection: [
        "'; DROP TABLE users; --",
        '${7*7}',
        '#{7*7}',
        '{{7*7}}',
        '<%= 7*7 %>'
      ]
    };
  }

  /**
   * Runs a synchronous security function against each payload and records
   * whether it threw.
   *
   * NOTE: an async function never throws synchronously, so every payload
   * would be reported as not blocked; only pass synchronous validators.
   *
   * @param {(payload: *) => *} securityFunction - Validator expected to throw
   *   on malicious input.
   * @param {Iterable<*>} maliciousPayloads - Payloads to try.
   * @returns {Array<{payload: *, blocked: boolean, error: (string|null)}>}
   *   One entry per payload, in order; `error` holds the thrown message.
   */
  static testSecurityFunction(securityFunction, maliciousPayloads) {
    const results = [];

    for (const payload of maliciousPayloads) {
      try {
        securityFunction(payload);
        results.push({ payload, blocked: false, error: null });
      } catch (error) {
        // Anything can be thrown in JavaScript; fall back to String() so a
        // thrown string is still recorded and a thrown null/undefined does not
        // crash the harness.
        const message = error instanceof Error ? error.message : String(error);
        results.push({ payload, blocked: true, error: message });
      }
    }

    return results;
  }
}
|
|
585
|
+
|
|
586
|
+
/**
 * Emergency Security Patches
 *
 * Each method replaces one method on the object it is given with a wrapper
 * that validates the arguments and then delegates to the original method
 * (called with the same `this`).
 */
export class EmergencyPatches {
  /**
   * Wraps researchTool.execute so research limits are bounded and the topic is
   * sanitized before the original runs.
   *
   * @param {{execute: Function}} researchTool - Tool to patch in place.
   */
  static patchResearchTool(researchTool) {
    const originalExecute = researchTool.execute;

    researchTool.execute = async function(params) {
      // Validate research limits
      const secureLimits = ResourceSecurity.validateResearchLimits(params);

      // validateResearchLimits returns only the four limit fields, so start
      // from the caller's params; otherwise every other option would be
      // silently dropped before reaching the tool.
      const secureParams = {
        ...params,
        ...secureLimits,
        // Validate topic for script injection
        topic: InputSecurity.sanitizeString(params.topic, 500)
      };

      return originalExecute.call(this, secureParams);
    };
  }

  /**
   * Wraps browserManager.createStealthContext so the config is secured and the
   * browser arguments are replaced by the secure set.
   *
   * @param {{createStealthContext: Function}} browserManager - Manager to patch in place.
   */
  static patchBrowserManager(browserManager) {
    const originalCreateStealthContext = browserManager.createStealthContext;

    browserManager.createStealthContext = async function(config) {
      // Validate and secure the config
      const secureConfig = BrowserSecurity.validateStealthConfig(config);

      // Use secure browser args
      secureConfig.browserArgs = BrowserSecurity.getSecureBrowserArgs();

      return originalCreateStealthContext.call(this, secureConfig);
    };
  }

  /**
   * Wraps snapshotManager.storeSnapshot so the URL and content size are
   * validated and the snapshot ID is always a freshly generated one (any
   * snapshotId in `options` is overridden).
   *
   * @param {{storeSnapshot: Function}} snapshotManager - Manager to patch in place.
   */
  static patchSnapshotManager(snapshotManager) {
    const originalStoreSnapshot = snapshotManager.storeSnapshot;

    snapshotManager.storeSnapshot = async function(url, content, metadata, options) {
      // Validate URL
      SSRFProtection.validateUrl(url);

      // Validate content size
      InputSecurity.validateContentSize(content);

      // Generate secure snapshot ID
      const snapshotId = CryptoSecurity.generateSecureId();

      return originalStoreSnapshot.call(this, url, content, metadata, { ...options, snapshotId });
    };
  }
}
|
|
638
|
+
|
|
639
|
+
// Default export: a single object bundling every security helper and the
// shared configuration defined in this module.
const wave3Security = {
  SSRFProtection,
  PathSecurity,
  InputSecurity,
  CryptoSecurity,
  ResourceSecurity,
  BrowserSecurity,
  Wave3SecuritySchemas,
  SecurityMiddleware,
  SecurityTesting,
  EmergencyPatches,
  SECURITY_CONFIG
};

export default wave3Security;
|