crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,640 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSRF Protection Module
|
|
3
|
+
* Implements comprehensive Server-Side Request Forgery prevention measures
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { promisify } from 'util';
|
|
7
|
+
import dns from 'dns';
|
|
8
|
+
import net from 'net';
|
|
9
|
+
|
|
10
|
+
// Promise-returning wrapper around the callback-style dns.lookup so it can be awaited.
const dnsLookup = promisify(dns.lookup);
|
|
11
|
+
|
|
12
|
+
/**
 * SSRF Protection Configuration
 *
 * Default policy used by SSRFProtection. Constructor options are
 * shallow-merged over this object, so a caller that supplies one of the
 * arrays below replaces that whole array rather than extending it.
 */
const SSRF_CONFIG = {
  // Allowed protocols (compared against URL.protocol, so the trailing ':' is required)
  allowedProtocols: ['http:', 'https:'],

  // Blocked IP ranges (private networks, localhost, etc.), in CIDR notation
  blockedIPRanges: [
    '127.0.0.0/8',    // Localhost
    '10.0.0.0/8',     // Private network
    '172.16.0.0/12',  // Private network
    '192.168.0.0/16', // Private network
    '169.254.0.0/16', // Link-local
    '224.0.0.0/4',    // Multicast
    '240.0.0.0/4',    // Reserved
    '0.0.0.0/8',      // This network
    '100.64.0.0/10',  // Carrier-grade NAT
    '198.18.0.0/15',  // Benchmark testing
    '::/128',         // IPv6 unspecified address (IPv6 counterpart of 0.0.0.0)
    '::1/128',        // IPv6 localhost
    'fc00::/7',       // IPv6 private network
    'fe80::/10',      // IPv6 link-local
    'ff00::/8',       // IPv6 multicast
  ],

  // Blocked hostnames/domains (exact, lowercase matches)
  blockedHostnames: [
    'localhost',
    'metadata.google.internal', // GCP metadata
    'metadata.azure.com',       // Azure metadata
    'metadata',
    'consul',
    'vault',
  ],

  // Default ports to block
  blockedPorts: [
    22,    // SSH
    23,    // Telnet
    25,    // SMTP
    53,    // DNS
    135,   // RPC
    139,   // NetBIOS
    445,   // SMB
    1433,  // MSSQL
    1521,  // Oracle
    3306,  // MySQL
    3389,  // RDP
    5432,  // PostgreSQL
    5984,  // CouchDB
    6379,  // Redis
    8086,  // InfluxDB
    9200,  // Elasticsearch
    9300,  // Elasticsearch
    27017, // MongoDB
  ],

  // Maximum request size (bytes)
  maxRequestSize: 100 * 1024 * 1024, // 100MB

  // Maximum timeout (milliseconds)
  maxTimeout: 60000, // 60 seconds

  // Maximum redirects
  maxRedirects: 5,
};
|
|
78
|
+
|
|
79
|
+
/**
 * SSRF Protection Class
 *
 * Validates outbound URLs against the configured protocol, hostname, port,
 * IP-range and path rules, and can build a fetch wrapper that applies the
 * same checks to every redirect hop.
 *
 * NOTE(review): validation resolves DNS on its own and the later fetch call
 * resolves the name again, so a record that changes between the two lookups
 * is not covered by these checks.
 */
export class SSRFProtection {
  /**
   * @param {Object} [options] - Overrides shallow-merged over SSRF_CONFIG
   */
  constructor(options = {}) {
    this.config = { ...SSRF_CONFIG, ...options };
    this.cache = new Map(); // DNS resolution cache
    this.cacheMaxSize = 10000;
    this.cacheMaxAge = 300000; // 5 minutes
  }

  /**
   * Validate and sanitize URL for SSRF protection
   *
   * Checks run in order: raw path traversal, protocol, hostname, port,
   * resolved IPs, path, length. Protocol/hostname/port/IP failures return
   * immediately; the final verdict is "no HIGH severity violation".
   *
   * @param {string} url - URL to validate
   * @param {Object} options - Validation options (currently unused)
   * @returns {Promise<Object>} - Validation result: { allowed, url,
   *   sanitizedURL, violations, metadata }. Never rejects; an unparsable URL
   *   yields an INVALID_URL violation.
   */
  async validateURL(url, options = {}) {
    try {
      // Must run on the raw string: URL parsing collapses "../" segments
      const rawPathTraversal = this.checkRawPathTraversal(url);

      const urlObj = new URL(url);
      const validationResult = {
        allowed: false,
        url: url,
        sanitizedURL: null,
        violations: [],
        metadata: {
          protocol: urlObj.protocol,
          hostname: urlObj.hostname,
          port: urlObj.port || this.getDefaultPort(urlObj.protocol),
          path: urlObj.pathname,
          validatedAt: new Date().toISOString()
        }
      };

      // Add raw path traversal violations
      if (rawPathTraversal.violations.length > 0) {
        validationResult.violations.push(...rawPathTraversal.violations);
      }

      // 1. Protocol validation
      if (!this.config.allowedProtocols.includes(urlObj.protocol)) {
        validationResult.violations.push({
          type: 'INVALID_PROTOCOL',
          message: `Protocol '${urlObj.protocol}' is not allowed`,
          severity: 'HIGH'
        });
        return validationResult;
      }

      // 2. Hostname validation
      const hostnameCheck = this.validateHostname(urlObj.hostname);
      if (!hostnameCheck.allowed) {
        validationResult.violations.push({
          type: 'BLOCKED_HOSTNAME',
          message: hostnameCheck.reason,
          severity: 'HIGH'
        });
        return validationResult;
      }

      // 3. Port validation
      const port = Number.parseInt(urlObj.port, 10) || this.getDefaultPort(urlObj.protocol);
      if (this.config.blockedPorts.includes(port)) {
        validationResult.violations.push({
          type: 'BLOCKED_PORT',
          message: `Port ${port} is blocked`,
          severity: 'HIGH'
        });
        return validationResult;
      }

      // 4. DNS resolution and IP validation
      const ipValidation = await this.validateIP(urlObj.hostname);
      if (!ipValidation.allowed) {
        validationResult.violations.push({
          type: 'BLOCKED_IP',
          message: ipValidation.reason,
          severity: 'HIGH'
        });
        validationResult.metadata.resolvedIPs = ipValidation.ips;
        return validationResult;
      }

      // 5. Path validation
      const pathValidation = this.validatePath(urlObj.pathname);
      if (!pathValidation.allowed) {
        validationResult.violations.push({
          type: 'SUSPICIOUS_PATH',
          message: pathValidation.reason,
          severity: 'MEDIUM'
        });
      }

      // 6. URL length validation
      if (url.length > 2048) {
        validationResult.violations.push({
          type: 'URL_TOO_LONG',
          message: 'URL exceeds maximum length',
          severity: 'MEDIUM'
        });
      }

      // If no high-severity violations, allow the request
      const highSeverityViolations = validationResult.violations.filter(v => v.severity === 'HIGH');
      validationResult.allowed = highSeverityViolations.length === 0;
      validationResult.sanitizedURL = this.sanitizeURL(urlObj);
      validationResult.metadata.resolvedIPs = ipValidation.ips;

      return validationResult;
    } catch (error) {
      return {
        allowed: false,
        url: url,
        violations: [{
          type: 'INVALID_URL',
          message: `Invalid URL format: ${error.message}`,
          severity: 'HIGH'
        }],
        metadata: {
          error: error.message,
          validatedAt: new Date().toISOString()
        }
      };
    }
  }

  /**
   * Validate hostname against blocked patterns
   * @param {string} hostname
   * @returns {Object} - { allowed: true } or { allowed: false, reason }
   */
  validateHostname(hostname) {
    // A fully-qualified name may carry one trailing dot ("foo.internal.");
    // drop it so it cannot dodge the exact-match and suffix checks.
    const normalizedHostname = hostname.toLowerCase().replace(/\.$/, '');

    // Check exact matches
    if (this.config.blockedHostnames.includes(normalizedHostname)) {
      return {
        allowed: false,
        reason: `Hostname '${hostname}' is explicitly blocked`
      };
    }

    // Check for suspicious patterns
    const suspiciousPatterns = [
      /^metadata/i,
      /^consul/i,
      /^vault/i,
      /^admin/i,
      /^internal/i,
      /\.local$/i,
      /\.internal$/i,
    ];

    for (const pattern of suspiciousPatterns) {
      if (pattern.test(normalizedHostname)) {
        return {
          allowed: false,
          reason: `Hostname '${hostname}' matches blocked pattern`
        };
      }
    }

    return { allowed: true };
  }

  /**
   * Resolve hostname to IP and validate against blocked ranges
   *
   * IP literals (including bracketed IPv6 as produced by URL.hostname) are
   * checked directly without a DNS query. For names, every address returned
   * by the lookup is checked and a single blocked address rejects the host.
   *
   * @param {string} hostname
   * @returns {Promise<Object>} - { allowed, ips, reason? }. Lookup failures
   *   resolve to allowed: false rather than rejecting.
   */
  async validateIP(hostname) {
    try {
      // Check cache first
      const cacheKey = hostname.toLowerCase();
      const cached = this.cache.get(cacheKey);
      if (cached && Date.now() - cached.timestamp < this.cacheMaxAge) {
        return cached.result;
      }

      // URL.hostname wraps IPv6 literals in brackets; strip them
      const host = hostname.startsWith('[') && hostname.endsWith(']')
        ? hostname.slice(1, -1)
        : hostname;

      let ips;
      if (net.isIP(host) !== 0) {
        ips = [host];
      } else {
        // all: true returns every address, not just the first one
        const lookupResult = await dnsLookup(host, { family: 0, all: true });
        // Accept both result shapes: an array of { address, family } records
        // or a single { address, family } object.
        let records;
        if (Array.isArray(lookupResult)) {
          records = lookupResult;
        } else if (lookupResult && Array.isArray(lookupResult.address)) {
          records = lookupResult.address;
        } else {
          records = lookupResult ? [lookupResult.address] : [];
        }
        ips = records
          .map(record => (typeof record === 'string' ? record : record && record.address))
          .filter(address => typeof address === 'string');

        if (ips.length === 0) {
          throw new Error('no addresses returned');
        }
      }

      // Validate each resolved IP
      for (const ip of ips) {
        if (!this.isIPAllowed(ip)) {
          const result = {
            allowed: false,
            reason: `Resolved IP '${ip}' is in blocked range`,
            ips: ips
          };
          this.cacheResult(cacheKey, result);
          return result;
        }
      }

      const result = {
        allowed: true,
        ips: ips
      };
      this.cacheResult(cacheKey, result);
      return result;
    } catch (error) {
      // DNS resolution failed - could be suspicious
      return {
        allowed: false,
        reason: `DNS resolution failed: ${error.message}`,
        ips: []
      };
    }
  }

  /**
   * Check if IP address is in blocked ranges
   *
   * Input that is not an IP address is rejected, and an IPv4-mapped IPv6
   * address (::ffff:a.b.c.d) is judged by its embedded IPv4 address.
   *
   * @param {string} ip
   * @returns {boolean} - true when the address may be contacted
   */
  isIPAllowed(ip) {
    // Fail closed on anything that is not an IP address
    if (net.isIP(ip) === 0) {
      return false;
    }

    // Direct IP checks for common blocked addresses
    if (ip === '127.0.0.1' || ip === '::1' || ip === '0.0.0.0' || ip === '::') {
      return false;
    }

    const embeddedIPv4 = this.extractMappedIPv4(ip);
    if (embeddedIPv4 !== null) {
      return this.isIPAllowed(embeddedIPv4);
    }

    // Check against CIDR ranges
    return !this.config.blockedIPRanges.some(range => this.isIPInRange(ip, range));
  }

  /**
   * Check if IP is in CIDR range
   * @param {string} ip
   * @param {string} cidr - "network/prefix"; a missing prefix means a single host
   * @returns {boolean} - false on family mismatch or unparsable input
   */
  isIPInRange(ip, cidr) {
    try {
      const [network, prefixLength] = String(cidr).split('/');

      if (net.isIPv4(ip) && net.isIPv4(network)) {
        const prefix = prefixLength === undefined ? 32 : Number.parseInt(prefixLength, 10);
        return this.isIPv4InRange(ip, network, prefix);
      }

      if (net.isIPv6(ip) && net.isIPv6(network)) {
        const prefix = prefixLength === undefined ? 128 : Number.parseInt(prefixLength, 10);
        return this.isIPv6InRange(ip, network, prefix);
      }

      return false;
    } catch (error) {
      return false;
    }
  }

  /**
   * Check if IPv4 address is in range
   * @param {string} ip
   * @param {string} network
   * @param {number} prefix - Prefix length, 0-32
   * @returns {boolean}
   */
  isIPv4InRange(ip, network, prefix) {
    if (!Number.isInteger(prefix) || prefix < 0 || prefix > 32) {
      return false;
    }

    // JS shift counts wrap modulo 32, so "<< 32" would leave an all-ones
    // mask; /0 must use an empty mask instead.
    const mask = prefix === 0 ? 0 : (0xffffffff << (32 - prefix)) >>> 0;

    return (this.ipv4ToInt(ip) & mask) === (this.ipv4ToInt(network) & mask);
  }

  /**
   * Convert IPv4 to integer
   * @param {string} ip - Dotted-quad address
   * @returns {number} - Unsigned 32-bit value
   */
  ipv4ToInt(ip) {
    const [a, b, c, d] = ip.split('.').map(part => Number.parseInt(part, 10));
    // ">>> 0" keeps the result unsigned when the first octet is >= 128
    return ((a << 24) | (b << 16) | (c << 8) | d) >>> 0;
  }

  /**
   * Expand an IPv6 address into its eight 16-bit groups
   *
   * Handles "::" compression, a trailing dotted-quad (e.g. ::ffff:1.2.3.4)
   * and a "%zone" suffix, which is ignored.
   *
   * @param {string} ip
   * @returns {number[]|null} - Eight integers, or null if not an IPv6 address
   */
  expandIPv6(ip) {
    let text = String(ip);
    const zoneIndex = text.indexOf('%');
    if (zoneIndex !== -1) {
      text = text.slice(0, zoneIndex);
    }
    if (!net.isIPv6(text)) {
      return null;
    }

    // Rewrite a dotted-quad tail as two hexadecimal groups
    const lastColon = text.lastIndexOf(':');
    const tail = text.slice(lastColon + 1);
    if (tail.includes('.')) {
      const octets = tail.split('.').map(octet => Number.parseInt(octet, 10));
      const high = ((octets[0] << 8) | octets[1]).toString(16);
      const low = ((octets[2] << 8) | octets[3]).toString(16);
      text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
    }

    const halves = text.split('::');
    const head = halves[0] ? halves[0].split(':') : [];
    let groups = head;
    if (halves.length > 1) {
      const rest = halves[1] ? halves[1].split(':') : [];
      const missing = 8 - head.length - rest.length;
      if (missing < 0) {
        return null;
      }
      groups = [...head, ...Array(missing).fill('0'), ...rest];
    }

    if (groups.length !== 8) {
      return null;
    }
    return groups.map(group => Number.parseInt(group, 16));
  }

  /**
   * Extract the IPv4 address embedded in an IPv4-mapped IPv6 address
   * @param {string} ip
   * @returns {string|null} - Dotted-quad IPv4, or null if ip is not ::ffff:0:0/96
   */
  extractMappedIPv4(ip) {
    const groups = this.expandIPv6(ip);
    if (groups === null) {
      return null;
    }

    const isMapped = groups.slice(0, 5).every(group => group === 0) && groups[5] === 0xffff;
    if (!isMapped) {
      return null;
    }

    return [groups[6] >> 8, groups[6] & 0xff, groups[7] >> 8, groups[7] & 0xff].join('.');
  }

  /**
   * Check if IPv6 address is in range
   * @param {string} ip
   * @param {string} network
   * @param {number} prefix - Prefix length, 0-128
   * @returns {boolean} - false when either address cannot be parsed
   */
  isIPv6InRange(ip, network, prefix) {
    if (!Number.isInteger(prefix) || prefix < 0 || prefix > 128) {
      return false;
    }

    const ipGroups = this.expandIPv6(ip);
    const networkGroups = this.expandIPv6(network);
    if (ipGroups === null || networkGroups === null) {
      return false;
    }

    // Compare only the prefix bits, one 16-bit group at a time
    let remainingBits = prefix;
    for (let i = 0; i < 8 && remainingBits > 0; i++) {
      const bits = Math.min(16, remainingBits);
      const mask = (0xffff << (16 - bits)) & 0xffff;
      if ((ipGroups[i] & mask) !== (networkGroups[i] & mask)) {
        return false;
      }
      remainingBits -= bits;
    }

    return true;
  }

  /**
   * Check for path traversal patterns in raw URL before parsing
   * @param {string} url - Raw URL to check
   * @returns {Object} - Result with violations array (at most one entry)
   */
  checkRawPathTraversal(url) {
    const violations = [];

    // Path traversal patterns to check before URL normalization.
    // The literals are created on every call, so the stateful lastIndex of
    // the /g flag never carries over between tests.
    const pathTraversalPatterns = [
      /\.\.\//g,      // Basic path traversal ../
      /\.\.\\/g,      // Windows path traversal ..\
      /%2e%2e%2f/gi,  // URL encoded ../
      /%2e%2e%5c/gi,  // URL encoded ..\
      /%2e%2e/gi,     // URL encoded ..
      /\.\.%2f/gi,    // Mixed encoding
      /\.\.%5c/gi,    // Mixed encoding
    ];

    for (const pattern of pathTraversalPatterns) {
      if (pattern.test(url)) {
        violations.push({
          type: "SUSPICIOUS_PATH",
          message: `URL contains path traversal pattern: ${pattern}`,
          severity: "HIGH"
        });
        break; // Only report one path traversal violation
      }
    }

    return { violations };
  }

  /**
   * Validate URL path for suspicious patterns
   * @param {string} path
   * @returns {Object} - { allowed: true } or { allowed: false, reason }
   */
  validatePath(path) {
    const suspiciousPatterns = [
      /\.\.\//,      // Directory traversal
      /\/etc\//,     // System files
      /\/proc\//,    // System files
      /\/sys\//,     // System files
      /\/dev\//,     // Device files
      /\/tmp\//,     // Temporary files
      /\/var\/log/,  // Log files
    ];

    for (const pattern of suspiciousPatterns) {
      if (pattern.test(path)) {
        return {
          allowed: false,
          reason: `Path contains suspicious pattern: ${pattern}`
        };
      }
    }

    return { allowed: true };
  }

  /**
   * Sanitize URL by removing potentially dangerous parts
   * @param {URL} urlObj
   * @returns {string} - URL string without credentials or fragment
   */
  sanitizeURL(urlObj) {
    const sanitized = new URL(urlObj.toString());

    // Remove authentication info
    sanitized.username = '';
    sanitized.password = '';

    // Remove fragment
    sanitized.hash = '';

    return sanitized.toString();
  }

  /**
   * Get default port for protocol
   * @param {string} protocol - Protocol including the trailing ':'
   * @returns {number} - 80 for unknown protocols
   */
  getDefaultPort(protocol) {
    switch (protocol) {
      case 'http:': return 80;
      case 'https:': return 443;
      case 'ftp:': return 21;
      default: return 80;
    }
  }

  /**
   * Cache DNS resolution result
   * @param {string} key
   * @param {Object} result
   */
  cacheResult(key, result) {
    if (this.cache.has(key)) {
      // Re-insert so the refreshed entry moves to the newest position and no
      // unrelated entry is evicted for a simple update.
      this.cache.delete(key);
    } else if (this.cache.size >= this.cacheMaxSize) {
      // Remove oldest entry (Map iterates in insertion order)
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }

    this.cache.set(key, {
      result,
      timestamp: Date.now()
    });
  }

  /**
   * Create secure fetch wrapper with SSRF protection
   *
   * The returned function validates the initial URL and every redirect
   * target (resolved against the current URL), applies the optional domain
   * whitelist to each hop, and follows at most config.maxRedirects redirects.
   *
   * @param {Object} options
   * @param {string[]} [options.allowedDomains] - If non-empty, only these
   *   domains and their subdomains may be contacted
   * @param {number} [options.maxRequestSize] - Limit for the declared
   *   content-length of a response, in bytes
   * @returns {Function} - async (url, fetchOptions) => Response; throws an
   *   Error prefixed "SSRF Protection:" when a check fails
   */
  createSecureFetch(options = {}) {
    const { allowedDomains = [], maxRequestSize = this.config.maxRequestSize } = options;

    // Enforce the domain whitelist (no-op when the list is empty)
    const assertDomainAllowed = (targetUrl) => {
      if (allowedDomains.length === 0) {
        return;
      }
      const { hostname } = new URL(targetUrl);
      const isAllowed = allowedDomains.some(domain =>
        hostname === domain || hostname.endsWith('.' + domain)
      );
      if (!isAllowed) {
        throw new Error(`SSRF Protection: Domain not in whitelist`);
      }
    };

    return async (url, fetchOptions = {}) => {
      // Validate URL
      const validation = await this.validateURL(url);
      if (!validation.allowed) {
        const violations = validation.violations.map(v => v.message).join(', ');
        throw new Error(`SSRF Protection: ${violations}`);
      }

      // Check domain whitelist if provided
      assertDomainAllowed(validation.sanitizedURL);

      // Set secure defaults
      const timeoutMs = Math.min(fetchOptions.timeout || 30000, this.config.maxTimeout);
      const secureOptions = {
        ...fetchOptions,
        timeout: timeoutMs,
        redirect: 'manual', // Handle redirects manually
        headers: {
          'User-Agent': 'CrawlForge/3.0 (Security Enhanced)',
          ...fetchOptions.headers
        }
      };
      // "timeout" is not a standard fetch option, so the limit is enforced
      // through an abort signal unless the caller supplied their own.
      const needsTimeoutSignal = !secureOptions.signal &&
        typeof AbortSignal !== 'undefined' &&
        typeof AbortSignal.timeout === 'function';

      // Perform the request
      let response;
      let redirectCount = 0;
      let currentUrl = validation.sanitizedURL;

      while (redirectCount <= this.config.maxRedirects) {
        const requestOptions = needsTimeoutSignal
          ? { ...secureOptions, signal: AbortSignal.timeout(timeoutMs) }
          : secureOptions;
        response = await fetch(currentUrl, requestOptions);

        // Check response size (relies on the declared content-length only)
        const contentLength = response.headers.get('content-length');
        if (contentLength && Number.parseInt(contentLength, 10) > maxRequestSize) {
          throw new Error('SSRF Protection: Response size exceeds limit');
        }

        // Handle redirects manually
        if (response.status >= 300 && response.status < 400) {
          const location = response.headers.get('location');
          if (!location) {
            break;
          }

          // Resolving against the current URL normalizes "../", so the raw
          // header value is screened for traversal patterns first.
          if (this.checkRawPathTraversal(location).violations.length > 0) {
            throw new Error('SSRF Protection: Redirect to blocked URL');
          }

          // Location may be relative; resolve it against the current URL
          let redirectTarget;
          try {
            redirectTarget = new URL(location, currentUrl).toString();
          } catch {
            throw new Error('SSRF Protection: Redirect to blocked URL');
          }

          // Validate redirect URL
          const redirectValidation = await this.validateURL(redirectTarget);
          if (!redirectValidation.allowed) {
            throw new Error('SSRF Protection: Redirect to blocked URL');
          }

          // A redirect must not escape the whitelist either
          assertDomainAllowed(redirectValidation.sanitizedURL);

          currentUrl = redirectValidation.sanitizedURL;
          redirectCount++;
          continue;
        }

        break;
      }

      if (redirectCount > this.config.maxRedirects) {
        throw new Error('SSRF Protection: Too many redirects');
      }

      return response;
    };
  }

  /**
   * Get SSRF protection statistics
   * @returns {Object} - Cache usage and the sizes/limits of the active config
   */
  getStats() {
    return {
      cacheSize: this.cache.size,
      cacheMaxSize: this.cacheMaxSize,
      cacheMaxAge: this.cacheMaxAge,
      blockedRanges: this.config.blockedIPRanges.length,
      blockedHostnames: this.config.blockedHostnames.length,
      blockedPorts: this.config.blockedPorts.length,
      maxRequestSize: this.config.maxRequestSize,
      maxTimeout: this.config.maxTimeout,
      maxRedirects: this.config.maxRedirects
    };
  }

  /**
   * Clear DNS cache
   */
  clearCache() {
    this.cache.clear();
  }
}
|
|
639
|
+
|
|
640
|
+
// Default export of the same class that is also exported by name above.
export default SSRFProtection;
|