crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75):
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,640 @@
1
+ /**
2
+ * SSRF Protection Module
3
+ * Implements comprehensive Server-Side Request Forgery prevention measures
4
+ */
5
+
6
+ import { promisify } from 'util';
7
+ import dns from 'dns';
8
+ import net from 'net';
9
+
10
+ const dnsLookup = promisify(dns.lookup);
11
+
12
/**
 * SSRF Protection Configuration
 *
 * Default policy used by SSRFProtection. Every key can be overridden through
 * the SSRFProtection constructor options (the merge is shallow, so supplying
 * an array replaces the default list rather than extending it).
 */
const SSRF_CONFIG = {
  // Allowed protocols (compared against URL.protocol, so the trailing ':' is required)
  allowedProtocols: ['http:', 'https:'],

  // Blocked IP ranges (private networks, localhost, etc.) in CIDR notation
  blockedIPRanges: [
    '127.0.0.0/8',     // Localhost
    '10.0.0.0/8',      // Private network
    '172.16.0.0/12',   // Private network
    '192.168.0.0/16',  // Private network
    '169.254.0.0/16',  // Link-local (includes the 169.254.169.254 cloud metadata address)
    '224.0.0.0/4',     // Multicast
    '240.0.0.0/4',     // Reserved
    '0.0.0.0/8',       // This network
    '100.64.0.0/10',   // Carrier-grade NAT
    '198.18.0.0/15',   // Benchmark testing
    '::/128',          // IPv6 unspecified address (IPv6 counterpart of 0.0.0.0)
    '::1/128',         // IPv6 localhost
    '::ffff:0:0/96',   // IPv4-mapped IPv6; would otherwise smuggle 127.0.0.1, 10.x, ... past the IPv4 ranges
    'fc00::/7',        // IPv6 private network
    'fe80::/10',       // IPv6 link-local
    'ff00::/8',        // IPv6 multicast
  ],

  // Blocked hostnames/domains (exact match, lowercase)
  blockedHostnames: [
    'localhost',
    'metadata.google.internal', // GCP metadata
    'metadata.azure.com',       // Azure metadata
    'metadata',
    'consul',
    'vault',
  ],

  // Default ports to block
  blockedPorts: [
    22,    // SSH
    23,    // Telnet
    25,    // SMTP
    53,    // DNS
    135,   // RPC
    139,   // NetBIOS
    445,   // SMB
    1433,  // MSSQL
    1521,  // Oracle
    3306,  // MySQL
    3389,  // RDP
    5432,  // PostgreSQL
    5984,  // CouchDB
    6379,  // Redis
    8086,  // InfluxDB
    9200,  // Elasticsearch
    9300,  // Elasticsearch
    27017, // MongoDB
  ],

  // Maximum size (bytes) accepted for a response, compared against its Content-Length header
  maxRequestSize: 100 * 1024 * 1024, // 100MB

  // Maximum timeout (milliseconds)
  maxTimeout: 60000, // 60 seconds

  // Maximum redirects
  maxRedirects: 5,
};
78
+
79
/**
 * SSRF Protection Class
 *
 * Validates outbound URLs against a protocol / hostname / port / IP-range
 * policy (SSRF_CONFIG merged with constructor overrides) and can wrap the
 * global `fetch` so that every hop of a redirect chain is validated.
 *
 * NOTE: validation resolves the hostname itself and the subsequent `fetch`
 * resolves it again, so a hostile DNS server can still answer differently the
 * second time (DNS rebinding). Closing that gap requires pinning the validated
 * address in the HTTP client, which this class does not do.
 */
export class SSRFProtection {
  /**
   * @param {Object} [options] - Overrides shallow-merged over SSRF_CONFIG
   */
  constructor(options = {}) {
    this.config = { ...SSRF_CONFIG, ...options };
    this.cache = new Map(); // DNS resolution cache, keyed by lowercase hostname
    this.cacheMaxSize = 10000;
    this.cacheMaxAge = 300000; // 5 minutes
  }

  /**
   * Validate and sanitize URL for SSRF protection
   * @param {string} url - URL to validate
   * @param {Object} options - Validation options (currently unused)
   * @returns {Promise<Object>} - Validation result: { allowed, url, sanitizedURL, violations, metadata }
   */
  async validateURL(url, options = {}) {
    try {
      // Inspect the raw string first: the URL parser collapses "../" segments,
      // so traversal attempts are only visible before parsing.
      const rawPathTraversal = this.checkRawPathTraversal(url);

      const urlObj = new URL(url);
      const validationResult = {
        allowed: false,
        url: url,
        sanitizedURL: null,
        violations: [],
        metadata: {
          protocol: urlObj.protocol,
          hostname: urlObj.hostname,
          port: urlObj.port || this.getDefaultPort(urlObj.protocol),
          path: urlObj.pathname,
          validatedAt: new Date().toISOString()
        }
      };

      // Add raw path traversal violations
      if (rawPathTraversal.violations.length > 0) {
        validationResult.violations.push(...rawPathTraversal.violations);
      }

      // 1. Protocol validation
      if (!this.config.allowedProtocols.includes(urlObj.protocol)) {
        validationResult.violations.push({
          type: 'INVALID_PROTOCOL',
          message: `Protocol '${urlObj.protocol}' is not allowed`,
          severity: 'HIGH'
        });
        return validationResult;
      }

      // 2. Hostname validation
      const hostnameCheck = this.validateHostname(urlObj.hostname);
      if (!hostnameCheck.allowed) {
        validationResult.violations.push({
          type: 'BLOCKED_HOSTNAME',
          message: hostnameCheck.reason,
          severity: 'HIGH'
        });
        return validationResult;
      }

      // 3. Port validation (URL.port is '' when the scheme default is used)
      const port = urlObj.port === ''
        ? this.getDefaultPort(urlObj.protocol)
        : Number.parseInt(urlObj.port, 10);
      if (this.config.blockedPorts.includes(port)) {
        validationResult.violations.push({
          type: 'BLOCKED_PORT',
          message: `Port ${port} is blocked`,
          severity: 'HIGH'
        });
        return validationResult;
      }

      // 4. DNS resolution and IP validation
      const ipValidation = await this.validateIP(urlObj.hostname);
      if (!ipValidation.allowed) {
        validationResult.violations.push({
          type: 'BLOCKED_IP',
          message: ipValidation.reason,
          severity: 'HIGH'
        });
        validationResult.metadata.resolvedIPs = ipValidation.ips;
        return validationResult;
      }

      // 5. Path validation
      const pathValidation = this.validatePath(urlObj.pathname);
      if (!pathValidation.allowed) {
        validationResult.violations.push({
          type: 'SUSPICIOUS_PATH',
          message: pathValidation.reason,
          severity: 'MEDIUM'
        });
      }

      // 6. URL length validation
      if (String(url).length > 2048) {
        validationResult.violations.push({
          type: 'URL_TOO_LONG',
          message: 'URL exceeds maximum length',
          severity: 'MEDIUM'
        });
      }

      // If no high-severity violations, allow the request
      const highSeverityViolations = validationResult.violations.filter(v => v.severity === 'HIGH');
      validationResult.allowed = highSeverityViolations.length === 0;
      validationResult.sanitizedURL = this.sanitizeURL(urlObj);
      validationResult.metadata.resolvedIPs = ipValidation.ips;

      return validationResult;
    } catch (error) {
      return {
        allowed: false,
        url: url,
        violations: [{
          type: 'INVALID_URL',
          message: `Invalid URL format: ${error.message}`,
          severity: 'HIGH'
        }],
        metadata: {
          error: error.message,
          validatedAt: new Date().toISOString()
        }
      };
    }
  }

  /**
   * Validate hostname against blocked patterns
   * @param {string} hostname
   * @returns {Object} - { allowed: true } or { allowed: false, reason }
   */
  validateHostname(hostname) {
    // "localhost." names the same host as "localhost"; drop the root-label dot
    // so a trailing dot cannot sidestep the exact-match and suffix checks.
    const normalizedHostname = String(hostname).toLowerCase().replace(/\.+$/, '');

    // Check exact matches
    if (this.config.blockedHostnames.includes(normalizedHostname)) {
      return {
        allowed: false,
        reason: `Hostname '${hostname}' is explicitly blocked`
      };
    }

    // Check for suspicious patterns
    const suspiciousPatterns = [
      /^metadata/i,
      /^consul/i,
      /^vault/i,
      /^admin/i,
      /^internal/i,
      /\.local$/i,
      /\.internal$/i,
    ];

    if (suspiciousPatterns.some(pattern => pattern.test(normalizedHostname))) {
      return {
        allowed: false,
        reason: `Hostname '${hostname}' matches blocked pattern`
      };
    }

    return { allowed: true };
  }

  /**
   * Resolve hostname to IP and validate against blocked ranges
   *
   * IP literals (including bracketed IPv6 as produced by URL.hostname) are
   * validated directly; anything else is resolved and EVERY returned address
   * must be allowed. Resolution failures are treated as blocked.
   *
   * @param {string} hostname
   * @returns {Promise<Object>} - { allowed, ips, reason? }
   */
  async validateIP(hostname) {
    try {
      // Check cache first
      const cacheKey = hostname.toLowerCase();
      const cached = this.cache.get(cacheKey);
      if (cached) {
        if (Date.now() - cached.timestamp < this.cacheMaxAge) {
          return cached.result;
        }
        this.cache.delete(cacheKey); // expired
      }

      // URL.hostname wraps IPv6 literals in brackets, which DNS cannot resolve
      const literal = hostname.startsWith('[') && hostname.endsWith(']')
        ? hostname.slice(1, -1)
        : hostname;

      let ips;
      if (net.isIP(literal) !== 0) {
        ips = [literal];
      } else {
        // all: true returns every record, so a host cannot hide a private
        // address behind a public first answer.
        const lookupResult = await dnsLookup(hostname, { family: 0, all: true });
        // Current Node resolves to [{ address, family }]; older promisified
        // versions resolve to { address: [...] }. Accept both shapes.
        const records = Array.isArray(lookupResult)
          ? lookupResult
          : [].concat(lookupResult?.address ?? []);
        ips = records.map(record => (typeof record === 'string' ? record : record.address));
        if (ips.length === 0) {
          throw new Error(`no addresses returned for '${hostname}'`);
        }
      }

      // Validate each resolved IP
      const blockedIP = ips.find(ip => !this.isIPAllowed(ip));
      const result = blockedIP === undefined
        ? { allowed: true, ips: ips }
        : {
            allowed: false,
            reason: `Resolved IP '${blockedIP}' is in blocked range`,
            ips: ips
          };
      this.cacheResult(cacheKey, result);
      return result;

    } catch (error) {
      // DNS resolution failed - could be suspicious
      return {
        allowed: false,
        reason: `DNS resolution failed: ${error.message}`,
        ips: []
      };
    }
  }

  /**
   * Check whether an IP address is outside every blocked range
   *
   * Fails closed for strings that are not IP addresses. IPv6 addresses that
   * embed an IPv4 address are additionally checked against the IPv4 ranges.
   *
   * @param {string} ip
   * @returns {boolean} - true when the address may be contacted
   */
  isIPAllowed(ip) {
    const family = net.isIP(ip);
    if (family === 0) {
      return false;
    }

    // Direct IP checks for common blocked addresses
    if (ip === '127.0.0.1' || ip === '::1' || ip === '0.0.0.0') {
      return false;
    }

    const candidates = [ip];
    if (family === 6) {
      const groups = this.parseIPv6Groups(ip);
      if (groups === null || groups.every(group => group === 0)) {
        return false; // unparsable, or the unspecified address '::'
      }
      const embeddedIPv4 = this.getEmbeddedIPv4(groups);
      if (embeddedIPv4 !== null) {
        candidates.push(embeddedIPv4);
      }
    }

    // Check against CIDR ranges
    return !candidates.some(candidate =>
      this.config.blockedIPRanges.some(range => this.isIPInRange(candidate, range))
    );
  }

  /**
   * Check if IP is in CIDR range
   *
   * A range without "/prefix" is treated as a single host. Address families
   * must match; mixed-family comparisons return false.
   *
   * @param {string} ip
   * @param {string} cidr
   * @returns {boolean}
   */
  isIPInRange(ip, cidr) {
    try {
      const [network, prefixLength] = String(cidr).split('/');
      const hasPrefix = prefixLength !== undefined;

      if (net.isIPv4(ip) && net.isIPv4(network)) {
        const prefix = hasPrefix ? Number.parseInt(prefixLength, 10) : 32;
        return this.isIPv4InRange(ip, network, prefix);
      } else if (net.isIPv6(ip) && net.isIPv6(network)) {
        const prefix = hasPrefix ? Number.parseInt(prefixLength, 10) : 128;
        return this.isIPv6InRange(ip, network, prefix);
      }

      return false;
    } catch (error) {
      return false;
    }
  }

  /**
   * Check if IPv4 address is in range
   * @param {string} ip
   * @param {string} network
   * @param {number} prefix - Prefix length, 0-32
   * @returns {boolean}
   */
  isIPv4InRange(ip, network, prefix) {
    if (!Number.isInteger(prefix) || prefix < 0 || prefix > 32) {
      return false;
    }
    // Shift counts wrap modulo 32, so "<< 32" is a no-op; /0 needs an explicit empty mask.
    const mask = prefix === 0 ? 0 : (0xffffffff << (32 - prefix)) >>> 0;
    const ipBits = (this.ipv4ToInt(ip) & mask) >>> 0;
    const networkBits = (this.ipv4ToInt(network) & mask) >>> 0;

    return ipBits === networkBits;
  }

  /**
   * Convert IPv4 to integer
   * @param {string} ip - Dotted-quad address
   * @returns {number} - Unsigned 32-bit value (NaN if an octet is not numeric)
   */
  ipv4ToInt(ip) {
    // Arithmetic instead of "<<" keeps the value unsigned for first octets >= 128
    return ip
      .split('.')
      .reduce((total, octet) => (total * 256) + Number.parseInt(octet, 10), 0);
  }

  /**
   * Check if IPv6 address is in range
   * @param {string} ip
   * @param {string} network
   * @param {number} prefix - Prefix length, 0-128
   * @returns {boolean}
   */
  isIPv6InRange(ip, network, prefix) {
    if (!Number.isInteger(prefix) || prefix < 0 || prefix > 128) {
      return false;
    }

    const ipGroups = this.parseIPv6Groups(ip);
    const networkGroups = this.parseIPv6Groups(network);
    if (ipGroups === null || networkGroups === null) {
      // If IPv6 parsing fails, report "not in range"; isIPAllowed rejects
      // unparsable addresses on its own.
      return false;
    }

    // Compare only the prefix bits, 16 at a time
    let remainingBits = prefix;
    for (let i = 0; i < 8 && remainingBits > 0; i++) {
      const bits = Math.min(16, remainingBits);
      const mask = (0xffff << (16 - bits)) & 0xffff;
      if ((ipGroups[i] & mask) !== (networkGroups[i] & mask)) {
        return false;
      }
      remainingBits -= bits;
    }

    return true;
  }

  /**
   * Expand an IPv6 address into its eight 16-bit groups
   *
   * Handles "::" compression, a trailing dotted-quad (e.g. ::ffff:10.0.0.1)
   * and a zone suffix (e.g. fe80::1%eth0).
   *
   * @param {string} ip
   * @returns {number[]|null} - Eight integers, or null if `ip` is not valid IPv6
   */
  parseIPv6Groups(ip) {
    let address = String(ip);
    const zoneIndex = address.indexOf('%');
    if (zoneIndex !== -1) {
      address = address.slice(0, zoneIndex);
    }
    if (!net.isIPv6(address)) {
      return null;
    }

    // Rewrite a trailing dotted quad as two hex groups
    const lastColon = address.lastIndexOf(':');
    const tail = address.slice(lastColon + 1);
    if (tail.includes('.')) {
      const v4 = this.ipv4ToInt(tail);
      const high = (v4 >>> 16).toString(16);
      const low = (v4 & 0xffff).toString(16);
      address = `${address.slice(0, lastColon + 1)}${high}:${low}`;
    }

    const halves = address.split('::');
    const compressed = halves.length > 1;
    const head = halves[0] ? halves[0].split(':') : [];
    const rest = compressed && halves[1] ? halves[1].split(':') : [];
    const fill = compressed ? 8 - head.length - rest.length : 0;
    if (fill < 0) {
      return null;
    }

    const groups = [...head, ...Array(fill).fill('0'), ...rest]
      .map(group => Number.parseInt(group, 16));
    if (groups.length !== 8 || groups.some(group => Number.isNaN(group))) {
      return null;
    }
    return groups;
  }

  /**
   * Extract the IPv4 address carried inside an IPv6 address, if any
   *
   * Recognizes IPv4-mapped (::ffff:a.b.c.d), deprecated IPv4-compatible
   * (::a.b.c.d) and the NAT64 well-known prefix (64:ff9b::/96).
   *
   * @param {number[]} groups - Eight groups as returned by parseIPv6Groups
   * @returns {string|null} - Dotted-quad address, or null when none is embedded
   */
  getEmbeddedIPv4(groups) {
    const leadingZeros = groups.slice(0, 5).every(group => group === 0);
    const isMapped = leadingZeros && groups[5] === 0xffff;
    const isCompatible = leadingZeros && groups[5] === 0;
    const isNat64 = groups[0] === 0x64 && groups[1] === 0xff9b &&
      groups.slice(2, 6).every(group => group === 0);

    if (!isMapped && !isCompatible && !isNat64) {
      return null;
    }

    const high = groups[6];
    const low = groups[7];
    return [high >> 8, high & 0xff, low >> 8, low & 0xff].join('.');
  }

  /**
   * Check for path traversal patterns in raw URL before parsing
   * @param {string} url - Raw URL to check
   * @returns {Object} - Result with violations array
   */
  checkRawPathTraversal(url) {
    const violations = [];

    // Path traversal patterns to check before URL normalization.
    // The literals are created per call, so the stateful "g" flag is harmless;
    // the flags are kept because the pattern text appears in the message.
    const pathTraversalPatterns = [
      /\.\.\//g,      // Basic path traversal ../
      /\.\.\\/g,      // Windows path traversal ..\
      /%2e%2e%2f/gi,  // URL encoded ../
      /%2e%2e%5c/gi,  // URL encoded ..\
      /%2e%2e/gi,     // URL encoded ..
      /\.\.%2f/gi,    // Mixed encoding
      /\.\.%5c/gi,    // Mixed encoding
    ];

    for (const pattern of pathTraversalPatterns) {
      if (pattern.test(url)) {
        violations.push({
          type: "SUSPICIOUS_PATH",
          message: `URL contains path traversal pattern: ${pattern}`,
          severity: "HIGH"
        });
        break; // Only report one path traversal violation
      }
    }

    return { violations };
  }

  /**
   * Validate URL path for suspicious patterns
   * @param {string} path
   * @returns {Object} - { allowed: true } or { allowed: false, reason }
   */
  validatePath(path) {
    const suspiciousPatterns = [
      /\.\.\//,     // Directory traversal
      /\/etc\//,    // System files
      /\/proc\//,   // System files
      /\/sys\//,    // System files
      /\/dev\//,    // Device files
      /\/tmp\//,    // Temporary files
      /\/var\/log/, // Log files
    ];

    for (const pattern of suspiciousPatterns) {
      if (pattern.test(path)) {
        return {
          allowed: false,
          reason: `Path contains suspicious pattern: ${pattern}`
        };
      }
    }

    return { allowed: true };
  }

  /**
   * Sanitize URL by removing potentially dangerous parts
   * @param {URL} urlObj
   * @returns {string} - URL string without credentials or fragment
   */
  sanitizeURL(urlObj) {
    const sanitized = new URL(urlObj.toString());

    // Remove authentication info
    sanitized.username = '';
    sanitized.password = '';

    // Remove fragment
    sanitized.hash = '';

    return sanitized.toString();
  }

  /**
   * Get default port for protocol
   * @param {string} protocol - Protocol including the trailing ':'
   * @returns {number}
   */
  getDefaultPort(protocol) {
    switch (protocol) {
      case 'http:': return 80;
      case 'https:': return 443;
      case 'ftp:': return 21;
      default: return 80;
    }
  }

  /**
   * Cache DNS resolution result
   * @param {string} key
   * @param {Object} result
   */
  cacheResult(key, result) {
    // Re-inserting moves the key to the end of the Map's insertion order and
    // avoids evicting an unrelated entry when only refreshing this one.
    this.cache.delete(key);

    if (this.cache.size >= this.cacheMaxSize) {
      // Remove oldest entry
      const firstKey = this.cache.keys().next().value;
      this.cache.delete(firstKey);
    }

    this.cache.set(key, {
      result,
      timestamp: Date.now()
    });
  }

  /**
   * Derive the request options for the next hop of a redirect chain
   *
   * Mirrors what `fetch` does under redirect: 'follow': 303 (and 301/302 for
   * POST) turn the request into a body-less GET, and credential headers are
   * not replayed to a different origin.
   *
   * @param {Object} requestOptions - Options used for the request that was redirected
   * @param {number} status - Redirect status code
   * @param {string} fromUrl - URL that produced the redirect
   * @param {string} toUrl - Validated redirect target
   * @returns {Object} - New options object (the input is not mutated)
   */
  buildRedirectOptions(requestOptions, status, fromUrl, toUrl) {
    const nextOptions = { ...requestOptions, headers: { ...requestOptions.headers } };
    const dropHeaders = (names) => {
      for (const key of Object.keys(nextOptions.headers)) {
        if (names.includes(key.toLowerCase())) {
          delete nextOptions.headers[key];
        }
      }
    };

    const method = String(requestOptions.method || 'GET').toUpperCase();
    const becomesGet =
      (status === 303 && method !== 'GET' && method !== 'HEAD') ||
      ((status === 301 || status === 302) && method === 'POST');
    if (becomesGet) {
      nextOptions.method = 'GET';
      delete nextOptions.body;
      dropHeaders(['content-length', 'content-type', 'content-encoding', 'content-language', 'content-location']);
    }

    if (new URL(fromUrl).origin !== new URL(toUrl).origin) {
      dropHeaders(['authorization', 'proxy-authorization', 'cookie']);
    }

    return nextOptions;
  }

  /**
   * Create secure fetch wrapper with SSRF protection
   *
   * The returned function validates the URL, follows redirects manually and
   * re-validates (policy and whitelist) every hop. The response-size limit is
   * enforced from the Content-Length header only.
   *
   * @param {Object} options
   * @param {string[]} [options.allowedDomains] - When non-empty, only these domains and their subdomains may be contacted
   * @param {number} [options.maxRequestSize] - Maximum accepted Content-Length in bytes
   * @returns {Function} - async (url, fetchOptions) => Response; throws Error on any violation
   */
  createSecureFetch(options = {}) {
    const { allowedDomains = [], maxRequestSize = this.config.maxRequestSize } = options;

    const assertDomainAllowed = (targetUrl) => {
      if (allowedDomains.length === 0) {
        return;
      }
      const { hostname } = new URL(targetUrl);
      const isAllowed = allowedDomains.some(domain =>
        hostname === domain || hostname.endsWith('.' + domain)
      );
      if (!isAllowed) {
        throw new Error(`SSRF Protection: Domain not in whitelist`);
      }
    };

    // Object spread silently drops the contents of a Headers instance (and of
    // an array of pairs), so flatten those into a plain object first.
    const toPlainHeaders = (headers) => {
      if (!headers) {
        return {};
      }
      if (typeof headers[Symbol.iterator] === 'function') {
        return Object.fromEntries(headers);
      }
      return { ...headers };
    };

    // The global fetch ignores a "timeout" option; the deadline has to be
    // expressed as an AbortSignal. Combined with the caller's signal when the
    // runtime supports AbortSignal.any.
    const createDeadlineSignal = (milliseconds, callerSignal) => {
      if (typeof AbortSignal === 'undefined' || typeof AbortSignal.timeout !== 'function') {
        return callerSignal;
      }
      const deadline = AbortSignal.timeout(milliseconds);
      if (!callerSignal) {
        return deadline;
      }
      return typeof AbortSignal.any === 'function'
        ? AbortSignal.any([callerSignal, deadline])
        : callerSignal;
    };

    return async (url, fetchOptions = {}) => {
      // Validate URL
      const validation = await this.validateURL(url);
      if (!validation.allowed) {
        const violations = validation.violations.map(v => v.message).join(', ');
        throw new Error(`SSRF Protection: ${violations}`);
      }

      // Check domain whitelist if provided
      assertDomainAllowed(validation.sanitizedURL);

      // Set secure defaults
      const timeout = Math.min(fetchOptions.timeout || 30000, this.config.maxTimeout);
      let requestOptions = {
        ...fetchOptions,
        timeout, // honoured by node-fetch style implementations
        redirect: 'manual', // Handle redirects manually
        headers: {
          'User-Agent': 'CrawlForge/3.0 (Security Enhanced)',
          ...toPlainHeaders(fetchOptions.headers)
        }
      };
      const signal = createDeadlineSignal(timeout, fetchOptions.signal);
      if (signal) {
        requestOptions.signal = signal;
      }

      // Perform the request: the initial fetch plus at most maxRedirects hops
      let currentUrl = validation.sanitizedURL;
      for (let redirectCount = 0; redirectCount <= this.config.maxRedirects; redirectCount++) {
        const response = await fetch(currentUrl, requestOptions);

        // Check response size
        const contentLength = response.headers.get('content-length');
        if (contentLength && Number.parseInt(contentLength, 10) > maxRequestSize) {
          throw new Error('SSRF Protection: Response size exceeds limit');
        }

        // Handle redirects manually; a 3xx without Location is returned as-is
        const isRedirect = response.status >= 300 && response.status < 400;
        const location = isRedirect ? response.headers.get('location') : null;
        if (!location) {
          return response;
        }

        // The redirect body is never read; release it (best effort, errors are irrelevant)
        try {
          await response.body?.cancel?.();
        } catch {
          // ignore: failing to discard an unused body must not fail the request
        }

        // Location may be relative; resolve it against the URL that issued it
        let redirectTarget;
        try {
          redirectTarget = new URL(location, currentUrl).toString();
        } catch {
          throw new Error('SSRF Protection: Redirect to blocked URL');
        }

        // Validate redirect URL
        const redirectValidation = await this.validateURL(redirectTarget);
        if (!redirectValidation.allowed) {
          throw new Error('SSRF Protection: Redirect to blocked URL');
        }
        assertDomainAllowed(redirectValidation.sanitizedURL);

        requestOptions = this.buildRedirectOptions(
          requestOptions,
          response.status,
          currentUrl,
          redirectValidation.sanitizedURL
        );
        currentUrl = redirectValidation.sanitizedURL;
      }

      throw new Error('SSRF Protection: Too many redirects');
    };
  }

  /**
   * Get SSRF protection statistics
   * @returns {Object}
   */
  getStats() {
    return {
      cacheSize: this.cache.size,
      cacheMaxSize: this.cacheMaxSize,
      cacheMaxAge: this.cacheMaxAge,
      blockedRanges: this.config.blockedIPRanges.length,
      blockedHostnames: this.config.blockedHostnames.length,
      blockedPorts: this.config.blockedPorts.length,
      maxRequestSize: this.config.maxRequestSize,
      maxTimeout: this.config.maxTimeout,
      maxRedirects: this.config.maxRedirects
    };
  }

  /**
   * Clear DNS cache
   */
  clearCache() {
    this.cache.clear();
  }
}

export default SSRFProtection;