crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,196 @@
1
/**
 * Fixed-window rate limiter that enforces a per-second and a per-minute
 * request budget, either per domain or globally.
 *
 * `checkLimit` does not reject when a budget is exhausted; it waits until the
 * exhausted window ends and then admits the request.
 */
export class RateLimiter {
  /**
   * @param {Object} [options]
   * @param {number} [options.requestsPerSecond=10] - Requests admitted per second window.
   * @param {number} [options.requestsPerMinute=100] - Requests admitted per minute window.
   * @param {boolean} [options.perDomain=true] - Track budgets per domain instead of one shared budget.
   */
  constructor(options = {}) {
    const {
      requestsPerSecond = 10,
      requestsPerMinute = 100,
      perDomain = true
    } = options;

    this.requestsPerSecond = requestsPerSecond;
    this.requestsPerMinute = requestsPerMinute;
    this.perDomain = perDomain;
    this.windowMs = 1000; // length of the short ("second") window in ms
    this.limits = new Map(); // key -> { secondCount, secondReset, minuteCount, minuteReset }
  }

  /**
   * Wait until a request for the given URL/domain fits in both windows, then
   * count it.
   *
   * NOTE: a budget of 0 (or less) can never be satisfied, so this call would
   * wait forever with such a configuration.
   *
   * @param {string} urlOrDomain - Full http(s) URL or a bare domain name.
   * @returns {Promise<boolean>} Always resolves to `true` once the request is admitted.
   */
  async checkLimit(urlOrDomain) {
    const key = this.perDomain ? this.extractDomain(urlOrDomain) : 'global';
    const minuteWindowMs = 60000;

    // Loop instead of recursing so that long waits do not grow a promise chain.
    for (;;) {
      const now = Date.now();

      // Re-read the entry on every pass: reset() may have removed it while we slept.
      let limit = this.limits.get(key);
      if (!limit) {
        limit = {
          secondCount: 0,
          secondReset: now + this.windowMs,
          minuteCount: 0,
          minuteReset: now + minuteWindowMs
        };
        this.limits.set(key, limit);
      }

      // `>=` (not `>`): at the exact boundary the window is over. With `>` the
      // caller would compute a 0 ms wait and spin until the clock advanced.
      if (now >= limit.secondReset) {
        limit.secondCount = 0;
        limit.secondReset = now + this.windowMs;
      }
      if (now >= limit.minuteReset) {
        limit.minuteCount = 0;
        limit.minuteReset = now + minuteWindowMs;
      }

      if (limit.secondCount >= this.requestsPerSecond) {
        await this.delay(limit.secondReset - now);
        continue;
      }
      if (limit.minuteCount >= this.requestsPerMinute) {
        await this.delay(limit.minuteReset - now);
        continue;
      }

      limit.secondCount++;
      limit.minuteCount++;
      return true;
    }
  }

  /**
   * Reduce an http(s) URL to its hostname; any other value is returned as-is.
   *
   * @param {string} urlOrDomain - URL or bare domain.
   * @returns {string} Hostname for http(s) URLs, otherwise the input unchanged
   *   (including unparsable URLs and non-string values).
   */
  extractDomain(urlOrDomain) {
    try {
      if (urlOrDomain.startsWith('http://') || urlOrDomain.startsWith('https://')) {
        return new URL(urlOrDomain).hostname;
      }
      return urlOrDomain;
    } catch {
      // Unparsable URL or non-string input: fall back to the raw value as key.
      return urlOrDomain;
    }
  }

  /**
   * @param {number} ms - Milliseconds to wait.
   * @returns {Promise<void>} Resolves after the timer fires.
   */
  delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Forget the counters of one domain, or of every domain when called
   * without an argument.
   *
   * @param {string} [domain] - Domain or http(s) URL; URLs are reduced to their
   *   hostname so they match the key used by `checkLimit`.
   */
  reset(domain) {
    if (domain) {
      this.limits.delete(this.extractDomain(domain));
    } else {
      this.limits.clear();
    }
  }

  /**
   * Snapshot of the current counters.
   *
   * @returns {Object<string, {secondCount: number, minuteCount: number,
   *   secondsUntilReset: number, minutesUntilReset: number}>} Stats keyed by
   *   domain (or `'global'`); the remaining times are rounded up.
   */
  getStats() {
    const now = Date.now();
    const stats = {};
    for (const [domain, limit] of this.limits.entries()) {
      stats[domain] = {
        secondCount: limit.secondCount,
        minuteCount: limit.minuteCount,
        secondsUntilReset: Math.max(0, Math.ceil((limit.secondReset - now) / 1000)),
        minutesUntilReset: Math.max(0, Math.ceil((limit.minuteReset - now) / 60000))
      };
    }
    return stats;
  }
}
101
+
102
/**
 * Per-domain circuit breaker. After `threshold` consecutive failures the
 * breaker for that domain opens and rejects calls until `resetTimeout` ms have
 * passed; the next call is then let through as a probe (HALF_OPEN).
 */
export class CircuitBreaker {
  /**
   * @param {Object} [options]
   * @param {number} [options.threshold=5] - Failure count at which the breaker opens.
   * @param {number} [options.timeout=60000] - Max time in ms a single operation may take.
   * @param {number} [options.resetTimeout=120000] - Time in ms an open breaker stays closed to calls.
   */
  constructor(options = {}) {
    const {
      threshold = 5,
      timeout = 60000,
      resetTimeout = 120000
    } = options;

    this.threshold = threshold;
    this.timeout = timeout;
    this.resetTimeout = resetTimeout;
    this.failures = new Map(); // domain -> { count, state, nextAttempt }
  }

  /**
   * Run `fn` under the breaker for `domain`, racing it against the
   * operation timeout.
   *
   * @param {string} domain - Key identifying the protected resource.
   * @param {Function} fn - Operation to run; may return a value or a promise.
   * @returns {Promise<*>} Whatever `fn` resolves to.
   * @throws {Error} `Circuit breaker is OPEN for <domain>` while the breaker is
   *   open, `Operation timeout` when `fn` is too slow, or the error from `fn`.
   */
  async execute(domain, fn) {
    const breaker = this.getBreaker(domain);

    if (breaker.state === 'OPEN') {
      if (Date.now() < breaker.nextAttempt) {
        throw new Error(`Circuit breaker is OPEN for ${domain}`);
      }
      breaker.state = 'HALF_OPEN';
    }

    const timeout = this.timeoutPromise();
    try {
      const result = await Promise.race([fn(), timeout]);
      this.onSuccess(domain);
      return result;
    } catch (error) {
      this.onFailure(domain);
      throw error;
    } finally {
      // Always release the timer; otherwise every call leaves a pending
      // timeout behind that keeps the event loop alive for `this.timeout` ms.
      if (typeof timeout.cancel === 'function') {
        timeout.cancel();
      }
    }
  }

  /**
   * Fetch the state record for a domain, creating a CLOSED one on first use.
   *
   * @param {string} domain
   * @returns {{count: number, state: string, nextAttempt: number}}
   */
  getBreaker(domain) {
    if (!this.failures.has(domain)) {
      this.failures.set(domain, {
        count: 0,
        state: 'CLOSED',
        nextAttempt: Date.now()
      });
    }
    return this.failures.get(domain);
  }

  /**
   * Record a successful call: clears the failure count and closes the breaker.
   *
   * @param {string} domain
   */
  onSuccess(domain) {
    const breaker = this.getBreaker(domain);
    breaker.count = 0;
    breaker.state = 'CLOSED';
  }

  /**
   * Record a failed call and open the breaker once the threshold is reached.
   *
   * @param {string} domain
   */
  onFailure(domain) {
    const breaker = this.getBreaker(domain);
    breaker.count++;

    if (breaker.count >= this.threshold) {
      breaker.state = 'OPEN';
      breaker.nextAttempt = Date.now() + this.resetTimeout;
    }
  }

  /**
   * Build a promise that rejects with `Operation timeout` after
   * `this.timeout` ms.
   *
   * @returns {Promise<never>} The promise carries an extra `cancel()` method
   *   that clears the underlying timer; after `cancel()` it never settles.
   */
  timeoutPromise() {
    let timer;
    const promise = new Promise((_, reject) => {
      timer = setTimeout(() => reject(new Error('Operation timeout')), this.timeout);
    });
    promise.cancel = () => clearTimeout(timer);
    return promise;
  }

  /**
   * Drop the state of one domain, or of all domains when called without an
   * argument.
   *
   * @param {string} [domain]
   */
  reset(domain) {
    if (domain) {
      this.failures.delete(domain);
    } else {
      this.failures.clear();
    }
  }

  /**
   * Snapshot of every tracked breaker.
   *
   * @returns {Object<string, {failureCount: number, state: string,
   *   nextAttemptIn: number}>} `nextAttemptIn` is the number of seconds
   *   (rounded up) until an OPEN breaker admits a probe, otherwise 0.
   */
  getStats() {
    const now = Date.now();
    const stats = {};
    for (const [domain, breaker] of this.failures.entries()) {
      stats[domain] = {
        failureCount: breaker.count,
        state: breaker.state,
        nextAttemptIn: breaker.state === 'OPEN'
          ? Math.max(0, Math.ceil((breaker.nextAttempt - now) / 1000))
          : 0
      };
    }
    return stats;
  }
}
195
+
196
+ export default RateLimiter;
@@ -0,0 +1,91 @@
1
+ import robotsParser from 'robots-parser';
2
+
3
/**
 * Checks URLs against their site's robots.txt and caches one parsed
 * robots.txt per origin. Fails open: when robots.txt cannot be fetched or
 * checked, crawling is treated as allowed.
 */
export class RobotsChecker {
  /**
   * @param {string} [userAgent='CrawlForge/1.0'] - User agent sent when fetching
   *   robots.txt and used when evaluating its rules.
   */
  constructor(userAgent = 'CrawlForge/1.0') {
    this.userAgent = userAgent;
    this.robotsCache = new Map(); // robots.txt URL -> parsed robots object
  }

  /**
   * Build the robots.txt URL for the site that serves `url`.
   *
   * @param {string} url - Absolute URL.
   * @returns {string} `<protocol>//<host>/robots.txt`.
   * @throws {TypeError} When `url` is not a valid absolute URL.
   */
  getRobotsUrl(url) {
    const { protocol, host } = new URL(url);
    return `${protocol}//${host}/robots.txt`;
  }

  /**
   * Decide whether `url` may be fetched by this user agent.
   *
   * @param {string} url - Absolute URL to check.
   * @returns {Promise<boolean>} Result of the robots.txt check; `true` when the
   *   check itself fails (invalid URL, parser error).
   */
  async canFetch(url) {
    try {
      const robotsUrl = this.getRobotsUrl(url);

      let robots = this.robotsCache.get(robotsUrl);
      if (!robots) {
        const robotsTxt = await this.fetchRobotsTxt(robotsUrl);
        robots = robotsParser(robotsUrl, robotsTxt);
        this.robotsCache.set(robotsUrl, robots);
      }

      return robots.isAllowed(url, this.userAgent);
    } catch (error) {
      // If we can't check robots.txt, assume we can crawl
      console.warn(`Failed to check robots.txt for ${url}:`, error.message);
      return true;
    }
  }

  /**
   * Download a robots.txt file, giving up after 5 seconds.
   *
   * @param {string} robotsUrl - URL of the robots.txt file.
   * @returns {Promise<string>} File contents, or `''` (no restrictions) when the
   *   response is not OK, the request fails, or the time limit is hit.
   */
  async fetchRobotsTxt(robotsUrl) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 5000);

    try {
      const response = await fetch(robotsUrl, {
        signal: controller.signal,
        headers: {
          'User-Agent': this.userAgent
        }
      });

      if (!response.ok) {
        return ''; // Empty robots.txt means everything is allowed
      }

      return await response.text();
    } catch {
      return ''; // If we can't fetch, assume no restrictions
    } finally {
      // Cleared on every path (including failures) so no timer is left
      // pending, and only after the body was read so the 5 s limit also
      // bounds a stalled download.
      clearTimeout(timeoutId);
    }
  }

  /**
   * Crawl delay for this user agent, taken from an already cached robots.txt.
   * Does not fetch anything.
   *
   * @param {string} url - Any URL on the site.
   * @returns {number} The cached crawl delay, or 0 when none is set, nothing is
   *   cached for the site, or `url` is invalid.
   */
  getCrawlDelay(url) {
    try {
      const robots = this.robotsCache.get(this.getRobotsUrl(url));
      return robots ? (robots.getCrawlDelay(this.userAgent) || 0) : 0;
    } catch {
      return 0;
    }
  }

  /**
   * Sitemap URLs listed in an already cached robots.txt. Does not fetch
   * anything.
   *
   * @param {string} url - Any URL on the site.
   * @returns {Array} The cached sitemap list, or `[]` when nothing is cached for
   *   the site or `url` is invalid.
   */
  getSitemaps(url) {
    try {
      const robots = this.robotsCache.get(this.getRobotsUrl(url));
      return robots ? (robots.getSitemaps() || []) : [];
    } catch {
      return [];
    }
  }

  /** Drop every cached robots.txt. */
  clearCache() {
    this.robotsCache.clear();
  }
}
90
+
91
+ export default RobotsChecker;
@@ -0,0 +1,416 @@
1
+ /**
2
+ * Security Middleware for MCP WebScraper
3
+ * Integrates SSRF protection, input validation, and other security measures
4
+ */
5
+
6
import { createHash, timingSafeEqual } from 'node:crypto';

import { SSRFProtection } from './ssrfProtection.js';
import { InputValidator } from './inputValidation.js';
import { config } from '../constants/config.js';
import { Logger } from './Logger.js';
10
+
11
// Module-level security components, built once from the security section of
// the config and shared by every SecurityMiddleware instance.
const {
  ssrfProtection: ssrfSettings,
  inputValidation: validationSettings,
  contentSecurity: contentSettings
} = config.security;

// SSRF guard; note the config key `blockedDomains` feeds the `blockedHostnames` option.
const ssrfProtection = new SSRFProtection({
  allowedProtocols: ssrfSettings.allowedProtocols,
  maxRequestSize: ssrfSettings.maxRequestSize,
  maxTimeout: ssrfSettings.maxTimeout,
  maxRedirects: ssrfSettings.maxRedirects,
  blockedHostnames: ssrfSettings.blockedDomains
});

// Input validator; size limits come from `inputValidation`, the HTML tag
// allow-list from `contentSecurity`.
const inputValidator = new InputValidator({
  maxStringLength: validationSettings.maxStringLength,
  maxArrayLength: validationSettings.maxArrayLength,
  maxObjectDepth: validationSettings.maxObjectDepth,
  maxRegexLength: validationSettings.maxRegexLength,
  allowedHTMLTags: contentSettings.allowedHTMLTags
});

const logger = new Logger();
29
+
30
/**
 * Security middleware class for MCP tools.
 *
 * Wraps the module-level SSRF protection and input validator, keeps counters
 * of rejected input in `violationStats`, and logs every violation.
 */
export class SecurityMiddleware {
  /**
   * @param {Object} [options] - Accepted for call compatibility; currently unused.
   */
  constructor(options = {}) {
    this.ssrfProtection = ssrfProtection;
    this.inputValidator = inputValidator;
    this.logger = logger;
    this.config = config.security;
    this.violationStats = {
      totalViolations: 0,
      blockedRequests: 0,
      ssrfBlocked: 0,
      injectionBlocked: 0,
      validationErrors: 0
    };
  }

  /**
   * Validate URL parameter for SSRF protection
   * @param {string} url - URL to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Promise<Object>} - `{ allowed, sanitizedURL, violations }`, or
   *   `{ allowed: false, error }` when the validator itself throws
   */
  async validateURL(url, context = {}) {
    if (!this.config.ssrfProtection.enabled) {
      return { allowed: true, sanitizedURL: url };
    }

    try {
      const result = await this.ssrfProtection.validateURL(url);

      if (!result.allowed) {
        this.violationStats.ssrfBlocked++;
        this.violationStats.blockedRequests++;

        this.logSecurityViolation('SSRF_BLOCKED', {
          url,
          violations: result.violations,
          context
        });
      }

      return {
        allowed: result.allowed,
        sanitizedURL: result.sanitizedURL || url,
        violations: result.violations
      };
    } catch (error) {
      this.logger.error('SSRF validation error:', error);
      // Counted like the failures of the other validators in this class.
      this.violationStats.validationErrors++;
      return { allowed: false, error: error.message };
    }
  }

  /**
   * Validate search query parameters
   * @param {string} query - Search query to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Object} - Validator result, or `{ isValid: false, error }` when
   *   the validator throws
   */
  validateSearchQuery(query, context = {}) {
    if (!this.config.inputValidation.enabled) {
      return { isValid: true, sanitizedValue: query };
    }

    try {
      const result = this.inputValidator.validateSearchQuery(query);

      if (!result.isValid) {
        this.violationStats.injectionBlocked++;
        this.violationStats.blockedRequests++;

        this.logSecurityViolation('INJECTION_BLOCKED', {
          query,
          violations: result.violations,
          context
        });
      }

      return result;
    } catch (error) {
      this.logger.error('Query validation error:', error);
      this.violationStats.validationErrors++;
      return { isValid: false, error: error.message };
    }
  }

  /**
   * Validate CSS selector parameters
   * @param {string} selector - CSS selector to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Object} - Validator result, or `{ isValid: false, error }` when
   *   the validator throws
   */
  validateCSSSelector(selector, context = {}) {
    if (!this.config.inputValidation.enabled) {
      return { isValid: true, sanitizedValue: selector };
    }

    try {
      const result = this.inputValidator.validateCSSSelector(selector);

      if (!result.isValid) {
        this.violationStats.injectionBlocked++;
        this.violationStats.blockedRequests++;

        this.logSecurityViolation('CSS_INJECTION_BLOCKED', {
          selector,
          violations: result.violations,
          context
        });
      }

      return result;
    } catch (error) {
      this.logger.error('CSS validation error:', error);
      this.violationStats.validationErrors++;
      return { isValid: false, error: error.message };
    }
  }

  /**
   * Validate object parameters
   * @param {Object} obj - Object to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Object} - Validator result, or `{ isValid: false, error }` when
   *   the validator throws
   */
  validateObject(obj, context = {}) {
    if (!this.config.inputValidation.enabled) {
      return { isValid: true, sanitizedValue: obj };
    }

    try {
      const result = this.inputValidator.validateObject(obj);

      if (!result.isValid) {
        this.violationStats.validationErrors++;

        // Only the key names are logged, never the values.
        this.logSecurityViolation('OBJECT_VALIDATION_FAILED', {
          objectKeys: Object.keys(obj || {}),
          violations: result.violations,
          context
        });
      }

      return result;
    } catch (error) {
      this.logger.error('Object validation error:', error);
      this.violationStats.validationErrors++;
      return { isValid: false, error: error.message };
    }
  }

  /**
   * Validate HTML content
   * @param {string} html - HTML content to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Object} - Validator result, or `{ isValid: false, error }` when
   *   the validator throws
   */
  validateHTML(html, context = {}) {
    if (!this.config.contentSecurity.sanitizeHTML) {
      return { isValid: true, sanitizedValue: html };
    }

    try {
      const result = this.inputValidator.validateHTML(html);

      if (!result.isValid) {
        this.violationStats.injectionBlocked++;

        this.logSecurityViolation('HTML_XSS_BLOCKED', {
          // Guarded so that a non-string input is still logged instead of
          // turning the rejection into a TypeError.
          htmlLength: typeof html === 'string' ? html.length : 0,
          violations: result.violations,
          context
        });
      }

      return result;
    } catch (error) {
      this.logger.error('HTML validation error:', error);
      this.violationStats.validationErrors++;
      return { isValid: false, error: error.message };
    }
  }

  /**
   * Create secure fetch function with SSRF protection
   * @param {Object} options - Fetch options; they override the configured
   *   `allowedDomains` / `maxRequestSize`
   * @returns {Function} - Secure fetch function
   */
  createSecureFetch(options = {}) {
    return this.ssrfProtection.createSecureFetch({
      allowedDomains: this.config.ssrfProtection.allowedDomains,
      maxRequestSize: this.config.ssrfProtection.maxRequestSize,
      ...options
    });
  }

  /**
   * Validate the well-known parameters of a tool call: `url`, `query`,
   * `selectors`, `options`, `include_patterns` and `exclude_patterns`.
   * The caller's `params` (including `params.selectors`) is never modified;
   * sanitized values are written to a copy.
   *
   * @param {Object} params - Tool parameters
   * @param {string} toolName - Name of the tool
   * @returns {Promise<Object>} - `{ isValid, violations, sanitizedParams }`
   */
  async validateToolParameters(params, toolName) {
    const input = params ?? {};
    const results = {
      isValid: true,
      violations: [],
      sanitizedParams: { ...input }
    };

    const context = { toolName, timestamp: new Date().toISOString() };

    // Marks the call invalid and collects whatever violations were reported.
    const reject = (violations) => {
      results.isValid = false;
      results.violations.push(...(violations || []));
    };

    // URL validation for tools that accept URLs
    if (input.url) {
      const urlResult = await this.validateURL(input.url, context);
      if (urlResult.allowed) {
        results.sanitizedParams.url = urlResult.sanitizedURL;
      } else {
        reject(urlResult.violations);
      }
    }

    // Search query validation
    if (input.query) {
      const queryResult = this.validateSearchQuery(input.query, context);
      if (queryResult.isValid) {
        results.sanitizedParams.query = queryResult.sanitizedValue;
      } else {
        reject(queryResult.violations);
      }
    }

    // CSS selectors validation
    const { selectors } = input;
    if (typeof selectors === 'string' && selectors) {
      // A single selector given as a plain string is validated as one unit.
      const selectorResult = this.validateCSSSelector(selectors, context);
      if (selectorResult.isValid) {
        results.sanitizedParams.selectors = selectorResult.sanitizedValue;
      } else {
        reject(selectorResult.violations);
      }
    } else if (selectors) {
      // Copy first: `sanitizedParams` is only a shallow copy of the input, so
      // writing into `sanitizedParams.selectors` directly would change the
      // caller's object.
      const sanitizedSelectors = Array.isArray(selectors) ? [...selectors] : { ...selectors };
      results.sanitizedParams.selectors = sanitizedSelectors;

      for (const [key, selector] of Object.entries(selectors)) {
        const selectorResult = this.validateCSSSelector(selector, context);
        if (selectorResult.isValid) {
          sanitizedSelectors[key] = selectorResult.sanitizedValue;
        } else {
          reject(selectorResult.violations);
        }
      }
    }

    // Object validation for complex parameters
    if (input.options && typeof input.options === 'object') {
      const objectResult = this.validateObject(input.options, context);
      if (objectResult.isValid) {
        results.sanitizedParams.options = objectResult.sanitizedValue;
      } else {
        reject(objectResult.violations);
      }
    }

    // Validate arrays (like include_patterns, exclude_patterns)
    for (const paramName of ['include_patterns', 'exclude_patterns']) {
      const patterns = input[paramName];
      if (!Array.isArray(patterns)) {
        continue;
      }

      for (const pattern of patterns) {
        if (typeof pattern !== 'string') {
          continue;
        }

        const regexResult = this.inputValidator.validateRegex(pattern);
        if (!regexResult.isValid) {
          reject(regexResult.violations);
          this.logSecurityViolation('REGEX_VALIDATION_FAILED', {
            pattern,
            violations: regexResult.violations,
            context
          });
        }
      }
    }

    return results;
  }

  /**
   * Log security violations
   * @param {string} type - Violation type
   * @param {Object} details - Violation details
   */
  logSecurityViolation(type, details) {
    this.violationStats.totalViolations++;

    const monitoring = this.config.monitoring;
    const severity = this.getViolationSeverity(details.violations);

    // Both log channels are on unless explicitly set to `false`.
    if (monitoring?.violationLogging !== false) {
      this.logger.warn('Security violation detected', {
        type,
        details,
        timestamp: new Date().toISOString(),
        severity
      });
    }

    // Log high-severity violations to security log
    if (monitoring?.securityLogging !== false && severity === 'HIGH') {
      const truncate = (value, max) =>
        (typeof value === 'string' && value ? value.substring(0, max) : undefined);
      const shortQuery = truncate(details.query, 100);

      // Don't log full content for security
      const redacted = {
        ...details,
        input: shortQuery,
        url: truncate(details.url, 200)
      };
      if ('query' in details) {
        // The spread above copied the untruncated query; replace it as well.
        redacted.query = shortQuery;
      }

      this.logger.error('High-severity security violation', {
        type,
        details: redacted
      });
    }
  }

  /**
   * Get violation severity level
   * @param {Array} violations - Array of violations
   * @returns {string} - `'HIGH'` or `'MEDIUM'` when any violation carries that
   *   severity, otherwise `'LOW'` (also for a missing or non-array argument)
   */
  getViolationSeverity(violations = []) {
    if (!Array.isArray(violations)) return 'LOW';
    if (violations.some(v => v.severity === 'HIGH')) return 'HIGH';
    if (violations.some(v => v.severity === 'MEDIUM')) return 'MEDIUM';
    return 'LOW';
  }

  /**
   * Get security statistics
   * @returns {Object} - Violation counters, statistics of the SSRF protection
   *   and input validator, and the enabled flags from the config
   */
  getSecurityStats() {
    return {
      violations: this.violationStats,
      ssrfStats: this.ssrfProtection.getStats(),
      validationStats: this.inputValidator.getStats(),
      configEnabled: {
        ssrfProtection: this.config.ssrfProtection.enabled,
        inputValidation: this.config.inputValidation.enabled,
        contentSecurity: this.config.contentSecurity.sanitizeHTML,
        auditLogging: this.config.apiSecurity.auditLogging
      }
    };
  }

  /**
   * Reset security statistics; also clears the SSRF protection cache and the
   * input validator's violation log.
   */
  resetStats() {
    this.violationStats = {
      totalViolations: 0,
      blockedRequests: 0,
      ssrfBlocked: 0,
      injectionBlocked: 0,
      validationErrors: 0
    };
    this.ssrfProtection.clearCache();
    this.inputValidator.clearViolationLog();
  }

  /**
   * Check if request should be authenticated
   * @param {Object} request - Request object (not inspected; the answer comes
   *   from the config alone)
   * @returns {boolean} - Whether authentication is required
   */
  requiresAuthentication(request) {
    return this.config.apiSecurity.requireAuthentication;
  }

  /**
   * Validate API key
   * @param {string} apiKey - API key to validate
   * @returns {boolean} - `true` when authentication is disabled, or when
   *   `apiKey` equals the configured, non-empty key; `false` otherwise
   *   (including when no key is configured)
   */
  validateAPIKey(apiKey) {
    const { requireAuthentication, apiKey: expectedKey } = this.config.apiSecurity;

    if (!requireAuthentication) {
      return true;
    }

    // Without these guards an unset configured key made `undefined` compare
    // equal and then crash on `.length`.
    if (
      typeof apiKey !== 'string' ||
      typeof expectedKey !== 'string' ||
      expectedKey.length === 0
    ) {
      return false;
    }

    // Compare fixed-length digests in constant time so the comparison does not
    // leak how many leading characters of the key were correct.
    const digest = (value) => createHash('sha256').update(value, 'utf8').digest();
    return timingSafeEqual(digest(apiKey), digest(expectedKey));
  }
}
412
+
413
+ // Export singleton instance
414
+ export const securityMiddleware = new SecurityMiddleware();
415
+
416
+ export default securityMiddleware;