crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
 * Request throttle with a per-second and a per-minute budget.
 *
 * Counters are tracked per key: the request's hostname when `perDomain` is
 * true, otherwise the single shared key 'global'. `checkLimit` never rejects;
 * it waits until the request fits into both windows and then resolves.
 */
export class RateLimiter {
  /**
   * @param {Object} [options]
   * @param {number} [options.requestsPerSecond=10] - Budget per one-second window.
   * @param {number} [options.requestsPerMinute=100] - Budget per one-minute window.
   * @param {boolean} [options.perDomain=true] - Track each hostname separately.
   */
  constructor(options = {}) {
    const {
      requestsPerSecond = 10,
      requestsPerMinute = 100,
      perDomain = true
    } = options;

    this.requestsPerSecond = requestsPerSecond;
    this.requestsPerMinute = requestsPerMinute;
    this.perDomain = perDomain;
    this.windowMs = 1000; // 1 second window (kept for callers that read it)
    // key -> { secondCount, secondReset, minuteCount, minuteReset }
    this.limits = new Map();
  }

  /**
   * Wait until one more request is allowed for the given URL/domain, then
   * count it against both windows.
   *
   * @param {string} urlOrDomain - Absolute http(s) URL or a bare domain.
   * @returns {Promise<boolean>} Always resolves to `true` once the slot is taken.
   */
  async checkLimit(urlOrDomain) {
    const domain = this.extractDomain(urlOrDomain);
    const key = this.perDomain ? domain : 'global';

    // Loop instead of recursing: each pass re-reads the clock and the map, so
    // a reset() issued while we were sleeping is honoured.
    for (;;) {
      const now = Date.now();
      let limit = this.limits.get(key);

      if (!limit) {
        limit = {
          secondCount: 0,
          secondReset: now + 1000,
          minuteCount: 0,
          minuteReset: now + 60000
        };
        this.limits.set(key, limit);
      }

      // `>=`: a waiter that wakes exactly at the reset instant must see a
      // fresh window instead of scheduling another zero-length wait.
      if (now >= limit.secondReset) {
        limit.secondCount = 0;
        limit.secondReset = now + 1000;
      }

      if (now >= limit.minuteReset) {
        limit.minuteCount = 0;
        limit.minuteReset = now + 60000;
      }

      if (limit.secondCount >= this.requestsPerSecond) {
        await this.delay(limit.secondReset - now);
        continue;
      }

      if (limit.minuteCount >= this.requestsPerMinute) {
        await this.delay(limit.minuteReset - now);
        continue;
      }

      limit.secondCount++;
      limit.minuteCount++;
      return true;
    }
  }

  /**
   * Reduce an absolute http(s) URL to its hostname; anything else (bare
   * domains, unparsable or non-string input) is returned unchanged.
   *
   * @param {string} urlOrDomain
   * @returns {string} Hostname, or the input itself.
   */
  extractDomain(urlOrDomain) {
    try {
      const isHttpUrl =
        urlOrDomain.startsWith('http://') || urlOrDomain.startsWith('https://');
      return isHttpUrl ? new URL(urlOrDomain).hostname : urlOrDomain;
    } catch {
      return urlOrDomain;
    }
  }

  /**
   * @param {number} ms - Milliseconds to wait.
   * @returns {Promise<void>} Resolves after the timer fires.
   */
  delay(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Forget the counters of one key, or of every key when called without an
   * argument. A full URL is reduced to its hostname first so it addresses the
   * same entry that checkLimit() created for it.
   *
   * @param {string} [domain] - Domain, URL, or map key ('global') to reset.
   */
  reset(domain) {
    if (domain) {
      this.limits.delete(this.extractDomain(domain));
    } else {
      this.limits.clear();
    }
  }

  /**
   * Snapshot of the current counters.
   *
   * @returns {Object} Map of key -> { secondCount, minuteCount,
   *   secondsUntilReset, minutesUntilReset } (both rounded up, never negative).
   */
  getStats() {
    const now = Date.now();
    const stats = {};
    for (const [domain, limit] of this.limits) {
      stats[domain] = {
        secondCount: limit.secondCount,
        minuteCount: limit.minuteCount,
        secondsUntilReset: Math.max(0, Math.ceil((limit.secondReset - now) / 1000)),
        minutesUntilReset: Math.max(0, Math.ceil((limit.minuteReset - now) / 60000))
      };
    }
    return stats;
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Per-domain circuit breaker.
 *
 * Each domain has a record { count, state, nextAttempt } where state is
 * 'CLOSED', 'OPEN' or 'HALF_OPEN'. After `threshold` consecutive failures the
 * breaker opens and rejects calls until `resetTimeout` ms have passed; the
 * next call is then let through as a trial ('HALF_OPEN'). Any success closes
 * the breaker and clears the failure count.
 */
export class CircuitBreaker {
  /**
   * @param {Object} [options]
   * @param {number} [options.threshold=5] - Failures before the breaker opens.
   * @param {number} [options.timeout=60000] - Per-call time limit in ms.
   * @param {number} [options.resetTimeout=120000] - How long the breaker stays open, in ms.
   */
  constructor(options = {}) {
    const {
      threshold = 5,
      timeout = 60000,
      resetTimeout = 120000
    } = options;

    this.threshold = threshold;
    this.timeout = timeout;
    this.resetTimeout = resetTimeout;
    this.failures = new Map(); // domain -> { count, state, nextAttempt }
  }

  /**
   * Run `fn` under the breaker for `domain`, racing it against the timeout.
   *
   * @param {string} domain - Key identifying the protected target.
   * @param {Function} fn - Operation to run; may return a value or a promise.
   * @returns {Promise<*>} Whatever `fn` resolves to.
   * @throws {Error} 'Circuit breaker is OPEN for <domain>' while open,
   *   'Operation timeout' when the time limit wins, or whatever `fn` throws.
   */
  async execute(domain, fn) {
    const breaker = this.getBreaker(domain);

    if (breaker.state === 'OPEN') {
      if (Date.now() < breaker.nextAttempt) {
        throw new Error(`Circuit breaker is OPEN for ${domain}`);
      }
      breaker.state = 'HALF_OPEN';
    }

    let deadline;
    try {
      const pending = fn();
      deadline = this.timeoutPromise();
      const result = await Promise.race([pending, deadline]);

      this.onSuccess(domain);
      return result;
    } catch (error) {
      this.onFailure(domain);
      throw error;
    } finally {
      // Without this the timer stays armed for `timeout` ms after every call,
      // piling up timers and keeping the process alive.
      if (deadline && typeof deadline.cancel === 'function') {
        deadline.cancel();
      }
    }
  }

  /**
   * Fetch the record for `domain`, creating a closed one on first use.
   *
   * @param {string} domain
   * @returns {{count: number, state: string, nextAttempt: number}}
   */
  getBreaker(domain) {
    let breaker = this.failures.get(domain);
    if (!breaker) {
      breaker = {
        count: 0,
        state: 'CLOSED',
        nextAttempt: Date.now()
      };
      this.failures.set(domain, breaker);
    }
    return breaker;
  }

  /**
   * Record a success: clear the failure count and close the breaker.
   *
   * @param {string} domain
   */
  onSuccess(domain) {
    const breaker = this.getBreaker(domain);
    breaker.count = 0;
    breaker.state = 'CLOSED';
  }

  /**
   * Record a failure; open the breaker once the threshold is reached.
   *
   * @param {string} domain
   */
  onFailure(domain) {
    const breaker = this.getBreaker(domain);
    breaker.count++;

    if (breaker.count >= this.threshold) {
      breaker.state = 'OPEN';
      breaker.nextAttempt = Date.now() + this.resetTimeout;
    }
  }

  /**
   * Build a promise that rejects with 'Operation timeout' after
   * `this.timeout` ms.
   *
   * @returns {Promise<never>} The promise also carries a `cancel()` method
   *   that disarms the underlying timer; once cancelled it never settles.
   */
  timeoutPromise() {
    let timer;
    const deadline = new Promise((_, reject) => {
      timer = setTimeout(() => reject(new Error('Operation timeout')), this.timeout);
    });
    deadline.cancel = () => clearTimeout(timer);
    return deadline;
  }

  /**
   * Drop the record of one domain, or of all domains when called without an
   * argument.
   *
   * @param {string} [domain]
   */
  reset(domain) {
    if (domain) {
      this.failures.delete(domain);
    } else {
      this.failures.clear();
    }
  }

  /**
   * Snapshot of every breaker.
   *
   * @returns {Object} Map of domain -> { failureCount, state, nextAttemptIn }
   *   where nextAttemptIn is whole seconds (rounded up) until an open breaker
   *   allows a trial call, and 0 for breakers that are not open.
   */
  getStats() {
    const now = Date.now();
    const stats = {};
    for (const [domain, breaker] of this.failures) {
      stats[domain] = {
        failureCount: breaker.count,
        state: breaker.state,
        nextAttemptIn: breaker.state === 'OPEN'
          ? Math.max(0, Math.ceil((breaker.nextAttempt - now) / 1000))
          : 0
      };
    }
    return stats;
  }
}
|
|
195
|
+
|
|
196
|
+
export default RateLimiter;
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import robotsParser from 'robots-parser';
|
|
2
|
+
|
|
3
|
+
/**
 * Looks up robots.txt rules for URLs and caches one parsed robots.txt per
 * origin (protocol + host) for the lifetime of the instance.
 *
 * The checker is deliberately permissive: when robots.txt cannot be fetched,
 * is answered with a non-OK status, or the URL cannot be parsed, crawling is
 * treated as allowed.
 */
export class RobotsChecker {
  /**
   * @param {string} [userAgent='CrawlForge/1.0'] - Agent name used both for
   *   the robots.txt request and for rule matching.
   */
  constructor(userAgent = 'CrawlForge/1.0') {
    this.userAgent = userAgent;
    this.robotsCache = new Map(); // robots.txt URL -> parsed robots object
  }

  /**
   * Build the robots.txt URL that governs `url`; this is also the cache key.
   *
   * @param {string} url - Absolute URL.
   * @returns {string} `<protocol>//<host>/robots.txt`
   * @throws {TypeError} If `url` is not a valid absolute URL.
   */
  getRobotsUrl(url) {
    const { protocol, host } = new URL(url);
    return `${protocol}//${host}/robots.txt`;
  }

  /**
   * Check whether `url` may be fetched by this user agent.
   *
   * @param {string} url - Absolute URL to check.
   * @returns {Promise<boolean>} Result of the parsed rules' isAllowed(); `true`
   *   when the check itself fails.
   */
  async canFetch(url) {
    try {
      const robotsUrl = this.getRobotsUrl(url);

      let robots = this.robotsCache.get(robotsUrl);

      if (!robots) {
        const robotsTxt = await this.fetchRobotsTxt(robotsUrl);
        robots = robotsParser(robotsUrl, robotsTxt);
        this.robotsCache.set(robotsUrl, robots);
      }

      return robots.isAllowed(url, this.userAgent);
    } catch (error) {
      // If we can't check robots.txt, assume we can crawl
      console.warn(`Failed to check robots.txt for ${url}:`, error.message);
      return true;
    }
  }

  /**
   * Download a robots.txt file, giving up after 5 seconds.
   *
   * @param {string} robotsUrl - Absolute robots.txt URL.
   * @returns {Promise<string>} File contents, or '' (no restrictions) on a
   *   non-OK response, network error or timeout.
   */
  async fetchRobotsTxt(robotsUrl) {
    let timeoutId;
    try {
      const controller = new AbortController();
      timeoutId = setTimeout(() => controller.abort(), 5000);

      const response = await fetch(robotsUrl, {
        signal: controller.signal,
        headers: {
          'User-Agent': this.userAgent
        }
      });

      if (!response.ok) {
        return ''; // Empty robots.txt means everything is allowed
      }

      // Read the body while the abort timer is still armed so a stalled
      // download is cut off too, not only a stalled connection.
      return await response.text();
    } catch {
      return ''; // If we can't fetch, assume no restrictions
    } finally {
      // Always disarm the timer, including when fetch() itself failed.
      clearTimeout(timeoutId);
    }
  }

  /**
   * Crawl delay from the cached robots.txt for `url`'s origin.
   * Does not fetch: returns 0 until canFetch() has populated the cache.
   *
   * @param {string} url - Absolute URL.
   * @returns {number} Value reported by the parsed rules, or 0 when nothing is
   *   cached, no delay is set, or the URL is invalid.
   */
  getCrawlDelay(url) {
    try {
      const robots = this.robotsCache.get(this.getRobotsUrl(url));
      return robots ? robots.getCrawlDelay(this.userAgent) || 0 : 0;
    } catch {
      return 0;
    }
  }

  /**
   * Sitemap URLs from the cached robots.txt for `url`'s origin.
   * Does not fetch: returns [] until canFetch() has populated the cache.
   *
   * @param {string} url - Absolute URL.
   * @returns {string[]} Sitemaps reported by the parsed rules, or [] when
   *   nothing is cached or the URL is invalid.
   */
  getSitemaps(url) {
    try {
      const robots = this.robotsCache.get(this.getRobotsUrl(url));
      return robots ? robots.getSitemaps() || [] : [];
    } catch {
      return [];
    }
  }

  /** Drop every cached robots.txt. */
  clearCache() {
    this.robotsCache.clear();
  }
}
|
|
90
|
+
|
|
91
|
+
export default RobotsChecker;
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Security Middleware for MCP WebScraper
|
|
3
|
+
* Integrates SSRF protection, input validation, and other security measures
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { timingSafeEqual } from 'node:crypto';

import { SSRFProtection } from './ssrfProtection.js';
import { InputValidator } from './inputValidation.js';
import { config } from '../constants/config.js';
import { Logger } from './Logger.js';
|
|
10
|
+
|
|
11
|
+
// Module-level security collaborators, built once from the security section
// of the shared config and handed to every SecurityMiddleware instance.

// SSRF guard: protocol / size / timeout / redirect limits plus the host blocklist.
const ssrfSettings = config.security.ssrfProtection;
const ssrfProtection = new SSRFProtection({
  allowedProtocols: ssrfSettings.allowedProtocols,
  maxRequestSize: ssrfSettings.maxRequestSize,
  maxTimeout: ssrfSettings.maxTimeout,
  maxRedirects: ssrfSettings.maxRedirects,
  // The config calls this list "blockedDomains"; the guard's option is "blockedHostnames".
  blockedHostnames: ssrfSettings.blockedDomains
});

// Input validator: size/depth limits come from inputValidation, the HTML tag
// allow-list from contentSecurity.
const validationSettings = config.security.inputValidation;
const inputValidator = new InputValidator({
  maxStringLength: validationSettings.maxStringLength,
  maxArrayLength: validationSettings.maxArrayLength,
  maxObjectDepth: validationSettings.maxObjectDepth,
  maxRegexLength: validationSettings.maxRegexLength,
  allowedHTMLTags: config.security.contentSecurity.allowedHTMLTags
});

// Logger constructed with its defaults.
const logger = new Logger();
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Security middleware class for MCP tools
|
|
32
|
+
*/
|
|
33
|
+
/**
 * Security middleware class for MCP tools.
 *
 * Thin facade over the module-level SSRF guard, input validator and logger:
 * every validate* method delegates to one of them, honours the matching
 * on/off switch in `config.security`, keeps running violation counters and
 * logs each rejected input.
 */
export class SecurityMiddleware {
  /**
   * @param {Object} [options] - Accepted for interface compatibility; not read.
   */
  constructor(options = {}) {
    this.ssrfProtection = ssrfProtection;
    this.inputValidator = inputValidator;
    this.logger = logger;
    this.config = config.security;
    this.violationStats = {
      totalViolations: 0,
      blockedRequests: 0,
      ssrfBlocked: 0,
      injectionBlocked: 0,
      validationErrors: 0
    };
  }

  /**
   * Validate URL parameter for SSRF protection
   * @param {string} url - URL to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Promise<Object>} - `{ allowed, sanitizedURL, violations }`, or
   *   `{ allowed: false, error }` when the check itself failed
   */
  async validateURL(url, context = {}) {
    if (!this.config.ssrfProtection.enabled) {
      return { allowed: true, sanitizedURL: url };
    }

    try {
      const result = await this.ssrfProtection.validateURL(url);

      if (!result.allowed) {
        this.violationStats.ssrfBlocked++;
        this.violationStats.blockedRequests++;

        this.logSecurityViolation('SSRF_BLOCKED', {
          url,
          violations: result.violations,
          context
        });
      }

      return {
        allowed: result.allowed,
        sanitizedURL: result.sanitizedURL || url,
        violations: result.violations
      };
    } catch (error) {
      this.logger.error('SSRF validation error:', error);
      // Counted like the failure path of every other validator in this class.
      this.violationStats.validationErrors++;
      return { allowed: false, error: error.message };
    }
  }

  /**
   * Validate search query parameters
   * @param {string} query - Search query to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Object} - The validator's result, or `{ isValid: false, error }`
   *   when the validator threw
   */
  validateSearchQuery(query, context = {}) {
    if (!this.config.inputValidation.enabled) {
      return { isValid: true, sanitizedValue: query };
    }

    try {
      const result = this.inputValidator.validateSearchQuery(query);

      if (!result.isValid) {
        this.violationStats.injectionBlocked++;
        this.violationStats.blockedRequests++;

        this.logSecurityViolation('INJECTION_BLOCKED', {
          query,
          violations: result.violations,
          context
        });
      }

      return result;
    } catch (error) {
      this.logger.error('Query validation error:', error);
      this.violationStats.validationErrors++;
      return { isValid: false, error: error.message };
    }
  }

  /**
   * Validate CSS selector parameters
   * @param {string} selector - CSS selector to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Object} - The validator's result, or `{ isValid: false, error }`
   *   when the validator threw
   */
  validateCSSSelector(selector, context = {}) {
    if (!this.config.inputValidation.enabled) {
      return { isValid: true, sanitizedValue: selector };
    }

    try {
      const result = this.inputValidator.validateCSSSelector(selector);

      if (!result.isValid) {
        this.violationStats.injectionBlocked++;
        this.violationStats.blockedRequests++;

        this.logSecurityViolation('CSS_INJECTION_BLOCKED', {
          selector,
          violations: result.violations,
          context
        });
      }

      return result;
    } catch (error) {
      this.logger.error('CSS validation error:', error);
      this.violationStats.validationErrors++;
      return { isValid: false, error: error.message };
    }
  }

  /**
   * Validate object parameters
   * @param {Object} obj - Object to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Object} - The validator's result, or `{ isValid: false, error }`
   *   when the validator threw
   */
  validateObject(obj, context = {}) {
    if (!this.config.inputValidation.enabled) {
      return { isValid: true, sanitizedValue: obj };
    }

    try {
      const result = this.inputValidator.validateObject(obj);

      if (!result.isValid) {
        this.violationStats.validationErrors++;

        // Only the key names are logged, never the values.
        this.logSecurityViolation('OBJECT_VALIDATION_FAILED', {
          objectKeys: Object.keys(obj || {}),
          violations: result.violations,
          context
        });
      }

      return result;
    } catch (error) {
      this.logger.error('Object validation error:', error);
      this.violationStats.validationErrors++;
      return { isValid: false, error: error.message };
    }
  }

  /**
   * Validate HTML content
   * @param {string} html - HTML content to validate
   * @param {Object} context - Request context (only used for logging)
   * @returns {Object} - The validator's result, or `{ isValid: false, error }`
   *   when the validator threw
   */
  validateHTML(html, context = {}) {
    if (!this.config.contentSecurity.sanitizeHTML) {
      return { isValid: true, sanitizedValue: html };
    }

    try {
      const result = this.inputValidator.validateHTML(html);

      if (!result.isValid) {
        this.violationStats.injectionBlocked++;

        // Only the length is logged, never the markup itself.
        this.logSecurityViolation('HTML_XSS_BLOCKED', {
          htmlLength: typeof html === 'string' ? html.length : 0,
          violations: result.violations,
          context
        });
      }

      return result;
    } catch (error) {
      this.logger.error('HTML validation error:', error);
      this.violationStats.validationErrors++;
      return { isValid: false, error: error.message };
    }
  }

  /**
   * Create secure fetch function with SSRF protection
   * @param {Object} options - Fetch options; override the configured
   *   allowedDomains / maxRequestSize defaults
   * @returns {Function} - Secure fetch function
   */
  createSecureFetch(options = {}) {
    return this.ssrfProtection.createSecureFetch({
      allowedDomains: this.config.ssrfProtection.allowedDomains,
      maxRequestSize: this.config.ssrfProtection.maxRequestSize,
      ...options
    });
  }

  /**
   * Validate tool parameters based on schema.
   *
   * Checks the well-known parameters `url`, `query`, `selectors`, `options`,
   * `include_patterns` and `exclude_patterns`; everything else is copied to
   * `sanitizedParams` untouched. The caller's `params` object (including its
   * `selectors` container) is never modified.
   *
   * @param {Object} params - Tool parameters
   * @param {string} toolName - Name of the tool
   * @returns {Promise<Object>} - `{ isValid, violations, sanitizedParams }`
   */
  async validateToolParameters(params, toolName) {
    const input = params ?? {};
    const results = {
      isValid: true,
      violations: [],
      sanitizedParams: { ...input }
    };

    const context = { toolName, timestamp: new Date().toISOString() };

    const reject = (violations) => {
      results.isValid = false;
      results.violations.push(...(violations || []));
    };

    // URL validation for tools that accept URLs
    if (input.url) {
      const urlResult = await this.validateURL(input.url, context);
      if (urlResult.allowed) {
        results.sanitizedParams.url = urlResult.sanitizedURL;
      } else {
        reject(urlResult.violations);
      }
    }

    // Search query validation
    if (input.query) {
      const queryResult = this.validateSearchQuery(input.query, context);
      if (queryResult.isValid) {
        results.sanitizedParams.query = queryResult.sanitizedValue;
      } else {
        reject(queryResult.violations);
      }
    }

    // CSS selectors validation
    const { selectors } = input;
    if (selectors && typeof selectors === 'string') {
      // A lone selector string is validated as a single selector.
      const selectorResult = this.validateCSSSelector(selectors, context);
      if (selectorResult.isValid) {
        results.sanitizedParams.selectors = selectorResult.sanitizedValue;
      } else {
        reject(selectorResult.violations);
      }
    } else if (selectors && typeof selectors === 'object') {
      // sanitizedParams is only a shallow copy, so write the sanitized
      // values into a fresh container instead of the caller's object.
      const sanitizedSelectors = Array.isArray(selectors) ? [...selectors] : { ...selectors };
      for (const [key, selector] of Object.entries(selectors)) {
        const selectorResult = this.validateCSSSelector(selector, context);
        if (selectorResult.isValid) {
          sanitizedSelectors[key] = selectorResult.sanitizedValue;
        } else {
          reject(selectorResult.violations);
        }
      }
      results.sanitizedParams.selectors = sanitizedSelectors;
    }

    // Object validation for complex parameters
    if (input.options && typeof input.options === 'object') {
      const objectResult = this.validateObject(input.options, context);
      if (objectResult.isValid) {
        results.sanitizedParams.options = objectResult.sanitizedValue;
      } else {
        reject(objectResult.violations);
      }
    }

    // Validate arrays (like include_patterns, exclude_patterns)
    for (const paramName of ['include_patterns', 'exclude_patterns']) {
      const patterns = input[paramName];
      if (!Array.isArray(patterns)) {
        continue;
      }

      for (const pattern of patterns) {
        if (typeof pattern !== 'string') {
          continue;
        }

        const regexResult = this.inputValidator.validateRegex(pattern);
        if (!regexResult.isValid) {
          reject(regexResult.violations);
          this.logSecurityViolation('REGEX_VALIDATION_FAILED', {
            pattern,
            violations: regexResult.violations,
            context
          });
        }
      }
    }

    return results;
  }

  /**
   * Log security violations.
   *
   * Every violation is counted and (unless `monitoring.violationLogging` is
   * false) written as a warning. HIGH-severity violations are additionally
   * written as an error (unless `monitoring.securityLogging` is false) with
   * the offending query and URL truncated.
   *
   * @param {string} type - Violation type
   * @param {Object} details - Violation details
   */
  logSecurityViolation(type, details) {
    this.violationStats.totalViolations++;

    const info = details ?? {};
    const severity = this.getViolationSeverity(info.violations);

    if (this.config.monitoring?.violationLogging !== false) {
      this.logger.warn('Security violation detected', {
        type,
        details,
        timestamp: new Date().toISOString(),
        severity
      });
    }

    // Log high-severity violations to security log
    if (severity === 'HIGH' && this.config.monitoring?.securityLogging !== false) {
      // Pull query/url out of the spread so only their truncated forms are
      // written; spreading `details` as-is would leak the full query.
      const { query, url, ...rest } = info;
      this.logger.error('High-severity security violation', {
        type,
        details: {
          ...rest,
          // Don't log full content for security
          input: query ? String(query).substring(0, 100) : undefined,
          url: url ? String(url).substring(0, 200) : undefined
        }
      });
    }
  }

  /**
   * Get violation severity level
   * @param {Array} violations - Array of violations; anything that is not an
   *   array is treated as empty
   * @returns {string} - 'HIGH' if any violation is HIGH, else 'MEDIUM' if any
   *   is MEDIUM, else 'LOW'
   */
  getViolationSeverity(violations = []) {
    const list = Array.isArray(violations) ? violations : [];
    if (list.some(v => v?.severity === 'HIGH')) return 'HIGH';
    if (list.some(v => v?.severity === 'MEDIUM')) return 'MEDIUM';
    return 'LOW';
  }

  /**
   * Get security statistics
   * @returns {Object} - Violation counters, the collaborators' own stats and
   *   the state of the main config switches
   */
  getSecurityStats() {
    return {
      violations: this.violationStats,
      ssrfStats: this.ssrfProtection.getStats(),
      validationStats: this.inputValidator.getStats(),
      configEnabled: {
        ssrfProtection: this.config.ssrfProtection.enabled,
        inputValidation: this.config.inputValidation.enabled,
        contentSecurity: this.config.contentSecurity.sanitizeHTML,
        auditLogging: this.config.apiSecurity.auditLogging
      }
    };
  }

  /**
   * Reset security statistics.
   * Also clears the SSRF guard's cache and the validator's violation log.
   */
  resetStats() {
    this.violationStats = {
      totalViolations: 0,
      blockedRequests: 0,
      ssrfBlocked: 0,
      injectionBlocked: 0,
      validationErrors: 0
    };
    this.ssrfProtection.clearCache();
    this.inputValidator.clearViolationLog();
  }

  /**
   * Check if request should be authenticated
   * @param {Object} request - Request object (not inspected; the answer comes
   *   from configuration alone)
   * @returns {boolean} - Whether authentication is required
   */
  requiresAuthentication(request) {
    return this.config.apiSecurity.requireAuthentication;
  }

  /**
   * Validate API key
   * @param {string} apiKey - API key to validate
   * @returns {boolean} - `true` when authentication is disabled, or when the
   *   key equals the configured, non-empty key; `false` otherwise (including
   *   when no key is configured)
   */
  validateAPIKey(apiKey) {
    if (!this.config.apiSecurity.requireAuthentication) {
      return true;
    }

    const expectedKey = this.config.apiSecurity.apiKey;
    if (
      typeof apiKey !== 'string' ||
      typeof expectedKey !== 'string' ||
      expectedKey.length === 0
    ) {
      return false;
    }

    // Constant-time comparison so response timing does not reveal how many
    // leading characters of a guess were right. timingSafeEqual requires
    // equal-length inputs, hence the length check first.
    const presented = Buffer.from(apiKey);
    const expected = Buffer.from(expectedKey);
    return presented.length === expected.length && timingSafeEqual(presented, expected);
  }
}
|
|
412
|
+
|
|
413
|
+
// Shared module-wide instance, created once at import time and exposed both
// as the named export `securityMiddleware` and as the default export.
const securityMiddleware = new SecurityMiddleware();

export { securityMiddleware };
export default securityMiddleware;
|