crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,766 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhanced Input Validation and Sanitization Module
|
|
3
|
+
* Provides comprehensive input validation, sanitization, and security checks
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import DOMPurify from 'isomorphic-dompurify';
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Security patterns and rules
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Pattern tables used by InputValidator's security checks.
 *
 * Every pattern here is consumed with `RegExp.prototype.test()`. None of them
 * carries the `g` (or `y`) flag on purpose: a global regex remembers
 * `lastIndex` between `.test()` calls, so a shared global pattern would report
 * a match on one call and miss the very same input on the next one.
 */
const SECURITY_PATTERNS = {
  // SQL injection patterns: SQL keywords, quoted literals, `; --` and block comments
  sqlInjection: [
    /(\b(SELECT|INSERT|UPDATE|DELETE|DROP|CREATE|ALTER|EXEC|UNION|SCRIPT)\b)/i,
    /'[^']*'|"[^"]*"/,
    /;\s*--/,
    /\/\*[\s\S]*?\*\//
  ],

  // XSS patterns: script/iframe elements, script URL schemes, inline event handlers
  xssPatterns: [
    /<script[^>]*>[\s\S]*?<\/script>/i,
    /<iframe[^>]*>[\s\S]*?<\/iframe>/i,
    /javascript:/i,
    /vbscript:/i,
    /onload\s*=/i,
    /onerror\s*=/i,
    /onclick\s*=/i,
    /onmouseover\s*=/i
  ],

  // Path traversal patterns: `../` and `..\` in plain and percent-encoded forms
  pathTraversal: [
    /\.\.\//,
    /\.\.\\/,
    /%2e%2e%2f/i,
    /%2e%2e%5c/i,
    /\.\.%2f/i,
    /\.\.%5c/i
  ],

  // Command injection patterns: shell metacharacters and common exec-style function names
  commandInjection: [
    /[;&|`$(){}\[\]]/,
    /\beval\b/i,
    /\bexec\b/i,
    /\bsystem\b/i,
    /\bshell_exec\b/i
  ],

  // CSS selector injection: quotes, comment openers, expression(), javascript:, @import
  cssSelectorInjection: [
    /['"]/,
    /\/\*/,
    /expression\s*\(/i,
    /javascript\s*:/i,
    /@import/i
  ],

  // Regular expression DoS heuristics. These are matched against the SOURCE
  // TEXT of a candidate regex and look for structures that commonly cause
  // catastrophic backtracking. Each heuristic itself runs without nested
  // quantifiers so that checking hostile input stays cheap.
  redosPatterns: [
    // A group that contains an unescaped `*`/`+` and is itself quantified, e.g. (a+)+
    /\((?:[^()\\]|\\.)*[*+](?:[^()\\]|\\.)*\)[*+{]/,
    // A quantified group with two identical alternatives, e.g. (a|a)*
    /\(([^()|]+)\|\1\)[*+{]/,
    // The same token quantified twice in a row, e.g. a*a* or \d+\d+
    /(\\.|[^\\()[\]{}|*+?^$])[*+]\1[*+]/
  ]
};
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Input validation configuration
|
|
71
|
+
*/
|
|
72
|
+
const VALIDATION_CONFIG = {
  // Size limits applied to validated input
  maxStringLength: 10000,
  maxArrayLength: 1000,
  maxObjectDepth: 10,
  maxRegexLength: 500,

  // Whitelists
  allowedHTMLTags: [
    'p',
    'br',
    'strong',
    'em',
    'u',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6'
  ],
  allowedCSSProperties: [
    'color',
    'font-size',
    'font-weight',
    'text-align'
  ],

  // File constraints
  maxFileSize: 100 * 1024 * 1024, // 100MB, expressed in bytes
  allowedFileTypes: [
    'pdf',
    'txt',
    'html',
    'json',
    'xml',
    'csv'
  ]
};
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Enhanced Input Validator Class
|
|
85
|
+
*/
|
|
86
|
+
/**
 * Report whether at least one pattern in the list matches the input.
 *
 * Regexes created with the `g` or `y` flag keep their `lastIndex` between
 * `.test()` calls, so re-using one shared pattern object can make a second
 * check of the same input start mid-string and miss the match. `lastIndex` is
 * rewound before and after each test so every check starts from index 0.
 *
 * @param {RegExp[]} patterns - Patterns to try, in order
 * @param {string} input - Text to test
 * @returns {boolean} - True as soon as one pattern matches
 */
function matchesAnyPattern(patterns, input) {
  return patterns.some((pattern) => {
    pattern.lastIndex = 0;
    const matched = pattern.test(input);
    pattern.lastIndex = 0;
    return matched;
  });
}

export class InputValidator {
  /**
   * @param {Object} options - Overrides merged on top of VALIDATION_CONFIG
   */
  constructor(options = {}) {
    this.config = { ...VALIDATION_CONFIG, ...options };
    this.violationLog = [];
    this.maxViolationLogSize = 1000;
  }

  /**
   * Validate and sanitize URL input
   *
   * Non-string/empty input, over-long input and a disallowed protocol return
   * immediately without a sanitized value and without being logged. Otherwise
   * the security pattern checks and the path traversal check run, and the
   * result is valid when no HIGH severity violation was recorded.
   *
   * @param {string} url - URL to validate
   * @param {Object} options - Validation options
   * @param {string[]} [options.allowedProtocols] - Accepted protocols, default ['http:', 'https:']
   * @returns {Object} - { isValid, sanitizedValue, violations, metadata }
   */
  validateURL(url, options = {}) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: [],
      metadata: {}
    };

    try {
      // Basic format validation
      if (typeof url !== 'string' || url.length === 0) {
        result.violations.push({
          type: 'INVALID_FORMAT',
          message: 'URL must be a non-empty string',
          severity: 'HIGH'
        });
        return result;
      }

      // Length validation
      if (url.length > this.config.maxStringLength) {
        result.violations.push({
          type: 'EXCESSIVE_LENGTH',
          message: `URL exceeds maximum length of ${this.config.maxStringLength}`,
          severity: 'HIGH'
        });
        return result;
      }

      // URL format validation; `new URL` throws on malformed input (handled below)
      const urlObj = new URL(url);
      result.metadata.protocol = urlObj.protocol;
      result.metadata.hostname = urlObj.hostname;
      result.metadata.port = urlObj.port;

      // Protocol validation
      const allowedProtocols = options.allowedProtocols || ['http:', 'https:'];
      if (!allowedProtocols.includes(urlObj.protocol)) {
        result.violations.push({
          type: 'INVALID_PROTOCOL',
          message: `Protocol '${urlObj.protocol}' is not allowed`,
          severity: 'HIGH'
        });
        return result;
      }

      // Security pattern checks run against the raw URL string.
      // NOTE(review): the command injection table contains `&`, `;`, `(` and
      // similar characters, so a URL with a multi-parameter query string is
      // reported as COMMAND_INJECTION (HIGH) - confirm this is intended.
      this.checkSecurityPatterns(url, result);

      // Path traversal check
      if (this.containsPathTraversal(urlObj.pathname)) {
        result.violations.push({
          type: 'PATH_TRAVERSAL',
          message: 'URL contains path traversal patterns',
          severity: 'HIGH'
        });
      }

      // Sanitize URL
      result.sanitizedValue = this.sanitizeURL(urlObj);
      result.isValid = result.violations.filter(v => v.severity === 'HIGH').length === 0;

    } catch (error) {
      result.violations.push({
        type: 'MALFORMED_URL',
        message: `Invalid URL format: ${error.message}`,
        severity: 'HIGH'
      });
    }

    this.logViolations(url, result.violations);
    return result;
  }

  /**
   * Validate CSS selector for injection attacks
   * @param {string} selector - CSS selector to validate
   * @returns {Object} - { isValid, sanitizedValue, violations }
   */
  validateCSSSelector(selector) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof selector !== 'string') {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'CSS selector must be a string',
        severity: 'HIGH'
      });
      return result;
    }

    // Length check
    if (selector.length > this.config.maxStringLength) {
      result.violations.push({
        type: 'EXCESSIVE_LENGTH',
        message: 'CSS selector too long',
        severity: 'HIGH'
      });
      return result;
    }

    // Check for CSS injection patterns (one violation regardless of how many match)
    if (matchesAnyPattern(SECURITY_PATTERNS.cssSelectorInjection, selector)) {
      result.violations.push({
        type: 'CSS_INJECTION',
        message: 'CSS selector contains potential injection patterns',
        severity: 'HIGH'
      });
    }

    // Check for suspicious functions (plain substring match, one violation per name)
    const suspiciousFunctions = ['expression', 'url', 'import', 'javascript'];
    const loweredSelector = selector.toLowerCase();
    for (const func of suspiciousFunctions) {
      if (loweredSelector.includes(func)) {
        result.violations.push({
          type: 'SUSPICIOUS_FUNCTION',
          message: `CSS selector contains suspicious function: ${func}`,
          severity: 'MEDIUM'
        });
      }
    }

    // Validate selector syntax; only possible where a DOM `document` exists
    try {
      if (typeof document !== 'undefined') {
        document.querySelector(selector);
      }
    } catch (error) {
      result.violations.push({
        type: 'INVALID_SYNTAX',
        message: `Invalid CSS selector syntax: ${error.message}`,
        severity: 'MEDIUM'
      });
    }

    result.sanitizedValue = this.sanitizeCSSSelector(selector);
    result.isValid = result.violations.filter(v => v.severity === 'HIGH').length === 0;

    this.logViolations(selector, result.violations);
    return result;
  }

  /**
   * Validate search query for injection attacks
   * @param {string} query - Search query to validate
   * @returns {Object} - { isValid, sanitizedValue, violations }
   */
  validateSearchQuery(query) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof query !== 'string') {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'Search query must be a string',
        severity: 'HIGH'
      });
      return result;
    }

    // Length check (MEDIUM only: the sanitized value is truncated to 1000 characters)
    if (query.length > 1000) { // Search queries should be shorter
      result.violations.push({
        type: 'EXCESSIVE_LENGTH',
        message: 'Search query too long',
        severity: 'MEDIUM'
      });
    }

    // Check for SQL injection patterns
    this.checkSQLInjection(query, result);

    // Check for XSS patterns
    this.checkXSSPatterns(query, result);

    // Check for command injection
    this.checkCommandInjection(query, result);

    // Validate search operators: count every occurrence across all operators
    const dangerousOperators = ['site:', 'filetype:', 'inurl:', 'intitle:'];
    const loweredQuery = query.toLowerCase();
    const operatorCount = dangerousOperators.reduce((count, op) => {
      return count + (loweredQuery.split(op).length - 1);
    }, 0);

    if (operatorCount > 5) {
      result.violations.push({
        type: 'TOO_MANY_OPERATORS',
        message: 'Too many search operators',
        severity: 'MEDIUM'
      });
    }

    result.sanitizedValue = this.sanitizeSearchQuery(query);
    result.isValid = result.violations.filter(v => v.severity === 'HIGH').length === 0;

    this.logViolations(query, result.violations);
    return result;
  }

  /**
   * Validate regular expression for ReDoS attacks
   *
   * The pattern is never executed against data here; it is only inspected as
   * text and compiled once with `new RegExp` to confirm the syntax.
   *
   * @param {string} regex - Regular expression pattern
   * @returns {Object} - { isValid, sanitizedValue, violations }
   */
  validateRegex(regex) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof regex !== 'string') {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'Regex must be a string',
        severity: 'HIGH'
      });
      return result;
    }

    // Length check
    if (regex.length > this.config.maxRegexLength) {
      result.violations.push({
        type: 'EXCESSIVE_LENGTH',
        message: 'Regular expression too long',
        severity: 'HIGH'
      });
      return result;
    }

    // Check for ReDoS patterns (applied to the regex source text)
    if (matchesAnyPattern(SECURITY_PATTERNS.redosPatterns, regex)) {
      result.violations.push({
        type: 'REDOS_RISK',
        message: 'Regular expression may be vulnerable to ReDoS attacks',
        severity: 'HIGH'
      });
    }

    // Check for complex quantifiers (two quantifier characters in a row)
    const complexQuantifiers = /(\*\+)|(\+\*)|(\*\*)|(\+\+)|(\?\?)/;
    if (complexQuantifiers.test(regex)) {
      result.violations.push({
        type: 'COMPLEX_QUANTIFIERS',
        message: 'Regular expression contains complex quantifiers',
        severity: 'MEDIUM'
      });
    }

    // Validate regex syntax
    try {
      new RegExp(regex);
    } catch (error) {
      result.violations.push({
        type: 'INVALID_SYNTAX',
        message: `Invalid regular expression syntax: ${error.message}`,
        severity: 'HIGH'
      });
    }

    result.sanitizedValue = regex; // Don't modify regex patterns
    result.isValid = result.violations.filter(v => v.severity === 'HIGH').length === 0;

    this.logViolations(regex, result.violations);
    return result;
  }

  /**
   * Validate HTML content
   * @param {string} html - HTML content to validate
   * @returns {Object} - { isValid, sanitizedValue, violations }
   */
  validateHTML(html) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof html !== 'string') {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'HTML must be a string',
        severity: 'HIGH'
      });
      return result;
    }

    // Length check
    if (html.length > this.config.maxStringLength) {
      result.violations.push({
        type: 'EXCESSIVE_LENGTH',
        message: 'HTML content too long',
        severity: 'MEDIUM'
      });
    }

    // Check for XSS patterns
    this.checkXSSPatterns(html, result);

    // Sanitize HTML using DOMPurify. FORBID_TAGS keeps script/iframe out even
    // when a caller widens `allowedHTMLTags` through the constructor options.
    result.sanitizedValue = DOMPurify.sanitize(html, {
      ALLOWED_TAGS: this.config.allowedHTMLTags,
      ALLOWED_ATTR: ['class', 'id'],
      FORBID_TAGS: ['script', 'iframe']
    });

    result.isValid = result.violations.filter(v => v.severity === 'HIGH').length === 0;

    this.logViolations(html.substring(0, 100), result.violations);
    return result;
  }

  /**
   * Validate object structure and depth
   *
   * Unlike the string validators, violations found here are returned but not
   * written to the violation log.
   *
   * @param {Object} obj - Object to validate
   * @param {Object} options - Validation options (currently unused)
   * @returns {Object} - { isValid, sanitizedValue, violations }
   */
  validateObject(obj, options = {}) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof obj !== 'object' || obj === null) {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'Input must be an object',
        severity: 'HIGH'
      });
      return result;
    }

    // Check object depth. Returning early here also keeps the recursive
    // helpers below away from cyclic structures, whose measured depth always
    // exceeds the limit.
    const depth = this.getObjectDepth(obj);
    if (depth > this.config.maxObjectDepth) {
      result.violations.push({
        type: 'EXCESSIVE_DEPTH',
        message: `Object depth exceeds maximum of ${this.config.maxObjectDepth}`,
        severity: 'HIGH'
      });
      return result;
    }

    // Check array lengths
    this.checkArrayLengths(obj, result);

    // Check string lengths
    this.checkStringLengths(obj, result);

    // Sanitize object
    result.sanitizedValue = this.sanitizeObject(obj);
    result.isValid = result.violations.filter(v => v.severity === 'HIGH').length === 0;

    return result;
  }

  /**
   * Check for security patterns in input (SQL injection, XSS, command injection)
   * @param {string} input - Input to check
   * @param {Object} result - Result object to update
   */
  checkSecurityPatterns(input, result) {
    this.checkSQLInjection(input, result);
    this.checkXSSPatterns(input, result);
    this.checkCommandInjection(input, result);
  }

  /**
   * Check for SQL injection patterns
   * @param {string} input - Input to check
   * @param {Object} result - Result object to update; gains at most one SQL_INJECTION violation
   */
  checkSQLInjection(input, result) {
    if (matchesAnyPattern(SECURITY_PATTERNS.sqlInjection, input)) {
      result.violations.push({
        type: 'SQL_INJECTION',
        message: 'Input contains potential SQL injection patterns',
        severity: 'HIGH'
      });
    }
  }

  /**
   * Check for XSS patterns
   * @param {string} input - Input to check
   * @param {Object} result - Result object to update; gains at most one XSS_ATTEMPT violation
   */
  checkXSSPatterns(input, result) {
    if (matchesAnyPattern(SECURITY_PATTERNS.xssPatterns, input)) {
      result.violations.push({
        type: 'XSS_ATTEMPT',
        message: 'Input contains potential XSS patterns',
        severity: 'HIGH'
      });
    }
  }

  /**
   * Check for command injection patterns
   * @param {string} input - Input to check
   * @param {Object} result - Result object to update; gains at most one COMMAND_INJECTION violation
   */
  checkCommandInjection(input, result) {
    if (matchesAnyPattern(SECURITY_PATTERNS.commandInjection, input)) {
      result.violations.push({
        type: 'COMMAND_INJECTION',
        message: 'Input contains potential command injection patterns',
        severity: 'HIGH'
      });
    }
  }

  /**
   * Check for path traversal patterns
   * @param {string} path - Path to check
   * @returns {boolean}
   */
  containsPathTraversal(path) {
    return matchesAnyPattern(SECURITY_PATTERNS.pathTraversal, path);
  }

  /**
   * Sanitize URL object
   * @param {URL} urlObj - URL object to sanitize (not modified; a copy is edited)
   * @returns {string} - URL string without credentials and without fragment
   */
  sanitizeURL(urlObj) {
    const sanitized = new URL(urlObj.toString());

    // Remove authentication info
    sanitized.username = '';
    sanitized.password = '';

    // Remove fragment for security
    sanitized.hash = '';

    return sanitized.toString();
  }

  /**
   * Sanitize CSS selector
   *
   * The removals are repeated until the text stops changing, because deleting
   * one token can splice its neighbours into a new one (for example
   * `javajavascript:script:` collapses to `javascript:` after a single pass).
   *
   * @param {string} selector - CSS selector to sanitize
   * @returns {string}
   */
  sanitizeCSSSelector(selector) {
    let current = selector;
    let previous;

    // Each pass can only shorten the string, so the loop always terminates
    do {
      previous = current;
      current = current
        .replace(/['"]/g, '') // Remove quotes
        .replace(/\/\*[\s\S]*?\*\//g, '') // Remove comments
        .replace(/javascript:/gi, '') // Remove javascript:
        .replace(/expression\s*\(/gi, ''); // Remove expression(
    } while (current !== previous);

    return current.trim();
  }

  /**
   * Sanitize search query
   * @param {string} query - Search query to sanitize
   * @returns {string}
   */
  sanitizeSearchQuery(query) {
    return query
      .replace(/[<>&"']/g, '') // Remove HTML characters
      .replace(/[\r\n\t]/g, ' ') // Replace control characters with spaces
      .replace(/\s+/g, ' ') // Normalize whitespace
      .trim()
      .substring(0, 1000); // Limit length
  }

  /**
   * Sanitize object recursively
   *
   * Returns a copy: arrays are truncated to `maxArrayLength`, every string
   * (whether an object property or an array element) goes through
   * `sanitizeString`, nested objects and arrays are sanitized in turn and all
   * other values are copied as they are. Non-object input is returned
   * unchanged.
   *
   * @param {Object} obj - Object to sanitize
   * @returns {Object}
   */
  sanitizeObject(obj) {
    if (typeof obj !== 'object' || obj === null) {
      return obj;
    }

    const sanitizeValue = (value) => (
      typeof value === 'string' ? this.sanitizeString(value) : this.sanitizeObject(value)
    );

    if (Array.isArray(obj)) {
      return obj.slice(0, this.config.maxArrayLength).map(sanitizeValue);
    }

    const sanitized = {};
    for (const [key, value] of Object.entries(obj)) {
      // An own "__proto__" key (possible after JSON.parse) would replace the
      // prototype of the copy when assigned, so it is dropped instead.
      if (key === '__proto__') {
        continue;
      }
      sanitized[key] = sanitizeValue(value);
    }

    return sanitized;
  }

  /**
   * Sanitize string value
   * @param {string} str - String to sanitize
   * @returns {string} - Whitespace-normalized, trimmed, length-limited string
   */
  sanitizeString(str) {
    if (typeof str !== 'string') {
      return str;
    }

    return str
      .replace(/[\r\n\t]/g, ' ') // Replace control characters
      .replace(/\s+/g, ' ') // Normalize whitespace
      .trim()
      .substring(0, this.config.maxStringLength);
  }

  /**
   * Get object depth
   *
   * Recursion stops once `depth` has passed `maxObjectDepth`, so the value
   * returned for very deep or cyclic input is only a lower bound that already
   * exceeds the limit.
   *
   * @param {Object} obj - Object to measure
   * @param {number} depth - Current depth
   * @returns {number}
   */
  getObjectDepth(obj, depth = 0) {
    if (typeof obj !== 'object' || obj === null || depth > this.config.maxObjectDepth) {
      return depth;
    }

    let maxDepth = depth;
    for (const value of Object.values(obj)) {
      if (typeof value === 'object' && value !== null) {
        maxDepth = Math.max(maxDepth, this.getObjectDepth(value, depth + 1));
      }
    }

    return maxDepth;
  }

  /**
   * Check array lengths in object
   *
   * Only the values of `obj` are inspected, and an array that is reported as
   * too long is not descended into.
   *
   * @param {Object} obj - Object to check
   * @param {Object} result - Result object to update
   */
  checkArrayLengths(obj, result) {
    for (const value of Object.values(obj)) {
      if (Array.isArray(value) && value.length > this.config.maxArrayLength) {
        result.violations.push({
          type: 'EXCESSIVE_ARRAY_LENGTH',
          message: `Array length exceeds maximum of ${this.config.maxArrayLength}`,
          severity: 'MEDIUM'
        });
      } else if (typeof value === 'object' && value !== null) {
        this.checkArrayLengths(value, result);
      }
    }
  }

  /**
   * Check string lengths in object
   * @param {Object} obj - Object to check
   * @param {Object} result - Result object to update
   */
  checkStringLengths(obj, result) {
    for (const value of Object.values(obj)) {
      if (typeof value === 'string' && value.length > this.config.maxStringLength) {
        result.violations.push({
          type: 'EXCESSIVE_STRING_LENGTH',
          message: `String length exceeds maximum of ${this.config.maxStringLength}`,
          severity: 'MEDIUM'
        });
      } else if (typeof value === 'object' && value !== null) {
        this.checkStringLengths(value, result);
      }
    }
  }

  /**
   * Log security violations
   *
   * Nothing is recorded when `violations` is empty. The log is capped at
   * `maxViolationLogSize` entries; the oldest entry is dropped first.
   *
   * @param {string} input - Input that caused violations
   * @param {Array} violations - Array of violations
   */
  logViolations(input, violations) {
    if (violations.length === 0) {
      return;
    }

    const severityRank = { LOW: 1, MEDIUM: 2, HIGH: 3 };

    this.violationLog.push({
      timestamp: new Date().toISOString(),
      input: input.substring(0, 100), // Limit logged input
      violations: violations,
      // Highest severity among the violations, as a number (0 when none is recognized)
      severity: violations.reduce(
        (max, v) => Math.max(max, severityRank[v.severity] || 0),
        0
      )
    });

    // Maintain log size
    if (this.violationLog.length > this.maxViolationLogSize) {
      this.violationLog.shift();
    }
  }

  /**
   * Get validation statistics
   *
   * `totalViolations` and `logSize` both report the number of log entries
   * (one entry per validated input), while the per-type and per-severity
   * counters count individual violations.
   *
   * @returns {Object}
   */
  getStats() {
    const totalViolations = this.violationLog.length;
    const violationsByType = {};
    const violationsBySeverity = { LOW: 0, MEDIUM: 0, HIGH: 0 };

    for (const entry of this.violationLog) {
      for (const violation of entry.violations) {
        violationsByType[violation.type] = (violationsByType[violation.type] || 0) + 1;
        // Count from zero so an unexpected severity label does not become NaN
        violationsBySeverity[violation.severity] =
          (violationsBySeverity[violation.severity] || 0) + 1;
      }
    }

    return {
      totalViolations,
      violationsByType,
      violationsBySeverity,
      logSize: this.violationLog.length,
      maxLogSize: this.maxViolationLogSize,
      config: this.config
    };
  }

  /**
   * Clear violation log
   */
  clearViolationLog() {
    this.violationLog = [];
  }

  /**
   * Get recent violations
   * @param {number} limit - Number of recent violations to return
   * @returns {Array} - Up to `limit` newest log entries, oldest first
   */
  getRecentViolations(limit = 10) {
    // slice(-0) is slice(0) and would return the whole log
    if (!(limit > 0)) {
      return [];
    }
    return this.violationLog.slice(-limit);
  }
}
|
|
765
|
+
|
|
766
|
+
export default InputValidator;
|