crawlforge-mcp-server 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CLAUDE.md +315 -0
  2. package/LICENSE +21 -0
  3. package/README.md +181 -0
  4. package/package.json +115 -0
  5. package/server.js +1963 -0
  6. package/setup.js +112 -0
  7. package/src/constants/config.js +615 -0
  8. package/src/core/ActionExecutor.js +1104 -0
  9. package/src/core/AlertNotificationSystem.js +601 -0
  10. package/src/core/AuthManager.js +315 -0
  11. package/src/core/ChangeTracker.js +2306 -0
  12. package/src/core/JobManager.js +687 -0
  13. package/src/core/LLMsTxtAnalyzer.js +753 -0
  14. package/src/core/LocalizationManager.js +1615 -0
  15. package/src/core/PerformanceManager.js +828 -0
  16. package/src/core/ResearchOrchestrator.js +1327 -0
  17. package/src/core/SnapshotManager.js +1037 -0
  18. package/src/core/StealthBrowserManager.js +1795 -0
  19. package/src/core/WebhookDispatcher.js +745 -0
  20. package/src/core/analysis/ContentAnalyzer.js +749 -0
  21. package/src/core/analysis/LinkAnalyzer.js +972 -0
  22. package/src/core/cache/CacheManager.js +821 -0
  23. package/src/core/connections/ConnectionPool.js +553 -0
  24. package/src/core/crawlers/BFSCrawler.js +845 -0
  25. package/src/core/integrations/PerformanceIntegration.js +377 -0
  26. package/src/core/llm/AnthropicProvider.js +135 -0
  27. package/src/core/llm/LLMManager.js +415 -0
  28. package/src/core/llm/LLMProvider.js +97 -0
  29. package/src/core/llm/OpenAIProvider.js +127 -0
  30. package/src/core/processing/BrowserProcessor.js +986 -0
  31. package/src/core/processing/ContentProcessor.js +505 -0
  32. package/src/core/processing/PDFProcessor.js +448 -0
  33. package/src/core/processing/StreamProcessor.js +673 -0
  34. package/src/core/queue/QueueManager.js +98 -0
  35. package/src/core/workers/WorkerPool.js +585 -0
  36. package/src/core/workers/worker.js +743 -0
  37. package/src/monitoring/healthCheck.js +600 -0
  38. package/src/monitoring/metrics.js +761 -0
  39. package/src/optimization/wave3-optimizations.js +932 -0
  40. package/src/security/security-patches.js +120 -0
  41. package/src/security/security-tests.js +355 -0
  42. package/src/security/wave3-security.js +652 -0
  43. package/src/tools/advanced/BatchScrapeTool.js +1089 -0
  44. package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
  45. package/src/tools/crawl/crawlDeep.js +449 -0
  46. package/src/tools/crawl/mapSite.js +400 -0
  47. package/src/tools/extract/analyzeContent.js +624 -0
  48. package/src/tools/extract/extractContent.js +329 -0
  49. package/src/tools/extract/processDocument.js +503 -0
  50. package/src/tools/extract/summarizeContent.js +376 -0
  51. package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
  52. package/src/tools/research/deepResearch.js +706 -0
  53. package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
  54. package/src/tools/search/adapters/googleSearch.js +236 -0
  55. package/src/tools/search/adapters/searchProviderFactory.js +96 -0
  56. package/src/tools/search/queryExpander.js +543 -0
  57. package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
  58. package/src/tools/search/ranking/ResultRanker.js +497 -0
  59. package/src/tools/search/searchWeb.js +482 -0
  60. package/src/tools/tracking/trackChanges.js +1355 -0
  61. package/src/utils/CircuitBreaker.js +515 -0
  62. package/src/utils/ErrorHandlingConfig.js +342 -0
  63. package/src/utils/HumanBehaviorSimulator.js +569 -0
  64. package/src/utils/Logger.js +568 -0
  65. package/src/utils/MemoryMonitor.js +173 -0
  66. package/src/utils/RetryManager.js +386 -0
  67. package/src/utils/contentUtils.js +588 -0
  68. package/src/utils/domainFilter.js +612 -0
  69. package/src/utils/inputValidation.js +766 -0
  70. package/src/utils/rateLimiter.js +196 -0
  71. package/src/utils/robotsChecker.js +91 -0
  72. package/src/utils/securityMiddleware.js +416 -0
  73. package/src/utils/sitemapParser.js +678 -0
  74. package/src/utils/ssrfProtection.js +640 -0
  75. package/src/utils/urlNormalizer.js +168 -0
@@ -0,0 +1,766 @@
1
+ /**
2
+ * Enhanced Input Validation and Sanitization Module
3
+ * Provides comprehensive input validation, sanitization, and security checks
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import DOMPurify from 'isomorphic-dompurify';
8
+
9
/**
 * Security patterns and rules.
 *
 * Each entry is a list of regular expressions that the validator applies with
 * `RegExp.prototype.test()`. None of the patterns carries the `g` (or `y`)
 * flag on purpose: `.test()` on a global/sticky regex resumes from
 * `lastIndex`, so a shared pattern would flag an input on one call and miss
 * the very same input on the next.
 *
 * These are coarse heuristics, not parsers: they are deliberately broad and
 * will produce false positives (for example, any quoted string matches the
 * second SQL pattern and any `&` matches the first command-injection pattern).
 */
const SECURITY_PATTERNS = {
  // SQL injection patterns
  sqlInjection: [
    // SQL keywords as whole words
    /(\b(SELECT|INSERT|UPDATE|DELETE|DROP|CREATE|ALTER|EXEC|UNION|SCRIPT)\b)/i,
    // Any single- or double-quoted run
    /'[^']*'|"[^"]*"/,
    // Statement terminator followed by a line comment
    /;\s*--/,
    // Block comment
    /\/\*[\s\S]*?\*\//
  ],

  // XSS patterns
  xssPatterns: [
    /<script[^>]*>[\s\S]*?<\/script>/i,
    /<iframe[^>]*>[\s\S]*?<\/iframe>/i,
    /javascript:/i,
    /vbscript:/i,
    /onload\s*=/i,
    /onerror\s*=/i,
    /onclick\s*=/i,
    /onmouseover\s*=/i
  ],

  // Path traversal patterns (plain, fully and partially percent-encoded)
  pathTraversal: [
    /\.\.\//,
    /\.\.\\/,
    /%2e%2e%2f/i,
    /%2e%2e%5c/i,
    /\.\.%2f/i,
    /\.\.%5c/i
  ],

  // Command injection patterns
  commandInjection: [
    // Shell metacharacters
    /[;&|`$(){}\[\]]/,
    /\beval\b/i,
    /\bexec\b/i,
    /\bsystem\b/i,
    /\bshell_exec\b/i
  ],

  // CSS selector injection
  cssSelectorInjection: [
    /['"]/,
    /\/\*/,
    /expression\s*\(/i,
    /javascript\s*:/i,
    /@import/i
  ],

  // Regular expression DoS patterns.
  //
  // These inspect the *source text* of a user-supplied regex for constructs
  // that commonly cause catastrophic backtracking. They must themselves be
  // cheap to run on hostile input, so none of them nests an unbounded
  // quantifier inside another one.
  redosPatterns: [
    // A quantified group whose body already contains `+` or `*`: (a+)+, (\d*)*, (?:x+){2,}
    /\([^()]*[+*][^()]*\)[+*{]/,
    // A repeated alternation with two identical branches: (a|a)*
    /\(([^()|]+)\|\1\)[+*{]/,
    // The same token quantified twice in a row: a*a*, \d+\d+
    /(\\?[^\\()[\]{}|*+?])[+*]\1[+*]/
  ]
};
68
+
69
/**
 * Default limits and allow-lists for input validation.
 *
 * InputValidator copies these into `this.config` and lets constructor
 * options override individual keys.
 */
const VALIDATION_CONFIG = {
  // Upper bound on string length (URLs, selectors, HTML, object string values)
  maxStringLength: 10000,

  // Upper bound on array length inside validated objects
  maxArrayLength: 1000,

  // Deepest object nesting accepted by validateObject
  maxObjectDepth: 10,

  // Longest regular expression source accepted by validateRegex
  maxRegexLength: 500,

  // Tags that survive HTML sanitization
  allowedHTMLTags: [
    'p',
    'br',
    'strong',
    'em',
    'u',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6'
  ],

  // The remaining keys are not referenced elsewhere in this file
  allowedCSSProperties: [
    'color',
    'font-size',
    'font-weight',
    'text-align'
  ],

  // 100MB
  maxFileSize: 100 * 1024 * 1024,

  allowedFileTypes: [
    'pdf',
    'txt',
    'html',
    'json',
    'xml',
    'csv'
  ]
};
82
+
83
/**
 * Enhanced Input Validator Class
 *
 * Validates and sanitizes untrusted input: URLs, CSS selectors, search
 * queries, regular expressions, HTML and plain objects.
 *
 * Every `validate*` method returns a result object of the form
 * `{ isValid, sanitizedValue, violations }` (validateURL adds `metadata`).
 * `isValid` starts as `false`; once all checks of a method have run it is set
 * to "no violation has severity 'HIGH'". Methods that bail out early (wrong
 * type, excessive length, ...) return with `isValid: false` and
 * `sanitizedValue: null`.
 *
 * Violations are also appended to an in-memory log capped at
 * `maxViolationLogSize` entries (oldest entries are dropped first).
 */
export class InputValidator {
  /**
   * @param {Object} [options] - Overrides merged on top of VALIDATION_CONFIG
   */
  constructor(options = {}) {
    this.config = { ...VALIDATION_CONFIG, ...options };
    this.violationLog = [];
    this.maxViolationLogSize = 1000;
  }

  /**
   * Validate and sanitize URL input
   * @param {string} url - URL to validate
   * @param {Object} options - Validation options
   * @param {string[]} [options.allowedProtocols] - Accepted `URL.protocol`
   *   values; defaults to `['http:', 'https:']`
   * @returns {Object} - Validation result, including `metadata` with the
   *   parsed protocol, hostname and port
   */
  validateURL(url, options = {}) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: [],
      metadata: {}
    };

    try {
      // Basic format validation
      if (typeof url !== 'string' || url.length === 0) {
        result.violations.push({
          type: 'INVALID_FORMAT',
          message: 'URL must be a non-empty string',
          severity: 'HIGH'
        });
        return result;
      }

      // Length validation
      if (url.length > this.config.maxStringLength) {
        result.violations.push({
          type: 'EXCESSIVE_LENGTH',
          message: `URL exceeds maximum length of ${this.config.maxStringLength}`,
          severity: 'HIGH'
        });
        return result;
      }

      // URL format validation; `new URL` throws on malformed input, which is
      // reported as MALFORMED_URL by the catch block below.
      const urlObj = new URL(url);
      result.metadata.protocol = urlObj.protocol;
      result.metadata.hostname = urlObj.hostname;
      result.metadata.port = urlObj.port;

      // Protocol validation
      const allowedProtocols = options.allowedProtocols || ['http:', 'https:'];
      if (!allowedProtocols.includes(urlObj.protocol)) {
        result.violations.push({
          type: 'INVALID_PROTOCOL',
          message: `Protocol '${urlObj.protocol}' is not allowed`,
          severity: 'HIGH'
        });
        return result;
      }

      // Security pattern checks run against the raw URL string
      this.checkSecurityPatterns(url, result);

      // Path traversal check
      if (this.containsPathTraversal(urlObj.pathname)) {
        result.violations.push({
          type: 'PATH_TRAVERSAL',
          message: 'URL contains path traversal patterns',
          severity: 'HIGH'
        });
      }

      // Sanitize URL
      result.sanitizedValue = this.sanitizeURL(urlObj);
      result.isValid = hasNoHighSeverity(result.violations);
    } catch (error) {
      result.violations.push({
        type: 'MALFORMED_URL',
        message: `Invalid URL format: ${error.message}`,
        severity: 'HIGH'
      });
    }

    this.logViolations(url, result.violations);
    return result;
  }

  /**
   * Validate CSS selector for injection attacks
   * @param {string} selector - CSS selector to validate
   * @returns {Object} - Validation result
   */
  validateCSSSelector(selector) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof selector !== 'string') {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'CSS selector must be a string',
        severity: 'HIGH'
      });
      return result;
    }

    // Length check
    if (selector.length > this.config.maxStringLength) {
      result.violations.push({
        type: 'EXCESSIVE_LENGTH',
        message: 'CSS selector too long',
        severity: 'HIGH'
      });
      return result;
    }

    // Check for CSS injection patterns (at most one violation is recorded)
    if (matchesAnyPattern(SECURITY_PATTERNS.cssSelectorInjection, selector)) {
      result.violations.push({
        type: 'CSS_INJECTION',
        message: 'CSS selector contains potential injection patterns',
        severity: 'HIGH'
      });
    }

    // Check for suspicious functions. This is a plain substring match, so one
    // violation is recorded per keyword that occurs anywhere in the selector.
    const lowered = selector.toLowerCase();
    const suspiciousFunctions = ['expression', 'url', 'import', 'javascript'];
    for (const func of suspiciousFunctions) {
      if (lowered.includes(func)) {
        result.violations.push({
          type: 'SUSPICIOUS_FUNCTION',
          message: `CSS selector contains suspicious function: ${func}`,
          severity: 'MEDIUM'
        });
      }
    }

    // Validate selector syntax; only possible where a DOM `document` exists
    try {
      if (typeof document !== 'undefined') {
        document.querySelector(selector);
      }
    } catch (error) {
      result.violations.push({
        type: 'INVALID_SYNTAX',
        message: `Invalid CSS selector syntax: ${error.message}`,
        severity: 'MEDIUM'
      });
    }

    result.sanitizedValue = this.sanitizeCSSSelector(selector);
    result.isValid = hasNoHighSeverity(result.violations);

    this.logViolations(selector, result.violations);
    return result;
  }

  /**
   * Validate search query for injection attacks
   * @param {string} query - Search query to validate
   * @returns {Object} - Validation result
   */
  validateSearchQuery(query) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof query !== 'string') {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'Search query must be a string',
        severity: 'HIGH'
      });
      return result;
    }

    // Length check: only MEDIUM, the sanitizer truncates to 1000 characters
    if (query.length > 1000) {
      result.violations.push({
        type: 'EXCESSIVE_LENGTH',
        message: 'Search query too long',
        severity: 'MEDIUM'
      });
    }

    this.checkSQLInjection(query, result);
    this.checkXSSPatterns(query, result);
    this.checkCommandInjection(query, result);

    // Count occurrences of search operators across the whole query
    const lowered = query.toLowerCase();
    const dangerousOperators = ['site:', 'filetype:', 'inurl:', 'intitle:'];
    const operatorCount = dangerousOperators.reduce(
      (count, op) => count + (lowered.split(op).length - 1),
      0
    );

    if (operatorCount > 5) {
      result.violations.push({
        type: 'TOO_MANY_OPERATORS',
        message: 'Too many search operators',
        severity: 'MEDIUM'
      });
    }

    result.sanitizedValue = this.sanitizeSearchQuery(query);
    result.isValid = hasNoHighSeverity(result.violations);

    this.logViolations(query, result.violations);
    return result;
  }

  /**
   * Validate regular expression for ReDoS attacks
   * @param {string} regex - Regular expression pattern (source text)
   * @returns {Object} - Validation result; `sanitizedValue` is the unmodified
   *   pattern
   */
  validateRegex(regex) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof regex !== 'string') {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'Regex must be a string',
        severity: 'HIGH'
      });
      return result;
    }

    // Length check
    if (regex.length > this.config.maxRegexLength) {
      result.violations.push({
        type: 'EXCESSIVE_LENGTH',
        message: 'Regular expression too long',
        severity: 'HIGH'
      });
      return result;
    }

    // Check the pattern text against SECURITY_PATTERNS.redosPatterns
    if (matchesAnyPattern(SECURITY_PATTERNS.redosPatterns, regex)) {
      result.violations.push({
        type: 'REDOS_RISK',
        message: 'Regular expression may be vulnerable to ReDoS attacks',
        severity: 'HIGH'
      });
    }

    // Check for stacked quantifiers such as `*+`, `++` or `??`.
    // No `g` flag: `.test()` only needs a yes/no answer.
    const complexQuantifiers = /(\*\+)|(\+\*)|(\*\*)|(\+\+)|(\?\?)/;
    if (complexQuantifiers.test(regex)) {
      result.violations.push({
        type: 'COMPLEX_QUANTIFIERS',
        message: 'Regular expression contains complex quantifiers',
        severity: 'MEDIUM'
      });
    }

    // Validate regex syntax by compiling it (the compiled regex is discarded)
    try {
      new RegExp(regex);
    } catch (error) {
      result.violations.push({
        type: 'INVALID_SYNTAX',
        message: `Invalid regular expression syntax: ${error.message}`,
        severity: 'HIGH'
      });
    }

    result.sanitizedValue = regex; // Don't modify regex patterns
    result.isValid = hasNoHighSeverity(result.violations);

    this.logViolations(regex, result.violations);
    return result;
  }

  /**
   * Validate HTML content
   * @param {string} html - HTML content to validate
   * @returns {Object} - Validation result; `sanitizedValue` is the DOMPurify
   *   output restricted to `config.allowedHTMLTags`
   */
  validateHTML(html) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof html !== 'string') {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'HTML must be a string',
        severity: 'HIGH'
      });
      return result;
    }

    // Length check (MEDIUM only; the content is still sanitized)
    if (html.length > this.config.maxStringLength) {
      result.violations.push({
        type: 'EXCESSIVE_LENGTH',
        message: 'HTML content too long',
        severity: 'MEDIUM'
      });
    }

    // Check for XSS patterns
    this.checkXSSPatterns(html, result);

    // Sanitize HTML using DOMPurify. FORBID_TAGS keeps <script>/<iframe> out
    // even if a caller widens `allowedHTMLTags`; DOMPurify has no
    // FORBID_SCRIPT / FORBID_IFRAME options, so those keys were ignored.
    result.sanitizedValue = DOMPurify.sanitize(html, {
      ALLOWED_TAGS: this.config.allowedHTMLTags,
      ALLOWED_ATTR: ['class', 'id'],
      FORBID_TAGS: ['script', 'iframe']
    });

    result.isValid = hasNoHighSeverity(result.violations);

    this.logViolations(html.substring(0, 100), result.violations);
    return result;
  }

  /**
   * Validate object structure and depth
   * @param {Object} obj - Object (or array) to validate
   * @param {Object} options - Validation options (currently unused)
   * @returns {Object} - Validation result; violations from this method are
   *   not written to the violation log
   */
  validateObject(obj, options = {}) {
    const result = {
      isValid: false,
      sanitizedValue: null,
      violations: []
    };

    if (typeof obj !== 'object' || obj === null) {
      result.violations.push({
        type: 'INVALID_TYPE',
        message: 'Input must be an object',
        severity: 'HIGH'
      });
      return result;
    }

    // Check object depth first: it bounds the recursion of every later step
    // (a cyclic object is reported as too deep).
    const depth = this.getObjectDepth(obj);
    if (depth > this.config.maxObjectDepth) {
      result.violations.push({
        type: 'EXCESSIVE_DEPTH',
        message: `Object depth exceeds maximum of ${this.config.maxObjectDepth}`,
        severity: 'HIGH'
      });
      return result;
    }

    this.checkArrayLengths(obj, result);
    this.checkStringLengths(obj, result);

    // Sanitize object
    result.sanitizedValue = this.sanitizeObject(obj);
    result.isValid = hasNoHighSeverity(result.violations);

    return result;
  }

  /**
   * Check for security patterns in input (SQL injection, XSS, command
   * injection)
   * @param {string} input - Input to check
   * @param {Object} result - Result object to update
   */
  checkSecurityPatterns(input, result) {
    this.checkSQLInjection(input, result);
    this.checkXSSPatterns(input, result);
    this.checkCommandInjection(input, result);
  }

  /**
   * Check for SQL injection patterns; records at most one violation
   * @param {string} input - Input to check
   * @param {Object} result - Result object to update
   */
  checkSQLInjection(input, result) {
    if (matchesAnyPattern(SECURITY_PATTERNS.sqlInjection, input)) {
      result.violations.push({
        type: 'SQL_INJECTION',
        message: 'Input contains potential SQL injection patterns',
        severity: 'HIGH'
      });
    }
  }

  /**
   * Check for XSS patterns; records at most one violation
   * @param {string} input - Input to check
   * @param {Object} result - Result object to update
   */
  checkXSSPatterns(input, result) {
    if (matchesAnyPattern(SECURITY_PATTERNS.xssPatterns, input)) {
      result.violations.push({
        type: 'XSS_ATTEMPT',
        message: 'Input contains potential XSS patterns',
        severity: 'HIGH'
      });
    }
  }

  /**
   * Check for command injection patterns; records at most one violation
   * @param {string} input - Input to check
   * @param {Object} result - Result object to update
   */
  checkCommandInjection(input, result) {
    if (matchesAnyPattern(SECURITY_PATTERNS.commandInjection, input)) {
      result.violations.push({
        type: 'COMMAND_INJECTION',
        message: 'Input contains potential command injection patterns',
        severity: 'HIGH'
      });
    }
  }

  /**
   * Check for path traversal patterns
   * @param {string} path - Path to check
   * @returns {boolean}
   */
  containsPathTraversal(path) {
    return matchesAnyPattern(SECURITY_PATTERNS.pathTraversal, path);
  }

  /**
   * Sanitize URL object. Works on a copy; the argument is not modified.
   * @param {URL} urlObj - URL object to sanitize
   * @returns {string} - URL without credentials and without fragment
   */
  sanitizeURL(urlObj) {
    const sanitized = new URL(urlObj.toString());

    // Remove authentication info
    sanitized.username = '';
    sanitized.password = '';

    // Remove fragment for security
    sanitized.hash = '';

    return sanitized.toString();
  }

  /**
   * Sanitize CSS selector
   * @param {string} selector - CSS selector to sanitize
   * @returns {string}
   */
  sanitizeCSSSelector(selector) {
    return selector
      .replace(/['"]/g, '') // Remove quotes
      .replace(/\/\*[\s\S]*?\*\//g, '') // Remove comments
      .replace(/javascript:/gi, '') // Remove javascript:
      .replace(/expression\s*\(/gi, '') // Remove expression(
      .trim();
  }

  /**
   * Sanitize search query
   * @param {string} query - Search query to sanitize
   * @returns {string}
   */
  sanitizeSearchQuery(query) {
    return query
      .replace(/[<>&"']/g, '') // Remove HTML characters
      .replace(/[\r\n\t]/g, ' ') // Replace control characters with spaces
      .replace(/\s+/g, ' ') // Normalize whitespace
      .trim()
      .substring(0, 1000); // Limit length
  }

  /**
   * Sanitize object recursively.
   *
   * Returns a new structure: arrays are truncated to `config.maxArrayLength`,
   * every string (object value or array element) goes through
   * sanitizeString, nested objects/arrays are sanitized recursively, and all
   * other values are copied as-is. Non-object input is returned unchanged.
   *
   * An own `__proto__` key (as produced by `JSON.parse`) is dropped:
   * assigning it would replace the prototype of the sanitized object instead
   * of creating a property.
   *
   * @param {Object} obj - Object to sanitize
   * @returns {Object}
   */
  sanitizeObject(obj) {
    if (typeof obj !== 'object' || obj === null) {
      return obj;
    }

    const sanitizeValue = (value) =>
      typeof value === 'string' ? this.sanitizeString(value) : this.sanitizeObject(value);

    if (Array.isArray(obj)) {
      return obj.slice(0, this.config.maxArrayLength).map(sanitizeValue);
    }

    const sanitized = {};
    for (const [key, value] of Object.entries(obj)) {
      if (key === '__proto__') {
        continue;
      }
      sanitized[key] = sanitizeValue(value);
    }

    return sanitized;
  }

  /**
   * Sanitize string value: collapses all whitespace runs to a single space,
   * trims, and truncates to `config.maxStringLength`.
   * @param {string} str - String to sanitize
   * @returns {string} - Sanitized string (non-strings are returned unchanged)
   */
  sanitizeString(str) {
    if (typeof str !== 'string') {
      return str;
    }

    return str
      .replace(/[\r\n\t]/g, ' ') // Replace control characters
      .replace(/\s+/g, ' ') // Normalize whitespace
      .trim()
      .substring(0, this.config.maxStringLength);
  }

  /**
   * Get object depth.
   *
   * Recursion stops as soon as `depth` exceeds `config.maxObjectDepth`, so
   * the result is only exact up to that limit and cyclic structures
   * terminate.
   *
   * @param {Object} obj - Object to measure
   * @param {number} depth - Current depth
   * @returns {number}
   */
  getObjectDepth(obj, depth = 0) {
    if (typeof obj !== 'object' || obj === null || depth > this.config.maxObjectDepth) {
      return depth;
    }

    let maxDepth = depth;
    for (const value of Object.values(obj)) {
      if (typeof value === 'object' && value !== null) {
        maxDepth = Math.max(maxDepth, this.getObjectDepth(value, depth + 1));
      }
    }

    return maxDepth;
  }

  /**
   * Check array lengths in object. Oversized arrays are reported and not
   * descended into; the root value itself is not checked.
   * @param {Object} obj - Object to check
   * @param {Object} result - Result object to update
   */
  checkArrayLengths(obj, result) {
    for (const value of Object.values(obj)) {
      if (Array.isArray(value) && value.length > this.config.maxArrayLength) {
        result.violations.push({
          type: 'EXCESSIVE_ARRAY_LENGTH',
          message: `Array length exceeds maximum of ${this.config.maxArrayLength}`,
          severity: 'MEDIUM'
        });
      } else if (typeof value === 'object' && value !== null) {
        this.checkArrayLengths(value, result);
      }
    }
  }

  /**
   * Check string lengths in object (recursively)
   * @param {Object} obj - Object to check
   * @param {Object} result - Result object to update
   */
  checkStringLengths(obj, result) {
    for (const value of Object.values(obj)) {
      if (typeof value === 'string' && value.length > this.config.maxStringLength) {
        result.violations.push({
          type: 'EXCESSIVE_STRING_LENGTH',
          message: `String length exceeds maximum of ${this.config.maxStringLength}`,
          severity: 'MEDIUM'
        });
      } else if (typeof value === 'object' && value !== null) {
        this.checkStringLengths(value, result);
      }
    }
  }

  /**
   * Log security violations. Nothing is logged when `violations` is empty.
   * @param {string} input - Input that caused violations
   * @param {Array} violations - Array of violations
   */
  logViolations(input, violations) {
    if (violations.length === 0) {
      return;
    }

    const severities = { LOW: 1, MEDIUM: 2, HIGH: 3 };
    this.violationLog.push({
      timestamp: new Date().toISOString(),
      input: input.substring(0, 100), // Limit logged input
      violations: violations,
      // Highest severity among the violations, as a number (0 if unknown)
      severity: violations.reduce(
        (max, v) => Math.max(max, severities[v.severity] || 0),
        0
      )
    });

    // Maintain log size: one entry is added per call, so dropping one suffices
    if (this.violationLog.length > this.maxViolationLogSize) {
      this.violationLog.shift();
    }
  }

  /**
   * Get validation statistics
   * @returns {Object} - `totalViolations` and `logSize` are both the number
   *   of log entries (one entry per logged validation call, not per
   *   violation); the `violationsBy*` maps count individual violations
   */
  getStats() {
    const totalViolations = this.violationLog.length;
    const violationsByType = {};
    const violationsBySeverity = { LOW: 0, MEDIUM: 0, HIGH: 0 };

    for (const entry of this.violationLog) {
      for (const violation of entry.violations) {
        violationsByType[violation.type] = (violationsByType[violation.type] || 0) + 1;
        violationsBySeverity[violation.severity] =
          (violationsBySeverity[violation.severity] || 0) + 1;
      }
    }

    return {
      totalViolations,
      violationsByType,
      violationsBySeverity,
      logSize: this.violationLog.length,
      maxLogSize: this.maxViolationLogSize,
      config: this.config
    };
  }

  /**
   * Clear violation log
   */
  clearViolationLog() {
    this.violationLog = [];
  }

  /**
   * Get recent violations
   * @param {number} limit - Number of recent violations to return
   * @returns {Array}
   */
  getRecentViolations(limit = 10) {
    return this.violationLog.slice(-limit);
  }
}

/**
 * Report whether any of the given patterns matches the input.
 *
 * `lastIndex` is reset before every test: `.test()` on a regex with the `g`
 * or `y` flag resumes from `lastIndex`, so without the reset a shared pattern
 * would match an input on one call and miss the same input on the next.
 *
 * @param {RegExp[]} patterns - Patterns to try, in order
 * @param {string} input - Text to test
 * @returns {boolean}
 */
function matchesAnyPattern(patterns, input) {
  return patterns.some((pattern) => {
    pattern.lastIndex = 0;
    return pattern.test(input);
  });
}

/**
 * Report whether a violation list contains no 'HIGH' severity entry.
 * @param {Array<{severity: string}>} violations
 * @returns {boolean}
 */
function hasNoHighSeverity(violations) {
  return violations.every((violation) => violation.severity !== 'HIGH');
}

export default InputValidator;