honeyweb-core 1.0.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ // honeyweb-core/detection/bot-detector.js
2
+ // Bot fingerprinting - detect automated tools and headless browsers
3
+
4
// Signatures of well-known automation tools and HTTP client libraries, paired
// with the confidence (0-100) assigned when the User-Agent matches one.
// Built once at module load instead of on every detect() call. None of the
// expressions carry the `g`/`y` flag, so sharing them across calls is stateless.
const KNOWN_BOT_PATTERNS = [
  { pattern: /curl/i, name: 'curl', confidence: 90 },
  { pattern: /wget/i, name: 'wget', confidence: 90 },
  { pattern: /python-requests/i, name: 'Python Requests', confidence: 85 },
  { pattern: /scrapy/i, name: 'Scrapy', confidence: 90 },
  { pattern: /selenium/i, name: 'Selenium', confidence: 80 },
  { pattern: /puppeteer/i, name: 'Puppeteer', confidence: 85 },
  { pattern: /phantomjs/i, name: 'PhantomJS', confidence: 85 },
  { pattern: /headlesschrome/i, name: 'Headless Chrome', confidence: 80 },
  { pattern: /bot|crawler|spider|scraper/i, name: 'Generic Bot', confidence: 70 },
  { pattern: /axios/i, name: 'Axios', confidence: 75 },
  { pattern: /node-fetch/i, name: 'Node Fetch', confidence: 75 },
  { pattern: /go-http-client/i, name: 'Go HTTP Client', confidence: 80 },
  { pattern: /java/i, name: 'Java Client', confidence: 60 },
  { pattern: /okhttp/i, name: 'OkHttp', confidence: 75 }
];

// Request headers whose absence is treated as a bot signal.
const COMMON_BROWSER_HEADERS = ['accept', 'accept-language', 'accept-encoding'];

/**
 * Heuristic bot fingerprinting based on the User-Agent string and a handful of
 * request headers. Every heuristic contributes a human-readable indicator and
 * a score; the overall confidence is the highest single score (not a sum).
 */
class BotDetector {
  /**
   * @param {Object} [config]
   * @param {boolean} [config.enabled=true] - Set to false to turn detection off.
   * @param {number} [config.minChromeVersion=90] - Chrome major versions below
   *   this value are reported as outdated.
   * @param {number} [config.botThreshold=60] - Minimum confidence at which a
   *   request is classified as a bot.
   */
  constructor(config = {}) {
    const options = config || {};
    this.enabled = options.enabled !== false;
    this.minChromeVersion = Number.isFinite(options.minChromeVersion)
      ? options.minChromeVersion
      : 90;
    this.botThreshold = Number.isFinite(options.botThreshold)
      ? options.botThreshold
      : 60;
  }

  /**
   * Detect if request is from a bot
   * @param {Object} req - Express request object; only `req.headers` is read.
   *   A missing request or missing header map is treated as "no headers".
   * @returns {Object} - { isBot: boolean, confidence: number, indicators: string[] }
   */
  detect(req) {
    if (!this.enabled) {
      return { isBot: false, confidence: 0, indicators: [] };
    }

    const indicators = [];
    let confidence = 0;

    // Record one finding; the final confidence is the strongest single signal.
    const flag = (message, score) => {
      indicators.push(message);
      confidence = Math.max(confidence, score);
    };

    const headers = (req && req.headers) || {};
    const rawUserAgent = headers['user-agent'];
    // Coerce so the string operations below never throw on a non-string value.
    const userAgent = rawUserAgent ? String(rawUserAgent) : '';

    // 1. USER-AGENT ANALYSIS

    for (const { pattern, name, confidence: score } of KNOWN_BOT_PATTERNS) {
      if (pattern.test(userAgent)) {
        flag(`Known bot User-Agent: ${name}`, score);
      }
    }

    if (userAgent.trim() === '') {
      flag('Missing User-Agent header', 70);
    }

    // 2. HEADER ANALYSIS

    const missingHeaders = COMMON_BROWSER_HEADERS.filter((h) => !headers[h]);
    if (missingHeaders.length > 0) {
      // 50 for one missing header, up to 70 when all three are absent.
      flag(`Missing headers: ${missingHeaders.join(', ')}`, 40 + (missingHeaders.length * 10));
    }

    if (headers['accept'] === '*/*') {
      flag('Generic Accept header (*/*)', 30);
    }

    // Reported separately in addition to the combined "Missing headers" entry
    // above; kept so the indicator list stays identical for existing consumers.
    if (!headers['accept-language']) {
      flag('Missing Accept-Language header', 40);
    }

    if (headers['connection'] === 'close') {
      flag('Connection: close (bot pattern)', 20);
    }

    // 3. BROWSER VERSION ANALYSIS

    const chromeMatch = userAgent.match(/Chrome\/(\d+)/);
    if (chromeMatch) {
      const version = Number.parseInt(chromeMatch[1], 10);
      if (version < this.minChromeVersion) {
        flag(`Outdated Chrome version: ${version}`, 30);
      }
    }

    // 4. SUSPICIOUS PATTERNS

    if (userAgent.length > 0 && userAgent.length < 20) {
      flag('Suspiciously short User-Agent', 50);
    }

    if (userAgent.length > 500) {
      flag('Suspiciously long User-Agent', 30);
    }

    if (/\s{2,}/.test(userAgent)) {
      flag('Malformed User-Agent (multiple spaces)', 40);
    }

    // Cap confidence at 100
    confidence = Math.min(100, confidence);

    return {
      isBot: confidence >= this.botThreshold,
      confidence,
      indicators
    };
  }

  /**
   * Get statistics
   * @returns {Object} - { enabled: boolean }
   */
  getStats() {
    return {
      enabled: this.enabled
    };
  }
}
139
+
140
+ module.exports = BotDetector;
@@ -0,0 +1,136 @@
1
+ // honeyweb-core/detection/index.js
2
+ // Detection orchestrator - combines all detection modules
3
+
4
+ const { detectMaliciousPatterns } = require('./patterns');
5
+ const RateLimiter = require('./rate-limiter');
6
+ const BotWhitelist = require('./whitelist');
7
+ const BehavioralAnalyzer = require('./behavioral');
8
+ const BotDetector = require('./bot-detector');
9
+
10
/**
 * Detection orchestrator: runs the whitelist, pattern, rate-limit, behavioral
 * and bot-fingerprint checks for a request and merges them into one result.
 */
class DetectionEngine {
  /**
   * @param {Object} config - Reads `config.rateLimit.enabled`,
   *   `config.detection.whitelist.enabled`, `config.detection.behavioral.enabled`
   *   and (at analysis time) `config.detection.patterns.enabled`. The matching
   *   sub-objects are handed to the respective module constructors.
   */
  constructor(config) {
    this.config = config;

    // Modules that are switched off are represented by null and skipped.
    this.rateLimiter = config.rateLimit.enabled
      ? new RateLimiter(config.rateLimit)
      : null;

    this.whitelist = config.detection.whitelist.enabled
      ? new BotWhitelist(config.detection.whitelist)
      : null;

    this.behavioral = config.detection.behavioral.enabled
      ? new BehavioralAnalyzer(config.detection.behavioral)
      : null;

    // Bot fingerprinting is always constructed enabled.
    this.botDetector = new BotDetector({ enabled: true });
  }

  /**
   * Analyze request for threats
   * @param {Object} req - Express request object
   * @param {string} ip - Client IP address
   * @returns {Promise<Object>} - { detected: boolean, threats: string[],
   *   threatLevel: number, rateLimit: Object|null, whitelist: Object|null,
   *   behavioral: Object|null, botDetection: Object|null, legitimateBot: boolean }.
   *   Sub-results are null when the module is disabled or was skipped.
   */
  async analyze(req, ip) {
    const threats = [];
    let threatLevel = 0;

    // 0. Check whitelist first (legitimate bots should skip other checks)
    let whitelistResult = null;
    if (this.whitelist) {
      whitelistResult = await this.whitelist.check(req, ip);
      if (whitelistResult.isLegitimate) {
        // Same shape as the normal result so callers can read every field
        // without checking which path produced it.
        return {
          detected: false,
          threats: [],
          threatLevel: 0,
          rateLimit: null,
          whitelist: whitelistResult,
          behavioral: null,
          botDetection: null,
          legitimateBot: true
        };
      }
    }

    // 1. Pattern detection (SQLi/XSS)
    if (this.config.detection.patterns.enabled) {
      const patternResult = detectMaliciousPatterns(req);
      if (patternResult.detected) {
        threats.push(...patternResult.threats);
        threatLevel += 50; // High threat
      }
    }

    // 2. Rate limiting
    let rateLimitResult = null;
    if (this.rateLimiter) {
      rateLimitResult = this.rateLimiter.check(ip);
      if (rateLimitResult.limited) {
        // Report the window the limiter actually applies: the config value may
        // be omitted, in which case the limiter falls back to its own default.
        const windowMs = this.rateLimiter.window !== undefined
          ? this.rateLimiter.window
          : this.config.rateLimit.window;
        threats.push(`Rate limit exceeded: ${rateLimitResult.count} requests in ${windowMs}ms`);
        threatLevel += 30; // Medium threat
      }
    }

    // 3. Behavioral analysis (Phase 2)
    let behavioralResult = null;
    if (this.behavioral) {
      behavioralResult = this.behavioral.analyze(req, ip);
      if (behavioralResult.suspicious) {
        threats.push(...behavioralResult.reasons);
        threatLevel += behavioralResult.suspicionScore * 0.3; // Scale down behavioral score
      }
    }

    // 4. Bot detection (Phase 2)
    let botDetectionResult = null;
    if (this.botDetector) {
      botDetectionResult = this.botDetector.detect(req);
      if (botDetectionResult.isBot) {
        threats.push(...botDetectionResult.indicators);
        threatLevel += botDetectionResult.confidence * 0.2; // Scale down bot detection score
      }
    }

    // Cap threat level at 100
    threatLevel = Math.min(100, threatLevel);

    return {
      detected: threats.length > 0,
      threats,
      threatLevel,
      rateLimit: rateLimitResult,
      whitelist: whitelistResult,
      behavioral: behavioralResult,
      botDetection: botDetectionResult,
      legitimateBot: false
    };
  }

  /**
   * Get detection statistics
   * @returns {Object} - { rateLimiter, behavioral, botDetector }, each null
   *   when the corresponding module is not active.
   */
  getStats() {
    return {
      rateLimiter: this.rateLimiter ? this.rateLimiter.getStats() : null,
      behavioral: this.behavioral ? this.behavioral.getStats() : null,
      botDetector: this.botDetector ? this.botDetector.getStats() : null
    };
  }

  /**
   * Cleanup and stop timers
   */
  destroy() {
    if (this.rateLimiter) {
      this.rateLimiter.destroy();
    }
    if (this.behavioral) {
      this.behavioral.destroy();
    }
  }
}
135
+
136
+ module.exports = DetectionEngine;
@@ -0,0 +1,83 @@
1
+ // honeyweb-core/detection/patterns.js
2
+ // SQLi and XSS pattern detection (extracted from main index.js)
3
+
4
// Malicious patterns for SQLi and XSS detection.
// None of the expressions use the `g`/`y` flag, so `.test()` is stateless and
// the same instances can be applied to any number of inputs.
const MALICIOUS_PATTERNS = [
  // SQL Injection patterns
  /(\bUNION\b.*\bSELECT\b)/i,
  /(\bOR\b\s+\d+\s*=\s*\d+)/i,
  /(\bAND\b\s+\d+\s*=\s*\d+)/i,
  /(';?\s*DROP\s+TABLE)/i,
  /(';?\s*DELETE\s+FROM)/i,
  /(\bEXEC\b\s*\()/i,
  /(\bINSERT\s+INTO\b)/i,
  /(--\s*$)/,
  /(';\s*--)/,

  // XSS patterns
  /(<script[^>]*>.*?<\/script>)/i,
  /(<iframe[^>]*>)/i,
  /(<img[^>]*onerror\s*=)/i,
  /(javascript\s*:)/i,
  /(<svg[^>]*onload\s*=)/i,
  /(on\w+\s*=\s*["'][^"']*["'])/i,
  /(<object[^>]*>)/i,
  /(<embed[^>]*>)/i
];

/**
 * Percent-decode a URL for scanning. Malformed escape sequences make
 * decodeURIComponent throw a URIError; in that case the raw text is returned
 * so a broken URL can never abort detection.
 * @param {string} text
 * @returns {string}
 */
function decodeForScan(text) {
  try {
    return decodeURIComponent(text);
  } catch (err) {
    return text;
  }
}

/**
 * JSON-serialise a value for scanning, yielding '' when it cannot be
 * serialised (circular structure, BigInt) or serialises to nothing.
 * @param {*} value
 * @returns {string}
 */
function serializeForScan(value) {
  try {
    const json = JSON.stringify(value);
    return typeof json === 'string' ? json : '';
  } catch (err) {
    // The individual string values are still scanned by the caller.
    return '';
  }
}

/**
 * Collect every string (property names and string values) reachable inside a
 * parsed query/body value. Iterative and cycle-safe; binary views are skipped.
 * @param {*} root
 * @returns {string[]}
 */
function collectStrings(root) {
  const found = [];
  const seen = new Set();
  const pending = [root];

  while (pending.length > 0) {
    const current = pending.pop();
    if (typeof current === 'string') {
      found.push(current);
      continue;
    }
    if (current === null || typeof current !== 'object') continue;
    if (ArrayBuffer.isView(current) || seen.has(current)) continue;

    seen.add(current);
    for (const key of Object.keys(current)) {
      found.push(key);
      pending.push(current[key]);
    }
  }

  return found;
}

/**
 * Build the list of texts to scan for a query/body value: the JSON form
 * plus every raw string inside it. Scanning only the JSON form hides matches,
 * because JSON escapes double quotes as \" and always appends closing
 * quote/brace characters, defeating quote-sensitive and `$`-anchored patterns.
 * @param {*} value
 * @returns {string[]}
 */
function scanTargets(value) {
  return [serializeForScan(value)].concat(collectStrings(value));
}

/**
 * Check if request contains malicious patterns
 * @param {Object} req - Express request object. Reads `url`, `query`, `body`
 *   and the `user-agent` / `referer` headers; each may be absent.
 * @returns {Object} - { detected: boolean, threats: string[] }. URL, query and
 *   body contribute one entry per matching pattern; User-Agent and Referer
 *   contribute at most one entry each.
 */
function detectMaliciousPatterns(req) {
  const request = req || {};
  const headers = request.headers || {};
  const threats = [];

  const matchesAny = (pattern, texts) => texts.some((text) => pattern.test(text));
  const report = (label, texts) => {
    for (const pattern of MALICIOUS_PATTERNS) {
      if (matchesAny(pattern, texts)) {
        threats.push(`Malicious pattern in ${label}: ${pattern.source.substring(0, 50)}`);
      }
    }
  };

  // Check URL, both as received and percent-decoded: in "%20UNION%20SELECT"
  // the digit before "UNION" removes the word boundary the patterns rely on.
  const rawUrl = String(request.url || '');
  const decodedUrl = decodeForScan(rawUrl);
  report('URL', decodedUrl === rawUrl ? [rawUrl] : [rawUrl, decodedUrl]);

  // Check query parameters
  report('query', scanTargets(request.query || {}));

  // Check body (if exists)
  if (request.body) {
    report('body', scanTargets(request.body));
  }

  // Check headers (User-Agent, Referer). These messages carry no pattern
  // detail, so each is emitted once rather than once per matching pattern.
  const userAgent = String(headers['user-agent'] || '');
  const referer = String(headers['referer'] || '');
  if (MALICIOUS_PATTERNS.some((pattern) => pattern.test(userAgent))) {
    threats.push(`Malicious pattern in User-Agent`);
  }
  if (MALICIOUS_PATTERNS.some((pattern) => pattern.test(referer))) {
    threats.push(`Malicious pattern in Referer`);
  }

  return {
    detected: threats.length > 0,
    threats
  };
}
79
+
80
+ module.exports = {
81
+ MALICIOUS_PATTERNS,
82
+ detectMaliciousPatterns
83
+ };
@@ -0,0 +1,109 @@
1
+ // honeyweb-core/detection/rate-limiter.js
2
+ // Rate limiting with auto-cleanup (fixes memory leak)
3
+
4
/**
 * Sliding-window rate limiter keyed by IP address. Each IP maps to the list of
 * request timestamps that still fall inside the window; a periodic sweep drops
 * entries for IPs that have gone quiet so the map cannot grow without bound.
 */
class RateLimiter {
  /**
   * @param {Object} [config]
   * @param {number} [config.window=10000] - Window length in milliseconds.
   * @param {number} [config.maxRequests=50] - Requests allowed per window.
   * @param {number} [config.cleanupInterval=60000] - Sweep period in milliseconds.
   */
  constructor(config = {}) {
    const options = config || {};
    this.window = options.window || 10000; // 10 seconds
    this.maxRequests = options.maxRequests || 50;
    this.requests = new Map(); // ip -> [timestamps]

    // Auto-cleanup to prevent memory leak
    const cleanupInterval = options.cleanupInterval || 60000; // 60 seconds
    this.cleanupTimer = setInterval(() => {
      this._cleanup();
    }, cleanupInterval);

    // A housekeeping timer must not be the only thing keeping the process
    // alive when the owner forgets to call destroy().
    if (typeof this.cleanupTimer.unref === 'function') {
      this.cleanupTimer.unref();
    }
  }

  /**
   * Record a request for the IP and report whether it exceeds the limit.
   * @param {string} ip
   * @returns {Object} - { limited: boolean, count: number, remaining: number,
   *   resetAt: number } where `count` includes the current request, `limited`
   *   is true once `count` exceeds `maxRequests`, and `resetAt` is the current
   *   time plus one window length.
   */
  check(ip) {
    const now = Date.now();
    const windowStart = now - this.window;

    // Keep only the timestamps still inside the window, then add this request.
    const timestamps = (this.requests.get(ip) || []).filter((ts) => ts > windowStart);
    timestamps.push(now);
    this.requests.set(ip, timestamps);

    const count = timestamps.length;

    return {
      limited: count > this.maxRequests,
      count,
      remaining: Math.max(0, this.maxRequests - count),
      resetAt: now + this.window
    };
  }

  /**
   * Clean up old entries to prevent memory leak
   * @private
   */
  _cleanup() {
    const windowStart = Date.now() - this.window;

    for (const [ip, timestamps] of this.requests.entries()) {
      const active = timestamps.filter((ts) => ts > windowStart);

      if (active.length === 0) {
        // No active requests, remove entry
        this.requests.delete(ip);
      } else {
        this.requests.set(ip, active);
      }
    }
  }

  /**
   * Get statistics
   * @returns {Object} - { trackedIPs: number, window: number, maxRequests: number }
   */
  getStats() {
    return {
      trackedIPs: this.requests.size,
      window: this.window,
      maxRequests: this.maxRequests
    };
  }

  /**
   * Reset rate limit for an IP
   * @param {string} ip
   */
  reset(ip) {
    this.requests.delete(ip);
  }

  /**
   * Clear all rate limit data
   */
  clear() {
    this.requests.clear();
  }

  /**
   * Stop the cleanup timer. Safe to call more than once.
   */
  destroy() {
    if (this.cleanupTimer) {
      clearInterval(this.cleanupTimer);
      this.cleanupTimer = null;
    }
  }
}
108
+
109
+ module.exports = RateLimiter;
@@ -0,0 +1,116 @@
1
+ // honeyweb-core/detection/whitelist.js
2
+ // Legitimate bot whitelist with DNS verification
3
+
4
+ const DNSVerifier = require('../utils/dns-verify');
5
+
6
// Crawlers that are allowed through. Each entry lists the User-Agent
// signatures that identify the crawler and the domains handed to the DNS
// verifier when confirming that the client IP really belongs to it.
const KNOWN_BOTS = {
  'Googlebot': { patterns: [/Googlebot/i], domains: ['googlebot.com', 'google.com'] },
  'Bingbot': { patterns: [/bingbot/i], domains: ['search.msn.com'] },
  'Slackbot': { patterns: [/Slackbot/i, /Slack-ImgProxy/i], domains: ['slack.com'] },
  'facebookexternalhit': { patterns: [/facebookexternalhit/i], domains: ['facebook.com'] },
  'Twitterbot': { patterns: [/Twitterbot/i], domains: ['twitter.com'] },
  'LinkedInBot': { patterns: [/LinkedInBot/i], domains: ['linkedin.com'] },
  'Applebot': { patterns: [/Applebot/i], domains: ['apple.com'] },
  'DuckDuckBot': { patterns: [/DuckDuckBot/i], domains: ['duckduckgo.com'] }
};

/**
 * Decides whether a request comes from a whitelisted crawler, optionally
 * confirming the claim through the DNS verifier.
 */
class BotWhitelist {
  /**
   * @param {Object} config
   * @param {boolean} [config.enabled] - Anything other than false enables the whitelist.
   * @param {boolean} [config.verifyDNS] - Anything other than false enables DNS verification.
   * @param {*} [config.cacheTTL] - Passed straight to the DNSVerifier constructor.
   * @param {Array} [config.bots] - Custom bot list.
   */
  constructor(config) {
    this.enabled = config.enabled !== false;
    this.verifyDNS = config.verifyDNS !== false;
    this.dnsVerifier = new DNSVerifier(config.cacheTTL);

    // NOTE(review): stored here, but check() below only consults KNOWN_BOTS.
    this.customBots = config.bots || [];
  }

  /**
   * Check if request is from a legitimate bot
   * @param {Object} req - Express request object
   * @param {string} ip - Client IP address
   * @returns {Promise<Object>} - { isLegitimate: boolean, botName: string, verified: boolean },
   *   plus `hostname` on a successful DNS verification or `reason` on a failed one.
   */
  async check(req, ip) {
    const notABot = { isLegitimate: false, botName: null, verified: false };

    if (!this.enabled) {
      return notABot;
    }

    const userAgent = req.headers['user-agent'] || '';

    // Only the first known bot whose signature matches is considered.
    const claimed = Object.entries(KNOWN_BOTS).find(([, info]) =>
      info.patterns.some((signature) => signature.test(userAgent))
    );

    if (!claimed) {
      return notABot;
    }

    const [botName, botInfo] = claimed;

    // Without DNS verification the User-Agent claim is taken at face value.
    if (!this.verifyDNS) {
      return { isLegitimate: true, botName, verified: false };
    }

    // Try each domain in turn; the first successful verification wins.
    for (const domain of botInfo.domains) {
      const outcome = await this.dnsVerifier.verify(ip, domain);

      if (outcome.verified) {
        return {
          isLegitimate: true,
          botName,
          verified: true,
          hostname: outcome.hostname
        };
      }
    }

    // The User-Agent names a known bot but no domain could be verified.
    return {
      isLegitimate: false,
      botName: `Fake ${botName}`,
      verified: false,
      reason: 'DNS verification failed'
    };
  }

  /**
   * Clear DNS cache
   */
  clearCache() {
    this.dnsVerifier.clearCache();
  }
}
115
+
116
+ module.exports = BotWhitelist;