PyPI - aiwaf - Versions diffs - 0.1.9.3.1__py3-none-any.whl → 0.1.9.3.3__py3-none-any.whl - Mend

aiwaf 0.1.9.3.1__py3-none-any.whl → 0.1.9.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiwaf might be problematic. Click here for more details.

Files changed (7) hide show

aiwaf/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 default_app_config = "aiwaf.apps.AiwafConfig"
-__version__ = "0.1.9.3.1"
+__version__ = "0.1.9.3.3"
 # Note: Middleware classes are available from aiwaf.middleware
 # Import them only when needed to avoid circular imports during Django app loading

aiwaf/middleware.py CHANGED Viewed

@@ -786,22 +786,10 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
                             "message": f"POST not allowed for {request.path}"
                         }, status=405)  # Method Not Allowed
-            # Check if there was a preceding GET request
+            # Check if there was a preceding GET request for timing validation
             get_time = cache.get(f"honeypot_get:{ip}")
-            if get_time is None:
-                # No GET request - likely bot posting directly
-                # But be more lenient for login paths since users might bookmark them
-                if not any(request.path.lower().startswith(login_path) for login_path in [
-                    "/admin/login/", "/login/", "/accounts/login/", "/auth/login/", "/signin/"
-                ]):
-                    # Double-check exemption before blocking
-                    if not exemption_store.is_exempted(ip):
-                        BlacklistManager.block(ip, "Direct POST without GET")
-                        # Check if actually blocked (exempted IPs won't be blocked)
-                        if BlacklistManager.is_blocked(ip):
-                            return JsonResponse({"error": "blocked"}, status=403)
-            else:
+            if get_time is not None:
                 # Check timing - be more lenient for login paths
                 time_diff = time.time() - get_time
                 min_time = self.MIN_FORM_TIME
@@ -880,3 +868,270 @@ class UUIDTamperMiddleware(MiddlewareMixin):
             # Check if actually blocked (exempted IPs won't be blocked)
             if BlacklistManager.is_blocked(ip):
                 return JsonResponse({"error": "blocked"}, status=403)
+class HeaderValidationMiddleware(MiddlewareMixin):
+    """
+    Validates HTTP headers to detect bots and malicious requests
+    """
+    # Standard browser headers that legitimate requests should have
+    REQUIRED_HEADERS = [
+        'HTTP_USER_AGENT',
+        'HTTP_ACCEPT',
+    ]
+    # Headers that browsers typically send
+    BROWSER_HEADERS = [
+        'HTTP_ACCEPT_LANGUAGE',
+        'HTTP_ACCEPT_ENCODING',
+        'HTTP_CONNECTION',
+        'HTTP_CACHE_CONTROL',
+    ]
+    # Suspicious User-Agent patterns
+    SUSPICIOUS_USER_AGENTS = [
+        r'bot',
+        r'crawler',
+        r'spider',
+        r'scraper',
+        r'curl',
+        r'wget',
+        r'python',
+        r'java',
+        r'node',
+        r'go-http',
+        r'axios',
+        r'okhttp',
+        r'libwww',
+        r'lwp-trivial',
+        r'mechanize',
+        r'requests',
+        r'urllib',
+        r'httpie',
+        r'postman',
+        r'insomnia',
+        r'^$',  # Empty user agent
+        r'mozilla/4\.0$',  # Fake old browser
+        r'mozilla/5\.0$',  # Incomplete mozilla string
+    ]
+    # Known legitimate bot user agents to whitelist
+    LEGITIMATE_BOTS = [
+        r'googlebot',
+        r'bingbot',
+        r'slurp',  # Yahoo
+        r'duckduckbot',
+        r'baiduspider',
+        r'yandexbot',
+        r'facebookexternalhit',
+        r'twitterbot',
+        r'linkedinbot',
+        r'whatsapp',
+        r'telegrambot',
+        r'applebot',
+        r'pingdom',
+        r'uptimerobot',
+        r'statuscake',
+        r'site24x7',
+    ]
+    # Suspicious header combinations
+    SUSPICIOUS_COMBINATIONS = [
+        # High version HTTP with old user agent
+        {
+            'condition': lambda headers: (
+                headers.get('SERVER_PROTOCOL', '').startswith('HTTP/2') and
+                'mozilla/4.0' in headers.get('HTTP_USER_AGENT', '').lower()
+            ),
+            'reason': 'HTTP/2 with old browser user agent'
+        },
+        # No Accept header but has User-Agent
+        {
+            'condition': lambda headers: (
+                headers.get('HTTP_USER_AGENT') and
+                not headers.get('HTTP_ACCEPT')
+            ),
+            'reason': 'User-Agent present but no Accept header'
+        },
+        # Accept */* only (very generic)
+        {
+            'condition': lambda headers: (
+                headers.get('HTTP_ACCEPT') == '*/*' and
+                not any(h in headers for h in ['HTTP_ACCEPT_LANGUAGE', 'HTTP_ACCEPT_ENCODING'])
+            ),
+            'reason': 'Generic Accept header without language/encoding'
+        },
+        # No browser-standard headers at all
+        {
+            'condition': lambda headers: (
+                headers.get('HTTP_USER_AGENT') and
+                not any(headers.get(h) for h in ['HTTP_ACCEPT_LANGUAGE', 'HTTP_ACCEPT_ENCODING', 'HTTP_CONNECTION'])
+            ),
+            'reason': 'Missing all browser-standard headers'
+        },
+        # Suspicious HTTP version patterns
+        {
+            'condition': lambda headers: (
+                'HTTP_USER_AGENT' in headers and
+                headers.get('SERVER_PROTOCOL') == 'HTTP/1.0' and
+                'chrome' in headers.get('HTTP_USER_AGENT', '').lower()
+            ),
+            'reason': 'Modern browser with HTTP/1.0'
+        }
+    ]
+    def process_request(self, request):
+        # Skip if request is exempted
+        if is_exempt(request):
+            return None
+        ip = get_ip(request)
+        # Check IP-level exemption
+        from .storage import get_exemption_store
+        exemption_store = get_exemption_store()
+        if exemption_store.is_exempted(ip):
+            return None
+        # Skip for static files and common paths
+        if self._is_static_request(request):
+            return None
+        # Get headers from request.META
+        headers = request.META
+        # Check for missing required headers
+        missing_headers = self._check_missing_headers(headers)
+        if missing_headers:
+            return self._block_request(ip, f"Missing required headers: {', '.join(missing_headers)}", request.path)
+        # Check for suspicious user agent
+        suspicious_ua = self._check_user_agent(headers.get('HTTP_USER_AGENT', ''))
+        if suspicious_ua:
+            return self._block_request(ip, f"Suspicious user agent: {suspicious_ua}", request.path)
+        # Check for suspicious header combinations
+        suspicious_combo = self._check_header_combinations(headers)
+        if suspicious_combo:
+            return self._block_request(ip, f"Suspicious headers: {suspicious_combo}", request.path)
+        # Check header quality score
+        quality_score = self._calculate_header_quality(headers)
+        if quality_score < 3:  # Threshold for suspicion
+            return self._block_request(ip, f"Low header quality score: {quality_score}", request.path)
+        return None
+    def _is_static_request(self, request):
+        """Check if this is a request for static files"""
+        static_extensions = ['.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg', '.woff', '.woff2', '.ttf']
+        path = request.path.lower()
+        # Check file extensions
+        if any(path.endswith(ext) for ext in static_extensions):
+            return True
+        # Check static paths
+        static_paths = ['/static/', '/media/', '/assets/', '/favicon.ico']
+        if any(path.startswith(static_path) for static_path in static_paths):
+            return True
+        return False
+    def _check_missing_headers(self, headers):
+        """Check for missing required headers"""
+        missing = []
+        for header in self.REQUIRED_HEADERS:
+            if not headers.get(header):
+                missing.append(header.replace('HTTP_', '').replace('_', '-').lower())
+        return missing
+    def _check_user_agent(self, user_agent):
+        """Check if user agent is suspicious"""
+        if not user_agent:
+            return "Empty user agent"
+        user_agent_lower = user_agent.lower()
+        # Check if it's a legitimate bot first
+        for legitimate_pattern in self.LEGITIMATE_BOTS:
+            if re.search(legitimate_pattern, user_agent_lower):
+                return None  # Allow legitimate bots
+        # Check for suspicious patterns
+        for suspicious_pattern in self.SUSPICIOUS_USER_AGENTS:
+            if re.search(suspicious_pattern, user_agent_lower, re.IGNORECASE):
+                return f"Pattern: {suspicious_pattern}"
+        # Check for very short user agents (likely fake)
+        if len(user_agent) < 10:
+            return "Too short"
+        # Check for very long user agents (possibly malicious)
+        if len(user_agent) > 500:
+            return "Too long"
+        return None
+    def _check_header_combinations(self, headers):
+        """Check for suspicious header combinations"""
+        for combo in self.SUSPICIOUS_COMBINATIONS:
+            try:
+                if combo['condition'](headers):
+                    return combo['reason']
+            except Exception:
+                # If condition check fails, skip it
+                continue
+        return None
+    def _calculate_header_quality(self, headers):
+        """Calculate a quality score based on header completeness"""
+        score = 0
+        # Basic required headers (2 points each)
+        if headers.get('HTTP_USER_AGENT'):
+            score += 2
+        if headers.get('HTTP_ACCEPT'):
+            score += 2
+        # Browser-standard headers (1 point each)
+        for header in self.BROWSER_HEADERS:
+            if headers.get(header):
+                score += 1
+        # Bonus points for realistic combinations
+        if headers.get('HTTP_ACCEPT_LANGUAGE') and headers.get('HTTP_ACCEPT_ENCODING'):
+            score += 1
+        if headers.get('HTTP_CONNECTION') == 'keep-alive':
+            score += 1
+        # Check for realistic Accept header
+        accept = headers.get('HTTP_ACCEPT', '')
+        if 'text/html' in accept and 'application/xml' in accept:
+            score += 1
+        return score
+    def _block_request(self, ip, reason, path):
+        """Block the request and return error response"""
+        from .storage import get_exemption_store
+        exemption_store = get_exemption_store()
+        # Double-check exemption before blocking
+        if not exemption_store.is_exempted(ip):
+            BlacklistManager.block(ip, f"Header validation: {reason}")
+            # Check if actually blocked (exempted IPs won't be blocked)
+            if BlacklistManager.is_blocked(ip):
+                return JsonResponse({
+                    "error": "blocked",
+                    "message": "Request blocked due to suspicious headers",
+                    "path": path
+                }, status=403)
+        return None

{aiwaf-0.1.9.3.1.dist-info → aiwaf-0.1.9.3.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aiwaf
-Version: 0.1.9.3.1
+Version: 0.1.9.3.3
 Summary: AI-powered Web Application Firewall
 Home-page: https://github.com/aayushgauba/aiwaf
 Author: Aayush Gauba
@@ -34,6 +34,7 @@ Dynamic: requires-python
 - ✅ **Enhanced Configuration** - `AIWAF_ALLOWED_PATH_KEYWORDS` and `AIWAF_EXEMPT_KEYWORDS`
 - ✅ **Comprehensive HTTP Method Validation** - Blocks GET→POST-only, POST→GET-only, unsupported REST methods
 - ✅ **Enhanced Honeypot Protection** - POST validation & 4-minute page timeout with smart reload detection
+- ✅ **HTTP Header Validation** - Comprehensive bot detection via header analysis and quality scoring
 ---
@@ -113,9 +114,52 @@ aiwaf/
 - **File‑Extension Probing Detection**
   Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
+- **🆕 HTTP Header Validation**
+  Advanced header analysis to detect bots and malicious requests:
+  - **Missing Required Headers** - Blocks requests without User-Agent or Accept headers
+  - **Suspicious User-Agents** - Detects curl, wget, python-requests, automated tools
+  - **Header Quality Scoring** - Calculates realism score based on browser-standard headers
+  - **Legitimate Bot Whitelist** - Allows Googlebot, Bingbot, and other search engines
+  - **Header Combination Analysis** - Detects impossible combinations (HTTP/2 + old browsers)
+  - **Static File Exemption** - Skips validation for CSS, JS, images
+## 🛡️ Header Validation Middleware Features
+The **HeaderValidationMiddleware** provides advanced bot detection through HTTP header analysis:
+### **What it detects:**
+- **Missing Headers**: Requests without standard browser headers
+- **Suspicious User-Agents**: WordPress scanners, exploit tools, basic scrapers
+- **Bot-like Patterns**: Low header diversity, missing Accept headers
+- **Quality Scoring**: 0-11 point system based on header completeness
+### **What it allows:**
+- **Legitimate Browsers**: Chrome, Firefox, Safari, Edge with full headers
+- **Search Engine Bots**: Google, Bing, DuckDuckGo, Yandex crawlers
+- **API Clients**: Properly identified with good headers
+- **Static Files**: CSS, JS, images (automatically exempted)
+### **Real-world effectiveness:**
+```
+✅ Blocks: WordPress scanners, exploit bots, basic scrapers
+✅ Allows: Real browsers, legitimate bots, API clients
+✅ Quality Score: 10/11 = Legitimate, 2/11 = Suspicious bot
+```
+### **Testing header validation:**
+```bash
+# Test with curl (will be blocked - low quality headers)
+curl http://yoursite.com/
+# Test with browser (will be allowed - high quality headers)
+# Visit site normally in Chrome/Firefox
+# Check logs for header validation blocks
+python manage.py aiwaf_logging --recent
+```
 - **Enhanced Timing-Based Honeypot**
   Advanced GET→POST timing analysis with comprehensive HTTP method validation:
-  - POST directly without a preceding GET request
   - Submit forms faster than `AIWAF_MIN_FORM_TIME` seconds (default: 1 second)
   - **🆕 Smart HTTP Method Validation** - Comprehensive protection against method misuse:
     - Blocks GET requests to POST-only views (form endpoints, API creates)

{aiwaf-0.1.9.3.1.dist-info → aiwaf-0.1.9.3.3.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
-aiwaf/__init__.py,sha256=VlFbI8uqJmi1V0hsKasasV1BFglekVX0R5jvEOwXGzE,220
+aiwaf/__init__.py,sha256=Rnla6te9DNqQBP_HMEdhUdQdj9dd4ECcAr6F62Xs4-A,220
 aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
 aiwaf/blacklist_manager.py,sha256=LYCeKFB-7e_C6Bg2WeFJWFIIQlrfRMPuGp30ivrnhQY,1196
 aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
-aiwaf/middleware.py,sha256=BnVdA4g2YTDo5g_H1Q8EE-ctVR4JF_yV3PaLHMgYZ-E,40804
+aiwaf/middleware.py,sha256=lRxi8M22Fp1fdhCWQ6XesbxX54aijH3tdSvjLNroQdE,49197
 aiwaf/middleware_logger.py,sha256=LWZVDAnjh6CGESirA8eMbhGgJKB7lVDGRQqVroH95Lo,4742
 aiwaf/models.py,sha256=vQxgY19BDVMjoO903UNrTZC1pNoLltMU6wbyWPoAEns,2719
 aiwaf/storage.py,sha256=pUXE3bm7aRrABh_B6jTOBUQOYK67oQmHaR9EqyOasis,14038
@@ -29,8 +29,8 @@ aiwaf/management/commands/test_exemption_fix.py,sha256=ngyGaHUCmQQ6y--6j4q1viZJt
 aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
 aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
-aiwaf-0.1.9.3.1.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
-aiwaf-0.1.9.3.1.dist-info/METADATA,sha256=TQO1y9t5sRQY7zBNyJc6dvTfJ1JXC1vBTf1RjH8O8m0,29037
-aiwaf-0.1.9.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-aiwaf-0.1.9.3.1.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
-aiwaf-0.1.9.3.1.dist-info/RECORD,,
+aiwaf-0.1.9.3.3.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
+aiwaf-0.1.9.3.3.dist-info/METADATA,sha256=GUXN2Lav1oOSfMnGTEd7ALU6-95yb7LJYbf4iZN-ukM,30989
+aiwaf-0.1.9.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+aiwaf-0.1.9.3.3.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
+aiwaf-0.1.9.3.3.dist-info/RECORD,,

{aiwaf-0.1.9.3.1.dist-info → aiwaf-0.1.9.3.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{aiwaf-0.1.9.3.1.dist-info → aiwaf-0.1.9.3.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{aiwaf-0.1.9.3.1.dist-info → aiwaf-0.1.9.3.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

aiwaf 0.1.9.3.1__py3-none-any.whl → 0.1.9.3.3__py3-none-any.whl

Potentially problematic release.

aiwaf 0.1.9.3.1__py3-none-any.whl → 0.1.9.3.3__py3-none-any.whl