aiwaf 0.1.9.3.1__py3-none-any.whl → 0.1.9.3.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  default_app_config = "aiwaf.apps.AiwafConfig"
2
2
 
3
- __version__ = "0.1.9.3.1"
3
+ __version__ = "0.1.9.3.3"
4
4
 
5
5
  # Note: Middleware classes are available from aiwaf.middleware
6
6
  # Import them only when needed to avoid circular imports during Django app loading
aiwaf/middleware.py CHANGED
@@ -786,22 +786,10 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
786
786
  "message": f"POST not allowed for {request.path}"
787
787
  }, status=405) # Method Not Allowed
788
788
 
789
- # Check if there was a preceding GET request
789
+ # Check if there was a preceding GET request for timing validation
790
790
  get_time = cache.get(f"honeypot_get:{ip}")
791
791
 
792
- if get_time is None:
793
- # No GET request - likely bot posting directly
794
- # But be more lenient for login paths since users might bookmark them
795
- if not any(request.path.lower().startswith(login_path) for login_path in [
796
- "/admin/login/", "/login/", "/accounts/login/", "/auth/login/", "/signin/"
797
- ]):
798
- # Double-check exemption before blocking
799
- if not exemption_store.is_exempted(ip):
800
- BlacklistManager.block(ip, "Direct POST without GET")
801
- # Check if actually blocked (exempted IPs won't be blocked)
802
- if BlacklistManager.is_blocked(ip):
803
- return JsonResponse({"error": "blocked"}, status=403)
804
- else:
792
+ if get_time is not None:
805
793
  # Check timing - be more lenient for login paths
806
794
  time_diff = time.time() - get_time
807
795
  min_time = self.MIN_FORM_TIME
@@ -880,3 +868,270 @@ class UUIDTamperMiddleware(MiddlewareMixin):
880
868
  # Check if actually blocked (exempted IPs won't be blocked)
881
869
  if BlacklistManager.is_blocked(ip):
882
870
  return JsonResponse({"error": "blocked"}, status=403)
871
+
872
+
873
+ class HeaderValidationMiddleware(MiddlewareMixin):
874
+ """
875
+ Validates HTTP headers to detect bots and malicious requests
876
+ """
877
+
878
+ # Standard browser headers that legitimate requests should have
879
+ REQUIRED_HEADERS = [
880
+ 'HTTP_USER_AGENT',
881
+ 'HTTP_ACCEPT',
882
+ ]
883
+
884
+ # Headers that browsers typically send
885
+ BROWSER_HEADERS = [
886
+ 'HTTP_ACCEPT_LANGUAGE',
887
+ 'HTTP_ACCEPT_ENCODING',
888
+ 'HTTP_CONNECTION',
889
+ 'HTTP_CACHE_CONTROL',
890
+ ]
891
+
892
+ # Suspicious User-Agent patterns
893
+ SUSPICIOUS_USER_AGENTS = [
894
+ r'bot',
895
+ r'crawler',
896
+ r'spider',
897
+ r'scraper',
898
+ r'curl',
899
+ r'wget',
900
+ r'python',
901
+ r'java',
902
+ r'node',
903
+ r'go-http',
904
+ r'axios',
905
+ r'okhttp',
906
+ r'libwww',
907
+ r'lwp-trivial',
908
+ r'mechanize',
909
+ r'requests',
910
+ r'urllib',
911
+ r'httpie',
912
+ r'postman',
913
+ r'insomnia',
914
+ r'^$', # Empty user agent
915
+ r'mozilla/4\.0$', # Fake old browser
916
+ r'mozilla/5\.0$', # Incomplete mozilla string
917
+ ]
918
+
919
+ # Known legitimate bot user agents to whitelist
920
+ LEGITIMATE_BOTS = [
921
+ r'googlebot',
922
+ r'bingbot',
923
+ r'slurp', # Yahoo
924
+ r'duckduckbot',
925
+ r'baiduspider',
926
+ r'yandexbot',
927
+ r'facebookexternalhit',
928
+ r'twitterbot',
929
+ r'linkedinbot',
930
+ r'whatsapp',
931
+ r'telegrambot',
932
+ r'applebot',
933
+ r'pingdom',
934
+ r'uptimerobot',
935
+ r'statuscake',
936
+ r'site24x7',
937
+ ]
938
+
939
+ # Suspicious header combinations
940
+ SUSPICIOUS_COMBINATIONS = [
941
+ # High version HTTP with old user agent
942
+ {
943
+ 'condition': lambda headers: (
944
+ headers.get('SERVER_PROTOCOL', '').startswith('HTTP/2') and
945
+ 'mozilla/4.0' in headers.get('HTTP_USER_AGENT', '').lower()
946
+ ),
947
+ 'reason': 'HTTP/2 with old browser user agent'
948
+ },
949
+ # No Accept header but has User-Agent
950
+ {
951
+ 'condition': lambda headers: (
952
+ headers.get('HTTP_USER_AGENT') and
953
+ not headers.get('HTTP_ACCEPT')
954
+ ),
955
+ 'reason': 'User-Agent present but no Accept header'
956
+ },
957
+ # Accept */* only (very generic)
958
+ {
959
+ 'condition': lambda headers: (
960
+ headers.get('HTTP_ACCEPT') == '*/*' and
961
+ not any(h in headers for h in ['HTTP_ACCEPT_LANGUAGE', 'HTTP_ACCEPT_ENCODING'])
962
+ ),
963
+ 'reason': 'Generic Accept header without language/encoding'
964
+ },
965
+ # No browser-standard headers at all
966
+ {
967
+ 'condition': lambda headers: (
968
+ headers.get('HTTP_USER_AGENT') and
969
+ not any(headers.get(h) for h in ['HTTP_ACCEPT_LANGUAGE', 'HTTP_ACCEPT_ENCODING', 'HTTP_CONNECTION'])
970
+ ),
971
+ 'reason': 'Missing all browser-standard headers'
972
+ },
973
+ # Suspicious HTTP version patterns
974
+ {
975
+ 'condition': lambda headers: (
976
+ 'HTTP_USER_AGENT' in headers and
977
+ headers.get('SERVER_PROTOCOL') == 'HTTP/1.0' and
978
+ 'chrome' in headers.get('HTTP_USER_AGENT', '').lower()
979
+ ),
980
+ 'reason': 'Modern browser with HTTP/1.0'
981
+ }
982
+ ]
983
+
984
+ def process_request(self, request):
985
+ # Skip if request is exempted
986
+ if is_exempt(request):
987
+ return None
988
+
989
+ ip = get_ip(request)
990
+
991
+ # Check IP-level exemption
992
+ from .storage import get_exemption_store
993
+ exemption_store = get_exemption_store()
994
+ if exemption_store.is_exempted(ip):
995
+ return None
996
+
997
+ # Skip for static files and common paths
998
+ if self._is_static_request(request):
999
+ return None
1000
+
1001
+ # Get headers from request.META
1002
+ headers = request.META
1003
+
1004
+ # Check for missing required headers
1005
+ missing_headers = self._check_missing_headers(headers)
1006
+ if missing_headers:
1007
+ return self._block_request(ip, f"Missing required headers: {', '.join(missing_headers)}", request.path)
1008
+
1009
+ # Check for suspicious user agent
1010
+ suspicious_ua = self._check_user_agent(headers.get('HTTP_USER_AGENT', ''))
1011
+ if suspicious_ua:
1012
+ return self._block_request(ip, f"Suspicious user agent: {suspicious_ua}", request.path)
1013
+
1014
+ # Check for suspicious header combinations
1015
+ suspicious_combo = self._check_header_combinations(headers)
1016
+ if suspicious_combo:
1017
+ return self._block_request(ip, f"Suspicious headers: {suspicious_combo}", request.path)
1018
+
1019
+ # Check header quality score
1020
+ quality_score = self._calculate_header_quality(headers)
1021
+ if quality_score < 3: # Threshold for suspicion
1022
+ return self._block_request(ip, f"Low header quality score: {quality_score}", request.path)
1023
+
1024
+ return None
1025
+
1026
+ def _is_static_request(self, request):
1027
+ """Check if this is a request for static files"""
1028
+ static_extensions = ['.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg', '.woff', '.woff2', '.ttf']
1029
+ path = request.path.lower()
1030
+
1031
+ # Check file extensions
1032
+ if any(path.endswith(ext) for ext in static_extensions):
1033
+ return True
1034
+
1035
+ # Check static paths
1036
+ static_paths = ['/static/', '/media/', '/assets/', '/favicon.ico']
1037
+ if any(path.startswith(static_path) for static_path in static_paths):
1038
+ return True
1039
+
1040
+ return False
1041
+
1042
+ def _check_missing_headers(self, headers):
1043
+ """Check for missing required headers"""
1044
+ missing = []
1045
+
1046
+ for header in self.REQUIRED_HEADERS:
1047
+ if not headers.get(header):
1048
+ missing.append(header.replace('HTTP_', '').replace('_', '-').lower())
1049
+
1050
+ return missing
1051
+
1052
+ def _check_user_agent(self, user_agent):
1053
+ """Check if user agent is suspicious"""
1054
+ if not user_agent:
1055
+ return "Empty user agent"
1056
+
1057
+ user_agent_lower = user_agent.lower()
1058
+
1059
+ # Check if it's a legitimate bot first
1060
+ for legitimate_pattern in self.LEGITIMATE_BOTS:
1061
+ if re.search(legitimate_pattern, user_agent_lower):
1062
+ return None # Allow legitimate bots
1063
+
1064
+ # Check for suspicious patterns
1065
+ for suspicious_pattern in self.SUSPICIOUS_USER_AGENTS:
1066
+ if re.search(suspicious_pattern, user_agent_lower, re.IGNORECASE):
1067
+ return f"Pattern: {suspicious_pattern}"
1068
+
1069
+ # Check for very short user agents (likely fake)
1070
+ if len(user_agent) < 10:
1071
+ return "Too short"
1072
+
1073
+ # Check for very long user agents (possibly malicious)
1074
+ if len(user_agent) > 500:
1075
+ return "Too long"
1076
+
1077
+ return None
1078
+
1079
+ def _check_header_combinations(self, headers):
1080
+ """Check for suspicious header combinations"""
1081
+ for combo in self.SUSPICIOUS_COMBINATIONS:
1082
+ try:
1083
+ if combo['condition'](headers):
1084
+ return combo['reason']
1085
+ except Exception:
1086
+ # If condition check fails, skip it
1087
+ continue
1088
+
1089
+ return None
1090
+
1091
+ def _calculate_header_quality(self, headers):
1092
+ """Calculate a quality score based on header completeness"""
1093
+ score = 0
1094
+
1095
+ # Basic required headers (2 points each)
1096
+ if headers.get('HTTP_USER_AGENT'):
1097
+ score += 2
1098
+ if headers.get('HTTP_ACCEPT'):
1099
+ score += 2
1100
+
1101
+ # Browser-standard headers (1 point each)
1102
+ for header in self.BROWSER_HEADERS:
1103
+ if headers.get(header):
1104
+ score += 1
1105
+
1106
+ # Bonus points for realistic combinations
1107
+ if headers.get('HTTP_ACCEPT_LANGUAGE') and headers.get('HTTP_ACCEPT_ENCODING'):
1108
+ score += 1
1109
+
1110
+ if headers.get('HTTP_CONNECTION') == 'keep-alive':
1111
+ score += 1
1112
+
1113
+ # Check for realistic Accept header
1114
+ accept = headers.get('HTTP_ACCEPT', '')
1115
+ if 'text/html' in accept and 'application/xml' in accept:
1116
+ score += 1
1117
+
1118
+ return score
1119
+
1120
+ def _block_request(self, ip, reason, path):
1121
+ """Block the request and return error response"""
1122
+ from .storage import get_exemption_store
1123
+ exemption_store = get_exemption_store()
1124
+
1125
+ # Double-check exemption before blocking
1126
+ if not exemption_store.is_exempted(ip):
1127
+ BlacklistManager.block(ip, f"Header validation: {reason}")
1128
+
1129
+ # Check if actually blocked (exempted IPs won't be blocked)
1130
+ if BlacklistManager.is_blocked(ip):
1131
+ return JsonResponse({
1132
+ "error": "blocked",
1133
+ "message": "Request blocked due to suspicious headers",
1134
+ "path": path
1135
+ }, status=403)
1136
+
1137
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiwaf
3
- Version: 0.1.9.3.1
3
+ Version: 0.1.9.3.3
4
4
  Summary: AI-powered Web Application Firewall
5
5
  Home-page: https://github.com/aayushgauba/aiwaf
6
6
  Author: Aayush Gauba
@@ -34,6 +34,7 @@ Dynamic: requires-python
34
34
  - ✅ **Enhanced Configuration** - `AIWAF_ALLOWED_PATH_KEYWORDS` and `AIWAF_EXEMPT_KEYWORDS`
35
35
  - ✅ **Comprehensive HTTP Method Validation** - Blocks GET→POST-only, POST→GET-only, unsupported REST methods
36
36
  - ✅ **Enhanced Honeypot Protection** - POST validation & 4-minute page timeout with smart reload detection
37
+ - ✅ **HTTP Header Validation** - Comprehensive bot detection via header analysis and quality scoring
37
38
 
38
39
  ---
39
40
 
@@ -113,9 +114,52 @@ aiwaf/
113
114
  - **File‑Extension Probing Detection**
114
115
  Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
115
116
 
117
+ - **🆕 HTTP Header Validation**
118
+ Advanced header analysis to detect bots and malicious requests:
119
+ - **Missing Required Headers** - Blocks requests without User-Agent or Accept headers
120
+ - **Suspicious User-Agents** - Detects curl, wget, python-requests, automated tools
121
+ - **Header Quality Scoring** - Calculates realism score based on browser-standard headers
122
+ - **Legitimate Bot Whitelist** - Allows Googlebot, Bingbot, and other search engines
123
+ - **Header Combination Analysis** - Detects impossible combinations (HTTP/2 + old browsers)
124
+ - **Static File Exemption** - Skips validation for CSS, JS, images
125
+
126
+ ## 🛡️ Header Validation Middleware Features
127
+
128
+ The **HeaderValidationMiddleware** provides advanced bot detection through HTTP header analysis:
129
+
130
+ ### **What it detects:**
131
+ - **Missing Headers**: Requests without standard browser headers
132
+ - **Suspicious User-Agents**: WordPress scanners, exploit tools, basic scrapers
133
+ - **Bot-like Patterns**: Low header diversity, missing Accept headers
134
+ - **Quality Scoring**: 0-11 point system based on header completeness
135
+
136
+ ### **What it allows:**
137
+ - **Legitimate Browsers**: Chrome, Firefox, Safari, Edge with full headers
138
+ - **Search Engine Bots**: Google, Bing, DuckDuckGo, Yandex crawlers
139
+ - **API Clients**: Properly identified with good headers
140
+ - **Static Files**: CSS, JS, images (automatically exempted)
141
+
142
+ ### **Real-world effectiveness:**
143
+ ```
144
+ ✅ Blocks: WordPress scanners, exploit bots, basic scrapers
145
+ ✅ Allows: Real browsers, legitimate bots, API clients
146
+ ✅ Quality Score: 10/11 = Legitimate, 2/11 = Suspicious bot
147
+ ```
148
+
149
+ ### **Testing header validation:**
150
+ ```bash
151
+ # Test with curl (will be blocked - low quality headers)
152
+ curl http://yoursite.com/
153
+
154
+ # Test with browser (will be allowed - high quality headers)
155
+ # Visit site normally in Chrome/Firefox
156
+
157
+ # Check logs for header validation blocks
158
+ python manage.py aiwaf_logging --recent
159
+ ```
160
+
116
161
  - **Enhanced Timing-Based Honeypot**
117
162
  Advanced GET→POST timing analysis with comprehensive HTTP method validation:
118
- - POST directly without a preceding GET request
119
163
  - Submit forms faster than `AIWAF_MIN_FORM_TIME` seconds (default: 1 second)
120
164
  - **🆕 Smart HTTP Method Validation** - Comprehensive protection against method misuse:
121
165
  - Blocks GET requests to POST-only views (form endpoints, API creates)
@@ -1,8 +1,8 @@
1
- aiwaf/__init__.py,sha256=VlFbI8uqJmi1V0hsKasasV1BFglekVX0R5jvEOwXGzE,220
1
+ aiwaf/__init__.py,sha256=Rnla6te9DNqQBP_HMEdhUdQdj9dd4ECcAr6F62Xs4-A,220
2
2
  aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
3
  aiwaf/blacklist_manager.py,sha256=LYCeKFB-7e_C6Bg2WeFJWFIIQlrfRMPuGp30ivrnhQY,1196
4
4
  aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
5
- aiwaf/middleware.py,sha256=BnVdA4g2YTDo5g_H1Q8EE-ctVR4JF_yV3PaLHMgYZ-E,40804
5
+ aiwaf/middleware.py,sha256=lRxi8M22Fp1fdhCWQ6XesbxX54aijH3tdSvjLNroQdE,49197
6
6
  aiwaf/middleware_logger.py,sha256=LWZVDAnjh6CGESirA8eMbhGgJKB7lVDGRQqVroH95Lo,4742
7
7
  aiwaf/models.py,sha256=vQxgY19BDVMjoO903UNrTZC1pNoLltMU6wbyWPoAEns,2719
8
8
  aiwaf/storage.py,sha256=pUXE3bm7aRrABh_B6jTOBUQOYK67oQmHaR9EqyOasis,14038
@@ -29,8 +29,8 @@ aiwaf/management/commands/test_exemption_fix.py,sha256=ngyGaHUCmQQ6y--6j4q1viZJt
29
29
  aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
30
30
  aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
31
  aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
32
- aiwaf-0.1.9.3.1.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
33
- aiwaf-0.1.9.3.1.dist-info/METADATA,sha256=TQO1y9t5sRQY7zBNyJc6dvTfJ1JXC1vBTf1RjH8O8m0,29037
34
- aiwaf-0.1.9.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
- aiwaf-0.1.9.3.1.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
36
- aiwaf-0.1.9.3.1.dist-info/RECORD,,
32
+ aiwaf-0.1.9.3.3.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
33
+ aiwaf-0.1.9.3.3.dist-info/METADATA,sha256=GUXN2Lav1oOSfMnGTEd7ALU6-95yb7LJYbf4iZN-ukM,30989
34
+ aiwaf-0.1.9.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
+ aiwaf-0.1.9.3.3.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
36
+ aiwaf-0.1.9.3.3.dist-info/RECORD,,