aiwaf 0.1.9.3.2__py3-none-any.whl → 0.1.9.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiwaf might be problematic. Click here for more details.

aiwaf/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  default_app_config = "aiwaf.apps.AiwafConfig"
2
2
 
3
- __version__ = "0.1.9.3.2"
3
+ __version__ = "0.1.9.3.3"
4
4
 
5
5
  # Note: Middleware classes are available from aiwaf.middleware
6
6
  # Import them only when needed to avoid circular imports during Django app loading
aiwaf/middleware.py CHANGED
@@ -504,6 +504,51 @@ class AIAnomalyMiddleware(MiddlewareMixin):
504
504
 
505
505
  return any(malicious_indicators)
506
506
 
507
+ def _is_scanning_path(self, path):
508
+ """
509
+ Determine if a 404 path looks like automated scanning vs legitimate browsing.
510
+ Focus on common scanner patterns that indicate malicious intent.
511
+ """
512
+ path_lower = path.lower()
513
+
514
+ # Common scanning patterns that are clear indicators of malicious activity
515
+ scanning_patterns = [
516
+ # WordPress scanning
517
+ 'wp-admin', 'wp-content', 'wp-includes', 'wp-config', 'xmlrpc.php',
518
+
519
+ # Admin/config scanning
520
+ 'admin', 'phpmyadmin', 'adminer', 'config', 'configuration',
521
+ 'settings', 'setup', 'install', 'installer',
522
+
523
+ # Database/backup scanning
524
+ 'backup', 'database', 'db', 'mysql', 'sql', 'dump',
525
+
526
+ # System files scanning
527
+ '.env', '.git', '.htaccess', '.htpasswd', 'passwd', 'shadow',
528
+ 'robots.txt', 'sitemap.xml',
529
+
530
+ # Common vulnerabilities
531
+ 'cgi-bin', 'scripts', 'shell', 'cmd', 'exec',
532
+
533
+ # File extensions that shouldn't exist on most sites
534
+ '.php', '.asp', '.aspx', '.jsp', '.cgi', '.pl'
535
+ ]
536
+
537
+ # Check for scanning patterns
538
+ for pattern in scanning_patterns:
539
+ if pattern in path_lower:
540
+ return True
541
+
542
+ # Check for directory traversal attempts
543
+ if '../' in path or '..' in path:
544
+ return True
545
+
546
+ # Check for encoded attack patterns
547
+ if any(encoded in path for encoded in ['%2e%2e', '%252e', '%c0%ae']):
548
+ return True
549
+
550
+ return False
551
+
507
552
  def process_request(self, request):
508
553
  # First exemption check - early exit for exempt requests
509
554
  if is_exempt(request):
@@ -564,27 +609,27 @@ class AIAnomalyMiddleware(MiddlewareMixin):
564
609
  # Get recent behavior data for this IP to make intelligent blocking decision
565
610
  recent_data = [d for d in data if now - d[0] <= 300] # Last 5 minutes
566
611
 
612
+ # Always initialize variables before use
613
+ recent_kw_hits = []
614
+ recent_404s = 0
615
+ recent_burst_counts = []
616
+
567
617
  if recent_data:
568
- # Calculate behavior metrics similar to trainer.py
569
- recent_kw_hits = []
570
- recent_404s = 0
571
- recent_burst_counts = []
572
-
573
- for entry_time, entry_path, entry_status, entry_resp_time in recent_data:
574
- # Calculate keyword hits for this entry
575
- entry_known_path = path_exists_in_django(entry_path)
576
- entry_kw_hits = 0
577
- if not entry_known_path and not is_exempt_path(entry_path):
578
- entry_kw_hits = sum(1 for kw in STATIC_KW if kw in entry_path.lower())
579
- recent_kw_hits.append(entry_kw_hits)
580
-
581
- # Count 404s
582
- if entry_status == 404:
583
- recent_404s += 1
584
-
585
- # Calculate burst for this entry (requests within 10 seconds)
586
- entry_burst = sum(1 for (t, _, _, _) in recent_data if abs(entry_time - t) <= 10)
587
- recent_burst_counts.append(entry_burst)
618
+ for entry_time, entry_path, entry_status, entry_resp_time in recent_data:
619
+ # Calculate keyword hits for this entry
620
+ entry_known_path = path_exists_in_django(entry_path)
621
+ entry_kw_hits = 0
622
+ if not entry_known_path and not is_exempt_path(entry_path):
623
+ entry_kw_hits = sum(1 for kw in STATIC_KW if kw in entry_path.lower())
624
+ recent_kw_hits.append(entry_kw_hits)
625
+
626
+ # Count 404s
627
+ if entry_status == 404:
628
+ recent_404s += 1
629
+
630
+ # Calculate burst for this entry (requests within 10 seconds)
631
+ entry_burst = sum(1 for (t, _, _, _) in recent_data if abs(entry_time - t) <= 10)
632
+ recent_burst_counts.append(entry_burst)
588
633
 
589
634
  # Calculate averages and maximums
590
635
  avg_kw_hits = sum(recent_kw_hits) / len(recent_kw_hits) if recent_kw_hits else 0
@@ -592,28 +637,37 @@ class AIAnomalyMiddleware(MiddlewareMixin):
592
637
  avg_burst = sum(recent_burst_counts) / len(recent_burst_counts) if recent_burst_counts else 0
593
638
  total_requests = len(recent_data)
594
639
 
595
- # Don't block if it looks like legitimate behavior (same thresholds as trainer.py):
640
+ # Enhanced 404 analysis - focus on scanning patterns
641
+ scanning_404s = sum(1 for (_, path, status, _) in recent_data
642
+ if status == 404 and self._is_scanning_path(path))
643
+ legitimate_404s = max_404s - scanning_404s
644
+
645
+ # Don't block if it looks like legitimate behavior:
596
646
  if (
597
- avg_kw_hits < 2 and # Not hitting many malicious keywords
598
- max_404s < 10 and # Not excessive 404s
599
- avg_burst < 15 and # Not excessive burst activity
600
- total_requests < 100 # Not excessive total requests
647
+ avg_kw_hits < 3 and # Allow some keyword hits (increased from 2)
648
+ scanning_404s < 5 and # Focus on scanning 404s, not all 404s
649
+ legitimate_404s < 20 and # Allow more legitimate 404s (typos, old links)
650
+ avg_burst < 25 and # Allow higher burst (increased from 15)
651
+ total_requests < 150 # Allow more total requests (increased from 100)
601
652
  ):
602
653
  # Anomalous but looks legitimate - don't block
603
654
  pass
604
655
  else:
605
656
  # Double-check exemption before blocking
606
657
  if not exemption_store.is_exempted(ip):
607
- BlacklistManager.block(ip, f"AI anomaly + suspicious patterns (kw:{avg_kw_hits:.1f}, 404s:{max_404s}, burst:{avg_burst:.1f})")
658
+ BlacklistManager.block(ip, f"AI anomaly + scanning 404s (total:{max_404s}, scanning:{scanning_404s}, kw:{avg_kw_hits:.1f}, burst:{avg_burst:.1f})")
608
659
  # Check if actually blocked (exempted IPs won't be blocked)
609
660
  if BlacklistManager.is_blocked(ip):
610
661
  return JsonResponse({"error": "blocked"}, status=403)
611
662
  else:
612
- # No recent data to analyze - be more conservative, only block on very suspicious current request
613
- if kw_hits >= 2 or status_idx == STATUS_IDX.index("404"):
663
+ # No recent data to analyze - be more conservative
664
+ # Only block on multiple suspicious indicators, not single 404
665
+ current_scanning = self._is_scanning_path(request.path)
666
+
667
+ if kw_hits >= 3 and current_scanning: # Require both high keywords AND scanning pattern
614
668
  # Double-check exemption before blocking
615
669
  if not exemption_store.is_exempted(ip):
616
- BlacklistManager.block(ip, "AI anomaly + immediate suspicious behavior")
670
+ BlacklistManager.block(ip, f"AI anomaly + scanning behavior (kw:{kw_hits}, scanning_path:{request.path})")
617
671
  if BlacklistManager.is_blocked(ip):
618
672
  return JsonResponse({"error": "blocked"}, status=403)
619
673
 
@@ -644,10 +698,13 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
644
698
  MAX_PAGE_TIME = getattr(settings, "AIWAF_MAX_PAGE_TIME", 240) # 4 minutes default
645
699
 
646
700
  def _view_accepts_method(self, request, method):
647
- """Check if the current view/URL pattern accepts the specified HTTP method"""
701
+ """
702
+ Check if the current view accepts the specified HTTP method.
703
+ Be very conservative - only block when we're absolutely certain.
704
+ Handle decorator issues by being permissive when detection fails.
705
+ """
648
706
  try:
649
707
  from django.urls import resolve
650
- from django.urls.resolvers import URLResolver, URLPattern
651
708
 
652
709
  # Resolve the current URL to get the view
653
710
  resolved = resolve(request.path)
@@ -657,12 +714,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
657
714
  if hasattr(view_func, 'cls'):
658
715
  view_class = view_func.cls
659
716
 
660
- # Check http_method_names attribute (most reliable)
717
+ # Check http_method_names attribute (most reliable for CBVs)
661
718
  if hasattr(view_class, 'http_method_names'):
662
719
  allowed_methods = [m.upper() for m in view_class.http_method_names]
663
720
  return method.upper() in allowed_methods
664
721
 
665
- # Check for method-handling methods
722
+ # For CBVs without http_method_names, check for method handlers
666
723
  method_handlers = {
667
724
  'GET': ['get'],
668
725
  'POST': ['post', 'form_valid', 'form_invalid'],
@@ -674,76 +731,30 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
674
731
  if method.upper() in method_handlers:
675
732
  handlers = method_handlers[method.upper()]
676
733
  has_handler = any(hasattr(view_class, handler) for handler in handlers)
677
- if has_handler:
678
- return True
679
-
680
- # If no handler found, check if it's a common method that should be rejected
681
- if method.upper() in ['GET', 'POST', 'PUT', 'DELETE', 'PATCH']:
682
- return False
734
+ return has_handler
683
735
 
684
- # Default: assume method is allowed for class-based views
736
+ # Default for CBVs: be permissive
685
737
  return True
686
738
 
687
- # Handle function-based views
739
+ # Handle function-based views (including decorated ones)
688
740
  else:
689
- # Check if view has explicit allowed methods
690
- if hasattr(view_func, 'http_method_names'):
691
- allowed_methods = [m.upper() for m in view_func.http_method_names]
692
- return method.upper() in allowed_methods
741
+ # Try to unwrap decorators to get the actual view function
742
+ actual_func = view_func
743
+ while hasattr(actual_func, '__wrapped__'):
744
+ actual_func = actual_func.__wrapped__
693
745
 
694
- # For function-based views, inspect the source code
695
- import inspect
696
- try:
697
- source = inspect.getsource(view_func)
698
- method_upper = method.upper()
699
-
700
- # Look for method handling in the source
701
- if f'request.method' in source and method_upper in source:
702
- return True
703
-
704
- # Look for method-specific patterns
705
- method_patterns = {
706
- 'GET': ['request.GET', 'GET'],
707
- 'POST': ['request.POST', 'POST', 'form.is_valid()'],
708
- 'PUT': ['PUT', 'request.PUT'],
709
- 'DELETE': ['DELETE', 'request.DELETE']
710
- }
711
-
712
- if method.upper() in method_patterns:
713
- patterns = method_patterns[method.upper()]
714
- if any(pattern in source for pattern in patterns):
715
- return True
716
-
717
- except (OSError, TypeError):
718
- # Can't get source, make educated guess
719
- pass
746
+ # Check if the actual function has explicit allowed methods
747
+ if hasattr(actual_func, 'http_method_names'):
748
+ allowed_methods = [m.upper() for m in actual_func.http_method_names]
749
+ return method.upper() in allowed_methods
720
750
 
721
- # Check URL pattern name for method-specific endpoints
722
- if resolved.url_name:
723
- url_name_lower = resolved.url_name.lower()
724
-
725
- # POST-only patterns
726
- post_only_patterns = ['create', 'submit', 'upload', 'process']
727
- # GET-only patterns
728
- get_only_patterns = ['list', 'detail', 'view', 'display']
729
-
730
- if method.upper() == 'POST':
731
- if any(pattern in url_name_lower for pattern in post_only_patterns):
732
- return True
733
- if any(pattern in url_name_lower for pattern in get_only_patterns):
734
- return False
735
- elif method.upper() == 'GET':
736
- if any(pattern in url_name_lower for pattern in get_only_patterns):
737
- return True
738
- if any(pattern in url_name_lower for pattern in post_only_patterns):
739
- return False
740
-
741
- # Default: assume function-based views accept common methods
742
- return method.upper() in ['GET', 'POST', 'HEAD', 'OPTIONS']
751
+ # For function-based views, be very conservative
752
+ # Most Django views accept both GET and POST, so default to allowing
753
+ return True
743
754
 
744
755
  except Exception as e:
745
- # If we can't determine, err on the side of caution and allow
746
- print(f"AIWAF: Could not determine {method} capability for {request.path}: {e}")
756
+ # If anything fails (decorators, imports, etc.), be permissive
757
+ # Better to allow a legitimate request than block it
747
758
  return True
748
759
 
749
760
  def process_request(self, request):
@@ -759,16 +770,25 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
759
770
  return None
760
771
 
761
772
  if request.method == "GET":
762
- # ENHANCEMENT: Check if this view accepts GET requests
773
+ # CONSERVATIVE: Only block GET if we're absolutely certain it's POST-only
774
+ # Most Django views accept both GET and POST (forms show on GET, process on POST)
763
775
  if not self._view_accepts_method(request, 'GET'):
764
- # This view is POST-only, but received a GET - likely scanning/probing
765
- if not exemption_store.is_exempted(ip):
766
- BlacklistManager.block(ip, f"GET to POST-only view: {request.path}")
767
- if BlacklistManager.is_blocked(ip):
768
- return JsonResponse({
769
- "error": "blocked",
770
- "message": f"GET not allowed for {request.path}"
771
- }, status=405) # Method Not Allowed
776
+ # EXTRA CHECK: Only block if path looks like obvious POST-only API endpoint
777
+ path_lower = request.path.lower()
778
+ obvious_post_only = any(path_lower.endswith(pattern) for pattern in [
779
+ '/create/', '/submit/', '/upload/', '/delete/', '/process/'
780
+ ])
781
+
782
+ if obvious_post_only:
783
+ # This is very likely a POST-only endpoint getting a GET
784
+ if not exemption_store.is_exempted(ip):
785
+ BlacklistManager.block(ip, f"GET to obvious POST-only endpoint: {request.path}")
786
+ if BlacklistManager.is_blocked(ip):
787
+ return JsonResponse({
788
+ "error": "blocked",
789
+ "message": f"GET not allowed for {request.path}"
790
+ }, status=405) # Method Not Allowed
791
+ # Otherwise, don't block - could be a decorated view or complex form
772
792
 
773
793
  # Store timestamp for this IP's GET request
774
794
  # Use a general key for the IP, not path-specific
@@ -868,3 +888,270 @@ class UUIDTamperMiddleware(MiddlewareMixin):
868
888
  # Check if actually blocked (exempted IPs won't be blocked)
869
889
  if BlacklistManager.is_blocked(ip):
870
890
  return JsonResponse({"error": "blocked"}, status=403)
891
+
892
+
893
+ class HeaderValidationMiddleware(MiddlewareMixin):
894
+ """
895
+ Validates HTTP headers to detect bots and malicious requests
896
+ """
897
+
898
+ # Standard browser headers that legitimate requests should have
899
+ REQUIRED_HEADERS = [
900
+ 'HTTP_USER_AGENT',
901
+ 'HTTP_ACCEPT',
902
+ ]
903
+
904
+ # Headers that browsers typically send
905
+ BROWSER_HEADERS = [
906
+ 'HTTP_ACCEPT_LANGUAGE',
907
+ 'HTTP_ACCEPT_ENCODING',
908
+ 'HTTP_CONNECTION',
909
+ 'HTTP_CACHE_CONTROL',
910
+ ]
911
+
912
+ # Suspicious User-Agent patterns
913
+ SUSPICIOUS_USER_AGENTS = [
914
+ r'bot',
915
+ r'crawler',
916
+ r'spider',
917
+ r'scraper',
918
+ r'curl',
919
+ r'wget',
920
+ r'python',
921
+ r'java',
922
+ r'node',
923
+ r'go-http',
924
+ r'axios',
925
+ r'okhttp',
926
+ r'libwww',
927
+ r'lwp-trivial',
928
+ r'mechanize',
929
+ r'requests',
930
+ r'urllib',
931
+ r'httpie',
932
+ r'postman',
933
+ r'insomnia',
934
+ r'^$', # Empty user agent
935
+ r'mozilla/4\.0$', # Fake old browser
936
+ r'mozilla/5\.0$', # Incomplete mozilla string
937
+ ]
938
+
939
+ # Known legitimate bot user agents to whitelist
940
+ LEGITIMATE_BOTS = [
941
+ r'googlebot',
942
+ r'bingbot',
943
+ r'slurp', # Yahoo
944
+ r'duckduckbot',
945
+ r'baiduspider',
946
+ r'yandexbot',
947
+ r'facebookexternalhit',
948
+ r'twitterbot',
949
+ r'linkedinbot',
950
+ r'whatsapp',
951
+ r'telegrambot',
952
+ r'applebot',
953
+ r'pingdom',
954
+ r'uptimerobot',
955
+ r'statuscake',
956
+ r'site24x7',
957
+ ]
958
+
959
+ # Suspicious header combinations
960
+ SUSPICIOUS_COMBINATIONS = [
961
+ # High version HTTP with old user agent
962
+ {
963
+ 'condition': lambda headers: (
964
+ headers.get('SERVER_PROTOCOL', '').startswith('HTTP/2') and
965
+ 'mozilla/4.0' in headers.get('HTTP_USER_AGENT', '').lower()
966
+ ),
967
+ 'reason': 'HTTP/2 with old browser user agent'
968
+ },
969
+ # No Accept header but has User-Agent
970
+ {
971
+ 'condition': lambda headers: (
972
+ headers.get('HTTP_USER_AGENT') and
973
+ not headers.get('HTTP_ACCEPT')
974
+ ),
975
+ 'reason': 'User-Agent present but no Accept header'
976
+ },
977
+ # Accept */* only (very generic)
978
+ {
979
+ 'condition': lambda headers: (
980
+ headers.get('HTTP_ACCEPT') == '*/*' and
981
+ not any(h in headers for h in ['HTTP_ACCEPT_LANGUAGE', 'HTTP_ACCEPT_ENCODING'])
982
+ ),
983
+ 'reason': 'Generic Accept header without language/encoding'
984
+ },
985
+ # No browser-standard headers at all
986
+ {
987
+ 'condition': lambda headers: (
988
+ headers.get('HTTP_USER_AGENT') and
989
+ not any(headers.get(h) for h in ['HTTP_ACCEPT_LANGUAGE', 'HTTP_ACCEPT_ENCODING', 'HTTP_CONNECTION'])
990
+ ),
991
+ 'reason': 'Missing all browser-standard headers'
992
+ },
993
+ # Suspicious HTTP version patterns
994
+ {
995
+ 'condition': lambda headers: (
996
+ 'HTTP_USER_AGENT' in headers and
997
+ headers.get('SERVER_PROTOCOL') == 'HTTP/1.0' and
998
+ 'chrome' in headers.get('HTTP_USER_AGENT', '').lower()
999
+ ),
1000
+ 'reason': 'Modern browser with HTTP/1.0'
1001
+ }
1002
+ ]
1003
+
1004
+ def process_request(self, request):
1005
+ # Skip if request is exempted
1006
+ if is_exempt(request):
1007
+ return None
1008
+
1009
+ ip = get_ip(request)
1010
+
1011
+ # Check IP-level exemption
1012
+ from .storage import get_exemption_store
1013
+ exemption_store = get_exemption_store()
1014
+ if exemption_store.is_exempted(ip):
1015
+ return None
1016
+
1017
+ # Skip for static files and common paths
1018
+ if self._is_static_request(request):
1019
+ return None
1020
+
1021
+ # Get headers from request.META
1022
+ headers = request.META
1023
+
1024
+ # Check for missing required headers
1025
+ missing_headers = self._check_missing_headers(headers)
1026
+ if missing_headers:
1027
+ return self._block_request(ip, f"Missing required headers: {', '.join(missing_headers)}", request.path)
1028
+
1029
+ # Check for suspicious user agent
1030
+ suspicious_ua = self._check_user_agent(headers.get('HTTP_USER_AGENT', ''))
1031
+ if suspicious_ua:
1032
+ return self._block_request(ip, f"Suspicious user agent: {suspicious_ua}", request.path)
1033
+
1034
+ # Check for suspicious header combinations
1035
+ suspicious_combo = self._check_header_combinations(headers)
1036
+ if suspicious_combo:
1037
+ return self._block_request(ip, f"Suspicious headers: {suspicious_combo}", request.path)
1038
+
1039
+ # Check header quality score
1040
+ quality_score = self._calculate_header_quality(headers)
1041
+ if quality_score < 3: # Threshold for suspicion
1042
+ return self._block_request(ip, f"Low header quality score: {quality_score}", request.path)
1043
+
1044
+ return None
1045
+
1046
+ def _is_static_request(self, request):
1047
+ """Check if this is a request for static files"""
1048
+ static_extensions = ['.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg', '.woff', '.woff2', '.ttf']
1049
+ path = request.path.lower()
1050
+
1051
+ # Check file extensions
1052
+ if any(path.endswith(ext) for ext in static_extensions):
1053
+ return True
1054
+
1055
+ # Check static paths
1056
+ static_paths = ['/static/', '/media/', '/assets/', '/favicon.ico']
1057
+ if any(path.startswith(static_path) for static_path in static_paths):
1058
+ return True
1059
+
1060
+ return False
1061
+
1062
+ def _check_missing_headers(self, headers):
1063
+ """Check for missing required headers"""
1064
+ missing = []
1065
+
1066
+ for header in self.REQUIRED_HEADERS:
1067
+ if not headers.get(header):
1068
+ missing.append(header.replace('HTTP_', '').replace('_', '-').lower())
1069
+
1070
+ return missing
1071
+
1072
+ def _check_user_agent(self, user_agent):
1073
+ """Check if user agent is suspicious"""
1074
+ if not user_agent:
1075
+ return "Empty user agent"
1076
+
1077
+ user_agent_lower = user_agent.lower()
1078
+
1079
+ # Check if it's a legitimate bot first
1080
+ for legitimate_pattern in self.LEGITIMATE_BOTS:
1081
+ if re.search(legitimate_pattern, user_agent_lower):
1082
+ return None # Allow legitimate bots
1083
+
1084
+ # Check for suspicious patterns
1085
+ for suspicious_pattern in self.SUSPICIOUS_USER_AGENTS:
1086
+ if re.search(suspicious_pattern, user_agent_lower, re.IGNORECASE):
1087
+ return f"Pattern: {suspicious_pattern}"
1088
+
1089
+ # Check for very short user agents (likely fake)
1090
+ if len(user_agent) < 10:
1091
+ return "Too short"
1092
+
1093
+ # Check for very long user agents (possibly malicious)
1094
+ if len(user_agent) > 500:
1095
+ return "Too long"
1096
+
1097
+ return None
1098
+
1099
+ def _check_header_combinations(self, headers):
1100
+ """Check for suspicious header combinations"""
1101
+ for combo in self.SUSPICIOUS_COMBINATIONS:
1102
+ try:
1103
+ if combo['condition'](headers):
1104
+ return combo['reason']
1105
+ except Exception:
1106
+ # If condition check fails, skip it
1107
+ continue
1108
+
1109
+ return None
1110
+
1111
+ def _calculate_header_quality(self, headers):
1112
+ """Calculate a quality score based on header completeness"""
1113
+ score = 0
1114
+
1115
+ # Basic required headers (2 points each)
1116
+ if headers.get('HTTP_USER_AGENT'):
1117
+ score += 2
1118
+ if headers.get('HTTP_ACCEPT'):
1119
+ score += 2
1120
+
1121
+ # Browser-standard headers (1 point each)
1122
+ for header in self.BROWSER_HEADERS:
1123
+ if headers.get(header):
1124
+ score += 1
1125
+
1126
+ # Bonus points for realistic combinations
1127
+ if headers.get('HTTP_ACCEPT_LANGUAGE') and headers.get('HTTP_ACCEPT_ENCODING'):
1128
+ score += 1
1129
+
1130
+ if headers.get('HTTP_CONNECTION') == 'keep-alive':
1131
+ score += 1
1132
+
1133
+ # Check for realistic Accept header
1134
+ accept = headers.get('HTTP_ACCEPT', '')
1135
+ if 'text/html' in accept and 'application/xml' in accept:
1136
+ score += 1
1137
+
1138
+ return score
1139
+
1140
+ def _block_request(self, ip, reason, path):
1141
+ """Block the request and return error response"""
1142
+ from .storage import get_exemption_store
1143
+ exemption_store = get_exemption_store()
1144
+
1145
+ # Double-check exemption before blocking
1146
+ if not exemption_store.is_exempted(ip):
1147
+ BlacklistManager.block(ip, f"Header validation: {reason}")
1148
+
1149
+ # Check if actually blocked (exempted IPs won't be blocked)
1150
+ if BlacklistManager.is_blocked(ip):
1151
+ return JsonResponse({
1152
+ "error": "blocked",
1153
+ "message": "Request blocked due to suspicious headers",
1154
+ "path": path
1155
+ }, status=403)
1156
+
1157
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiwaf
3
- Version: 0.1.9.3.2
3
+ Version: 0.1.9.3.4
4
4
  Summary: AI-powered Web Application Firewall
5
5
  Home-page: https://github.com/aayushgauba/aiwaf
6
6
  Author: Aayush Gauba
@@ -34,6 +34,7 @@ Dynamic: requires-python
34
34
  - ✅ **Enhanced Configuration** - `AIWAF_ALLOWED_PATH_KEYWORDS` and `AIWAF_EXEMPT_KEYWORDS`
35
35
  - ✅ **Comprehensive HTTP Method Validation** - Blocks GET→POST-only, POST→GET-only, unsupported REST methods
36
36
  - ✅ **Enhanced Honeypot Protection** - POST validation & 4-minute page timeout with smart reload detection
37
+ - ✅ **HTTP Header Validation** - Comprehensive bot detection via header analysis and quality scoring
37
38
 
38
39
  ---
39
40
 
@@ -113,6 +114,50 @@ aiwaf/
113
114
  - **File‑Extension Probing Detection**
114
115
  Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
115
116
 
117
+ - **🆕 HTTP Header Validation**
118
+ Advanced header analysis to detect bots and malicious requests:
119
+ - **Missing Required Headers** - Blocks requests without User-Agent or Accept headers
120
+ - **Suspicious User-Agents** - Detects curl, wget, python-requests, automated tools
121
+ - **Header Quality Scoring** - Calculates realism score based on browser-standard headers
122
+ - **Legitimate Bot Whitelist** - Allows Googlebot, Bingbot, and other search engines
123
+ - **Header Combination Analysis** - Detects impossible combinations (HTTP/2 + old browsers)
124
+ - **Static File Exemption** - Skips validation for CSS, JS, images
125
+
126
+ ## 🛡️ Header Validation Middleware Features
127
+
128
+ The **HeaderValidationMiddleware** provides advanced bot detection through HTTP header analysis:
129
+
130
+ ### **What it detects:**
131
+ - **Missing Headers**: Requests without standard browser headers
132
+ - **Suspicious User-Agents**: WordPress scanners, exploit tools, basic scrapers
133
+ - **Bot-like Patterns**: Low header diversity, missing Accept headers
134
+ - **Quality Scoring**: 0-11 point system based on header completeness
135
+
136
+ ### **What it allows:**
137
+ - **Legitimate Browsers**: Chrome, Firefox, Safari, Edge with full headers
138
+ - **Search Engine Bots**: Google, Bing, DuckDuckGo, Yandex crawlers
139
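+ - **API Clients**: Allowed only if they send `Accept` plus browser-standard headers and their User-Agent matches none of the suspicious patterns (e.g. `python`, `java`, `node`, `axios`, `okhttp`, `postman`)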
140
+ - **Static Files**: CSS, JS, images (automatically exempted)
141
+
142
+ ### **Real-world effectiveness:**
143
+ ```
144
+ ✅ Blocks: WordPress scanners, exploit bots, basic scrapers
145
+ ✅ Allows: Real browsers, legitimate bots, API clients
146
+ ✅ Quality Score: 10/11 = Legitimate, 2/11 = Suspicious bot
147
+ ```
148
+
149
+ ### **Testing header validation:**
150
+ ```bash
151
+ # Test with curl (will be blocked - its default User-Agent matches the suspicious "curl" pattern)
152
+ curl http://yoursite.com/
153
+
154
+ # Test with browser (will be allowed - high quality headers)
155
+ # Visit site normally in Chrome/Firefox
156
+
157
+ # Check logs for header validation blocks
158
+ python manage.py aiwaf_logging --recent
159
+ ```
160
+
116
161
  - **Enhanced Timing-Based Honeypot**
117
162
  Advanced GET→POST timing analysis with comprehensive HTTP method validation:
118
163
  - Submit forms faster than `AIWAF_MIN_FORM_TIME` seconds (default: 1 second)
@@ -859,7 +904,3 @@ This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) f
859
904
 
860
905
  ---
861
906
 
862
- ## Credits
863
-
864
- **AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
865
- > "Let your firewall learn and evolve — keep your site a fortress." your Django `INSTALLED_APPS` to avoid setup errors.
@@ -1,8 +1,8 @@
1
- aiwaf/__init__.py,sha256=fq7wKNHdppvinnY5O4ZO5Tuh4nMAb55g0UzRWT5OMDY,220
1
+ aiwaf/__init__.py,sha256=Rnla6te9DNqQBP_HMEdhUdQdj9dd4ECcAr6F62Xs4-A,220
2
2
  aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
3
3
  aiwaf/blacklist_manager.py,sha256=LYCeKFB-7e_C6Bg2WeFJWFIIQlrfRMPuGp30ivrnhQY,1196
4
4
  aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
5
- aiwaf/middleware.py,sha256=yvnJyMCBPoWZX4MMi5q6bg77HnJyusRPxEyKPb5sRDE,40032
5
+ aiwaf/middleware.py,sha256=_Erl9GGf1nrfywfghX1NU4CTuveugDlyTgP3sxu6h_A,49928
6
6
  aiwaf/middleware_logger.py,sha256=LWZVDAnjh6CGESirA8eMbhGgJKB7lVDGRQqVroH95Lo,4742
7
7
  aiwaf/models.py,sha256=vQxgY19BDVMjoO903UNrTZC1pNoLltMU6wbyWPoAEns,2719
8
8
  aiwaf/storage.py,sha256=pUXE3bm7aRrABh_B6jTOBUQOYK67oQmHaR9EqyOasis,14038
@@ -29,8 +29,8 @@ aiwaf/management/commands/test_exemption_fix.py,sha256=ngyGaHUCmQQ6y--6j4q1viZJt
29
29
  aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
30
30
  aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
31
  aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
32
- aiwaf-0.1.9.3.2.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
33
- aiwaf-0.1.9.3.2.dist-info/METADATA,sha256=9RO4jqkSoRP3p-xZN4Zsofbwg8GEG2LlfOVMqMWhYrQ,28987
34
- aiwaf-0.1.9.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
- aiwaf-0.1.9.3.2.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
36
- aiwaf-0.1.9.3.2.dist-info/RECORD,,
32
+ aiwaf-0.1.9.3.4.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
33
+ aiwaf-0.1.9.3.4.dist-info/METADATA,sha256=bgaJr_xz1U7y_wXrB0xkgXn_LPJknN_9FeTN5Bahe3c,30790
34
+ aiwaf-0.1.9.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
+ aiwaf-0.1.9.3.4.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
36
+ aiwaf-0.1.9.3.4.dist-info/RECORD,,