aiwaf 0.1.9.3.2__py3-none-any.whl → 0.1.9.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiwaf might be problematic. Click here for more details.
- aiwaf/__init__.py +1 -1
- aiwaf/middleware.py +391 -104
- {aiwaf-0.1.9.3.2.dist-info → aiwaf-0.1.9.3.4.dist-info}/METADATA +46 -5
- {aiwaf-0.1.9.3.2.dist-info → aiwaf-0.1.9.3.4.dist-info}/RECORD +7 -7
- {aiwaf-0.1.9.3.2.dist-info → aiwaf-0.1.9.3.4.dist-info}/WHEEL +0 -0
- {aiwaf-0.1.9.3.2.dist-info → aiwaf-0.1.9.3.4.dist-info}/licenses/LICENSE +0 -0
- {aiwaf-0.1.9.3.2.dist-info → aiwaf-0.1.9.3.4.dist-info}/top_level.txt +0 -0
aiwaf/__init__.py
CHANGED
aiwaf/middleware.py
CHANGED
|
@@ -504,6 +504,51 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
504
504
|
|
|
505
505
|
return any(malicious_indicators)
|
|
506
506
|
|
|
507
|
+
def _is_scanning_path(self, path):
|
|
508
|
+
"""
|
|
509
|
+
Determine if a 404 path looks like automated scanning vs legitimate browsing.
|
|
510
|
+
Focus on common scanner patterns that indicate malicious intent.
|
|
511
|
+
"""
|
|
512
|
+
path_lower = path.lower()
|
|
513
|
+
|
|
514
|
+
# Common scanning patterns that are clear indicators of malicious activity
|
|
515
|
+
scanning_patterns = [
|
|
516
|
+
# WordPress scanning
|
|
517
|
+
'wp-admin', 'wp-content', 'wp-includes', 'wp-config', 'xmlrpc.php',
|
|
518
|
+
|
|
519
|
+
# Admin/config scanning
|
|
520
|
+
'admin', 'phpmyadmin', 'adminer', 'config', 'configuration',
|
|
521
|
+
'settings', 'setup', 'install', 'installer',
|
|
522
|
+
|
|
523
|
+
# Database/backup scanning
|
|
524
|
+
'backup', 'database', 'db', 'mysql', 'sql', 'dump',
|
|
525
|
+
|
|
526
|
+
# System files scanning
|
|
527
|
+
'.env', '.git', '.htaccess', '.htpasswd', 'passwd', 'shadow',
|
|
528
|
+
'robots.txt', 'sitemap.xml',
|
|
529
|
+
|
|
530
|
+
# Common vulnerabilities
|
|
531
|
+
'cgi-bin', 'scripts', 'shell', 'cmd', 'exec',
|
|
532
|
+
|
|
533
|
+
# File extensions that shouldn't exist on most sites
|
|
534
|
+
'.php', '.asp', '.aspx', '.jsp', '.cgi', '.pl'
|
|
535
|
+
]
|
|
536
|
+
|
|
537
|
+
# Check for scanning patterns
|
|
538
|
+
for pattern in scanning_patterns:
|
|
539
|
+
if pattern in path_lower:
|
|
540
|
+
return True
|
|
541
|
+
|
|
542
|
+
# Check for directory traversal attempts
|
|
543
|
+
if '../' in path or '..' in path:
|
|
544
|
+
return True
|
|
545
|
+
|
|
546
|
+
# Check for encoded attack patterns
|
|
547
|
+
if any(encoded in path for encoded in ['%2e%2e', '%252e', '%c0%ae']):
|
|
548
|
+
return True
|
|
549
|
+
|
|
550
|
+
return False
|
|
551
|
+
|
|
507
552
|
def process_request(self, request):
|
|
508
553
|
# First exemption check - early exit for exempt requests
|
|
509
554
|
if is_exempt(request):
|
|
@@ -564,27 +609,27 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
564
609
|
# Get recent behavior data for this IP to make intelligent blocking decision
|
|
565
610
|
recent_data = [d for d in data if now - d[0] <= 300] # Last 5 minutes
|
|
566
611
|
|
|
612
|
+
# Always initialize variables before use
|
|
613
|
+
recent_kw_hits = []
|
|
614
|
+
recent_404s = 0
|
|
615
|
+
recent_burst_counts = []
|
|
616
|
+
|
|
567
617
|
if recent_data:
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
recent_404s += 1
|
|
584
|
-
|
|
585
|
-
# Calculate burst for this entry (requests within 10 seconds)
|
|
586
|
-
entry_burst = sum(1 for (t, _, _, _) in recent_data if abs(entry_time - t) <= 10)
|
|
587
|
-
recent_burst_counts.append(entry_burst)
|
|
618
|
+
for entry_time, entry_path, entry_status, entry_resp_time in recent_data:
|
|
619
|
+
# Calculate keyword hits for this entry
|
|
620
|
+
entry_known_path = path_exists_in_django(entry_path)
|
|
621
|
+
entry_kw_hits = 0
|
|
622
|
+
if not entry_known_path and not is_exempt_path(entry_path):
|
|
623
|
+
entry_kw_hits = sum(1 for kw in STATIC_KW if kw in entry_path.lower())
|
|
624
|
+
recent_kw_hits.append(entry_kw_hits)
|
|
625
|
+
|
|
626
|
+
# Count 404s
|
|
627
|
+
if entry_status == 404:
|
|
628
|
+
recent_404s += 1
|
|
629
|
+
|
|
630
|
+
# Calculate burst for this entry (requests within 10 seconds)
|
|
631
|
+
entry_burst = sum(1 for (t, _, _, _) in recent_data if abs(entry_time - t) <= 10)
|
|
632
|
+
recent_burst_counts.append(entry_burst)
|
|
588
633
|
|
|
589
634
|
# Calculate averages and maximums
|
|
590
635
|
avg_kw_hits = sum(recent_kw_hits) / len(recent_kw_hits) if recent_kw_hits else 0
|
|
@@ -592,28 +637,37 @@ class AIAnomalyMiddleware(MiddlewareMixin):
|
|
|
592
637
|
avg_burst = sum(recent_burst_counts) / len(recent_burst_counts) if recent_burst_counts else 0
|
|
593
638
|
total_requests = len(recent_data)
|
|
594
639
|
|
|
595
|
-
#
|
|
640
|
+
# Enhanced 404 analysis - focus on scanning patterns
|
|
641
|
+
scanning_404s = sum(1 for (_, path, status, _) in recent_data
|
|
642
|
+
if status == 404 and self._is_scanning_path(path))
|
|
643
|
+
legitimate_404s = max_404s - scanning_404s
|
|
644
|
+
|
|
645
|
+
# Don't block if it looks like legitimate behavior:
|
|
596
646
|
if (
|
|
597
|
-
avg_kw_hits <
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
647
|
+
avg_kw_hits < 3 and # Allow some keyword hits (increased from 2)
|
|
648
|
+
scanning_404s < 5 and # Focus on scanning 404s, not all 404s
|
|
649
|
+
legitimate_404s < 20 and # Allow more legitimate 404s (typos, old links)
|
|
650
|
+
avg_burst < 25 and # Allow higher burst (increased from 15)
|
|
651
|
+
total_requests < 150 # Allow more total requests (increased from 100)
|
|
601
652
|
):
|
|
602
653
|
# Anomalous but looks legitimate - don't block
|
|
603
654
|
pass
|
|
604
655
|
else:
|
|
605
656
|
# Double-check exemption before blocking
|
|
606
657
|
if not exemption_store.is_exempted(ip):
|
|
607
|
-
BlacklistManager.block(ip, f"AI anomaly +
|
|
658
|
+
BlacklistManager.block(ip, f"AI anomaly + scanning 404s (total:{max_404s}, scanning:{scanning_404s}, kw:{avg_kw_hits:.1f}, burst:{avg_burst:.1f})")
|
|
608
659
|
# Check if actually blocked (exempted IPs won't be blocked)
|
|
609
660
|
if BlacklistManager.is_blocked(ip):
|
|
610
661
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
611
662
|
else:
|
|
612
|
-
# No recent data to analyze - be more conservative
|
|
613
|
-
|
|
663
|
+
# No recent data to analyze - be more conservative
|
|
664
|
+
# Only block on multiple suspicious indicators, not single 404
|
|
665
|
+
current_scanning = self._is_scanning_path(request.path)
|
|
666
|
+
|
|
667
|
+
if kw_hits >= 3 and current_scanning: # Require both high keywords AND scanning pattern
|
|
614
668
|
# Double-check exemption before blocking
|
|
615
669
|
if not exemption_store.is_exempted(ip):
|
|
616
|
-
BlacklistManager.block(ip, "AI anomaly +
|
|
670
|
+
BlacklistManager.block(ip, f"AI anomaly + scanning behavior (kw:{kw_hits}, scanning_path:{request.path})")
|
|
617
671
|
if BlacklistManager.is_blocked(ip):
|
|
618
672
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
619
673
|
|
|
@@ -644,10 +698,13 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
|
|
|
644
698
|
MAX_PAGE_TIME = getattr(settings, "AIWAF_MAX_PAGE_TIME", 240) # 4 minutes default
|
|
645
699
|
|
|
646
700
|
def _view_accepts_method(self, request, method):
|
|
647
|
-
"""
|
|
701
|
+
"""
|
|
702
|
+
Check if the current view accepts the specified HTTP method.
|
|
703
|
+
Be very conservative - only block when we're absolutely certain.
|
|
704
|
+
Handle decorator issues by being permissive when detection fails.
|
|
705
|
+
"""
|
|
648
706
|
try:
|
|
649
707
|
from django.urls import resolve
|
|
650
|
-
from django.urls.resolvers import URLResolver, URLPattern
|
|
651
708
|
|
|
652
709
|
# Resolve the current URL to get the view
|
|
653
710
|
resolved = resolve(request.path)
|
|
@@ -657,12 +714,12 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
|
|
|
657
714
|
if hasattr(view_func, 'cls'):
|
|
658
715
|
view_class = view_func.cls
|
|
659
716
|
|
|
660
|
-
# Check http_method_names attribute (most reliable)
|
|
717
|
+
# Check http_method_names attribute (most reliable for CBVs)
|
|
661
718
|
if hasattr(view_class, 'http_method_names'):
|
|
662
719
|
allowed_methods = [m.upper() for m in view_class.http_method_names]
|
|
663
720
|
return method.upper() in allowed_methods
|
|
664
721
|
|
|
665
|
-
#
|
|
722
|
+
# For CBVs without http_method_names, check for method handlers
|
|
666
723
|
method_handlers = {
|
|
667
724
|
'GET': ['get'],
|
|
668
725
|
'POST': ['post', 'form_valid', 'form_invalid'],
|
|
@@ -674,76 +731,30 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
|
|
|
674
731
|
if method.upper() in method_handlers:
|
|
675
732
|
handlers = method_handlers[method.upper()]
|
|
676
733
|
has_handler = any(hasattr(view_class, handler) for handler in handlers)
|
|
677
|
-
|
|
678
|
-
return True
|
|
679
|
-
|
|
680
|
-
# If no handler found, check if it's a common method that should be rejected
|
|
681
|
-
if method.upper() in ['GET', 'POST', 'PUT', 'DELETE', 'PATCH']:
|
|
682
|
-
return False
|
|
734
|
+
return has_handler
|
|
683
735
|
|
|
684
|
-
# Default
|
|
736
|
+
# Default for CBVs: be permissive
|
|
685
737
|
return True
|
|
686
738
|
|
|
687
|
-
# Handle function-based views
|
|
739
|
+
# Handle function-based views (including decorated ones)
|
|
688
740
|
else:
|
|
689
|
-
#
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
741
|
+
# Try to unwrap decorators to get the actual view function
|
|
742
|
+
actual_func = view_func
|
|
743
|
+
while hasattr(actual_func, '__wrapped__'):
|
|
744
|
+
actual_func = actual_func.__wrapped__
|
|
693
745
|
|
|
694
|
-
#
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
method_upper = method.upper()
|
|
699
|
-
|
|
700
|
-
# Look for method handling in the source
|
|
701
|
-
if f'request.method' in source and method_upper in source:
|
|
702
|
-
return True
|
|
703
|
-
|
|
704
|
-
# Look for method-specific patterns
|
|
705
|
-
method_patterns = {
|
|
706
|
-
'GET': ['request.GET', 'GET'],
|
|
707
|
-
'POST': ['request.POST', 'POST', 'form.is_valid()'],
|
|
708
|
-
'PUT': ['PUT', 'request.PUT'],
|
|
709
|
-
'DELETE': ['DELETE', 'request.DELETE']
|
|
710
|
-
}
|
|
711
|
-
|
|
712
|
-
if method.upper() in method_patterns:
|
|
713
|
-
patterns = method_patterns[method.upper()]
|
|
714
|
-
if any(pattern in source for pattern in patterns):
|
|
715
|
-
return True
|
|
716
|
-
|
|
717
|
-
except (OSError, TypeError):
|
|
718
|
-
# Can't get source, make educated guess
|
|
719
|
-
pass
|
|
746
|
+
# Check if the actual function has explicit allowed methods
|
|
747
|
+
if hasattr(actual_func, 'http_method_names'):
|
|
748
|
+
allowed_methods = [m.upper() for m in actual_func.http_method_names]
|
|
749
|
+
return method.upper() in allowed_methods
|
|
720
750
|
|
|
721
|
-
#
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
# POST-only patterns
|
|
726
|
-
post_only_patterns = ['create', 'submit', 'upload', 'process']
|
|
727
|
-
# GET-only patterns
|
|
728
|
-
get_only_patterns = ['list', 'detail', 'view', 'display']
|
|
729
|
-
|
|
730
|
-
if method.upper() == 'POST':
|
|
731
|
-
if any(pattern in url_name_lower for pattern in post_only_patterns):
|
|
732
|
-
return True
|
|
733
|
-
if any(pattern in url_name_lower for pattern in get_only_patterns):
|
|
734
|
-
return False
|
|
735
|
-
elif method.upper() == 'GET':
|
|
736
|
-
if any(pattern in url_name_lower for pattern in get_only_patterns):
|
|
737
|
-
return True
|
|
738
|
-
if any(pattern in url_name_lower for pattern in post_only_patterns):
|
|
739
|
-
return False
|
|
740
|
-
|
|
741
|
-
# Default: assume function-based views accept common methods
|
|
742
|
-
return method.upper() in ['GET', 'POST', 'HEAD', 'OPTIONS']
|
|
751
|
+
# For function-based views, be very conservative
|
|
752
|
+
# Most Django views accept both GET and POST, so default to allowing
|
|
753
|
+
return True
|
|
743
754
|
|
|
744
755
|
except Exception as e:
|
|
745
|
-
# If
|
|
746
|
-
|
|
756
|
+
# If anything fails (decorators, imports, etc.), be permissive
|
|
757
|
+
# Better to allow a legitimate request than block it
|
|
747
758
|
return True
|
|
748
759
|
|
|
749
760
|
def process_request(self, request):
|
|
@@ -759,16 +770,25 @@ class HoneypotTimingMiddleware(MiddlewareMixin):
|
|
|
759
770
|
return None
|
|
760
771
|
|
|
761
772
|
if request.method == "GET":
|
|
762
|
-
#
|
|
773
|
+
# CONSERVATIVE: Only block GET if we're absolutely certain it's POST-only
|
|
774
|
+
# Most Django views accept both GET and POST (forms show on GET, process on POST)
|
|
763
775
|
if not self._view_accepts_method(request, 'GET'):
|
|
764
|
-
#
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
776
|
+
# EXTRA CHECK: Only block if path looks like obvious POST-only API endpoint
|
|
777
|
+
path_lower = request.path.lower()
|
|
778
|
+
obvious_post_only = any(path_lower.endswith(pattern) for pattern in [
|
|
779
|
+
'/create/', '/submit/', '/upload/', '/delete/', '/process/'
|
|
780
|
+
])
|
|
781
|
+
|
|
782
|
+
if obvious_post_only:
|
|
783
|
+
# This is very likely a POST-only endpoint getting a GET
|
|
784
|
+
if not exemption_store.is_exempted(ip):
|
|
785
|
+
BlacklistManager.block(ip, f"GET to obvious POST-only endpoint: {request.path}")
|
|
786
|
+
if BlacklistManager.is_blocked(ip):
|
|
787
|
+
return JsonResponse({
|
|
788
|
+
"error": "blocked",
|
|
789
|
+
"message": f"GET not allowed for {request.path}"
|
|
790
|
+
}, status=405) # Method Not Allowed
|
|
791
|
+
# Otherwise, don't block - could be a decorated view or complex form
|
|
772
792
|
|
|
773
793
|
# Store timestamp for this IP's GET request
|
|
774
794
|
# Use a general key for the IP, not path-specific
|
|
@@ -868,3 +888,270 @@ class UUIDTamperMiddleware(MiddlewareMixin):
|
|
|
868
888
|
# Check if actually blocked (exempted IPs won't be blocked)
|
|
869
889
|
if BlacklistManager.is_blocked(ip):
|
|
870
890
|
return JsonResponse({"error": "blocked"}, status=403)
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
class HeaderValidationMiddleware(MiddlewareMixin):
    """
    Validates HTTP headers to detect bots and malicious requests.

    Checks run in this order and the first one that fails ends processing:
    missing required headers, suspicious User-Agent, suspicious header
    combinations, and finally an overall header quality score. A failing
    check blacklists the client IP (unless it is exempted) and answers with
    a 403 JSON response.
    """

    # Standard browser headers that legitimate requests should have
    REQUIRED_HEADERS = [
        'HTTP_USER_AGENT',
        'HTTP_ACCEPT',
    ]

    # Headers that browsers typically send
    BROWSER_HEADERS = [
        'HTTP_ACCEPT_LANGUAGE',
        'HTTP_ACCEPT_ENCODING',
        'HTTP_CONNECTION',
        'HTTP_CACHE_CONTROL',
    ]

    # Suspicious User-Agent patterns (regular expressions, searched
    # case-insensitively anywhere in the User-Agent string)
    SUSPICIOUS_USER_AGENTS = [
        r'bot',
        r'crawler',
        r'spider',
        r'scraper',
        r'curl',
        r'wget',
        r'python',
        r'java',
        r'node',
        r'go-http',
        r'axios',
        r'okhttp',
        r'libwww',
        r'lwp-trivial',
        r'mechanize',
        r'requests',
        r'urllib',
        r'httpie',
        r'postman',
        r'insomnia',
        r'^$',  # Empty user agent
        r'mozilla/4\.0$',  # Fake old browser
        r'mozilla/5\.0$',  # Incomplete mozilla string
    ]

    # Known legitimate bot user agents to whitelist
    LEGITIMATE_BOTS = [
        r'googlebot',
        r'bingbot',
        r'slurp',  # Yahoo
        r'duckduckbot',
        r'baiduspider',
        r'yandexbot',
        r'facebookexternalhit',
        r'twitterbot',
        r'linkedinbot',
        r'whatsapp',
        r'telegrambot',
        r'applebot',
        r'pingdom',
        r'uptimerobot',
        r'statuscake',
        r'site24x7',
    ]

    # Requests scoring below this value in _calculate_header_quality are
    # blocked. NOTE(review): with the default REQUIRED_HEADERS a request that
    # reaches the quality check already has User-Agent and Accept (2 points
    # each), so its score is at least 4 and this threshold cannot trigger
    # unless it is raised in a subclass.
    MIN_QUALITY_SCORE = 3

    # Suspicious header combinations
    SUSPICIOUS_COMBINATIONS = [
        # High version HTTP with old user agent
        {
            'condition': lambda headers: (
                headers.get('SERVER_PROTOCOL', '').startswith('HTTP/2') and
                'mozilla/4.0' in headers.get('HTTP_USER_AGENT', '').lower()
            ),
            'reason': 'HTTP/2 with old browser user agent'
        },
        # No Accept header but has User-Agent
        {
            'condition': lambda headers: (
                headers.get('HTTP_USER_AGENT') and
                not headers.get('HTTP_ACCEPT')
            ),
            'reason': 'User-Agent present but no Accept header'
        },
        # Accept */* only (very generic)
        {
            # Truthiness (not key membership) so that an empty header value
            # counts as absent, consistent with every other check here.
            'condition': lambda headers: (
                headers.get('HTTP_ACCEPT') == '*/*' and
                not any(headers.get(h) for h in ['HTTP_ACCEPT_LANGUAGE', 'HTTP_ACCEPT_ENCODING'])
            ),
            'reason': 'Generic Accept header without language/encoding'
        },
        # No browser-standard headers at all
        {
            'condition': lambda headers: (
                headers.get('HTTP_USER_AGENT') and
                not any(headers.get(h) for h in ['HTTP_ACCEPT_LANGUAGE', 'HTTP_ACCEPT_ENCODING', 'HTTP_CONNECTION'])
            ),
            'reason': 'Missing all browser-standard headers'
        },
        # Suspicious HTTP version patterns
        {
            'condition': lambda headers: (
                'HTTP_USER_AGENT' in headers and
                headers.get('SERVER_PROTOCOL') == 'HTTP/1.0' and
                'chrome' in headers.get('HTTP_USER_AGENT', '').lower()
            ),
            'reason': 'Modern browser with HTTP/1.0'
        }
    ]

    def process_request(self, request):
        """
        Run all header checks for an incoming request.

        Returns:
            None to let the request continue, or the JsonResponse produced by
            ``_block_request`` when a check fails and the IP ends up blocked.
        """
        # Skip if request is exempted
        if is_exempt(request):
            return None

        ip = get_ip(request)

        # Check IP-level exemption
        from .storage import get_exemption_store
        exemption_store = get_exemption_store()
        if exemption_store.is_exempted(ip):
            return None

        # Skip for static files and common paths
        if self._is_static_request(request):
            return None

        # Get headers from request.META
        headers = request.META

        # Check for missing required headers
        missing_headers = self._check_missing_headers(headers)
        if missing_headers:
            return self._block_request(ip, f"Missing required headers: {', '.join(missing_headers)}", request.path)

        # Check for suspicious user agent
        suspicious_ua = self._check_user_agent(headers.get('HTTP_USER_AGENT', ''))
        if suspicious_ua:
            return self._block_request(ip, f"Suspicious user agent: {suspicious_ua}", request.path)

        # Check for suspicious header combinations
        suspicious_combo = self._check_header_combinations(headers)
        if suspicious_combo:
            return self._block_request(ip, f"Suspicious headers: {suspicious_combo}", request.path)

        # Check header quality score
        quality_score = self._calculate_header_quality(headers)
        if quality_score < self.MIN_QUALITY_SCORE:  # Threshold for suspicion
            return self._block_request(ip, f"Low header quality score: {quality_score}", request.path)

        return None

    def _is_static_request(self, request):
        """Return True if the request path looks like a static file or static directory."""
        static_extensions = (
            '.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.ico', '.svg',
            '.woff', '.woff2', '.ttf',
        )
        static_paths = ('/static/', '/media/', '/assets/', '/favicon.ico')
        path = request.path.lower()

        # str.endswith / str.startswith accept a tuple of alternatives
        return path.endswith(static_extensions) or path.startswith(static_paths)

    def _check_missing_headers(self, headers):
        """
        Return the display names (e.g. ``user-agent``) of required headers
        that are absent or empty in ``headers``; an empty list means none
        are missing.
        """
        return [
            header.replace('HTTP_', '').replace('_', '-').lower()
            for header in self.REQUIRED_HEADERS
            if not headers.get(header)
        ]

    def _check_user_agent(self, user_agent):
        """
        Classify a User-Agent string.

        Returns:
            None if the User-Agent is acceptable (including whitelisted
            bots), otherwise a short string describing why it is suspicious.
        """
        if not user_agent:
            return "Empty user agent"

        user_agent_lower = user_agent.lower()

        # Check if it's a legitimate bot first. Searched case-insensitively,
        # like the suspicious patterns below, so a pattern written with
        # capitals still matches the lowercased User-Agent.
        if any(
            re.search(legitimate_pattern, user_agent_lower, re.IGNORECASE)
            for legitimate_pattern in self.LEGITIMATE_BOTS
        ):
            return None  # Allow legitimate bots

        # Check for suspicious patterns
        for suspicious_pattern in self.SUSPICIOUS_USER_AGENTS:
            if re.search(suspicious_pattern, user_agent_lower, re.IGNORECASE):
                return f"Pattern: {suspicious_pattern}"

        # Check for very short user agents (likely fake)
        if len(user_agent) < 10:
            return "Too short"

        # Check for very long user agents (possibly malicious)
        if len(user_agent) > 500:
            return "Too long"

        return None

    def _check_header_combinations(self, headers):
        """
        Return the reason string of the first matching entry in
        SUSPICIOUS_COMBINATIONS, or None when no combination matches.
        """
        for combo in self.SUSPICIOUS_COMBINATIONS:
            try:
                if combo['condition'](headers):
                    return combo['reason']
            except Exception:
                # Deliberate best-effort: a condition that raises (e.g. on an
                # unexpected header value type) is skipped, not fatal.
                continue

        return None

    def _calculate_header_quality(self, headers):
        """
        Calculate a quality score based on header completeness.

        Returns:
            An integer from 0 to 11; higher means more browser-like.
        """
        score = 0

        # Basic required headers (2 points each)
        if headers.get('HTTP_USER_AGENT'):
            score += 2
        if headers.get('HTTP_ACCEPT'):
            score += 2

        # Browser-standard headers (1 point each)
        score += sum(1 for header in self.BROWSER_HEADERS if headers.get(header))

        # Bonus points for realistic combinations
        if headers.get('HTTP_ACCEPT_LANGUAGE') and headers.get('HTTP_ACCEPT_ENCODING'):
            score += 1

        if headers.get('HTTP_CONNECTION') == 'keep-alive':
            score += 1

        # Check for realistic Accept header
        accept = headers.get('HTTP_ACCEPT', '')
        if 'text/html' in accept and 'application/xml' in accept:
            score += 1

        return score

    def _block_request(self, ip, reason, path):
        """
        Blacklist ``ip`` for ``reason`` and build the error response.

        Returns:
            A 403 JsonResponse if the IP is blocked afterwards, or None if
            it is not (for example because it is exempted).
        """
        from .storage import get_exemption_store
        exemption_store = get_exemption_store()

        # Double-check exemption before blocking
        if not exemption_store.is_exempted(ip):
            BlacklistManager.block(ip, f"Header validation: {reason}")

            # Check if actually blocked (exempted IPs won't be blocked)
            if BlacklistManager.is_blocked(ip):
                return JsonResponse({
                    "error": "blocked",
                    "message": "Request blocked due to suspicious headers",
                    "path": path
                }, status=403)

        return None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aiwaf
|
|
3
|
-
Version: 0.1.9.3.
|
|
3
|
+
Version: 0.1.9.3.4
|
|
4
4
|
Summary: AI-powered Web Application Firewall
|
|
5
5
|
Home-page: https://github.com/aayushgauba/aiwaf
|
|
6
6
|
Author: Aayush Gauba
|
|
@@ -34,6 +34,7 @@ Dynamic: requires-python
|
|
|
34
34
|
- ✅ **Enhanced Configuration** - `AIWAF_ALLOWED_PATH_KEYWORDS` and `AIWAF_EXEMPT_KEYWORDS`
|
|
35
35
|
- ✅ **Comprehensive HTTP Method Validation** - Blocks GET→POST-only, POST→GET-only, unsupported REST methods
|
|
36
36
|
- ✅ **Enhanced Honeypot Protection** - POST validation & 4-minute page timeout with smart reload detection
|
|
37
|
+
- ✅ **HTTP Header Validation** - Comprehensive bot detection via header analysis and quality scoring
|
|
37
38
|
|
|
38
39
|
---
|
|
39
40
|
|
|
@@ -113,6 +114,50 @@ aiwaf/
|
|
|
113
114
|
- **File‑Extension Probing Detection**
|
|
114
115
|
Tracks repeated 404s on common extensions (e.g. `.php`, `.asp`) and blocks IPs.
|
|
115
116
|
|
|
117
|
+
- **🆕 HTTP Header Validation**
|
|
118
|
+
Advanced header analysis to detect bots and malicious requests:
|
|
119
|
+
- **Missing Required Headers** - Blocks requests without User-Agent or Accept headers
|
|
120
|
+
- **Suspicious User-Agents** - Detects curl, wget, python-requests, automated tools
|
|
121
|
+
- **Header Quality Scoring** - Calculates realism score based on browser-standard headers
|
|
122
|
+
- **Legitimate Bot Whitelist** - Allows Googlebot, Bingbot, and other search engines
|
|
123
|
+
- **Header Combination Analysis** - Detects impossible combinations (HTTP/2 + old browsers)
|
|
124
|
+
- **Static File Exemption** - Skips validation for CSS, JS, images
|
|
125
|
+
|
|
126
|
+
## 🛡️ Header Validation Middleware Features
|
|
127
|
+
|
|
128
|
+
The **HeaderValidationMiddleware** provides advanced bot detection through HTTP header analysis:
|
|
129
|
+
|
|
130
|
+
### **What it detects:**
|
|
131
|
+
- **Missing Headers**: Requests without standard browser headers
|
|
132
|
+
- **Suspicious User-Agents**: WordPress scanners, exploit tools, basic scrapers
|
|
133
|
+
- **Bot-like Patterns**: Low header diversity, missing Accept headers
|
|
134
|
+
- **Quality Scoring**: 0-11 point system based on header completeness
|
|
135
|
+
|
|
136
|
+
### **What it allows:**
|
|
137
|
+
- **Legitimate Browsers**: Chrome, Firefox, Safari, Edge with full headers
|
|
138
|
+
- **Search Engine Bots**: Google, Bing, DuckDuckGo, Yandex crawlers
|
|
139
|
+
- **API Clients**: Properly identified with good headers
|
|
140
|
+
- **Static Files**: CSS, JS, images (automatically exempted)
|
|
141
|
+
|
|
142
|
+
### **Real-world effectiveness:**
|
|
143
|
+
```
|
|
144
|
+
✅ Blocks: WordPress scanners, exploit bots, basic scrapers
|
|
145
|
+
✅ Allows: Real browsers, legitimate bots, API clients
|
|
146
|
+
✅ Quality Score: 10/11 = Legitimate, 2/11 = Suspicious bot
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### **Testing header validation:**
|
|
150
|
+
```bash
|
|
151
|
+
# Test with curl (will be blocked - "curl" matches a suspicious User-Agent pattern)
|
|
152
|
+
curl http://yoursite.com/
|
|
153
|
+
|
|
154
|
+
# Test with browser (will be allowed - high quality headers)
|
|
155
|
+
# Visit site normally in Chrome/Firefox
|
|
156
|
+
|
|
157
|
+
# Check logs for header validation blocks
|
|
158
|
+
python manage.py aiwaf_logging --recent
|
|
159
|
+
```
|
|
160
|
+
|
|
116
161
|
- **Enhanced Timing-Based Honeypot**
|
|
117
162
|
Advanced GET→POST timing analysis with comprehensive HTTP method validation:
|
|
118
163
|
- Submit forms faster than `AIWAF_MIN_FORM_TIME` seconds (default: 1 second)
|
|
@@ -859,7 +904,3 @@ This project is licensed under the **MIT License**. See the [LICENSE](LICENSE) f
|
|
|
859
904
|
|
|
860
905
|
---
|
|
861
906
|
|
|
862
|
-
## Credits
|
|
863
|
-
|
|
864
|
-
**AI‑WAF** by [Aayush Gauba](https://github.com/aayushgauba)
|
|
865
|
-
> "Let your firewall learn and evolve — keep your site a fortress." your Django `INSTALLED_APPS` to avoid setup errors.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
aiwaf/__init__.py,sha256=
|
|
1
|
+
aiwaf/__init__.py,sha256=Rnla6te9DNqQBP_HMEdhUdQdj9dd4ECcAr6F62Xs4-A,220
|
|
2
2
|
aiwaf/apps.py,sha256=nCez-Ptlv2kaEk5HenA8b1pATz1VfhrHP1344gwcY1A,142
|
|
3
3
|
aiwaf/blacklist_manager.py,sha256=LYCeKFB-7e_C6Bg2WeFJWFIIQlrfRMPuGp30ivrnhQY,1196
|
|
4
4
|
aiwaf/decorators.py,sha256=IUKOdM_gdroffImRZep1g1wT6gNqD10zGwcp28hsJCs,825
|
|
5
|
-
aiwaf/middleware.py,sha256=
|
|
5
|
+
aiwaf/middleware.py,sha256=_Erl9GGf1nrfywfghX1NU4CTuveugDlyTgP3sxu6h_A,49928
|
|
6
6
|
aiwaf/middleware_logger.py,sha256=LWZVDAnjh6CGESirA8eMbhGgJKB7lVDGRQqVroH95Lo,4742
|
|
7
7
|
aiwaf/models.py,sha256=vQxgY19BDVMjoO903UNrTZC1pNoLltMU6wbyWPoAEns,2719
|
|
8
8
|
aiwaf/storage.py,sha256=pUXE3bm7aRrABh_B6jTOBUQOYK67oQmHaR9EqyOasis,14038
|
|
@@ -29,8 +29,8 @@ aiwaf/management/commands/test_exemption_fix.py,sha256=ngyGaHUCmQQ6y--6j4q1viZJt
|
|
|
29
29
|
aiwaf/resources/model.pkl,sha256=5t6h9BX8yoh2xct85MXOO60jdlWyg1APskUOW0jZE1Y,1288265
|
|
30
30
|
aiwaf/templatetags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
31
|
aiwaf/templatetags/aiwaf_tags.py,sha256=XXfb7Tl4DjU3Sc40GbqdaqOEtKTUKELBEk58u83wBNw,357
|
|
32
|
-
aiwaf-0.1.9.3.
|
|
33
|
-
aiwaf-0.1.9.3.
|
|
34
|
-
aiwaf-0.1.9.3.
|
|
35
|
-
aiwaf-0.1.9.3.
|
|
36
|
-
aiwaf-0.1.9.3.
|
|
32
|
+
aiwaf-0.1.9.3.4.dist-info/licenses/LICENSE,sha256=Ir8PX4dxgAcdB0wqNPIkw84fzIIRKE75NoUil9RX0QU,1069
|
|
33
|
+
aiwaf-0.1.9.3.4.dist-info/METADATA,sha256=bgaJr_xz1U7y_wXrB0xkgXn_LPJknN_9FeTN5Bahe3c,30790
|
|
34
|
+
aiwaf-0.1.9.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
35
|
+
aiwaf-0.1.9.3.4.dist-info/top_level.txt,sha256=kU6EyjobT6UPCxuWpI_BvcHDG0I2tMgKaPlWzVxe2xI,6
|
|
36
|
+
aiwaf-0.1.9.3.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|